diff --git a/mdrsclient/client.py b/mdrsclient/client.py index 0ac1bb3..6b7ee5e 100644 --- a/mdrsclient/client.py +++ b/mdrsclient/client.py @@ -1,69 +1,47 @@ import os -from concurrent.futures import ThreadPoolExecutor from typing import Any from unicodedata import normalize from mdrsclient.api import DoiApi, FilesApi, FoldersApi, LaboratoriesApi, UsersApi -from mdrsclient.commands.base import BaseCommand +from mdrsclient.cache import CacheInterface from mdrsclient.connection import MDRSConnection from mdrsclient.exceptions import IllegalArgumentException, MDRSException, UnauthorizedException, UnexpectedException from mdrsclient.models import File, Folder, Laboratory, Token, User from mdrsclient.models.file import find_file -from mdrsclient.settings import CONCURRENT +from mdrsclient.services import MdrsService -class MdrsClient: +class MdrsClient(MdrsService): """Service layer client for MDRS.""" def __init__(self, connection: MDRSConnection): - self.connection = connection + super().__init__(connection) @classmethod - def from_remote(cls, remote: str) -> "MdrsClient": - return cls(BaseCommand._create_connection(remote)) - - def login(self, username: str, password: str) -> tuple[Token, User]: - user_api = UsersApi(self.connection) - token = user_api.token(username, password) - self.connection.token = token - user = user_api.current() - self.connection.user = user - return token, user - - def logout(self) -> None: - self.connection.logout() - - def whoami(self) -> User: - user_api = UsersApi(self.connection) - return user_api.current() - - def get_laboratories(self) -> list[Laboratory]: - laboratory_api = LaboratoriesApi(self.connection) - labs = laboratory_api.list() - self.connection.laboratories = labs - return list(labs) + def from_remote(cls, remote: str, cache: CacheInterface | None = None) -> "MdrsClient": + return cls(cls.create_connection(remote, cache)) def mkdir(self, remote_path: str) -> None: - remote, laboratory_name, r_path = BaseCommand._parse_remote_host_with_path(remote_path) + remote, laboratory_name, r_path = self.parse_remote_host_with_path(remote_path) r_path = r_path.rstrip("/") r_dirname = os.path.dirname(r_path) r_basename = os.path.basename(r_path) - laboratory = BaseCommand._find_laboratory(self.connection, laboratory_name) - parent_folder = BaseCommand._find_folder(self.connection, laboratory, r_dirname) - files = BaseCommand._find_files(self.connection, parent_folder.id) + laboratory = self.find_laboratory(laboratory_name) + parent_folder = self.find_folder(laboratory, r_dirname) + files = self.find_files(parent_folder.id) if parent_folder.find_sub_folder(r_basename) is not None or find_file(files, r_basename) is not None: raise IllegalArgumentException(f"Cannot create folder `{r_path}`: File exists.") folder_api = FoldersApi(self.connection) folder_api.create(normalize("NFC", r_basename), parent_folder.id) def rm(self, remote_path: str, is_recursive: bool = False) -> None: - remote, laboratory_name, r_path = BaseCommand._parse_remote_host_with_path(remote_path) + remote, laboratory_name, r_path = self.parse_remote_host_with_path(remote_path) r_path = r_path.rstrip("/") r_dirname = os.path.dirname(r_path) r_basename = os.path.basename(r_path) - laboratory = BaseCommand._find_laboratory(self.connection, laboratory_name) - parent_folder = BaseCommand._find_folder(self.connection, laboratory, r_dirname) - parent_files = BaseCommand._find_files(self.connection, parent_folder.id) + laboratory = self.find_laboratory(laboratory_name) + parent_folder = self.find_folder(laboratory, r_dirname) + parent_files = self.find_files(parent_folder.id) file = find_file(parent_files, r_basename) if file is not None: file_api = FilesApi(self.connection) @@ -78,13 +56,13 @@ class MdrsClient: folder_api.destroy(folder.id, True) def ls(self, remote_path: str, password: str | None = None) -> tuple[Folder, list[File]]: - folder, laboratory = BaseCommand._resolve_folder(self.connection, remote_path, password) - files = BaseCommand._find_files(self.connection, folder.id) + folder, laboratory = self.resolve_folder(remote_path, password) + files = self.find_files(folder.id) return folder, files def cp(self, src_path: str, dest_path: str, is_recursive: bool = False) -> None: - s_remote, s_laboratory_name, s_path = BaseCommand._parse_remote_host_with_path(src_path) - d_remote, d_laboratory_name, d_path = BaseCommand._parse_remote_host_with_path(dest_path) + s_remote, s_laboratory_name, s_path = self.parse_remote_host_with_path(src_path) + d_remote, d_laboratory_name, d_path = self.parse_remote_host_with_path(dest_path) if s_remote != d_remote: raise IllegalArgumentException("Remote host mismatched.") if s_laboratory_name != d_laboratory_name: @@ -98,11 +76,11 @@ class MdrsClient: else: d_dirname = os.path.dirname(d_path) d_basename = os.path.basename(d_path) - laboratory = BaseCommand._find_laboratory(self.connection, s_laboratory_name) - s_parent_folder = BaseCommand._find_folder(self.connection, laboratory, s_dirname) - s_parent_files = BaseCommand._find_files(self.connection, s_parent_folder.id) - d_parent_folder = BaseCommand._find_folder(self.connection, laboratory, d_dirname) - d_parent_files = BaseCommand._find_files(self.connection, d_parent_folder.id) + laboratory = self.find_laboratory(s_laboratory_name) + s_parent_folder = self.find_folder(laboratory, s_dirname) + s_parent_files = self.find_files(s_parent_folder.id) + d_parent_folder = self.find_folder(laboratory, d_dirname) + d_parent_files = self.find_files(d_parent_folder.id) s_file = find_file(s_parent_files, s_basename) if s_file is not None: d_file = find_file(d_parent_files, d_basename) @@ -132,8 +110,8 @@ class MdrsClient: folder_api.copy(s_folder, d_parent_folder.id, normalize("NFC", d_basename)) def mv(self, src_path: str, dest_path: str) -> None: - s_remote, s_laboratory_name, s_path = BaseCommand._parse_remote_host_with_path(src_path) - d_remote, d_laboratory_name, d_path = BaseCommand._parse_remote_host_with_path(dest_path) + s_remote, s_laboratory_name, s_path = self.parse_remote_host_with_path(src_path) + d_remote, d_laboratory_name, d_path = self.parse_remote_host_with_path(dest_path) if s_remote != d_remote: raise IllegalArgumentException("Remote host mismatched.") if s_laboratory_name != d_laboratory_name: @@ -147,11 +125,11 @@ class MdrsClient: else: d_dirname = os.path.dirname(d_path) d_basename = os.path.basename(d_path) - laboratory = BaseCommand._find_laboratory(self.connection, s_laboratory_name) - s_parent_folder = BaseCommand._find_folder(self.connection, laboratory, s_dirname) - s_parent_files = BaseCommand._find_files(self.connection, s_parent_folder.id) - d_parent_folder = BaseCommand._find_folder(self.connection, laboratory, d_dirname) - d_parent_files = BaseCommand._find_files(self.connection, d_parent_folder.id) + laboratory = self.find_laboratory(s_laboratory_name) + s_parent_folder = self.find_folder(laboratory, s_dirname) + s_parent_files = self.find_files(s_parent_folder.id) + d_parent_folder = self.find_folder(laboratory, d_dirname) + d_parent_files = self.find_files(d_parent_folder.id) s_file = find_file(s_parent_files, s_basename) if s_file is not None: d_file = find_file(d_parent_files, d_basename) @@ -181,36 +159,34 @@ class MdrsClient: def chacl( self, remote_path: str, access_level: int, is_recursive: bool = False, password: str | None = None ) -> None: - remote, laboratory_name, r_path = BaseCommand._parse_remote_host_with_path(remote_path) + remote, laboratory_name, r_path = self.parse_remote_host_with_path(remote_path) r_path = r_path.rstrip("/") - laboratory = BaseCommand._find_laboratory(self.connection, laboratory_name) - folder = BaseCommand._find_folder(self.connection, laboratory, r_path) + laboratory = self.find_laboratory(laboratory_name) + folder = self.find_folder(laboratory, r_path) folder_api = FoldersApi(self.connection) folder_api.acl(folder.id, access_level, is_recursive, password) def metadata(self, remote_path: str, password: str | None = None) -> dict: - folder, laboratory = BaseCommand._resolve_folder(self.connection, remote_path, password) + folder, laboratory = self.resolve_folder(remote_path, password) folder_api = FoldersApi(self.connection) return folder_api.metadata(folder.id) def file_metadata(self, remote_path: str, password: str | None = None) -> dict: - folder, laboratory, r_basename = BaseCommand._resolve_file(self.connection, remote_path, password) - files = BaseCommand._find_files(self.connection, folder.id) + folder, laboratory, r_basename = self.resolve_file(remote_path, password) + files = self.find_files(folder.id) file = find_file(files, r_basename) if file is None: raise IllegalArgumentException(f"File `{r_basename}` not found.") file_api = FilesApi(self.connection) return file_api.metadata(file) - def _create_connection(self, remote: str): - return self.connection - def upload( self, local_path: str, remote_path: str, is_recursive: bool = False, is_skip_if_exists: bool = False ) -> None: - from mdrsclient.commands.upload import UploadCommand + from mdrsclient.transfer import Uploader - UploadCommand._upload_logic(self.connection, local_path, remote_path, is_recursive, is_skip_if_exists) + uploader = Uploader(self) + uploader.upload(local_path, remote_path, is_recursive, is_skip_if_exists) def download( self, @@ -221,23 +197,10 @@ class MdrsClient: password: str | None = None, excludes: list[str] | None = None, ) -> None: - from mdrsclient.commands.download import DownloadCommand + from mdrsclient.transfer import Downloader - DownloadCommand._download_logic( - self.connection, remote_path, local_path, is_recursive, is_skip_if_exists, password, excludes or [] - ) - - def ls_command( - self, - remote_path: str, - password: str | None = None, - is_json: bool = False, - is_recursive: bool = False, - is_quiet: bool = False, - ) -> None: - from mdrsclient.commands.ls import LsCommand - - LsCommand._ls_logic(self.connection, remote_path, password, is_json, is_recursive, is_quiet) + downloader = Downloader(self) + downloader.download(remote_path, local_path, is_recursive, is_skip_if_exists, password, excludes) def version(self) -> str: from mdrsclient.__version__ import __version__ @@ -245,14 +208,24 @@ class MdrsClient: return f"mdrs {__version__}" def config_create(self, remote: str, url: str) -> None: - from mdrsclient.commands.config import ConfigCommand + from mdrsclient.config import ConfigFile - ConfigCommand.create(remote, url) + remote = self.parse_remote_host(remote) + config = ConfigFile(remote) + if config.url is not None: + raise IllegalArgumentException(f"Remote host `{remote}` is already exists.") + else: + config.url = url def config_update(self, remote: str, url: str) -> None: - from mdrsclient.commands.config import ConfigCommand + from mdrsclient.config import ConfigFile - ConfigCommand.update(remote, url) + remote = self.parse_remote_host(remote) + config = ConfigFile(remote) + if config.url is None: + raise IllegalArgumentException(f"Remote host `{remote}` is not exists.") + else: + config.url = url def config_list(self) -> list: from mdrsclient.config import ConfigFile @@ -261,6 +234,11 @@ class MdrsClient: return config.list() def config_delete(self, remote: str) -> None: - from mdrsclient.commands.config import ConfigCommand + from mdrsclient.config import ConfigFile - ConfigCommand.delete(remote) + remote = self.parse_remote_host(remote) + config = ConfigFile(remote) + if config.url is None: + raise IllegalArgumentException(f"Remote host `{remote}` is not exists.") + else: + del config.url diff --git a/mdrsclient/commands/base.py b/mdrsclient/commands/base.py index d60f4de..71e37a4 100644 --- a/mdrsclient/commands/base.py +++ b/mdrsclient/commands/base.py @@ -1,20 +1,7 @@ -import os -import re from abc import ABC, abstractmethod from typing import Any -from unicodedata import normalize -from mdrsclient.api import DoiApi, FilesApi, FoldersApi, LaboratoriesApi -from mdrsclient.config import ConfigFile -from mdrsclient.connection import MDRSConnection -from mdrsclient.exceptions import ( - IllegalArgumentException, - MissingConfigurationException, - UnauthorizedException, - UnexpectedException, -) -from mdrsclient.models import File, Folder, Laboratory -from mdrsclient.utils import page_num_from_url +from mdrsclient.exceptions import UnexpectedException class BaseCommand(ABC): @@ -22,246 +9,3 @@ class BaseCommand(ABC): @abstractmethod def register(cls, parsers: Any) -> None: raise UnexpectedException("Not implemented.") - - @classmethod - def _create_connection(cls, remote: str) -> MDRSConnection: - config = ConfigFile(remote) - if config.url is None: - raise MissingConfigurationException(f"Remote host `{remote}` is not found.") - return MDRSConnection(config.remote, config.url) - - @classmethod - def _find_laboratory(cls, connection: MDRSConnection, name: str) -> Laboratory: - if connection.laboratories.empty() or connection.token is not None and connection.token.is_expired: - laboratory_api = LaboratoriesApi(connection) - connection.laboratories = laboratory_api.list() - laboratory = connection.laboratories.find_by_name(name) - if laboratory is None: - raise IllegalArgumentException(f"Laboratory `{name}` not found.") - return laboratory - - @classmethod - def _find_folder( - cls, connection: MDRSConnection, laboratory: Laboratory, path: str, password: str | None = None - ) -> Folder: - folder_api = FoldersApi(connection) - folders = folder_api.list(laboratory.id, normalize("NFC", path)) - if len(folders) != 1: - raise UnexpectedException(f"Folder `{path}` not found.") - if folders[0].lock: - if password is None: - raise UnauthorizedException(f"Folder `{path}` is locked.") - folder_api.auth(folders[0].id, password) - return folder_api.retrieve(folders[0].id) - - @classmethod - def _find_files(cls, connection: MDRSConnection, folder_id: str) -> list[File]: - files_api = FilesApi(connection) - page = 1 - results_file = [] - while page: - result = files_api.list(folder_id, page) - results_file.extend(result.results) - page = 0 - if result.next: - page = page_num_from_url(result.next) - return results_file - - @classmethod - def _parse_remote_host(cls, path: str) -> str: - path_array = path.split(":") - remote_host = path_array[0] - if len(path_array) == 2 and path_array[1] != "" or len(path_array) > 2: - raise IllegalArgumentException("Invalid remote host") - return remote_host - - @classmethod - def _parse_remote_host_with_path(cls, path: str) -> tuple[str, str, str]: - path = re.sub(r"//+|/\./+|/\.$", "/", path) - if re.search(r"/\.\./|/\.\.$", path) is not None: - raise IllegalArgumentException("Path traversal found.") - path_array = path.split(":") - if len(path_array) != 2: - raise IllegalArgumentException("Invalid remote host.") - remote_host = path_array[0] - folder_array = path_array[1].split("/") - is_absolute_path = folder_array[0] == "" - if not is_absolute_path: - raise IllegalArgumentException("Must be absolute paths.") - del folder_array[0] - if len(folder_array) == 0: - laboratory = "" - folder = "" - else: - laboratory = folder_array.pop(0) - folder = "/" + "/".join(folder_array) - return (remote_host, laboratory, folder) - - # ------------------------------------------------------------------ - # DOI helpers - # ------------------------------------------------------------------ - - @staticmethod - def _is_doi(path_component: str) -> bool: - """Return True if path_component looks like a DOI string. - - A DOI is recognised as a string that starts with ``10.`` and - contains a ``/``. - """ - return path_component.startswith("10.") and "/" in path_component - - @staticmethod - def _doi_suffix_id(doi: str) -> str: - """Extract the internal system ID from a full DOI string. - - MDRS uses the segment after the last ``.`` in the suffix (the part - after the ``/``) as its identifier. - Example: ``10.xxxx/prefix.20230511-001`` → ``20230511-001``. - If there is no ``.`` in the suffix, the whole suffix is returned. - Trailing slashes are stripped before processing. - """ - # Strip any trailing slash first. - doi = doi.rstrip("/") - slash_pos = doi.find("/") - if slash_pos == -1: - return doi - suffix = doi[slash_pos + 1 :] - dot_pos = suffix.rfind(".") - return suffix[dot_pos + 1 :] if dot_pos != -1 else suffix - - @staticmethod - def _split_doi_and_subpath(doi_with_path: str) -> tuple[str, str]: - """Split a DOI-with-optional-path string into (doi, subpath).""" - # Find the first '/' that separates registrant from suffix. - first_slash = doi_with_path.find("/") - if first_slash != -1: - after_suffix_start = first_slash + 1 - after_first = doi_with_path[after_suffix_start:] - # Find the next '/' inside the suffix portion — this starts the subpath. - second_slash = after_first.find("/") - if second_slash != -1: - doi_end = after_suffix_start + second_slash - doi = doi_with_path[:doi_end] - subpath = doi_with_path[doi_end:] # begins with "/" - # Treat a bare trailing slash as no subpath (root of DOI folder). - if subpath == "/": - return (doi, "") - else: - return (doi, subpath) - else: - # No second slash — the whole string is the DOI, no subpath. - return (doi_with_path, "") - else: - return (doi_with_path, "") - - @classmethod - def _parse_doi_remote_host(cls, path: str) -> tuple[str, str, str]: - """Parse ``remote:10.xxxx/prefix.ID[/optional/sub/path]`` into ``(remote, doi, subpath)``.""" - parts = path.split(":", 1) - if len(parts) != 2: - raise IllegalArgumentException("remote_path must be in the form 'remote:10.xxxx/prefix.ID'") - remote, doi_with_path = parts - if not cls._is_doi(doi_with_path): - raise IllegalArgumentException( - f"Path `{doi_with_path}` does not look like a DOI (must start with '10.' and contain '/')." - ) - doi, subpath = cls._split_doi_and_subpath(doi_with_path) - return (remote, doi, subpath) - - @classmethod - def _find_folder_by_doi( - cls, - connection: MDRSConnection, - doi: str, - password: str | None = None, - ) -> tuple[Folder, Laboratory]: - """Resolve a DOI to a (Folder, Laboratory) pair. - - Calls GET v3/doi/{id}/ to look up the folder ID, retrieves the full - folder detail (which carries ``laboratory_id``), and resolves the - laboratory from that field. - """ - doi_clean = doi.rstrip("/") - doi_id = cls._doi_suffix_id(doi_clean) - doi_api = DoiApi(connection) - doi_resp = doi_api.retrieve(doi_id) - - # Verify the returned DOI matches the one supplied (case-insensitive). - returned_doi = doi_resp.doi.rstrip("/") - if returned_doi.lower() != doi_clean.lower(): - raise IllegalArgumentException( - f"DOI mismatch: requested `{doi_clean}` but server returned `{returned_doi}`." - ) - - folder_api = FoldersApi(connection) - - # Retrieve full folder detail directly by ID; laboratory_id is here. - folder = folder_api.retrieve(doi_resp.folder_id) - - if folder.lock: - if password is None: - raise UnauthorizedException(f"Folder for DOI `{doi_clean}` is locked.") - folder_api.auth(doi_resp.folder.id, password) - - # Resolve laboratory using laboratory_id from the full folder detail. - lab_api = LaboratoriesApi(connection) - labs = lab_api.list() - lab = labs.find_by_id(folder.laboratory_id) - if lab is None: - raise UnexpectedException(f"Laboratory with id {folder.laboratory_id} not found.") - - connection.laboratories = labs - return (folder, lab) - - @classmethod - def _resolve_folder( - cls, - connection: MDRSConnection, - remote_path: str, - password: str | None = None, - ) -> tuple[Folder, Laboratory]: - """Resolve any remote path (normal or DOI) into a (Folder, Laboratory) pair.""" - path_component = remote_path.split(":", 1)[1] if ":" in remote_path else "" - if cls._is_doi(path_component): - remote, doi, subpath = cls._parse_doi_remote_host(remote_path) - doi_folder, laboratory = cls._find_folder_by_doi(connection, doi, password) - if not subpath: - return (doi_folder, laboratory) - else: - abs_path = doi_folder.path.rstrip("/") + subpath - folder = cls._find_folder(connection, laboratory, abs_path, password) - return (folder, laboratory) - else: - remote, laboratory_name, r_path = cls._parse_remote_host_with_path(remote_path) - laboratory = cls._find_laboratory(connection, laboratory_name) - folder = cls._find_folder(connection, laboratory, r_path, password) - return (folder, laboratory) - - @classmethod - def _resolve_file( - cls, - connection: MDRSConnection, - remote_path: str, - password: str | None = None, - ) -> tuple[Folder, Laboratory, str]: - """Resolve a remote path pointing to a file into the parent Folder, its Laboratory, and the file's basename.""" - path_component = remote_path.split(":", 1)[1] if ":" in remote_path else "" - if cls._is_doi(path_component): - remote, doi, subpath = cls._parse_doi_remote_host(remote_path) - doi_folder, laboratory = cls._find_folder_by_doi(connection, doi, password) - subpath_clean = subpath.rstrip("/") - if not subpath_clean: - raise IllegalArgumentException("DOI path must point to a file, not a folder.") - r_dirname = os.path.dirname(subpath_clean) - r_basename = os.path.basename(subpath_clean) - abs_path = doi_folder.path.rstrip("/") + r_dirname - parent_folder = cls._find_folder(connection, laboratory, abs_path, password) - return (parent_folder, laboratory, r_basename) - else: - remote, laboratory_name, r_path = cls._parse_remote_host_with_path(remote_path) - r_path = r_path.rstrip("/") - r_dirname = os.path.dirname(r_path) - r_basename = os.path.basename(r_path) - laboratory = cls._find_laboratory(connection, laboratory_name) - parent_folder = cls._find_folder(connection, laboratory, r_dirname, password) - return (parent_folder, laboratory, r_basename) diff --git a/mdrsclient/commands/config.py b/mdrsclient/commands/config.py index bb9ee6d..c18b387 100644 --- a/mdrsclient/commands/config.py +++ b/mdrsclient/commands/config.py @@ -2,8 +2,6 @@ from argparse import Namespace from typing import Any, Callable from mdrsclient.commands.base import BaseCommand -from mdrsclient.config import ConfigFile -from mdrsclient.exceptions import IllegalArgumentException class ConfigCommand(BaseCommand): @@ -52,35 +50,6 @@ class ConfigCommand(BaseCommand): @classmethod def func_list(cls, args: Namespace) -> None: - cls.list() - - @classmethod - def func_delete(cls, args: Namespace) -> None: - remote = str(args.remote) - from mdrsclient.client import MdrsClient - - MdrsClient(None).config_delete(remote) - - @classmethod - def create(cls, remote: str, url: str) -> None: - remote = cls._parse_remote_host(remote) - config = ConfigFile(remote) - if config.url is not None: - raise IllegalArgumentException(f"Remote host `{remote}` is already exists.") - else: - config.url = url - - @classmethod - def update(cls, remote: str, url: str) -> None: - remote = cls._parse_remote_host(remote) - config = ConfigFile(remote) - if config.url is None: - raise IllegalArgumentException(f"Remote host `{remote}` is not exists.") - else: - config.url = url - - @classmethod - def list(cls) -> None: from mdrsclient.client import MdrsClient client = MdrsClient(None) @@ -88,10 +57,8 @@ class ConfigCommand(BaseCommand): print(f"{remote}:\t{url}") @classmethod - def delete(cls, remote: str) -> None: - remote = cls._parse_remote_host(remote) - config = ConfigFile(remote) - if config.url is None: - raise IllegalArgumentException(f"Remote host `{remote}` is not exists.") - else: - del config.url + def func_delete(cls, args: Namespace) -> None: + remote = str(args.remote) + from mdrsclient.client import MdrsClient + + MdrsClient(None).config_delete(remote) diff --git a/mdrsclient/commands/download.py b/mdrsclient/commands/download.py index d913cf7..1a56003 100644 --- a/mdrsclient/commands/download.py +++ b/mdrsclient/commands/download.py @@ -1,30 +1,7 @@ -import os from argparse import Namespace -from concurrent.futures import ThreadPoolExecutor from typing import Any -from pydantic.dataclasses import dataclass - -from mdrsclient.api import FilesApi, FoldersApi from mdrsclient.commands.base import BaseCommand -from mdrsclient.connection import MDRSConnection -from mdrsclient.exceptions import IllegalArgumentException, UnexpectedException -from mdrsclient.models import File, Folder, Laboratory -from mdrsclient.models.file import find_file -from mdrsclient.settings import CONCURRENT - - -@dataclass(frozen=True) -class DownloadFileInfo: - file: File - path: str - - -@dataclass -class DownloadContext: - hasError: bool - isSkipIfExists: bool - files: list[DownloadFileInfo] class DownloadCommand(BaseCommand): @@ -37,7 +14,7 @@ class DownloadCommand(BaseCommand): download_parser.add_argument( "-s", "--skip-if-exists", - help="skip the download if file is already downloaded and file size is the same", + help="skip the download if file is already uploaded and file size is the same", action="store_true", ) download_parser.add_argument( @@ -74,160 +51,3 @@ class DownloadCommand(BaseCommand): client = MdrsClient.from_remote(remote) client.download(remote_path, local_path, is_recursive, is_skip_if_exists, password, excludes) return - - @classmethod - def _download_logic( - cls, - connection: MDRSConnection, - remote_path: str, - local_path: str, - is_recursive: bool, - is_skip_if_exists: bool, - password: str | None, - excludes: list[str], - ) -> None: - # Detect DOI path: "remote:10.xxxx/prefix.ID[/optional/sub/path]" - path_component = remote_path.split(":", 1)[1] if ":" in remote_path else "" - if cls._is_doi(path_component): - remote, doi, subpath = cls._parse_doi_remote_host(remote_path) - - l_dirname = os.path.realpath(local_path) - if not os.path.isdir(l_dirname): - raise IllegalArgumentException(f"Local directory `{local_path}` not found.") - doi_folder, laboratory = cls._find_folder_by_doi(connection, doi, password) - - subpath_clean = subpath.rstrip("/") - if not subpath_clean: - folder = doi_folder - is_folder = True - else: - r_dirname = os.path.dirname(subpath_clean) - r_basename = os.path.basename(subpath_clean) - abs_parent_path = doi_folder.path.rstrip("/") + r_dirname - r_parent_folder = cls._find_folder(connection, laboratory, abs_parent_path, password) - r_parent_files = cls._find_files(connection, r_parent_folder.id) - file = find_file(r_parent_files, r_basename) - if file is not None: - if cls.__check_excludes(excludes, laboratory, r_parent_folder, file): - return - context = DownloadContext(False, is_skip_if_exists, []) - l_path = os.path.join(l_dirname, r_basename) - context.files.append(DownloadFileInfo(file, l_path)) - cls.__multiple_download(connection, context) - return - else: - folder_simple = r_parent_folder.find_sub_folder(r_basename) - if folder_simple is None: - raise IllegalArgumentException(f"File or folder `{subpath_clean}` not found.") - folder = FoldersApi(connection).retrieve(folder_simple.id) - is_folder = True - - # For a DOI target the whole folder is the download target. - if not is_recursive: - # Non-recursive: download only the files at the top level of the DOI folder. - files = cls._find_files(connection, folder.id) - context = DownloadContext(False, is_skip_if_exists, []) - for file in files: - if cls.__check_excludes(excludes, laboratory, folder, file): - continue - l_path = os.path.join(l_dirname, file.name) - context.files.append(DownloadFileInfo(file, l_path)) - cls.__multiple_download(connection, context) - return - folder_api = FoldersApi(connection) - cls.__multiple_download_pickup_recursive_files( - connection, folder_api, laboratory, folder.id, l_dirname, excludes, is_skip_if_exists - ) - return - - remote, laboratory_name, r_path = cls._parse_remote_host_with_path(remote_path) - r_path = r_path.rstrip("/") - r_dirname = os.path.dirname(r_path) - r_basename = os.path.basename(r_path) - - l_dirname = os.path.realpath(local_path) - if not os.path.isdir(l_dirname): - raise IllegalArgumentException(f"Local directory `{local_path}` not found.") - laboratory = cls._find_laboratory(connection, laboratory_name) - r_parent_folder = cls._find_folder(connection, laboratory, r_dirname, password) - r_parent_files = cls._find_files(connection, r_parent_folder.id) - file = find_file(r_parent_files, r_basename) - if file is not None: - if cls.__check_excludes(excludes, laboratory, r_parent_folder, file): - return - context = DownloadContext(False, is_skip_if_exists, []) - l_path = os.path.join(l_dirname, r_basename) - context.files.append(DownloadFileInfo(file, l_path)) - cls.__multiple_download(connection, context) - else: - folder = r_parent_folder.find_sub_folder(r_basename) - if folder is None: - raise IllegalArgumentException(f"File or folder `{r_path}` not found.") - if not is_recursive: - raise IllegalArgumentException(f"Cannot download `{r_path}`: Is a folder.") - folder_api = FoldersApi(connection) - cls.__multiple_download_pickup_recursive_files( - connection, folder_api, laboratory, folder.id, l_dirname, excludes, is_skip_if_exists - ) - - @classmethod - def __multiple_download_pickup_recursive_files( - cls, - connection: MDRSConnection, - folder_api: FoldersApi, - laboratory: Laboratory, - folder_id: str, - basedir: str, - excludes: list[str], - is_skip_if_exists: bool, - ) -> None: - context = DownloadContext(False, is_skip_if_exists, []) - folder = folder_api.retrieve(folder_id) - files = cls._find_files(connection, folder.id) - dirname = os.path.join(basedir, folder.name) - if cls.__check_excludes(excludes, laboratory, folder, None): - return - if not os.path.exists(dirname): - os.makedirs(dirname) - print(dirname) - for file in files: - if cls.__check_excludes(excludes, laboratory, folder, file): - continue - path = os.path.join(dirname, file.name) - context.files.append(DownloadFileInfo(file, path)) - cls.__multiple_download(connection, context) - if context.hasError: - raise UnexpectedException("Some files failed to download.") - for sub_folder in folder.sub_folders: - cls.__multiple_download_pickup_recursive_files( - connection, folder_api, laboratory, sub_folder.id, dirname, excludes, is_skip_if_exists - ) - - @classmethod - def __multiple_download(cls, connection: MDRSConnection, context: DownloadContext) -> None: - file_api = FilesApi(connection) - with ThreadPoolExecutor(max_workers=CONCURRENT) as pool: - results = pool.map( - lambda x: cls.__multiple_download_worker(file_api, x, context.isSkipIfExists), context.files - ) - hasError = next(filter(lambda x: x is False, results), None) - if hasError is not None: - context.hasError = True - - @classmethod - def __multiple_download_worker(cls, file_api: FilesApi, info: DownloadFileInfo, is_skip_if_exists: bool) -> bool: - if not is_skip_if_exists or not os.path.exists(info.path) or info.file.size != os.path.getsize(info.path): - try: - file_api.download(info.file, info.path) - except Exception: - print(f"Failed: ${info.path}") - if os.path.isfile(info.path): - os.remove(info.path) - return False - print(info.path) - return True - - @classmethod - def __check_excludes(cls, excludes: list[str], laboratory: Laboratory, folder: Folder, file: File | None) -> bool: - path = f"/{laboratory.name}{folder.path}{file.name if file is not None else ''}".rstrip("/").lower() - return path in excludes diff --git a/mdrsclient/commands/labs.py b/mdrsclient/commands/labs.py index a4793bb..c7fbc5f 100644 --- a/mdrsclient/commands/labs.py +++ b/mdrsclient/commands/labs.py @@ -19,9 +19,9 @@ class LabsCommand(BaseCommand): @classmethod def labs(cls, remote: str) -> None: - remote_host = cls._parse_remote_host(remote) from mdrsclient.client import MdrsClient + remote_host = MdrsClient.parse_remote_host(remote) client = MdrsClient.from_remote(remote_host) laboratories = client.get_laboratories() label = {"id": "ID", "name": "Name", "pi_name": "PI", "full_name": "Laboratory"} diff --git a/mdrsclient/commands/login.py b/mdrsclient/commands/login.py index a2ed315..22a98f4 100644 --- a/mdrsclient/commands/login.py +++ b/mdrsclient/commands/login.py @@ -27,9 +27,9 @@ class LoginCommand(BaseCommand): @classmethod def login(cls, remote: str, username: str, password: str) -> None: - remote_host = cls._parse_remote_host(remote) from mdrsclient.client import MdrsClient + remote_host = MdrsClient.parse_remote_host(remote) client = MdrsClient.from_remote(remote_host) client.login(username, password) print("Login Successful") diff --git a/mdrsclient/commands/logout.py b/mdrsclient/commands/logout.py index 055ba94..4f95983 100644 --- a/mdrsclient/commands/logout.py +++ b/mdrsclient/commands/logout.py @@ -21,8 +21,8 @@ class LogoutCommand(BaseCommand): @classmethod def logout(cls, remote: str) -> None: - remote_host = cls._parse_remote_host(remote) from mdrsclient.client import MdrsClient + remote_host = MdrsClient.parse_remote_host(remote) client = MdrsClient.from_remote(remote_host) client.logout() diff --git a/mdrsclient/commands/ls.py b/mdrsclient/commands/ls.py index cfdd75e..a3ad62c 100644 --- a/mdrsclient/commands/ls.py +++ b/mdrsclient/commands/ls.py @@ -5,8 +5,8 @@ from typing import Any from pydantic.dataclasses import dataclass from mdrsclient.api import FilesApi, FoldersApi +from mdrsclient.client import MdrsClient from mdrsclient.commands.base import BaseCommand -from mdrsclient.connection import MDRSConnection from mdrsclient.exceptions import UnauthorizedException from mdrsclient.models import File, Folder, FolderSimple, Laboratory @@ -19,7 +19,7 @@ class Config: @dataclass(config=Config) class LsCommandContext: prefix: str - connection: MDRSConnection + client: MdrsClient laboratory: Laboratory password: str is_json: bool @@ -58,21 +58,27 @@ class LsCommand(BaseCommand): from mdrsclient.client import MdrsClient client = MdrsClient.from_remote(remote) - client.ls_command(remote_path, password, is_json, is_recursive, is_quiet) + cls._ls_logic(client, remote_path, password, is_json, is_recursive, is_quiet) return @classmethod def _ls_logic( - cls, connection, remote_path: str, password: str | None, is_json: bool, is_recursive: bool, is_quiet: bool + cls, + client: MdrsClient, + remote_path: str, + password: str | None, + is_json: bool, + is_recursive: bool, + is_quiet: bool, ) -> None: remote = remote_path.split(":", 1)[0] if ":" in remote_path else "" - folder, laboratory = cls._resolve_folder(connection, remote_path, password) + folder, laboratory = client.resolve_folder(remote_path, password) laboratory_name = laboratory.name - files = cls._find_files(connection, folder.id) + files = client.find_files(folder.id) context = LsCommandContext( f"{remote}:/{laboratory_name}", - connection, + client, laboratory, password if password is not None else "", is_json, @@ -102,7 +108,7 @@ class LsCommand(BaseCommand): for key in label.keys(): length[key] = len(label[key]) if not context.is_quiet else 0 for sub_folder in folder.sub_folders: - sub_laboratory = context.connection.laboratories.find_by_id(sub_folder.laboratory_id) + sub_laboratory = context.client.connection.laboratories.find_by_id(sub_folder.laboratory_id) sub_laboratory_name = sub_laboratory.name if sub_laboratory is not None else "(invalid)" length["acl"] = max(length["acl"], len(sub_folder.access_level_name)) length["laboratory"] = max(length["laboratory"], len(sub_laboratory_name)) @@ -147,12 +153,12 @@ class LsCommand(BaseCommand): if context.is_recursive: print("") for sub_folder in sorted(folder.sub_folders, key=lambda x: x.name): - folder_api = FoldersApi(context.connection) + folder_api = FoldersApi(context.client.connection) try: if sub_folder.lock: folder_api.auth(sub_folder.id, context.password) folder = folder_api.retrieve(sub_folder.id) - files = cls._find_files(context.connection, sub_folder.id) + files = context.client.find_files(sub_folder.id) cls._ls_plain(context, folder, files) except UnauthorizedException: pass @@ -174,7 +180,7 @@ class LsCommand(BaseCommand): "updated_at": folder.updated_at, } if isinstance(folder, Folder): - folder_api = FoldersApi(context.connection) + folder_api = FoldersApi(context.client.connection) data["metadata"] = folder_api.metadata(folder.id) if context.is_recursive: sub_folders: list[dict[str, Any]] = [] @@ -183,7 +189,7 @@ class LsCommand(BaseCommand): if sub_folder.lock: folder_api.auth(sub_folder.id, context.password) folder2 = folder_api.retrieve(sub_folder.id) - files2 = cls._find_files(context.connection, sub_folder.id) + files2 = context.client.find_files(sub_folder.id) sub_folders.append(cls._folder2dict(context, folder2, files2)) except UnauthorizedException: pass @@ -205,7 +211,7 @@ class LsCommand(BaseCommand): # "thumbnail": file.thumbnail, "description": file.description, "metadata": file.metadata, - "download_url": f"{context.connection.url}/{file.download_url}", + "download_url": f"{context.client.connection.url}/{file.download_url}", "created_at": file.created_at, "updated_at": file.updated_at, } @@ -213,5 +219,5 @@ class LsCommand(BaseCommand): @classmethod def _laboratory_name(cls, context: LsCommandContext, laboratory_id: int) -> str: - laboratory = context.connection.laboratories.find_by_id(laboratory_id) + laboratory = context.client.connection.laboratories.find_by_id(laboratory_id) return laboratory.name if laboratory is not None else "(invalid)" diff --git a/mdrsclient/commands/upload.py b/mdrsclient/commands/upload.py index 6101050..09770ea 100644 --- a/mdrsclient/commands/upload.py +++ b/mdrsclient/commands/upload.py @@ -1,25 +1,7 @@ -import os from argparse import Namespace -from concurrent.futures import ThreadPoolExecutor from typing import Any -from unicodedata import normalize -from pydantic.dataclasses import dataclass - -from mdrsclient.api import FilesApi, FoldersApi from mdrsclient.commands.base import BaseCommand -from mdrsclient.connection import MDRSConnection -from mdrsclient.exceptions import IllegalArgumentException, MDRSException -from mdrsclient.models import File, Folder -from mdrsclient.models.file import find_file -from mdrsclient.settings import CONCURRENT - - -@dataclass(frozen=True) -class UploadFileInfo: - folder: Folder - files: list[File] - path: str class UploadCommand(BaseCommand): @@ -55,78 +37,3 @@ class UploadCommand(BaseCommand): client = MdrsClient.from_remote(remote) client.upload(local_path, remote_path, is_recursive, is_skip_if_exists) return - - @classmethod - def _upload_logic( - cls, connection, local_path: str, remote_path: str, is_recursive: bool, is_skip_if_exists: bool - ) -> None: - remote, laboratory_name, r_path = cls._parse_remote_host_with_path(remote_path) - l_path = os.path.abspath(local_path) - if not os.path.exists(l_path): - raise IllegalArgumentException(f"File or directory `{local_path}` not found.") - - laboratory = cls._find_laboratory(connection, laboratory_name) - folder = cls._find_folder(connection, laboratory, r_path) - files = cls._find_files(connection, folder.id) - infos: list[UploadFileInfo] = [] - if os.path.isdir(l_path): - if not is_recursive: - raise IllegalArgumentException(f"Cannot upload `{local_path}`: Is a directory.") - folder_api = FoldersApi(connection) - folder_map: dict[str, Folder] = {} - folder_map[r_path] = folder - files_map: dict[str, list[File]] = {} - files_map[r_path] = files - l_basename = os.path.basename(l_path) - for dirpath, _, filenames in os.walk(l_path, followlinks=True): - sub = l_basename if dirpath == l_path else os.path.join(l_basename, os.path.relpath(dirpath, l_path)) - d_dirname = os.path.join(r_path, sub) - d_basename = os.path.basename(d_dirname) - # prepare destination parent path - d_parent_dirname = os.path.dirname(d_dirname) - if folder_map.get(d_parent_dirname) is None: - parent_folder = cls._find_folder(connection, laboratory, d_parent_dirname) - folder_map[d_parent_dirname] = parent_folder - parent_files = cls._find_files(connection, parent_folder.id) - files_map[d_parent_dirname] = parent_files - # prepare destination path - if folder_map.get(d_dirname) is None: - d_folder = folder_map[d_parent_dirname].find_sub_folder(d_basename) - if d_folder is None: - d_folder_id = folder_api.create(normalize("NFC", d_basename), folder_map[d_parent_dirname].id) - else: - d_folder_id = d_folder.id - print(d_dirname) - folder_map[d_dirname] = folder_api.retrieve(d_folder_id) - files_map[d_dirname] = cls._find_files(connection, d_folder_id) - if d_folder is None: - folder_map[d_parent_dirname].sub_folders.append(folder_map[d_dirname]) - # register upload file list - for filename in filenames: - infos.append( - UploadFileInfo(folder_map[d_dirname], files_map[d_dirname], os.path.join(dirpath, filename)) - ) - else: - infos.append(UploadFileInfo(folder, files, l_path)) - cls.__multiple_upload(connection, infos, is_skip_if_exists) - - @classmethod - def __multiple_upload( - cls, connection: MDRSConnection, infos: list[UploadFileInfo], is_skip_if_exists: bool - ) -> None: - file_api = FilesApi(connection) - with ThreadPoolExecutor(max_workers=CONCURRENT) as pool: - pool.map(lambda x: cls.__multiple_upload_worker(file_api, x, is_skip_if_exists), infos) - - @classmethod - def __multiple_upload_worker(cls, file_api: FilesApi, info: UploadFileInfo, is_skip_if_exists: bool) -> None: - basename = os.path.basename(info.path) - file = find_file(info.files, basename) - try: - if file is None: - file_api.create(info.folder.id, info.path) - elif not is_skip_if_exists or file.size != os.path.getsize(info.path): - file_api.update(file, info.path) - print(os.path.join(info.folder.path, basename)) - except MDRSException as e: - print(f"Error: {e}") diff --git a/mdrsclient/commands/whoami.py b/mdrsclient/commands/whoami.py index 7a99e36..e625d4e 100644 --- a/mdrsclient/commands/whoami.py +++ b/mdrsclient/commands/whoami.py @@ -23,9 +23,9 @@ class WhoamiCommand(BaseCommand): @classmethod def whoami(cls, remote: str) -> None: - remote_host = cls._parse_remote_host(remote) from mdrsclient.client import MdrsClient + remote_host = MdrsClient.parse_remote_host(remote) client = MdrsClient.from_remote(remote_host) if client.connection.token is not None and client.connection.token.is_expired: client.logout() diff --git a/mdrsclient/services.py b/mdrsclient/services.py index b4784d2..47fd718 100644 --- a/mdrsclient/services.py +++ b/mdrsclient/services.py @@ -1,9 +1,10 @@ -from typing import Any import os import re +from typing import Any from unicodedata import normalize from mdrsclient.api import DoiApi, FilesApi, FoldersApi, LaboratoriesApi, UsersApi +from mdrsclient.cache import CacheInterface from mdrsclient.config import ConfigFile from mdrsclient.connection import MDRSConnection from mdrsclient.exceptions import ( @@ -21,11 +22,11 @@ class MdrsService: self.connection = connection @classmethod - def create_connection(cls, remote: str) -> MDRSConnection: + def create_connection(cls, remote: str, cache: CacheInterface | None = None) -> MDRSConnection: config = ConfigFile(remote) if config.url is None: raise MissingConfigurationException(f"Remote host `{remote}` is not found.") - return MDRSConnection(config.remote, config.url) + return MDRSConnection(config.remote, config.url, cache=cache) def login(self, username: str, password: str) -> tuple[Token, User]: user_api = UsersApi(self.connection) diff --git a/mdrsclient/transfer.py b/mdrsclient/transfer.py new file mode 100644 index 0000000..d119557 --- /dev/null +++ b/mdrsclient/transfer.py @@ -0,0 +1,263 @@ +import os +from concurrent.futures import ThreadPoolExecutor +from typing import Any +from unicodedata import normalize + +from pydantic.dataclasses import dataclass + +from mdrsclient.api import FilesApi, FoldersApi +from mdrsclient.exceptions import IllegalArgumentException, MDRSException, UnexpectedException +from mdrsclient.models import File, Folder, Laboratory +from mdrsclient.models.file import find_file +from mdrsclient.settings import CONCURRENT + + +@dataclass(frozen=True) +class UploadFileInfo: + folder: Folder + files: list[File] + path: str + + +@dataclass(frozen=True) +class DownloadFileInfo: + file: File + path: str + + +@dataclass +class DownloadContext: + hasError: bool + isSkipIfExists: bool + files: list[DownloadFileInfo] + + +class Uploader: + def __init__(self, client: Any) -> None: + self.client = client + + def upload( + self, local_path: str, remote_path: str, is_recursive: bool = False, is_skip_if_exists: bool = False + ) -> None: + remote, laboratory_name, r_path = self.client.parse_remote_host_with_path(remote_path) + l_path = os.path.abspath(local_path) + if not os.path.exists(l_path): + raise IllegalArgumentException(f"File or directory `{local_path}` not found.") + + laboratory = self.client.find_laboratory(laboratory_name) + folder = self.client.find_folder(laboratory, r_path) + files = self.client.find_files(folder.id) + infos: list[UploadFileInfo] = [] + if os.path.isdir(l_path): + if not is_recursive: + raise IllegalArgumentException(f"Cannot upload `{local_path}`: Is a directory.") + folder_api = FoldersApi(self.client.connection) + folder_map: dict[str, Folder] = {} + folder_map[r_path] = folder + files_map: dict[str, list[File]] = {} + files_map[r_path] = files + l_basename = os.path.basename(l_path) + for dirpath, _, filenames in os.walk(l_path, followlinks=True): + sub = l_basename if dirpath == l_path else os.path.join(l_basename, os.path.relpath(dirpath, l_path)) + d_dirname = os.path.join(r_path, sub) + d_basename = os.path.basename(d_dirname) + # prepare destination parent path + d_parent_dirname = os.path.dirname(d_dirname) + if folder_map.get(d_parent_dirname) is None: + parent_folder = self.client.find_folder(laboratory, d_parent_dirname) + folder_map[d_parent_dirname] = parent_folder + parent_files = self.client.find_files(parent_folder.id) + files_map[d_parent_dirname] = parent_files + # prepare destination path + if folder_map.get(d_dirname) is None: + d_folder = folder_map[d_parent_dirname].find_sub_folder(d_basename) + if d_folder is None: + d_folder_id = folder_api.create(normalize("NFC", d_basename), folder_map[d_parent_dirname].id) + else: + d_folder_id = d_folder.id + print(d_dirname) + folder_map[d_dirname] = folder_api.retrieve(d_folder_id) + files_map[d_dirname] = self.client.find_files(d_folder_id) + if d_folder is None: + folder_map[d_parent_dirname].sub_folders.append(folder_map[d_dirname]) + # register upload file list + for filename in filenames: + infos.append( + UploadFileInfo(folder_map[d_dirname], files_map[d_dirname], os.path.join(dirpath, filename)) + ) + else: + infos.append(UploadFileInfo(folder, files, l_path)) + self.__multiple_upload(infos, is_skip_if_exists) + + def __multiple_upload(self, infos: list[UploadFileInfo], is_skip_if_exists: bool) -> None: + file_api = FilesApi(self.client.connection) + with ThreadPoolExecutor(max_workers=CONCURRENT) as pool: + pool.map(lambda x: self.__multiple_upload_worker(file_api, x, is_skip_if_exists), infos) + + def __multiple_upload_worker(self, file_api: FilesApi, info: UploadFileInfo, is_skip_if_exists: bool) -> None: + basename = os.path.basename(info.path) + file = find_file(info.files, basename) + try: + if file is None: + file_api.create(info.folder.id, info.path) + elif not is_skip_if_exists or file.size != os.path.getsize(info.path): + file_api.update(file, info.path) + print(os.path.join(info.folder.path, basename)) + except MDRSException as e: + print(f"Error: {e}") + + +class Downloader: + def __init__(self, client: Any) -> None: + self.client = client + + def download( + self, + remote_path: str, + local_path: str, + is_recursive: bool = False, + is_skip_if_exists: bool = False, + password: str | None = None, + excludes: list[str] | None = None, + ) -> None: + excludes_clean = excludes or [] + # Detect DOI path: "remote:10.xxxx/prefix.ID[/optional/sub/path]" + path_component = remote_path.split(":", 1)[1] if ":" in remote_path else "" + if self.client.is_doi(path_component): + remote, doi, subpath = self.client.parse_doi_remote_host(remote_path) + + l_dirname = os.path.realpath(local_path) + if not os.path.isdir(l_dirname): + raise IllegalArgumentException(f"Local directory `{local_path}` not found.") + doi_folder, laboratory = self.client.find_folder_by_doi(doi, password) + + subpath_clean = subpath.rstrip("/") + if not subpath_clean: + folder = doi_folder + is_folder = True + else: + r_dirname = os.path.dirname(subpath_clean) + r_basename = os.path.basename(subpath_clean) + abs_path = doi_folder.path.rstrip("/") + r_dirname + r_parent_folder = self.client.find_folder(laboratory, abs_path, password) + r_parent_files = self.client.find_files(r_parent_folder.id) + file = find_file(r_parent_files, r_basename) + if file is not None: + if self.__check_excludes(excludes_clean, laboratory, r_parent_folder, file): + return + context = DownloadContext(False, is_skip_if_exists, []) + l_path = os.path.join(l_dirname, r_basename) + context.files.append(DownloadFileInfo(file, l_path)) + self.__multiple_download(context) + return + else: + folder_simple = r_parent_folder.find_sub_folder(r_basename) + if folder_simple is None: + raise IllegalArgumentException(f"File or folder `{subpath_clean}` not found.") + folder = FoldersApi(self.client.connection).retrieve(folder_simple.id) + is_folder = True + + # For a DOI target the whole folder is the download target. + if not is_recursive: + # Non-recursive: download only the files at the top level of the DOI folder. + files = self.client.find_files(folder.id) + context = DownloadContext(False, is_skip_if_exists, []) + for file in files: + if self.__check_excludes(excludes_clean, laboratory, folder, file): + continue + l_path = os.path.join(l_dirname, file.name) + context.files.append(DownloadFileInfo(file, l_path)) + self.__multiple_download(context) + return + folder_api = FoldersApi(self.client.connection) + self.__multiple_download_pickup_recursive_files( + folder_api, laboratory, folder.id, l_dirname, excludes_clean, is_skip_if_exists + ) + return + + remote, laboratory_name, r_path = self.client.parse_remote_host_with_path(remote_path) + r_path = r_path.rstrip("/") + r_dirname = os.path.dirname(r_path) + r_basename = os.path.basename(r_path) + + l_dirname = os.path.realpath(local_path) + if not os.path.isdir(l_dirname): + raise IllegalArgumentException(f"Local directory `{local_path}` not found.") + laboratory = self.client.find_laboratory(laboratory_name) + r_parent_folder = self.client.find_folder(laboratory, r_dirname, password) + r_parent_files = self.client.find_files(r_parent_folder.id) + file = find_file(r_parent_files, r_basename) + if file is not None: + if self.__check_excludes(excludes_clean, laboratory, r_parent_folder, file): + return + context = DownloadContext(False, is_skip_if_exists, []) + l_path = os.path.join(l_dirname, r_basename) + context.files.append(DownloadFileInfo(file, l_path)) + self.__multiple_download(context) + else: + folder = r_parent_folder.find_sub_folder(r_basename) + if folder is None: + raise IllegalArgumentException(f"File or folder `{r_path}` not found.") + if not is_recursive: + raise IllegalArgumentException(f"Cannot download `{r_path}`: Is a folder.") + folder_api = FoldersApi(self.client.connection) + self.__multiple_download_pickup_recursive_files( + folder_api, laboratory, folder.id, l_dirname, excludes_clean, is_skip_if_exists + ) + + def __multiple_download_pickup_recursive_files( + self, + folder_api: FoldersApi, + laboratory: Laboratory, + folder_id: str, + basedir: str, + excludes: list[str], + is_skip_if_exists: bool, + ) -> None: + context = DownloadContext(False, is_skip_if_exists, []) + folder = folder_api.retrieve(folder_id) + files = self.client.find_files(folder.id) + dirname = os.path.join(basedir, folder.name) + if self.__check_excludes(excludes, laboratory, folder, None): + return + if not os.path.exists(dirname): + os.makedirs(dirname) + print(dirname) + for file in files: + if self.__check_excludes(excludes, laboratory, folder, file): + continue + path = os.path.join(dirname, file.name) + context.files.append(DownloadFileInfo(file, path)) + self.__multiple_download(context) + if context.hasError: + raise UnexpectedException("Some files failed to download.") + for sub_folder in folder.sub_folders: + self.__multiple_download_pickup_recursive_files( + folder_api, laboratory, sub_folder.id, dirname, excludes, is_skip_if_exists + ) + + def __multiple_download(self, context: DownloadContext) -> None: + file_api = FilesApi(self.client.connection) + with ThreadPoolExecutor(max_workers=CONCURRENT) as pool: + results = pool.map( + lambda x: self.__multiple_download_worker(file_api, x, context.isSkipIfExists), context.files + ) + hasError = next(filter(lambda x: x is False, results), None) + if hasError is not None: + context.hasError = True + + def __multiple_download_worker(self, file_api: FilesApi, info: DownloadFileInfo, is_skip_if_exists: bool) -> bool: + if not is_skip_if_exists or not os.path.exists(info.path) or info.file.size != os.path.getsize(info.path): + try: + file_api.download(info.file, info.path) + except Exception: + print(f"Failed: {info.path}") + if os.path.isfile(info.path): + os.remove(info.path) + return False + print(info.path) + return True + + def __check_excludes(self, excludes: list[str], laboratory: Laboratory, folder: Folder, file: File | None) -> bool: + path = f"/{laboratory.name}{folder.path}{file.name if file is not None else ''}".rstrip("/").lower() + return path in excludes