refactor: use services layer and modularize transfer operations

Decouple CLI commands from internal helper logic and consolidate the
core file transfer operations in the service layer to improve library
portability.

- Make MdrsClient subclass MdrsService to inherit resource resolution.
- Remove all deprecated helper methods from BaseCommand.
- Move core upload and download logic to a new transfer module.
- Refactor all CLI commands to route actions through MdrsClient.
- Eliminate circular imports between client and CLI command modules.
This commit is contained in:
2026-07-02 23:16:53 +09:00
parent 36cad6db52
commit 8ce9e09e69
12 changed files with 361 additions and 675 deletions
+263
View File
@@ -0,0 +1,263 @@
import os
from concurrent.futures import ThreadPoolExecutor
from typing import Any
from unicodedata import normalize
from pydantic.dataclasses import dataclass
from mdrsclient.api import FilesApi, FoldersApi
from mdrsclient.exceptions import IllegalArgumentException, MDRSException, UnexpectedException
from mdrsclient.models import File, Folder, Laboratory
from mdrsclient.models.file import find_file
from mdrsclient.settings import CONCURRENT
@dataclass(frozen=True)
class UploadFileInfo:
    """Immutable description of one local file queued for upload."""

    # Remote folder that will receive the file.
    folder: Folder
    # Files already listed in ``folder``; the upload worker searches this list
    # by name to decide between creating a new file and updating an old one.
    files: list[File]
    # Local filesystem path of the file to upload.
    path: str
@dataclass(frozen=True)
class DownloadFileInfo:
    """Immutable description of one remote file queued for download."""

    # Remote file to fetch.
    file: File
    # Local filesystem path the file is written to.
    path: str
@dataclass
class DownloadContext:
    """Mutable state shared by one batch of downloads."""

    # Set to True by the downloader when at least one file of the batch failed.
    hasError: bool
    # When True, a file whose local copy already exists with the same size is
    # not downloaded again.
    isSkipIfExists: bool
    # Files that make up the batch.
    files: list[DownloadFileInfo]
class Uploader:
    """Upload a local file or directory tree to a remote folder.

    The ``client`` object supplies the connection and the resource lookups
    used here (``parse_remote_host_with_path``, ``find_laboratory``,
    ``find_folder``, ``find_files`` and ``connection``).
    """

    def __init__(self, client: Any) -> None:
        self.client = client

    def upload(
        self, local_path: str, remote_path: str, is_recursive: bool = False, is_skip_if_exists: bool = False
    ) -> None:
        """Upload ``local_path`` into the remote folder named by ``remote_path``.

        A single file is uploaded directly into the remote folder.  A
        directory is uploaded as a new (or existing) sub folder carrying the
        directory's own name, and its tree is mirrored below that sub folder.

        Args:
            local_path: Local file or directory to upload.
            remote_path: Remote destination, parsed by the client.
            is_recursive: Must be True to upload a directory.
            is_skip_if_exists: When True, a remote file with the same name and
                the same size as the local file is left untouched.

        Raises:
            IllegalArgumentException: If ``local_path`` does not exist, or is a
                directory while ``is_recursive`` is False.

        Failures of individual file uploads are printed, not raised.
        """
        # The remote host component of the parsed tuple is not needed here.
        _, laboratory_name, r_path = self.client.parse_remote_host_with_path(remote_path)
        l_path = os.path.abspath(local_path)
        if not os.path.exists(l_path):
            raise IllegalArgumentException(f"File or directory `{local_path}` not found.")
        laboratory = self.client.find_laboratory(laboratory_name)
        folder = self.client.find_folder(laboratory, r_path)
        files = self.client.find_files(folder.id)
        infos: list[UploadFileInfo] = []
        if os.path.isdir(l_path):
            if not is_recursive:
                raise IllegalArgumentException(f"Cannot upload `{local_path}`: Is a directory.")
            folder_api = FoldersApi(self.client.connection)
            # Caches keyed by remote path so every remote folder is resolved
            # and listed only once while walking the local tree.
            folder_map: dict[str, Folder] = {r_path: folder}
            files_map: dict[str, list[File]] = {r_path: files}
            l_basename = os.path.basename(l_path)
            for dirpath, _, filenames in os.walk(l_path, followlinks=True):
                if dirpath == l_path:
                    sub = l_basename
                else:
                    sub = os.path.join(l_basename, os.path.relpath(dirpath, l_path))
                d_dirname = os.path.join(r_path, sub)
                d_basename = os.path.basename(d_dirname)
                # prepare destination parent path
                d_parent_dirname = os.path.dirname(d_dirname)
                if d_parent_dirname not in folder_map:
                    parent_folder = self.client.find_folder(laboratory, d_parent_dirname)
                    folder_map[d_parent_dirname] = parent_folder
                    files_map[d_parent_dirname] = self.client.find_files(parent_folder.id)
                # prepare destination path
                if d_dirname not in folder_map:
                    parent = folder_map[d_parent_dirname]
                    d_folder = parent.find_sub_folder(d_basename)
                    if d_folder is None:
                        d_folder_id = folder_api.create(normalize("NFC", d_basename), parent.id)
                    else:
                        d_folder_id = d_folder.id
                    print(d_dirname)
                    folder_map[d_dirname] = folder_api.retrieve(d_folder_id)
                    files_map[d_dirname] = self.client.find_files(d_folder_id)
                    if d_folder is None:
                        # Keep the cached parent in sync with the folder just created.
                        parent.sub_folders.append(folder_map[d_dirname])
                # register upload file list
                infos.extend(
                    UploadFileInfo(folder_map[d_dirname], files_map[d_dirname], os.path.join(dirpath, filename))
                    for filename in filenames
                )
        else:
            infos.append(UploadFileInfo(folder, files, l_path))
        self.__multiple_upload(infos, is_skip_if_exists)

    def __multiple_upload(self, infos: list[UploadFileInfo], is_skip_if_exists: bool) -> None:
        """Upload every entry of ``infos`` using a pool of worker threads.

        The workers already print ``MDRSException`` failures themselves.  Any
        other exception raised in a worker is collected from its future and
        printed here, so that no failure goes unnoticed.
        """
        file_api = FilesApi(self.client.connection)
        with ThreadPoolExecutor(max_workers=CONCURRENT) as pool:
            futures = [
                pool.submit(self.__multiple_upload_worker, file_api, info, is_skip_if_exists) for info in infos
            ]
        # Leaving the ``with`` block waits for all workers, so every future is done.
        for info, future in zip(infos, futures):
            error = future.exception()
            if error is not None:
                print(f"Error: {info.path}: {error}")

    def __multiple_upload_worker(self, file_api: FilesApi, info: UploadFileInfo, is_skip_if_exists: bool) -> None:
        """Create or update one remote file and print its remote path.

        A file whose name is not yet present in the remote folder is created.
        An existing file is updated unless ``is_skip_if_exists`` is True and
        its size equals the local file size.  ``MDRSException`` failures are
        printed instead of raised.
        """
        basename = os.path.basename(info.path)
        file = find_file(info.files, basename)
        try:
            if file is None:
                file_api.create(info.folder.id, info.path)
            elif not is_skip_if_exists or file.size != os.path.getsize(info.path):
                file_api.update(file, info.path)
            print(os.path.join(info.folder.path, basename))
        except MDRSException as e:
            print(f"Error: {e}")
class Downloader:
    """Download a remote file or folder tree into a local directory.

    The ``client`` object supplies the connection and the resource lookups
    used here (``is_doi``, ``parse_doi_remote_host``,
    ``parse_remote_host_with_path``, ``find_laboratory``, ``find_folder``,
    ``find_folder_by_doi``, ``find_files`` and ``connection``).
    """

    def __init__(self, client: Any) -> None:
        self.client = client

    def download(
        self,
        remote_path: str,
        local_path: str,
        is_recursive: bool = False,
        is_skip_if_exists: bool = False,
        password: str | None = None,
        excludes: list[str] | None = None,
    ) -> None:
        """Download ``remote_path`` into the existing directory ``local_path``.

        Args:
            remote_path: Remote source.  When the part after the first ``:``
                is recognised by the client as a DOI, the DOI flow is used.
            local_path: Existing local directory that receives the download.
            is_recursive: Download a folder together with its sub folders.
                For a regular (non-DOI) path a folder can only be downloaded
                recursively; for a DOI target a non-recursive download fetches
                only the files directly inside the target folder.
            is_skip_if_exists: Skip files whose local copy already exists with
                the same size.
            password: Optional password forwarded to the folder lookups.
            excludes: Remote paths (``/<laboratory><folder path><file name>``)
                to skip.  Matching ignores case and trailing slashes.

        Raises:
            IllegalArgumentException: If ``local_path`` is not a directory, the
                remote target does not exist, or a regular folder is requested
                without ``is_recursive``.
            UnexpectedException: If at least one file failed to download.
        """
        excludes_clean = self.__normalize_excludes(excludes)
        # Detect DOI path: "remote:10.xxxx/prefix.ID[/optional/sub/path]"
        path_component = remote_path.partition(":")[2]
        if self.client.is_doi(path_component):
            self.__download_from_doi(
                remote_path, local_path, is_recursive, is_skip_if_exists, password, excludes_clean
            )
        else:
            self.__download_from_path(
                remote_path, local_path, is_recursive, is_skip_if_exists, password, excludes_clean
            )

    @staticmethod
    def __normalize_excludes(excludes: list[str] | None) -> list[str]:
        """Return ``excludes`` in the form ``__check_excludes`` compares against.

        Candidate paths are lower-cased and stripped of trailing slashes before
        the comparison, so the exclude entries get the same treatment here.
        """
        return [entry.rstrip("/").lower() for entry in excludes or []]

    @staticmethod
    def __resolve_local_dir(local_path: str) -> str:
        """Return the real path of ``local_path``, which must be a directory."""
        l_dirname = os.path.realpath(local_path)
        if not os.path.isdir(l_dirname):
            raise IllegalArgumentException(f"Local directory `{local_path}` not found.")
        return l_dirname

    def __download_from_doi(
        self,
        remote_path: str,
        local_path: str,
        is_recursive: bool,
        is_skip_if_exists: bool,
        password: str | None,
        excludes: list[str],
    ) -> None:
        """Download a DOI target: the DOI folder itself or a path below it."""
        _, doi, subpath = self.client.parse_doi_remote_host(remote_path)
        l_dirname = self.__resolve_local_dir(local_path)
        doi_folder, laboratory = self.client.find_folder_by_doi(doi, password)
        subpath_clean = subpath.rstrip("/")
        if not subpath_clean:
            folder = doi_folder
        else:
            r_dirname = os.path.dirname(subpath_clean)
            r_basename = os.path.basename(subpath_clean)
            # NOTE(review): plain concatenation presumes the parsed sub path
            # starts with "/" -- confirm against parse_doi_remote_host.
            abs_path = doi_folder.path.rstrip("/") + r_dirname
            r_parent_folder = self.client.find_folder(laboratory, abs_path, password)
            file = find_file(self.client.find_files(r_parent_folder.id), r_basename)
            if file is not None:
                self.__download_single_file(
                    laboratory, r_parent_folder, file, os.path.join(l_dirname, r_basename), excludes, is_skip_if_exists
                )
                return
            folder_simple = r_parent_folder.find_sub_folder(r_basename)
            if folder_simple is None:
                raise IllegalArgumentException(f"File or folder `{subpath_clean}` not found.")
            folder = FoldersApi(self.client.connection).retrieve(folder_simple.id)
        # For a DOI target the whole folder is the download target.
        if not is_recursive:
            # Non-recursive: download only the files at the top level of the folder.
            context = DownloadContext(False, is_skip_if_exists, [])
            for file in self.client.find_files(folder.id):
                if self.__check_excludes(excludes, laboratory, folder, file):
                    continue
                context.files.append(DownloadFileInfo(file, os.path.join(l_dirname, file.name)))
            self.__download_and_check(context)
            return
        folder_api = FoldersApi(self.client.connection)
        self.__multiple_download_pickup_recursive_files(
            folder_api, laboratory, folder.id, l_dirname, excludes, is_skip_if_exists
        )

    def __download_from_path(
        self,
        remote_path: str,
        local_path: str,
        is_recursive: bool,
        is_skip_if_exists: bool,
        password: str | None,
        excludes: list[str],
    ) -> None:
        """Download a file or folder addressed by laboratory and path."""
        _, laboratory_name, r_path = self.client.parse_remote_host_with_path(remote_path)
        r_path = r_path.rstrip("/")
        r_dirname = os.path.dirname(r_path)
        r_basename = os.path.basename(r_path)
        l_dirname = self.__resolve_local_dir(local_path)
        laboratory = self.client.find_laboratory(laboratory_name)
        r_parent_folder = self.client.find_folder(laboratory, r_dirname, password)
        file = find_file(self.client.find_files(r_parent_folder.id), r_basename)
        if file is not None:
            self.__download_single_file(
                laboratory, r_parent_folder, file, os.path.join(l_dirname, r_basename), excludes, is_skip_if_exists
            )
            return
        folder = r_parent_folder.find_sub_folder(r_basename)
        if folder is None:
            raise IllegalArgumentException(f"File or folder `{r_path}` not found.")
        if not is_recursive:
            raise IllegalArgumentException(f"Cannot download `{r_path}`: Is a folder.")
        folder_api = FoldersApi(self.client.connection)
        self.__multiple_download_pickup_recursive_files(
            folder_api, laboratory, folder.id, l_dirname, excludes, is_skip_if_exists
        )

    def __download_single_file(
        self,
        laboratory: Laboratory,
        folder: Folder,
        file: File,
        l_path: str,
        excludes: list[str],
        is_skip_if_exists: bool,
    ) -> None:
        """Download ``file`` to ``l_path`` unless it is excluded."""
        if self.__check_excludes(excludes, laboratory, folder, file):
            return
        context = DownloadContext(False, is_skip_if_exists, [DownloadFileInfo(file, l_path)])
        self.__download_and_check(context)

    def __download_and_check(self, context: DownloadContext) -> None:
        """Run the batch and raise if any of its files failed."""
        self.__multiple_download(context)
        if context.hasError:
            raise UnexpectedException("Some files failed to download.")

    def __multiple_download_pickup_recursive_files(
        self,
        folder_api: FoldersApi,
        laboratory: Laboratory,
        folder_id: str,
        basedir: str,
        excludes: list[str],
        is_skip_if_exists: bool,
    ) -> None:
        """Download the folder ``folder_id`` and all its sub folders below ``basedir``.

        A local directory named after the folder is created when missing.  An
        excluded folder is skipped together with everything below it.

        Raises:
            UnexpectedException: If a file of this folder failed to download;
                sub folders are not processed in that case.
        """
        folder = folder_api.retrieve(folder_id)
        if self.__check_excludes(excludes, laboratory, folder, None):
            return
        # Listing happens after the exclude check so that an excluded folder
        # does not cost a remote call.
        files = self.client.find_files(folder.id)
        dirname = os.path.join(basedir, folder.name)
        if not os.path.exists(dirname):
            os.makedirs(dirname)
        print(dirname)
        context = DownloadContext(False, is_skip_if_exists, [])
        for file in files:
            if self.__check_excludes(excludes, laboratory, folder, file):
                continue
            context.files.append(DownloadFileInfo(file, os.path.join(dirname, file.name)))
        self.__download_and_check(context)
        for sub_folder in folder.sub_folders:
            self.__multiple_download_pickup_recursive_files(
                folder_api, laboratory, sub_folder.id, dirname, excludes, is_skip_if_exists
            )

    def __multiple_download(self, context: DownloadContext) -> None:
        """Download every file of ``context`` with a pool of worker threads.

        ``context.hasError`` is set to True when at least one worker reports a
        failure; it is never reset here.
        """
        file_api = FilesApi(self.client.connection)
        with ThreadPoolExecutor(max_workers=CONCURRENT) as pool:
            results = pool.map(
                lambda x: self.__multiple_download_worker(file_api, x, context.isSkipIfExists), context.files
            )
        # Leaving the ``with`` block waits for all workers to finish.
        if any(result is False for result in results):
            context.hasError = True

    def __multiple_download_worker(self, file_api: FilesApi, info: DownloadFileInfo, is_skip_if_exists: bool) -> bool:
        """Download one file; return False on failure and True otherwise.

        The download is skipped when ``is_skip_if_exists`` is True and a local
        file of the same size already exists.  After a failed download the
        local file, if present, is removed.
        """
        if not is_skip_if_exists or not os.path.exists(info.path) or info.file.size != os.path.getsize(info.path):
            try:
                file_api.download(info.file, info.path)
            except Exception:
                print(f"Failed: {info.path}")
                if os.path.isfile(info.path):
                    os.remove(info.path)
                return False
        print(info.path)
        return True

    def __check_excludes(self, excludes: list[str], laboratory: Laboratory, folder: Folder, file: File | None) -> bool:
        """Return True when the folder (``file`` is None) or the file is excluded.

        The candidate is ``/<laboratory name><folder path><file name>``,
        lower-cased and without trailing slashes, so ``excludes`` must hold
        entries in that same normalised form.
        """
        path = f"/{laboratory.name}{folder.path}{file.name if file is not None else ''}".rstrip("/").lower()
        return path in excludes