From 5bdf837941ac06d402d7389a40ceb4c715f1c88e Mon Sep 17 00:00:00 2001 From: Yoshihiro OKUMURA Date: Fri, 12 Jun 2026 01:26:01 +0900 Subject: [PATCH] feat(doi): add DOI-based path access for commands Support accessing repositories using DOI strings with optional subpaths across ls, download, metadata, and file-metadata commands. - Implement GET v3/doi/{id}/ API model and client calls - Parse and resolve DOI paths into respective folder and files - Extract common folder and file resolution logic to shared helpers - Update README with example DOI-based shell commands --- README.md | 22 +++- mdrsclient/api/__init__.py | 2 + mdrsclient/api/doi.py | 40 +++++++ mdrsclient/commands/base.py | 172 ++++++++++++++++++++++++++- mdrsclient/commands/chacl.py | 2 +- mdrsclient/commands/cp.py | 4 +- mdrsclient/commands/download.py | 55 ++++++++- mdrsclient/commands/file_metadata.py | 8 +- mdrsclient/commands/ls.py | 8 +- mdrsclient/commands/metadata.py | 5 +- mdrsclient/commands/mkdir.py | 2 +- mdrsclient/commands/mv.py | 4 +- mdrsclient/commands/rm.py | 2 +- mdrsclient/commands/upload.py | 2 +- 14 files changed, 301 insertions(+), 27 deletions(-) create mode 100644 mdrsclient/api/doi.py diff --git a/README.md b/README.md index 17171ae..6e8ce7c 100644 --- a/README.md +++ b/README.md @@ -82,13 +82,17 @@ mdrs labs neurodata: ### ls -List the folder contents +List the folder contents. You can also specify a DOI path in the form `remote:10.xxxx/yyy.ID[/optional/subpath]`. ```shell mdrs ls neurodata:/NIU/Repository/ mdrs ls -p SHARING_PASSWORD neurodata:/NIU/Repository/PW_Open/ mdrs ls -r neurodata:/NIU/Repository/Dataset1/ mdrs ls -J -r neurodata:/NIU/Repository/Dataset1/ + +# DOI access examples: +mdrs ls neurodata:10.60178/cbs.20260429-001 +mdrs ls "neurodata:10.60178/cbs.20260429-001/Figure 1" ``` ### mkdir @@ -111,7 +115,7 @@ mdrs upload -r --skip-if-exists ./dataset neurodata:/NIU/Repository/TEST/ ### download -Download the file or folder +Download the file or folder. You can also specify a DOI path. ```shell mdrs download neurodata:/NIU/Repository/TEST/sample.dat ./ @@ -119,6 +123,10 @@ mdrs download -r neurodata:/NIU/Repository/TEST/dataset/ ./ mdrs download -p SHARING_PASSWORD neurodata:/NIU/Repository/PW_Open/Readme.dat ./ mdrs download -r --exclude /NIU/Repository/TEST/dataset/skip neurodata:/NIU/Repository/TEST/dataset/ ./ mdrs download -r --skip-if-exists neurodata:/NIU/Repository/TEST/dataset/ ./ + +# DOI access examples: +mdrs download neurodata:10.60178/cbs.20260429-001/README_NeuroData.md ./ +mdrs download -r "neurodata:10.60178/cbs.20260429-001/Figure 1" ./ ``` ### mv @@ -160,20 +168,26 @@ mdrs chacl pw_open -r -p SHARING_PASSWORD neurodata:/NIU/Repository/PW_Open ### metadata -Get a folder metadata +Get a folder metadata. You can also specify a DOI path. ```shell mdrs metadata neurodata:/NIU/Repository/TEST/ mdrs metadata -p SHARING_PASSWORD neurodata:/NIU/Repository/PW_Open/ + +# DOI access examples: +mdrs metadata neurodata:10.60178/cbs.20260429-001 ``` ### file-metadata -Get the file metadata +Get the file metadata. You can also specify a DOI path. ```shell mdrs file-metadata neurodata:/NIU/Repository/TEST/dataset/sample.dat mdrs file-metadata -p SHARING_PASSWORD neurodata:/NIU/Repository/PW_Open/Readme.txt + +# DOI access examples: +mdrs file-metadata "neurodata:10.60178/cbs.20260429-001/Figure 1/Figure1v3.pdf" ``` ### version diff --git a/mdrsclient/api/__init__.py b/mdrsclient/api/__init__.py index 88341cc..e9af7bb 100644 --- a/mdrsclient/api/__init__.py +++ b/mdrsclient/api/__init__.py @@ -1,9 +1,11 @@ +from mdrsclient.api.doi import DoiApi from mdrsclient.api.files import FilesApi from mdrsclient.api.folders import FoldersApi from mdrsclient.api.laboratories import LaboratoriesApi from mdrsclient.api.users import UsersApi __all__ = [ + "DoiApi", "FilesApi", "FoldersApi", "LaboratoriesApi", diff --git a/mdrsclient/api/doi.py b/mdrsclient/api/doi.py new file mode 100644 index 0000000..7b0e6e6 --- /dev/null +++ b/mdrsclient/api/doi.py @@ -0,0 +1,40 @@ +from typing import Final + +from pydantic import TypeAdapter +from pydantic.dataclasses import dataclass + +from mdrsclient.api.base import BaseApi +from mdrsclient.api.utils import token_check + + +@dataclass(frozen=True) +class DoiFolderRef: + """Nested folder reference returned inside a DOI response. + + The DOI endpoint only returns the folder ``id``; ``laboratory_id`` must be + obtained by subsequently calling the folder retrieve endpoint. + """ + + id: str + + +@dataclass(frozen=True) +class DoiResponse: + """Response from GET v3/doi/{id}/.""" + + # The internal DOI suffix ID returned as a string (e.g. "20260429-001"). + id: str + doi: str + folder: DoiFolderRef + + +class DoiApi(BaseApi): + ENTRYPOINT: Final[str] = "v3/doi/" + + def retrieve(self, doi_id: str) -> DoiResponse: + """Retrieve the folder associated with a DOI suffix ID (GET v3/doi/{id}/).""" + url = self.ENTRYPOINT + doi_id + "/" + token_check(self.connection) + response = self.connection.get(url) + self._raise_response_error(response) + return TypeAdapter(DoiResponse).validate_python(response.json()) diff --git a/mdrsclient/commands/base.py b/mdrsclient/commands/base.py index 2234678..6df4941 100644 --- a/mdrsclient/commands/base.py +++ b/mdrsclient/commands/base.py @@ -1,9 +1,10 @@ +import os import re from abc import ABC, abstractmethod from typing import Any from unicodedata import normalize -from mdrsclient.api import FilesApi, FoldersApi, LaboratoriesApi +from mdrsclient.api import DoiApi, FilesApi, FoldersApi, LaboratoriesApi from mdrsclient.config import ConfigFile from mdrsclient.connection import MDRSConnection from mdrsclient.exceptions import ( @@ -95,3 +96,172 @@ class BaseCommand(ABC): laboratory = folder_array.pop(0) folder = "/" + "/".join(folder_array) return (remote_host, laboratory, folder) + + # ------------------------------------------------------------------ + # DOI helpers + # ------------------------------------------------------------------ + + @staticmethod + def _is_doi(path_component: str) -> bool: + """Return True if path_component looks like a DOI string. + + A DOI is recognised as a string that starts with ``10.`` and + contains a ``/``. + """ + return path_component.startswith("10.") and "/" in path_component + + @staticmethod + def _doi_suffix_id(doi: str) -> str: + """Extract the internal system ID from a full DOI string. + + MDRS uses the segment after the last ``.`` in the suffix (the part + after the ``/``) as its identifier. + Example: ``10.xxxx/prefix.20230511-001`` → ``20230511-001``. + If there is no ``.`` in the suffix, the whole suffix is returned. + Trailing slashes are stripped before processing. + """ + # Strip any trailing slash first. + doi = doi.rstrip("/") + slash_pos = doi.find("/") + if slash_pos == -1: + return doi + suffix = doi[slash_pos + 1 :] + dot_pos = suffix.rfind(".") + return suffix[dot_pos + 1 :] if dot_pos != -1 else suffix + + @staticmethod + def _split_doi_and_subpath(doi_with_path: str) -> tuple[str, str]: + """Split a DOI-with-optional-path string into (doi, subpath).""" + # Find the first '/' that separates registrant from suffix. + first_slash = doi_with_path.find("/") + if first_slash != -1: + after_suffix_start = first_slash + 1 + after_first = doi_with_path[after_suffix_start:] + # Find the next '/' inside the suffix portion — this starts the subpath. + second_slash = after_first.find("/") + if second_slash != -1: + doi_end = after_suffix_start + second_slash + doi = doi_with_path[:doi_end] + subpath = doi_with_path[doi_end:] # begins with "/" + # Treat a bare trailing slash as no subpath (root of DOI folder). + if subpath == "/": + return (doi, "") + else: + return (doi, subpath) + else: + # No second slash — the whole string is the DOI, no subpath. + return (doi_with_path, "") + else: + return (doi_with_path, "") + + @classmethod + def _parse_doi_remote_host(cls, path: str) -> tuple[str, str, str]: + """Parse ``remote:10.xxxx/prefix.ID[/optional/sub/path]`` into ``(remote, doi, subpath)``.""" + parts = path.split(":", 1) + if len(parts) != 2: + raise IllegalArgumentException("remote_path must be in the form 'remote:10.xxxx/prefix.ID'") + remote, doi_with_path = parts + if not cls._is_doi(doi_with_path): + raise IllegalArgumentException( + f"Path `{doi_with_path}` does not look like a DOI (must start with '10.' and contain '/')." + ) + doi, subpath = cls._split_doi_and_subpath(doi_with_path) + return (remote, doi, subpath) + + @classmethod + def _find_folder_by_doi( + cls, + connection: MDRSConnection, + doi: str, + password: str | None = None, + ) -> tuple[Folder, Laboratory]: + """Resolve a DOI to a (Folder, Laboratory) pair. + + Calls GET v3/doi/{id}/ to look up the folder ID, retrieves the full + folder detail (which carries ``laboratory_id``), and resolves the + laboratory from that field. + """ + doi_clean = doi.rstrip("/") + doi_id = cls._doi_suffix_id(doi_clean) + doi_api = DoiApi(connection) + doi_resp = doi_api.retrieve(doi_id) + + # Verify the returned DOI matches the one supplied (case-insensitive). + returned_doi = doi_resp.doi.rstrip("/") + if returned_doi.lower() != doi_clean.lower(): + raise IllegalArgumentException( + f"DOI mismatch: requested `{doi_clean}` but server returned `{returned_doi}`." + ) + + folder_api = FoldersApi(connection) + + # Retrieve full folder detail directly by ID; laboratory_id is here. + folder = folder_api.retrieve(doi_resp.folder.id) + + if folder.lock: + if password is None: + raise UnauthorizedException(f"Folder for DOI `{doi_clean}` is locked.") + folder_api.auth(doi_resp.folder.id, password) + + # Resolve laboratory using laboratory_id from the full folder detail. + lab_api = LaboratoriesApi(connection) + labs = lab_api.list() + lab = labs.find_by_id(folder.laboratory_id) + if lab is None: + raise UnexpectedException(f"Laboratory with id {folder.laboratory_id} not found.") + + connection.laboratories = labs + return (folder, lab) + + @classmethod + def _resolve_folder( + cls, + connection: MDRSConnection, + remote_path: str, + password: str | None = None, + ) -> tuple[Folder, Laboratory]: + """Resolve any remote path (normal or DOI) into a (Folder, Laboratory) pair.""" + path_component = remote_path.split(":", 1)[1] if ":" in remote_path else "" + if cls._is_doi(path_component): + remote, doi, subpath = cls._parse_doi_remote_host(remote_path) + doi_folder, laboratory = cls._find_folder_by_doi(connection, doi, password) + if not subpath: + return (doi_folder, laboratory) + else: + abs_path = doi_folder.path.rstrip("/") + subpath + folder = cls._find_folder(connection, laboratory, abs_path, password) + return (folder, laboratory) + else: + remote, laboratory_name, r_path = cls._parse_remote_host_with_path(remote_path) + laboratory = cls._find_laboratory(connection, laboratory_name) + folder = cls._find_folder(connection, laboratory, r_path, password) + return (folder, laboratory) + + @classmethod + def _resolve_file( + cls, + connection: MDRSConnection, + remote_path: str, + password: str | None = None, + ) -> tuple[Folder, Laboratory, str]: + """Resolve a remote path pointing to a file into the parent Folder, its Laboratory, and the file's basename.""" + path_component = remote_path.split(":", 1)[1] if ":" in remote_path else "" + if cls._is_doi(path_component): + remote, doi, subpath = cls._parse_doi_remote_host(remote_path) + doi_folder, laboratory = cls._find_folder_by_doi(connection, doi, password) + subpath_clean = subpath.rstrip("/") + if not subpath_clean: + raise IllegalArgumentException("DOI path must point to a file, not a folder.") + r_dirname = os.path.dirname(subpath_clean) + r_basename = os.path.basename(subpath_clean) + abs_path = doi_folder.path.rstrip("/") + r_dirname + parent_folder = cls._find_folder(connection, laboratory, abs_path, password) + return (parent_folder, laboratory, r_basename) + else: + remote, laboratory_name, r_path = cls._parse_remote_host_with_path(remote_path) + r_path = r_path.rstrip("/") + r_dirname = os.path.dirname(r_path) + r_basename = os.path.basename(r_path) + laboratory = cls._find_laboratory(connection, laboratory_name) + parent_folder = cls._find_folder(connection, laboratory, r_dirname, password) + return (parent_folder, laboratory, r_basename) diff --git a/mdrsclient/commands/chacl.py b/mdrsclient/commands/chacl.py index 0ffafb2..c6d1669 100644 --- a/mdrsclient/commands/chacl.py +++ b/mdrsclient/commands/chacl.py @@ -31,7 +31,7 @@ class ChaclCommand(BaseCommand): @classmethod def chacl(cls, remote_path: str, access_level: int, is_recursive: bool, password: str | None) -> None: - (remote, laboratory_name, r_path) = cls._parse_remote_host_with_path(remote_path) + remote, laboratory_name, r_path = cls._parse_remote_host_with_path(remote_path) r_path = r_path.rstrip("/") connection = cls._create_connection(remote) laboratory = cls._find_laboratory(connection, laboratory_name) diff --git a/mdrsclient/commands/cp.py b/mdrsclient/commands/cp.py index cfabb71..a60a2e2 100644 --- a/mdrsclient/commands/cp.py +++ b/mdrsclient/commands/cp.py @@ -29,8 +29,8 @@ class CpCommand(BaseCommand): @classmethod def cp(cls, src_path: str, dest_path: str, is_recursive: bool) -> None: - (s_remote, s_laboratory_name, s_path) = cls._parse_remote_host_with_path(src_path) - (d_remote, d_laboratory_name, d_path) = cls._parse_remote_host_with_path(dest_path) + s_remote, s_laboratory_name, s_path = cls._parse_remote_host_with_path(src_path) + d_remote, d_laboratory_name, d_path = cls._parse_remote_host_with_path(dest_path) if s_remote != d_remote: raise IllegalArgumentException("Remote host mismatched.") if s_laboratory_name != d_laboratory_name: diff --git a/mdrsclient/commands/download.py b/mdrsclient/commands/download.py index ad26010..f9d7e0d 100644 --- a/mdrsclient/commands/download.py +++ b/mdrsclient/commands/download.py @@ -68,7 +68,60 @@ class DownloadCommand(BaseCommand): password: str | None, excludes: list[str], ) -> None: - (remote, laboratory_name, r_path) = cls._parse_remote_host_with_path(remote_path) + # Detect DOI path: "remote:10.xxxx/prefix.ID[/optional/sub/path]" + path_component = remote_path.split(":", 1)[1] if ":" in remote_path else "" + if cls._is_doi(path_component): + remote, doi, subpath = cls._parse_doi_remote_host(remote_path) + connection = cls._create_connection(remote) + l_dirname = os.path.realpath(local_path) + if not os.path.isdir(l_dirname): + raise IllegalArgumentException(f"Local directory `{local_path}` not found.") + doi_folder, laboratory = cls._find_folder_by_doi(connection, doi, password) + + subpath_clean = subpath.rstrip("/") + if not subpath_clean: + folder = doi_folder + is_folder = True + else: + r_dirname = os.path.dirname(subpath_clean) + r_basename = os.path.basename(subpath_clean) + abs_parent_path = doi_folder.path.rstrip("/") + r_dirname + r_parent_folder = cls._find_folder(connection, laboratory, abs_parent_path, password) + r_parent_files = cls._find_files(connection, r_parent_folder.id) + file = find_file(r_parent_files, r_basename) + if file is not None: + if cls.__check_excludes(excludes, laboratory, r_parent_folder, file): + return + context = DownloadContext(False, is_skip_if_exists, []) + l_path = os.path.join(l_dirname, r_basename) + context.files.append(DownloadFileInfo(file, l_path)) + cls.__multiple_download(connection, context) + return + else: + folder = r_parent_folder.find_sub_folder(r_basename) + if folder is None: + raise IllegalArgumentException(f"File or folder `{subpath_clean}` not found.") + is_folder = True + + # For a DOI target the whole folder is the download target. + if not is_recursive: + # Non-recursive: download only the files at the top level of the DOI folder. + files = cls._find_files(connection, folder.id) + context = DownloadContext(False, is_skip_if_exists, []) + for file in files: + if cls.__check_excludes(excludes, laboratory, folder, file): + continue + l_path = os.path.join(l_dirname, file.name) + context.files.append(DownloadFileInfo(file, l_path)) + cls.__multiple_download(connection, context) + return + folder_api = FoldersApi(connection) + cls.__multiple_download_pickup_recursive_files( + connection, folder_api, laboratory, folder.id, l_dirname, excludes, is_skip_if_exists + ) + return + + remote, laboratory_name, r_path = cls._parse_remote_host_with_path(remote_path) r_path = r_path.rstrip("/") r_dirname = os.path.dirname(r_path) r_basename = os.path.basename(r_path) diff --git a/mdrsclient/commands/file_metadata.py b/mdrsclient/commands/file_metadata.py index 6121a7a..79f6662 100644 --- a/mdrsclient/commands/file_metadata.py +++ b/mdrsclient/commands/file_metadata.py @@ -25,13 +25,9 @@ class FileMetadataCommand(BaseCommand): @classmethod def file_metadata(cls, remote_path: str, password: str | None) -> None: - (remote, laboratory_name, r_path) = cls._parse_remote_host_with_path(remote_path) - r_path = r_path.rstrip("/") - r_dirname = os.path.dirname(r_path) - r_basename = os.path.basename(r_path) + remote = remote_path.split(":", 1)[0] if ":" in remote_path else "" connection = cls._create_connection(remote) - laboratory = cls._find_laboratory(connection, laboratory_name) - folder = cls._find_folder(connection, laboratory, r_dirname, password) + folder, laboratory, r_basename = cls._resolve_file(connection, remote_path, password) files = cls._find_files(connection, folder.id) file = find_file(files, r_basename) if file is None: diff --git a/mdrsclient/commands/ls.py b/mdrsclient/commands/ls.py index 7c1591f..8e94333 100644 --- a/mdrsclient/commands/ls.py +++ b/mdrsclient/commands/ls.py @@ -54,9 +54,11 @@ class LsCommand(BaseCommand): @classmethod def ls(cls, remote_path: str, password: str | None, is_json: bool, is_recursive: bool, is_quiet: bool) -> None: - (remote, laboratory_name, r_path) = cls._parse_remote_host_with_path(remote_path) + remote = remote_path.split(":", 1)[0] if ":" in remote_path else "" connection = cls._create_connection(remote) - laboratory = cls._find_laboratory(connection, laboratory_name) + folder, laboratory = cls._resolve_folder(connection, remote_path, password) + laboratory_name = laboratory.name + files = cls._find_files(connection, folder.id) context = LsCommandContext( f"{remote}:/{laboratory_name}", connection, @@ -66,8 +68,6 @@ class LsCommand(BaseCommand): is_quiet, is_recursive, ) - folder = cls._find_folder(connection, laboratory, r_path, password) - files = cls._find_files(connection, folder.id) if context.is_json: cls._ls_json(context, folder, files) else: diff --git a/mdrsclient/commands/metadata.py b/mdrsclient/commands/metadata.py index c75a10f..2b0725a 100644 --- a/mdrsclient/commands/metadata.py +++ b/mdrsclient/commands/metadata.py @@ -22,10 +22,9 @@ class MetadataCommand(BaseCommand): @classmethod def metadata(cls, remote_path: str, password: str | None) -> None: - (remote, laboratory_name, r_path) = cls._parse_remote_host_with_path(remote_path) + remote = remote_path.split(":", 1)[0] if ":" in remote_path else "" connection = cls._create_connection(remote) - laboratory = cls._find_laboratory(connection, laboratory_name) - folder = cls._find_folder(connection, laboratory, r_path, password) + folder, laboratory = cls._resolve_folder(connection, remote_path, password) folder_api = FoldersApi(connection) metadata = folder_api.metadata(folder.id) print(json.dumps(metadata, ensure_ascii=False)) diff --git a/mdrsclient/commands/mkdir.py b/mdrsclient/commands/mkdir.py index 9b04f0e..6184368 100644 --- a/mdrsclient/commands/mkdir.py +++ b/mdrsclient/commands/mkdir.py @@ -23,7 +23,7 @@ class MkdirCommand(BaseCommand): @classmethod def mkdir(cls, remote_path: str) -> None: - (remote, laboratory_name, r_path) = cls._parse_remote_host_with_path(remote_path) + remote, laboratory_name, r_path = cls._parse_remote_host_with_path(remote_path) r_path = r_path.rstrip("/") r_dirname = os.path.dirname(r_path) r_basename = os.path.basename(r_path) diff --git a/mdrsclient/commands/mv.py b/mdrsclient/commands/mv.py index 2fc3884..e82eabf 100644 --- a/mdrsclient/commands/mv.py +++ b/mdrsclient/commands/mv.py @@ -25,8 +25,8 @@ class MvCommand(BaseCommand): @classmethod def mv(cls, src_path: str, dest_path: str) -> None: - (s_remote, s_laboratory_name, s_path) = cls._parse_remote_host_with_path(src_path) - (d_remote, d_laboratory_name, d_path) = cls._parse_remote_host_with_path(dest_path) + s_remote, s_laboratory_name, s_path = cls._parse_remote_host_with_path(src_path) + d_remote, d_laboratory_name, d_path = cls._parse_remote_host_with_path(dest_path) if s_remote != d_remote: raise IllegalArgumentException("Remote host mismatched.") if s_laboratory_name != d_laboratory_name: diff --git a/mdrsclient/commands/rm.py b/mdrsclient/commands/rm.py index 24dbea8..30e2b93 100644 --- a/mdrsclient/commands/rm.py +++ b/mdrsclient/commands/rm.py @@ -26,7 +26,7 @@ class RmCommand(BaseCommand): @classmethod def rm(cls, remote_path: str, is_recursive: bool) -> None: - (remote, laboratory_name, r_path) = cls._parse_remote_host_with_path(remote_path) + remote, laboratory_name, r_path = cls._parse_remote_host_with_path(remote_path) r_path = r_path.rstrip("/") r_dirname = os.path.dirname(r_path) r_basename = os.path.basename(r_path) diff --git a/mdrsclient/commands/upload.py b/mdrsclient/commands/upload.py index 6f82ebd..36de969 100644 --- a/mdrsclient/commands/upload.py +++ b/mdrsclient/commands/upload.py @@ -49,7 +49,7 @@ class UploadCommand(BaseCommand): @classmethod def upload(cls, local_path: str, remote_path: str, is_recursive: bool, is_skip_if_exists: bool) -> None: - (remote, laboratory_name, r_path) = cls._parse_remote_host_with_path(remote_path) + remote, laboratory_name, r_path = cls._parse_remote_host_with_path(remote_path) l_path = os.path.abspath(local_path) if not os.path.exists(l_path): raise IllegalArgumentException(f"File or directory `{local_path}` not found.")