feat(doi): add DOI-based path access for commands

Support accessing repositories using DOI strings with optional subpaths
across ls, download, metadata, and file-metadata commands.

- Implement GET v3/doi/{id}/ API model and client calls
- Parse and resolve DOI paths into their respective folders and files
- Extract common folder and file resolution logic to shared helpers
- Update README with example DOI-based shell commands
This commit is contained in:
2026-06-12 01:26:01 +09:00
parent 04c0003a61
commit 5bdf837941
14 changed files with 301 additions and 27 deletions
+2
View File
@@ -1,9 +1,11 @@
from mdrsclient.api.doi import DoiApi
from mdrsclient.api.files import FilesApi
from mdrsclient.api.folders import FoldersApi
from mdrsclient.api.laboratories import LaboratoriesApi
from mdrsclient.api.users import UsersApi
__all__ = [
"DoiApi",
"FilesApi",
"FoldersApi",
"LaboratoriesApi",
+40
View File
@@ -0,0 +1,40 @@
from typing import Final
from pydantic import TypeAdapter
from pydantic.dataclasses import dataclass
from mdrsclient.api.base import BaseApi
from mdrsclient.api.utils import token_check
@dataclass(frozen=True)
class DoiFolderRef:
    """Folder reference embedded in a DOI lookup response.

    The DOI endpoint reports nothing but the folder ``id``. The owning
    laboratory is not part of this payload, so a caller that needs
    ``laboratory_id`` has to follow up with the folder retrieve endpoint.
    """

    # Identifier of the folder the DOI points at.
    id: str
@dataclass(frozen=True)
class DoiResponse:
    """Payload returned by ``GET v3/doi/{id}/``."""

    # Internal DOI suffix ID, delivered as a string (e.g. "20260429-001").
    id: str
    # DOI string as reported by the server.
    doi: str
    # Reference to the folder registered under this DOI (id only).
    folder: DoiFolderRef
class DoiApi(BaseApi):
    """Client for the DOI lookup endpoint (``v3/doi/``)."""

    ENTRYPOINT: Final[str] = "v3/doi/"

    def retrieve(self, doi_id: str) -> DoiResponse:
        """Retrieve the folder associated with a DOI suffix ID (GET v3/doi/{id}/).

        Args:
            doi_id: Internal DOI suffix ID, e.g. ``"20260429-001"``. It is
                percent-encoded before being placed in the URL.

        Returns:
            The ``DoiResponse`` validated from the JSON body of the reply.

        Raises:
            Whatever ``token_check`` and ``_raise_response_error`` raise for
            an invalid token or an unsuccessful HTTP response.
        """
        from urllib.parse import quote

        # The ID originates from user-supplied text. Encode it as a single
        # path segment so characters such as "/", "?" or "#" cannot change
        # the request path or inject a query string.
        url = self.ENTRYPOINT + quote(doi_id, safe="") + "/"
        token_check(self.connection)
        response = self.connection.get(url)
        self._raise_response_error(response)
        return TypeAdapter(DoiResponse).validate_python(response.json())
+171 -1
View File
@@ -1,9 +1,10 @@
import os
import re
from abc import ABC, abstractmethod
from typing import Any
from unicodedata import normalize
from mdrsclient.api import FilesApi, FoldersApi, LaboratoriesApi
from mdrsclient.api import DoiApi, FilesApi, FoldersApi, LaboratoriesApi
from mdrsclient.config import ConfigFile
from mdrsclient.connection import MDRSConnection
from mdrsclient.exceptions import (
@@ -95,3 +96,172 @@ class BaseCommand(ABC):
laboratory = folder_array.pop(0)
folder = "/" + "/".join(folder_array)
return (remote_host, laboratory, folder)
# ------------------------------------------------------------------
# DOI helpers
# ------------------------------------------------------------------
@staticmethod
def _is_doi(path_component: str) -> bool:
"""Return True if path_component looks like a DOI string.
A DOI is recognised as a string that starts with ``10.`` and
contains a ``/``.
"""
return path_component.startswith("10.") and "/" in path_component
@staticmethod
def _doi_suffix_id(doi: str) -> str:
"""Extract the internal system ID from a full DOI string.
MDRS uses the segment after the last ``.`` in the suffix (the part
after the ``/``) as its identifier.
Example: ``10.xxxx/prefix.20230511-001`` → ``20230511-001``.
If there is no ``.`` in the suffix, the whole suffix is returned.
Trailing slashes are stripped before processing.
"""
# Strip any trailing slash first.
doi = doi.rstrip("/")
slash_pos = doi.find("/")
if slash_pos == -1:
return doi
suffix = doi[slash_pos + 1 :]
dot_pos = suffix.rfind(".")
return suffix[dot_pos + 1 :] if dot_pos != -1 else suffix
@staticmethod
def _split_doi_and_subpath(doi_with_path: str) -> tuple[str, str]:
"""Split a DOI-with-optional-path string into (doi, subpath)."""
# Find the first '/' that separates registrant from suffix.
first_slash = doi_with_path.find("/")
if first_slash != -1:
after_suffix_start = first_slash + 1
after_first = doi_with_path[after_suffix_start:]
# Find the next '/' inside the suffix portion — this starts the subpath.
second_slash = after_first.find("/")
if second_slash != -1:
doi_end = after_suffix_start + second_slash
doi = doi_with_path[:doi_end]
subpath = doi_with_path[doi_end:] # begins with "/"
# Treat a bare trailing slash as no subpath (root of DOI folder).
if subpath == "/":
return (doi, "")
else:
return (doi, subpath)
else:
# No second slash — the whole string is the DOI, no subpath.
return (doi_with_path, "")
else:
return (doi_with_path, "")
@classmethod
def _parse_doi_remote_host(cls, path: str) -> tuple[str, str, str]:
    """Break ``remote:10.xxxx/prefix.ID[/optional/sub/path]`` into ``(remote, doi, subpath)``.

    Raises:
        IllegalArgumentException: if ``path`` has no ``:`` separator, or if
            the text after the first ``:`` does not pass ``_is_doi``.
    """
    remote, colon, doi_with_path = path.partition(":")
    if not colon:
        raise IllegalArgumentException("remote_path must be in the form 'remote:10.xxxx/prefix.ID'")
    if not cls._is_doi(doi_with_path):
        raise IllegalArgumentException(
            f"Path `{doi_with_path}` does not look like a DOI (must start with '10.' and contain '/')."
        )
    return (remote, *cls._split_doi_and_subpath(doi_with_path))
@classmethod
def _find_folder_by_doi(
    cls,
    connection: MDRSConnection,
    doi: str,
    password: str | None = None,
) -> tuple[Folder, Laboratory]:
    """Turn a DOI into the (Folder, Laboratory) pair it designates.

    Steps: look the DOI up via GET v3/doi/{id}/ to learn the folder ID,
    fetch the full folder detail (the source of ``laboratory_id``), unlock
    the folder if it is locked, then pick the laboratory out of the
    laboratory list. On success the fetched laboratory list is also stored
    on ``connection.laboratories``.

    Raises:
        IllegalArgumentException: the server answered with a different DOI
            than the one requested (compared case-insensitively).
        UnauthorizedException: the folder is locked and no password was given.
        UnexpectedException: the folder's laboratory is not in the list.
    """
    requested = doi.rstrip("/")
    suffix_id = cls._doi_suffix_id(requested)
    lookup = DoiApi(connection).retrieve(suffix_id)

    # Guard against the suffix ID resolving to some other DOI.
    reported = lookup.doi.rstrip("/")
    if requested.lower() != reported.lower():
        raise IllegalArgumentException(
            f"DOI mismatch: requested `{requested}` but server returned `{reported}`."
        )

    # The DOI response only names the folder; the detail holds laboratory_id.
    folders = FoldersApi(connection)
    folder_id = lookup.folder.id
    folder = folders.retrieve(folder_id)
    if folder.lock:
        if password is None:
            raise UnauthorizedException(f"Folder for DOI `{requested}` is locked.")
        folders.auth(folder_id, password)

    all_labs = LaboratoriesApi(connection).list()
    owner = all_labs.find_by_id(folder.laboratory_id)
    if owner is None:
        raise UnexpectedException(f"Laboratory with id {folder.laboratory_id} not found.")
    connection.laboratories = all_labs
    return (folder, owner)
@classmethod
def _resolve_folder(
    cls,
    connection: MDRSConnection,
    remote_path: str,
    password: str | None = None,
) -> tuple[Folder, Laboratory]:
    """Map a remote path, conventional or DOI-based, to its (Folder, Laboratory) pair.

    The text after the first ``:`` decides the route: if it passes
    ``_is_doi`` the DOI is resolved first and any subpath is appended to the
    DOI folder's path; otherwise the path is parsed as
    ``remote:/laboratory/folder``.
    """
    _, _, path_component = remote_path.partition(":")

    if not cls._is_doi(path_component):
        # Conventional "remote:/laboratory/path" form.
        _, laboratory_name, r_path = cls._parse_remote_host_with_path(remote_path)
        laboratory = cls._find_laboratory(connection, laboratory_name)
        return (cls._find_folder(connection, laboratory, r_path, password), laboratory)

    _, doi, subpath = cls._parse_doi_remote_host(remote_path)
    doi_folder, laboratory = cls._find_folder_by_doi(connection, doi, password)
    if subpath:
        # subpath starts with "/", so strip the folder path's trailing slash first.
        target_path = doi_folder.path.rstrip("/") + subpath
        return (cls._find_folder(connection, laboratory, target_path, password), laboratory)
    return (doi_folder, laboratory)
@classmethod
def _resolve_file(
    cls,
    connection: MDRSConnection,
    remote_path: str,
    password: str | None = None,
) -> tuple[Folder, Laboratory, str]:
    """Locate the parent folder of a remote file path.

    Returns:
        ``(parent_folder, laboratory, basename)`` where ``basename`` is the
        final component of the path with trailing slashes removed.

    Raises:
        IllegalArgumentException: the path is a DOI with no subpath, i.e. it
            names the DOI folder itself rather than a file inside it.
    """
    _, _, path_component = remote_path.partition(":")

    if not cls._is_doi(path_component):
        # Conventional "remote:/laboratory/dir/file" form.
        _, laboratory_name, r_path = cls._parse_remote_host_with_path(remote_path)
        r_dirname, r_basename = os.path.split(r_path.rstrip("/"))
        laboratory = cls._find_laboratory(connection, laboratory_name)
        parent_folder = cls._find_folder(connection, laboratory, r_dirname, password)
        return (parent_folder, laboratory, r_basename)

    _, doi, subpath = cls._parse_doi_remote_host(remote_path)
    doi_folder, laboratory = cls._find_folder_by_doi(connection, doi, password)
    file_subpath = subpath.rstrip("/")
    if not file_subpath:
        raise IllegalArgumentException("DOI path must point to a file, not a folder.")
    r_dirname, r_basename = os.path.split(file_subpath)
    # The subpath is relative to the DOI folder; r_dirname starts with "/".
    parent_path = doi_folder.path.rstrip("/") + r_dirname
    parent_folder = cls._find_folder(connection, laboratory, parent_path, password)
    return (parent_folder, laboratory, r_basename)
+1 -1
View File
@@ -31,7 +31,7 @@ class ChaclCommand(BaseCommand):
@classmethod
def chacl(cls, remote_path: str, access_level: int, is_recursive: bool, password: str | None) -> None:
(remote, laboratory_name, r_path) = cls._parse_remote_host_with_path(remote_path)
remote, laboratory_name, r_path = cls._parse_remote_host_with_path(remote_path)
r_path = r_path.rstrip("/")
connection = cls._create_connection(remote)
laboratory = cls._find_laboratory(connection, laboratory_name)
+2 -2
View File
@@ -29,8 +29,8 @@ class CpCommand(BaseCommand):
@classmethod
def cp(cls, src_path: str, dest_path: str, is_recursive: bool) -> None:
(s_remote, s_laboratory_name, s_path) = cls._parse_remote_host_with_path(src_path)
(d_remote, d_laboratory_name, d_path) = cls._parse_remote_host_with_path(dest_path)
s_remote, s_laboratory_name, s_path = cls._parse_remote_host_with_path(src_path)
d_remote, d_laboratory_name, d_path = cls._parse_remote_host_with_path(dest_path)
if s_remote != d_remote:
raise IllegalArgumentException("Remote host mismatched.")
if s_laboratory_name != d_laboratory_name:
+54 -1
View File
@@ -68,7 +68,60 @@ class DownloadCommand(BaseCommand):
password: str | None,
excludes: list[str],
) -> None:
(remote, laboratory_name, r_path) = cls._parse_remote_host_with_path(remote_path)
# Detect DOI path: "remote:10.xxxx/prefix.ID[/optional/sub/path]"
path_component = remote_path.split(":", 1)[1] if ":" in remote_path else ""
if cls._is_doi(path_component):
remote, doi, subpath = cls._parse_doi_remote_host(remote_path)
connection = cls._create_connection(remote)
l_dirname = os.path.realpath(local_path)
if not os.path.isdir(l_dirname):
raise IllegalArgumentException(f"Local directory `{local_path}` not found.")
doi_folder, laboratory = cls._find_folder_by_doi(connection, doi, password)
subpath_clean = subpath.rstrip("/")
if not subpath_clean:
folder = doi_folder
is_folder = True
else:
r_dirname = os.path.dirname(subpath_clean)
r_basename = os.path.basename(subpath_clean)
abs_parent_path = doi_folder.path.rstrip("/") + r_dirname
r_parent_folder = cls._find_folder(connection, laboratory, abs_parent_path, password)
r_parent_files = cls._find_files(connection, r_parent_folder.id)
file = find_file(r_parent_files, r_basename)
if file is not None:
if cls.__check_excludes(excludes, laboratory, r_parent_folder, file):
return
context = DownloadContext(False, is_skip_if_exists, [])
l_path = os.path.join(l_dirname, r_basename)
context.files.append(DownloadFileInfo(file, l_path))
cls.__multiple_download(connection, context)
return
else:
folder = r_parent_folder.find_sub_folder(r_basename)
if folder is None:
raise IllegalArgumentException(f"File or folder `{subpath_clean}` not found.")
is_folder = True
# For a DOI target the whole folder is the download target.
if not is_recursive:
# Non-recursive: download only the files at the top level of the DOI folder.
files = cls._find_files(connection, folder.id)
context = DownloadContext(False, is_skip_if_exists, [])
for file in files:
if cls.__check_excludes(excludes, laboratory, folder, file):
continue
l_path = os.path.join(l_dirname, file.name)
context.files.append(DownloadFileInfo(file, l_path))
cls.__multiple_download(connection, context)
return
folder_api = FoldersApi(connection)
cls.__multiple_download_pickup_recursive_files(
connection, folder_api, laboratory, folder.id, l_dirname, excludes, is_skip_if_exists
)
return
remote, laboratory_name, r_path = cls._parse_remote_host_with_path(remote_path)
r_path = r_path.rstrip("/")
r_dirname = os.path.dirname(r_path)
r_basename = os.path.basename(r_path)
+2 -6
View File
@@ -25,13 +25,9 @@ class FileMetadataCommand(BaseCommand):
@classmethod
def file_metadata(cls, remote_path: str, password: str | None) -> None:
(remote, laboratory_name, r_path) = cls._parse_remote_host_with_path(remote_path)
r_path = r_path.rstrip("/")
r_dirname = os.path.dirname(r_path)
r_basename = os.path.basename(r_path)
remote = remote_path.split(":", 1)[0] if ":" in remote_path else ""
connection = cls._create_connection(remote)
laboratory = cls._find_laboratory(connection, laboratory_name)
folder = cls._find_folder(connection, laboratory, r_dirname, password)
folder, laboratory, r_basename = cls._resolve_file(connection, remote_path, password)
files = cls._find_files(connection, folder.id)
file = find_file(files, r_basename)
if file is None:
+4 -4
View File
@@ -54,9 +54,11 @@ class LsCommand(BaseCommand):
@classmethod
def ls(cls, remote_path: str, password: str | None, is_json: bool, is_recursive: bool, is_quiet: bool) -> None:
(remote, laboratory_name, r_path) = cls._parse_remote_host_with_path(remote_path)
remote = remote_path.split(":", 1)[0] if ":" in remote_path else ""
connection = cls._create_connection(remote)
laboratory = cls._find_laboratory(connection, laboratory_name)
folder, laboratory = cls._resolve_folder(connection, remote_path, password)
laboratory_name = laboratory.name
files = cls._find_files(connection, folder.id)
context = LsCommandContext(
f"{remote}:/{laboratory_name}",
connection,
@@ -66,8 +68,6 @@ class LsCommand(BaseCommand):
is_quiet,
is_recursive,
)
folder = cls._find_folder(connection, laboratory, r_path, password)
files = cls._find_files(connection, folder.id)
if context.is_json:
cls._ls_json(context, folder, files)
else:
+2 -3
View File
@@ -22,10 +22,9 @@ class MetadataCommand(BaseCommand):
@classmethod
def metadata(cls, remote_path: str, password: str | None) -> None:
(remote, laboratory_name, r_path) = cls._parse_remote_host_with_path(remote_path)
remote = remote_path.split(":", 1)[0] if ":" in remote_path else ""
connection = cls._create_connection(remote)
laboratory = cls._find_laboratory(connection, laboratory_name)
folder = cls._find_folder(connection, laboratory, r_path, password)
folder, laboratory = cls._resolve_folder(connection, remote_path, password)
folder_api = FoldersApi(connection)
metadata = folder_api.metadata(folder.id)
print(json.dumps(metadata, ensure_ascii=False))
+1 -1
View File
@@ -23,7 +23,7 @@ class MkdirCommand(BaseCommand):
@classmethod
def mkdir(cls, remote_path: str) -> None:
(remote, laboratory_name, r_path) = cls._parse_remote_host_with_path(remote_path)
remote, laboratory_name, r_path = cls._parse_remote_host_with_path(remote_path)
r_path = r_path.rstrip("/")
r_dirname = os.path.dirname(r_path)
r_basename = os.path.basename(r_path)
+2 -2
View File
@@ -25,8 +25,8 @@ class MvCommand(BaseCommand):
@classmethod
def mv(cls, src_path: str, dest_path: str) -> None:
(s_remote, s_laboratory_name, s_path) = cls._parse_remote_host_with_path(src_path)
(d_remote, d_laboratory_name, d_path) = cls._parse_remote_host_with_path(dest_path)
s_remote, s_laboratory_name, s_path = cls._parse_remote_host_with_path(src_path)
d_remote, d_laboratory_name, d_path = cls._parse_remote_host_with_path(dest_path)
if s_remote != d_remote:
raise IllegalArgumentException("Remote host mismatched.")
if s_laboratory_name != d_laboratory_name:
+1 -1
View File
@@ -26,7 +26,7 @@ class RmCommand(BaseCommand):
@classmethod
def rm(cls, remote_path: str, is_recursive: bool) -> None:
(remote, laboratory_name, r_path) = cls._parse_remote_host_with_path(remote_path)
remote, laboratory_name, r_path = cls._parse_remote_host_with_path(remote_path)
r_path = r_path.rstrip("/")
r_dirname = os.path.dirname(r_path)
r_basename = os.path.basename(r_path)
+1 -1
View File
@@ -49,7 +49,7 @@ class UploadCommand(BaseCommand):
@classmethod
def upload(cls, local_path: str, remote_path: str, is_recursive: bool, is_skip_if_exists: bool) -> None:
(remote, laboratory_name, r_path) = cls._parse_remote_host_with_path(remote_path)
remote, laboratory_name, r_path = cls._parse_remote_host_with_path(remote_path)
l_path = os.path.abspath(local_path)
if not os.path.exists(l_path):
raise IllegalArgumentException(f"File or directory `{local_path}` not found.")