From 020ef8835a053defbce63c640ac96a6ce508fd0d Mon Sep 17 00:00:00 2001 From: Yoshihiro OKUMURA Date: Thu, 4 Jul 2024 12:33:57 +0900 Subject: [PATCH] fixed bug to upload large file. --- .cspell.json | 9 ++++++++- LICENSE.txt | 2 +- README.md | 7 ++++--- mdrsclient/VERSION | 2 +- mdrsclient/api/files.py | 29 +++++++++++++++++++++++++---- mdrsclient/commands/upload.py | 2 +- mdrsclient/connection.py | 10 +++++----- pyproject.toml | 15 ++++++++------- 8 files changed, 53 insertions(+), 23 deletions(-) diff --git a/.cspell.json b/.cspell.json index e8d8ae2..32bec29 100644 --- a/.cspell.json +++ b/.cspell.json @@ -1,7 +1,14 @@ { "version": "0.2", "language": "en,en-gb", - "ignoreWords": ["followlinks", "getframe", "pycache", "pydantic", "UNLCK"], + "ignoreWords": [ + "followlinks", + "getframe", + "pycache", + "pydantic", + "toolbelt", + "UNLCK" + ], "words": [ "chacl", "kikan", diff --git a/LICENSE.txt b/LICENSE.txt index a57106c..22692f4 100644 --- a/LICENSE.txt +++ b/LICENSE.txt @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2023 Neuroinformatics Unit, RIKEN CBS +Copyright (c) 2023- Neuroinformatics Unit, RIKEN CBS Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/README.md b/README.md index 47bb964..954813b 100644 --- a/README.md +++ b/README.md @@ -80,6 +80,7 @@ Upload the file or directory ```shell mdrs upload ./sample.dat neurodata:/NIU/Repository/TEST/ mdrs upload -r ./dataset neurodata:/NIU/Repository/TEST/ +mdrs upload -r -s ./dataset neurodata:/NIU/Repository/TEST/ ``` ### download @@ -115,8 +116,8 @@ mdrs cp -r neurodata:/NIU/Repository/TEST/dataset neurodata:/NIU/Repository/TEST Remove the file or folder ```shell -mdrs rm neurodata:/NIU/Repository/TEST2/sample2.dat -mdrs rm -r neurodata:/NIU/Repository/TEST2/dataset +mdrs rm neurodata:/NIU/Repository/TEST/sample.dat +mdrs rm -r neurodata:/NIU/Repository/TEST/dataset ``` ### chacl @@ -126,7 +127,7 @@ Change the folder access level ```shell mdrs chacl private neurodata:/NIU/Repository/Private mdrs chacl cbs_open -r neurodata:/NIU/Repository/CBS_Open -mdrs chacl pw_open -r -p FOLDER_PASSWORD neurodata:/NIU/Repository/PW_Open +mdrs chacl pw_open -r -p PW_OPEN_PASSWORD neurodata:/NIU/Repository/PW_Open ``` ### metadata diff --git a/mdrsclient/VERSION b/mdrsclient/VERSION index 3a3cd8c..d0149fe 100644 --- a/mdrsclient/VERSION +++ b/mdrsclient/VERSION @@ -1 +1 @@ -1.3.1 +1.3.4 diff --git a/mdrsclient/api/files.py b/mdrsclient/api/files.py index 5fc5e7a..ecd4dfe 100644 --- a/mdrsclient/api/files.py +++ b/mdrsclient/api/files.py @@ -1,8 +1,10 @@ +import mimetypes import os from typing import Any, Final from pydantic import TypeAdapter from pydantic.dataclasses import dataclass +from requests_toolbelt.multipart.encoder import MultipartEncoder from mdrsclient.api.base import BaseApi from mdrsclient.api.utils import token_check @@ -17,6 +19,7 @@ class FilesApiCreateResponse: class FilesApi(BaseApi): ENTRYPOINT: Final[str] = "v3/files/" + FALLBACK_MIMETYPE: Final[str] = "application/octet-stream" def retrieve(self, id: str) -> File: # print(self.__class__.__name__ + "::" + sys._getframe().f_code.co_name) @@ -30,30 +33,42 @@ class FilesApi(BaseApi): # print(self.__class__.__name__ + "::" + sys._getframe().f_code.co_name) url = self.ENTRYPOINT token_check(self.connection) - data: dict[str, str | int] = {"folder_id": folder_id} + data: dict[str, str | int] | MultipartEncoder = {} try: with open(os.path.realpath(path), mode="rb") as fp: - response = self.connection.post(url, data=data, files={"file": fp}) + data = MultipartEncoder( + fields={"folder_id": folder_id, "file": (os.path.basename(path), fp, self._get_mime_type(path))} + ) + response = self.connection.post(url, data=data, headers={"Content-Type": data.content_type}) self._raise_response_error(response) ret = TypeAdapter(FilesApiCreateResponse).validate_python(response.json()) except OSError: raise UnexpectedException(f"Could not open `{path}` file.") + except MemoryError: + raise UnexpectedException("Out of memory.") + except Exception as e: + print(e) + raise UnexpectedException(e) return ret.id def update(self, file: File, path: str | None) -> bool: # print(self.__class__.__name__ + "::" + sys._getframe().f_code.co_name) url = self.ENTRYPOINT + file.id + "/" token_check(self.connection) + data: dict[str, str | int] | MultipartEncoder = {} if path is not None: # update file body try: with open(os.path.realpath(path), mode="rb") as fp: - response = self.connection.put(url, files={"file": fp}) + data = MultipartEncoder(fields={"file": (os.path.basename(path), fp, self._get_mime_type(path))}) + response = self.connection.put(url, data=data, headers={"Content-Type": data.content_type}) except OSError: raise UnexpectedException(f"Could not open `{path}` file.") + except MemoryError: + raise UnexpectedException("Out of memory.") else: # update metadata - data: dict[str, str | int] = {"name": file.name, "description": file.description} + data = {"name": file.name, "description": file.description} response = self.connection.put(url, data=data) self._raise_response_error(response) return True @@ -106,3 +121,9 @@ class FilesApi(BaseApi): except PermissionError: print(f"Cannot create file `{path}`: Permission denied.") return True + + def _get_mime_type(self, path: str) -> str: + mt = mimetypes.guess_type(path) + if mt: + return mt[0] or self.FALLBACK_MIMETYPE + return self.FALLBACK_MIMETYPE diff --git a/mdrsclient/commands/upload.py b/mdrsclient/commands/upload.py index fbc7c82..0199849 100644 --- a/mdrsclient/commands/upload.py +++ b/mdrsclient/commands/upload.py @@ -106,4 +106,4 @@ class UploadCommand(BaseCommand): file_api.update(file, info.path) print(os.path.join(info.folder.path, basename)) except MDRSException as e: - print(f"API Error: {e}") + print(f"Error: {e}") diff --git a/mdrsclient/connection.py b/mdrsclient/connection.py index 1124925..a4700e2 100644 --- a/mdrsclient/connection.py +++ b/mdrsclient/connection.py @@ -1,9 +1,9 @@ import platform import threading -from io import BufferedReader from typing import TypedDict from requests import Response, Session +from requests_toolbelt.multipart.encoder import MultipartEncoder # Unpack is new in 3.11 from typing_extensions import Unpack @@ -21,14 +21,14 @@ class _KwArgsMDRSConnectionGet(TypedDict, total=False): class _KwArgsMDRSConnectionPost(TypedDict, total=False): params: dict[str, str | int] - data: dict[str, str | int] - files: dict[str, BufferedReader] + data: dict[str, str | int] | MultipartEncoder + headers: dict[str, str] class _KwArgsMDRSConnectionPut(TypedDict, total=False): params: dict[str, str | int] - data: dict[str, str | int] - files: dict[str, BufferedReader] + data: dict[str, str | int] | MultipartEncoder + headers: dict[str, str] class _KwArgsMDRSConnectionDelete(TypedDict, total=False): diff --git a/pyproject.toml b/pyproject.toml index 98c423d..cce21d2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "mdrs-client-python" -version = "1.3.3" +version = "1.3.4" description = "The mdrs-client-python is python library and a command-line client for up- and downloading files to and from MDRS based repository." authors = ["Yoshihiro OKUMURA "] license = "MIT" @@ -22,19 +22,20 @@ packages = [ [tool.poetry.dependencies] python = "^3.10" -requests = "^2.31.0" +requests = "^2.32.3" +requests-toolbelt = "^1.0.0" python-dotenv = "^1.0.1" -pydantic = "^2.6.1" -pydantic-settings = "^2.1.0" +pydantic = "^2.8.2" +pydantic-settings = "^2.3.4" PyJWT = "^2.8.0" validators = "^0.22.0" [tool.poetry.group.dev.dependencies] -black = "^24.2.0" -flake8 = "^7.0.0" +black = "^24.2.2" +flake8 = "^7.1.0" Flake8-pyproject = "^1.2.3" isort = "^5.13.2" -pyright = "^1.1.350" +pyright = "^1.1.370" [tool.poetry.scripts] mdrs = 'mdrsclient.__main__:main'