use concurrent for the multiple file uploads instead of multiprocess.

This commit is contained in:
Yoshihiro OKUMURA 2023-05-09 13:08:50 +09:00
parent 6e065e7e25
commit c724af538b
Signed by: orrisroot
GPG Key ID: 470AA444C92904B2
11 changed files with 122 additions and 129 deletions

View File

@ -4,6 +4,7 @@ import requests
from pydantic import parse_obj_as
from requests import Response
from mdrsclient.connection import MDRSConnection
from mdrsclient.exceptions import (
BadRequestException,
ForbiddenException,
@ -11,27 +12,28 @@ from mdrsclient.exceptions import (
UnexpectedException,
)
from mdrsclient.models import DRFStandardizedErrors
from mdrsclient.session import MDRSSession
class BaseApi(ABC):
def __init__(self, session: MDRSSession) -> None:
self.session = session
connection: MDRSConnection
def __init__(self, connection: MDRSConnection) -> None:
self.connection = connection
def _get(self, url, *args, **kwargs) -> Response:
return self.session.get(self.__build_url(url), *args, **kwargs)
return self.connection.session.get(self.__build_url(url), *args, **kwargs)
def _post(self, url, *args, **kwargs) -> Response:
return self.session.post(self.__build_url(url), *args, **kwargs)
return self.connection.session.post(self.__build_url(url), *args, **kwargs)
def _put(self, url, *args, **kwargs) -> Response:
return self.session.put(self.__build_url(url), *args, **kwargs)
return self.connection.session.put(self.__build_url(url), *args, **kwargs)
def _delete(self, url, *args, **kwargs) -> Response:
return self.session.delete(self.__build_url(url), *args, **kwargs)
return self.connection.session.delete(self.__build_url(url), *args, **kwargs)
def _patch(self, url, *args, **kwargs) -> Response:
return self.session.patch(self.__build_url(url), *args, **kwargs)
return self.connection.session.patch(self.__build_url(url), *args, **kwargs)
def _raise_response_error(self, response: Response) -> None:
if response.status_code >= 300:
@ -48,4 +50,4 @@ class BaseApi(ABC):
raise UnexpectedException(errors.errors[0].detail)
def __build_url(self, *args: tuple) -> str:
return self.session.build_url(*args)
return self.connection.build_url(*args)

View File

@ -21,7 +21,7 @@ class FileApi(BaseApi):
def retrieve(self, id: str) -> File:
print(self.__class__.__name__ + "::" + sys._getframe().f_code.co_name)
url = self.ENTRYPOINT + id + "/"
token_check(self.session)
token_check(self.connection)
response = self._get(url)
self._raise_response_error(response)
return parse_obj_as(File, response.json())
@ -29,7 +29,7 @@ class FileApi(BaseApi):
def create(self, folder_id: str, path: str) -> str:
print(self.__class__.__name__ + "::" + sys._getframe().f_code.co_name)
url = self.ENTRYPOINT
token_check(self.session)
token_check(self.connection)
data = {"folder_id": folder_id}
try:
with open(path, mode="rb") as fp:
@ -43,7 +43,7 @@ class FileApi(BaseApi):
def update(self, file: File, path: str | None) -> bool:
print(self.__class__.__name__ + "::" + sys._getframe().f_code.co_name)
url = self.ENTRYPOINT + file.id + "/"
token_check(self.session)
token_check(self.connection)
if path is not None:
try:
with open(path, mode="rb") as fp:
@ -59,7 +59,7 @@ class FileApi(BaseApi):
def destroy(self, file: File) -> bool:
print(self.__class__.__name__ + "::" + sys._getframe().f_code.co_name)
url = self.ENTRYPOINT + file.id + "/"
token_check(self.session)
token_check(self.connection)
response = self._delete(url)
self._raise_response_error(response)
return True
@ -68,7 +68,7 @@ class FileApi(BaseApi):
print(self.__class__.__name__ + "::" + sys._getframe().f_code.co_name)
url = self.ENTRYPOINT + file.id + "/move/"
data = {"folder": folder_id}
token_check(self.session)
token_check(self.connection)
response = self._post(url, data=data)
self._raise_response_error(response)
return True
@ -76,7 +76,7 @@ class FileApi(BaseApi):
def metadata(self, file: File) -> dict:
print(self.__class__.__name__ + "::" + sys._getframe().f_code.co_name)
url = self.ENTRYPOINT + file.id + "/metadata/"
token_check(self.session)
token_check(self.connection)
response = self._get(url)
self._raise_response_error(response)
return response.json()

View File

@ -21,7 +21,7 @@ class FolderApi(BaseApi):
print(self.__class__.__name__ + "::" + sys._getframe().f_code.co_name)
url = self.ENTRYPOINT
params = {"path": path, "laboratory_id": laboratory_id}
token_check(self.session)
token_check(self.connection)
response = self._get(url, params=params)
self._raise_response_error(response)
ret: list[FolderSimple] = []
@ -32,7 +32,7 @@ class FolderApi(BaseApi):
def retrieve(self, id: str) -> Folder:
print(self.__class__.__name__ + "::" + sys._getframe().f_code.co_name)
url = self.ENTRYPOINT + id + "/"
token_check(self.session)
token_check(self.connection)
response = self._get(url)
self._raise_response_error(response)
ret = parse_obj_as(Folder, response.json())
@ -42,7 +42,7 @@ class FolderApi(BaseApi):
print(self.__class__.__name__ + "::" + sys._getframe().f_code.co_name)
url = self.ENTRYPOINT
data = {"name": name, "parent_id": parent_id, "description": "", "template_id": -1}
token_check(self.session)
token_check(self.connection)
response = self._post(url, data=data)
self._raise_response_error(response)
ret = parse_obj_as(FolderCreateResponse, response.json())
@ -55,7 +55,7 @@ class FolderApi(BaseApi):
"name": folder.name,
"description": folder.description,
}
token_check(self.session)
token_check(self.connection)
response = self._put(url, data=data)
self._raise_response_error(response)
return True
@ -63,7 +63,7 @@ class FolderApi(BaseApi):
def destroy(self, id: str) -> bool:
print(self.__class__.__name__ + "::" + sys._getframe().f_code.co_name)
url = self.ENTRYPOINT + id + "/"
token_check(self.session)
token_check(self.connection)
response = self._delete(url)
self._raise_response_error(response)
return True
@ -71,7 +71,7 @@ class FolderApi(BaseApi):
def metadata(self, id: str) -> dict:
print(self.__class__.__name__ + "::" + sys._getframe().f_code.co_name)
url = self.ENTRYPOINT + id + "/metadata/"
token_check(self.session)
token_check(self.connection)
response = self._get(url)
self._raise_response_error(response)
return response.json()

View File

@ -14,7 +14,7 @@ class LaboratoryApi(BaseApi):
def list(self) -> Laboratories:
print(self.__class__.__name__ + "::" + sys._getframe().f_code.co_name)
url = self.ENTRYPOINT
token_check(self.session)
token_check(self.connection)
response = self._get(url)
self._raise_response_error(response)
ret = Laboratories([])

View File

@ -1,15 +1,15 @@
from mdrsclient.api.user import UserApi
from mdrsclient.connection import MDRSConnection
from mdrsclient.exceptions import UnauthorizedException
from mdrsclient.session import MDRSSession
def token_check(session: MDRSSession) -> None:
if session.token is not None:
if session.token.is_refresh_required:
user_api = UserApi(session)
def token_check(connection: MDRSConnection) -> None:
if connection.token is not None:
if connection.token.is_refresh_required:
user_api = UserApi(connection)
try:
session.token = user_api.refresh(session.token)
connection.token = user_api.refresh(connection.token)
except UnauthorizedException:
session.logout()
elif session.token.is_expired:
session.logout()
connection.logout()
elif connection.token.is_expired:
connection.logout()

View File

@ -1,7 +1,7 @@
import dataclasses
import os
from argparse import Namespace, _SubParsersAction
from multiprocessing import Process
from concurrent.futures import ThreadPoolExecutor
from pydantic import parse_obj_as
from pydantic.dataclasses import dataclass
@ -9,18 +9,18 @@ from pydantic.dataclasses import dataclass
from mdrsclient.api import FileApi, FolderApi
from mdrsclient.commands.base import BaseCommand
from mdrsclient.commands.utils import (
create_session,
create_connection,
find_folder,
find_laboratory,
parse_remote_host_with_path,
)
from mdrsclient.connection import MDRSConnection
from mdrsclient.exceptions import (
IllegalArgumentException,
MDRSException,
UnexpectedException,
)
from mdrsclient.models import File, Folder
from mdrsclient.session import MDRSSession
from mdrsclient.settings import NUMBER_OF_PROCESS
@ -66,14 +66,14 @@ class FileCommand(BaseCommand):
local_path = os.path.realpath(args.local_path)
if not os.path.exists(local_path):
raise IllegalArgumentException(f"File or directory `{args.local_path}` not found.")
session = create_session(remote)
laboratory = find_laboratory(session, laboratory_name)
folder = find_folder(session, laboratory, path)
connection = create_connection(remote)
laboratory = find_laboratory(connection, laboratory_name)
folder = find_folder(connection, laboratory, path)
upload_files: list[UploadFile] = []
if os.path.isdir(local_path):
if not args.recursive:
raise IllegalArgumentException(f"Cannot upload `{args.local_path}`: Is a directory.")
folder_api = FolderApi(session)
folder_api = FolderApi(connection)
folders: dict[str, Folder] = {}
folders[path] = folder
local_basename = os.path.basename(local_path)
@ -107,7 +107,7 @@ class FileCommand(BaseCommand):
upload_files.append(UploadFile(folders[dest_folder_path], os.path.join(dirpath, filename)))
else:
upload_files.append(UploadFile(folder, local_path))
FileCommand._multiple_upload(session, upload_files)
FileCommand._multiple_upload(connection, upload_files)
@staticmethod
def download(args: Namespace) -> None:
@ -115,16 +115,16 @@ class FileCommand(BaseCommand):
path = path.rstrip("/")
parent_path = os.path.dirname(path)
file_name = os.path.basename(path)
session = create_session(remote)
connection = create_connection(remote)
if not os.path.isdir(args.local_path):
raise IllegalArgumentException(f"Local directory `{args.local_path}` not found.")
local_file = os.path.join(args.local_path, file_name)
laboratory = find_laboratory(session, laboratory_name)
folder = find_folder(session, laboratory, parent_path)
laboratory = find_laboratory(connection, laboratory_name)
folder = find_folder(connection, laboratory, parent_path)
file = folder.find_file(file_name)
if file is None:
raise IllegalArgumentException(f"File `{file_name}` not found.")
r = session.get(session.build_url("v2/" + file.download_url), stream=True)
r = connection.session.get(connection.build_url("v2/" + file.download_url), stream=True)
try:
with open(local_file, "wb") as f:
for chunk in r.iter_content(chunk_size=4096):
@ -151,17 +151,17 @@ class FileCommand(BaseCommand):
else:
dest_dirpath = os.path.dirname(dest_path)
dest_filename = os.path.basename(dest_path)
session = create_session(src_remote)
laboratory = find_laboratory(session, src_laboratory_name)
src_folder = find_folder(session, laboratory, src_dirpath)
dest_folder = find_folder(session, laboratory, dest_dirpath)
connection = create_connection(src_remote)
laboratory = find_laboratory(connection, src_laboratory_name)
src_folder = find_folder(connection, laboratory, src_dirpath)
dest_folder = find_folder(connection, laboratory, dest_dirpath)
src_file = src_folder.find_file(src_filename)
if src_file is None:
raise IllegalArgumentException(f"File `{src_filename}` not found.")
dest_file = dest_folder.find_file(dest_filename)
if dest_file is not None:
raise IllegalArgumentException(f"File `{dest_filename}` already exists.")
file_api = FileApi(session)
file_api = FileApi(connection)
if src_folder.id != dest_folder.id:
file_api.move(src_file, dest_folder.id)
if dest_filename != src_filename:
@ -175,13 +175,13 @@ class FileCommand(BaseCommand):
path = path.rstrip("/")
parent_path = os.path.dirname(path)
file_name = os.path.basename(path)
session = create_session(remote)
laboratory = find_laboratory(session, laboratory_name)
folder = find_folder(session, laboratory, parent_path)
connection = create_connection(remote)
laboratory = find_laboratory(connection, laboratory_name)
folder = find_folder(connection, laboratory, parent_path)
file = folder.find_file(file_name)
if file is None:
raise IllegalArgumentException(f"File `{file_name}` not found.")
file_api = FileApi(session)
file_api = FileApi(connection)
file_api.destroy(file)
@staticmethod
@ -190,35 +190,24 @@ class FileCommand(BaseCommand):
path = path.rstrip("/")
parent_path = os.path.dirname(path)
file_name = os.path.basename(path)
session = create_session(remote)
laboratory = find_laboratory(session, laboratory_name)
folder = find_folder(session, laboratory, parent_path)
connection = create_connection(remote)
laboratory = find_laboratory(connection, laboratory_name)
folder = find_folder(connection, laboratory, parent_path)
file = folder.find_file(file_name)
if file is None:
raise IllegalArgumentException(f"File `{file_name}` not found.")
file_api = FileApi(session)
file_api = FileApi(connection)
metadata = file_api.metadata(file)
print(metadata)
@staticmethod
def _multiple_upload(session: MDRSSession, upload_files: list[UploadFile]) -> None:
processes: list[Process] = []
for idx in range(NUMBER_OF_PROCESS):
processes.append(
Process(
target=FileCommand._multiple_upload_worker,
args=(session, upload_files, idx, NUMBER_OF_PROCESS),
)
)
for process in processes:
process.start()
for process in processes:
process.join()
def _multiple_upload(connection: MDRSConnection, upload_files: list[UploadFile]) -> None:
file_api = FileApi(connection)
with ThreadPoolExecutor(max_workers=NUMBER_OF_PROCESS) as pool:
pool.map(lambda x: FileCommand._multiple_upload_worker(file_api, x), upload_files)
@staticmethod
def _multiple_upload_worker(session: MDRSSession, upload_files: list[UploadFile], idx: int, num_proc: int) -> None:
file_api = FileApi(session)
for upload_file in upload_files[idx::num_proc]:
def _multiple_upload_worker(file_api: FileApi, upload_file: UploadFile) -> None:
file_name = os.path.basename(upload_file.path)
file = next((x for x in upload_file.folder.files if x.name == file_name), None)
try:

View File

@ -4,7 +4,7 @@ from argparse import Namespace, _SubParsersAction
from mdrsclient.api import FolderApi
from mdrsclient.commands.base import BaseCommand
from mdrsclient.commands.utils import (
create_session,
create_connection,
find_folder,
find_laboratory,
parse_remote_host_with_path,
@ -34,9 +34,9 @@ class FolderCommand(BaseCommand):
@staticmethod
def list(args: Namespace) -> None:
(remote, laboratory_name, path) = parse_remote_host_with_path(args.remote_path)
session = create_session(remote)
laboratory = find_laboratory(session, laboratory_name)
folder = find_folder(session, laboratory, path)
connection = create_connection(remote)
laboratory = find_laboratory(connection, laboratory_name)
folder = find_folder(connection, laboratory, path)
label = {
"type": "Type",
"acl": "Access",
@ -49,7 +49,7 @@ class FolderCommand(BaseCommand):
for key in label.keys():
length[key] = len(label[key])
for sub_folder in folder.sub_folders:
sub_laboratory = session.laboratories.find_by_id(sub_folder.lab_id)
sub_laboratory = connection.laboratories.find_by_id(sub_folder.lab_id)
sub_laboratory_name = sub_laboratory.name if sub_laboratory is not None else "(invalid)"
length["acl"] = max(length["acl"], len(sub_folder.access_level_name))
length["laboratory"] = max(length["laboratory"], len(sub_laboratory_name))
@ -71,7 +71,7 @@ class FolderCommand(BaseCommand):
print("-" * len(header.expandtabs()))
for sub_folder in sorted(folder.sub_folders, key=lambda x: x.name):
sub_laboratory = session.laboratories.find_by_id(sub_folder.lab_id)
sub_laboratory = connection.laboratories.find_by_id(sub_folder.lab_id)
sub_laboratory_name = sub_laboratory.name if sub_laboratory is not None else "(invalid)"
print(
f"{'[d]':{length['type']}}\t{sub_folder.access_level_name:{length['acl']}}\t"
@ -91,27 +91,27 @@ class FolderCommand(BaseCommand):
path = path.rstrip("/")
parent_path = os.path.dirname(path)
folder_name = os.path.basename(path)
session = create_session(remote)
laboratory = find_laboratory(session, laboratory_name)
folder = find_folder(session, laboratory, parent_path)
folder_api = FolderApi(session)
connection = create_connection(remote)
laboratory = find_laboratory(connection, laboratory_name)
folder = find_folder(connection, laboratory, parent_path)
folder_api = FolderApi(connection)
folder_api.create(folder_name, folder.id)
@staticmethod
def rmdir(args: Namespace) -> None:
(remote, laboratory_name, path) = parse_remote_host_with_path(args.remote_path)
session = create_session(remote)
laboratory = find_laboratory(session, laboratory_name)
folder = find_folder(session, laboratory, path)
folder_api = FolderApi(session)
connection = create_connection(remote)
laboratory = find_laboratory(connection, laboratory_name)
folder = find_folder(connection, laboratory, path)
folder_api = FolderApi(connection)
folder_api.destroy(folder.id)
@staticmethod
def metadata(args: Namespace) -> None:
(remote, laboratory_name, path) = parse_remote_host_with_path(args.remote_path)
session = create_session(remote)
laboratory = find_laboratory(session, laboratory_name)
folder = find_folder(session, laboratory, path)
folder_api = FolderApi(session)
connection = create_connection(remote)
laboratory = find_laboratory(connection, laboratory_name)
folder = find_folder(connection, laboratory, path)
folder_api = FolderApi(connection)
metadata = folder_api.metadata(folder.id)
print(metadata)

View File

@ -2,7 +2,7 @@ from argparse import Namespace, _SubParsersAction
from mdrsclient.api import LaboratoryApi
from mdrsclient.commands.base import BaseCommand
from mdrsclient.commands.utils import create_session, parse_remote_host
from mdrsclient.commands.utils import create_connection, parse_remote_host
class LaboratoryCommand(BaseCommand):
@ -16,10 +16,10 @@ class LaboratoryCommand(BaseCommand):
@staticmethod
def list(args: Namespace) -> None:
remote = parse_remote_host(args.remote)
session = create_session(remote)
laboratory_api = LaboratoryApi(session)
connection = create_connection(remote)
laboratory_api = LaboratoryApi(connection)
laboratories = laboratory_api.list()
session.laboratories = laboratories
connection.laboratories = laboratories
label = {"id": "ID", "name": "Name", "pi_name": "PI", "full_name": "Laboratory"}
length: dict[str, int] = {}
for key in label.keys():

View File

@ -5,8 +5,8 @@ from mdrsclient.api import UserApi
from mdrsclient.commands.base import BaseCommand
from mdrsclient.commands.utils import parse_remote_host
from mdrsclient.config import ConfigFile
from mdrsclient.connection import MDRSConnection
from mdrsclient.exceptions import MissingConfigurationException
from mdrsclient.session import MDRSSession
class UserCommand(BaseCommand):
@ -31,13 +31,13 @@ class UserCommand(BaseCommand):
config = ConfigFile(remote)
if config.url is None:
raise MissingConfigurationException(f"Remote host `{remote}` is not found.")
session = MDRSSession(config.remote, config.url)
connection = MDRSConnection(config.remote, config.url)
username = input("Username: ").strip()
password = getpass.getpass("Password: ").strip()
user_api = UserApi(session)
user_api = UserApi(connection)
(user, token) = user_api.auth(username, password)
session.user = user
session.token = token
connection.user = user
connection.token = token
@staticmethod
def logout(args: Namespace) -> None:
@ -45,8 +45,8 @@ class UserCommand(BaseCommand):
config = ConfigFile(remote)
if config.url is None:
raise MissingConfigurationException(f"Remote host `{remote}` is not found.")
session = MDRSSession(config.remote, config.url)
session.logout()
connection = MDRSConnection(config.remote, config.url)
connection.logout()
@staticmethod
def whoami(args: Namespace) -> None:
@ -54,8 +54,8 @@ class UserCommand(BaseCommand):
config = ConfigFile(remote)
if config.url is None:
raise MissingConfigurationException(f"Remote host `{remote}` is not found.")
session = MDRSSession(config.remote, config.url)
if session.token is not None and session.token.is_expired:
session.logout()
username = session.user.username if session.user is not None else "(Anonymous)"
connection = MDRSConnection(config.remote, config.url)
if connection.token is not None and connection.token.is_expired:
connection.logout()
username = connection.user.username if connection.user is not None else "(Anonymous)"
print(username)

View File

@ -2,6 +2,7 @@ import re
from mdrsclient.api import FolderApi, LaboratoryApi
from mdrsclient.config import ConfigFile
from mdrsclient.connection import MDRSConnection
from mdrsclient.exceptions import (
IllegalArgumentException,
MissingConfigurationException,
@ -9,28 +10,27 @@ from mdrsclient.exceptions import (
UnexpectedException,
)
from mdrsclient.models import Folder, Laboratory
from mdrsclient.session import MDRSSession
def create_session(remote: str) -> MDRSSession:
def create_connection(remote: str) -> MDRSConnection:
config = ConfigFile(remote)
if config.url is None:
raise MissingConfigurationException(f"Remote host `{remote}` is not found.")
return MDRSSession(config.remote, config.url)
return MDRSConnection(config.remote, config.url)
def find_laboratory(session: MDRSSession, laboratory_name: str) -> Laboratory:
if session.laboratories.empty() or session.token is not None and session.token.is_expired:
laboratory_api = LaboratoryApi(session)
session.laboratories = laboratory_api.list()
laboratory = session.laboratories.find_by_name(laboratory_name)
def find_laboratory(connection: MDRSConnection, laboratory_name: str) -> Laboratory:
if connection.laboratories.empty() or connection.token is not None and connection.token.is_expired:
laboratory_api = LaboratoryApi(connection)
connection.laboratories = laboratory_api.list()
laboratory = connection.laboratories.find_by_name(laboratory_name)
if laboratory is None:
raise IllegalArgumentException(f"Laboratory `{laboratory_name}` not found.")
return laboratory
def find_folder(session: MDRSSession, laboratory: Laboratory, path: str) -> Folder:
folder_api = FolderApi(session)
def find_folder(connection: MDRSConnection, laboratory: Laboratory, path: str) -> Folder:
folder_api = FolderApi(connection)
folders = folder_api.list(laboratory.id, path)
if len(folders) != 1:
raise UnexpectedException(f"Folder `{path}` not found.")

View File

@ -5,13 +5,15 @@ from mdrsclient.exceptions import MissingConfigurationException
from mdrsclient.models import Laboratories, Token, User
class MDRSSession(requests.Session):
class MDRSConnection:
url: str
session: requests.Session
__cache: CacheFile
def __init__(self, remote: str, url: str) -> None:
super().__init__()
self.url = url
self.session = requests.Session()
self.__cache = CacheFile(remote)
self.__prepare_headers()
@ -25,7 +27,7 @@ class MDRSSession(requests.Session):
def logout(self) -> None:
del self.__cache.user
del self.__cache.token
self.headers.update({"Authorization": ""})
self.session.headers.update({"Authorization": ""})
@property
def user(self) -> User | None:
@ -53,6 +55,6 @@ class MDRSSession(requests.Session):
self.__cache.laboratories = laboratories
def __prepare_headers(self) -> None:
self.headers.update({"accept": "application/json"})
self.session.headers.update({"accept": "application/json"})
if self.token is not None:
self.headers.update({"Authorization": f"Bearer {self.token.access}"})
self.session.headers.update({"Authorization": f"Bearer {self.token.access}"})