From 5915f5c73df7f06e8b730f96c50c0b877be515be Mon Sep 17 00:00:00 2001 From: Maximilian Moser <maximilian.moser@tuwien.ac.at> Date: Thu, 7 Apr 2022 10:44:51 +0200 Subject: [PATCH] Automatically update the bucket limits when adding files to drafts * previously, adding files to drafts could fail if the configured limits (max. file size, and bucket quota) were too small for the files * however, the main use for these CLI commands is to ingest files that are too large for self-upload via UI or API --- invenio_utilities_tuw/cli/drafts.py | 37 +++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/invenio_utilities_tuw/cli/drafts.py b/invenio_utilities_tuw/cli/drafts.py index 6f04e05..45b386b 100644 --- a/invenio_utilities_tuw/cli/drafts.py +++ b/invenio_utilities_tuw/cli/drafts.py @@ -38,6 +38,36 @@ from .utils import ( ) +def auto_increase_bucket_limits(bucket, filepaths, to_unlimited=False): + """Dynamically increase the bucket quota if necessary.""" + # see what the file sizes look like + file_sizes = [os.path.getsize(filepath) for filepath in filepaths] + sum_sizes = sum(file_sizes) + max_size = max(file_sizes) + + if bucket.quota_left is not None: + if to_unlimited: + bucket.quota_size = None + + else: + # see how big the files are, and compare it against the bucket's quota + req_extra_quota = sum_sizes - bucket.quota_left + + # if we need some extra quota, increase it + if req_extra_quota > 0: + bucket.quota_size += req_extra_quota + + if bucket.max_file_size and bucket.max_file_size < max_size: + # do similar checks for the maximum file size + if to_unlimited: + bucket.max_file_size = None + else: + bucket.max_file_size = max_size + + # make changes known + db.session.flush() + + @click.group() def drafts(): """Management commands for creation and publication of drafts.""" @@ -121,16 +151,20 @@ def create_draft(metadata_path, publish, user, owners, vanity_pid): draft.commit() file_names = [] + file_paths = [] if isdir(deposit_files_path): 
exists = lambda fn: isfile(join(deposit_files_path, fn)) content = os.listdir(deposit_files_path) file_names = [basename(fn) for fn in content if exists(fn)] + for fn in file_names: + file_paths.append(join(deposit_files_path, fn)) if len(content) != len(file_names): ignored = [basename(fn) for fn in content if not exists(fn)] msg = f"ignored in '{deposit_files_path}': {ignored}" click.secho(msg, fg="yellow", err=True) + auto_increase_bucket_limits(draft.files.bucket, file_paths) file_service.init_files( id_=recid, identity=identity, data=[{"key": fn} for fn in file_names] ) @@ -289,6 +323,8 @@ def add_files(filepaths, pid, pid_type, user): elif isfile(file_path): paths.append(file_path) + # make sure that the files fit in the bucket, and add them + auto_increase_bucket_limits(draft.files.bucket, paths) keys = [basename(fp) for fp in paths] if len(set(keys)) != len(keys): click.secho( @@ -296,6 +332,7 @@ def add_files(filepaths, pid, pid_type, user): ) sys.exit(1) + # TODO: this fails if a file with the same name already exists file_service.init_files( id_=recid, identity=identity, data=[{"key": basename(fp)} for fp in paths] ) -- GitLab