From 114386b413c0088410b8042f5a94921c7ba5d297 Mon Sep 17 00:00:00 2001 From: Maximilian Moser <maximilian.moser@tuwien.ac.at> Date: Sun, 9 Mar 2025 22:01:25 +0100 Subject: [PATCH 1/8] Use `current_rdm_records_service` proxy directly --- invenio_utilities_tuw/cli/drafts.py | 16 +++------------- invenio_utilities_tuw/cli/files.py | 6 ++---- invenio_utilities_tuw/cli/records.py | 13 +++---------- invenio_utilities_tuw/cli/utils.py | 7 ++----- invenio_utilities_tuw/config.py | 13 +------------ invenio_utilities_tuw/utils.py | 17 +---------------- 6 files changed, 12 insertions(+), 60 deletions(-) diff --git a/invenio_utilities_tuw/cli/drafts.py b/invenio_utilities_tuw/cli/drafts.py index 1ae567f..fabf8b6 100644 --- a/invenio_utilities_tuw/cli/drafts.py +++ b/invenio_utilities_tuw/cli/drafts.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2020 - 2021 TU Wien. +# Copyright (C) 2020 - 2025 TU Wien. # # Invenio-Utilities-TUW is free software; you can redistribute it and/or # modify it under the terms of the MIT License; see LICENSE file for more @@ -19,9 +19,10 @@ import click from flask.cli import with_appcontext from invenio_access.permissions import system_identity from invenio_db import db +from invenio_rdm_records.proxies import current_rdm_records_service as service from invenio_records_resources.services.errors import PermissionDeniedError -from ..utils import get_identity_for_user, get_record_service, get_user_by_identifier +from ..utils import get_identity_for_user, get_user_by_identifier from .options import ( option_as_user, option_owners, @@ -86,7 +87,6 @@ def drafts(): def list_drafts(user): """List all drafts accessible to the given user.""" identity = get_identity_for_user(user) - service = get_record_service() recids = [ dm.json["id"] for dm in service.draft_cls.model_cls.query.all() @@ -134,7 +134,6 @@ def create_draft(metadata_path, publish, user, owners, vanity_pid): draft, if such a subdirectory exists. """ recid = None - service = get_record_service() file_service = service.draft_files identity = get_identity_for_user(user) @@ -220,7 +219,6 @@ def show_draft(pid, pid_type, user, pretty_print, raw): """Show the stored data for the specified draft.""" pid = convert_to_recid(pid, pid_type) identity = get_identity_for_user(user) - service = get_record_service() indent = 2 if pretty_print else None draft = service.read_draft(id_=pid, identity=identity) @@ -250,7 +248,6 @@ def update_draft(metadata_file, pid, pid_type, user, patch, owners): """Update the specified draft's metadata.""" pid = convert_to_recid(pid, pid_type) identity = get_identity_for_user(user) - service = get_record_service() metadata = json.load(metadata_file) if patch: @@ -275,7 +272,6 @@ def publish_draft(pid, pid_type, user): """Publish the specified draft.""" pid = convert_to_recid(pid, pid_type) identity = get_identity_for_user(user) - service = get_record_service() service.publish(id_=pid, identity=identity) click.secho(pid, fg="green") @@ -290,7 +286,6 @@ def delete_draft(pid, pid_type, user): """Delete the specified draft.""" pid = convert_to_recid(pid, pid_type) identity = get_identity_for_user(user) - service = get_record_service() service.delete_draft(id_=pid, identity=identity) click.secho(pid, fg="red") @@ -311,7 +306,6 @@ def add_files(filepaths, pid, pid_type, user): """Add the specified files to the draft.""" recid = convert_to_recid(pid, pid_type) identity = get_identity_for_user(user) - service = get_record_service() file_service = service.draft_files draft = service.read_draft(id_=recid, identity=identity)._record @@ -385,7 +379,6 @@ def remove_files(filekeys, pid, pid_type, user): """ recid = convert_to_recid(pid, pid_type) identity = get_identity_for_user(user) - service = get_record_service() file_service = service.draft_files for file_key in filekeys: @@ -412,7 +405,6 @@ def list_files(pid, pid_type, user): """Show a list of files deposited with the draft.""" recid = convert_to_recid(pid, pid_type) identity = get_identity_for_user(user) - service = get_record_service() draft = service.read_draft(id_=recid, identity=identity)._record for name, rec_file in draft.files.entries.items(): @@ -430,7 +422,6 @@ def list_files(pid, pid_type, user): def verify_files(pid, pid_type): """Verify the checksums for each of the draft's files.""" recid = convert_to_recid(pid, pid_type) - service = get_record_service() draft = service.read_draft(id_=recid, identity=system_identity)._record num_errors = 0 @@ -469,7 +460,6 @@ def verify_files(pid, pid_type): @with_appcontext def list_stale_drafts(num_days): """List all drafts that haven't been updated for a while.""" - service = get_record_service() dc, mc = service.draft_cls, service.draft_cls.model_cls cutoff_date = datetime.utcnow() - timedelta(days=abs(num_days)) diff --git a/invenio_utilities_tuw/cli/files.py b/invenio_utilities_tuw/cli/files.py index 7282fa4..0c34563 100644 --- a/invenio_utilities_tuw/cli/files.py +++ b/invenio_utilities_tuw/cli/files.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2020 - 2021 TU Wien. +# Copyright (C) 2020 - 2025 TU Wien. # # Invenio-Utilities-TUW is free software; you can redistribute it and/or # modify it under the terms of the MIT License; see LICENSE file for more @@ -17,9 +17,9 @@ from flask.cli import with_appcontext from invenio_access.permissions import system_identity from invenio_db import db from invenio_files_rest.models import FileInstance, Location, ObjectVersion +from invenio_rdm_records.proxies import current_rdm_records_service as service from sqlalchemy.exc import IntegrityError -from ..utils import get_record_service from .options import option_pid_type, option_pid_value_optional from .utils import convert_to_recid @@ -136,7 +136,6 @@ def list_deleted_files(pid, pid_type): (via its PID). """ recid = convert_to_recid(pid, pid_type) if pid else None - service = get_record_service() identity = system_identity # if a PID was specified, limit the cleaning to this record's bucket @@ -174,7 +173,6 @@ def hard_delete_files(pid, pid_type): (via its PID). """ recid = convert_to_recid(pid, pid_type) if pid else None - service = get_record_service() identity = system_identity # if a PID was specified, limit the cleaning to this record's bucket diff --git a/invenio_utilities_tuw/cli/records.py b/invenio_utilities_tuw/cli/records.py index b062a6e..f13a8a2 100644 --- a/invenio_utilities_tuw/cli/records.py +++ b/invenio_utilities_tuw/cli/records.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2020 - 2021 TU Wien. +# Copyright (C) 2020 - 2025 TU Wien. # # Invenio-Utilities-TUW is free software; you can redistribute it and/or # modify it under the terms of the MIT License; see LICENSE file for more @@ -14,9 +14,10 @@ import sys import click from flask.cli import with_appcontext from invenio_db import db +from invenio_rdm_records.proxies import current_rdm_records_service as service from invenio_records_resources.services.errors import PermissionDeniedError -from ..utils import get_identity_for_user, get_record_service, get_user_by_identifier +from ..utils import get_identity_for_user, get_user_by_identifier from .options import ( option_as_user, option_owners, @@ -40,7 +41,6 @@ def records(): def list_records(user): """List all records accessible to the given user.""" identity = get_identity_for_user(user) - service = get_record_service() rec_model_cls = service.record_cls.model_cls recids = [ @@ -76,7 +76,6 @@ def show_record(pid, pid_type, user, pretty_print, raw): """Show the stored data for the specified draft.""" pid = convert_to_recid(pid, pid_type) identity = get_identity_for_user(user) - service = get_record_service() indent = 2 if pretty_print else None record = service.read(id_=pid, identity=identity) @@ -116,7 +115,6 @@ def update_record(metadata_file, pid, pid_type, user, patch, owners, direct): """Update the specified draft's metadata.""" pid = convert_to_recid(pid, pid_type) identity = get_identity_for_user(user) - service = get_record_service() metadata = json.load(metadata_file) if patch: @@ -167,7 +165,6 @@ def delete_record(pid, pid_type, user): """Delete the specified record.""" identity = get_identity_for_user(user) recid = convert_to_recid(pid, pid_type) - service = get_record_service() service.delete(id_=recid, identity=identity) click.secho(recid, fg="red") @@ -187,7 +184,6 @@ def list_files(pid, pid_type, user): """Show a list of files deposited with the record.""" recid = convert_to_recid(pid, pid_type) identity = get_identity_for_user(user) - service = get_record_service() record = service.read(id_=recid, identity=identity)._record for name, rec_file in record.files.entries.items(): @@ -207,7 +203,6 @@ def verify_files(pid, pid_type, user): """Verify the checksums for each of the record's files.""" recid = convert_to_recid(pid, pid_type) identity = get_identity_for_user(user) - service = get_record_service() record = service.read(id_=recid, identity=identity)._record num_errors = 0 @@ -240,8 +235,6 @@ def verify_files(pid, pid_type, user): @with_appcontext def reindex_records(pids, pid_type): """Reindex all available (or just the specified) records.""" - service = get_record_service() - if pids: records = [ service.record_cls.get_record(get_object_uuid(pid, pid_type)) diff --git a/invenio_utilities_tuw/cli/utils.py b/invenio_utilities_tuw/cli/utils.py index 8fee151..59c4fb0 100644 --- a/invenio_utilities_tuw/cli/utils.py +++ b/invenio_utilities_tuw/cli/utils.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2020 - 2021 TU Wien. +# Copyright (C) 2020 - 2025 TU Wien. # # Invenio-Utilities-TUW is free software; you can redistribute it and/or # modify it under the terms of the MIT License; see LICENSE file for more @@ -14,8 +14,7 @@ from collections.abc import Iterable from invenio_db import db from invenio_pidstore.errors import PIDAlreadyExists from invenio_pidstore.models import PersistentIdentifier - -from ..utils import get_record_service +from invenio_rdm_records.proxies import current_rdm_records_service as service def read_metadata(metadata_file_path): @@ -34,8 +33,6 @@ def create_record_from_metadata( metadata, identity, vanity_pid=None, vanity_pid_type="recid" ): """Create a draft from the specified metadata.""" - service = get_record_service() - if vanity_pid is not None: # check if the vanity PID is already taken, before doing anything stupid count = PersistentIdentifier.query.filter_by( diff --git a/invenio_utilities_tuw/config.py b/invenio_utilities_tuw/config.py index 190958f..b272ed7 100644 --- a/invenio_utilities_tuw/config.py +++ b/invenio_utilities_tuw/config.py @@ -1,20 +1,9 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2020 - 2021 TU Wien. +# Copyright (C) 2020 - 2025 TU Wien. # # Invenio-Utilities-TUW is free software; you can redistribute it and/or # modify it under the terms of the MIT License; see LICENSE file for more # details. """Configuration for Invenio-Utilities-TUW.""" - -from invenio_rdm_records.proxies import current_rdm_records - - -def default_record_service_factory(): - """Return ``current_rdm_records.records_service.``""" - return current_rdm_records.records_service - - -UTILITIES_TUW_RECORD_SERVICE_FACTORY = default_record_service_factory -"""Factory function for creating a RecordService.""" diff --git a/invenio_utilities_tuw/utils.py b/invenio_utilities_tuw/utils.py index 6fcf4d8..7a84f6e 100644 --- a/invenio_utilities_tuw/utils.py +++ b/invenio_utilities_tuw/utils.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2020 - 2021 TU Wien. +# Copyright (C) 2020 - 2025 TU Wien. # # Invenio-Utilities-TUW is free software; you can redistribute it and/or # modify it under the terms of the MIT License; see LICENSE file for more @@ -10,14 +10,11 @@ from difflib import SequenceMatcher -from flask import current_app from invenio_access.permissions import any_user, system_identity from invenio_access.utils import get_identity from invenio_accounts import current_accounts from werkzeug.utils import import_string -from .config import default_record_service_factory - def get_or_import(value, default=None): """Try an import if value is an endpoint string, or return value itself.""" @@ -29,18 +26,6 @@ def get_or_import(value, default=None): return default -def get_record_service(): - """Get the configured RecordService.""" - factory = current_app.config.get("UTILITIES_TUW_RECORD_SERVICE_FACTORY", None) - - if factory is not None: - factory = get_or_import(factory) - else: - factory = default_record_service_factory - - return factory() - - def get_user_by_identifier(id_or_email): """Get the user specified via email or ID.""" if id_or_email is not None: -- GitLab From 08234824325f66d63db8937516eeb08cfa5ce474 Mon Sep 17 00:00:00 2001 From: Maximilian Moser <maximilian.moser@tuwien.ac.at> Date: Sun, 9 Mar 2025 22:19:03 +0100 Subject: [PATCH 2/8] Remove support for multiple record owners --- invenio_utilities_tuw/cli/drafts.py | 32 +++++++++++----------------- invenio_utilities_tuw/cli/options.py | 8 +++---- invenio_utilities_tuw/cli/records.py | 14 ++++++------ invenio_utilities_tuw/cli/utils.py | 21 +++++------------- 4 files changed, 28 insertions(+), 47 deletions(-) diff --git a/invenio_utilities_tuw/cli/drafts.py b/invenio_utilities_tuw/cli/drafts.py index fabf8b6..f43661b 100644 --- a/invenio_utilities_tuw/cli/drafts.py +++ b/invenio_utilities_tuw/cli/drafts.py @@ -25,7 +25,7 @@ from invenio_records_resources.services.errors import PermissionDeniedError from ..utils import get_identity_for_user, get_user_by_identifier from .options import ( option_as_user, - option_owners, + option_owner, option_pid_type, option_pid_value, option_pretty_print, @@ -37,7 +37,7 @@ from .utils import ( create_record_from_metadata, patch_metadata, read_metadata, - set_record_owners, + set_record_owner, ) @@ -119,10 +119,10 @@ def list_drafts(user): default=False, help="publish the draft after creation (default: false)", ) -@option_owners +@option_owner @option_vanity_pid @with_appcontext -def create_draft(metadata_path, publish, user, owners, vanity_pid): +def create_draft(metadata_path, publish, user, owner, vanity_pid): """Create a new record draft with the specified metadata. The specified metadata path can either point to a JSON file containing the metadata, @@ -194,9 +194,9 @@ def create_draft(metadata_path, publish, user, owners, vanity_pid): else: raise TypeError(f"neither a file nor a directory: {metadata_path}") - if owners: - owners = [get_user_by_identifier(owner) for owner in owners] - set_record_owners(draft, owners) + if owner: + owner = get_user_by_identifier(owner) + set_record_owner(draft, owner) if service.indexer: service.indexer.index(draft) @@ -242,9 +242,9 @@ def show_draft(pid, pid_type, user, pretty_print, raw): "(default: replace)" ), ) -@option_owners +@option_owner @with_appcontext -def update_draft(metadata_file, pid, pid_type, user, patch, owners): +def update_draft(metadata_file, pid, pid_type, user, patch, owner): """Update the specified draft's metadata.""" pid = convert_to_recid(pid, pid_type) identity = get_identity_for_user(user) @@ -254,10 +254,10 @@ def update_draft(metadata_file, pid, pid_type, user, patch, owners): draft_data = service.read_draft(id_=pid, identity=identity).data.copy() metadata = patch_metadata(draft_data, metadata) - if owners: + if owner: draft = service.read_draft(id_=pid, identity=identity)._record - owners = [get_user_by_identifier(owner) for owner in owners] - set_record_owners(draft, owners) + owner = get_user_by_identifier(owner) + set_record_owner(draft, owner) service.update_draft(id_=pid, identity=identity, data=metadata) click.secho(pid, fg="green") @@ -480,15 +480,7 @@ def list_stale_drafts(num_days): for draft in stale_drafts: recid = draft.pid.pid_value title = draft.metadata.get("title", "[UNNAMED]") - - # InvenioRDM v11 still supports several record owners uploader = draft.parent.access.owned_by - if isinstance(uploader, list): - if uploader: - uploader = uploader[0] - else: - uploader = None - uploader_email = uploader.resolve().email if uploader else "[N/A]" updated = draft.updated.date() if draft.updated else "[N/A]" stale_draft_infos.append(draft_info(recid, title, uploader_email, updated)) diff --git a/invenio_utilities_tuw/cli/options.py b/invenio_utilities_tuw/cli/options.py index 8f8f570..1eec889 100644 --- a/invenio_utilities_tuw/cli/options.py +++ b/invenio_utilities_tuw/cli/options.py @@ -65,14 +65,14 @@ option_pid_values = click.option( ), ) -option_owners = click.option( +option_owner = click.option( "--owner", "-o", - "owners", + "owner", metavar="OWNER", required=False, - multiple=True, - help="email address of the record owner to set (can be specified multiple times)", + multiple=False, + help="email address of the record owner to set", ) option_vanity_pid = click.option( diff --git a/invenio_utilities_tuw/cli/records.py b/invenio_utilities_tuw/cli/records.py index f13a8a2..69978b3 100644 --- a/invenio_utilities_tuw/cli/records.py +++ b/invenio_utilities_tuw/cli/records.py @@ -20,14 +20,14 @@ from invenio_records_resources.services.errors import PermissionDeniedError from ..utils import get_identity_for_user, get_user_by_identifier from .options import ( option_as_user, - option_owners, + option_owner, option_pid_type, option_pid_value, option_pid_values, option_pretty_print, option_raw, ) -from .utils import convert_to_recid, get_object_uuid, patch_metadata, set_record_owners +from .utils import convert_to_recid, get_object_uuid, patch_metadata, set_record_owner @click.group() @@ -109,9 +109,9 @@ def show_record(pid, pid_type, user, pretty_print, raw): "and update the record directly (not recommended)" ), ) -@option_owners +@option_owner @with_appcontext -def update_record(metadata_file, pid, pid_type, user, patch, owners, direct): +def update_record(metadata_file, pid, pid_type, user, patch, owner, direct): """Update the specified draft's metadata.""" pid = convert_to_recid(pid, pid_type) identity = get_identity_for_user(user) @@ -145,10 +145,10 @@ def update_record(metadata_file, pid, pid_type, user, patch, owners, direct): click.secho("trying with service.update()...", fg="yellow", err=True) service.update(id_=pid, identity=identity, data=metadata) - if owners: + if owner: record = service.read(id_=pid, identity=identity)._record - owners = [get_user_by_identifier(owner) for owner in owners] - set_record_owners(record, owners) + owner = get_user_by_identifier(owner) + set_record_owner(record, owner) if service.indexer: service.indexer.index(record) diff --git a/invenio_utilities_tuw/cli/utils.py b/invenio_utilities_tuw/cli/utils.py index 59c4fb0..9ef6dd2 100644 --- a/invenio_utilities_tuw/cli/utils.py +++ b/invenio_utilities_tuw/cli/utils.py @@ -9,7 +9,6 @@ """Utilities for the CLI commands.""" import json -from collections.abc import Iterable from invenio_db import db from invenio_pidstore.errors import PIDAlreadyExists @@ -99,14 +98,11 @@ def convert_to_recid(pid_value, pid_type): return pid_value -def set_record_owners(record, owners, commit=True): - """Set the record's owners, assuming an RDMRecord-like record object.""" +def set_record_owner(record, owner, commit=True): + """Set the record's owner, assuming an RDMRecord-like record object.""" parent = record.parent - parent.access.owners.clear() - for owner in owners: - parent.access.owners.add(owner) - + parent.access.owned_by = owner if commit: parent.commit() db.session.commit() @@ -128,12 +124,5 @@ def bytes_to_human(size): def is_owned_by(user, record): """Check if the record is owned by the given user.""" - owners = record.parent.access.owned_by - - # note: InvenioRDM v12 changed record ownership to a single entity rather - # than a list of entities, but we're still on v11 so we make it - # compatible with both variants - if not isinstance(owners, Iterable): - owners = [owners] - - return any([o and o.owner_id == user.id for o in owners]) + owner = record.parent.access.owned_by + return owner and owner.owner_id == user.id -- GitLab From cd67d49c6919a8ac2f1b24e95a83d9e1eabc315d Mon Sep 17 00:00:00 2001 From: Maximilian Moser <maximilian.moser@tuwien.ac.at> Date: Mon, 10 Mar 2025 00:25:14 +0100 Subject: [PATCH 3/8] Implement context manager for temporarily monkey-patching methods --- invenio_utilities_tuw/utils.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/invenio_utilities_tuw/utils.py b/invenio_utilities_tuw/utils.py index 7a84f6e..27b28b4 100644 --- a/invenio_utilities_tuw/utils.py +++ b/invenio_utilities_tuw/utils.py @@ -8,6 +8,7 @@ """Utility functions for Invenio-Utilities-TUW.""" +from contextlib import contextmanager from difflib import SequenceMatcher from invenio_access.permissions import any_user, system_identity @@ -54,3 +55,12 @@ def get_identity_for_user(user): def similarity(a: str, b: str) -> float: """Calculate the similarity between two strings.""" return SequenceMatcher(None, a, b).ratio() + + +@contextmanager +def monkey_patch_temp(object_, method_name, new_method): + """Temporarily monkey patch an object's method.""" + x = getattr(object_, method_name) + setattr(object_, method_name, new_method.__get__(object_, type(object_))) + yield object_ + setattr(object_, method_name, x) -- GitLab From 5c7c8f0377fd882e808f2926cd648b46f28916c8 Mon Sep 17 00:00:00 2001 From: Maximilian Moser <maximilian.moser@tuwien.ac.at> Date: Mon, 10 Mar 2025 22:19:48 +0100 Subject: [PATCH 4/8] Update "drafts files add" command * make it work even for drafts that have been published before * also extract a few pieces into utility functions --- invenio_utilities_tuw/cli/drafts.py | 138 ++++++++++++---------------- invenio_utilities_tuw/cli/utils.py | 60 ++++++++++++ 2 files changed, 121 insertions(+), 77 deletions(-) diff --git a/invenio_utilities_tuw/cli/drafts.py b/invenio_utilities_tuw/cli/drafts.py index f43661b..13b0935 100644 --- a/invenio_utilities_tuw/cli/drafts.py +++ b/invenio_utilities_tuw/cli/drafts.py @@ -21,6 +21,7 @@ from invenio_access.permissions import system_identity from invenio_db import db from invenio_rdm_records.proxies import current_rdm_records_service as service from invenio_records_resources.services.errors import PermissionDeniedError +from invenio_records_resources.services.uow import UnitOfWork from ..utils import get_identity_for_user, get_user_by_identifier from .options import ( @@ -33,6 +34,8 @@ from .options import ( option_vanity_pid, ) from .utils import ( + auto_increase_bucket_limits, + collect_file_paths, convert_to_recid, create_record_from_metadata, patch_metadata, @@ -41,41 +44,6 @@ from .utils import ( ) -def file_exists(path, filename): - """Check if the file exists in the given path.""" - return isfile(join(path, filename)) - - -def auto_increase_bucket_limits(bucket, filepaths, to_unlimited=False): - """Dynamically increase the bucket quoat if necessary.""" - # see what the file sizes look like - file_sizes = [os.path.getsize(filepath) for filepath in filepaths] - sum_sizes = sum(file_sizes) - max_size = max(file_sizes) - - if bucket.quota_left is not None: - if to_unlimited: - bucket.quota_size = None - - else: - # see how big the files are, and compare it against the bucket's quota - req_extra_quota = sum_sizes - bucket.quota_left - - # if we need some extra quota, increase it - if req_extra_quota > 0: - bucket.quota_size += req_extra_quota - - if bucket.max_file_size and bucket.max_file_size < max_size: - # do similar checks for the maximum file size - if to_unlimited: - bucket.max_file_size = None - else: - bucket.max_file_size = max_size - - # make changes known - db.session.flush() - - @click.group() def drafts(): """Management commands for creation and publication of drafts.""" @@ -162,7 +130,7 @@ def create_draft(metadata_path, publish, user, owner, vanity_pid): content = os.listdir(deposit_files_path) file_names = [ - basename(fn) for fn in content if file_exists(deposit_files_path, fn) + basename(fn) for fn in content if isfile(join(deposit_files_path, fn)) ] for fn in file_names: @@ -172,7 +140,7 @@ def create_draft(metadata_path, publish, user, owner, vanity_pid): ignored = [ basename(fn) for fn in content - if not file_exists(deposit_files_path, fn) + if not isfile(join(deposit_files_path, fn)) ] msg = f"ignored in '{deposit_files_path}': {ignored}" click.secho(msg, fg="yellow", err=True) @@ -309,32 +277,8 @@ def add_files(filepaths, pid, pid_type, user): file_service = service.draft_files draft = service.read_draft(id_=recid, identity=identity)._record - if not draft.files.enabled: - draft.files.enabled = True - draft.commit() - - paths = [] - for file_path in filepaths: - if isdir(file_path): - # add all files (no recursion into sub-dirs) from the directory - content = os.listdir(file_path) - file_names = [basename(fn) for fn in content if file_exists(file_path, fn)] - - if len(content) != len(file_names): - ignored = [ - basename(fn) for fn in content if not file_exists(file_path, fn) - ] - msg = f"ignored in '{file_path}': {ignored}" - click.secho(msg, fg="yellow", err=True) - - paths_ = [join(file_path, fn) for fn in file_names] - paths.extend(paths_) - - elif isfile(file_path): - paths.append(file_path) - - # make sure that the files fit in the bucket, and add them - auto_increase_bucket_limits(draft.files.bucket, paths) + # check if any of the files' basenames are duplicate + paths = collect_file_paths(filepaths) keys = [basename(fp) for fp in paths] if len(set(keys)) != len(keys): click.secho( @@ -343,27 +287,67 @@ def add_files(filepaths, pid, pid_type, user): sys.exit(1) # check for existing duplicates - files = list(file_service.list_files(id_=recid, identity=identity).entries()) - existing_file_keys = [e["key"] for e in files] + existing_file_keys = list(draft.files.entries.keys()) if any([k for k in keys if k in existing_file_keys]): click.secho( "aborting: reuse of existing file names detected", fg="yellow", err=True ) sys.exit(1) - # if all went well so far, continue on - file_service.init_files( - id_=recid, identity=identity, data=[{"key": basename(fp)} for fp in paths] - ) - for fp in paths: - fn = basename(fp) - with open(fp, "rb") as deposit_file: - file_service.set_file_content( - id_=recid, file_key=fn, identity=identity, stream=deposit_file - ) - file_service.commit_file(id_=recid, file_key=fn, identity=identity) + uow = UnitOfWork(db.session) + try: + # prepare the draft's file manager and bucket for files + bucket_was_locked = draft.files.bucket.locked + files_were_enabled = draft.files.enabled + draft.files.bucket.locked = False + if not files_were_enabled: + draft.files.enabled = True + draft.commit() + db.session.flush() + + auto_increase_bucket_limits(draft.files.bucket, paths) + file_service.init_files( + id_=recid, + identity=identity, + data=[{"key": basename(fp)} for fp in paths], + uow=uow, + ) - click.secho(recid, fg="green") + for fp in paths: + fn = basename(fp) + with open(fp, "rb") as deposit_file: + file_service.set_file_content( + id_=recid, + file_key=fn, + identity=identity, + stream=deposit_file, + uow=uow, + ) + file_service.commit_file(id_=recid, file_key=fn, identity=identity, uow=uow) + click.secho(recid, fg="green") + + # if the draft has already been published, we may need to enable the files for + # the published record as well + if draft.is_published and not files_were_enabled: + record = service.record_cls.get_record(draft.id) + record.files.enabled = True + record.commit() + + uow.commit() + + except Exception as e: + uow.rollback() + if draft.files.enabled != files_were_enabled: + draft.files.enabled = files_were_enabled + draft.commit() + + click.secho(f"aborted due to error: {e}", fg="red", err=True) + + finally: + if bucket_was_locked != draft.files.bucket.locked: + draft.files.bucket.locked = bucket_was_locked + + db.session.commit() @files.command("remove") diff --git a/invenio_utilities_tuw/cli/utils.py b/invenio_utilities_tuw/cli/utils.py index 9ef6dd2..e192b72 100644 --- a/invenio_utilities_tuw/cli/utils.py +++ b/invenio_utilities_tuw/cli/utils.py @@ -9,7 +9,10 @@ """Utilities for the CLI commands.""" import json +import os +from os.path import basename, isdir, isfile, join +import click from invenio_db import db from invenio_pidstore.errors import PIDAlreadyExists from invenio_pidstore.models import PersistentIdentifier @@ -126,3 +129,60 @@ def is_owned_by(user, record): """Check if the record is owned by the given user.""" owner = record.parent.access.owned_by return owner and owner.owner_id == user.id + + +def collect_file_paths(paths): + """Collect file paths from the given paths. + + If one of the given paths is a directory, its path will be replaced with the + paths of all files it contains. + If it contains any subdirectories however, then it will be skipped instead. + """ + paths_ = [] + for path in paths: + if isdir(path): + # add all files (no recursion into sub-dirs) from the directory + content = os.listdir(path) + file_names = [basename(fn) for fn in content if isfile(join(path, fn))] + + if len(content) != len(file_names): + ignored = [basename(fn) for fn in content if not isfile(join(path, fn))] + msg = f"ignored in '{path}': {ignored}" + click.secho(msg, fg="yellow", err=True) + + paths_ = [join(path, fn) for fn in file_names] + paths_.extend(paths_) + + elif isfile(path): + paths_.append(path) + + return paths_ + + +def auto_increase_bucket_limits(bucket, filepaths, to_unlimited=False): + """Dynamically increase the bucket quota if necessary.""" + file_sizes = [os.path.getsize(filepath) for filepath in filepaths] + sum_sizes = sum(file_sizes) + max_size = max(file_sizes) + + if bucket.quota_left is not None: + if to_unlimited: + bucket.quota_size = None + + else: + # see how big the files are, and compare it against the bucket's quota + req_extra_quota = sum_sizes - bucket.quota_left + + # if we need some extra quota, increase it + if req_extra_quota > 0: + bucket.quota_size += req_extra_quota + + if bucket.max_file_size and bucket.max_file_size < max_size: + # do similar checks for the maximum file size + if to_unlimited: + bucket.max_file_size = None + else: + bucket.max_file_size = max_size + + # make changes known + db.session.flush() -- GitLab From 22a4e20dcef7df7269aa8248b8590013dc1f1407 Mon Sep 17 00:00:00 2001 From: Maximilian Moser <maximilian.moser@tuwien.ac.at> Date: Mon, 10 Mar 2025 23:55:31 +0100 Subject: [PATCH 5/8] Update "drafts files remove" command to work with published drafts --- invenio_utilities_tuw/cli/drafts.py | 35 ++++++++++++++++++++--------- 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/invenio_utilities_tuw/cli/drafts.py b/invenio_utilities_tuw/cli/drafts.py index 13b0935..df940fc 100644 --- a/invenio_utilities_tuw/cli/drafts.py +++ b/invenio_utilities_tuw/cli/drafts.py @@ -364,19 +364,34 @@ def remove_files(filekeys, pid, pid_type, user): recid = convert_to_recid(pid, pid_type) identity = get_identity_for_user(user) file_service = service.draft_files + draft = service.read_draft(id_=recid, identity=identity)._record - for file_key in filekeys: - try: - file_service.delete_file(id_=recid, file_key=file_key, identity=identity) - click.secho(file_key, fg="red") + bucket_was_locked = draft.files.bucket.locked + draft.files.bucket.locked = False + uow = UnitOfWork(db.session) + try: + for file_key in filekeys: + try: + file_service.delete_file( + id_=recid, file_key=file_key, identity=identity, uow=uow + ) + click.secho(file_key, fg="red") - except KeyError as err: - click.secho(f"error: {err}", fg="yellow", err=True) + except KeyError as err: + click.secho(f"error: {err}", fg="yellow", err=True) - draft = service.read_draft(id_=recid, identity=identity)._record - if not draft.files.entries: - draft.files.enabled = False - draft.commit() + if not draft.files.entries: + draft.files.enabled = False + draft.commit() + + uow.commit() + + except Exception as e: + uow.rollback() + click.secho(f"aborted due to error: {e}", fg="red", err=True) + + finally: + draft.files.bucket.locked = bucket_was_locked db.session.commit() -- GitLab From eaeea6f07c58cbe50257f691b1b6df259e6e23f0 Mon Sep 17 00:00:00 2001 From: Maximilian Moser <maximilian.moser@tuwien.ac.at> Date: Mon, 10 Mar 2025 23:03:26 +0100 Subject: [PATCH 6/8] Implement "files fix-state" command for drafts and records --- invenio_utilities_tuw/cli/drafts.py | 14 ++++++++++++++ invenio_utilities_tuw/cli/records.py | 15 +++++++++++++++ 2 files changed, 29 insertions(+) diff --git a/invenio_utilities_tuw/cli/drafts.py b/invenio_utilities_tuw/cli/drafts.py index df940fc..393b9d8 100644 --- a/invenio_utilities_tuw/cli/drafts.py +++ b/invenio_utilities_tuw/cli/drafts.py @@ -447,6 +447,20 @@ def verify_files(pid, pid_type): sys.exit(1) +@files.command("fix-state") +@option_pid_value +@option_pid_type +@with_appcontext +def fix_state(pid, pid_type): + """Fix the draft's file manager state and lock the bucket.""" + recid = convert_to_recid(pid, pid_type) + draft = service.read_draft(id_=recid, identity=system_identity)._record + draft.files.lock() + draft.files.enabled = bool(draft.files.entries) + draft.commit() + db.session.commit() + + @drafts.command("list-stale") @click.option( "--days", diff --git a/invenio_utilities_tuw/cli/records.py b/invenio_utilities_tuw/cli/records.py index 69978b3..143f56e 100644 --- a/invenio_utilities_tuw/cli/records.py +++ b/invenio_utilities_tuw/cli/records.py @@ -13,6 +13,7 @@ import sys import click from flask.cli import with_appcontext +from invenio_access.permissions import system_identity from invenio_db import db from invenio_rdm_records.proxies import current_rdm_records_service as service from invenio_records_resources.services.errors import PermissionDeniedError @@ -229,6 +230,20 @@ def verify_files(pid, pid_type, user): sys.exit(1) +@files.command("fix-state") +@option_pid_value +@option_pid_type +@with_appcontext +def fix_state(pid, pid_type): + """Fix the record's file manager state and lock the bucket.""" + recid = convert_to_recid(pid, pid_type) + record = service.read(id_=recid, identity=system_identity)._record + record.files.lock() + record.files.enabled = bool(record.files.entries) + record.commit() + db.session.commit() + + @records.command("reindex") @option_pid_values @option_pid_type -- GitLab From 8f4d78d27448529ad83c0252ee6692b4e4518d4a Mon Sep 17 00:00:00 2001 From: Maximilian Moser <maximilian.moser@tuwien.ac.at> Date: Tue, 11 Mar 2025 17:04:55 +0100 Subject: [PATCH 7/8] Add vocabulary management commands --- invenio_utilities_tuw/cli/cli.py | 2 + invenio_utilities_tuw/cli/vocabularies.py | 128 ++++++++++++++++++++++ 2 files changed, 130 insertions(+) create mode 100644 invenio_utilities_tuw/cli/vocabularies.py diff --git a/invenio_utilities_tuw/cli/cli.py b/invenio_utilities_tuw/cli/cli.py index b063acc..5fb4d8d 100644 --- a/invenio_utilities_tuw/cli/cli.py +++ b/invenio_utilities_tuw/cli/cli.py @@ -15,6 +15,7 @@ from .files import files from .records import records from .reports import reports from .users import users +from .vocabularies import vocabularies @click.group() @@ -27,3 +28,4 @@ utilities.add_command(files) utilities.add_command(records) utilities.add_command(reports) utilities.add_command(users) +utilities.add_command(vocabularies) diff --git a/invenio_utilities_tuw/cli/vocabularies.py b/invenio_utilities_tuw/cli/vocabularies.py new file mode 100644 index 0000000..784eead --- /dev/null +++ b/invenio_utilities_tuw/cli/vocabularies.py @@ -0,0 +1,128 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2025 TU Wien. +# +# Invenio-Utilities-TUW is free software; you can redistribute it and/or +# modify it under the terms of the MIT License; see LICENSE file for more +# details. + +"""Management commands for vocabularies.""" + +import os +import sys + +import click +import dictdiffer +import yaml +from flask.cli import with_appcontext +from invenio_access.permissions import system_identity +from invenio_pidstore.errors import PIDDoesNotExistError +from invenio_records_resources.proxies import current_service_registry +from invenio_vocabularies.records.api import VocabularyType +from sqlalchemy.exc import NoResultFound + +special_vocabulary_types = [ + "affiliations", + "awards", + "funders", + "names", + "subjects", +] + + +def _get_service_for_type(vocab_type: str): + """Get the registered service for the given vocabulary type.""" + if vocab_type in special_vocabulary_types: + return current_service_registry.get(vocab_type), False + + if vocab_type not in {vt.id for vt in VocabularyType.query.all()}: + raise LookupError(f"could not find vocabulary type '{vocab_type}'") + + return current_service_registry.get("vocabularies"), True + + +@click.group("vocabularies") +def vocabularies(): + """Management commands for vocabularies.""" + + +@vocabularies.command("list-types") +@with_appcontext +def list_vocabulary_types(): + """List the available vocabulary types.""" + types = {vt.id for vt in VocabularyType.query.all()} + for t in sorted(types): + if t in special_vocabulary_types: + click.secho(t, fg="green") + else: + click.echo(t) + + +@vocabularies.command("update") +@click.argument( + "filepath", + required=True, # help="file with the vocabulary entry definitions" +) +@click.argument( + "vocabulary_id", + required=True, +) +@click.option( + "--type", + "-t", + "vocab_type", + required=False, + default=None, + help="vocabulary type for the entry to add or update", +) +@with_appcontext +def add_or_update(vocab_type: str | None, filepath: str, vocabulary_id: str): + """Add or update the vocabulary.""" + if not vocab_type: + file_name = os.path.basename(filepath) + vocab_type, _ = os.path.splitext(file_name) + + try: + service, needs_type = _get_service_for_type(vocab_type) + except LookupError as e: + click.secho(e, fg="red", err=True) + sys.exit(1) + + with open(filepath, "r") as f: + vocab_entries = yaml.safe_load(f) + try: + vocab_entry, *_ = [e for e in vocab_entries if e.get("id") == vocabulary_id] + except ValueError: + click.secho(f"could not find entry '{vocabulary_id}'", fg="red", err=True) + sys.exit(1) + + # the special vocabularies don't need their type specified as part of the ID, + # but generic vocabulary types do + id_ = (vocab_type, vocabulary_id) if needs_type else vocabulary_id + + try: + # first we try to update an existing vocabulary entry + old_entry = service.read(system_identity, id_)._obj + old_entry.setdefault("id", vocabulary_id) + old_entry.pop("$schema") + old_entry.pop("pid", None) + old_entry.pop("type", None) + + # check if an update is actually necessary + diffs = list(dictdiffer.diff(vocab_entry, old_entry)) + if diffs: + if needs_type: + vocab_entry["type"] = vocab_type + + new_entry = service.update(system_identity, id_, vocab_entry)._obj + click.echo(f"updated '{vocab_type}' vocabulary: {new_entry}") + else: + click.secho("no updates necessary", fg="green") + + except (NoResultFound, PIDDoesNotExistError): + # if the lookup failed, we need to add the vocabulary entry + if needs_type: + vocab_entry["type"] = vocab_type + + new_entry = service.create(system_identity, vocab_entry)._obj + click.echo(f"added '{vocab_type}' vocabulary: {new_entry}") -- GitLab From 220b06a454f2bd1a639555ea8d948aad4a74fae6 Mon Sep 17 00:00:00 2001 From: Maximilian Moser <maximilian.moser@tuwien.ac.at> Date: Mon, 10 Mar 2025 23:26:17 +0100 Subject: [PATCH 8/8] Bump version to v2025.0.0 --- CHANGES.rst | 9 +++++++++ invenio_utilities_tuw/__init__.py | 2 +- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/CHANGES.rst b/CHANGES.rst index 3a15aff..23eb258 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -8,6 +8,15 @@ Changes ======= +Version v2025.0.0 (released 2025-03-10) + +- global: remove capabilities for multiple record owners +- global: drop old way of fetching records service +- drafts: update "files {add,remove}" command to also work with already published drafts +- records: add "files fix-state" command to fix files manager state and lock the bucket +- vocabularies: add basic vocabulary management commands + + Version v2024.2 (released 2024-06-24, updated 2024-09-19) - v12 compat: Replace ``Flask-BabelEx`` with ``Invenio-i18n`` diff --git a/invenio_utilities_tuw/__init__.py b/invenio_utilities_tuw/__init__.py index ee7e9c4..7a7223b 100644 --- a/invenio_utilities_tuw/__init__.py +++ b/invenio_utilities_tuw/__init__.py @@ -10,6 +10,6 @@ from .ext import InvenioUtilitiesTUW -__version__ = "2024.2.1" +__version__ = "2025.0.0" __all__ = ("__version__", "InvenioUtilitiesTUW") -- GitLab