From 11edd46344c4c527e6e52ff9b3b2bf87b65b1cb0 Mon Sep 17 00:00:00 2001 From: Maximilian Moser <maximilian.moser@tuwien.ac.at> Date: Mon, 12 Jul 2021 17:53:50 +0200 Subject: [PATCH] Modernize CLI commands * incorporate changes made to the service structure (all services attached to the record service) * remove references to files-rest API where alternatives are available * remove permission checks and as-user CLI option from some methods --- invenio_utilities_tuw/cli/drafts.py | 114 ++++++++++++++++++--------- invenio_utilities_tuw/cli/files.py | 20 ++--- invenio_utilities_tuw/cli/records.py | 66 +++++++++------- invenio_utilities_tuw/cli/utils.py | 12 +-- invenio_utilities_tuw/config.py | 15 +--- invenio_utilities_tuw/utils.py | 26 ++---- 6 files changed, 134 insertions(+), 119 deletions(-) diff --git a/invenio_utilities_tuw/cli/drafts.py b/invenio_utilities_tuw/cli/drafts.py index 75e6b85..bc93ea3 100644 --- a/invenio_utilities_tuw/cli/drafts.py +++ b/invenio_utilities_tuw/cli/drafts.py @@ -15,10 +15,10 @@ from os.path import basename, isdir, isfile, join import click from flask.cli import with_appcontext +from invenio_access.permissions import system_identity from invenio_db import db -from invenio_files_rest.models import ObjectVersion -from ..utils import get_draft_file_service, get_record_service +from ..utils import get_record_service from .options import ( option_as_user, option_owners, @@ -62,9 +62,15 @@ def list_drafts(user): for recid in recids: try: draft = service.read_draft(id_=recid, identity=identity) - click.secho( - "{} - {}".format(draft.id, draft.data["metadata"]["title"]), fg="green" - ) + recid = draft.id + title = draft.data["metadata"].get("title", "-") + files = draft._record.files + if files.enabled: + num_files = f"{len(files.entries):02}" + else: + num_files = "no" + + click.secho(f"{recid}\t{num_files} files\t{title}", fg="green") except: pass @@ -94,6 +100,8 @@ def create_draft(metadata_path, publish, user, owners, vanity_pid): draft, if such a subdirectory exists. """ recid = None + service = get_record_service() + file_service = service.draft_files identity = get_identity_for_user(user) if isfile(metadata_path): @@ -101,6 +109,8 @@ def create_draft(metadata_path, publish, user, owners, vanity_pid): metadata = set_creatibutor_names(metadata) draft = create_record_from_metadata(metadata, identity, vanity_pid=vanity_pid) recid = draft["id"] + draft.files.enabled = False + draft.commit() elif isdir(metadata_path): metadata_file_path = join(metadata_path, "metadata.json") @@ -112,6 +122,8 @@ def create_draft(metadata_path, publish, user, owners, vanity_pid): metadata = set_creatibutor_names(metadata) draft = create_record_from_metadata(metadata, identity) recid = draft["id"] + draft.files.enabled = True + draft.commit() file_names = [] if isdir(deposit_files_path): @@ -124,31 +136,32 @@ def create_draft(metadata_path, publish, user, owners, vanity_pid): msg = "ignored in '{}': {}".format(deposit_files_path, ignored) click.secho(msg, fg="red", err=True) - service = get_draft_file_service() - service.init_files( + file_service.init_files( id_=recid, identity=identity, data=[{"key": fn} for fn in file_names] ) for fn in file_names: file_path = join(deposit_files_path, fn) with open(file_path, "rb") as deposit_file: - service.set_file_content( + file_service.set_file_content( id_=recid, file_key=fn, identity=identity, stream=deposit_file ) - service.commit_file(id_=recid, file_key=fn, identity=identity) + file_service.commit_file(id_=recid, file_key=fn, identity=identity) else: raise Exception("neither a file nor a directory: %s" % metadata_path) if owners: - actual_draft = draft._record if hasattr(draft, "_record") else draft owners = [get_user_by_identifier(owner) for owner in owners] - set_record_owners(actual_draft, owners) + set_record_owners(draft, owners) + if service.indexer: + service.indexer.index(draft) if publish: - service = get_record_service() service.publish(id_=recid, identity=identity) + # commit, as there are code paths that don't have a commit otherwise + db.session.commit() click.secho(recid, fg="green") @@ -164,10 +177,12 @@ def show_draft(pid, pid_type, user, pretty_print, raw): pid = convert_to_recid(pid, pid_type) identity = get_identity_for_user(user) service = get_record_service() - draft = service.read_draft(id_=pid, identity=identity) indent = 2 if pretty_print else None + + draft = service.read_draft(id_=pid, identity=identity) data = draft._record.model.json if raw else draft.data data = json.dumps(data, indent=indent) + click.echo(data) @@ -199,10 +214,9 @@ def update_draft(metadata_file, pid, pid_type, user, patch, owners): metadata = patch_metadata(draft_data, metadata) if owners: - draft = service.read_draft(id_=pid, identity=identity) - actual_draft = draft._record if hasattr(draft, "_record") else draft + draft = service.read_draft(id_=pid, identity=identity)._record owners = [get_user_by_identifier(owner) for owner in owners] - set_record_owners(actual_draft, owners) + set_record_owners(draft, owners) metadata = set_creatibutor_names(metadata) service.update_draft(id_=pid, identity=identity, data=metadata) @@ -220,6 +234,7 @@ def publish_draft(pid, pid_type, user): identity = get_identity_for_user(user) service = get_record_service() service.publish(id_=pid, identity=identity) + click.secho(pid, fg="green") @@ -234,6 +249,7 @@ def delete_draft(pid, pid_type, user): identity = get_identity_for_user(user) service = get_record_service() service.delete_draft(id_=pid, identity=identity) + click.secho(pid, fg="red") @@ -253,7 +269,13 @@ def add_files(filepaths, pid, pid_type, user): """Add the specified files to the draft.""" recid = convert_to_recid(pid, pid_type) identity = get_identity_for_user(user) - service = get_draft_file_service() + service = get_record_service() + file_service = service.draft_files + draft = service.read_draft(id_=recid, identity=identity)._record + + if not draft.files.enabled: + draft.files.enabled = True + draft.commit() paths = [] for file_path in filepaths: @@ -279,16 +301,16 @@ def add_files(filepaths, pid, pid_type, user): click.secho("aborting: duplicates in file names detected", fg="red", err=True) sys.exit(1) - service.init_files( + file_service.init_files( id_=recid, identity=identity, data=[{"key": basename(fp)} for fp in paths] ) for fp in paths: fn = basename(fp) with open(fp, "rb") as deposit_file: - service.set_file_content( + file_service.set_file_content( id_=recid, file_key=fn, identity=identity, stream=deposit_file ) - service.commit_file(id_=recid, file_key=fn, identity=identity) + file_service.commit_file(id_=recid, file_key=fn, identity=identity) click.secho(recid, fg="green") @@ -300,15 +322,28 @@ def add_files(filepaths, pid, pid_type, user): @option_as_user @with_appcontext def remove_files(filekeys, pid, pid_type, user): - """Remove the deposited files.""" + """Remove the given deposited files from the draft. + + Note that this operation does not remove the files from storage! + """ recid = convert_to_recid(pid, pid_type) identity = get_identity_for_user(user) - service = get_draft_file_service() + service = get_record_service() + file_service = service.draft_files for file_key in filekeys: - service.delete_file(id_=recid, file_key=file_key, identity=identity) - click.secho(file_key, fg="red") - # TODO: add option for hard-deleting files + try: + file_service.delete_file(id_=recid, file_key=file_key, identity=identity) + click.secho(file_key, fg="red") + + except KeyError as err: + click.secho(f"error: {err}", fg="yellow", err=True) + + draft = service.read_draft(id_=recid, identity=identity)._record + if not draft.files.entries: + draft.files.enabled = False + draft.commit() + db.session.commit() @files.command("list") @@ -320,32 +355,35 @@ def list_files(pid, pid_type, user): """Show a list of files deposited with the draft.""" recid = convert_to_recid(pid, pid_type) identity = get_identity_for_user(user) - service = get_draft_file_service() - file_results = service.list_files(id_=recid, identity=identity) - for f in file_results.entries: - ov = ObjectVersion.get(f["bucket_id"], f["key"], f["version_id"]) - fi = ov.file - click.secho("{}\t{}\t{}".format(ov.key, fi.uri, fi.checksum), fg="green") + service = get_record_service() + draft = service.read_draft(id_=recid, identity=identity)._record + + for name, rec_file in draft.files.entries.items(): + fi = rec_file.file + if fi: + click.secho("{}\t{}\t{}".format(name, fi.uri, fi.checksum), fg="green") + else: + click.secho(name, fg="red") @files.command("verify") @option_pid_value @option_pid_type -@option_as_user @with_appcontext -def verify_files(pid, pid_type, user): +def verify_files(pid, pid_type): """Verify the checksums for each of the draft's files.""" recid = convert_to_recid(pid, pid_type) - identity = get_identity_for_user(user) service = get_record_service() - service.require_permission(identity, "read_files") - draft = service.read_draft(id_=recid, identity=identity) - draft = draft._record if hasattr(draft, "_record") else draft + draft = service.read_draft(id_=recid, identity=system_identity)._record num_errors = 0 for name, rec_file in draft.files.entries.items(): - if rec_file.file.verify_checksum(): + if not rec_file.file: + click.secho(name, fg="red") + + elif rec_file.file.verify_checksum(): click.secho(name, fg="green") + else: click.secho("{}: failed checksum verification".format(name), fg="red") num_errors += 1 diff --git a/invenio_utilities_tuw/cli/files.py b/invenio_utilities_tuw/cli/files.py index 326e1d0..9dc6971 100644 --- a/invenio_utilities_tuw/cli/files.py +++ b/invenio_utilities_tuw/cli/files.py @@ -13,6 +13,7 @@ from collections import defaultdict import click from flask.cli import with_appcontext +from invenio_access.permissions import system_identity from invenio_db import db from invenio_files_rest.models import Location, ObjectVersion @@ -70,11 +71,10 @@ def deleted(): @deleted.command("list") -@option_as_user @option_pid_value @option_pid_type @with_appcontext -def list_deleted_files(user, pid, pid_type): +def list_deleted_files(pid, pid_type): """Hard-delete files that have already been soft-deleted. Optionally, this operation can be restricted to the bucket associated with a draft @@ -82,7 +82,7 @@ def list_deleted_files(user, pid, pid_type): """ recid = convert_to_recid(pid, pid_type) if pid else None service = get_record_service() - identity = get_identity_for_user(user) + identity = system_identity # if a PID was specified, limit the cleaning to this record's bucket marked_as_deleted = ObjectVersion.query.filter_by(file_id=None, is_head=True) @@ -90,9 +90,6 @@ def list_deleted_files(user, pid, pid_type): draft = service.read_draft(id_=recid, identity=identity)._record marked_as_deleted = marked_as_deleted.filter_by(bucket=draft.files.bucket) - # check if the specified user has permissions - service.require_permission(identity, "read_files") - # hard-delete all soft-deleted ObjectVersions file_instances = defaultdict(set) for dov in marked_as_deleted.all(): @@ -112,11 +109,10 @@ def list_deleted_files(user, pid, pid_type): @click.confirmation_option( prompt="are you sure you want to permanently remove soft-deleted files?" ) -@option_as_user @option_pid_value @option_pid_type @with_appcontext -def hard_delete_files(user, pid, pid_type): +def hard_delete_files(pid, pid_type): """Hard-delete files that have already been soft-deleted. Optionally, this operation can be restricted to the bucket associated with a draft @@ -124,7 +120,7 @@ def hard_delete_files(user, pid, pid_type): """ recid = convert_to_recid(pid, pid_type) if pid else None service = get_record_service() - identity = get_identity_for_user(user) + identity = system_identity # if a PID was specified, limit the cleaning to this record's bucket marked_as_deleted = ObjectVersion.query.filter_by(file_id=None, is_head=True) @@ -132,9 +128,6 @@ def hard_delete_files(user, pid, pid_type): draft = service.read_draft(id_=recid, identity=identity)._record marked_as_deleted = marked_as_deleted.filter_by(bucket=draft.files.bucket) - # check if the specified user has permissions - service.require_permission(identity, "delete") - # hard-delete all soft-deleted ObjectVersions file_instances = defaultdict(set) for dov in marked_as_deleted.all(): @@ -185,9 +178,8 @@ def list_orphan_files(): @click.confirmation_option( prompt="are you sure you want to permanently remove orphaned files?" ) -@option_as_user @with_appcontext -def clean_files(user): +def clean_files(): """List files from locations that aren't referenced in any DB models anymore.""" for loc in Location.query.all(): # we only know how to handle directories on the file system for now diff --git a/invenio_utilities_tuw/cli/records.py b/invenio_utilities_tuw/cli/records.py index b6a31fd..3adb015 100644 --- a/invenio_utilities_tuw/cli/records.py +++ b/invenio_utilities_tuw/cli/records.py @@ -14,9 +14,8 @@ import sys import click from flask.cli import with_appcontext from invenio_db import db -from invenio_files_rest.models import ObjectVersion -from ..utils import get_record_file_service, get_record_service +from ..utils import get_record_service from .options import ( option_as_user, option_owners, @@ -61,11 +60,17 @@ def list_records(user): for recid in recids: try: rec = service.read(id_=recid, identity=identity) - click.secho( - "{} - {}".format(rec.id, rec.data["metadata"]["title"]), fg="green" - ) + recid = rec.id + title = rec.data["metadata"].get("title", "-") + files = rec._record.files + if files.enabled: + num_files = f"{len(files.entries):02}" + else: + num_files = "no" + + click.secho(f"{recid}\t{num_files} files\t{title}", fg="green") except: - raise + pass @records.command("show") @@ -80,10 +85,12 @@ def show_record(pid, pid_type, user, pretty_print, raw): pid = convert_to_recid(pid, pid_type) identity = get_identity_for_user(user) service = get_record_service() - record = service.read(id_=pid, identity=identity) indent = 2 if pretty_print else None + + record = service.read(id_=pid, identity=identity) data = record._record.model.json if raw else record.data data = json.dumps(data, indent=indent) + click.echo(data) @@ -128,17 +135,21 @@ def update_record(metadata_file, pid, pid_type, user, patch, owners, direct): if direct: record = service.read(id_=pid, identity=identity)._record record.update(metadata) + # the refresh is required because the access system field takes precedence # over the record's data in 'record.commit()' record.access.refresh_from_dict(record["access"]) record.commit() db.session.commit() service.indexer.index(record) + else: try: # first, try the modern approach of updating records (March 2021) + service.edit(id_=pid, identity=identity) service.update_draft(id_=pid, identity=identity, data=metadata) service.publish(id_=pid, identity=identity) + except Exception as e: # if that fails, try the good old plain update click.secho("error: {}".format(e), fg="yellow") @@ -146,10 +157,11 @@ def update_record(metadata_file, pid, pid_type, user, patch, owners, direct): service.update(id_=pid, identity=identity, data=metadata) if owners: - record = service.read(id_=pid, identity=identity) - actual_record = record._record if hasattr(record, "_record") else record + record = service.read(id_=pid, identity=identity)._record owners = [get_user_by_identifier(owner) for owner in owners] - set_record_owners(actual_record, owners) + set_record_owners(record, owners) + if service.indexer: + service.indexer.index(record) click.secho(pid, fg="green") @@ -185,12 +197,15 @@ def list_files(pid, pid_type, user): """Show a list of files deposited with the record.""" recid = convert_to_recid(pid, pid_type) identity = get_identity_for_user(user) - service = get_record_file_service() - file_results = service.list_files(id_=recid, identity=identity) - for f in file_results.entries: - ov = ObjectVersion.get(f["bucket_id"], f["key"], f["version_id"]) - fi = ov.file - click.secho("{}\t{}\t{}".format(ov.key, fi.uri, fi.checksum), fg="green") + service = get_record_service() + record = service.read(id_=recid, identity=identity)._record + + for name, rec_file in record.files.entries.items(): + fi = rec_file.file + if fi: + click.secho("{}\t{}\t{}".format(name, fi.uri, fi.checksum), fg="green") + else: + click.secho(name, fg="red") @files.command("verify") @@ -202,15 +217,17 @@ def verify_files(pid, pid_type, user): """Verify the checksums for each of the record's files.""" recid = convert_to_recid(pid, pid_type) identity = get_identity_for_user(user) - service = get_record_file_service() - service.require_permission(identity, "read_files") - record = service.read(id_=recid, identity=identity) - record = record._record if hasattr(record, "_record") else record + service = get_record_service() + record = service.read(id_=recid, identity=identity)._record num_errors = 0 for name, rec_file in record.files.entries.items(): - if rec_file.file.verify_checksum(): + if not rec_file.file: + click.secho(name, fg="red") + + elif rec_file.file.verify_checksum(): click.secho(name, fg="green") + else: click.secho("{}: failed checksum verification".format(name), fg="red") num_errors += 1 @@ -228,16 +245,11 @@ def verify_files(pid, pid_type, user): @records.command("reindex") @option_pid_values @option_pid_type -@option_as_user @with_appcontext -def reindex_records(pids, pid_type, user): +def reindex_records(pids, pid_type): """Reindex all available (or just the specified) records.""" service = get_record_service() - # basically, this is just a check whether the user exists, - # since there's no permission for re-indexing - get_identity_for_user(user) - if pids: records = [ service.record_cls.get_record(get_object_uuid(pid, pid_type)) diff --git a/invenio_utilities_tuw/cli/utils.py b/invenio_utilities_tuw/cli/utils.py index f6b976e..4c6f882 100644 --- a/invenio_utilities_tuw/cli/utils.py +++ b/invenio_utilities_tuw/cli/utils.py @@ -49,18 +49,20 @@ def create_record_from_metadata( "PID '{}:{}' is already taken".format(vanity_pid_type, vanity_pid) ) - draft = service.create(identity=identity, data=metadata) - actual_draft = draft._record if hasattr(draft, "_record") else draft + draft = service.create(identity=identity, data=metadata)._record if vanity_pid: # service.update_draft() is called to update the IDs in the record's metadata # (via record.commit()), re-index the record, and commit the db session if service.indexer: - service.indexer.delete(actual_draft) + service.indexer.delete(draft) - actual_draft.pid.pid_value = vanity_pid + draft.pid.pid_value = vanity_pid db.session.commit() - draft = service.update_draft(vanity_pid, identity=identity, data=metadata) + + draft = service.update_draft( + vanity_pid, identity=identity, data=metadata + )._record return draft diff --git a/invenio_utilities_tuw/config.py b/invenio_utilities_tuw/config.py index f9dfaf4..e364c11 100644 --- a/invenio_utilities_tuw/config.py +++ b/invenio_utilities_tuw/config.py @@ -6,22 +6,9 @@ # modify it under the terms of the MIT License; see LICENSE file for more # details. -"""Some utilities for InvenioRDM.""" +"""Configuration for Invenio-Utilities-TUW.""" from invenio_rdm_records.proxies import current_rdm_records -UTILITIES_TUW_BASE_TEMPLATE = "invenio_utilities_tuw/base.html" -"""Default base template for the demo page.""" - UTILITIES_TUW_RECORD_SERVICE_FACTORY = lambda: current_rdm_records.records_service """Factory function for creating a RecordService.""" - -UTILITIES_TUW_RECORD_FILES_SERVICE_FACTORY = ( - lambda: current_rdm_records.record_files_service -) -"""Factory function for creating a RecordFileService.""" - -UTILITIES_TUW_DRAFT_FILES_SERVICE_FACTORY = ( - lambda: current_rdm_records.draft_files_service -) -"""Factory function for creating a DraftFileService.""" diff --git a/invenio_utilities_tuw/utils.py b/invenio_utilities_tuw/utils.py index 90a93d1..b8220e3 100644 --- a/invenio_utilities_tuw/utils.py +++ b/invenio_utilities_tuw/utils.py @@ -8,7 +8,6 @@ """Utility functions for Invenio-Utilities-TUW.""" - from flask import current_app from invenio_rdm_records.proxies import current_rdm_records from werkzeug.utils import import_string @@ -26,26 +25,11 @@ def get_or_import(value, default=None): def get_record_service(): """Get the configured RecordService.""" - factory = current_app.config.get( - "UTILITIES_TUW_RECORD_SERVICE_FACTORY", - lambda: current_rdm_records.records_service, - ) - return factory() - - -def get_record_file_service(): - """Get the configured RecordFileService.""" - factory = current_app.config.get( - "UTILITIES_TUW_RECORD_FILES_SERVICE_FACTORY", - lambda: current_rdm_records.record_files_service, - ) - return factory() + factory = current_app.config.get("UTILITIES_TUW_RECORD_SERVICE_FACTORY", None) + if factory is not None: + factory = get_or_import(factory) + else: + factory = lambda: current_rdm_records.records_service -def get_draft_file_service(): - """Get the configured DraftFilesService.""" - factory = current_app.config.get( - "UTILITIES_TUW_DRAFT_FILES_SERVICE_FACTORY", - lambda: current_rdm_records.draft_files_service, - ) return factory() -- GitLab