From 11edd46344c4c527e6e52ff9b3b2bf87b65b1cb0 Mon Sep 17 00:00:00 2001
From: Maximilian Moser <maximilian.moser@tuwien.ac.at>
Date: Mon, 12 Jul 2021 17:53:50 +0200
Subject: [PATCH] Modernize CLI commands

* incorporate changes made to the service structure (all services
  attached to the record service)
* remove references to files-rest API where alternatives are available
* remove permission checks and as-user CLI option from some methods
---
 invenio_utilities_tuw/cli/drafts.py  | 114 ++++++++++++++++++---------
 invenio_utilities_tuw/cli/files.py   |  20 ++---
 invenio_utilities_tuw/cli/records.py |  66 +++++++++-------
 invenio_utilities_tuw/cli/utils.py   |  12 +--
 invenio_utilities_tuw/config.py      |  15 +---
 invenio_utilities_tuw/utils.py       |  26 ++----
 6 files changed, 134 insertions(+), 119 deletions(-)

diff --git a/invenio_utilities_tuw/cli/drafts.py b/invenio_utilities_tuw/cli/drafts.py
index 75e6b85..bc93ea3 100644
--- a/invenio_utilities_tuw/cli/drafts.py
+++ b/invenio_utilities_tuw/cli/drafts.py
@@ -15,10 +15,10 @@ from os.path import basename, isdir, isfile, join
 
 import click
 from flask.cli import with_appcontext
+from invenio_access.permissions import system_identity
 from invenio_db import db
-from invenio_files_rest.models import ObjectVersion
 
-from ..utils import get_draft_file_service, get_record_service
+from ..utils import get_record_service
 from .options import (
     option_as_user,
     option_owners,
@@ -62,9 +62,15 @@ def list_drafts(user):
     for recid in recids:
         try:
             draft = service.read_draft(id_=recid, identity=identity)
-            click.secho(
-                "{} - {}".format(draft.id, draft.data["metadata"]["title"]), fg="green"
-            )
+            recid = draft.id
+            title = draft.data["metadata"].get("title", "-")
+            files = draft._record.files
+            if files.enabled:
+                num_files = f"{len(files.entries):02}"
+            else:
+                num_files = "no"
+
+            click.secho(f"{recid}\t{num_files} files\t{title}", fg="green")
         except:
             pass
 
@@ -94,6 +100,8 @@ def create_draft(metadata_path, publish, user, owners, vanity_pid):
     draft, if such a subdirectory exists.
     """
     recid = None
+    service = get_record_service()
+    file_service = service.draft_files
     identity = get_identity_for_user(user)
 
     if isfile(metadata_path):
@@ -101,6 +109,8 @@ def create_draft(metadata_path, publish, user, owners, vanity_pid):
         metadata = set_creatibutor_names(metadata)
         draft = create_record_from_metadata(metadata, identity, vanity_pid=vanity_pid)
         recid = draft["id"]
+        draft.files.enabled = False
+        draft.commit()
 
     elif isdir(metadata_path):
         metadata_file_path = join(metadata_path, "metadata.json")
@@ -112,6 +122,8 @@ def create_draft(metadata_path, publish, user, owners, vanity_pid):
         metadata = set_creatibutor_names(metadata)
         draft = create_record_from_metadata(metadata, identity)
         recid = draft["id"]
+        draft.files.enabled = True
+        draft.commit()
 
         file_names = []
         if isdir(deposit_files_path):
@@ -124,31 +136,32 @@ def create_draft(metadata_path, publish, user, owners, vanity_pid):
                 msg = "ignored in '{}': {}".format(deposit_files_path, ignored)
                 click.secho(msg, fg="red", err=True)
 
-        service = get_draft_file_service()
-        service.init_files(
+        file_service.init_files(
             id_=recid, identity=identity, data=[{"key": fn} for fn in file_names]
         )
         for fn in file_names:
             file_path = join(deposit_files_path, fn)
             with open(file_path, "rb") as deposit_file:
-                service.set_file_content(
+                file_service.set_file_content(
                     id_=recid, file_key=fn, identity=identity, stream=deposit_file
                 )
 
-            service.commit_file(id_=recid, file_key=fn, identity=identity)
+            file_service.commit_file(id_=recid, file_key=fn, identity=identity)
 
     else:
         raise Exception("neither a file nor a directory: %s" % metadata_path)
 
     if owners:
-        actual_draft = draft._record if hasattr(draft, "_record") else draft
         owners = [get_user_by_identifier(owner) for owner in owners]
-        set_record_owners(actual_draft, owners)
+        set_record_owners(draft, owners)
+        if service.indexer:
+            service.indexer.index(draft)
 
     if publish:
-        service = get_record_service()
         service.publish(id_=recid, identity=identity)
 
+    # commit, as there are code paths that don't have a commit otherwise
+    db.session.commit()
     click.secho(recid, fg="green")
 
 
@@ -164,10 +177,12 @@ def show_draft(pid, pid_type, user, pretty_print, raw):
     pid = convert_to_recid(pid, pid_type)
     identity = get_identity_for_user(user)
     service = get_record_service()
-    draft = service.read_draft(id_=pid, identity=identity)
     indent = 2 if pretty_print else None
+
+    draft = service.read_draft(id_=pid, identity=identity)
     data = draft._record.model.json if raw else draft.data
     data = json.dumps(data, indent=indent)
+
     click.echo(data)
 
 
@@ -199,10 +214,9 @@ def update_draft(metadata_file, pid, pid_type, user, patch, owners):
         metadata = patch_metadata(draft_data, metadata)
 
     if owners:
-        draft = service.read_draft(id_=pid, identity=identity)
-        actual_draft = draft._record if hasattr(draft, "_record") else draft
+        draft = service.read_draft(id_=pid, identity=identity)._record
         owners = [get_user_by_identifier(owner) for owner in owners]
-        set_record_owners(actual_draft, owners)
+        set_record_owners(draft, owners)
 
     metadata = set_creatibutor_names(metadata)
     service.update_draft(id_=pid, identity=identity, data=metadata)
@@ -220,6 +234,7 @@ def publish_draft(pid, pid_type, user):
     identity = get_identity_for_user(user)
     service = get_record_service()
     service.publish(id_=pid, identity=identity)
+
     click.secho(pid, fg="green")
 
 
@@ -234,6 +249,7 @@ def delete_draft(pid, pid_type, user):
     identity = get_identity_for_user(user)
     service = get_record_service()
     service.delete_draft(id_=pid, identity=identity)
+
     click.secho(pid, fg="red")
 
 
@@ -253,7 +269,13 @@ def add_files(filepaths, pid, pid_type, user):
     """Add the specified files to the draft."""
     recid = convert_to_recid(pid, pid_type)
     identity = get_identity_for_user(user)
-    service = get_draft_file_service()
+    service = get_record_service()
+    file_service = service.draft_files
+    draft = service.read_draft(id_=recid, identity=identity)._record
+
+    if not draft.files.enabled:
+        draft.files.enabled = True
+        draft.commit()
 
     paths = []
     for file_path in filepaths:
@@ -279,16 +301,16 @@ def add_files(filepaths, pid, pid_type, user):
         click.secho("aborting: duplicates in file names detected", fg="red", err=True)
         sys.exit(1)
 
-    service.init_files(
+    file_service.init_files(
         id_=recid, identity=identity, data=[{"key": basename(fp)} for fp in paths]
     )
     for fp in paths:
         fn = basename(fp)
         with open(fp, "rb") as deposit_file:
-            service.set_file_content(
+            file_service.set_file_content(
                 id_=recid, file_key=fn, identity=identity, stream=deposit_file
             )
-        service.commit_file(id_=recid, file_key=fn, identity=identity)
+        file_service.commit_file(id_=recid, file_key=fn, identity=identity)
 
     click.secho(recid, fg="green")
 
@@ -300,15 +322,28 @@ def add_files(filepaths, pid, pid_type, user):
 @option_as_user
 @with_appcontext
 def remove_files(filekeys, pid, pid_type, user):
-    """Remove the deposited files."""
+    """Remove the given deposited files from the draft.
+
+    Note that this operation does not remove the files from storage!
+    """
     recid = convert_to_recid(pid, pid_type)
     identity = get_identity_for_user(user)
-    service = get_draft_file_service()
+    service = get_record_service()
+    file_service = service.draft_files
 
     for file_key in filekeys:
-        service.delete_file(id_=recid, file_key=file_key, identity=identity)
-        click.secho(file_key, fg="red")
-        # TODO: add option for hard-deleting files
+        try:
+            file_service.delete_file(id_=recid, file_key=file_key, identity=identity)
+            click.secho(file_key, fg="red")
+
+        except KeyError as err:
+            click.secho(f"error: {err}", fg="yellow", err=True)
+
+    draft = service.read_draft(id_=recid, identity=identity)._record
+    if not draft.files.entries:
+        draft.files.enabled = False
+        draft.commit()
+        db.session.commit()
 
 
 @files.command("list")
@@ -320,32 +355,35 @@ def list_files(pid, pid_type, user):
     """Show a list of files deposited with the draft."""
     recid = convert_to_recid(pid, pid_type)
     identity = get_identity_for_user(user)
-    service = get_draft_file_service()
-    file_results = service.list_files(id_=recid, identity=identity)
-    for f in file_results.entries:
-        ov = ObjectVersion.get(f["bucket_id"], f["key"], f["version_id"])
-        fi = ov.file
-        click.secho("{}\t{}\t{}".format(ov.key, fi.uri, fi.checksum), fg="green")
+    service = get_record_service()
+    draft = service.read_draft(id_=recid, identity=identity)._record
+
+    for name, rec_file in draft.files.entries.items():
+        fi = rec_file.file
+        if fi:
+            click.secho("{}\t{}\t{}".format(name, fi.uri, fi.checksum), fg="green")
+        else:
+            click.secho(name, fg="red")
 
 
 @files.command("verify")
 @option_pid_value
 @option_pid_type
-@option_as_user
 @with_appcontext
-def verify_files(pid, pid_type, user):
+def verify_files(pid, pid_type):
     """Verify the checksums for each of the draft's files."""
     recid = convert_to_recid(pid, pid_type)
-    identity = get_identity_for_user(user)
     service = get_record_service()
-    service.require_permission(identity, "read_files")
-    draft = service.read_draft(id_=recid, identity=identity)
-    draft = draft._record if hasattr(draft, "_record") else draft
+    draft = service.read_draft(id_=recid, identity=system_identity)._record
     num_errors = 0
 
     for name, rec_file in draft.files.entries.items():
-        if rec_file.file.verify_checksum():
+        if not rec_file.file:
+            click.secho(name, fg="red")
+
+        elif rec_file.file.verify_checksum():
             click.secho(name, fg="green")
+
         else:
             click.secho("{}: failed checksum verification".format(name), fg="red")
             num_errors += 1
diff --git a/invenio_utilities_tuw/cli/files.py b/invenio_utilities_tuw/cli/files.py
index 326e1d0..9dc6971 100644
--- a/invenio_utilities_tuw/cli/files.py
+++ b/invenio_utilities_tuw/cli/files.py
@@ -13,6 +13,7 @@ from collections import defaultdict
 
 import click
 from flask.cli import with_appcontext
+from invenio_access.permissions import system_identity
 from invenio_db import db
 from invenio_files_rest.models import Location, ObjectVersion
 
@@ -70,11 +71,10 @@ def deleted():
 
 
 @deleted.command("list")
-@option_as_user
 @option_pid_value
 @option_pid_type
 @with_appcontext
-def list_deleted_files(user, pid, pid_type):
+def list_deleted_files(pid, pid_type):
     """Hard-delete files that have already been soft-deleted.
 
     Optionally, this operation can be restricted to the bucket associated with a draft
@@ -82,7 +82,7 @@ def list_deleted_files(user, pid, pid_type):
     """
     recid = convert_to_recid(pid, pid_type) if pid else None
     service = get_record_service()
-    identity = get_identity_for_user(user)
+    identity = system_identity
 
     # if a PID was specified, limit the cleaning to this record's bucket
     marked_as_deleted = ObjectVersion.query.filter_by(file_id=None, is_head=True)
@@ -90,9 +90,6 @@ def list_deleted_files(user, pid, pid_type):
         draft = service.read_draft(id_=recid, identity=identity)._record
         marked_as_deleted = marked_as_deleted.filter_by(bucket=draft.files.bucket)
 
-    # check if the specified user has permissions
-    service.require_permission(identity, "read_files")
-
     # hard-delete all soft-deleted ObjectVersions
     file_instances = defaultdict(set)
     for dov in marked_as_deleted.all():
@@ -112,11 +109,10 @@ def list_deleted_files(user, pid, pid_type):
 @click.confirmation_option(
     prompt="are you sure you want to permanently remove soft-deleted files?"
 )
-@option_as_user
 @option_pid_value
 @option_pid_type
 @with_appcontext
-def hard_delete_files(user, pid, pid_type):
+def hard_delete_files(pid, pid_type):
     """Hard-delete files that have already been soft-deleted.
 
     Optionally, this operation can be restricted to the bucket associated with a draft
@@ -124,7 +120,7 @@ def hard_delete_files(user, pid, pid_type):
     """
     recid = convert_to_recid(pid, pid_type) if pid else None
     service = get_record_service()
-    identity = get_identity_for_user(user)
+    identity = system_identity
 
     # if a PID was specified, limit the cleaning to this record's bucket
     marked_as_deleted = ObjectVersion.query.filter_by(file_id=None, is_head=True)
@@ -132,9 +128,6 @@ def hard_delete_files(user, pid, pid_type):
         draft = service.read_draft(id_=recid, identity=identity)._record
         marked_as_deleted = marked_as_deleted.filter_by(bucket=draft.files.bucket)
 
-    # check if the specified user has permissions
-    service.require_permission(identity, "delete")
-
     # hard-delete all soft-deleted ObjectVersions
     file_instances = defaultdict(set)
     for dov in marked_as_deleted.all():
@@ -185,9 +178,8 @@ def list_orphan_files():
 @click.confirmation_option(
     prompt="are you sure you want to permanently remove orphaned files?"
 )
-@option_as_user
 @with_appcontext
-def clean_files(user):
+def clean_files():
     """List files from locations that aren't referenced in any DB models anymore."""
     for loc in Location.query.all():
         # we only know how to handle directories on the file system for now
diff --git a/invenio_utilities_tuw/cli/records.py b/invenio_utilities_tuw/cli/records.py
index b6a31fd..3adb015 100644
--- a/invenio_utilities_tuw/cli/records.py
+++ b/invenio_utilities_tuw/cli/records.py
@@ -14,9 +14,8 @@ import sys
 import click
 from flask.cli import with_appcontext
 from invenio_db import db
-from invenio_files_rest.models import ObjectVersion
 
-from ..utils import get_record_file_service, get_record_service
+from ..utils import get_record_service
 from .options import (
     option_as_user,
     option_owners,
@@ -61,11 +60,17 @@ def list_records(user):
     for recid in recids:
         try:
             rec = service.read(id_=recid, identity=identity)
-            click.secho(
-                "{} - {}".format(rec.id, rec.data["metadata"]["title"]), fg="green"
-            )
+            recid = rec.id
+            title = rec.data["metadata"].get("title", "-")
+            files = rec._record.files
+            if files.enabled:
+                num_files = f"{len(files.entries):02}"
+            else:
+                num_files = "no"
+
+            click.secho(f"{recid}\t{num_files} files\t{title}", fg="green")
         except:
-            raise
+            pass
 
 
 @records.command("show")
@@ -80,10 +85,12 @@ def show_record(pid, pid_type, user, pretty_print, raw):
     pid = convert_to_recid(pid, pid_type)
     identity = get_identity_for_user(user)
     service = get_record_service()
-    record = service.read(id_=pid, identity=identity)
     indent = 2 if pretty_print else None
+
+    record = service.read(id_=pid, identity=identity)
     data = record._record.model.json if raw else record.data
     data = json.dumps(data, indent=indent)
+
     click.echo(data)
 
 
@@ -128,17 +135,21 @@ def update_record(metadata_file, pid, pid_type, user, patch, owners, direct):
     if direct:
         record = service.read(id_=pid, identity=identity)._record
         record.update(metadata)
+
         # the refresh is required because the access system field takes precedence
         # over the record's data in 'record.commit()'
         record.access.refresh_from_dict(record["access"])
         record.commit()
         db.session.commit()
         service.indexer.index(record)
+
     else:
         try:
             # first, try the modern approach of updating records (March 2021)
+            service.edit(id_=pid, identity=identity)
             service.update_draft(id_=pid, identity=identity, data=metadata)
             service.publish(id_=pid, identity=identity)
+
         except Exception as e:
             # if that fails, try the good old plain update
             click.secho("error: {}".format(e), fg="yellow")
@@ -146,10 +157,11 @@ def update_record(metadata_file, pid, pid_type, user, patch, owners, direct):
             service.update(id_=pid, identity=identity, data=metadata)
 
     if owners:
-        record = service.read(id_=pid, identity=identity)
-        actual_record = record._record if hasattr(record, "_record") else record
+        record = service.read(id_=pid, identity=identity)._record
         owners = [get_user_by_identifier(owner) for owner in owners]
-        set_record_owners(actual_record, owners)
+        set_record_owners(record, owners)
+        if service.indexer:
+            service.indexer.index(record)
 
     click.secho(pid, fg="green")
 
@@ -185,12 +197,15 @@ def list_files(pid, pid_type, user):
     """Show a list of files deposited with the record."""
     recid = convert_to_recid(pid, pid_type)
     identity = get_identity_for_user(user)
-    service = get_record_file_service()
-    file_results = service.list_files(id_=recid, identity=identity)
-    for f in file_results.entries:
-        ov = ObjectVersion.get(f["bucket_id"], f["key"], f["version_id"])
-        fi = ov.file
-        click.secho("{}\t{}\t{}".format(ov.key, fi.uri, fi.checksum), fg="green")
+    service = get_record_service()
+    record = service.read(id_=recid, identity=identity)._record
+
+    for name, rec_file in record.files.entries.items():
+        fi = rec_file.file
+        if fi:
+            click.secho("{}\t{}\t{}".format(name, fi.uri, fi.checksum), fg="green")
+        else:
+            click.secho(name, fg="red")
 
 
 @files.command("verify")
@@ -202,15 +217,17 @@ def verify_files(pid, pid_type, user):
     """Verify the checksums for each of the record's files."""
     recid = convert_to_recid(pid, pid_type)
     identity = get_identity_for_user(user)
-    service = get_record_file_service()
-    service.require_permission(identity, "read_files")
-    record = service.read(id_=recid, identity=identity)
-    record = record._record if hasattr(record, "_record") else record
+    service = get_record_service()
+    record = service.read(id_=recid, identity=identity)._record
     num_errors = 0
 
     for name, rec_file in record.files.entries.items():
-        if rec_file.file.verify_checksum():
+        if not rec_file.file:
+            click.secho(name, fg="red")
+
+        elif rec_file.file.verify_checksum():
             click.secho(name, fg="green")
+
         else:
             click.secho("{}: failed checksum verification".format(name), fg="red")
             num_errors += 1
@@ -228,16 +245,11 @@ def verify_files(pid, pid_type, user):
 @records.command("reindex")
 @option_pid_values
 @option_pid_type
-@option_as_user
 @with_appcontext
-def reindex_records(pids, pid_type, user):
+def reindex_records(pids, pid_type):
     """Reindex all available (or just the specified) records."""
     service = get_record_service()
 
-    # basically, this is just a check whether the user exists,
-    # since there's no permission for re-indexing
-    get_identity_for_user(user)
-
     if pids:
         records = [
             service.record_cls.get_record(get_object_uuid(pid, pid_type))
diff --git a/invenio_utilities_tuw/cli/utils.py b/invenio_utilities_tuw/cli/utils.py
index f6b976e..4c6f882 100644
--- a/invenio_utilities_tuw/cli/utils.py
+++ b/invenio_utilities_tuw/cli/utils.py
@@ -49,18 +49,20 @@ def create_record_from_metadata(
                 "PID '{}:{}' is already taken".format(vanity_pid_type, vanity_pid)
             )
 
-    draft = service.create(identity=identity, data=metadata)
-    actual_draft = draft._record if hasattr(draft, "_record") else draft
+    draft = service.create(identity=identity, data=metadata)._record
 
     if vanity_pid:
         # service.update_draft() is called to update the IDs in the record's metadata
         # (via record.commit()), re-index the record, and commit the db session
         if service.indexer:
-            service.indexer.delete(actual_draft)
+            service.indexer.delete(draft)
 
-        actual_draft.pid.pid_value = vanity_pid
+        draft.pid.pid_value = vanity_pid
         db.session.commit()
-        draft = service.update_draft(vanity_pid, identity=identity, data=metadata)
+
+        draft = service.update_draft(
+            vanity_pid, identity=identity, data=metadata
+        )._record
 
     return draft
 
diff --git a/invenio_utilities_tuw/config.py b/invenio_utilities_tuw/config.py
index f9dfaf4..e364c11 100644
--- a/invenio_utilities_tuw/config.py
+++ b/invenio_utilities_tuw/config.py
@@ -6,22 +6,9 @@
 # modify it under the terms of the MIT License; see LICENSE file for more
 # details.
 
-"""Some utilities for InvenioRDM."""
+"""Configuration for Invenio-Utilities-TUW."""
 
 from invenio_rdm_records.proxies import current_rdm_records
 
-UTILITIES_TUW_BASE_TEMPLATE = "invenio_utilities_tuw/base.html"
-"""Default base template for the demo page."""
-
 UTILITIES_TUW_RECORD_SERVICE_FACTORY = lambda: current_rdm_records.records_service
 """Factory function for creating a RecordService."""
-
-UTILITIES_TUW_RECORD_FILES_SERVICE_FACTORY = (
-    lambda: current_rdm_records.record_files_service
-)
-"""Factory function for creating a RecordFileService."""
-
-UTILITIES_TUW_DRAFT_FILES_SERVICE_FACTORY = (
-    lambda: current_rdm_records.draft_files_service
-)
-"""Factory function for creating a DraftFileService."""
diff --git a/invenio_utilities_tuw/utils.py b/invenio_utilities_tuw/utils.py
index 90a93d1..b8220e3 100644
--- a/invenio_utilities_tuw/utils.py
+++ b/invenio_utilities_tuw/utils.py
@@ -8,7 +8,6 @@
 
 """Utility functions for Invenio-Utilities-TUW."""
 
-
 from flask import current_app
 from invenio_rdm_records.proxies import current_rdm_records
 from werkzeug.utils import import_string
@@ -26,26 +25,11 @@ def get_or_import(value, default=None):
 
 def get_record_service():
     """Get the configured RecordService."""
-    factory = current_app.config.get(
-        "UTILITIES_TUW_RECORD_SERVICE_FACTORY",
-        lambda: current_rdm_records.records_service,
-    )
-    return factory()
-
-
-def get_record_file_service():
-    """Get the configured RecordFileService."""
-    factory = current_app.config.get(
-        "UTILITIES_TUW_RECORD_FILES_SERVICE_FACTORY",
-        lambda: current_rdm_records.record_files_service,
-    )
-    return factory()
+    factory = current_app.config.get("UTILITIES_TUW_RECORD_SERVICE_FACTORY", None)
 
+    if factory is not None:
+        factory = get_or_import(factory)
+    else:
+        factory = lambda: current_rdm_records.records_service
 
-def get_draft_file_service():
-    """Get the configured DraftFilesService."""
-    factory = current_app.config.get(
-        "UTILITIES_TUW_DRAFT_FILES_SERVICE_FACTORY",
-        lambda: current_rdm_records.draft_files_service,
-    )
     return factory()
-- 
GitLab