From a425f560165b4e326737511be7a37a122c54e543 Mon Sep 17 00:00:00 2001
From: Maximilian Moser <maximilian.moser@tuwien.ac.at>
Date: Mon, 14 Dec 2020 09:38:06 +0100
Subject: [PATCH] Add commands related to listing of files
* add command for listing soft-deleted files
* add command for listing all files of a record
* list orphaned files based on buckets not referenced by any record or draft
---
invenio_utilities_tuw/cli/files.py | 65 ++++++++++++++++++++++++++---
invenio_utilities_tuw/cli/record.py | 21 ++++++++++
2 files changed, 81 insertions(+), 5 deletions(-)
diff --git a/invenio_utilities_tuw/cli/files.py b/invenio_utilities_tuw/cli/files.py
index 2e7e5ca..9db2438 100644
--- a/invenio_utilities_tuw/cli/files.py
+++ b/invenio_utilities_tuw/cli/files.py
@@ -5,7 +5,8 @@ from collections import defaultdict
import click
from flask.cli import with_appcontext
from invenio_db import db
-from invenio_files_rest.models import ObjectVersion, FileInstance
+from invenio_files_rest.models import Bucket, ObjectVersion, FileInstance
+from invenio_rdm_records.records.models import RecordMetadata, DraftMetadata
from invenio_rdm_records.services.services import (
BibliographicRecordService as RecordService,
)
@@ -45,7 +46,52 @@ def files():
pass
-@files.command("rm")
+@files.group("deleted")
+def deleted():
+ """Management commands for soft-deleted files."""
+ pass  # no group-level behaviour; sub-commands are attached via @deleted.command
+
+
+@deleted.command("list")
+@option_as_user
+@option_pid_value
+@option_pid_type
+@with_appcontext
+def list_deleted_files(user, pid, pid_type):
+    """List files that have been soft-deleted.
+
+    Prints one line per file: the object key and the file URI, tab-separated.
+    Optionally, the listing can be restricted to the bucket associated with a
+    draft (via its PID).
+    """
+    recid = convert_to_recid(pid, pid_type) if pid else None
+    service = RecordService()
+    identity = get_identity_for_user(user)
+
+    # soft-deleted files are selected as head ObjectVersions without a file;
+    # if a PID was specified, limit the listing to this draft's bucket
+    marked_as_deleted = ObjectVersion.query.filter_by(file_id=None, is_head=True)
+    if recid is not None:
+        draft = service.read_draft(id_=recid, identity=identity)._record
+        marked_as_deleted = marked_as_deleted.filter_by(bucket=draft.files.bucket)
+
+    # check if the specified user has permissions
+    service.require_permission(identity, "read_files")
+
+    # collect the files behind all versions of the soft-deleted objects
+    file_instances = defaultdict(set)
+    for dov in marked_as_deleted.all():
+        for ov in ObjectVersion.get_versions(dov.bucket, dov.key).all():
+            if ov.file is not None:
+                file_instances[ov.key].add(ov.file)
+
+    # read-only command: only report what was found, nothing is modified
+    for key, instances in file_instances.items():
+        for fi in instances:
+            click.secho("{}\t{}".format(key, fi.uri), fg="green")
+
+
+@deleted.command("rm")
@click.confirmation_option(
prompt="are you sure you want to permanently remove soft-deleted files?"
)
@@ -102,10 +148,19 @@ def orphans():
@orphans.command("list")
@with_appcontext
-def list_orphan_files(dry_run):
+def list_orphan_files():
"""List files that aren't referenced in any records (anymore)."""
- for fi in (f for f in FileInstance.query.all() if not f.objects):
- click.secho(fi.uri, fg="yellow")
+    # a bucket counts as orphaned if no record or draft references it
+    # NOTE: this loads every record and draft row, which may be slow on
+    #       large instances
+    records = RecordMetadata.query.all() + DraftMetadata.query.all()
+    bucket_ids = {r.bucket.id for r in records if r.bucket is not None}
+    buckets = Bucket.query.filter(~Bucket.id.in_(bucket_ids)).all()
+
+    for bucket in buckets:
+        for ov in bucket.objects:
+            if ov.file is not None:
+                click.secho(ov.file.uri, fg="yellow")
@orphans.command("clean")
diff --git a/invenio_utilities_tuw/cli/record.py b/invenio_utilities_tuw/cli/record.py
index df1c7fa..f4aebdd 100644
--- a/invenio_utilities_tuw/cli/record.py
+++ b/invenio_utilities_tuw/cli/record.py
@@ -4,10 +4,14 @@ import json
import click
from flask.cli import with_appcontext
+from invenio_files_rest.models import ObjectVersion
from invenio_rdm_records.records.models import RecordMetadata
from invenio_rdm_records.services.services import (
BibliographicRecordService as RecordService,
)
+from invenio_rdm_records.services.services import (
+ BibliographicRecordFilesService as RecordFileService,
+)
from .utils import (
convert_to_recid,
@@ -112,3 +116,20 @@ def delete_record(pid, pid_type, user):
service.delete(id_=recid, identity=identity)
click.secho(recid, fg="red")
+
+
+@record.command("files")
+@option_pid_value
+@option_pid_type
+@option_as_user
+@with_appcontext
+def list_files(pid, pid_type, user):
+    """Show a list of files deposited with the record."""
+    recid = convert_to_recid(pid, pid_type)
+    identity = get_identity_for_user(user)
+    file_results = RecordFileService().list_files(id_=recid, identity=identity)
+    for f in file_results.entries:
+        ov = ObjectVersion.get(f["bucket_id"], f["key"], f["version_id"])
+        fi = ov.file if ov is not None else None  # lookup may yield no version
+        if fi is not None:  # skip entries without file content instead of crashing
+            click.secho("{}\t{}\t{}".format(ov.key, fi.uri, fi.checksum), fg="green")
--
GitLab