diff --git a/invenio_utilities_tuw/cli/files.py b/invenio_utilities_tuw/cli/files.py
index 2e7e5ca65aa384ee18c6331d76013f937655015a..9db24385ab6c3fc67e7862f18f8af895f2238247 100644
--- a/invenio_utilities_tuw/cli/files.py
+++ b/invenio_utilities_tuw/cli/files.py
@@ -5,7 +5,8 @@ from collections import defaultdict
import click
from flask.cli import with_appcontext
from invenio_db import db
-from invenio_files_rest.models import ObjectVersion, FileInstance
+from invenio_files_rest.models import Bucket, ObjectVersion, FileInstance
+from invenio_rdm_records.records.models import RecordMetadata, DraftMetadata
from invenio_rdm_records.services.services import (
BibliographicRecordService as RecordService,
)
@@ -45,7 +46,52 @@ def files():
pass
-@files.command("rm")
+# sub-group of `files`; the "list" and "rm" commands below are attached to it
+@files.group("deleted")
+def deleted():
+    """Management commands for soft-deleted files."""
+
+
+@deleted.command("list")
+@option_as_user
+@option_pid_value
+@option_pid_type
+@with_appcontext
+def list_deleted_files(user, pid, pid_type):
+    """List files that have already been soft-deleted.
+
+    Optionally, this operation can be restricted to the bucket associated with a draft
+    (via its PID).
+    """
+    recid = convert_to_recid(pid, pid_type) if pid else None
+    service = RecordService()
+    identity = get_identity_for_user(user)
+
+    # soft-deleted keys are looked up as head ObjectVersions without a file
+    marked_as_deleted = ObjectVersion.query.filter_by(file_id=None, is_head=True)
+
+    # if a PID was specified, limit the listing to this draft's bucket
+    if recid is not None:
+        draft = service.read_draft(id_=recid, identity=identity)._record
+        marked_as_deleted = marked_as_deleted.filter_by(bucket=draft.files.bucket)
+
+    # check if the specified user has permissions
+    service.require_permission(identity, "read_files")
+
+    # for every soft-deleted key, collect the FileInstances that any version
+    # of that key in the same bucket still points to
+    file_instances = defaultdict(set)
+    for dov in marked_as_deleted.all():
+        for ov in ObjectVersion.get_versions(dov.bucket, dov.key).all():
+            if ov.file is not None:
+                file_instances[ov.key].add(ov.file)
+
+    # print one "key<TAB>uri" line per FileInstance; this command only reads
+    # from the database, so no commit is required
+    for key, instances in file_instances.items():
+        for fi in instances:
+            click.secho("{}\t{}".format(key, fi.uri), fg="green")
+
+
+@deleted.command("rm")
@click.confirmation_option(
prompt="are you sure you want to permanently remove soft-deleted files?"
)
@@ -102,10 +148,19 @@ def orphans():
@orphans.command("list")
@with_appcontext
-def list_orphan_files(dry_run):
+def list_orphan_files():
"""List files that aren't referenced in any records (anymore)."""
- for fi in (f for f in FileInstance.query.all() if not f.objects):
- click.secho(fi.uri, fg="yellow")
+    # collect the IDs of all buckets that are still attached to a record or draft
+    records_and_drafts = RecordMetadata.query.all() + DraftMetadata.query.all()
+    bucket_ids = {r.bucket.id for r in records_and_drafts if r.bucket is not None}
+
+    # every other bucket is not referenced by any record or draft (anymore)
+    buckets = Bucket.query.filter(~Bucket.id.in_(bucket_ids)).all()
+
+    # print only the file URIs (no debug output), one per line;
+    # ObjectVersions without a file are skipped
+    for bucket in buckets:
+        for ov in bucket.objects:
+            if ov.file is not None:
+                click.secho(ov.file.uri, fg="yellow")
@orphans.command("clean")
diff --git a/invenio_utilities_tuw/cli/record.py b/invenio_utilities_tuw/cli/record.py
index df1c7fa865cb1266084a13546711740c06ef517f..f4aebddeb19dc13d44086388d0a2a6c0af752f80 100644
--- a/invenio_utilities_tuw/cli/record.py
+++ b/invenio_utilities_tuw/cli/record.py
@@ -4,10 +4,14 @@ import json
import click
from flask.cli import with_appcontext
+from invenio_files_rest.models import ObjectVersion
from invenio_rdm_records.records.models import RecordMetadata
from invenio_rdm_records.services.services import (
BibliographicRecordService as RecordService,
)
+from invenio_rdm_records.services.services import (
+ BibliographicRecordFilesService as RecordFileService,
+)
from .utils import (
convert_to_recid,
@@ -112,3 +116,20 @@ def delete_record(pid, pid_type, user):
service.delete(id_=recid, identity=identity)
click.secho(recid, fg="red")
+
+
+@record.command("files")
+@option_pid_value
+@option_pid_type
+@option_as_user
+@with_appcontext
+def list_files(pid, pid_type, user):
+    """Show a list of files deposited with the record."""
+    recid = convert_to_recid(pid, pid_type)
+    identity = get_identity_for_user(user)
+    entries = RecordFileService().list_files(id_=recid, identity=identity).entries
+
+    # one tab-separated line per file entry: key, file URI and checksum
+    for entry in entries:
+        version = ObjectVersion.get(
+            entry["bucket_id"], entry["key"], entry["version_id"]
+        )
+        instance = version.file
+        columns = (version.key, instance.uri, instance.checksum)
+        click.secho("{}\t{}\t{}".format(*columns), fg="green")