*** Wartungsfenster jeden ersten Mittwoch vormittag im Monat ***

Skip to content
Snippets Groups Projects
Commit a425f560 authored by Moser, Maximilian
Browse files

Add commands related to listing of files

* add command for listing soft-deleted files
* add command for listing all files of a record
parent a2fd9550
No related branches found
No related tags found
No related merge requests found
......@@ -5,7 +5,8 @@ from collections import defaultdict
import click
from flask.cli import with_appcontext
from invenio_db import db
from invenio_files_rest.models import ObjectVersion, FileInstance
from invenio_files_rest.models import Bucket, ObjectVersion, FileInstance
from invenio_rdm_records.records.models import RecordMetadata, DraftMetadata
from invenio_rdm_records.services.services import (
BibliographicRecordService as RecordService,
)
......@@ -45,7 +46,52 @@ def files():
pass
@files.command("rm")
@files.group("deleted")
def deleted():
    """Management commands for soft-deleted files."""
    # Intentionally empty: this callback only serves as the parent group that
    # the sub-commands for soft-deleted files are registered on. The docstring
    # above doubles as the group's ``--help`` text.
@deleted.command("list")
@option_as_user
@option_pid_value
@option_pid_type
@with_appcontext
def list_deleted_files(user, pid, pid_type):
    """List files that have been soft-deleted but not yet hard-deleted.

    Optionally, this operation can be restricted to the bucket associated with a draft
    (via its PID).
    """
    # `user` is resolved to an identity via get_identity_for_user(), and
    # `pid`/`pid_type` are resolved to a recid via convert_to_recid(); without
    # a PID, the listing is not restricted to a single bucket.
    recid = convert_to_recid(pid, pid_type) if pid else None
    service = RecordService()
    identity = get_identity_for_user(user)

    # a head ObjectVersion without a file is treated as the soft-delete marker
    marked_as_deleted = ObjectVersion.query.filter_by(file_id=None, is_head=True)

    # if a PID was specified, limit the listing to this record's bucket
    if recid is not None:
        draft = service.read_draft(id_=recid, identity=identity)._record
        marked_as_deleted = marked_as_deleted.filter_by(bucket=draft.files.bucket)

        # check if the specified user has permissions
        # NOTE(review): the original indentation was lost; this check is assumed
        # to belong to the PID-restricted branch -- confirm against the repository
        service.require_permission(identity, "read_files")

    # collect the FileInstances still attached to any version of a
    # soft-deleted object, grouped by object key
    file_instances = defaultdict(set)
    for dov in marked_as_deleted.all():
        for ov in ObjectVersion.get_versions(dov.bucket, dov.key).all():
            if ov.file is not None:
                file_instances[ov.key].add(ov.file)

    # print one "<key>\t<uri>" line per collected FileInstance
    for key, instances in file_instances.items():
        for fi in instances:
            click.secho("{}\t{}".format(key, fi.uri), fg="green")

    # nothing is modified by this command, so no database commit is issued
@deleted.command("rm")
@click.confirmation_option(
prompt="are you sure you want to permanently remove soft-deleted files?"
)
......@@ -102,10 +148,19 @@ def orphans():
@orphans.command("list")
@with_appcontext
def list_orphan_files():
    """List files that aren't referenced in any records (anymore)."""
    # The command takes no parameters: no click option is declared on it, so
    # there is nothing that could supply an argument to the callback.

    # first pass: FileInstances that no ObjectVersion points to
    for fi in FileInstance.query.all():
        if not fi.objects:
            click.secho(fi.uri, fg="yellow")

    # second pass: collect the buckets of all records & drafts, then look for
    # buckets in the database that are not among them
    records_and_drafts = RecordMetadata.query.all() + DraftMetadata.query.all()
    bucket_ids = {r.bucket.id for r in records_and_drafts if r.bucket is not None}
    unreferenced_buckets = Bucket.query.filter(~Bucket.id.in_(bucket_ids)).all()

    # only the file URIs are printed, so that the output stays a plain listing
    for bucket in unreferenced_buckets:
        for ov in bucket.objects:
            if ov.file is not None:
                click.secho(ov.file.uri, fg="yellow")
@orphans.command("clean")
......
......@@ -4,10 +4,14 @@ import json
import click
from flask.cli import with_appcontext
from invenio_files_rest.models import ObjectVersion
from invenio_rdm_records.records.models import RecordMetadata
from invenio_rdm_records.services.services import (
BibliographicRecordService as RecordService,
)
from invenio_rdm_records.services.services import (
BibliographicRecordFilesService as RecordFileService,
)
from .utils import (
convert_to_recid,
......@@ -112,3 +116,20 @@ def delete_record(pid, pid_type, user):
service.delete(id_=recid, identity=identity)
click.secho(recid, fg="red")
@record.command("files")
@option_pid_value
@option_pid_type
@option_as_user
@with_appcontext
def list_files(pid, pid_type, user):
    """Show a list of files deposited with the record."""
    # resolve the given PID to a recid and the given user to an identity
    recid = convert_to_recid(pid, pid_type)
    identity = get_identity_for_user(user)

    # ask the files service for the record's file entries
    listing = RecordFileService().list_files(id_=recid, identity=identity)

    # one tab-separated line per file: key, URI, checksum
    row_template = "{}\t{}\t{}"
    for entry in listing.entries:
        version = ObjectVersion.get(
            entry["bucket_id"], entry["key"], entry["version_id"]
        )
        instance = version.file
        row = row_template.format(version.key, instance.uri, instance.checksum)
        click.secho(row, fg="green")
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment