from typing import Optional

from celery import shared_task
from celery.schedules import crontab
from flask import current_app, url_for
from invenio_access.permissions import system_identity
from invenio_accounts.proxies import current_datastore
from invenio_db import db
from invenio_files_rest.models import FileInstance
from invenio_notifications.tasks import broadcast_notification
from invenio_rdm_records.proxies import current_rdm_records_service as records_service


@shared_task
def remove_dead_files():
    """Remove dead file instances (that don't have a URI) from the database.

    These files seem to be leftovers from failed uploads that don't get cleaned up
    properly. Every object that still refers to such a file instance is deleted
    along with it, and all deletions are committed together at the end.

    :raises Exception: Any error raised while deleting or committing is re-raised
        after the database session has been rolled back.
    """
    dead_file_instances = FileInstance.query.filter(FileInstance.uri.is_(None)).all()

    try:
        for file_instance in dead_file_instances:
            # Collect and delete the dependent objects *before* marking the file
            # instance itself as deleted: accessing the relationship may trigger
            # an autoflush, which would otherwise flush the parent's pending
            # deletion before its objects have been looked up.
            for obj in file_instance.objects:
                db.session.delete(obj)

            db.session.delete(file_instance)

        db.session.commit()
    except Exception:
        # Don't leave a failed transaction behind on the worker's session.
        db.session.rollback()
        raise


# Periodic tasks for Celery beat: run the dead-file cleanup once a day at 02:01.
CELERY_BEAT_SCHEDULE = {
    "clean-dead-files": {
        "task": "invenio_config_tuw.tasks.remove_dead_files",
        "schedule": crontab(minute=1, hour=2),
    },
}