From 7ad96bae8f17b03ea8965fcf9fb0f9fa2ec21bc6 Mon Sep 17 00:00:00 2001
From: Maximilian Moser <maximilian.moser@tuwien.ac.at>
Date: Mon, 10 Feb 2025 18:18:23 +0100
Subject: [PATCH] Add background task for cleaning up dead files

* sometimes the cleanup operations for failed file uploads seem to leave
  over some `FileInstance` entries in the database that have a null URI
* these file instances cause the periodic health checks to fail and
  trigger warning emails
---
 invenio_config_tuw/tasks.py | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

diff --git a/invenio_config_tuw/tasks.py b/invenio_config_tuw/tasks.py
index 1103149..36f5b1d 100644
--- a/invenio_config_tuw/tasks.py
+++ b/invenio_config_tuw/tasks.py
@@ -10,9 +10,12 @@
 from typing import Optional
 
 from celery import shared_task
+from celery.schedules import crontab
 from flask import current_app, url_for
 from invenio_access.permissions import system_identity
 from invenio_accounts.proxies import current_datastore
+from invenio_db import db
+from invenio_files_rest.models import FileInstance
 from invenio_notifications.tasks import broadcast_notification
 from invenio_rdm_records.proxies import current_rdm_records_service as records_service
 
@@ -78,3 +81,32 @@ def send_publication_notification(recid: str, user_id: Optional[str] = None):
         html_message=html_message,
     )
     broadcast_notification(notification.dumps())
+
+
+@shared_task
+def remove_dead_files():
+    """Remove dead file instances (that don't have a URI) from the database.
+
+    These files seem to be leftovers from failed uploads that don't get cleaned up
+    properly.  Object versions that still point at such a file instance are
+    removed along with it.
+    """
+    dead_file_instances = FileInstance.query.filter(FileInstance.uri.is_(None)).all()
+    for fi in dead_file_instances:
+        # delete the object versions *before* the file instance: once the parent
+        # is marked as deleted, lazy-loading `fi.objects` autoflushes that delete,
+        # which nulls the children's `file_id` and leaves nothing to iterate over
+        for o in fi.objects:
+            db.session.delete(o)
+
+        db.session.delete(fi)
+
+    db.session.commit()
+
+
+CELERY_BEAT_SCHEDULE = {
+    "clean-dead-files": {
+        "task": "invenio_config_tuw.tasks.remove_dead_files",
+        "schedule": crontab(minute=1, hour=2),
+    },
+}
-- 
GitLab