diff --git a/invenio_utilities_tuw/cli/cli.py b/invenio_utilities_tuw/cli/cli.py index 162395a5f88ac3e2bbe613ad8aa1ec6a63670860..3d5a1a46a4f9c21376c858bc8053609e965487be 100644 --- a/invenio_utilities_tuw/cli/cli.py +++ b/invenio_utilities_tuw/cli/cli.py @@ -13,6 +13,7 @@ import click from .drafts import drafts from .files import files from .records import records +from .reports import reports from .search import search from .users import users @@ -25,5 +26,6 @@ def utilities(): utilities.add_command(drafts) utilities.add_command(files) utilities.add_command(records) +utilities.add_command(reports) utilities.add_command(search) utilities.add_command(users) diff --git a/invenio_utilities_tuw/cli/reports.py b/invenio_utilities_tuw/cli/reports.py new file mode 100644 index 0000000000000000000000000000000000000000..c2ee92d0dd0a90f5627b4aec4c80bb22ca8db94e --- /dev/null +++ b/invenio_utilities_tuw/cli/reports.py @@ -0,0 +1,74 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2024 TU Wien. +# +# Invenio-Utilities-TUW is free software; you can redistribute it and/or +# modify it under the terms of the MIT License; see LICENSE file for more +# details. 

"""Commands for generating reports."""

import click
from flask.cli import with_appcontext
from invenio_accounts.models import User
from invenio_rdm_records.proxies import current_rdm_records_service as rdm_service

from .utils import bytes_to_human, is_owned_by


# NOTE: the docstrings of the click group/commands below double as the text
#       shown by `--help`, so they are kept short and unchanged; additional
#       explanations live in regular comments instead.


@click.group()
def reports():
    """Commands for generating reports."""


@reports.command("uploads-per-year")
@with_appcontext
def uploads_per_year():
    """Report the published uploads per year."""
    # Prints one line per year (ascending), with the fields separated by tabs:
    # the year, the number of records created in that year, the number of
    # their files, and the summed file sizes formatted by `bytes_to_human()`.
    sep = "\t"
    rec_cls = rdm_service.record_cls

    # all rows of the record model are loaded and wrapped in the API class
    records = [rec_cls(rm.data, model=rm) for rm in rec_cls.model_cls.query.all()]

    # group in a single pass: year -> record id -> {file name: file size}
    # (sizes are presumably in bytes, as they are fed to `bytes_to_human()`)
    years_records_files = {}
    for rec in records:
        file_sizes = {fn: e.file.size for fn, e in rec.files.entries.items()}
        years_records_files.setdefault(rec.created.year, {})[rec["id"]] = file_sizes

    for year in sorted(years_records_files):
        records_files = years_records_files[year]
        num_recs = len(records_files)
        num_files = sum(len(files) for files in records_files.values())
        upload_sizes = sum(sum(files.values()) for files in records_files.values())

        click.echo(
            f"{year}{sep}{num_recs} records{sep}{num_files} files{sep}{bytes_to_human(upload_sizes)}"
        )


@reports.command("uploads-per-user")
@with_appcontext
def uploads_per_user():
    """Generate a list of uploads per user."""
    # For every user owning at least one record, prints a header line with the
    # user's ID, email address and number of records, followed by one line per
    # record PID value and an empty line.
    users = User.query.all()
    rec_cls = rdm_service.record_cls
    records = [rec_cls(rm.data, model=rm) for rm in rec_cls.model_cls.query.all()]

    # pair each user with the records they own, dropping users without uploads
    uploads = [
        (user, [rec for rec in records if is_owned_by(user, rec)]) for user in users
    ]
    uploads = [(user, owned) for user, owned in uploads if owned]

    # sort the users according to the number of their uploads (ascending);
    # the sort is stable, so ties keep the order of the user query
    for user, owned in sorted(uploads, key=lambda entry: len(entry[1])):
        click.echo(f"{user.id} {user.email}: {len(owned)} records")
        for rec in owned:
            click.echo(f" {rec.pid.pid_value}")
        click.echo()
# invenio_utilities_tuw/cli/utils.py -- additions introduced by this patch.
# The unchanged hunk context (module docstring, existing imports, tail of
# `set_record_owners`) is not repeated here.

from collections.abc import Iterable


def bytes_to_human(size):
    """Make the size (in bytes) more human-readable.

    The size is divided by 1024 until its magnitude drops below 1024 or the
    largest known unit (PiB) is reached, and is then formatted with two
    decimal places followed by the unit, e.g. ``1536`` -> ``"1.50 KiB"``.

    :param size: The size in bytes; negative values are scaled by magnitude.
    :return: The formatted size, e.g. ``"0.00 B"`` or ``"1.00 GiB"``.
    """
    units = ("B", "KiB", "MiB", "GiB", "TiB", "PiB")
    unit = units[0]
    for next_unit in units[1:]:
        # compare the magnitude so that negative sizes get scaled as well
        if abs(size) < 1024:
            break

        size /= 1024
        unit = next_unit

    return f"{size:.2f} {unit}"


def is_owned_by(user, record):
    """Check if the record is owned by the given user.

    :param user: The user to check; only its ``id`` is looked at.
    :param record: The record whose ``parent.access.owned_by`` is inspected.
    :return: ``True`` if any owner's ``owner_id`` equals the user's ID.
    """
    owners = record.parent.access.owned_by

    # note: InvenioRDM v12 changed record ownership to a single entity rather
    #       than a list of entities, but we're still on v11 so we make it
    #       compatible with both variants
    if not isinstance(owners, Iterable):
        owners = [owners]

    # falsy entries (e.g. a missing owner) are skipped rather than dereferenced;
    # the generator lets `any()` stop at the first match
    return any(o and o.owner_id == user.id for o in owners)