From f2afdf857a58aa00fa7572d6a348b85d2f1cabdb Mon Sep 17 00:00:00 2001 From: Maximilian Moser <maximilian.moser@tuwien.ac.at> Date: Fri, 23 Feb 2024 15:56:40 +0100 Subject: [PATCH] Store list of results per format * building up this list while loading results reduces the amount of potential work for filtering later on * also enable passing a list of already known formats to `load_results(...)` * new formats will be added to the supplied list as they're encountered --- formatscaper/core/models.py | 2 ++ formatscaper/core/utils.py | 23 ++++++++++++++++++----- 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/formatscaper/core/models.py b/formatscaper/core/models.py index 28a54f4..15749ff 100644 --- a/formatscaper/core/models.py +++ b/formatscaper/core/models.py @@ -1,4 +1,5 @@ import dataclasses +from typing import List @dataclasses.dataclass @@ -18,6 +19,7 @@ class Format: name: str mime: str endangered: bool = False + results: List["Result"] = dataclasses.field(default_factory=list) def as_dict(self): """Dump the data as dictionary.""" diff --git a/formatscaper/core/utils.py b/formatscaper/core/utils.py index 4742385..e8d60f3 100644 --- a/formatscaper/core/utils.py +++ b/formatscaper/core/utils.py @@ -88,13 +88,19 @@ def store_results(results: List[Result], file_name: str, file_format: str) -> bo def load_results( - file_name: str, file_format: Optional[str] = None, strict: bool = True + file_name: str, + file_format: Optional[str] = None, + strict: bool = True, + formats: Optional[List[Format]] = None, ) -> Optional[List[Result]]: """Load the results from the given file. In case the ``file_format`` isn't specified, auto-detection is attempted. If ``strict`` is set, then the result loading will fail if it cannot parse the format for a result. + Optionally, a list of already known ``formats`` can be supplied to avoid + creating duplicate ``Format`` instances. + Newly encountered formats will be appended to the supplied list. 
""" if file_format is None: if re.search(r"\.ya?ml$", file_name, re.IGNORECASE): @@ -116,12 +122,17 @@ def load_results( # note: we deduplicate formats so that manipulation of one entry updates all entries results = [] - known_formats = {} + formats = formats or [] + known_formats = {format.puid: format for format in formats} + for res in raw_results: format = None try: - f = Format(**res["format"]) - format = known_formats.setdefault(res["format"]["puid"], f) + format = known_formats.get(res["format"]["puid"], None) + if format is None: + format = Format(**res["format"]) + known_formats[format.puid] = format + formats.append(format) except (TypeError, KeyError) as e: # TypeError: the result doesn't have all required parts for Format() # KeyError: either the result doesn't have a format or it lacks the PUID @@ -129,6 +140,8 @@ def load_results( raise e res.pop("format", None) - results.append(Result(**res, format=format)) + result = Result(**res, format=format) + format.results.append(result) + results.append(result) return results -- GitLab