From f2afdf857a58aa00fa7572d6a348b85d2f1cabdb Mon Sep 17 00:00:00 2001
From: Maximilian Moser <maximilian.moser@tuwien.ac.at>
Date: Fri, 23 Feb 2024 15:56:40 +0100
Subject: [PATCH] Store list of results per format

* building up this list while loading results reduces the amount of
  potential work for filtering later on
* also enable passing a list of already known formats to
  `load_results(...)`
* new formats will be added to the supplied list as they're encountered
---
 formatscaper/core/models.py |  2 ++
 formatscaper/core/utils.py  | 23 ++++++++++++++++++-----
 2 files changed, 20 insertions(+), 5 deletions(-)

diff --git a/formatscaper/core/models.py b/formatscaper/core/models.py
index 28a54f4..15749ff 100644
--- a/formatscaper/core/models.py
+++ b/formatscaper/core/models.py
@@ -1,4 +1,5 @@
 import dataclasses
+from typing import List
 
 
 @dataclasses.dataclass
@@ -18,6 +19,7 @@ class Format:
     name: str
     mime: str
     endangered: bool = False
+    results: List["Result"] = dataclasses.field(default_factory=list)
 
     def as_dict(self):
         """Dump the data as dictionary."""
diff --git a/formatscaper/core/utils.py b/formatscaper/core/utils.py
index 4742385..e8d60f3 100644
--- a/formatscaper/core/utils.py
+++ b/formatscaper/core/utils.py
@@ -88,13 +88,19 @@ def store_results(results: List[Result], file_name: str, file_format: str) -> bo
 
 
 def load_results(
-    file_name: str, file_format: Optional[str] = None, strict: bool = True
+    file_name: str,
+    file_format: Optional[str] = None,
+    strict: bool = True,
+    formats: Optional[List[Format]] = None,
 ) -> Optional[List[Result]]:
     """Load the results from the given file.
 
     In case the ``file_format`` isn't specified, auto-detection is attempted.
     If ``strict`` is set, then the result loading will fail if it cannot parse
     the format for a result.
+    Optionally, a list of already known ``formats`` can be supplied to avoid
+    creating duplicate ``Format`` instances.
+    Newly encountered formats will be appended to the supplied list.
     """
     if file_format is None:
         if re.search(r"\.ya?ml$", file_name, re.IGNORECASE):
@@ -116,12 +122,17 @@ def load_results(
 
     # note: we deduplicate formats so that manipulation of one entry updates all entries
     results = []
-    known_formats = {}
+    formats = formats or []
+    known_formats = {format.puid: format for format in formats}
+
     for res in raw_results:
         format = None
         try:
-            f = Format(**res["format"])
-            format = known_formats.setdefault(res["format"]["puid"], f)
+            format = known_formats.get(res["format"]["puid"], None)
+            if format is None:
+                format = Format(**res["format"])
+                known_formats[format.puid] = format
+                formats.append(format)
         except (TypeError, KeyError) as e:
             # TypeError: the result doesn't have all required parts for Format()
             # KeyError:  either the result doesn't have a format or it lacks the PUID
@@ -129,6 +140,8 @@ def load_results(
                 raise e
 
         res.pop("format", None)
-        results.append(Result(**res, format=format))
+        result = Result(**res, format=format)
+        format.results.append(result)
+        results.append(result)
 
     return results
-- 
GitLab