diff --git a/formatscaper/core/models.py b/formatscaper/core/models.py
index 28a54f4fc6b60a78e8f2a688fd7294e46425ab64..15749ff1bd69d14a55f4307422258e79ba76116b 100644
--- a/formatscaper/core/models.py
+++ b/formatscaper/core/models.py
@@ -1,4 +1,5 @@
 import dataclasses
+from typing import List
 
 
 @dataclasses.dataclass
@@ -18,6 +19,11 @@ class Format:
     name: str
     mime: str
     endangered: bool = False
+    # note: back-references to the results using this format; excluded from
+    #       comparison and repr because format -> result -> format is a cycle
+    results: List["Result"] = dataclasses.field(
+        default_factory=list, compare=False, repr=False
+    )
 
     def as_dict(self):
         """Dump the data as dictionary."""
diff --git a/formatscaper/core/utils.py b/formatscaper/core/utils.py
index 47423851e34786644fd6712cfa8ae7f12cb8d702..e8d60f374408a4e617b8f39bda6006ecaea46a92 100644
--- a/formatscaper/core/utils.py
+++ b/formatscaper/core/utils.py
@@ -88,13 +88,19 @@ def store_results(results: List[Result], file_name: str, file_format: str) -> bo
 
 
 def load_results(
-    file_name: str, file_format: Optional[str] = None, strict: bool = True
+    file_name: str,
+    file_format: Optional[str] = None,
+    strict: bool = True,
+    formats: Optional[List[Format]] = None,
 ) -> Optional[List[Result]]:
     """Load the results from the given file.
 
     In case the ``file_format`` isn't specified, auto-detection is attempted.
     If ``strict`` is set, then the result loading will fail if it cannot parse
     the format for a result.
+    Optionally, a list of already known ``formats`` can be supplied to avoid
+    creating duplicate ``Format`` instances.
+    Newly encountered formats will be appended to the supplied list.
     """
     if file_format is None:
         if re.search(r"\.ya?ml$", file_name, re.IGNORECASE):
@@ -116,12 +122,19 @@ def load_results(
 
     # note: we deduplicate formats so that manipulation of one entry updates all entries
     results = []
-    known_formats = {}
+    # note: an empty list is falsy, but the caller still expects it to be extended
+    if formats is None:
+        formats = []
+    known_formats = {fmt.puid: fmt for fmt in formats}
+
     for res in raw_results:
         format = None
         try:
-            f = Format(**res["format"])
-            format = known_formats.setdefault(res["format"]["puid"], f)
+            format = known_formats.get(res["format"]["puid"])
+            if format is None:
+                format = Format(**res["format"])
+                known_formats[format.puid] = format
+                formats.append(format)
         except (TypeError, KeyError) as e:
             # TypeError: the result doesn't have all required parts for Format()
             # KeyError: either the result doesn't have a format or it lacks the PUID
@@ -129,6 +142,10 @@ def load_results(
                 raise e
 
         res.pop("format", None)
-        results.append(Result(**res, format=format))
+        result = Result(**res, format=format)
+        # note: the format is still None if it couldn't be parsed and loading didn't fail
+        if format is not None:
+            format.results.append(result)
+        results.append(result)
 
     return results