From 68f0cc7ac9ed48ef39f8a487c242ae70d81b27df Mon Sep 17 00:00:00 2001 From: Maximilian Moser <maximilian.moser@tuwien.ac.at> Date: Thu, 22 Feb 2024 14:54:44 +0100 Subject: [PATCH] Consistently use yaml over yml as file extension --- formatscaper/README.md | 20 ++++++++++---------- formatscaper/formatscaper.py | 4 ++-- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/formatscaper/README.md b/formatscaper/README.md index 718f8f8..396c0f7 100644 --- a/formatscaper/README.md +++ b/formatscaper/README.md @@ -16,7 +16,7 @@ The aim here is to assist us with the task of digital preservation; specifically Formatscaper is designed to build up the required context over its lifetime of use, so no particular setup is required. To get started, simply feed it a list of files to analyze in the [expected format](#input) and let it run. -The most relevant file is [`formats.yml`](#formats-file), which is intended to receive limited manual tweaks over time (basically just setting the `endangered` flag for outdated formats). +The most relevant file is [`formats.yaml`](#formats-file), which is intended to receive limited manual tweaks over time (basically just setting the `endangered` flag for outdated formats). Every time formatscaper is run, it will update this file with formats that haven't been listed before. This means that the knowledge base about file formats gets extended over time. @@ -84,7 +84,7 @@ Because we try to not extend the semantics of available constructs with non-stan The input for formatscaper needs to be a list of objects (in YAML format) describing the context of each file to investigate. This includes the URI of the file, its original file name (which gets discarded by Invenio), and the record which the file is a part of. -```yml +```yaml - filename: hosts uri: /etc/hosts record: 1234-abcd @@ -101,9 +101,9 @@ This includes the URI of the file, its original file name (which gets discarded ### Formats file -The formats file (e.g. 
`formats.yml`) contains information about previously encountered file formats and their endangerment status. +The formats file (e.g. `formats.yaml`) contains information about previously encountered file formats and their endangerment status. Some context (like a human-readable name and MIME type) per format is also provided here, primarily to make it more understandable for operators. -```yml +```yaml - endangered: false mime: text/plain name: Plain Text File @@ -143,8 +143,8 @@ The file is rewritten every time formatscaper is run, so extra information like ### Results -The results file (e.g. `results.yml`) contains information about each investigated file and their identified formats, along with a notes about their endangerment status. -```yml +The results file (e.g. `results.yaml`) contains information about each investigated file and their identified formats, along with notes about their endangerment status. +```yaml - filename: /etc/hosts format: endangered: false @@ -195,7 +195,7 @@ Further, it can optionally provide information about the actual file `format`. If present, this information will override the format information as reported by siegfried. 
Example: -```yml +```yaml - filename: /mnt/data/de/ad/be/ef/data#something.idk format: @@ -240,7 +240,7 @@ record_files = [ ] # serialize the information as YAML file -with open("record-files.yml", "w") as f: +with open("record-files.yaml", "w") as f: yaml.dump(record_files, f) ``` @@ -255,10 +255,10 @@ To filter results in the shell, you can use the command [`yq`](https://github.co Show only the endangered files: ```sh -yq 'map(select(.format.endangered == true))' results.yml +yq 'map(select(.format.endangered == true))' results.yaml ``` Filtering results per record: ```sh -yq 'map(select(.record == "<RECORD-ID>"))' results.yml +yq 'map(select(.record == "<RECORD-ID>"))' results.yaml ``` diff --git a/formatscaper/formatscaper.py b/formatscaper/formatscaper.py index b2e6590..63aada1 100755 --- a/formatscaper/formatscaper.py +++ b/formatscaper/formatscaper.py @@ -29,10 +29,10 @@ parser = argparse.ArgumentParser( parser.add_argument( "--formats", "--f", - default="formats.yml", + default="formats.yaml", help=( "list of known file formats and if they're endangered; " - "this file will be updated (default: formats.yml)" + "this file will be updated (default: formats.yaml)" ), ) parser.add_argument( -- GitLab