From 5472dc2c4297b213c3acdfb3582b5caed17d9dcc Mon Sep 17 00:00:00 2001 From: Maximilian Moser <maximilian.moser@tuwien.ac.at> Date: Fri, 19 Jan 2024 15:20:29 +0100 Subject: [PATCH] Add pickle as an output file format for formatscaper * also set it as default, because it's much faster and smaller than yaml --- formatscaper/formatscaper.py | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/formatscaper/formatscaper.py b/formatscaper/formatscaper.py index 0895b1f..9db9f8e 100755 --- a/formatscaper/formatscaper.py +++ b/formatscaper/formatscaper.py @@ -2,6 +2,7 @@ import argparse import dataclasses +import pickle import re import subprocess import sys @@ -43,8 +44,15 @@ parser.add_argument( parser.add_argument( "--output", "-o", - default="results.yml", - help="file in which to store the identified format for each file (default: results.yml)", # noqa + default="results.{FORMAT}", + help="file in which to store the identified format for each file (default: results.{FORMAT})", # noqa +) +parser.add_argument( + "--output-format", + "-F", + default="pickle", + choices=["pickle", "yaml"], + help="format of the results (default: pickle)", ) parser.add_argument( "--parallel", @@ -215,12 +223,21 @@ if endangered_files: # store the results to files +output_file_name = args.output.format(FORMAT=args.output_format) try: - with open(args.output, "w") as output_file: - yaml.dump([dataclasses.asdict(res) for res in all_results], output_file) + simple_results = [dataclasses.asdict(res) for res in all_results] + file_mode = "w" if args.output_format == "yaml" else "wb" + with open(output_file_name, file_mode) as output_file: + if args.output_format == "yaml": + yaml.dump(simple_results, output_file) + elif args.output_format == "pickle": + pickle.dump(simple_results, output_file) except OSError: - print(f"WARN: couldn't store the results ({args.output})", file=sys.stderr) + print( + f"WARN: couldn't store the results to file ({output_file_name})", + file=sys.stderr, + ) try: updated_formats = [dataclasses.asdict(f) for f in formats.values()] -- GitLab