*** Wartungsfenster jeden ersten Mittwoch vormittag im Monat ***

Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • crdm/formatscaper
1 result
Show changes
Commits on Source (2)
......@@ -3,8 +3,10 @@
import argparse
import os
import re
import shutil
import subprocess
import sys
import tempfile
import threading
import progressbar as pb
......@@ -79,7 +81,7 @@ def scape_formats(config):
m = re.match(r"siegfried ((\d+\.?)+)", sf_output)
if m and m.group(1):
ver_nums = [int(num) for num in m.group(1).split(".")]
if not (ver_nums[0] >= 1 and ver_nums[1] >= 10):
if not (ver_nums[0] >= 1 and ver_nums[1] >= 11):
print(
f"WARN: siegfried version too old ({m.group(1)})", file=sys.stderr
)
......@@ -159,9 +161,14 @@ def scape_formats(config):
pb.Timer(),
]
progress_bar = pb.ProgressBar(max_value=len(record_files), widgets=pb_ws)
base_dir = tempfile.mkdtemp()
def process_record_file(record_file: RecordFile) -> None:
with sem:
# link the files under investigation into a scoped directory
file_dir = os.path.join(base_dir, record_file.record)
file_path = os.path.join(file_dir, record_file.filename)
try:
# if we already have an overridden result for the record file
# in question, we skip it
......@@ -179,15 +186,22 @@ def scape_formats(config):
file_infos = []
else:
# create a symlink to the file with a proper name to help siegfried
# with file format identification as the file name plays a role
# (this will be deleted afterwards)
os.makedirs(file_dir, exist_ok=True)
os.symlink(record_file.uri, file_path)
sf_output = subprocess.check_output(
[
config.sf_binary,
"-sym",
"-z",
"-multi",
"1",
"-name",
record_file.filename,
record_file.uri,
file_path,
],
stderr=sf_error_log,
)
......@@ -217,7 +231,7 @@ def scape_formats(config):
# replace first occurrence of the URI with filename
filename = file_info["filename"].replace(
record_file.uri, record_file.filename, 1
(file_dir + os.path.sep), "", 1
)
result = Result(
......@@ -239,6 +253,13 @@ def scape_formats(config):
except (subprocess.CalledProcessError, ValueError) as e:
print("WARN: error during sf execution:", str(e), file=sys.stderr)
finally:
try:
# in any case, remove the symlink to the file we generated
os.remove(file_path)
except FileNotFoundError:
pass
# analyze all files in parallel, and create the summary after all threads complete
threads = []
for record_file in record_files or []:
......@@ -252,6 +273,8 @@ def scape_formats(config):
except KeyboardInterrupt:
pass
# clean up
shutil.rmtree(base_dir)
if sf_error_log is not None:
sf_error_log.close()
......
import dataclasses
from typing import List, Optional
from sqlalchemy import ForeignKey, create_engine
from sqlalchemy import ForeignKey, UniqueConstraint, create_engine
from sqlalchemy.orm import DeclarativeBase, Mapped, Session, mapped_column, relationship
......@@ -98,6 +98,9 @@ class Result(ModelBase):
format_id: Mapped[Optional[int]] = mapped_column(ForeignKey("format.id"))
format: Mapped[Format] = relationship(back_populates="results")
# filenames are unique per record; SQLAlchemy only accepts a tuple or a dict
# for `__table_args__`, so the single constraint is wrapped in a 1-tuple
__table_args__ = (UniqueConstraint("record", "filename"),)
@property
def risk(self):
"""Calculate the risk assessment for the file."""
......