*** Wartungsfenster jeden ersten Mittwoch vormittag im Monat ***

Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • crdm/formatscaper
1 result
Show changes
Commits on Source (4)
......@@ -4,6 +4,10 @@
*.pickle
*.sqlite
# don't ignore some YAML files though!
!.gitlab-ci.yml
!tests/**/*.yml
# logs
sf.log
......@@ -16,3 +20,5 @@ formatscaper.egg-info
Pipfile
Pipfile.lock
**/__pycache__
.coverage
dist
# vim: ts=2
# Pipeline stages, listed in the order they are declared to run.
stages:
- testing
- release
# Install the package with its "tests" extra into a pipenv environment and run pytest.
run-tests:
stage: testing
script:
- pip install --upgrade pip pipenv
# Remove any existing pipenv environment; "|| true" ignores a failure of that removal.
- pipenv --rm || true
- pipenv run pip install -e '.[tests]'
- pipenv run pytest
# Regex matching the percentage at the end of the "TOTAL" line in the job output.
coverage: /TOTAL.*? (100(?:\.0+)?\%|[1-9]?\d(?:\.\d+)?\%)$/
# Build the distributions, check them and upload them with twine.
pypi-release:
stage: release
needs:
- run-tests
rules:
# Only applies when the commit tag starts with "v" followed by at least one digit.
- if: '$CI_COMMIT_TAG =~ /^v\d+/'
script:
- pip3 install --upgrade pip build twine check-manifest
# Clear out previously built files so that only fresh artifacts end up in dist/.
- rm -f dist/*
- python3 -m check_manifest
- python3 -m build
- python3 -m twine check dist/*
# Credentials are taken from the PYPI_USER and PYPI_PASSWORD variables of the job environment.
- TWINE_USERNAME=${PYPI_USER} TWINE_PASSWORD=${PYPI_PASSWORD} python3 -m twine upload --skip-existing --non-interactive dist/*
MIT License
Copyright (C) 2024 TU Wien.
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
# Leave out the CI configuration as well as SQLite, log and YAML files from the project root.
exclude .gitlab-ci.yml
exclude *.sqlite
exclude *.log
exclude *.yml
# Explicitly include the manifest template itself and the license text.
include MANIFEST.in
include LICENSE
# Include the Python and YAML files found under tests/.
recursive-include tests *.py *.yml
"""Formatscaper is a tool for generating an overview of the file format landscape."""
from .models import Format, RecordFile, Result
__all__ = (
......
"""CLI utilities for formatscaper."""
from .formatscaper import run_formatscaper_cli
from .resultman import run_resultman_cli
......
#!/bin/env python3
"""The file format identification command for formatscaper."""
import argparse
import os
import re
......@@ -20,7 +22,6 @@ completed_tasks = 0
def parse_cli_args():
"""Run the formatscaper command."""
# set up the argument parser
parser = argparse.ArgumentParser(
description=(
......
#!/bin/env python3
"""Textual user interface for managing the results."""
import argparse
import math
from collections import defaultdict
......
"""Data models for formatscaper."""
import dataclasses
from typing import List, Optional
......
"""Utility functions for handling formats and results."""
import pickle
import re
import sys
......
......@@ -33,6 +33,12 @@ dev = [
"flake8 >= 7.0",
"flake8-pyproject >= 1.2.3",
]
tests = [
"pytest >= 8.3",
"pytest-black >= 0.3",
"pytest-cov >= 5.0",
"pytest-isort >= 4.0",
]
[project.scripts]
formatscaper = "formatscaper.cli:run_formatscaper_cli"
......@@ -48,3 +54,11 @@ extend-ignore = ["E203", "E704"]
[tool.isort]
profile = "black"
[tool.pytest.ini_options]
addopts = '--black --isort --doctest-glob="*.rst" --doctest-modules --cov=formatscaper --cov-report=term-missing'
[tool.coverage.run]
omit = [
"formatscaper/cli/resultman.py"
]
"""Pytest configuration."""
- puid: x-fmt/111
name: Plain Text File
mime: text/plain
risk: 1
- puid: x-fmt/263
name: ZIP Format
mime: application/zip
risk: 2
- puid: UNKNOWN
name: null
mime: null
risk: 5
- puid: fmt/818
name: YAML
mime: null
risk: 1
- puid: fmt/938
name: Python Source Code File
mime: null
risk: 3
- puid: fmt/1149
name: Markdown
mime: text/markdown
risk: 1
- puid: fmt/43
name: JPEG File Interchange Format
mime: image/jpeg
risk: 1
- puid: fmt/615
name: Gimp Image File Format
mime: null
risk: 3
- puid: x-fmt/390
name: Exchangeable Image File Format (Compressed)
mime: image/jpeg
risk: 1
- puid: fmt/12
name: Portable Network Graphics
mime: image/png
risk: 1
- puid: fmt/1639
name: Adobe InDesign Document
mime: null
risk: 2
- puid: fmt/13
name: Portable Network Graphics
mime: image/png
risk: 2
- puid: fmt/11
name: Portable Network Graphics
mime: image/png
risk: 2
- puid: fmt/276
name: Acrobat PDF 1.7 - Portable Document Format
mime: application/pdf
risk: 1
- puid: fmt/215
name: Microsoft Powerpoint for Windows
mime: application/vnd.openxmlformats-officedocument.presentationml.presentation
risk: 2
- puid: fmt/412
name: Microsoft Word for Windows
mime: application/vnd.openxmlformats-officedocument.wordprocessingml.document
risk: 2
- puid: fmt/199
name: MPEG-4 Media File
mime: application/mp4
risk: 2
- puid: x-fmt/391
name: Exchangeable Image File Format (Compressed)
mime: image/jpeg
risk: 1
- puid: fmt/471
name: Hypertext Markup Language
mime: text/html
risk: 1
- puid: fmt/92
name: Scalable Vector Graphics
mime: image/svg+xml
risk: 1
- puid: fmt/4
name: Graphics Interchange Format
mime: image/gif
risk: 1
- puid: x-fmt/224
name: Cascading Style Sheet
mime: text/css
risk: 1
- puid: fmt/20
name: Acrobat PDF 1.6 - Portable Document Format
mime: application/pdf
risk: 1
- filename: hosts
uri: /etc/hosts
record: 1234-abcd
- filename: environment
uri: /etc/environment
record: 1234-abcd
- filename: FS Table
uri: /etc/fstab
record: 1234-abcd
- filename: original_name.pdf
uri: /home/mmoser/Documents/renamed.pdf
record: different-record
- filename: invenio-upload.zip
uri: /mnt/data/uploaded_data/12/34/56/data
record: qwer-5678
"""Testing the utility functions."""
import os
import tempfile
import yaml
from formatscaper.models import Format, RecordFile
from formatscaper.utils import load_formats, load_record_files, store_formats
def test_load_record_files_from_yaml():
    """Check that the YAML fixture is loaded as several ``RecordFile`` objects."""
    loaded = load_record_files("tests/data/test_record_files.yml")

    # the fixture is expected to provide more than a single entry
    assert len(loaded) > 1
    assert all(isinstance(entry, RecordFile) for entry in loaded)
def test_load_formats():
    """Check that the formats fixture loads as ``Format`` objects with sane risks."""
    loaded = load_formats("tests/data/test_formats.yml")

    # the fixture is expected to provide more than a single entry
    assert len(loaded) > 1

    # every value has to be a ``Format`` whose risk lies within 0..5 (inclusive)
    for fmt in loaded.values():
        assert isinstance(fmt, Format)
        assert 0 <= fmt.risk <= 5
def test_store_formats():
    """Check that stored formats can be read back from the written YAML file.

    A handful of formats is written via ``store_formats()``; the resulting file
    is parsed with ``yaml.safe_load()`` and has to contain the dictionary
    representation of each of the formats, and nothing more.
    """
    formats = [
        Format(puid="x-fmt/111", name="Plain Text File", mime="text/plain", risk=1),
        Format(puid="fmt/615", name="Gimp Image File Format", mime=None, risk=3),
        Format(puid="UNKNOWN", name=None, mime=None, risk=5),
    ]

    # A private temporary directory avoids the deprecated, race-prone
    # ``tempfile.mktemp()``.  Its cleanup also works if the file was never
    # written, so a failure in ``store_formats()`` is reported as such rather
    # than being masked by an error from removing a non-existent file.
    with tempfile.TemporaryDirectory() as tmp_dir:
        filename = os.path.join(tmp_dir, "formats.yml")
        store_formats(formats, filename)

        with open(filename, "r") as f:
            result = yaml.safe_load(f)

    assert len(result) == len(formats)
    for fmt in formats:
        assert fmt.as_dict() in result