*** Wartungsfenster jeden ersten Mittwoch vormittag im Monat ***

Skip to content
Snippets Groups Projects
Commit 7a3fd3e8 authored by Moser, Maximilian's avatar Moser, Maximilian
Browse files

Remove the record migration scripts

* because they're ancient, and InvenioRDM has started providing version
  upgrade guides themselves since v1
parent 4493a3cc
No related branches found
No related tags found
1 merge request: !5 "Add script for checking each record's landing page for failure"
......@@ -11,13 +11,6 @@ Record creation & file upload utility for the REST API introduced in the Invenio
Note that a similar (although server-side) functionality has been implemented by the CLI commands provided by the Invenio-Utilities-TUW module.
record-migration
----------------
Tools for migrating record metadata to new releases.
reporting
---------
......
#!/usr/bin/env python3
import json
import os
import re
import subprocess as sp
import sys
# Dump the raw metadata of every record in the instance to individual JSON
# files, one "<pid>.json" per record, inside the target directory.

# Target directory for the JSON dumps (first CLI argument, default "outputs").
target_dir = sys.argv[1] if len(sys.argv) > 1 else "outputs"

# Ask the instance for all known records and collect their PIDs from the
# beginning of each output line (PIDs look like "xxxx-yyyy").
listing = sp.run(["invenio", "tuw", "records", "list"], capture_output=True, text=True)
pid_regex = re.compile(r"(\w+\-\w+)\b")
pids = [
    match.group(1)
    for match in (pid_regex.match(line) for line in listing.stdout.splitlines())
    if match
]

if not os.path.isdir(target_dir):
    os.mkdir(target_dir)

# Fetch each record's raw metadata and write it to "<target_dir>/<pid>.json".
for pid in pids:
    dump_path = os.path.join(target_dir, "%s.json" % pid)
    result = sp.run(
        ["invenio", "tuw", "records", "show", "--raw", "-p", pid],
        capture_output=True,
        text=True,
    )
    with open(dump_path, "w") as dump_file:
        # round-trip through json to fail early on malformed CLI output
        json.dump(json.loads(result.stdout), dump_file)
#!/usr/bin/env python3
import argparse as ap
import json
import os
import re
import subprocess as sp
import sys
# Push previously migrated record files back into the instance via the
# "invenio tuw records update" CLI command, one call per file.

parser = ap.ArgumentParser()
parser.add_argument("directory", help="directory in which to look for files", nargs="?")
parser.add_argument(
    "--user",
    "-u",
    help="user to impersonate for updating the records (if omitted: system process)",
)
parser.add_argument("--ext", "-e", help="file extensions to look for")
args = parser.parse_args()

output_dir = args.directory or "outputs"
search_ext = args.ext or "migrated"

if not os.path.isdir(output_dir):
    print("error: directory not found: %s" % output_dir, file=sys.stderr)
    # exit cleanly instead of letting os.listdir() below crash with a traceback
    sys.exit(1)

for file_name in os.listdir(output_dir):
    # the file's base name is the record's PID value
    pid_value, ext = os.path.splitext(file_name)
    # only consider files with the requested extension (default: ".migrated")
    if ext[1:] != search_ext:
        continue

    ff = os.path.join(output_dir, file_name)
    # load (and discard) the content to verify it is valid JSON before
    # shelling out — presumably an early sanity check; the CLI re-reads the file
    with open(ff, "r") as input_file:
        record = json.load(input_file)

    cmd = ["invenio", "tuw", "records", "update"]
    if args.user:
        cmd += ["-u", args.user]
    cmd += ["-p", pid_value, ff]

    cp = sp.run(
        cmd,
        capture_output=True,
        text=True,
    )
    # BUG FIX: was `print(sp.stdout)` — `sp` is the subprocess module, which
    # has no `stdout` attribute; the CompletedProcess result is `cp`
    print(cp.stdout)
#!/usr/bin/env python3
import json
import os
import sys
def map_identifiers(identifiers):
    """Turn a ``{scheme: identifier}`` mapping into a list of dicts.

    Each entry in the result has the shape
    ``{"identifier": <value>, "scheme": <key>}``, preserving the
    mapping's iteration order.
    """
    return [
        {"identifier": identifier, "scheme": scheme}
        for scheme, identifier in identifiers.items()
    ]
def map_affiliation(affiliation):
    """Map a legacy affiliation entry to the new shape.

    Keeps the (required) ``name`` and converts the optional
    ``identifiers`` mapping via :func:`map_identifiers`.
    """
    mapped_ids = map_identifiers(affiliation.get("identifiers", {}))
    return {"name": affiliation["name"], "identifiers": mapped_ids}
def map_creatibutor(creatibutor, default_role="editor"):
    """Map a legacy creator/contributor entry to the new RDM shape.

    :param creatibutor: the legacy entry; ``type`` is required, everything
        else is optional.
    :param default_role: role to use when the entry specifies none.
    :return: a dict with ``role``, ``affiliations`` and ``person_or_org``.
    """
    # .get() with [] default: legacy records may omit "affiliations"
    # entirely (the other optional fields are already read via .get())
    affiliations = [
        map_affiliation(aff) for aff in creatibutor.get("affiliations", [])
    ]
    return {
        "role": creatibutor.get("role", default_role),
        "affiliations": affiliations,
        "person_or_org": {
            "type": creatibutor["type"],
            "name": creatibutor.get("name"),
            "given_name": creatibutor.get("given_name"),
            "family_name": creatibutor.get("family_name"),
            # default {} (not []): map_identifiers indexes its argument as a
            # mapping, matching map_affiliation's default
            "identifiers": map_identifiers(creatibutor.get("identifiers", {})),
        },
    }
def map_right(right):
    """Map a legacy rights entry to the new shape.

    All three source fields are optional and default to ``None``.
    """
    link = right.get("uri")
    rights_id = right.get("identifier")
    title = right.get("rights")
    return {"link": link, "id": rights_id, "title": title}
def map_location(location):
    """Map a legacy location entry to the new shape.

    Copies the entry and replaces its ``identifiers`` with the converted
    list from :func:`map_identifiers`.
    """
    mapped = location.copy()
    # default {} (not []): map_identifiers indexes its argument as a mapping;
    # the old [] default only worked by accident because it was empty
    mapped["identifiers"] = map_identifiers(location.get("identifiers", {}))
    return mapped
# - - - - - - - - - - - #
# start doing the stuff #
# - - - - - - - - - - - #

# Read every "<recid>.json" dump from the directory (first CLI argument,
# default "outputs"), rewrite its metadata to the new shape, and store the
# result next to it as "<recid>.migrated".
directory = sys.argv[1] if len(sys.argv) > 1 else "outputs"
for f in os.listdir(directory):
    ff = os.path.join(directory, f)
    recid, ext = os.path.splitext(f)
    if ext.lower() != ".json":
        continue

    with open(ff, "r") as data_file:
        data = json.load(data_file)

    # NOTE: shallow copy — new["metadata"] is the same dict object as
    # data["metadata"]; this is fine here because each value is read from
    # `data` before (or independently of) its reassignment below
    new = data.copy()

    # drop system-managed identifiers; they get assigned anew on import
    for k in ["id", "pid", "conceptid", "conceptpid"]:
        new.pop(k, None)

    new["metadata"]["contributors"] = [
        map_creatibutor(c) for c in data["metadata"].get("contributors", [])
    ]
    new["metadata"]["creators"] = [
        map_creatibutor(c) for c in data["metadata"]["creators"]
    ]
    new["metadata"]["rights"] = [
        map_right(right) for right in data["metadata"].get("rights", [])
    ]

    # TODO: check why this is the case
    new["metadata"].pop("references", None)
    # because subjects aren't supported right now (TODO: check)
    new["metadata"].pop("subjects", None)
    # because the identifiers may have used weird schemes in both cases
    # (previously "locations" were also run through map_location() first,
    # but the result was popped right away — that dead work is removed)
    new["metadata"].pop("locations", None)
    new["metadata"].pop("funding", None)

    # reset access: public record & files, owned by user "1"
    # (presumably the admin/system user — TODO confirm)
    new["access"] = {"record": "public", "files": "public", "owned_by": [{"user": "1"}]}

    ff = os.path.join(directory, recid + ".migrated")
    with open(ff, "w") as data_file:
        json.dump(new, data_file)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment