*** Wartungsfenster jeden ersten Mittwoch vormittag im Monat ***

Skip to content
Snippets Groups Projects
Commit 7a3fd3e8 authored by Moser, Maximilian's avatar Moser, Maximilian
Browse files

Remove the record migration scripts

* because they're ancient, and InvenioRDM has started providing version
  upgrade guides themselves since v1
parent 4493a3cc
No related branches found
No related tags found
1 merge request: !5 "Add script for checking each record's landing page for failure"
......@@ -11,13 +11,6 @@ Record creation & file upload utility for the REST API introduced in the Invenio
Note that a similar (although server-side) functionality has been implemented by the CLI commands provided by the Invenio-Utilities-TUW module.
record-migration
----------------
Tools for migrating record metadata to new releases.
reporting
---------
......
#!/usr/bin/env python3
import json
import os
import re
import subprocess as sp
import sys
# Dump the raw metadata of every record in the instance to individual JSON
# files, one "<pid>.json" per record, inside the target directory.

# Target directory for the JSON dumps (first CLI argument, default "outputs").
target_dir = sys.argv[1] if len(sys.argv) > 1 else "outputs"

# Ask the instance for all known records and collect their PIDs from the
# beginning of each output line (PIDs look like "xxxx-yyyy").
listing = sp.run(["invenio", "tuw", "records", "list"], capture_output=True, text=True)
pid_regex = re.compile(r"(\w+\-\w+)\b")
pids = [
    match.group(1)
    for match in (pid_regex.match(line) for line in listing.stdout.splitlines())
    if match
]

if not os.path.isdir(target_dir):
    os.mkdir(target_dir)

# Fetch each record's raw metadata and write it to "<target_dir>/<pid>.json".
for pid in pids:
    dump_path = os.path.join(target_dir, "%s.json" % pid)
    result = sp.run(
        ["invenio", "tuw", "records", "show", "--raw", "-p", pid],
        capture_output=True,
        text=True,
    )
    with open(dump_path, "w") as dump_file:
        # round-trip through json to fail early on malformed CLI output
        json.dump(json.loads(result.stdout), dump_file)
#!/usr/bin/env python3
import argparse as ap
import json
import os
import re
import subprocess as sp
import sys
# Push previously migrated record files back into the instance via the
# "invenio tuw records update" CLI command, one call per file.

parser = ap.ArgumentParser()
parser.add_argument("directory", help="directory in which to look for files", nargs="?")
parser.add_argument(
    "--user",
    "-u",
    help="user to impersonate for updating the records (if omitted: system process)",
)
parser.add_argument("--ext", "-e", help="file extensions to look for")
args = parser.parse_args()

output_dir = args.directory or "outputs"
search_ext = args.ext or "migrated"

if not os.path.isdir(output_dir):
    print("error: directory not found: %s" % output_dir, file=sys.stderr)
    # exit cleanly instead of letting os.listdir() below crash with a traceback
    sys.exit(1)

for file_name in os.listdir(output_dir):
    # the file's base name is the record's PID value
    pid_value, ext = os.path.splitext(file_name)
    # only consider files with the requested extension (default: ".migrated")
    if ext[1:] != search_ext:
        continue

    ff = os.path.join(output_dir, file_name)
    # load (and discard) the content to verify it is valid JSON before
    # shelling out — presumably an early sanity check; the CLI re-reads the file
    with open(ff, "r") as input_file:
        record = json.load(input_file)

    cmd = ["invenio", "tuw", "records", "update"]
    if args.user:
        cmd += ["-u", args.user]
    cmd += ["-p", pid_value, ff]

    cp = sp.run(
        cmd,
        capture_output=True,
        text=True,
    )
    # BUG FIX: was `print(sp.stdout)` — `sp` is the subprocess module, which
    # has no `stdout` attribute; the CompletedProcess result is `cp`
    print(cp.stdout)
#!/usr/bin/env python3
import json
import os
import sys
def map_identifiers(identifiers):
    """Turn a ``{scheme: identifier}`` mapping into a list of dicts.

    Each entry in the result has the shape
    ``{"identifier": <value>, "scheme": <key>}``, preserving the
    mapping's iteration order.
    """
    return [
        {"identifier": identifier, "scheme": scheme}
        for scheme, identifier in identifiers.items()
    ]
def map_affiliation(affiliation):
    """Map a legacy affiliation entry to the new shape.

    Keeps the (required) ``name`` and converts the optional
    ``identifiers`` mapping via :func:`map_identifiers`.
    """
    mapped_ids = map_identifiers(affiliation.get("identifiers", {}))
    return {"name": affiliation["name"], "identifiers": mapped_ids}
def map_creatibutor(creatibutor, default_role="editor"):
    """Map a legacy creator/contributor entry to the new RDM shape.

    :param creatibutor: the legacy entry; ``type`` is required, everything
        else is optional.
    :param default_role: role to use when the entry specifies none.
    :return: a dict with ``role``, ``affiliations`` and ``person_or_org``.
    """
    # .get() with [] default: legacy records may omit "affiliations"
    # entirely (the other optional fields are already read via .get())
    affiliations = [
        map_affiliation(aff) for aff in creatibutor.get("affiliations", [])
    ]
    return {
        "role": creatibutor.get("role", default_role),
        "affiliations": affiliations,
        "person_or_org": {
            "type": creatibutor["type"],
            "name": creatibutor.get("name"),
            "given_name": creatibutor.get("given_name"),
            "family_name": creatibutor.get("family_name"),
            # default {} (not []): map_identifiers indexes its argument as a
            # mapping, matching map_affiliation's default
            "identifiers": map_identifiers(creatibutor.get("identifiers", {})),
        },
    }
def map_right(right):
    """Map a legacy rights entry to the new shape.

    All three source fields are optional and default to ``None``.
    """
    link = right.get("uri")
    rights_id = right.get("identifier")
    title = right.get("rights")
    return {"link": link, "id": rights_id, "title": title}
def map_location(location):
    """Map a legacy location entry to the new shape.

    Copies the entry and replaces its ``identifiers`` with the converted
    list from :func:`map_identifiers`.
    """
    mapped = location.copy()
    # default {} (not []): map_identifiers indexes its argument as a mapping;
    # the old [] default only worked by accident because it was empty
    mapped["identifiers"] = map_identifiers(location.get("identifiers", {}))
    return mapped
# - - - - - - - - - - - #
# start doing the stuff #
# - - - - - - - - - - - #

# Read every "<recid>.json" dump from the directory (first CLI argument,
# default "outputs"), rewrite its metadata to the new shape, and store the
# result next to it as "<recid>.migrated".
directory = sys.argv[1] if len(sys.argv) > 1 else "outputs"
for f in os.listdir(directory):
    ff = os.path.join(directory, f)
    recid, ext = os.path.splitext(f)
    if ext.lower() != ".json":
        continue

    with open(ff, "r") as data_file:
        data = json.load(data_file)

    # NOTE: shallow copy — new["metadata"] is the same dict object as
    # data["metadata"]; this is fine here because each value is read from
    # `data` before (or independently of) its reassignment below
    new = data.copy()

    # drop system-managed identifiers; they get assigned anew on import
    for k in ["id", "pid", "conceptid", "conceptpid"]:
        new.pop(k, None)

    new["metadata"]["contributors"] = [
        map_creatibutor(c) for c in data["metadata"].get("contributors", [])
    ]
    new["metadata"]["creators"] = [
        map_creatibutor(c) for c in data["metadata"]["creators"]
    ]
    new["metadata"]["rights"] = [
        map_right(right) for right in data["metadata"].get("rights", [])
    ]

    # TODO: check why this is the case
    new["metadata"].pop("references", None)
    # because subjects aren't supported right now (TODO: check)
    new["metadata"].pop("subjects", None)
    # because the identifiers may have used weird schemes in both cases
    # (previously "locations" were also run through map_location() first,
    # but the result was popped right away — that dead work is removed)
    new["metadata"].pop("locations", None)
    new["metadata"].pop("funding", None)

    # reset access: public record & files, owned by user "1"
    # (presumably the admin/system user — TODO confirm)
    new["access"] = {"record": "public", "files": "public", "owned_by": [{"user": "1"}]}

    ff = os.path.join(directory, recid + ".migrated")
    with open(ff, "w") as data_file:
        json.dump(new, data_file)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment