From 0bb5be176f0cd40f9346f3e232a0fc8a197cf936 Mon Sep 17 00:00:00 2001
From: Maximilian Moser <maximilian.moser@tuwien.ac.at>
Date: Tue, 13 Aug 2024 22:21:23 +0200
Subject: [PATCH] Synthesize some metadata defaults from the TISS profile if
available
---
invenio_config_tuw/config.py | 7 +++-
invenio_config_tuw/utils.py | 77 +++++++++++++++++++++++++++++++++---
2 files changed, 77 insertions(+), 7 deletions(-)
diff --git a/invenio_config_tuw/config.py b/invenio_config_tuw/config.py
index 3f09b57..29ee990 100644
--- a/invenio_config_tuw/config.py
+++ b/invenio_config_tuw/config.py
@@ -22,7 +22,11 @@ from .permissions import (
)
from .schemas import TUWUserPreferencesSchema, TUWUserProfileSchema, TUWUserSchema
from .services import TUWRecordsComponents
-from .utils import check_user_email_for_tuwien, current_user_as_creator
+from .utils import (
+ check_user_email_for_tuwien,
+ current_user_as_creator,
+ subjects_from_tiss,
+)
# Invenio-Config-TUW
# ==================
@@ -159,6 +163,7 @@ APP_RDM_DEPOSIT_FORM_DEFAULTS = {
},
"version": "1.0.0",
"description": "<h2>A primer on your dataset's description (to be edited)</h2><p>The influence of proper documentation on the reusability for research data should not be underestimated!<br>In order to help others understand how to interpret and reuse your data, we provide you with a few questions to help you structure your dataset's description (though please don't feel obligated to stick to them):</p><h3>Context and methodology</h3><ul><li>What is the research domain or project in which this dataset was created?</li><li>Which purpose does this dataset serve?</li><li>How was this dataset created?</li></ul><h3>Technical details</h3><ul><li>What is the structure of this dataset? Do the folders and files follow a certain naming convention?</li><li>Is any specific software required to open and work with this dataset?</li><li>Are there any additional resources available regarding the dataset, e.g. documentation, source code, etc.?</li></ul><h3>Further details</h3><ul><li>Is there anything else that other people may need to know when they want to reuse the dataset?</li></ul>", # noqa
+ "subjects": subjects_from_tiss,
}
RDM_RECORDS_SERVICE_COMPONENTS = TUWRecordsComponents
diff --git a/invenio_config_tuw/utils.py b/invenio_config_tuw/utils.py
index e7a11bf..c12ba95 100644
--- a/invenio_config_tuw/utils.py
+++ b/invenio_config_tuw/utils.py
@@ -7,8 +7,11 @@
"""Utility functions."""
-from typing import Dict, Tuple
+import re
+from typing import Dict, List, Optional, Tuple
+import requests
+from flask import current_app
from flask_principal import Identity
from flask_security import current_user
from invenio_access import any_user
@@ -53,6 +56,25 @@ def get_identity_for_user(user):
return identity
+def get_tiss_info(user, timeout: float = 5.0) -> Optional[Dict]:
+    """Get the TISS information for the user if available.
+
+    The lookup is best-effort: every failure (no TISS ID in the profile,
+    network problems, a non-200 response, an undecodable body) yields ``None``
+    rather than an exception.
+
+    :param user: The user whose ``user_profile`` may contain a ``tiss_id``.
+    :param timeout: Number of seconds to wait for the TISS API.
+    :return: The decoded JSON body of the TISS response, or ``None``.
+    """
+    if not user or not user.user_profile or not user.user_profile.get("tiss_id"):
+        return None
+
+    tiss_id = user.user_profile["tiss_id"]
+    try:
+        # NOTE without an explicit timeout, `requests` waits indefinitely and
+        #      an unresponsive TISS would block the caller
+        response = requests.get(
+            f"https://tiss.tuwien.ac.at/api/person/v22/id/{tiss_id}",
+            timeout=timeout,
+        )
+        if response.status_code == 200:
+            return response.json()
+
+    except Exception as e:
+        # deliberately broad: the TISS info is optional, so a failed lookup
+        # is only logged (`warn()` is a deprecated alias of `warning()`)
+        current_app.logger.warning(e)
+
+    return None
+
+
# Utilities for invenio configuration
# -----------------------------------
@@ -87,7 +109,30 @@ def _names_from_user_profile(profile: Dict) -> Tuple[str, str, str]:
return (given_name, family_name, full_name)
-def current_user_as_creator():
+def parse_affiliations(user_profile: Dict) -> List[Dict]:
+    """Derive the creator affiliations from the user profile.
+
+    The profile's ``affiliations`` value is read as a ``", "``-separated string
+    of domains; each ``tuwien.ac.at`` entry yields one TU Wien affiliation.
+    If nothing matches, a single TU Wien affiliation is returned.
+    """
+    raw_value = user_profile.get("affiliations") or ""
+
+    # TODO translate from the domain to the affiliation vocabulary somehow
+    matched = [
+        {"id": "04d836q62", "name": "TU Wien"}
+        for domain in raw_value.split(", ")
+        if domain == "tuwien.ac.at"
+    ]
+    if matched:
+        return matched
+
+    # assume TU Wien as a fallback affiliation
+    return [{"id": "04d836q62", "name": "TU Wien"}]
+
+
+def get_identifiers(user) -> List[Dict]:
+    """Get identifiers from the given user's TISS profile if available.
+
+    :param user: The user to look up in TISS.
+    :return: A list with one ``{"scheme": "orcid", "identifier": ...}`` entry
+        if the TISS info has an ORCID, otherwise an empty list.
+    """
+    identifiers = []
+    # look up the user that was passed in, rather than always falling back
+    # to the global `current_user` and ignoring the argument
+    if tiss_info := get_tiss_info(user):
+        if orcid := tiss_info.get("orcid"):
+            identifiers.append({"scheme": "orcid", "identifier": orcid})
+
+    return identifiers
+
+
+def current_user_as_creator() -> Dict:
"""Use the currently logged-in user to populate a creator in the deposit form."""
profile = current_user.user_profile or {}
given_name, family_name, full_name = _names_from_user_profile(profile)
@@ -96,14 +141,12 @@ def current_user_as_creator():
if not given_name and not family_name and not full_name:
return []
- # TODO parse affiliation from user profile
- # TODO add identifiers (e.g. ORCID from TISS, if available)
creator = {
- "affiliations": [{"id": "04d836q62", "name": "TU Wien"}],
+ "affiliations": parse_affiliations(profile),
"person_or_org": {
"family_name": family_name,
"given_name": given_name,
- "identifiers": [],
+ "identifiers": get_identifiers(current_user),
"name": full_name,
"type": "personal",
},
@@ -111,3 +154,25 @@ def current_user_as_creator():
}
return [creator]
+
+
+def _employee_entry_to_keyword(employee: Dict) -> Optional[Dict]:
+    """Translate the org unit affiliation to a keyword.
+
+    :param employee: One employee entry; its English org unit name is read
+        from ``employee["org_ref"]["name_en"]``.
+    :return: A ``{"subject": <keyword>}`` dict, or ``None`` if the entry has
+        no usable org unit name.
+    """
+    # TODO there's only a limited number of org units at TUW,
+    #      we should be able to come up with something better!
+    # the entry comes from an external API, so tolerate missing parts
+    # instead of failing with a KeyError/TypeError
+    org_ref = employee.get("org_ref") or {}
+    name = org_ref.get("name_en")
+    if not isinstance(name, str):
+        return None
+
+    keyword = re.sub(r"research unit of ", "", name, flags=re.IGNORECASE).strip()
+    if not keyword:
+        return None
+
+    # NOTE the subject must be either {"id": <SUBJECT_ID>} or {"subject": <CUSTOM>}
+    return {"subject": keyword}
+
+
+def subjects_from_tiss() -> List[Dict]:
+    """Create subjects from the current user's TISS profile if available.
+
+    :return: One subject dict per entry in the TISS info's ``employee`` list
+        for which a keyword could be derived; an empty list if no TISS
+        information is available.
+    """
+    keywords = []
+    if tiss_info := get_tiss_info(current_user):
+        # `or []` also covers the key being present with a null value
+        for org in tiss_info.get("employee") or []:
+            if keyword := _employee_entry_to_keyword(org):
+                keywords.append(keyword)
+
+    return keywords
--
GitLab