*** Wartungsfenster jeden ersten Mittwoch vormittag im Monat ***

Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • martin.weise/fairnb
1 result
Show changes
Commits on Source (4)
......@@ -34,7 +34,6 @@ Lukas Mahler, https://orcid.org/0000-0002-8985-8139
| Name | Identifier |
|-------------------------------------|----------------------------------------------------------------------------------------------------------------------|
| Docker Image on TUWRD | [![DOI](https://researchdata.tuwien.ac.at/badge/DOI/10.48436/f0kah-nnc65.svg)](https://doi.org/10.48436/f0kah-nnc65) |
| Docker Image on DockerHub | [https://hub.docker.com/r/mahlukas/fairnb](https://hub.docker.com/r/mahlukas/fairnb) |
| DBRepo Database | [https://dbrepo1.ec.tuwien.ac.at/pid/34](https://dbrepo1.ec.tuwien.ac.at/pid/34) |
| TUgitLab | https://gitlab.tuwien.ac.at/martin.weise/fairnb |
......
......@@ -41,7 +41,7 @@ class Util:
def get_dbrepo_connector(self, path: pathlib.Path = None):
    """Build a DBRepoConnector from the YAML config at *path*.

    Uses ``config.get("credentials")`` so a config file without a
    ``credentials`` section yields ``None`` instead of raising KeyError.
    (The diff artifact that left both the old subscript form and the new
    ``.get`` form in place is resolved in favor of the new form.)
    """
    config = self.get_config(path=path)
    return DBRepoConnector.from_config(
        config, config.get("credentials")
    )
def get_invenio_connector(self, path: pathlib.Path = None):
......
%% Cell type:markdown id: tags:
# Main Notebook
Executes all research notebooks one by one and initializes needed connectors and nbconfig. All produced entities are always saved to disk and uploaded (using FAIRnb) if ONLY_LOCAL is False.
The entities created by one notebook are passed to the next notebook as dependencies, creating a pipeline of notebooks.
%% Cell type:code id: tags:
``` python
%load_ext autoreload
%autoreload 2
```
%% Cell type:code id: tags:
``` python
import yaml
import logging
from definitions import CONFIG_PATH, BASE_PATH, RESOURCE_PATH
from fairnb.entity.dbrepo_entity import DBRepoEntity
from fairnb.entity.invenio_entity import InvenioRDMEntity
from fairnb.nb_config import NbConfig
from fairnb.executor import Executor
from fairnb.util import Util
```
%% Cell type:code id: tags:
``` python
logging.basicConfig(
level=logging.DEBUG
level=logging.INFO
)
ONLY_LOCAL = False
ONLY_LOCAL = True
# experiment executor setup
executor = Executor()
util = Util.get_instance() # util caches loaded credentials -> via Singleton
connector = util.get_dbrepo_connector(CONFIG_PATH / "dbrepo_config.yml")
invenio_connector = util.get_invenio_connector(CONFIG_PATH / "invenio_config.yml")
NOTEBOOK_PATH = BASE_PATH / "notebooks"
LOCAL_PATH = BASE_PATH / "tmp"
MAIN_PATH = BASE_PATH / "notebooks" / "main.ipynb"
table_metadata = yaml.safe_load(open(RESOURCE_PATH / "default_table_metadata.yml", "r"))
```
%% Cell type:code id: tags:
``` python
# ------------- Convert Audio Files for TUWRD ----
metadata = yaml.safe_load(open(RESOURCE_PATH / "1_audio_files" / "record_metadata.yml", "r"))
nb_config_audio_files = NbConfig(
nb_location=NOTEBOOK_PATH / "1_audio_files.ipynb",
main_location=MAIN_PATH,
entities=[
audio_files_entity := InvenioRDMEntity.new(
name = "audio_tar",
description = "Raw music files",
location=LOCAL_PATH / "1_audio_files" / "output" / "emotifymusic.tar.gz",
dbrepo_connector=connector,
invenio_connector=invenio_connector,
record_metadata=metadata,
table_metadata=table_metadata,
type="audio_tar"
)
],
dependencies=[]
)
executor.execute(nb_config_audio_files, only_local=ONLY_LOCAL)
```
%% Output
INFO:papermill:Input Notebook: /home/lukas/Programming/uni/bachelorarbeit/dbrepo-ismir/notebooks/1_audio_files.ipynb
INFO:papermill:Output Notebook: /home/lukas/Programming/uni/bachelorarbeit/dbrepo-ismir/notebooks/1_audio_files.ipynb
INFO:papermill:Executing notebook with kernel: python3
%% Cell type:code id: tags:
``` python
# ------------- Raw feature generation -------------
table_metadata = yaml.safe_load(open(RESOURCE_PATH / "2_generate_features" / "raw_features_metadata.yml", "r"))
nb_config_generate_features = NbConfig(
nb_location=NOTEBOOK_PATH / "2_generate_features.ipynb",
main_location=MAIN_PATH,
entities=[
raw_features_entity := DBRepoEntity.new(
name="raw_features",
description="Raw MFCC features of audio files.",
location=LOCAL_PATH / "2_generate_features" / "output" / "raw_features.csv",
dbrepo_connector=connector,
table_name="raw_features",
table_description="Raw MFCC features of audio files for genre prediction.",
table_metadata=table_metadata,
type="raw_features"
)
],
dependencies=[
audio_files_entity.copy_to_location(LOCAL_PATH / "2_generate_features" / "input" / "emotifymusic.tar.gz")
]
)
executor.execute(nb_config_generate_features, only_local=ONLY_LOCAL)
```
%% Output
INFO:papermill:Input Notebook: /home/lukas/Programming/uni/bachelorarbeit/dbrepo-ismir/notebooks/2_generate_features.ipynb
INFO:papermill:Output Notebook: /home/lukas/Programming/uni/bachelorarbeit/dbrepo-ismir/notebooks/2_generate_features.ipynb
INFO:papermill:Executing notebook with kernel: python3
%% Cell type:code id: tags:
``` python
# ------------- Feature Aggregation ----------------
if "raw_features_entity" not in globals(): # load saved entity if not already in memory
raw_features_entity = DBRepoEntity.existing(
id="3",
location=LOCAL_PATH / "2_generate_features" / "output" / "raw_features.csv",
dbrepo_connector=connector,
)
# use new for direct entry in ONLY_LOCAL if raw features already created (or downloaded)
# raw_features_entity = DBRepoEntity.new(
# location=LOCAL_PATH / "2_generate_features" / "output" / "raw_features.csv",
# dbrepo_connector=connector,
# name="raw_features",
# description="Raw features of audio files.",
# table_name="raw_features",
# table_description="desc",
# type="raw_features"
# )
nb_config_aggregate_features = NbConfig(
nb_location=NOTEBOOK_PATH / "3_aggregate_features.ipynb",
main_location=MAIN_PATH,
entities=[
features_entity := DBRepoEntity.new(
name="aggregated_features",
description="Aggregated features of audio files.",
location=LOCAL_PATH / "3_aggregate_features" / "output" / "features.csv",
dbrepo_connector=connector,
table_name="aggregated_features",
table_description="Aggregated MFCC features of audio files",
type="aggregated_features",
table_metadata=table_metadata
)
],
dependencies=[
raw_features_entity.copy_to_location(LOCAL_PATH / "3_aggregate_features" / "input" / "raw_features.csv")
]
)
executor.execute(nb_config_aggregate_features, only_local=ONLY_LOCAL)
```
%% Output
INFO:papermill:Input Notebook: /home/lukas/Programming/uni/bachelorarbeit/dbrepo-ismir/notebooks/3_aggregate_features.ipynb
INFO:papermill:Output Notebook: /home/lukas/Programming/uni/bachelorarbeit/dbrepo-ismir/notebooks/3_aggregate_features.ipynb
INFO:papermill:Executing notebook with kernel: python3
%% Cell type:code id: tags:
``` python
# Load features from disk if not already in memory
if "features_entity" not in globals():
features_entity = DBRepoEntity.new(
location=LOCAL_PATH / "3_aggregate_features" / "output" / "features.csv",
dbrepo_connector=connector,
name="features",
description="Aggregated features of audio files.",
table_name="aggregated_features",
table_description="Aggregated features of audio files",
type="aggregated_features",
table_metadata=table_metadata
)
# or load directly from dbrepo:
# features_entity = DBRepoEntity.existing(
# id=1,
# dbrepo_connector=connector,
# location=LOCAL_PATH / "3_aggregate_features" / "output" / "features.csv"
# )
# -------------- SPLITTING -------------------------
nb_config_splits = NbConfig(
nb_location=NOTEBOOK_PATH / "4_split.ipynb",
main_location=MAIN_PATH,
entities=[
split_entity := DBRepoEntity.new(
name="test/train split",
description="Split of aggregated data into testing and training subsets using 11908553 as seed.",
table_name="splits_table",
table_description="Splits of aggregated data into testing and training subsets.",
location=LOCAL_PATH / "4_split" / "output" / "split.csv", # location where script saves generated entity
dbrepo_connector=connector,
type="split",
table_metadata=table_metadata
)
],
dependencies=[
features_entity.copy_to_location(LOCAL_PATH / "4_split" / "input" / "features.csv")
]
)
# generate splits
executor.execute(nb_config_splits, only_local=ONLY_LOCAL)
```
%% Output
INFO:papermill:Input Notebook: /home/lukas/Programming/uni/bachelorarbeit/dbrepo-ismir/notebooks/4_split.ipynb
INFO:papermill:Output Notebook: /home/lukas/Programming/uni/bachelorarbeit/dbrepo-ismir/notebooks/4_split.ipynb
INFO:papermill:Executing notebook with kernel: python3
%% Cell type:code id: tags:
``` python
# -------------- ML ------------------------------
with open(RESOURCE_PATH / "5_ml_model" / "ml_model_entity_metadata.yml", "r") as file:
metadata = yaml.safe_load(file)
nb_config_ml = NbConfig(
nb_location=NOTEBOOK_PATH / "5_ml_model.ipynb",
main_location=MAIN_PATH,
entities=[
ml_model_entity := InvenioRDMEntity.new(
name="ml_model",
description="An ml model representing the trained clf",
location=LOCAL_PATH / "5_ml_model" / "output" / "ml_model.pickle",
dbrepo_connector=connector,
invenio_connector=invenio_connector,
record_metadata=metadata,
type="clf",
table_metadata=table_metadata
),
test_result_entity := DBRepoEntity.new(
name="prediction_results",
description="Result of predictions for ml model",
table_name="prediction_result",
table_description="Prediction results of genre prediction on ml model",
location=LOCAL_PATH / "5_ml_model" / "output" / "test_result.csv",
dbrepo_connector=connector,
type="prediction_result",
table_metadata=table_metadata
)
],
dependencies=[
split_entity.copy_to_location(LOCAL_PATH / "5_ml_model" / "input" / "split.csv"), # pass in old entity as new dependency
features_entity.copy_to_location(LOCAL_PATH / "5_ml_model" / "input" / "features.csv")
]
)
# run ml
executor.execute(nb_config_ml, only_local=ONLY_LOCAL)
```
%% Output
INFO:papermill:Input Notebook: /home/lukas/Programming/uni/bachelorarbeit/dbrepo-ismir/notebooks/5_ml_model.ipynb
INFO:papermill:Output Notebook: /home/lukas/Programming/uni/bachelorarbeit/dbrepo-ismir/notebooks/5_ml_model.ipynb
INFO:papermill:Executing notebook with kernel: python3
......
source diff could not be displayed: it is too large. Options to address this: view the blob.
access:
files: public
record: public
files:
default_preview: null
enabled: true
metadata:
creators:
- affiliations:
- name: Technical University of Vienna
person_or_org:
contributors:
- person_or_org:
family_name: Mahler
given_name: Lukas
identifiers:
- identifier: 0000-0002-8985-8139
scheme: orcid
name: Mahler, Lukas
type: personal
role:
id: contactperson
creators:
- person_or_org:
family_name: Aljanaki
given_name: Anna
name: Aljanaki, Anna
type: personal
role:
id: datacollector
description: "<p>400 MP3 files of one minute playtime each, names are labeled with
the respective genre, one of: classical, rock, pop and electronic.</p><p>This
is an upload of the data on an institutional repository in case the original mirror
ceases to exist. To reference to the data, please cite the paper as described
on https://www2.projects.science.uu.nl/memotion/emotifydata/</p>"
identifiers:
- scheme: url
identifier: https://www2.projects.science.uu.nl/memotion/emotifydata/emotifymusic.zip
publication_date: '2022-01-01'
resource_type:
id: sound
title: Flattened Emotify Dataset
description: "400 MP3 files of one minute playtime each, names are labeled with the respective genre, one of: classical, rock, pop and electronic."
publisher: Universiteit Utrecht
related_identifiers:
- scheme: url
identifier: https://www2.projects.science.uu.nl/memotion/emotifydata/
relation_type:
id: isderivedfrom
resource_type:
id: sound
scheme: url
- identifier: https://gitlab.tuwien.ac.at/martin.weise/fairnb
relation_type:
id: isderivedfrom
resource_type:
id: software
scheme: url
- identifier: https://dbrepo1.ec.tuwien.ac.at/pid/34
relation_type:
id: issupplementto
resource_type:
id: dataset
scheme: url
resource_type:
id: sound
rights:
- id: cc-by-nc-sa-1.0
title: 'Flattened Audio files of: Dataset on Induced Musical Emotion from Game with
a Purpose Emotify'
# access:
# files: restricted
# record: public
# files:
# default_preview: null
# enabled: true
# metadata:
# creators:
# - affiliations:
# - name: Technical University of Vienna
# person_or_org:
# family_name: Mahler
# given_name: Lukas
# identifiers:
# - identifier: 0000-0002-8985-8139
# scheme: orcid
# name: L. Mahler
# type: personal
# publication_date: '2022-01-01'
# resource_type:
# id: sound
# title: Flattened Emotify Dataset
# description: "400 MP3 files of one minute playtime each, names are labeled with the respective genre, one of: classical, rock, pop and electronic."
# publisher: TU Wien
# related_identifiers:
# - identifier: https://www2.projects.science.uu.nl/memotion/emotifydata/
# relation_type:
# id: isderivedfrom
# resource_type:
# id: sound
# scheme: url
# - identifier: https://gitlab.tuwien.ac.at/martin.weise/fairnb
# relation_type:
# id: isderivedfrom
# resource_type:
# id: software
# scheme: url
# - identifier: https://dbrepo1.ec.tuwien.ac.at/pid/34
# relation_type:
# id: issupplementto
# resource_type:
# id: dataset
# scheme: url