*** Wartungsfenster jeden ersten Mittwoch vormittag im Monat ***

Skip to content
Snippets Groups Projects
Unverified commit 246750b8, authored by Mahler, Lukas
Browse files

commit from pc

parent abb4a024
Branches
Tags
1 merge request: !3 "improve fairnb and rework notebooks"
...@@ -248,8 +248,8 @@ class DBRepoConnector: ...@@ -248,8 +248,8 @@ class DBRepoConnector:
chunk_size=CHUNK_SIZE, chunk_size=CHUNK_SIZE,
) )
upload_url = uploader.create_url() upload_url = uploader.create_url().replace('http', 'https')
uploader.set_url(upload_url.replace('http', 'https')) # FIX: wrong location response uploader.set_url(upload_url) # FIX: wrong location response
uploader.upload() uploader.upload()
response_upload_import = requests.post( response_upload_import = requests.post(
......
%% Cell type:markdown id:4389a8092677254e tags: %% Cell type:markdown id:4389a8092677254e tags:
# Audio Files # Audio Files
Bundle the provided audio files (400, in MP3) into a tar archive, compress it with gzip, and store it in the output folder. Bundle the provided audio files (400, in MP3) into a tar archive, compress it with gzip, and store it in the output folder.
%% Cell type:code id:87ab37c6 tags: %% Cell type:code id:87ab37c6 tags:
``` python ``` python
from definitions import BASE_PATH from definitions import BASE_PATH
import tarfile import tarfile
import zipfile import zipfile
import os import os
from pathlib import Path from pathlib import Path
``` ```
%% Cell type:code id:1b4e6b01 tags:parameters %% Cell type:code id:1b4e6b01 tags:parameters
``` python ``` python
# Parameters # Parameters
INPUT_PATHS = {} INPUT_PATHS = {}
OUTPUT_PATHS = { OUTPUT_PATHS = {
"audio_tar": str(BASE_PATH / "tmp/1_audio_files/output/emotifymusic.tar.gz") "audio_tar": str(BASE_PATH / "tmp/1_audio_files/output/emotifymusic.tar.gz")
} }
``` ```
%% Cell type:code id:a0c3731f tags:injected-parameters %% Cell type:code id:24969e80 tags:injected-parameters
``` python ``` python
# Parameters # Parameters
INPUT_PATHS = {} INPUT_PATHS = {}
OUTPUT_PATHS = { OUTPUT_PATHS = {
"audio_tar": "/home/lukas/Programming/uni/bachelorarbeit/dbrepo-ismir/tmp/1_audio_files/output/emotifymusic.tar.gz" "audio_tar": "/home/lukas/Programming/uni/bachelorarbeit/dbrepo-ismir/tmp/1_audio_files/output/emotifymusic.tar.gz"
} }
``` ```
%% Cell type:code id:1e487573 tags: %% Cell type:code id:1e487573 tags:
``` python ``` python
# load provided files # load provided files
zip_path = BASE_PATH / "resource" / "1_audio_files" / "emotifymusic.zip" zip_path = BASE_PATH / "resource" / "1_audio_files" / "emotifymusic.zip"
dir_path = BASE_PATH / "tmp" / "1_audio_files" / "music" dir_path = BASE_PATH / "tmp" / "1_audio_files" / "music"
dir_path.mkdir(parents=True, exist_ok=True) dir_path.mkdir(parents=True, exist_ok=True)
# unzip to dir_path # unzip to dir_path
with zipfile.ZipFile(zip_path, "r") as zfile: with zipfile.ZipFile(zip_path, "r") as zfile:
zfile.extractall(path=dir_path) zfile.extractall(path=dir_path)
``` ```
%% Cell type:code id:c3193f35 tags: %% Cell type:code id:c3193f35 tags:
``` python ``` python
file_paths = list(dir_path.rglob('**/*.*')) file_paths = list(dir_path.rglob('**/*.*'))
flattened_dir_path = BASE_PATH / "tmp" / "1_audio_files" / "flattened" flattened_dir_path = BASE_PATH / "tmp" / "1_audio_files" / "flattened"
flattened_dir_path.mkdir(parents=True, exist_ok=True) flattened_dir_path.mkdir(parents=True, exist_ok=True)
for path in file_paths: for path in file_paths:
(flattened_dir_path / path.relative_to(dir_path).as_posix().replace('/', '_')).write_bytes(path.read_bytes()) (flattened_dir_path / path.relative_to(dir_path).as_posix().replace('/', '_')).write_bytes(path.read_bytes())
``` ```
%% Cell type:code id:3272ea2b tags: %% Cell type:code id:3272ea2b tags:
``` python ``` python
tar_path = Path(OUTPUT_PATHS["audio_tar"]) tar_path = Path(OUTPUT_PATHS["audio_tar"])
tar_path.parent.mkdir(parents=True, exist_ok=True) tar_path.parent.mkdir(parents=True, exist_ok=True)
with tarfile.open(tar_path, "w:gz") as file: with tarfile.open(tar_path, "w:gz") as file:
file.add(flattened_dir_path, arcname=os.path.sep) file.add(flattened_dir_path, arcname=os.path.sep)
``` ```
......
%% Cell type:markdown id:e92b4fe9 tags: %% Cell type:markdown id:e92b4fe9 tags:
# Split the Features into Train and Test Set # Split the Features into Train and Test Set
%% Cell type:code id:5f1fae44 tags: %% Cell type:code id:5f1fae44 tags:
``` python ``` python
import pandas as pd import pandas as pd
from pathlib import Path from pathlib import Path
from definitions import BASE_PATH from definitions import BASE_PATH
``` ```
%% Cell type:code id:01de1b27 tags:parameters %% Cell type:code id:01de1b27 tags:parameters
``` python ``` python
# Tagged with 'parameters' # Tagged with 'parameters'
from definitions import BASE_PATH from definitions import BASE_PATH
INPUT_PATHS: dict[str, str] = { INPUT_PATHS: dict[str, str] = {
"features": (BASE_PATH / "tmp" / "4_split" / "input" / "features.csv").__str__() "features": (BASE_PATH / "tmp" / "4_split" / "input" / "features.csv").__str__()
} }
OUTPUT_PATHS: dict[str, str] = { OUTPUT_PATHS: dict[str, str] = {
"split": (BASE_PATH / "tmp" / "4_split" / "output" / "split.csv").__str__() "split": (BASE_PATH / "tmp" / "4_split" / "output" / "split.csv").__str__()
} }
``` ```
%% Cell type:code id:d8169758 tags:injected-parameters %% Cell type:code id:7e3072e3 tags:injected-parameters
``` python ``` python
# Parameters # Parameters
INPUT_PATHS = { INPUT_PATHS = {
"aggregated_features": "/home/lukas/Programming/uni/bachelorarbeit/dbrepo-ismir/tmp/4_split/input/features.csv" "aggregated_features": "/home/lukas/Programming/uni/bachelorarbeit/dbrepo-ismir/tmp/4_split/input/features.csv"
} }
OUTPUT_PATHS = { OUTPUT_PATHS = {
"split": "/home/lukas/Programming/uni/bachelorarbeit/dbrepo-ismir/tmp/4_split/output/split.csv" "split": "/home/lukas/Programming/uni/bachelorarbeit/dbrepo-ismir/tmp/4_split/output/split.csv"
} }
``` ```
%% Cell type:code id:a4cc6800 tags: %% Cell type:code id:a4cc6800 tags:
``` python ``` python
# INPUT # INPUT
for path in INPUT_PATHS.values(): for path in INPUT_PATHS.values():
assert Path(path).exists() assert Path(path).exists()
features = pd.read_csv(INPUT_PATHS["aggregated_features"]) features = pd.read_csv(INPUT_PATHS["aggregated_features"])
``` ```
%% Cell type:code id:a186d0c4 tags: %% Cell type:code id:a186d0c4 tags:
``` python ``` python
train = features.sample(frac=0.8).sort_index() train = features.sample(frac=0.8).sort_index()
test = features.drop(train.index) test = features.drop(train.index)
split_true = pd.DataFrame({ split_true = pd.DataFrame({
"filename": train.filename, "filename": train.filename,
"train": True "train": True
}) })
split_false = pd.DataFrame({ split_false = pd.DataFrame({
"filename": test.filename, "filename": test.filename,
"train": False "train": False
}) })
split_concat = pd.concat([split_true, split_false])\ split_concat = pd.concat([split_true, split_false])\
.sort_values("filename")\ .sort_values("filename")\
.reset_index(drop=True) .reset_index(drop=True)
``` ```
%% Cell type:code id:091e0641 tags: %% Cell type:code id:091e0641 tags:
``` python ``` python
split_concat split_concat
``` ```
%% Output %% Output
filename train filename train
0 classical_1.mp3 False 0 classical_1.mp3 True
1 classical_10.mp3 True 1 classical_10.mp3 False
2 classical_100.mp3 False 2 classical_100.mp3 True
3 classical_11.mp3 True 3 classical_11.mp3 True
4 classical_12.mp3 True 4 classical_12.mp3 True
.. ... ... .. ... ...
395 rock_95.mp3 True 395 rock_95.mp3 True
396 rock_96.mp3 True 396 rock_96.mp3 True
397 rock_97.mp3 True 397 rock_97.mp3 True
398 rock_98.mp3 True 398 rock_98.mp3 False
399 rock_99.mp3 True 399 rock_99.mp3 True
[400 rows x 2 columns] [400 rows x 2 columns]
%% Cell type:code id:7b11b8bb tags: %% Cell type:code id:7b11b8bb tags:
``` python ``` python
# output # output
OUTPUT_PATH = Path(OUTPUT_PATHS["split"]) OUTPUT_PATH = Path(OUTPUT_PATHS["split"])
OUTPUT_PATH.parent.mkdir(parents=True, exist_ok=True) OUTPUT_PATH.parent.mkdir(parents=True, exist_ok=True)
output = split_concat output = split_concat
output.to_csv(OUTPUT_PATH, index=False) output.to_csv(OUTPUT_PATH, index=False)
``` ```
......
This diff is collapsed.
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment