***
Wartungsfenster jeden ersten Mittwoch vormittag im Monat
***
Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
fairnb
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Requirements
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Locked files
Deploy
Releases
Package registry
Container registry
Model registry
Operate
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
Repository analytics
Code review analytics
Issue analytics
Insights
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Weise, Martin
fairnb
Commits
246750b8
Unverified
Commit
246750b8
authored
1 year ago
by
Mahler, Lukas
Browse files
Options
Downloads
Patches
Plain Diff
commit from pc
parent
abb4a024
Branches
Branches containing commit
Tags
Tags containing commit
1 merge request
!3
improve fairnb and rework notebooks
Changes
4
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
fairnb/api/dbrepo.py
+2
-2
2 additions, 2 deletions
fairnb/api/dbrepo.py
notebooks/1_audio_files.ipynb
+49
-49
49 additions, 49 deletions
notebooks/1_audio_files.ipynb
notebooks/4_split.ipynb
+64
-64
64 additions, 64 deletions
notebooks/4_split.ipynb
notebooks/main.ipynb
+220
-120
220 additions, 120 deletions
notebooks/main.ipynb
with
335 additions
and
235 deletions
fairnb/api/dbrepo.py
+
2
−
2
View file @
246750b8
...
@@ -248,8 +248,8 @@ class DBRepoConnector:
...
@@ -248,8 +248,8 @@ class DBRepoConnector:
chunk_size
=
CHUNK_SIZE
,
chunk_size
=
CHUNK_SIZE
,
)
)
upload_url
=
uploader
.
create_url
()
upload_url
=
uploader
.
create_url
()
.
replace
(
'
http
'
,
'
https
'
)
uploader
.
set_url
(
upload_url
.
replace
(
'
http
'
,
'
https
'
)
)
# FIX: wrong location response
uploader
.
set_url
(
upload_url
)
# FIX: wrong location response
uploader
.
upload
()
uploader
.
upload
()
response_upload_import
=
requests
.
post
(
response_upload_import
=
requests
.
post
(
...
...
This diff is collapsed.
Click to expand it.
notebooks/1_audio_files.ipynb
+
49
−
49
View file @
246750b8
...
@@ -9,10 +9,10 @@
...
@@ -9,10 +9,10 @@
"outputs_hidden": false
"outputs_hidden": false
},
},
"papermill": {
"papermill": {
"duration": 0.00
365
,
"duration": 0.00
2251
,
"end_time": "2023-10-1
0T20:19:34.354097
",
"end_time": "2023-10-1
2T15:02:44.784634
",
"exception": false,
"exception": false,
"start_time": "2023-10-1
0T20:19:34.350447
",
"start_time": "2023-10-1
2T15:02:44.782383
",
"status": "completed"
"status": "completed"
},
},
"tags": []
"tags": []
...
@@ -30,19 +30,19 @@
...
@@ -30,19 +30,19 @@
"metadata": {
"metadata": {
"collapsed": false,
"collapsed": false,
"execution": {
"execution": {
"iopub.execute_input": "2023-10-1
0T20:19:34.369926
Z",
"iopub.execute_input": "2023-10-1
2T15:02:44.793587
Z",
"iopub.status.busy": "2023-10-1
0T20:19:34.368623
Z",
"iopub.status.busy": "2023-10-1
2T15:02:44.792076
Z",
"iopub.status.idle": "2023-10-1
0T20:19:34.39468
8Z",
"iopub.status.idle": "2023-10-1
2T15:02:44.80512
8Z",
"shell.execute_reply": "2023-10-1
0T20:19:34.393354
Z"
"shell.execute_reply": "2023-10-1
2T15:02:44.804326
Z"
},
},
"jupyter": {
"jupyter": {
"outputs_hidden": false
"outputs_hidden": false
},
},
"papermill": {
"papermill": {
"duration": 0.0
37909
,
"duration": 0.0
19647
,
"end_time": "2023-10-1
0T20:19:34.398270
",
"end_time": "2023-10-1
2T15:02:44.806758
",
"exception": false,
"exception": false,
"start_time": "2023-10-1
0T20:19:34.36036
1",
"start_time": "2023-10-1
2T15:02:44.78711
1",
"status": "completed"
"status": "completed"
},
},
"tags": []
"tags": []
...
@@ -62,16 +62,16 @@
...
@@ -62,16 +62,16 @@
"id": "1b4e6b01",
"id": "1b4e6b01",
"metadata": {
"metadata": {
"execution": {
"execution": {
"iopub.execute_input": "2023-10-1
0T20:19:34.412182
Z",
"iopub.execute_input": "2023-10-1
2T15:02:44.812055
Z",
"iopub.status.busy": "2023-10-1
0T20:19:34.410878
Z",
"iopub.status.busy": "2023-10-1
2T15:02:44.811746
Z",
"iopub.status.idle": "2023-10-1
0T20:19:34.418072
Z",
"iopub.status.idle": "2023-10-1
2T15:02:44.816038
Z",
"shell.execute_reply": "2023-10-1
0T20:19:34.41676
2Z"
"shell.execute_reply": "2023-10-1
2T15:02:44.81487
2Z"
},
},
"papermill": {
"papermill": {
"duration": 0.0
178
,
"duration": 0.0
09142
,
"end_time": "2023-10-1
0T20:19:34.421245
",
"end_time": "2023-10-1
2T15:02:44.817858
",
"exception": false,
"exception": false,
"start_time": "2023-10-1
0T20:19:34.403445
",
"start_time": "2023-10-1
2T15:02:44.808716
",
"status": "completed"
"status": "completed"
},
},
"tags": [
"tags": [
...
@@ -90,19 +90,19 @@
...
@@ -90,19 +90,19 @@
{
{
"cell_type": "code",
"cell_type": "code",
"execution_count": 3,
"execution_count": 3,
"id": "
a0c3731f
",
"id": "
24969e80
",
"metadata": {
"metadata": {
"execution": {
"execution": {
"iopub.execute_input": "2023-10-1
0T20:19:34.432077
Z",
"iopub.execute_input": "2023-10-1
2T15:02:44.828671
Z",
"iopub.status.busy": "2023-10-1
0T20:19:34.431120
Z",
"iopub.status.busy": "2023-10-1
2T15:02:44.828439
Z",
"iopub.status.idle": "2023-10-1
0T20:19:34.436917
Z",
"iopub.status.idle": "2023-10-1
2T15:02:44.832839
Z",
"shell.execute_reply": "2023-10-1
0T20:19:34.435800
Z"
"shell.execute_reply": "2023-10-1
2T15:02:44.831693
Z"
},
},
"papermill": {
"papermill": {
"duration": 0.01
4193
,
"duration": 0.01
3697
,
"end_time": "2023-10-1
0T20:19:34.439709
",
"end_time": "2023-10-1
2T15:02:44.836265
",
"exception": false,
"exception": false,
"start_time": "2023-10-1
0T20:19:34.425516
",
"start_time": "2023-10-1
2T15:02:44.822568
",
"status": "completed"
"status": "completed"
},
},
"tags": [
"tags": [
...
@@ -125,19 +125,19 @@
...
@@ -125,19 +125,19 @@
"metadata": {
"metadata": {
"collapsed": false,
"collapsed": false,
"execution": {
"execution": {
"iopub.execute_input": "2023-10-1
0T20:19:34.446770
Z",
"iopub.execute_input": "2023-10-1
2T15:02:44.845046
Z",
"iopub.status.busy": "2023-10-1
0T20:19:3
4.44
642
6Z",
"iopub.status.busy": "2023-10-1
2T15:02:4
4.
8
44
71
6Z",
"iopub.status.idle": "2023-10-1
0T20:19:36.570981
Z",
"iopub.status.idle": "2023-10-1
2T15:02:47.414624
Z",
"shell.execute_reply": "2023-10-1
0T20:19:36.57021
7Z"
"shell.execute_reply": "2023-10-1
2T15:02:47.41394
7Z"
},
},
"jupyter": {
"jupyter": {
"outputs_hidden": false
"outputs_hidden": false
},
},
"papermill": {
"papermill": {
"duration": 2.
131337
,
"duration": 2.
57669
,
"end_time": "2023-10-1
0T20:19:36.573889
",
"end_time": "2023-10-1
2T15:02:47.417300
",
"exception": false,
"exception": false,
"start_time": "2023-10-1
0T20:19:34.442552
",
"start_time": "2023-10-1
2T15:02:44.840610
",
"status": "completed"
"status": "completed"
},
},
"tags": []
"tags": []
...
@@ -161,19 +161,19 @@
...
@@ -161,19 +161,19 @@
"metadata": {
"metadata": {
"collapsed": false,
"collapsed": false,
"execution": {
"execution": {
"iopub.execute_input": "2023-10-1
0T20:19:36.590650
Z",
"iopub.execute_input": "2023-10-1
2T15:02:47.424387
Z",
"iopub.status.busy": "2023-10-1
0T20:19:36.590408
Z",
"iopub.status.busy": "2023-10-1
2T15:02:47.423773
Z",
"iopub.status.idle": "2023-10-1
0T20:19:37.253257
Z",
"iopub.status.idle": "2023-10-1
2T15:02:47.964668
Z",
"shell.execute_reply": "2023-10-1
0T20:19:37.252729
Z"
"shell.execute_reply": "2023-10-1
2T15:02:47.963816
Z"
},
},
"jupyter": {
"jupyter": {
"outputs_hidden": false
"outputs_hidden": false
},
},
"papermill": {
"papermill": {
"duration": 0.
673
08
3
,
"duration": 0.
5471
08,
"end_time": "2023-10-1
0T20:19:37.25479
3",
"end_time": "2023-10-1
2T15:02:47.96743
3",
"exception": false,
"exception": false,
"start_time": "2023-10-1
0T20:19:36.581710
",
"start_time": "2023-10-1
2T15:02:47.420325
",
"status": "completed"
"status": "completed"
},
},
"tags": []
"tags": []
...
@@ -195,19 +195,19 @@
...
@@ -195,19 +195,19 @@
"metadata": {
"metadata": {
"collapsed": false,
"collapsed": false,
"execution": {
"execution": {
"iopub.execute_input": "2023-10-1
0T20:19:37.268248
Z",
"iopub.execute_input": "2023-10-1
2T15:02:47.977304
Z",
"iopub.status.busy": "2023-10-1
0T20:19:37.267971
Z",
"iopub.status.busy": "2023-10-1
2T15:02:47.976955
Z",
"iopub.status.idle": "2023-10-1
0T20:19:50.606898
Z",
"iopub.status.idle": "2023-10-1
2T15:03:01.218667
Z",
"shell.execute_reply": "2023-10-1
0T20:19:50.606324
Z"
"shell.execute_reply": "2023-10-1
2T15:03:01.218013
Z"
},
},
"jupyter": {
"jupyter": {
"outputs_hidden": false
"outputs_hidden": false
},
},
"papermill": {
"papermill": {
"duration": 13.
3
471
22
,
"duration": 13.
2
471
33
,
"end_time": "2023-10-1
0T20:19:50.608576
",
"end_time": "2023-10-1
2T15:03:01.220388
",
"exception": false,
"exception": false,
"start_time": "2023-10-1
0T20:19:37.261454
",
"start_time": "2023-10-1
2T15:02:47.973255
",
"status": "completed"
"status": "completed"
},
},
"tags": []
"tags": []
...
@@ -242,8 +242,8 @@
...
@@ -242,8 +242,8 @@
},
},
"papermill": {
"papermill": {
"default_parameters": {},
"default_parameters": {},
"duration": 17.
279795
,
"duration": 17.
446157
,
"end_time": "2023-10-1
0T20:19:50.829787
",
"end_time": "2023-10-1
2T15:03:01.439421
",
"environment_variables": {},
"environment_variables": {},
"exception": null,
"exception": null,
"input_path": "/home/lukas/Programming/uni/bachelorarbeit/dbrepo-ismir/notebooks/1_audio_files.ipynb",
"input_path": "/home/lukas/Programming/uni/bachelorarbeit/dbrepo-ismir/notebooks/1_audio_files.ipynb",
...
@@ -254,7 +254,7 @@
...
@@ -254,7 +254,7 @@
"audio_tar": "/home/lukas/Programming/uni/bachelorarbeit/dbrepo-ismir/tmp/1_audio_files/output/emotifymusic.tar.gz"
"audio_tar": "/home/lukas/Programming/uni/bachelorarbeit/dbrepo-ismir/tmp/1_audio_files/output/emotifymusic.tar.gz"
}
}
},
},
"start_time": "2023-10-1
0T20:19:33.549992
",
"start_time": "2023-10-1
2T15:02:43.993264
",
"version": "2.4.0"
"version": "2.4.0"
}
}
},
},
...
...
%% Cell type:markdown id:4389a8092677254e tags:
%% Cell type:markdown id:4389a8092677254e tags:
# Audio Files
# Audio Files
Bundle the provided audio files (400, in MP3) in a tar, encrypt it using gzip and store it in the output folder.
Bundle the provided audio files (400, in MP3) in a tar, encrypt it using gzip and store it in the output folder.
%% Cell type:code id:87ab37c6 tags:
%% Cell type:code id:87ab37c6 tags:
```
python
```
python
from
definitions
import
BASE_PATH
from
definitions
import
BASE_PATH
import
tarfile
import
tarfile
import
zipfile
import
zipfile
import
os
import
os
from
pathlib
import
Path
from
pathlib
import
Path
```
```
%% Cell type:code id:1b4e6b01 tags:parameters
%% Cell type:code id:1b4e6b01 tags:parameters
```
python
```
python
# Parameters
# Parameters
INPUT_PATHS
=
{}
INPUT_PATHS
=
{}
OUTPUT_PATHS
=
{
OUTPUT_PATHS
=
{
"
audio_tar
"
:
str
(
BASE_PATH
/
"
tmp/1_audio_files/output/emotifymusic.tar.gz
"
)
"
audio_tar
"
:
str
(
BASE_PATH
/
"
tmp/1_audio_files/output/emotifymusic.tar.gz
"
)
}
}
```
```
%% Cell type:code id:
a0c3731f
tags:injected-parameters
%% Cell type:code id:
24969e80
tags:injected-parameters
```
python
```
python
# Parameters
# Parameters
INPUT_PATHS
=
{}
INPUT_PATHS
=
{}
OUTPUT_PATHS
=
{
OUTPUT_PATHS
=
{
"
audio_tar
"
:
"
/home/lukas/Programming/uni/bachelorarbeit/dbrepo-ismir/tmp/1_audio_files/output/emotifymusic.tar.gz
"
"
audio_tar
"
:
"
/home/lukas/Programming/uni/bachelorarbeit/dbrepo-ismir/tmp/1_audio_files/output/emotifymusic.tar.gz
"
}
}
```
```
%% Cell type:code id:1e487573 tags:
%% Cell type:code id:1e487573 tags:
```
python
```
python
# load provided files
# load provided files
zip_path
=
BASE_PATH
/
"
resource
"
/
"
1_audio_files
"
/
"
emotifymusic.zip
"
zip_path
=
BASE_PATH
/
"
resource
"
/
"
1_audio_files
"
/
"
emotifymusic.zip
"
dir_path
=
BASE_PATH
/
"
tmp
"
/
"
1_audio_files
"
/
"
music
"
dir_path
=
BASE_PATH
/
"
tmp
"
/
"
1_audio_files
"
/
"
music
"
dir_path
.
mkdir
(
parents
=
True
,
exist_ok
=
True
)
dir_path
.
mkdir
(
parents
=
True
,
exist_ok
=
True
)
# unzip to dir_path
# unzip to dir_path
with
zipfile
.
ZipFile
(
zip_path
,
"
r
"
)
as
zfile
:
with
zipfile
.
ZipFile
(
zip_path
,
"
r
"
)
as
zfile
:
zfile
.
extractall
(
path
=
dir_path
)
zfile
.
extractall
(
path
=
dir_path
)
```
```
%% Cell type:code id:c3193f35 tags:
%% Cell type:code id:c3193f35 tags:
```
python
```
python
file_paths
=
list
(
dir_path
.
rglob
(
'
**/*.*
'
))
file_paths
=
list
(
dir_path
.
rglob
(
'
**/*.*
'
))
flattened_dir_path
=
BASE_PATH
/
"
tmp
"
/
"
1_audio_files
"
/
"
flattened
"
flattened_dir_path
=
BASE_PATH
/
"
tmp
"
/
"
1_audio_files
"
/
"
flattened
"
flattened_dir_path
.
mkdir
(
parents
=
True
,
exist_ok
=
True
)
flattened_dir_path
.
mkdir
(
parents
=
True
,
exist_ok
=
True
)
for
path
in
file_paths
:
for
path
in
file_paths
:
(
flattened_dir_path
/
path
.
relative_to
(
dir_path
).
as_posix
().
replace
(
'
/
'
,
'
_
'
)).
write_bytes
(
path
.
read_bytes
())
(
flattened_dir_path
/
path
.
relative_to
(
dir_path
).
as_posix
().
replace
(
'
/
'
,
'
_
'
)).
write_bytes
(
path
.
read_bytes
())
```
```
%% Cell type:code id:3272ea2b tags:
%% Cell type:code id:3272ea2b tags:
```
python
```
python
tar_path
=
Path
(
OUTPUT_PATHS
[
"
audio_tar
"
])
tar_path
=
Path
(
OUTPUT_PATHS
[
"
audio_tar
"
])
tar_path
.
parent
.
mkdir
(
parents
=
True
,
exist_ok
=
True
)
tar_path
.
parent
.
mkdir
(
parents
=
True
,
exist_ok
=
True
)
with
tarfile
.
open
(
tar_path
,
"
w:gz
"
)
as
file
:
with
tarfile
.
open
(
tar_path
,
"
w:gz
"
)
as
file
:
file
.
add
(
flattened_dir_path
,
arcname
=
os
.
path
.
sep
)
file
.
add
(
flattened_dir_path
,
arcname
=
os
.
path
.
sep
)
```
```
...
...
This diff is collapsed.
Click to expand it.
notebooks/4_split.ipynb
+
64
−
64
View file @
246750b8
...
@@ -5,10 +5,10 @@
...
@@ -5,10 +5,10 @@
"id": "e92b4fe9",
"id": "e92b4fe9",
"metadata": {
"metadata": {
"papermill": {
"papermill": {
"duration": 0.005
82
2,
"duration": 0.0
8
05
1
2,
"end_time": "2023-10-1
0T20:29:52.589509
",
"end_time": "2023-10-1
2T15:14:55.816491
",
"exception": false,
"exception": false,
"start_time": "2023-10-1
0T20:29:52.583687
",
"start_time": "2023-10-1
2T15:14:55.735979
",
"status": "completed"
"status": "completed"
},
},
"tags": []
"tags": []
...
@@ -23,16 +23,16 @@
...
@@ -23,16 +23,16 @@
"id": "5f1fae44",
"id": "5f1fae44",
"metadata": {
"metadata": {
"execution": {
"execution": {
"iopub.execute_input": "2023-10-1
0T20:29:52.604063
Z",
"iopub.execute_input": "2023-10-1
2T15:14:55.891297
Z",
"iopub.status.busy": "2023-10-1
0T20:29:52.602712
Z",
"iopub.status.busy": "2023-10-1
2T15:14:55.890921
Z",
"iopub.status.idle": "2023-10-1
0T20:29:52.903037
Z",
"iopub.status.idle": "2023-10-1
2T15:14:56.150268
Z",
"shell.execute_reply": "2023-10-1
0T20:29:52.902341
Z"
"shell.execute_reply": "2023-10-1
2T15:14:56.149700
Z"
},
},
"papermill": {
"papermill": {
"duration": 0.3
10276
,
"duration": 0.3
03163
,
"end_time": "2023-10-1
0T20:29:52.905670
",
"end_time": "2023-10-1
2T15:14:56.153825
",
"exception": false,
"exception": false,
"start_time": "2023-10-1
0T20:29:52.595394
",
"start_time": "2023-10-1
2T15:14:55.850662
",
"status": "completed"
"status": "completed"
},
},
"tags": []
"tags": []
...
@@ -51,16 +51,16 @@
...
@@ -51,16 +51,16 @@
"metadata": {
"metadata": {
"collapsed": false,
"collapsed": false,
"execution": {
"execution": {
"iopub.execute_input": "2023-10-1
0T20:29:52.911502
Z",
"iopub.execute_input": "2023-10-1
2T15:14:56.221857
Z",
"iopub.status.busy": "2023-10-1
0T20:29:52.911091
Z",
"iopub.status.busy": "2023-10-1
2T15:14:56.221487
Z",
"iopub.status.idle": "2023-10-1
0T20:29:52.91596
7Z",
"iopub.status.idle": "2023-10-1
2T15:14:56.22567
7Z",
"shell.execute_reply": "2023-10-1
0T20:29:52.915019
Z"
"shell.execute_reply": "2023-10-1
2T15:14:56.225035
Z"
},
},
"papermill": {
"papermill": {
"duration": 0.0
09356
,
"duration": 0.0
42817
,
"end_time": "2023-10-1
0T20:29:52.9
173
8
3",
"end_time": "2023-10-1
2T15:14:56.23
1733",
"exception": false,
"exception": false,
"start_time": "2023-10-1
0T20:29:52.908027
",
"start_time": "2023-10-1
2T15:14:56.188916
",
"status": "completed"
"status": "completed"
},
},
"tags": [
"tags": [
...
@@ -83,19 +83,19 @@
...
@@ -83,19 +83,19 @@
{
{
"cell_type": "code",
"cell_type": "code",
"execution_count": 3,
"execution_count": 3,
"id": "
d8169758
",
"id": "
7e3072e3
",
"metadata": {
"metadata": {
"execution": {
"execution": {
"iopub.execute_input": "2023-10-1
0T20:29:52.92221
8Z",
"iopub.execute_input": "2023-10-1
2T15:14:56.30085
8Z",
"iopub.status.busy": "2023-10-1
0T20:29:52.921930
Z",
"iopub.status.busy": "2023-10-1
2T15:14:56.300376
Z",
"iopub.status.idle": "2023-10-1
0T20:29:52.925542
Z",
"iopub.status.idle": "2023-10-1
2T15:14:56.304010
Z",
"shell.execute_reply": "2023-10-1
0T20:29:52.924834
Z"
"shell.execute_reply": "2023-10-1
2T15:14:56.303352
Z"
},
},
"papermill": {
"papermill": {
"duration": 0.0
07
457,
"duration": 0.04
2
57
6
,
"end_time": "2023-10-1
0T20:29:52.926785
",
"end_time": "2023-10-1
2T15:14:56.308986
",
"exception": false,
"exception": false,
"start_time": "2023-10-1
0T20:29:52.919328
",
"start_time": "2023-10-1
2T15:14:56.266410
",
"status": "completed"
"status": "completed"
},
},
"tags": [
"tags": [
...
@@ -119,16 +119,16 @@
...
@@ -119,16 +119,16 @@
"id": "a4cc6800",
"id": "a4cc6800",
"metadata": {
"metadata": {
"execution": {
"execution": {
"iopub.execute_input": "2023-10-1
0T20:29:52.931298
Z",
"iopub.execute_input": "2023-10-1
2T15:14:56.377079
Z",
"iopub.status.busy": "2023-10-1
0T20:29:52.931087
Z",
"iopub.status.busy": "2023-10-1
2T15:14:56.376735
Z",
"iopub.status.idle": "2023-10-1
0T20:29:52.953647
Z",
"iopub.status.idle": "2023-10-1
2T15:14:56.401195
Z",
"shell.execute_reply": "2023-10-1
0T20:29:52.952884
Z"
"shell.execute_reply": "2023-10-1
2T15:14:56.400641
Z"
},
},
"papermill": {
"papermill": {
"duration": 0.0
26646
,
"duration": 0.0
62802
,
"end_time": "2023-10-1
0T20:29:52.955241
",
"end_time": "2023-10-1
2T15:14:56.405085
",
"exception": false,
"exception": false,
"start_time": "2023-10-1
0T20:29:52.928595
",
"start_time": "2023-10-1
2T15:14:56.342283
",
"status": "completed"
"status": "completed"
},
},
"tags": []
"tags": []
...
@@ -149,16 +149,16 @@
...
@@ -149,16 +149,16 @@
"id": "a186d0c4",
"id": "a186d0c4",
"metadata": {
"metadata": {
"execution": {
"execution": {
"iopub.execute_input": "2023-10-1
0T20:29:52.960691
Z",
"iopub.execute_input": "2023-10-1
2T15:14:56.469799
Z",
"iopub.status.busy": "2023-10-1
0T20:29:52.960246
Z",
"iopub.status.busy": "2023-10-1
2T15:14:56.469259
Z",
"iopub.status.idle": "2023-10-1
0T20:29:52.967937
Z",
"iopub.status.idle": "2023-10-1
2T15:14:56.477683
Z",
"shell.execute_reply": "2023-10-1
0T20:29:52.967204
Z"
"shell.execute_reply": "2023-10-1
2T15:14:56.477139
Z"
},
},
"papermill": {
"papermill": {
"duration": 0.0
11851
,
"duration": 0.0
45526
,
"end_time": "2023-10-1
0T20:29:52.969363
",
"end_time": "2023-10-1
2T15:14:56.482599
",
"exception": false,
"exception": false,
"start_time": "2023-10-1
0T20:29:52.957512
",
"start_time": "2023-10-1
2T15:14:56.437073
",
"status": "completed"
"status": "completed"
},
},
"tags": []
"tags": []
...
@@ -188,16 +188,16 @@
...
@@ -188,16 +188,16 @@
"id": "091e0641",
"id": "091e0641",
"metadata": {
"metadata": {
"execution": {
"execution": {
"iopub.execute_input": "2023-10-1
0T20:29:52.976324
Z",
"iopub.execute_input": "2023-10-1
2T15:14:56.553531
Z",
"iopub.status.busy": "2023-10-1
0T20:29:52.975376
Z",
"iopub.status.busy": "2023-10-1
2T15:14:56.553258
Z",
"iopub.status.idle": "2023-10-1
0T20:29:52.997
93
5
Z",
"iopub.status.idle": "2023-10-1
2T15:14:56.5705
93Z",
"shell.execute_reply": "2023-10-1
0T20:29:52.996626
Z"
"shell.execute_reply": "2023-10-1
2T15:14:56.569992
Z"
},
},
"papermill": {
"papermill": {
"duration": 0.0
28011
,
"duration": 0.0
58756
,
"end_time": "2023-10-1
0T20:29:52.999430
",
"end_time": "2023-10-1
2T15:14:56.577017
",
"exception": false,
"exception": false,
"start_time": "2023-10-1
0T20:29:52.971419
",
"start_time": "2023-10-1
2T15:14:56.518261
",
"status": "completed"
"status": "completed"
},
},
"tags": []
"tags": []
...
@@ -232,17 +232,17 @@
...
@@ -232,17 +232,17 @@
" <tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <th>0</th>\n",
" <td>classical_1.mp3</td>\n",
" <td>classical_1.mp3</td>\n",
" <td>
Fals
e</td>\n",
" <td>
Tru
e</td>\n",
" </tr>\n",
" </tr>\n",
" <tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <th>1</th>\n",
" <td>classical_10.mp3</td>\n",
" <td>classical_10.mp3</td>\n",
" <td>
Tru
e</td>\n",
" <td>
Fals
e</td>\n",
" </tr>\n",
" </tr>\n",
" <tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <th>2</th>\n",
" <td>classical_100.mp3</td>\n",
" <td>classical_100.mp3</td>\n",
" <td>
Fals
e</td>\n",
" <td>
Tru
e</td>\n",
" </tr>\n",
" </tr>\n",
" <tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <th>3</th>\n",
...
@@ -277,7 +277,7 @@
...
@@ -277,7 +277,7 @@
" <tr>\n",
" <tr>\n",
" <th>398</th>\n",
" <th>398</th>\n",
" <td>rock_98.mp3</td>\n",
" <td>rock_98.mp3</td>\n",
" <td>
Tru
e</td>\n",
" <td>
Fals
e</td>\n",
" </tr>\n",
" </tr>\n",
" <tr>\n",
" <tr>\n",
" <th>399</th>\n",
" <th>399</th>\n",
...
@@ -291,16 +291,16 @@
...
@@ -291,16 +291,16 @@
],
],
"text/plain": [
"text/plain": [
" filename train\n",
" filename train\n",
"0 classical_1.mp3
Fals
e\n",
"0 classical_1.mp3
Tru
e\n",
"1 classical_10.mp3
Tru
e\n",
"1 classical_10.mp3
Fals
e\n",
"2 classical_100.mp3
Fals
e\n",
"2 classical_100.mp3
Tru
e\n",
"3 classical_11.mp3 True\n",
"3 classical_11.mp3 True\n",
"4 classical_12.mp3 True\n",
"4 classical_12.mp3 True\n",
".. ... ...\n",
".. ... ...\n",
"395 rock_95.mp3 True\n",
"395 rock_95.mp3 True\n",
"396 rock_96.mp3 True\n",
"396 rock_96.mp3 True\n",
"397 rock_97.mp3 True\n",
"397 rock_97.mp3 True\n",
"398 rock_98.mp3
Tru
e\n",
"398 rock_98.mp3
Fals
e\n",
"399 rock_99.mp3 True\n",
"399 rock_99.mp3 True\n",
"\n",
"\n",
"[400 rows x 2 columns]"
"[400 rows x 2 columns]"
...
@@ -321,16 +321,16 @@
...
@@ -321,16 +321,16 @@
"id": "7b11b8bb",
"id": "7b11b8bb",
"metadata": {
"metadata": {
"execution": {
"execution": {
"iopub.execute_input": "2023-10-1
0T20:29:53.004736
Z",
"iopub.execute_input": "2023-10-1
2T15:14:56.663525
Z",
"iopub.status.busy": "2023-10-1
0T20:29:53.004384
Z",
"iopub.status.busy": "2023-10-1
2T15:14:56.662876
Z",
"iopub.status.idle": "2023-10-1
0T20:29:53.012049
Z",
"iopub.status.idle": "2023-10-1
2T15:14:56.669457
Z",
"shell.execute_reply": "2023-10-1
0T20:29:53.01085
5Z"
"shell.execute_reply": "2023-10-1
2T15:14:56.66868
5Z"
},
},
"papermill": {
"papermill": {
"duration": 0.0
12188
,
"duration": 0.0
57933
,
"end_time": "2023-10-1
0T20:29:53.013673
",
"end_time": "2023-10-1
2T15:14:56.676546
",
"exception": false,
"exception": false,
"start_time": "2023-10-1
0T20:29:53.001485
",
"start_time": "2023-10-1
2T15:14:56.618613
",
"status": "completed"
"status": "completed"
},
},
"tags": []
"tags": []
...
@@ -367,8 +367,8 @@
...
@@ -367,8 +367,8 @@
},
},
"papermill": {
"papermill": {
"default_parameters": {},
"default_parameters": {},
"duration":
1.731275
,
"duration":
2.277047
,
"end_time": "2023-10-1
0T20:29:53.335011
",
"end_time": "2023-10-1
2T15:14:57.042208
",
"environment_variables": {},
"environment_variables": {},
"exception": null,
"exception": null,
"input_path": "/home/lukas/Programming/uni/bachelorarbeit/dbrepo-ismir/notebooks/4_split.ipynb",
"input_path": "/home/lukas/Programming/uni/bachelorarbeit/dbrepo-ismir/notebooks/4_split.ipynb",
...
@@ -381,7 +381,7 @@
...
@@ -381,7 +381,7 @@
"split": "/home/lukas/Programming/uni/bachelorarbeit/dbrepo-ismir/tmp/4_split/output/split.csv"
"split": "/home/lukas/Programming/uni/bachelorarbeit/dbrepo-ismir/tmp/4_split/output/split.csv"
}
}
},
},
"start_time": "2023-10-1
0T20:29:51.603736
",
"start_time": "2023-10-1
2T15:14:54.765161
",
"version": "2.4.0"
"version": "2.4.0"
}
}
},
},
...
...
%% Cell type:markdown id:e92b4fe9 tags:
%% Cell type:markdown id:e92b4fe9 tags:
# Split the Features into Train and Test Set
# Split the Features into Train and Test Set
%% Cell type:code id:5f1fae44 tags:
%% Cell type:code id:5f1fae44 tags:
```
python
```
python
import
pandas
as
pd
import
pandas
as
pd
from
pathlib
import
Path
from
pathlib
import
Path
from
definitions
import
BASE_PATH
from
definitions
import
BASE_PATH
```
```
%% Cell type:code id:01de1b27 tags:parameters
%% Cell type:code id:01de1b27 tags:parameters
```
python
```
python
# Tagged with 'parameters'
# Tagged with 'parameters'
from
definitions
import
BASE_PATH
from
definitions
import
BASE_PATH
INPUT_PATHS
:
dict
[
str
,
str
]
=
{
INPUT_PATHS
:
dict
[
str
,
str
]
=
{
"
features
"
:
(
BASE_PATH
/
"
tmp
"
/
"
4_split
"
/
"
input
"
/
"
features.csv
"
).
__str__
()
"
features
"
:
(
BASE_PATH
/
"
tmp
"
/
"
4_split
"
/
"
input
"
/
"
features.csv
"
).
__str__
()
}
}
OUTPUT_PATHS
:
dict
[
str
,
str
]
=
{
OUTPUT_PATHS
:
dict
[
str
,
str
]
=
{
"
split
"
:
(
BASE_PATH
/
"
tmp
"
/
"
4_split
"
/
"
output
"
/
"
split.csv
"
).
__str__
()
"
split
"
:
(
BASE_PATH
/
"
tmp
"
/
"
4_split
"
/
"
output
"
/
"
split.csv
"
).
__str__
()
}
}
```
```
%% Cell type:code id:
d8169758
tags:injected-parameters
%% Cell type:code id:
7e3072e3
tags:injected-parameters
```
python
```
python
# Parameters
# Parameters
INPUT_PATHS
=
{
INPUT_PATHS
=
{
"
aggregated_features
"
:
"
/home/lukas/Programming/uni/bachelorarbeit/dbrepo-ismir/tmp/4_split/input/features.csv
"
"
aggregated_features
"
:
"
/home/lukas/Programming/uni/bachelorarbeit/dbrepo-ismir/tmp/4_split/input/features.csv
"
}
}
OUTPUT_PATHS
=
{
OUTPUT_PATHS
=
{
"
split
"
:
"
/home/lukas/Programming/uni/bachelorarbeit/dbrepo-ismir/tmp/4_split/output/split.csv
"
"
split
"
:
"
/home/lukas/Programming/uni/bachelorarbeit/dbrepo-ismir/tmp/4_split/output/split.csv
"
}
}
```
```
%% Cell type:code id:a4cc6800 tags:
%% Cell type:code id:a4cc6800 tags:
```
python
```
python
# INPUT
# INPUT
for
path
in
INPUT_PATHS
.
values
():
for
path
in
INPUT_PATHS
.
values
():
assert
Path
(
path
).
exists
()
assert
Path
(
path
).
exists
()
features
=
pd
.
read_csv
(
INPUT_PATHS
[
"
aggregated_features
"
])
features
=
pd
.
read_csv
(
INPUT_PATHS
[
"
aggregated_features
"
])
```
```
%% Cell type:code id:a186d0c4 tags:
%% Cell type:code id:a186d0c4 tags:
```
python
```
python
train
=
features
.
sample
(
frac
=
0.8
).
sort_index
()
train
=
features
.
sample
(
frac
=
0.8
).
sort_index
()
test
=
features
.
drop
(
train
.
index
)
test
=
features
.
drop
(
train
.
index
)
split_true
=
pd
.
DataFrame
({
split_true
=
pd
.
DataFrame
({
"
filename
"
:
train
.
filename
,
"
filename
"
:
train
.
filename
,
"
train
"
:
True
"
train
"
:
True
})
})
split_false
=
pd
.
DataFrame
({
split_false
=
pd
.
DataFrame
({
"
filename
"
:
test
.
filename
,
"
filename
"
:
test
.
filename
,
"
train
"
:
False
"
train
"
:
False
})
})
split_concat
=
pd
.
concat
([
split_true
,
split_false
])
\
split_concat
=
pd
.
concat
([
split_true
,
split_false
])
\
.
sort_values
(
"
filename
"
)
\
.
sort_values
(
"
filename
"
)
\
.
reset_index
(
drop
=
True
)
.
reset_index
(
drop
=
True
)
```
```
%% Cell type:code id:091e0641 tags:
%% Cell type:code id:091e0641 tags:
```
python
```
python
split_concat
split_concat
```
```
%% Output
%% Output
filename train
filename train
0 classical_1.mp3
Fals
e
0 classical_1.mp3
Tru
e
1 classical_10.mp3
Tru
e
1 classical_10.mp3
Fals
e
2 classical_100.mp3
Fals
e
2 classical_100.mp3
Tru
e
3 classical_11.mp3 True
3 classical_11.mp3 True
4 classical_12.mp3 True
4 classical_12.mp3 True
.. ... ...
.. ... ...
395 rock_95.mp3 True
395 rock_95.mp3 True
396 rock_96.mp3 True
396 rock_96.mp3 True
397 rock_97.mp3 True
397 rock_97.mp3 True
398 rock_98.mp3
Tru
e
398 rock_98.mp3
Fals
e
399 rock_99.mp3 True
399 rock_99.mp3 True
[400 rows x 2 columns]
[400 rows x 2 columns]
%% Cell type:code id:7b11b8bb tags:
%% Cell type:code id:7b11b8bb tags:
```
python
```
python
# output
# output
OUTPUT_PATH
=
Path
(
OUTPUT_PATHS
[
"
split
"
])
OUTPUT_PATH
=
Path
(
OUTPUT_PATHS
[
"
split
"
])
OUTPUT_PATH
.
parent
.
mkdir
(
parents
=
True
,
exist_ok
=
True
)
OUTPUT_PATH
.
parent
.
mkdir
(
parents
=
True
,
exist_ok
=
True
)
output
=
split_concat
output
=
split_concat
output
.
to_csv
(
OUTPUT_PATH
,
index
=
False
)
output
.
to_csv
(
OUTPUT_PATH
,
index
=
False
)
```
```
...
...
This diff is collapsed.
Click to expand it.
notebooks/main.ipynb
+
220
−
120
View file @
246750b8
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment