Newer
Older
"id": "f48a4573",
"metadata": {
"papermill": {
"duration": 0.006395,
"end_time": "2023-08-21T16:05:27.959997",
"start_time": "2023-08-21T16:05:27.953602",
"status": "completed"
},
"tags": []
},
"source": [
"# Aggregate MFCC Features\n",
"\n",
"Aggregate from n rows par file to 1 (calculate min, max, etc. for each feature)."
"ExecuteTime": {
"end_time": "2023-08-14T15:32:41.535589478Z",
"start_time": "2023-08-14T15:32:40.986222405Z"
},
"collapsed": true,
"execution": {
"iopub.execute_input": "2023-08-21T16:05:27.992117Z",
"iopub.status.busy": "2023-08-21T16:05:27.991058Z",
"iopub.status.idle": "2023-08-21T16:05:29.085230Z",
"shell.execute_reply": "2023-08-21T16:05:29.083759Z"
"duration": 1.113632,
"end_time": "2023-08-21T16:05:29.090761",
"start_time": "2023-08-21T16:05:27.977129",
"status": "completed"
},
"tags": []
"from pathlib import Path\n",
"\n",
"import pandas as pd\n",
"from definitions import BASE_PATH"
]
},
"execution_count": 2,
"id": "26f640e0",
"iopub.execute_input": "2023-08-21T16:05:29.116428Z",
"iopub.status.busy": "2023-08-21T16:05:29.114006Z",
"iopub.status.idle": "2023-08-21T16:05:29.126919Z",
"shell.execute_reply": "2023-08-21T16:05:29.124758Z"
"duration": 0.028512,
"end_time": "2023-08-21T16:05:29.131713",
"start_time": "2023-08-21T16:05:29.103201",
"tags": [
"parameters"
]
},
"outputs": [],
"source": [
"# INPUT_PATH = BASE_PATH / \"tmp\" / \"3_aggregate_features\" / \"input\"\n",
"# OUTPUT_PATH = BASE_PATH / \"tmp\" / \"3_aggregate_features\" / \"output\"\n",
"#\n",
"# INPUT_PATHS: dict[str, str] = {\n",
"# \"raw_features\": (INPUT_PATH / \"raw_features.csv\").__str__()\n",
"# }\n",
"#\n",
"# OUTPUT_PATHS: dict[str, str] = {\n",
"# \"features\": (OUTPUT_PATH / \"features.csv\").__str__()\n",
"# }\n",
"INPUT_PATHS: dict[str, str] = {}\n",
"OUTPUT_PATHS: dict[str, str] = {}"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-21T16:05:29.153037Z",
"iopub.status.busy": "2023-08-21T16:05:29.152027Z",
"iopub.status.idle": "2023-08-21T16:05:29.161973Z",
"shell.execute_reply": "2023-08-21T16:05:29.159663Z"
"duration": 0.025025,
"end_time": "2023-08-21T16:05:29.165408",
"start_time": "2023-08-21T16:05:29.140383",
"status": "completed"
},
"tags": [
"injected-parameters"
]
},
"outputs": [],
"source": [
"# Parameters\n",
"INPUT_PATHS = {\n",
" \"raw_features\": \"/home/lukas/Programming/uni/bachelorarbeit/dbrepo-ismir/tmp/3_aggregate_features/input/raw_features.csv\"\n",
"OUTPUT_PATHS = {\n",
" \"aggregated_features\": \"/home/lukas/Programming/uni/bachelorarbeit/dbrepo-ismir/tmp/3_aggregate_features/output/features.csv\"\n",
"}\n"
"execution_count": 4,
"id": "c5d9d980",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-21T16:05:29.183331Z",
"iopub.status.busy": "2023-08-21T16:05:29.181976Z",
"iopub.status.idle": "2023-08-21T16:05:47.896449Z",
"shell.execute_reply": "2023-08-21T16:05:47.894434Z"
"duration": 18.730379,
"end_time": "2023-08-21T16:05:47.901647",
"start_time": "2023-08-21T16:05:29.171268",
"status": "completed"
},
"tags": []
},
"outputs": [],
"source": [
"# inputs\n",
"raw_features = pd.read_csv(INPUT_PATHS[\"raw_features\"], index_col=False)"
"id": "99f75f47",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-14T16:12:29.198485Z",
"iopub.status.busy": "2023-08-14T16:12:29.197413Z",
"iopub.status.idle": "2023-08-14T16:12:40.483527Z",
"shell.execute_reply": "2023-08-14T16:12:40.482234Z"
},
"papermill": {
"duration": null,
"end_time": null,
"start_time": "2023-08-21T16:05:47.909658",
"status": "running"
"source": [
"meta_columns = [\"sample\", \"filename\", \"label\"]\n",
"mfcc_aggregated = raw_features\\\n",
" .drop(meta_columns, axis=1, errors='ignore')\\\n",
" .groupby(raw_features.filename).agg(['min', 'max', 'mean', 'std', 'skew'])\n",
"\n",
"mfcc_meta = pd.DataFrame(raw_features['label'].groupby(raw_features.filename).last())\n",
"mfcc_meta.columns = pd.MultiIndex.from_arrays([['label'], ['']]) # needed for merge\n",
"mfcc_merged = pd.merge(mfcc_meta, mfcc_aggregated, left_index=True, right_index=True)\n",
"\n",
"# reduce multi index to single index\n",
"one_level_cols = ['_'.join([str(el) for el in col]) for col in mfcc_merged.columns[1:]]\n",
"one_level_cols.insert(0, \"label\")\n",
"\n",
"mfcc_merged.columns = pd.Index(one_level_cols)\n",
"mfcc_merged = mfcc_merged.reset_index()\n",
"mfcc_merged"
"id": "4ac5c765",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-14T16:12:40.510391Z",
"iopub.status.busy": "2023-08-14T16:12:40.509065Z",
"iopub.status.idle": "2023-08-14T16:12:40.758881Z",
"shell.execute_reply": "2023-08-14T16:12:40.757493Z"
},
"papermill": {
"duration": null,
"end_time": null,
"exception": null,
"start_time": null,
"status": "pending"
"outputs": [],
"source": [
"# outputs\n",
"aggregated_features_path = Path(OUTPUT_PATHS[\"aggregated_features\"]).resolve()\n",
"aggregated_features_path.parent.mkdir(parents=True, exist_ok=True)\n",
"output.to_csv(aggregated_features_path, index=False)"
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"version": "3.10.12"
},
"papermill": {
"default_parameters": {},
"duration": null,
"end_time": null,
"environment_variables": {},
"exception": null,
"input_path": "/home/lukas/Programming/uni/bachelorarbeit/dbrepo-ismir/notebooks/3_aggregate_features.ipynb",
"output_path": "/home/lukas/Programming/uni/bachelorarbeit/dbrepo-ismir/notebooks/3_aggregate_features.ipynb",
"parameters": {
"INPUT_PATHS": {
"raw_features": "/home/lukas/Programming/uni/bachelorarbeit/dbrepo-ismir/tmp/3_aggregate_features/input/raw_features.csv"
},
"OUTPUT_PATHS": {
"aggregated_features": "/home/lukas/Programming/uni/bachelorarbeit/dbrepo-ismir/tmp/3_aggregate_features/output/features.csv"
}
},
"start_time": "2023-08-21T16:05:26.283764",