{ "cells": [ { "cell_type": "markdown", "id": "f48a4573", "metadata": { "papermill": { "duration": 0.007574, "end_time": "2024-02-15T15:10:25.602842", "exception": false, "start_time": "2024-02-15T15:10:25.595268", "status": "completed" }, "tags": [] }, "source": [ "# Aggregate MFCC Features\n", "\n", "Aggregate the n rows per file into a single row (calculate min, max, mean, std and skew for each feature)." ] }, { "cell_type": "code", "execution_count": 1, "id": "389576b8", "metadata": { "ExecuteTime": { "end_time": "2023-08-14T15:32:41.535589478Z", "start_time": "2023-08-14T15:32:40.986222405Z" }, "collapsed": true, "execution": { "iopub.execute_input": "2024-02-15T15:10:25.622644Z", "iopub.status.busy": "2024-02-15T15:10:25.621412Z", "iopub.status.idle": "2024-02-15T15:10:26.300635Z", "shell.execute_reply": "2024-02-15T15:10:26.298854Z" }, "jupyter": { "outputs_hidden": true }, "papermill": { "duration": 0.697649, "end_time": "2024-02-15T15:10:26.308493", "exception": false, "start_time": "2024-02-15T15:10:25.610844", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "from pathlib import Path\n", "\n", "import pandas as pd\n", "from definitions import BASE_PATH" ] }, { "cell_type": "code", "execution_count": 2, "id": "26f640e0", "metadata": { "execution": { "iopub.execute_input": "2024-02-15T15:10:26.329340Z", "iopub.status.busy": "2024-02-15T15:10:26.327934Z", "iopub.status.idle": "2024-02-15T15:10:26.348148Z", "shell.execute_reply": "2024-02-15T15:10:26.345286Z" }, "papermill": { "duration": 0.050433, "end_time": "2024-02-15T15:10:26.366702", "exception": false, "start_time": "2024-02-15T15:10:26.316269", "status": "completed" }, "tags": [ "parameters" ] }, "outputs": [], "source": [ "INPUT_PATH = BASE_PATH / \"tmp\" / \"3_aggregate_features\" / \"input\"\n", "OUTPUT_PATH = BASE_PATH / \"tmp\" / \"3_aggregate_features\" / \"output\"\n", "\n", "INPUT_PATHS: dict[str, str] = {\n", " \"raw_features\": (INPUT_PATH / \"raw_features.csv\").__str__()\n", "}\n", "\n",
"# The key must be \"aggregated_features\": it is the name the output cell reads and the\n", "# name papermill injects, so these defaults also work when nothing is injected.\n", "OUTPUT_PATHS: dict[str, str] = {\n", "    \"aggregated_features\": (OUTPUT_PATH / \"features.csv\").__str__()\n", "}" ] }, { "cell_type": "code", "execution_count": 3, "id": "88ecee07", "metadata": { "execution": { "iopub.execute_input": "2024-02-15T15:10:26.382035Z", "iopub.status.busy": "2024-02-15T15:10:26.381041Z", "iopub.status.idle": "2024-02-15T15:10:26.389326Z", "shell.execute_reply": "2024-02-15T15:10:26.387547Z" }, "papermill": { "duration": 0.034885, "end_time": "2024-02-15T15:10:26.405941", "exception": false, "start_time": "2024-02-15T15:10:26.371056", "status": "completed" }, "tags": [ "injected-parameters" ] }, "outputs": [], "source": [ "# Parameters\n", "INPUT_PATHS = {\n", " \"raw_features\": \"/home/lukas/Programming/uni/bachelorarbeit/fairnb/tmp/3_aggregate_features/input/raw_features.csv\"\n", "}\n", "OUTPUT_PATHS = {\n", " \"aggregated_features\": \"/home/lukas/Programming/uni/bachelorarbeit/fairnb/tmp/3_aggregate_features/output/features.csv\"\n", "}\n" ] }, { "cell_type": "code", "execution_count": 4, "id": "c5d9d980", "metadata": { "execution": { "iopub.execute_input": "2024-02-15T15:10:26.423067Z", "iopub.status.busy": "2024-02-15T15:10:26.421685Z", "iopub.status.idle": "2024-02-15T15:10:39.968586Z", "shell.execute_reply": "2024-02-15T15:10:39.967418Z" }, "papermill": { "duration": 13.561331, "end_time": "2024-02-15T15:10:39.974046", "exception": false, "start_time": "2024-02-15T15:10:26.412715", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "# inputs\n", "# index_col=False tells pandas not to use the first CSV column as the index.\n", "raw_features = pd.read_csv(INPUT_PATHS[\"raw_features\"], index_col=False)" ] }, { "cell_type": "code", "execution_count": 5, "id": "99f75f47", "metadata": { "execution": { "iopub.execute_input": "2024-02-15T15:10:39.992721Z", "iopub.status.busy": "2024-02-15T15:10:39.992127Z", "iopub.status.idle": "2024-02-15T15:10:47.425790Z", "shell.execute_reply": "2024-02-15T15:10:47.423657Z" }, "papermill": { "duration": 7.455977, "end_time": "2024-02-15T15:10:47.436642", "exception": false,
"start_time": "2024-02-15T15:10:39.980665", "status": "completed" }, "tags": [] }, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>filename</th>\n", " <th>label</th>\n", " <th>0_min</th>\n", " <th>0_max</th>\n", " <th>0_mean</th>\n", " <th>0_std</th>\n", " <th>0_skew</th>\n", " <th>1_min</th>\n", " <th>1_max</th>\n", " <th>1_mean</th>\n", " <th>...</th>\n", " <th>38_min</th>\n", " <th>38_max</th>\n", " <th>38_mean</th>\n", " <th>38_std</th>\n", " <th>38_skew</th>\n", " <th>39_min</th>\n", " <th>39_max</th>\n", " <th>39_mean</th>\n", " <th>39_std</th>\n", " <th>39_skew</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>classical_1.mp3</td>\n", " <td>classical</td>\n", " <td>-530.78436</td>\n", " <td>-163.308350</td>\n", " <td>-302.203167</td>\n", " <td>51.142183</td>\n", " <td>-0.468374</td>\n", " <td>0.000000</td>\n", " <td>178.75162</td>\n", " <td>111.332342</td>\n", " <td>...</td>\n", " <td>-44.098070</td>\n", " <td>47.308060</td>\n", " <td>-3.713503</td>\n", " <td>16.553984</td>\n", " <td>0.230691</td>\n", " <td>-46.794480</td>\n", " <td>49.352516</td>\n", " <td>-2.282116</td>\n", " <td>15.285639</td>\n", " <td>0.171462</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>classical_10.mp3</td>\n", " <td>classical</td>\n", " <td>-562.85785</td>\n", " <td>-96.164795</td>\n", " <td>-219.259016</td>\n", " <td>53.561838</td>\n", " <td>-0.772320</td>\n", " <td>0.029056</td>\n", " <td>259.63270</td>\n", " <td>215.094182</td>\n", " <td>...</td>\n", " <td>-27.458416</td>\n", " <td>29.811110</td>\n", " <td>0.484271</td>\n", " <td>8.660648</td>\n", " 
<td>-0.479016</td>\n", " <td>-28.989983</td>\n", " <td>27.533710</td>\n", " <td>0.952658</td>\n", " <td>10.477735</td>\n", " <td>-0.185771</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>classical_100.mp3</td>\n", " <td>classical</td>\n", " <td>-536.23737</td>\n", " <td>-61.608826</td>\n", " <td>-177.804114</td>\n", " <td>83.381622</td>\n", " <td>-2.587179</td>\n", " <td>0.000000</td>\n", " <td>190.47589</td>\n", " <td>112.471713</td>\n", " <td>...</td>\n", " <td>-27.335688</td>\n", " <td>27.610388</td>\n", " <td>-0.333233</td>\n", " <td>8.185075</td>\n", " <td>0.208425</td>\n", " <td>-38.095375</td>\n", " <td>31.397880</td>\n", " <td>-1.494916</td>\n", " <td>10.917299</td>\n", " <td>0.020985</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>classical_11.mp3</td>\n", " <td>classical</td>\n", " <td>-536.45746</td>\n", " <td>-120.429665</td>\n", " <td>-222.126303</td>\n", " <td>76.246992</td>\n", " <td>-2.402418</td>\n", " <td>0.000000</td>\n", " <td>159.42575</td>\n", " <td>99.853645</td>\n", " <td>...</td>\n", " <td>-31.774948</td>\n", " <td>31.500881</td>\n", " <td>-3.781627</td>\n", " <td>9.191043</td>\n", " <td>0.260886</td>\n", " <td>-22.667440</td>\n", " <td>50.992897</td>\n", " <td>1.600777</td>\n", " <td>10.125545</td>\n", " <td>0.595763</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>classical_12.mp3</td>\n", " <td>classical</td>\n", " <td>-562.67523</td>\n", " <td>-148.133560</td>\n", " <td>-270.975406</td>\n", " <td>52.191182</td>\n", " <td>-0.366586</td>\n", " <td>0.000000</td>\n", " <td>194.26416</td>\n", " <td>148.226647</td>\n", " <td>...</td>\n", " <td>-44.843810</td>\n", " <td>28.490644</td>\n", " <td>-6.242015</td>\n", " <td>10.546545</td>\n", " <td>0.341848</td>\n", " <td>-25.040888</td>\n", " <td>46.878204</td>\n", " <td>1.844494</td>\n", " <td>11.160392</td>\n", " <td>0.503120</td>\n", " </tr>\n", " <tr>\n", " <th>...</th>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " 
<td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " </tr>\n", " <tr>\n", " <th>395</th>\n", " <td>rock_95.mp3</td>\n", " <td>rock</td>\n", " <td>-553.11010</td>\n", " <td>-5.218835</td>\n", " <td>-193.506047</td>\n", " <td>76.869437</td>\n", " <td>-0.201055</td>\n", " <td>-89.948746</td>\n", " <td>201.18045</td>\n", " <td>111.724191</td>\n", " <td>...</td>\n", " <td>-27.043941</td>\n", " <td>22.451445</td>\n", " <td>-7.234634</td>\n", " <td>8.471853</td>\n", " <td>0.753855</td>\n", " <td>-24.712723</td>\n", " <td>23.410387</td>\n", " <td>-4.502398</td>\n", " <td>6.687984</td>\n", " <td>0.238807</td>\n", " </tr>\n", " <tr>\n", " <th>396</th>\n", " <td>rock_96.mp3</td>\n", " <td>rock</td>\n", " <td>-541.23600</td>\n", " <td>27.163334</td>\n", " <td>-119.113996</td>\n", " <td>58.420684</td>\n", " <td>-0.957699</td>\n", " <td>-7.415961</td>\n", " <td>210.49246</td>\n", " <td>125.453699</td>\n", " <td>...</td>\n", " <td>-37.584858</td>\n", " <td>28.087936</td>\n", " <td>-9.704238</td>\n", " <td>8.447620</td>\n", " <td>0.112760</td>\n", " <td>-38.147890</td>\n", " <td>21.814402</td>\n", " <td>-8.249507</td>\n", " <td>7.807756</td>\n", " <td>0.071968</td>\n", " </tr>\n", " <tr>\n", " <th>397</th>\n", " <td>rock_97.mp3</td>\n", " <td>rock</td>\n", " <td>-518.49500</td>\n", " <td>58.526745</td>\n", " <td>-66.267744</td>\n", " <td>65.635619</td>\n", " <td>-0.898026</td>\n", " <td>-58.824410</td>\n", " <td>175.20135</td>\n", " <td>99.288265</td>\n", " <td>...</td>\n", " <td>-29.620445</td>\n", " <td>26.325895</td>\n", " <td>-5.722825</td>\n", " <td>7.727378</td>\n", " <td>0.207489</td>\n", " <td>-29.497524</td>\n", " <td>25.410654</td>\n", " <td>-3.356614</td>\n", " <td>8.170526</td>\n", " <td>0.160330</td>\n", " </tr>\n", " 
<tr>\n", " <th>398</th>\n", " <td>rock_98.mp3</td>\n", " <td>rock</td>\n", " <td>-518.64307</td>\n", " <td>53.555115</td>\n", " <td>-45.734517</td>\n", " <td>52.444200</td>\n", " <td>-1.705641</td>\n", " <td>0.000000</td>\n", " <td>187.04274</td>\n", " <td>96.440874</td>\n", " <td>...</td>\n", " <td>-26.967848</td>\n", " <td>8.714737</td>\n", " <td>-9.511491</td>\n", " <td>5.551820</td>\n", " <td>-0.025604</td>\n", " <td>-23.020084</td>\n", " <td>13.948638</td>\n", " <td>-2.664985</td>\n", " <td>5.051498</td>\n", " <td>-0.258407</td>\n", " </tr>\n", " <tr>\n", " <th>399</th>\n", " <td>rock_99.mp3</td>\n", " <td>rock</td>\n", " <td>-544.70310</td>\n", " <td>75.612130</td>\n", " <td>-49.380943</td>\n", " <td>54.045627</td>\n", " <td>-0.863093</td>\n", " <td>-32.930653</td>\n", " <td>191.73538</td>\n", " <td>93.971242</td>\n", " <td>...</td>\n", " <td>-21.929403</td>\n", " <td>17.050608</td>\n", " <td>-5.296691</td>\n", " <td>5.894963</td>\n", " <td>0.390705</td>\n", " <td>-20.983192</td>\n", " <td>29.312023</td>\n", " <td>-0.321836</td>\n", " <td>6.571660</td>\n", " <td>0.384794</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "<p>400 rows × 202 columns</p>\n", "</div>" ], "text/plain": [ " filename label 0_min 0_max 0_mean \\\n", "0 classical_1.mp3 classical -530.78436 -163.308350 -302.203167 \n", "1 classical_10.mp3 classical -562.85785 -96.164795 -219.259016 \n", "2 classical_100.mp3 classical -536.23737 -61.608826 -177.804114 \n", "3 classical_11.mp3 classical -536.45746 -120.429665 -222.126303 \n", "4 classical_12.mp3 classical -562.67523 -148.133560 -270.975406 \n", ".. ... ... ... ... ... \n", "395 rock_95.mp3 rock -553.11010 -5.218835 -193.506047 \n", "396 rock_96.mp3 rock -541.23600 27.163334 -119.113996 \n", "397 rock_97.mp3 rock -518.49500 58.526745 -66.267744 \n", "398 rock_98.mp3 rock -518.64307 53.555115 -45.734517 \n", "399 rock_99.mp3 rock -544.70310 75.612130 -49.380943 \n", "\n", " 0_std 0_skew 1_min 1_max 1_mean ... 
38_min \\\n", "0 51.142183 -0.468374 0.000000 178.75162 111.332342 ... -44.098070 \n", "1 53.561838 -0.772320 0.029056 259.63270 215.094182 ... -27.458416 \n", "2 83.381622 -2.587179 0.000000 190.47589 112.471713 ... -27.335688 \n", "3 76.246992 -2.402418 0.000000 159.42575 99.853645 ... -31.774948 \n", "4 52.191182 -0.366586 0.000000 194.26416 148.226647 ... -44.843810 \n", ".. ... ... ... ... ... ... ... \n", "395 76.869437 -0.201055 -89.948746 201.18045 111.724191 ... -27.043941 \n", "396 58.420684 -0.957699 -7.415961 210.49246 125.453699 ... -37.584858 \n", "397 65.635619 -0.898026 -58.824410 175.20135 99.288265 ... -29.620445 \n", "398 52.444200 -1.705641 0.000000 187.04274 96.440874 ... -26.967848 \n", "399 54.045627 -0.863093 -32.930653 191.73538 93.971242 ... -21.929403 \n", "\n", " 38_max 38_mean 38_std 38_skew 39_min 39_max 39_mean \\\n", "0 47.308060 -3.713503 16.553984 0.230691 -46.794480 49.352516 -2.282116 \n", "1 29.811110 0.484271 8.660648 -0.479016 -28.989983 27.533710 0.952658 \n", "2 27.610388 -0.333233 8.185075 0.208425 -38.095375 31.397880 -1.494916 \n", "3 31.500881 -3.781627 9.191043 0.260886 -22.667440 50.992897 1.600777 \n", "4 28.490644 -6.242015 10.546545 0.341848 -25.040888 46.878204 1.844494 \n", ".. ... ... ... ... ... ... ... \n", "395 22.451445 -7.234634 8.471853 0.753855 -24.712723 23.410387 -4.502398 \n", "396 28.087936 -9.704238 8.447620 0.112760 -38.147890 21.814402 -8.249507 \n", "397 26.325895 -5.722825 7.727378 0.207489 -29.497524 25.410654 -3.356614 \n", "398 8.714737 -9.511491 5.551820 -0.025604 -23.020084 13.948638 -2.664985 \n", "399 17.050608 -5.296691 5.894963 0.390705 -20.983192 29.312023 -0.321836 \n", "\n", " 39_std 39_skew \n", "0 15.285639 0.171462 \n", "1 10.477735 -0.185771 \n", "2 10.917299 0.020985 \n", "3 10.125545 0.595763 \n", "4 11.160392 0.503120 \n", ".. ... ... 
\n", "395 6.687984 0.238807 \n", "396 7.807756 0.071968 \n", "397 8.170526 0.160330 \n", "398 5.051498 -0.258407 \n", "399 6.571660 0.384794 \n", "\n", "[400 rows x 202 columns]" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "meta_columns = [\"sample\", \"filename\", \"label\"]\n", "statistics = ['min', 'max', 'mean', 'std', 'skew']\n", "\n", "# per-file statistics of every non-metadata column\n", "mfcc_values = raw_features.drop(columns=meta_columns, errors='ignore')\n", "mfcc_aggregated = mfcc_values.groupby(raw_features.filename).agg(statistics)\n", "\n", "# one label per file (the last one seen for that filename)\n", "mfcc_meta = raw_features['label'].groupby(raw_features.filename).last().to_frame()\n", "# give the label frame a two-level header as well, needed for merge\n", "mfcc_meta.columns = pd.MultiIndex.from_arrays([['label'], ['']])\n", "mfcc_merged = mfcc_meta.merge(mfcc_aggregated, left_index=True, right_index=True)\n", "\n", "# collapse the two-level header into single \"<column>_<statistic>\" names\n", "flat_columns = [\"label\"]\n", "for column in mfcc_merged.columns[1:]:\n", "    flat_columns.append('_'.join(str(level) for level in column))\n", "\n", "mfcc_merged.columns = pd.Index(flat_columns)\n", "mfcc_merged = mfcc_merged.reset_index()\n", "mfcc_merged" ] }, { "cell_type": "code", "execution_count": 6, "id": "4ac5c765", "metadata": { "execution": { "iopub.execute_input": "2024-02-15T15:10:47.454568Z", "iopub.status.busy": "2024-02-15T15:10:47.452996Z", "iopub.status.idle": "2024-02-15T15:10:47.646600Z", "shell.execute_reply": "2024-02-15T15:10:47.644995Z" }, "papermill": { "duration": 0.209091, "end_time": "2024-02-15T15:10:47.653114", "exception": false, "start_time": "2024-02-15T15:10:47.444023", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "# outputs\n", "output = mfcc_merged\n", "\n", "# create the target directory first so that writing the CSV cannot fail on a missing folder\n", "aggregated_features_path = Path(OUTPUT_PATHS[\"aggregated_features\"]).resolve()\n", "aggregated_features_path.parent.mkdir(exist_ok=True, parents=True)\n", "output.to_csv(aggregated_features_path, index=False)" ] } ], "metadata": { "celltoolbar": "Tags", "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" },
"language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.13" }, "papermill": { "default_parameters": {}, "duration": 24.653494, "end_time": "2024-02-15T15:10:48.496631", "environment_variables": {}, "exception": null, "input_path": "/home/lukas/Programming/uni/bachelorarbeit/fairnb/notebooks/3_aggregate_features.ipynb", "output_path": "/home/lukas/Programming/uni/bachelorarbeit/fairnb/notebooks/3_aggregate_features.ipynb", "parameters": { "INPUT_PATHS": { "raw_features": "/home/lukas/Programming/uni/bachelorarbeit/fairnb/tmp/3_aggregate_features/input/raw_features.csv" }, "OUTPUT_PATHS": { "aggregated_features": "/home/lukas/Programming/uni/bachelorarbeit/fairnb/tmp/3_aggregate_features/output/features.csv" } }, "start_time": "2024-02-15T15:10:23.843137", "version": "2.4.0" } }, "nbformat": 4, "nbformat_minor": 5 }