Newer
Older
"id": "f48a4573",
"metadata": {
"papermill": {
"duration": 0.00482,
"end_time": "2024-02-19T14:43:18.927810",
"start_time": "2024-02-19T14:43:18.922990",
"status": "completed"
},
"tags": []
},
"source": [
"# Aggregate MFCC Features\n",
"\n",
"Aggregate from n rows per file to 1 row (calculate min, max, etc. for each feature)."
"collapsed": true,
"execution": {
"iopub.execute_input": "2024-02-19T14:43:18.941968Z",
"iopub.status.busy": "2024-02-19T14:43:18.940586Z",
"iopub.status.idle": "2024-02-19T14:43:19.225227Z",
"shell.execute_reply": "2024-02-19T14:43:19.224264Z"
"duration": 0.295054,
"end_time": "2024-02-19T14:43:19.228421",
"start_time": "2024-02-19T14:43:18.933367",
"status": "completed"
},
"tags": []
"from pathlib import Path\n",
"\n",
"import pandas as pd\n",
"from definitions import BASE_PATH"
]
},
"iopub.execute_input": "2024-02-19T14:43:19.235696Z",
"iopub.status.busy": "2024-02-19T14:43:19.235399Z",
"iopub.status.idle": "2024-02-19T14:43:19.240990Z",
"shell.execute_reply": "2024-02-19T14:43:19.240022Z"
"duration": 0.012583,
"end_time": "2024-02-19T14:43:19.243948",
"start_time": "2024-02-19T14:43:19.231365",
"tags": [
"parameters"
]
},
"outputs": [],
"source": [
"# Defaults for a standalone run; this cell is tagged \"parameters\" so papermill can override them.\n",
"INPUT_PATH = BASE_PATH / \"tmp\" / \"3_aggregate_features\" / \"input\"\n",
"OUTPUT_PATH = BASE_PATH / \"tmp\" / \"3_aggregate_features\" / \"output\"\n",
"\n",
"INPUT_PATHS: dict[str, str] = {\n",
"    \"raw_features\": str(INPUT_PATH / \"raw_features.csv\")\n",
"}\n",
"\n",
"# The key has to be \"aggregated_features\": the output cell looks the path up under that name.\n",
"OUTPUT_PATHS: dict[str, str] = {\n",
"    \"aggregated_features\": str(OUTPUT_PATH / \"features.csv\")\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "40dbf7fa",
"metadata": {
"execution": {
"iopub.execute_input": "2024-02-19T14:43:19.248798Z",
"iopub.status.busy": "2024-02-19T14:43:19.248350Z",
"iopub.status.idle": "2024-02-19T14:43:19.251965Z",
"shell.execute_reply": "2024-02-19T14:43:19.251370Z"
"duration": 0.007812,
"end_time": "2024-02-19T14:43:19.253560",
"start_time": "2024-02-19T14:43:19.245748",
"status": "completed"
},
"tags": [
"injected-parameters"
]
},
"outputs": [],
"source": [
"# Parameters\n",
"INPUT_PATHS = {\n",
" \"raw_features\": \"/home/lukas/Programming/uni/bachelorarbeit/dbrepo-ismir/tmp/3_aggregate_features/input/raw_features.csv\"\n",
" \"aggregated_features\": \"/home/lukas/Programming/uni/bachelorarbeit/dbrepo-ismir/tmp/3_aggregate_features/output/features.csv\"\n",
"id": "c5d9d980",
"metadata": {
"execution": {
"iopub.execute_input": "2024-02-19T14:43:19.263504Z",
"iopub.status.busy": "2024-02-19T14:43:19.263172Z",
"iopub.status.idle": "2024-02-19T14:43:23.707599Z",
"shell.execute_reply": "2024-02-19T14:43:23.706545Z"
"duration": 4.452062,
"end_time": "2024-02-19T14:43:23.709599",
"start_time": "2024-02-19T14:43:19.257537",
"status": "completed"
},
"tags": []
},
"outputs": [],
"source": [
"# inputs\n",
"raw_features_path = INPUT_PATHS[\"raw_features\"]\n",
"raw_features = pd.read_csv(raw_features_path, index_col=False)"
"id": "99f75f47",
"metadata": {
"execution": {
"iopub.execute_input": "2024-02-19T14:43:23.721962Z",
"iopub.status.busy": "2024-02-19T14:43:23.721698Z",
"iopub.status.idle": "2024-02-19T14:43:27.486305Z",
"shell.execute_reply": "2024-02-19T14:43:27.485675Z"
"duration": 3.772661,
"end_time": "2024-02-19T14:43:27.488022",
"start_time": "2024-02-19T14:43:23.715361",
"source": [
"# Columns that describe a row rather than carry a feature value.\n",
"meta_columns = [\"sample\", \"filename\", \"label\"]\n",
"\n",
"# Per-file summary statistics for every remaining column.\n",
"feature_columns = raw_features.drop(meta_columns, axis=1, errors='ignore')\n",
"mfcc_aggregated = feature_columns.groupby(raw_features.filename).agg(['min', 'max', 'mean', 'std', 'skew'])\n",
"\n",
"# One label per file: the last value within each filename group.\n",
"mfcc_meta = pd.DataFrame(raw_features['label'].groupby(raw_features.filename).last())\n",
"mfcc_meta.columns = pd.MultiIndex.from_arrays([['label'], ['']])  # two-level columns, needed for the merge\n",
"mfcc_merged = pd.merge(mfcc_meta, mfcc_aggregated, left_index=True, right_index=True)\n",
"\n",
"# Collapse the two-level column index into single names joined by '_'.\n",
"one_level_cols = [\"label\"]\n",
"one_level_cols.extend('_'.join(str(el) for el in col) for col in mfcc_merged.columns[1:])\n",
"\n",
"mfcc_merged.columns = pd.Index(one_level_cols)\n",
"mfcc_merged = mfcc_merged.reset_index()\n",
"mfcc_merged"
"id": "4ac5c765",
"metadata": {
"execution": {
"iopub.execute_input": "2024-02-19T14:43:27.495015Z",
"iopub.status.busy": "2024-02-19T14:43:27.494787Z",
"iopub.status.idle": "2024-02-19T14:43:27.574541Z",
"shell.execute_reply": "2024-02-19T14:43:27.573938Z"
"duration": 0.084978,
"end_time": "2024-02-19T14:43:27.576110",
"start_time": "2024-02-19T14:43:27.491132",
"outputs": [],
"source": [
"# outputs\n",
"aggregated_features_path = Path(OUTPUT_PATHS[\"aggregated_features\"]).resolve()\n",
"# make sure the target directory exists before writing\n",
"aggregated_features_path.parent.mkdir(parents=True, exist_ok=True)\n",
"# persist the aggregated per-file feature table\n",
"mfcc_merged.to_csv(aggregated_features_path, index=False)"
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
},
"papermill": {
"default_parameters": {},
"duration": 9.950754,
"end_time": "2024-02-19T14:43:27.897395",
"environment_variables": {},
"exception": null,
"input_path": "/home/lukas/Programming/uni/bachelorarbeit/dbrepo-ismir/notebooks/3_aggregate_features.ipynb",
"output_path": "/home/lukas/Programming/uni/bachelorarbeit/dbrepo-ismir/notebooks/3_aggregate_features.ipynb",
"parameters": {
"INPUT_PATHS": {
"raw_features": "/home/lukas/Programming/uni/bachelorarbeit/dbrepo-ismir/tmp/3_aggregate_features/input/raw_features.csv"
"aggregated_features": "/home/lukas/Programming/uni/bachelorarbeit/dbrepo-ismir/tmp/3_aggregate_features/output/features.csv"
"start_time": "2024-02-19T14:43:17.946641",