*** Wartungsfenster jeden ersten Mittwoch vormittag im Monat ***

Skip to content
Snippets Groups Projects
3_aggregate_features.ipynb 7.57 KiB
Newer Older
{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "f48a4573",
   "metadata": {
    "papermill": {
     "duration": 0.00482,
     "end_time": "2024-02-19T14:43:18.927810",
     "exception": false,
     "start_time": "2024-02-19T14:43:18.922990",
     "status": "completed"
    },
    "tags": []
   },
   "source": [
    "# Aggregate MFCC Features\n",
    "\n",
    "Aggregate from n rows per file to 1 row (calculate min, max, etc. for each feature)."
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "389576b8",
   "metadata": {
    "collapsed": true,
    "execution": {
     "iopub.execute_input": "2024-02-19T14:43:18.941968Z",
     "iopub.status.busy": "2024-02-19T14:43:18.940586Z",
     "iopub.status.idle": "2024-02-19T14:43:19.225227Z",
     "shell.execute_reply": "2024-02-19T14:43:19.224264Z"
Mahler, Lukas's avatar
Mahler, Lukas committed
    "jupyter": {
     "outputs_hidden": true
    },
     "duration": 0.295054,
     "end_time": "2024-02-19T14:43:19.228421",
     "exception": false,
     "start_time": "2024-02-19T14:43:18.933367",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "from pathlib import Path\n",
    "\n",
    "import pandas as pd\n",
    "from definitions import BASE_PATH"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "26f640e0",
   "metadata": {
     "iopub.execute_input": "2024-02-19T14:43:19.235696Z",
     "iopub.status.busy": "2024-02-19T14:43:19.235399Z",
     "iopub.status.idle": "2024-02-19T14:43:19.240990Z",
     "shell.execute_reply": "2024-02-19T14:43:19.240022Z"
     "duration": 0.012583,
     "end_time": "2024-02-19T14:43:19.243948",
     "exception": false,
     "start_time": "2024-02-19T14:43:19.231365",
     "status": "completed"
    },
    "tags": [
     "parameters"
    ]
   },
   "outputs": [],
   "source": [
Mahler, Lukas's avatar
Mahler, Lukas committed
    "# Default locations, used when the notebook runs without injected parameters.\n",
    "INPUT_PATH = BASE_PATH / \"tmp\" / \"3_aggregate_features\" / \"input\"\n",
    "OUTPUT_PATH = BASE_PATH / \"tmp\" / \"3_aggregate_features\" / \"output\"\n",
    "\n",
    "INPUT_PATHS: dict[str, str] = {\n",
    "    \"raw_features\": str(INPUT_PATH / \"raw_features.csv\")\n",
    "}\n",
    "\n",
    "# The key must be \"aggregated_features\": the output cell looks up\n",
    "# OUTPUT_PATHS[\"aggregated_features\"], so any other key raises KeyError\n",
    "# when these defaults are not overridden.\n",
    "OUTPUT_PATHS: dict[str, str] = {\n",
    "    \"aggregated_features\": str(OUTPUT_PATH / \"features.csv\")\n",
    "}"
   "execution_count": null,
   "id": "40dbf7fa",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-02-19T14:43:19.248798Z",
     "iopub.status.busy": "2024-02-19T14:43:19.248350Z",
     "iopub.status.idle": "2024-02-19T14:43:19.251965Z",
     "shell.execute_reply": "2024-02-19T14:43:19.251370Z"
     "duration": 0.007812,
     "end_time": "2024-02-19T14:43:19.253560",
     "exception": false,
     "start_time": "2024-02-19T14:43:19.245748",
     "status": "completed"
    },
    "tags": [
     "injected-parameters"
    ]
   },
   "outputs": [],
   "source": [
    "# Parameters\n",
    "INPUT_PATHS = {\n",
    "    \"raw_features\": \"/home/lukas/Programming/uni/bachelorarbeit/dbrepo-ismir/tmp/3_aggregate_features/input/raw_features.csv\"\n",
    "OUTPUT_PATHS = {\n",
    "    \"aggregated_features\": \"/home/lukas/Programming/uni/bachelorarbeit/dbrepo-ismir/tmp/3_aggregate_features/output/features.csv\"\n",
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c5d9d980",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-02-19T14:43:19.263504Z",
     "iopub.status.busy": "2024-02-19T14:43:19.263172Z",
     "iopub.status.idle": "2024-02-19T14:43:23.707599Z",
     "shell.execute_reply": "2024-02-19T14:43:23.706545Z"
     "duration": 4.452062,
     "end_time": "2024-02-19T14:43:23.709599",
     "exception": false,
     "start_time": "2024-02-19T14:43:19.257537",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "# inputs\n",
    "raw_features = pd.read_csv(INPUT_PATHS[\"raw_features\"], index_col=False)"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "99f75f47",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-02-19T14:43:23.721962Z",
     "iopub.status.busy": "2024-02-19T14:43:23.721698Z",
     "iopub.status.idle": "2024-02-19T14:43:27.486305Z",
     "shell.execute_reply": "2024-02-19T14:43:27.485675Z"
     "duration": 3.772661,
     "end_time": "2024-02-19T14:43:27.488022",
     "exception": false,
     "start_time": "2024-02-19T14:43:23.715361",
     "status": "completed"
   "outputs": [],
   "source": [
    "meta_columns = [\"sample\", \"filename\", \"label\"]\n",
    "mfcc_aggregated = raw_features\\\n",
    "    .drop(meta_columns, axis=1, errors='ignore')\\\n",
    "    .groupby(raw_features.filename).agg(['min', 'max', 'mean', 'std', 'skew'])\n",
    "\n",
    "mfcc_meta = pd.DataFrame(raw_features['label'].groupby(raw_features.filename).last())\n",
    "mfcc_meta.columns = pd.MultiIndex.from_arrays([['label'], ['']])    # needed for merge\n",
    "mfcc_merged = pd.merge(mfcc_meta, mfcc_aggregated, left_index=True, right_index=True)\n",
    "\n",
    "# reduce multi index to single index\n",
    "one_level_cols = ['_'.join([str(el) for el in col]) for col in mfcc_merged.columns[1:]]\n",
    "one_level_cols.insert(0, \"label\")\n",
    "\n",
    "mfcc_merged.columns = pd.Index(one_level_cols)\n",
    "mfcc_merged = mfcc_merged.reset_index()\n",
    "mfcc_merged"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4ac5c765",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-02-19T14:43:27.495015Z",
     "iopub.status.busy": "2024-02-19T14:43:27.494787Z",
     "iopub.status.idle": "2024-02-19T14:43:27.574541Z",
     "shell.execute_reply": "2024-02-19T14:43:27.573938Z"
     "duration": 0.084978,
     "end_time": "2024-02-19T14:43:27.576110",
     "exception": false,
     "start_time": "2024-02-19T14:43:27.491132",
     "status": "completed"
   "outputs": [],
   "source": [
    "# outputs\n",
    "aggregated_features_path = Path(OUTPUT_PATHS[\"aggregated_features\"]).resolve()\n",
    "aggregated_features_path.parent.mkdir(parents=True, exist_ok=True)\n",
    "output = mfcc_merged\n",
    "output.to_csv(aggregated_features_path, index=False)"
  }
 ],
 "metadata": {
  "celltoolbar": "Tags",
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.13"
  },
  "papermill": {
   "default_parameters": {},
   "duration": 9.950754,
   "end_time": "2024-02-19T14:43:27.897395",
   "environment_variables": {},
   "exception": null,
   "input_path": "/home/lukas/Programming/uni/bachelorarbeit/dbrepo-ismir/notebooks/3_aggregate_features.ipynb",
   "output_path": "/home/lukas/Programming/uni/bachelorarbeit/dbrepo-ismir/notebooks/3_aggregate_features.ipynb",
   "parameters": {
    "INPUT_PATHS": {
     "raw_features": "/home/lukas/Programming/uni/bachelorarbeit/dbrepo-ismir/tmp/3_aggregate_features/input/raw_features.csv"
     "aggregated_features": "/home/lukas/Programming/uni/bachelorarbeit/dbrepo-ismir/tmp/3_aggregate_features/output/features.csv"
   "start_time": "2024-02-19T14:43:17.946641",
   "version": "2.4.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5