Newer
Older
{
"cell_type": "markdown",
"source": [
"# Feature Extraction of Base audio files from Invenio"
],
"metadata": {
"collapsed": false
}
},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from contextlib import contextmanager, redirect_stderr, redirect_stdout\n",
"import librosa\n",
"import tarfile\n",
"from pathlib import Path\n",
"from concurrent.futures import ThreadPoolExecutor\n",
"from definitions import BASE_PATH\n",
"import os"
"# inputs\n",
"\n",
"DEFAULT_SAMPLING_RATE = 22050\n",
"INPUT_PATH = BASE_PATH / \"tmp\" / \"2_generate_features\" / \"input\"\n",
"assert INPUT_PATH.exists() and INPUT_PATH.is_dir()\n",
"\n",
"# files = load_input_audio_archive(gzip_audio_path := INPUT_PATH / \"audio_files.tar.gz\", gzip_audio_path.with_suffix(\"\"))\n",
"with tarfile.open(audio_gz := INPUT_PATH / \"audio_files.tar.gz\", \"r:gz\") as archive:\n",
" archive.extractall(path=(path_out := audio_gz.with_suffix(\"\").with_suffix(\"\")))\n",
"\n",
"files = list(path_out.rglob(\"**/*.*\"))"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"@contextmanager\n",
"def suppress_stdout_stderr():\n",
" \"\"\"A context manager that redirects stdout and stderr to devnull\"\"\"\n",
" with open(os.devnull, 'w') as fnull:\n",
" with redirect_stderr(fnull) as err, redirect_stdout(fnull) as out:\n",
" yield err, out"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"def generate_mfcc_feature(filepath: Path, sr: int = DEFAULT_SAMPLING_RATE, number_mfccs: int = 40):\n",
" x, _ = load_mp3(filepath, sr=sr)\n",
" assert sr == _\n",
" mfcc = librosa.feature.mfcc(x, sr=sr, n_mfcc=number_mfccs)\n",
"\n",
" # transpose to use mfcc bands as columns instead of rows\n",
" return pd.DataFrame(mfcc).transpose()\n",
"\n",
"def load_mp3(filepath: Path, sr: int = DEFAULT_SAMPLING_RATE):\n",
" x, sr = librosa.load(filepath, sr=sr) # extract wave (x) with sample rate (sr)\n",
" return x, sr\n",
"\n",
"with suppress_stdout_stderr(), ThreadPoolExecutor(6) as executor:\n",
" dataframes = list(executor.map(\n",
" lambda args: generate_mfcc_feature(args), files)\n",
" )"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": 39,
"outputs": [
{
"data": {
"text/plain": " sample filename label 0 1 2 \\\n0 0 classical_31.mp3 classical -542.092712 0.000000 0.000000 \n1 1 classical_31.mp3 classical -365.767517 128.248245 22.577076 \n2 2 classical_31.mp3 classical -263.690796 152.662231 8.463179 \n3 3 classical_31.mp3 classical -274.046204 167.853973 0.219928 \n4 4 classical_31.mp3 classical -321.646484 180.291336 6.916321 \n... ... ... ... ... ... ... \n2581 2581 classical_72.mp3 classical -160.162979 130.473236 -32.025749 \n2582 2582 classical_72.mp3 classical -162.934143 132.160980 -28.300167 \n2583 2583 classical_72.mp3 classical -169.480850 135.194305 -22.370029 \n2584 2584 classical_72.mp3 classical -168.950531 132.961243 -21.946640 \n2585 2585 classical_72.mp3 classical -189.100098 132.286774 -27.398260 \n\n 3 4 5 6 ... 30 31 \\\n0 0.000000 0.000000 0.000000 0.000000 ... 0.000000 0.000000 \n1 20.934528 -0.449026 -1.795851 2.589335 ... 9.622649 -4.580847 \n2 26.477173 -2.974573 -3.461343 -0.786243 ... 10.505743 -4.702750 \n3 20.995689 3.986111 -8.224628 -4.622746 ... 11.967682 -3.144123 \n4 -2.687384 19.342781 -8.181542 -13.956495 ... 9.823071 -0.603017 \n... ... ... ... ... ... ... ... \n2581 26.953928 -4.436360 4.611903 -5.685563 ... -26.455120 11.349600 \n2582 29.726395 -4.294048 5.272017 -11.556747 ... -28.494461 9.209765 \n2583 34.150307 -4.354477 10.803973 -16.160812 ... -32.821671 4.160483 \n2584 32.357052 -0.539544 12.749693 -16.936378 ... -34.177902 8.545770 \n2585 24.619659 1.121615 12.316277 -18.560797 ... -25.033850 8.331364 \n\n 32 33 34 35 36 37 38 \\\n0 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 \n1 -8.416413 -2.966157 -3.655181 -3.406460 -0.686921 5.571447 6.963717 \n2 -11.563266 -5.561853 -5.313986 -3.294160 -3.057588 10.480234 17.680906 \n3 -12.740451 -7.371952 -5.917359 -5.108079 -4.611672 13.642330 26.605982 \n4 -10.818475 -5.302371 -4.300508 -8.503809 0.192995 22.705029 34.970181 \n... ... ... ... ... ... ... ... \n2581 3.235260 -8.523579 0.887564 0.670013 5.510224 9.620525 0.865595 \n2582 6.382549 -8.970840 3.018173 1.580217 6.897562 8.666082 -3.402146 \n2583 6.848429 -6.571140 1.366374 1.166915 0.111318 10.242280 -4.068230 \n2584 11.344945 -0.459642 -1.710949 2.100704 -6.341388 11.665768 -3.810056 \n2585 12.196844 -0.386062 -5.598907 -0.709183 -4.358295 8.415619 -0.043498 \n\n 39 \n0 0.000000 \n1 6.761931 \n2 15.001865 \n3 24.616718 \n4 31.780888 \n... ... \n2581 -12.899762 \n2582 -11.331513 \n2583 -12.881722 \n2584 -15.456677 \n2585 -11.858371 \n\n[1029854 rows x 43 columns]",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>sample</th>\n <th>filename</th>\n <th>label</th>\n <th>0</th>\n <th>1</th>\n <th>2</th>\n <th>3</th>\n <th>4</th>\n <th>5</th>\n <th>6</th>\n <th>...</th>\n <th>30</th>\n <th>31</th>\n <th>32</th>\n <th>33</th>\n <th>34</th>\n <th>35</th>\n <th>36</th>\n <th>37</th>\n <th>38</th>\n <th>39</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>0</td>\n <td>classical_31.mp3</td>\n <td>classical</td>\n <td>-542.092712</td>\n <td>0.000000</td>\n <td>0.000000</td>\n <td>0.000000</td>\n <td>0.000000</td>\n <td>0.000000</td>\n <td>0.000000</td>\n <td>...</td>\n <td>0.000000</td>\n <td>0.000000</td>\n <td>0.000000</td>\n <td>0.000000</td>\n <td>0.000000</td>\n <td>0.000000</td>\n <td>0.000000</td>\n <td>0.000000</td>\n <td>0.000000</td>\n <td>0.000000</td>\n </tr>\n <tr>\n <th>1</th>\n <td>1</td>\n <td>classical_31.mp3</td>\n <td>classical</td>\n <td>-365.767517</td>\n <td>128.248245</td>\n <td>22.577076</td>\n <td>20.934528</td>\n <td>-0.449026</td>\n <td>-1.795851</td>\n <td>2.589335</td>\n <td>...</td>\n <td>9.622649</td>\n <td>-4.580847</td>\n <td>-8.416413</td>\n <td>-2.966157</td>\n <td>-3.655181</td>\n <td>-3.406460</td>\n <td>-0.686921</td>\n <td>5.571447</td>\n <td>6.963717</td>\n <td>6.761931</td>\n </tr>\n <tr>\n <th>2</th>\n <td>2</td>\n <td>classical_31.mp3</td>\n <td>classical</td>\n <td>-263.690796</td>\n <td>152.662231</td>\n <td>8.463179</td>\n <td>26.477173</td>\n <td>-2.974573</td>\n <td>-3.461343</td>\n <td>-0.786243</td>\n <td>...</td>\n <td>10.505743</td>\n <td>-4.702750</td>\n <td>-11.563266</td>\n <td>-5.561853</td>\n <td>-5.313986</td>\n <td>-3.294160</td>\n <td>-3.057588</td>\n <td>10.480234</td>\n <td>17.680906</td>\n <td>15.001865</td>\n </tr>\n <tr>\n <th>3</th>\n <td>3</td>\n <td>classical_31.mp3</td>\n <td>classical</td>\n <td>-274.046204</td>\n <td>167.853973</td>\n <td>0.219928</td>\n <td>20.995689</td>\n <td>3.986111</td>\n <td>-8.224628</td>\n <td>-4.622746</td>\n <td>...</td>\n <td>11.967682</td>\n <td>-3.144123</td>\n <td>-12.740451</td>\n <td>-7.371952</td>\n <td>-5.917359</td>\n <td>-5.108079</td>\n <td>-4.611672</td>\n <td>13.642330</td>\n <td>26.605982</td>\n <td>24.616718</td>\n </tr>\n <tr>\n <th>4</th>\n <td>4</td>\n <td>classical_31.mp3</td>\n <td>classical</td>\n <td>-321.646484</td>\n <td>180.291336</td>\n <td>6.916321</td>\n <td>-2.687384</td>\n <td>19.342781</td>\n <td>-8.181542</td>\n <td>-13.956495</td>\n <td>...</td>\n <td>9.823071</td>\n <td>-0.603017</td>\n <td>-10.818475</td>\n <td>-5.302371</td>\n <td>-4.300508</td>\n <td>-8.503809</td>\n <td>0.192995</td>\n <td>22.705029</td>\n <td>34.970181</td>\n <td>31.780888</td>\n </tr>\n <tr>\n <th>...</th>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n </tr>\n <tr>\n <th>2581</th>\n <td>2581</td>\n <td>classical_72.mp3</td>\n <td>classical</td>\n <td>-160.162979</td>\n <td>130.473236</td>\n <td>-32.025749</td>\n <td>26.953928</td>\n <td>-4.436360</td>\n <td>4.611903</td>\n <td>-5.685563</td>\n <td>...</td>\n <td>-26.455120</td>\n <td>11.349600</td>\n <td>3.235260</td>\n <td>-8.523579</td>\n <td>0.887564</td>\n <td>0.670013</td>\n <td>5.510224</td>\n <td>9.620525</td>\n <td>0.865595</td>\n <td>-12.899762</td>\n </tr>\n <tr>\n <th>2582</th>\n <td>2582</td>\n <td>classical_72.mp3</td>\n <td>classical</td>\n <td>-162.934143</td>\n <td>132.160980</td>\n <td>-28.300167</td>\n <td>29.726395</td>\n <td>-4.294048</td>\n <td>5.272017</td>\n <td>-11.556747</td>\n <td>...</td>\n <td>-28.494461</td>\n <td>9.209765</td>\n <td>6.382549</td>\n <td>-8.970840</td>\n <td>3.018173</td>\n <td>1.580217</td>\n <td>6.897562</td>\n <td>8.666082</td>\n <td>-3.402146</td>\n <td>-11.331513</td>\n </tr>\n <tr>\n <th>2583</th>\n <td>2583</td>\n <td>classical_72.mp3</td>\n <td>classical</td>\n <td>-169.480850</td>\n <td>135.194305</td>\n <td>-22.370029</td>\n <td>34.150307</td>\n <td>-4.354477</td>\n <td>10.803973</td>\n <td>-16.160812</td>\n <td>...</td>\n <td>-32.821671</td>\n <td>4.160483</td>\n <td>6.848429</td>\n <td>-6.571140</td>\n <td>1.366374</td>\n <td>1.166915</td>\n <td>0.111318</td>\n <td>10.242280</td>\n <td>-4.068230</td>\n <td>-12.881722</td>\n </tr>\n <tr>\n <th>2584</th>\n <td>2584</td>\n <td>classical_72.mp3</td>\n <td>classical</td>\n <td>-168.950531</td>\n <td>132.961243</td>\n <td>-21.946640</td>\n <td>32.357052</td>\n <td>-0.539544</td>\n <td>12.749693</td>\n <td>-16.936378</td>\n <td>...</td>\n <td>-34.177902</td>\n <td>8.545770</td>\n <td>11.344945</td>\n <td>-0.459642</td>\n <td>-1.710949</td>\n <td>2.100704</td>\n <td>-6.341388</td>\n <td>11.665768</td>\n <td>-3.810056</td>\n <td>-15.456677</td>\n </tr>\n <tr>\n <th>2585</th>\n <td>2585</td>\n <td>classical_72.mp3</td>\n <td>classical</td>\n <td>-189.100098</td>\n <td>132.286774</td>\n <td>-27.398260</td>\n <td>24.619659</td>\n <td>1.121615</td>\n <td>12.316277</td>\n <td>-18.560797</td>\n <td>...</td>\n <td>-25.033850</td>\n <td>8.331364</td>\n <td>12.196844</td>\n <td>-0.386062</td>\n <td>-5.598907</td>\n <td>-0.709183</td>\n <td>-4.358295</td>\n <td>8.415619</td>\n <td>-0.043498</td>\n <td>-11.858371</td>\n </tr>\n </tbody>\n</table>\n<p>1029854 rows × 43 columns</p>\n</div>"
},
"execution_count": 39,
"metadata": {},
"output_type": "execute_result"
}
],
"for file, dataframe in zip(files, dataframes):\n",
" dataframe[\"sample\"] = dataframe.index.to_numpy(copy=True)\n",
" dataframe[\"filename\"] = file.name\n",
" dataframe[\"label\"] = file.name.split('_')[0] # extract genre from file name\n",
"dataframe_concat = pd.concat(dataframes)\n",
"columns_old = list(dataframe_concat.columns)\n",
"columns = columns_old[-3:] + columns_old[:-3]\n",
"dataframe_concat = dataframe_concat[columns]\n",
"output: pd.DataFrame = dataframe_concat\n",
"output"
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"# outputs\n",
"OUTPUT_PATH = BASE_PATH / \"tmp\" / \"2_generate_features\" / \"output\"\n",
"\n",
"output.to_csv(OUTPUT_PATH / \"output.csv\", index=False)\n",
"output.to_pickle(OUTPUT_PATH / \"output.pickle\")"
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
],
"metadata": {
"collapsed": false
}
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 0
}