diff --git a/fairnb/api/dbrepo.py b/fairnb/api/dbrepo.py index cdf3a9fba40083ad6eb1f42aa05498061eda4287..af75770999dcdc3f6e7ee70f7a25d8eff2e44aa4 100644 --- a/fairnb/api/dbrepo.py +++ b/fairnb/api/dbrepo.py @@ -248,8 +248,8 @@ class DBRepoConnector: chunk_size=CHUNK_SIZE, ) - upload_url = uploader.create_url() - uploader.set_url(upload_url.replace('http', 'https')) # FIX: wrong location response + upload_url = uploader.create_url().replace('http', 'https') + uploader.set_url(upload_url) # FIX: wrong location response uploader.upload() response_upload_import = requests.post( diff --git a/notebooks/1_audio_files.ipynb b/notebooks/1_audio_files.ipynb index 14a85d7af5546b9d8dcdf619168915fcb5ad976b..9a4a89f206310b670043c2c0d36b363ac76d7c28 100644 --- a/notebooks/1_audio_files.ipynb +++ b/notebooks/1_audio_files.ipynb @@ -9,10 +9,10 @@ "outputs_hidden": false }, "papermill": { - "duration": 0.00365, - "end_time": "2023-10-10T20:19:34.354097", + "duration": 0.002251, + "end_time": "2023-10-12T15:02:44.784634", "exception": false, - "start_time": "2023-10-10T20:19:34.350447", + "start_time": "2023-10-12T15:02:44.782383", "status": "completed" }, "tags": [] @@ -30,19 +30,19 @@ "metadata": { "collapsed": false, "execution": { - "iopub.execute_input": "2023-10-10T20:19:34.369926Z", - "iopub.status.busy": "2023-10-10T20:19:34.368623Z", - "iopub.status.idle": "2023-10-10T20:19:34.394688Z", - "shell.execute_reply": "2023-10-10T20:19:34.393354Z" + "iopub.execute_input": "2023-10-12T15:02:44.793587Z", + "iopub.status.busy": "2023-10-12T15:02:44.792076Z", + "iopub.status.idle": "2023-10-12T15:02:44.805128Z", + "shell.execute_reply": "2023-10-12T15:02:44.804326Z" }, "jupyter": { "outputs_hidden": false }, "papermill": { - "duration": 0.037909, - "end_time": "2023-10-10T20:19:34.398270", + "duration": 0.019647, + "end_time": "2023-10-12T15:02:44.806758", "exception": false, - "start_time": "2023-10-10T20:19:34.360361", + "start_time": "2023-10-12T15:02:44.787111", "status": "completed" }, "tags": [] @@ -62,16 +62,16 @@ "id": "1b4e6b01", "metadata": { "execution": { - "iopub.execute_input": "2023-10-10T20:19:34.412182Z", - "iopub.status.busy": "2023-10-10T20:19:34.410878Z", - "iopub.status.idle": "2023-10-10T20:19:34.418072Z", - "shell.execute_reply": "2023-10-10T20:19:34.416762Z" + "iopub.execute_input": "2023-10-12T15:02:44.812055Z", + "iopub.status.busy": "2023-10-12T15:02:44.811746Z", + "iopub.status.idle": "2023-10-12T15:02:44.816038Z", + "shell.execute_reply": "2023-10-12T15:02:44.814872Z" }, "papermill": { - "duration": 0.0178, - "end_time": "2023-10-10T20:19:34.421245", + "duration": 0.009142, + "end_time": "2023-10-12T15:02:44.817858", "exception": false, - "start_time": "2023-10-10T20:19:34.403445", + "start_time": "2023-10-12T15:02:44.808716", "status": "completed" }, "tags": [ @@ -90,19 +90,19 @@ { "cell_type": "code", "execution_count": 3, - "id": "a0c3731f", + "id": "24969e80", "metadata": { "execution": { - "iopub.execute_input": "2023-10-10T20:19:34.432077Z", - "iopub.status.busy": "2023-10-10T20:19:34.431120Z", - "iopub.status.idle": "2023-10-10T20:19:34.436917Z", - "shell.execute_reply": "2023-10-10T20:19:34.435800Z" + "iopub.execute_input": "2023-10-12T15:02:44.828671Z", + "iopub.status.busy": "2023-10-12T15:02:44.828439Z", + "iopub.status.idle": "2023-10-12T15:02:44.832839Z", + "shell.execute_reply": "2023-10-12T15:02:44.831693Z" }, "papermill": { - "duration": 0.014193, - "end_time": "2023-10-10T20:19:34.439709", + "duration": 0.013697, + "end_time": "2023-10-12T15:02:44.836265", "exception": false, - "start_time": "2023-10-10T20:19:34.425516", + "start_time": "2023-10-12T15:02:44.822568", "status": "completed" }, "tags": [ @@ -125,19 +125,19 @@ "metadata": { "collapsed": false, "execution": { - "iopub.execute_input": "2023-10-10T20:19:34.446770Z", - "iopub.status.busy": "2023-10-10T20:19:34.446426Z", - "iopub.status.idle": "2023-10-10T20:19:36.570981Z", - "shell.execute_reply": "2023-10-10T20:19:36.570217Z" + "iopub.execute_input": "2023-10-12T15:02:44.845046Z", + "iopub.status.busy": "2023-10-12T15:02:44.844716Z", + "iopub.status.idle": "2023-10-12T15:02:47.414624Z", + "shell.execute_reply": "2023-10-12T15:02:47.413947Z" }, "jupyter": { "outputs_hidden": false }, "papermill": { - "duration": 2.131337, - "end_time": "2023-10-10T20:19:36.573889", + "duration": 2.57669, + "end_time": "2023-10-12T15:02:47.417300", "exception": false, - "start_time": "2023-10-10T20:19:34.442552", + "start_time": "2023-10-12T15:02:44.840610", "status": "completed" }, "tags": [] @@ -161,19 +161,19 @@ "metadata": { "collapsed": false, "execution": { - "iopub.execute_input": "2023-10-10T20:19:36.590650Z", - "iopub.status.busy": "2023-10-10T20:19:36.590408Z", - "iopub.status.idle": "2023-10-10T20:19:37.253257Z", - "shell.execute_reply": "2023-10-10T20:19:37.252729Z" + "iopub.execute_input": "2023-10-12T15:02:47.424387Z", + "iopub.status.busy": "2023-10-12T15:02:47.423773Z", + "iopub.status.idle": "2023-10-12T15:02:47.964668Z", + "shell.execute_reply": "2023-10-12T15:02:47.963816Z" }, "jupyter": { "outputs_hidden": false }, "papermill": { - "duration": 0.673083, - "end_time": "2023-10-10T20:19:37.254793", + "duration": 0.547108, + "end_time": "2023-10-12T15:02:47.967433", "exception": false, - "start_time": "2023-10-10T20:19:36.581710", + "start_time": "2023-10-12T15:02:47.420325", "status": "completed" }, "tags": [] @@ -195,19 +195,19 @@ "metadata": { "collapsed": false, "execution": { - "iopub.execute_input": "2023-10-10T20:19:37.268248Z", - "iopub.status.busy": "2023-10-10T20:19:37.267971Z", - "iopub.status.idle": "2023-10-10T20:19:50.606898Z", - "shell.execute_reply": "2023-10-10T20:19:50.606324Z" + "iopub.execute_input": "2023-10-12T15:02:47.977304Z", + "iopub.status.busy": "2023-10-12T15:02:47.976955Z", + "iopub.status.idle": "2023-10-12T15:03:01.218667Z", + "shell.execute_reply": "2023-10-12T15:03:01.218013Z" }, "jupyter": { "outputs_hidden": false }, "papermill": { - "duration": 13.347122, - "end_time": "2023-10-10T20:19:50.608576", + "duration": 13.247133, + "end_time": "2023-10-12T15:03:01.220388", "exception": false, - "start_time": "2023-10-10T20:19:37.261454", + "start_time": "2023-10-12T15:02:47.973255", "status": "completed" }, "tags": [] @@ -242,8 +242,8 @@ }, "papermill": { "default_parameters": {}, - "duration": 17.279795, - "end_time": "2023-10-10T20:19:50.829787", + "duration": 17.446157, + "end_time": "2023-10-12T15:03:01.439421", "environment_variables": {}, "exception": null, "input_path": "/home/lukas/Programming/uni/bachelorarbeit/dbrepo-ismir/notebooks/1_audio_files.ipynb", @@ -254,7 +254,7 @@ "audio_tar": "/home/lukas/Programming/uni/bachelorarbeit/dbrepo-ismir/tmp/1_audio_files/output/emotifymusic.tar.gz" } }, - "start_time": "2023-10-10T20:19:33.549992", + "start_time": "2023-10-12T15:02:43.993264", "version": "2.4.0" } }, diff --git a/notebooks/4_split.ipynb b/notebooks/4_split.ipynb index 371f87d8020e8c5e32f3fd71f1815f9d4274225e..833b60e9729352cbe20950ac603312beac1e4adb 100644 --- a/notebooks/4_split.ipynb +++ b/notebooks/4_split.ipynb @@ -5,10 +5,10 @@ "id": "e92b4fe9", "metadata": { "papermill": { - "duration": 0.005822, - "end_time": "2023-10-10T20:29:52.589509", + "duration": 0.080512, + "end_time": "2023-10-12T15:14:55.816491", "exception": false, - "start_time": "2023-10-10T20:29:52.583687", + "start_time": "2023-10-12T15:14:55.735979", "status": "completed" }, "tags": [] @@ -23,16 +23,16 @@ "id": "5f1fae44", "metadata": { "execution": { - "iopub.execute_input": "2023-10-10T20:29:52.604063Z", - "iopub.status.busy": "2023-10-10T20:29:52.602712Z", - "iopub.status.idle": "2023-10-10T20:29:52.903037Z", - "shell.execute_reply": "2023-10-10T20:29:52.902341Z" + "iopub.execute_input": "2023-10-12T15:14:55.891297Z", + "iopub.status.busy": "2023-10-12T15:14:55.890921Z", + "iopub.status.idle": "2023-10-12T15:14:56.150268Z", + "shell.execute_reply": "2023-10-12T15:14:56.149700Z" }, "papermill": { - "duration": 0.310276, - "end_time": "2023-10-10T20:29:52.905670", + "duration": 0.303163, + "end_time": "2023-10-12T15:14:56.153825", "exception": false, - "start_time": "2023-10-10T20:29:52.595394", + "start_time": "2023-10-12T15:14:55.850662", "status": "completed" }, "tags": [] @@ -51,16 +51,16 @@ "metadata": { "collapsed": false, "execution": { - "iopub.execute_input": "2023-10-10T20:29:52.911502Z", - "iopub.status.busy": "2023-10-10T20:29:52.911091Z", - "iopub.status.idle": "2023-10-10T20:29:52.915967Z", - "shell.execute_reply": "2023-10-10T20:29:52.915019Z" + "iopub.execute_input": "2023-10-12T15:14:56.221857Z", + "iopub.status.busy": "2023-10-12T15:14:56.221487Z", + "iopub.status.idle": "2023-10-12T15:14:56.225677Z", + "shell.execute_reply": "2023-10-12T15:14:56.225035Z" }, "papermill": { - "duration": 0.009356, - "end_time": "2023-10-10T20:29:52.917383", + "duration": 0.042817, + "end_time": "2023-10-12T15:14:56.231733", "exception": false, - "start_time": "2023-10-10T20:29:52.908027", + "start_time": "2023-10-12T15:14:56.188916", "status": "completed" }, "tags": [ @@ -83,19 +83,19 @@ { "cell_type": "code", "execution_count": 3, - "id": "d8169758", + "id": "7e3072e3", "metadata": { "execution": { - "iopub.execute_input": "2023-10-10T20:29:52.922218Z", - "iopub.status.busy": "2023-10-10T20:29:52.921930Z", - "iopub.status.idle": "2023-10-10T20:29:52.925542Z", - "shell.execute_reply": "2023-10-10T20:29:52.924834Z" + "iopub.execute_input": "2023-10-12T15:14:56.300858Z", + "iopub.status.busy": "2023-10-12T15:14:56.300376Z", + "iopub.status.idle": "2023-10-12T15:14:56.304010Z", + "shell.execute_reply": "2023-10-12T15:14:56.303352Z" }, "papermill": { - "duration": 0.007457, - "end_time": "2023-10-10T20:29:52.926785", + "duration": 0.042576, + "end_time": "2023-10-12T15:14:56.308986", "exception": false, - "start_time": "2023-10-10T20:29:52.919328", + "start_time": "2023-10-12T15:14:56.266410", "status": "completed" }, "tags": [ @@ -119,16 +119,16 @@ "id": "a4cc6800", "metadata": { "execution": { - "iopub.execute_input": "2023-10-10T20:29:52.931298Z", - "iopub.status.busy": "2023-10-10T20:29:52.931087Z", - "iopub.status.idle": "2023-10-10T20:29:52.953647Z", - "shell.execute_reply": "2023-10-10T20:29:52.952884Z" + "iopub.execute_input": "2023-10-12T15:14:56.377079Z", + "iopub.status.busy": "2023-10-12T15:14:56.376735Z", + "iopub.status.idle": "2023-10-12T15:14:56.401195Z", + "shell.execute_reply": "2023-10-12T15:14:56.400641Z" }, "papermill": { - "duration": 0.026646, - "end_time": "2023-10-10T20:29:52.955241", + "duration": 0.062802, + "end_time": "2023-10-12T15:14:56.405085", "exception": false, - "start_time": "2023-10-10T20:29:52.928595", + "start_time": "2023-10-12T15:14:56.342283", "status": "completed" }, "tags": [] @@ -149,16 +149,16 @@ "id": "a186d0c4", "metadata": { "execution": { - "iopub.execute_input": "2023-10-10T20:29:52.960691Z", - "iopub.status.busy": "2023-10-10T20:29:52.960246Z", - "iopub.status.idle": "2023-10-10T20:29:52.967937Z", - "shell.execute_reply": "2023-10-10T20:29:52.967204Z" + "iopub.execute_input": "2023-10-12T15:14:56.469799Z", + "iopub.status.busy": "2023-10-12T15:14:56.469259Z", + "iopub.status.idle": "2023-10-12T15:14:56.477683Z", + "shell.execute_reply": "2023-10-12T15:14:56.477139Z" }, "papermill": { - "duration": 0.011851, - "end_time": "2023-10-10T20:29:52.969363", + "duration": 0.045526, + "end_time": "2023-10-12T15:14:56.482599", "exception": false, - "start_time": "2023-10-10T20:29:52.957512", + "start_time": "2023-10-12T15:14:56.437073", "status": "completed" }, "tags": [] @@ -188,16 +188,16 @@ "id": "091e0641", "metadata": { "execution": { - "iopub.execute_input": "2023-10-10T20:29:52.976324Z", - "iopub.status.busy": "2023-10-10T20:29:52.975376Z", - "iopub.status.idle": "2023-10-10T20:29:52.997935Z", - "shell.execute_reply": "2023-10-10T20:29:52.996626Z" + "iopub.execute_input": "2023-10-12T15:14:56.553531Z", + "iopub.status.busy": "2023-10-12T15:14:56.553258Z", + "iopub.status.idle": "2023-10-12T15:14:56.570593Z", + "shell.execute_reply": "2023-10-12T15:14:56.569992Z" }, "papermill": { - "duration": 0.028011, - "end_time": "2023-10-10T20:29:52.999430", + "duration": 0.058756, + "end_time": "2023-10-12T15:14:56.577017", "exception": false, - "start_time": "2023-10-10T20:29:52.971419", + "start_time": "2023-10-12T15:14:56.518261", "status": "completed" }, "tags": [] @@ -232,17 +232,17 @@ " <tr>\n", " <th>0</th>\n", " <td>classical_1.mp3</td>\n", - " <td>False</td>\n", + " <td>True</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>classical_10.mp3</td>\n", - " <td>True</td>\n", + " <td>False</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>classical_100.mp3</td>\n", - " <td>False</td>\n", + " <td>True</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", @@ -277,7 +277,7 @@ " <tr>\n", " <th>398</th>\n", " <td>rock_98.mp3</td>\n", - " <td>True</td>\n", + " <td>False</td>\n", " </tr>\n", " <tr>\n", " <th>399</th>\n", @@ -291,16 +291,16 @@ ], "text/plain": [ " filename train\n", - "0 classical_1.mp3 False\n", - "1 classical_10.mp3 True\n", - "2 classical_100.mp3 False\n", + "0 classical_1.mp3 True\n", + "1 classical_10.mp3 False\n", + "2 classical_100.mp3 True\n", "3 classical_11.mp3 True\n", "4 classical_12.mp3 True\n", ".. ... ...\n", "395 rock_95.mp3 True\n", "396 rock_96.mp3 True\n", "397 rock_97.mp3 True\n", - "398 rock_98.mp3 True\n", + "398 rock_98.mp3 False\n", "399 rock_99.mp3 True\n", "\n", "[400 rows x 2 columns]" @@ -321,16 +321,16 @@ "id": "7b11b8bb", "metadata": { "execution": { - "iopub.execute_input": "2023-10-10T20:29:53.004736Z", - "iopub.status.busy": "2023-10-10T20:29:53.004384Z", - "iopub.status.idle": "2023-10-10T20:29:53.012049Z", - "shell.execute_reply": "2023-10-10T20:29:53.010855Z" + "iopub.execute_input": "2023-10-12T15:14:56.663525Z", + "iopub.status.busy": "2023-10-12T15:14:56.662876Z", + "iopub.status.idle": "2023-10-12T15:14:56.669457Z", + "shell.execute_reply": "2023-10-12T15:14:56.668685Z" }, "papermill": { - "duration": 0.012188, - "end_time": "2023-10-10T20:29:53.013673", + "duration": 0.057933, + "end_time": "2023-10-12T15:14:56.676546", "exception": false, - "start_time": "2023-10-10T20:29:53.001485", + "start_time": "2023-10-12T15:14:56.618613", "status": "completed" }, "tags": [] @@ -367,8 +367,8 @@ }, "papermill": { "default_parameters": {}, - "duration": 1.731275, - "end_time": "2023-10-10T20:29:53.335011", + "duration": 2.277047, + "end_time": "2023-10-12T15:14:57.042208", "environment_variables": {}, "exception": null, "input_path": "/home/lukas/Programming/uni/bachelorarbeit/dbrepo-ismir/notebooks/4_split.ipynb", @@ -381,7 +381,7 @@ "split": "/home/lukas/Programming/uni/bachelorarbeit/dbrepo-ismir/tmp/4_split/output/split.csv" } }, - "start_time": "2023-10-10T20:29:51.603736", + "start_time": "2023-10-12T15:14:54.765161", "version": "2.4.0" } }, diff --git a/notebooks/main.ipynb b/notebooks/main.ipynb index d20532b244283f4ac5746627351e2692260b93f4..b2c5a5a39e0348d22cccd264900ee4950e162421 100644 --- a/notebooks/main.ipynb +++ b/notebooks/main.ipynb @@ -18,8 +18,8 @@ "metadata": { "collapsed": true, "ExecuteTime": { - "end_time": "2023-10-10T20:19:32.437151147Z", - "start_time": "2023-10-10T20:19:32.416560426Z" + "end_time": "2023-10-12T15:14:46.207331868Z", + "start_time": "2023-10-12T15:14:46.207003875Z" } }, "outputs": [], @@ -34,8 +34,8 @@ "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2023-10-10T20:19:33.208604535Z", - "start_time": "2023-10-10T20:19:32.416869981Z" + "end_time": "2023-10-12T15:14:47.334123101Z", + "start_time": "2023-10-12T15:14:46.420606786Z" } }, "outputs": [], @@ -59,14 +59,25 @@ "collapsed": false, "lines_to_next_cell": 2, "ExecuteTime": { - "end_time": "2023-10-10T20:19:33.338327558Z", - "start_time": "2023-10-10T20:19:33.211495550Z" + "end_time": "2023-10-12T15:14:47.527905861Z", + "start_time": "2023-10-12T15:14:47.329609217Z" } }, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "DEBUG:urllib3.util.retry:Converted retries value: 1 -> Retry(total=1, connect=None, read=None, redirect=None, status=None)\n", + "DEBUG:urllib3.util.retry:Converted retries value: 1 -> Retry(total=1, connect=None, read=None, redirect=None, status=None)\n", + "DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): test.dbrepo.tuwien.ac.at:443\n", + "DEBUG:urllib3.connectionpool:https://test.dbrepo.tuwien.ac.at:443 \"POST /api/auth/realms/dbrepo/protocol/openid-connect/token HTTP/1.1\" 200 4299\n" + ] + } + ], "source": [ "logging.basicConfig(\n", - " level=logging.INFO\n", + " level=logging.DEBUG\n", ")\n", "\n", "ONLY_LOCAL = False\n", @@ -83,12 +94,12 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 4, "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2023-10-10T20:22:19.285948692Z", - "start_time": "2023-10-10T20:19:33.368231905Z" + "end_time": "2023-10-12T15:05:43.638046061Z", + "start_time": "2023-10-12T15:02:43.738618700Z" } }, "outputs": [ @@ -106,7 +117,7 @@ "application/vnd.jupyter.widget-view+json": { "version_major": 2, "version_minor": 0, - "model_id": "51fe3c161031485d92d9cd7d23edc4d3" + "model_id": "c97f76be78b84199b70336a49adcecaa" } }, "metadata": {}, @@ -120,7 +131,33 @@ "INFO:fairnb.api.invenio:Picked up 1 files\n", "INFO:fairnb.api.invenio:Uploading 1 to https://test.researchdata.tuwien.ac.at\n", "INFO:fairnb.api.invenio:Uploading /home/lukas/Programming/uni/bachelorarbeit/dbrepo-ismir/tmp/1_audio_files/output/emotifymusic.tar.gz as emotifymusic.tar.gz\n", - "INFO:fairnb.api.invenio:Finished upload of emotifymusic.tar.gz\n" + "INFO:fairnb.api.invenio:Finished upload of emotifymusic.tar.gz\n", + "WARNING:fairnb.api.dbrepo:Move for table 7 failed: <Response [400]>\n" + ] + }, + { + "ename": "KeyError", + "evalue": "'id'", + "output_type": "error", + "traceback": [ + "\u001B[0;31m---------------------------------------------------------------------------\u001B[0m", + "\u001B[0;31mKeyError\u001B[0m Traceback (most recent call last)", + "File \u001B[0;32m~/Programming/uni/bachelorarbeit/dbrepo-ismir/.venv/lib/python3.10/site-packages/pandas/core/indexes/base.py:3802\u001B[0m, in \u001B[0;36mIndex.get_loc\u001B[0;34m(self, key, method, tolerance)\u001B[0m\n\u001B[1;32m 3801\u001B[0m \u001B[38;5;28;01mtry\u001B[39;00m:\n\u001B[0;32m-> 3802\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43m_engine\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mget_loc\u001B[49m\u001B[43m(\u001B[49m\u001B[43mcasted_key\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m 3803\u001B[0m \u001B[38;5;28;01mexcept\u001B[39;00m \u001B[38;5;167;01mKeyError\u001B[39;00m \u001B[38;5;28;01mas\u001B[39;00m err:\n", + "File \u001B[0;32m~/Programming/uni/bachelorarbeit/dbrepo-ismir/.venv/lib/python3.10/site-packages/pandas/_libs/index.pyx:138\u001B[0m, in \u001B[0;36mpandas._libs.index.IndexEngine.get_loc\u001B[0;34m()\u001B[0m\n", + "File \u001B[0;32m~/Programming/uni/bachelorarbeit/dbrepo-ismir/.venv/lib/python3.10/site-packages/pandas/_libs/index.pyx:165\u001B[0m, in \u001B[0;36mpandas._libs.index.IndexEngine.get_loc\u001B[0;34m()\u001B[0m\n", + "File \u001B[0;32mpandas/_libs/hashtable_class_helper.pxi:5745\u001B[0m, in \u001B[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001B[0;34m()\u001B[0m\n", + "File \u001B[0;32mpandas/_libs/hashtable_class_helper.pxi:5753\u001B[0m, in \u001B[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001B[0;34m()\u001B[0m\n", + "\u001B[0;31mKeyError\u001B[0m: 'id'", + "\nThe above exception was the direct cause of the following exception:\n", + "\u001B[0;31mKeyError\u001B[0m Traceback (most recent call last)", + "Cell \u001B[0;32mIn[4], line 21\u001B[0m\n\u001B[1;32m 3\u001B[0m metadata \u001B[38;5;241m=\u001B[39m yaml\u001B[38;5;241m.\u001B[39msafe_load(\u001B[38;5;28mopen\u001B[39m(RESOURCE_PATH \u001B[38;5;241m/\u001B[39m \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124m1_audio_files\u001B[39m\u001B[38;5;124m\"\u001B[39m \u001B[38;5;241m/\u001B[39m \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mrecord_metadata.yml\u001B[39m\u001B[38;5;124m\"\u001B[39m, \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mr\u001B[39m\u001B[38;5;124m\"\u001B[39m))\n\u001B[1;32m 5\u001B[0m nb_config_audio_files \u001B[38;5;241m=\u001B[39m NbConfig(\n\u001B[1;32m 6\u001B[0m nb_location\u001B[38;5;241m=\u001B[39mNOTEBOOK_PATH \u001B[38;5;241m/\u001B[39m \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124m1_audio_files.ipynb\u001B[39m\u001B[38;5;124m\"\u001B[39m,\n\u001B[1;32m 7\u001B[0m entities\u001B[38;5;241m=\u001B[39m[\n\u001B[0;32m (...)\u001B[0m\n\u001B[1;32m 18\u001B[0m dependencies\u001B[38;5;241m=\u001B[39m[]\n\u001B[1;32m 19\u001B[0m )\n\u001B[0;32m---> 21\u001B[0m \u001B[43mexecutor\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mexecute\u001B[49m\u001B[43m(\u001B[49m\u001B[43mnb_config_audio_files\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43monly_local\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mONLY_LOCAL\u001B[49m\u001B[43m)\u001B[49m\n", + "File \u001B[0;32m~/Programming/uni/bachelorarbeit/dbrepo-ismir/fairnb/executor.py:31\u001B[0m, in \u001B[0;36mExecutor.execute\u001B[0;34m(cls, nb_config, require_download, only_local, **kwargs)\u001B[0m\n\u001B[1;32m 28\u001B[0m \u001B[38;5;28mcls\u001B[39m\u001B[38;5;241m.\u001B[39mexecute_notebook(nb_config)\n\u001B[1;32m 30\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;129;01mnot\u001B[39;00m only_local:\n\u001B[0;32m---> 31\u001B[0m \u001B[38;5;28;43mcls\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mupload_entities\u001B[49m\u001B[43m(\u001B[49m\u001B[43mnb_config\u001B[49m\u001B[43m)\u001B[49m\n", + "File \u001B[0;32m~/Programming/uni/bachelorarbeit/dbrepo-ismir/fairnb/executor.py:52\u001B[0m, in \u001B[0;36mExecutor.upload_entities\u001B[0;34m(nb_config)\u001B[0m\n\u001B[1;32m 47\u001B[0m \u001B[38;5;129m@staticmethod\u001B[39m\n\u001B[1;32m 48\u001B[0m \u001B[38;5;28;01mdef\u001B[39;00m \u001B[38;5;21mupload_entities\u001B[39m(nb_config: NbConfig):\n\u001B[1;32m 49\u001B[0m \u001B[38;5;66;03m# load generated entity and upload it\u001B[39;00m\n\u001B[1;32m 50\u001B[0m \u001B[38;5;28;01mfor\u001B[39;00m entity \u001B[38;5;129;01min\u001B[39;00m nb_config\u001B[38;5;241m.\u001B[39mentities:\n\u001B[1;32m 51\u001B[0m \u001B[38;5;66;03m# use inspect to get path of caller\u001B[39;00m\n\u001B[0;32m---> 52\u001B[0m \u001B[43mentity\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mupload\u001B[49m\u001B[43m(\u001B[49m\u001B[43mnb_config\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mnb_location\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mnb_config\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mdependencies\u001B[49m\u001B[43m)\u001B[49m\n", + "File \u001B[0;32m~/Programming/uni/bachelorarbeit/dbrepo-ismir/fairnb/entity/invenio_entity.py:97\u001B[0m, in \u001B[0;36mInvenioEntity.upload\u001B[0;34m(self, executed_file, dependencies)\u001B[0m\n\u001B[1;32m 86\u001B[0m uri \u001B[38;5;241m=\u001B[39m create_response[\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mlinks\u001B[39m\u001B[38;5;124m\"\u001B[39m][\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mrecord\u001B[39m\u001B[38;5;124m\"\u001B[39m] \u001B[38;5;66;03m# Invalid until published\u001B[39;00m\n\u001B[1;32m 88\u001B[0m metadata \u001B[38;5;241m=\u001B[39m EntityProvenance\u001B[38;5;241m.\u001B[39mnew(\n\u001B[1;32m 89\u001B[0m name\u001B[38;5;241m=\u001B[39m\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mname,\n\u001B[1;32m 90\u001B[0m description\u001B[38;5;241m=\u001B[39m\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mdescription,\n\u001B[0;32m (...)\u001B[0m\n\u001B[1;32m 94\u001B[0m platform\u001B[38;5;241m=\u001B[39m\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124minvenio\u001B[39m\u001B[38;5;124m\"\u001B[39m,\n\u001B[1;32m 95\u001B[0m )\n\u001B[0;32m---> 97\u001B[0m \u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mupload_provenance\u001B[49m\u001B[43m(\u001B[49m\u001B[43mmetadata\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m 98\u001B[0m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mupload_dependencies(dependencies)\n", + "File \u001B[0;32m~/Programming/uni/bachelorarbeit/dbrepo-ismir/fairnb/entity/entity.py:126\u001B[0m, in \u001B[0;36mEntity.upload_provenance\u001B[0;34m(self, provenance)\u001B[0m\n\u001B[1;32m 123\u001B[0m df \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mdbrepo_connector\u001B[38;5;241m.\u001B[39mdownload_table_as_df(\u001B[38;5;28mstr\u001B[39m(\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mmetadata_table_id))\n\u001B[1;32m 125\u001B[0m \u001B[38;5;66;03m# FIXME: create robust version of id retrieval, if possible\u001B[39;00m\n\u001B[0;32m--> 126\u001B[0m row \u001B[38;5;241m=\u001B[39m df\u001B[38;5;241m.\u001B[39miloc[\u001B[43mdf\u001B[49m\u001B[43m[\u001B[49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[38;5;124;43mid\u001B[39;49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[43m]\u001B[49m\u001B[38;5;241m.\u001B[39midxmax()] \u001B[38;5;66;03m# get the newest row, as it should contain the correct data\u001B[39;00m\n\u001B[1;32m 127\u001B[0m meta \u001B[38;5;241m=\u001B[39m EntityProvenance\u001B[38;5;241m.\u001B[39mfrom_series(row)\n\u001B[1;32m 128\u001B[0m \u001B[38;5;28;01massert\u001B[39;00m meta\u001B[38;5;241m.\u001B[39mcreation_time \u001B[38;5;241m==\u001B[39m provenance\u001B[38;5;241m.\u001B[39mcreation_time \u001B[38;5;129;01mand\u001B[39;00m meta\u001B[38;5;241m.\u001B[39mname \u001B[38;5;241m==\u001B[39m provenance\u001B[38;5;241m.\u001B[39mname\n", + "File \u001B[0;32m~/Programming/uni/bachelorarbeit/dbrepo-ismir/.venv/lib/python3.10/site-packages/pandas/core/frame.py:3807\u001B[0m, in \u001B[0;36mDataFrame.__getitem__\u001B[0;34m(self, key)\u001B[0m\n\u001B[1;32m 3805\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mcolumns\u001B[38;5;241m.\u001B[39mnlevels \u001B[38;5;241m>\u001B[39m \u001B[38;5;241m1\u001B[39m:\n\u001B[1;32m 3806\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_getitem_multilevel(key)\n\u001B[0;32m-> 3807\u001B[0m indexer \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mcolumns\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mget_loc\u001B[49m\u001B[43m(\u001B[49m\u001B[43mkey\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m 3808\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m is_integer(indexer):\n\u001B[1;32m 3809\u001B[0m indexer \u001B[38;5;241m=\u001B[39m [indexer]\n", + "File \u001B[0;32m~/Programming/uni/bachelorarbeit/dbrepo-ismir/.venv/lib/python3.10/site-packages/pandas/core/indexes/base.py:3804\u001B[0m, in \u001B[0;36mIndex.get_loc\u001B[0;34m(self, key, method, tolerance)\u001B[0m\n\u001B[1;32m 3802\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_engine\u001B[38;5;241m.\u001B[39mget_loc(casted_key)\n\u001B[1;32m 3803\u001B[0m \u001B[38;5;28;01mexcept\u001B[39;00m \u001B[38;5;167;01mKeyError\u001B[39;00m \u001B[38;5;28;01mas\u001B[39;00m err:\n\u001B[0;32m-> 3804\u001B[0m \u001B[38;5;28;01mraise\u001B[39;00m \u001B[38;5;167;01mKeyError\u001B[39;00m(key) \u001B[38;5;28;01mfrom\u001B[39;00m \u001B[38;5;21;01merr\u001B[39;00m\n\u001B[1;32m 3805\u001B[0m \u001B[38;5;28;01mexcept\u001B[39;00m \u001B[38;5;167;01mTypeError\u001B[39;00m:\n\u001B[1;32m 3806\u001B[0m \u001B[38;5;66;03m# If we have a listlike key, _check_indexing_error will raise\u001B[39;00m\n\u001B[1;32m 3807\u001B[0m \u001B[38;5;66;03m# InvalidIndexError. Otherwise we fall through and re-raise\u001B[39;00m\n\u001B[1;32m 3808\u001B[0m \u001B[38;5;66;03m# the TypeError.\u001B[39;00m\n\u001B[1;32m 3809\u001B[0m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_check_indexing_error(key)\n", + "\u001B[0;31mKeyError\u001B[0m: 'id'" ] } ], @@ -150,44 +187,14 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2023-10-10T20:29:37.869662601Z", - "start_time": "2023-10-10T20:22:19.292012183Z" + "start_time": "2023-10-12T15:05:43.645571236Z" } }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:papermill:Input Notebook: /home/lukas/Programming/uni/bachelorarbeit/dbrepo-ismir/notebooks/2_generate_features.ipynb\n", - "INFO:papermill:Output Notebook: /home/lukas/Programming/uni/bachelorarbeit/dbrepo-ismir/notebooks/2_generate_features.ipynb\n" - ] - }, - { - "data": { - "text/plain": "Executing: 0%| | 0/9 [00:00<?, ?cell/s]", - "application/vnd.jupyter.widget-view+json": { - "version_major": 2, - "version_minor": 0, - "model_id": "1542f4273cb540f391061731ae294f62" - } - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:papermill:Executing notebook with kernel: python3\n", - "WARNING:fairnb.api.dbrepo:Re-authenticating due to almost expired token\n" - ] - } - ], + "outputs": [], "source": [ "# ------------- Raw feature generation -------------\n", "nb_config_generate_features = NbConfig(\n", @@ -213,43 +220,14 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2023-10-10T20:29:51.551709980Z", - "start_time": "2023-10-10T20:29:37.875148186Z" + "start_time": "2023-10-12T15:05:43.647919709Z" } }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:papermill:Input Notebook: /home/lukas/Programming/uni/bachelorarbeit/dbrepo-ismir/notebooks/3_aggregate_features.ipynb\n", - "INFO:papermill:Output Notebook: /home/lukas/Programming/uni/bachelorarbeit/dbrepo-ismir/notebooks/3_aggregate_features.ipynb\n" - ] - }, - { - "data": { - "text/plain": "Executing: 0%| | 0/7 [00:00<?, ?cell/s]", - "application/vnd.jupyter.widget-view+json": { - "version_major": 2, - "version_minor": 0, - "model_id": "3a3d98af87954445acbde5eb85faaba0" - } - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:papermill:Executing notebook with kernel: python3\n" - ] - } - ], + "outputs": [], "source": [ "# ------------- Feature Aggregation ----------------\n", "if \"raw_features_entity\" not in globals():\n", @@ -286,12 +264,12 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": { "collapsed": false, + "is_executing": true, "ExecuteTime": { - "end_time": "2023-10-10T20:29:55.469694709Z", - "start_time": "2023-10-10T20:29:51.551964304Z" + "start_time": "2023-10-12T15:14:54.285396178Z" } }, "outputs": [ @@ -300,7 +278,43 @@ "output_type": "stream", "text": [ "INFO:papermill:Input Notebook: /home/lukas/Programming/uni/bachelorarbeit/dbrepo-ismir/notebooks/4_split.ipynb\n", - "INFO:papermill:Output Notebook: /home/lukas/Programming/uni/bachelorarbeit/dbrepo-ismir/notebooks/4_split.ipynb\n" + "INFO:papermill:Output Notebook: /home/lukas/Programming/uni/bachelorarbeit/dbrepo-ismir/notebooks/4_split.ipynb\n", + "DEBUG:blib2to3.pgen2.driver:NAME 'INPUT_PATHS' (prefix='# Parameters\\n')\n", + "DEBUG:blib2to3.pgen2.driver:EQUAL '=' (prefix=' ')\n", + "DEBUG:blib2to3.pgen2.driver:LBRACE '{' (prefix=' ')\n", + "DEBUG:blib2to3.pgen2.driver:STRING '\"aggregated_features\"' (prefix='')\n", + "DEBUG:blib2to3.pgen2.driver:COLON ':' (prefix='')\n", + "DEBUG:blib2to3.pgen2.driver:STRING '\"/home/lukas/Programming/uni/bachelorarbeit/dbrepo-ismir/tmp/4_split/input/features.csv\"' (prefix=' ')\n", + "DEBUG:blib2to3.pgen2.driver:RBRACE '}' (prefix='')\n", + "DEBUG:blib2to3.pgen2.driver:NEWLINE '\\n' (prefix='')\n", + "DEBUG:blib2to3.pgen2.driver:NAME 'OUTPUT_PATHS' (prefix='')\n", + "DEBUG:blib2to3.pgen2.driver:EQUAL '=' (prefix=' ')\n", + "DEBUG:blib2to3.pgen2.driver:LBRACE '{' (prefix=' ')\n", + "DEBUG:blib2to3.pgen2.driver:STRING '\"split\"' (prefix='')\n", + "DEBUG:blib2to3.pgen2.driver:COLON ':' (prefix='')\n", + "DEBUG:blib2to3.pgen2.driver:STRING '\"/home/lukas/Programming/uni/bachelorarbeit/dbrepo-ismir/tmp/4_split/output/split.csv\"' (prefix=' ')\n", + "DEBUG:blib2to3.pgen2.driver:RBRACE '}' (prefix='')\n", + "DEBUG:blib2to3.pgen2.driver:NEWLINE '\\n' (prefix='')\n", + "DEBUG:blib2to3.pgen2.driver:ENDMARKER '' (prefix='')\n", + "DEBUG:blib2to3.pgen2.driver:Stop.\n", + "DEBUG:blib2to3.pgen2.driver:NAME 'INPUT_PATHS' (prefix='# Parameters\\n')\n", + "DEBUG:blib2to3.pgen2.driver:EQUAL '=' (prefix=' ')\n", + "DEBUG:blib2to3.pgen2.driver:LBRACE '{' (prefix=' ')\n", + "DEBUG:blib2to3.pgen2.driver:STRING '\"aggregated_features\"' (prefix='\\n ')\n", + "DEBUG:blib2to3.pgen2.driver:COLON ':' (prefix='')\n", + "DEBUG:blib2to3.pgen2.driver:STRING '\"/home/lukas/Programming/uni/bachelorarbeit/dbrepo-ismir/tmp/4_split/input/features.csv\"' (prefix=' ')\n", + "DEBUG:blib2to3.pgen2.driver:RBRACE '}' (prefix='\\n')\n", + "DEBUG:blib2to3.pgen2.driver:NEWLINE '\\n' (prefix='')\n", + "DEBUG:blib2to3.pgen2.driver:NAME 'OUTPUT_PATHS' (prefix='')\n", + "DEBUG:blib2to3.pgen2.driver:EQUAL '=' (prefix=' ')\n", + "DEBUG:blib2to3.pgen2.driver:LBRACE '{' (prefix=' ')\n", + "DEBUG:blib2to3.pgen2.driver:STRING '\"split\"' (prefix='\\n ')\n", + "DEBUG:blib2to3.pgen2.driver:COLON ':' (prefix='')\n", + "DEBUG:blib2to3.pgen2.driver:STRING '\"/home/lukas/Programming/uni/bachelorarbeit/dbrepo-ismir/tmp/4_split/output/split.csv\"' (prefix=' ')\n", + "DEBUG:blib2to3.pgen2.driver:RBRACE '}' (prefix='\\n')\n", + "DEBUG:blib2to3.pgen2.driver:NEWLINE '\\n' (prefix='')\n", + "DEBUG:blib2to3.pgen2.driver:ENDMARKER '' (prefix='')\n", + "DEBUG:blib2to3.pgen2.driver:Stop.\n" ] }, { @@ -309,7 +323,7 @@ "application/vnd.jupyter.widget-view+json": { "version_major": 2, "version_minor": 0, - "model_id": "340a4bbf08d144aba5521599be170809" + "model_id": "79111c7324b440b7bc4b73c650b2dd35" } }, "metadata": {}, @@ -319,7 +333,126 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:papermill:Executing notebook with kernel: python3\n" + "DEBUG:asyncio:Using selector: EpollSelector\n", + "DEBUG:asyncio:Using selector: EpollSelector\n", + "INFO:papermill:Executing notebook with kernel: python3\n", + "DEBUG:papermill:Skipping non-executing cell 0\n", + "DEBUG:papermill:Executing cell:\n", + "import pandas as pd\n", + "from pathlib import Path\n", + "from definitions import BASE_PATH\n", + "DEBUG:papermill:msg_type: status\n", + "DEBUG:papermill:content: {'execution_state': 'busy'}\n", + "DEBUG:papermill:msg_type: execute_input\n", + "DEBUG:papermill:content: {'code': 'import pandas as pd\\nfrom pathlib import Path\\nfrom definitions import BASE_PATH', 'execution_count': 1}\n", + "DEBUG:papermill:msg_type: status\n", + "DEBUG:papermill:content: {'execution_state': 'idle'}\n", + "DEBUG:papermill:Executing cell:\n", + "# Tagged with 'parameters'\n", + "from definitions import BASE_PATH\n", + "\n", + "INPUT_PATHS: dict[str, str] = {\n", + " \"features\": (BASE_PATH / \"tmp\" / \"4_split\" / \"input\" / \"features.csv\").__str__()\n", + "}\n", + "OUTPUT_PATHS: dict[str, str] = {\n", + " \"split\": (BASE_PATH / \"tmp\" / \"4_split\" / \"output\" / \"split.csv\").__str__()\n", + "}\n", + "DEBUG:papermill:msg_type: status\n", + "DEBUG:papermill:content: {'execution_state': 'busy'}\n", + "DEBUG:papermill:msg_type: execute_input\n", + "DEBUG:papermill:content: {'code': '# Tagged with \\'parameters\\'\\nfrom definitions import BASE_PATH\\n\\nINPUT_PATHS: dict[str, str] = {\\n \"features\": (BASE_PATH / \"tmp\" / \"4_split\" / \"input\" / \"features.csv\").__str__()\\n}\\nOUTPUT_PATHS: dict[str, str] = {\\n \"split\": (BASE_PATH / \"tmp\" / \"4_split\" / \"output\" / \"split.csv\").__str__()\\n}', 'execution_count': 2}\n", + "DEBUG:papermill:msg_type: status\n", + "DEBUG:papermill:content: {'execution_state': 'idle'}\n", + "DEBUG:papermill:Executing cell:\n", + "# Parameters\n", + "INPUT_PATHS = {\n", + " \"aggregated_features\": \"/home/lukas/Programming/uni/bachelorarbeit/dbrepo-ismir/tmp/4_split/input/features.csv\"\n", + "}\n", + "OUTPUT_PATHS = {\n", + " \"split\": \"/home/lukas/Programming/uni/bachelorarbeit/dbrepo-ismir/tmp/4_split/output/split.csv\"\n", + "}\n", + "\n", + "DEBUG:papermill:msg_type: status\n", + "DEBUG:papermill:content: {'execution_state': 'busy'}\n", + "DEBUG:papermill:msg_type: execute_input\n", + "DEBUG:papermill:content: {'code': '# Parameters\\nINPUT_PATHS = {\\n \"aggregated_features\": \"/home/lukas/Programming/uni/bachelorarbeit/dbrepo-ismir/tmp/4_split/input/features.csv\"\\n}\\nOUTPUT_PATHS = {\\n \"split\": \"/home/lukas/Programming/uni/bachelorarbeit/dbrepo-ismir/tmp/4_split/output/split.csv\"\\n}\\n', 'execution_count': 3}\n", + "DEBUG:papermill:msg_type: status\n", + "DEBUG:papermill:content: {'execution_state': 'idle'}\n", + "DEBUG:papermill:Executing cell:\n", + "# INPUT\n", + "\n", + "for path in INPUT_PATHS.values():\n", + " assert Path(path).exists()\n", + "\n", + "features = pd.read_csv(INPUT_PATHS[\"aggregated_features\"])\n", + "DEBUG:papermill:msg_type: status\n", + "DEBUG:papermill:content: {'execution_state': 'busy'}\n", + "DEBUG:papermill:msg_type: execute_input\n", + "DEBUG:papermill:content: {'code': '# INPUT\\n\\nfor path in INPUT_PATHS.values():\\n assert Path(path).exists()\\n\\nfeatures = pd.read_csv(INPUT_PATHS[\"aggregated_features\"])', 'execution_count': 4}\n", + "DEBUG:papermill:msg_type: status\n", + "DEBUG:papermill:content: {'execution_state': 'idle'}\n", + "DEBUG:papermill:Executing cell:\n", + "train = features.sample(frac=0.8).sort_index()\n", + "test = features.drop(train.index)\n", + "\n", + "split_true = pd.DataFrame({\n", + " \"filename\": train.filename,\n", + " \"train\": True\n", + "})\n", + "split_false = pd.DataFrame({\n", + " \"filename\": test.filename,\n", + " \"train\": False\n", + "})\n", + "\n", + "split_concat = pd.concat([split_true, split_false])\\\n", + " .sort_values(\"filename\")\\\n", + " .reset_index(drop=True)\n", + "DEBUG:papermill:msg_type: status\n", + "DEBUG:papermill:content: {'execution_state': 'busy'}\n", + "DEBUG:papermill:msg_type: execute_input\n", + "DEBUG:papermill:content: {'code': 'train = features.sample(frac=0.8).sort_index()\\ntest = features.drop(train.index)\\n\\nsplit_true = pd.DataFrame({\\n \"filename\": train.filename,\\n \"train\": True\\n})\\nsplit_false = pd.DataFrame({\\n \"filename\": test.filename,\\n \"train\": False\\n})\\n\\nsplit_concat = pd.concat([split_true, split_false])\\\\\\n .sort_values(\"filename\")\\\\\\n .reset_index(drop=True)', 'execution_count': 5}\n", + "DEBUG:papermill:msg_type: status\n", + "DEBUG:papermill:content: {'execution_state': 'idle'}\n", + "DEBUG:papermill:Executing cell:\n", + "split_concat\n", + "DEBUG:papermill:msg_type: status\n", + "DEBUG:papermill:content: {'execution_state': 'busy'}\n", + "DEBUG:papermill:msg_type: execute_input\n", + "DEBUG:papermill:content: {'code': 'split_concat', 'execution_count': 6}\n", + "DEBUG:papermill:msg_type: execute_result\n", + "DEBUG:papermill:content: {'data': {'text/plain': ' filename train\\n0 classical_1.mp3 True\\n1 classical_10.mp3 False\\n2 classical_100.mp3 True\\n3 classical_11.mp3 True\\n4 classical_12.mp3 True\\n.. ... ...\\n395 rock_95.mp3 True\\n396 rock_96.mp3 True\\n397 rock_97.mp3 True\\n398 rock_98.mp3 False\\n399 rock_99.mp3 True\\n\\n[400 rows x 2 columns]', 'text/html': '<div>\\n<style scoped>\\n .dataframe tbody tr th:only-of-type {\\n vertical-align: middle;\\n }\\n\\n .dataframe tbody tr th {\\n vertical-align: top;\\n }\\n\\n .dataframe thead th {\\n text-align: right;\\n }\\n</style>\\n<table border=\"1\" class=\"dataframe\">\\n <thead>\\n <tr style=\"text-align: right;\">\\n <th></th>\\n <th>filename</th>\\n <th>train</th>\\n </tr>\\n </thead>\\n <tbody>\\n <tr>\\n <th>0</th>\\n <td>classical_1.mp3</td>\\n <td>True</td>\\n </tr>\\n <tr>\\n <th>1</th>\\n <td>classical_10.mp3</td>\\n <td>False</td>\\n </tr>\\n <tr>\\n <th>2</th>\\n <td>classical_100.mp3</td>\\n <td>True</td>\\n </tr>\\n <tr>\\n <th>3</th>\\n <td>classical_11.mp3</td>\\n <td>True</td>\\n </tr>\\n <tr>\\n <th>4</th>\\n <td>classical_12.mp3</td>\\n <td>True</td>\\n </tr>\\n <tr>\\n <th>...</th>\\n <td>...</td>\\n <td>...</td>\\n </tr>\\n <tr>\\n <th>395</th>\\n <td>rock_95.mp3</td>\\n <td>True</td>\\n </tr>\\n <tr>\\n <th>396</th>\\n <td>rock_96.mp3</td>\\n <td>True</td>\\n </tr>\\n <tr>\\n <th>397</th>\\n <td>rock_97.mp3</td>\\n <td>True</td>\\n </tr>\\n <tr>\\n <th>398</th>\\n <td>rock_98.mp3</td>\\n <td>False</td>\\n </tr>\\n <tr>\\n <th>399</th>\\n <td>rock_99.mp3</td>\\n <td>True</td>\\n </tr>\\n </tbody>\\n</table>\\n<p>400 rows × 2 columns</p>\\n</div>'}, 'metadata': {}, 'execution_count': 6}\n", + "DEBUG:papermill:msg_type: status\n", + "DEBUG:papermill:content: {'execution_state': 'idle'}\n", + "DEBUG:papermill:Executing cell:\n", + "# output\n", + "OUTPUT_PATH = Path(OUTPUT_PATHS[\"split\"])\n", + "OUTPUT_PATH.parent.mkdir(parents=True, exist_ok=True)\n", + "\n", + "output = split_concat\n", + "output.to_csv(OUTPUT_PATH, index=False)\n", + "DEBUG:papermill:msg_type: status\n", + "DEBUG:papermill:content: {'execution_state': 'busy'}\n", + "DEBUG:papermill:msg_type: execute_input\n", + "DEBUG:papermill:content: {'code': '# output\\nOUTPUT_PATH = Path(OUTPUT_PATHS[\"split\"])\\nOUTPUT_PATH.parent.mkdir(parents=True, exist_ok=True)\\n\\noutput = split_concat\\noutput.to_csv(OUTPUT_PATH, index=False)', 'execution_count': 7}\n", + "DEBUG:papermill:msg_type: status\n", + "DEBUG:papermill:content: {'execution_state': 'idle'}\n", + "DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): test.dbrepo.tuwien.ac.at:443\n", + "DEBUG:urllib3.connectionpool:https://test.dbrepo.tuwien.ac.at:443 \"GET /api/database/3 HTTP/1.1\" 200 None\n", + "DEBUG:fairnb.api.dbrepo:<Response [200]>\n", + "DEBUG:git.cmd:Popen(['git', 'cat-file', '--batch-check'], cwd=/home/lukas/Programming/uni/bachelorarbeit/dbrepo-ismir, universal_newlines=False, shell=None, istream=<valid stream>)\n", + "DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): test.dbrepo.tuwien.ac.at:443\n", + "DEBUG:urllib3.connectionpool:https://test.dbrepo.tuwien.ac.at:443 \"GET /api/database/3 HTTP/1.1\" 200 None\n", + "DEBUG:fairnb.api.dbrepo:<Response [200]>\n", + "DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): test.dbrepo.tuwien.ac.at:443\n", + "DEBUG:urllib3.connectionpool:https://test.dbrepo.tuwien.ac.at:443 \"GET /api/database/3 HTTP/1.1\" 200 None\n", + "DEBUG:fairnb.api.dbrepo:<Response [200]>\n", + "DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): test.dbrepo.tuwien.ac.at:443\n", + "DEBUG:urllib3.connectionpool:https://test.dbrepo.tuwien.ac.at:443 \"POST /api/upload/files/ HTTP/1.1\" 500 1422\n", + "DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): test.dbrepo.tuwien.ac.at:443\n", + "DEBUG:urllib3.connectionpool:https://test.dbrepo.tuwien.ac.at:443 \"PATCH /api/upload/files/1cc10ef4b512ea9d55862c7c0d13f0f8 HTTP/1.1\" 204 0\n", + "DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): test.dbrepo.tuwien.ac.at:443\n", + "DEBUG:urllib3.connectionpool:https://test.dbrepo.tuwien.ac.at:443 \"POST /api/database/3/table/7/data/import HTTP/1.1\" 400 None\n", + "DEBUG:charset_normalizer:Encoding detection: ascii is most likely the one.\n" ] } ], @@ -368,47 +501,14 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2023-10-10T20:30:03.919914535Z", - "start_time": "2023-10-10T20:29:55.479897609Z" + "start_time": "2023-10-12T15:05:43.672101780Z" } }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:papermill:Input Notebook: /home/lukas/Programming/uni/bachelorarbeit/dbrepo-ismir/notebooks/5_ml_model.ipynb\n", - "INFO:papermill:Output Notebook: /home/lukas/Programming/uni/bachelorarbeit/dbrepo-ismir/notebooks/5_ml_model.ipynb\n" - ] - }, - { - "data": { - "text/plain": "Executing: 0%| | 0/20 [00:00<?, ?cell/s]", - "application/vnd.jupyter.widget-view+json": { - "version_major": 2, - "version_minor": 0, - "model_id": "d53f45cbcb33427eafa4c7c174b9dd65" - } - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:papermill:Executing notebook with kernel: python3\n", - "INFO:fairnb.api.invenio:Picked up 1 files\n", - "INFO:fairnb.api.invenio:Uploading 1 to https://test.researchdata.tuwien.ac.at\n", - "INFO:fairnb.api.invenio:Uploading /home/lukas/Programming/uni/bachelorarbeit/dbrepo-ismir/tmp/5_ml_model/output/ml_model.pickle as ml_model.pickle\n", - "INFO:fairnb.api.invenio:Finished upload of ml_model.pickle\n" - ] - } - ], + "outputs": [], "source": [ "# -------------- ML ------------------------------\n", "with open(RESOURCE_PATH / \"5_ml_model\" / \"ml_model_entity_metadata.yml\", \"r\") as file:\n",