Unify logic for creating download files

* the file paths are now based on the container ID and the file name
* if no base directory is specified, a randomly named temporary directory is used as the base
* the location of that temporary directory is derived from `tempfile.tempdir`, which can be influenced e.g. through the `$TMPDIR` environment variable

Unify logic for creating download files
cc983a00 · Moser, Maximilian · d316c743 · cc983a00 · cc983a00 · cc983a00
Commit cc983a00 authored 1 year ago by Moser, Maximilian
--- a/vre_repository_connector/api/dbrepo.py
+++ b/vre_repository_connector/api/dbrepo.py
@@ -26,7 +26,7 @@ from dbrepo.api.dto import (
 from dbrepo.RestClient import RestClient
 from pandas import concat
-from ..utils import url_regex
+from ..utils import create_download_file, url_regex
 from .base import BaseWrapper
 db_tbl_regex = re.compile(r"(/api)?/database/([0-9]+)(/table/([0-9]+)(/.*)?)?")
@@ -290,6 +290,8 @@ class DBRepo(BaseWrapper):
        # write the dataframe to a CSV file, with the columns in the same order
        # as they were uploaded, and without index column
-        name = f"{table_name}.csv"
+        file_path = create_download_file(
-        data.to_csv(name, index=False, columns=[c.name for c in table.columns])
+            database_id, f"{table_name.rstrip('.csv')}.csv"
-        return name
+        )
+        data.to_csv(file_path, index=False, columns=[c.name for c in table.columns])
+        return file_path
--- a/vre_repository_connector/api/inveniordm.py
+++ b/vre_repository_connector/api/inveniordm.py
@@ -9,7 +9,6 @@ import getpass
 import os.path
 import pathlib
 import re
-import tempfile
 import urllib.parse
 from typing import Iterable, Optional, Tuple
@@ -18,7 +17,7 @@ from inveniordm_py.files.metadata import FilesListMetadata, OutgoingStream
 from inveniordm_py.records.metadata import DraftMetadata
 from inveniordm_py.records.resources import Draft
-from ..utils import doi_regex, url_regex
+from ..utils import create_download_file, doi_regex, url_regex
 from .base import BaseWrapper
 recid_regex = re.compile(r"^.*(/api)?/records/(.*)$")
@@ -180,8 +179,8 @@ class InvenioRDM(BaseWrapper):
            record.get()
            response = record.files(file_name).download()
-            with tempfile.NamedTemporaryFile(delete=False) as downloaded_file:
+            file_name = create_download_file(record_pid, file_name)
+            with open(file_name, "wb") as downloaded_file:
                for chunk in response.iter_content(chunk_size=256):
                    downloaded_file.write(chunk)

--- a/vre_repository_connector/utils.py
+++ b/vre_repository_connector/utils.py
@@ -5,7 +5,29 @@
 """Utility functions."""
+import pathlib
 import re
+import tempfile
+from typing import Optional
 doi_regex = re.compile(r"^((https?://)?doi.org/)?(10\.\d+)/(.*)$")
 url_regex = re.compile(r"^((.*?)://)?(.*)$")
+_tempdir = tempfile.mkdtemp()  # runs once at import time, so tempfile.tempdir / $TMPDIR must be set before this module is imported
+"""Randomly named temporary directory, fixed for the duration of the run.
+The outcome can be influenced by setting ``tempfile.tempdir`` before first use.
+"""
+def create_download_file(
+    container_id: str | int, file_name: str, dir: Optional[str] = None
+) -> str:
+    """Create an empty file for storing downloaded content and return its path.
+
+    The file is created at ``<base>/<container_id>/<file_name>``, where
+    ``<base>`` is ``dir`` if given and the module-level temporary directory
+    otherwise. Missing directories (including sub-directories that are part
+    of ``file_name``) are created on the fly.
+
+    Both ``container_id`` and ``file_name`` typically originate from a remote
+    repository, so they are validated to make sure the resulting path cannot
+    escape the base directory.
+
+    :param container_id: ID of the container (e.g. record or database) that
+        the file belongs to; used as the name of the sub-directory.
+    :param file_name: Name of the file, relative to the container directory.
+    :param dir: Optional base directory to use instead of the temporary one.
+    :return: The path of the created file, as string.
+    :raises ValueError: If ``container_id`` or ``file_name`` is empty, is
+        absolute, or contains ``..`` components.
+    """
+    container_part = pathlib.PurePath(str(container_id))
+    name_part = pathlib.PurePath(file_name)
+    for label, part in (("container_id", container_part), ("file_name", name_part)):
+        # an anchor (root and/or drive) would replace the base directory when
+        # joined, and ".." would climb out of it; no parts means "" or "."
+        if part.anchor or ".." in part.parts or not part.parts:
+            raise ValueError(f"invalid {label} for download file: {str(part)!r}")
+
+    container_dir = pathlib.Path(dir or _tempdir) / container_part
+    container_dir.mkdir(mode=0o700, parents=True, exist_ok=True)
+
+    file_path = container_dir / name_part
+    # file names may contain sub-directories (e.g. "data/file.csv")
+    file_path.parent.mkdir(mode=0o700, parents=True, exist_ok=True)
+    file_path.touch(mode=0o700)
+    return str(file_path)