From d6d935868cb5c55601f4de9bb89aa69213196d69 Mon Sep 17 00:00:00 2001 From: Quentin Kaiser Date: Tue, 1 Jul 2025 14:18:45 +0200 Subject: [PATCH] feat(processing): add unblob dedicated temporary directory handling --- python/unblob/processing.py | 24 ++++++++++++++++++++++++ python/unblob/sandbox.py | 4 ++++ tests/test_cli.py | 1 + tests/test_sandbox.py | 8 ++++++++ 4 files changed, 37 insertions(+) diff --git a/python/unblob/processing.py b/python/unblob/processing.py index 64ebf210fd..c245b146b1 100644 --- a/python/unblob/processing.py +++ b/python/unblob/processing.py @@ -1,5 +1,8 @@ +import atexit import multiprocessing +import os import shutil +import tempfile from collections.abc import Iterable, Sequence from operator import attrgetter from pathlib import Path @@ -100,6 +103,21 @@ class ExtractionConfig: dir_handlers: DirectoryHandlers = BUILTIN_DIR_HANDLERS verbose: int = 1 progress_reporter: type[ProgressReporter] = NullProgressReporter + tmp_dir: Path = attrs.field( + factory=lambda: Path(tempfile.mkdtemp(prefix="unblob-tmp-")) + ) + + def __attrs_post_init__(self): + atexit.register(self._cleanup_tmp_dir) + + def _cleanup_tmp_dir(self): + if isinstance(self.tmp_dir, Path) and self.tmp_dir.exists(): + try: + shutil.rmtree(self.tmp_dir) + except Exception as e: + logger.warning( + "Failed to clean up tmp_dir", tmp_dir=self.tmp_dir, exc_info=e + ) def _get_output_path(self, path: Path) -> Path: """Return path under extract root.""" @@ -244,11 +262,17 @@ def __init__(self, config: ExtractionConfig): def process_task(self, task: Task) -> TaskResult: result = TaskResult(task) try: + self._set_tmp_dir() self._process_task(result, task) except Exception as exc: self._process_error(result, exc) return result + def _set_tmp_dir(self): + """Set environment variables so all subprocesses and handlers use our temp dir.""" + for var in ("TMP", "TMPDIR", "TEMP", "TEMPDIR"): + os.environ[var] = self._config.tmp_dir.as_posix() + def _process_error(self, result: TaskResult, exc: Exception): error_report = UnknownError(exception=exc) result.add_report(error_report) diff --git a/python/unblob/sandbox.py b/python/unblob/sandbox.py index 61b02b099d..c46c9aa337 100644 --- a/python/unblob/sandbox.py +++ b/python/unblob/sandbox.py @@ -55,6 +55,10 @@ def __init__( AccessFS.remove_file(config.extract_root), AccessFS.make_dir(config.extract_root.parent), AccessFS.read_write(log_path), + # Allow access to the managed temp directory for handlers + AccessFS.read_write(config.tmp_dir), + AccessFS.remove_dir(config.tmp_dir), + AccessFS.remove_file(config.tmp_dir), *extra_passthrough, ] diff --git a/tests/test_cli.py b/tests/test_cli.py index 5ce0e07015..a8ac0e01c5 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -274,6 +274,7 @@ def test_archive_success( verbose=expected_verbosity, progress_reporter=expected_progress_reporter, ) + config.tmp_dir = mock.ANY process_file_mock.assert_called_once_with(config, in_path, None) logger_config_mock.assert_called_once_with(expected_verbosity, tmp_path, log_path) diff --git a/tests/test_sandbox.py b/tests/test_sandbox.py index 97db8c2f9f..886a3f1b70 100644 --- a/tests/test_sandbox.py +++ b/tests/test_sandbox.py @@ -34,6 +34,8 @@ def test_necessary_resources_can_be_created_in_sandbox( ): directory_in_extract_root = extraction_config.extract_root / "path" / "to" / "dir" file_in_extract_root = directory_in_extract_root / "file" + file_in_tmp_dir = extraction_config.tmp_dir / "tmp_file" + directory_in_tmp_dir = extraction_config.tmp_dir / "tmp_dir" sandbox.run(extraction_config.extract_root.mkdir, parents=True) sandbox.run(directory_in_extract_root.mkdir, parents=True) @@ -45,6 +47,12 @@ def test_necessary_resources_can_be_created_in_sandbox( log_path.touch() sandbox.run(log_path.write_text, "log line") + sandbox.run(directory_in_tmp_dir.mkdir, parents=True) + sandbox.run(file_in_tmp_dir.touch) + sandbox.run(file_in_tmp_dir.write_text, "tmp file content") + sandbox.run(file_in_tmp_dir.unlink) + sandbox.run(directory_in_tmp_dir.rmdir) + def test_access_outside_sandbox_is_not_possible(sandbox: Sandbox, tmp_path: Path): unrelated_dir = tmp_path / "unrelated" / "path"