mesoscope · rugeli · May 1, 2026 · Apr 14, 2026 · Apr 14, 2026 · Apr 14, 2026
diff --git a/cellpack/autopack/DBRecipeHandler.py b/cellpack/autopack/DBRecipeHandler.py
@@ -48,9 +48,43 @@ def is_obj(comp_or_obj):
         # in resolved DB data, if the top level of a downloaded comp doesn't have the key `name`, it's an obj
         return not comp_or_obj.get("name") and "object" in comp_or_obj
 
+    @staticmethod
+    def _is_positional(item):
+        if isinstance(item, bool):
+            return False
+        if isinstance(item, (int, float)):
+            return True
+        if isinstance(item, list):
+            return all(DataDoc._is_positional(x) for x in item)
+        return False
+
+    @staticmethod
+    def _normalize_for_hashing(data):
+        """
+        Recursively normalize the input json data so that dedup hashes are
+        stable across semantically equivalent inputs.
+
+        Categories (see `cellpack/tests/test_data_doc.py` for worked shapes):
+            - dict key order — handled by `json.dumps(sort_keys=True)` at hash time
+            - string lists (e.g. region composition refs) — sorted
+            - mixed string + inline-dict lists — sorted
+            - pure-dict lists (partners, inline composition entries) — sorted
+            - positional numeric lists (vectors, colors) — left as is
+            - positional nested-numeric lists (bounding boxes) — left as is
+        """
+        if isinstance(data, dict):
+            return {k: DataDoc._normalize_for_hashing(v) for k, v in data.items()}
+        if isinstance(data, list):
+            normalized = [DataDoc._normalize_for_hashing(item) for item in data]
+            if all(DataDoc._is_positional(item) for item in normalized):
+                return normalized
+            return sorted(normalized, key=lambda x: json.dumps(x, sort_keys=True))
+        return data
+
     @staticmethod
     def generate_hash(doc_data):
-        doc_str = json.dumps(doc_data, sort_keys=True)
+        normalized_data = DataDoc._normalize_for_hashing(doc_data)
+        doc_str = json.dumps(normalized_data, sort_keys=True)
         return hashlib.sha256(doc_str.encode()).hexdigest()
 
 

diff --git a/cellpack/tests/test_aws_handler.py b/cellpack/tests/test_aws_handler.py
@@ -31,7 +31,7 @@ def test_get_aws_object_key():
         assert object_key == "test_folder/test_file"
 
 
-def test_upload_file():
+def test_upload_file(tmp_path):
     with mock_aws():
         aws_handler = AWSHandler(
             bucket_name="test_bucket",
@@ -43,13 +43,13 @@ def test_upload_file():
             Bucket="test_bucket",
             CreateBucketConfiguration={"LocationConstraint": "us-west-2"},
         )
-        with open("test_file.txt", "w") as file:
-            file.write("test file")
-        file_name = aws_handler.upload_file("test_file.txt")
+        test_file = tmp_path / "test_file.txt"
+        test_file.write_text("test file")
+        file_name = aws_handler.upload_file(str(test_file))
         assert file_name == "test_folder/test_file.txt"
 
 
-def test_create_presigned_url():
+def test_create_presigned_url(tmp_path):
     with mock_aws(), patch.object(AWSHandler, "_s3_client") as mock_client:
         presigned_url = "https://s3.us-west-2.amazonaws.com/test_bucket/test_folder/test_file.txt?query=string"
         mock_client.generate_presigned_url.return_value = presigned_url
@@ -63,17 +63,17 @@ def test_create_presigned_url():
             Bucket="test_bucket",
             CreateBucketConfiguration={"LocationConstraint": "us-west-2"},
         )
-        with open("test_file.txt", "w") as file:
-            file.write("test file")
-        aws_handler.upload_file("test_file.txt")
+        test_file = tmp_path / "test_file.txt"
+        test_file.write_text("test file")
+        aws_handler.upload_file(str(test_file))
         url = aws_handler.create_presigned_url("test_file.txt")
         assert url is not None
         assert url.startswith(
             "https://s3.us-west-2.amazonaws.com/test_bucket/test_folder/test_file.txt"
         )
 
 
-def test_is_url_valid():
+def test_is_url_valid(tmp_path):
     with mock_aws(), patch.object(AWSHandler, "_s3_client") as mock_client:
         presigned_url = "https://s3.us-west-2.amazonaws.com/test_bucket/test_folder/test_file.txt?query=string"
         mock_client.generate_presigned_url.return_value = presigned_url
@@ -87,9 +87,9 @@ def test_is_url_valid():
             Bucket="test_bucket",
             CreateBucketConfiguration={"LocationConstraint": "us-west-2"},
         )
-        with open("test_file.txt", "w") as file:
-            file.write("test file")
-        aws_handler.upload_file("test_file.txt")
+        test_file = tmp_path / "test_file.txt"
+        test_file.write_text("test file")
+        aws_handler.upload_file(str(test_file))
         url = aws_handler.create_presigned_url("test_file.txt")
         assert aws_handler.is_url_valid(url) is True
         assert aws_handler.is_url_valid("invalid_url") is False

diff --git a/cellpack/tests/test_data_doc.py b/cellpack/tests/test_data_doc.py
@@ -45,3 +45,69 @@ def test_generate_hash():
         generated_hash = DataDoc.generate_hash(input_data)
         assert isinstance(generated_hash, str)
         assert generated_hash == DataDoc.generate_hash(input_data)
+
+
+def test_generate_hash_is_stable_across_key_order():
+    recipe_a = {"name": "test", "version": "1.0", "count": 1}
+    recipe_b = {"count": 1, "version": "1.0", "name": "test"}
+    assert DataDoc.generate_hash(recipe_a) == DataDoc.generate_hash(recipe_b)
+
+
+def test_generate_hash_is_stable_across_string_list_order():
+    recipe_a = {
+        "composition": {
+            "space": {"regions": {"interior": ["A", "B", "C", "D", "E"]}},
+            "A": {"object": "sphere_100", "count": 6},
+            "B": {"object": "sphere_200", "count": 2},
+            "C": {"object": "sphere_50", "count": 15},
+        }
+    }
+    recipe_b = {
+        "composition": {
+            "A": {"count": 6, "object": "sphere_100"},
+            "C": {"object": "sphere_50", "count": 15},
+            "B": {"object": "sphere_200", "count": 2},
+            "space": {"regions": {"interior": ["E", "C", "A", "D", "B"]}},
+        }
+    }
+    assert DataDoc.generate_hash(recipe_a) == DataDoc.generate_hash(recipe_b)
+
+
+def test_generate_hash_preserves_positional_list_order():
+    # numeric/nested lists encode positional data (bounding boxes, vectors, colors) and must remain order-sensitive.
+    bbox_a = {"bounding_box": [[0, 0, 0], [1000, 1000, 1]]}
+    bbox_b = {"bounding_box": [[1000, 1000, 1], [0, 0, 0]]}
+    assert DataDoc.generate_hash(bbox_a) != DataDoc.generate_hash(bbox_b)
+
+    axis_a = {"rotation_axis": [0, 0, 1]}
+    axis_b = {"rotation_axis": [1, 0, 0]}
+    assert DataDoc.generate_hash(axis_a) != DataDoc.generate_hash(axis_b)
+
+
+def test_generate_hash_is_stable_across_mixed_list_order():
+    # region lists that mix string refs with inline dicts should dedup regardless of element order.
+    recipe_a = {
+        "composition": {
+            "bounding_area": {
+                "regions": {
+                    "interior": [
+                        "outer_sphere",
+                        {"object": "green_sphere", "count": 5},
+                    ]
+                }
+            }
+        }
+    }
+    recipe_b = {
+        "composition": {
+            "bounding_area": {
+                "regions": {
+                    "interior": [
+                        {"object": "green_sphere", "count": 5},
+                        "outer_sphere",
+                    ]
+                }
+            }
+        }
+    }
+    assert DataDoc.generate_hash(recipe_a) == DataDoc.generate_hash(recipe_b)
diff --git a/cellpack/tests/test_pack_cli.py b/cellpack/tests/test_pack_cli.py
@@ -0,0 +1,90 @@
+"""
+Integration tests for the `pack` CLI entry point (`cellpack/bin/pack.py`).
+
+Two input types are covered:
+
+    1. A string file path — the existing local CLI workflow invoked as
+        `pack -r RECIPE_PATH -c CONFIG_PATH`. Accepting a recipe dict must not change anything about this
+        path.
+
+    2. A json dict — the new flow used by the docker server's
+        `pack_handler` when it receives a json body. The dict must flow
+        through `RecipeLoader` and the rest of the pipeline in the same way
+        as a recipe loaded from a file.
+
+The packing config can be supplied as a file path or omitted entirely, in
+which case `ConfigLoader` falls back to its built-in default values.
+"""
+
+import json
+from pathlib import Path
+import copy
+import pytest
+
+from cellpack.bin.pack import pack
+
+_RECIPE = {
+    "version": "1.0.0",
+    "format_version": "2.0",
+    "name": "test_pack_cli",
+    "bounding_box": [[0, 0, 0], [50, 50, 1]],
+    "objects": {
+        "sphere_5": {
+            "type": "single_sphere",
+            "radius": 5,
+            "max_jitter": [1, 1, 0],
+            "place_method": "jitter",
+        }
+    },
+    "composition": {
+        "space": {"regions": {"interior": ["A"]}},
+        "A": {"object": "sphere_5", "count": 1},
+    },
+}
+
+
+@pytest.fixture
+def recipe_data():
+    # RecipeLoader mutates the dict it receives, so each test gets a fresh copy to stay independent of run order.
+    return copy.deepcopy(_RECIPE)
+
+
+def _write_config(tmp_path: Path) -> Path:
+    config = {
+        "name": "test_pack_cli",
+        "out": f"{tmp_path}/",
+        "place_method": "jitter",
+        "inner_grid_method": "raytrace",
+        "save_analyze_result": False,
+        "save_plot_figures": False,
+        "number_of_packings": 1,
+        "show_progress_bar": False,
+        "load_from_grid_file": False,
+    }
+    config_path = tmp_path / "config.json"
+    config_path.write_text(json.dumps(config))
+    return config_path
+
+
+def test_pack_with_recipe_path(tmp_path, recipe_data):
+    recipe_path = tmp_path / "recipe.json"
+    recipe_path.write_text(json.dumps(recipe_data))
+    config_path = _write_config(tmp_path)
+    pack(recipe=str(recipe_path), config_path=str(config_path))
+
+
+def test_pack_with_recipe_dict(tmp_path, recipe_data):
+    """
+    `pack()` also accepts a recipe dict directly, so
+    the docker server can forward a parsed JSON body without writing it to
+    the db first.
+    """
+    config_path = _write_config(tmp_path)
+    pack(recipe=recipe_data, config_path=str(config_path))
+
+
+def test_pack_with_default_config(tmp_path, monkeypatch, recipe_data):
+    """Omitting `config_path` falls back to `ConfigLoader.default_values`."""
+    # default `out: "out/"` is relative, so monkeypatch.chdir keeps outputs inside tmp_path.
+    monkeypatch.chdir(tmp_path)
+    pack(recipe=recipe_data)