broadinstitute · jaspreetishar · Sep 14, 2025 · Sep 15, 2025 · Sep 15, 2025 · Oct 22, 2025
diff --git a/notebooks/Clustergram_Example.ipynb b/notebooks/Clustergram_Example.ipynb
@@ -584,7 +584,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.12.7"
+   "version": "3.13.0"
   },
   "widgets": {
    "application/vnd.jupyter.widget-state+json": {

diff --git a/notebooks/Landscape-Clustergram_Xenium.ipynb b/notebooks/Landscape-Clustergram_Xenium.ipynb
@@ -178,7 +178,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.12.7"
+   "version": "3.13.0"
   },
   "widgets": {
    "application/vnd.jupyter.widget-state+json": {

diff --git a/notebooks/MERSCOPE_pre-process.ipynb b/notebooks/MERSCOPE_pre-process.ipynb
diff --git a/notebooks/Xenium_pre-process.ipynb b/notebooks/Xenium_pre-process.ipynb
diff --git a/src/celldega/pre/__init__.py b/src/celldega/pre/__init__.py
@@ -356,7 +356,14 @@ def _process_image_channel(path_landscape_files, channel_info, img):
         raise ValueError(f"Unsupported image dimensions: {img.ndim}. Expected 2D or 3D image.")
 
     output_path = Path(path_landscape_files) / f"{channel_name}_output_regular.tif"
-    imsave(output_path, image_data)
+
+    tifffile.imwrite(
+        output_path,
+        image_data,
+        bigtiff=True,
+        compression="zlib",
+        tile=(256, 256),
+    )
 
     # Convert the image to PNG format
     image_png = _convert_to_png(str(output_path))

diff --git a/src/celldega/pre/boundary_tile.py b/src/celldega/pre/boundary_tile.py
@@ -157,9 +157,17 @@ def filter_and_save_fine_boundary(
     # Apply rounding to the GEOMETRY column
     fine_tile_cells["GEOMETRY"] = fine_tile_cells["GEOMETRY"].apply(_round_nested_coord_list)
 
-    if not fine_tile_cells.empty:
-        filename = f"{path_output}/cell_tile_{fine_i}_{fine_j}.parquet"
-        fine_tile_cells.to_parquet(filename, index=False)
+    filename = f"{path_output}/cell_tile_{fine_i}_{fine_j}.parquet"
+
+    if fine_tile_cells.empty:
+        # Write an empty DataFrame with the right schema
+        fine_tile_cells = pd.DataFrame(columns=["GEOMETRY", "name"])
+
+    fine_tile_cells.to_parquet(filename, index=False)
+
+    # if not fine_tile_cells.empty:
+    #     filename = f"{path_output}/cell_tile_{fine_i}_{fine_j}.parquet"
+    #     fine_tile_cells.to_parquet(filename, index=False)
 
 
 def process_fine_boundaries(
@@ -184,17 +192,23 @@ def process_fine_boundaries(
             fine_tile_x_min = x_min + fine_i * tile_size
             fine_tile_x_max = fine_tile_x_min + tile_size
 
-            if not (fine_tile_x_min >= coarse_tile_x_min and fine_tile_x_max <= coarse_tile_x_max):
-                continue
+            if fine_tile_x_max <= coarse_tile_x_min or fine_tile_x_min >= coarse_tile_x_max:
+                continue  # no horizontal overlap
+
+            # if not (fine_tile_x_min >= coarse_tile_x_min and fine_tile_x_max <= coarse_tile_x_max):
+            #     continue
 
             for fine_j in range(n_fine_tiles_y):
                 fine_tile_y_min = y_min + fine_j * tile_size
                 fine_tile_y_max = fine_tile_y_min + tile_size
 
-                if not (
-                    fine_tile_y_min >= coarse_tile_y_min and fine_tile_y_max <= coarse_tile_y_max
-                ):
-                    continue
+                # if not (
+                #     fine_tile_y_min >= coarse_tile_y_min and fine_tile_y_max <= coarse_tile_y_max
+                # ):
+                #     continue
+
+                if fine_tile_y_max <= coarse_tile_y_min or fine_tile_y_min >= coarse_tile_y_max:
+                    continue  # no vertical overlap
 
                 futures.append(
                     executor.submit(
@@ -382,13 +396,23 @@ def make_cell_boundary_tiles(
 
         gdf_cells.index = gdf_cells.index.astype(str).map(cell_str_to_int_mapping)
 
+        print(gdf_cells.index.isnull().sum())
+
         gdf_cells["center_x"] = gdf_cells.geometry.centroid.x
         gdf_cells["center_y"] = gdf_cells.geometry.centroid.y
     else:
         raise ValueError(
             f"Unsupported technology: {technology}. Supported technologies are 'MERSCOPE' and 'Xenium'."
         )
 
+        print("gdf_cells.shape:", gdf_cells.shape)
+        print("Center x range:", gdf_cells["center_x"].min(), gdf_cells["center_x"].max())
+        print("Center y range:", gdf_cells["center_y"].min(), gdf_cells["center_y"].max())
+        print("Tile bounds:", tile_bounds)
+
+        # Catch empty outputs early. NOTE(review): unreachable as indented - this block follows the raise above.
+        assert not gdf_cells.empty, "Cell boundaries GeoDataFrame is empty!"
+
     # Calculate tile bounds and fine/coarse tiles
     x_min, x_max = tile_bounds["x_min"], tile_bounds["x_max"]
     y_min, y_max = tile_bounds["y_min"], tile_bounds["y_max"]

diff --git a/src/celldega/pre/run_pre_processing.py b/src/celldega/pre/run_pre_processing.py
@@ -197,6 +197,7 @@ def main(
         cbg = dega.pre.read_cbg_mtx(str(paths["cbg_matrix"]), technology=technology)
     elif technology == "MERSCOPE":
         cbg = pd.read_csv(str(paths["cbg_csv"]), index_col=0)
+        cbg.index = cbg.index.astype(str)
 
     def make_column_names_unique_fast(df):
         counts = defaultdict(int)

diff --git a/src/celldega/pre/trx_tile.py b/src/celldega/pre/trx_tile.py
@@ -6,6 +6,7 @@
 from pathlib import Path
 
 import numpy as np
+import pandas as pd
 import polars as pl
 from scipy.sparse import csr_matrix
 from tqdm import tqdm
@@ -111,18 +112,25 @@ def _process_fine_tiles_transcripts(
             fine_tile_x_max = fine_tile_x_min + tile_size
 
             # Process only if the fine tile falls within the current coarse tile's bounds
-            if fine_tile_x_min < coarse_tile_x_min or fine_tile_x_max > coarse_tile_x_max:
-                continue
+
+            if fine_tile_x_max <= coarse_tile_x_min or fine_tile_x_min >= coarse_tile_x_max:
+                continue  # no horizontal overlap
+
+            # if fine_tile_x_min < coarse_tile_x_min or fine_tile_x_max > coarse_tile_x_max:
+            #     continue
 
             for fine_j in range(n_fine_tiles_y):
                 fine_tile_y_min = y_min + fine_j * tile_size
                 fine_tile_y_max = fine_tile_y_min + tile_size
 
                 # Process only if the fine tile falls within the current coarse tile's bounds
-                if not (
-                    fine_tile_y_min >= coarse_tile_y_min and fine_tile_y_max <= coarse_tile_y_max
-                ):
-                    continue
+                # if not (
+                #     fine_tile_y_min >= coarse_tile_y_min and fine_tile_y_max <= coarse_tile_y_max
+                # ):
+                #     continue
+
+                if fine_tile_y_max <= coarse_tile_y_min or fine_tile_y_min >= coarse_tile_y_max:
+                    continue  # no vertical overlap
 
                 # Submit the task for each fine tile to process in parallel
                 futures.append(
@@ -195,7 +203,14 @@ def _filter_and_save_fine_tile(
         filename = Path(path_trx_tiles) / f"transcripts_tile_{fine_i}_{fine_j}.parquet"
 
         # Save the filtered DataFrame to a Parquet file
-        fine_tile_trx.to_pandas().to_parquet(filename, index=False)
+        # fine_tile_trx.to_pandas().to_parquet(filename, index=False)
+
+        # filename = Path(path_trx_tiles) / f"transcripts_tile_{fine_i}_{fine_j}.parquet"
+
+        if fine_tile_trx.is_empty():
+            pd.DataFrame(columns=["geometry"]).to_parquet(filename, index=False)
+        else:
+            fine_tile_trx.to_pandas().to_parquet(filename, index=False)
 
 
 def _load_transcript_data_by_technology(technology, path_trx):

diff --git a/tests/unit/test_pre/test_sbg_tile.py b/tests/unit/test_pre/test_sbg_tile.py
@@ -1,13 +1,14 @@
 import importlib.util
+from pathlib import Path
 import sys
 import types
-from pathlib import Path
 
 import numpy as np
 import pandas as pd
 import pytest
 from scipy.sparse import csr_matrix
 
+
 ROOT_DIR = Path(__file__).resolve().parents[3]
 PRE_ROOT = ROOT_DIR / "src" / "celldega" / "pre"