Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion notebooks/Clustergram_Example.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -584,7 +584,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.7"
"version": "3.13.0"
},
"widgets": {
"application/vnd.jupyter.widget-state+json": {
Expand Down
2 changes: 1 addition & 1 deletion notebooks/Landscape-Clustergram_Xenium.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.7"
"version": "3.13.0"
},
"widgets": {
"application/vnd.jupyter.widget-state+json": {
Expand Down
305 changes: 305 additions & 0 deletions notebooks/MERSCOPE_pre-process.ipynb

Large diffs are not rendered by default.

137 changes: 43 additions & 94 deletions notebooks/Xenium_pre-process.ipynb

Large diffs are not rendered by default.

9 changes: 8 additions & 1 deletion src/celldega/pre/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -356,7 +356,14 @@ def _process_image_channel(path_landscape_files, channel_info, img):
raise ValueError(f"Unsupported image dimensions: {img.ndim}. Expected 2D or 3D image.")

output_path = Path(path_landscape_files) / f"{channel_name}_output_regular.tif"
imsave(output_path, image_data)

tifffile.imwrite(
output_path,
image_data,
bigtiff=True,
compression="zlib",
tile=(256, 256),
)

# Convert the image to PNG format
image_png = _convert_to_png(str(output_path))
Expand Down
42 changes: 33 additions & 9 deletions src/celldega/pre/boundary_tile.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,9 +157,17 @@ def filter_and_save_fine_boundary(
# Apply rounding to the GEOMETRY column
fine_tile_cells["GEOMETRY"] = fine_tile_cells["GEOMETRY"].apply(_round_nested_coord_list)

if not fine_tile_cells.empty:
filename = f"{path_output}/cell_tile_{fine_i}_{fine_j}.parquet"
fine_tile_cells.to_parquet(filename, index=False)
filename = f"{path_output}/cell_tile_{fine_i}_{fine_j}.parquet"

if fine_tile_cells.empty:
# Write an empty DataFrame with the right schema
fine_tile_cells = pd.DataFrame(columns=["GEOMETRY", "name"])

fine_tile_cells.to_parquet(filename, index=False)

# if not fine_tile_cells.empty:
# filename = f"{path_output}/cell_tile_{fine_i}_{fine_j}.parquet"
# fine_tile_cells.to_parquet(filename, index=False)


def process_fine_boundaries(
Expand All @@ -184,17 +192,23 @@ def process_fine_boundaries(
fine_tile_x_min = x_min + fine_i * tile_size
fine_tile_x_max = fine_tile_x_min + tile_size

if not (fine_tile_x_min >= coarse_tile_x_min and fine_tile_x_max <= coarse_tile_x_max):
continue
if fine_tile_x_max <= coarse_tile_x_min or fine_tile_x_min >= coarse_tile_x_max:
continue # no horizontal overlap

# if not (fine_tile_x_min >= coarse_tile_x_min and fine_tile_x_max <= coarse_tile_x_max):
# continue

for fine_j in range(n_fine_tiles_y):
fine_tile_y_min = y_min + fine_j * tile_size
fine_tile_y_max = fine_tile_y_min + tile_size

if not (
fine_tile_y_min >= coarse_tile_y_min and fine_tile_y_max <= coarse_tile_y_max
):
continue
# if not (
# fine_tile_y_min >= coarse_tile_y_min and fine_tile_y_max <= coarse_tile_y_max
# ):
# continue

if fine_tile_y_max <= coarse_tile_y_min or fine_tile_y_min >= coarse_tile_y_max:
continue # no vertical overlap

futures.append(
executor.submit(
Expand Down Expand Up @@ -382,13 +396,23 @@ def make_cell_boundary_tiles(

gdf_cells.index = gdf_cells.index.astype(str).map(cell_str_to_int_mapping)

print(gdf_cells.index.isnull().sum())

gdf_cells["center_x"] = gdf_cells.geometry.centroid.x
gdf_cells["center_y"] = gdf_cells.geometry.centroid.y
else:
raise ValueError(
f"Unsupported technology: {technology}. Supported technologies are 'MERSCOPE' and 'Xenium'."
)

print("gdf_cells.shape:", gdf_cells.shape)
print("Center x range:", gdf_cells["center_x"].min(), gdf_cells["center_x"].max())
print("Center y range:", gdf_cells["center_y"].min(), gdf_cells["center_y"].max())
print("Tile bounds:", tile_bounds)

# Optional assert to catch empty outputs early
assert not gdf_cells.empty, "Cell boundaries GeoDataFrame is empty!"

# Calculate tile bounds and fine/coarse tiles
x_min, x_max = tile_bounds["x_min"], tile_bounds["x_max"]
y_min, y_max = tile_bounds["y_min"], tile_bounds["y_max"]
Expand Down
1 change: 1 addition & 0 deletions src/celldega/pre/run_pre_processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,7 @@ def main(
cbg = dega.pre.read_cbg_mtx(str(paths["cbg_matrix"]), technology=technology)
elif technology == "MERSCOPE":
cbg = pd.read_csv(str(paths["cbg_csv"]), index_col=0)
cbg.index = cbg.index.astype(str)

def make_column_names_unique_fast(df):
counts = defaultdict(int)
Expand Down
29 changes: 22 additions & 7 deletions src/celldega/pre/trx_tile.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from pathlib import Path

import numpy as np
import pandas as pd
import polars as pl
from scipy.sparse import csr_matrix
from tqdm import tqdm
Expand Down Expand Up @@ -111,18 +112,25 @@ def _process_fine_tiles_transcripts(
fine_tile_x_max = fine_tile_x_min + tile_size

# Process only if the fine tile falls within the current coarse tile's bounds
if fine_tile_x_min < coarse_tile_x_min or fine_tile_x_max > coarse_tile_x_max:
continue

if fine_tile_x_max <= coarse_tile_x_min or fine_tile_x_min >= coarse_tile_x_max:
continue # no horizontal overlap

# if fine_tile_x_min < coarse_tile_x_min or fine_tile_x_max > coarse_tile_x_max:
# continue

for fine_j in range(n_fine_tiles_y):
fine_tile_y_min = y_min + fine_j * tile_size
fine_tile_y_max = fine_tile_y_min + tile_size

# Process only if the fine tile falls within the current coarse tile's bounds
if not (
fine_tile_y_min >= coarse_tile_y_min and fine_tile_y_max <= coarse_tile_y_max
):
continue
# if not (
# fine_tile_y_min >= coarse_tile_y_min and fine_tile_y_max <= coarse_tile_y_max
# ):
# continue

if fine_tile_y_max <= coarse_tile_y_min or fine_tile_y_min >= coarse_tile_y_max:
continue # no vertical overlap

# Submit the task for each fine tile to process in parallel
futures.append(
Expand Down Expand Up @@ -195,7 +203,14 @@ def _filter_and_save_fine_tile(
filename = Path(path_trx_tiles) / f"transcripts_tile_{fine_i}_{fine_j}.parquet"

# Save the filtered DataFrame to a Parquet file
fine_tile_trx.to_pandas().to_parquet(filename, index=False)
# fine_tile_trx.to_pandas().to_parquet(filename, index=False)

# filename = Path(path_trx_tiles) / f"transcripts_tile_{fine_i}_{fine_j}.parquet"

if fine_tile_trx.is_empty():
pd.DataFrame(columns=["geometry"]).to_parquet(filename, index=False)
else:
fine_tile_trx.to_pandas().to_parquet(filename, index=False)


def _load_transcript_data_by_technology(technology, path_trx):
Expand Down
3 changes: 2 additions & 1 deletion tests/unit/test_pre/test_sbg_tile.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
import importlib.util
from pathlib import Path
import sys
import types
from pathlib import Path

import numpy as np
import pandas as pd
import pytest
from scipy.sparse import csr_matrix


ROOT_DIR = Path(__file__).resolve().parents[3]
PRE_ROOT = ROOT_DIR / "src" / "celldega" / "pre"

Expand Down
Loading