Skip to content

Commit cc878c1

Browse files
add umap script
1 parent 4dc4525 commit cc878c1

File tree

3 files changed

+85
-0
lines changed

3 files changed

+85
-0
lines changed

.gitignore

+3
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
figures
2+
explore
3+
14
# OS related
25
.DS_Store
36

novae_benchmark/pan_tissue_umap.py

+67
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
import functools
import time
from pathlib import Path

import matplotlib.pyplot as plt
import scanpy as sc
import seaborn as sns
from anndata import AnnData
8+
9+
10+
def timing(func):
    """Decorate ``func`` so that every call is timed.

    The returned wrapper calls ``func`` with the arguments it receives,
    prints the elapsed wall-clock time, and returns that duration in
    seconds. The return value of ``func`` itself is discarded.
    """

    @functools.wraps(func)  # keep the wrapped function's name and docstring
    def wrapper(*args, **kwargs):
        # perf_counter is monotonic, so a system clock adjustment during a
        # long run cannot skew the measurement.
        start_time = time.perf_counter()
        func(*args, **kwargs)
        execution_time = time.perf_counter() - start_time
        print(f"Execution time: {execution_time} seconds")
        return execution_time

    return wrapper
20+
21+
22+
# Display titles for the annotation columns that get a prettified name on the
# plots; each title is simply the capitalized column name.
title_dict = {key: key.capitalize() for key in ("tissue", "technology", "domain")}
27+
28+
29+
@timing
def compute_and_save_umap(adata: AnnData, res_path: Path):
    """Compute a UMAP for ``adata`` and save one PNG per available annotation.

    The neighbor graph is built with ``use_rep="novae_latent_corrected"``,
    then the UMAP is computed from it; both scanpy calls operate on
    ``adata`` directly. One figure is written for each candidate column
    that is present in ``adata.obs``.

    Args:
        adata: Annotated data to embed and plot.
        res_path: Directory that receives the
            ``umap_<n_obs>_<column>.png`` files. It is created if missing.
    """
    print(f"{adata.n_obs=}")

    # savefig does not create missing directories; fail early (before the
    # expensive neighbors/UMAP computation) if the directory cannot be made.
    res_path.mkdir(parents=True, exist_ok=True)

    sc.pp.neighbors(adata, use_rep="novae_latent_corrected")
    sc.tl.umap(adata)

    candidate_keys = [
        "domain",
        "tissue",
        "technology",
        "novae_domains_15",
        "novae_domains_20",
        "novae_domains_25",
    ]
    # Only plot the annotations that this dataset actually carries.
    colors = [key for key in candidate_keys if key in adata.obs]

    for color in colors:
        sc.pl.umap(adata, color=color, show=False, title=title_dict.get(color, color))
        sns.despine(offset=10, trim=True)
        plt.savefig(res_path / f"umap_{adata.n_obs}_{color}.png", bbox_inches="tight", dpi=300)
        # show=False leaves the figure open; close it so figures do not
        # accumulate in memory across columns and across calls.
        plt.close()
45+
46+
47+
def main():
    """Load the concatenated dataset and plot UMAPs at several sample sizes.

    A UMAP is computed on random subsamples of increasing size (only the
    sizes strictly smaller than the dataset), then once on the full dataset.
    """
    # Local development paths:
    # data_path = Path("/Users/quentinblampey/dev/novae/data/results/dry-wood-40")
    # res_path = Path("/Users/quentinblampey/dev/novae_benchmark/figures")
    data_path = Path("/gpfs/workdir/blampeyq/novae/data/results/zany-night-17")
    res_path = Path("/gpfs/workdir/blampeyq/novae_benchmark/figures")

    adata_full = sc.read_h5ad(data_path / "adata_conc.h5ad")

    print("adata:", adata_full)

    # Sizes are in ascending order, so the first one that does not fit means
    # none of the following ones fit either.
    for n_obs in [1_000, 1_000_000, 10_000_000]:
        if n_obs >= adata_full.n_obs:
            break
        adata = sc.pp.subsample(adata_full, n_obs=n_obs, copy=True)
        compute_and_save_umap(adata, res_path)

    # Run the full dataset exactly once. Falling back to it for every
    # oversized request would recompute the same UMAP and overwrite the same
    # output files.
    compute_and_save_umap(adata_full, res_path)
64+
65+
66+
if __name__ == "__main__":
    # Run the benchmark only when this file is executed as a script.
    main()

remote/umaps.sh

+15
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
#!/bin/bash
#SBATCH --job-name=novae
#SBATCH --output=/gpfs/workdir/blampeyq/.jobs_outputs/%j
#SBATCH --mem=128G
#SBATCH --time=24:00:00
#SBATCH --cpus-per-task=2
#SBATCH --partition=cpu_long

# Slurm batch job that runs the pan-tissue UMAP script inside the `novae`
# conda environment.

# Start from a clean module environment before loading Anaconda.
module purge
module load anaconda3/2022.10/gcc-11.2.0
source activate novae

# The script path below is relative to the project root, so abort instead of
# running python from the wrong directory if the cd fails.
cd /gpfs/workdir/blampeyq/novae_benchmark || exit 1

# -u: unbuffered output, so progress shows up in the job log as it happens.
python -u novae_benchmark/pan_tissue_umap.py

0 commit comments

Comments
 (0)