Skip to content

Commit

Permalink
added crosswalking
Browse files Browse the repository at this point in the history
  • Loading branch information
vickydaiya committed Dec 7, 2023
1 parent 7fa64ff commit 0ca01f8
Show file tree
Hide file tree
Showing 8 changed files with 639 additions and 21 deletions.
1 change: 1 addition & 0 deletions constants.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ _=${VERSION:?"No version selected!"}
# Shorthands and configuration options
# Each `${VAR:-default}` keeps a value already provided by the caller and falls
# back to the default only when the variable is unset or empty.
export RUNNER=${RUNNER:-"docker"}
export DATASETS_DIR=${DATASETS_DIR:-"./datasets"}
export CROSSWALKING_TABLES_DIR=${CROSSWALKING_TABLES_DIR:-"./crosswalking-tables"}
export RAW_DATA_DIR=${RAW_DATA_DIR:-"./raw-data"}
# The next two are always derived from RAW_DATA_DIR (any pre-set value is
# overwritten), so they must stay after the RAW_DATA_DIR line.
export OUTPUT_DIR="$RAW_DATA_DIR/$DATASET/$VERSION"
export DATA_REPO_DIR="$RAW_DATA_DIR/data-repo"
Expand Down
350 changes: 350 additions & 0 deletions crosswalking-tables/azimuth.csv

Large diffs are not rendered by default.

238 changes: 238 additions & 0 deletions crosswalking-tables/celltypist.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,238 @@
Celltypist_Label,Label,CL_ID,CL_Match
Tcm/Naive helper T cells,"central memory CD4-positive, alpha-beta T cell",CL:0000904,
CD16+ NK cells,"CD16-positive, CD56-dim natural killer cell, human",CL:0000939,
Tem/Effector helper T cells,"effector memory CD4-positive, alpha-beta T cell",CL:0000905,
Tcm/Naive cytotoxic T cells,"central memory CD8-positive, alpha-beta T cell",CL:0000907,
Tem/Temra cytotoxic T cells,"effector memory CD8-positive, alpha-beta T cell, terminally differentiated",CL:0001062,
Classical monocytes,classical monocyte,CL:0000860,
Switched memory B cells,class switched memory B cell,CL:0000972,
MAIT cells,mucosal invariant T cell,CL:0000940,
Naive B cells,naive B cell,CL:0000788,
Tem/Trm cytotoxic T cells,"effector memory CD8-positive, alpha-beta T cell",CL:0000913,
Non-switched memory B cells,unswitched memory B cell,CL:0000970,
Non-classical monocytes,non-classical monocyte,CL:0000875,
CD16- NK cells,"CD16-negative, CD56-bright natural killer cell, human",CL:0000938,
gdT,gamma-delta T cell,CL:0000798,
Regulatory T cells,regulatory T cell,CL:0000815,
DC2,conventional dendritic cell,CL:0000990,
Plasma cells,plasma cell,CL:0000786,
Memory B cells,memory B cell,CL:0000787,
Tem/Temra helper T cells,"effector memory CD4-positive, alpha-beta T cell, terminally differentiated",CL:0001087,
Megakaryocytes/platelets,megakaryocyte,CL:0000556,
Age-associated B cells,memory B cell,CL:0000787,
Cycling immune mix,cell,CL:0000000,
pDC,plasmacytoid dendritic cell,CL:0000784,
DC1,conventional dendritic cell,CL:0000990,
Plasmablasts,plasmablast,CL:0000980,
HSC/MPP,hematopoietic multipotent progenitor cell,CL:0000837,
C1 non-classical monocytes,non-classical monocyte,CL:0000875,
Neutrophil-myeloid progenitor,neutrophil progenitor cell,CL:0000834,
Megakaryocyte precursor,megakaryocyte progenitor cell,CL:0000553,
Mid erythroid,erythroid lineage cell,CL:0000764,
Early erythroid,erythroid lineage cell,CL:0000764,
CMP,common myeloid progenitor,CL:0000049,
MEMP,megakaryocyte-erythroid progenitor cell,CL:0000050,
Early MK,megakaryocyte progenitor cell,CL:0000553,
ELP,early lymphoid progenitor,CL:0000936,
CRTAM+ gamma-delta T cells,gamma-delta T cell,CL:0000798,
Erythrocytes,erythrocyte,CL:0000232,
GMP,granulocyte monocyte progenitor cell,CL:0000557,
Late erythroid,erythroid lineage cell,CL:0000764,
Promyelocytes,promyelocyte,CL:0000836,
Neutrophils,neutrophil,CL:0000775,
Cycling T&NK,cell,CL:0000000,
DC precursor,common dendritic progenitor,CL:0001029,
ETP,thymocyte,CL:0000893,
Follicular helper T cells,T follicular helper cell,CL:0002038,
NAMPT neutrophils,neutrophil,CL:0000775,
CD24 neutrophils,neutrophil,CL:0000775,
Mast cells,mast cell,CL:0000097,
Pro-B cells,pro-B cell,CL:0000826,
Small pre-B cells,small pre-B-II cell,CL:0000954,
pDC precursor,"plasmacytoid dendritic cell, human",CL:0001058,
Atrial cardiomyocytes,regular atrial cardiac myocyte,CL:0002129,
Adipocytes,fat cell,CL:0000136,
Fibroblasts,fibroblast,CL:0000057,
Arterial SMC,vascular associated smooth muscle cell,CL:0000359,
Myeloid,myeloid cell,CL:0000763,
Cytoplasmic cardiomyocytes,cardiac muscle cell,CL:0000746,
Capillary endothelial cells,capillary endothelial cell,CL:0002144,
Fibroblast-like endothelial cells,endothelial cell,CL:0000115,
Basic SMC,smooth muscle cell,CL:0000192,
Ventricle-enriched pericytes,pericyte,CL:0000669,
CMC-like endothelial cells,endothelial cell,CL:0000115,
Lymphatic endothelial cells,endothelial cell of lymphatic vessel,CL:0002138,
Arterial endothelial cells,endothelial cell of artery,CL:1000413,
Atria-enriched pericytes,pericyte,CL:0000669,
Venous endothelial cells,vein endothelial cell,CL:0002543,
Stromal pericytes,pericyte,CL:0000669,
Ventricular cardiomyocytes,regular ventricular cardiac myocyte,CL:0002131,
Lymphoid,lymphocyte,CL:0000542,
Neuronal cells,neuron,CL:0000540,
CMC-like pericytes,pericyte,CL:0000669,
Atria-enriched endothelial cells,endothelial cell,CL:0000115,
Immune-related endothelial cells,endothelial cell,CL:0000115,
NK cells,natural killer cell,CL:0000623,
MT-high fibroblasts,fibroblast,CL:0000057,
Mesothelial cells,mesothelial cell,CL:0000077,
Oligodendrocytes,oligodendrocyte,CL:0000128,
CA3 FGF5 ExN,hippocampal pyramidal neuron,CL:1001571,
SGCZ+ granule cells,hippocampal granule cell,CL:0001033,
P2RY12+ microglia,mature microglial cell,CL:0002629,
OPC,oligodendrocyte precursor cell,CL:0002453,
CA ACVR1C ExN,hippocampal pyramidal neuron,CL:1001571,
VIP InN,hippocampal interneuron,CL:1001569,
PDLIM5+ granule cells,hippocampal granule cell,CL:0001033,
LAMP5 NMBR InN,hippocampal interneuron,CL:1001569,
NR2F2 InN,hippocampal interneuron,CL:1001569,
CD83+ microglia,mature microglial cell,CL:0002629,
GFAP low astrocytes,astrocyte of the hippocampus,CL:0002604,
CA GRIK1 ExN,hippocampal pyramidal neuron,CL:1001571,
LAMP5 CHST9 InN,hippocampal interneuron,CL:1001569,
GFAP high astrocytes,astrocyte of the hippocampus,CL:0002604,
PVALB InN,hippocampal interneuron,CL:1001569,
SST InN,hippocampal interneuron,CL:1001569,
CA/Sub COBLL1 ExN,hippocampal pyramidal neuron,CL:1001571,
LAMP5 KIT InN,hippocampal interneuron,CL:1001569,
COP,oligodendrocyte precursor cell,CL:0002453,
Macrophages,macrophage,CL:0000235,
T cells,T cell,CL:0000084,
CA/Sub GFRA1 ExN,hippocampal pyramidal neuron,CL:1001571,
CA/Sub CUX2 ExN,hippocampal pyramidal neuron,CL:1001571,
MEIS2 SHISAL2B InN,hippocampal interneuron,CL:1001569,
CA GLYATL1 ExN,hippocampal pyramidal neuron,CL:1001571,
CA/Sub TRPC3 ExN,hippocampal pyramidal neuron,CL:1001571,
CA/Sub PXDN ExN,hippocampal pyramidal neuron,CL:1001571,
CA/Sub KCNK13 ExN,hippocampal pyramidal neuron,CL:1001571,
VLMC,vascular leptomeningeal cell,CL:4023051,
Pericytes,pericyte,CL:0000669,
Endothelial cells,endothelial cell,CL:0000115,
Venous SMC,smooth muscle cell of the brain vasculature,CL:0002590,
Ependymal cells,ependymal cell,CL:0000065,
Sub DCN ExN,hippocampal pyramidal neuron,CL:1001571,
TA,transit amplifying cell,CL:0009010,
Enterocytes,enterocyte,CL:0000584,
Tuft cells,tuft cell of colon,CL:0009041,
Goblet cells,goblet cell,CL:0000160,
BEST4+ enterocytes,"BEST4+ intestinal epithelial cell, human",CL:4030026,
Paneth,paneth cell,CL:0000510,
Colonocytes,enterocyte of epithelium of large intestine,CL:0002071,
Enteroendocrine cells,enteroendocrine cell,CL:0000164,
Stem cells,stem cell,CL:0000034,
Cycling/Secretory TA,transit amplifying cell,CL:0009010,
Stromal 1,stromal cell,CL:0000499,
Myofibroblasts,myofibroblast cell,CL:0000186,
Stromal 3,stromal cell,CL:0000499,
Stromal 2,stromal cell,CL:0000499,
Glia,glial cell,CL:0000125,
gamma-delta T cells,gamma-delta T cell,CL:0000798,
Intestinal macrophages,colon macrophage,CL:0009038,
Follicular B cells,follicular B cell,CL:0000843,
ILC,innate lymphoid cell,CL:0001065,
Type 17 helper T cells,T-helper 17 cell,CL:0000899,
Germinal center B cells,germinal center B cell,CL:0000844,
Trm cytotoxic T cells,"effector memory CD8-positive, alpha-beta T cell",CL:0000913,
Monocytes,monocyte,CL:0000576,
Type 1 helper T cells,T-helper 1 cell,CL:0000545,
Cycling B cells,B cell,CL:0000236,
Immature goblet cells,goblet cell,CL:0000160,
DCT,kidney distal convoluted tubule epithelial cell,CL:1000849,
PT-S1/2,epithelial cell of proximal tubule,CL:0002306,
dPT,epithelial cell of proximal tubule,CL:0002306,
aPT,epithelial cell of proximal tubule,CL:0002306,
T/NK,lymphocyte,CL:0000542,
CNT,kidney connecting tubule epithelial cell,CL:1000768,
aTAL,kidney loop of Henle thick ascending limb epithelial cell,CL:1001106,
C-TAL,kidney loop of Henle thick ascending limb epithelial cell,CL:1001106,
IC-A,kidney collecting duct intercalated cell,CL:1001432,
EC-PTC,peritubular capillary endothelial cell,CL:1001033,
FIB/aFIB,kidney interstitial fibroblast,CL:1000692,
PC,kidney collecting duct principal cell,CL:1001431,
PT-S3,epithelial cell of proximal tubule,CL:0002306,
EC-GC,glomerular capillary endothelial cell,CL:1001005,
IC-B,kidney collecting duct intercalated cell,CL:1001432,
Podocytes,podocyte,CL:0000653,
PEC,parietal epithelial cell,CL:1000452,
EC-DVR,vasa recta cell,CL:1001036,
EC-AEA,kidney arterial blood vessel cell,CL:1000891,
M-TAL,kidney loop of Henle thick ascending limb epithelial cell,CL:1001106,
IMCD,kidney inner medulla collecting duct epithelial cell,CL:1000547,
EC-LYM,endothelial cell of lymphatic vessel,CL:0002138,
M-FIB,renal medullary fibroblast,CL:4030022,
DTL,kidney loop of Henle thin descending limb epithelial cell,CL:1001111,
EC-AVR,vasa recta cell,CL:1001036,
VSMC/P,mural cell,CL:0008034,
DC,"dendritic cell, human",CL:0001056,
VSMC,vascular associated smooth muscle cell,CL:0000359,
dATL,kidney loop of Henle thin ascending limb epithelial cell,CL:1001107,
PT,epithelial cell of proximal tubule,CL:0002306,
dVSMC,vascular associated smooth muscle cell,CL:0000359,
dM-FIB,renal medullary fibroblast,CL:4030022,
dC-IC-A,kidney collecting duct intercalated cell,CL:1001432,
CCD-IC-A,kidney collecting duct intercalated cell,CL:1001432,
B cells,B cell,CL:0000236,
Immune mix,cell,CL:0000000,
Central venous LSECs,endothelial cell of pericentral hepatic sinusoid,CL:0019022,
Interzonal hepatocytes,midzonal region hepatocyte,CL:0019028,
Inflammatory macrophages,inflammatory macrophage,CL:0000863,
Pericentral hepatocytes,centrilobular region hepatocyte,CL:0019029,
Periportal hepatocytes,periportal region hepatocyte,CL:0019026,
Periportal LSECs,endothelial cell of periportal hepatic sinusoid,CL:0019021,
Non-inflammatory macrophages,macrophage,CL:0000235,
Portal endothelial cells,endothelial cell of periportal hepatic sinusoid,CL:0019021,
Cholangiocytes,cholangiocyte,CL:1000488,
Stellate cells,hepatic stellate cell,CL:0000632,
CD4 T cells,"CD4-positive, alpha-beta T cell",CL:0000624,
Migratory DCs,"dendritic cell, human",CL:0001056,
Capillary aerocytes,alveolar capillary type 2 endothelial cell,CL:4028003,
Pulmonary venous endothelial cells,vein endothelial cell,CL:0002543,
Alveolar macrophages,alveolar macrophage,CL:0000583,
Systemic venous endothelial cells,vein endothelial cell,CL:0002543,
Pulmonary SMC,smooth muscle cell of the pulmonary artery,CL:0002591,
Club cells,club cell,CL:0000158,
AT1,type I pneumocyte,CL:0002062,
AT2,type II pneumocyte,CL:0002063,
Suprabasal cells,basal cell,CL:0000646,
Basal cells,basal cell,CL:0000646,
Cycling macrophages,macrophage,CL:0000235,
Cycling T,T cell,CL:0000084,
Cycling NK,natural killer cell,CL:0000623,
Intermediate macrophages,macrophage,CL:0000235,
Alveolar fibroblasts,fibroblast of lung,CL:0002553,
Ciliated cells,lung ciliated cell,CL:1000271,
Adventitial fibroblasts,fibroblast,CL:0000057,
Lipofibroblasts,alveolar type 1 fibroblast cell,CL:4028004,
Ionocytes,ionocyte,CL:0005006,
Fibromyocytes,muscle cell,CL:0000187,
Activated CD4 T cells,"activated CD4-positive, alpha-beta T cell",CL:0000896,
BTAF1 high T cells,T cell,CL:0000084,
Erythrophagocytic macrophages,macrophage,CL:0000235,
BTAF1 high B cells,B cell,CL:0000236,
mLN Stroma (FMO2+),stromal cell,CL:0000499,
Alpha cells,pancreatic A cell,CL:0000171,
Delta cells,pancreatic D cell,CL:0000173,
Beta cells,type B pancreatic cell,CL:0000169,
Stressed beta cells,type B pancreatic cell,CL:0000169,
Ductal cells,pancreatic ductal cell,CL:0002079,
Gamma cells,pancreatic PP cell,CL:0002275,
Activated stellate cells,pancreatic stellate cell,CL:0002410,
Secretory acinar cells,pancreatic acinar cell,CL:0002064,
Acinar-mix,pancreatic acinar cell,CL:0002064,
Idling acinar cells,pancreatic acinar cell,CL:0002064,
Quiescent stellate cells,pancreatic stellate cell,CL:0002410,
MUC5B+ ductal cells,pancreatic ductal cell,CL:0002079,
PAX7 low MuSCs,skeletal muscle satellite stem cell,CL:0008011,
SMC/Pericyte,mural cell,CL:0008034,
C1QA+ CD74+ macrophages,macrophage,CL:0000235,
NK/T cells,lymphocyte,CL:0000542,
PAX7 high MuSCs,skeletal muscle satellite stem cell,CL:0008011,
Ribosome high skeletal muscle,cell of skeletal muscle,CL:0000188,
LRRK2+ fast skeletal muscle,fast muscle cell,CL:0000190,
S100A9+ LYZ+ macrophages,macrophage,CL:0000235,
Mesenchymal stem cells,mesenchymal stem cell,CL:0000134,
Slow skeletal muscle,slow muscle cell,CL:0000189,
Fast skeletal muscle,fast muscle cell,CL:0000190,
Lymphoid/Macrophage,cell,CL:0000000,
Tendon cells,tendon cell,CL:0000388,
Low-quality B cells,B cell,CL:0000236,
Erythroid,erythroid lineage cell,CL:0000764,
21 changes: 15 additions & 6 deletions src/generate-jobs/generate.js
Original file line number Diff line number Diff line change
@@ -1,14 +1,22 @@
import { writeFile } from 'node:fs/promises';
import { getSummaryRef } from '../util/common.js';
import { concurrentMap } from '../util/concurrent-map.js';
import {
ALGORITHMS,
DEFAULT_MAX_CONCURRENCY,
MAX_CONCURRENCY,
} from '../util/constants.js';
import { ALGORITHMS, DEFAULT_MAX_CONCURRENCY, MAX_CONCURRENCY } from '../util/constants.js';
import { createSpecs } from './spec.js';
import { getJobGeneratorRef } from './utils.js';
import { UnknownOrganError } from '../util/errors.js';
import { getCrosswalkingFilePath } from '../util/paths.js';
import { fileExists } from '../util/fs.js';

/**
 * Checks, for every entry of `ALGORITHMS`, whether its crosswalking file exists.
 *
 * @param {object} config - Configuration passed through to `getCrosswalkingFilePath`.
 * @returns {Promise<object>} Object keyed by algorithm name whose values are the
 *   results of `fileExists` for that algorithm's crosswalking file path.
 */
async function crosswalkExists(config) {
  const existsByAlgorithm = {};

  // Each check records its own key, so the checks can be run through concurrentMap.
  await concurrentMap(ALGORITHMS, async (algorithm) => {
    const tablePath = getCrosswalkingFilePath(config, algorithm);
    const exists = await fileExists(tablePath);
    existsByAlgorithm[algorithm] = exists;
  });

  return existsByAlgorithm;
}

async function tryGenerateJobs(dataset, config) {
const ref = getSummaryRef(dataset);
Expand All @@ -21,7 +29,8 @@ async function tryGenerateJobs(dataset, config) {
}
});

const specs = createSpecs(metadata, config);
const crosswalks = await crosswalkExists(config);
const specs = createSpecs(metadata, config, crosswalks);
for (const algorithm in specs) {
const spec = specs[algorithm];
const specString = JSON.stringify(spec, undefined, 2);
Expand Down
25 changes: 19 additions & 6 deletions src/generate-jobs/spec.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import { join } from 'node:path';

import { ALGORITHMS, DATA_FILE } from '../util/constants.js';
import { getModelsDir } from '../util/paths.js';
import { getCrosswalkingFilePath, getModelsDir } from '../util/paths.js';

// Metadata overlay that maps every algorithm in ALGORITHMS to `false` (disabled).
const ALL_DISABLED_METADATA = Object.fromEntries(ALGORITHMS.map((algorithm) => [algorithm, false]));

Expand Down Expand Up @@ -31,12 +31,23 @@ function getEnabledAlgorithms(metadata) {
return ALGORITHMS.filter((algorithm) => metadata[algorithm] !== false);
}

function createAlgorithmSpec(algorithm, metadata, defaults) {
function createAlgorithmSpec(config, algorithm, metadata, defaults, crosswalkExists) {
return {
[algorithm]: {
...defaults[algorithm],
...metadata[algorithm],
},
...(crosswalkExists && {
crosswalk: {
table: {
class: 'File',
path: getCrosswalkingFilePath(config, algorithm),
},
tableLabelColumn: `${algorithm.charAt(0).toUpperCase() + algorithm.slice(1)}_Label`,
tableClidColumn: 'CL_ID',
tableMatchColumn: 'CL_Match',
},
}),
summarize: {
annotationMethod: algorithm,
cellSource: metadata.cellSource,
Expand All @@ -45,10 +56,12 @@ function createAlgorithmSpec(algorithm, metadata, defaults) {
};
}

export function createSpec(metadata, config) {
export function createSpec(metadata, config, crosswalks) {
const defaults = getAlgorithmDefaults(config);
const algorithms = getEnabledAlgorithms(metadata);
const algorithmSpecs = algorithms.map((algorithm) => createAlgorithmSpec(algorithm, metadata, defaults));
const algorithmSpecs = algorithms.map((algorithm) =>
createAlgorithmSpec(config, algorithm, metadata, defaults, crosswalks[algorithm])
);

return {
organ: metadata.organ,
Expand All @@ -60,15 +73,15 @@ export function createSpec(metadata, config) {
};
}

/**
 * Builds one spec per enabled algorithm.
 *
 * For every algorithm returned by `getEnabledAlgorithms(metadata)`, a copy of the
 * metadata is created in which all algorithms are first switched off (via
 * `ALL_DISABLED_METADATA`) and then only the current algorithm's original entry is
 * restored. `createSpec` is called once on that copy.
 *
 * @param {object} metadata - Dataset metadata, including the per-algorithm entries.
 * @param {object} config - Configuration forwarded unchanged to `createSpec`.
 * @param {object} [crosswalks={}] - Per-algorithm values keyed by algorithm name,
 *   forwarded to `createSpec`. Defaults to an empty object so callers using the
 *   earlier two-argument form keep working (no algorithm is then treated as having
 *   a crosswalk).
 * @returns {object} Specs keyed by algorithm name.
 */
export function createSpecs(metadata, config, crosswalks = {}) {
  const result = {};
  for (const algorithm of getEnabledAlgorithms(metadata)) {
    const newMetadata = {
      ...metadata,
      ...ALL_DISABLED_METADATA,
      // Restore only the algorithm this spec is for; the rest stay disabled.
      [algorithm]: metadata[algorithm],
    };
    result[algorithm] = createSpec(newMetadata, config, crosswalks);
  }

  return result;
}
Expand Down
1 change: 1 addition & 0 deletions src/util/constants.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ export const DATASET_HANDLERS = 'DATASET_HANDLERS';
// Configuration key names; each constant's value is its own name as a string.
export const DATASET = 'DATASET';
export const VERSION = 'VERSION';
export const DATASETS_DIR = 'DATASETS_DIR';
// Key for the crosswalking tables directory.
// NOTE(review): presumably matches the CROSSWALKING_TABLES_DIR variable exported by constants.sh — confirm.
export const CROSSWALKING_TABLES_DIR = 'CROSSWALKING_TABLES_DIR';
export const OUTPUT_DIR = 'OUTPUT_DIR';
export const DATA_REPO_DIR = 'DATA_REPO_DIR';
export const CACHE_DIR = 'CACHE_DIR';
Expand Down
2 changes: 2 additions & 0 deletions src/util/default-env-reviver.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import { resolve } from 'node:path';
import {
CACHE_DIR,
CROSSWALKING_TABLES_DIR,
DATASETS_DIR,
DATASET_HANDLERS,
DATA_REPO_DIR,
Expand Down Expand Up @@ -37,6 +38,7 @@ export function defaultEnvReviver(key, value) {
case DATA_REPO_DIR:
case CACHE_DIR:
case MODELS_DIR:
case CROSSWALKING_TABLES_DIR:
case SRC_DIR:
return resolve(value);

Expand Down
Loading

0 comments on commit 0ca01f8

Please sign in to comment.