Skip to content

Commit

Permalink
added pipeline for SenNet
Browse files Browse the repository at this point in the history
  • Loading branch information
vickydaiya committed Dec 6, 2023
1 parent 77c4e8c commit 7fa64ff
Show file tree
Hide file tree
Showing 16 changed files with 438 additions and 265 deletions.
68 changes: 12 additions & 56 deletions src/hubmap/downloader.js
Original file line number Diff line number Diff line change
@@ -1,71 +1,27 @@
import { execFile as callbackExecFile } from 'node:child_process';
import { promisify } from 'node:util';
import { getSrcFilePath } from '../util/paths.js';
import { Config } from '../util/config.js';
import { FORCE } from '../util/constants.js';
import { downloadFile } from '../util/fs.js';
import { IDownloader } from '../util/handler.js';
import { getMetadataLookup } from './metadata.js';
import { XConsortiaDownloader } from '../xconsortia/downloader.js';
import { getMetadata } from '../xconsortia/metadata.js';
import { ID_KEYWORD, METADATA_FIELDS, metadataToLookup } from './metadata.js';

const HUBMAP_TOKEN = 'HUBMAP_TOKEN';
const HUBMAP_SEARCH_URL = 'HUBMAP_SEARCH_URL';
const HUBMAP_ASSETS_URL = 'HUBMAP_ASSETS_URL';
const DEFAULT_HUBMAP_SEARCH_URL =
'https://search.api.hubmapconsortium.org/v3/portal/search';
const DEFAULT_HUBMAP_SEARCH_URL = 'https://search.api.hubmapconsortium.org/v3/portal/search';
const DEFAULT_HUBMAP_ASSETS_URL = 'https://assets.hubmapconsortium.org/';

const execFile = promisify(callbackExecFile);

/** @implements {IDownloader} */
export class Downloader {
export class Downloader extends XConsortiaDownloader {
constructor(config) {
/** @type {Config} */
this.config = config;
/** @type {string} */
this.token = config.get(HUBMAP_TOKEN);
/** @type {string} */
this.searchUrl = config.get(HUBMAP_SEARCH_URL, DEFAULT_HUBMAP_SEARCH_URL);
/** @type {string} */
this.assetsUrl = config.get(HUBMAP_ASSETS_URL, DEFAULT_HUBMAP_ASSETS_URL);
this.exprAdjustScript = 'expr_h5ad_adjust.py';
/** @type {string} */
this.exprAdjustScriptFilePath = getSrcFilePath(
super(
config,
'hubmap',
this.exprAdjustScript
config.get(HUBMAP_TOKEN),
config.get(HUBMAP_SEARCH_URL, DEFAULT_HUBMAP_SEARCH_URL),
config.get(HUBMAP_ASSETS_URL, DEFAULT_HUBMAP_ASSETS_URL)
);
}

async prepareDownload(datasets) {
const ids = datasets.map((dataset) => dataset.id);
const lookup = await getMetadataLookup(ids, this.searchUrl, this.token);
for (const dataset of datasets) {
const metadata = lookup.get(dataset.id);
Object.assign(dataset, metadata);
}
}

async download(dataset) {
if (!dataset.uuid) {
throw new Error('Missing uuid - Dataset might have been deleted');
}

const url = new URL(`${dataset.uuid}/expr.h5ad`, this.assetsUrl);
url.searchParams.set('token', this.token);
await downloadFile(dataset.dataFilePath, url, {
headers: {
Authorization: `Bearer ${this.token}`,
},
overwrite: this.config.get(FORCE, false),
});

const { stdout } = await execFile('python3', [
this.exprAdjustScriptFilePath,
dataset.dataFilePath,
'--assay',
dataset.assay_type,
'--output',
dataset.dataFilePath,
]);
async getMetadataLookup(ids) {
const metadata = await getMetadata(ids, this.searchUrl, this.token, ID_KEYWORD, METADATA_FIELDS);
return metadataToLookup(metadata);
}
}
2 changes: 1 addition & 1 deletion src/hubmap/index.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
export * from './listing.js';
export * from './downloader.js';
export * from './job-generator.js';
export * from '../xconsortia/job-generator.js';

export function supports(dataset) {
return /^hbm/i.test(dataset.id);
Expand Down
32 changes: 5 additions & 27 deletions src/hubmap/listing.js
Original file line number Diff line number Diff line change
@@ -1,36 +1,14 @@
import { Config } from '../util/config.js';
import { checkFetchResponse } from '../util/fs.js';
import { IListing } from '../util/handler.js';
import { getHeaders } from './metadata.js';
import { XConsortiaListing } from '../xconsortia/listing.js';

const HUBMAP_TOKEN = 'HUBMAP_TOKEN';
const HUBMAP_SEARCH_URL = 'HUBMAP_SEARCH_URL';
const DEFAULT_HUBMAP_SEARCH_URL =
'https://search.api.hubmapconsortium.org/v3/portal/search';
const DEFAULT_HUBMAP_SEARCH_URL = 'https://search.api.hubmapconsortium.org/v3/portal/search';

/** @implements {IListing} */
export class Listing {
export class Listing extends XConsortiaListing {
constructor(config) {
/** @type {Config} */
this.config = config;
/** @type {string} */
this.token = config.get(HUBMAP_TOKEN);
/** @type {string} */
this.searchUrl = config.get(HUBMAP_SEARCH_URL, DEFAULT_HUBMAP_SEARCH_URL);
}

async getDatasets() {
const resp = await fetch(this.searchUrl, {
method: 'POST',
headers: getHeaders(this.token),
body: JSON.stringify(this.getBody()),
});
checkFetchResponse(resp, 'HuBMAP: Failed to fetch list of collections');

const {
hits: { hits },
} = await resp.json();
return hits.map(({ _source: { hubmap_id } }) => hubmap_id);
super(config, config.get(HUBMAP_TOKEN), config.get(HUBMAP_SEARCH_URL, DEFAULT_HUBMAP_SEARCH_URL), 'hubmap_id');
}

getBody() {
Expand All @@ -44,7 +22,7 @@ export class Listing {
},
},
_source: {
includes: ['hubmap_id'],
includes: [this.idKeyword],
},
};
}
Expand Down
199 changes: 33 additions & 166 deletions src/hubmap/metadata.js
Original file line number Diff line number Diff line change
@@ -1,133 +1,41 @@
import { getMetadata, getSampleBlockId, getSampleSectionId, ORGAN_MAPPING } from '../xconsortia/metadata.js';

/**
* @typedef {object} HubmapMetadata
* @property {string} uuid
* @property {string} organ
* @property {string} organ_source
* @property {string} assay_type
* @property {string} dataset_iri
* @property {string} dataset_id
* @property {string} donor_sex
* @property {string} donor_race
* @property {string} donor_age
* @property {string} donor_bmi
* @property {string} block_sample_iri
* @property {string} section_sample_iri
* @property {string} donor_iri
* @property {string} block_sample_id
* @property {string} section_sample_id
* @property {string} donor_id
*/

// Base URL that ancestor uuids are appended to when building sample block
// and section identifiers (see getSampleBlockId / getSampleSectionId).
const HUBMAP_ENTITY_ENDPOINT =
'https://entity.api.hubmapconsortium.org/entities/';
// Base URL of the HuBMAP portal dataset browser.
// NOTE(review): presumably a dataset uuid is appended to form the dataset
// link - the usage is not visible in this part of the file; confirm.
const HUBMAP_PORTAL_ENDPOINT =
'https://portal.hubmapconsortium.org/browse/dataset/';

// Maps the organ code found on a dataset's origin sample to an ontology term
// id. The lookup is performed with the upper-cased organ code.
// NOTE(review): most values are UBERON ids, but LN, RN, LO and RO map to FMA
// ids. The organ_id built in toLookup always uses a
// `http://purl.obolibrary.org/obo/UBERON_` prefix followed by the part after
// the colon, so the FMA entries look like they yield malformed IRIs - confirm.
const ORGAN_MAPPING = {
AO: 'UBERON:0000948',
BL: 'UBERON:0001255',
BD: 'UBERON:0001270',
BM: 'UBERON:0001270',
BR: 'UBERON:0000955',
LB: 'UBERON:0001004',
RB: 'UBERON:0001004',
LE: 'UBERON:0004548',
RE: 'UBERON:0004549',
LF: 'UBERON:0001303',
RF: 'UBERON:0001302',
HT: 'UBERON:0000948',
LK: 'UBERON:0004538',
RK: 'UBERON:0004539',
LI: 'UBERON:0000059',
LV: 'UBERON:0002107',
LL: 'UBERON:0001004',
LN: 'FMA:24978',
RL: 'UBERON:0001004',
RN: 'FMA:24977',
LY: 'UBERON:0002509',
LO: 'FMA:7214',
RO: 'FMA:7213',
PA: 'UBERON:0001264',
PL: 'UBERON:0001987',
SI: 'UBERON:0002108',
SK: 'UBERON:0002097',
SP: 'UBERON:0002106',
TH: 'UBERON:0002370',
TR: 'UBERON:0001004',
UR: 'UBERON:0001223',
UT: 'UBERON:0000995',
};

/**
 * Builds the HTTP headers sent with JSON requests to the search API.
 *
 * @param {string} token Value placed after `Bearer ` in the Authorization header
 * @returns {{ 'Content-type': string, Authorization: string }} Request headers
 */
export function getHeaders(token) {
  const authorization = `Bearer ${token}`;
  return { 'Content-type': 'application/json', Authorization: authorization };
}

/**
 * Builds the search request body that selects datasets by HuBMAP id.
 *
 * @param {string[]} ids Ids matched against the `hubmap_id.keyword` field
 * @returns {object} Search query limited to the source fields listed below
 */
function getBody(ids) {
  // Source fields requested for every hit.
  const sourceFields = [
    'uuid',
    'hubmap_id',
    'origin_samples.organ',
    'data_types',
    'mapped_consortium',
    'group_name',
    'group_uuid',
    'donor.mapped_metadata.race',
    'donor.mapped_metadata.sex',
    'donor.mapped_metadata.age_value',
    'donor.mapped_metadata.body_mass_index_value',
    'ancestors',
    'donor.uuid',
    'source_samples',
  ];
  const query = { terms: { 'hubmap_id.keyword': ids } };

  return {
    version: true,
    from: 0,
    size: 10000,
    query,
    _source: { includes: sourceFields },
  };
}

/**
 * Throws when a fetch response does not report success.
 *
 * @param {Response} response Response whose `ok` flag is checked
 * @throws {Error} If `response.ok` is falsy; the message carries the status
 *   code and status text
 */
function checkResponse(response) {
  if (response.ok) {
    return;
  }

  throw new Error(
    `Failed to fetch metadata: ${response.status}:${response.statusText}`
  );
}

/**
 * Finds the first ancestor that is a sample of category "block" and returns
 * its entity IRI together with its RUI location.
 *
 * The result always has the same shape, so callers can destructure it safely:
 * when no block ancestor exists both fields are empty strings. (Previously a
 * bare `''` was returned in that case, which destructured to `undefined`.)
 *
 * @param {object[]} [ancestors] Ancestor entities of a dataset; a missing list
 *   is treated as empty
 * @returns {{ block_id: string, rui_location: string }} Block IRI and RUI
 *   location, or empty strings when no block sample is found
 */
function getSampleBlockId(ancestors) {
  for (const ancestor of ancestors ?? []) {
    // Optional chaining: ancestors lacking these fields are simply not a match.
    const entityType = ancestor?.entity_type?.toLowerCase();
    const category = ancestor?.sample_category?.toLowerCase();
    if (entityType === 'sample' && category === 'block') {
      return {
        block_id: `${HUBMAP_ENTITY_ENDPOINT}${ancestor.uuid}`,
        rui_location: ancestor.rui_location ?? '',
      };
    }
  }

  return { block_id: '', rui_location: '' };
}

/**
 * Returns the entity IRI of the first ancestor that is a sample of category
 * "section".
 *
 * @param {Iterable<object>} ancestors Ancestor entities of a dataset
 * @param {*} type Not used by this function
 * @returns {string} Section IRI, or an empty string when no section is found
 */
function getSampleSectionId(ancestors, type) {
  for (const node of ancestors) {
    const isSample = node['entity_type'].toLowerCase() === 'sample';
    if (!isSample) {
      continue;
    }
    if (node['sample_category'].toLowerCase() !== 'section') {
      continue;
    }
    return `${HUBMAP_ENTITY_ENDPOINT}${node['uuid']}`;
  }

  return '';
}
const HUBMAP_ENTITY_ENDPOINT = 'https://entity.api.hubmapconsortium.org/entities/';
const HUBMAP_PORTAL_ENDPOINT = 'https://portal.hubmapconsortium.org/browse/dataset/';
export const ID_KEYWORD = 'hubmap_id';
export const METADATA_FIELDS = [
'uuid',
'hubmap_id',
'origin_samples.organ',
'data_types',
'mapped_consortium',
'group_name',
'group_uuid',
'donor.mapped_metadata.race',
'donor.mapped_metadata.sex',
'donor.mapped_metadata.age_value',
'donor.mapped_metadata.body_mass_index_value',
'ancestors',
'donor.uuid',
];

function toLookup(result) {
export function metadataToLookup(result) {
/** @type {Map<string, HubmapMetadata>} */
const lookup = new Map();
for (const hit of result.hits.hits) {
Expand All @@ -142,23 +50,18 @@ function toLookup(result) {
group_uuid,
donor: {
mapped_metadata: {
age_value: [donor_age] = [null],
race: [donor_race] = [null],
sex: [donor_sex] = [null],
body_mass_index_value: [donor_bmi] = [null],
} = {
age_value: [null],
race: [null],
sex: [null],
body_mass_index_value: [null],
},
age_value: [donor_age] = [],
race: [donor_race] = [],
sex: [donor_sex] = [],
body_mass_index_value: [donor_bmi] = [],
} = {},
uuid: donor_uuid,
},
ancestors,
},
} = hit;
const mapped_organ = ORGAN_MAPPING[organ.toUpperCase()];
const { block_id, rui_location } = getSampleBlockId(ancestors);
const { block_id, rui_location } = getSampleBlockId(ancestors, HUBMAP_ENTITY_ENDPOINT);
lookup.set(hubmap_id, {
organ: mapped_organ,
organ_source: organ,
Expand All @@ -175,48 +78,12 @@ function toLookup(result) {
donor_sex: donor_sex ?? '',
donor_bmi: donor_bmi ?? '',
donor_race: donor_race ?? '',
organ_id: `http://purl.obolibrary.org/obo/UBERON_${
mapped_organ.split(':')[1]
}`,
organ_id: `http://purl.obolibrary.org/obo/UBERON_${mapped_organ.split(':')[1]}`,
block_id,
section_id: getSampleSectionId(ancestors),
section_id: getSampleSectionId(ancestors, HUBMAP_ENTITY_ENDPOINT),
rui_location,
});
}

return lookup;
}

/**
 * Follows up on a 303 response from the search api.
 *
 * The search api answers with a 303 when the query result is too large to
 * return directly; the response body then holds a temporary url from which
 * the result can be downloaded. If the body does not start with `https`, the
 * given response is handed back unchanged.
 *
 * @param {Response} resp Response whose body text is inspected
 * @returns {Promise<Response>} Response fetched from the temporary url, or
 *   `resp` itself
 */
async function handle303Response(resp) {
  const redirectTarget = await resp.text();
  const isDownloadUrl = redirectTarget.startsWith('https');

  return isDownloadUrl ? fetch(redirectTarget) : resp;
}

/**
 * Queries the search api for the given dataset ids and converts the result
 * into a metadata lookup.
 *
 * @param {string[]} ids Dataset ids to query
 * @param {string} url Search api url the query is POSTed to
 * @param {string} token Token used to build the request headers
 * @returns {Promise<*>} Whatever `toLookup` produces for the parsed JSON result
 * @throws {Error} If `checkResponse` rejects the final response
 */
export async function getMetadataLookup(ids, url, token) {
  const request = {
    method: 'POST',
    headers: getHeaders(token),
    body: JSON.stringify(getBody(ids)),
  };
  const initial = await fetch(url, request);

  // A 303 carries a download url instead of the result itself.
  const resp = initial.status === 303 ? await handle303Response(initial) : initial;
  checkResponse(resp);

  return toLookup(await resp.json());
}
Loading

0 comments on commit 7fa64ff

Please sign in to comment.