diff --git a/src/hubmap/downloader.js b/src/hubmap/downloader.js index c52f62b..64a4d13 100644 --- a/src/hubmap/downloader.js +++ b/src/hubmap/downloader.js @@ -1,71 +1,27 @@ -import { execFile as callbackExecFile } from 'node:child_process'; -import { promisify } from 'node:util'; -import { getSrcFilePath } from '../util/paths.js'; -import { Config } from '../util/config.js'; -import { FORCE } from '../util/constants.js'; -import { downloadFile } from '../util/fs.js'; import { IDownloader } from '../util/handler.js'; -import { getMetadataLookup } from './metadata.js'; +import { XConsortiaDownloader } from '../xconsortia/downloader.js'; +import { getMetadata } from '../xconsortia/metadata.js'; +import { ID_KEYWORD, METADATA_FIELDS, metadataToLookup } from './metadata.js'; const HUBMAP_TOKEN = 'HUBMAP_TOKEN'; const HUBMAP_SEARCH_URL = 'HUBMAP_SEARCH_URL'; const HUBMAP_ASSETS_URL = 'HUBMAP_ASSETS_URL'; -const DEFAULT_HUBMAP_SEARCH_URL = - 'https://search.api.hubmapconsortium.org/v3/portal/search'; +const DEFAULT_HUBMAP_SEARCH_URL = 'https://search.api.hubmapconsortium.org/v3/portal/search'; const DEFAULT_HUBMAP_ASSETS_URL = 'https://assets.hubmapconsortium.org/'; -const execFile = promisify(callbackExecFile); - /** @implements {IDownloader} */ -export class Downloader { +export class Downloader extends XConsortiaDownloader { constructor(config) { - /** @type {Config} */ - this.config = config; - /** @type {string} */ - this.token = config.get(HUBMAP_TOKEN); - /** @type {string} */ - this.searchUrl = config.get(HUBMAP_SEARCH_URL, DEFAULT_HUBMAP_SEARCH_URL); - /** @type {string} */ - this.assetsUrl = config.get(HUBMAP_ASSETS_URL, DEFAULT_HUBMAP_ASSETS_URL); - this.exprAdjustScript = 'expr_h5ad_adjust.py'; - /** @type {string} */ - this.exprAdjustScriptFilePath = getSrcFilePath( + super( config, - 'hubmap', - this.exprAdjustScript + config.get(HUBMAP_TOKEN), + config.get(HUBMAP_SEARCH_URL, DEFAULT_HUBMAP_SEARCH_URL), + config.get(HUBMAP_ASSETS_URL, 
DEFAULT_HUBMAP_ASSETS_URL) ); } - async prepareDownload(datasets) { - const ids = datasets.map((dataset) => dataset.id); - const lookup = await getMetadataLookup(ids, this.searchUrl, this.token); - for (const dataset of datasets) { - const metadata = lookup.get(dataset.id); - Object.assign(dataset, metadata); - } - } - - async download(dataset) { - if (!dataset.uuid) { - throw new Error('Missing uuid - Dataset might have been deleted'); - } - - const url = new URL(`${dataset.uuid}/expr.h5ad`, this.assetsUrl); - url.searchParams.set('token', this.token); - await downloadFile(dataset.dataFilePath, url, { - headers: { - Authorization: `Bearer ${this.token}`, - }, - overwrite: this.config.get(FORCE, false), - }); - - const { stdout } = await execFile('python3', [ - this.exprAdjustScriptFilePath, - dataset.dataFilePath, - '--assay', - dataset.assay_type, - '--output', - dataset.dataFilePath, - ]); + async getMetadataLookup(ids) { + const metadata = await getMetadata(ids, this.searchUrl, this.token, ID_KEYWORD, METADATA_FIELDS); + return metadataToLookup(metadata); } } diff --git a/src/hubmap/index.js b/src/hubmap/index.js index ef1bf36..97b9c7f 100644 --- a/src/hubmap/index.js +++ b/src/hubmap/index.js @@ -1,6 +1,6 @@ export * from './listing.js'; export * from './downloader.js'; -export * from './job-generator.js'; +export * from '../xconsortia/job-generator.js'; export function supports(dataset) { return /^hbm/i.test(dataset.id); diff --git a/src/hubmap/listing.js b/src/hubmap/listing.js index 0eba41c..8788b5f 100644 --- a/src/hubmap/listing.js +++ b/src/hubmap/listing.js @@ -1,36 +1,14 @@ -import { Config } from '../util/config.js'; -import { checkFetchResponse } from '../util/fs.js'; import { IListing } from '../util/handler.js'; -import { getHeaders } from './metadata.js'; +import { XConsortiaListing } from '../xconsortia/listing.js'; const HUBMAP_TOKEN = 'HUBMAP_TOKEN'; const HUBMAP_SEARCH_URL = 'HUBMAP_SEARCH_URL'; -const DEFAULT_HUBMAP_SEARCH_URL = - 
'https://search.api.hubmapconsortium.org/v3/portal/search'; +const DEFAULT_HUBMAP_SEARCH_URL = 'https://search.api.hubmapconsortium.org/v3/portal/search'; /** @implements {IListing} */ -export class Listing { +export class Listing extends XConsortiaListing { constructor(config) { - /** @type {Config} */ - this.config = config; - /** @type {string} */ - this.token = config.get(HUBMAP_TOKEN); - /** @type {string} */ - this.searchUrl = config.get(HUBMAP_SEARCH_URL, DEFAULT_HUBMAP_SEARCH_URL); - } - - async getDatasets() { - const resp = await fetch(this.searchUrl, { - method: 'POST', - headers: getHeaders(this.token), - body: JSON.stringify(this.getBody()), - }); - checkFetchResponse(resp, 'HuBMAP: Failed to fetch list of collections'); - - const { - hits: { hits }, - } = await resp.json(); - return hits.map(({ _source: { hubmap_id } }) => hubmap_id); + super(config, config.get(HUBMAP_TOKEN), config.get(HUBMAP_SEARCH_URL, DEFAULT_HUBMAP_SEARCH_URL), 'hubmap_id'); } getBody() { @@ -44,7 +22,7 @@ export class Listing { }, }, _source: { - includes: ['hubmap_id'], + includes: [this.idKeyword], }, }; } diff --git a/src/hubmap/metadata.js b/src/hubmap/metadata.js index 47e3a5c..af995ec 100644 --- a/src/hubmap/metadata.js +++ b/src/hubmap/metadata.js @@ -1,133 +1,41 @@ +import { getMetadata, getSampleBlockId, getSampleSectionId, ORGAN_MAPPING } from '../xconsortia/metadata.js'; + /** * @typedef {object} HubmapMetadata * @property {string} uuid * @property {string} organ * @property {string} organ_source * @property {string} assay_type - * @property {string} dataset_iri + * @property {string} dataset_id * @property {string} donor_sex * @property {string} donor_race * @property {string} donor_age * @property {string} donor_bmi - * @property {string} block_sample_iri - * @property {string} section_sample_iri - * @property {string} donor_iri + * @property {string} block_sample_id + * @property {string} section_sample_id + * @property {string} donor_id */ -const 
HUBMAP_ENTITY_ENDPOINT = - 'https://entity.api.hubmapconsortium.org/entities/'; -const HUBMAP_PORTAL_ENDPOINT = - 'https://portal.hubmapconsortium.org/browse/dataset/'; - -const ORGAN_MAPPING = { - AO: 'UBERON:0000948', - BL: 'UBERON:0001255', - BD: 'UBERON:0001270', - BM: 'UBERON:0001270', - BR: 'UBERON:0000955', - LB: 'UBERON:0001004', - RB: 'UBERON:0001004', - LE: 'UBERON:0004548', - RE: 'UBERON:0004549', - LF: 'UBERON:0001303', - RF: 'UBERON:0001302', - HT: 'UBERON:0000948', - LK: 'UBERON:0004538', - RK: 'UBERON:0004539', - LI: 'UBERON:0000059', - LV: 'UBERON:0002107', - LL: 'UBERON:0001004', - LN: 'FMA:24978', - RL: 'UBERON:0001004', - RN: 'FMA:24977', - LY: 'UBERON:0002509', - LO: 'FMA:7214', - RO: 'FMA:7213', - PA: 'UBERON:0001264', - PL: 'UBERON:0001987', - SI: 'UBERON:0002108', - SK: 'UBERON:0002097', - SP: 'UBERON:0002106', - TH: 'UBERON:0002370', - TR: 'UBERON:0001004', - UR: 'UBERON:0001223', - UT: 'UBERON:0000995', -}; - -export function getHeaders(token) { - return { - 'Content-type': 'application/json', - Authorization: `Bearer ${token}`, - }; -} - -function getBody(ids) { - return { - version: true, - from: 0, - size: 10000, - query: { - terms: { - 'hubmap_id.keyword': ids, - }, - }, - _source: { - includes: [ - 'uuid', - 'hubmap_id', - 'origin_samples.organ', - 'data_types', - 'mapped_consortium', - 'group_name', - 'group_uuid', - 'donor.mapped_metadata.race', - 'donor.mapped_metadata.sex', - 'donor.mapped_metadata.age_value', - 'donor.mapped_metadata.body_mass_index_value', - 'ancestors', - 'donor.uuid', - 'source_samples', - ], - }, - }; -} - -function checkResponse(response) { - if (!response.ok) { - const { status, statusText } = response; - const message = `Failed to fetch metadata: ${status}:${statusText}`; - throw new Error(message); - } -} - -function getSampleBlockId(ancestors) { - for (const ancestor of ancestors) { - if ( - ancestor['entity_type'].toLowerCase() == 'sample' && - ancestor['sample_category'].toLowerCase() == 'block' - ) { - 
return { - block_id: `${HUBMAP_ENTITY_ENDPOINT}${ancestor['uuid']}`, - rui_location: ancestor['rui_location'] ?? '', - }; - } - } - return ''; -} - -function getSampleSectionId(ancestors, type) { - for (const ancestor of ancestors) { - if ( - ancestor['entity_type'].toLowerCase() == 'sample' && - ancestor['sample_category'].toLowerCase() == 'section' - ) { - return `${HUBMAP_ENTITY_ENDPOINT}${ancestor['uuid']}`; - } - } - return ''; -} +const HUBMAP_ENTITY_ENDPOINT = 'https://entity.api.hubmapconsortium.org/entities/'; +const HUBMAP_PORTAL_ENDPOINT = 'https://portal.hubmapconsortium.org/browse/dataset/'; +export const ID_KEYWORD = 'hubmap_id'; +export const METADATA_FIELDS = [ + 'uuid', + 'hubmap_id', + 'origin_samples.organ', + 'data_types', + 'mapped_consortium', + 'group_name', + 'group_uuid', + 'donor.mapped_metadata.race', + 'donor.mapped_metadata.sex', + 'donor.mapped_metadata.age_value', + 'donor.mapped_metadata.body_mass_index_value', + 'ancestors', + 'donor.uuid', +]; -function toLookup(result) { +export function metadataToLookup(result) { /** @type {Map} */ const lookup = new Map(); for (const hit of result.hits.hits) { @@ -142,23 +50,18 @@ function toLookup(result) { group_uuid, donor: { mapped_metadata: { - age_value: [donor_age] = [null], - race: [donor_race] = [null], - sex: [donor_sex] = [null], - body_mass_index_value: [donor_bmi] = [null], - } = { - age_value: [null], - race: [null], - sex: [null], - body_mass_index_value: [null], - }, + age_value: [donor_age] = [], + race: [donor_race] = [], + sex: [donor_sex] = [], + body_mass_index_value: [donor_bmi] = [], + } = {}, uuid: donor_uuid, }, ancestors, }, } = hit; const mapped_organ = ORGAN_MAPPING[organ.toUpperCase()]; - const { block_id, rui_location } = getSampleBlockId(ancestors); + const { block_id, rui_location } = getSampleBlockId(ancestors, HUBMAP_ENTITY_ENDPOINT); lookup.set(hubmap_id, { organ: mapped_organ, organ_source: organ, @@ -175,48 +78,12 @@ function toLookup(result) { donor_sex: 
donor_sex ?? '', donor_bmi: donor_bmi ?? '', donor_race: donor_race ?? '', - organ_id: `http://purl.obolibrary.org/obo/UBERON_${ - mapped_organ.split(':')[1] - }`, + organ_id: `http://purl.obolibrary.org/obo/UBERON_${mapped_organ.split(':')[1]}`, block_id, - section_id: getSampleSectionId(ancestors), + section_id: getSampleSectionId(ancestors, HUBMAP_ENTITY_ENDPOINT), rui_location, }); } return lookup; } - -/** - * Handles 303 responses from the search api. - * A 303 response is returned when the resulting query is to large for the - * search api. Instead it returns a temporary url from which to download the result. - * - * @param {Response} resp - * @returns {Promise} - */ -async function handle303Response(resp) { - const text = await resp.text(); - if (text.startsWith('https')) { - return await fetch(text); - } - - return resp; -} - -export async function getMetadataLookup(ids, url, token) { - let resp = await fetch(url, { - method: 'POST', - headers: getHeaders(token), - body: JSON.stringify(getBody(ids)), - }); - - if (resp.status === 303) { - resp = await handle303Response(resp); - } - - checkResponse(resp); - - const result = await resp.json(); - return toLookup(result); -} diff --git a/src/sennet/downloader.js b/src/sennet/downloader.js new file mode 100644 index 0000000..bb3d3ed --- /dev/null +++ b/src/sennet/downloader.js @@ -0,0 +1,27 @@ +import { IDownloader } from '../util/handler.js'; +import { XConsortiaDownloader } from '../xconsortia/downloader.js'; +import { getMetadata } from '../xconsortia/metadata.js'; +import { ID_KEYWORD, METADATA_FIELDS, toLookup } from './metadata.js'; + +const SENNET_TOKEN = 'SENNET_TOKEN'; +const SENNET_SEARCH_URL = 'SENNET_SEARCH_URL'; +const SENNET_ASSETS_URL = 'SENNET_ASSETS_URL'; +const DEFAULT_SENNET_SEARCH_URL = 'https://search.api.sennetconsortium.org/entities/search'; +const DEFAULT_SENNET_ASSETS_URL = 'https://assets.api.sennetconsortium.org/'; + +/** @implements {IDownloader} */ +export class Downloader extends 
XConsortiaDownloader { + constructor(config) { + super( + config, + config.get(SENNET_TOKEN), + config.get(SENNET_SEARCH_URL, DEFAULT_SENNET_SEARCH_URL), + config.get(SENNET_ASSETS_URL, DEFAULT_SENNET_ASSETS_URL) + ); + } + + async getMetadataLookup(ids) { + const metadata = await getMetadata(ids, this.searchUrl, this.token, ID_KEYWORD, METADATA_FIELDS); + return toLookup(metadata); + } +} diff --git a/src/sennet/index.js b/src/sennet/index.js new file mode 100644 index 0000000..c82b9ff --- /dev/null +++ b/src/sennet/index.js @@ -0,0 +1,7 @@ +export * from './listing.js'; +export * from './downloader.js'; +export * from '../xconsortia/job-generator.js'; + +export function supports(dataset) { + return /^snt/i.test(dataset.id); +} diff --git a/src/sennet/listing.js b/src/sennet/listing.js new file mode 100644 index 0000000..aaecb69 --- /dev/null +++ b/src/sennet/listing.js @@ -0,0 +1,45 @@ +import { IListing } from '../util/handler.js'; +import { XConsortiaListing } from '../xconsortia/listing.js'; + +const SENNET_TOKEN = 'SENNET_TOKEN'; +const SENNET_SEARCH_URL = 'SENNET_SEARCH_URL'; +const DEFAULT_SENNET_SEARCH_URL = 'https://search.api.sennetconsortium.org/entities/search'; + +/** @implements {IListing} */ +export class Listing extends XConsortiaListing { + constructor(config) { + super(config, config.get(SENNET_TOKEN), config.get(SENNET_SEARCH_URL, DEFAULT_SENNET_SEARCH_URL), 'sennet_id'); + } + + getBody() { + return { + version: true, + from: 0, + size: 10000, + query: { + bool: { + must: [ + { + term: { + 'entity_type.keyword': 'Dataset', + }, + }, + { + term: { + 'files.rel_path.keyword': 'expr.h5ad', + }, + }, + { + term: { + 'source.source_type.keyword': 'Human', + }, + }, + ], + }, + }, + _source: { + includes: [this.idKeyword], + }, + }; + } +} diff --git a/src/sennet/metadata.js b/src/sennet/metadata.js new file mode 100644 index 0000000..0316330 --- /dev/null +++ b/src/sennet/metadata.js @@ -0,0 +1,88 @@ +import { getMetadata, getSampleBlockId, 
getSampleSectionId, ORGAN_MAPPING } from '../xconsortia/metadata.js'; + +/** + * @typedef {object} SennetMetadata + * @property {string} uuid + * @property {string} organ + * @property {string} organ_source + * @property {string} assay_type + * @property {string} dataset_id + * @property {string} group_name + * @property {string} group_uuid + * @property {string} donor_sex + * @property {string} donor_race + * @property {string} donor_age + * @property {string} donor_bmi + * @property {string} block_sample_id + * @property {string} section_sample_id + * @property {string} donor_id + */ + +const SENNET_ENTITY_ENDPOINT = 'https://entity.api.sennetconsortium.org/entities/'; +const SENNET_PORTAL_ENDPOINT = 'https://data.sennetconsortium.org/dataset'; +export const ID_KEYWORD = 'sennet_id'; +export const METADATA_FIELDS = [ + 'uuid', + 'sennet_id', + 'origin_sample.organ', + 'data_types', + 'group_name', + 'group_uuid', + 'source.source_mapped_metadata.race', + 'source.source_mapped_metadata.sex', + 'source.source_mapped_metadata.age_value', + 'source.source_mapped_metadata.body_mass_index_value', + 'ancestors', + 'source.uuid', +]; + +export function toLookup(result) { + /** @type {Map} */ + const lookup = new Map(); + for (const hit of result.hits.hits) { + const { + _source: { + sennet_id, + uuid, + origin_sample: { organ }, + data_types: [assay_type], + group_name, + group_uuid, + source: { + source_mapped_metadata: { + age_value: [donor_age] = [], + race: [donor_race] = [], + sex: [donor_sex] = [], + body_mass_index_value: [donor_bmi] = [], + } = {}, + uuid: donor_uuid, + }, + ancestors, + }, + } = hit; + const mapped_organ = ORGAN_MAPPING[organ.toUpperCase()]; + const { block_id, rui_location } = getSampleBlockId(ancestors, SENNET_ENTITY_ENDPOINT); + lookup.set(sennet_id, { + organ: mapped_organ, + organ_source: organ, + uuid, + assay_type, + dataset_id: `${SENNET_ENTITY_ENDPOINT}${uuid}`, + dataset_link: `${SENNET_PORTAL_ENDPOINT}?uuid=${uuid}`, + 
dataset_technology: 'OTHER', + consortium_name: 'SenNet', + provider_name: group_name, + provider_uuid: group_uuid, + donor_id: `${SENNET_ENTITY_ENDPOINT}${donor_uuid}`, + donor_age: donor_age ?? '', + donor_sex: donor_sex ?? '', + donor_bmi: donor_bmi ?? '', + donor_race: donor_race ?? '', + organ_id: `http://purl.obolibrary.org/obo/UBERON_${mapped_organ.split(':')[1]}`, + block_id, + section_id: getSampleSectionId(ancestors, SENNET_ENTITY_ENDPOINT), + rui_location, + }); + } + return lookup; +} diff --git a/src/util/constants.js b/src/util/constants.js index 0968200..13a7b24 100644 --- a/src/util/constants.js +++ b/src/util/constants.js @@ -15,14 +15,7 @@ export const DATASET_LIST = 'DATASET_LIST'; export const DATASET_LIST_URL = 'DATASET_LIST_URL'; export const DATASET_COLUMN_ID = 'DATASET_COLUMN_ID'; -export const REQUIRED_ENV_VARIABLES = [ - DATASET, - DATASETS_DIR, - OUTPUT_DIR, - DATA_REPO_DIR, - MODELS_DIR, - SRC_DIR, -]; +export const REQUIRED_ENV_VARIABLES = [DATASET, DATASETS_DIR, OUTPUT_DIR, DATA_REPO_DIR, MODELS_DIR, SRC_DIR]; // File names export const LISTING_FILE = 'listing.csv'; @@ -40,5 +33,5 @@ export const ALGORITHMS = ['azimuth', 'celltypist', 'popv']; export const DEFAULT_MAX_CONCURRENCY = 2; export const DEFAULT_PYTHON_LOG_LEVEL = 40; // Error level export const DEFAULT_CACHE_DIR = './tmp'; -export const DEFAULT_DATASET_HANDLERS = ['hubmap', 'gtex', 'cellxgene']; +export const DEFAULT_DATASET_HANDLERS = ['hubmap', 'sennet', 'gtex', 'cellxgene']; export const DEFAULT_DATASET_LIST = 'listing.csv'; diff --git a/src/util/handler.js b/src/util/handler.js index e4074ac..8173c29 100644 --- a/src/util/handler.js +++ b/src/util/handler.js @@ -111,9 +111,7 @@ export async function loadDatasetHandlers(config) { const fallbackHandlerName = 'default-dataset-handler'; const handlerNames = config.get(DATASET_HANDLERS, DEFAULT_DATASET_HANDLERS); const handlerNamesWithFallback = [...handlerNames, fallbackHandlerName]; - const handlerModules = await 
concurrentMap(handlerNamesWithFallback, (name) => - loadDatasetHandler(name, config) - ); + const handlerModules = await concurrentMap(handlerNamesWithFallback, (name) => loadDatasetHandler(name, config)); const handlers = handlerModules.filter(verifyDatasetHandler); return new Map(handlers); @@ -131,8 +129,8 @@ async function loadDatasetHandler(name, config) { try { return [name, await import(path)]; - } catch { - const msg = `Failed to load dataset handler '${name}' using path '${path}'`; + } catch (error) { + const msg = `Failed to load dataset handler '${name}' using path '${path}'. Cause: ${error.message}`; console.warn(msg); return undefined; } diff --git a/src/util/logging.js b/src/util/logging.js index 247d45c..fde2062 100644 --- a/src/util/logging.js +++ b/src/util/logging.js @@ -10,7 +10,7 @@ export async function logEvent(event, ...args) { console.debug(`${event}:End -- Args:`, ...args); return result; } catch (error) { - console.error(`${event}:Failure -- Args:`, ...args); + console.error(`${event}:Failure -- Args:`, ...args, `-- Cause: ${error.message}`); throw error; } finally { console.timeEnd(timerId); diff --git a/src/xconsortia/downloader.js b/src/xconsortia/downloader.js new file mode 100644 index 0000000..a3a1903 --- /dev/null +++ b/src/xconsortia/downloader.js @@ -0,0 +1,64 @@ +import { execFile as callbackExecFile } from 'node:child_process'; +import { promisify } from 'node:util'; +import { Config } from '../util/config.js'; +import { FORCE } from '../util/constants.js'; +import { downloadFile } from '../util/fs.js'; +import { getSrcFilePath } from '../util/paths.js'; + +const execFile = promisify(callbackExecFile); + +/** @implements {IDownloader} */ +export class XConsortiaDownloader { + constructor(config, token, searchUrl, assetsUrl) { + /** @type {Config} */ + this.config = config; + /** @type {string} */ + this.token = token; + /** @type {string} */ + this.searchUrl = searchUrl; + /** @type {string} */ + this.assetsUrl = assetsUrl; + 
/** @type {string} */ + this.exprAdjustScript = 'expr_h5ad_adjust.py'; + /** @type {string} */ + this.exprAdjustScriptFilePath = getSrcFilePath(config, 'xconsortia', this.exprAdjustScript); + } + + async prepareDownload(datasets) { + const ids = datasets.map((dataset) => dataset.id); + const lookup = await this.getMetadataLookup(ids); + for (const dataset of datasets) { + const metadata = lookup.get(dataset.id); + Object.assign(dataset, metadata); + } + return datasets; + } + + async download(dataset) { + if (!dataset.uuid) { + throw new Error('Missing uuid - Dataset might have been deleted'); + } + + const url = new URL(`${dataset.uuid}/expr.h5ad`, this.assetsUrl); + url.searchParams.set('token', this.token); + await downloadFile(dataset.dataFilePath, url, { + headers: { + Authorization: `Bearer ${this.token}`, + }, + overwrite: this.config.get(FORCE, false), + }); + + await execFile('python3', [ + this.exprAdjustScriptFilePath, + dataset.dataFilePath, + '--assay', + dataset.assay_type, + '--output', + dataset.dataFilePath, + ]); + } + + async getMetadataLookup(_ids) { + throw new Error('getMetadataLookup must be overriden'); + } +} diff --git a/src/hubmap/expr_h5ad_adjust.py b/src/xconsortia/expr_h5ad_adjust.py similarity index 100% rename from src/hubmap/expr_h5ad_adjust.py rename to src/xconsortia/expr_h5ad_adjust.py diff --git a/src/hubmap/job-generator.js b/src/xconsortia/job-generator.js similarity index 100% rename from src/hubmap/job-generator.js rename to src/xconsortia/job-generator.js diff --git a/src/xconsortia/listing.js b/src/xconsortia/listing.js new file mode 100644 index 0000000..0e34b77 --- /dev/null +++ b/src/xconsortia/listing.js @@ -0,0 +1,36 @@ +import { Config } from '../util/config.js'; +import { checkFetchResponse } from '../util/fs.js'; +import { IListing } from '../util/handler.js'; +import { getHeaders } from './metadata.js'; + +/** @implements {IListing} */ +export class XConsortiaListing { + constructor(config, token, searchUrl, 
idKeyword) { + /** @type {Config} */ + this.config = config; + /** @type {string} */ + this.token = token; + /** @type {string} */ + this.searchUrl = searchUrl; + /** @type {string} */ + this.idKeyword = idKeyword; + } + + async getDatasets() { + const resp = await fetch(this.searchUrl, { + method: 'POST', + headers: getHeaders(this.token), + body: JSON.stringify(this.getBody()), + }); + checkFetchResponse(resp, `${this.idKeyword.split('_')[0]}: Failed to fetch list of collections`); + + const { + hits: { hits }, + } = await resp.json(); + return hits.map(({ _source }) => _source[this.idKeyword]); + } + + getBody() { + throw new Error('getBody must be overriden'); + } +} diff --git a/src/xconsortia/metadata.js b/src/xconsortia/metadata.js new file mode 100644 index 0000000..d439bec --- /dev/null +++ b/src/xconsortia/metadata.js @@ -0,0 +1,114 @@ +import { checkFetchResponse } from '../util/fs.js'; + +export const ORGAN_MAPPING = { + AO: 'UBERON:0000948', + BL: 'UBERON:0001255', + BD: 'UBERON:0001270', + BM: 'UBERON:0001270', + BR: 'UBERON:0000955', + LB: 'UBERON:0001004', + RB: 'UBERON:0001004', + LE: 'UBERON:0004548', + RE: 'UBERON:0004549', + LF: 'UBERON:0001303', + RF: 'UBERON:0001302', + HT: 'UBERON:0000948', + LK: 'UBERON:0004538', + RK: 'UBERON:0004539', + LI: 'UBERON:0000059', + LV: 'UBERON:0002107', + LL: 'UBERON:0001004', + LN: 'FMA:24978', + RL: 'UBERON:0001004', + RN: 'FMA:24977', + LY: 'UBERON:0002509', + LO: 'FMA:7214', + RO: 'FMA:7213', + PA: 'UBERON:0001264', + PL: 'UBERON:0001987', + SI: 'UBERON:0002108', + SK: 'UBERON:0002097', + SP: 'UBERON:0002106', + TH: 'UBERON:0002370', + TR: 'UBERON:0001004', + UR: 'UBERON:0001223', + UT: 'UBERON:0000995', +}; + +export function getHeaders(token) { + return { + 'Content-type': 'application/json', + Authorization: `Bearer ${token}`, + }; +} + +function getBody(ids, id_keyword, fields) { + return { + version: true, + from: 0, + size: 10000, + query: { + terms: { + [`${id_keyword}.keyword`]: ids, + }, + }, + 
_source: { + includes: fields, + }, + }; +} + +/** + * Handles 303 responses from the search api. + * A 303 response is returned when the resulting query is too large for the + * search api. Instead it returns a temporary url from which to download the result. + * + * @param {Response} resp + * @returns {Promise} + */ +async function handle303Response(resp) { + const text = await resp.text(); + if (text.startsWith('https')) { + return await fetch(text); + } + + return resp; +} + +export async function getMetadata(ids, url, token, id_keyword, fields) { + let resp = await fetch(url, { + method: 'POST', + headers: getHeaders(token), + body: JSON.stringify(getBody(ids, id_keyword, fields)), + }); + if (resp.status === 303) { + resp = await handle303Response(resp); + } + + checkFetchResponse(resp, 'Failed to fetch metadata'); + const result = await resp.json(); + return result; +} + +export function getSampleBlockId(ancestors, url_prefix) { + for (const ancestor of ancestors) { + if (ancestor['entity_type'].toLowerCase() == 'sample' && ancestor['sample_category'].toLowerCase() == 'block') { + return { + block_id: `${url_prefix}${ancestor['uuid']}`, + rui_location: ancestor['rui_location'] ?? '', + }; + } + } + + return {}; +} + +export function getSampleSectionId(ancestors, url_prefix) { + for (const ancestor of ancestors) { + if (ancestor['entity_type'].toLowerCase() == 'sample' && ancestor['sample_category'].toLowerCase() == 'section') { + return `${url_prefix}${ancestor['uuid']}`; + } + } + + return ''; +}