diff --git a/docs/src/content/docs/for-users/submit-sequences.md b/docs/src/content/docs/for-users/submit-sequences.md index 5ef484c8c5..b819b6efa9 100644 --- a/docs/src/content/docs/for-users/submit-sequences.md +++ b/docs/src/content/docs/for-users/submit-sequences.md @@ -10,6 +10,7 @@ Loculus expects: - Sequence data in `fasta` format with a unique submissionID per sequence. - Metadata in `tsv` format for each sequence. If you upload through the Website, you can also use Excel files (`xls` or `xlsx` format). If you need help formatting metadata, there is a metadata template for each organism on the submission page. +You can also map columns in your file to the expected upload column names by clicking the 'Add column mapping' button. ![Metadata template.](../../../assets/MetadataTemplate.png) diff --git a/kubernetes/loculus/values.yaml b/kubernetes/loculus/values.yaml index f700e63af0..ce333d0501 100644 --- a/kubernetes/loculus/values.yaml +++ b/kubernetes/loculus/values.yaml @@ -486,6 +486,7 @@ defaultOrganismConfig: &defaultOrganismConfig - Zaire - name: geoLocAdmin1 displayName: Collection subdivision level 1 + desired: true generateIndex: true autocomplete: true initiallyVisible: true @@ -494,11 +495,13 @@ defaultOrganismConfig: &defaultOrganismConfig ingest: division - name: geoLocAdmin2 displayName: Collection subdivision level 2 + desired: true generateIndex: true autocomplete: true header: Sample details - name: geoLocCity displayName: Collection city + desired: true generateIndex: true autocomplete: true header: Sample details @@ -511,6 +514,7 @@ defaultOrganismConfig: &defaultOrganismConfig header: Sample details - name: specimenCollectorSampleId displayName: Isolate name + desired: true header: Sample details ingest: ncbiIsolateName enableSubstringSearch: true @@ -518,6 +522,7 @@ defaultOrganismConfig: &defaultOrganismConfig displayName: Authors type: authors header: Authors + desired: true enableSubstringSearch: true order: 40 truncateColumnDisplayTo: 25 @@ -529,6 +534,7 @@ defaultOrganismConfig: &defaultOrganismConfig columnWidth: 140 - name: authorAffiliations displayName: Author affiliations + desired: true enableSubstringSearch: true truncateColumnDisplayTo: 15 header: Authors @@ -587,6 +593,7 @@ defaultOrganismConfig: &defaultOrganismConfig oneHeader: true - name: cultureId displayName: Culture ID + desired: true header: Sample details - name: sampleReceivedDate ontology_id: GENEPIO:0001177 @@ -594,6 +601,7 @@ defaultOrganismConfig: &defaultOrganismConfig guidance: Alternative if "sampleCollectionDate" is not available. Record the date the sample was received by the laboratory. Required granularity includes year, month and day. Before sharing this data, ensure this date is not considered identifiable information. If this date is considered identifiable, it is acceptable to add "jitter" to the received date by adding or subtracting calendar days. Do not change the received date in your original records. Alternatively, collection_date may be used as a substitute in the data you share. The date should be provided in ISO 8601 standard format "YYYY-MM-DD". example: '2020-03-20' displayName: Sample received date + desired: true type: date preprocessing: function: parse_and_assert_past_date @@ -659,6 +667,7 @@ defaultOrganismConfig: &defaultOrganismConfig example: Swab [GENEPIO:0100027] displayName: Collection device header: Sampling + desired: true - name: collectionMethod ontology_id: GENEPIO:0001241 definition: The process used to collect the sample e.g. 
phlebotomy, necropsy. @@ -666,6 +675,7 @@ defaultOrganismConfig: &defaultOrganismConfig example: Bronchoalveolar lavage (BAL) [GENEPIO:0100032] displayName: Collection method header: Sampling + desired: true - name: foodProduct ontology_id: GENEPIO:0100444 definition: A material consumed and digested for nutritional value or enjoyment. @@ -833,6 +843,7 @@ defaultOrganismConfig: &defaultOrganismConfig inputs: date: sequencingDate header: Sequencing + desired: true - name: ampliconPcrPrimerScheme ontology_id: GENEPIO:0001456 definition: The specifications of the primers (primer sequences, binding positions, fragment size generated etc) used to generate the amplicons to be sequenced. @@ -854,6 +865,7 @@ defaultOrganismConfig: &defaultOrganismConfig example: Oxford Nanopore MinION [GENEPIO:0100142] displayName: Sequencing instrument header: Sequencing + desired: true - name: sequencingProtocol ontology_id: GENEPIO:0001454 definition: The protocol used to generate the sequence. @@ -861,6 +873,7 @@ defaultOrganismConfig: &defaultOrganismConfig example: Genomes were generated through amplicon sequencing of 1200 bp amplicons with Freed schema primers. Libraries were created using Illumina DNA Prep kits, and sequence data was produced using Miseq Micro v2 (500 cycles) sequencing kits. displayName: Sequencing protocol header: Sequencing + desired: true - name: sequencingAssayType ontology_id: GENEPIO:0100997 definition: The overarching sequencing methodology that was used to determine the sequence of a biomaterial. @@ -932,6 +945,7 @@ defaultOrganismConfig: &defaultOrganismConfig displayName: Depth of coverage type: int header: Sequencing + desired: true - name: breadthOfCoverage ontology_id: GENEPIO:0001475 definition: The threshold used as a cut-off for the depth of coverage. 
@@ -1003,6 +1017,7 @@ defaultOrganismConfig: &defaultOrganismConfig autocomplete: true header: "Host" ingest: ncbiHostName + desired: true - name: hostNameCommon generateIndex: true autocomplete: true @@ -1016,6 +1031,7 @@ defaultOrganismConfig: &defaultOrganismConfig url: "https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=info&id=__value__" header: "Host" ingest: ncbiHostTaxId + desired: true - name: isLabHost type: boolean autocomplete: true diff --git a/website/package-lock.json b/website/package-lock.json index 114b942eb9..8ce9613b83 100644 --- a/website/package-lock.json +++ b/website/package-lock.json @@ -32,6 +32,7 @@ "luxon": "^3.5.0", "neverthrow": "^8.1.1", "openid-client": "^5.7.1", + "papaparse": "^5.5.1", "react": "^18.3.1", "react-chartjs-2": "^5.3.0", "react-confirm-alert": "^3.0.6", @@ -62,6 +63,7 @@ "@types/lodash": "^4.17.14", "@types/luxon": "^3.4.2", "@types/node": "^22.10.7", + "@types/papaparse": "^5.3.15", "@types/react": "^18.3.12", "@types/react-dom": "^18.3.1", "@types/uuid": "^10.0.0", @@ -4169,6 +4171,16 @@ "undici-types": "~6.20.0" } }, + "node_modules/@types/papaparse": { + "version": "5.3.15", + "resolved": "https://registry.npmjs.org/@types/papaparse/-/papaparse-5.3.15.tgz", + "integrity": "sha512-JHe6vF6x/8Z85nCX4yFdDslN11d+1pr12E526X8WAfhadOeaOTx5AuIkvDKIBopfvlzpzkdMx4YyvSKCM9oqtw==", + "dev": true, + "license": "MIT", + "dependencies": { + "@types/node": "*" + } + }, "node_modules/@types/parse-json": { "version": "4.0.2", "resolved": "https://registry.npmjs.org/@types/parse-json/-/parse-json-4.0.2.tgz", @@ -12207,6 +12219,12 @@ "integrity": "sha512-4hLB8Py4zZce5s4yd9XzopqwVv/yGNhV1Bl8NTmCq1763HeK2+EwVTv+leGeL13Dnh2wfbqowVPXCIO0z4taYw==", "license": "(MIT AND Zlib)" }, + "node_modules/papaparse": { + "version": "5.5.1", + "resolved": "https://registry.npmjs.org/papaparse/-/papaparse-5.5.1.tgz", + "integrity": "sha512-EuEKUhyxrHVozD7g3/ztsJn6qaKse8RPfR6buNB2dMJvdtXNhcw8jccVi/LxNEY3HVrV6GO6Z4OoeCG9Iy9wpA==", + "license": "MIT" + }, "node_modules/parent-module": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/parent-module/-/parent-module-1.0.1.tgz", diff --git a/website/package.json b/website/package.json index ff0833d6f5..7599090540 100644 --- a/website/package.json +++ b/website/package.json @@ -45,6 +45,7 @@ "luxon": "^3.5.0", "neverthrow": "^8.1.1", "openid-client": "^5.7.1", + "papaparse": "^5.5.1", "react": "^18.3.1", "react-chartjs-2": "^5.3.0", "react-confirm-alert": "^3.0.6", @@ -75,6 +76,7 @@ "@types/lodash": "^4.17.14", "@types/luxon": "^3.4.2", "@types/node": "^22.10.7", + "@types/papaparse": "^5.3.15", "@types/react": "^18.3.12", "@types/react-dom": "^18.3.1", "@types/uuid": "^10.0.0", diff --git a/website/src/components/MetadataTable.astro b/website/src/components/MetadataTable.astro index 071bf03c3e..92c42c28f3 100644 --- a/website/src/components/MetadataTable.astro +++ b/website/src/components/MetadataTable.astro @@ -1,5 +1,5 @@ --- -import { getConfiguredOrganisms, getSchema } from '../config'; +import { getConfiguredOrganisms, getGroupedInputFields, getSchema } from '../config'; import OrganismTableSelector from './OrganismMetadataTableSelector'; import type { OrganismMetadata } from './OrganismMetadataTableSelector'; @@ -9,7 +9,7 @@ const organisms: OrganismMetadata[] = configuredOrganisms.map((organism) => { key: organism.key, displayName: organism.displayName, metadata: getSchema(organism.key).metadata, - inputFields: getSchema(organism.key).inputFields, + groupedInputFields: getGroupedInputFields(organism.key, 
'submit'), }; }); --- diff --git a/website/src/components/OrganismMetadataTableSelector.tsx b/website/src/components/OrganismMetadataTableSelector.tsx index 59eb42c5a1..4c21e02058 100644 --- a/website/src/components/OrganismMetadataTableSelector.tsx +++ b/website/src/components/OrganismMetadataTableSelector.tsx @@ -1,16 +1,15 @@ -import React, { useState, useEffect } from 'react'; +import { useState, useEffect } from 'react'; import type { FC } from 'react'; import { routes } from '../routes/routes.ts'; import type { Metadata, InputField } from '../types/config.ts'; -import { groupFieldsByHeader } from '../utils/groupFieldsByHeader.ts'; import IwwaArrowDown from '~icons/iwwa/arrow-down'; export type OrganismMetadata = { key: string; displayName: string; metadata: Metadata[]; - inputFields: InputField[]; + groupedInputFields: Map; }; type Props = { @@ -40,7 +39,7 @@ const OrganismMetadataTableSelector: FC = ({ organisms }) => { useEffect(() => { if (selectedOrganism) { - setGroupedFields(groupFieldsByHeader(selectedOrganism.inputFields, selectedOrganism.metadata)); + setGroupedFields(selectedOrganism.groupedInputFields); } }, [selectedOrganism]); diff --git a/website/src/components/Submission/DataUploadForm.tsx b/website/src/components/Submission/DataUploadForm.tsx index 0acd00cb7b..a172162c62 100644 --- a/website/src/components/Submission/DataUploadForm.tsx +++ b/website/src/components/Submission/DataUploadForm.tsx @@ -5,6 +5,8 @@ import { type FormEvent, useState } from 'react'; import { dataUploadDocsUrl } from './dataUploadDocsUrl.ts'; import { getClientLogger } from '../../clientLogger.ts'; +import type { ColumnMapping } from './FileUpload/ColumnMapping.ts'; +import { ColumnMappingModal } from './FileUpload/ColumnMappingModal.tsx'; import { UploadComponent } from './FileUpload/UploadComponent.tsx'; import DataUseTermsSelector from '../../components/DataUseTerms/DataUseTermsSelector'; import useClientFlag from '../../hooks/isClient.ts'; @@ -23,7 +25,8 @@ import { dateTimeInMonths } from '../../utils/DateTimeInMonths.tsx'; import { createAuthorizationHeader } from '../../utils/createAuthorizationHeader.ts'; import { stringifyMaybeAxiosError } from '../../utils/stringifyMaybeAxiosError.ts'; import { withQueryProvider } from '../common/withQueryProvider.tsx'; -import { FASTA_FILE_KIND, METADATA_FILE_KIND } from './FileUpload/fileProcessing.ts'; +import { FASTA_FILE_KIND, METADATA_FILE_KIND, type ProcessedFile, RawFile } from './FileUpload/fileProcessing.ts'; +import type { InputField } from '../../types/config.ts'; export type UploadAction = 'submit' | 'revise'; @@ -34,6 +37,7 @@ type DataUploadFormProps = { action: UploadAction; group: Group; referenceGenomeSequenceNames: ReferenceGenomesSequenceNames; + metadataTemplateFields: Map; onSuccess: () => void; onError: (message: string) => void; }; @@ -122,9 +126,12 @@ const InnerDataUploadForm = ({ onError, group, referenceGenomeSequenceNames, + metadataTemplateFields, }: DataUploadFormProps) => { - const [metadataFile, setMetadataFile] = useState(null); - const [sequenceFile, setSequenceFile] = useState(null); + const [metadataFile, setMetadataFile] = useState(null); + // The columnMapping can be null; if null -> don't apply mapping. 
+ const [columnMapping, setColumnMapping] = useState(null); + const [sequenceFile, setSequenceFile] = useState(null); const [exampleEntries, setExampleEntries] = useState(10); const { submit, revise, isLoading } = useSubmitFiles(accessToken, organism, clientConfig, onSuccess, onError); @@ -145,11 +152,11 @@ const InnerDataUploadForm = ({ const metadataFile = createTempFile(exampleMetadataContent, 'text/tab-separated-values', 'metadata.tsv'); const sequenceFile = createTempFile(sequenceFileContent, 'application/octet-stream', 'sequences.fasta'); - setMetadataFile(metadataFile); - setSequenceFile(sequenceFile); + setMetadataFile(new RawFile(metadataFile)); + setSequenceFile(new RawFile(sequenceFile)); }; - const handleSubmit = (event: FormEvent) => { + const handleSubmit = async (event: FormEvent) => { event.preventDefault(); if (!agreedToINSDCUploadTerms) { @@ -173,12 +180,18 @@ const InnerDataUploadForm = ({ return; } + let finalMetadataFile = metadataFile.inner(); + + if (columnMapping !== null) { + finalMetadataFile = await columnMapping.applyTo(metadataFile); + } + switch (action) { case 'submit': { const groupId = group.groupId; submit({ - metadataFile, - sequenceFile, + metadataFile: finalMetadataFile, + sequenceFile: sequenceFile.inner(), groupId, dataUseTermsType, restrictedUntil: @@ -189,7 +202,7 @@ const InnerDataUploadForm = ({ break; } case 'revise': - revise({ metadataFile, sequenceFile }); + revise({ metadataFile: finalMetadataFile, sequenceFile: sequenceFile.inner() }); break; } }; @@ -255,8 +268,8 @@ const InnerDataUploadForm = ({ )} @@ -274,12 +287,22 @@ const InnerDataUploadForm = ({
- +
+ + {metadataFile !== null && ( + + )} +
@@ -368,7 +391,7 @@ const InnerDataUploadForm = ({ name='submit' type='submit' className='rounded-md py-2 text-sm font-semibold shadow-sm focus-visible:outline focus-visible:outline-2 focus-visible:outline-offset-2 bg-primary-600 text-white hover:bg-primary-500' - onClick={handleSubmit} + onClick={(e) => void handleSubmit(e)} disabled={isLoading || !isClient} >
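A condensed sketch of the submit-time flow introduced above: the metadata file is resolved through the optional column mapping before it is handed to `submit`/`revise`. The helper function below is illustrative only and is not part of this diff; the `ColumnMapping` and `ProcessedFile` types are the ones added later in this PR.

```ts
import type { ColumnMapping } from './FileUpload/ColumnMapping';
import type { ProcessedFile } from './FileUpload/fileProcessing';

// Illustrative helper: pick the File that actually gets uploaded.
// With no mapping configured, the processed file is submitted unchanged;
// otherwise the TSV header is rewritten according to the mapping.
async function resolveMetadataFile(
    metadataFile: ProcessedFile,
    columnMapping: ColumnMapping | null,
): Promise<File> {
    if (columnMapping === null) {
        return metadataFile.inner();
    }
    return columnMapping.applyTo(metadataFile);
}
```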
diff --git a/website/src/components/Submission/FileUpload/ColumnMapping.spec.ts b/website/src/components/Submission/FileUpload/ColumnMapping.spec.ts new file mode 100644 index 0000000000..f73f79cc67 --- /dev/null +++ b/website/src/components/Submission/FileUpload/ColumnMapping.spec.ts @@ -0,0 +1,103 @@ +import { describe, it, expect } from 'vitest'; + +import { ColumnMapping } from './ColumnMapping'; +import { RawFile } from './fileProcessing'; + +describe('ColumnMapping', () => { + it('should create a mapping from columns', () => { + const sourceColumns = ['date', 'location', 'Foo Bar']; + const inputFields = [{ name: 'date' }, { name: 'location' }, { name: 'foo', displayName: 'Foo Bar' }]; + + const mapping = ColumnMapping.fromColumns(sourceColumns, inputFields); + const entries = mapping.entries(); + + expect(entries).toEqual([ + ['date', 'date'], + ['location', 'location'], + ['Foo Bar', 'foo'], + ]); + }); + + it('should create a mapping from columns with sensible column mapping', () => { + const sourceColumns = ['state', 'geoLocAdmin', 'geoLocAdmin2']; + const inputFields = [ + { name: 'date' }, + { name: 'geoLocAdmin2', displayName: 'Collection subdivision level 2' }, + { name: 'geoLocAdmin1', displayName: 'Collection subdivision level 1' }, + ]; + + const mapping = ColumnMapping.fromColumns(sourceColumns, inputFields); + const entries = mapping.entries(); + + expect(entries).toEqual([ + ['state', null], + ['geoLocAdmin', 'geoLocAdmin1'], + ['geoLocAdmin2', 'geoLocAdmin2'], + ]); + }); + + it('should create a mapping from columns without duplicates', () => { + const sourceColumns = ['date', 'Date']; + const inputFields = [{ name: 'date', displayName: 'Date' }, { name: 'location' }, { name: 'foo' }]; + + const mapping = ColumnMapping.fromColumns(sourceColumns, inputFields); + const entries = mapping.entries(); + + expect(entries).toEqual([ + ['date', 'date'], + ['Date', null], + ]); + }); + + it('should update a specific mapping', () => { + const sourceColumns = ['loc']; + const inputFields = [ + { name: 'location', displayName: 'Location' }, + { name: 'date', displayName: 'Date' }, + ]; + const mapping = ColumnMapping.fromColumns(sourceColumns, inputFields); + + const updatedMapping = mapping.updateWith('loc', 'date'); + + const entries = updatedMapping.entries(); + expect(entries).toEqual([['loc', 'date']]); + }); + + it('should update a specific mapping and unset the previous mapping', () => { + const sourceColumns = ['loc', 'date']; + const inputFields = [ + { name: 'location', displayName: 'Location' }, + { name: 'date', displayName: 'Date' }, + ]; + const mapping = ColumnMapping.fromColumns(sourceColumns, inputFields); + let entries = mapping.entries(); + expect(entries).toEqual([ + ['loc', null], + ['date', 'date'], + ]); + + const updatedMapping = mapping.updateWith('loc', 'date'); + + entries = updatedMapping.entries(); + expect(entries).toEqual([ + ['loc', 'date'], + ['date', null], + ]); + }); + + it('should apply a mapping correctly', async () => { + const sourceColumns = ['loc', 'date']; + const inputFields = [{ name: 'date' }, { name: 'location', displayName: 'Location' }]; + const mapping = ColumnMapping.fromColumns(sourceColumns, inputFields); + const updatedMapping = mapping.updateWith('loc', 'location'); + + const tsvContent = 'date\tloc\n' + '2023-01-01\t"U\nS\nA"\n' + '2023-01-02\tCanada\n'; + + const tsvFile = new File([tsvContent], 'input.tsv'); + + const remappedFile = await updatedMapping.applyTo(new RawFile(tsvFile)); + const remappedContent = await 
remappedFile.text(); + + expect(remappedContent).toBe('location\tdate\n' + '"U\nS\nA"\t2023-01-01\n' + 'Canada\t2023-01-02'); + }); +}); diff --git a/website/src/components/Submission/FileUpload/ColumnMapping.ts b/website/src/components/Submission/FileUpload/ColumnMapping.ts new file mode 100644 index 0000000000..b90d48f583 --- /dev/null +++ b/website/src/components/Submission/FileUpload/ColumnMapping.ts @@ -0,0 +1,114 @@ +import Papa from 'papaparse'; + +import { type ProcessedFile } from './fileProcessing'; +import type { InputField } from '../../../types/config'; +import stringSimilarity from '../../../utils/stringSimilarity'; + +export class ColumnMapping { + private constructor(private readonly map: ReadonlyMap) {} + + private static getBestMatchingTargetColumn(sourceColumn: string, inputFields: InputField[]): string | null { + if (inputFields.length === 0) return null; + const [bestMatch, score] = inputFields + .map((field): [string, number] => { + const score = Math.max( + stringSimilarity(sourceColumn, field.name), + stringSimilarity(sourceColumn, field.displayName ?? ''), + ); + return [field.name, score]; + }) + .reduce((maxItem, currentItem) => (currentItem[1] > maxItem[1] ? currentItem : maxItem)); + return score > 0.8 ? bestMatch : null; + } + + /* Create a new mapping with the given columns, doing a best-effort to pre-match columns. */ + public static fromColumns(sourceColumns: string[], inputFields: InputField[]) { + const mapping = new Map(); + let availableFields = inputFields; + let remainingSourceColumns = sourceColumns; + // set them all to null to keep order + sourceColumns.forEach((sourceColumn) => mapping.set(sourceColumn, null)); + // assign exact matches first + sourceColumns.forEach((sourceColumn) => { + const foundField = availableFields.find( + (inputField) => inputField.name === sourceColumn || inputField.displayName === sourceColumn, + ); + if (foundField) { + mapping.set(sourceColumn, foundField.name); + availableFields = availableFields.filter((f) => f.name !== sourceColumn); + remainingSourceColumns = remainingSourceColumns.filter((f) => f !== sourceColumn); + } + }); + // do best effort matching second + remainingSourceColumns.forEach((sourceColumn) => { + const bestMatch = this.getBestMatchingTargetColumn(sourceColumn, availableFields); + mapping.set(sourceColumn, bestMatch); + availableFields = availableFields.filter((field) => field.name !== bestMatch); + }); + return new ColumnMapping(mapping); + } + + /* Update the mapping with new source and target columns, keeping previously mapped values. */ + public update(newSourceColumns: string[], newInputFields: InputField[]): ColumnMapping { + // keep entries that existed before + const newMapping = new Map( + newSourceColumns.map((newSourceCol) => { + const prevTargetCol = this.map.get(newSourceCol); + if (prevTargetCol && newInputFields.map((f) => f.name).includes(prevTargetCol)) { + return [newSourceCol, prevTargetCol]; + } else { + return [newSourceCol, null]; + } + }), + ); + return new ColumnMapping(newMapping); + } + + /* Returns the entries in the mapping as a list. 
Each item in the list has: + * - The source column name + * - The target column name + */ + public entries(): [string, string | null][] { + return Array.from(this.map.entries()); + } + + public usedColumns(): string[] { + return Array.from(this.map.values()).filter((v): v is string => v !== null); + } + + public updateWith(sourceColumn: string, targetColumn: string | null): ColumnMapping { + const newMapping = new Map(this.map); + newMapping.set(sourceColumn, targetColumn); + this.map.forEach((targetCol, srcCol) => targetCol === targetColumn && newMapping.set(srcCol, null)); + return new ColumnMapping(newMapping); + } + + /* Apply this mapping to a TSV file, returning a new file with remapped columns. */ + public async applyTo(tsvFile: ProcessedFile): Promise { + const text = await tsvFile.text(); + const parsed = Papa.parse(text, { delimiter: '\t', skipEmptyLines: true }); + const inputRows: string[][] = parsed.data; + const headersInFile = inputRows.splice(0, 1)[0]; + const headers: string[] = []; + const indicies: number[] = []; + this.entries().forEach(([sourceCol, targetCol]) => { + if (targetCol === null) return; + headers.push(targetCol); + indicies.push(headersInFile.findIndex((sourceHeader) => sourceHeader === sourceCol)); + }); + const newRows = inputRows.map((row) => indicies.map((i) => row[i])); + const newFileContent = Papa.unparse([headers, ...newRows], { delimiter: '\t', newline: '\n' }); + return new File([newFileContent], 'remapped.tsv'); + } + + public equals(other: ColumnMapping | null): boolean { + if (other === null) { + return false; + } + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const mapsAreEqual = (m1: ReadonlyMap, m2: ReadonlyMap) => + m1.size === m2.size && Array.from(m1.keys()).every((key) => m1.get(key) === m2.get(key)); + + return mapsAreEqual(this.map, other.map); + } +} diff --git a/website/src/components/Submission/FileUpload/ColumnMappingModal.spec.tsx b/website/src/components/Submission/FileUpload/ColumnMappingModal.spec.tsx new file mode 100644 index 0000000000..a241237394 --- /dev/null +++ b/website/src/components/Submission/FileUpload/ColumnMappingModal.spec.tsx @@ -0,0 +1,106 @@ +import { render, screen, fireEvent, waitFor } from '@testing-library/react'; +import { toast } from 'react-toastify'; +import { describe, expect, it, vi } from 'vitest'; + +import { ColumnMapping } from './ColumnMapping'; +import { ColumnMappingModal } from './ColumnMappingModal'; +import type { ProcessedFile } from './fileProcessing'; +import type { InputField } from '../../../types/config'; + +vi.mock('react-toastify', () => ({ + toast: { + error: vi.fn(), + }, +})); + +describe('ColumnMappingModal', () => { + const mockInputFile: ProcessedFile = { + text: vi.fn().mockResolvedValue('Column1\tColumn2\tColumn3\nData1\tData2\tData3'), + } as unknown as ProcessedFile; + + const mockGroupedInputFields = new Map([ + ['Group1', [{ name: 'Field1', required: true, displayName: 'Field 1' }]], + ['Group2', [{ name: 'Field2', required: false, displayName: 'Field 2' }]], + ]); + + let mockColumnMapping: ColumnMapping | null = null; + const mockSetColumnMapping = vi.fn(); + + it('renders the modal and opens when the button is clicked', async () => { + render( + , + ); + + const openButton = screen.getByText(/Add column mapping/i); + fireEvent.click(openButton); + + expect(await screen.findByText(/Remap Columns/i)).toBeInTheDocument(); + }); + + it('loads and displays columns from the input file', async () => { + render( + , + ); + + 
fireEvent.click(screen.getByText(/Add column mapping/i)); + + // eslint-disable-next-line @typescript-eslint/unbound-method + await waitFor(() => expect(mockInputFile.text).toHaveBeenCalled()); + expect(await screen.findByText(/Column in your file/i)).toBeInTheDocument(); + expect(await screen.findByText(/Column1/i)).toBeInTheDocument(); + expect(await screen.findByText(/Column2/i)).toBeInTheDocument(); + expect(await screen.findByText(/Column3/i)).toBeInTheDocument(); + }); + + it('displays error if file header cannot be read', async () => { + const mockErrorFile: ProcessedFile = { + text: vi.fn().mockRejectedValue(new Error('File read error')), + } as unknown as ProcessedFile; + + render( + , + ); + + fireEvent.click(screen.getByText(/Add column mapping/i)); + + await waitFor(() => expect(toast.error).toHaveBeenCalledWith('Could not read file header: File read error')); + }); + + it('discards the column mapping', async () => { + mockColumnMapping = ColumnMapping.fromColumns( + ['Column1', 'Column2'], + Array.from(mockGroupedInputFields.values()).flat(), + ); + + render( + , + ); + + fireEvent.click(screen.getByText(/Edit column mapping/i)); + + const discardButton = await screen.findByText(/Discard Mapping/i); + fireEvent.click(discardButton); + + expect(mockSetColumnMapping).toHaveBeenCalledWith(null); + }); +}); diff --git a/website/src/components/Submission/FileUpload/ColumnMappingModal.tsx b/website/src/components/Submission/FileUpload/ColumnMappingModal.tsx new file mode 100644 index 0000000000..4b936a05ac --- /dev/null +++ b/website/src/components/Submission/FileUpload/ColumnMappingModal.tsx @@ -0,0 +1,290 @@ +import { Listbox, ListboxButton, ListboxOption, ListboxOptions } from '@headlessui/react'; +import { Result, err, ok } from 'neverthrow'; +import Papa from 'papaparse'; +import { useEffect, useState, type Dispatch, type FC, type SetStateAction } from 'react'; +import { toast } from 'react-toastify'; +import { Tooltip } from 'react-tooltip'; + +import { ColumnMapping } from './ColumnMapping'; +import { type ProcessedFile } from './fileProcessing'; +import type { InputField } from '../../../types/config'; +import { BaseDialog } from '../../common/BaseDialog'; + +interface ColumnMappingModalProps { + inputFile: ProcessedFile; + columnMapping: ColumnMapping | null; + setColumnMapping: (newMapping: ColumnMapping | null) => void; + groupedInputFields: Map; +} + +export const ColumnMappingModal: FC = ({ + inputFile, + columnMapping, + setColumnMapping, + groupedInputFields, +}) => { + const [isOpen, setIsOpen] = useState(false); + + const openDialog = () => setIsOpen(true); + const closeDialog = () => { + setIsOpen(false); + // This is used to not have the Tooltip on the open-button pop up again. + setTimeout(() => { + if (document.activeElement === null) return; + (document.activeElement as HTMLElement).blur(); + }, 10); + }; + + const [currentMapping, setCurrentMapping] = useState(null); + const [inputColumns, setInputColumns] = useState(null); + + useEffect(() => { + if (!isOpen) return; + const loadColumns = async () => { + const columnExtractionResult = await extractColumns(inputFile); + columnExtractionResult.match( + (inputColumns) => setInputColumns(inputColumns), + (error) => { + toast.error(`Could not read file header: ${error.message}`); + setIsOpen(false); // close dialog on error. 
+ }, + ); + }; + void loadColumns(); + }, [isOpen, inputFile, setInputColumns]); + + useEffect(() => { + if (inputColumns === null) return; + const inputFields = Array.from(groupedInputFields.values()).flat(); + if (columnMapping !== null) { + setCurrentMapping(columnMapping.update(inputColumns, inputFields)); + } else { + setCurrentMapping(ColumnMapping.fromColumns(inputColumns, inputFields)); + } + }, [inputColumns, columnMapping, groupedInputFields, setCurrentMapping]); + + const handleSubmit = () => { + setColumnMapping(currentMapping); + closeDialog(); + }; + + const handleDiscard = () => { + setColumnMapping(null); + closeDialog(); + }; + + const requiredFieldsWithDuplicates = Array.from(groupedInputFields.values()) + .flat() + .filter((f) => f.required); + const requiredFields = requiredFieldsWithDuplicates.filter( + (f, i) => requiredFieldsWithDuplicates.findIndex((x) => x.name === f.name) === i, + ); + const missingFields = requiredFields.filter((field) => !currentMapping?.usedColumns().includes(field.name)); + + const isChanged = !columnMapping?.equals(currentMapping); + const submittable = isChanged && missingFields.length === 0; + + const openModalButtonText = columnMapping !== null ? 'Edit column mapping' : 'Add column mapping'; + const saveButtonText = columnMapping === null ? 'Add this mapping' : 'Save'; + const minWidthStyle = calculateMinWidthStyleFromPossibleOptions(groupedInputFields); + + return ( + <> + + + If your metadata file does not use the defined field names, this allow you +
+ to map columns in your file to the fields expected by the database. +
+ + {currentMapping === null || inputColumns === null ? ( + 'Loading ...' + ) : ( +
+ + + + + + + + + {currentMapping.entries().map(([sourceCol, targetCol]) => ( + + ))} + +
Column in your file | Submission column
+
+ {missingFields.length > 0 && 'All required fields need to be set to apply this mapping.'} +
+
+ {columnMapping !== null && ( + <> + +
+ + )} + + +
+
+ )} + + + ); +}; + +async function extractColumns(tsvFile: ProcessedFile): Promise> { + let text; + try { + text = await tsvFile.text(); + } catch (error) { + return Promise.resolve(err(error as Error)); + } + const parsed = Papa.parse(text, { delimiter: '\t', skipEmptyLines: true }); + return ok(parsed.data[0]); +} + +interface ColumnSelectorRowProps { + selectingFor: string; + options: Map; + usedOptions: string[]; + selectedOption: string | null; + setColumnMapping: Dispatch>; +} + +export const ColumnSelectorRow: FC = ({ + selectingFor, + options, + usedOptions, + selectedOption, + setColumnMapping, +}) => { + const selectedField = selectedOption + ? Array.from(options.values()) + .flat() + .find((o) => o.name === selectedOption) + : undefined; + const selectedOptionText = selectedField?.displayName ?? selectedField?.name; + const isExactMatch = selectedField?.displayName === selectingFor || selectedField?.name === selectingFor; + + const minWidthStyle = calculateMinWidthStyleFromPossibleOptions(options); + + const inputFieldToListboxOption = (header: string, field: InputField): React.JSX.Element => ( + + + {field.displayName ?? field.name} + + +

+ {field.name} +

+ {field.definition &&

{field.definition}

} + {field.guidance &&

{field.guidance}

} +
+
+ ); + + return ( + + {selectingFor} + + + setColumnMapping((currentMapping) => currentMapping!.updateWith(selectingFor, newValue)) + } + > + +
+ {selectedOption ? ( + {selectedOptionText} + ) : ( + unmapped + )} +
+ + + + + +
+ + + + unmapped + +
+ {Array.from(options.entries()).map(([header, fields]) => { + if (fields.length === 0) return; + return ( +
+
{header}
+ {fields.map((field) => inputFieldToListboxOption(header, field))} +
+ ); + })} +
+ + + + ); +}; + +/* Estimate the min width of a column with select for these input fields, + * so you don't get layout shifts when selecting different, longer, values. */ +function calculateMinWidthStyleFromPossibleOptions(options: Map): React.CSSProperties { + const maxOptionTextLength = Math.max( + ...Array.from(options.values()) + .flat() + .flatMap((x) => [x.name, x.displayName]) + .map((text) => text?.length ?? 0), + ); + + return { minWidth: `${Math.ceil(maxOptionTextLength / 2) + 2}rem` }; +} diff --git a/website/src/components/Submission/FileUpload/UploadComponent.tsx b/website/src/components/Submission/FileUpload/UploadComponent.tsx index 2b79cbe84d..aa2f737a28 100644 --- a/website/src/components/Submission/FileUpload/UploadComponent.tsx +++ b/website/src/components/Submission/FileUpload/UploadComponent.tsx @@ -10,7 +10,7 @@ export const UploadComponent = ({ ariaLabel, fileKind, }: { - setFile: (file: File | null) => void; + setFile: (file: ProcessedFile | null) => void; name: string; ariaLabel: string; fileKind: FileKind; @@ -37,7 +37,7 @@ export const UploadComponent = ({ }, ); } - setFile(processedFile !== null ? processedFile.inner() : null); + setFile(processedFile); rawSetMyFile(processedFile); }, [setFile, rawSetMyFile], @@ -84,7 +84,7 @@ export const UploadComponent = ({ }, [myFile, setMyFile]); return (
{ }); test.each([ + ['testfile.tsv.zip', 0], + ['testfile.tsv.gz', 0], + ['testfile.tsv.zst', 0], ['testfile_different_formats.xls', 0], ['testfile_different_formats.xlsx', 0], ['testfile_different_formats.xlsx.gz', 0], @@ -44,7 +47,7 @@ describe('fileProcessing', () => { const processedFile = processingResult._unsafeUnwrap(); expect(processedFile.warnings().length).toBe(warningsCount); - expect(await processedFile.inner().text()).toEqual(tsvFileContent); + expect(await processedFile.text().then((text) => text.replace(/[\r]+/g, ''))).toEqual(tsvFileContent); }, 10000, ); diff --git a/website/src/components/Submission/FileUpload/fileProcessing.ts b/website/src/components/Submission/FileUpload/fileProcessing.ts index 9916f6eff6..6466fc2ae0 100644 --- a/website/src/components/Submission/FileUpload/fileProcessing.ts +++ b/website/src/components/Submission/FileUpload/fileProcessing.ts @@ -29,7 +29,8 @@ export const METADATA_FILE_KIND: FileKind = { const isCompressed = COMPRESSION_EXTENSIONS.includes(extension); const dataExtension = isCompressed ? fileNameParts[fileNameParts.length - 2] : extension; const compressionExtension = isCompressed ? extension : null; - if (dataExtension === 'tsv') return ok(new RawFile(file)); + if (dataExtension === 'tsv' && !isCompressed) return ok(new RawFile(file)); + if (dataExtension === 'tsv' && isCompressed) return ok(new CompressedFile(file)); if (dataExtension === 'xlsx' || dataExtension === 'xls') { if (isCompressed && compressionExtension === 'xz') { return err( @@ -39,7 +40,7 @@ export const METADATA_FILE_KIND: FileKind = { ), ); } - const compression = isCompressed ? (compressionExtension as ExcelCompressionKind) : undefined; + const compression = isCompressed ? (compressionExtension as SupportedInBrowserCompressionKind) : null; const excelFile = new ExcelFile(file, compression); try { await excelFile.init(); @@ -63,6 +64,8 @@ export interface ProcessedFile { /* The file containing the data (might be processed, only exists in memory) */ inner(): File; + text(): Promise; + /* The handle to the file on disk. 
*/ handle(): File; @@ -70,12 +73,10 @@ export interface ProcessedFile { warnings(): string[]; } -class RawFile implements ProcessedFile { - private innerFile: File; +export const dummy = 0; - constructor(file: File) { - this.innerFile = file; - } +export class RawFile implements ProcessedFile { + constructor(private innerFile: File) {} inner(): File { return this.innerFile; @@ -85,22 +86,63 @@ class RawFile implements ProcessedFile { return this.innerFile; } + async text(): Promise { + return this.innerFile.text(); + } + warnings(): string[] { return []; } } -type SupportedExcelCompressionKind = 'zst' | 'gz' | 'zip'; -type NoCompression = null; -type ExcelCompressionKind = NoCompression | SupportedExcelCompressionKind; +type SupportedInBrowserCompressionKind = 'zst' | 'gz' | 'zip'; +const isSupportedInBrowserCompressionKind = (s: string): s is SupportedInBrowserCompressionKind => + ['zst', 'gz', 'zip'].includes(s); + +async function decompress( + compressedData: ArrayBuffer, + compression: SupportedInBrowserCompressionKind, +): Promise { + switch (compression) { + case 'zst': { + const array = fzstd.decompress(new Uint8Array(compressedData)); + return array.buffer.slice(array.byteOffset, array.byteOffset + array.byteLength); + } + case 'gz': { + return fflate.decompressSync(new Uint8Array(compressedData)).buffer; + } + case 'zip': { + const zip = JSZip.loadAsync(compressedData); + return zip.then((z) => z.files[Object.keys(z.files)[0]].async('arraybuffer')); + } + } +} + +export class CompressedFile extends RawFile { + async text(): Promise { + const fileNameSegments = this.inner().name.split('.'); + const compressionType = fileNameSegments[fileNameSegments.length - 1].toLowerCase(); -class ExcelFile implements ProcessedFile { + if (isSupportedInBrowserCompressionKind(compressionType)) { + return this.inner() + .arrayBuffer() + .then((b) => decompress(b, compressionType)) + .then((b) => new TextDecoder('utf-8').decode(b as ArrayBuffer)); + } + + if (compressionType === 'xz') throw new Error('xz files cannot be opened for editing.'); + + throw new Error(`Unknown extension: ${compressionType}`); + } +} + +export class ExcelFile implements ProcessedFile { private originalFile: File; - private compression: ExcelCompressionKind; + private compression: SupportedInBrowserCompressionKind | null; private tsvFile: File | undefined; private processingWarnings: string[]; - constructor(excelFile: File, compression: ExcelCompressionKind = null) { + constructor(excelFile: File, compression: SupportedInBrowserCompressionKind | null = null) { // assumes that the given file is actually an excel file (might be compressed). this.originalFile = excelFile; this.compression = compression; @@ -108,22 +150,9 @@ class ExcelFile implements ProcessedFile { } private async getRawData(): Promise { - switch (this.compression) { - case null: - return this.originalFile.arrayBuffer(); - case 'zst': - return this.originalFile - .arrayBuffer() - .then((b) => fzstd.decompress(new Uint8Array(b))) - .then((b) => b.buffer.slice(b.byteOffset, b.byteOffset + b.byteLength)); - case 'gz': - return this.originalFile.arrayBuffer().then((b) => fflate.decompressSync(new Uint8Array(b)).buffer); - case 'zip': - return this.originalFile - .arrayBuffer() - .then((b) => JSZip.loadAsync(b)) - .then((zip) => zip.files[Object.keys(zip.files)[0]].async('arraybuffer')); - } + const compression = this.compression; + const buffer = this.originalFile.arrayBuffer(); + return compression === null ? 
buffer : buffer.then((b) => decompress(b, compression)); } async init() { @@ -170,6 +199,10 @@ class ExcelFile implements ProcessedFile { return this.tsvFile; } + async text(): Promise { + return this.inner().text(); + } + handle(): File { return this.originalFile; } diff --git a/website/src/components/Submission/FileUpload/test_files/testfile.tsv.gz b/website/src/components/Submission/FileUpload/test_files/testfile.tsv.gz new file mode 100644 index 0000000000..b560418b2a Binary files /dev/null and b/website/src/components/Submission/FileUpload/test_files/testfile.tsv.gz differ diff --git a/website/src/components/Submission/FileUpload/test_files/testfile.tsv.zip b/website/src/components/Submission/FileUpload/test_files/testfile.tsv.zip new file mode 100644 index 0000000000..6b985b6c26 Binary files /dev/null and b/website/src/components/Submission/FileUpload/test_files/testfile.tsv.zip differ diff --git a/website/src/components/Submission/FileUpload/test_files/testfile.tsv.zst b/website/src/components/Submission/FileUpload/test_files/testfile.tsv.zst new file mode 100644 index 0000000000..6eb1279e4a Binary files /dev/null and b/website/src/components/Submission/FileUpload/test_files/testfile.tsv.zst differ diff --git a/website/src/components/Submission/RevisionForm.tsx b/website/src/components/Submission/RevisionForm.tsx index 27dc7277a6..5bbd51bc5c 100644 --- a/website/src/components/Submission/RevisionForm.tsx +++ b/website/src/components/Submission/RevisionForm.tsx @@ -4,6 +4,7 @@ import { toast } from 'react-toastify'; import { DataUploadForm } from './DataUploadForm.tsx'; import { routes } from '../../routes/routes.ts'; import { type Group } from '../../types/backend.ts'; +import type { InputField } from '../../types/config.ts'; import type { ReferenceGenomesSequenceNames } from '../../types/referencesGenomes'; import type { ClientConfig } from '../../types/runtimeConfig.ts'; @@ -13,6 +14,7 @@ type RevisionFormProps = { clientConfig: ClientConfig; group: Group; referenceGenomeSequenceNames: ReferenceGenomesSequenceNames; + metadataTemplateFields: Map; }; export const RevisionForm: FC = ({ @@ -21,6 +23,7 @@ export const RevisionForm: FC = ({ clientConfig, group, referenceGenomeSequenceNames, + metadataTemplateFields, }) => { return (
@@ -28,6 +31,7 @@ export const RevisionForm: FC = ({ accessToken={accessToken} organism={organism} referenceGenomeSequenceNames={referenceGenomeSequenceNames} + metadataTemplateFields={metadataTemplateFields} clientConfig={clientConfig} action='revise' onError={(message) => toast.error(message, { position: 'top-center', autoClose: false })} diff --git a/website/src/components/Submission/SubmissionForm.spec.tsx b/website/src/components/Submission/SubmissionForm.spec.tsx index ff6b041bc4..023a27677d 100644 --- a/website/src/components/Submission/SubmissionForm.spec.tsx +++ b/website/src/components/Submission/SubmissionForm.spec.tsx @@ -56,6 +56,7 @@ function renderSubmissionForm() { organism={testOrganism} clientConfig={testConfig.public} group={group} + metadataTemplateFields={new Map([['fooSection', [{ name: 'foo' }, { name: 'bar' }]]])} />, ); } diff --git a/website/src/components/Submission/SubmissionForm.tsx b/website/src/components/Submission/SubmissionForm.tsx index bc10c357b2..9e6a73abc7 100644 --- a/website/src/components/Submission/SubmissionForm.tsx +++ b/website/src/components/Submission/SubmissionForm.tsx @@ -4,6 +4,7 @@ import { toast } from 'react-toastify'; import { DataUploadForm } from './DataUploadForm.tsx'; import { routes } from '../../routes/routes.ts'; import { type Group } from '../../types/backend.ts'; +import type { InputField } from '../../types/config.ts'; import type { ReferenceGenomesSequenceNames } from '../../types/referencesGenomes'; import type { ClientConfig } from '../../types/runtimeConfig.ts'; @@ -13,6 +14,7 @@ type SubmissionFormProps = { clientConfig: ClientConfig; group: Group; referenceGenomeSequenceNames: ReferenceGenomesSequenceNames; + metadataTemplateFields: Map; }; export const SubmissionForm: FC = ({ @@ -21,6 +23,7 @@ export const SubmissionForm: FC = ({ clientConfig, group, referenceGenomeSequenceNames, + metadataTemplateFields, }) => { return (
@@ -28,6 +31,7 @@ export const SubmissionForm: FC = ({ accessToken={accessToken} organism={organism} referenceGenomeSequenceNames={referenceGenomeSequenceNames} + metadataTemplateFields={metadataTemplateFields} clientConfig={clientConfig} action='submit' onError={(message) => toast.error(message, { position: 'top-center', autoClose: false })} diff --git a/website/src/components/common/BaseDialog.tsx b/website/src/components/common/BaseDialog.tsx index 8787673f42..9da35ec64d 100644 --- a/website/src/components/common/BaseDialog.tsx +++ b/website/src/components/common/BaseDialog.tsx @@ -6,15 +6,19 @@ interface BaseDialogProps { isOpen: boolean; onClose: () => void; children: ReactNode; + fullWidth?: boolean; } -export const BaseDialog: React.FC = ({ title, isOpen, onClose, children }) => { +export const BaseDialog: React.FC = ({ title, isOpen, onClose, children, fullWidth = true }) => { + const fullWidthClasses = fullWidth ? 'w-full w-max-5xl' : ''; return (
- + {title} diff --git a/website/src/config.ts b/website/src/config.ts index 862cfbaaff..032951e952 100644 --- a/website/src/config.ts +++ b/website/src/config.ts @@ -3,7 +3,14 @@ import path from 'path'; import type { z, ZodError } from 'zod'; -import { type InstanceConfig, type Schema, type WebsiteConfig, websiteConfig } from './types/config.ts'; +import { ACCESSION_FIELD, SUBMISSION_ID_FIELD } from './settings.ts'; +import { + type InstanceConfig, + type Schema, + type WebsiteConfig, + websiteConfig, + type InputField, +} from './types/config.ts'; import { type ReferenceGenomes } from './types/referencesGenomes.ts'; import { runtimeConfig, type RuntimeConfig, type ServiceUrls } from './types/runtimeConfig.ts'; @@ -92,12 +99,88 @@ export function getSchema(organism: string): Schema { return getConfig(organism).schema; } -export function getMetadataTemplateFields(organism: string): string[] { +export function getMetadataTemplateFields( + organism: string, + action: 'submit' | 'revise', +): Map { const schema = getConfig(organism).schema; - if (schema.metadataTemplate !== undefined) { - return schema.metadataTemplate; - } - return getConfig(organism).schema.inputFields.map((field) => field.name); + const baseFields: string[] = schema.metadataTemplate ?? schema.inputFields.map((field) => field.name); + const extraFields = action === 'submit' ? [SUBMISSION_ID_FIELD] : [ACCESSION_FIELD, SUBMISSION_ID_FIELD]; + const allFields = [...extraFields, ...baseFields]; + const fieldsToDisplaynames = new Map( + allFields.map((field) => [field, schema.metadata.find((metadata) => metadata.name === field)?.displayName]), + ); + return fieldsToDisplaynames; +} + +function getAccessionInputField(): InputField { + const accessionPrefix = getWebsiteConfig().accessionPrefix; + const instanceName = getWebsiteConfig().name; + return { + name: ACCESSION_FIELD, + displayName: 'Accession', + definition: `The ${instanceName} accession (without version) of the sequence you would like to revise.`, + example: `${accessionPrefix}000P97Y`, + noEdit: true, + required: true, + }; +} + +function getSubmissionIdInputField(): InputField { + return { + name: SUBMISSION_ID_FIELD, + displayName: 'Submission ID', + definition: 'FASTA ID', + guidance: + 'Your sequence identifier; should match the FASTA file header - this is used to link the metadata to the FASTA sequence', + example: 'GJP123', + noEdit: true, + required: true, + }; +} + +export function getGroupedInputFields( + organism: string, + action: 'submit' | 'revise', + excludeDuplicates: boolean = false, +): Map { + const inputFields = getConfig(organism).schema.inputFields; + const metadata = getConfig(organism).schema.metadata; + + const groups = new Map(); + + const requiredFields = inputFields.filter((meta) => meta.required); + const desiredFields = inputFields.filter((meta) => meta.desired); + + const coreFields = + action === 'submit' ? [getSubmissionIdInputField()] : [getSubmissionIdInputField(), getAccessionInputField()]; + + groups.set('Required fields', [...coreFields, ...requiredFields]); + groups.set('Desired fields', desiredFields); + if (!excludeDuplicates) groups.set('Submission details', [getSubmissionIdInputField()]); + + const fieldAlreadyAdded = (fieldName: string) => + Array.from(groups.values()) + .flatMap((fields) => fields.map((f) => f.name)) + .some((name) => name === fieldName); + + inputFields.forEach((field) => { + const metadataEntry = metadata.find((meta) => meta.name === field.name); + const header = metadataEntry?.header ?? 
'Uncategorized'; + + if (!groups.has(header)) { + groups.set(header, []); + } + + // Optionally remove duplicates + if (excludeDuplicates && fieldAlreadyAdded(field.name)) { + return; + } + + groups.get(header)!.push({ ...field }); + }); + + return groups; } export function getRuntimeConfig(): RuntimeConfig { diff --git a/website/src/pages/[organism]/submission/[groupId]/revise.astro b/website/src/pages/[organism]/submission/[groupId]/revise.astro index 7f34789237..4a6e82863b 100644 --- a/website/src/pages/[organism]/submission/[groupId]/revise.astro +++ b/website/src/pages/[organism]/submission/[groupId]/revise.astro @@ -2,7 +2,7 @@ import { cleanOrganism } from '../../../../components/Navigation/cleanOrganism'; import { RevisionForm } from '../../../../components/Submission/RevisionForm'; import SubmissionPageWrapper from '../../../../components/Submission/SubmissionPageWrapper.astro'; -import { getRuntimeConfig } from '../../../../config'; +import { getGroupedInputFields, getRuntimeConfig } from '../../../../config'; import { getAccessToken } from '../../../../utils/getAccessToken'; import { getReferenceGenomesSequenceNames } from '../../../../utils/search'; import { getGroupsAndCurrentGroup } from '../../../../utils/submissionPages'; @@ -17,6 +17,8 @@ if (!cleanedOrganism) { }; } const referenceGenomeSequenceNames = getReferenceGenomesSequenceNames(cleanedOrganism.key); +const groupedInputFields = getGroupedInputFields(cleanedOrganism.key, 'revise', true); + const groupsResult = await getGroupsAndCurrentGroup(Astro.params, Astro.locals.session); const clientConfig = getRuntimeConfig().public; @@ -30,6 +32,7 @@ const clientConfig = getRuntimeConfig().public; accessToken={getAccessToken(Astro.locals.session)!} organism={organism} referenceGenomeSequenceNames={referenceGenomeSequenceNames} + metadataTemplateFields={groupedInputFields} clientConfig={clientConfig} group={group} client:load diff --git a/website/src/pages/[organism]/submission/[groupId]/submit.astro b/website/src/pages/[organism]/submission/[groupId]/submit.astro index 2b6d10819b..9af17c6a9c 100644 --- a/website/src/pages/[organism]/submission/[groupId]/submit.astro +++ b/website/src/pages/[organism]/submission/[groupId]/submit.astro @@ -2,7 +2,7 @@ import { cleanOrganism } from '../../../../components/Navigation/cleanOrganism'; import { SubmissionForm } from '../../../../components/Submission/SubmissionForm'; import SubmissionPageWrapper from '../../../../components/Submission/SubmissionPageWrapper.astro'; -import { getRuntimeConfig } from '../../../../config'; +import { getGroupedInputFields, getRuntimeConfig } from '../../../../config'; import { getAccessToken } from '../../../../utils/getAccessToken'; import { getReferenceGenomesSequenceNames } from '../../../../utils/search'; import { getGroupsAndCurrentGroup } from '../../../../utils/submissionPages'; @@ -18,6 +18,7 @@ if (!cleanedOrganism) { }; } const referenceGenomeSequenceNames = getReferenceGenomesSequenceNames(cleanedOrganism.key); +const groupedInputFields = getGroupedInputFields(cleanedOrganism.key, 'submit', true); const groupsResult = await getGroupsAndCurrentGroup(Astro.params, Astro.locals.session); @@ -36,6 +37,7 @@ Astro.response.headers.append('Expires', '0'); { @@ -16,7 +15,8 @@ export const GET: APIRoute = ({ params, request }) => { } const action: UploadAction = new URL(request.url).searchParams.get('format') === 'revise' ? 'revise' : 'submit'; - const extraFields = action === 'submit' ? 
[SUBMISSION_ID_FIELD] : [ACCESSION_FIELD, SUBMISSION_ID_FIELD]; + const fieldNames = getMetadataTemplateFields(organism.key, action); + const tsvTemplate = [...fieldNames.keys()].join('\t') + '\n'; const headers: Record = { 'Content-Type': 'text/tsv', // eslint-disable-line @typescript-eslint/naming-convention @@ -25,9 +25,6 @@ export const GET: APIRoute = ({ params, request }) => { const filename = `${organism.displayName.replaceAll(' ', '_')}_metadata_${action === 'revise' ? 'revision_' : ''}template.tsv`; headers['Content-Disposition'] = `attachment; filename="${filename}"`; - const fieldNames = getMetadataTemplateFields(organism.key); - const tsvTemplate = [...extraFields, ...fieldNames].join('\t') + '\n'; - return new Response(tsvTemplate, { headers, }); diff --git a/website/src/types/config.ts b/website/src/types/config.ts index 3f6846ab6b..df6262fc8b 100644 --- a/website/src/types/config.ts +++ b/website/src/types/config.ts @@ -124,6 +124,7 @@ const logoConfig = z.object({ }); export const websiteConfig = z.object({ + accessionPrefix: z.string(), organisms: z.record(instanceConfig), name: z.string(), logo: logoConfig, diff --git a/website/src/utils/groupFieldsByHeader.ts b/website/src/utils/groupFieldsByHeader.ts deleted file mode 100644 index 2436c3c578..0000000000 --- a/website/src/utils/groupFieldsByHeader.ts +++ /dev/null @@ -1,37 +0,0 @@ -import type { InputField, Metadata } from '../types/config'; - -const SUBMISSION_ID_FIELD: InputField = { - name: 'submissionId', - displayName: 'Submission ID', - definition: 'FASTA ID', - guidance: - 'Your sequence identifier; should match the FASTA file header - this is used to link the metadata to the FASTA sequence', - example: 'GJP123', - noEdit: true, - required: true, -}; - -export const groupFieldsByHeader = (inputFields: InputField[], metadata: Metadata[]): Map => { - const groups = new Map(); - - const requiredFields = inputFields.filter((meta) => meta.required); - const desiredFields = inputFields.filter((meta) => meta.desired); - - groups.set('Required fields', [...requiredFields, SUBMISSION_ID_FIELD]); - groups.set('Desired fields', desiredFields); - groups.set('Submission details', [SUBMISSION_ID_FIELD]); - - inputFields.forEach((field) => { - const metadataEntry = metadata.find((meta) => meta.name === field.name); - const header = metadataEntry?.header ?? 'Uncategorized'; - - if (!groups.has(header)) { - groups.set(header, []); - } - groups.get(header)!.push({ - ...field, - }); - }); - - return groups; -}; diff --git a/website/src/utils/stringSimilarity.ts b/website/src/utils/stringSimilarity.ts new file mode 100644 index 0000000000..5b3d5596f2 --- /dev/null +++ b/website/src/utils/stringSimilarity.ts @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2018 Stephen Brown + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * string-similarity-js + * Original source: https://github.com/stephenjjbrown/string-similarity-js/blob/715c1ea4c06077fc1753dad106facc64f6a5bd99/src/string-similarity.ts + */ + +/** + * Calculate similarity between two strings + * @param {string} str1 First string to match + * @param {string} str2 Second string to match + * @param {number} [substringLength=2] Optional. Length of substring to be used in calculating similarity. Default 2. + * @param {boolean} [caseSensitive=false] Optional. Whether you want to consider case in string matching. Default false; + * @returns Number between 0 and 1, with 0 being a low match score. + */ +export const stringSimilarity = ( + str1: string, + str2: string, + substringLength: number = 2, + caseSensitive: boolean = false, +) => { + if (!caseSensitive) { + str1 = str1.toLowerCase(); + str2 = str2.toLowerCase(); + } + + if (str1.length < substringLength || str2.length < substringLength) return 0; + + const map = new Map(); + for (let i = 0; i < str1.length - (substringLength - 1); i++) { + const substr1 = str1.substring(i, i + substringLength); + map.set(substr1, (map.get(substr1) ?? 0) + 1); + } + + let match = 0; + for (let j = 0; j < str2.length - (substringLength - 1); j++) { + const substr2 = str2.substring(j, j + substringLength); + const count = map.get(substr2) ?? 0; + if (count > 0) { + map.set(substr2, count - 1); + match++; + } + } + + return (match * 2) / (str1.length + str2.length - (substringLength - 1) * 2); +}; +export default stringSimilarity;
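A minimal end-to-end usage sketch of the new `ColumnMapping` API, assembled from `ColumnMapping.spec.ts` above; the field names and file contents are illustrative.

```ts
import { ColumnMapping } from './ColumnMapping';
import { RawFile } from './fileProcessing';

const inputFields = [{ name: 'date' }, { name: 'location', displayName: 'Location' }];

// Best-effort pre-matching: 'date' matches exactly; 'loc' stays unmapped because its
// bigram similarity to 'location' is below the 0.8 threshold used by fromColumns.
const initial = ColumnMapping.fromColumns(['loc', 'date'], inputFields);

// The user resolves the remaining column in the mapping modal.
const mapping = initial.updateWith('loc', 'location');

// Applying the mapping rewrites the TSV header and reorders the columns.
const tsv = new File(['date\tloc\n2023-01-01\tUSA\n'], 'input.tsv');
const remapped = await mapping.applyTo(new RawFile(tsv));
console.log(await remapped.text()); // 'location\tdate\nUSA\t2023-01-01'
```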