From 177d57a510d02ba8d09340edf0d287adf680622a Mon Sep 17 00:00:00 2001 From: cellgeni Date: Fri, 17 Jan 2025 12:59:31 +0000 Subject: [PATCH 01/18] Changed the script to have the same structure as `curl_ena_metadata.sh` --- scripts/curl_sra_metadata.sh | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/scripts/curl_sra_metadata.sh b/scripts/curl_sra_metadata.sh index 97d9876..89b4376 100755 --- a/scripts/curl_sra_metadata.sh +++ b/scripts/curl_sra_metadata.sh @@ -1,7 +1,6 @@ #!/bin/bash -e -SERIES=$1 -RUNS="$SERIES.project.list" +RUNS=$1 if [[ ! -f $RUNS ]] then @@ -18,9 +17,7 @@ do WebEnv=$(grep -oP '\K[^<]+' $i.xml) QueryKey=$(grep -oP '\K[^<]+' $i.xml) rm -f $i.xml - curl "https://trace.ncbi.nlm.nih.gov/Traces/sra-db-be/sra-db-be.cgi?rettype=runinfo&WebEnv=${WebEnv}&query_key=${QueryKey}" 2> /dev/null | sed '/BioProject/d' | sed 's/,/\t/g' > $i.sra.tsv + curl "https://trace.ncbi.nlm.nih.gov/Traces/sra-db-be/sra-db-be.cgi?rettype=runinfo&WebEnv=${WebEnv}&query_key=${QueryKey}" 2> /dev/null | sed '/BioProject/d' | sed 's/,/\t/g' done -cat *.sra.tsv > $SERIES.sra.tsv - >&2 echo "CURL SRA METADATA: ALL DONE!" 
From b54e19b4b7c1d8308091ac1e56f080273dcb4f9f Mon Sep 17 00:00:00 2001 From: cellgeni Date: Fri, 17 Jan 2025 16:36:13 +0000 Subject: [PATCH 02/18] splited the script in separate functions and added soft fike parsing to get GSM IDs --- scripts/collect_metadata.sh | 613 ++++++++++++++++++++++++------------ 1 file changed, 410 insertions(+), 203 deletions(-) diff --git a/scripts/collect_metadata.sh b/scripts/collect_metadata.sh index 10b02c6..6f9a360 100755 --- a/scripts/collect_metadata.sh +++ b/scripts/collect_metadata.sh @@ -1,26 +1,12 @@ #!/bin/bash -SERIES=$1 -SUBSET=$2 +set -uo pipefail -META=$SERIES.ena.tsv +function download_geo_family() { + local SERIES=$1 -## this would get increasingly complicated when we support more databases -## for now its 3 main ones: GEO, ArrayExpress, and naked SRA/ENA project ID -## got rid of ffq in this version, this works faster and more to the point (but is more ENA-dependent) - -if (( $# != 1 && $# != 2 )) -then - >&2 echo "USAGE: ./collect_metadata.sh [sample_list]" - >&2 echo - >&2 echo "(requires curl_ena_metadata.sh and parse_ena_metadata.sh present in the same directory)" - exit 1 -fi - -if [[ $SERIES == GSE* ]] -then ## download the so-called soft_family file, and use it to generate same files as above - PAD=`echo $SERIES | perl -ne 's/\d{3}$/nnn/; print'` + local PAD=`echo $SERIES | perl -ne 's/\d{3}$/nnn/; print'` wget -O ${SERIES}_family.soft.gz https://ftp.ncbi.nlm.nih.gov/geo/series/$PAD/$SERIES/soft/${SERIES}_family.soft.gz ## -f overwrites the old stuff gzip -fd ${SERIES}_family.soft.gz @@ -29,18 +15,50 @@ then >&2 echo "ERROR: Failed to download ${SERIES}_family.soft file; please make sure the series you requested exists, or fix the download URL!" exit 1 fi +} + +function download_sdrf_idf_files() { + local SERIES=$1 + + wget -O $SERIES.sdrf.txt https://www.ebi.ac.uk/biostudies/files/$SERIES/$SERIES.sdrf.txt + wget -O $SERIES.idf.txt https://www.ebi.ac.uk/biostudies/files/$SERIES/$SERIES.idf.txt + + if [[ ! 
-s $SERIES.sdrf.txt ]] + then + >&2 echo "ERROR: Failed to download $SERIES.sdrf.txt file; please make sure the series you requested exists, or fix the download URL!" + exit 1 + fi +} + +function parse_geo_family() { + local SERIES=$1 + + ## get bioproject ID + grep Series_relation ${SERIES}_family.soft | perl -ne 'print "$1\n" if (m/(PRJ[A-Z]+\d+)/)' | sort | uniq || echo "" > $SERIES.project.list + + ## get sample IDs; samples here are GSM IDs; usually for a 10x GSM==SRS==SRX, but I haven't checked *all* of the SRA you know + awk ' + BEGIN {OFS="\t"} + # get all IDs + /\^SAMPLE/ { sample=gensub(/.*(GSM[0-9]+)/, "\\1", "g", $0) } + /Sample_geo_accession/ { geo=gensub(/.*(GSM[0-9]+)/, "\\1", "g", $0) } + /Sample_relation = SRA:/ { sra=gensub(/.*(SRX[0-9]+)/, "\\1", "g", $0) } + /Sample_relation = BioSample:/ { biosample=gensub(/.*(SAMN[0-9]+)/, "\\1", "g", $0) } + + # When all three pieces of information are found, print them as a tab-separated line + /BioSample:/ && sample && geo && sra && biosample { + print sample,geo,sra,biosample + sample=""; geo=""; sra=""; biosample="" + } + ' ${SERIES}_family.soft > $SERIES.sample.relation.list + cut -f 2 ${SERIES}.sample.relation.list > $SERIES.sample.list + cut -f 4 ${SERIES}.sample.relation.list > $SERIES.biosample.list - ## samples here are GSM IDs; usually for a 10x GSM==SRS==SRX, but I haven't checked *all* of the SRA you know - grep Sample_geo_accession ${SERIES}_family.soft | awk '{print $3}' | sort | uniq > $SERIES.sample.list - grep Series_relation ${SERIES}_family.soft | perl -ne 'print "$1\n" if (m/(PRJ[A-Z]+\d+)/)' | sort | uniq > $SERIES.project.list - ## first variable is used to spot dbGap and other problematic datasets; - ## second variable is used to find SubSeries when SuperSeries does not produce any meaningful ENA links - EXPIDS=`grep Series_relation ${SERIES}_family.soft | grep -v PRJ | wc -l` - SUBGSE=`grep Series_relation ${SERIES}_family.soft | grep SuperSeries | perl -ne 'print "$1\n" if 
(m/(GSE\d+)/)'` + local EXPIDS=`grep Series_relation ${SERIES}_family.soft | grep -v PRJ | wc -l` ## few sanity checks: - if [[ `cat $SERIES.project.list | wc -l` != "1" ]] + if [[ `cat $SERIES.project.list | wc -l` -gt 1 ]] then >&2 echo "WARNING: more than 1 project associated with series $SERIES! This shouldn't normally happen, do take a look." fi @@ -49,219 +67,408 @@ then then >&2 echo "WARNING: No secondary run/experiment (SRP/SRX) IDs in the family.soft file; this often happens in datasets that are restricted access (dbGap, etc)." fi +} - ## curl info about each run (SRR/ERR/DRR) from ENA and SRA APIs; v2 pulls GSM data etc - RET=1 - RET_SRA=1 - TRIES=1 - until (( $RET == 0)) - do - ./curl_ena_metadata.sh $SERIES.project.list > $SERIES.ena.tsv - RET=$? +function parse_sdrf_idf() { + local SERIES=$1 - if [[ $RET_SRA -eq 1 ]] - then - ./curl_sra_metadata.sh $SERIES - RET_SRA=$? - fi + ## samples are ERS in case of ArrayExpress. Why not ERX, you might ask? Yes, ask you might. + cat $SERIES.sdrf.txt | tr '\t' '\n' | grep "^ERS" | sort | uniq > $SERIES.sample.list +} + +function get_subseries_from_family { + local SERIES=$1 + local OUTPUT_FILE=$2 + local SUBGSE=`grep Series_relation ${SERIES}_family.soft | grep SuperSeries | perl -ne 'print "$1\n" if (m/(GSE\d+)/)'` + + ## delete output file if it exists + if [[ -f $OUTPUT_FILE ]] + then + rm $OUTPUT_FILE + fi + + ## pulls sub-series data + if [[ $SUBGSE == "" ]] + then + >&2 echo "ERROR: No GSE subseries were listed in ${SERIES}_family.soft file!" 
+ else + for i in $SUBGSE + do + local PAD=`echo $i | perl -ne 's/\d{3}$/nnn/; print'` + wget -O ${i}_family.soft.gz https://ftp.ncbi.nlm.nih.gov/geo/series/$PAD/$i/soft/${i}_family.soft.gz + gzip -fd ${i}_family.soft.gz + grep Series_relation ${i}_family.soft | perl -ne 'print "$1\n" if (m/(PRJ[A-Z]+\d+)/)' | sort | uniq >> $OUTPUT_FILE + done + fi +} + +function download_metadata { + local SERIES=$1 + local SCRIPT=$2 + local DOWNLOAD_LIST=$3 + local OUTPUT_FILE=$4 + + local STATUS=1 + local TRIES=1 + + if [[ ! -s $DOWNLOAD_LIST ]] + then + >&2 echo "ERROR: No download list $DOWNLOAD_LIST found!" + return 1 + fi - ## this either pulls sub-series data (and replaces $SERIES.project.list with useful PRJNA* IDs), or just quits after 5 tries + + while [[ ! $STATUS -eq 0 && TRIES -le 5 ]] + do + $SCRIPT $DOWNLOAD_LIST > $OUTPUT_FILE + STATUS=$? TRIES=$((TRIES+1)) - if (( $TRIES > 5 )) - then - >&2 echo "WARNING: No ENA records can be retrieved for GEO projects listed in $SERIES.project.list!" - if [[ $SUBGSE == "" ]] + sleep 1 + done + + if [[ ! -s $OUTPUT_FILE ]] + then + >&2 echo "ERROR: Failed to download metadata for $SERIES using $SCRIPT and $DOWNLOAD_LIST!" + return 1 + else + return $STATUS + fi +} + + +function alternative_download_metadata { + local SERIES=$1 + local SCRIPT=$2 + local OUTPUT_FILE=$3 + local STATUS=1 + + ## try loading SRA metadata using subseries + >&2 echo "WARNING: replacing $SERIES.project.list with sub-series projects.." + if [[ ! -f $SERIES.subproject.list ]] + then + get_subseries_from_family "$SERIES" "$SERIES.subproject.list" + fi + + ## try loading SRA metadata using subseries + if [[ -s $SERIES.subproject.list ]] + then + download_metadata "$SERIES" "$SCRIPT" "$SERIES.subproject.list" "$OUTPUT_FILE" + STATUS=$? + fi + + ## try loading SRA using BioSample identifiers + if [ $STATUS -eq 1 ] + then + >&2 echo "WARNING: replacing $SERIES.project.list with BioSample identifiers.." 
+ download_metadata "$SERIES" "$SCRIPT" "$SERIES.biosample.list" "$OUTPUT_FILE" + STATUS=$? + fi + + if [ $STATUS -eq 1 ] + then + >&2 echo "ERROR: Failed to download metadata for $SERIES using $SCRIPT methods!" + fi + return $STATUS +} + +function write_accessions() { + local SERIES=$1 + local SAMPLE=$2 + local SMPS=$3 + local EXPS=$4 + local RUNS=$5 + + ## check that we have all the IDs + if [[ $EXPS == "" || $RUNS == "" ]] + then + return 1 + fi + + # write the accessions to the accessions file + if [[ $SERIES == GSE* ]] + then + echo -e "$SAMPLE\t$SMPS\t$EXPS\t$RUNS" >> $SERIES.accessions.tsv + else + echo -e "-\t$SAMPLE\t$EXPS\t$RUNS" >> $SERIES.accessions.tsv + fi + return 0 +} + +function get_sample_ids() { + local SERIES=$1 + local META=$2 + local STATUS=1 + + ## delete accessions file if exists + if [[ -s $SERIES.accessions.tsv ]] + then + rm $SERIES.accessions.tsv + fi + + ## get sample, experiment, and run IDs for each sample from metadata file + if [[ -s $META ]] + then + for i in `cat $SERIES.sample.list` + do + ## get BioSample ID if possible + if [[ -s $SERIES.sample.relation.list ]] then - >&2 echo "ERROR: No GSE subseries were listed in ${SERIES}_family.soft - no alternative PRJNA* to be found, and no ENA entries can be retrieved!" + local biosample=`grep $i $SERIES.sample.relation.list | cut -f 4 | tr -d '\n'` else - >&2 echo "WARNING: replacing $SERIES.project.list with sub-series projects.." - rm $SERIES.project.list - for i in $SUBGSE - do - PAD=`echo $i | perl -ne 's/\d{3}$/nnn/; print'` - wget -O ${i}_family.soft.gz https://ftp.ncbi.nlm.nih.gov/geo/series/$PAD/$i/soft/${i}_family.soft.gz - gzip -fd ${i}_family.soft.gz - grep Series_relation ${i}_family.soft | perl -ne 'print "$1\n" if (m/(PRJ[A-Z]+\d+)/)' | sort | uniq >> $SERIES.project.list - done - - ## now, once we re-populated $SERIES.project.list, let's try to get ENA records for the new IDs.. - >&2 echo "WARNING: pulling ENA records using sub-series project identifiers.." 
- RET=1 - TRIES=1 - until (( $RET == 0 )) - do - ./curl_ena_metadata.sh $SERIES.project.list > $SERIES.ena.tsv - RET=$? - TRIES=$((TRIES+1)) - if (( $TRIES > 5 )) - then - >&2 echo "ERROR: Still no ENA records can be retrieved for the GEO SUBSERIES projects listed in $SERIES.project.list!" - fi - done + local biosample=$i fi - - ## ena metadata loading failed, now check if sra was loaded - if [[ $RET_SRA -eq 1 ]] + + ## try to get sample, experiment, and run IDs from metadata file using GSM + if [[ `grep $i $META` ]] then - >&2 echo "ERROR: No SRA records can be retrieved for the $SERIES, I quit!" - exit 1 + SMPS=`grep $i $META | tr '\t' '\n' | grep -P "^[SE]RS\d+$" | sort | uniq | tr '\n' ',' | sed "s/,$//"` + EXPS=`grep $i $META | tr '\t' '\n' | grep -P "^[SE]RX\d+$" | sort | uniq | tr '\n' ',' | sed "s/,$//"` + RUNS=`grep $i $META | tr '\t' '\n' | grep -P "^[SE]RR\d+$" | sort | uniq | tr '\n' ',' | sed "s/,$//"` + write_accessions $SERIES $i $SMPS $EXPS $RUNS + STATUS=$? + ## try to get sample, experiment, and run IDs from metadata file using BioSample + elif [[ `grep $biosample $META` ]] + then + SMPS=`grep $biosample $META | tr '\t' '\n' | grep -P "^[SE]RS\d+$" | sort | uniq | tr '\n' ',' | sed "s/,$//"` + EXPS=`grep $biosample $META | tr '\t' '\n' | grep -P "^[SE]RX\d+$" | sort | uniq | tr '\n' ',' | sed "s/,$//"` + RUNS=`grep $biosample $META | tr '\t' '\n' | grep -P "^[SE]RR\d+$" | sort | uniq | tr '\n' ',' | sed "s/,$//"` + write_accessions $SERIES $i $SMPS $EXPS $RUNS + STATUS=$? else - META="$SERIES.sra.tsv" - RET=0 + >&2 echo "ERROR: No experiment or run ID found for $i in $META!" + STATUS=1 + break fi - fi - sleep 1 - done - # checking if ENA metadata file is empty - if [[ ! -s $SERIES.ena.tsv ]] + ## check that we have all the IDs + if [[ $STATUS -eq 1 ]] + then + >&2 echo "WARNING: No experiment or run ID found for $i in $META!" 
+ break + fi + done + + ## check that all samples are in accessions file and change status to 0 + if [[ `cat $SERIES.sample.list | wc -l` -eq `cut -f 1 $SERIES.accessions.tsv | wc -l` ]] + then + STATUS=0 + fi + else + >&2 echo "ERROR: No metadata file $META found!" + fi + return $STATUS +} + +subset_accessions() { + local SERIES=$1 + local SUBSET=${2:-""} + + if [[ $SUBSET != "" ]] then - META="$SERIES.sra.tsv" + >&2 echo "Narrowing down the dataset using the file $SUBSET" + >&2 echo "New list of the samples to be processed:" + >&2 cat $SUBSET + grep -f $SUBSET $SERIES.sample.list > $SERIES.sample.list.tmp + mv $SERIES.sample.list.tmp $SERIES.sample.list + grep -f $SUBSET $SERIES.accessions.tsv > $SERIES.accessions.tsv.tmp + mv $SERIES.accessions.tsv.tmp $SERIES.accessions.tsv fi +} + +subset_meta() { + local META=$1 + local SUBSET=${2:-""} + + if [[ $SUBSET != "" ]] + then + grep -f $SUBSET $META > $META.tmp + mv $META.tmp $META + fi +} + +function make_run_relation_files() { + local SERIES=$1 + + ## make run list + cut -f 4 $SERIES.accessions.tsv | tr ',' '\n' | sort | uniq > $SERIES.run.list + + ## make sample x run file + if [[ $SERIES == GSE* ]] + then + cut -f 1,4 $SERIES.accessions.tsv > $SERIES.sample_x_run.tsv + else + cut -f 2,4 $SERIES.accessions.tsv > $SERIES.sample_x_run.tsv + fi +} + +function make_util_files() { + local SERIES=$1 + local SUBSET=${2:-""} + local STATUS=1 - ## make an accession table. If you adjust the ENA curl query, column numbers will change, so beware if [[ -s $SERIES.accessions.tsv ]] then - >&2 echo "WARNING: file $SERIES.accessions.tsv exists. This shouldn't normally happen. Overwriting the file by parsing $SERIES.ena.tsv.." + >&2 echo "WARNING: file $SERIES.accessions.tsv exists. This shouldn't normally happen. Overwriting the file.." 
rm $SERIES.accessions.tsv fi + + ## get sample, experiment, and run IDs for each sample from SRA metadata file + if [[ $SERIES == GSE* ]] + then + get_sample_ids $SERIES $SERIES.sra.tsv + STATUS=$? + fi - for i in `cat $SERIES.sample.list` - do - ## changed this because ENA web API is inconsistent with column order.. - SMPS=`grep $i $META | tr '\t' '\n' | grep -P "^[SE]RS\d+$" | sort | uniq | tr '\n' ',' | sed "s/,$//"` - EXPS=`grep $i $META | tr '\t' '\n' | grep -P "^[SE]RX\d+$" | sort | uniq | tr '\n' ',' | sed "s/,$//"` - RUNS=`grep $i $META | tr '\t' '\n' | grep -P "^[SE]RR\d+$" | sort | uniq | tr '\n' ',' | sed "s/,$//"` - echo -e "$i\t$SMPS\t$EXPS\t$RUNS" >> $SERIES.accessions.tsv - done + ## get sample, experiment, and run IDs for each sample from ENA metadata file + if [[ $STATUS -eq 1 ]] + then + get_sample_ids $SERIES $SERIES.ena.tsv + STATUS=$? + fi + + if [[ $STATUS -eq 1 ]] + then + >&2 echo "ERROR: Failed to get sample, experiment, and run IDs for $SERIES using any of the available metadata files!" + exit 1 + fi + + ## subset the accessions file if a sample list is provided + subset_accessions $SERIES $SUBSET ## make few more useful metadata files - cut -f 4 $SERIES.accessions.tsv | tr ',' '\n' | sort | uniq > $SERIES.run.list - cut -f 1,4 $SERIES.accessions.tsv > $SERIES.sample_x_run.tsv -elif [[ $SERIES == E-MTAB* ]] -then - ## sdrf's are wonderful, but don't have the project ID, which is *annoying*. That's OK though, we'll go by SRS. - wget -O $SERIES.sdrf.txt https://www.ebi.ac.uk/biostudies/files/$SERIES/$SERIES.sdrf.txt - wget -O $SERIES.idf.txt https://www.ebi.ac.uk/biostudies/files/$SERIES/$SERIES.idf.txt - if [[ ! 
-s $SERIES.sdrf.txt ]] + make_run_relation_files $SERIES + + ## finally, classify each run into 3 major types: + ## 1) we have useable 10x paired-end files; 2) we need to get them from 10x BAM; 3) we need to get them from SRA + ## simultaneously, '$SERIES.urls.list' is generated listing all things that need to be downloaded + if [[ -s "$SERIES.ena.tsv" ]] then - >&2 echo "ERROR: Failed to download $SERIES.sdrf.txt file; please make sure the series you requested exists, or fix the download URL!" + subset_meta $SERIES.ena.tsv $SUBSET + ./parse_ena_metadata.sh $SERIES > $SERIES.parsed.tsv + elif [[ -s "$SERIES.sra.tsv" ]] + then + subset_meta $SERIES.ena.tsv $SUBSET + ./parse_sra_metadata.sh $SERIES > $SERIES.parsed.tsv + else + >&2 echo "ERROR: No metadata file found for $SERIES!" exit 1 - fi + fi +} - ## samples are ERS in case of ArrayExpress. Why not ERX, you might ask? Yes, ask you might. - cat $SERIES.sdrf.txt | tr '\t' '\n' | grep "^ERS" | sort | uniq > $SERIES.sample.list +function process_geo() { + local SERIES=$1 + local SUBSET=${2:-""} - ## curl info about each run (SRR/ERR/DRR) from ENA API; in this case we use ERS IDs - RET=1 - TRIES=1 - until (( $RET == 0 )) - do - ## for ArrayExpress, we query by sample ID because sdrf doesn't list the BioProject ID - ./curl_ena_metadata.sh $SERIES.sample.list > $SERIES.ena.tsv - RET=$? - TRIES=$((TRIES+1)) - if (( $TRIES > 5 )) - then - >&2 echo "ERROR: No ENA records can be retrieved for ArrayExpress samples $SERIES.sample.list!" - exit 1 - fi - sleep 1 - done + ## download the family file from GEO + download_geo_family $SERIES + + ## parse the family file to get the project and sample IDs + parse_geo_family $SERIES - ## make an accession table. If you adjust the ENA curl query, column numbers will change, so beware - if [[ -s $SERIES.accessions.tsv ]] - then - >&2 echo "WARNING: file $SERIES.accessions.tsv exists. This shouldn't normally happen. Overwriting the file by parsing $SERIES.ena.tsv.." 
- rm $SERIES.accessions.tsv - fi + ## download metadata from SRA + download_metadata "$SERIES" "./curl_sra_metadata.sh" "$SERIES.project.list" "$SERIES.sra.tsv" + local SRA_STATUS=$? + + + ## if the download failed, try alternative methods + if [ $SRA_STATUS -eq 1 ] + then + alternative_download_metadata $SERIES "./curl_sra_metadata.sh" "$SERIES.sra.tsv" + SRA_STATUS=$? + fi + + ## download metadata from ENA + download_metadata "$SERIES" "./curl_ena_metadata.sh" "$SERIES.project.list" "$SERIES.ena.tsv" + local ENA_STATUS=$? + + ## if the download failed, try alternative methods + if [ $ENA_STATUS -eq 1 ] + then + alternative_download_metadata $SERIES "./curl_ena_metadata.sh" "$SERIES.ena.tsv" + ENA_STATUS=$? + fi + + ## if both downloads failed, exit with an error + if [ $SRA_STATUS -eq 1 ] && [ $ENA_STATUS -eq 1 ] + then + >&2 echo "ERROR: Failed to download metadata for $SERIES using any of the available methods!" + exit 1 + fi + + ## make utility files + make_util_files $SERIES +} + +function process_arrayexpress { + local SERIES=$1 + local SUBSET=${2:-""} + + ## download the SDRF and IDF files from ArrayExpress + download_sdrf_idf_files $SERIES - for i in `cat $SERIES.sample.list` - do - EXPS=`grep $i $SERIES.ena.tsv | tr '\t' '\n' | grep -P "^[SE]RX\d+$" | sort | uniq | tr '\n' ',' | sed "s/,$//"` - RUNS=`grep $i $SERIES.ena.tsv | tr '\t' '\n' | grep -P "^[SE]RR\d+$" | uniq | tr '\n' ',' | sed "s/,$//"` - echo -e "-\t$i\t$EXPS\t$RUNS" >> $SERIES.accessions.tsv - done + ## parse the SDRF file to get the project and sample IDs + parse_sdrf_idf $SERIES - ## make few more useful metadata files - cut -f 4 $SERIES.accessions.tsv | tr ',' '\n' | sort | uniq > $SERIES.run.list - cut -f 2,4 $SERIES.accessions.tsv > $SERIES.sample_x_run.tsv -elif [[ $SERIES == PRJ* ]] -then + ## download metadata from ENA + download_metadata "$SERIES" "./curl_ena_metadata.sh" "$SERIES.sample.list" "$SERIES.ena.tsv" + local ENA_STATUS=$? 
+ + ## if failed, exit with an error + if [ $ENA_STATUS -eq 1 ] + then + >&2 echo "ERROR: Failed to download metadata for $SERIES using any of the available methods!" + exit 1 + fi + + ## make utility files + make_util_files $SERIES $SUBSET +} + +function process_bioproject { + local SERIES=$1 + local SUBSET=${2:-""} + ## simple version of GEO processing (see above): pull all the needed metadata from ENA using PRJ* echo $SERIES > $SERIES.project.list - ## curl info about each run (SRR/ERR/DRR) from ENA API; v2 pulls GSM data etc - RET=1 - TRIES=1 - until (( $RET == 0 )) - do - ./curl_ena_metadata.sh $SERIES.project.list > $SERIES.ena.tsv - RET=$? - TRIES=$((TRIES+1)) - if (( $TRIES > 5 )) - then - >&2 echo "ERROR: No ENA records can be retrieved for BioProject(s) $SERIES.project.list!" - exit 1 - fi - sleep 1 - done + ## download metadata from ENA + download_metadata "$SERIES" "./curl_ena_metadata.sh" "$SERIES.project.list" "$SERIES.ena.tsv" + local ENA_STATUS=$? - ## make an accession table. If you adjust the ENA curl query, column numbers will change, so beware - if [[ -s $SERIES.accessions.tsv ]] - then - >&2 echo "WARNING: file $SERIES.accessions.tsv exists. This shouldn't normally happen. Overwriting the file by parsing $SERIES.ena.tsv.." - rm $SERIES.accessions.tsv - fi + ## if failed, exit with an error + if [ $ENA_STATUS -eq 1 ] + then + >&2 echo "ERROR: Failed to download metadata for $SERIES using any of the available methods!" 
+ exit 1 + fi - ## for PRJ*, samples are SRS or ERS IDs: + ## create sample list cat $SERIES.ena.tsv | tr '\t' '\n' | grep -P "^[SE]RS\d+$" | sort | uniq > $SERIES.sample.list - for i in `cat $SERIES.sample.list` - do - EXPS=`grep $i $SERIES.ena.tsv | tr '\t' '\n' | grep -P "^[SE]RX\d+$" | sort | uniq | tr '\n' ',' | sed "s/,$//"` - RUNS=`grep $i $SERIES.ena.tsv | tr '\t' '\n' | grep -P "^[SE]RR\d+$" | sort | uniq | tr '\n' ',' | sed "s/,$//"` - echo -e "-\t$i\t$EXPS\t$RUNS" >> $SERIES.accessions.tsv - done + ## make utility files + make_util_files $SERIES $SUBSET +} - ## make few more useful metadata files - cut -f 4 $SERIES.accessions.tsv | tr ',' '\n' | sort | uniq > $SERIES.run.list - cut -f 2,4 $SERIES.accessions.tsv > $SERIES.sample_x_run.tsv -else - >&2 echo "ERROR: The series ID *must* start with GSE, E-MTAB, or PRJ!" - exit 1 -fi - -## if we only want a fraction of samples, make sure we subset all the relevant files -if [[ $SUBSET != "" ]] -then - >&2 echo "Narrowing down the dataset using the file $SUBSET" - >&2 echo "New list of the samples to be processed:" - >&2 cat $SUBSET - grep -f $SUBSET $SERIES.sample.list > $SERIES.sample.list.tmp - mv $SERIES.sample.list.tmp $SERIES.sample.list - grep -f $SUBSET $META > $META.tmp - mv $META.tmp $META - grep -f $SUBSET $SERIES.accessions.tsv > $SERIES.accessions.tsv.tmp - mv $SERIES.accessions.tsv.tmp $SERIES.accessions.tsv - - cut -f 4 $SERIES.accessions.tsv | tr ',' '\n' | sort | uniq > $SERIES.run.list - ## for GSE, sample=GSM; for E-MTAB/PRJ, sample=ERS/SRS - if [[ $SERIES == GSE* ]] +function main () { + if (( $# != 1 && $# != 2 )) then - cut -f 1,4 $SERIES.accessions.tsv > $SERIES.sample_x_run.tsv - else - cut -f 2,4 $SERIES.accessions.tsv > $SERIES.sample_x_run.tsv + >&2 echo "USAGE: ./collect_metadata.sh [sample_list]" + >&2 echo + >&2 echo "(requires curl_ena_metadata.sh and parse_ena_metadata.sh present in the same directory)" + exit 1 fi -fi - -## finally, classify each run into 3 major types: -## 1) 
we have useable 10x paired-end files; 2) we need to get them from 10x BAM; 3) we need to get them from SRA -## simultaneously, '$SERIES.urls.list' is generated listing all things that need to be downloaded -if [[ $META == "$SERIES.ena.tsv" ]] -then - ./parse_ena_metadata.sh $SERIES > $SERIES.parsed.tsv -else - ./parse_sra_metadata.sh $SERIES > $SERIES.parsed.tsv -fi + + local SERIES=$1 + local SUBSET=${2:-""} + + # Handle different series types + case "$SERIES" in + GSE*) process_geo "$SERIES" "$SUBSET" ;; + E-MTAB*) process_arrayexpress "$SERIES" "$SUBSET" ;; + PRJ*) process_bioproject "$SERIES" "$SUBSET" ;; + *) echo "ERROR: The series ID must start with GSE, E-MTAB, or PRJ!" >&2; exit 1 ;; + esac +} + +main "$@" From aa07f4a9c090d7eef96879aa3aea00697d88ae90 Mon Sep 17 00:00:00 2001 From: cellgeni Date: Fri, 17 Jan 2025 17:08:48 +0000 Subject: [PATCH 03/18] removed alternative_download_metadata function --- scripts/collect_metadata.sh | 99 ++++++++++++++++++------------------- 1 file changed, 49 insertions(+), 50 deletions(-) diff --git a/scripts/collect_metadata.sh b/scripts/collect_metadata.sh index 6f9a360..6cf8fbf 100755 --- a/scripts/collect_metadata.sh +++ b/scripts/collect_metadata.sh @@ -91,6 +91,7 @@ function get_subseries_from_family { if [[ $SUBGSE == "" ]] then >&2 echo "ERROR: No GSE subseries were listed in ${SERIES}_family.soft file!" + return 1 else for i in $SUBGSE do @@ -100,6 +101,7 @@ function get_subseries_from_family { grep Series_relation ${i}_family.soft | perl -ne 'print "$1\n" if (m/(PRJ[A-Z]+\d+)/)' | sort | uniq >> $OUTPUT_FILE done fi + return 1 } function download_metadata { @@ -135,42 +137,6 @@ function download_metadata { fi } - -function alternative_download_metadata { - local SERIES=$1 - local SCRIPT=$2 - local OUTPUT_FILE=$3 - local STATUS=1 - - ## try loading SRA metadata using subseries - >&2 echo "WARNING: replacing $SERIES.project.list with sub-series projects.." - if [[ ! 
-f $SERIES.subproject.list ]] - then - get_subseries_from_family "$SERIES" "$SERIES.subproject.list" - fi - - ## try loading SRA metadata using subseries - if [[ -s $SERIES.subproject.list ]] - then - download_metadata "$SERIES" "$SCRIPT" "$SERIES.subproject.list" "$OUTPUT_FILE" - STATUS=$? - fi - - ## try loading SRA using BioSample identifiers - if [ $STATUS -eq 1 ] - then - >&2 echo "WARNING: replacing $SERIES.project.list with BioSample identifiers.." - download_metadata "$SERIES" "$SCRIPT" "$SERIES.biosample.list" "$OUTPUT_FILE" - STATUS=$? - fi - - if [ $STATUS -eq 1 ] - then - >&2 echo "ERROR: Failed to download metadata for $SERIES using $SCRIPT methods!" - fi - return $STATUS -} - function write_accessions() { local SERIES=$1 local SAMPLE=$2 @@ -366,27 +332,60 @@ function process_geo() { ## parse the family file to get the project and sample IDs parse_geo_family $SERIES - ## download metadata from SRA - download_metadata "$SERIES" "./curl_sra_metadata.sh" "$SERIES.project.list" "$SERIES.sra.tsv" - local SRA_STATUS=$? + ## Try loading metadata using $SERIES.project.list + if [[ -s $SERIES.project.list ]] + then + ## download metadata from SRA + download_metadata "$SERIES" "./curl_sra_metadata.sh" "$SERIES.project.list" "$SERIES.sra.tsv" + local SRA_STATUS=$? + + ## download metadata from ENA + download_metadata "$SERIES" "./curl_ena_metadata.sh" "$SERIES.project.list" "$SERIES.ena.tsv" + local ENA_STATUS=$? + fi - ## if the download failed, try alternative methods - if [ $SRA_STATUS -eq 1 ] + ## if the download failed, try using suboroject IDs + if [ $SRA_STATUS -eq 1 ] || [ $ENA_STATUS -eq 1 ] then - alternative_download_metadata $SERIES "./curl_sra_metadata.sh" "$SERIES.sra.tsv" - SRA_STATUS=$? + >&2 echo "WARNING: replacing $SERIES.project.list with sub-series projects.." 
+ ## get subseries from family file + get_subseries_from_family "$SERIES" "$SERIES.subproject.list" + + ## download metadata from SRA + if [ $SRA_STATUS -eq 1 ] + then + download_metadata "$SERIES" "./curl_sra_metadata.sh" "$SERIES.subproject.list" "$SERIES.sra.tsv" + SRA_STATUS=$? + fi + + ## download metadata from ENA + if [ $ENA_STATUS -eq 1 ] + then + download_metadata "$SERIES" "./curl_ena_metadata.sh" "$SERIES.subproject.list" "$SERIES.ena.tsv" + ENA_STATUS=$? + fi fi - ## download metadata from ENA - download_metadata "$SERIES" "./curl_ena_metadata.sh" "$SERIES.project.list" "$SERIES.ena.tsv" - local ENA_STATUS=$? - ## if the download failed, try alternative methods - if [ $ENA_STATUS -eq 1 ] +## if the download using subproject IDs failed, try using BioSample IDs + if [ $SRA_STATUS -eq 1 ] || [ $ENA_STATUS -eq 1 ] then - alternative_download_metadata $SERIES "./curl_ena_metadata.sh" "$SERIES.ena.tsv" - ENA_STATUS=$? + >&2 echo "WARNING: replacing $SERIES.subproject.list with BioSample IDs.." + + ## download metadata from SRA + if [ $SRA_STATUS -eq 1 ] + then + download_metadata "$SERIES" "./curl_sra_metadata.sh" "$SERIES.biosample.list" "$SERIES.sra.tsv" + SRA_STATUS=$? + fi + + ## download metadata from ENA + if [ $ENA_STATUS -eq 1 ] + then + download_metadata "$SERIES" "./curl_ena_metadata.sh" "$SERIES.biosample.list" "$SERIES.ena.tsv" + ENA_STATUS=$? 
+ fi fi ## if both downloads failed, exit with an error From 84711b1658d4634d0aa3ec8a9ec41d36cd333b30 Mon Sep 17 00:00:00 2001 From: cellgeni Date: Mon, 20 Jan 2025 14:50:44 +0000 Subject: [PATCH 04/18] placed SRA_STATUS variable in the begining of the function --- scripts/collect_metadata.sh | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/scripts/collect_metadata.sh b/scripts/collect_metadata.sh index 6cf8fbf..986b44c 100755 --- a/scripts/collect_metadata.sh +++ b/scripts/collect_metadata.sh @@ -34,7 +34,7 @@ function parse_geo_family() { local SERIES=$1 ## get bioproject ID - grep Series_relation ${SERIES}_family.soft | perl -ne 'print "$1\n" if (m/(PRJ[A-Z]+\d+)/)' | sort | uniq || echo "" > $SERIES.project.list + grep Series_relation ${SERIES}_family.soft | perl -ne 'print "$1\n" if (m/(PRJ[A-Z]+\d+)/)' | sort | uniq > $SERIES.project.list ## get sample IDs; samples here are GSM IDs; usually for a 10x GSM==SRS==SRX, but I haven't checked *all* of the SRA you know awk ' @@ -325,6 +325,8 @@ function make_util_files() { function process_geo() { local SERIES=$1 local SUBSET=${2:-""} + local SRA_STATUS=1 + local ENA_STATUS=1 ## download the family file from GEO download_geo_family $SERIES @@ -337,11 +339,11 @@ function process_geo() { then ## download metadata from SRA download_metadata "$SERIES" "./curl_sra_metadata.sh" "$SERIES.project.list" "$SERIES.sra.tsv" - local SRA_STATUS=$? + SRA_STATUS=$? ## download metadata from ENA download_metadata "$SERIES" "./curl_ena_metadata.sh" "$SERIES.project.list" "$SERIES.ena.tsv" - local ENA_STATUS=$? + ENA_STATUS=$? 
fi From 832b5d57ae4e592ecb696fe67d5ea5c4f151a79f Mon Sep 17 00:00:00 2001 From: cellgeni Date: Mon, 20 Jan 2025 15:58:56 +0000 Subject: [PATCH 05/18] Added tests for metadata collection --- .github/workflows/test.yml | 33 ++++++++ test_data/GSE191067/GSE191067.accessions.tsv | 11 +++ test_data/GSE191067/GSE191067.parsed.tsv | 44 +++++++++++ test_data/GSE191067/GSE191067.run.list | 44 +++++++++++ test_data/GSE191067/GSE191067.sample.list | 11 +++ .../GSE191067/GSE191067.sample_x_run.tsv | 11 +++ test_data/GSE191067/GSE191067.urls.list | 44 +++++++++++ test_data/GSE250130/GSE250130.accessions.tsv | 28 +++++++ test_data/GSE250130/GSE250130.parsed.tsv | 28 +++++++ test_data/GSE250130/GSE250130.run.list | 28 +++++++ test_data/GSE250130/GSE250130.sample.list | 28 +++++++ .../GSE250130/GSE250130.sample_x_run.tsv | 28 +++++++ test_data/GSE250130/GSE250130.urls.list | 56 ++++++++++++++ test_data/GSE264508/GSE264508.accessions.tsv | 18 +++++ test_data/GSE264508/GSE264508.parsed.tsv | 42 ++++++++++ test_data/GSE264508/GSE264508.run.list | 42 ++++++++++ test_data/GSE264508/GSE264508.sample.list | 18 +++++ .../GSE264508/GSE264508.sample_x_run.tsv | 18 +++++ test_data/GSE264508/GSE264508.urls.list | 76 +++++++++++++++++++ test_data/GSE274955/GSE274955.accessions.tsv | 5 ++ test_data/GSE274955/GSE274955.parsed.tsv | 5 ++ test_data/GSE274955/GSE274955.run.list | 5 ++ test_data/GSE274955/GSE274955.sample.list | 5 ++ .../GSE274955/GSE274955.sample_x_run.tsv | 5 ++ test_data/GSE274955/GSE274955.urls.list | 5 ++ tests/test_metadata.sh | 51 +++++++++++++ 26 files changed, 689 insertions(+) create mode 100644 .github/workflows/test.yml create mode 100644 test_data/GSE191067/GSE191067.accessions.tsv create mode 100644 test_data/GSE191067/GSE191067.parsed.tsv create mode 100644 test_data/GSE191067/GSE191067.run.list create mode 100644 test_data/GSE191067/GSE191067.sample.list create mode 100644 test_data/GSE191067/GSE191067.sample_x_run.tsv create mode 100644 
test_data/GSE191067/GSE191067.urls.list create mode 100644 test_data/GSE250130/GSE250130.accessions.tsv create mode 100644 test_data/GSE250130/GSE250130.parsed.tsv create mode 100644 test_data/GSE250130/GSE250130.run.list create mode 100644 test_data/GSE250130/GSE250130.sample.list create mode 100644 test_data/GSE250130/GSE250130.sample_x_run.tsv create mode 100644 test_data/GSE250130/GSE250130.urls.list create mode 100644 test_data/GSE264508/GSE264508.accessions.tsv create mode 100644 test_data/GSE264508/GSE264508.parsed.tsv create mode 100644 test_data/GSE264508/GSE264508.run.list create mode 100644 test_data/GSE264508/GSE264508.sample.list create mode 100644 test_data/GSE264508/GSE264508.sample_x_run.tsv create mode 100644 test_data/GSE264508/GSE264508.urls.list create mode 100644 test_data/GSE274955/GSE274955.accessions.tsv create mode 100644 test_data/GSE274955/GSE274955.parsed.tsv create mode 100644 test_data/GSE274955/GSE274955.run.list create mode 100644 test_data/GSE274955/GSE274955.sample.list create mode 100644 test_data/GSE274955/GSE274955.sample_x_run.tsv create mode 100644 test_data/GSE274955/GSE274955.urls.list create mode 100644 tests/test_metadata.sh diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 0000000..a3b761c --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,33 @@ +name: Test Collect Metadata Script + +on: + push: + branches: + - main + pull_request: + +jobs: + test: + runs-on: ubuntu-latest + strategy: + matrix: + include: + - series_id: GSE191067 + - series_id: GSE264508 + - series_id: GSE274955 + - series_id: GSE250130 + + steps: + - name: Checkout repository + uses: actions/checkout@v3 + + - name: Set up environment + run: | + sudo apt-get update + sudo apt-get install -y wget perl curl jq + + - name: Run metadata collection tests + run: | + chmod +x ./scripts/* + chmod +x ./tests/test_metadata.sh + ./tests/test_metadata.sh ${matrix.series_id} \ No newline at end of file diff --git 
a/test_data/GSE191067/GSE191067.accessions.tsv b/test_data/GSE191067/GSE191067.accessions.tsv new file mode 100644 index 0000000..de33c63 --- /dev/null +++ b/test_data/GSE191067/GSE191067.accessions.tsv @@ -0,0 +1,11 @@ +GSM5738231 SRS11329588 SRX13428615 SRR17249694,SRR17249695,SRR17249696,SRR17249697 +GSM5738232 SRS11329589 SRX13428616 SRR17249690,SRR17249691,SRR17249692,SRR17249693 +GSM5738233 SRS11329594 SRX13428617 SRR17249684,SRR17249685,SRR17249686,SRR17249687 +GSM5738234 SRS11329591 SRX13428618 SRR17249680,SRR17249681,SRR17249682,SRR17249683 +GSM5738235 SRS11329599 SRX13428625 SRR17249654,SRR17249655,SRR17249688,SRR17249689 +GSM5738236 SRS11329592 SRX13428619 SRR17249676,SRR17249677,SRR17249678,SRR17249679 +GSM5738237 SRS11329593 SRX13428620 SRR17249672,SRR17249673,SRR17249674,SRR17249675 +GSM5738238 SRS11329597 SRX13428621 SRR17249668,SRR17249669,SRR17249670,SRR17249671 +GSM5738239 SRS11329595 SRX13428622 SRR17249664,SRR17249665,SRR17249666,SRR17249667 +GSM5738240 SRS11329596 SRX13428623 SRR17249660,SRR17249661,SRR17249662,SRR17249663 +GSM5738241 SRS11329598 SRX13428624 SRR17249656,SRR17249657,SRR17249658,SRR17249659 diff --git a/test_data/GSE191067/GSE191067.parsed.tsv b/test_data/GSE191067/GSE191067.parsed.tsv new file mode 100644 index 0000000..f7ac3de --- /dev/null +++ b/test_data/GSE191067/GSE191067.parsed.tsv @@ -0,0 +1,44 @@ +SRR17249654 Homo sapiens https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos6/sra-pub-zq-40/SRR017/17249/SRR17249654/SRR17249654.lite.1 SRA +SRR17249655 Homo sapiens https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos6/sra-pub-zq-40/SRR017/17249/SRR17249655/SRR17249655.lite.1 SRA +SRR17249656 Homo sapiens https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos6/sra-pub-zq-40/SRR017/17249/SRR17249656/SRR17249656.lite.1 SRA +SRR17249657 Homo sapiens https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos6/sra-pub-zq-40/SRR017/17249/SRR17249657/SRR17249657.lite.1 SRA +SRR17249658 Homo sapiens 
https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos6/sra-pub-zq-40/SRR017/17249/SRR17249658/SRR17249658.lite.1 SRA +SRR17249659 Homo sapiens https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos6/sra-pub-zq-40/SRR017/17249/SRR17249659/SRR17249659.lite.1 SRA +SRR17249660 Homo sapiens https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos6/sra-pub-zq-40/SRR017/17249/SRR17249660/SRR17249660.lite.1 SRA +SRR17249661 Homo sapiens https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos6/sra-pub-zq-40/SRR017/17249/SRR17249661/SRR17249661.lite.1 SRA +SRR17249662 Homo sapiens https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos6/sra-pub-zq-40/SRR017/17249/SRR17249662/SRR17249662.lite.1 SRA +SRR17249663 Homo sapiens https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos6/sra-pub-zq-40/SRR017/17249/SRR17249663/SRR17249663.lite.1 SRA +SRR17249664 Homo sapiens https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos6/sra-pub-zq-40/SRR017/17249/SRR17249664/SRR17249664.lite.1 SRA +SRR17249665 Homo sapiens https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos6/sra-pub-zq-40/SRR017/17249/SRR17249665/SRR17249665.lite.1 SRA +SRR17249666 Homo sapiens https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos6/sra-pub-zq-40/SRR017/17249/SRR17249666/SRR17249666.lite.1 SRA +SRR17249667 Homo sapiens https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos6/sra-pub-zq-40/SRR017/17249/SRR17249667/SRR17249667.lite.1 SRA +SRR17249668 Homo sapiens https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos6/sra-pub-zq-40/SRR017/17249/SRR17249668/SRR17249668.lite.1 SRA +SRR17249669 Homo sapiens https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos6/sra-pub-zq-40/SRR017/17249/SRR17249669/SRR17249669.lite.1 SRA +SRR17249670 Homo sapiens https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos6/sra-pub-zq-40/SRR017/17249/SRR17249670/SRR17249670.lite.1 SRA +SRR17249671 Homo sapiens https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos6/sra-pub-zq-40/SRR017/17249/SRR17249671/SRR17249671.lite.1 SRA +SRR17249672 Homo sapiens 
https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos6/sra-pub-zq-40/SRR017/17249/SRR17249672/SRR17249672.lite.1 SRA +SRR17249673 Homo sapiens https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos6/sra-pub-zq-40/SRR017/17249/SRR17249673/SRR17249673.lite.1 SRA +SRR17249674 Homo sapiens https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos6/sra-pub-zq-40/SRR017/17249/SRR17249674/SRR17249674.lite.1 SRA +SRR17249675 Homo sapiens https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos6/sra-pub-zq-40/SRR017/17249/SRR17249675/SRR17249675.lite.1 SRA +SRR17249676 Homo sapiens https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos6/sra-pub-zq-40/SRR017/17249/SRR17249676/SRR17249676.lite.1 SRA +SRR17249677 Homo sapiens https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos6/sra-pub-zq-40/SRR017/17249/SRR17249677/SRR17249677.lite.1 SRA +SRR17249678 Homo sapiens https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos6/sra-pub-zq-40/SRR017/17249/SRR17249678/SRR17249678.lite.1 SRA +SRR17249679 Homo sapiens https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos6/sra-pub-zq-40/SRR017/17249/SRR17249679/SRR17249679.lite.1 SRA +SRR17249680 Homo sapiens https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos6/sra-pub-zq-40/SRR017/17249/SRR17249680/SRR17249680.lite.1 SRA +SRR17249681 Homo sapiens https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos6/sra-pub-zq-40/SRR017/17249/SRR17249681/SRR17249681.lite.1 SRA +SRR17249682 Homo sapiens https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos6/sra-pub-zq-40/SRR017/17249/SRR17249682/SRR17249682.lite.1 SRA +SRR17249683 Homo sapiens https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos6/sra-pub-zq-40/SRR017/17249/SRR17249683/SRR17249683.lite.1 SRA +SRR17249684 Homo sapiens https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos6/sra-pub-zq-40/SRR017/17249/SRR17249684/SRR17249684.lite.1 SRA +SRR17249685 Homo sapiens https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos6/sra-pub-zq-40/SRR017/17249/SRR17249685/SRR17249685.lite.1 SRA +SRR17249686 Homo sapiens 
https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos6/sra-pub-zq-40/SRR017/17249/SRR17249686/SRR17249686.lite.1 SRA +SRR17249687 Homo sapiens https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos6/sra-pub-zq-40/SRR017/17249/SRR17249687/SRR17249687.lite.1 SRA +SRR17249688 Homo sapiens https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos6/sra-pub-zq-40/SRR017/17249/SRR17249688/SRR17249688.lite.1 SRA +SRR17249689 Homo sapiens https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos6/sra-pub-zq-40/SRR017/17249/SRR17249689/SRR17249689.lite.1 SRA +SRR17249690 Homo sapiens https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos6/sra-pub-zq-40/SRR017/17249/SRR17249690/SRR17249690.lite.1 SRA +SRR17249691 Homo sapiens https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos6/sra-pub-zq-40/SRR017/17249/SRR17249691/SRR17249691.lite.1 SRA +SRR17249692 Homo sapiens https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos6/sra-pub-zq-40/SRR017/17249/SRR17249692/SRR17249692.lite.1 SRA +SRR17249693 Homo sapiens https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos6/sra-pub-zq-40/SRR017/17249/SRR17249693/SRR17249693.lite.1 SRA +SRR17249694 Homo sapiens https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos6/sra-pub-zq-40/SRR017/17249/SRR17249694/SRR17249694.lite.1 SRA +SRR17249695 Homo sapiens https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos6/sra-pub-zq-40/SRR017/17249/SRR17249695/SRR17249695.lite.1 SRA +SRR17249696 Homo sapiens https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos6/sra-pub-zq-40/SRR017/17249/SRR17249696/SRR17249696.lite.1 SRA +SRR17249697 Homo sapiens https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos6/sra-pub-zq-40/SRR017/17249/SRR17249697/SRR17249697.lite.1 SRA diff --git a/test_data/GSE191067/GSE191067.run.list b/test_data/GSE191067/GSE191067.run.list new file mode 100644 index 0000000..71167e4 --- /dev/null +++ b/test_data/GSE191067/GSE191067.run.list @@ -0,0 +1,44 @@ +SRR17249654 +SRR17249655 +SRR17249656 +SRR17249657 +SRR17249658 +SRR17249659 +SRR17249660 +SRR17249661 +SRR17249662 +SRR17249663 +SRR17249664 +SRR17249665 
+SRR17249666 +SRR17249667 +SRR17249668 +SRR17249669 +SRR17249670 +SRR17249671 +SRR17249672 +SRR17249673 +SRR17249674 +SRR17249675 +SRR17249676 +SRR17249677 +SRR17249678 +SRR17249679 +SRR17249680 +SRR17249681 +SRR17249682 +SRR17249683 +SRR17249684 +SRR17249685 +SRR17249686 +SRR17249687 +SRR17249688 +SRR17249689 +SRR17249690 +SRR17249691 +SRR17249692 +SRR17249693 +SRR17249694 +SRR17249695 +SRR17249696 +SRR17249697 diff --git a/test_data/GSE191067/GSE191067.sample.list b/test_data/GSE191067/GSE191067.sample.list new file mode 100644 index 0000000..9d278d4 --- /dev/null +++ b/test_data/GSE191067/GSE191067.sample.list @@ -0,0 +1,11 @@ +GSM5738231 +GSM5738232 +GSM5738233 +GSM5738234 +GSM5738235 +GSM5738236 +GSM5738237 +GSM5738238 +GSM5738239 +GSM5738240 +GSM5738241 diff --git a/test_data/GSE191067/GSE191067.sample_x_run.tsv b/test_data/GSE191067/GSE191067.sample_x_run.tsv new file mode 100644 index 0000000..152687c --- /dev/null +++ b/test_data/GSE191067/GSE191067.sample_x_run.tsv @@ -0,0 +1,11 @@ +GSM5738231 SRR17249694,SRR17249695,SRR17249696,SRR17249697 +GSM5738232 SRR17249690,SRR17249691,SRR17249692,SRR17249693 +GSM5738233 SRR17249684,SRR17249685,SRR17249686,SRR17249687 +GSM5738234 SRR17249680,SRR17249681,SRR17249682,SRR17249683 +GSM5738235 SRR17249654,SRR17249655,SRR17249688,SRR17249689 +GSM5738236 SRR17249676,SRR17249677,SRR17249678,SRR17249679 +GSM5738237 SRR17249672,SRR17249673,SRR17249674,SRR17249675 +GSM5738238 SRR17249668,SRR17249669,SRR17249670,SRR17249671 +GSM5738239 SRR17249664,SRR17249665,SRR17249666,SRR17249667 +GSM5738240 SRR17249660,SRR17249661,SRR17249662,SRR17249663 +GSM5738241 SRR17249656,SRR17249657,SRR17249658,SRR17249659 diff --git a/test_data/GSE191067/GSE191067.urls.list b/test_data/GSE191067/GSE191067.urls.list new file mode 100644 index 0000000..4af51d7 --- /dev/null +++ b/test_data/GSE191067/GSE191067.urls.list @@ -0,0 +1,44 @@ +https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos6/sra-pub-zq-40/SRR017/17249/SRR17249654/SRR17249654.lite.1 
+https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos6/sra-pub-zq-40/SRR017/17249/SRR17249655/SRR17249655.lite.1 +https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos6/sra-pub-zq-40/SRR017/17249/SRR17249656/SRR17249656.lite.1 +https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos6/sra-pub-zq-40/SRR017/17249/SRR17249657/SRR17249657.lite.1 +https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos6/sra-pub-zq-40/SRR017/17249/SRR17249658/SRR17249658.lite.1 +https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos6/sra-pub-zq-40/SRR017/17249/SRR17249659/SRR17249659.lite.1 +https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos6/sra-pub-zq-40/SRR017/17249/SRR17249660/SRR17249660.lite.1 +https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos6/sra-pub-zq-40/SRR017/17249/SRR17249661/SRR17249661.lite.1 +https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos6/sra-pub-zq-40/SRR017/17249/SRR17249662/SRR17249662.lite.1 +https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos6/sra-pub-zq-40/SRR017/17249/SRR17249663/SRR17249663.lite.1 +https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos6/sra-pub-zq-40/SRR017/17249/SRR17249664/SRR17249664.lite.1 +https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos6/sra-pub-zq-40/SRR017/17249/SRR17249665/SRR17249665.lite.1 +https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos6/sra-pub-zq-40/SRR017/17249/SRR17249666/SRR17249666.lite.1 +https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos6/sra-pub-zq-40/SRR017/17249/SRR17249667/SRR17249667.lite.1 +https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos6/sra-pub-zq-40/SRR017/17249/SRR17249668/SRR17249668.lite.1 +https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos6/sra-pub-zq-40/SRR017/17249/SRR17249669/SRR17249669.lite.1 +https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos6/sra-pub-zq-40/SRR017/17249/SRR17249670/SRR17249670.lite.1 +https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos6/sra-pub-zq-40/SRR017/17249/SRR17249671/SRR17249671.lite.1 +https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos6/sra-pub-zq-40/SRR017/17249/SRR17249672/SRR17249672.lite.1 
+https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos6/sra-pub-zq-40/SRR017/17249/SRR17249673/SRR17249673.lite.1 +https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos6/sra-pub-zq-40/SRR017/17249/SRR17249674/SRR17249674.lite.1 +https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos6/sra-pub-zq-40/SRR017/17249/SRR17249675/SRR17249675.lite.1 +https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos6/sra-pub-zq-40/SRR017/17249/SRR17249676/SRR17249676.lite.1 +https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos6/sra-pub-zq-40/SRR017/17249/SRR17249677/SRR17249677.lite.1 +https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos6/sra-pub-zq-40/SRR017/17249/SRR17249678/SRR17249678.lite.1 +https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos6/sra-pub-zq-40/SRR017/17249/SRR17249679/SRR17249679.lite.1 +https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos6/sra-pub-zq-40/SRR017/17249/SRR17249680/SRR17249680.lite.1 +https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos6/sra-pub-zq-40/SRR017/17249/SRR17249681/SRR17249681.lite.1 +https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos6/sra-pub-zq-40/SRR017/17249/SRR17249682/SRR17249682.lite.1 +https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos6/sra-pub-zq-40/SRR017/17249/SRR17249683/SRR17249683.lite.1 +https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos6/sra-pub-zq-40/SRR017/17249/SRR17249684/SRR17249684.lite.1 +https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos6/sra-pub-zq-40/SRR017/17249/SRR17249685/SRR17249685.lite.1 +https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos6/sra-pub-zq-40/SRR017/17249/SRR17249686/SRR17249686.lite.1 +https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos6/sra-pub-zq-40/SRR017/17249/SRR17249687/SRR17249687.lite.1 +https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos6/sra-pub-zq-40/SRR017/17249/SRR17249688/SRR17249688.lite.1 +https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos6/sra-pub-zq-40/SRR017/17249/SRR17249689/SRR17249689.lite.1 +https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos6/sra-pub-zq-40/SRR017/17249/SRR17249690/SRR17249690.lite.1 
+https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos6/sra-pub-zq-40/SRR017/17249/SRR17249691/SRR17249691.lite.1 +https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos6/sra-pub-zq-40/SRR017/17249/SRR17249692/SRR17249692.lite.1 +https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos6/sra-pub-zq-40/SRR017/17249/SRR17249693/SRR17249693.lite.1 +https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos6/sra-pub-zq-40/SRR017/17249/SRR17249694/SRR17249694.lite.1 +https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos6/sra-pub-zq-40/SRR017/17249/SRR17249695/SRR17249695.lite.1 +https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos6/sra-pub-zq-40/SRR017/17249/SRR17249696/SRR17249696.lite.1 +https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos6/sra-pub-zq-40/SRR017/17249/SRR17249697/SRR17249697.lite.1 diff --git a/test_data/GSE250130/GSE250130.accessions.tsv b/test_data/GSE250130/GSE250130.accessions.tsv new file mode 100644 index 0000000..5cba66c --- /dev/null +++ b/test_data/GSE250130/GSE250130.accessions.tsv @@ -0,0 +1,28 @@ +GSM7974277 SRS19844433 SRX22870939 SRR27190826 +GSM7974278 SRS19844434 SRX22870940 SRR27190825 +GSM7974279 SRS19844445 SRX22870951 SRR27190814 +GSM7974280 SRS19844454 SRX22870960 SRR27190805 +GSM7974281 SRS19844455 SRX22870961 SRR27190804 +GSM7974282 SRS19844456 SRX22870962 SRR27190803 +GSM7974283 SRS19844457 SRX22870963 SRR27190802 +GSM7974284 SRS19844458 SRX22870964 SRR27190801 +GSM7974285 SRS19844459 SRX22870965 SRR27190800 +GSM7974286 SRS19844460 SRX22870966 SRR27190799 +GSM7974287 SRS19844435 SRX22870941 SRR27190824 +GSM7974288 SRS19844436 SRX22870942 SRR27190823 +GSM7974289 SRS19844437 SRX22870943 SRR27190822 +GSM7974290 SRS19844438 SRX22870944 SRR27190821 +GSM7974291 SRS19844439 SRX22870945 SRR27190820 +GSM7974292 SRS19844440 SRX22870946 SRR27190819 +GSM7974293 SRS19844441 SRX22870947 SRR27190818 +GSM7974294 SRS19844442 SRX22870948 SRR27190817 +GSM7974295 SRS19844443 SRX22870949 SRR27190816 +GSM7974296 SRS19844444 SRX22870950 SRR27190815 +GSM7974297 SRS19844446 SRX22870952 
SRR27190813 +GSM7974298 SRS19844447 SRX22870953 SRR27190812 +GSM7974299 SRS19844448 SRX22870954 SRR27190811 +GSM7974300 SRS19844449 SRX22870955 SRR27190810 +GSM7974301 SRS19844450 SRX22870956 SRR27190809 +GSM7974302 SRS19844451 SRX22870957 SRR27190808 +GSM7974303 SRS19844452 SRX22870958 SRR27190807 +GSM7974304 SRS19844453 SRX22870959 SRR27190806 diff --git a/test_data/GSE250130/GSE250130.parsed.tsv b/test_data/GSE250130/GSE250130.parsed.tsv new file mode 100644 index 0000000..188f269 --- /dev/null +++ b/test_data/GSE250130/GSE250130.parsed.tsv @@ -0,0 +1,28 @@ +SRR27190799 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/099/SRR27190799/SRR27190799_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/099/SRR27190799/SRR27190799_2.fastq.gz ENAFQ +SRR27190800 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/000/SRR27190800/SRR27190800_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/000/SRR27190800/SRR27190800_2.fastq.gz ENAFQ +SRR27190801 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/001/SRR27190801/SRR27190801_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/001/SRR27190801/SRR27190801_2.fastq.gz ENAFQ +SRR27190802 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/002/SRR27190802/SRR27190802_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/002/SRR27190802/SRR27190802_2.fastq.gz ENAFQ +SRR27190803 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/003/SRR27190803/SRR27190803_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/003/SRR27190803/SRR27190803_2.fastq.gz ENAFQ +SRR27190804 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/004/SRR27190804/SRR27190804_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/004/SRR27190804/SRR27190804_2.fastq.gz ENAFQ +SRR27190805 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/005/SRR27190805/SRR27190805_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/005/SRR27190805/SRR27190805_2.fastq.gz ENAFQ +SRR27190806 Homo sapiens 
ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/006/SRR27190806/SRR27190806_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/006/SRR27190806/SRR27190806_2.fastq.gz ENAFQ +SRR27190807 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/007/SRR27190807/SRR27190807_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/007/SRR27190807/SRR27190807_2.fastq.gz ENAFQ +SRR27190808 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/008/SRR27190808/SRR27190808_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/008/SRR27190808/SRR27190808_2.fastq.gz ENAFQ +SRR27190809 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/009/SRR27190809/SRR27190809_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/009/SRR27190809/SRR27190809_2.fastq.gz ENAFQ +SRR27190810 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/010/SRR27190810/SRR27190810_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/010/SRR27190810/SRR27190810_2.fastq.gz ENAFQ +SRR27190811 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/011/SRR27190811/SRR27190811_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/011/SRR27190811/SRR27190811_2.fastq.gz ENAFQ +SRR27190812 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/012/SRR27190812/SRR27190812_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/012/SRR27190812/SRR27190812_2.fastq.gz ENAFQ +SRR27190813 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/013/SRR27190813/SRR27190813_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/013/SRR27190813/SRR27190813_2.fastq.gz ENAFQ +SRR27190814 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/014/SRR27190814/SRR27190814_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/014/SRR27190814/SRR27190814_2.fastq.gz ENAFQ +SRR27190815 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/015/SRR27190815/SRR27190815_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/015/SRR27190815/SRR27190815_2.fastq.gz ENAFQ +SRR27190816 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/016/SRR27190816/SRR27190816_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/016/SRR27190816/SRR27190816_2.fastq.gz ENAFQ +SRR27190817 Homo 
sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/017/SRR27190817/SRR27190817_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/017/SRR27190817/SRR27190817_2.fastq.gz ENAFQ +SRR27190818 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/018/SRR27190818/SRR27190818_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/018/SRR27190818/SRR27190818_2.fastq.gz ENAFQ +SRR27190819 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/019/SRR27190819/SRR27190819_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/019/SRR27190819/SRR27190819_2.fastq.gz ENAFQ +SRR27190820 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/020/SRR27190820/SRR27190820_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/020/SRR27190820/SRR27190820_2.fastq.gz ENAFQ +SRR27190821 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/021/SRR27190821/SRR27190821_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/021/SRR27190821/SRR27190821_2.fastq.gz ENAFQ +SRR27190822 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/022/SRR27190822/SRR27190822_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/022/SRR27190822/SRR27190822_2.fastq.gz ENAFQ +SRR27190823 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/023/SRR27190823/SRR27190823_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/023/SRR27190823/SRR27190823_2.fastq.gz ENAFQ +SRR27190824 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/024/SRR27190824/SRR27190824_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/024/SRR27190824/SRR27190824_2.fastq.gz ENAFQ +SRR27190825 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/025/SRR27190825/SRR27190825_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/025/SRR27190825/SRR27190825_2.fastq.gz ENAFQ +SRR27190826 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/026/SRR27190826/SRR27190826_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/026/SRR27190826/SRR27190826_2.fastq.gz ENAFQ diff --git a/test_data/GSE250130/GSE250130.run.list b/test_data/GSE250130/GSE250130.run.list new file mode 100644 index 0000000..94aad8d --- /dev/null +++ b/test_data/GSE250130/GSE250130.run.list 
@@ -0,0 +1,28 @@ +SRR27190799 +SRR27190800 +SRR27190801 +SRR27190802 +SRR27190803 +SRR27190804 +SRR27190805 +SRR27190806 +SRR27190807 +SRR27190808 +SRR27190809 +SRR27190810 +SRR27190811 +SRR27190812 +SRR27190813 +SRR27190814 +SRR27190815 +SRR27190816 +SRR27190817 +SRR27190818 +SRR27190819 +SRR27190820 +SRR27190821 +SRR27190822 +SRR27190823 +SRR27190824 +SRR27190825 +SRR27190826 diff --git a/test_data/GSE250130/GSE250130.sample.list b/test_data/GSE250130/GSE250130.sample.list new file mode 100644 index 0000000..2a53be0 --- /dev/null +++ b/test_data/GSE250130/GSE250130.sample.list @@ -0,0 +1,28 @@ +GSM7974277 +GSM7974278 +GSM7974279 +GSM7974280 +GSM7974281 +GSM7974282 +GSM7974283 +GSM7974284 +GSM7974285 +GSM7974286 +GSM7974287 +GSM7974288 +GSM7974289 +GSM7974290 +GSM7974291 +GSM7974292 +GSM7974293 +GSM7974294 +GSM7974295 +GSM7974296 +GSM7974297 +GSM7974298 +GSM7974299 +GSM7974300 +GSM7974301 +GSM7974302 +GSM7974303 +GSM7974304 diff --git a/test_data/GSE250130/GSE250130.sample_x_run.tsv b/test_data/GSE250130/GSE250130.sample_x_run.tsv new file mode 100644 index 0000000..ba6ec14 --- /dev/null +++ b/test_data/GSE250130/GSE250130.sample_x_run.tsv @@ -0,0 +1,28 @@ +GSM7974277 SRR27190826 +GSM7974278 SRR27190825 +GSM7974279 SRR27190814 +GSM7974280 SRR27190805 +GSM7974281 SRR27190804 +GSM7974282 SRR27190803 +GSM7974283 SRR27190802 +GSM7974284 SRR27190801 +GSM7974285 SRR27190800 +GSM7974286 SRR27190799 +GSM7974287 SRR27190824 +GSM7974288 SRR27190823 +GSM7974289 SRR27190822 +GSM7974290 SRR27190821 +GSM7974291 SRR27190820 +GSM7974292 SRR27190819 +GSM7974293 SRR27190818 +GSM7974294 SRR27190817 +GSM7974295 SRR27190816 +GSM7974296 SRR27190815 +GSM7974297 SRR27190813 +GSM7974298 SRR27190812 +GSM7974299 SRR27190811 +GSM7974300 SRR27190810 +GSM7974301 SRR27190809 +GSM7974302 SRR27190808 +GSM7974303 SRR27190807 +GSM7974304 SRR27190806 diff --git a/test_data/GSE250130/GSE250130.urls.list b/test_data/GSE250130/GSE250130.urls.list new file mode 100644 index 0000000..2667a6d --- 
/dev/null +++ b/test_data/GSE250130/GSE250130.urls.list @@ -0,0 +1,56 @@ +ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/099/SRR27190799/SRR27190799_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/099/SRR27190799/SRR27190799_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/000/SRR27190800/SRR27190800_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/000/SRR27190800/SRR27190800_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/001/SRR27190801/SRR27190801_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/001/SRR27190801/SRR27190801_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/002/SRR27190802/SRR27190802_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/002/SRR27190802/SRR27190802_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/003/SRR27190803/SRR27190803_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/003/SRR27190803/SRR27190803_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/004/SRR27190804/SRR27190804_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/004/SRR27190804/SRR27190804_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/005/SRR27190805/SRR27190805_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/005/SRR27190805/SRR27190805_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/006/SRR27190806/SRR27190806_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/006/SRR27190806/SRR27190806_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/007/SRR27190807/SRR27190807_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/007/SRR27190807/SRR27190807_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/008/SRR27190808/SRR27190808_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/008/SRR27190808/SRR27190808_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/009/SRR27190809/SRR27190809_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/009/SRR27190809/SRR27190809_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/010/SRR27190810/SRR27190810_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/010/SRR27190810/SRR27190810_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/011/SRR27190811/SRR27190811_1.fastq.gz 
+ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/011/SRR27190811/SRR27190811_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/012/SRR27190812/SRR27190812_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/012/SRR27190812/SRR27190812_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/013/SRR27190813/SRR27190813_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/013/SRR27190813/SRR27190813_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/014/SRR27190814/SRR27190814_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/014/SRR27190814/SRR27190814_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/015/SRR27190815/SRR27190815_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/015/SRR27190815/SRR27190815_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/016/SRR27190816/SRR27190816_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/016/SRR27190816/SRR27190816_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/017/SRR27190817/SRR27190817_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/017/SRR27190817/SRR27190817_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/018/SRR27190818/SRR27190818_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/018/SRR27190818/SRR27190818_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/019/SRR27190819/SRR27190819_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/019/SRR27190819/SRR27190819_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/020/SRR27190820/SRR27190820_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/020/SRR27190820/SRR27190820_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/021/SRR27190821/SRR27190821_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/021/SRR27190821/SRR27190821_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/022/SRR27190822/SRR27190822_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/022/SRR27190822/SRR27190822_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/023/SRR27190823/SRR27190823_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/023/SRR27190823/SRR27190823_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/024/SRR27190824/SRR27190824_1.fastq.gz 
+ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/024/SRR27190824/SRR27190824_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/025/SRR27190825/SRR27190825_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/025/SRR27190825/SRR27190825_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/026/SRR27190826/SRR27190826_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR271/026/SRR27190826/SRR27190826_2.fastq.gz diff --git a/test_data/GSE264508/GSE264508.accessions.tsv b/test_data/GSE264508/GSE264508.accessions.tsv new file mode 100644 index 0000000..14c6407 --- /dev/null +++ b/test_data/GSE264508/GSE264508.accessions.tsv @@ -0,0 +1,18 @@ +GSM8219160 SRS21085587 SRX24324207 SRR28758707 +GSM8219161 SRS21085570 SRX24324190 SRR28758748,SRR28758749,SRR28758750,SRR28758751 +GSM8219162 SRS21085571 SRX24324191 SRR28758747 +GSM8219163 SRS21085572 SRX24324192 SRR28758746 +GSM8219164 SRS21085573 SRX24324193 SRR28758742,SRR28758743,SRR28758744,SRR28758745 +GSM8219165 SRS21085577 SRX24324197 SRR28758729 +GSM8219166 SRS21085574 SRX24324194 SRR28758738,SRR28758739,SRR28758740,SRR28758741 +GSM8219167 SRS21085575 SRX24324195 SRR28758734,SRR28758735,SRR28758736,SRR28758737 +GSM8219168 SRS21085576 SRX24324196 SRR28758730,SRR28758731,SRR28758732,SRR28758733 +GSM8219169 SRS21085578 SRX24324198 SRR28758728 +GSM8219170 SRS21085579 SRX24324199 SRR28758727 +GSM8219171 SRS21085580 SRX24324200 SRR28758726 +GSM8219172 SRS21085581 SRX24324201 SRR28758725 +GSM8219173 SRS21085582 SRX24324202 SRR28758724 +GSM8219174 SRS21085588 SRX24324208 SRR28758706 +GSM8219175 SRS21085583 SRX24324203 SRR28758720,SRR28758721,SRR28758722,SRR28758723 +GSM8219176 SRS21085584 SRX24324204 SRR28758716,SRR28758717,SRR28758718,SRR28758719 +GSM8219177 SRS21085585 SRX24324205 SRR28758708,SRR28758709,SRR28758710,SRR28758711 diff --git a/test_data/GSE264508/GSE264508.parsed.tsv b/test_data/GSE264508/GSE264508.parsed.tsv new file mode 100644 index 0000000..51e7be7 --- /dev/null +++ b/test_data/GSE264508/GSE264508.parsed.tsv @@ -0,0 +1,42 @@ +SRR28758706 Homo 
sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/006/SRR28758706/SRR28758706_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/006/SRR28758706/SRR28758706_2.fastq.gz ENAFQ +SRR28758707 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/007/SRR28758707/SRR28758707_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/007/SRR28758707/SRR28758707_2.fastq.gz ENAFQ +SRR28758708 Homo sapiens ftp.sra.ebi.ac.uk/vol1/srr/SRR287/008/SRR28758708/SRR28758708.lite SRA +SRR28758709 Homo sapiens ftp.sra.ebi.ac.uk/vol1/srr/SRR287/009/SRR28758709/SRR28758709 SRA +SRR28758710 Homo sapiens ftp.sra.ebi.ac.uk/vol1/srr/SRR287/010/SRR28758710/SRR28758710.lite SRA +SRR28758711 Homo sapiens ftp.sra.ebi.ac.uk/vol1/srr/SRR287/011/SRR28758711/SRR28758711.lite SRA +SRR28758716 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/016/SRR28758716/SRR28758716_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/016/SRR28758716/SRR28758716_2.fastq.gz ENAFQ +SRR28758717 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/017/SRR28758717/SRR28758717_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/017/SRR28758717/SRR28758717_2.fastq.gz ENAFQ +SRR28758718 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/018/SRR28758718/SRR28758718_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/018/SRR28758718/SRR28758718_2.fastq.gz ENAFQ +SRR28758719 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/019/SRR28758719/SRR28758719_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/019/SRR28758719/SRR28758719_2.fastq.gz ENAFQ +SRR28758720 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/020/SRR28758720/SRR28758720_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/020/SRR28758720/SRR28758720_2.fastq.gz ENAFQ +SRR28758721 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/021/SRR28758721/SRR28758721_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/021/SRR28758721/SRR28758721_2.fastq.gz ENAFQ +SRR28758722 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/022/SRR28758722/SRR28758722_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/022/SRR28758722/SRR28758722_2.fastq.gz 
ENAFQ +SRR28758723 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/023/SRR28758723/SRR28758723_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/023/SRR28758723/SRR28758723_2.fastq.gz ENAFQ +SRR28758724 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/024/SRR28758724/SRR28758724_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/024/SRR28758724/SRR28758724_2.fastq.gz ENAFQ +SRR28758725 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/025/SRR28758725/SRR28758725_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/025/SRR28758725/SRR28758725_2.fastq.gz ENAFQ +SRR28758726 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/026/SRR28758726/SRR28758726_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/026/SRR28758726/SRR28758726_2.fastq.gz ENAFQ +SRR28758727 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/027/SRR28758727/SRR28758727_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/027/SRR28758727/SRR28758727_2.fastq.gz ENAFQ +SRR28758728 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/028/SRR28758728/SRR28758728_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/028/SRR28758728/SRR28758728_2.fastq.gz ENAFQ +SRR28758729 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/029/SRR28758729/SRR28758729_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/029/SRR28758729/SRR28758729_2.fastq.gz ENAFQ +SRR28758730 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/030/SRR28758730/SRR28758730_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/030/SRR28758730/SRR28758730_2.fastq.gz ENAFQ +SRR28758731 Homo sapiens ftp.sra.ebi.ac.uk/vol1/srr/SRR287/031/SRR28758731/SRR28758731.lite SRA +SRR28758732 Homo sapiens ftp.sra.ebi.ac.uk/vol1/srr/SRR287/032/SRR28758732/SRR28758732.lite SRA +SRR28758733 Homo sapiens ftp.sra.ebi.ac.uk/vol1/srr/SRR287/033/SRR28758733/SRR28758733 SRA +SRR28758734 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/034/SRR28758734/SRR28758734_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/034/SRR28758734/SRR28758734_2.fastq.gz ENAFQ +SRR28758735 Homo sapiens 
ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/035/SRR28758735/SRR28758735_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/035/SRR28758735/SRR28758735_2.fastq.gz ENAFQ +SRR28758736 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/036/SRR28758736/SRR28758736_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/036/SRR28758736/SRR28758736_2.fastq.gz ENAFQ +SRR28758737 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/037/SRR28758737/SRR28758737_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/037/SRR28758737/SRR28758737_2.fastq.gz ENAFQ +SRR28758738 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/038/SRR28758738/SRR28758738_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/038/SRR28758738/SRR28758738_2.fastq.gz ENAFQ +SRR28758739 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/039/SRR28758739/SRR28758739_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/039/SRR28758739/SRR28758739_2.fastq.gz ENAFQ +SRR28758740 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/040/SRR28758740/SRR28758740_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/040/SRR28758740/SRR28758740_2.fastq.gz ENAFQ +SRR28758741 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/041/SRR28758741/SRR28758741_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/041/SRR28758741/SRR28758741_2.fastq.gz ENAFQ +SRR28758742 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/042/SRR28758742/SRR28758742_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/042/SRR28758742/SRR28758742_2.fastq.gz ENAFQ +SRR28758743 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/043/SRR28758743/SRR28758743_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/043/SRR28758743/SRR28758743_2.fastq.gz ENAFQ +SRR28758744 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/044/SRR28758744/SRR28758744_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/044/SRR28758744/SRR28758744_2.fastq.gz ENAFQ +SRR28758745 Homo sapiens ftp.sra.ebi.ac.uk/vol1/srr/SRR287/045/SRR28758745/SRR28758745.lite SRA +SRR28758746 Homo sapiens 
ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/046/SRR28758746/SRR28758746_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/046/SRR28758746/SRR28758746_2.fastq.gz ENAFQ +SRR28758747 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/047/SRR28758747/SRR28758747_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/047/SRR28758747/SRR28758747_2.fastq.gz ENAFQ +SRR28758748 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/048/SRR28758748/SRR28758748_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/048/SRR28758748/SRR28758748_2.fastq.gz ENAFQ +SRR28758749 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/049/SRR28758749/SRR28758749_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/049/SRR28758749/SRR28758749_2.fastq.gz ENAFQ +SRR28758750 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/050/SRR28758750/SRR28758750_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/050/SRR28758750/SRR28758750_2.fastq.gz ENAFQ +SRR28758751 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/051/SRR28758751/SRR28758751_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/051/SRR28758751/SRR28758751_2.fastq.gz ENAFQ diff --git a/test_data/GSE264508/GSE264508.run.list b/test_data/GSE264508/GSE264508.run.list new file mode 100644 index 0000000..b153fef --- /dev/null +++ b/test_data/GSE264508/GSE264508.run.list @@ -0,0 +1,42 @@ +SRR28758706 +SRR28758707 +SRR28758708 +SRR28758709 +SRR28758710 +SRR28758711 +SRR28758716 +SRR28758717 +SRR28758718 +SRR28758719 +SRR28758720 +SRR28758721 +SRR28758722 +SRR28758723 +SRR28758724 +SRR28758725 +SRR28758726 +SRR28758727 +SRR28758728 +SRR28758729 +SRR28758730 +SRR28758731 +SRR28758732 +SRR28758733 +SRR28758734 +SRR28758735 +SRR28758736 +SRR28758737 +SRR28758738 +SRR28758739 +SRR28758740 +SRR28758741 +SRR28758742 +SRR28758743 +SRR28758744 +SRR28758745 +SRR28758746 +SRR28758747 +SRR28758748 +SRR28758749 +SRR28758750 +SRR28758751 diff --git a/test_data/GSE264508/GSE264508.sample.list b/test_data/GSE264508/GSE264508.sample.list new file mode 100644 index 0000000..758cb46 --- /dev/null +++ 
b/test_data/GSE264508/GSE264508.sample.list @@ -0,0 +1,18 @@ +GSM8219160 +GSM8219161 +GSM8219162 +GSM8219163 +GSM8219164 +GSM8219165 +GSM8219166 +GSM8219167 +GSM8219168 +GSM8219169 +GSM8219170 +GSM8219171 +GSM8219172 +GSM8219173 +GSM8219174 +GSM8219175 +GSM8219176 +GSM8219177 diff --git a/test_data/GSE264508/GSE264508.sample_x_run.tsv b/test_data/GSE264508/GSE264508.sample_x_run.tsv new file mode 100644 index 0000000..2b07044 --- /dev/null +++ b/test_data/GSE264508/GSE264508.sample_x_run.tsv @@ -0,0 +1,18 @@ +GSM8219160 SRR28758707 +GSM8219161 SRR28758748,SRR28758749,SRR28758750,SRR28758751 +GSM8219162 SRR28758747 +GSM8219163 SRR28758746 +GSM8219164 SRR28758742,SRR28758743,SRR28758744,SRR28758745 +GSM8219165 SRR28758729 +GSM8219166 SRR28758738,SRR28758739,SRR28758740,SRR28758741 +GSM8219167 SRR28758734,SRR28758735,SRR28758736,SRR28758737 +GSM8219168 SRR28758730,SRR28758731,SRR28758732,SRR28758733 +GSM8219169 SRR28758728 +GSM8219170 SRR28758727 +GSM8219171 SRR28758726 +GSM8219172 SRR28758725 +GSM8219173 SRR28758724 +GSM8219174 SRR28758706 +GSM8219175 SRR28758720,SRR28758721,SRR28758722,SRR28758723 +GSM8219176 SRR28758716,SRR28758717,SRR28758718,SRR28758719 +GSM8219177 SRR28758708,SRR28758709,SRR28758710,SRR28758711 diff --git a/test_data/GSE264508/GSE264508.urls.list b/test_data/GSE264508/GSE264508.urls.list new file mode 100644 index 0000000..9916afb --- /dev/null +++ b/test_data/GSE264508/GSE264508.urls.list @@ -0,0 +1,76 @@ +ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/006/SRR28758706/SRR28758706_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/006/SRR28758706/SRR28758706_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/007/SRR28758707/SRR28758707_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/007/SRR28758707/SRR28758707_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/srr/SRR287/008/SRR28758708/SRR28758708.lite +ftp.sra.ebi.ac.uk/vol1/srr/SRR287/009/SRR28758709/SRR28758709 +ftp.sra.ebi.ac.uk/vol1/srr/SRR287/010/SRR28758710/SRR28758710.lite 
+ftp.sra.ebi.ac.uk/vol1/srr/SRR287/011/SRR28758711/SRR28758711.lite +ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/016/SRR28758716/SRR28758716_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/016/SRR28758716/SRR28758716_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/017/SRR28758717/SRR28758717_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/017/SRR28758717/SRR28758717_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/018/SRR28758718/SRR28758718_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/018/SRR28758718/SRR28758718_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/019/SRR28758719/SRR28758719_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/019/SRR28758719/SRR28758719_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/020/SRR28758720/SRR28758720_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/020/SRR28758720/SRR28758720_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/021/SRR28758721/SRR28758721_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/021/SRR28758721/SRR28758721_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/022/SRR28758722/SRR28758722_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/022/SRR28758722/SRR28758722_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/023/SRR28758723/SRR28758723_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/023/SRR28758723/SRR28758723_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/024/SRR28758724/SRR28758724_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/024/SRR28758724/SRR28758724_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/025/SRR28758725/SRR28758725_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/025/SRR28758725/SRR28758725_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/026/SRR28758726/SRR28758726_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/026/SRR28758726/SRR28758726_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/027/SRR28758727/SRR28758727_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/027/SRR28758727/SRR28758727_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/028/SRR28758728/SRR28758728_1.fastq.gz 
+ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/028/SRR28758728/SRR28758728_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/029/SRR28758729/SRR28758729_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/029/SRR28758729/SRR28758729_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/030/SRR28758730/SRR28758730_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/030/SRR28758730/SRR28758730_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/srr/SRR287/031/SRR28758731/SRR28758731.lite +ftp.sra.ebi.ac.uk/vol1/srr/SRR287/032/SRR28758732/SRR28758732.lite +ftp.sra.ebi.ac.uk/vol1/srr/SRR287/033/SRR28758733/SRR28758733 +ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/034/SRR28758734/SRR28758734_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/034/SRR28758734/SRR28758734_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/035/SRR28758735/SRR28758735_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/035/SRR28758735/SRR28758735_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/036/SRR28758736/SRR28758736_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/036/SRR28758736/SRR28758736_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/037/SRR28758737/SRR28758737_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/037/SRR28758737/SRR28758737_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/038/SRR28758738/SRR28758738_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/038/SRR28758738/SRR28758738_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/039/SRR28758739/SRR28758739_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/039/SRR28758739/SRR28758739_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/040/SRR28758740/SRR28758740_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/040/SRR28758740/SRR28758740_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/041/SRR28758741/SRR28758741_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/041/SRR28758741/SRR28758741_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/042/SRR28758742/SRR28758742_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/042/SRR28758742/SRR28758742_2.fastq.gz 
+ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/043/SRR28758743/SRR28758743_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/043/SRR28758743/SRR28758743_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/044/SRR28758744/SRR28758744_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/044/SRR28758744/SRR28758744_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/srr/SRR287/045/SRR28758745/SRR28758745.lite +ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/046/SRR28758746/SRR28758746_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/046/SRR28758746/SRR28758746_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/047/SRR28758747/SRR28758747_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/047/SRR28758747/SRR28758747_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/048/SRR28758748/SRR28758748_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/048/SRR28758748/SRR28758748_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/049/SRR28758749/SRR28758749_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/049/SRR28758749/SRR28758749_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/050/SRR28758750/SRR28758750_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/050/SRR28758750/SRR28758750_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/051/SRR28758751/SRR28758751_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/051/SRR28758751/SRR28758751_2.fastq.gz diff --git a/test_data/GSE274955/GSE274955.accessions.tsv b/test_data/GSE274955/GSE274955.accessions.tsv new file mode 100644 index 0000000..eaa7d40 --- /dev/null +++ b/test_data/GSE274955/GSE274955.accessions.tsv @@ -0,0 +1,5 @@ +GSM8462812 SRS22358709 SRX25717812 SRR30256652 +GSM8462813 SRS22358710 SRX25717813 SRR30256651 +GSM8462814 SRS22358713 SRX25717814 SRR30256650 +GSM8462815 SRS22358712 SRX25717815 SRR30256649 +GSM8462816 SRS22358714 SRX25717816 SRR30256648 diff --git a/test_data/GSE274955/GSE274955.parsed.tsv b/test_data/GSE274955/GSE274955.parsed.tsv new file mode 100644 index 0000000..b951d24 --- /dev/null +++ b/test_data/GSE274955/GSE274955.parsed.tsv @@ -0,0 +1,5 @@ +SRR30256648 Homo sapiens 
https://sra-pub-src-1.s3.amazonaws.com/SRR30256648/s6_bam.bam.1 BAM +SRR30256649 Homo sapiens ftp.sra.ebi.ac.uk/vol1/srr/SRR302/049/SRR30256649/s5_bam.bam BAM +SRR30256650 Homo sapiens ftp.sra.ebi.ac.uk/vol1/srr/SRR302/050/SRR30256650/s4_bam.bam BAM +SRR30256651 Homo sapiens ftp.sra.ebi.ac.uk/vol1/srr/SRR302/051/SRR30256651/s3_bam.bam BAM +SRR30256652 Homo sapiens ftp.sra.ebi.ac.uk/vol1/srr/SRR302/052/SRR30256652/s2_bam.bam BAM diff --git a/test_data/GSE274955/GSE274955.run.list b/test_data/GSE274955/GSE274955.run.list new file mode 100644 index 0000000..10bed13 --- /dev/null +++ b/test_data/GSE274955/GSE274955.run.list @@ -0,0 +1,5 @@ +SRR30256648 +SRR30256649 +SRR30256650 +SRR30256651 +SRR30256652 diff --git a/test_data/GSE274955/GSE274955.sample.list b/test_data/GSE274955/GSE274955.sample.list new file mode 100644 index 0000000..88c6e99 --- /dev/null +++ b/test_data/GSE274955/GSE274955.sample.list @@ -0,0 +1,5 @@ +GSM8462812 +GSM8462813 +GSM8462814 +GSM8462815 +GSM8462816 diff --git a/test_data/GSE274955/GSE274955.sample_x_run.tsv b/test_data/GSE274955/GSE274955.sample_x_run.tsv new file mode 100644 index 0000000..28fe9d6 --- /dev/null +++ b/test_data/GSE274955/GSE274955.sample_x_run.tsv @@ -0,0 +1,5 @@ +GSM8462812 SRR30256652 +GSM8462813 SRR30256651 +GSM8462814 SRR30256650 +GSM8462815 SRR30256649 +GSM8462816 SRR30256648 diff --git a/test_data/GSE274955/GSE274955.urls.list b/test_data/GSE274955/GSE274955.urls.list new file mode 100644 index 0000000..642e985 --- /dev/null +++ b/test_data/GSE274955/GSE274955.urls.list @@ -0,0 +1,5 @@ +https://sra-pub-src-1.s3.amazonaws.com/SRR30256648/s6_bam.bam.1 +ftp.sra.ebi.ac.uk/vol1/srr/SRR302/049/SRR30256649/s5_bam.bam +ftp.sra.ebi.ac.uk/vol1/srr/SRR302/050/SRR30256650/s4_bam.bam +ftp.sra.ebi.ac.uk/vol1/srr/SRR302/051/SRR30256651/s3_bam.bam +ftp.sra.ebi.ac.uk/vol1/srr/SRR302/052/SRR30256652/s2_bam.bam diff --git a/tests/test_metadata.sh b/tests/test_metadata.sh new file mode 100644 index 0000000..491e74d --- /dev/null +++ 
b/tests/test_metadata.sh @@ -0,0 +1,51 @@ +#!/bin/bash +set -e + +if [ "$#" -lt 1 ]; then + echo "Usage: $0 " + exit 1 +fi + +SERIES=$1 +SAMPLE_LIST=${2:-""} +OUTPUT_DIR="output/$SERIES" + +# Create output directory and copy all scripts +mkdir -p $OUTPUT_DIR +cp ./scripts/* $OUTPUT_DIR +cd $OUTPUT_DIR + +# Load metadata +echo "Loading metadata for $SERIES $SAMPLE_LIST" +./collect_metadata.sh $SERIES $SAMPLE_LIST + +# Test output +for file in test_data/$SERIES/* +do + filename=$(basename $file) + echo "Testing $file" + + # Check if the expected output file is created + if [ ! -f $filename ] + then + echo "❌ERROR: Expected output file $filename not found!" + exit 1 + fi + + # Check if the file is not empty + if [ ! -s $filename ] + then + echo "❌ERROR: Output file $filename is empty!" + exit 1 + fi + + # Compare the actual output with the expected output + if ! diff -q $filename $file + then + echo "❌ERROR: Output file $filename does not match expected output!" + exit 1 + fi +done + +# Print Success +echo "✅$SERIES: All tests passed!" 
From 76357b585ea1c11464a2b07c85a46495639aab6e Mon Sep 17 00:00:00 2001 From: cellgeni Date: Mon, 20 Jan 2025 16:08:43 +0000 Subject: [PATCH 06/18] fixed parameter substitution --- .github/workflows/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index a3b761c..e8d7704 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -30,4 +30,4 @@ jobs: run: | chmod +x ./scripts/* chmod +x ./tests/test_metadata.sh - ./tests/test_metadata.sh ${matrix.series_id} \ No newline at end of file + ./tests/test_metadata.sh ${{ matrix.series_id }} \ No newline at end of file From fe2a7831694701b5b326f02721c8e695a2904e91 Mon Sep 17 00:00:00 2001 From: cellgeni Date: Mon, 20 Jan 2025 16:32:21 +0000 Subject: [PATCH 07/18] Added full paths --- tests/test_metadata.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_metadata.sh b/tests/test_metadata.sh index 491e74d..bed7a39 100644 --- a/tests/test_metadata.sh +++ b/tests/test_metadata.sh @@ -8,7 +8,7 @@ fi SERIES=$1 SAMPLE_LIST=${2:-""} -OUTPUT_DIR="output/$SERIES" +OUTPUT_DIR="${GITHUB_WORKSPACE}/output/$SERIES" # Create output directory and copy all scripts mkdir -p $OUTPUT_DIR @@ -20,7 +20,7 @@ echo "Loading metadata for $SERIES $SAMPLE_LIST" ./collect_metadata.sh $SERIES $SAMPLE_LIST # Test output -for file in test_data/$SERIES/* +for file in ${GITHUB_WORKSPACE}/test_data/$SERIES/* do filename=$(basename $file) echo "Testing $file" @@ -40,7 +40,7 @@ do fi # Compare the actual output with the expected output - if ! diff -q $filename $file + if [[ ! diff -q $filename $file ]] then echo "❌ERROR: Output file $filename does not match expected output!" 
exit 1 From 710cbeef062317f4a6e439b2d47951e3e9012703 Mon Sep 17 00:00:00 2001 From: cellgeni Date: Mon, 20 Jan 2025 16:34:37 +0000 Subject: [PATCH 08/18] fixed diff command input --- tests/test_metadata.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_metadata.sh b/tests/test_metadata.sh index bed7a39..2231a80 100644 --- a/tests/test_metadata.sh +++ b/tests/test_metadata.sh @@ -40,7 +40,7 @@ do fi # Compare the actual output with the expected output - if [[ ! diff -q $filename $file ]] + if ! diff -q $filename $file then echo "❌ERROR: Output file $filename does not match expected output!" exit 1 From 58ed938daed752e2a2db629773f4bc0e3cd8fd73 Mon Sep 17 00:00:00 2001 From: cellgeni Date: Tue, 21 Jan 2025 13:12:55 +0000 Subject: [PATCH 09/18] Fixed sample search in `familty.soft` file. Fixed subseting issue for GEO samples --- scripts/collect_metadata.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/collect_metadata.sh b/scripts/collect_metadata.sh index 986b44c..1099eb1 100755 --- a/scripts/collect_metadata.sh +++ b/scripts/collect_metadata.sh @@ -46,7 +46,7 @@ function parse_geo_family() { /Sample_relation = BioSample:/ { biosample=gensub(/.*(SAMN[0-9]+)/, "\\1", "g", $0) } # When all three pieces of information are found, print them as a tab-separated line - /BioSample:/ && sample && geo && sra && biosample { + sample && geo && sra && biosample { print sample,geo,sra,biosample sample=""; geo=""; sra=""; biosample="" } @@ -398,7 +398,7 @@ function process_geo() { fi ## make utility files - make_util_files $SERIES + make_util_files $SERIES $SUBSET } function process_arrayexpress { From d83766ce1a82525aae243f5f94c9b2bd588fa693 Mon Sep 17 00:00:00 2001 From: cellgeni Date: Tue, 21 Jan 2025 13:20:49 +0000 Subject: [PATCH 10/18] Updated metadata files after last fix --- test_data/GSE191067/GSE191067.accessions.tsv | 1 + test_data/GSE191067/GSE191067.parsed.tsv | 4 ++++ test_data/GSE191067/GSE191067.run.list | 
4 ++++ test_data/GSE191067/GSE191067.sample.list | 1 + test_data/GSE191067/GSE191067.sample_x_run.tsv | 1 + test_data/GSE191067/GSE191067.urls.list | 4 ++++ test_data/GSE264508/GSE264508.accessions.tsv | 1 + test_data/GSE264508/GSE264508.parsed.tsv | 4 ++++ test_data/GSE264508/GSE264508.run.list | 4 ++++ test_data/GSE264508/GSE264508.sample.list | 1 + test_data/GSE264508/GSE264508.sample_x_run.tsv | 1 + test_data/GSE264508/GSE264508.urls.list | 8 ++++++++ test_data/GSE274955/GSE274955.accessions.tsv | 1 + test_data/GSE274955/GSE274955.parsed.tsv | 1 + test_data/GSE274955/GSE274955.run.list | 1 + test_data/GSE274955/GSE274955.sample.list | 1 + test_data/GSE274955/GSE274955.sample_x_run.tsv | 1 + test_data/GSE274955/GSE274955.urls.list | 1 + 18 files changed, 40 insertions(+) diff --git a/test_data/GSE191067/GSE191067.accessions.tsv b/test_data/GSE191067/GSE191067.accessions.tsv index de33c63..aca94de 100644 --- a/test_data/GSE191067/GSE191067.accessions.tsv +++ b/test_data/GSE191067/GSE191067.accessions.tsv @@ -1,3 +1,4 @@ +GSM5738230 SRS11329590 SRX13428614 SRR17249698,SRR17249699,SRR17249700,SRR17249701 GSM5738231 SRS11329588 SRX13428615 SRR17249694,SRR17249695,SRR17249696,SRR17249697 GSM5738232 SRS11329589 SRX13428616 SRR17249690,SRR17249691,SRR17249692,SRR17249693 GSM5738233 SRS11329594 SRX13428617 SRR17249684,SRR17249685,SRR17249686,SRR17249687 diff --git a/test_data/GSE191067/GSE191067.parsed.tsv b/test_data/GSE191067/GSE191067.parsed.tsv index f7ac3de..0ab62ff 100644 --- a/test_data/GSE191067/GSE191067.parsed.tsv +++ b/test_data/GSE191067/GSE191067.parsed.tsv @@ -42,3 +42,7 @@ SRR17249694 Homo sapiens https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos6/sra-p SRR17249695 Homo sapiens https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos6/sra-pub-zq-40/SRR017/17249/SRR17249695/SRR17249695.lite.1 SRA SRR17249696 Homo sapiens https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos6/sra-pub-zq-40/SRR017/17249/SRR17249696/SRR17249696.lite.1 SRA SRR17249697 Homo sapiens 
https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos6/sra-pub-zq-40/SRR017/17249/SRR17249697/SRR17249697.lite.1 SRA +SRR17249698 Homo sapiens https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos6/sra-pub-zq-40/SRR017/17249/SRR17249698/SRR17249698.lite.1 SRA +SRR17249699 Homo sapiens https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos6/sra-pub-zq-40/SRR017/17249/SRR17249699/SRR17249699.lite.1 SRA +SRR17249700 Homo sapiens https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos6/sra-pub-zq-40/SRR017/17249/SRR17249700/SRR17249700.lite.1 SRA +SRR17249701 Homo sapiens https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos6/sra-pub-zq-40/SRR017/17249/SRR17249701/SRR17249701.lite.1 SRA diff --git a/test_data/GSE191067/GSE191067.run.list b/test_data/GSE191067/GSE191067.run.list index 71167e4..26fb148 100644 --- a/test_data/GSE191067/GSE191067.run.list +++ b/test_data/GSE191067/GSE191067.run.list @@ -42,3 +42,7 @@ SRR17249694 SRR17249695 SRR17249696 SRR17249697 +SRR17249698 +SRR17249699 +SRR17249700 +SRR17249701 diff --git a/test_data/GSE191067/GSE191067.sample.list b/test_data/GSE191067/GSE191067.sample.list index 9d278d4..1a185ec 100644 --- a/test_data/GSE191067/GSE191067.sample.list +++ b/test_data/GSE191067/GSE191067.sample.list @@ -1,3 +1,4 @@ +GSM5738230 GSM5738231 GSM5738232 GSM5738233 diff --git a/test_data/GSE191067/GSE191067.sample_x_run.tsv b/test_data/GSE191067/GSE191067.sample_x_run.tsv index 152687c..b88fbb1 100644 --- a/test_data/GSE191067/GSE191067.sample_x_run.tsv +++ b/test_data/GSE191067/GSE191067.sample_x_run.tsv @@ -1,3 +1,4 @@ +GSM5738230 SRR17249698,SRR17249699,SRR17249700,SRR17249701 GSM5738231 SRR17249694,SRR17249695,SRR17249696,SRR17249697 GSM5738232 SRR17249690,SRR17249691,SRR17249692,SRR17249693 GSM5738233 SRR17249684,SRR17249685,SRR17249686,SRR17249687 diff --git a/test_data/GSE191067/GSE191067.urls.list b/test_data/GSE191067/GSE191067.urls.list index 4af51d7..d93c05b 100644 --- a/test_data/GSE191067/GSE191067.urls.list +++ b/test_data/GSE191067/GSE191067.urls.list @@ 
-42,3 +42,7 @@ https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos6/sra-pub-zq-40/SRR017/17249/SRR https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos6/sra-pub-zq-40/SRR017/17249/SRR17249695/SRR17249695.lite.1 https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos6/sra-pub-zq-40/SRR017/17249/SRR17249696/SRR17249696.lite.1 https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos6/sra-pub-zq-40/SRR017/17249/SRR17249697/SRR17249697.lite.1 +https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos6/sra-pub-zq-40/SRR017/17249/SRR17249698/SRR17249698.lite.1 +https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos6/sra-pub-zq-40/SRR017/17249/SRR17249699/SRR17249699.lite.1 +https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos6/sra-pub-zq-40/SRR017/17249/SRR17249700/SRR17249700.lite.1 +https://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos6/sra-pub-zq-40/SRR017/17249/SRR17249701/SRR17249701.lite.1 diff --git a/test_data/GSE264508/GSE264508.accessions.tsv b/test_data/GSE264508/GSE264508.accessions.tsv index 14c6407..c34ba28 100644 --- a/test_data/GSE264508/GSE264508.accessions.tsv +++ b/test_data/GSE264508/GSE264508.accessions.tsv @@ -1,3 +1,4 @@ +GSM8219159 SRS21085586 SRX24324206 SRR28758712,SRR28758713,SRR28758714,SRR28758715 GSM8219160 SRS21085587 SRX24324207 SRR28758707 GSM8219161 SRS21085570 SRX24324190 SRR28758748,SRR28758749,SRR28758750,SRR28758751 GSM8219162 SRS21085571 SRX24324191 SRR28758747 diff --git a/test_data/GSE264508/GSE264508.parsed.tsv b/test_data/GSE264508/GSE264508.parsed.tsv index 51e7be7..f1ec763 100644 --- a/test_data/GSE264508/GSE264508.parsed.tsv +++ b/test_data/GSE264508/GSE264508.parsed.tsv @@ -4,6 +4,10 @@ SRR28758708 Homo sapiens ftp.sra.ebi.ac.uk/vol1/srr/SRR287/008/SRR28758708/SRR28 SRR28758709 Homo sapiens ftp.sra.ebi.ac.uk/vol1/srr/SRR287/009/SRR28758709/SRR28758709 SRA SRR28758710 Homo sapiens ftp.sra.ebi.ac.uk/vol1/srr/SRR287/010/SRR28758710/SRR28758710.lite SRA SRR28758711 Homo sapiens ftp.sra.ebi.ac.uk/vol1/srr/SRR287/011/SRR28758711/SRR28758711.lite SRA +SRR28758712 Homo sapiens 
ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/012/SRR28758712/SRR28758712_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/012/SRR28758712/SRR28758712_2.fastq.gz ENAFQ +SRR28758713 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/013/SRR28758713/SRR28758713_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/013/SRR28758713/SRR28758713_2.fastq.gz ENAFQ +SRR28758714 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/014/SRR28758714/SRR28758714_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/014/SRR28758714/SRR28758714_2.fastq.gz ENAFQ +SRR28758715 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/015/SRR28758715/SRR28758715_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/015/SRR28758715/SRR28758715_2.fastq.gz ENAFQ SRR28758716 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/016/SRR28758716/SRR28758716_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/016/SRR28758716/SRR28758716_2.fastq.gz ENAFQ SRR28758717 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/017/SRR28758717/SRR28758717_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/017/SRR28758717/SRR28758717_2.fastq.gz ENAFQ SRR28758718 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/018/SRR28758718/SRR28758718_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/018/SRR28758718/SRR28758718_2.fastq.gz ENAFQ diff --git a/test_data/GSE264508/GSE264508.run.list b/test_data/GSE264508/GSE264508.run.list index b153fef..6087bf8 100644 --- a/test_data/GSE264508/GSE264508.run.list +++ b/test_data/GSE264508/GSE264508.run.list @@ -4,6 +4,10 @@ SRR28758708 SRR28758709 SRR28758710 SRR28758711 +SRR28758712 +SRR28758713 +SRR28758714 +SRR28758715 SRR28758716 SRR28758717 SRR28758718 diff --git a/test_data/GSE264508/GSE264508.sample.list b/test_data/GSE264508/GSE264508.sample.list index 758cb46..fb08cdb 100644 --- a/test_data/GSE264508/GSE264508.sample.list +++ b/test_data/GSE264508/GSE264508.sample.list @@ -1,3 +1,4 @@ +GSM8219159 GSM8219160 GSM8219161 GSM8219162 diff --git a/test_data/GSE264508/GSE264508.sample_x_run.tsv 
b/test_data/GSE264508/GSE264508.sample_x_run.tsv index 2b07044..1a7acd1 100644 --- a/test_data/GSE264508/GSE264508.sample_x_run.tsv +++ b/test_data/GSE264508/GSE264508.sample_x_run.tsv @@ -1,3 +1,4 @@ +GSM8219159 SRR28758712,SRR28758713,SRR28758714,SRR28758715 GSM8219160 SRR28758707 GSM8219161 SRR28758748,SRR28758749,SRR28758750,SRR28758751 GSM8219162 SRR28758747 diff --git a/test_data/GSE264508/GSE264508.urls.list b/test_data/GSE264508/GSE264508.urls.list index 9916afb..5143953 100644 --- a/test_data/GSE264508/GSE264508.urls.list +++ b/test_data/GSE264508/GSE264508.urls.list @@ -6,6 +6,14 @@ ftp.sra.ebi.ac.uk/vol1/srr/SRR287/008/SRR28758708/SRR28758708.lite ftp.sra.ebi.ac.uk/vol1/srr/SRR287/009/SRR28758709/SRR28758709 ftp.sra.ebi.ac.uk/vol1/srr/SRR287/010/SRR28758710/SRR28758710.lite ftp.sra.ebi.ac.uk/vol1/srr/SRR287/011/SRR28758711/SRR28758711.lite +ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/012/SRR28758712/SRR28758712_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/012/SRR28758712/SRR28758712_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/013/SRR28758713/SRR28758713_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/013/SRR28758713/SRR28758713_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/014/SRR28758714/SRR28758714_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/014/SRR28758714/SRR28758714_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/015/SRR28758715/SRR28758715_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/015/SRR28758715/SRR28758715_2.fastq.gz ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/016/SRR28758716/SRR28758716_1.fastq.gz ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/016/SRR28758716/SRR28758716_2.fastq.gz ftp.sra.ebi.ac.uk/vol1/fastq/SRR287/017/SRR28758717/SRR28758717_1.fastq.gz diff --git a/test_data/GSE274955/GSE274955.accessions.tsv b/test_data/GSE274955/GSE274955.accessions.tsv index eaa7d40..6d7ca83 100644 --- a/test_data/GSE274955/GSE274955.accessions.tsv +++ b/test_data/GSE274955/GSE274955.accessions.tsv @@ -1,3 +1,4 @@ +GSM8462811 SRS22358711 SRX25717811 SRR30256653 
GSM8462812 SRS22358709 SRX25717812 SRR30256652 GSM8462813 SRS22358710 SRX25717813 SRR30256651 GSM8462814 SRS22358713 SRX25717814 SRR30256650 diff --git a/test_data/GSE274955/GSE274955.parsed.tsv b/test_data/GSE274955/GSE274955.parsed.tsv index b951d24..beb39c8 100644 --- a/test_data/GSE274955/GSE274955.parsed.tsv +++ b/test_data/GSE274955/GSE274955.parsed.tsv @@ -3,3 +3,4 @@ SRR30256649 Homo sapiens ftp.sra.ebi.ac.uk/vol1/srr/SRR302/049/SRR30256649/s5_ba SRR30256650 Homo sapiens ftp.sra.ebi.ac.uk/vol1/srr/SRR302/050/SRR30256650/s4_bam.bam BAM SRR30256651 Homo sapiens ftp.sra.ebi.ac.uk/vol1/srr/SRR302/051/SRR30256651/s3_bam.bam BAM SRR30256652 Homo sapiens ftp.sra.ebi.ac.uk/vol1/srr/SRR302/052/SRR30256652/s2_bam.bam BAM +SRR30256653 Homo sapiens https://sra-pub-src-1.s3.amazonaws.com/SRR30256653/s1_bam.bam.1 BAM diff --git a/test_data/GSE274955/GSE274955.run.list b/test_data/GSE274955/GSE274955.run.list index 10bed13..516bd27 100644 --- a/test_data/GSE274955/GSE274955.run.list +++ b/test_data/GSE274955/GSE274955.run.list @@ -3,3 +3,4 @@ SRR30256649 SRR30256650 SRR30256651 SRR30256652 +SRR30256653 diff --git a/test_data/GSE274955/GSE274955.sample.list b/test_data/GSE274955/GSE274955.sample.list index 88c6e99..beb51f6 100644 --- a/test_data/GSE274955/GSE274955.sample.list +++ b/test_data/GSE274955/GSE274955.sample.list @@ -1,3 +1,4 @@ +GSM8462811 GSM8462812 GSM8462813 GSM8462814 diff --git a/test_data/GSE274955/GSE274955.sample_x_run.tsv b/test_data/GSE274955/GSE274955.sample_x_run.tsv index 28fe9d6..8708493 100644 --- a/test_data/GSE274955/GSE274955.sample_x_run.tsv +++ b/test_data/GSE274955/GSE274955.sample_x_run.tsv @@ -1,3 +1,4 @@ +GSM8462811 SRR30256653 GSM8462812 SRR30256652 GSM8462813 SRR30256651 GSM8462814 SRR30256650 diff --git a/test_data/GSE274955/GSE274955.urls.list b/test_data/GSE274955/GSE274955.urls.list index 642e985..3534e33 100644 --- a/test_data/GSE274955/GSE274955.urls.list +++ b/test_data/GSE274955/GSE274955.urls.list @@ -3,3 +3,4 @@ 
ftp.sra.ebi.ac.uk/vol1/srr/SRR302/049/SRR30256649/s5_bam.bam ftp.sra.ebi.ac.uk/vol1/srr/SRR302/050/SRR30256650/s4_bam.bam ftp.sra.ebi.ac.uk/vol1/srr/SRR302/051/SRR30256651/s3_bam.bam ftp.sra.ebi.ac.uk/vol1/srr/SRR302/052/SRR30256652/s2_bam.bam +https://sra-pub-src-1.s3.amazonaws.com/SRR30256653/s1_bam.bam.1 From 89c2d0536e69922a7b11f6aeea322f9e62fb0da6 Mon Sep 17 00:00:00 2001 From: cellgeni Date: Tue, 21 Jan 2025 13:21:06 +0000 Subject: [PATCH 11/18] Added more test datasets --- .../E-MTAB-9221/E-MTAB-9221.accessions.tsv | 10 + test_data/E-MTAB-9221/E-MTAB-9221.parsed.tsv | 20 ++ test_data/E-MTAB-9221/E-MTAB-9221.run.list | 20 ++ test_data/E-MTAB-9221/E-MTAB-9221.sample.list | 10 + .../E-MTAB-9221/E-MTAB-9221.sample_x_run.tsv | 10 + test_data/E-MTAB-9221/E-MTAB-9221.urls.list | 40 +++ test_data/GSE111360/GSE111360.accessions.tsv | 17 ++ test_data/GSE111360/GSE111360.parsed.tsv | 23 ++ test_data/GSE111360/GSE111360.run.list | 23 ++ test_data/GSE111360/GSE111360.sample.list | 17 ++ .../GSE111360/GSE111360.sample_x_run.tsv | 17 ++ test_data/GSE111360/GSE111360.subset.list | 17 ++ test_data/GSE111360/GSE111360.urls.list | 46 ++++ test_data/GSE117988/GSE117988.accessions.tsv | 6 + test_data/GSE117988/GSE117988.parsed.tsv | 6 + test_data/GSE117988/GSE117988.run.list | 6 + test_data/GSE117988/GSE117988.sample.list | 6 + .../GSE117988/GSE117988.sample_x_run.tsv | 6 + test_data/GSE117988/GSE117988.urls.list | 6 + test_data/GSE160513/GSE160513.accessions.tsv | 5 + test_data/GSE160513/GSE160513.parsed.tsv | 36 +++ test_data/GSE160513/GSE160513.run.list | 36 +++ test_data/GSE160513/GSE160513.sample.list | 5 + .../GSE160513/GSE160513.sample_x_run.tsv | 5 + test_data/GSE160513/GSE160513.urls.list | 72 +++++ .../PRJNA511433/PRJNA511433.accessions.tsv | 8 + test_data/PRJNA511433/PRJNA511433.parsed.tsv | 128 +++++++++ test_data/PRJNA511433/PRJNA511433.run.list | 128 +++++++++ test_data/PRJNA511433/PRJNA511433.sample.list | 8 + .../PRJNA511433/PRJNA511433.sample_x_run.tsv | 8 + 
test_data/PRJNA511433/PRJNA511433.urls.list | 256 ++++++++++++++++++ 31 files changed, 1001 insertions(+) create mode 100644 test_data/E-MTAB-9221/E-MTAB-9221.accessions.tsv create mode 100644 test_data/E-MTAB-9221/E-MTAB-9221.parsed.tsv create mode 100644 test_data/E-MTAB-9221/E-MTAB-9221.run.list create mode 100644 test_data/E-MTAB-9221/E-MTAB-9221.sample.list create mode 100644 test_data/E-MTAB-9221/E-MTAB-9221.sample_x_run.tsv create mode 100644 test_data/E-MTAB-9221/E-MTAB-9221.urls.list create mode 100644 test_data/GSE111360/GSE111360.accessions.tsv create mode 100644 test_data/GSE111360/GSE111360.parsed.tsv create mode 100644 test_data/GSE111360/GSE111360.run.list create mode 100644 test_data/GSE111360/GSE111360.sample.list create mode 100644 test_data/GSE111360/GSE111360.sample_x_run.tsv create mode 100644 test_data/GSE111360/GSE111360.subset.list create mode 100644 test_data/GSE111360/GSE111360.urls.list create mode 100644 test_data/GSE117988/GSE117988.accessions.tsv create mode 100644 test_data/GSE117988/GSE117988.parsed.tsv create mode 100644 test_data/GSE117988/GSE117988.run.list create mode 100644 test_data/GSE117988/GSE117988.sample.list create mode 100644 test_data/GSE117988/GSE117988.sample_x_run.tsv create mode 100644 test_data/GSE117988/GSE117988.urls.list create mode 100644 test_data/GSE160513/GSE160513.accessions.tsv create mode 100644 test_data/GSE160513/GSE160513.parsed.tsv create mode 100644 test_data/GSE160513/GSE160513.run.list create mode 100644 test_data/GSE160513/GSE160513.sample.list create mode 100644 test_data/GSE160513/GSE160513.sample_x_run.tsv create mode 100644 test_data/GSE160513/GSE160513.urls.list create mode 100644 test_data/PRJNA511433/PRJNA511433.accessions.tsv create mode 100644 test_data/PRJNA511433/PRJNA511433.parsed.tsv create mode 100644 test_data/PRJNA511433/PRJNA511433.run.list create mode 100644 test_data/PRJNA511433/PRJNA511433.sample.list create mode 100644 test_data/PRJNA511433/PRJNA511433.sample_x_run.tsv create 
mode 100644 test_data/PRJNA511433/PRJNA511433.urls.list diff --git a/test_data/E-MTAB-9221/E-MTAB-9221.accessions.tsv b/test_data/E-MTAB-9221/E-MTAB-9221.accessions.tsv new file mode 100644 index 0000000..cbde3f6 --- /dev/null +++ b/test_data/E-MTAB-9221/E-MTAB-9221.accessions.tsv @@ -0,0 +1,10 @@ +- ERS4689152 ERX4219953 ERR4265292,ERR4265293 +- ERS4689153 ERX4219954 ERR4265294,ERR4265295 +- ERS4689154 ERX4219955 ERR4265296,ERR4265297 +- ERS4689155 ERX4219956 ERR4265298,ERR4265299 +- ERS4689156 ERX4219957 ERR4265300,ERR4265301 +- ERS4689157 ERX4219958 ERR4265302,ERR4265303 +- ERS4689158 ERX4219959 ERR4265304,ERR4265305 +- ERS4689159 ERX4219960 ERR4265306,ERR4265307 +- ERS4689160 ERX4219962 ERR4265310,ERR4265311 +- ERS4689161 ERX4219961 ERR4265308,ERR4265309 diff --git a/test_data/E-MTAB-9221/E-MTAB-9221.parsed.tsv b/test_data/E-MTAB-9221/E-MTAB-9221.parsed.tsv new file mode 100644 index 0000000..fdbb0d3 --- /dev/null +++ b/test_data/E-MTAB-9221/E-MTAB-9221.parsed.tsv @@ -0,0 +1,20 @@ +ERR4265292 Homo sapiens ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-9221/UPN2_S2_L001_R1_001.fastq.gz;ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-9221/UPN2_S2_L001_R2_001.fastq.gz ORIFQ +ERR4265293 Homo sapiens ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-9221/UPN2_S2_L002_R1_001.fastq.gz;ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-9221/UPN2_S2_L002_R2_001.fastq.gz ORIFQ +ERR4265294 Homo sapiens ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-9221/UPN3_S3_L001_R1_001.fastq.gz;ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-9221/UPN3_S3_L001_R2_001.fastq.gz ORIFQ +ERR4265295 Homo sapiens ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-9221/UPN3_S3_L002_R1_001.fastq.gz;ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-9221/UPN3_S3_L002_R2_001.fastq.gz ORIFQ +ERR4265296 Homo 
sapiens ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-9221/UPN5_S5_L001_R1_001.fastq.gz;ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-9221/UPN5_S5_L001_R2_001.fastq.gz ORIFQ +ERR4265297 Homo sapiens ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-9221/UPN5_S5_L002_R1_001.fastq.gz;ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-9221/UPN5_S5_L002_R2_001.fastq.gz ORIFQ +ERR4265298 Homo sapiens ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-9221/UPN4_S4_L001_R1_001.fastq.gz;ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-9221/UPN4_S4_L001_R2_001.fastq.gz ORIFQ +ERR4265299 Homo sapiens ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-9221/UPN4_S4_L002_R1_001.fastq.gz;ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-9221/UPN4_S4_L002_R2_001.fastq.gz ORIFQ +ERR4265300 Homo sapiens ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-9221/UPN4_j10_S2_L001_R1_001.fastq.gz;ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-9221/UPN4_j10_S2_L001_R2_001.fastq.gz ORIFQ +ERR4265301 Homo sapiens ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-9221/UPN4_j10_S2_L002_R1_001.fastq.gz;ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-9221/UPN4_j10_S2_L002_R2_001.fastq.gz ORIFQ +ERR4265302 Homo sapiens ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-9221/UPN1_S1_L001_R1_001.fastq.gz;ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-9221/UPN1_S1_L001_R2_001.fastq.gz ORIFQ +ERR4265303 Homo sapiens ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-9221/UPN1_S1_L002_R1_001.fastq.gz;ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-9221/UPN1_S1_L002_R2_001.fastq.gz ORIFQ +ERR4265304 Homo sapiens 
ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-9221/UPN1_j10_S1_L001_R1_001.fastq.gz;ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-9221/UPN1_j10_S1_L001_R2_001.fastq.gz ORIFQ +ERR4265305 Homo sapiens ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-9221/UPN1_j10_S1_L002_R1_001.fastq.gz;ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-9221/UPN1_j10_S1_L002_R2_001.fastq.gz ORIFQ +ERR4265306 Homo sapiens ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-9221/UPN6_S6_L001_R1_001.fastq.gz;ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-9221/UPN6_S6_L001_R2_001.fastq.gz ORIFQ +ERR4265307 Homo sapiens ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-9221/UPN6_S6_L002_R1_001.fastq.gz;ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-9221/UPN6_S6_L002_R2_001.fastq.gz ORIFQ +ERR4265308 Homo sapiens ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-9221/UPN7_j17_S4_L001_R1_001.fastq.gz;ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-9221/UPN7_j17_S4_L001_R2_001.fastq.gz ORIFQ +ERR4265309 Homo sapiens ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-9221/UPN7_j17_S4_L002_R1_001.fastq.gz;ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-9221/UPN7_j17_S4_L002_R2_001.fastq.gz ORIFQ +ERR4265310 Homo sapiens ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-9221/UPN6_j10_S3_L001_R1_001.fastq.gz;ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-9221/UPN6_j10_S3_L001_R2_001.fastq.gz ORIFQ +ERR4265311 Homo sapiens ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-9221/UPN6_j10_S3_L002_R1_001.fastq.gz;ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-9221/UPN6_j10_S3_L002_R2_001.fastq.gz ORIFQ diff --git 
a/test_data/E-MTAB-9221/E-MTAB-9221.run.list b/test_data/E-MTAB-9221/E-MTAB-9221.run.list new file mode 100644 index 0000000..d6a8480 --- /dev/null +++ b/test_data/E-MTAB-9221/E-MTAB-9221.run.list @@ -0,0 +1,20 @@ +ERR4265292 +ERR4265293 +ERR4265294 +ERR4265295 +ERR4265296 +ERR4265297 +ERR4265298 +ERR4265299 +ERR4265300 +ERR4265301 +ERR4265302 +ERR4265303 +ERR4265304 +ERR4265305 +ERR4265306 +ERR4265307 +ERR4265308 +ERR4265309 +ERR4265310 +ERR4265311 diff --git a/test_data/E-MTAB-9221/E-MTAB-9221.sample.list b/test_data/E-MTAB-9221/E-MTAB-9221.sample.list new file mode 100644 index 0000000..e1162da --- /dev/null +++ b/test_data/E-MTAB-9221/E-MTAB-9221.sample.list @@ -0,0 +1,10 @@ +ERS4689152 +ERS4689153 +ERS4689154 +ERS4689155 +ERS4689156 +ERS4689157 +ERS4689158 +ERS4689159 +ERS4689160 +ERS4689161 diff --git a/test_data/E-MTAB-9221/E-MTAB-9221.sample_x_run.tsv b/test_data/E-MTAB-9221/E-MTAB-9221.sample_x_run.tsv new file mode 100644 index 0000000..6f26ad5 --- /dev/null +++ b/test_data/E-MTAB-9221/E-MTAB-9221.sample_x_run.tsv @@ -0,0 +1,10 @@ +ERS4689152 ERR4265292,ERR4265293 +ERS4689153 ERR4265294,ERR4265295 +ERS4689154 ERR4265296,ERR4265297 +ERS4689155 ERR4265298,ERR4265299 +ERS4689156 ERR4265300,ERR4265301 +ERS4689157 ERR4265302,ERR4265303 +ERS4689158 ERR4265304,ERR4265305 +ERS4689159 ERR4265306,ERR4265307 +ERS4689160 ERR4265310,ERR4265311 +ERS4689161 ERR4265308,ERR4265309 diff --git a/test_data/E-MTAB-9221/E-MTAB-9221.urls.list b/test_data/E-MTAB-9221/E-MTAB-9221.urls.list new file mode 100644 index 0000000..bbe8ef9 --- /dev/null +++ b/test_data/E-MTAB-9221/E-MTAB-9221.urls.list @@ -0,0 +1,40 @@ +ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-9221/UPN2_S2_L001_R1_001.fastq.gz +ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-9221/UPN2_S2_L001_R2_001.fastq.gz +ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-9221/UPN2_S2_L002_R1_001.fastq.gz 
+ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-9221/UPN2_S2_L002_R2_001.fastq.gz +ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-9221/UPN3_S3_L001_R1_001.fastq.gz +ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-9221/UPN3_S3_L001_R2_001.fastq.gz +ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-9221/UPN3_S3_L002_R1_001.fastq.gz +ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-9221/UPN3_S3_L002_R2_001.fastq.gz +ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-9221/UPN5_S5_L001_R1_001.fastq.gz +ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-9221/UPN5_S5_L001_R2_001.fastq.gz +ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-9221/UPN5_S5_L002_R1_001.fastq.gz +ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-9221/UPN5_S5_L002_R2_001.fastq.gz +ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-9221/UPN4_S4_L001_R1_001.fastq.gz +ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-9221/UPN4_S4_L001_R2_001.fastq.gz +ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-9221/UPN4_S4_L002_R1_001.fastq.gz +ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-9221/UPN4_S4_L002_R2_001.fastq.gz +ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-9221/UPN4_j10_S2_L001_R1_001.fastq.gz +ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-9221/UPN4_j10_S2_L001_R2_001.fastq.gz +ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-9221/UPN4_j10_S2_L002_R1_001.fastq.gz +ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-9221/UPN4_j10_S2_L002_R2_001.fastq.gz +ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-9221/UPN1_S1_L001_R1_001.fastq.gz 
+ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-9221/UPN1_S1_L001_R2_001.fastq.gz +ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-9221/UPN1_S1_L002_R1_001.fastq.gz +ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-9221/UPN1_S1_L002_R2_001.fastq.gz +ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-9221/UPN1_j10_S1_L001_R1_001.fastq.gz +ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-9221/UPN1_j10_S1_L001_R2_001.fastq.gz +ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-9221/UPN1_j10_S1_L002_R1_001.fastq.gz +ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-9221/UPN1_j10_S1_L002_R2_001.fastq.gz +ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-9221/UPN6_S6_L001_R1_001.fastq.gz +ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-9221/UPN6_S6_L001_R2_001.fastq.gz +ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-9221/UPN6_S6_L002_R1_001.fastq.gz +ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-9221/UPN6_S6_L002_R2_001.fastq.gz +ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-9221/UPN7_j17_S4_L001_R1_001.fastq.gz +ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-9221/UPN7_j17_S4_L001_R2_001.fastq.gz +ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-9221/UPN7_j17_S4_L002_R1_001.fastq.gz +ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-9221/UPN7_j17_S4_L002_R2_001.fastq.gz +ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-9221/UPN6_j10_S3_L001_R1_001.fastq.gz +ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-9221/UPN6_j10_S3_L001_R2_001.fastq.gz +ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-9221/UPN6_j10_S3_L002_R1_001.fastq.gz 
+ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-9221/UPN6_j10_S3_L002_R2_001.fastq.gz diff --git a/test_data/GSE111360/GSE111360.accessions.tsv b/test_data/GSE111360/GSE111360.accessions.tsv new file mode 100644 index 0000000..ace7ad4 --- /dev/null +++ b/test_data/GSE111360/GSE111360.accessions.tsv @@ -0,0 +1,17 @@ +GSM3029087 SRS3013127 SRX3757618 SRR6798781 +GSM3029088 SRS3013126 SRX3757619 SRR6798782 +GSM3029091 SRS3013131 SRX3757622 SRR6798785 +GSM3029092 SRS3013130 SRX3757623 SRR6798786,SRR6910844,SRR6910845,SRR6910846 +GSM3029103 SRS3013141 SRX3757634 SRR6798797,SRR6911162,SRR6911163,SRR6911164 +GSM3029104 SRS3013142 SRX3757635 SRR6798798 +GSM3029107 SRS3013145 SRX3757638 SRR6798801 +GSM3029108 SRS3013146 SRX3757639 SRR6798802 +GSM3029119 SRS3013158 SRX3757650 SRR6798813 +GSM3457010 SRS4016071 SRX4978418 SRR8157525 +GSM3457011 SRS4016072 SRX4978419 SRR8157526 +GSM3457012 SRS4016073 SRX4978420 SRR8157527 +GSM3457013 SRS4016074 SRX4978421 SRR8157528 +GSM3457014 SRS4016075 SRX4978422 SRR8157529 +GSM3457015 SRS4016076 SRX4978423 SRR8157530 +GSM3457016 SRS4016077 SRX4978424 SRR8157531 +GSM3457017 SRS4016078 SRX4978425 SRR8157532 diff --git a/test_data/GSE111360/GSE111360.parsed.tsv b/test_data/GSE111360/GSE111360.parsed.tsv new file mode 100644 index 0000000..121ea2f --- /dev/null +++ b/test_data/GSE111360/GSE111360.parsed.tsv @@ -0,0 +1,23 @@ +SRR6798781 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR679/001/SRR6798781/SRR6798781_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR679/001/SRR6798781/SRR6798781_2.fastq.gz ENAFQ +SRR6798782 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR679/002/SRR6798782/SRR6798782_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR679/002/SRR6798782/SRR6798782_2.fastq.gz ENAFQ +SRR6798785 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR679/005/SRR6798785/SRR6798785_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR679/005/SRR6798785/SRR6798785_2.fastq.gz ENAFQ +SRR6798786 Homo sapiens 
ftp.sra.ebi.ac.uk/vol1/fastq/SRR679/006/SRR6798786/SRR6798786_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR679/006/SRR6798786/SRR6798786_2.fastq.gz ENAFQ +SRR6798797 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR679/007/SRR6798797/SRR6798797_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR679/007/SRR6798797/SRR6798797_2.fastq.gz ENAFQ +SRR6798798 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR679/008/SRR6798798/SRR6798798_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR679/008/SRR6798798/SRR6798798_2.fastq.gz ENAFQ +SRR6798801 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR679/001/SRR6798801/SRR6798801_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR679/001/SRR6798801/SRR6798801_2.fastq.gz ENAFQ +SRR6798802 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR679/002/SRR6798802/SRR6798802_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR679/002/SRR6798802/SRR6798802_2.fastq.gz ENAFQ +SRR6798813 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR679/003/SRR6798813/SRR6798813_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR679/003/SRR6798813/SRR6798813_2.fastq.gz ENAFQ +SRR6910844 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR691/004/SRR6910844/SRR6910844_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR691/004/SRR6910844/SRR6910844_2.fastq.gz ENAFQ +SRR6910845 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR691/005/SRR6910845/SRR6910845_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR691/005/SRR6910845/SRR6910845_2.fastq.gz ENAFQ +SRR6910846 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR691/006/SRR6910846/SRR6910846_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR691/006/SRR6910846/SRR6910846_2.fastq.gz ENAFQ +SRR6911162 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR691/002/SRR6911162/SRR6911162_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR691/002/SRR6911162/SRR6911162_2.fastq.gz ENAFQ +SRR6911163 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR691/003/SRR6911163/SRR6911163_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR691/003/SRR6911163/SRR6911163_2.fastq.gz ENAFQ +SRR6911164 Homo sapiens 
ftp.sra.ebi.ac.uk/vol1/fastq/SRR691/004/SRR6911164/SRR6911164_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR691/004/SRR6911164/SRR6911164_2.fastq.gz ENAFQ +SRR8157525 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR815/005/SRR8157525/SRR8157525_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR815/005/SRR8157525/SRR8157525_2.fastq.gz ENAFQ +SRR8157526 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR815/006/SRR8157526/SRR8157526_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR815/006/SRR8157526/SRR8157526_2.fastq.gz ENAFQ +SRR8157527 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR815/007/SRR8157527/SRR8157527_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR815/007/SRR8157527/SRR8157527_2.fastq.gz ENAFQ +SRR8157528 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR815/008/SRR8157528/SRR8157528_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR815/008/SRR8157528/SRR8157528_2.fastq.gz ENAFQ +SRR8157529 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR815/009/SRR8157529/SRR8157529_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR815/009/SRR8157529/SRR8157529_2.fastq.gz ENAFQ +SRR8157530 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR815/000/SRR8157530/SRR8157530_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR815/000/SRR8157530/SRR8157530_2.fastq.gz ENAFQ +SRR8157531 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR815/001/SRR8157531/SRR8157531_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR815/001/SRR8157531/SRR8157531_2.fastq.gz ENAFQ +SRR8157532 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR815/002/SRR8157532/SRR8157532_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR815/002/SRR8157532/SRR8157532_2.fastq.gz ENAFQ diff --git a/test_data/GSE111360/GSE111360.run.list b/test_data/GSE111360/GSE111360.run.list new file mode 100644 index 0000000..0db0f31 --- /dev/null +++ b/test_data/GSE111360/GSE111360.run.list @@ -0,0 +1,23 @@ +SRR6798781 +SRR6798782 +SRR6798785 +SRR6798786 +SRR6798797 +SRR6798798 +SRR6798801 +SRR6798802 +SRR6798813 +SRR6910844 +SRR6910845 +SRR6910846 +SRR6911162 +SRR6911163 +SRR6911164 +SRR8157525 +SRR8157526 +SRR8157527 
+SRR8157528 +SRR8157529 +SRR8157530 +SRR8157531 +SRR8157532 diff --git a/test_data/GSE111360/GSE111360.sample.list b/test_data/GSE111360/GSE111360.sample.list new file mode 100644 index 0000000..fbc81f5 --- /dev/null +++ b/test_data/GSE111360/GSE111360.sample.list @@ -0,0 +1,17 @@ +GSM3029087 +GSM3029088 +GSM3029091 +GSM3029092 +GSM3029103 +GSM3029104 +GSM3029107 +GSM3029108 +GSM3029119 +GSM3457010 +GSM3457011 +GSM3457012 +GSM3457013 +GSM3457014 +GSM3457015 +GSM3457016 +GSM3457017 diff --git a/test_data/GSE111360/GSE111360.sample_x_run.tsv b/test_data/GSE111360/GSE111360.sample_x_run.tsv new file mode 100644 index 0000000..7cd9b08 --- /dev/null +++ b/test_data/GSE111360/GSE111360.sample_x_run.tsv @@ -0,0 +1,17 @@ +GSM3029087 SRR6798781 +GSM3029088 SRR6798782 +GSM3029091 SRR6798785 +GSM3029092 SRR6798786,SRR6910844,SRR6910845,SRR6910846 +GSM3029103 SRR6798797,SRR6911162,SRR6911163,SRR6911164 +GSM3029104 SRR6798798 +GSM3029107 SRR6798801 +GSM3029108 SRR6798802 +GSM3029119 SRR6798813 +GSM3457010 SRR8157525 +GSM3457011 SRR8157526 +GSM3457012 SRR8157527 +GSM3457013 SRR8157528 +GSM3457014 SRR8157529 +GSM3457015 SRR8157530 +GSM3457016 SRR8157531 +GSM3457017 SRR8157532 diff --git a/test_data/GSE111360/GSE111360.subset.list b/test_data/GSE111360/GSE111360.subset.list new file mode 100644 index 0000000..15ffeca --- /dev/null +++ b/test_data/GSE111360/GSE111360.subset.list @@ -0,0 +1,17 @@ +GSM3457012 +GSM3029088 +GSM3457013 +GSM3029108 +GSM3029119 +GSM3457010 +GSM3029091 +GSM3029103 +GSM3457016 +GSM3029104 +GSM3029087 +GSM3457015 +GSM3457011 +GSM3457014 +GSM3029092 +GSM3029107 +GSM3457017 diff --git a/test_data/GSE111360/GSE111360.urls.list b/test_data/GSE111360/GSE111360.urls.list new file mode 100644 index 0000000..d60155b --- /dev/null +++ b/test_data/GSE111360/GSE111360.urls.list @@ -0,0 +1,46 @@ +ftp.sra.ebi.ac.uk/vol1/fastq/SRR679/001/SRR6798781/SRR6798781_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR679/001/SRR6798781/SRR6798781_2.fastq.gz 
+ftp.sra.ebi.ac.uk/vol1/fastq/SRR679/002/SRR6798782/SRR6798782_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR679/002/SRR6798782/SRR6798782_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR679/005/SRR6798785/SRR6798785_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR679/005/SRR6798785/SRR6798785_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR679/006/SRR6798786/SRR6798786_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR679/006/SRR6798786/SRR6798786_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR679/007/SRR6798797/SRR6798797_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR679/007/SRR6798797/SRR6798797_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR679/008/SRR6798798/SRR6798798_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR679/008/SRR6798798/SRR6798798_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR679/001/SRR6798801/SRR6798801_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR679/001/SRR6798801/SRR6798801_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR679/002/SRR6798802/SRR6798802_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR679/002/SRR6798802/SRR6798802_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR679/003/SRR6798813/SRR6798813_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR679/003/SRR6798813/SRR6798813_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR691/004/SRR6910844/SRR6910844_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR691/004/SRR6910844/SRR6910844_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR691/005/SRR6910845/SRR6910845_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR691/005/SRR6910845/SRR6910845_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR691/006/SRR6910846/SRR6910846_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR691/006/SRR6910846/SRR6910846_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR691/002/SRR6911162/SRR6911162_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR691/002/SRR6911162/SRR6911162_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR691/003/SRR6911163/SRR6911163_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR691/003/SRR6911163/SRR6911163_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR691/004/SRR6911164/SRR6911164_1.fastq.gz 
+ftp.sra.ebi.ac.uk/vol1/fastq/SRR691/004/SRR6911164/SRR6911164_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR815/005/SRR8157525/SRR8157525_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR815/005/SRR8157525/SRR8157525_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR815/006/SRR8157526/SRR8157526_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR815/006/SRR8157526/SRR8157526_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR815/007/SRR8157527/SRR8157527_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR815/007/SRR8157527/SRR8157527_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR815/008/SRR8157528/SRR8157528_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR815/008/SRR8157528/SRR8157528_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR815/009/SRR8157529/SRR8157529_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR815/009/SRR8157529/SRR8157529_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR815/000/SRR8157530/SRR8157530_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR815/000/SRR8157530/SRR8157530_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR815/001/SRR8157531/SRR8157531_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR815/001/SRR8157531/SRR8157531_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR815/002/SRR8157532/SRR8157532_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR815/002/SRR8157532/SRR8157532_2.fastq.gz diff --git a/test_data/GSE117988/GSE117988.accessions.tsv b/test_data/GSE117988/GSE117988.accessions.tsv new file mode 100644 index 0000000..fcc3f7b --- /dev/null +++ b/test_data/GSE117988/GSE117988.accessions.tsv @@ -0,0 +1,6 @@ +GSM3330559 SRS3693908 SRX4579479 SRR7722937 +GSM3330560 SRS3693909 SRX4579480 SRR7722938 +GSM3330561 SRS3693910 SRX4579481 SRR7722939 +GSM3330562 SRS3693911 SRX4579482 SRR7722940 +GSM3330563 SRS3693912 SRX4579483 SRR7722941 +GSM3330564 SRS3693913 SRX4579484 SRR7722942 diff --git a/test_data/GSE117988/GSE117988.parsed.tsv b/test_data/GSE117988/GSE117988.parsed.tsv new file mode 100644 index 0000000..a4c2fd1 --- /dev/null +++ b/test_data/GSE117988/GSE117988.parsed.tsv @@ -0,0 +1,6 @@ +SRR7722937 Homo 
sapiens ftp.sra.ebi.ac.uk/vol1/srr/SRR772/007/SRR7722937 SRA +SRR7722938 Homo sapiens ftp.sra.ebi.ac.uk/vol1/srr/SRR772/008/SRR7722938 SRA +SRR7722939 Homo sapiens ftp.sra.ebi.ac.uk/vol1/srr/SRR772/009/SRR7722939 SRA +SRR7722940 Homo sapiens ftp.sra.ebi.ac.uk/vol1/srr/SRR772/000/SRR7722940 SRA +SRR7722941 Homo sapiens ftp.sra.ebi.ac.uk/vol1/srr/SRR772/001/SRR7722941 SRA +SRR7722942 Homo sapiens ftp.sra.ebi.ac.uk/vol1/srr/SRR772/002/SRR7722942 SRA diff --git a/test_data/GSE117988/GSE117988.run.list b/test_data/GSE117988/GSE117988.run.list new file mode 100644 index 0000000..e9f134b --- /dev/null +++ b/test_data/GSE117988/GSE117988.run.list @@ -0,0 +1,6 @@ +SRR7722937 +SRR7722938 +SRR7722939 +SRR7722940 +SRR7722941 +SRR7722942 diff --git a/test_data/GSE117988/GSE117988.sample.list b/test_data/GSE117988/GSE117988.sample.list new file mode 100644 index 0000000..b8a527e --- /dev/null +++ b/test_data/GSE117988/GSE117988.sample.list @@ -0,0 +1,6 @@ +GSM3330559 +GSM3330560 +GSM3330561 +GSM3330562 +GSM3330563 +GSM3330564 diff --git a/test_data/GSE117988/GSE117988.sample_x_run.tsv b/test_data/GSE117988/GSE117988.sample_x_run.tsv new file mode 100644 index 0000000..a7dddfc --- /dev/null +++ b/test_data/GSE117988/GSE117988.sample_x_run.tsv @@ -0,0 +1,6 @@ +GSM3330559 SRR7722937 +GSM3330560 SRR7722938 +GSM3330561 SRR7722939 +GSM3330562 SRR7722940 +GSM3330563 SRR7722941 +GSM3330564 SRR7722942 diff --git a/test_data/GSE117988/GSE117988.urls.list b/test_data/GSE117988/GSE117988.urls.list new file mode 100644 index 0000000..6eb00da --- /dev/null +++ b/test_data/GSE117988/GSE117988.urls.list @@ -0,0 +1,6 @@ +ftp.sra.ebi.ac.uk/vol1/srr/SRR772/007/SRR7722937 +ftp.sra.ebi.ac.uk/vol1/srr/SRR772/008/SRR7722938 +ftp.sra.ebi.ac.uk/vol1/srr/SRR772/009/SRR7722939 +ftp.sra.ebi.ac.uk/vol1/srr/SRR772/000/SRR7722940 +ftp.sra.ebi.ac.uk/vol1/srr/SRR772/001/SRR7722941 +ftp.sra.ebi.ac.uk/vol1/srr/SRR772/002/SRR7722942 diff --git a/test_data/GSE160513/GSE160513.accessions.tsv 
b/test_data/GSE160513/GSE160513.accessions.tsv new file mode 100644 index 0000000..8a47c49 --- /dev/null +++ b/test_data/GSE160513/GSE160513.accessions.tsv @@ -0,0 +1,5 @@ +GSM4874359 SRS7623456 SRX9406032 SRR12952961,SRR12952962,SRR12952963,SRR12952964 +GSM4874360 SRS7623457 SRX9406033 SRR12952965,SRR12952966,SRR12952967,SRR12952968,SRR12952969,SRR12952970,SRR12952971,SRR12952972 +GSM4874361 SRS7623458 SRX9406034 SRR12952973,SRR12952974,SRR12952975,SRR12952976,SRR12952977,SRR12952978,SRR12952979,SRR12952980 +GSM4874362 SRS7623459 SRX9406035 SRR12952981,SRR12952982,SRR12952983,SRR12952984,SRR12952985,SRR12952986,SRR12952987,SRR12952988 +GSM4874363 SRS7623460 SRX9406036 SRR12952989,SRR12952990,SRR12952991,SRR12952992,SRR12952993,SRR12952994,SRR12952995,SRR12952996 diff --git a/test_data/GSE160513/GSE160513.parsed.tsv b/test_data/GSE160513/GSE160513.parsed.tsv new file mode 100644 index 0000000..91f25ae --- /dev/null +++ b/test_data/GSE160513/GSE160513.parsed.tsv @@ -0,0 +1,36 @@ +SRR12952961 Mus musculus ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/061/SRR12952961/SRR12952961_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/061/SRR12952961/SRR12952961_2.fastq.gz ENAFQ +SRR12952962 Mus musculus ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/062/SRR12952962/SRR12952962_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/062/SRR12952962/SRR12952962_2.fastq.gz ENAFQ +SRR12952963 Mus musculus ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/063/SRR12952963/SRR12952963_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/063/SRR12952963/SRR12952963_2.fastq.gz ENAFQ +SRR12952964 Mus musculus ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/064/SRR12952964/SRR12952964_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/064/SRR12952964/SRR12952964_2.fastq.gz ENAFQ +SRR12952965 Mus musculus ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/065/SRR12952965/SRR12952965_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/065/SRR12952965/SRR12952965_2.fastq.gz ENAFQ +SRR12952966 Mus musculus 
ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/066/SRR12952966/SRR12952966_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/066/SRR12952966/SRR12952966_2.fastq.gz ENAFQ +SRR12952967 Mus musculus ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/067/SRR12952967/SRR12952967_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/067/SRR12952967/SRR12952967_2.fastq.gz ENAFQ +SRR12952968 Mus musculus ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/068/SRR12952968/SRR12952968_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/068/SRR12952968/SRR12952968_2.fastq.gz ENAFQ +SRR12952969 Mus musculus ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/069/SRR12952969/SRR12952969_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/069/SRR12952969/SRR12952969_2.fastq.gz ENAFQ +SRR12952970 Mus musculus ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/070/SRR12952970/SRR12952970_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/070/SRR12952970/SRR12952970_2.fastq.gz ENAFQ +SRR12952971 Mus musculus ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/071/SRR12952971/SRR12952971_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/071/SRR12952971/SRR12952971_2.fastq.gz ENAFQ +SRR12952972 Mus musculus ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/072/SRR12952972/SRR12952972_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/072/SRR12952972/SRR12952972_2.fastq.gz ENAFQ +SRR12952973 Mus musculus ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/073/SRR12952973/SRR12952973_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/073/SRR12952973/SRR12952973_2.fastq.gz ENAFQ +SRR12952974 Mus musculus ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/074/SRR12952974/SRR12952974_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/074/SRR12952974/SRR12952974_2.fastq.gz ENAFQ +SRR12952975 Mus musculus ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/075/SRR12952975/SRR12952975_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/075/SRR12952975/SRR12952975_2.fastq.gz ENAFQ +SRR12952976 Mus musculus ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/076/SRR12952976/SRR12952976_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/076/SRR12952976/SRR12952976_2.fastq.gz ENAFQ +SRR12952977 Mus 
musculus ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/077/SRR12952977/SRR12952977_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/077/SRR12952977/SRR12952977_2.fastq.gz ENAFQ +SRR12952978 Mus musculus ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/078/SRR12952978/SRR12952978_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/078/SRR12952978/SRR12952978_2.fastq.gz ENAFQ +SRR12952979 Mus musculus ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/079/SRR12952979/SRR12952979_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/079/SRR12952979/SRR12952979_2.fastq.gz ENAFQ +SRR12952980 Mus musculus ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/080/SRR12952980/SRR12952980_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/080/SRR12952980/SRR12952980_2.fastq.gz ENAFQ +SRR12952981 Mus musculus ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/081/SRR12952981/SRR12952981_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/081/SRR12952981/SRR12952981_2.fastq.gz ENAFQ +SRR12952982 Mus musculus ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/082/SRR12952982/SRR12952982_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/082/SRR12952982/SRR12952982_2.fastq.gz ENAFQ +SRR12952983 Mus musculus ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/083/SRR12952983/SRR12952983_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/083/SRR12952983/SRR12952983_2.fastq.gz ENAFQ +SRR12952984 Mus musculus ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/084/SRR12952984/SRR12952984_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/084/SRR12952984/SRR12952984_2.fastq.gz ENAFQ +SRR12952985 Mus musculus ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/085/SRR12952985/SRR12952985_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/085/SRR12952985/SRR12952985_2.fastq.gz ENAFQ +SRR12952986 Mus musculus ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/086/SRR12952986/SRR12952986_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/086/SRR12952986/SRR12952986_2.fastq.gz ENAFQ +SRR12952987 Mus musculus ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/087/SRR12952987/SRR12952987_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/087/SRR12952987/SRR12952987_2.fastq.gz ENAFQ +SRR12952988 
Mus musculus ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/088/SRR12952988/SRR12952988_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/088/SRR12952988/SRR12952988_2.fastq.gz ENAFQ +SRR12952989 Mus musculus ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/089/SRR12952989/SRR12952989_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/089/SRR12952989/SRR12952989_2.fastq.gz ENAFQ +SRR12952990 Mus musculus ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/090/SRR12952990/SRR12952990_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/090/SRR12952990/SRR12952990_2.fastq.gz ENAFQ +SRR12952991 Mus musculus ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/091/SRR12952991/SRR12952991_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/091/SRR12952991/SRR12952991_2.fastq.gz ENAFQ +SRR12952992 Mus musculus ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/092/SRR12952992/SRR12952992_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/092/SRR12952992/SRR12952992_2.fastq.gz ENAFQ +SRR12952993 Mus musculus ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/093/SRR12952993/SRR12952993_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/093/SRR12952993/SRR12952993_2.fastq.gz ENAFQ +SRR12952994 Mus musculus ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/094/SRR12952994/SRR12952994_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/094/SRR12952994/SRR12952994_2.fastq.gz ENAFQ +SRR12952995 Mus musculus ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/095/SRR12952995/SRR12952995_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/095/SRR12952995/SRR12952995_2.fastq.gz ENAFQ +SRR12952996 Mus musculus ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/096/SRR12952996/SRR12952996_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/096/SRR12952996/SRR12952996_2.fastq.gz ENAFQ diff --git a/test_data/GSE160513/GSE160513.run.list b/test_data/GSE160513/GSE160513.run.list new file mode 100644 index 0000000..9c1a797 --- /dev/null +++ b/test_data/GSE160513/GSE160513.run.list @@ -0,0 +1,36 @@ +SRR12952961 +SRR12952962 +SRR12952963 +SRR12952964 +SRR12952965 +SRR12952966 +SRR12952967 +SRR12952968 +SRR12952969 +SRR12952970 +SRR12952971 +SRR12952972 
+SRR12952973 +SRR12952974 +SRR12952975 +SRR12952976 +SRR12952977 +SRR12952978 +SRR12952979 +SRR12952980 +SRR12952981 +SRR12952982 +SRR12952983 +SRR12952984 +SRR12952985 +SRR12952986 +SRR12952987 +SRR12952988 +SRR12952989 +SRR12952990 +SRR12952991 +SRR12952992 +SRR12952993 +SRR12952994 +SRR12952995 +SRR12952996 diff --git a/test_data/GSE160513/GSE160513.sample.list b/test_data/GSE160513/GSE160513.sample.list new file mode 100644 index 0000000..0a48e0c --- /dev/null +++ b/test_data/GSE160513/GSE160513.sample.list @@ -0,0 +1,5 @@ +GSM4874359 +GSM4874360 +GSM4874361 +GSM4874362 +GSM4874363 diff --git a/test_data/GSE160513/GSE160513.sample_x_run.tsv b/test_data/GSE160513/GSE160513.sample_x_run.tsv new file mode 100644 index 0000000..c8eb417 --- /dev/null +++ b/test_data/GSE160513/GSE160513.sample_x_run.tsv @@ -0,0 +1,5 @@ +GSM4874359 SRR12952961,SRR12952962,SRR12952963,SRR12952964 +GSM4874360 SRR12952965,SRR12952966,SRR12952967,SRR12952968,SRR12952969,SRR12952970,SRR12952971,SRR12952972 +GSM4874361 SRR12952973,SRR12952974,SRR12952975,SRR12952976,SRR12952977,SRR12952978,SRR12952979,SRR12952980 +GSM4874362 SRR12952981,SRR12952982,SRR12952983,SRR12952984,SRR12952985,SRR12952986,SRR12952987,SRR12952988 +GSM4874363 SRR12952989,SRR12952990,SRR12952991,SRR12952992,SRR12952993,SRR12952994,SRR12952995,SRR12952996 diff --git a/test_data/GSE160513/GSE160513.urls.list b/test_data/GSE160513/GSE160513.urls.list new file mode 100644 index 0000000..50c80fb --- /dev/null +++ b/test_data/GSE160513/GSE160513.urls.list @@ -0,0 +1,72 @@ +ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/061/SRR12952961/SRR12952961_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/061/SRR12952961/SRR12952961_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/062/SRR12952962/SRR12952962_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/062/SRR12952962/SRR12952962_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/063/SRR12952963/SRR12952963_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/063/SRR12952963/SRR12952963_2.fastq.gz 
+ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/064/SRR12952964/SRR12952964_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/064/SRR12952964/SRR12952964_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/065/SRR12952965/SRR12952965_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/065/SRR12952965/SRR12952965_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/066/SRR12952966/SRR12952966_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/066/SRR12952966/SRR12952966_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/067/SRR12952967/SRR12952967_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/067/SRR12952967/SRR12952967_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/068/SRR12952968/SRR12952968_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/068/SRR12952968/SRR12952968_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/069/SRR12952969/SRR12952969_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/069/SRR12952969/SRR12952969_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/070/SRR12952970/SRR12952970_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/070/SRR12952970/SRR12952970_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/071/SRR12952971/SRR12952971_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/071/SRR12952971/SRR12952971_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/072/SRR12952972/SRR12952972_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/072/SRR12952972/SRR12952972_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/073/SRR12952973/SRR12952973_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/073/SRR12952973/SRR12952973_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/074/SRR12952974/SRR12952974_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/074/SRR12952974/SRR12952974_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/075/SRR12952975/SRR12952975_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/075/SRR12952975/SRR12952975_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/076/SRR12952976/SRR12952976_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/076/SRR12952976/SRR12952976_2.fastq.gz 
+ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/077/SRR12952977/SRR12952977_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/077/SRR12952977/SRR12952977_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/078/SRR12952978/SRR12952978_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/078/SRR12952978/SRR12952978_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/079/SRR12952979/SRR12952979_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/079/SRR12952979/SRR12952979_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/080/SRR12952980/SRR12952980_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/080/SRR12952980/SRR12952980_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/081/SRR12952981/SRR12952981_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/081/SRR12952981/SRR12952981_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/082/SRR12952982/SRR12952982_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/082/SRR12952982/SRR12952982_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/083/SRR12952983/SRR12952983_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/083/SRR12952983/SRR12952983_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/084/SRR12952984/SRR12952984_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/084/SRR12952984/SRR12952984_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/085/SRR12952985/SRR12952985_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/085/SRR12952985/SRR12952985_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/086/SRR12952986/SRR12952986_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/086/SRR12952986/SRR12952986_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/087/SRR12952987/SRR12952987_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/087/SRR12952987/SRR12952987_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/088/SRR12952988/SRR12952988_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/088/SRR12952988/SRR12952988_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/089/SRR12952989/SRR12952989_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/089/SRR12952989/SRR12952989_2.fastq.gz 
+ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/090/SRR12952990/SRR12952990_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/090/SRR12952990/SRR12952990_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/091/SRR12952991/SRR12952991_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/091/SRR12952991/SRR12952991_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/092/SRR12952992/SRR12952992_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/092/SRR12952992/SRR12952992_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/093/SRR12952993/SRR12952993_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/093/SRR12952993/SRR12952993_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/094/SRR12952994/SRR12952994_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/094/SRR12952994/SRR12952994_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/095/SRR12952995/SRR12952995_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/095/SRR12952995/SRR12952995_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/096/SRR12952996/SRR12952996_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR129/096/SRR12952996/SRR12952996_2.fastq.gz diff --git a/test_data/PRJNA511433/PRJNA511433.accessions.tsv b/test_data/PRJNA511433/PRJNA511433.accessions.tsv new file mode 100644 index 0000000..607f7b2 --- /dev/null +++ b/test_data/PRJNA511433/PRJNA511433.accessions.tsv @@ -0,0 +1,8 @@ +- SRS4181123 SRX5173698 SRR8363218,SRR8363219,SRR8363220,SRR8363221,SRR8363222,SRR8363223,SRR8363224,SRR8363225,SRR8363226,SRR8363227,SRR8363228,SRR8363229,SRR8363230,SRR8363231,SRR8363232,SRR8363233,SRR8363234,SRR8363235,SRR8363236,SRR8363237,SRR8363238,SRR8363239,SRR8363240,SRR8363241 +- SRS4181124 SRX5173697 SRR8363194,SRR8363195,SRR8363196,SRR8363197,SRR8363198,SRR8363199,SRR8363200,SRR8363201,SRR8363202,SRR8363203,SRR8363204,SRR8363205,SRR8363206,SRR8363207,SRR8363208,SRR8363209,SRR8363210,SRR8363211,SRR8363212,SRR8363213,SRR8363214,SRR8363215,SRR8363216,SRR8363217 +- SRS4181125 SRX5173699 
SRR8363242,SRR8363243,SRR8363244,SRR8363245,SRR8363246,SRR8363247,SRR8363248,SRR8363249,SRR8363266,SRR8363267,SRR8363268,SRR8363269 +- SRS4181126 SRX5173700 SRR8363270,SRR8363271,SRR8363272,SRR8363273,SRR8363274,SRR8363275,SRR8363276,SRR8363277,SRR8363278,SRR8363279,SRR8363280,SRR8363281 +- SRS4181127 SRX5173702 SRR8363250,SRR8363251,SRR8363252,SRR8363253,SRR8363254,SRR8363255,SRR8363256,SRR8363257,SRR8363306,SRR8363307,SRR8363308,SRR8363309,SRR8363310,SRR8363311,SRR8363312,SRR8363313,SRR8363314,SRR8363315,SRR8363316,SRR8363317,SRR8363318,SRR8363319,SRR8363320,SRR8363321 +- SRS4181128 SRX5173701 SRR8363282,SRR8363283,SRR8363284,SRR8363285,SRR8363286,SRR8363287,SRR8363288,SRR8363289,SRR8363290,SRR8363291,SRR8363292,SRR8363293,SRR8363294,SRR8363295,SRR8363296,SRR8363297,SRR8363298,SRR8363299,SRR8363300,SRR8363301,SRR8363302,SRR8363303,SRR8363304,SRR8363305 +- SRS4181129 SRX5173703 SRR8363258,SRR8363259,SRR8363260,SRR8363261 +- SRS4181130 SRX5173704 SRR8363262,SRR8363263,SRR8363264,SRR8363265 diff --git a/test_data/PRJNA511433/PRJNA511433.parsed.tsv b/test_data/PRJNA511433/PRJNA511433.parsed.tsv new file mode 100644 index 0000000..bf1d501 --- /dev/null +++ b/test_data/PRJNA511433/PRJNA511433.parsed.tsv @@ -0,0 +1,128 @@ +SRR8363194 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/004/SRR8363194/SRR8363194_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/004/SRR8363194/SRR8363194_2.fastq.gz ENAFQ +SRR8363195 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/005/SRR8363195/SRR8363195_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/005/SRR8363195/SRR8363195_2.fastq.gz ENAFQ +SRR8363196 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/006/SRR8363196/SRR8363196_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/006/SRR8363196/SRR8363196_2.fastq.gz ENAFQ +SRR8363197 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/007/SRR8363197/SRR8363197_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/007/SRR8363197/SRR8363197_2.fastq.gz ENAFQ +SRR8363198 Homo sapiens 
ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/008/SRR8363198/SRR8363198_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/008/SRR8363198/SRR8363198_2.fastq.gz ENAFQ +SRR8363199 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/009/SRR8363199/SRR8363199_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/009/SRR8363199/SRR8363199_2.fastq.gz ENAFQ +SRR8363200 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/000/SRR8363200/SRR8363200_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/000/SRR8363200/SRR8363200_2.fastq.gz ENAFQ +SRR8363201 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/001/SRR8363201/SRR8363201_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/001/SRR8363201/SRR8363201_2.fastq.gz ENAFQ +SRR8363202 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/002/SRR8363202/SRR8363202_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/002/SRR8363202/SRR8363202_2.fastq.gz ENAFQ +SRR8363203 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/003/SRR8363203/SRR8363203_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/003/SRR8363203/SRR8363203_2.fastq.gz ENAFQ +SRR8363204 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/004/SRR8363204/SRR8363204_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/004/SRR8363204/SRR8363204_2.fastq.gz ENAFQ +SRR8363205 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/005/SRR8363205/SRR8363205_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/005/SRR8363205/SRR8363205_2.fastq.gz ENAFQ +SRR8363206 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/006/SRR8363206/SRR8363206_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/006/SRR8363206/SRR8363206_2.fastq.gz ENAFQ +SRR8363207 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/007/SRR8363207/SRR8363207_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/007/SRR8363207/SRR8363207_2.fastq.gz ENAFQ +SRR8363208 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/008/SRR8363208/SRR8363208_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/008/SRR8363208/SRR8363208_2.fastq.gz ENAFQ +SRR8363209 Homo sapiens 
ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/009/SRR8363209/SRR8363209_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/009/SRR8363209/SRR8363209_2.fastq.gz ENAFQ +SRR8363210 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/000/SRR8363210/SRR8363210_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/000/SRR8363210/SRR8363210_2.fastq.gz ENAFQ +SRR8363211 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/001/SRR8363211/SRR8363211_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/001/SRR8363211/SRR8363211_2.fastq.gz ENAFQ +SRR8363212 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/002/SRR8363212/SRR8363212_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/002/SRR8363212/SRR8363212_2.fastq.gz ENAFQ +SRR8363213 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/003/SRR8363213/SRR8363213_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/003/SRR8363213/SRR8363213_2.fastq.gz ENAFQ +SRR8363214 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/004/SRR8363214/SRR8363214_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/004/SRR8363214/SRR8363214_2.fastq.gz ENAFQ +SRR8363215 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/005/SRR8363215/SRR8363215_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/005/SRR8363215/SRR8363215_2.fastq.gz ENAFQ +SRR8363216 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/006/SRR8363216/SRR8363216_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/006/SRR8363216/SRR8363216_2.fastq.gz ENAFQ +SRR8363217 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/007/SRR8363217/SRR8363217_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/007/SRR8363217/SRR8363217_2.fastq.gz ENAFQ +SRR8363218 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/008/SRR8363218/SRR8363218_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/008/SRR8363218/SRR8363218_2.fastq.gz ENAFQ +SRR8363219 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/009/SRR8363219/SRR8363219_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/009/SRR8363219/SRR8363219_2.fastq.gz ENAFQ +SRR8363220 Homo sapiens 
ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/000/SRR8363220/SRR8363220_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/000/SRR8363220/SRR8363220_2.fastq.gz ENAFQ +SRR8363221 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/001/SRR8363221/SRR8363221_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/001/SRR8363221/SRR8363221_2.fastq.gz ENAFQ +SRR8363222 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/002/SRR8363222/SRR8363222_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/002/SRR8363222/SRR8363222_2.fastq.gz ENAFQ +SRR8363223 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/003/SRR8363223/SRR8363223_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/003/SRR8363223/SRR8363223_2.fastq.gz ENAFQ +SRR8363224 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/004/SRR8363224/SRR8363224_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/004/SRR8363224/SRR8363224_2.fastq.gz ENAFQ +SRR8363225 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/005/SRR8363225/SRR8363225_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/005/SRR8363225/SRR8363225_2.fastq.gz ENAFQ +SRR8363226 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/006/SRR8363226/SRR8363226_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/006/SRR8363226/SRR8363226_2.fastq.gz ENAFQ +SRR8363227 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/007/SRR8363227/SRR8363227_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/007/SRR8363227/SRR8363227_2.fastq.gz ENAFQ +SRR8363228 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/008/SRR8363228/SRR8363228_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/008/SRR8363228/SRR8363228_2.fastq.gz ENAFQ +SRR8363229 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/009/SRR8363229/SRR8363229_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/009/SRR8363229/SRR8363229_2.fastq.gz ENAFQ +SRR8363230 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/000/SRR8363230/SRR8363230_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/000/SRR8363230/SRR8363230_2.fastq.gz ENAFQ +SRR8363231 Homo sapiens 
ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/001/SRR8363231/SRR8363231_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/001/SRR8363231/SRR8363231_2.fastq.gz ENAFQ +SRR8363232 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/002/SRR8363232/SRR8363232_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/002/SRR8363232/SRR8363232_2.fastq.gz ENAFQ +SRR8363233 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/003/SRR8363233/SRR8363233_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/003/SRR8363233/SRR8363233_2.fastq.gz ENAFQ +SRR8363234 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/004/SRR8363234/SRR8363234_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/004/SRR8363234/SRR8363234_2.fastq.gz ENAFQ +SRR8363235 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/005/SRR8363235/SRR8363235_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/005/SRR8363235/SRR8363235_2.fastq.gz ENAFQ +SRR8363236 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/006/SRR8363236/SRR8363236_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/006/SRR8363236/SRR8363236_2.fastq.gz ENAFQ +SRR8363237 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/007/SRR8363237/SRR8363237_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/007/SRR8363237/SRR8363237_2.fastq.gz ENAFQ +SRR8363238 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/008/SRR8363238/SRR8363238_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/008/SRR8363238/SRR8363238_2.fastq.gz ENAFQ +SRR8363239 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/009/SRR8363239/SRR8363239_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/009/SRR8363239/SRR8363239_2.fastq.gz ENAFQ +SRR8363240 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/000/SRR8363240/SRR8363240_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/000/SRR8363240/SRR8363240_2.fastq.gz ENAFQ +SRR8363241 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/001/SRR8363241/SRR8363241_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/001/SRR8363241/SRR8363241_2.fastq.gz ENAFQ +SRR8363242 Homo sapiens 
ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/002/SRR8363242/SRR8363242_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/002/SRR8363242/SRR8363242_2.fastq.gz ENAFQ +SRR8363243 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/003/SRR8363243/SRR8363243_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/003/SRR8363243/SRR8363243_2.fastq.gz ENAFQ +SRR8363244 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/004/SRR8363244/SRR8363244_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/004/SRR8363244/SRR8363244_2.fastq.gz ENAFQ +SRR8363245 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/005/SRR8363245/SRR8363245_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/005/SRR8363245/SRR8363245_2.fastq.gz ENAFQ +SRR8363246 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/006/SRR8363246/SRR8363246_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/006/SRR8363246/SRR8363246_2.fastq.gz ENAFQ +SRR8363247 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/007/SRR8363247/SRR8363247_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/007/SRR8363247/SRR8363247_2.fastq.gz ENAFQ +SRR8363248 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/008/SRR8363248/SRR8363248_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/008/SRR8363248/SRR8363248_2.fastq.gz ENAFQ +SRR8363249 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/009/SRR8363249/SRR8363249_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/009/SRR8363249/SRR8363249_2.fastq.gz ENAFQ +SRR8363250 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/000/SRR8363250/SRR8363250_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/000/SRR8363250/SRR8363250_2.fastq.gz ENAFQ +SRR8363251 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/001/SRR8363251/SRR8363251_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/001/SRR8363251/SRR8363251_2.fastq.gz ENAFQ +SRR8363252 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/002/SRR8363252/SRR8363252_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/002/SRR8363252/SRR8363252_2.fastq.gz ENAFQ +SRR8363253 Homo sapiens 
ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/003/SRR8363253/SRR8363253_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/003/SRR8363253/SRR8363253_2.fastq.gz ENAFQ +SRR8363254 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/004/SRR8363254/SRR8363254_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/004/SRR8363254/SRR8363254_2.fastq.gz ENAFQ +SRR8363255 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/005/SRR8363255/SRR8363255_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/005/SRR8363255/SRR8363255_2.fastq.gz ENAFQ +SRR8363256 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/006/SRR8363256/SRR8363256_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/006/SRR8363256/SRR8363256_2.fastq.gz ENAFQ +SRR8363257 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/007/SRR8363257/SRR8363257_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/007/SRR8363257/SRR8363257_2.fastq.gz ENAFQ +SRR8363258 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/008/SRR8363258/SRR8363258_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/008/SRR8363258/SRR8363258_2.fastq.gz ENAFQ +SRR8363259 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/009/SRR8363259/SRR8363259_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/009/SRR8363259/SRR8363259_2.fastq.gz ENAFQ +SRR8363260 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/000/SRR8363260/SRR8363260_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/000/SRR8363260/SRR8363260_2.fastq.gz ENAFQ +SRR8363261 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/001/SRR8363261/SRR8363261_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/001/SRR8363261/SRR8363261_2.fastq.gz ENAFQ +SRR8363262 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/002/SRR8363262/SRR8363262_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/002/SRR8363262/SRR8363262_2.fastq.gz ENAFQ +SRR8363263 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/003/SRR8363263/SRR8363263_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/003/SRR8363263/SRR8363263_2.fastq.gz ENAFQ +SRR8363264 Homo sapiens 
ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/004/SRR8363264/SRR8363264_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/004/SRR8363264/SRR8363264_2.fastq.gz ENAFQ +SRR8363265 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/005/SRR8363265/SRR8363265_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/005/SRR8363265/SRR8363265_2.fastq.gz ENAFQ +SRR8363266 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/006/SRR8363266/SRR8363266_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/006/SRR8363266/SRR8363266_2.fastq.gz ENAFQ +SRR8363267 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/007/SRR8363267/SRR8363267_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/007/SRR8363267/SRR8363267_2.fastq.gz ENAFQ +SRR8363268 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/008/SRR8363268/SRR8363268_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/008/SRR8363268/SRR8363268_2.fastq.gz ENAFQ +SRR8363269 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/009/SRR8363269/SRR8363269_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/009/SRR8363269/SRR8363269_2.fastq.gz ENAFQ +SRR8363270 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/000/SRR8363270/SRR8363270_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/000/SRR8363270/SRR8363270_2.fastq.gz ENAFQ +SRR8363271 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/001/SRR8363271/SRR8363271_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/001/SRR8363271/SRR8363271_2.fastq.gz ENAFQ +SRR8363272 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/002/SRR8363272/SRR8363272_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/002/SRR8363272/SRR8363272_2.fastq.gz ENAFQ +SRR8363273 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/003/SRR8363273/SRR8363273_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/003/SRR8363273/SRR8363273_2.fastq.gz ENAFQ +SRR8363274 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/004/SRR8363274/SRR8363274_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/004/SRR8363274/SRR8363274_2.fastq.gz ENAFQ +SRR8363275 Homo sapiens 
ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/005/SRR8363275/SRR8363275_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/005/SRR8363275/SRR8363275_2.fastq.gz ENAFQ +SRR8363276 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/006/SRR8363276/SRR8363276_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/006/SRR8363276/SRR8363276_2.fastq.gz ENAFQ +SRR8363277 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/007/SRR8363277/SRR8363277_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/007/SRR8363277/SRR8363277_2.fastq.gz ENAFQ +SRR8363278 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/008/SRR8363278/SRR8363278_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/008/SRR8363278/SRR8363278_2.fastq.gz ENAFQ +SRR8363279 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/009/SRR8363279/SRR8363279_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/009/SRR8363279/SRR8363279_2.fastq.gz ENAFQ +SRR8363280 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/000/SRR8363280/SRR8363280_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/000/SRR8363280/SRR8363280_2.fastq.gz ENAFQ +SRR8363281 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/001/SRR8363281/SRR8363281_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/001/SRR8363281/SRR8363281_2.fastq.gz ENAFQ +SRR8363282 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/002/SRR8363282/SRR8363282_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/002/SRR8363282/SRR8363282_2.fastq.gz ENAFQ +SRR8363283 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/003/SRR8363283/SRR8363283_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/003/SRR8363283/SRR8363283_2.fastq.gz ENAFQ +SRR8363284 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/004/SRR8363284/SRR8363284_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/004/SRR8363284/SRR8363284_2.fastq.gz ENAFQ +SRR8363285 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/005/SRR8363285/SRR8363285_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/005/SRR8363285/SRR8363285_2.fastq.gz ENAFQ +SRR8363286 Homo sapiens 
ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/006/SRR8363286/SRR8363286_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/006/SRR8363286/SRR8363286_2.fastq.gz ENAFQ +SRR8363287 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/007/SRR8363287/SRR8363287_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/007/SRR8363287/SRR8363287_2.fastq.gz ENAFQ +SRR8363288 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/008/SRR8363288/SRR8363288_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/008/SRR8363288/SRR8363288_2.fastq.gz ENAFQ +SRR8363289 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/009/SRR8363289/SRR8363289_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/009/SRR8363289/SRR8363289_2.fastq.gz ENAFQ +SRR8363290 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/000/SRR8363290/SRR8363290_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/000/SRR8363290/SRR8363290_2.fastq.gz ENAFQ +SRR8363291 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/001/SRR8363291/SRR8363291_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/001/SRR8363291/SRR8363291_2.fastq.gz ENAFQ +SRR8363292 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/002/SRR8363292/SRR8363292_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/002/SRR8363292/SRR8363292_2.fastq.gz ENAFQ +SRR8363293 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/003/SRR8363293/SRR8363293_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/003/SRR8363293/SRR8363293_2.fastq.gz ENAFQ +SRR8363294 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/004/SRR8363294/SRR8363294_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/004/SRR8363294/SRR8363294_2.fastq.gz ENAFQ +SRR8363295 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/005/SRR8363295/SRR8363295_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/005/SRR8363295/SRR8363295_2.fastq.gz ENAFQ +SRR8363296 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/006/SRR8363296/SRR8363296_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/006/SRR8363296/SRR8363296_2.fastq.gz ENAFQ +SRR8363297 Homo sapiens 
ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/007/SRR8363297/SRR8363297_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/007/SRR8363297/SRR8363297_2.fastq.gz ENAFQ +SRR8363298 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/008/SRR8363298/SRR8363298_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/008/SRR8363298/SRR8363298_2.fastq.gz ENAFQ +SRR8363299 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/009/SRR8363299/SRR8363299_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/009/SRR8363299/SRR8363299_2.fastq.gz ENAFQ +SRR8363300 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/000/SRR8363300/SRR8363300_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/000/SRR8363300/SRR8363300_2.fastq.gz ENAFQ +SRR8363301 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/001/SRR8363301/SRR8363301_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/001/SRR8363301/SRR8363301_2.fastq.gz ENAFQ +SRR8363302 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/002/SRR8363302/SRR8363302_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/002/SRR8363302/SRR8363302_2.fastq.gz ENAFQ +SRR8363303 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/003/SRR8363303/SRR8363303_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/003/SRR8363303/SRR8363303_2.fastq.gz ENAFQ +SRR8363304 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/004/SRR8363304/SRR8363304_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/004/SRR8363304/SRR8363304_2.fastq.gz ENAFQ +SRR8363305 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/005/SRR8363305/SRR8363305_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/005/SRR8363305/SRR8363305_2.fastq.gz ENAFQ +SRR8363306 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/006/SRR8363306/SRR8363306_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/006/SRR8363306/SRR8363306_2.fastq.gz ENAFQ +SRR8363307 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/007/SRR8363307/SRR8363307_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/007/SRR8363307/SRR8363307_2.fastq.gz ENAFQ +SRR8363308 Homo sapiens 
ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/008/SRR8363308/SRR8363308_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/008/SRR8363308/SRR8363308_2.fastq.gz ENAFQ +SRR8363309 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/009/SRR8363309/SRR8363309_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/009/SRR8363309/SRR8363309_2.fastq.gz ENAFQ +SRR8363310 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/000/SRR8363310/SRR8363310_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/000/SRR8363310/SRR8363310_2.fastq.gz ENAFQ +SRR8363311 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/001/SRR8363311/SRR8363311_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/001/SRR8363311/SRR8363311_2.fastq.gz ENAFQ +SRR8363312 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/002/SRR8363312/SRR8363312_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/002/SRR8363312/SRR8363312_2.fastq.gz ENAFQ +SRR8363313 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/003/SRR8363313/SRR8363313_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/003/SRR8363313/SRR8363313_2.fastq.gz ENAFQ +SRR8363314 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/004/SRR8363314/SRR8363314_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/004/SRR8363314/SRR8363314_2.fastq.gz ENAFQ +SRR8363315 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/005/SRR8363315/SRR8363315_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/005/SRR8363315/SRR8363315_2.fastq.gz ENAFQ +SRR8363316 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/006/SRR8363316/SRR8363316_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/006/SRR8363316/SRR8363316_2.fastq.gz ENAFQ +SRR8363317 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/007/SRR8363317/SRR8363317_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/007/SRR8363317/SRR8363317_2.fastq.gz ENAFQ +SRR8363318 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/008/SRR8363318/SRR8363318_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/008/SRR8363318/SRR8363318_2.fastq.gz ENAFQ +SRR8363319 Homo sapiens 
ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/009/SRR8363319/SRR8363319_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/009/SRR8363319/SRR8363319_2.fastq.gz ENAFQ +SRR8363320 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/000/SRR8363320/SRR8363320_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/000/SRR8363320/SRR8363320_2.fastq.gz ENAFQ +SRR8363321 Homo sapiens ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/001/SRR8363321/SRR8363321_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/001/SRR8363321/SRR8363321_2.fastq.gz ENAFQ diff --git a/test_data/PRJNA511433/PRJNA511433.run.list b/test_data/PRJNA511433/PRJNA511433.run.list new file mode 100644 index 0000000..fba09ee --- /dev/null +++ b/test_data/PRJNA511433/PRJNA511433.run.list @@ -0,0 +1,128 @@ +SRR8363194 +SRR8363195 +SRR8363196 +SRR8363197 +SRR8363198 +SRR8363199 +SRR8363200 +SRR8363201 +SRR8363202 +SRR8363203 +SRR8363204 +SRR8363205 +SRR8363206 +SRR8363207 +SRR8363208 +SRR8363209 +SRR8363210 +SRR8363211 +SRR8363212 +SRR8363213 +SRR8363214 +SRR8363215 +SRR8363216 +SRR8363217 +SRR8363218 +SRR8363219 +SRR8363220 +SRR8363221 +SRR8363222 +SRR8363223 +SRR8363224 +SRR8363225 +SRR8363226 +SRR8363227 +SRR8363228 +SRR8363229 +SRR8363230 +SRR8363231 +SRR8363232 +SRR8363233 +SRR8363234 +SRR8363235 +SRR8363236 +SRR8363237 +SRR8363238 +SRR8363239 +SRR8363240 +SRR8363241 +SRR8363242 +SRR8363243 +SRR8363244 +SRR8363245 +SRR8363246 +SRR8363247 +SRR8363248 +SRR8363249 +SRR8363250 +SRR8363251 +SRR8363252 +SRR8363253 +SRR8363254 +SRR8363255 +SRR8363256 +SRR8363257 +SRR8363258 +SRR8363259 +SRR8363260 +SRR8363261 +SRR8363262 +SRR8363263 +SRR8363264 +SRR8363265 +SRR8363266 +SRR8363267 +SRR8363268 +SRR8363269 +SRR8363270 +SRR8363271 +SRR8363272 +SRR8363273 +SRR8363274 +SRR8363275 +SRR8363276 +SRR8363277 +SRR8363278 +SRR8363279 +SRR8363280 +SRR8363281 +SRR8363282 +SRR8363283 +SRR8363284 +SRR8363285 +SRR8363286 +SRR8363287 +SRR8363288 +SRR8363289 +SRR8363290 +SRR8363291 +SRR8363292 +SRR8363293 +SRR8363294 +SRR8363295 +SRR8363296 +SRR8363297 +SRR8363298 
+SRR8363299 +SRR8363300 +SRR8363301 +SRR8363302 +SRR8363303 +SRR8363304 +SRR8363305 +SRR8363306 +SRR8363307 +SRR8363308 +SRR8363309 +SRR8363310 +SRR8363311 +SRR8363312 +SRR8363313 +SRR8363314 +SRR8363315 +SRR8363316 +SRR8363317 +SRR8363318 +SRR8363319 +SRR8363320 +SRR8363321 diff --git a/test_data/PRJNA511433/PRJNA511433.sample.list b/test_data/PRJNA511433/PRJNA511433.sample.list new file mode 100644 index 0000000..f6b349e --- /dev/null +++ b/test_data/PRJNA511433/PRJNA511433.sample.list @@ -0,0 +1,8 @@ +SRS4181123 +SRS4181124 +SRS4181125 +SRS4181126 +SRS4181127 +SRS4181128 +SRS4181129 +SRS4181130 diff --git a/test_data/PRJNA511433/PRJNA511433.sample_x_run.tsv b/test_data/PRJNA511433/PRJNA511433.sample_x_run.tsv new file mode 100644 index 0000000..7e1aef2 --- /dev/null +++ b/test_data/PRJNA511433/PRJNA511433.sample_x_run.tsv @@ -0,0 +1,8 @@ +SRS4181123 SRR8363218,SRR8363219,SRR8363220,SRR8363221,SRR8363222,SRR8363223,SRR8363224,SRR8363225,SRR8363226,SRR8363227,SRR8363228,SRR8363229,SRR8363230,SRR8363231,SRR8363232,SRR8363233,SRR8363234,SRR8363235,SRR8363236,SRR8363237,SRR8363238,SRR8363239,SRR8363240,SRR8363241 +SRS4181124 SRR8363194,SRR8363195,SRR8363196,SRR8363197,SRR8363198,SRR8363199,SRR8363200,SRR8363201,SRR8363202,SRR8363203,SRR8363204,SRR8363205,SRR8363206,SRR8363207,SRR8363208,SRR8363209,SRR8363210,SRR8363211,SRR8363212,SRR8363213,SRR8363214,SRR8363215,SRR8363216,SRR8363217 +SRS4181125 SRR8363242,SRR8363243,SRR8363244,SRR8363245,SRR8363246,SRR8363247,SRR8363248,SRR8363249,SRR8363266,SRR8363267,SRR8363268,SRR8363269 +SRS4181126 SRR8363270,SRR8363271,SRR8363272,SRR8363273,SRR8363274,SRR8363275,SRR8363276,SRR8363277,SRR8363278,SRR8363279,SRR8363280,SRR8363281 +SRS4181127 SRR8363250,SRR8363251,SRR8363252,SRR8363253,SRR8363254,SRR8363255,SRR8363256,SRR8363257,SRR8363306,SRR8363307,SRR8363308,SRR8363309,SRR8363310,SRR8363311,SRR8363312,SRR8363313,SRR8363314,SRR8363315,SRR8363316,SRR8363317,SRR8363318,SRR8363319,SRR8363320,SRR8363321 +SRS4181128 
SRR8363282,SRR8363283,SRR8363284,SRR8363285,SRR8363286,SRR8363287,SRR8363288,SRR8363289,SRR8363290,SRR8363291,SRR8363292,SRR8363293,SRR8363294,SRR8363295,SRR8363296,SRR8363297,SRR8363298,SRR8363299,SRR8363300,SRR8363301,SRR8363302,SRR8363303,SRR8363304,SRR8363305 +SRS4181129 SRR8363258,SRR8363259,SRR8363260,SRR8363261 +SRS4181130 SRR8363262,SRR8363263,SRR8363264,SRR8363265 diff --git a/test_data/PRJNA511433/PRJNA511433.urls.list b/test_data/PRJNA511433/PRJNA511433.urls.list new file mode 100644 index 0000000..7e5fc39 --- /dev/null +++ b/test_data/PRJNA511433/PRJNA511433.urls.list @@ -0,0 +1,256 @@ +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/004/SRR8363194/SRR8363194_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/004/SRR8363194/SRR8363194_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/005/SRR8363195/SRR8363195_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/005/SRR8363195/SRR8363195_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/006/SRR8363196/SRR8363196_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/006/SRR8363196/SRR8363196_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/007/SRR8363197/SRR8363197_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/007/SRR8363197/SRR8363197_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/008/SRR8363198/SRR8363198_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/008/SRR8363198/SRR8363198_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/009/SRR8363199/SRR8363199_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/009/SRR8363199/SRR8363199_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/000/SRR8363200/SRR8363200_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/000/SRR8363200/SRR8363200_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/001/SRR8363201/SRR8363201_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/001/SRR8363201/SRR8363201_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/002/SRR8363202/SRR8363202_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/002/SRR8363202/SRR8363202_2.fastq.gz 
+ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/003/SRR8363203/SRR8363203_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/003/SRR8363203/SRR8363203_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/004/SRR8363204/SRR8363204_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/004/SRR8363204/SRR8363204_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/005/SRR8363205/SRR8363205_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/005/SRR8363205/SRR8363205_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/006/SRR8363206/SRR8363206_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/006/SRR8363206/SRR8363206_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/007/SRR8363207/SRR8363207_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/007/SRR8363207/SRR8363207_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/008/SRR8363208/SRR8363208_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/008/SRR8363208/SRR8363208_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/009/SRR8363209/SRR8363209_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/009/SRR8363209/SRR8363209_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/000/SRR8363210/SRR8363210_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/000/SRR8363210/SRR8363210_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/001/SRR8363211/SRR8363211_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/001/SRR8363211/SRR8363211_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/002/SRR8363212/SRR8363212_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/002/SRR8363212/SRR8363212_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/003/SRR8363213/SRR8363213_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/003/SRR8363213/SRR8363213_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/004/SRR8363214/SRR8363214_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/004/SRR8363214/SRR8363214_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/005/SRR8363215/SRR8363215_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/005/SRR8363215/SRR8363215_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/006/SRR8363216/SRR8363216_1.fastq.gz 
+ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/006/SRR8363216/SRR8363216_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/007/SRR8363217/SRR8363217_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/007/SRR8363217/SRR8363217_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/008/SRR8363218/SRR8363218_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/008/SRR8363218/SRR8363218_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/009/SRR8363219/SRR8363219_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/009/SRR8363219/SRR8363219_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/000/SRR8363220/SRR8363220_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/000/SRR8363220/SRR8363220_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/001/SRR8363221/SRR8363221_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/001/SRR8363221/SRR8363221_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/002/SRR8363222/SRR8363222_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/002/SRR8363222/SRR8363222_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/003/SRR8363223/SRR8363223_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/003/SRR8363223/SRR8363223_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/004/SRR8363224/SRR8363224_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/004/SRR8363224/SRR8363224_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/005/SRR8363225/SRR8363225_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/005/SRR8363225/SRR8363225_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/006/SRR8363226/SRR8363226_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/006/SRR8363226/SRR8363226_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/007/SRR8363227/SRR8363227_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/007/SRR8363227/SRR8363227_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/008/SRR8363228/SRR8363228_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/008/SRR8363228/SRR8363228_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/009/SRR8363229/SRR8363229_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/009/SRR8363229/SRR8363229_2.fastq.gz 
+ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/000/SRR8363230/SRR8363230_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/000/SRR8363230/SRR8363230_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/001/SRR8363231/SRR8363231_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/001/SRR8363231/SRR8363231_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/002/SRR8363232/SRR8363232_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/002/SRR8363232/SRR8363232_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/003/SRR8363233/SRR8363233_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/003/SRR8363233/SRR8363233_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/004/SRR8363234/SRR8363234_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/004/SRR8363234/SRR8363234_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/005/SRR8363235/SRR8363235_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/005/SRR8363235/SRR8363235_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/006/SRR8363236/SRR8363236_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/006/SRR8363236/SRR8363236_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/007/SRR8363237/SRR8363237_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/007/SRR8363237/SRR8363237_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/008/SRR8363238/SRR8363238_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/008/SRR8363238/SRR8363238_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/009/SRR8363239/SRR8363239_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/009/SRR8363239/SRR8363239_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/000/SRR8363240/SRR8363240_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/000/SRR8363240/SRR8363240_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/001/SRR8363241/SRR8363241_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/001/SRR8363241/SRR8363241_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/002/SRR8363242/SRR8363242_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/002/SRR8363242/SRR8363242_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/003/SRR8363243/SRR8363243_1.fastq.gz 
+ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/003/SRR8363243/SRR8363243_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/004/SRR8363244/SRR8363244_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/004/SRR8363244/SRR8363244_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/005/SRR8363245/SRR8363245_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/005/SRR8363245/SRR8363245_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/006/SRR8363246/SRR8363246_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/006/SRR8363246/SRR8363246_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/007/SRR8363247/SRR8363247_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/007/SRR8363247/SRR8363247_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/008/SRR8363248/SRR8363248_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/008/SRR8363248/SRR8363248_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/009/SRR8363249/SRR8363249_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/009/SRR8363249/SRR8363249_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/000/SRR8363250/SRR8363250_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/000/SRR8363250/SRR8363250_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/001/SRR8363251/SRR8363251_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/001/SRR8363251/SRR8363251_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/002/SRR8363252/SRR8363252_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/002/SRR8363252/SRR8363252_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/003/SRR8363253/SRR8363253_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/003/SRR8363253/SRR8363253_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/004/SRR8363254/SRR8363254_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/004/SRR8363254/SRR8363254_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/005/SRR8363255/SRR8363255_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/005/SRR8363255/SRR8363255_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/006/SRR8363256/SRR8363256_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/006/SRR8363256/SRR8363256_2.fastq.gz 
+ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/007/SRR8363257/SRR8363257_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/007/SRR8363257/SRR8363257_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/008/SRR8363258/SRR8363258_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/008/SRR8363258/SRR8363258_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/009/SRR8363259/SRR8363259_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/009/SRR8363259/SRR8363259_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/000/SRR8363260/SRR8363260_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/000/SRR8363260/SRR8363260_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/001/SRR8363261/SRR8363261_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/001/SRR8363261/SRR8363261_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/002/SRR8363262/SRR8363262_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/002/SRR8363262/SRR8363262_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/003/SRR8363263/SRR8363263_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/003/SRR8363263/SRR8363263_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/004/SRR8363264/SRR8363264_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/004/SRR8363264/SRR8363264_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/005/SRR8363265/SRR8363265_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/005/SRR8363265/SRR8363265_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/006/SRR8363266/SRR8363266_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/006/SRR8363266/SRR8363266_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/007/SRR8363267/SRR8363267_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/007/SRR8363267/SRR8363267_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/008/SRR8363268/SRR8363268_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/008/SRR8363268/SRR8363268_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/009/SRR8363269/SRR8363269_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/009/SRR8363269/SRR8363269_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/000/SRR8363270/SRR8363270_1.fastq.gz 
+ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/000/SRR8363270/SRR8363270_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/001/SRR8363271/SRR8363271_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/001/SRR8363271/SRR8363271_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/002/SRR8363272/SRR8363272_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/002/SRR8363272/SRR8363272_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/003/SRR8363273/SRR8363273_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/003/SRR8363273/SRR8363273_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/004/SRR8363274/SRR8363274_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/004/SRR8363274/SRR8363274_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/005/SRR8363275/SRR8363275_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/005/SRR8363275/SRR8363275_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/006/SRR8363276/SRR8363276_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/006/SRR8363276/SRR8363276_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/007/SRR8363277/SRR8363277_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/007/SRR8363277/SRR8363277_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/008/SRR8363278/SRR8363278_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/008/SRR8363278/SRR8363278_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/009/SRR8363279/SRR8363279_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/009/SRR8363279/SRR8363279_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/000/SRR8363280/SRR8363280_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/000/SRR8363280/SRR8363280_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/001/SRR8363281/SRR8363281_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/001/SRR8363281/SRR8363281_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/002/SRR8363282/SRR8363282_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/002/SRR8363282/SRR8363282_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/003/SRR8363283/SRR8363283_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/003/SRR8363283/SRR8363283_2.fastq.gz 
+ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/004/SRR8363284/SRR8363284_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/004/SRR8363284/SRR8363284_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/005/SRR8363285/SRR8363285_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/005/SRR8363285/SRR8363285_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/006/SRR8363286/SRR8363286_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/006/SRR8363286/SRR8363286_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/007/SRR8363287/SRR8363287_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/007/SRR8363287/SRR8363287_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/008/SRR8363288/SRR8363288_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/008/SRR8363288/SRR8363288_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/009/SRR8363289/SRR8363289_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/009/SRR8363289/SRR8363289_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/000/SRR8363290/SRR8363290_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/000/SRR8363290/SRR8363290_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/001/SRR8363291/SRR8363291_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/001/SRR8363291/SRR8363291_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/002/SRR8363292/SRR8363292_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/002/SRR8363292/SRR8363292_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/003/SRR8363293/SRR8363293_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/003/SRR8363293/SRR8363293_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/004/SRR8363294/SRR8363294_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/004/SRR8363294/SRR8363294_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/005/SRR8363295/SRR8363295_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/005/SRR8363295/SRR8363295_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/006/SRR8363296/SRR8363296_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/006/SRR8363296/SRR8363296_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/007/SRR8363297/SRR8363297_1.fastq.gz 
+ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/007/SRR8363297/SRR8363297_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/008/SRR8363298/SRR8363298_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/008/SRR8363298/SRR8363298_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/009/SRR8363299/SRR8363299_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/009/SRR8363299/SRR8363299_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/000/SRR8363300/SRR8363300_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/000/SRR8363300/SRR8363300_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/001/SRR8363301/SRR8363301_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/001/SRR8363301/SRR8363301_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/002/SRR8363302/SRR8363302_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/002/SRR8363302/SRR8363302_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/003/SRR8363303/SRR8363303_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/003/SRR8363303/SRR8363303_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/004/SRR8363304/SRR8363304_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/004/SRR8363304/SRR8363304_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/005/SRR8363305/SRR8363305_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/005/SRR8363305/SRR8363305_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/006/SRR8363306/SRR8363306_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/006/SRR8363306/SRR8363306_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/007/SRR8363307/SRR8363307_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/007/SRR8363307/SRR8363307_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/008/SRR8363308/SRR8363308_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/008/SRR8363308/SRR8363308_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/009/SRR8363309/SRR8363309_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/009/SRR8363309/SRR8363309_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/000/SRR8363310/SRR8363310_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/000/SRR8363310/SRR8363310_2.fastq.gz 
+ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/001/SRR8363311/SRR8363311_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/001/SRR8363311/SRR8363311_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/002/SRR8363312/SRR8363312_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/002/SRR8363312/SRR8363312_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/003/SRR8363313/SRR8363313_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/003/SRR8363313/SRR8363313_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/004/SRR8363314/SRR8363314_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/004/SRR8363314/SRR8363314_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/005/SRR8363315/SRR8363315_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/005/SRR8363315/SRR8363315_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/006/SRR8363316/SRR8363316_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/006/SRR8363316/SRR8363316_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/007/SRR8363317/SRR8363317_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/007/SRR8363317/SRR8363317_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/008/SRR8363318/SRR8363318_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/008/SRR8363318/SRR8363318_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/009/SRR8363319/SRR8363319_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/009/SRR8363319/SRR8363319_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/000/SRR8363320/SRR8363320_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/000/SRR8363320/SRR8363320_2.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/001/SRR8363321/SRR8363321_1.fastq.gz +ftp.sra.ebi.ac.uk/vol1/fastq/SRR836/001/SRR8363321/SRR8363321_2.fastq.gz From f8aa5a2b9a7b85b88a965250b4d8a4fd5001ae86 Mon Sep 17 00:00:00 2001 From: cellgeni Date: Tue, 21 Jan 2025 13:21:45 +0000 Subject: [PATCH 12/18] Updated test script and config --- .github/workflows/test.yml | 18 ++++++++++++++++-- tests/test_metadata.sh | 2 +- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/.github/workflows/test.yml 
b/.github/workflows/test.yml index e8d7704..af984d4 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -13,9 +13,23 @@ jobs: matrix: include: - series_id: GSE191067 + subset_list: "" - series_id: GSE264508 + subset_list: "" - series_id: GSE274955 - - series_id: GSE250130 + subset_list: "" + - series_id: GSE250130 + subset_list: "" + - series_id: E-MTAB-9221 + subset_list: "" + - series_id: GSE111360 + subset_list: ${{ github.workspace }}/test_data/GSE111360/GSE111360.subset.list + - series_id: GSE117988 + subset_list: "" + - series_id: GSE160513 + subset_list: "" + - series_id: PRJNA511433 + subset_list: "" steps: - name: Checkout repository @@ -30,4 +44,4 @@ jobs: run: | chmod +x ./scripts/* chmod +x ./tests/test_metadata.sh - ./tests/test_metadata.sh ${{ matrix.series_id }} \ No newline at end of file + ./tests/test_metadata.sh ${{ matrix.series_id }} $${{ matrix.subset_list }} \ No newline at end of file diff --git a/tests/test_metadata.sh b/tests/test_metadata.sh index 2231a80..e42327a 100644 --- a/tests/test_metadata.sh +++ b/tests/test_metadata.sh @@ -2,7 +2,7 @@ set -e if [ "$#" -lt 1 ]; then - echo "Usage: $0 " + echo "Usage: $0 " exit 1 fi From 3c56788fdd41ffddc30953395c68310fd204bd29 Mon Sep 17 00:00:00 2001 From: cellgeni Date: Tue, 21 Jan 2025 13:24:21 +0000 Subject: [PATCH 13/18] fixed typo --- .github/workflows/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index af984d4..6273cf0 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -44,4 +44,4 @@ jobs: run: | chmod +x ./scripts/* chmod +x ./tests/test_metadata.sh - ./tests/test_metadata.sh ${{ matrix.series_id }} $${{ matrix.subset_list }} \ No newline at end of file + ./tests/test_metadata.sh ${{ matrix.series_id }} ${{ matrix.subset_list }} \ No newline at end of file From d75e912ac9fe81291043b129168fd726398c574f Mon Sep 17 00:00:00 2001 From: cellgeni Date: Tue, 21 Jan 
2025 13:35:56 +0000 Subject: [PATCH 14/18] Updated subset testing --- .github/workflows/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 6273cf0..2cae084 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -23,7 +23,7 @@ jobs: - series_id: E-MTAB-9221 subset_list: "" - series_id: GSE111360 - subset_list: ${{ github.workspace }}/test_data/GSE111360/GSE111360.subset.list + subset_list: test_data/GSE111360/GSE111360.subset.list - series_id: GSE117988 subset_list: "" - series_id: GSE160513 From 629c3e129859eca5a462784e0df0042b067ee6f6 Mon Sep 17 00:00:00 2001 From: cellgeni Date: Tue, 21 Jan 2025 13:37:14 +0000 Subject: [PATCH 15/18] Updated subset testing --- tests/test_metadata.sh | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tests/test_metadata.sh b/tests/test_metadata.sh index e42327a..1ea64e2 100644 --- a/tests/test_metadata.sh +++ b/tests/test_metadata.sh @@ -13,6 +13,15 @@ OUTPUT_DIR="${GITHUB_WORKSPACE}/output/$SERIES" # Create output directory and copy all scripts mkdir -p $OUTPUT_DIR cp ./scripts/* $OUTPUT_DIR + +# Copy subset file if provided +if [[ $SAMPLE_LIST ]] +then + cp $SAMPLE_LIST $OUTPUT_DIR + SAMPLE_LIST=$(basename $SAMPLE_LIST) +fi + +# Move to the output directory cd $OUTPUT_DIR # Load metadata From 16de250a27eb8796bd44e72a202812bb457abf53 Mon Sep 17 00:00:00 2001 From: cellgeni Date: Tue, 21 Jan 2025 13:49:37 +0000 Subject: [PATCH 16/18] Updated .yml file to give jobs more meaningful names --- .github/workflows/test.yml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 2cae084..44461c8 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -14,22 +14,32 @@ jobs: include: - series_id: GSE191067 subset_list: "" + comment: "No ENA metadata" - series_id: GSE264508 subset_list: "" + comment: ".fastq and .sra files in ENA metadata" - 
series_id: GSE274955 subset_list: "" + comment: "Broken .sra files. Files in .bam format available" - series_id: GSE250130 subset_list: "" + comment: "No Project or SubProjects in soft_family file" - series_id: E-MTAB-9221 subset_list: "" + comment: "Regular ENA dataset" - series_id: GSE111360 subset_list: test_data/GSE111360/GSE111360.subset.list + comment: "Subset list provided" - series_id: GSE117988 subset_list: "" + comment: "Crap .fastq files, but .sra files are available in ENA metadata" - series_id: GSE160513 subset_list: "" + comment: "Regular GEO dataset" - series_id: PRJNA511433 subset_list: "" + comment: "Regular GEO dataset but using BioProject" + name: "Test ${{ matrix.series_id }}: ${{ matrix.comment }}" steps: - name: Checkout repository From 0af71f3d6f710e7127e969896d4486763202e2e7 Mon Sep 17 00:00:00 2001 From: cellgeni Date: Tue, 21 Jan 2025 16:20:51 +0000 Subject: [PATCH 17/18] Newline character adding to the subset list if there is none --- scripts/collect_metadata.sh | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/scripts/collect_metadata.sh b/scripts/collect_metadata.sh index 1099eb1..b829710 100755 --- a/scripts/collect_metadata.sh +++ b/scripts/collect_metadata.sh @@ -233,7 +233,10 @@ subset_accessions() { then >&2 echo "Narrowing down the dataset using the file $SUBSET" >&2 echo "New list of the samples to be processed:" - >&2 cat $SUBSET + >&2 cat $SUBSET + ## add newline character to the end of the file if there is none + sed -i -e '$a\' "$SUBSET" + ## subset the accessions file grep -f $SUBSET $SERIES.sample.list > $SERIES.sample.list.tmp mv $SERIES.sample.list.tmp $SERIES.sample.list grep -f $SUBSET $SERIES.accessions.tsv > $SERIES.accessions.tsv.tmp From 5a40dd289c4a4ef93cc780e2603c394dcadc2f15 Mon Sep 17 00:00:00 2001 From: cellgeni Date: Tue, 21 Jan 2025 16:35:51 +0000 Subject: [PATCH 18/18] Added copying of the subset list to the main dir --- reprocess_public_10x.sh | 1 + 1 file changed, 1 insertion(+) diff --git 
a/reprocess_public_10x.sh b/reprocess_public_10x.sh index 1183881..b099c07 100755 --- a/reprocess_public_10x.sh +++ b/reprocess_public_10x.sh @@ -40,6 +40,7 @@ if [[ $SUBSET != "" ]] then >&2 echo "WARNING: Using file $SUBSET to only process select samples!" SUBSET=`readlink -f $SUBSET` + cp $SUBSET $SERIES.subset.list if [[ `grep "^GSM" $SUBSET` == "" && `grep "^SRS" $SUBSET` == "" && `grep "^ERS" $SUBSET` == "" ]] then >&2 echo "ERROR: The subset file $SUBSET can only contain GSM, SRS, or ERS IDs!"