108108
# Regex matching the RNAseq sample file naming specification.
# The read number must be exactly '1' or '2'; the class is written '[12]'
# because '[1,2]' would additionally accept a literal comma.
# NOTE(review): the separators around the read number are kept as wildcards
# since the naming specification is not visible here - confirm before
# tightening them to literal dots.
RNASEQ_REG = re.compile(r'.*tumor_rna.[12].fastq\.gz')
# Update from 2021-07-06: Support lanes, indicated with a three digit number
RNASEQ_REG_LANES = re.compile(r'.*tumor_rna_[0-9]{3}.[12].fastq\.gz')

# Path to the openBIS properties file
PROPERTIES = '/etc/openbis.properties'
@@ -214,7 +216,7 @@ def process(transaction):
214216 for in_file in file_list :
215217 if in_file .endswith ('origlabfilename' ) or in_file .endswith ('sha256sum' ) or 'source_dropbox.txt' in in_file :
216218 continue
217- if RNASEQ_REG .findall (in_file ):
219+ if RNASEQ_REG .findall (in_file ) or RNASEQ_REG_LANES . findall ( in_file ) :
218220 rna_seq_files .append (in_file )
219221 elif 'fastq' in in_file :
220222 if 'normal' in in_file :
@@ -253,8 +255,10 @@ def execute_vcf_registration(vcf_files, transaction):
253255
254256
255257def execute_fastq_registration (fastqs_normal , fastqs_tumor , transaction ):
256- if len (fastqs_tumor ) != 2 or len (fastqs_normal ) != 2 :
258+ if len (fastqs_tumor ) < 2 or len (fastqs_normal ) < 2 :
257259 raise mtbutils .MTBdropboxerror ("Tumor/normal fastq dataset was not complete. Please check." )
260+ elif len (fastqs_tumor ) != len (fastqs_normal ):
261+ raise mtbutils .MTBdropboxerror ("Tumor/normal fastq dataset dont have the same number of files. Are all lanes provided?" )
258262 else :
259263 proc_fastq (fastqs_tumor , transaction )
260264 proc_fastq (fastqs_normal , transaction )
@@ -271,14 +275,19 @@ def get_last_exp_id(experiments):
271275 return exp_ids [- 1 ]
272276
273277
def getNextFreeBarcode(projectcode, numberOfBarcodes, transaction, space):
    """Return the next sample barcode of a project that is not yet in use.

    Starting from ``numberOfBarcodes + 1``, candidate codes are built as
    ``projectcode`` + three-digit running number + upper-case letter +
    checksum character. Each candidate is looked up via
    ``transaction.getSampleForUpdate`` under ``/<space>/<code>``; the first
    candidate for which that lookup returns a falsy result is returned.

    :param projectcode: project code used as barcode prefix
    :param numberOfBarcodes: number of barcodes assumed to be taken already;
        the search starts with the following number
    :param transaction: transaction object providing ``getSampleForUpdate``
    :param space: space code used to build the sample identifier
    :return: the first barcode for which no sample was found
    :raises IndexError: if the running number moves past the last letter
        of the alphabet
    """
    letters = string.ascii_uppercase
    while True:
        numberOfBarcodes += 1
        # Floor division keeps the letter index an integer regardless of
        # the interpreter's division semantics ('/' yields a float under
        # true division, which cannot index a string).
        currentLetter = letters[numberOfBarcodes // 999]
        currentNumber = numberOfBarcodes % 999
        code = projectcode + str(currentNumber).zfill(3) + currentLetter
        newSampleCode = code + checksum.checksum(code)
        existingSample = transaction.getSampleForUpdate(
            "/{space}/{sample}".format(space=space, sample=newSampleCode))
        # A falsy lookup result means the code is still free.
        if not existingSample:
            return newSampleCode
282291
283292
284293def register_rnaseq (rna_seq_files , transaction ):
@@ -296,7 +305,8 @@ def register_rnaseq(rna_seq_files, transaction):
296305 the reason for the failure.
297306 """
298307 print (mtbutils .log_stardate ('Registering incoming MTB RNAseq data {}' .format (rna_seq_files )))
299- assert len (rna_seq_files ) == 2
308+ # Check if dataset files are paired end and complete
309+ assert len (rna_seq_files ) % 2 == 0
300310 file1 = os .path .basename (rna_seq_files [0 ])
301311 file2 = os .path .basename (rna_seq_files [1 ])
302312 assert len (set (QCODE_REG .findall (file1 ))) == 1
@@ -315,7 +325,10 @@ def register_rnaseq(rna_seq_files, transaction):
315325 sc .addSubCriteria (SearchSubCriteria .createExperimentCriteria (pc ))
316326 result = search_service .searchForSamples (sc )
317327 print ("Found {} samples for project {} in space {}." .format (len (result ), project , space ))
318- new_rna_sample_barcode = getNextFreeBarcode (project , numberOfBarcodes = len (result ))
328+ new_rna_sample_barcode = getNextFreeBarcode (project ,
329+ numberOfBarcodes = len (result ),
330+ transaction = transaction ,
331+ space = space )
319332
320333 # Now get the parent sample id (tumor sample, type: BIOLOGICAL_SAMPLE)
321334 tumor_dna_sample = getsample (dna_barcode , transaction )
@@ -448,7 +461,7 @@ def proc_fastq(fastq_file, transaction):
448461 """Register fastq as dataset in openBIS"""
449462
450463 # Check, if there are file pairs present (paired-end data!)
451- if len (fastq_file ) != 2 :
464+ if len (fastq_file ) % 2 != 0 :
452465 raise mtbutils .MTBdropboxerror ('Expecting paired end reads files, found only {}'
453466 .format (len (fastq_file )))
454467 qbiccode_f1 = QCODE_REG .findall (os .path .basename (fastq_file [0 ]))
0 commit comments