Skip to content

Commit

Permalink
Includes mouse searches and several minor fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
niclas-thomas committed Aug 21, 2014
1 parent 0fac88e commit 6a52932
Show file tree
Hide file tree
Showing 26 changed files with 2,097 additions and 0 deletions.
996 changes: 996 additions & 0 deletions DecombinatorFunctionsV2_2.py

Large diffs are not rendered by default.

86 changes: 86 additions & 0 deletions DecombinatorV2_2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
## Executes functions from functionsv2.x
## according to arguments parsed below

import sys, argparse, os
import DecombinatorFunctionsV2_2 as f
import Plotting as p
import ShortReadDecombinator as ShortReads
import warnings

## Silence all warnings. The original intent was to suppress the warning Biopython
## raises when translating sequences whose length is not a multiple of 3.
warnings.filterwarnings("ignore")


def _str2bool(value):
    """Convert a command-line True/False string into a real bool.

    argparse's ``type=bool`` simply calls ``bool()`` on the raw string, so any
    non-empty text (including 'False') becomes True. This converter reads the
    text instead.

    value -- the raw option string (an existing bool is passed through).
    Returns True or False.
    Raises argparse.ArgumentTypeError for unrecognised text, which argparse
    reports as a normal usage error.
    """
    if isinstance(value, bool):
        return value
    text = str(value).strip().lower()
    if text in ('true', 't', 'yes', 'y', '1'):
        return True
    # The empty string stays False, exactly as bool('') was before.
    if text in ('false', 'f', 'no', 'n', '0', ''):
        return False
    raise argparse.ArgumentTypeError('Expected True or False, got %r' % (value,))


## Command-line interface. Options are added in the original order so that the
## --help output is unchanged.
parser = argparse.ArgumentParser(description='Decombinator v2.2')
parser.add_argument('-i', '--input', help='Enter the path to the input fastq file you wish to analyse', required=True)
parser.add_argument('-o', '--output', help='Enter the name you wish to call the output results files', required=True)
parser.add_argument('-rev', '--reversecomplement', help='Enter True or False for whether a search on reverse complement of sequences is also desired', required=False, default=True, type=_str2bool)
parser.add_argument('-b', '--barcode', help='Enter True or False for whether sequences contain barcodes', required=False, default=False, type=_str2bool)
parser.add_argument('-bs1', '--barcodebegin1', help='Enter integer defining the start of the first barcode region', required=False, type=int)
parser.add_argument('-be1', '--barcodefinish1', help='Enter integer defining the end of the first barcode region', required=False, type=int)
parser.add_argument('-bs2', '--barcodebegin2', help='Enter integer defining the start of the second barcode region', required=False, type=int)
parser.add_argument('-be2', '--barcodefinish2', help='Enter integer defining the end of the second barcode region', required=False, type=int)
parser.add_argument('-p', '--withplots', help='Enter True or False', required=False, default=False, type=_str2bool)
parser.add_argument('-sh', '--shortreads', help='Enter True or False', required=False, default=False, type=_str2bool)
parser.add_argument('-c', '--count', help='Enter True or False', required=False, default=False, type=_str2bool)
parser.add_argument('-of', '--outofframe', help='Enter True or False', required=False, default=False, type=_str2bool)
parser.add_argument('-f', '--fullsequence', help='Enter True or False', required=False, default=False, type=_str2bool)
parser.add_argument('-ch', '--chaintype', help='Enter alpha, beta, gamma, delta or all', required=False, default='all')
parser.add_argument('-s', '--speciestype', help='Enter human or mouse', required=False, default='human')

## Parsed options as a plain dict keyed by the long option names.
args = vars(parser.parse_args())

## Unpack the parsed options into the module-level names used by the rest of
## the script, grouped by purpose.

# Input file and the name used for the results.
inputfile, outputfile = args['input'], args['output']

# Search behaviour and barcode settings.
revsearch, barcoding = args['reversecomplement'], args['barcode']
barcodestart1, barcodeend1 = args['barcodebegin1'], args['barcodefinish1']
barcodestart2, barcodeend2 = args['barcodebegin2'], args['barcodefinish2']

# Optional processing switches.
include_plots, forshortreads = args['withplots'], args['shortreads']
withcount, outframe, fullseq = args['count'], args['outofframe'], args['fullsequence']

# Chain and species selection.
chain, species = args['chaintype'], args['speciestype']

## Create the results folder, then run either the standard analysis or the
## short-read analysis, depending on the --shortreads option.
newpath = f.create_folder(outputfile)

if forshortreads != True:
    # Standard pipeline: forward the command-line settings as keyword arguments.
    analysis_options = {
        'with_reverse_complement_search': revsearch,
        'barcode': barcoding,
        'barcodestart1': barcodestart1,
        'barcodeend1': barcodeend1,
        'barcodestart2': barcodestart2,
        'barcodeend2': barcodeend2,
        'newpath': newpath,
        'omitN': True,
        'chain': chain,
        'species': species,
    }
    f.analysis(inputfile, outputfile, **analysis_options)
else:
    # Short-read pipeline. The reference files are looked up in the current
    # working directory.
    # NOTE(review): these are the human TRBV/TRBJ files whatever --speciestype
    # and --chaintype say -- confirm this is intended.
    workdir = os.getcwd()
    vfile = workdir + '/human_TRBV_region.fasta'
    jfile = workdir + '/human_TRBJ_region.fasta'
    short_read_setup = ShortReads.setup(vfile, jfile)
    v_key, j_key, v_regions, j_regions = short_read_setup
    fileid = str(outputfile)
    # Values passed straight through to ShortReads.analyse_file; their meaning
    # is defined there.
    param_set = [10, 2, 1400, 1.05]
    ShortReads.analyse_file(inputfile, newpath, fileid, v_key, j_key, v_regions, j_regions, param_set)

## For every chain whose results file is non-empty, collapse the results to
## distinct clones and translate the sequences.
## NOTE: this loop rebinds the module-level name `chain` (previously the
## --chaintype value); after the loop it holds 'gamma'.
chains = ['alpha', 'beta', 'delta', 'gamma']
for chain in chains:
    results_file = newpath + outputfile + '_' + chain + '.txt'
    if os.stat(results_file).st_size == 0:
        # Empty file, i.e. no sequences were found for this chain.
        continue

    # print(...) with one parenthesised string prints the same text under the
    # Python 2 print statement this script was written for.
    print('Getting distinct clones for TcR' + chain)
    # Open the file afresh for each step so each reads from the start, and
    # close the handle as soon as that step has finished.
    with open(results_file, "rU") as handle:
        f.get_distinct_clones(handle, handle_results=newpath + 'distinct_clones' + '_' + chain, with_count=withcount)

    print('Translating sequences for TcR' + chain)
    with open(results_file, "rU") as handle:
        f.get_translated_sequences(handle, handle_results=newpath + 'translated_sequences' + '_' + chain, chain=str(chain), species=species, with_outframe=outframe, fullaaseq=fullseq)

def _plot_chain_results(results_file, chain_name, species_name, out_prefix, suffix):
    """Produce the six summary plots for one chain's results file.

    results_file -- path of the results text file to plot from.
    chain_name   -- chain passed to the plots that take `chain`/`species`.
    species_name -- species passed to those same plots.
    out_prefix   -- text prepended to every saved plot name (the results folder).
    suffix       -- text appended to every saved plot name ('' for none).

    Plots run in the original order. The results file is re-opened for each
    plot so every plot reads from the start, and is closed afterwards.
    """
    # (plot function, whether it takes chain/species, saved-file stem)
    plots = [
        (p.plot_v_usage, True, 'Vusage'),
        (p.plot_j_usage, True, 'Jusage'),
        (p.plot_del_v, False, 'Vdels'),
        (p.plot_del_j, False, 'Jdels'),
        (p.plot_vj_joint_dist, True, 'VJusage'),
        (p.plot_insert_lengths, False, 'InsertLengths'),
    ]
    for plot, takes_chain, stem in plots:
        options = {'savefilename': out_prefix + stem + suffix}
        if takes_chain:
            options['chain'] = chain_name
            options['species'] = species_name
        with open(results_file, "rU") as handle:
            plot(handle, **options)


## Optional plotting of the analysis results (--withplots).
if include_plots:
    print('Plotting the results of the analysis')
    if forshortreads:
        # Short-read mode plots only the beta results, with unsuffixed names.
        # The chain is given explicitly as 'beta': the original passed the
        # leftover loop variable to plot_vj_joint_dist, which by this point
        # holds 'gamma', not the chain of the file being plotted.
        results_file = newpath + outputfile + '_beta' + '.txt'
        if os.stat(results_file).st_size != 0:  # non-empty, i.e. sequences were found
            _plot_chain_results(results_file, 'beta', species, newpath, '')
    else:
        chains = ['alpha', 'beta', 'delta', 'gamma']
        for chain in chains:
            results_file = newpath + outputfile + '_' + chain + '.txt'
            if os.stat(results_file).st_size != 0:  # non-empty, i.e. sequences were found
                # Each chain's plots are saved with the chain name as suffix.
                _plot_chain_results(results_file, chain, species, newpath, chain)
50 changes: 50 additions & 0 deletions humantags_traj.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
CAAACTCACCTTTGGGACAG 20 M94081|TRAJ10*01|
CTCACCTTTGGGAAGGGGAC 20 M94081|TRAJ11*01|
TTGATCTTCGGGAGTGGGAC 20 X02885|TRAJ12*01|
AAAGTTACCTTTGGAATTGG 20 M94081|TRAJ13*01|
TGGGAGTGGGACAAGATTAT 20 M94081|TRAJ14*01|
CTGATCTTTGGGAAGGGAAC 20 X05775|TRAJ15*01|
CTGCTCTTTGCAAGGGGAAC 20 M94081|TRAJ16*01|
AAGCTAACTTTTGGAGGAGG 20 X05773|TRAJ17*01|
GGGAGGCTATACTTTGGAAG 20 M94081|TRAJ18*01|
AGCTTTGGAGCCGGAACCAC 20 M94081|TRAJ20*01|
CTTTGGATCTGGGACCAAAC 20 M94081|TRAJ21*01|
CAACTGACCTTTGGATCTGG 20 X02886|TRAJ22*01|
AAGCTTATCTTCGGACAGGG 20 M94081|TRAJ23*01|
AAATTCGAGTTTGGAGCAGG 20 X02887|TRAJ24*01|
TTTGTCTTTGGTCCCGGAAC 20 M94081|TRAJ26*01|
CAACCTTTGGGGATGGGACT 20 M94081|TRAJ27*01|
TACCAACTCACTTTCGGGAA 20 M94081|TRAJ28*01|
CTTGTCTTTGGAAAGGGCAC 20 M94081|TRAJ29*01|
AGATAATCTTTGGATCAGGG 20 X02884|TRAJ3*01|
ATCTTTGGAAAAGGGACACG 20 M94081|TRAJ30*01|
TGTTTGGAGATGGAACTCAG 20 M14905|TRAJ31*01|
AACAAGCTCATCTTTGGAAC 20 M94081|TRAJ32*01|
ATCTGGGGCGCTGGGACCAA 20 M94081|TRAJ33*01|
CATCTTTGGGACTGGGACCA 20 M35622|TRAJ34*01|
TCTTCTTTGGGACTGGAACG 20 M94081|TRAJ36*01|
AACTAATCTTTGGGCAAGGG 20 M94081|TRAJ37*01|
AGCTGATTTGGGGATTGGGA 20 M94081|TRAJ38*01|
ATGCTCACCTTTGGAGGGGG 20 M94081|TRAJ39*01|
AAGCTGATTTTTGGAGCAGG 20 M94081|TRAJ4*01|
ATACATCTTTGGAACAGGCA 20 M35620|TRAJ40*01|
CACTCAACTTCGGCAAAGGC 20 M94081|TRAJ41*01|
GGAAATCTCATCTTTGGAAA 20 M94081|TRAJ42*01|
TTTGGAGCAGGGACCAGACT 20 M94081|TRAJ43*01|
AAACTCACCTTTGGGACTGG 20 M35619|TRAJ44*01|
GACGGACTCACCTTTGGCAA 20 M94081|TRAJ45*01|
AAGCTGACTTTTGGGACCGG 20 M94081|TRAJ46*01|
GTCTTTGGCGCAGGAACCAT 20 M94081|TRAJ47*01|
AAATTAACCTTTGGGACTGG 20 M94081|TRAJ48*01|
ATTTTGGGACAGGGACAAGT 20 M94081|TRAJ49*01|
CTTACTTTTGGGAGTGGAAC 20 M94081|TRAJ5*01|
GTGATATTTGGGCCAGGGAC 20 M94081|TRAJ50*01|
TATGGAAAGCTGACATTTGG 20 M94081|TRAJ52*01|
TATAAACTGACATTTGGAAA 20 M94081|TRAJ53*01|
CTGGTATTTGGCCAAGGAAC 20 M94081|TRAJ54*01|
AGCTGACATTTGGAAAAGGA 20 M94081|TRAJ56*01|
AAGCTGGTCTTTGGAAAGGG 20 M94081|TRAJ57*01|
TACCTACATTTGGAAGAGGA 20 M16747|TRAJ6*01|
TCGCTTTTGGGAAGGGGAAC 20 M94081|TRAJ7*01|
CTTGTATTTGGAACTGGCAC 20 M94081|TRAJ8*01|
AACTATCTTTGGAGCAGGAA 20 M94081|TRAJ9*01|
47 changes: 47 additions & 0 deletions humantags_trav.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
GAAAGACTCTGCCTCTTACT 36 AE000658|TRAV1-1*01|
GAAAGACTCTGCCTCTTACC 36 AE000658|TRAV1-2*01|
CAGCTCAGCGATTCAGCCTC 40 AE000659|TRAV10*01|
AAGCTCAGTGATTCAGCCAC 40 AE000659|TRAV12-1*01|
CCAGCCCAGTGATTCAGCCA 41 AE000659|TRAV12-2*01|
ACAGCCCAGTGATTCAGCCA 41 AE000659|TRAV12-3*01|
CAACCTGAAGACTCGGCTGT 40 AE000659|TRAV13-1*01|
CAACCTGGAGACTCAGCTGT 40 AE000659|TRAV13-2*01|
TGGGGGACTCAGCAATGTAC 40 M21626|TRAV14,DV4*01|
AAGAGGAAGACTCAGCCATG 40 AE000659|TRAV16*01|
CGGGCAGCAGACACTGCTTC 40 AE000660|TRAV17*01|
AGCTGTCGGACTCTGCCGTG 40 AE000660|TRAV18*01|
TCGTGGACTCAGCAGTATAC 40 AE000660|TRAV19*01|
GGGAGGCAGATGCTGCTGTT 40 AE000658|TRAV2*01|
AAACCTGAAGACTCAGCCAC 40 AE000660|TRAV20*01|
TCAGCCTGGTGACTCAGCCA 40 AE000660|TRAV21*01|
CAGACCACAGACTCAGGCGT 40 AE000660|TRAV22*01|
CAGCCTGGAGACTCAGCCAC 40 AE000660|TRAV23,DV6*01|
TCCCAGCCTGAAGACTCAGC 40 AE000660|TRAV24*01|
CACCCAGACTACAGATGTAG 40 AE000660|TRAV25*01|
CTGAGAGACACTGCTGTGTA 40 AE000660|TRAV26-1*01|
CTTGAGAGATGCTGCTGTGT 40 AE000660|TRAV26-2*01|
GCCCAGCCTGGTGATACAGG 40 AE000660|TRAV27*01|
CAGCCTGGAGACTCTGCAGT 40 AE000660|TRAV29,DV5*01|
GTGAGCGACTCCGCTTTGTA 40 AE000658|TRAV3*01|
CAGCTCAGTTACTCAGGAAC 40 AE000660|TRAV30*01|
CAGCCCAGCCATGCAGGCAT 40 AE000660|TRAV34*01|
CATACCTAGTGATGTAGGCA 40 AE000660|TRAV35*01|
CAGACCGGAGACTCGGCCAT 40 AE000660|TRAV36,DV7*01|
TGGGGGACACTGCGATGTAT 40 AE000661|TRAV38-1*01|
CTGGGGGATGCCGCGATGTA 40 AE000661|TRAV38-2,DV8*01|
GTGCATGACCTCTCTGCCAC 40 AE000661|TRAV39*01|
CTGAGCGACACTGCTGTGTA 40 AE000658|TRAV4*01|
AGGTATCAGACTCAGCCGTG 40 X73521|TRAV40*01|
CCATCCCAGAGACTCTGCCG 40 AE000661|TRAV41*01|
CAGACTGGGGACTCAGCTAT 40 AE000659|TRAV5*01|
CAGCCTGCAGACTCAGCTAC 40 AE000659|TRAV6*01|
CAGCCTGAAGATTCAGCCAC 40 AE000659|TRAV7*01|
AGTGGAGTGACACAGCTGAG 40 AE000659|TRAV8-1*01|
ATATGAGCGACGCGGCTGAG 40 AE000659|TRAV8-2*01,TRAV8-4*01|
ATTGGAGTGATGCTGCTGAG 40 AE000659|TRAV8-3*01|
ATATAAGCGACACGGCTGAG 40 X02850|TRAV8-6*01|
AAGAGTCAGACTCCGCTGTG 40 AE000659|TRAV9-1*01|
AAGTGTCAGACTCAGCGGTG 40 AE000659|TRAV9-2*01|
TAGAAGATTCAGCAAAGTAC 40 M22198|TRDV1*01|
AGAGAGAGATGAAGGGTCTT 40 X15207|TRDV2*01|
TAAGGACTGAAGACAGTGCC 40 M23326|TRDV3*01|
13 changes: 13 additions & 0 deletions humantags_trbj.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
TCTTTGGACAAG 15 K02545|TRBJ1-1*01|Homo
CCTTCGGTTCGG 15 K02545|TRBJ1-2*01|Homo
ATATTTTGGAGA 15 M14158|TRBJ1-3*01|Homo
TGTTTTTTGGCA 15 M14158|TRBJ1-4*01|Homo
GCATTTTGGTGA 15 M14158|TRBJ1-5*01|Homo
CCTCCACTTTGG 15 M14158|TRBJ1-6*01|Homo
GTTCTTCGGGCC 15 X02987|TRBJ2-1*01|Homo
GCTGTTTTTTGGAG 13 X02987|TRBJ2-2*01|Homo
TATTTTGGCCCA 15 X02987|TRBJ2-3*01|Homo
GTACTTCGGCGC 15 X02987|TRBJ2-4*01|Homo
ACTTCGGGCCAG 15 X02987|TRBJ2-5*01|Homo
CCTGACTTTCGG 15 X02987|TRBJ2-6*01|Homo
GTACTTCGGGCCGGG 12 M14159|TRBJ2-7*01|Homo
45 changes: 45 additions & 0 deletions humantags_trbv.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
CCCAGACATCTGTATATTTC 37 U66059|TRBV10-1*01|Homo
CCCAGACATCTGTGTATTTC 37 U66059|TRBV10-2*01|Homo
AGCTCCCAGACATCTGTGTA 41 U03115|TRBV10-3*01|Homo
TTGGGGACTCGGCCATGTAT 40 M33233|TRBV11-1*01|Homo
TTGAGGACTCGGCCGTGTAT 40 U66059|TRBV11-2*01|Homo
TTGGGGACTCGGCCGTGTAT 40 U03115|TRBV11-3*01|Homo
CCAGGGACTCAGCTGTGTAC 40 K02546|TRBV12-4*01|Homo
CCAGGGACTCAGCTGTGTAT 40 X07223|TRBV12-5*01|Homo
GGGACTCAGCCCTGTACTTC 37 U03115|TRBV13*01|Homo
TGGAGGATTCTGGAGTTTAT 40 X06154|TRBV14*01|Homo
GGGACACAGCCATGTACCTG 37 U03115|TRBV15*01|Homo
TTGAGGATTCAGCAGTGTAT 40 L26231|TRBV16*01|Homo
GAGGAGATTCGGCAGCTTAT 40 L36092|TRBV18*01|Homo
ACCCGACAGCTTTCTATCTC 37 L36092|TRBV19*01|Homo
TGGAGGACTCAGCCATGTAC 40 U66059|TRBV2*01|Homo
CCCATCCTGAAGACAGCAGC 43 M11955|TRBV20-1*01|Homo
ACCAGACAGCTCTTTACTTC 38 M11951|TRBV24-1*01|Homo
CACATACCTCTCAGTACCTC 37 L36092|TRBV25-1*01|Homo
ACCAGACCTCTCTGTACTTC 37 L36092|TRBV27*01|Homo
ACCAGACATCTATGTACCTC 37 U08314|TRBV28*01|Homo
GCCCTGAAGACAGCAGCATA 40 L36092|TRBV29-1*01|Homo
ACTCTGCTGTGTATTTCTGT 34 U07977|TRBV3-1*01|Homo
GTGACTCTGGCTTCTATCTC 34 U66061|TRBV30*01|Homo
ACTCAGCCCTGTATCTCTGC 34 U07977|TRBV4-1*01|Homo
AGAAGACTCGGCCCTGTATCTCTGT 39 U07975|TRBV4-2*01|Homo
ACTCGGCCCTGTATCTCTGC 34 U07978|TRBV4-3*01|Homo
GGGACTCGGCCCTTTATCTT 36 U66059|TRBV5-1*01|Homo
ACGACTCGGCCCTGTATCTC 36 L36092|TRBV5-4*01|Homo
GGGACTCGGCCCTGTATCTC 36 L36092|TRBV5-5*01|Homo
GGGACTCGGCCCTCTATCTC 36 L36092|TRBV5-6*01|Homo
AGGACTCGGCCCTGTATCTC 36 L36092|TRBV5-8*01|Homo
AGTCGGCTGCTCCCTCCCAG 52 X61446|TRBV6-1*01|Homo
CTCAGACATCTGTGTACTTC 37 X61653|TRBV6-4*01|Homo
CTGTCGGCTGCTCCCTCCCA 53 U66059|TRBV6-5*01|Homo
AGTTGGCTGCTCCCTCCCAG 52 L36092|TRBV6-6*01|Homo
AGACATCTGTGTACTTGTGT 34 L36092|TRBV6-8*01|Homo
CCCAGACATCTGTATACTTC 37 X61447|TRBV6-9*01|Homo
AGGAGGACTCGGCCGTGTAT 40 X61442|TRBV7-2*01|Homo
GGGGGGACTCAGCCGTGTAT 40 X61440|TRBV7-3*01|Homo
AGGGGGACTCAGCTGTGTAT 40 L36092|TRBV7-4*01|Homo
AGCGGGACTCGGCCATGTAT 40 L36092|TRBV7-6*01|Homo
AGCGGGACTCAGCCATGTAT 40 L36092|TRBV7-7*01|Homo
AGGAGGACTCCGCCGTGTAT 40 M11953|TRBV7-8*01|Homo
AGGGGGACTCGGCCATGTAT 40 L36092|TRBV7-9*01|Homo
GGGACTCAGCTTTGTATTTC 36 U66059|TRBV9*01|Homo
4 changes: 4 additions & 0 deletions humantags_trdj.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
TCTTTGGAAAAG 15 M20289|TRDJ1*01|Homo
TCTTCTTTGGAA 15 L36386|TRDJ2*01|Homo
ACAGATGTTTTT 15 M21508|TRDJ3*01|Homo
TTGGCAAAGGAA 15 AJ249814|TRDJ4*01|Homo
3 changes: 3 additions & 0 deletions humantags_trdv.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
TCAGCAAAGTACTTTTGTGC 32 M22198|TRDV1*01|Homo
GATGAAGGGTCTTACTACTG 33 X15207|TRDV2*01|Homo
ACTGAAGACAGTGCCACTTA 35 M23326|TRDV3*01|Homo
5 changes: 5 additions & 0 deletions humantags_trgj.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
GGAACAACACTG 28 M12960|TRGJ1*01|Homo
GGAACAACACTT 28 M12961|TRGJ2*01|Homo
CAAAAAAATCAA 15 M12950|TRGJP*01|Homo
TCAAGATATTTG 15 X08084|TRGJP1*01|Homo
TCAAGACGTTTG 15 M16016|TRGJP2*01|Homo
6 changes: 6 additions & 0 deletions humantags_trgv.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
ATTGAAAATGACTCTGGGGT 45 M13429|TRGV2*01|Homo
AAAATCTAATTGAAAATGAT 53 M13430|TRGV3*01|Homo
ATTGAAAATGACTCTGGAGT 45 X15272|TRGV4*01|Homo
GAAATCTAATTGAAAATGAT 53 X13355|TRGV5*01|Homo
ATTGAACGTGACTCTGGGGT 45 M13434|TRGV8*01|Homo
AATGTAGAGAAACAGGACAT 51 X07205|TRGV9*01|Homo
Loading

0 comments on commit 6a52932

Please sign in to comment.