diff --git a/Cargo.lock b/Cargo.lock index ce6f0274..9043c325 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1476,29 +1476,6 @@ dependencies = [ "vector_utils 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", ] -[[package]] -name = "vdj_ann" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf2382451427e4e963c910f39d8823ea29d015c5d77193db2aa4278c429a9dc8" -dependencies = [ - "align_tools 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)", - "amino 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)", - "bio_edit", - "debruijn", - "fasta_tools 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)", - "hyperbase 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)", - "io_utils 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)", - "itertools", - "kmer_lookup 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", - "serde", - "serde_json", - "stats_utils 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", - "string_utils 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)", - "vdj_types 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", - "vector_utils 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", -] - [[package]] name = "vdj_ann_ref" version = "0.2.0" @@ -1512,7 +1489,7 @@ dependencies = [ "pretty_trace 0.5.23 (registry+https://github.com/rust-lang/crates.io-index)", "sha2", "string_utils 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)", - "vdj_ann 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", + "vdj_ann", "vector_utils 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", ] diff --git a/vdj_ann/src/annotate.rs b/vdj_ann/src/annotate.rs index 58c1ad33..6cdbce32 100644 --- a/vdj_ann/src/annotate.rs +++ b/vdj_ann/src/annotate.rs @@ -159,6 +159,7 @@ pub fn annotate_seq( allow_weak: bool, allow_improper: bool, abut: bool, + is_gd: Option<bool>, // is gamma/delta 
mode ) { let mut log = Vec::::new(); annotate_seq_core( @@ -170,6 +171,7 @@ pub fn annotate_seq( abut, &mut log, false, + is_gd, ); } @@ -200,6 +202,7 @@ pub fn annotate_seq_core( abut: bool, log: &mut Vec, verbose: bool, + is_gd: Option<bool>, // is gamma/delta mode ) { // The DNA string representation is inefficient because it stores bases as packed k-mers // which requires a lot of array bounds checks when unpacking which was a hot path @@ -218,6 +221,8 @@ pub fn annotate_seq_core( const MIN_PERF_EXT: usize = 5; const MAX_RATE: f64 = 0.15; + // Unwrap gamma/delta mode flag + let gd_mode = is_gd.unwrap_or(false); // Find maximal perfect matches of length >= 20, or 12 for J regions, so long // as we have extension to a 20-mer with only one mismatch. @@ -1470,7 +1475,7 @@ pub fn annotate_seq_core( } erase_if(&mut annx, &to_delete); - // For IGH and TRB, if there is a V and J, but no D, look for a D that matches nearly perfectly + // For IGH, TRB and TRD in gd_mode, if there is a V and J, but no D, look for a D that matches nearly perfectly // between them. We consider only alignments having no indels. The following conditions // are required: // 1. At most three mismatches. 
@@ -1486,7 +1491,8 @@ pub fn annotate_seq_core( let t = ann.2 as usize; if !rheaders[t].contains("segment") { let rt = refdata.rtype[t]; - if rt == 0 || rt == 4 { + // IGH, TRB, or TRD in gd_mode + if rt == 0 || rt == 4 || (gd_mode && rt == 5) { if refdata.segtype[t] == "V" { v = true; vstop = ann.0 + ann.1; @@ -2078,6 +2084,7 @@ pub fn print_annotations( allow_improper: bool, abut: bool, verbose: bool, + is_gd: Option<bool>, // is gamma/delta mode ) { let mut ann = Vec::<(i32, i32, i32, i32, i32)>::new(); annotate_seq_core( @@ -2089,6 +2096,7 @@ pub fn print_annotations( abut, log, verbose, + is_gd, ); print_some_annotations(refdata, &ann, log, verbose); } @@ -2717,7 +2725,7 @@ impl ContigAnnotation { is_gd: Option<bool>, // is gamma/delta mode ) -> ContigAnnotation { let mut ann = Vec::<(i32, i32, i32, i32, i32)>::new(); - annotate_seq(b, refdata, &mut ann, true, false, true); + annotate_seq(b, refdata, &mut ann, true, false, true, is_gd); let mut log = Vec::::new(); let productive = is_valid(b, refdata, &ann, false, &mut log, is_gd); ContigAnnotation::from_annotate_seq( diff --git a/vdj_ann_ref/Cargo.toml b/vdj_ann_ref/Cargo.toml index 525a90ae..8dce3608 100644 --- a/vdj_ann_ref/Cargo.toml +++ b/vdj_ann_ref/Cargo.toml @@ -21,4 +21,4 @@ pretty_trace = "0.5" sha2 = ">=0.9.3, <0.11" string_utils = "0.1" vector_utils = "0.1" -vdj_ann = "0.4" +vdj_ann = { path = "../vdj_ann" } diff --git a/vdj_ann_ref/src/lib.rs b/vdj_ann_ref/src/lib.rs index 20ac6be9..1fde3a98 100644 --- a/vdj_ann_ref/src/lib.rs +++ b/vdj_ann_ref/src/lib.rs @@ -122,7 +122,7 @@ mod tests { let mut refdata = RefData::new(); make_vdj_ref_data_core(&mut refdata, refx, &ext_refx, is_tcr, is_bcr, None); let mut ann = Vec::<(i32, i32, i32, i32, i32)>::new(); - annotate_seq(&seq, &refdata, &mut ann, true, false, true); + annotate_seq(&seq, &refdata, &mut ann, true, false, true, None); let mut have_d = false; for i in 0..ann.len() { if refdata.is_d(ann[i].2 as usize) {