Skip to content
This repository has been archived by the owner on Feb 23, 2024. It is now read-only.

added missing GD features #98

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 1 addition & 24 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

14 changes: 11 additions & 3 deletions vdj_ann/src/annotate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,7 @@ pub fn annotate_seq(
allow_weak: bool,
allow_improper: bool,
abut: bool,
is_gd: Option<bool>, // is gamma/delta mode
) {
let mut log = Vec::<u8>::new();
annotate_seq_core(
Expand All @@ -170,6 +171,7 @@ pub fn annotate_seq(
abut,
&mut log,
false,
is_gd,
);
}

Expand Down Expand Up @@ -200,6 +202,7 @@ pub fn annotate_seq_core(
abut: bool,
log: &mut Vec<u8>,
verbose: bool,
is_gd: Option<bool>, // is gamma/delta mode
) {
// The DNA string representation is inefficient because it stores bases as packed k-mers
// which requires a lot of array bounds checks when unpacking which was a hot path
Expand All @@ -218,6 +221,8 @@ pub fn annotate_seq_core(
const MIN_PERF_EXT: usize = 5;
const MAX_RATE: f64 = 0.15;

// Resolve the gamma/delta mode flag, treating a missing value (None) as false.
let gd_mode = is_gd.unwrap_or(false);
// Find maximal perfect matches of length >= 20, or 12 for J regions, so long
// as we have extension to a 20-mer with only one mismatch.

Expand Down Expand Up @@ -1470,7 +1475,7 @@ pub fn annotate_seq_core(
}
erase_if(&mut annx, &to_delete);

// For IGH and TRB, if there is a V and J, but no D, look for a D that matches nearly perfectly
// For IGH and TRB (and also TRD when gd_mode is set), if there is a V and J, but no D, look for a D that matches nearly perfectly
// between them. We consider only alignments having no indels. The following conditions
// are required:
// 1. At most three mismatches.
Expand All @@ -1486,7 +1491,8 @@ pub fn annotate_seq_core(
let t = ann.2 as usize;
if !rheaders[t].contains("segment") {
let rt = refdata.rtype[t];
if rt == 0 || rt == 4 {
// IGH and TRB always; TRD only when gd_mode is set
if rt == 0 || rt == 4 || (gd_mode && rt == 5) {
if refdata.segtype[t] == "V" {
v = true;
vstop = ann.0 + ann.1;
Expand Down Expand Up @@ -2078,6 +2084,7 @@ pub fn print_annotations(
allow_improper: bool,
abut: bool,
verbose: bool,
is_gd: Option<bool>, // is gamma/delta mode
) {
let mut ann = Vec::<(i32, i32, i32, i32, i32)>::new();
annotate_seq_core(
Expand All @@ -2089,6 +2096,7 @@ pub fn print_annotations(
abut,
log,
verbose,
is_gd,
);
print_some_annotations(refdata, &ann, log, verbose);
}
Expand Down Expand Up @@ -2717,7 +2725,7 @@ impl ContigAnnotation {
is_gd: Option<bool>, // is gamma/delta mode
) -> ContigAnnotation {
let mut ann = Vec::<(i32, i32, i32, i32, i32)>::new();
annotate_seq(b, refdata, &mut ann, true, false, true);
annotate_seq(b, refdata, &mut ann, true, false, true, is_gd);
let mut log = Vec::<u8>::new();
let productive = is_valid(b, refdata, &ann, false, &mut log, is_gd);
ContigAnnotation::from_annotate_seq(
Expand Down
2 changes: 1 addition & 1 deletion vdj_ann_ref/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,4 @@ pretty_trace = "0.5"
sha2 = ">=0.9.3, <0.11"
string_utils = "0.1"
vector_utils = "0.1"
vdj_ann = "0.4"
vdj_ann = { path = "../vdj_ann" }
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think we want to do this. @DavidBJaffe can comment if this is okay.

2 changes: 1 addition & 1 deletion vdj_ann_ref/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ mod tests {
let mut refdata = RefData::new();
make_vdj_ref_data_core(&mut refdata, refx, &ext_refx, is_tcr, is_bcr, None);
let mut ann = Vec::<(i32, i32, i32, i32, i32)>::new();
annotate_seq(&seq, &refdata, &mut ann, true, false, true);
annotate_seq(&seq, &refdata, &mut ann, true, false, true, None);
let mut have_d = false;
for i in 0..ann.len() {
if refdata.is_d(ann[i].2 as usize) {
Expand Down