Skip to content

Commit 891e582

Browse files
committed
Include intersection of all chromosomes over all bam files, not crash if they are not all matching.
1 parent 31713e6 commit 891e582

5 files changed

+48
-16
lines changed

src/alignmentsieve.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ pub fn r_alignmentsieve(
4747
// shift is of length 0, 2, or 4.
4848

4949
// Define regions
50-
let (regions, chromsizes) = parse_regions(&Vec::new(), bamifile);
50+
let (regions, chromsizes) = parse_regions(&Vec::new(), vec![bamifile]);
5151

5252
let filters = Alignmentfilters{
5353
minmappingquality: min_mapping_quality,

src/bamcompare.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ pub fn r_bamcompare(
6060
};
6161

6262
// Parse regions & calculate coverage. Note that
63-
let (regions, chromsizes) = parse_regions(&regions, bamifile1);
63+
let (regions, chromsizes) = parse_regions(&regions, vec![bamifile1, bamifile2]);
6464
let regionblocks = region_divider(&regions);
6565

6666
let pool = ThreadPoolBuilder::new().num_threads(nproc).build().unwrap();

src/bamcoverage.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ pub fn r_bamcoverage(
7070
}
7171

7272
// Parse regions & calculate coverage
73-
let (regions, chromsizes) = parse_regions(&regions, bamifile);
73+
let (regions, chromsizes) = parse_regions(&regions, vec![bamifile]);
7474
let regionblocks = region_divider(&regions);
7575
let pool = ThreadPoolBuilder::new().num_threads(nproc).build().unwrap();
7676
let (bg, mapped, _unmapped, readlen, fraglen) = pool.install(|| {

src/covcalc.rs

+40-10
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,39 @@ use std::cmp::min;
66
use std::fmt;
77
use ndarray::Array1;
88

9-
pub fn parse_regions(regions: &Vec<(String, u32, u32)>, bam_ifile: &str) -> (Vec<Region>, HashMap<String, u32>) {
9+
pub fn parse_regions(regions: &Vec<(String, u32, u32)>, bam_ifile: Vec<&str>) -> (Vec<Region>, HashMap<String, u32>) {
1010
// Takes a vector of regions, and a bam reference
1111
// returns a vector of regions, with all chromosomes and full lengths if original regions was empty
1212
// Else it validates the regions against the information from the bam header
1313
// Finally, a Vec with chromsizes is returned as well.
14-
15-
let bam = IndexedReader::from_path(bam_ifile).unwrap();
14+
let mut found_chroms: HashMap<String, usize> = HashMap::new();
15+
for bam in bam_ifile.iter() {
16+
let bam = IndexedReader::from_path(bam).unwrap();
17+
let chroms: Vec<String> = bam.header().target_names().iter().map(|x| String::from_utf8(x.to_vec()).unwrap()).collect();
18+
for chrom in chroms.iter() {
19+
// if it's not in the hashmap, add it, else increment count
20+
if !found_chroms.contains_key(chrom) {
21+
found_chroms.insert(chrom.clone(), 1);
22+
} else {
23+
let count = found_chroms.get_mut(chrom).unwrap();
24+
*count += 1;
25+
}
26+
}
27+
}
28+
let mut validchroms: Vec<String> = Vec::new();
29+
// loop over all chroms in the hashmap, if the count is expected, include them
30+
for (chrom, count) in found_chroms.iter() {
31+
if *count == bam_ifile.len() {
32+
validchroms.push(chrom.clone());
33+
} else {
34+
println!("Chromosome {} is missing in at least one bam file, and thus ignored!", chrom);
35+
}
36+
}
37+
// Crash if validchroms is empty.
38+
assert!(!validchroms.is_empty(), "No chromosomes found that are present in all bam files. Did you mix references ?");
39+
println!("Valid chromosomes are: {:?}", validchroms);
40+
// Read header from first bam file
41+
let bam = IndexedReader::from_path(bam_ifile[0]).unwrap();
1642
let header = bam.header().clone();
1743
let mut chromregions: Vec<Region> = Vec::new();
1844
let mut chromsizes = HashMap::new();
@@ -23,6 +49,10 @@ pub fn parse_regions(regions: &Vec<(String, u32, u32)>, bam_ifile: &str) -> (Vec
2349
.expect("Invalid UTF-8 in chromosome name");
2450
let chromlen = header.target_len(tid)
2551
.expect("Error retrieving length for chromosome");
52+
// If chromname is not in validchroms, skip it.
53+
if !validchroms.contains(&chromname) {
54+
continue;
55+
}
2656
let _reg = Region {
2757
chrom: chromname.clone(),
2858
start: Revalue::U(0),
@@ -42,18 +72,18 @@ pub fn parse_regions(regions: &Vec<(String, u32, u32)>, bam_ifile: &str) -> (Vec
4272
.expect("Invalid UTF-8 in chromosome name");
4373
let chromlen = header.target_len(tid)
4474
.expect("Error retrieving length for chromosome");
45-
chromsizes.insert(chromname, chromlen as u32);
75+
if validchroms.contains(&chromname) {
76+
chromsizes.insert(chromname, chromlen as u32);
77+
}
4678
}
47-
let validchroms: Vec<String> = header
48-
.target_names()
49-
.iter()
50-
.map(|x| String::from_utf8(x.to_vec()).unwrap())
51-
.collect();
5279

5380
for region in regions {
5481
let chromname = &region.0;
5582
assert!(region.1 < region.2, "Region start must be strictly less than region end.");
56-
assert!(validchroms.contains(chromname), "Chromosome {} not found in bam header", chromname);
83+
// Check if chromname is in validchroms
84+
if !validchroms.contains(chromname) {
85+
continue;
86+
}
5787
let _reg = Region {
5888
chrom: chromname.clone(),
5989
start: Revalue::U(region.1),

src/multibamsummary.rs

+5-3
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ pub fn r_mbams(
3232
// optional parameters
3333
labels: Py<PyList>,
3434
mut binsize: u32,
35-
distance_between_bins: u32,
35+
mut distance_between_bins: u32,
3636
nproc: usize,
3737
bed_file: Py<PyList>,
3838
sup_regions: Vec<(String, u32, u32)>,
@@ -101,6 +101,8 @@ pub fn r_mbams(
101101
let chromsizes = chrombounds_from_bam(bamfiles.get(0).unwrap());
102102

103103
binsize = 1;
104+
distance_between_bins = 0;
105+
104106
// Parse regions from bed files. Note that we retain the name of the bed file (in case there are more then 1)
105107
// Additionaly, score and strand are also retained, if it's a 3-column bed file we just fill in '.'
106108
let mut regionsizes: HashMap<String, u32> = HashMap::new();
@@ -120,9 +122,9 @@ pub fn r_mbams(
120122
});
121123
} else {
122124
if verbose {
123-
println!("BINS mode. with binsize: {}", binsize);
125+
println!("BINS mode. with binsize: {}, distance between bins: {}", binsize, distance_between_bins);
124126
}
125-
let (parsedregions, chromsizes) = parse_regions(&sup_regions, bamfiles.get(0).unwrap());
127+
let (parsedregions, chromsizes) = parse_regions(&sup_regions, bamfiles.iter().map(|x| x.as_str()).collect());
126128
regions = parsedregions;
127129
}
128130

0 commit comments

Comments
 (0)