Skip to content

Commit d669a3a

Browse files
committed
proper coalesce bamcompare, make tempzip struct global
1 parent e2e975c commit d669a3a

File tree

3 files changed

+54
-55
lines changed

3 files changed

+54
-55
lines changed

src/bamcompare.rs

Lines changed: 39 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ use std::fs::File;
88
use itertools::Itertools;
99
use bigtools::{Value};
1010
use crate::filehandler::{bam_ispaired, write_covfile};
11-
use crate::covcalc::{bam_pileup, parse_regions, Alignmentfilters, region_divider};
11+
use crate::covcalc::{bam_pileup, parse_regions, Alignmentfilters, TempZip, region_divider};
1212
use crate::normalization::scale_factor_bamcompare;
1313
use crate::calc::{median, calc_ratio};
1414
use tempfile::{TempPath};
@@ -68,7 +68,7 @@ pub fn r_bamcompare(
6868
// Set up the bam files in a Vec.
6969
let bamfiles = vec![(bamifile1, ispe1), (bamifile2, ispe2)];
7070

71-
let covcalcs: Vec<ParsedBamFile> = pool.install(|| {
71+
let mut covcalcs: Vec<ParsedBamFile> = pool.install(|| {
7272
bamfiles.par_iter()
7373
.map(|(bamfile, ispe)| {
7474
let (bg, mapped, unmapped, readlen, fraglen) = regionblocks.par_iter()
@@ -102,45 +102,44 @@ pub fn r_bamcompare(
102102
println!("scale factor1 = {}, scale factor2 = {}", sf.0, sf.1);
103103
// Create output stream
104104
let mut chrom = "".to_string();
105-
let lines = covcalcs[0].bg.iter().zip(covcalcs[1].bg.iter()).flat_map(
106-
|(t1, t2)| {
107-
let reader1 = BufReader::new(File::open(t1).unwrap()).lines();
108-
let reader2 = BufReader::new(File::open(t2).unwrap()).lines();
109105

110-
reader1.zip(reader2).map(
111-
|(l1, l2)| {
112-
let l1 = l1.unwrap();
113-
let l2 = l2.unwrap();
114-
let fields1: Vec<&str> = l1.split('\t').collect();
115-
let fields2: Vec<&str> = l2.split('\t').collect();
116-
117-
let chrom1: String = fields1[0].to_string();
118-
let chrom2: String = fields2[0].to_string();
119-
let start1: u32 = fields1[1].parse().unwrap();
120-
let start2: u32 = fields2[1].parse().unwrap();
121-
let end1: u32 = fields1[2].parse().unwrap();
122-
let end2: u32 = fields2[2].parse().unwrap();
123-
124-
// Assert the regions are equal.
125-
assert_eq!(chrom1, chrom2);
126-
assert_eq!(start1, start2);
127-
assert_eq!(end1, end2);
128-
129-
// Calculate the coverage.
130-
let cov1: f32 = fields1[3].parse().unwrap();
131-
let cov2: f32 = fields2[3].parse().unwrap();
132-
let cov = calc_ratio(cov1, cov2, &sf.0, &sf.1, &pseudocount, operation);
133-
134-
(chrom1, Value { start: start1, end: end1, value: cov })
135-
}).coalesce(|p, c| {
136-
if p.1.value == c.1.value {
137-
Ok((p.0, Value {start: p.1.start, end: c.1.end, value: p.1.value}))
138-
} else {
139-
Err((p, c))
140-
}
141-
})
142-
}
143-
);
106+
// Extract both vecs of TempPaths into a single vector
107+
let its = vec![
108+
covcalcs[0].bg.drain(..).collect::<Vec<_>>(),
109+
covcalcs[1].bg.drain(..).collect::<Vec<_>>()
110+
];
111+
let its: Vec<_> = its.iter().map(|x| x.into_iter()).collect();
112+
let zips = TempZip { iterators: its };
113+
let zips_vec: Vec<_> = zips.collect();
114+
115+
let lines = zips_vec
116+
.into_iter()
117+
.flat_map(|c| {
118+
let readers: Vec<_> = c.into_iter().map(|x| BufReader::new(File::open(x).unwrap()).lines()).collect();
119+
let temp_zip = TempZip { iterators: readers };
120+
temp_zip.into_iter().map(|mut _l| {
121+
let lines: Vec<_> = _l
122+
.iter_mut()
123+
.map(|x| x.as_mut().unwrap())
124+
.map(|x| x.split('\t').collect())
125+
.map(|x: Vec<&str>| (x[0].to_string(), x[1].parse::<u32>().unwrap(), x[2].parse::<u32>().unwrap(), x[3].parse::<f32>().unwrap()))
126+
.collect();
127+
assert_eq!(lines.len(), 2);
128+
assert_eq!(lines[0].0, lines[1].0);
129+
assert_eq!(lines[0].1, lines[1].1);
130+
assert_eq!(lines[0].2, lines[1].2);
131+
// Calculate the coverage.
132+
let cov = calc_ratio(lines[0].3, lines[1].3, &sf.0, &sf.1, &pseudocount, operation);
133+
(lines[0].0.clone(), Value { start: lines[0].1, end: lines[0].2, value: cov })
134+
}).coalesce(|p, c| {
135+
if p.1.value == c.1.value && p.0 == c.0 {
136+
Ok((p.0, Value {start: p.1.start, end: c.1.end, value: p.1.value}))
137+
} else {
138+
Err((p, c))
139+
}
140+
})
141+
});
142+
144143
write_covfile(lines, ofile, ofiletype, chromsizes);
145144
Ok(())
146145
}

src/covcalc.rs

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1476,3 +1476,17 @@ impl Bin {
14761476
}
14771477
}
14781478
}
1479+
1480+
/// Lock-step zip over an arbitrary number of iterators of the same type.
///
/// Every call to `next` advances each iterator in `iterators` once and yields
/// the produced items as a `Vec`, in the same order as `iterators`.
pub struct TempZip<I> {
    /// The iterators that are advanced together.
    pub iterators: Vec<I>,
}

impl<I> Iterator for TempZip<I>
where
    I: Iterator,
{
    type Item = Vec<I::Item>;

    /// Returns one item from every iterator, or `None` as soon as any of them
    /// is exhausted.
    ///
    /// An empty `iterators` list also yields `None`: collecting zero `Option`s
    /// would otherwise produce `Some(vec![])` on every call, so the zip would
    /// never terminate.
    fn next(&mut self) -> Option<Self::Item> {
        if self.iterators.is_empty() {
            return None;
        }
        // Collecting into `Option<Vec<_>>` short-circuits on the first `None`,
        // so iterators after the exhausted one are not advanced.
        self.iterators.iter_mut().map(|it| it.next()).collect()
    }
}

src/multibamsummary.rs

Lines changed: 1 addition & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ use std::borrow::Cow;
1313
use std::collections::HashMap;
1414
use std::path::Path;
1515
use std::sync::{Arc, Mutex};
16-
use crate::covcalc::{bam_pileup, parse_regions, Alignmentfilters, region_divider};
16+
use crate::covcalc::{bam_pileup, parse_regions, Alignmentfilters, TempZip, region_divider};
1717
use crate::filehandler::{bam_ispaired, read_bedfile, read_gtffile, chrombounds_from_bam, is_bed_or_gtf};
1818
use crate::calc::{median, calc_ratio, deseq_scalefactors};
1919
use crate::bamcompare::ParsedBamFile;
@@ -268,18 +268,4 @@ pub fn r_mbams(
268268
println!("Matrix written.");
269269
}
270270
Ok(())
271-
}
272-
273-
struct TempZip<I>
274-
where I: Iterator {
275-
iterators: Vec<I>
276-
}
277-
278-
impl<I, T> Iterator for TempZip<I>
279-
where I: Iterator<Item=T> {
280-
type Item = Vec<T>;
281-
fn next(&mut self) -> Option<Self::Item> {
282-
let o: Option<Vec<T>> = self.iterators.iter_mut().map(|x| x.next()).collect();
283-
o
284-
}
285271
}

0 commit comments

Comments
 (0)