Skip to content

Commit

Permalink
proper coalesce bamcompare, make tempzip struct global
Browse files Browse the repository at this point in the history
  • Loading branch information
WardDeb committed Jan 31, 2025
1 parent e2e975c commit d669a3a
Show file tree
Hide file tree
Showing 3 changed files with 54 additions and 55 deletions.
79 changes: 39 additions & 40 deletions src/bamcompare.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ use std::fs::File;
use itertools::Itertools;
use bigtools::{Value};
use crate::filehandler::{bam_ispaired, write_covfile};
use crate::covcalc::{bam_pileup, parse_regions, Alignmentfilters, region_divider};
use crate::covcalc::{bam_pileup, parse_regions, Alignmentfilters, TempZip, region_divider};
use crate::normalization::scale_factor_bamcompare;
use crate::calc::{median, calc_ratio};
use tempfile::{TempPath};
Expand Down Expand Up @@ -68,7 +68,7 @@ pub fn r_bamcompare(
// Set up the bam files in a Vec.
let bamfiles = vec![(bamifile1, ispe1), (bamifile2, ispe2)];

let covcalcs: Vec<ParsedBamFile> = pool.install(|| {
let mut covcalcs: Vec<ParsedBamFile> = pool.install(|| {
bamfiles.par_iter()
.map(|(bamfile, ispe)| {
let (bg, mapped, unmapped, readlen, fraglen) = regionblocks.par_iter()
Expand Down Expand Up @@ -102,45 +102,44 @@ pub fn r_bamcompare(
println!("scale factor1 = {}, scale factor2 = {}", sf.0, sf.1);
// Create output stream
let mut chrom = "".to_string();
let lines = covcalcs[0].bg.iter().zip(covcalcs[1].bg.iter()).flat_map(
|(t1, t2)| {
let reader1 = BufReader::new(File::open(t1).unwrap()).lines();
let reader2 = BufReader::new(File::open(t2).unwrap()).lines();

reader1.zip(reader2).map(
|(l1, l2)| {
let l1 = l1.unwrap();
let l2 = l2.unwrap();
let fields1: Vec<&str> = l1.split('\t').collect();
let fields2: Vec<&str> = l2.split('\t').collect();

let chrom1: String = fields1[0].to_string();
let chrom2: String = fields2[0].to_string();
let start1: u32 = fields1[1].parse().unwrap();
let start2: u32 = fields2[1].parse().unwrap();
let end1: u32 = fields1[2].parse().unwrap();
let end2: u32 = fields2[2].parse().unwrap();

// Assert the regions are equal.
assert_eq!(chrom1, chrom2);
assert_eq!(start1, start2);
assert_eq!(end1, end2);

// Calculate the coverage.
let cov1: f32 = fields1[3].parse().unwrap();
let cov2: f32 = fields2[3].parse().unwrap();
let cov = calc_ratio(cov1, cov2, &sf.0, &sf.1, &pseudocount, operation);

(chrom1, Value { start: start1, end: end1, value: cov })
}).coalesce(|p, c| {
if p.1.value == c.1.value {
Ok((p.0, Value {start: p.1.start, end: c.1.end, value: p.1.value}))
} else {
Err((p, c))
}
})
}
);
// Extract both vecs of TempPaths into a single vector
let its = vec![
covcalcs[0].bg.drain(..).collect::<Vec<_>>(),
covcalcs[1].bg.drain(..).collect::<Vec<_>>()
];
let its: Vec<_> = its.iter().map(|x| x.into_iter()).collect();
let zips = TempZip { iterators: its };
let zips_vec: Vec<_> = zips.collect();

let lines = zips_vec
.into_iter()
.flat_map(|c| {
let readers: Vec<_> = c.into_iter().map(|x| BufReader::new(File::open(x).unwrap()).lines()).collect();
let temp_zip = TempZip { iterators: readers };
temp_zip.into_iter().map(|mut _l| {
let lines: Vec<_> = _l
.iter_mut()
.map(|x| x.as_mut().unwrap())
.map(|x| x.split('\t').collect())
.map(|x: Vec<&str>| (x[0].to_string(), x[1].parse::<u32>().unwrap(), x[2].parse::<u32>().unwrap(), x[3].parse::<f32>().unwrap()))
.collect();
assert_eq!(lines.len(), 2);
assert_eq!(lines[0].0, lines[1].0);
assert_eq!(lines[0].1, lines[1].1);
assert_eq!(lines[0].2, lines[1].2);
// Calculate the coverage.
let cov = calc_ratio(lines[0].3, lines[1].3, &sf.0, &sf.1, &pseudocount, operation);
(lines[0].0.clone(), Value { start: lines[0].1, end: lines[0].2, value: cov })
}).coalesce(|p, c| {
if p.1.value == c.1.value && p.0 == c.0 {
Ok((p.0, Value {start: p.1.start, end: c.1.end, value: p.1.value}))
} else {
Err((p, c))
}
})
});

write_covfile(lines, ofile, ofiletype, chromsizes);
Ok(())
}
Expand Down
14 changes: 14 additions & 0 deletions src/covcalc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1476,3 +1476,17 @@ impl Bin {
}
}
}

pub struct TempZip<I>
where I: Iterator {
pub iterators: Vec<I>
}

impl<I, T> Iterator for TempZip<I>
where I: Iterator<Item=T> {
type Item = Vec<T>;
fn next(&mut self) -> Option<Self::Item> {
let o: Option<Vec<T>> = self.iterators.iter_mut().map(|x| x.next()).collect();
o
}
}
16 changes: 1 addition & 15 deletions src/multibamsummary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ use std::borrow::Cow;
use std::collections::HashMap;
use std::path::Path;
use std::sync::{Arc, Mutex};
use crate::covcalc::{bam_pileup, parse_regions, Alignmentfilters, region_divider};
use crate::covcalc::{bam_pileup, parse_regions, Alignmentfilters, TempZip, region_divider};
use crate::filehandler::{bam_ispaired, read_bedfile, read_gtffile, chrombounds_from_bam, is_bed_or_gtf};
use crate::calc::{median, calc_ratio, deseq_scalefactors};
use crate::bamcompare::ParsedBamFile;
Expand Down Expand Up @@ -268,18 +268,4 @@ pub fn r_mbams(
println!("Matrix written.");
}
Ok(())
}

struct TempZip<I>
where I: Iterator {
iterators: Vec<I>
}

impl<I, T> Iterator for TempZip<I>
where I: Iterator<Item=T> {
type Item = Vec<T>;
fn next(&mut self) -> Option<Self::Item> {
let o: Option<Vec<T>> = self.iterators.iter_mut().map(|x| x.next()).collect();
o
}
}

0 comments on commit d669a3a

Please sign in to comment.