Skip to content

Commit

Permalink
Infer BED/GTF from the first proper data line. Silence some warnings
Browse files Browse the repository at this point in the history
  • Loading branch information
WardDeb committed Jan 24, 2025
1 parent a42e55d commit 31713e6
Show file tree
Hide file tree
Showing 4 changed files with 38 additions and 42 deletions.
8 changes: 4 additions & 4 deletions src/alignmentsieve.rs
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ pub fn r_alignmentsieve(

// write output
let mut obam = Writer::from_path(ofile, &header, bam::Format::Bam).unwrap();
obam.set_threads(nproc);
let _ = obam.set_threads(nproc);
for sb in sieve.into_iter() {
if let Some(sb) = sb {
let mut bam = Reader::from_path(&sb).unwrap();
Expand All @@ -87,7 +87,7 @@ pub fn r_alignmentsieve(
// write filtered reads if necessary
if write_filters {
let mut ofilterbam = Writer::from_path(filtered_out_readsfile, &header, bam::Format::Bam).unwrap();
ofilterbam.set_threads(nproc);
let _ = ofilterbam.set_threads(nproc);
for sb in filtersieve.into_iter() {
if let Some(sb) = sb {
let mut bam = Reader::from_path(&sb).unwrap();
Expand Down Expand Up @@ -148,8 +148,8 @@ fn sieve_bamregion(ibam: &str, regstruct: &Region, alfilters: &Alignmentfilters,
if nproc > 4 {
let readthreads = 2;
let writethreads = nproc - 2;
bam.set_threads(readthreads);
sievebamout.set_threads(writethreads);
let _ = bam.set_threads(readthreads);
let _ = sievebamout.set_threads(writethreads);
if verbose {
println!("Reading = {}, Writing = {}", readthreads, writethreads);
}
Expand Down
17 changes: 7 additions & 10 deletions src/computematrix.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use pyo3::prelude::*;
use pyo3::types::PyList;
use crate::filehandler::{read_bedfile, read_gtffile, chrombounds_from_bw, bwintervals, header_matrix, write_matrix};
use crate::filehandler::{read_bedfile, read_gtffile, chrombounds_from_bw, bwintervals, header_matrix, write_matrix, is_bed_or_gtf};
use rayon::prelude::*;
use rayon::ThreadPoolBuilder;
use std::collections::HashMap;
Expand Down Expand Up @@ -128,15 +128,12 @@ pub fn r_computematrix(
let mut regionsizes: HashMap<String, u32> = HashMap::new();
region_files.iter()
.map(|r| {
let ext = Path::new(r)
.extension()
.and_then(|e| e.to_str())
.map(|e| e.to_ascii_lowercase());

match ext {
Some(v) if v == "gtf".to_string() => read_gtffile(r, &gtfparse, chromsizes.keys().collect()),
Some(v) if v == "bed".to_string() => read_bedfile(r, metagene, chromsizes.keys().collect()),
_ => panic!("Only .bed and .gtf files are allowed as regions. File = {}, Extension = {:?}", r, ext),
let ftype = is_bed_or_gtf(r);

match ftype.as_str() {
"gtf" => read_gtffile(r, &gtfparse, chromsizes.keys().collect()),
"bed" => read_bedfile(r, metagene, chromsizes.keys().collect()),
_ => panic!("Only .bed and .gtf files are allowed (as determined by the number of columns). File = {}", ftype),
}
})
.for_each(|(reg, regsize)| {
Expand Down
19 changes: 19 additions & 0 deletions src/filehandler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,25 @@ where
}
}

/// Guess whether a region file is BED or GTF from its first data line.
///
/// Lines starting with `#` and lines that are empty or contain only
/// whitespace are skipped. The first remaining line is split on tabs:
/// exactly nine fields yields `"gtf"`, any other field count yields `"bed"`.
/// If the file contains no data line at all, `"Unknown"` is returned.
///
/// # Panics
///
/// Panics if the file cannot be opened or if a line cannot be read.
pub fn is_bed_or_gtf(fp: &str) -> String {
    // Build the panic message lazily; `expect(format!(..))` would allocate even on success.
    let file = File::open(fp)
        .unwrap_or_else(|e| panic!("Failed to open file: {}: {:?}", fp, e));

    for line in BufReader::new(file).lines() {
        let line = line
            .unwrap_or_else(|e| panic!("Failed to read line from file: {}: {:?}", fp, e));

        // Comment lines and blank lines carry no column information, so they
        // must not be used to decide the file type.
        if line.starts_with('#') || line.trim().is_empty() {
            continue;
        }

        // The decision is made on the first data line only.
        let ftype = if line.split('\t').count() == 9 {
            "gtf"
        } else {
            "bed"
        };
        return ftype.to_string();
    }

    "Unknown".to_string()
}

pub fn read_gtffile(gtf_file: &String, gtfparse: &Gtfparse, chroms: Vec<&String>) -> (Vec<Region>, (String, u32)) {
// At some point this zoo of String clones should be refactored. Not now though, We have a deadline.
let mut regions: Vec<Region> = Vec::new();
Expand Down
36 changes: 8 additions & 28 deletions src/multibamsummary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ use std::collections::HashMap;
use std::path::Path;
use std::sync::{Arc, Mutex};
use crate::covcalc::{bam_pileup, parse_regions, Alignmentfilters, region_divider};
use crate::filehandler::{bam_ispaired, read_bedfile, read_gtffile, chrombounds_from_bam};
use crate::filehandler::{bam_ispaired, read_bedfile, read_gtffile, chrombounds_from_bam, is_bed_or_gtf};
use crate::calc::{median, calc_ratio, deseq_scalefactors};
use crate::bamcompare::ParsedBamFile;
use crate::normalization::scale_factor_bamcompare;
Expand Down Expand Up @@ -106,15 +106,12 @@ pub fn r_mbams(
let mut regionsizes: HashMap<String, u32> = HashMap::new();
bedfiles.iter()
.map(|r| {
let ext = Path::new(r)
.extension()
.and_then(|e| e.to_str())
.map(|e| e.to_ascii_lowercase());

match ext {
Some(v) if v == "gtf".to_string() => read_gtffile(r, &gtfparse, chromsizes.keys().collect()),
Some(v) if v == "bed".to_string() => read_bedfile(r, metagene, chromsizes.keys().collect()),
_ => panic!("Only .bed and .gtf files are allowed as regions. File = {}, Extension = {:?}", r, ext),
let ftype = is_bed_or_gtf(r);

match ftype.as_str() {
"gtf" => read_gtffile(r, &gtfparse, chromsizes.keys().collect()),
"bed" => read_bedfile(r, metagene, chromsizes.keys().collect()),
_ => panic!("Only .bed and .gtf files are allowed (as determined by the number of columns). File = {}", ftype),
}
})
.for_each(|(reg, regsize)| {
Expand Down Expand Up @@ -146,7 +143,7 @@ pub fn r_mbams(
let covcalcs: Vec<_> = pool.install(|| {
bampfiles.par_iter()
.map(|(bamfile, ispe)| {
let (bg, mapped, unmapped, readlen, fraglen) = regionblocks.par_iter()
let (bg, _mapped, _unmapped, _readlen, _fraglen) = regionblocks.par_iter()
.map(|i| bam_pileup(bamfile, &i, &binsize, &ispe, &ignorechr, &filters, false))
.reduce(
|| (vec![], 0, 0, vec![], vec![]),
Expand Down Expand Up @@ -241,23 +238,6 @@ pub fn r_mbams(
Ok(())
}

/// A single cell of a count line: an integer, a float, or free text.
#[derive(Debug)]
enum Countline {
    /// Integer value.
    Int(u32),
    /// Floating point value.
    Float(f32),
    /// Arbitrary text.
    Text(String),
}

/// Renders the wrapped value using its own `Display` formatting.
///
/// Implementing `Display` (rather than an inherent `to_string`) keeps
/// `.to_string()` available through the blanket `ToString` impl and also
/// lets the type be used directly in `format!`/`write!`.
impl std::fmt::Display for Countline {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Countline::Int(i) => write!(f, "{}", i),
            Countline::Float(x) => write!(f, "{}", x),
            Countline::Text(t) => f.write_str(t),
        }
    }
}

struct TempZip<I>
where I: Iterator {
iterators: Vec<I>
Expand Down

0 comments on commit 31713e6

Please sign in to comment.