@@ -6,13 +6,39 @@ use std::cmp::min;
6
6
use std:: fmt;
7
7
use ndarray:: Array1 ;
8
8
9
- pub fn parse_regions ( regions : & Vec < ( String , u32 , u32 ) > , bam_ifile : & str ) -> ( Vec < Region > , HashMap < String , u32 > ) {
9
+ pub fn parse_regions ( regions : & Vec < ( String , u32 , u32 ) > , bam_ifile : Vec < & str > ) -> ( Vec < Region > , HashMap < String , u32 > ) {
10
10
// Takes a vector of regions, and a list of bam file paths
11
11
// returns a vector of regions, with all chromosomes and full lengths if original regions was empty
12
12
// Else it validates the regions against the information from the bam header
13
13
// Finally, a HashMap of chromosome sizes (chromosome name -> length) is returned as well.
14
-
15
- let bam = IndexedReader :: from_path ( bam_ifile) . unwrap ( ) ;
14
+ let mut found_chroms: HashMap < String , usize > = HashMap :: new ( ) ;
15
+ for bam in bam_ifile. iter ( ) {
16
+ let bam = IndexedReader :: from_path ( bam) . unwrap ( ) ;
17
+ let chroms: Vec < String > = bam. header ( ) . target_names ( ) . iter ( ) . map ( |x| String :: from_utf8 ( x. to_vec ( ) ) . unwrap ( ) ) . collect ( ) ;
18
+ for chrom in chroms. iter ( ) {
19
+ // if it's not in the hashmap, add it, else increment count
20
+ if !found_chroms. contains_key ( chrom) {
21
+ found_chroms. insert ( chrom. clone ( ) , 1 ) ;
22
+ } else {
23
+ let count = found_chroms. get_mut ( chrom) . unwrap ( ) ;
24
+ * count += 1 ;
25
+ }
26
+ }
27
+ }
28
+ let mut validchroms: Vec < String > = Vec :: new ( ) ;
29
+ // loop over all chroms in the hashmap, if the count is expected, include them
30
+ for ( chrom, count) in found_chroms. iter ( ) {
31
+ if * count == bam_ifile. len ( ) {
32
+ validchroms. push ( chrom. clone ( ) ) ;
33
+ } else {
34
+ println ! ( "Chromosome {} is missing in at least one bam file, and thus ignored!" , chrom) ;
35
+ }
36
+ }
37
+ // Crash if validchroms is empty.
38
+ assert ! ( !validchroms. is_empty( ) , "No chromosomes found that are present in all bam files. Did you mix references ?" ) ;
39
+ println ! ( "Valid chromosomes are: {:?}" , validchroms) ;
40
+ // Read header from first bam file
41
+ let bam = IndexedReader :: from_path ( bam_ifile[ 0 ] ) . unwrap ( ) ;
16
42
let header = bam. header ( ) . clone ( ) ;
17
43
let mut chromregions: Vec < Region > = Vec :: new ( ) ;
18
44
let mut chromsizes = HashMap :: new ( ) ;
@@ -23,6 +49,10 @@ pub fn parse_regions(regions: &Vec<(String, u32, u32)>, bam_ifile: &str) -> (Vec
23
49
. expect ( "Invalid UTF-8 in chromosome name" ) ;
24
50
let chromlen = header. target_len ( tid)
25
51
. expect ( "Error retrieving length for chromosome" ) ;
52
+ // If chromname is not in validchroms, skip it.
53
+ if !validchroms. contains ( & chromname) {
54
+ continue ;
55
+ }
26
56
let _reg = Region {
27
57
chrom : chromname. clone ( ) ,
28
58
start : Revalue :: U ( 0 ) ,
@@ -42,18 +72,18 @@ pub fn parse_regions(regions: &Vec<(String, u32, u32)>, bam_ifile: &str) -> (Vec
42
72
. expect ( "Invalid UTF-8 in chromosome name" ) ;
43
73
let chromlen = header. target_len ( tid)
44
74
. expect ( "Error retrieving length for chromosome" ) ;
45
- chromsizes. insert ( chromname, chromlen as u32 ) ;
75
+ if validchroms. contains ( & chromname) {
76
+ chromsizes. insert ( chromname, chromlen as u32 ) ;
77
+ }
46
78
}
47
- let validchroms: Vec < String > = header
48
- . target_names ( )
49
- . iter ( )
50
- . map ( |x| String :: from_utf8 ( x. to_vec ( ) ) . unwrap ( ) )
51
- . collect ( ) ;
52
79
53
80
for region in regions {
54
81
let chromname = & region. 0 ;
55
82
assert ! ( region. 1 < region. 2 , "Region start must be strictly less than region end." ) ;
56
- assert ! ( validchroms. contains( chromname) , "Chromosome {} not found in bam header" , chromname) ;
83
+ // Check if chromname is in validchroms
84
+ if !validchroms. contains ( chromname) {
85
+ continue ;
86
+ }
57
87
let _reg = Region {
58
88
chrom : chromname. clone ( ) ,
59
89
start : Revalue :: U ( region. 1 ) ,
0 commit comments