Skip to content

Commit

Permalink
Merge branch 'master' of https://github.com/dpryan79/MethylDackel
Browse files Browse the repository at this point in the history
  • Loading branch information
Devon Ryan committed Sep 27, 2021
2 parents 24aad05 + aa452bf commit b6db120
Show file tree
Hide file tree
Showing 8 changed files with 48 additions and 5 deletions.
9 changes: 8 additions & 1 deletion MBias.c
Original file line number Diff line number Diff line change
Expand Up @@ -266,6 +266,9 @@ void mbias_usage() {
" present, or else the alignment is ignored. This is equivalent\n"
" to the -f option in samtools. The default is 0, which\n"
" includes all alignments.\n"
" --ignoreNH Ignore NH auxiliary tags. By default, if an NH tag is present\n"
" and its value is >1 then an entry is ignored as a\n"
" multimapper.\n"
" --minConversionEfficiency The minimum non-CpG conversion efficiency observed\n"
" in a read to include it in the output. The default is 0.0 and\n"
" the maximum is 1.0 (100%% conversion). You are strongly\n"
Expand Down Expand Up @@ -309,7 +312,7 @@ int mbias_main(int argc, char *argv[]) {
config.keepCpG = 1; config.keepCHG = 0; config.keepCHH = 0;
config.minMapq = 10; config.minPhred = 5; config.keepDupes = 0;
config.keepSingleton = 0, config.keepDiscordant = 0;
config.filterMappability = 0;
config.filterMappability = 0, config.ignoreNH = 0;

config.fp = NULL;
config.bai = NULL;
Expand Down Expand Up @@ -340,6 +343,7 @@ int mbias_main(int argc, char *argv[]) {
{"chunkSize", 1, NULL, 13},
{"keepStrand", 0, NULL, 14},
{"minConversionEfficiency", 1, NULL, 15},
{"ignoreNH", 0, NULL, 16},
{"ignoreFlags", 1, NULL, 'F'},
{"requireFlags", 1, NULL, 'R'},
{"help", 0, NULL, 'h'},
Expand Down Expand Up @@ -413,6 +417,9 @@ int mbias_main(int argc, char *argv[]) {
case 15:
config.minConversionEfficiency = atof(optarg);
break;
case 16:
config.ignoreNH = 1;
break;
case 'F' :
config.ignoreFlags = atoi(optarg);
break;
Expand Down
2 changes: 2 additions & 0 deletions MethylDackel.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ typedef struct {
@field merge 1: Merge Cs in either a CpG or CHG context into single entries
@field methylKit Output in a format compatible with methylKit
@field minOppositeDepth Minimum depth covering the opposite strand needed to look for variants
@field ignoreNH If set, don't exclude alignments with NH auxiliary tags with values >1 (i.e., marked multimappers).
@field maxVariantFrac If the fraction of non-Gs on the opposite strand is greater than this then a position is excluded.
@field fraction 1: Output should be the methylation fraction only, 0: otherwise
@field counts 1: Output just the coverage
Expand All @@ -91,6 +92,7 @@ typedef struct {
int minMapq, minPhred, keepDupes, minDepth;
int keepDiscordant, keepSingleton, ignoreFlags, requireFlags;
int merge, methylKit, minOppositeDepth;
int ignoreNH;
double maxVariantFrac;
int fraction, counts, logit;
int cytosine_report;
Expand Down
10 changes: 6 additions & 4 deletions common.c
Original file line number Diff line number Diff line change
Expand Up @@ -418,10 +418,12 @@ int filter_func(void *data, bam1_t *b) {
if(b->core.flag & ldata->config->ignoreFlags) continue; //By default: secondary alignments, QC failed, PCR duplicates, and supplemental alignments
if(ldata->config->requireFlags && (b->core.flag & ldata->config->requireFlags) != ldata->config->requireFlags) continue;
if(!ldata->config->keepDupes && b->core.flag & BAM_FDUP) continue;
p = bam_aux_get(b, "NH");
if(p != NULL) {
NH = bam_aux2i(p);
if(NH>1) continue; //Ignore obvious multimappers
if(!ldata->config->ignoreNH) {
p = bam_aux_get(b, "NH");
if(p != NULL) {
NH = bam_aux2i(p);
if(NH>1) continue; //Ignore obvious multimappers
}
}
if((ldata->config->filterMappability) && check_mappability(ldata, b) == 0) continue; //Low mappability
if(!ldata->config->keepSingleton && (b->core.flag & 0x9) == 0x9) continue; //Singleton
Expand Down
8 changes: 8 additions & 0 deletions extract.c
Original file line number Diff line number Diff line change
Expand Up @@ -639,6 +639,9 @@ void extract_usage() {
" file with a .counts.bedGraph extension.\n"
" --logit Extract logit(M/(M+U)) (only) at each position. This produces\n"
" a file with a .logit.bedGraph extension.\n"
" --ignoreNH Ignore NH auxiliary tags. By default, if an NH tag is present\n"
" and its value is >1 then an entry is ignored as a\n"
" multimapper.\n"
" --minOppositeDepth If you would like to exclude sites that likely contain\n"
" SNPs, then specifying a value greater than 0 here will\n"
" indicate the minimum depth required on the strand opposite of\n"
Expand Down Expand Up @@ -722,6 +725,7 @@ int extract_main(int argc, char *argv[]) {
config.keepCpG = 1; config.keepCHG = 0; config.keepCHH = 0;
config.minMapq = 10; config.minPhred = 5; config.keepDupes = 0;
config.keepSingleton = 0, config.keepDiscordant = 0;
config.ignoreNH = 0;
config.minDepth = 1;
config.methylKit = 0;
config.merge = 0;
Expand Down Expand Up @@ -776,6 +780,7 @@ int extract_main(int argc, char *argv[]) {
{"keepStrand", 0, NULL, 20},
{"cytosine_report", 0, NULL, 21},
{"minConversionEfficiency", 1, NULL, 22},
{"ignoreNH", 0, NULL, 23},
{"ignoreFlags", 1, NULL, 'F'},
{"requireFlags", 1, NULL, 'R'},
{"help", 0, NULL, 'h'},
Expand Down Expand Up @@ -885,6 +890,9 @@ int extract_main(int argc, char *argv[]) {
case 22:
config.minConversionEfficiency = atof(optarg);
break;
case 23:
config.ignoreNH = 1;
break;
case 'M':
config.BWName = optarg;
break;
Expand Down
7 changes: 7 additions & 0 deletions perRead.c
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,9 @@ void perRead_usage() {
" present, or else the alignment is ignored. This is equivalent to the\n"
" -f option in samtools. The default is 0, which includes all\n"
" alignments.\n"
" --ignoreNH Ignore NH auxiliary tags. By default, if an NH tag is present\n"
" and its value is >1 then an entry is ignored as a\n"
" multimapper.\n"
" -@ INT The number of threads to use, the default 1\n"
" --chunkSize INT The size of the genome processed by a single thread at a time.\n"
" The default is 1000000 bases. This value MUST be at least 1.\n"
Expand All @@ -280,6 +283,7 @@ int perRead_main(int argc, char *argv[]) {
config.keepCpG = 1; config.keepCHG = 0; config.keepCHH = 0;
config.minMapq = 10; config.minPhred = 5; config.keepDupes = 0;
config.keepSingleton = 0, config.keepDiscordant = 0;
config.ignoreNH = 0;
config.fp = NULL;
config.bai = NULL;
config.reg = NULL;
Expand Down Expand Up @@ -344,6 +348,9 @@ int perRead_main(int argc, char *argv[]) {
case 20:
keepStrand = 1;
break;
case 21:
config.ignoreNH = 1;
break;
default :
fprintf(stderr, "Invalid option '%c'\n", c);
perRead_usage();
Expand Down
Binary file added tests/NH.bam
Binary file not shown.
Binary file added tests/NH.bam.bai
Binary file not shown.
17 changes: 17 additions & 0 deletions tests/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,4 +127,21 @@ def rm(f):
lines = sum(1 for _ in open('test13_CpG.bedGraph'))
assert lines == 1
rm('test13_CpG.bedGraph')

# Test ignoreNH
rm('test14_CpG.bedGraph')
check_call([MPath, 'extract', '-o', 'test14', '-q', '1', 'cg100.fa', 'NH.bam'])
assert op.exists('test14_CpG.bedGraph')
lines = sum(1 for _ in open('test14_CpG.bedGraph'))
assert lines == 1
rm('test14_CpG.bedGraph')

# Test ignoreNH
rm('test15_CpG.bedGraph')
check_call([MPath, 'extract', '-o', 'test15', '--ignoreNH', '-q', '1', 'cg100.fa', 'NH.bam'])
assert op.exists('test15_CpG.bedGraph')
lines = sum(1 for _ in open('test15_CpG.bedGraph'))
assert lines == 49
rm('test15_CpG.bedGraph')
print("Finished correctly")

0 comments on commit b6db120

Please sign in to comment.