Skip to content

Commit

Permalink
Merge branch 'release/2.5.1'
Browse files Browse the repository at this point in the history
  • Loading branch information
dpryan79 committed May 12, 2017
2 parents 28cc0ac + 601d5de commit b5fc407
Show file tree
Hide file tree
Showing 32 changed files with 355 additions and 336 deletions.
13 changes: 13 additions & 0 deletions CHANGES.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,16 @@
2.5.1

* Added universal newline support to deeptoolsintervals (issue #506).
* Fixed a few issues with correctGCBias under python 3.5 (thanks to @drakeeee)
* Setting `--minThreshold 0.0` or `--maxThreshold 0.0` now works properly. Previously, setting either of these to 0 was ignored. (issue #516)
* You can now specify the plot width and height in `plotPCA` and `plotCorrelation` (heatmap only) with the `--plotWidth` and `--plotHeight` parameters. (issue #507)
* plotCoverage no longer clips the top off of plots. Further, you can now set the plot width and height with `--plotWidth` and `--plotHeight`. (issue #508)
* In bamCoverage, specifying `--filterRNAstrand` no longer results in `--extendReads` being ignored. (issue #520)
* `plotFingerprint` and `plotEnrichment` no longer require producing a plot, which is useful if you only need QC metrics and are using a LOT of samples (such that matplotlib would crash anyway). This hasn't been implemented in Galaxy, but can if people would like it. (issues #519 and #526)
* `computeMatrix` now accepts a `--samplesLabel` option, which is useful in those cases when you aren't immediately running `plotHeatmap` and don't have terribly descriptive file names (issue #523)
* If you use `plotFingerprint` with the `--JSDsample` option and forget to list that file under `--bamfiles`, it will be added automatically, and the file name will be added to the labels if needed (issue #527)
* Various Galaxy wrapper fixes

2.5.0

* Fix a bug where using regions with the same name in multiple BED files in computeMatrix caused downstream problems in plotHeatmap/plotProfile (issue #477).
Expand Down
2 changes: 1 addition & 1 deletion deeptools/_version.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@
# This file is originally generated from Git information by running 'setup.py
# version'. Distribution tarballs contain a pre-generated copy of this file.

__version__ = '2.5.0.1'
__version__ = '2.5.1'
79 changes: 4 additions & 75 deletions deeptools/bamCoverage.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,10 @@ def main(args=None):

func_args = {'scaleFactor': scale_factor}

# This fixes issue #520, where --extendReads wasn't honored if --filterRNAstrand was used
if args.filterRNAstrand and not args.Offset:
args.Offset = [1, -1]

if args.MNase:
# check that library is paired end
# using getFragmentAndReadSize
Expand Down Expand Up @@ -210,26 +214,6 @@ def main(args=None):
verbose=args.verbose)
wr.filter_strand = args.filterRNAstrand
wr.Offset = args.Offset

elif args.filterRNAstrand:
wr = filterRnaStrand([args.bam],
binLength=args.binSize,
stepSize=args.binSize,
region=args.region,
numberOfProcessors=args.numberOfProcessors,
extendReads=args.extendReads,
minMappingQuality=args.minMappingQuality,
ignoreDuplicates=args.ignoreDuplicates,
center_read=args.centerReads,
zerosToNans=args.skipNonCoveredRegions,
samFlag_include=args.samFlagInclude,
samFlag_exclude=args.samFlagExclude,
minFragmentLength=args.minFragmentLength,
maxFragmentLength=args.maxFragmentLength,
verbose=args.verbose,
)

wr.filter_strand = args.filterRNAstrand
else:
wr = writeBedGraph.WriteBedGraph([args.bam],
binLength=args.binSize,
Expand Down Expand Up @@ -399,58 +383,3 @@ def get_fragment_from_read(self, read):
fragment_end = fragment_start + 3

return [(fragment_start, fragment_end)]


class filterRnaStrand(writeBedGraph.WriteBedGraph):
    """
    WriteBedGraph variant used for --filterRNAstrand: get_fragment_from_read
    is overridden so that only reads passing a strand test on their SAM flag
    contribute blocks.

    SAM flag bits tested:
        16  -> read maps to the reverse strand
        32  -> mate maps to the reverse strand
        64  -> first in pair
        128 -> second in pair

    Paired-end reads
    ----------------
    filter_strand == 'forward', a read is kept if either:
        * 128 is set and 16 is not set, or
        * 64 is set and 32 is not set
    any other filter_strand value, a read is kept if either:
        * 128 and 16 are both set (flag & 144 == 144), or
        * 64 and 32 are both set (flag & 96 == 96)

    Single-end reads
    ----------------
    filter_strand == 'forward': kept if 16 is set
    filter_strand == 'reverse': kept if 16 is not set
    """

    def get_fragment_from_read(self, read):
        """
        Return read.get_blocks() when the read passes the strand test for
        self.filter_strand, otherwise return [(None, None)].
        """
        if read.is_paired:
            bits = read.flag
            if self.filter_strand == 'forward':
                second_on_forward = bits & 128 == 128 and bits & 16 == 0
                first_with_forward_mate = bits & 64 == 64 and bits & 32 == 0
                keep = second_on_forward or first_with_forward_mate
            else:
                # Every non-'forward' value takes this branch for paired reads
                keep = bits & 144 == 144 or bits & 96 == 96
        elif self.filter_strand == 'forward':
            keep = read.flag & 16 == 16
        elif self.filter_strand == 'reverse':
            keep = read.flag & 16 == 0
        else:
            keep = False

        if keep:
            return read.get_blocks()

        # Placeholder returned for reads that fail the strand test
        return [(None, None)]
12 changes: 8 additions & 4 deletions deeptools/bamPEFragmentSize.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,14 +83,18 @@ def parse_arguments():
return parser


def getFragSize(bam, args):
def getFragSize(bam, args, idx):
fragment_len_dict, read_len_dict = get_read_and_fragment_length(bam, return_lengths=True,
blackListFileName=args.blackListFileName,
numberOfProcessors=args.numberOfProcessors,
verbose=args.verbose,
binSize=args.binSize,
distanceBetweenBins=args.distanceBetweenBins)
print("\n\nBAM file : {}".format(bam))
if args.samplesLabel and idx < len(args.samplesLabel):
print("\n\nSample label: {}".format(args.samplesLabel[idx]))
else:
print("\n\nBAM file : {}".format(bam))

if fragment_len_dict:
if fragment_len_dict['mean'] == 0:
print("No pairs were found. Is the data from a paired-end sequencing experiment?")
Expand Down Expand Up @@ -125,8 +129,8 @@ def main(args=None):
args = parse_arguments().parse_args(args)

fraglengths = {}
for bam in args.bamfiles:
fraglengths[bam] = getFragSize(bam, args)
for idx, bam in enumerate(args.bamfiles):
fraglengths[bam] = getFragSize(bam, args, idx)

if args.histogram:
import matplotlib
Expand Down
8 changes: 8 additions & 0 deletions deeptools/computeMatrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -316,6 +316,14 @@ def computeMatrixOptArgs(case=['scale-regions', 'reference-point'][0]):
metavar="BED file",
required=False)

optional.add_argument('--samplesLabel',
help='Labels for the samples. This will then be passed to plotHeatmap and plotProfile. The '
'default is to use the file name of the '
'sample. The sample labels should be separated '
'by spaces and quoted if a label itself'
'contains a space E.g. --samplesLabel label-1 "label 2" ',
nargs='+')

# in contrast to other tools,
# computeMatrix by default outputs
# messages and the --quiet flag suppresses them
Expand Down
18 changes: 11 additions & 7 deletions deeptools/correctGCBias.py
Original file line number Diff line number Diff line change
Expand Up @@ -237,12 +237,16 @@ def writeCorrected_worker(chrNameBam, chrNameBit, start, end, step):

cvg_corr[vectorStart:vectorEnd] += float(1) / R_gc[gc]
i += 1
if debug:
endTime = time.time()
print("{}, processing {} ({:.1f} per sec) ")
"reads @ {}:{}-{}".format(multiprocessing.current_process().name,
i, i / (endTime - startTime),
chrNameBit, start, end)

try:
if debug:
endTime = time.time()
print("{}, processing {} ({:.1f} per sec) ")
"reads @ {}:{}-{}".format(multiprocessing.current_process().name,
i, i / (endTime - startTime),
chrNameBit, start, end)
except NameError:
pass

if i == 0:
return None
Expand Down Expand Up @@ -661,7 +665,7 @@ def main(args=None):
res = list(map(writeCorrected_wrapper, mp_args))

# concatenate intermediary bedgraph files
_temp_bg_file = open(_temp_bg_file_name, 'w')
_temp_bg_file = open(_temp_bg_file_name, 'wb')
for tempFileName in res:
if tempFileName:
# concatenate all intermediate tempfiles into one
Expand Down
8 changes: 4 additions & 4 deletions deeptools/correlation.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,7 @@ def compute_correlation(self):

def plot_correlation(self, plot_fiilename, plot_title='', vmax=None,
vmin=None, colormap='jet', image_format=None,
plot_numbers=False):
plot_numbers=False, plotWidth=11, plotHeight=9.5):
"""
plots a correlation using a symmetric heatmap
"""
Expand All @@ -248,7 +248,7 @@ def plot_correlation(self, plot_fiilename, plot_title='', vmax=None,
vmin = 0 if corr_matrix .min() >= 0 else -1

# Compute and plot dendrogram.
fig = plt.figure(figsize=(11, 9.5))
fig = plt.figure(figsize=(plotWidth, plotHeight))
plt.suptitle(plot_title)

axdendro = fig.add_axes([0.02, 0.12, 0.1, 0.66])
Expand Down Expand Up @@ -431,12 +431,12 @@ def plot_scatter(self, plot_fiilename, plot_title='', image_format=None, log1p=F
plt.savefig(plot_fiilename, format=image_format)
plt.close()

def plot_pca(self, plot_filename, plot_title='', image_format=None, log1p=False):
def plot_pca(self, plot_filename, plot_title='', image_format=None, log1p=False, plotWidth=5, plotHeight=10):
"""
Plot the PCA of a matrix
"""

fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(5, 10))
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(plotWidth, plotHeight))
# PCA
if self.rowCenter:
_ = self.matrix.mean(axis=1)
Expand Down
13 changes: 13 additions & 0 deletions deeptools/getFragmentAndReadSize.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,19 @@ def get_read_and_fragment_length(bamFile, return_lengths=False, blackListFileNam

distanceBetweenBins *= 2
fl = []

# Fix issue #522, allow distanceBetweenBins == 0
if distanceBetweenBins == 0:
imap_res = mapReduce.mapReduce((bam_handle.filename, distanceBetweenBins),
getFragmentLength_wrapper,
chrom_sizes,
genomeChunkLength=binSize,
blackListFileName=blackListFileName,
numberOfProcessors=numberOfProcessors,
verbose=verbose)
fl = np.concatenate(imap_res)

# Try to ensure we have at least 1000 regions from which to compute statistics, halving the distance between bins as needed
while len(fl) < 1000 and distanceBetweenBins > 1:
distanceBetweenBins /= 2
stepsize = binSize + distanceBetweenBins
Expand Down
2 changes: 1 addition & 1 deletion deeptools/getScaleFactor.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ def get_scale_factor(args):
bam_mapped, bam_mapped_total = get_num_kept_reads(args)
if args.normalizeTo1x:
# Print output, since normalization stuff isn't printed to stderr otherwise
sys.stderr.write("normalization: 1x\n")
sys.stderr.write("normalization: 1x (effective genome size {})\n".format(args.normalizeTo1x))

# try to guess fragment length if the bam file contains paired end reads
from deeptools.getFragmentAndReadSize import get_read_and_fragment_length
Expand Down
13 changes: 7 additions & 6 deletions deeptools/heatmapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -271,9 +271,7 @@ def computeMatrix(self, score_file_list, regions_file, parameters, blackListFile
"matrix length does not match regions length"

if len(regions) == 0:
sys.stderr.write(
"\nERROR: BED file does not contain any valid regions. "
"Please check\n")
sys.stderr.write("\nERROR: Either the BED file does not contain any valid regions or there are none remaining after filtering.\n")
exit(1)
if regions_no_score == len(regions):
exit("\nERROR: None of the BED regions could be found in the bigWig"
Expand All @@ -298,7 +296,10 @@ def computeMatrix(self, score_file_list, regions_file, parameters, blackListFile
numcols = matrix.shape[1]
num_ind_cols = self.get_num_individual_matrix_cols()
sample_boundaries = list(range(0, numcols + num_ind_cols, num_ind_cols))
sample_labels = [splitext(basename(x))[0] for x in score_file_list]
if allArgs is not None and allArgs['samplesLabel'] is not None:
sample_labels = allArgs['samplesLabel']
else:
sample_labels = [splitext(basename(x))[0] for x in score_file_list]

# Determine the group boundaries
group_boundaries = []
Expand Down Expand Up @@ -555,9 +556,9 @@ def compute_sub_matrix_worker(self, chrom, start, end, score_file_list, paramete
if not parameters['missing data as zero']:
coverage[:] = np.nan

if parameters['min threshold'] and coverage.min() <= parameters['min threshold']:
if parameters['min threshold'] is not None and coverage.min() <= parameters['min threshold']:
continue
if parameters['max threshold'] and coverage.max() >= parameters['max threshold']:
if parameters['max threshold'] is not None and coverage.max() >= parameters['max threshold']:
continue
if parameters['scale'] != 1:
coverage = parameters['scale'] * coverage
Expand Down
14 changes: 13 additions & 1 deletion deeptools/plotCorrelation.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,16 @@ def heatmap_options():
parser = argparse.ArgumentParser(add_help=False)
heatmap = parser.add_argument_group('Heatmap options')

heatmap.add_argument('--plotHeight',
help='Plot height in cm.',
type=float,
default=9.5)

heatmap.add_argument('--plotWidth',
help='Plot width in cm. The minimum value is 1 cm.',
type=float,
default=11)

heatmap.add_argument('--zMin', '-min',
default=None,
help='Minimum value for the heatmap intensities. '
Expand Down Expand Up @@ -206,7 +216,9 @@ def main(args=None):
colormap=args.colorMap,
plot_title=args.plotTitle,
image_format=args.plotFileFormat,
plot_numbers=args.plotNumbers)
plot_numbers=args.plotNumbers,
plotWidth=args.plotWidth,
plotHeight=args.plotHeight)

if args.outFileCorMatrix:
corr.save_corr_matrix(args.outFileCorMatrix)
19 changes: 14 additions & 5 deletions deeptools/plotCoverage.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ def parse_arguments(args=None):
""",
epilog='example usages:\nplotCoverage '
'--bamfiles file1.bam file2.bam -out results.png\n\n'
'--bamfiles file1.bam file2.bam -o results.png\n\n'
' \n\n',
conflict_handler='resolve')

Expand Down Expand Up @@ -110,6 +110,16 @@ def required_args():
help='Save raw counts (coverages) to file.',
metavar='FILE')

optional.add_argument('--plotHeight',
help='Plot height in cm.',
type=float,
default=5.0)

optional.add_argument('--plotWidth',
help='Plot width in cm. The minimum value is 1 cm.',
type=float,
default=15.0)

optional.add_argument('--plotFileFormat',
metavar='FILETYPE',
help='Image format type. If given, this option '
Expand Down Expand Up @@ -165,7 +175,7 @@ def main(args=None):
if args.skipZeros:
num_reads_per_bin = countR.remove_row_of_zeros(num_reads_per_bin)

fig, axs = plt.subplots(1, 2, figsize=(15, 5))
fig, axs = plt.subplots(1, 2, figsize=(args.plotWidth, args.plotHeight))
plt.suptitle(args.plotTitle)
# plot up to two std from mean
num_reads_per_bin = num_reads_per_bin.astype(int)
Expand Down Expand Up @@ -212,9 +222,8 @@ def main(args=None):
sample_max[idx],
))

# The 'good' x-axis is computed for each sample. The lower value is favored in which
# distributions with a wider x-range can better be seen.
y_max = min(y_max)
# Don't clip plots
y_max = max(y_max)
axs[0].set_ylim(0, min(1, y_max + (y_max * 0.10)))
axs[0].set_xlim(0, x_max)
axs[0].set_xlabel('coverage (#reads per bp)')
Expand Down
Loading

0 comments on commit b5fc407

Please sign in to comment.