From 3bb56cc0a2d7b5517d6b155368fe2500bfbb3c9a Mon Sep 17 00:00:00 2001 From: LeilyR Date: Fri, 21 Aug 2020 12:23:52 +0200 Subject: [PATCH] release 3.5.0 (#990) * Fixes for 3.4.2 * disable azure on OSX * Closes #945 * computeMatricOperation dataRange (#951) * Added dataRange to computeMatricOperation to return min,max,median and 10th and 90th percentile. * fixed pep8 * more pep8 fix * fixed test_sieve of azure tests * one more fix for test_sieve * imported pysam to test_readFiltering.py * updated hash values for test_sieve * fixed galaxy computeMatrixOperation.xml * More fixes for galaxy wrapper * fixed a little typo in bamCompare (#967) * save the output matrix of the plotheatmap in a format to be compatible with running plotheatmap on it again (#968) * Plot profile color map (#971) * Now colors can be set by user for any heatmap in plotProfile. A bug in tick index for plotting heatmap is also fixed. * added a small description * pep8 fix * added sortUsingSamples and clusterUsingSamples to the galaxy wrapper * [WIP] added auto to plotheatmap #908 (#982) * added auto to plotheatmap * fixed lint, added warning message, updated the help for zmin, zmax * galaxy test plotPCA * lower down the delat for potPCA galaxy test * fix #969 (#970) * fix #969 * PEP8 * updated changes.txt * fixed a little bug in parseCommon.py which caused flake8 failure. * [WIP] added auto to plotheatmap #908 (#982) * added auto to plotheatmap * fixed lint, added warning message, updated the help for zmin, zmax * galaxy test plotPCA * lower down the delat for potPCA galaxy test * updated version and changes * removed alpha from plotPCA test * removed compare="sim_size" * fixed plotHeatmap.xml by removing size from the the params and adding anitizer for the 2 new params. * upated change.txt * updated the doc html Co-authored-by: Devon Ryan --- CHANGES.txt | 10 +++++ deeptools/_version.py | 2 +- deeptools/bamCompare.py | 2 +- deeptools/computeGCBias.py | 2 +- deeptools/computeMatrixOperations.py | 29 ++++++++++++++ deeptools/correctGCBias.py | 8 ++-- deeptools/parserCommon.py | 23 +++++------ deeptools/plotHeatmap.py | 29 +++++++++++++- deeptools/plotProfile.py | 17 +++++--- deeptools/test/test_readFiltering.py | 10 +++-- galaxy/wrapper/computeMatrixOperations.xml | 13 +++++- galaxy/wrapper/deepTools_macros.xml | 4 +- galaxy/wrapper/plotHeatmap.xml | 46 ++++++++++++++++------ 13 files changed, 153 insertions(+), 42 deletions(-) mode change 100644 => 100755 deeptools/computeMatrixOperations.py mode change 100644 => 100755 deeptools/correctGCBias.py diff --git a/CHANGES.txt b/CHANGES.txt index ae06baeb6a..bc55fc6247 100755 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,3 +1,13 @@ +3.5.0 + + * Fixed a small issue in computeGCBias (issue #969) + * Added dataRange to computeMatricOperation to return min,max,median and 10th and 90th percentile. + * Fixed a small typo in bamCompare. (issue #966) + * Save the output matrix of the plotheatmap in a format to be compatible with running plotheatmap on it again.(issue #953) + * Different colors can now be set by user for plotProfile --plotType heatmap (issue #956) + * Added the `auto` option to the zMin and zMax of plotHeatmap. (issue #908) + * Added `--sortUsingSamples` and `--clusterUsingSamples` to the plotHeatmap galaxy wrapper. (issue #976) + 3.4.3 * Changed iteritems() in estimateEscaleFactor to its python3 compatible items(). diff --git a/deeptools/_version.py b/deeptools/_version.py index e6b2d2c77e..a39b0da3ce 100755 --- a/deeptools/_version.py +++ b/deeptools/_version.py @@ -2,4 +2,4 @@ # This file is originally generated from Git information by running 'setup.py # version'. Distribution tarballs contain a pre-generated copy of this file. -__version__ = '3.4.3' +__version__ = '3.5.0' diff --git a/deeptools/bamCompare.py b/deeptools/bamCompare.py index bbbbe848bd..9f19321f92 100644 --- a/deeptools/bamCompare.py +++ b/deeptools/bamCompare.py @@ -254,7 +254,7 @@ def main(args=None): if args.normalizeUsing == 'None': args.normalizeUsing = None # For the sake of sanity if args.scaleFactorsMethod != 'None' and args.normalizeUsing: - sys.exit("`--normalizeUsing {}` is only valid if you also use `--scaleFactorMethod None`! To prevent erroneous output, I will quit now.\n".format(args.normalizeUsing)) + sys.exit("`--normalizeUsing {}` is only valid if you also use `--scaleFactorsMethod None`! To prevent erroneous output, I will quit now.\n".format(args.normalizeUsing)) # Get mapping statistics bam1, mapped1, unmapped1, stats1 = bamHandler.openBam(args.bamfile1, returnStats=True, nThreads=args.numberOfProcessors) diff --git a/deeptools/computeGCBias.py b/deeptools/computeGCBias.py index cc27b88be9..d5d9a326d6 100644 --- a/deeptools/computeGCBias.py +++ b/deeptools/computeGCBias.py @@ -604,7 +604,7 @@ def plotGCbias(file_name, frequencies, reads_per_gc, region_size, image_format=N plt.setp(bp['whiskers'], color='black', linestyle='dashed') plt.setp(bp['fliers'], marker='None') # get the whisker that spands the most - y_max = max([x.get_data()[1][1] for x in bp['whiskers']]) + y_max = np.nanmax([x.get_data()[1][1] for x in bp['whiskers']]) ax1.set_ylim(0 - (y_max * 0.05), y_max * 1.05) ax1.set_ylabel('Number of reads') ax1.set_xlabel('GC fraction') diff --git a/deeptools/computeMatrixOperations.py b/deeptools/computeMatrixOperations.py old mode 100644 new mode 100755 index a67cdd7a26..2212dd1f85 --- a/deeptools/computeMatrixOperations.py +++ b/deeptools/computeMatrixOperations.py @@ -46,6 +46,9 @@ def parse_arguments(): or computeMatrixOperations sort -h +or + computeMatrixOperations dataRange -h + """, epilog='example usages:\n' 'computeMatrixOperations subset -m input.mat.gz -o output.mat.gz --group "group 1" "group 2" --samples "sample 3" "sample 10"\n\n' @@ -126,6 +129,14 @@ def parse_arguments(): help='Sort a matrix file to correspond to the order of entries in the desired input file(s). The groups of regions designated by the files must be present in the order found in the output of computeMatrix (otherwise, use the subset command first). Note that this subcommand can also be used to remove unwanted regions, since regions not present in the input file(s) will be omitted from the output.', usage='Example usage:\n computeMatrixOperations sort -m input.mat.gz -R regions1.bed regions2.bed regions3.gtf -o input.sorted.mat.gz\n\n') + # dataRange + subparsers.add_parser( + 'dataRange', + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + parents=[infoArgs()], + help='Returns the min, max, median, 10th and 90th percentile of the matrix values per sample.', + usage='Example usage:\n computeMatrixOperations dataRange -m input.mat.gz\n\n') + parser.add_argument('--version', action='version', version='%(prog)s {}'.format(__version__)) @@ -300,6 +311,22 @@ def printInfo(matrix): print("\t{0}".format(sample)) +def printDataRange(matrix): + """ + Prints the min, max, median, 10th and 90th percentile of the matrix values per sample. + """ + print("Samples\tMin\tMax\tMedian\t10th\t90th") + for i, sample in enumerate(matrix.matrix.sample_labels): + start = matrix.matrix.sample_boundaries[i] + end = matrix.matrix.sample_boundaries[i + 1] + sample_matrix = matrix.matrix.matrix[..., start:end] + print("{0}\t{1}\t{2}\t{3}\t{4}\t{5}".format(sample, np.amin(sample_matrix), + np.amax(sample_matrix), + np.ma.median(sample_matrix), + np.percentile(sample_matrix, 10), + np.percentile(sample_matrix, 90))) + + def relabelMatrix(matrix, args): """ Relabel the samples and groups in a matrix @@ -771,6 +798,8 @@ def main(args=None): if args.command == 'info': printInfo(hm) + if args.command == 'dataRange': + printDataRange(hm) elif args.command == 'subset': sIdx = getSampleBounds(args, hm) gIdx, gBounds = getGroupBounds(args, hm) diff --git a/deeptools/correctGCBias.py b/deeptools/correctGCBias.py old mode 100644 new mode 100755 index 5ac1ad8b72..d693224836 --- a/deeptools/correctGCBias.py +++ b/deeptools/correctGCBias.py @@ -239,10 +239,10 @@ def writeCorrected_worker(chrNameBam, chrNameBit, start, end, step): try: if debug: endTime = time.time() - print("{}, processing {} ({:.1f} per sec) ") - "reads @ {}:{}-{}".format(multiprocessing.current_process().name, - i, i / (endTime - startTime), - chrNameBit, start, end) + print("{}, processing {} ({:.1f} per sec) " + "reads @ {}:{}-{}".format(multiprocessing.current_process().name, + i, i / (endTime - startTime), + chrNameBit, start, end)) except NameError: pass diff --git a/deeptools/parserCommon.py b/deeptools/parserCommon.py index f0e9fd1b53..ef4f4d0748 100755 --- a/deeptools/parserCommon.py +++ b/deeptools/parserCommon.py @@ -358,11 +358,10 @@ def numberOfProcessors(string): "{} is not a valid number of processors".format(string)) except Exception as e: - raise argparse.ArgumentTypeError("the value given is not valid. " + raise argparse.ArgumentTypeError("the given value {} is not valid. " "Error message: {}\nThe number of " "available processors in your " - "computer is {}.".format(string, e, - availProc)) + "computer is {}.".format(string, e, availProc)) if numberOfProcessors > availProc: numberOfProcessors = availProc @@ -444,7 +443,7 @@ def heatmapperOutputArgs(args=None, output.add_argument('--outFileNameMatrix', help='If this option is given, then the matrix ' 'of values underlying the heatmap will be saved ' - 'using this name, e.g. MyMatrix.tab.', + 'using this name, e.g. MyMatrix.gz.', metavar='FILE', type=writableFile) @@ -614,9 +613,9 @@ def heatmapperOptionalArgs(mode=['heatmap', 'profile'][0]): optional.add_argument('--sortUsingSamples', help='List of sample numbers (order as in matrix), ' - 'that are used for sorting by --sortUsing, ' - 'no value uses all samples, ' - 'example: --sortUsingSamples 1 3', + 'which are used by --sortUsing for sorting. ' + 'If no value is set, it uses all samples. ' + 'Example: --sortUsingSamples 1 3', type=int, nargs='+') optional.add_argument('--linesAtTickMarks', @@ -704,15 +703,17 @@ def heatmapperOptionalArgs(mode=['heatmap', 'profile'][0]): default=None, help='Minimum value for the heatmap intensities. Multiple values, separated by ' 'spaces can be set for each heatmap. If the number of zMin values is smaller than' - 'the number of heatmaps the values are recycled.', - type=float, + 'the number of heatmaps the values are recycled. If a value is set to "auto", it will be set ' + ' to the first percentile of the matrix values.', + type=str, nargs='+') optional.add_argument('--zMax', '-max', default=None, help='Maximum value for the heatmap intensities. Multiple values, separated by ' 'spaces can be set for each heatmap. If the number of zMax values is smaller than' - 'the number of heatmaps the values are recycled.', - type=float, + 'the number of heatmaps the values are recycled. If a value is set to "auto", it will be set ' + ' to the 98th percentile of the matrix values.', + type=str, nargs='+') optional.add_argument('--heatmapHeight', help='Plot height in cm. The default for the heatmap ' diff --git a/deeptools/plotHeatmap.py b/deeptools/plotHeatmap.py index 0cabe046d7..aee0a6280e 100644 --- a/deeptools/plotHeatmap.py +++ b/deeptools/plotHeatmap.py @@ -412,6 +412,16 @@ def plotMatrix(hm, outFileName, zMin = [None] else: zMin = [zMin] # convert to list to support multiple entries + elif 'auto' in zMin: + matrix_flatten = hm.matrix.flatten() + auto_min = np.percentile(matrix_flatten, 1.0) + if np.isnan(auto_min): + auto_min = None + new_mins = [float(x) if x != 'auto' else auto_min for x in zMin] + zMin = new_mins + else: + new_mins = [float(x) for x in zMin] + zMin = new_mins if zMax is None: if matrix_flatten is None: @@ -422,6 +432,23 @@ def plotMatrix(hm, outFileName, zMax = [None] else: zMax = [zMax] + elif 'auto' in zMax: + matrix_flatten = hm.matrix.flatten() + auto_max = np.percentile(matrix_flatten, 98.0) + if np.isnan(auto_max): + auto_max = None + new_maxs = [float(x) if x != 'auto' else auto_max for x in zMax] + zMax = new_maxs + else: + new_maxs = [float(x) for x in zMax] + zMax = new_maxs + if (len(zMin) > 1) & (len(zMax) > 1): + for index, value in enumerate(zMax): + if value <= zMin[index]: + sys.stderr.write("Warnirng: In bigwig {}, the given zmin ({}) is larger than " + "or equal to the given zmax ({}). Thus, it has been set " + "to None. \n".format(index + 1, zMin[index], value)) + zMin[index] = None if yMin is None: yMin = [None] @@ -833,7 +860,7 @@ def main(args=None): hm.matrix.computeSilhouette(args.args.hclust) if args.outFileNameMatrix: - hm.save_matrix_values(args.outFileNameMatrix) + hm.save_matrix(args.outFileNameMatrix) if args.outFileSortedRegions: hm.save_BED(args.outFileSortedRegions) diff --git a/deeptools/plotProfile.py b/deeptools/plotProfile.py index 2cad0a32cd..e953e4550f 100755 --- a/deeptools/plotProfile.py +++ b/deeptools/plotProfile.py @@ -6,6 +6,7 @@ import argparse import numpy as np +from math import ceil import matplotlib matplotlib.use('Agg') matplotlib.rcParams['pdf.fonttype'] = 42 @@ -458,6 +459,13 @@ def plotly_hexbin(self): py.plot(fig, filename=self.out_file_name, auto_open=False) def plot_heatmap(self): + cmap = ['RdYlBu_r'] + if self.color_list is not None: # check the length to be equal to the numebr of plots otherwise multiply it! + cmap = self.color_list + if len(cmap) < self.numplots: + all_colors = cmap + for i in range(ceil(self.numplots / len(cmap))): + cmap.extend(all_colors) matrix_flatten = None if self.y_min == [None]: matrix_flatten = self.hm.matrix.flatten() @@ -479,7 +487,6 @@ def plot_heatmap(self): ax_list = [] # turn off y ticks - for plot in range(self.numplots): labels = [] col = plot % self.plots_per_row @@ -503,9 +510,10 @@ def plot_heatmap(self): if self.per_group: title = self.hm.matrix.group_labels[plot] + tickIdx = plot % self.hm.matrix.get_num_samples() else: title = self.hm.matrix.sample_labels[plot] - + tickIdx = plot ax.set_title(title) mat = [] # when drawing a heatmap (in contrast to drawing lines) for data_idx in range(self.numlines): @@ -526,13 +534,12 @@ def plot_heatmap(self): label = sub_matrix['group'] labels.append(label) mat.append(np.ma.__getattribute__(self.averagetype)(sub_matrix['matrix'], axis=0)) - img = ax.imshow(np.vstack(mat), interpolation='nearest', - cmap='RdYlBu_r', aspect='auto', vmin=localYMin, vmax=localYMax) + cmap=cmap[plot], aspect='auto', vmin=localYMin, vmax=localYMax) self.fig.colorbar(img, cax=cax) totalWidth = np.vstack(mat).shape[1] - xticks, xtickslabel = self.getTicks(plot) + xticks, xtickslabel = self.getTicks(tickIdx) if np.ceil(max(xticks)) != float(totalWidth - 1): tickscale = float(totalWidth) / max(xticks) xticks_use = [x * tickscale for x in xticks] diff --git a/deeptools/test/test_readFiltering.py b/deeptools/test/test_readFiltering.py index ae74df5601..65c5a43f3e 100644 --- a/deeptools/test/test_readFiltering.py +++ b/deeptools/test/test_readFiltering.py @@ -4,6 +4,8 @@ import os.path from os import unlink import hashlib +import pysam + ROOT = os.path.dirname(os.path.abspath(__file__)) + "/test_data/" BAMFILE_FILTER = ROOT + "test_filtering.bam" @@ -72,12 +74,12 @@ def test_sieve(): 'test_filtering\t5\t193\n'] assert_equal(resp, expected) unlink(outlog) - h = hashlib.md5(open(outfile, "rb").read()).hexdigest() - assert(h == "977bdab227a4dbfa3fc9f27c23a3e0b7") + h = hashlib.md5(pysam.view(outfile).encode('utf-8')).hexdigest() + assert(h == "acbc4443fb0387bfd6c412af9d4fc414") unlink(outfile) - h = hashlib.md5(open(outfiltered, "rb").read()).hexdigest() - assert(h == "762e79b7a2245ff6b2cea4139a1455de") + h1 = hashlib.md5(pysam.view(outfiltered).encode('utf-8')).hexdigest() + assert(h1 == "b90befdd5f073f14acb9a38661f301ad") unlink(outfiltered) diff --git a/galaxy/wrapper/computeMatrixOperations.xml b/galaxy/wrapper/computeMatrixOperations.xml index 21b0d6231a..40ebbccf1a 100644 --- a/galaxy/wrapper/computeMatrixOperations.xml +++ b/galaxy/wrapper/computeMatrixOperations.xml @@ -71,6 +71,9 @@ -m $submodule.matrixFile -R '#echo "' '".join($files)#' -o $outFileName + #else if $submodule.command == "dataRange": + dataRange + -m $submodule.matrixFile #end if ]]> @@ -85,6 +88,7 @@ + + + + + + @@ -205,7 +215,8 @@ What it does +----------------+--------------------------------------------------------------------------------------------------------------------------+ + sort | Sorts the given file so regions are in the order of occurence in the input BED/GTF file(s). | +----------------+--------------------------------------------------------------------------------------------------------------------------+ - ++ dataRange | Returns the min, max, median, 10th and 90th percentile of the matrix values per sample. | ++----------------+--------------------------------------------------------------------------------------------------------------------------+ These operations are useful when you want to run computeMatrix on multiple files (thereby keeping all of the values together) and later exclude regions/samples or add new ones. Another common use would be if you require the output of computeMatrix to be sorted to match the order of regions in the input file. diff --git a/galaxy/wrapper/deepTools_macros.xml b/galaxy/wrapper/deepTools_macros.xml index 6bd2674dc7..6cc2547fc1 100755 --- a/galaxy/wrapper/deepTools_macros.xml +++ b/galaxy/wrapper/deepTools_macros.xml @@ -1,10 +1,10 @@ --numberOfProcessors "\${GALAXY_SLOTS:-4}" - 3.4.3.0 + 3.5.0.0 - deeptools + deeptools samtools diff --git a/galaxy/wrapper/plotHeatmap.xml b/galaxy/wrapper/plotHeatmap.xml index 9f7b34e94f..98ddc1c5b3 100644 --- a/galaxy/wrapper/plotHeatmap.xml +++ b/galaxy/wrapper/plotHeatmap.xml @@ -75,9 +75,15 @@ #if str($advancedOpt.yMin).strip() != "": --yMin $advancedOpt.yMin #end if - #if $advancedOpt.yMax: + #if str($advancedOpt.yMax).strip() != "": --yMax $advancedOpt.yMax #end if + #if str($advancedOpt.sortUsingSamples).strip() != "": + --sortUsingSamples $advancedOpt.sortUsingSamples + #end if + #if str($advancedOpt.clusterUsingSamples).strip() != "": + --clusterUsingSamples $advancedOpt.clusterUsingSamples + #end if --xAxisLabel '$advancedOpt.xAxisLabel' --yAxisLabel '$advancedOpt.yAxisLabel' @@ -134,6 +140,15 @@ + + + + + + - - - - - heatmap and colorbar - - - - @@ -214,7 +229,7 @@ - + + + + + +