From 3bb56cc0a2d7b5517d6b155368fe2500bfbb3c9a Mon Sep 17 00:00:00 2001
From: LeilyR <leila.rabbani@gmail.com>
Date: Fri, 21 Aug 2020 12:23:52 +0200
Subject: [PATCH] release 3.5.0 (#990)

* Fixes for 3.4.2

* disable azure on OSX

* Closes #945

* computeMatrixOperations dataRange (#951)

* Added dataRange to computeMatrixOperations to return the min, max, median, 10th and 90th percentile.

* fixed pep8

* more pep8 fix

* fixed test_sieve of azure tests

* one more fix for test_sieve

* imported pysam to test_readFiltering.py

* updated hash values for test_sieve

* fixed galaxy computeMatrixOperations.xml

* More fixes for galaxy wrapper

* fixed a little typo in bamCompare (#967)

* save the output matrix of the plotheatmap in a format to be compatible with running plotheatmap on it again (#968)

* Plot profile color map (#971)

* Now colors can be set by user for any heatmap in plotProfile. A bug in tick index for plotting heatmap is also fixed.

* added a small description

* pep8 fix

* added sortUsingSamples and clusterUsingSamples to the galaxy wrapper

* [WIP] added auto to plotheatmap #908 (#982)

* added auto to plotheatmap

* fixed lint, added warning message, updated the help for zmin, zmax

* galaxy test plotPCA

* lowered the delta for the plotPCA galaxy test

* fix #969 (#970)

* fix #969

* PEP8

* updated changes.txt

* fixed a little bug in parseCommon.py which caused flake8 failure.

* [WIP] added auto to plotheatmap #908 (#982)

* added auto to plotheatmap

* fixed lint, added warning message, updated the help for zmin, zmax

* galaxy test plotPCA

* lowered the delta for the plotPCA galaxy test

* updated version and changes

* removed alpha from plotPCA test

* removed compare="sim_size"


* fixed plotHeatmap.xml by removing size from the params and adding a sanitizer for the 2 new params.

* updated CHANGES.txt

* updated the doc html

Co-authored-by: Devon Ryan <dpryan79@users.noreply.github.com>
---
 CHANGES.txt                                | 10 +++++
 deeptools/_version.py                      |  2 +-
 deeptools/bamCompare.py                    |  2 +-
 deeptools/computeGCBias.py                 |  2 +-
 deeptools/computeMatrixOperations.py       | 29 ++++++++++++++
 deeptools/correctGCBias.py                 |  8 ++--
 deeptools/parserCommon.py                  | 23 +++++------
 deeptools/plotHeatmap.py                   | 29 +++++++++++++-
 deeptools/plotProfile.py                   | 17 +++++---
 deeptools/test/test_readFiltering.py       | 10 +++--
 galaxy/wrapper/computeMatrixOperations.xml | 13 +++++-
 galaxy/wrapper/deepTools_macros.xml        |  4 +-
 galaxy/wrapper/plotHeatmap.xml             | 46 ++++++++++++++++------
 13 files changed, 153 insertions(+), 42 deletions(-)
 mode change 100644 => 100755 deeptools/computeMatrixOperations.py
 mode change 100644 => 100755 deeptools/correctGCBias.py

diff --git a/CHANGES.txt b/CHANGES.txt
index ae06baeb6a..bc55fc6247 100755
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,3 +1,13 @@
+3.5.0
+
+ * Fixed a small issue in computeGCBias (issue #969)
+ * Added dataRange to computeMatrixOperations to return the min, max, median, 10th and 90th percentile.
+ * Fixed a small typo in bamCompare. (issue #966)
+ * Save the output matrix of plotHeatmap in a format compatible with running plotHeatmap on it again. (issue #953)
+ * Different colors can now be set by user for plotProfile --plotType heatmap (issue #956)
+ * Added the `auto` option to the zMin and zMax of plotHeatmap. (issue #908)
+ * Added `--sortUsingSamples` and `--clusterUsingSamples` to the plotHeatmap galaxy wrapper. (issue #976)
+
 3.4.3
 
  * Changed iteritems() in estimateEscaleFactor to its python3 compatible items().
diff --git a/deeptools/_version.py b/deeptools/_version.py
index e6b2d2c77e..a39b0da3ce 100755
--- a/deeptools/_version.py
+++ b/deeptools/_version.py
@@ -2,4 +2,4 @@
 # This file is originally generated from Git information by running 'setup.py
 # version'. Distribution tarballs contain a pre-generated copy of this file.
 
-__version__ = '3.4.3'
+__version__ = '3.5.0'
diff --git a/deeptools/bamCompare.py b/deeptools/bamCompare.py
index bbbbe848bd..9f19321f92 100644
--- a/deeptools/bamCompare.py
+++ b/deeptools/bamCompare.py
@@ -254,7 +254,7 @@ def main(args=None):
     if args.normalizeUsing == 'None':
         args.normalizeUsing = None  # For the sake of sanity
     if args.scaleFactorsMethod != 'None' and args.normalizeUsing:
-        sys.exit("`--normalizeUsing {}` is only valid if you also use `--scaleFactorMethod None`! To prevent erroneous output, I will quit now.\n".format(args.normalizeUsing))
+        sys.exit("`--normalizeUsing {}` is only valid if you also use `--scaleFactorsMethod None`! To prevent erroneous output, I will quit now.\n".format(args.normalizeUsing))
 
     # Get mapping statistics
     bam1, mapped1, unmapped1, stats1 = bamHandler.openBam(args.bamfile1, returnStats=True, nThreads=args.numberOfProcessors)
diff --git a/deeptools/computeGCBias.py b/deeptools/computeGCBias.py
index cc27b88be9..d5d9a326d6 100644
--- a/deeptools/computeGCBias.py
+++ b/deeptools/computeGCBias.py
@@ -604,7 +604,7 @@ def plotGCbias(file_name, frequencies, reads_per_gc, region_size, image_format=N
     plt.setp(bp['whiskers'], color='black', linestyle='dashed')
     plt.setp(bp['fliers'], marker='None')
     # get the whisker that spands the most
-    y_max = max([x.get_data()[1][1] for x in bp['whiskers']])
+    y_max = np.nanmax([x.get_data()[1][1] for x in bp['whiskers']])
     ax1.set_ylim(0 - (y_max * 0.05), y_max * 1.05)
     ax1.set_ylabel('Number of reads')
     ax1.set_xlabel('GC fraction')
diff --git a/deeptools/computeMatrixOperations.py b/deeptools/computeMatrixOperations.py
old mode 100644
new mode 100755
index a67cdd7a26..2212dd1f85
--- a/deeptools/computeMatrixOperations.py
+++ b/deeptools/computeMatrixOperations.py
@@ -46,6 +46,9 @@ def parse_arguments():
 or
   computeMatrixOperations sort -h
 
+or
+  computeMatrixOperations dataRange -h
+
 """,
         epilog='example usages:\n'
                'computeMatrixOperations subset -m input.mat.gz -o output.mat.gz --group "group 1" "group 2" --samples "sample 3" "sample 10"\n\n'
@@ -126,6 +129,14 @@ def parse_arguments():
         help='Sort a matrix file to correspond to the order of entries in the desired input file(s). The groups of regions designated by the files must be present in the order found in the output of computeMatrix (otherwise, use the subset command first). Note that this subcommand can also be used to remove unwanted regions, since regions not present in the input file(s) will be omitted from the output.',
         usage='Example usage:\n  computeMatrixOperations sort -m input.mat.gz -R regions1.bed regions2.bed regions3.gtf -o input.sorted.mat.gz\n\n')
 
+    # dataRange
+    subparsers.add_parser(
+        'dataRange',
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+        parents=[infoArgs()],
+        help='Returns the min, max, median, 10th and 90th percentile of the matrix values per sample.',
+        usage='Example usage:\n  computeMatrixOperations dataRange -m input.mat.gz\n\n')
+
     parser.add_argument('--version', action='version',
                         version='%(prog)s {}'.format(__version__))
 
@@ -300,6 +311,22 @@ def printInfo(matrix):
         print("\t{0}".format(sample))
 
 
+def printDataRange(matrix):
+    """
+    Print a tab-separated table with one row per sample: label, min, max, median, 10th and 90th percentile of that sample's matrix values.
+    """
+    print("Samples\tMin\tMax\tMedian\t10th\t90th")
+    for i, sample in enumerate(matrix.matrix.sample_labels):
+        start = matrix.matrix.sample_boundaries[i]
+        end = matrix.matrix.sample_boundaries[i + 1]
+        sample_matrix = matrix.matrix.matrix[..., start:end]
+        print("{0}\t{1}\t{2}\t{3}\t{4}\t{5}".format(sample, np.amin(sample_matrix),
+                                                    np.amax(sample_matrix),
+                                                    np.ma.median(sample_matrix),
+                                                    np.percentile(sample_matrix, 10),
+                                                    np.percentile(sample_matrix, 90)))
+
+
 def relabelMatrix(matrix, args):
     """
     Relabel the samples and groups in a matrix
@@ -771,6 +798,8 @@ def main(args=None):
 
     if args.command == 'info':
         printInfo(hm)
+    if args.command == 'dataRange':
+        printDataRange(hm)
     elif args.command == 'subset':
         sIdx = getSampleBounds(args, hm)
         gIdx, gBounds = getGroupBounds(args, hm)
diff --git a/deeptools/correctGCBias.py b/deeptools/correctGCBias.py
old mode 100644
new mode 100755
index 5ac1ad8b72..d693224836
--- a/deeptools/correctGCBias.py
+++ b/deeptools/correctGCBias.py
@@ -239,10 +239,10 @@ def writeCorrected_worker(chrNameBam, chrNameBit, start, end, step):
     try:
         if debug:
             endTime = time.time()
-            print("{}, processing {} ({:.1f} per sec) ")
-            "reads @ {}:{}-{}".format(multiprocessing.current_process().name,
-                                      i, i / (endTime - startTime),
-                                      chrNameBit, start, end)
+            print("{}, processing {} ({:.1f} per sec) "
+                  "reads @ {}:{}-{}".format(multiprocessing.current_process().name,
+                                            i, i / (endTime - startTime),
+                                            chrNameBit, start, end))
     except NameError:
         pass
 
diff --git a/deeptools/parserCommon.py b/deeptools/parserCommon.py
index f0e9fd1b53..ef4f4d0748 100755
--- a/deeptools/parserCommon.py
+++ b/deeptools/parserCommon.py
@@ -358,11 +358,10 @@ def numberOfProcessors(string):
                 "{} is not a valid number of processors".format(string))
 
         except Exception as e:
-            raise argparse.ArgumentTypeError("the value given is not valid. "
+            raise argparse.ArgumentTypeError("the given value {} is not valid. "
                                              "Error message: {}\nThe number of "
                                              "available processors in your "
-                                             "computer is {}.".format(string, e,
-                                                                      availProc))
+                                             "computer is {}.".format(string, e, availProc))
 
         if numberOfProcessors > availProc:
             numberOfProcessors = availProc
@@ -444,7 +443,7 @@ def heatmapperOutputArgs(args=None,
         output.add_argument('--outFileNameMatrix',
                             help='If this option is given, then the matrix '
                             'of values underlying the heatmap will be saved '
-                            'using this name, e.g. MyMatrix.tab.',
+                            'using this name, e.g. MyMatrix.gz.',
                             metavar='FILE',
                             type=writableFile)
 
@@ -614,9 +613,9 @@ def heatmapperOptionalArgs(mode=['heatmap', 'profile'][0]):
 
         optional.add_argument('--sortUsingSamples',
                               help='List of sample numbers (order as in matrix), '
-                              'that are used for sorting by --sortUsing, '
-                              'no value uses all samples, '
-                              'example: --sortUsingSamples 1 3',
+                              'which are used by --sortUsing for sorting. '
+                              'If no value is set, it uses all samples. '
+                              'Example: --sortUsingSamples 1 3',
                               type=int, nargs='+')
 
         optional.add_argument('--linesAtTickMarks',
@@ -704,15 +703,17 @@ def heatmapperOptionalArgs(mode=['heatmap', 'profile'][0]):
                               default=None,
                               help='Minimum value for the heatmap intensities. Multiple values, separated by '
                                    'spaces can be set for each heatmap. If the number of zMin values is smaller than'
-                                   'the number of heatmaps the values are recycled.',
-                              type=float,
+                                   'the number of heatmaps the values are recycled. If a value is set to "auto", it will be set '
+                                   ' to the first percentile of the matrix values.',
+                              type=str,
                               nargs='+')
         optional.add_argument('--zMax', '-max',
                               default=None,
                               help='Maximum value for the heatmap intensities. Multiple values, separated by '
                                    'spaces can be set for each heatmap. If the number of zMax values is smaller than'
-                                   'the number of heatmaps the values are recycled.',
-                              type=float,
+                                   'the number of heatmaps the values are recycled. If a value is set to "auto", it will be set '
+                                   ' to the 98th percentile of the matrix values.',
+                              type=str,
                               nargs='+')
         optional.add_argument('--heatmapHeight',
                               help='Plot height in cm. The default for the heatmap '
diff --git a/deeptools/plotHeatmap.py b/deeptools/plotHeatmap.py
index 0cabe046d7..aee0a6280e 100644
--- a/deeptools/plotHeatmap.py
+++ b/deeptools/plotHeatmap.py
@@ -412,6 +412,16 @@ def plotMatrix(hm, outFileName,
             zMin = [None]
         else:
             zMin = [zMin]  # convert to list to support multiple entries
+    elif 'auto' in zMin:
+        matrix_flatten = hm.matrix.flatten()
+        auto_min = np.percentile(matrix_flatten, 1.0)
+        if np.isnan(auto_min):
+            auto_min = None
+        new_mins = [float(x) if x != 'auto' else auto_min for x in zMin]
+        zMin = new_mins
+    else:
+        new_mins = [float(x) for x in zMin]
+        zMin = new_mins
 
     if zMax is None:
         if matrix_flatten is None:
@@ -422,6 +432,23 @@ def plotMatrix(hm, outFileName,
             zMax = [None]
         else:
             zMax = [zMax]
+    elif 'auto' in zMax:
+        matrix_flatten = hm.matrix.flatten()
+        auto_max = np.percentile(matrix_flatten, 98.0)
+        if np.isnan(auto_max):
+            auto_max = None
+        new_maxs = [float(x) if x != 'auto' else auto_max for x in zMax]
+        zMax = new_maxs
+    else:
+        new_maxs = [float(x) for x in zMax]
+        zMax = new_maxs
+    if (len(zMin) > 1) & (len(zMax) > 1):
+        for index, value in enumerate(zMax):
+            if value <= zMin[index]:
+                sys.stderr.write("Warnirng: In bigwig {}, the given zmin ({}) is larger than "
+                                 "or equal to the given zmax ({}). Thus, it has been set "
+                                 "to None. \n".format(index + 1, zMin[index], value))
+                zMin[index] = None
 
     if yMin is None:
         yMin = [None]
@@ -833,7 +860,7 @@ def main(args=None):
             hm.matrix.computeSilhouette(args.args.hclust)
 
     if args.outFileNameMatrix:
-        hm.save_matrix_values(args.outFileNameMatrix)
+        hm.save_matrix(args.outFileNameMatrix)
 
     if args.outFileSortedRegions:
         hm.save_BED(args.outFileSortedRegions)
diff --git a/deeptools/plotProfile.py b/deeptools/plotProfile.py
index 2cad0a32cd..e953e4550f 100755
--- a/deeptools/plotProfile.py
+++ b/deeptools/plotProfile.py
@@ -6,6 +6,7 @@
 
 import argparse
 import numpy as np
+from math import ceil
 import matplotlib
 matplotlib.use('Agg')
 matplotlib.rcParams['pdf.fonttype'] = 42
@@ -458,6 +459,13 @@ def plotly_hexbin(self):
         py.plot(fig, filename=self.out_file_name, auto_open=False)
 
     def plot_heatmap(self):
+        cmap = ['RdYlBu_r']
+        if self.color_list is not None:  # check the length to be equal to the numebr of plots otherwise multiply it!
+            cmap = self.color_list
+        if len(cmap) < self.numplots:
+            all_colors = cmap
+            for i in range(ceil(self.numplots / len(cmap))):
+                cmap.extend(all_colors)
         matrix_flatten = None
         if self.y_min == [None]:
             matrix_flatten = self.hm.matrix.flatten()
@@ -479,7 +487,6 @@ def plot_heatmap(self):
 
         ax_list = []
         # turn off y ticks
-
         for plot in range(self.numplots):
             labels = []
             col = plot % self.plots_per_row
@@ -503,9 +510,10 @@ def plot_heatmap(self):
 
             if self.per_group:
                 title = self.hm.matrix.group_labels[plot]
+                tickIdx = plot % self.hm.matrix.get_num_samples()
             else:
                 title = self.hm.matrix.sample_labels[plot]
-
+                tickIdx = plot
             ax.set_title(title)
             mat = []  # when drawing a heatmap (in contrast to drawing lines)
             for data_idx in range(self.numlines):
@@ -526,13 +534,12 @@ def plot_heatmap(self):
                     label = sub_matrix['group']
                 labels.append(label)
                 mat.append(np.ma.__getattribute__(self.averagetype)(sub_matrix['matrix'], axis=0))
-
             img = ax.imshow(np.vstack(mat), interpolation='nearest',
-                            cmap='RdYlBu_r', aspect='auto', vmin=localYMin, vmax=localYMax)
+                            cmap=cmap[plot], aspect='auto', vmin=localYMin, vmax=localYMax)
             self.fig.colorbar(img, cax=cax)
 
             totalWidth = np.vstack(mat).shape[1]
-            xticks, xtickslabel = self.getTicks(plot)
+            xticks, xtickslabel = self.getTicks(tickIdx)
             if np.ceil(max(xticks)) != float(totalWidth - 1):
                 tickscale = float(totalWidth) / max(xticks)
                 xticks_use = [x * tickscale for x in xticks]
diff --git a/deeptools/test/test_readFiltering.py b/deeptools/test/test_readFiltering.py
index ae74df5601..65c5a43f3e 100644
--- a/deeptools/test/test_readFiltering.py
+++ b/deeptools/test/test_readFiltering.py
@@ -4,6 +4,8 @@
 import os.path
 from os import unlink
 import hashlib
+import pysam
+
 
 ROOT = os.path.dirname(os.path.abspath(__file__)) + "/test_data/"
 BAMFILE_FILTER = ROOT + "test_filtering.bam"
@@ -72,12 +74,12 @@ def test_sieve():
                 'test_filtering\t5\t193\n']
     assert_equal(resp, expected)
     unlink(outlog)
-    h = hashlib.md5(open(outfile, "rb").read()).hexdigest()
-    assert(h == "977bdab227a4dbfa3fc9f27c23a3e0b7")
+    h = hashlib.md5(pysam.view(outfile).encode('utf-8')).hexdigest()
+    assert(h == "acbc4443fb0387bfd6c412af9d4fc414")
     unlink(outfile)
 
-    h = hashlib.md5(open(outfiltered, "rb").read()).hexdigest()
-    assert(h == "762e79b7a2245ff6b2cea4139a1455de")
+    h1 = hashlib.md5(pysam.view(outfiltered).encode('utf-8')).hexdigest()
+    assert(h1 == "b90befdd5f073f14acb9a38661f301ad")
     unlink(outfiltered)
 
 
diff --git a/galaxy/wrapper/computeMatrixOperations.xml b/galaxy/wrapper/computeMatrixOperations.xml
index 21b0d6231a..40ebbccf1a 100644
--- a/galaxy/wrapper/computeMatrixOperations.xml
+++ b/galaxy/wrapper/computeMatrixOperations.xml
@@ -71,6 +71,9 @@
                 -m $submodule.matrixFile
                 -R '#echo "' '".join($files)#'
                 -o $outFileName
+            #else if $submodule.command == "dataRange":
+                dataRange
+                -m $submodule.matrixFile
             #end if
 ]]>
     </command>
@@ -85,6 +88,7 @@
                 <option value="rbind">Bind matrices, top to bottom (rbind)</option>
                 <option value="cbind">Bind matrices, left to right (cbind)</option>
                 <option value="sort">Sort by region order in specified files (sort)</option>
+                <option value="dataRange">Returns the min, max, median, 10th and 90th percentile of the matrix values per sample (dataRange)</option>
             </param>
             <when value="info">
                 <param argument="matrixFile" format="deeptools_compute_matrix_archive" type="data"
@@ -151,6 +155,12 @@
                         help="File, in BED format, containing the regions to plot."/>
                 </repeat>
             </when>
+
+            <when value="dataRange"> <!-- dataRange takes only the input matrix (passed as -m) -->
+                <param argument="matrixFile" format="deeptools_compute_matrix_archive" type="data"
+                       label="Matrix file from the computeMatrix tool" help="">
+                </param>
+            </when>
         </conditional>
     </inputs>
     <outputs>
@@ -205,7 +215,8 @@ What it does
 +----------------+--------------------------------------------------------------------------------------------------------------------------+
 + sort           | Sorts the given file so regions are in the order of occurence in the input BED/GTF file(s).                              |
 +----------------+--------------------------------------------------------------------------------------------------------------------------+
-
++ dataRange      | Returns the min, max, median, 10th and 90th percentile of the matrix values per sample.                                  |
++----------------+--------------------------------------------------------------------------------------------------------------------------+
 
 These operations are useful when you want to run computeMatrix on multiple files (thereby keeping all of the values together) and later exclude regions/samples or add new ones. Another common use would be if you require the output of computeMatrix to be sorted to match the order of regions in the input file.
 
diff --git a/galaxy/wrapper/deepTools_macros.xml b/galaxy/wrapper/deepTools_macros.xml
index 6bd2674dc7..6cc2547fc1 100755
--- a/galaxy/wrapper/deepTools_macros.xml
+++ b/galaxy/wrapper/deepTools_macros.xml
@@ -1,10 +1,10 @@
 <macros>
 
     <token name="@THREADS@">--numberOfProcessors "\${GALAXY_SLOTS:-4}"</token>
-    <token name="@WRAPPER_VERSION@">3.4.3.0</token>
+    <token name="@WRAPPER_VERSION@">3.5.0.0</token>
     <xml name="requirements">
         <requirements>
-            <requirement type="package" version="3.4.3">deeptools</requirement>
+            <requirement type="package" version="3.5.0">deeptools</requirement>
             <requirement type="package" version="1.9">samtools</requirement>
         </requirements>
         <expand macro="stdio" />
diff --git a/galaxy/wrapper/plotHeatmap.xml b/galaxy/wrapper/plotHeatmap.xml
index 9f7b34e94f..98ddc1c5b3 100644
--- a/galaxy/wrapper/plotHeatmap.xml
+++ b/galaxy/wrapper/plotHeatmap.xml
@@ -75,9 +75,15 @@
                 #if str($advancedOpt.yMin).strip() != "":
                     --yMin $advancedOpt.yMin
                 #end if
-                #if $advancedOpt.yMax:
+                #if str($advancedOpt.yMax).strip() != "":
                     --yMax $advancedOpt.yMax
                 #end if
+                #if str($advancedOpt.sortUsingSamples).strip() != "":
+                    --sortUsingSamples $advancedOpt.sortUsingSamples
+                #end if
+                #if str($advancedOpt.clusterUsingSamples).strip() != "":
+                    --clusterUsingSamples $advancedOpt.clusterUsingSamples
+                #end if
 
                 --xAxisLabel '$advancedOpt.xAxisLabel'
                 --yAxisLabel '$advancedOpt.yAxisLabel'
@@ -134,6 +140,15 @@
             <when value="yes">
                 <expand macro="sortRegions" />
                 <expand macro="sortUsing" />
+                <param argument="--sortUsingSamples" type="text" label="List of samples to be used for sorting"
+                    help="List of sample numbers (order as in matrix), which are used by --sortUsing for sorting.
+                    If no value is set, it uses all samples. Example: 1 3 (space separated!)">
+                    <sanitizer> <!-- nested inside the param so it applies to this value -->
+                        <valid initial="string.printable">
+                            <add value=" "/>
+                        </valid>
+                    </sanitizer>
+                </param>
                 <param argument="--linesAtTickMarks" type="boolean" truevalue="--linesAtTickMarks" falsevalue=""
                     label="Draw dashed lines in heatmap above all tick marks?" />
                 <param argument="--averageTypeSummaryPlot" type="select"
@@ -168,19 +183,19 @@
                     help="The alpha channel (transparency) to use for the heatmaps. The default is 1.0 and values must
                     be between 0 and 1. A value of 0.0 would be fully transparent." />
 
-                <param argument="--colorList" type="text" value="" size="50" optional="True"
+                <param argument="--colorList" type="text" value="" optional="True"
                     label="List of colors for each heatmap. Separate lists by spaces and the colors in the list by comas"
                     help="The color of the heatmaps can be specified as a list of colors separated by comas with
                     not space in between. For example: white,blue white,green will set a color map from white
                     to blue for the first heatmap and for white to green for the next heatmap."/>
                 <expand macro="zMin_zMax" />
-                <param argument="--yMin" type="float" value="" size="3" optional="True"
+                <param argument="--yMin" type="float" value="" optional="True"
                     label="Minimum value for the Y-axis of the summary plot. Leave empty for automatic values" help=""/>
-                <param argument="--yMax" type="float" value="" size="3" optional="True"
+                <param argument="--yMax" type="float" value="" optional="True"
                     label="Maximum value for Y-axis of the summary plot. Leave empty for automatic values" help=""/>
-                <param argument="--xAxisLabel" type="text" value="distance from TSS (bp)" size="200"
+                <param argument="--xAxisLabel" type="text" value="distance from TSS (bp)"
                     label="The x-axis label" help="" />
-                <param argument="--yAxisLabel" type="text" value="genes" size="30"
+                <param argument="--yAxisLabel" type="text" value="genes"
                     label="The y-axis label for the top panel" help="" />
 
                 <param argument="--heatmapWidth" type="float" value="7.5" min="1" max="100"
@@ -195,17 +210,17 @@
                     <option value="heatmap and colorbar">heatmap and colorbar</option>
                 </param>
 
-                <param argument="--startLabel" type="text" value="TSS" size="10"
+                <param argument="--startLabel" type="text" value="TSS"
                     label="Label for the region start"
                     help ="Only for scale-regions mode. Label shown in the plot for the start of the region. Default is TSS (transcription start site), but could be changed to anything, e.g. &quot;peak start&quot;." />
-                <param argument="--endLabel" type="text" value="TES" size="10"
+                <param argument="--endLabel" type="text" value="TES"
                     label="Label for the region end"
                     help="Only for scale-regions mode. Label shown in the plot for the region end. Default is TES (transcription end site)."/>
 
-                <param argument="--referencePointLabel" type="text" value="TSS" size="10"
+                <param argument="--referencePointLabel" type="text" value="TSS"
                     label="Reference point label"
                     help ="Label shown in the plot for the reference-point. Default is the same as the reference point selected (e.g. TSS), but could be anything, e.g. &quot;peak start&quot; etc." />
-                <param argument="--samplesLabel" type="text" size="30"
+                <param argument="--samplesLabel" type="text"
                     label="Labels for the samples (each bigwig) plotted"
                     help="The default is to use the file name of the sample. The sample labels should be separated by
                     spaces and quoted if a label itself contains a space E.g. label-1 &quot;label 2&quot;">
@@ -214,7 +229,7 @@
                         </valid>
                     </sanitizer>
                 </param>
-                <param argument="--regionsLabel" type="text" size="30"
+                <param argument="--regionsLabel" type="text"
                     label="Labels for the regions plotted in the heatmap"
                     help="If more than one region is being plotted a list of labels separated by space is required.
                           If a label itself contains a space, then quotes are needed.
@@ -228,6 +243,15 @@
                     help="The default is to make one plot per bigWig file, i.e., all samples next to each other. Choosing this option will make one plot per group of regions."/>
 
                 <expand macro="kmeans_clustering" />
+                <param argument="--clusterUsingSamples" type="text" label="List of samples to be used for clustering"
+                    help="List of sample numbers (order as in matrix), which are used by --kmeans or --hclust for clustering.
+                    If no value is set, it uses all samples. Example: 1 3 (space separated!)">
+                    <sanitizer> <!-- nested inside the param so it applies to this value -->
+                        <valid initial="string.printable">
+                            <add value=" "/>
+                        </valid>
+                    </sanitizer>
+                </param>
             </when>
         </conditional>
     </inputs>