diff --git a/deeptools/heatmapper.py b/deeptools/heatmapper.py index f67afaf4da..b5d524c9c0 100644 --- a/deeptools/heatmapper.py +++ b/deeptools/heatmapper.py @@ -1308,9 +1308,9 @@ def hmcluster(self, k, evaluate_silhouette=True, method='kmeans', clustering_sam _clustered_matrix = [] cluster_number = 1 for cluster in cluster_order: - self.group_labels.append("cluster_{}".format(cluster_number)) - cluster_number += 1 cluster_ids = _cluster_ids_list[cluster] + self.group_labels.append("cluster_{}_n{}".format(cluster_number,len(cluster_ids))) + cluster_number += 1 self.group_boundaries.append(self.group_boundaries[-1] + len(cluster_ids)) _clustered_matrix.append(self.matrix[cluster_ids, :]) diff --git a/deeptools/heatmapper_utilities.py b/deeptools/heatmapper_utilities.py index e63dfb0226..aa3559096f 100644 --- a/deeptools/heatmapper_utilities.py +++ b/deeptools/heatmapper_utilities.py @@ -9,6 +9,9 @@ old_settings = np.seterr(all='ignore') +debug = 0 +if debug: + from ipdb import set_trace def plot_single(ax, ma, average_type, color, label, plot_type='lines'): """ @@ -18,7 +21,7 @@ def plot_single(ax, ma, average_type, color, label, plot_type='lines'): ---------- ax : matplotlib axis matplotlib axis - ma : numpy array + ma : numpy array or list of numpy array(for plot with --repgrplist, take average between replicates ) numpy array The data on this matrix is summarized according to the `average_type` argument. average_type : str @@ -32,7 +35,9 @@ def plot_single(ax, ma, average_type, color, label, plot_type='lines'): type of plot. Either 'se' for standard error, 'std' for standard deviation, 'overlapped_lines' to plot each line of the matrix, fill to plot the area between the x axis and the value or any other string to - just plot the average line. + just plot the average line. When assign samples to replicates group such as + '--repgrplist WT WT KO KO' : 'std' would be the standard deviation between replicates groups. 
+ 'se' for standard error between replicates groups. Returns ------- @@ -63,7 +68,15 @@ def plot_single(ax, ma, average_type, color, label, plot_type='lines'): """ - summary = np.ma.__getattribute__(average_type)(ma, axis=0) + if isinstance(ma,list): + summary_list = [] + for ma_each in ma: + tmp = np.ma.__getattribute__(average_type)(ma_each, axis=0) + summary_list.append(tmp) + ma = np.array(summary_list) + summary = np.ma.__getattribute__("average")(ma, axis=0) + else: + summary = np.ma.__getattribute__(average_type)(ma, axis=0) # only plot the average profiles without error regions x = np.arange(len(summary)) if isinstance(color, np.ndarray): @@ -72,6 +85,8 @@ def plot_single(ax, ma, average_type, color, label, plot_type='lines'): if plot_type == 'fill': ax.fill_between(x, summary, facecolor=color, alpha=0.6, edgecolor='none') + if debug: + set_trace() if plot_type in ['se', 'std']: if plot_type == 'se': # standard error std = np.std(ma, axis=0) / np.sqrt(ma.shape[0]) diff --git a/deeptools/parserCommon.py b/deeptools/parserCommon.py index 37e9f359ab..f1b409ac8e 100755 --- a/deeptools/parserCommon.py +++ b/deeptools/parserCommon.py @@ -849,6 +849,14 @@ def heatmapperOptionalArgs(mode=['heatmap', 'profile'][0]): 'group.', action='store_true') + optional.add_argument('--repgrplist', + default=None, + nargs='+', + help='Group profiles by genotype' + 'assign each profile to a group(as replicates) to plot average +/- se/std.' + 'If the number of group values is smaller than' + 'the number of samples, the values will be equally divide into groups.') + optional.add_argument('--plotFileFormat', metavar='', help='Image format type. 
def autobreaklinetitle(title, sep="[-_,.:]", lmax=15):
    """
    Wrap a long plot title over several lines.

    The title is split on every match of `sep`, empty pieces are discarded
    and the remaining pieces are re-joined with '-'. A new line is started
    whenever appending the next piece (including its '-' separator) would
    make the current line longer than `lmax` characters. A single piece
    that is longer than `lmax` is kept intact on a line of its own.

    Parameters
    ----------
    title : str
        Title to wrap.
    sep : str
        Regular expression passed to `re.split` to find the places where
        the title may be broken.
    lmax : int
        Maximum number of characters per line.

    Returns
    -------
    str
        The wrapped title. It always starts with a newline character, so
        the returned value for an empty title is "\\n".

    Examples
    --------
    >>> autobreaklinetitle("a_b")
    '\\na-b'
    >>> autobreaklinetitle("aaaaaaaaaa_bbbbbbbbbb_cc")
    '\\naaaaaaaaaa\\nbbbbbbbbbb-cc'
    """
    outsep = "-"
    pieces = [piece for piece in re.split(sep, title) if piece]

    lines = []
    current = ""
    for piece in pieces:
        candidate = current + outsep + piece if current else piece
        # Only wrap when there is already something on the line; otherwise
        # an over-long first piece would produce an empty line.
        if current and len(candidate) > lmax:
            lines.append(current)
            # Start the next line with the piece alone; the separator is
            # added by `candidate` when the following piece is appended.
            current = piece
        else:
            current = candidate
    if current:
        lines.append(current)

    # The leading newline is kept because callers rely on the returned
    # title starting on a fresh line.
    return "\n" + "\n".join(lines)
color_list, yMin, yMax, kwargs['wspace'], kwargs['hspace'], colorbar_position, label_rotation) if legend_location != 'none': - ax_list[-1].legend(loc=legend_location.replace('-', ' '), ncol=1, prop=fontP, - frameon=False, markerscale=0.5) + ax = ax_list[-1] # beisi + box = ax.get_position() + ax.set_position([box.x0, box.y0 - box.height * 0.1, box.width, box.height * 0.9]) + legend = ax.legend(loc='lower right', shadow=False, fontsize='x-large', bbox_to_anchor=(0, 1.3, 1, .22), ncol=10, frameon=False, prop=fontP) # beisi, legend line + ax.add_artist(legend) + # ax_list[-1].legend(loc=legend_location.replace('-', ' '), ncol=1, prop=fontP, + # frameon=False, markerscale=0.5) first_group = 0 # helper variable to place the title per sample/group for sample in range(hm.matrix.get_num_samples()): @@ -632,7 +653,7 @@ def plotMatrix(hm, outFileName, if group == first_group and not showSummaryPlot and not perGroup: title = hm.matrix.sample_labels[sample] - ax.set_title(title) + ax.set_title(autobreaklinetitle(title)) if box_around_heatmaps is False: # Turn off the boxes around the individual heatmaps @@ -685,9 +706,9 @@ def plotMatrix(hm, outFileName, ax.axes.set_xlabel(xAxisLabel) ax.axes.set_yticks([]) if perGroup and group == 0: - ax.axes.set_ylabel(sub_matrix['sample']) + ax.axes.set_ylabel(sub_matrix['sample'],rotation=75,labelpad=0,fontsize=15) elif not perGroup and sample == 0: - ax.axes.set_ylabel(sub_matrix['group']) + ax.axes.set_ylabel(sub_matrix['group'],rotation=75,labelpad=0,horizontalalignment='right',fontsize=15) # Plot vertical lines at tick marks if desired if linesAtTickMarks: diff --git a/deeptools/plotProfile.py b/deeptools/plotProfile.py index 7497875f20..da1c212f39 100755 --- a/deeptools/plotProfile.py +++ b/deeptools/plotProfile.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- -import sys +import sys, os, re import argparse import numpy as np @@ -26,12 +26,12 @@ from deeptools.heatmapper_utilities import plot_single, plotly_single, getProfileTicks from 
def autobreaklinetitle(title, sep="[-_,.:]", lmax=15):
    """
    Wrap a long plot title over several lines.

    The title is split on every match of `sep`, empty pieces are discarded
    and the remaining pieces are re-joined with '-'. A new line is started
    whenever appending the next piece (including its '-' separator) would
    make the current line longer than `lmax` characters. A single piece
    that is longer than `lmax` is kept intact on a line of its own.

    Parameters
    ----------
    title : str
        Title to wrap.
    sep : str
        Regular expression passed to `re.split` to find the places where
        the title may be broken.
    lmax : int
        Maximum number of characters per line.

    Returns
    -------
    str
        The wrapped title. It always starts with a newline character, so
        the returned value for an empty title is "\\n".

    Examples
    --------
    >>> autobreaklinetitle("a_b")
    '\\na-b'
    >>> autobreaklinetitle("aaaaaaaaaa_bbbbbbbbbb_cc")
    '\\naaaaaaaaaa\\nbbbbbbbbbb-cc'
    """
    outsep = "-"
    pieces = [piece for piece in re.split(sep, title) if piece]

    lines = []
    current = ""
    for piece in pieces:
        candidate = current + outsep + piece if current else piece
        # Only wrap when there is already something on the line; otherwise
        # an over-long first piece would produce an empty line.
        if current and len(candidate) > lmax:
            lines.append(current)
            # Start the next line with the piece alone; the separator is
            # added by `candidate` when the following piece is appended.
            current = piece
        else:
            current = candidate
    if current:
        lines.append(current)

    # The leading newline is kept because callers rely on the returned
    # title starting on a fresh line.
    return "\n" + "\n".join(lines)
-271,7 +291,7 @@ def plot_hexbin(self): for data_idx in range(self.numlines)[::-1]: ax = self.fig.add_subplot(sub_grid[data_idx, 0]) if data_idx == 0: - ax.set_title(title) + ax.set_title(autobreaklinetitle(title)) if data_idx != self.numlines - 1: plt.setp(ax.get_xticklabels(), visible=False) @@ -512,11 +532,11 @@ def plot_heatmap(self): if self.per_group: title = self.hm.matrix.group_labels[plot] - tickIdx = plot % self.hm.matrix.get_num_samples() + tickIdx = plot % self.numlines else: title = self.hm.matrix.sample_labels[plot] tickIdx = plot - ax.set_title(title) + ax.set_title(autobreaklinetitle(title)) mat = [] # when drawing a heatmap (in contrast to drawing lines) for data_idx in range(self.numlines): if self.per_group: @@ -718,41 +738,84 @@ def plot_profile(self): title = self.hm.matrix.group_labels[plot] if row != 0 and len(self.y_min) == 1 and len(self.y_max) == 1: plt.setp(ax.get_yticklabels(), visible=False) - tickIdx = plot % self.hm.matrix.get_num_samples() + tickIdx = plot % self.numlines else: title = self.hm.matrix.sample_labels[plot] if col != 0 and len(self.y_min) == 1 and len(self.y_max) == 1: plt.setp(ax.get_yticklabels(), visible=False) tickIdx = plot - ax.set_title(title) - for data_idx in range(self.numlines): - if self.per_group: - _row, _col = plot, data_idx - else: - _row, _col = data_idx, plot - if localYMin is None or self.y_min[col % len(self.y_min)] < localYMin: - localYMin = self.y_min[col % len(self.y_min)] - if localYMax is None or self.y_max[col % len(self.y_max)] > localYMax: - localYMax = self.y_max[col % len(self.y_max)] - - sub_matrix = self.hm.matrix.get_matrix(_row, _col) - - if self.per_group: - label = sub_matrix['sample'] - else: - label = sub_matrix['group'] - - if self.numlines > 1: - coloridx = data_idx - else: - coloridx = plot - plot_single(ax, sub_matrix['matrix'], - self.averagetype, - self.color_list[coloridx], - label, - plot_type=self.plot_type) - globalYmin = min(float(globalYmin), ax.get_ylim()[0]) + 
ax.set_title(autobreaklinetitle(title)) + if localYMin is None or self.y_min[col % len(self.y_min)] < localYMin: + localYMin = self.y_min[col % len(self.y_min)] + if localYMax is None or self.y_max[col % len(self.y_max)] > localYMax: + localYMax = self.y_max[col % len(self.y_max)] + if self.per_group and self.repgrplist: + nsamptmp = self.hm.matrix.get_num_samples() + repgrp_samp_dict = {} + repgrplistuniq = [] + for tmp in self.repgrplist: + if not tmp in repgrplistuniq: + repgrplistuniq.append(tmp) + + for data_idx in range(nsamptmp): + if len(self.repgrplist) >= nsamptmp: + thisrepgrp = self.repgrplist[data_idx] + else: + thisrepgrp = repgrplistuniq[int(data_idx / (nsamptmp/self.numlines))] + try: + repgrp_samp_dict[thisrepgrp].append(data_idx) + except: + repgrp_samp_dict[thisrepgrp] = [ data_idx ] + + if debug: + set_trace() + for irepgrp, repgrp in enumerate(repgrplistuniq): + sub_matrix_list = [] + for data_idx in repgrp_samp_dict[repgrp]: + _row, _col = plot, data_idx + sub_matrix = self.hm.matrix.get_matrix(_row, _col) + sub_matrix_list.append(sub_matrix['matrix']) + + label = f"{repgrp}(n={len(repgrp_samp_dict[repgrp])})" + + if self.numlines > 1: + coloridx = irepgrp + else: + coloridx = plot + plot_single(ax, sub_matrix_list, + self.averagetype, + self.color_list[coloridx], + label, + plot_type=self.plot_type) + else: + for data_idx in range(self.numlines): + if self.per_group: + _row, _col = plot, data_idx + else: + _row, _col = data_idx, plot + if localYMin is None or self.y_min[col % len(self.y_min)] < localYMin: + localYMin = self.y_min[col % len(self.y_min)] + if localYMax is None or self.y_max[col % len(self.y_max)] > localYMax: + localYMax = self.y_max[col % len(self.y_max)] + + sub_matrix = self.hm.matrix.get_matrix(_row, _col) + + if self.per_group: + label = sub_matrix['sample'] + else: + label = sub_matrix['group'] + + if self.numlines > 1: + coloridx = data_idx + else: + coloridx = plot + plot_single(ax, sub_matrix['matrix'], + 
self.averagetype, + self.color_list[coloridx], + label, + plot_type=self.plot_type) + globalYmin = min(np.float64(globalYmin), ax.get_ylim()[0]) globalYmax = max(globalYmax, ax.get_ylim()[1]) # Exclude ticks from all but one subplot by default @@ -957,6 +1020,7 @@ def main(args=None): plot_height=args.plotHeight, plot_width=args.plotWidth, per_group=args.perGroup, + repgrplist=args.repgrplist, plot_type=args.plotType, image_format=args.plotFileFormat, color_list=args.colors,