-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathheatmaps-2016-07.R
218 lines (141 loc) · 5.6 KB
/
heatmaps-2016-07.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
# creating heatmaps in R using gene expression data
# part 1 - set working directory (default location for input and output files)
# select Desktop on a Mac (or any other directory)
setwd("~/Desktop")
# part 2 - get prerequisites
# install the relevant packages if they are not installed
install.packages("RColorBrewer")
install.packages("pheatmap")
# download a table of genes with corresponding FPKM values (filtered for detectable genes)
fileName = "ca-genes-fpkm.csv"
download.file(paste0("https://raw.githubusercontent.com/igordot/tutorials/master/", fileName), destfile=fileName)
# download a table of genes with differential expression stats (filtered for significant genes)
fileName = "ca-genes-stats-sig.csv"
download.file(paste0("https://raw.githubusercontent.com/igordot/tutorials/master/", fileName), destfile=fileName)
# part 3 - load the relevant packages and example data
# load the relevant packages
library(pheatmap)
library(RColorBrewer)
# import gene FPKM values (used for the actual heatmap)
geneVals = read.csv(file="ca-genes-fpkm.csv", row.names=1, check.names=F, stringsAsFactors=F)
# check what you imported
head(geneVals)
dim(geneVals)
# import gene stats (used for filtering the list of genes)
geneStats = read.csv(file="ca-genes-stats-sig.csv", row.names=1, check.names=F, stringsAsFactors=F)
# check what you imported
head(geneStats)
dim(geneStats)
# part 4 - check and adjust expression values
# check the distribution of expression values ("las=2" means labels are perpendicular)
boxplot(geneVals, las=2)
# take a log of values to reduce variance
geneValsLog = log2(geneVals)
head(geneValsLog)
# take a log of values accounting for zeroes
geneValsLog = log2(geneVals + 0.01)
head(geneValsLog)
# check the distribution of logged values
boxplot(geneValsLog, las=2)
# part 5 - generate a basic heatmap
# learn more about pheatmap
?pheatmap
# default heatmap using original values (use 500 genes to save time)
heatmapVals = geneVals[1:500,]
dim(heatmapVals)
pheatmap(heatmapVals)
# default heatmap using logged values (use 500 genes to save time)
heatmapValsLog = geneValsLog[1:500,]
dim(heatmapValsLog)
pheatmap(heatmapValsLog)
# center and scale in the row direction
pheatmap(heatmapValsLog, scale="row")
# part 6 - color options
# manually create color range
myColors = c("green", "black", "red")
myColors
# expand the color range
myColors = colorRampPalette(myColors)(50)
myColors
# visualize results
pheatmap(heatmapValsLog, scale="row", color=myColors)
# learn more about display.brewer.all
?display.brewer.all
# see RColorBrewer color palettes
display.brewer.all()
# select a color palette
myColors = brewer.pal(n=11, name="RdBu")
myColors = colorRampPalette(myColors)(50)
myColors
# visualize results
pheatmap(heatmapValsLog, scale="row", color=myColors)
# reverse the color order
myColors = rev(myColors)
# visualize results
pheatmap(heatmapValsLog, scale="row", color=myColors)
# part 7 - subset genes
# examine the values data frame
dim(geneValsLog)
head(geneValsLog)
# examine the stats data frame
dim(geneStats)
head(geneStats)
# subset for significant genes
heatmapAllSig = geneValsLog[rownames(geneStats),]
dim(heatmapAllSig)
# visualize results
pheatmap(heatmapAllSig, scale="row", color=myColors)
# filter significant genes based on q-value less than 0.01
geneStats[,"q_value"] < 0.01
geneStatsSubset = geneStats[geneStats[,"q_value"] < 0.01,]
dim(geneStatsSubset)
head(geneStatsSubset)
# sort the data frame of significant genes by fold change
geneStatsSorted = geneStatsSubset[order(geneStatsSubset[,"log2_fold_change"]),]
head(geneStatsSorted)
geneStatsSorted = geneStatsSubset[order(abs(geneStatsSubset[,"log2_fold_change"]), decreasing=T),]
head(geneStatsSorted)
# get 50 significant genes with the highest fold change
heatmapTop50 = geneValsLog[rownames(geneStatsSorted[1:50,]),]
dim(heatmapTop50)
head(heatmapTop50)
# visualize results
pheatmap(heatmapTop50, scale="row", color=myColors)
# remove the border around each cell
pheatmap(heatmapTop50, scale="row", color=myColors, border_color=NA)
# reduce row label font
pheatmap(heatmapTop50, scale="row", color=myColors, border_color=NA, fontsize_row=6)
# part 8 - determine gene order
# save heatmap as an object
p = pheatmap(heatmapTop50, scale="row", color=myColors, border_color=NA, fontsize_row=6)
# gene order
p$tree_row$order
# input data frame sorted by heatmap order
heatmapTop50[p$tree_row$order,]
# part 9 - label groups
# create a data frame for sample labels with a column "group" set to sample names
colLabels = data.frame(group=colnames(geneValsLog), stringsAsFactors=F)
colLabels
# convert row names (consecutive integers by default) to sample names (from column "group")
rownames(colLabels) = colLabels[,"group"]
colLabels
# rename contents of column "group" that start with "AL_05" to "young"
colLabels[,"group"] = gsub("AL_05.*", "young", x=colLabels[,"group"])
colLabels
# rename contents of column "group" that start with "AL_15" to "old"
colLabels[,"group"] = gsub("AL_15.*", "old", x=colLabels[,"group"])
colLabels
# visualize results
pheatmap(heatmapTop50, scale="row", color=myColors, border_color=NA, fontsize_row=6,
annotation_col=colLabels)
# create a list of sample label colors
colColors = list(group = c("green", "brown"))
colColors
# assign names to the list of colors using group names
names(colColors$group) = unique(colLabels[,"group"])
colColors
# visualize results
pheatmap(heatmapTop50, scale="row", color=myColors, border_color=NA, fontsize_row=6,
annotation_col=colLabels, annotation_colors=colColors)
# if you prefer to avoid coding, check this listing of graphical alternatives:
# http://bit.ly/heatmapoptions