# 顾祖光(Zuguang Gu)
# 2022-09-20
# 如果你使用本代码,请引用: Zuguang Gu. 2022. Complex heatmap visualization. iMeta 1: e43. https://doi.org/10.1002/imt2.43
#
# 翻译及注释:农心生信工作室
#
# 热图(Heatmap)是矩阵类型数据最为常用的可视化方法,在生物学领域常用于各种组学数据的可视化,例如基因表达量数据、物种丰度分布、表观遗传信号数据等。现如今多组学数据不断涌现,现有的R语言包(例如 gplots、pheatmap、ggplot2)绘制热图的功能较为单一,已无法满足大家从多个维度、对多类型数据更加充分可视化的需求。ComplexHeatmap 由德国国家肿瘤疾病中心顾祖光博士开发,该R包正如其名一样复杂,但是其绘制热图功能却堪称全面。下面我们将以顾祖光博士发表在 iMeta 上的方法论文 Complex heatmap visualization 为例进行介绍。
#
# iMeta | 德国国家肿瘤中心顾祖光发表复杂热图(ComplexHeatmap)可视化方法
# 本文以其中的 Figure 2A 为例,对 ComplexHeatmap 的热图绘制方法进行讲解和探讨,先上原图:
# R包检测和安装
# 1. 先安装 ComplexHeatmap 包及其依赖,并将所有包载入
# Install devtools (needed for install_github() below) if it is missing.
# requireNamespace() only checks availability; unlike require() it does not
# attach the package or emit a warning when the package is absent.
if (!requireNamespace("devtools", quietly = TRUE)) {
  install.packages("devtools")
}
# Attach devtools so that install_github() is available
library(devtools)
# Install the latest ComplexHeatmap from GitHub if it is missing
if (!requireNamespace("ComplexHeatmap", quietly = TRUE)) {
  install_github("jokergoo/ComplexHeatmap")
}
# circlize: a dependency of ComplexHeatmap
if (!requireNamespace("circlize", quietly = TRUE)) {
  install_github("jokergoo/circlize")
}
# dendextend: used to draw the hierarchical clustering result for the
# columns of the matrix
if (!requireNamespace("dendextend", quietly = TRUE)) {
  install.packages("dendextend")
}
# Attach the packages
library(ComplexHeatmap)
library(circlize)
library(dendextend)
# Generate test data
# 2. Set the random seed and generate an 18 x 24 random matrix `mat`
# Fix the random seed so that the data are reproducible
set.seed(123)
# Sizes of the row groups and column groups used to form distinct clusters
nr1 <- 4
nr2 <- 8
nr3 <- 6
nr <- nr1 + nr2 + nr3
nc1 <- 6
nc2 <- 8
nc3 <- 10
nc <- nc1 + nc2 + nc3
# Each column group stacks three row groups. The rnorm() calls run in the
# same order as in a single nested cbind(rbind(...), ...) expression, so the
# random stream (and therefore `mat`) is unchanged. `nrow` is spelled out
# instead of relying on partial matching of `nr`.
col_group1 <- rbind(
  matrix(rnorm(nr1 * nc1, mean = 1, sd = 0.5), nrow = nr1),
  matrix(rnorm(nr2 * nc1, mean = 0, sd = 0.5), nrow = nr2),
  matrix(rnorm(nr3 * nc1, mean = 0, sd = 0.5), nrow = nr3)
)
col_group2 <- rbind(
  matrix(rnorm(nr1 * nc2, mean = 0, sd = 0.5), nrow = nr1),
  matrix(rnorm(nr2 * nc2, mean = 1, sd = 0.5), nrow = nr2),
  matrix(rnorm(nr3 * nc2, mean = 0, sd = 0.5), nrow = nr3)
)
col_group3 <- rbind(
  matrix(rnorm(nr1 * nc3, mean = 0.5, sd = 0.5), nrow = nr1),
  matrix(rnorm(nr2 * nc3, mean = 0.5, sd = 0.5), nrow = nr2),
  matrix(rnorm(nr3 * nc3, mean = 1, sd = 0.5), nrow = nr3)
)
mat <- cbind(col_group1, col_group2, col_group3)
dim(mat) # inspect the dimensions of the data
#> [1] 18 24
# Optional: round-trip the matrix through a file
# write.table(mat, file = "mat.txt", sep = "\t", quote = FALSE, row.names = TRUE, col.names = TRUE)
# mat <- as.matrix(read.table("mat.txt", header = TRUE, row.names = 1, sep = "\t", comment.char = ""))
mat <- mat[sample(nr, nr), sample(nc, nc)] # shuffle rows and columns
rownames(mat) <- paste0("row", seq_len(nr)) # name the rows
colnames(mat) <- paste0("column", seq_len(nc)) # name the columns
# Heatmap preview
# 3. Draw a simple heatmap with Heatmap(), the most important function in
# ComplexHeatmap. With default arguments it produces a legend and row/column
# names and clusters rows and columns separately; the clustering method is
# the same "complete" hierarchical clustering used for the column dendrogram
# built in step 4 below.
Heatmap(mat)
# Coloring the clustering tree
# 4. Hierarchically cluster the columns. "complete" linkage (the hclust()
# default) defines the distance between two clusters as the largest distance
# between their members; Euclidean distance (the dist() default) is the
# metric. Both defaults are written out explicitly here.
column_dend <- as.dendrogram(
  hclust(dist(t(mat), method = "euclidean"), method = "complete")
)
# Color the branches of the three top-level clusters
column_dend <- color_branches(column_dend, k = 3)
# 5. Try out some parameters. ComplexHeatmap is built on the grid graphics
# framework, so every argument that expresses a length must be given with
# unit(), e.g. row_dend_width below. cluster_columns and cluster_rows can be
# set to a dendrogram object prepared beforehand.
Heatmap(
  mat,
  name = "mat",
  row_dend_width = unit(2, "cm"), # width of the row dendrogram
  cluster_columns = column_dend, # use the dendrogram object built earlier
  column_title = "(A) A heatmap with various annotations", # title
  show_column_names = FALSE, # hide the column names
  # Labels used to split the rows: "A"/"B" alternating down the rows. The
  # length follows the matrix instead of a hard-coded repeat count.
  row_split = rep(c("A", "B"), length.out = nrow(mat)),
  row_km = 2, # number of k-means clusters for the rows
  column_split = 3 # number of column slices
)
# Note on row_km; the official documentation says:
#   Apply k-means clustering on rows. If the value is larger than 1, the
#   heatmap will be split by rows according to the k-means clustering.
#   For each row slice, hierarchical clustering is still applied with
#   parameters above.
# In other words, once this argument is larger than 1, a k-means clustering
# is run first, then the default hierarchical clustering is applied a second
# time within each of the k clusters, and the heatmap rows are split
# according to the clustering result.
# Heatmap annotations
# 6. Finally, add the row and column annotations. anno_points() draws a point
# plot above the matrix from uniform random numbers between 0 and 1;
# anno_barplot() draws a stacked bar plot to the right of the matrix, with
# its colors controlled through gpar() from the grid framework.
# Annotation lengths are derived from the dimensions of `mat` rather than
# hard-coded, so they stay in sync with the matrix.
ht <- Heatmap(
  mat,
  name = "mat",
  row_dend_width = unit(2, "cm"), # width of the row dendrogram
  cluster_columns = column_dend, # use the dendrogram object built earlier
  column_title = "(A) A heatmap with various annotations", # title
  show_column_names = FALSE, # hide the column names
  # labels used to split the rows
  row_split = rep(c("A", "B"), length.out = nrow(mat)),
  row_km = 2, # number of k-means clusters for the rows
  column_split = 3, # number of column slices
  top_annotation = HeatmapAnnotation(
    foo1 = seq_len(ncol(mat)),
    bar1 = anno_points(runif(ncol(mat)))
  ),
  right_annotation = rowAnnotation(
    foo2 = rev(seq_len(nrow(mat))),
    bar2 = anno_barplot(
      cbind(runif(nrow(mat)), runif(nrow(mat))),
      gp = gpar(fill = 2:3),
      width = unit(2, "cm")
    )
  )
)
# Auto-print the object to draw the heatmap
ht
# Save the figure as a PDF with the given width and height
pdf("Figure2A.pdf", width = 8, height = 4.5)
# Close the PDF device even if drawing fails, so it is never left open.
# invisible() keeps the returned object from being auto-printed (and thus
# drawn a second time) after the device has been closed.
invisible(tryCatch(print(ht), finally = dev.off()))
#附.完整代码
library(ComplexHeatmap)
library(circlize)
library(dendextend)
# Fix the random seed so that the data are reproducible
set.seed(123)
# Sizes of the row groups and column groups
nr1 <- 4
nr2 <- 8
nr3 <- 6
nr <- nr1 + nr2 + nr3
nc1 <- 6
nc2 <- 8
nc3 <- 10
nc <- nc1 + nc2 + nc3
# Build the matrix one column group at a time; the rnorm() calls keep the
# order of the nested cbind(rbind(...), ...) form, so the values are
# unchanged. `nrow` is spelled out rather than partially matched via `nr`.
col_group1 <- rbind(
  matrix(rnorm(nr1 * nc1, mean = 1, sd = 0.5), nrow = nr1),
  matrix(rnorm(nr2 * nc1, mean = 0, sd = 0.5), nrow = nr2),
  matrix(rnorm(nr3 * nc1, mean = 0, sd = 0.5), nrow = nr3)
)
col_group2 <- rbind(
  matrix(rnorm(nr1 * nc2, mean = 0, sd = 0.5), nrow = nr1),
  matrix(rnorm(nr2 * nc2, mean = 1, sd = 0.5), nrow = nr2),
  matrix(rnorm(nr3 * nc2, mean = 0, sd = 0.5), nrow = nr3)
)
col_group3 <- rbind(
  matrix(rnorm(nr1 * nc3, mean = 0.5, sd = 0.5), nrow = nr1),
  matrix(rnorm(nr2 * nc3, mean = 0.5, sd = 0.5), nrow = nr2),
  matrix(rnorm(nr3 * nc3, mean = 1, sd = 0.5), nrow = nr3)
)
mat <- cbind(col_group1, col_group2, col_group3)
mat <- mat[sample(nr, nr), sample(nc, nc)] # random shuffle rows and columns
rownames(mat) <- paste0("row", seq_len(nr))
colnames(mat) <- paste0("column", seq_len(nc))
# Hierarchical clustering of the columns, converted to a dendrogram
column_dend <- as.dendrogram(hclust(dist(t(mat))))
column_dend <- color_branches(column_dend, k = 3) # `color_branches()` returns a dendrogram object
# Draw the annotated heatmap. Annotation lengths are derived from the
# dimensions of `mat` rather than hard-coded, so they stay in sync with it.
Heatmap(
  mat,
  name = "mat",
  row_dend_width = unit(2, "cm"), # width of the row dendrogram
  cluster_columns = column_dend, # use the dendrogram object built above
  column_title = "(A) A heatmap with various annotations", # title
  show_column_names = FALSE, # hide the column names
  # labels used to split the rows
  row_split = rep(c("A", "B"), length.out = nrow(mat)),
  row_km = 2, # number of k-means clusters for the rows
  column_split = 3, # number of column slices
  top_annotation = HeatmapAnnotation(
    foo1 = seq_len(ncol(mat)),
    bar1 = anno_points(runif(ncol(mat)))
  ),
  right_annotation = rowAnnotation(
    foo2 = rev(seq_len(nrow(mat))),
    bar2 = anno_barplot(
      cbind(runif(nrow(mat)), runif(nrow(mat))),
      gp = gpar(fill = 2:3),
      width = unit(2, "cm")
    )
  )
)
# 以上代码来自 Zuguang Gu 博士的 GitHub 仓库:https://github.com/jokergoo/ComplexHeatmap_v2_paper_code 。论文中其他的图都有相应的代码实现,大家可以参考学习,绘制出更加漂亮的热图!