跟着iMeta学做图|ggplot2绘制曼哈顿图展现物种丰度差异
# 农心生信工作室
# 2022-10-05
# 代码编写及注释:农心生信工作室
# 曼哈顿图 (Manhattan Plot) 是散点图的一种,在微生物组研究中通常用于表示微生物差异丰度的结果,
# 是微生物组分析中常用的展示方式。本期我们挑选2022年6月23日刊登在iMeta上的Biochar stimulates tomato roots to recruit a bacterial assemblage contributing to disease resistance against Fusarium wilt- iMeta | 
# 东农吴凤芝/南农韦中等揭示生物炭抑制作物土传病害机理,选择文章的Figure 2D进行复现,
# 降解和探讨曼哈顿图的绘制方法,先上原图:
# 接下来,我们将通过详尽的代码逐步拆解曼哈顿图,最终实现对原图的复现。
# R包检测和安装
# 1.安装核心R包ggplot2以及一些功能辅助性R包,并载入所有R包
if (!require("ggplot2"))
  install.packages('ggplot2') 
if (!require("dplyr"))
  install.packages('dplyr') 
if (!require("ggrepel"))
  install.packages('ggrepel') 
# 加载包
library(ggplot2)
library(dplyr)
library(ggrepel)
# 2.由于并未找到原文图片对应的数据,
# 在这里我们生成测试数据。测试数据应为差异丰度分析后产生的数据框。设置随机种子并生成df
# 设置随机数种子,确保数据可重复
set.seed(123)
# 根据图片,推测至少需要以下几列关键数据
# OTU的ID:字符串,每一个OTU的ID名
# P值:0-1之间的随机数,为了更接近原图,先生成绝大多数0.05-1间的随机数,在根据图像生成一些特定数值
# RA值:这里选择0.05-5之间的随机数
# group:字符串,标明OTU分别属于哪些属
# level:字符串,标注该OTU是否显著富集
# 本例中生成1000 OTUs
df = data.frame(OTU=c(paste("OTU",seq(1,997),sep = "")),
                P=as.matrix(runif(997,min=0.05,max=1)),
                RA=as.matrix(runif(997,min=0.05,max=5)),
                Group=c(rep("Alphaproteobacteria",100),
                        rep("Betaproteobacteria",40),rep("Gammaproteobacteria",60),
                        rep("Deltaproteobacteria",80),rep("Acidobacteria",45),
                        rep("Actinobacteria",110),rep("Bacteroidetes",100),
                        rep("Chloroflexi",140),rep("Gemdfimonadetes",40),
                        rep("Firmicutes",75),rep("Planctomycetes",117),
                        rep("Others",90))
) %>% rbind(data.frame(OTU=c("OTU9998","OTU9999","OTU10000"),P=c(1e-23,1e-6,1e-7),RA=c(5,2.5,0.05),Group=c("Gammaproteobacteria","Actinobacteria","Bacteroidetes")))
#通过ifelse函数,基于P值给df添加新的一列Level,P小于0.05的属于"enriched",其他属于"nosig"
df$Level=ifelse(df$P<0.05,"enriched","nosig")
#给df添加新的一列NegativeLogP,即为P值的以10为底的逆对数
df$NegativeLogP=-log10(df$P)
#观察原图,发现x轴坐标刻度名称是固定的,基于OTU的属分组,因此在这里我们通过factor函数固定属分组顺序,并按属分组对df排序
df$Group=factor(df$Group,levels = c("Alphaproteobacteria","Betaproteobacteria","Gammaproteobacteria","Deltaproteobacteria","Acidobacteria","Actinobacteria","Bacteroidetes","Chloroflexi","Gemdfimonadetes","Firmicutes","Planctomycetes","Others"))#固定属分组顺序
df=arrange(df,Group)#dplyr包的arrange函数对df排序
df$OTU=factor(df$OTU,levels = df$OTU)#固定OTU名顺序
# 曼哈顿图绘制
# 3.使用ggplot2包绘制一个最简单的曼哈顿图(这时只是一张简单的散点图),x轴为OTU名,y轴为NegativeLogP值,
# 颜色映射到属分组,散点大小映射RA值,散点形状映射Level:
p<-ggplot(df, aes(x=OTU, y=NegativeLogP, color=Group, size=RA, shape=Level)) +
  geom_point(alpha=.7)
# 4. 设置散点的形状以及尺寸,并隐藏color和shape对应的图例
p<-ggplot(df, aes(x=OTU, y=NegativeLogP, color=Group, size=RA, shape=Level)) +
  geom_point(alpha=.7)+
  scale_shape_manual(values=c(16,1))+ #选择形状,nosig对应空心圆,enrich对应实心圆
  scale_size_continuous(range = c(0.5,5),
                        breaks = c(0.05, 0.10,0.50,1.00,5.00),
                        labels = c('0.05', '0.10','0.50','1.00','5.00'))+ #设置散点尺寸
  guides(color="none",shape="none")#隐藏图例
#5. 观察预览图,我们发现x轴刻度标签初显示是一条粗黑线,其实是1000个OTU名同时显示导致的,我们观察原图,发现原图是以属分组的属名作为x轴刻度标签,刻度线是属分组中位的OTU。为了获得作为刻度的每个属中位OTU,我们编写并运行函数getBreak()来实现:
getBreak<-function(x,y){
  freq<-as.vector(table(y))  #table()计算频数
  half_freq<-freq%/%2
  for (i in seq(2,length(freq))){
    new_num<-freq[i]+freq[i-1]
    freq[i]=new_num}
  pos<-freq-half_freq
  break_point<-as.vector(x[pos])
  return(break_point)
}
#运行函数getBreak()后,修改x轴刻度及标签,并增加x轴左右的空白:
p<-ggplot(df, aes(x=OTU, y=NegativeLogP, color=Group, size=RA, shape=Level)) +
  geom_point(alpha=.7)+
  scale_shape_manual(values=c(16,1))+ #选择形状,nosig对应空心圆,enrich对应实心圆
  scale_size_continuous(range = c(0.5,5),
                        breaks = c(0.05, 0.10,0.50,1.00,5.00),
                        labels = c('0.05', '0.10','0.50','1.00','5.00'))+ #设置散点尺寸
  guides(color="none",shape="none")+#隐藏图例
  scale_x_discrete(breaks=getBreak(df$OTU,df$Group),
                   labels=c("Alphaproteobacteria","Betaproteobacteria","Gammaproteobacteria","Deltaproteobacteria","Acidobacteria","Actinobacteria","Bacteroidetes","Chloroflexi","Gemdfimonadetes","Firmicutes","Planctomycetes","Others"),
                   expand = expansion(mult = 0.03))#expansion函数的参数mult: 百分比间距,可以接受一个向量
# 5 给散点添加文字标签,本例中需要添加标签的是OTU9998,通过ggrepel包的geom_text_repel函数来添加文字标签:
gtext<-filter(df,df$OTU=="OTU9998")

p<-ggplot(df, aes(x=OTU, y=NegativeLogP, color=Group, size=RA, shape=Level)) +
  geom_point(alpha=.7,key_glyph="point")+
  scale_shape_manual(values=c(16,1))+ #选择形状,nosig对应空心圆,enrich对应实心圆
  scale_size_continuous(range = c(0.5,5),
                        breaks = c(0.05, 0.10,0.50,1.00,5.00),
                        labels = c('0.05', '0.10','0.50','1.00','5.00'))+ #设置散点尺寸
  guides(color="none",shape="none")+#隐藏图例
  scale_x_discrete(breaks=getBreak(df$OTU,df$Group),
                   labels=c("Alphaproteobacteria","Betaproteobacteria","Gammaproteobacteria","Deltaproteobacteria","Acidobacteria","Actinobacteria","Bacteroidetes","Chloroflexi","Gemdfimonadetes","Firmicutes","Planctomycetes","Others"),
                   expand = expansion(mult = 0.03))+#expansion函数的参数mult: 百分比间距,可以接受一个向量
  geom_text_repel(aes(OTU,NegativeLogP,label="OTU9998\n(Psedomonas sp.)"),gtext,colour='black', size =5,box.padding = 1, 
                           point.padding = 0.8, 
                           segment.color = "white",
                           show.legend = F)#添加文字标签
# 6 添加表示阈值的虚线,阈值为p=0.05:
threshold<--log10(0.05)

p<-ggplot(df, aes(x=OTU, y=NegativeLogP, color=Group, size=RA, shape=Level)) +
  geom_point(alpha=.7,key_glyph="point")+
  scale_shape_manual(values=c(16,1))+ #选择形状,nosig对应空心圆,enrich对应实心圆
  scale_size_continuous(range = c(0.5,5),
                        breaks = c(0.05, 0.10,0.50,1.00,5.00),
                        labels = c('0.05', '0.10','0.50','1.00','5.00'))+ #设置散点尺寸
  guides(color="none",shape="none")+#隐藏图例
  scale_x_discrete(breaks=getBreak(df$OTU,df$Group),
                   labels=c("Alphaproteobacteria","Betaproteobacteria","Gammaproteobacteria","Deltaproteobacteria","Acidobacteria","Actinobacteria","Bacteroidetes","Chloroflexi","Gemdfimonadetes","Firmicutes","Planctomycetes","Others"),
                   expand = expansion(mult = 0.03))+#expansion函数的参数mult: 百分比间距,可以接受一个向量
  geom_text_repel(aes(OTU,NegativeLogP,label="OTU9998\n(Psedomonas sp.)"),gtext,colour='black', size =5,box.padding = 1, 
                           point.padding = 0.8, 
                           segment.color = "white",
                           show.legend = F)+#添加文字标签
  geom_hline(yintercept=threshold, linetype=2, color="grey")
# 7 改变分组颜色,优化背景,修改轴标题,倾斜x轴刻度标签:
#设置分组颜色
mycol<-c("#008087","#00A962","#86C03D","#74BC56",
         "#E53636","#3D7ABB","#EE8B15","#AF4592",
         "#7AC5C1","#906024","#6E1F86","#545556")

p<-ggplot(df, aes(x=OTU, y=NegativeLogP, color=Group, size=RA, shape=Level)) +
  geom_point(alpha=.7,key_glyph="point")+
  scale_shape_manual(values=c(16,1))+ #选择形状,nosig对应空心圆,enrich对应实心圆
  scale_size_continuous(range = c(0.5,5),
                        breaks = c(0.05, 0.10,0.50,1.00,5.00),
                        labels = c('0.05', '0.10','0.50','1.00','5.00'))+ #设置散点尺寸
  guides(color="none",shape="none")+#隐藏图例
  scale_x_discrete(breaks=getBreak(df$OTU,df$Group),
                   labels=c("Alphaproteobacteria","Betaproteobacteria","Gammaproteobacteria","Deltaproteobacteria","Acidobacteria","Actinobacteria","Bacteroidetes","Chloroflexi","Gemdfimonadetes","Firmicutes","Planctomycetes","Others"),
                   expand = expansion(mult = 0.03))+#expansion函数的参数mult: 百分比间距,可以接受一个向量
  geom_text_repel(aes(OTU,NegativeLogP,label="OTU9998\n(Psedomonas sp.)"),gtext,colour='black', size =5,box.padding = 1, 
                  point.padding = 0.8, 
                  segment.color = "white",
                  show.legend = F)+#添加文字标签
  geom_hline(yintercept=threshold, linetype=2, color="grey")+
  labs(x="", y="-log10(P)")+ #设置坐标轴标题
  scale_color_manual(values = mycol)+ #改变分组颜色
  theme(axis.line = element_line(size = 1),panel.grid=element_blank(),
        panel.background = element_rect(fill = 'white'),
        axis.text.x = element_text(angle = 45, hjust = 0.5, vjust = 0.5))
# 8 我们注意到原图不同区块有不同的背景色,我们可以通过annotate()函数来实现,使图形背景在不同区域颜色不同,注意,因为是改变图形背景色,因此图层应在整个图形的底层:
p<-ggplot(df, aes(x=OTU, y=NegativeLogP, color=Group, size=RA, shape=Level)) +
  annotate("rect",xmin = 1, xmax = 281,
           ymin = 0, ymax = 25,
           fill="#F4F4F3")+
  annotate("rect",xmin = 327, xmax = 437,
           ymin = 0, ymax = 25,
           fill="#F4F4F3")+
  annotate("rect",xmin = 539, xmax = 678,
           ymin = 0, ymax = 25,
           fill="#F4F4F3")+
  annotate("rect",xmin = 719, xmax = 793,
           ymin = 0, ymax = 25,
           fill="#F4F4F3")+
  annotate("rect",xmin = 911, xmax = 1000,
           ymin = 0, ymax = 25,
           fill="#F4F4F3")+
  geom_point(alpha=.7,key_glyph="point")+
  scale_shape_manual(values=c(16,1))+ #选择形状,nosig对应空心圆,enrich对应实心圆
  scale_size_continuous(range = c(0.5,5),
                        breaks = c(0.05, 0.10,0.50,1.00,5.00),
                        labels = c('0.05', '0.10','0.50','1.00','5.00'))+ #设置散点尺寸
  guides(color="none",shape="none")+#隐藏图例
  scale_x_discrete(breaks=getBreak(df$OTU,df$Group),
                   labels=c("Alphaproteobacteria","Betaproteobacteria","Gammaproteobacteria","Deltaproteobacteria","Acidobacteria","Actinobacteria","Bacteroidetes","Chloroflexi","Gemdfimonadetes","Firmicutes","Planctomycetes","Others"),
                   expand = expansion(mult = 0.03))+#expansion函数的参数mult: 百分比间距,可以接受一个向量
  geom_text_repel(aes(OTU,NegativeLogP,label="OTU9998\n(Psedomonas sp.)"),gtext,colour='black', size =5,box.padding = 1, 
                  point.padding = 0.8, 
                  segment.color = "#F4F4F3",
                  show.legend = F)+#添加文字标签
  geom_hline(yintercept=threshold, linetype=2, color="grey")+
  labs(x="", y="-log10(P)")+
  scale_color_manual(values = mycol)+
  theme(axis.line = element_line(size = 1),panel.grid=element_blank(),
        panel.background = element_rect(fill = 'white'),
        axis.text.x = element_text(angle = 45, hjust = 0.5, vjust = 0.5))

9 最后,我们通过Adobe Illustrator(AI)来美化一下图片,成品图如下
# 附.完整代码
library(ggplot2)
library(dplyr)
library(ggrepel)

# 设置随机数种子,确保数据可重复
set.seed(123)
# 根据图片,推测至少需要以下几列关键数据
# OTU的ID:字符串,每一个OTU的ID名
# P值:0-1之间的随机数,为了更接近原图,先生成绝大多数0.05-1间的随机数,在根据图像生成一些特定数值
# RA值:这里选择0.05-5之间的随机数
# group:字符串,标明OTU分别属于哪些属
# level:字符串,标注该OTU是否显著富集
# 本例中生成1000 OTUs
df = data.frame(OTU=c(paste("OTU",seq(1,997),sep = "")),
                P=as.matrix(runif(997,min=0.05,max=1)),
                RA=as.matrix(runif(997,min=0.05,max=5)),
                Group=c(rep("Alphaproteobacteria",100),
                        rep("Betaproteobacteria",40),rep("Gammaproteobacteria",60),
                        rep("Deltaproteobacteria",80),rep("Acidobacteria",45),
                        rep("Actinobacteria",110),rep("Bacteroidetes",100),
                        rep("Chloroflexi",140),rep("Gemdfimonadetes",40),
                        rep("Firmicutes",75),rep("Planctomycetes",117),
                        rep("Others",90))
) %>% rbind(data.frame(OTU=c("OTU9998","OTU9999","OTU10000"),P=c(1e-23,1e-6,1e-7),RA=c(5,2.5,0.05),Group=c("Gammaproteobacteria","Actinobacteria","Bacteroidetes")))
#通过ifelse函数,基于P值给df添加新的一列Level,P小于0.05的属于"enriched",其他属于"nosig"
df$Level=ifelse(df$P<0.05,"enriched","nosig")
#给df添加新的一列NegativeLogP,即为P值的以10为底的逆对数
df$NegativeLogP=-log10(df$P)
#观察原图,发现x轴坐标刻度名称是固定的,基于OTU的属分组,因此在这里我们通过factor函数固定属分组顺序,并按属分组对df排序
df$Group=factor(df$Group,levels = c("Alphaproteobacteria","Betaproteobacteria","Gammaproteobacteria","Deltaproteobacteria","Acidobacteria","Actinobacteria","Bacteroidetes","Chloroflexi","Gemdfimonadetes","Firmicutes","Planctomycetes","Others"))#固定属分组顺序
df=arrange(df,Group)#dplyr包的arrange函数对df排序
df$OTU=factor(df$OTU,levels = df$OTU)#固定OTU名顺序


getBreak<-function(x,y){
  freq<-as.vector(table(y))  #table()计算频数
  half_freq<-freq%/%2
  for (i in seq(2,length(freq))){
    new_num<-freq[i]+freq[i-1]
    freq[i]=new_num}
  pos<-freq-half_freq
  break_point<-as.vector(x[pos])
  return(break_point)
}

gtext<-filter(df,df$OTU=="OTU9998")
threshold<--log10(0.05)
#设置分组颜色
mycol<-c("#008087","#00A962","#86C03D","#74BC56",
         "#E53636","#3D7ABB","#EE8B15","#AF4592",
         "#7AC5C1","#906024","#6E1F86","#545556")


p<-ggplot(df, aes(x=OTU, y=NegativeLogP, color=Group, size=RA, shape=Level)) +
  annotate("rect",xmin = 1, xmax = 281,
           ymin = 0, ymax = 25,
           fill="#F4F4F3")+
  annotate("rect",xmin = 327, xmax = 437,
           ymin = 0, ymax = 25,
           fill="#F4F4F3")+
  annotate("rect",xmin = 539, xmax = 678,
           ymin = 0, ymax = 25,
           fill="#F4F4F3")+
  annotate("rect",xmin = 719, xmax = 793,
           ymin = 0, ymax = 25,
           fill="#F4F4F3")+
  annotate("rect",xmin = 911, xmax = 1000,
           ymin = 0, ymax = 25,
           fill="#F4F4F3")+
  geom_point(alpha=.7,key_glyph="point")+
  scale_shape_manual(values=c(16,1))+ #选择形状,nosig对应空心圆,enrich对应实心圆
  scale_size_continuous(range = c(0.5,5),
                        breaks = c(0.05, 0.10,0.50,1.00,5.00),
                        labels = c('0.05', '0.10','0.50','1.00','5.00'))+ #设置散点尺寸
  guides(color="none",shape="none")+#隐藏图例
  scale_x_discrete(breaks=getBreak(df$OTU,df$Group),
                   labels=c("Alphaproteobacteria","Betaproteobacteria","Gammaproteobacteria","Deltaproteobacteria","Acidobacteria","Actinobacteria","Bacteroidetes","Chloroflexi","Gemdfimonadetes","Firmicutes","Planctomycetes","Others"),
                   expand = expansion(mult = 0.03))+#expansion函数的参数mult: 百分比间距,可以接受一个向量
  geom_text_repel(aes(OTU,NegativeLogP,label="OTU9998\n(Psedomonas sp.)"),gtext,colour='black', size =5,box.padding = 1, 
                  point.padding = 0.8, 
                  segment.color = "#F4F4F3",
                  show.legend = F)+#添加文字标签
  geom_hline(yintercept=threshold, linetype=2, color="grey")+
  labs(x="", y="-log10(P)")+
  scale_color_manual(values = mycol)+
  theme(axis.line = element_line(size = 1),panel.grid=element_blank(),
        panel.background = element_rect(fill = 'white'),
        axis.text.x = element_text(angle = 45, hjust = 0.5, vjust = 0.5))

ggsave("Figure2D.pdf", p, height = 5,width = 10) # 保存图为PDF,指定宽和高
暂无评论

发送评论 编辑评论


				
|´・ω・)ノ
ヾ(≧∇≦*)ゝ
(☆ω☆)
(╯‵□′)╯︵┴─┴
 ̄﹃ ̄
(/ω\)
∠( ᐛ 」∠)_
(๑•̀ㅁ•́ฅ)
→_→
୧(๑•̀⌄•́๑)૭
٩(ˊᗜˋ*)و
(ノ°ο°)ノ
(´இ皿இ`)
⌇●﹏●⌇
(ฅ´ω`ฅ)
(╯°A°)╯︵○○○
φ( ̄∇ ̄o)
ヾ(´・ ・`。)ノ"
( ง ᵒ̌皿ᵒ̌)ง⁼³₌₃
(ó﹏ò。)
Σ(っ °Д °;)っ
( ,,´・ω・)ノ"(´っω・`。)
╮(╯▽╰)╭
o(*////▽////*)q
>﹏<
( ๑´•ω•) "(ㆆᴗㆆ)
😂
😀
😅
😊
🙂
🙃
😌
😍
😘
😜
😝
😏
😒
🙄
😳
😡
😔
😫
😱
😭
💩
👻
🙌
🖕
👍
👫
👬
👭
🌚
🌝
🙈
💊
😶
🙏
🍦
🍉
😣
Source: github.com/k4yt3x/flowerhd
颜文字
Emoji
小恐龙
花!
上一篇
下一篇