NC火山图
#Kruse T, Benz C, Garvanska DH, Lindqvist R, Mihalic F, Coscia F, Inturi R, Sayadi A, Simonetti L, Nilsson E, Ali #M, Kliche J, Moliner Morro A, Mund A, Andersson E, McInerney G, Mann M, Jemth P, Davey NE, Överby AK, Nilsson J, #Ivarsson Y. Large scale discovery of coronavirus-host factor protein interaction motifs reveals SARS-CoV-2 #specific mechanisms and vulnerabilities. Nat Commun. 2021 Nov 19;12(1):6761. doi: 10.1038/s41467-021-26498-z. #PMID: 34799561; PMCID: PMC8605023.
# 加载R包
library(ggplot2)
library(tidyverse)

#####################################
# 曲线
fun1 <- function(x){
  input = c(seq(-x,-0.001,0.0001),seq(0.001,x,0.0001))
  y = 1/input
  # 合并为dataframe格式
  df = data.frame(x = input,y = y)
}

# 生成曲线数据
df <- fun1(5)

# 绘图
ggplot(df,aes(x = x,y = y)) +
  geom_point(size = 1) +
  theme_grey(base_size = 18) +
  ylim(-250,250)

######################################
#倒喇叭形
fun2 <- function(x){
  input = seq(0.001,x,0.001)
  y = 1/abs(input)
  # 合并为dataframe格式
  df1 = data.frame(x = input,y = y)
  df2 = data.frame(x = -(input),y = y)
  # 合并
  res <- rbind(df1,df2)
  return(res)
}

# 生成曲线数据
df <- fun2(5)

# 绘图
ggplot(df,aes(x = x,y = y)) +
  geom_point(size = 1) +
  theme_grey(base_size = 18) +
  ylim(0,5)


##############################
# 平移-log10 pvalue
fun3 <- function(x){
  input = seq(0.001,x,0.001)
  y = 1/abs(input) + -log10(0.05)
  # 合并为dataframe格式
  df1 = data.frame(x = input,y = y)
  df2 = data.frame(x = -(input),y = y)
  # 合并
  res <- rbind(df1,df2)
  return(res)
}

# 生成曲线数据
df <- fun3(5)

# 绘图
ggplot(df,aes(x = x,y = y)) +
  geom_point(size = 1) +
  theme_grey(base_size = 18) +
  ylim(0,5)

###########################################
# 加载数据
voca <- read.delim('./vocalnodata.txt',header = T)

# 查看数据
head(voca,3)

# 挑选需要标记的基因
filter_data <- voca %>% filter(gene_name %in% c("NCAP MERS","NCAP SARS",
                                                "G3BP1","G3BP2"))

# 重命名
filter_data[c(1,4),1] <- c('MERS-CoV N (bait)','SARS-CoV-2 N (bait)')

# 添加基因分组
filter_data$type <- c('a','b','b','c')

# 查看数据
filter_data

# 添加曲线的y轴
voca$curve_y <- ifelse(voca$log2FC < 0,1/abs(voca$log2FC) + -log10(0.05),
                 1/voca$log2FC + -log10(0.05))

# 绘制火山图
p <- ggplot(voca,aes(x = log2FC,y = logPvalue)) +
  # 点图层
  geom_point(size = 3,shape = 19,alpha = 0.2) +
  theme_classic(base_size = 16) +
  # 主题细节
  theme(axis.ticks.length = unit(0.3,'cm'),
        aspect.ratio = 0.8) +
  # y轴显示范围及刻度
  scale_x_continuous(breaks = seq(-8,8,2)) +
  # xy轴标签
  xlab('Relative protein level\n (SARS-CoV-2 N vs MERS-CoV N,log2)') +
  ylab('T-test p-value (-log10)') +
  # 添加文字注释
  annotate(geom = 'text',x = -5,y = 7,
           size = 5,
           label = 'MERS-CoV N\n specific') +
  annotate(geom = 'text',x = 5,y = 7,
           size = 5,
           label = 'SARS-CoV-2 N\n specific')

p

# 添加曲线图层
p0 <- p +
  geom_line(aes(y = curve_y),
            size = 1,lty = 'dashed',color = 'grey') +
  # 限制Y轴
  ylim(0,8)

p0

# 添加基因注释
p1 <- p0 + 
  geom_point(data = filter_data,aes(x = log2FC,y = logPvalue,
                                    color = type),
             size = 5,show.legend = F)

p1

# 添加基因名,修改颜色
p1 + 
  geom_text(data = filter_data,aes(label = gene_name,color = type),
            nudge_y = 0.3,nudge_x = 0,
            size = 4.5,fontface = 'italic',
            show.legend = F) +
  # 修改颜色
  scale_color_manual(values = c('a'='#FFCA03','b'='#396EB0','c'='#F90716'))


######################################################
# 第二种火山图

# 计算评估y值和x值
voca$estimate.p <- 1/abs(voca$log2FC) + -log10(0.05)
voca$estimate.log2fc <- ifelse(voca$log2FC < 0,1/(voca$logPvalue + log10(0.05)),
                               1/(voca$logPvalue + log10(0.05)))

# 按规则分类
voca$regulation <- ifelse(abs(voca$estimate.log2fc) <= abs(voca$log2FC) & voca$logPvalue >= voca$estimate.p,
                          ifelse(voca$estimate.log2fc <= voca$log2FC,'UP','DOWN'),'None-Sig')

# 查看基因情况
table(voca$regulation)


##############################################3
# 绘图

library(ggnewscale)

# 图例排序
voca$regulation <- factor(voca$regulation,levels = c('UP','DOWN','None-Sig'))

# 绘图
p2 <- ggplot(voca) +
  geom_point(aes(x = log2FC,y = logPvalue,color = regulation),
             size = 3,alpha = 0.5) +
  theme_classic(base_size = 16) +
  theme(axis.ticks.length = unit(0.3,'cm'),
        aspect.ratio = 0.8) +
  scale_x_continuous(breaks = seq(-8,8,2)) +
  # 定义颜色
  scale_color_manual(values = c('UP'='red','None-Sig'='grey','DOWN'='#3E7C17')) +
  xlab('Relative protein level\n (SARS-CoV-2 N vs MERS-CoV N,log2)') +
  ylab('T-test p-value (-log10)') +
  annotate(geom = 'text',x = -5,y = 7,
           size = 5,
           label = 'MERS-CoV N\n specific') +
  annotate(geom = 'text',x = 5,y = 7,
           size = 5,
           label = 'SARS-CoV-2 N\n specific') +
  geom_line(data = df,aes(x = x,y = y),
            size = 1,lty = 'dashed',color = 'grey') +
  ylim(0,8)

p2

# 添加基因名
p2 +
  new_scale_color() +
  # 添加点图层
  geom_point(data = filter_data,aes(x = log2FC,y = logPvalue,
                                    color = type),
             size = 5,show.legend = F) +
  # 修改颜色
  scale_color_manual(values = c('a'='#FFCA03','b'='#396EB0','c'='#F90716')) +
  # 添加名字
  geom_text(data = filter_data,aes(x = log2FC,y = logPvalue,
                                   label = gene_name,color = type),
            nudge_y = 0.3,nudge_x = 0,
            size = 4.5,fontface = 'italic',
            show.legend = F)
  
暂无评论

发送评论 编辑评论


				
|´・ω・)ノ
ヾ(≧∇≦*)ゝ
(☆ω☆)
(╯‵□′)╯︵┴─┴
 ̄﹃ ̄
(/ω\)
∠( ᐛ 」∠)_
(๑•̀ㅁ•́ฅ)
→_→
୧(๑•̀⌄•́๑)૭
٩(ˊᗜˋ*)و
(ノ°ο°)ノ
(´இ皿இ`)
⌇●﹏●⌇
(ฅ´ω`ฅ)
(╯°A°)╯︵○○○
φ( ̄∇ ̄o)
ヾ(´・ ・`。)ノ"
( ง ᵒ̌皿ᵒ̌)ง⁼³₌₃
(ó﹏ò。)
Σ(っ °Д °;)っ
( ,,´・ω・)ノ"(´っω・`。)
╮(╯▽╰)╭
o(*////▽////*)q
>﹏<
( ๑´•ω•) "(ㆆᴗㆆ)
😂
😀
😅
😊
🙂
🙃
😌
😍
😘
😜
😝
😏
😒
🙄
😳
😡
😔
😫
😱
😭
💩
👻
🙌
🖕
👍
👫
👬
👭
🌚
🌝
🙈
💊
😶
🙏
🍦
🍉
😣
Source: github.com/k4yt3x/flowerhd
颜文字
Emoji
小恐龙
花!
上一篇
下一篇