#Kruse T, Benz C, Garvanska DH, Lindqvist R, Mihalic F, Coscia F, Inturi R, Sayadi A, Simonetti L, Nilsson E, Ali #M, Kliche J, Moliner Morro A, Mund A, Andersson E, McInerney G, Mann M, Jemth P, Davey NE, Överby AK, Nilsson J, #Ivarsson Y. Large scale discovery of coronavirus-host factor protein interaction motifs reveals SARS-CoV-2 #specific mechanisms and vulnerabilities. Nat Commun. 2021 Nov 19;12(1):6761. doi: 10.1038/s41467-021-26498-z. #PMID: 34799561; PMCID: PMC8605023.
# 加载R包
library(ggplot2)
library(tidyverse)
#####################################
# 曲线
fun1 <- function(x){
input = c(seq(-x,-0.001,0.0001),seq(0.001,x,0.0001))
y = 1/input
# 合并为dataframe格式
df = data.frame(x = input,y = y)
}
# 生成曲线数据
df <- fun1(5)
# 绘图
ggplot(df,aes(x = x,y = y)) +
geom_point(size = 1) +
theme_grey(base_size = 18) +
ylim(-250,250)
######################################
#倒喇叭形
fun2 <- function(x){
input = seq(0.001,x,0.001)
y = 1/abs(input)
# 合并为dataframe格式
df1 = data.frame(x = input,y = y)
df2 = data.frame(x = -(input),y = y)
# 合并
res <- rbind(df1,df2)
return(res)
}
# 生成曲线数据
df <- fun2(5)
# 绘图
ggplot(df,aes(x = x,y = y)) +
geom_point(size = 1) +
theme_grey(base_size = 18) +
ylim(0,5)
##############################
# 平移-log10 pvalue
fun3 <- function(x){
input = seq(0.001,x,0.001)
y = 1/abs(input) + -log10(0.05)
# 合并为dataframe格式
df1 = data.frame(x = input,y = y)
df2 = data.frame(x = -(input),y = y)
# 合并
res <- rbind(df1,df2)
return(res)
}
# 生成曲线数据
df <- fun3(5)
# 绘图
ggplot(df,aes(x = x,y = y)) +
geom_point(size = 1) +
theme_grey(base_size = 18) +
ylim(0,5)
###########################################
# 加载数据
voca <- read.delim('./vocalnodata.txt',header = T)
# 查看数据
head(voca,3)
# 挑选需要标记的基因
filter_data <- voca %>% filter(gene_name %in% c("NCAP MERS","NCAP SARS",
"G3BP1","G3BP2"))
# 重命名
filter_data[c(1,4),1] <- c('MERS-CoV N (bait)','SARS-CoV-2 N (bait)')
# 添加基因分组
filter_data$type <- c('a','b','b','c')
# 查看数据
filter_data
# 添加曲线的y轴
voca$curve_y <- ifelse(voca$log2FC < 0,1/abs(voca$log2FC) + -log10(0.05),
1/voca$log2FC + -log10(0.05))
# 绘制火山图
p <- ggplot(voca,aes(x = log2FC,y = logPvalue)) +
# 点图层
geom_point(size = 3,shape = 19,alpha = 0.2) +
theme_classic(base_size = 16) +
# 主题细节
theme(axis.ticks.length = unit(0.3,'cm'),
aspect.ratio = 0.8) +
# y轴显示范围及刻度
scale_x_continuous(breaks = seq(-8,8,2)) +
# xy轴标签
xlab('Relative protein level\n (SARS-CoV-2 N vs MERS-CoV N,log2)') +
ylab('T-test p-value (-log10)') +
# 添加文字注释
annotate(geom = 'text',x = -5,y = 7,
size = 5,
label = 'MERS-CoV N\n specific') +
annotate(geom = 'text',x = 5,y = 7,
size = 5,
label = 'SARS-CoV-2 N\n specific')
p
# 添加曲线图层
p0 <- p +
geom_line(aes(y = curve_y),
size = 1,lty = 'dashed',color = 'grey') +
# 限制Y轴
ylim(0,8)
p0
# 添加基因注释
p1 <- p0 +
geom_point(data = filter_data,aes(x = log2FC,y = logPvalue,
color = type),
size = 5,show.legend = F)
p1
# 添加基因名,修改颜色
p1 +
geom_text(data = filter_data,aes(label = gene_name,color = type),
nudge_y = 0.3,nudge_x = 0,
size = 4.5,fontface = 'italic',
show.legend = F) +
# 修改颜色
scale_color_manual(values = c('a'='#FFCA03','b'='#396EB0','c'='#F90716'))
######################################################
# 第二种火山图
# 计算评估y值和x值
voca$estimate.p <- 1/abs(voca$log2FC) + -log10(0.05)
voca$estimate.log2fc <- ifelse(voca$log2FC < 0,1/(voca$logPvalue + log10(0.05)),
1/(voca$logPvalue + log10(0.05)))
# 按规则分类
voca$regulation <- ifelse(abs(voca$estimate.log2fc) <= abs(voca$log2FC) & voca$logPvalue >= voca$estimate.p,
ifelse(voca$estimate.log2fc <= voca$log2FC,'UP','DOWN'),'None-Sig')
# 查看基因情况
table(voca$regulation)
##############################################3
# 绘图
library(ggnewscale)
# 图例排序
voca$regulation <- factor(voca$regulation,levels = c('UP','DOWN','None-Sig'))
# 绘图
p2 <- ggplot(voca) +
geom_point(aes(x = log2FC,y = logPvalue,color = regulation),
size = 3,alpha = 0.5) +
theme_classic(base_size = 16) +
theme(axis.ticks.length = unit(0.3,'cm'),
aspect.ratio = 0.8) +
scale_x_continuous(breaks = seq(-8,8,2)) +
# 定义颜色
scale_color_manual(values = c('UP'='red','None-Sig'='grey','DOWN'='#3E7C17')) +
xlab('Relative protein level\n (SARS-CoV-2 N vs MERS-CoV N,log2)') +
ylab('T-test p-value (-log10)') +
annotate(geom = 'text',x = -5,y = 7,
size = 5,
label = 'MERS-CoV N\n specific') +
annotate(geom = 'text',x = 5,y = 7,
size = 5,
label = 'SARS-CoV-2 N\n specific') +
geom_line(data = df,aes(x = x,y = y),
size = 1,lty = 'dashed',color = 'grey') +
ylim(0,8)
p2
# 添加基因名
p2 +
new_scale_color() +
# 添加点图层
geom_point(data = filter_data,aes(x = log2FC,y = logPvalue,
color = type),
size = 5,show.legend = F) +
# 修改颜色
scale_color_manual(values = c('a'='#FFCA03','b'='#396EB0','c'='#F90716')) +
# 添加名字
geom_text(data = filter_data,aes(x = log2FC,y = logPvalue,
label = gene_name,color = type),
nudge_y = 0.3,nudge_x = 0,
size = 4.5,fontface = 'italic',
show.legend = F)
暂无评论