######################################安装包#########################
#~/MetaboAnalyst,MetaboAnalyst/cq
# # install.packages('devtools')
# # options(BioC_mirror="https://mirrors.tuna.tsinghua.edu.cn/bioconductor/")
# # options("repos" = c(CRAN="http://mirrors.cloud.tencent.com/CRAN/"))
# # options(download.file.method = 'libcurl')
# # options(url.method='libcurl')
# package_list <- c("tidyverse","readxl",
# "MetaboAnalystR","FELLA",
# "ropls","ggrepel",
# "ggforce","httr",
# "Hmisc","tidytext",
# "psych","corrplot")
# for(p in package_list){
# if(!suppressWarnings(suppressMessages(require(p, character.only = TRUE, quietly = TRUE, warn.conflicts = FALSE)))){
# install.packages(p, warn.conflicts = FALSE)
# suppressWarnings(suppressMessages(library(p, character.only = TRUE, quietly = TRUE, warn.conflicts = FALSE)))
# }
# }
# # library(devtools)
# # install.packages("digest")
# # devtools::install_github("xia-lab/MetaboAnalystR", build = TRUE, build_vignettes = FALSE)
# # BiocManager::install("FELLA")
# # BiocManager::install("ropls")
#######################################################################
library(readxl)
library(tidyverse)
library(MetaboAnalystR)
# setwd("~/Desktop/2021b/article_metirials/metabolomics/serum/")
#PreparePDFReport(mSetObj = mSet_neg_serum, usrName = "wangluyao")
pos_serum <- read_xlsx("./p_n_2.xlsx",sheet = 1)
neg_serum <- read_xlsx("./p_n_2.xlsx",sheet = 2)
serum_metadata <- read_xlsx("./metadata.xlsx")
neg_serum <- neg_serum %>% select(Name, starts_with("s"), starts_with("QC"))
pos_serum <- pos_serum %>% select(Name, starts_with("s"), starts_with("QC"))
Group <- c("Group",serum_metadata$Group)
serum <- rbind(select(neg_serum, -starts_with("QC")),
select(pos_serum, -starts_with("QC"))) %>%
group_by(Name) %>% summarise_all(mean) %>% rbind(Group,.)
Group <- c("Group",serum_metadata$Group,rep("QC",9))
neg_serum_QC <- neg_serum %>% group_by(Name) %>% summarise_all(mean) %>% rbind(Group,.)
Group <- c("Group",serum_metadata$Group,rep("QC",9))
pos_serum_QC <- pos_serum %>% group_by(Name) %>% summarise_all(mean) %>% rbind(Group,.)
Group <- c("Group",serum_metadata$Group,rep("QC",9))
serum_QC <- rbind(select(neg_serum, starts_with("Name"),starts_with("s"),starts_with("QC")),
select(pos_serum, starts_with("Name"),starts_with("s"),starts_with("QC"))) %>%
group_by(Name) %>% summarise_all(mean) %>% rbind(Group,.)
write_csv(neg_serum_QC,"neg_serum_QC.csv")
write_csv(pos_serum_QC,"pos_serum_QC.csv")
write_csv(serum_QC,"serum_QC.csv")
# Standardization and PCA analysis for ESI- and ESI+ respectively
mSet_neg_serum <- InitDataObjects(data.type = "pktable", anal.type = "stat", paired = FALSE)
mSet_neg_serum <- Read.TextData(mSet_neg_serum, "neg_serum_QC.csv", format = "colu", lbl.type = "disc")
mSet_neg_serum <- SanityCheckData(mSet_neg_serum)
mSet_neg_serum <- ReplaceMin(mSet_neg_serum);
mSet_neg_serum <- SanityCheckData(mSet_neg_serum)
mSet_neg_serum <- FilterVariable(mSet_neg_serum, "none", "F", 25)
mSet_neg_serum <- PreparePrenormData(mSet_neg_serum)
mSet_neg_serum <- Normalization(mSet_neg_serum, "MedianNorm", "LogNorm", "NULL", ratio=FALSE, ratioNum=20)
mSet_neg_serum <- PlotNormSummary(mSet_neg_serum, "norm_neg_serum", "png", 300, width=NA)
mSet_neg_serum <- PlotSampleNormSummary(mSet_neg_serum, "snorm_neg_serum", "png", 300, width=NA)
neg_serum_norm <- mSet_neg_serum$dataSet$norm
mSet_neg_serum <- PCA.Anal(mSet_neg_serum)
mSet_neg_serum <- PlotPCA2DScore(mSet_neg_serum, "pca_neg_serum", "png", 300,
width=NA, pcx = 1, pcy = 2, reg = 0.95, show = 0, grey.scale = 0)
mSet_pos_serum <- InitDataObjects(data.type = "pktable", anal.type = "stat", paired = FALSE)
mSet_pos_serum <- Read.TextData(mSet_pos_serum, "pos_serum_QC.csv", format = "colu", lbl.type = "disc")
mSet_pos_serum <- SanityCheckData(mSet_pos_serum)
mSet_pos_serum <- ReplaceMin(mSet_pos_serum);
mSet_pos_serum <- SanityCheckData(mSet_pos_serum)
mSet_pos_serum <- FilterVariable(mSet_pos_serum, "none", "F", 25)
mSet_pos_serum <- PreparePrenormData(mSet_pos_serum)
mSet_pos_serum <- Normalization(mSet_pos_serum, "MedianNorm", "LogNorm", "NULL", ratio=FALSE, ratioNum=20)
mSet_pos_serum <- PlotNormSummary(mSet_pos_serum, "norm_pos_serum", "png", 300, width=NA)
mSet_pos_serum <- PlotSampleNormSummary(mSet_pos_serum, "snorm_pos_serum", "png", 300, width=NA)
pos_serum_norm <- mSet_pos_serum$dataSet$norm
mSet_pos_serum <- PCA.Anal(mSet_pos_serum)
mSet_pos_serum <- PlotPCA2DScore(mSet_pos_serum, "pca_pos_serum", "png", 300,
width=NA, pcx = 1, pcy = 2, reg = 0.95, show = 0, grey.scale = 0)
# Standardization and differential analysis for concatenated table
mSet_serum <- InitDataObjects(data.type = "pktable", anal.type = "stat", paired = FALSE)
mSet_serum <- Read.TextData(mSet_serum, "serum_QC.csv", format = "colu", lbl.type = "disc")
mSet_serum <- SanityCheckData(mSet_serum)
mSet_serum <- ReplaceMin(mSet_serum);
mSet_serum <- SanityCheckData(mSet_serum)
mSet_serum <- FilterVariable(mSet_serum, "none", "F", 25)
mSet_serum <- PreparePrenormData(mSet_serum)
mSet_serum <- Normalization(mSet_serum, "MedianNorm", "LogNorm", "NULL", ratio=FALSE, ratioNum=20)
mSet_serum <- PlotNormSummary(mSet_serum, "norm_serum", "png", 300, width=NA)
mSet_serum <- PlotSampleNormSummary(mSet_serum, "snorm_serum", "png", 300, width=NA)
mSet_serum <- PCA.Anal(mSet_serum)
mSet_serum <- PlotPCA2DScore(mSet_serum, "pca_serum", "png", 300,
width=NA, pcx = 1, pcy = 2, reg = 0.95, show = 0, grey.scale = 0)
# comparisons <- unique(serum_metadata$Group)
# for(comp in 1:length(comparisons)){
# tmp_tab <- serum[,-which(str_detect(serum[1,],comparisons[comp]))]
# tmp_grp <- unique(as.character(tmp_tab[1,-1]))
# tmp_comp <- paste(tmp_grp[1],tmp_grp[2],sep = "vs")
# write_csv(tmp_tab,paste(tmp_comp,"csv",sep = "."))
# }
###########mSet_CH,CONvsCCFM161########
# mSet_CH <- PlotOPLS.Permutation(mSet_CH, "opls_perm_1_", "png", 72, width=NA)
# mSet<-PlotCmpdSummary(mSet, "L-Threonic acid","NA", 1, "png", 72, width=NA)
#######################CONvsI##################
CONvsI <- serum[,c(1,5:8,13:16)]
write_csv(CONvsI,"CONvsI.csv")
mSet_CI <- InitDataObjects(data.type = "pktable", anal.type = "stat", paired = FALSE)
mSet_CI <- Read.TextData(mSet_CI, "CONvsI.csv", format = "colu", lbl.type = "disc")
mSet_CI <- SanityCheckData(mSet_CI)
mSet_CI <- ReplaceMin(mSet_CI)
mSet_CI <- SanityCheckData(mSet_CI)
mSet_CI <- FilterVariable(mSet_CI, "none", "F", 25)
mSet_CI <- PreparePrenormData(mSet_CI)
mSet_CI <- Normalization(mSet_CI, "MedianNorm", "LogNorm", "NULL", ratio=FALSE, ratioNum=20)
mSet_CI <- Ttests.Anal(mSet_CI, nonpar = FALSE, paired = FALSE, equal.var = TRUE,
pvalType = "fdr", all_results = TRUE, threshp = 0.05)
mSet_CI <- FC.Anal(mSet_CI, fc.thresh = 2, cmp.type = 0, paired = FALSE)
mSet_CI <- Volcano.Anal(mSet_CI, paired = FALSE, fcthresh = 2.0, cmpType = 0,
nonpar = FALSE, threshp = 0.05, equal.var = TRUE, pval.type = "raw")
mSet_CI <- PlotVolcano(mSet_CI , "volcano_CI", 1, 0, "png", 300, width=NA)
mSet_CI <- OPLSR.Anal(mSetObj = mSet_CI, reg = TRUE)
mSet_CI <- PlotOPLS2DScore(mSet_CI, "opls_CI", "png", 300, width=NA, 1, 2, 0.95, 0, 0)
#######################CONvsT##################
CONvsT <- serum[,c(1,5:8,22:24)]
write_csv(CONvsT,"CONvsT.csv")
mSet_CT <- InitDataObjects(data.type = "pktable", anal.type = "stat", paired = FALSE)
mSet_CT <- Read.TextData(mSet_CT, "CONvsT.csv", format = "colu", lbl.type = "disc")
mSet_CT <- SanityCheckData(mSet_CT)
mSet_CT <- ReplaceMin(mSet_CT)
mSet_CT <- SanityCheckData(mSet_CT)
mSet_CT <- FilterVariable(mSet_CT, "none", "F", 25)
mSet_CT <- PreparePrenormData(mSet_CT)
mSet_CT <- Normalization(mSet_CT, "MedianNorm", "LogNorm", "NULL", ratio=FALSE, ratioNum=20)
mSet_CT <- Ttests.Anal(mSet_CT, nonpar = FALSE, paired = FALSE, equal.var = TRUE,
pvalType = "fdr", all_results = TRUE, threshp = 0.05)
mSet_CT <- FC.Anal(mSet_CT, fc.thresh = 2, cmp.type = 0, paired = FALSE)
mSet_CT <- Volcano.Anal(mSet_CT, paired = FALSE, fcthresh = 2.0, cmpType = 0,
nonpar = FALSE, threshp = 0.05, equal.var = TRUE, pval.type = "raw")
mSet_CT <- PlotVolcano(mSet_CT , "volcano_CT", 1, 0, "png", 300, width=NA)
mSet_CT <- OPLSR.Anal(mSetObj = mSet_CT, reg = TRUE)
mSet_CT <- PlotOPLS2DScore(mSet_CT, "opls_T", "png", 300, width=NA, 1, 2, 0.95, 0, 0)
sig_ci <- as.data.frame(mSet_CI$analSet$volcano$sig.mat)
sig_ct <- as.data.frame(mSet_CT$analSet$volcano$sig.mat)
vip_ci <- as.data.frame(mSet_CI$analSet$oplsda$vipVn)
vip_ci <- vip_ci[rownames(sig_ci),]
sig_ci <- cbind(sig_ci,vip_ci)
vip_ct <- as.data.frame(mSet_CT$analSet$oplsda$vipVn)
vip_ct <- vip_ct[rownames(sig_ct),]
sig_ct <- cbind(sig_ct,vip_ct)
# metadata <- read_xlsx("~/Desktop/2021b/article_metirials/pheno_data.xlsx")
# metadata$Age <- as.numeric(metadata$Age)
# metadata$BMI <- as.numeric(metadata$BMI)
# impute <- function(x,x.impute){
# ifelse(is.na(x),x.impute,x)
# }
# metadata$Age <- impute(metadata$Age,mean(na.omit(metadata$Age)))
# metadata$BMI <- impute(metadata$BMI,mean(na.omit(metadata$BMI)))
# write_csv(x = metadata %>% filter(ID %in% colnames(serum)),file = "metadata.csv")
#
# mSet_cov <- InitDataObjects("pktable","ts",FALSE)
# mSet_cov <- SetDesignType(mSet_cov,"multi")
# mSet_cov <- Read.TextDataTs(mSet_cov,"serum.csv","colts")
# mSet_cov <- ReadMetaData(mSet_cov,metafilename = "./metadata.csv")
# mSet_cov <- SanityCheckData(mSet_cov)
# mSet_cov <- ReplaceMin(mSet_cov)
# mSet_cov <- SanityCheckMeta(mSet_cov, 1)
# mSet_cov <- SetDataTypeOfMeta(mSet_cov)
# mSet_cov <- SanityCheckData(mSet_cov)
# mSet_cov <- FilterVariable(mSet_cov, "none", "F", 25)
# mSet_cov <- PreparePrenormData(mSet_cov)
# mSet_cov <- Normalization(mSet_cov, "SumNorm", "LogNorm", "NULL", ratio=FALSE, ratioNum=20)
# adj.vec <- c("Gender","BMI","Age")
# mSet_cov <- CovariateScatter.Anal(mSet_cov, "covariate_plot_0_dpi72.png", "png", 72, "default", "Group", "Health", "NA" , 0.05, FALSE)
# maaslin <- as.data.frame(mSet_cov$analSet$cov$sig.mat)
diff_ct <- sig_ct %>% rownames_to_column(var = "Name") %>%
filter(., abs(`log2(FC)`)>1, raw.pval<0.05, vip_ct>1) %>%
pull(Name)
diff_ci <- sig_ci %>% rownames_to_column(var = "Name") %>%
filter(., abs(`log2(FC)`)>1, raw.pval<0.05, vip_ci>1) %>%
pull(Name)
diff_cp_data <- serum %>% filter(Name %in% c("Group",unique(c(diff_ct,diff_ci))))
# Enrichment analysis
library(httr)
call <- "http://api.xialab.ca/mapcompounds"
toSend = list(queryList = diff_cp_data$Name, inputType = "name")
query_results <- httr::POST(call, body = toSend, encode = "json")
query_results$status_code==200
query_results_text <- content(query_results, "text", encoding = "UTF-8")
query_results_json <- rjson::fromJSON(query_results_text,simplify = TRUE)
query_results_table <- cbind.data.frame(query_results_json$Query,query_results_json$Match,
as.character(query_results_json$KEGG)) %>%
rename(query=colnames(.)[1],match=colnames(.)[2],kegg=colnames(.)[3])
# diff_cp_data$Name[which(diff_cp_data$Name %in% c("2-Methyl-4,6-dinitrophenol","D-(-)-Quinic acid",
# "Dodecyl sulfate","L-Threonic acid"))] <-
# c("4,6-Dinitro-o-cresol","Quinate","Erythromycin estolate","Threonate")
tmp_query <- query_results_table %>%
filter(kegg!="NA",kegg!="NULL")
# diff_cp_data <- diff_cp_data %>% filter(Name %in% c("Group","4,6-Dinitro-o-cresol","Quinate",
# "Erythromycin estolate","Threonate",tmp_query$query))
diff_cp_data <- diff_cp_data %>% filter(Name %in% c("Group",tmp_query$query))
write_csv(diff_cp_data,file = "diff_cp_data.csv")
mSet_en <- InitDataObjects("conc","msetqea",FALSE)
mSet_en <- Read.TextData(mSet_en,"diff_cp_data.csv","colu","disc")
mSet_en <- SanityCheckData(mSet_en)
mSet_en <- ReplaceMin(mSet_en)
mSet_en <- CrossReferencing(mSet_en, "name")
mSet_en <- CreateMappingResultTable(mSet_en)
mSet_en <- PreparePrenormData(mSet_en)
mSet_en <- Normalization(mSet_en, "MedianNorm", "LogNorm", "NULL", ratio=FALSE, ratioNum=20)
mSet_en <- SetMetabolomeFilter(mSet_en, F)
mSet_en <- SetCurrentMsetLib(mSet_en, "kegg_pathway", 2)
mSet_en <- CalculateGlobalTestScore(mSet_en)
mSet_en <- PlotQEA.Overview(mSet_en, "qea_bar", "net", "png", 300, width=NA)
mSet_en <- PlotEnrichDotPlot(mSet_en, "qea", "qea_dot", "png", 300, width=NA)
mSet_en <- PlotQEA.MetSet(mSet_en, "Biosynthesis of unsaturated fatty acids", "png", 300, width=NA)
mSet_pw <- InitDataObjects("conc", "pathora", FALSE)
cmpd_vec <- diff_cp_data$Name[-1]
mSet_pw <- Setup.MapData(mSet_pw, cmpd_vec)
mSet_pw <- CrossReferencing(mSet_pw, "name")
mSet_pw <- CreateMappingResultTable(mSet_pw)
mSet_pw <- SetKEGG.PathLib(mSet_pw, "hsa", "current")
mSet_pw <- SetMetabolomeFilter(mSet_pw, F)
mSet_pw <- CalculateOraScore(mSet_pw, "dgr", "hyperg")
mSet_pw <- PlotPathSummary(mSet_pw, F, "pathway", "png", 300, width=NA,NA,NA)
# mSet_nea <- InitDataObjects("conc", "msetora", FALSE)
# mSet_nea <- Setup.MapData(mSet_nea, diff_cp_data$Name[-1])
# mSet_nea <- CrossReferencing(mSet_nea, "name")
# mSet_nea <- CreateMappingResultTable(mSet_nea)
# mSet_nea <- SetMetabolomeFilter(mSet_nea, F)
# mSet_nea <- SetCurrentMsetLib(mSet_nea, "kegg_pathway", 2)
# mSet_nea <- CalculateHyperScore(mSet_nea)
# mSet_nea <- PlotORA(mSet_nea, "bar_nea", "net", "png", 300, width=NA)
# mSet_nea <- PlotEnrichDotPlot(mSet_nea, "ora", "dot_nea", "png", 300, width=NA)
###### FC vs. CT ######
# diff_ch[which(diff_ch=="2-Methyl-4,6-dinitrophenol")] <- "4,6-Dinitro-o-cresol"
# diff_ch[which(diff_ch=="D-(-)-Quinic acid")] <- "Quinate"
# diff_ch[which(diff_ch=="Dodecyl sulfate")] <- "Erythromycin estolate"
# diff_ch[which(diff_ch=="L-Threonic acid")] <- "Threonate"
# mSet_en_ch <- InitDataObjects("conc", "msetora", FALSE)
# mSet_en_ch <- Setup.MapData(mSet_en_ch, diff_ch)
# mSet_en_ch <- CrossReferencing(mSet_en_ch, "name")
# mSet_en_ch <- CreateMappingResultTable(mSet_en_ch)
# mSet_en_ch <- SetMetabolomeFilter(mSet_en_ch, F)
# mSet_en_ch <- SetCurrentMsetLib(mSet_en_ch, "kegg_pathway", 2)
# mSet_en_ch <- CalculateHyperScore(mSet_en_ch)
# mSet_en_ch <- PlotORA(mSet_en_ch, "bar_en_ch", "net", "png", 300, width=NA)
# mSet_en_ch <- PlotEnrichDotPlot(mSet_en_ch, "ora", "dot_en_ch", "png", 300, width=NA)
###### FC vs. IBS-C ######
# diff_ci[which(diff_ci=="2-Methyl-4,6-dinitrophenol")] <- "4,6-Dinitro-o-cresol"
# diff_ci[which(diff_ci=="D-(-)-Quinic acid")] <- "Quinate"
# diff_ci[which(diff_ci=="Dodecyl sulfate")] <- "Erythromycin estolate"
# diff_ci[which(diff_ci=="L-Threonic acid")] <- "Threonate"
mSet_en_ci <- InitDataObjects("conc", "msetora", FALSE)
mSet_en_ci <- Setup.MapData(mSet_en_ci, diff_ci)
mSet_en_ci <- CrossReferencing(mSet_en_ci, "name")
mSet_en_ci <- CreateMappingResultTable(mSet_en_ci)
mSet_en_ci <- SetMetabolomeFilter(mSet_en_ci, F)
mSet_en_ci <- SetCurrentMsetLib(mSet_en_ci, "kegg_pathway", 2)
mSet_en_ci <- CalculateHyperScore(mSet_en_ci)
mSet_en_ci <- PlotORA(mSet_en_ci, "bar_en_ci", "net", "png", 300, width=NA)
mSet_en_ci <- PlotEnrichDotPlot(mSet_en_ci, "ora", "dot_en_ci", "png", 300, width=NA)
###### IBS-C vs. HC ######
# diff_ih[which(diff_ih=="2-Methyl-4,6-dinitrophenol")] <- "4,6-Dinitro-o-cresol"
# diff_ih[which(diff_ih=="D-(-)-Quinic acid")] <- "Quinate"
# diff_ih[which(diff_ih=="Dodecyl sulfate")] <- "Erythromycin estolate"
# diff_ih[which(diff_ih=="L-Threonic acid")] <- "Threonate"
mSet_en_ct <- InitDataObjects("conc", "msetora", FALSE)
mSet_en_ct <- Setup.MapData(mSet_en_ct, diff_ct)
mSet_en_ct <- CrossReferencing(mSet_en_ct, "name")
mSet_en_ct <- CreateMappingResultTable(mSet_en_ct)
mSet_en_ct <- SetMetabolomeFilter(mSet_en_ct, F)
mSet_en_ct <- SetCurrentMsetLib(mSet_en_ct, "kegg_pathway", 2)
mSet_en_ct <- CalculateHyperScore(mSet_en_ct)
mSet_en_ct <- PlotORA(mSet_en_ct, "bar_en_ct", "net", "png", 300, width=NA)
mSet_en_ct <- PlotEnrictDotPlot(mSet_en_ct, "ora", "dot_en_ct", "png", 300, width=NA)
### volcano plot visulization ###
library(ggrepel)
library(ggforce)
library(Hmisc)
volcano_ct <- cbind(mSet_CT$analSet$volcano$fc.all,mSet_CT$analSet$volcano$fc.log,
mSet_CT$analSet$volcano$p.log,mSet_CT$analSet$oplsda$vipVn) %>%
as.data.frame() %>%
rownames_to_column("name") %>%
rename(fc=V1,log_fc=V2,log_p=V3,vip=V4) %>%
mutate(cp_tp=if_else(
log_p < 1.30103,"NoDiff",if_else(
abs(log_fc) < 1,"NoDiff",
if_else(log_fc >=1,"Upward","Downward"))),
comparison="Control vs. T")
vol_num_ct <- volcano_ct %>% group_by(cp_tp) %>% summarise(count=n())
volcano_ci <- cbind(mSet_CI$analSet$volcano$fc.all,mSet_CI$analSet$volcano$fc.log,
mSet_CI$analSet$volcano$p.log,mSet_CI$analSet$oplsda$vipVn) %>%
as.data.frame() %>%
rownames_to_column("name") %>%
rename(fc=V1,log_fc=V2,log_p=V3,vip=V4) %>%
mutate(cp_tp=if_else(
log_p < 1.30103,"NoDiff",if_else(
abs(log_fc) < 1,"NoDiff",
if_else(log_fc >=1,"Upward","Downward"))),
comparison="Control vs. I")
vol_num_ci <- volcano_ci %>% group_by(cp_tp) %>% summarise(count=n())
volcano <- rbind(volcano_ci,volcano_ct)
volcano$cp_tp <- factor(volcano$cp_tp,levels = c("Upward","NoDiff","Downward"))
volcano$name <- capitalize(volcano$name)
volcanolabeld<-volcano[volcano$cp_tp=="Downward",]
volcanolabelu<-volcano[volcano$cp_tp=="Upward",]
volcanolabel<-rbind(volcanolabeld,volcanolabelu)
p1 <- ggplot(volcano,aes(log_fc,log_p)) +
geom_point(aes(color=cp_tp,size=abs(vip)),alpha=0.8)+
geom_hline(yintercept = -log10(0.05),linetype ="dashed") +
geom_vline(xintercept = c(-1,1),linetype = "dashed")+
# geom_mark_circle(data=volcano %>%
# filter(name %in% mSet_en$analSet$qea.hits$`Biosynthesis of unsaturated fatty acids`),
# aes(fill = name,
# label = name),
# expand = unit(1.5, "mm"),
# alpha = 0,
# con.cap = 0,
# label.lineheight = 0.1,
# label.fontsize = c(5, 4),
# show.legend = FALSE)+
geom_label_repel(data=volcano %>%
filter(name %in% mSet_en$analSet$qea.hits$`Biosynthesis of unsaturated fatty acids`),
size=2,
box.padding = 1, #字到点的距离
point.padding = 0.1, #字到点的距离,点周围的空白宽度
min.segment.length = 0, #短线段可以省略
segment.color = "black", #segment.colour = NA, 不显示线段
aes(label=name,color=cp_tp),
arrow = arrow(length=unit(0.01, "npc")),
show.legend = FALSE)+
facet_wrap(.~comparison,nrow=1,scales = "free_y")+
scale_color_manual(values = c("Upward"="#DC0000FF","NoDiff"="grey","Downward"="#00A087FF"))+
scale_size(range = c(0.5,3),breaks = c(0.5,1,2,3))+
guides(color=guide_legend(title = "Regulation",order = 1),
size=guide_legend(title = "VIP",order = 2))+
labs(x="log2(Fold Change)",
y="-log10(pvalue)")+
theme_bw()
p1
ggsave("diff_serum_volcano.pdf",p1,path = "./",units = "mm",width = 210,height = 99)
### Enrichment analysis ###
library(tidytext)
endot_ct <- as.data.frame(mSet_en_ct$analSet$ora.mat) %>%
rownames_to_column("name") %>%
mutate(ratio=hits/expected,
comparison="C vs. T",
log_p=-log10(`Raw p`))
# endot_ih <- as.data.frame(mSet_en_ih$analSet$ora.mat) %>%
# rownames_to_column("name") %>%
# mutate(ratio=hits/expected,
# comparison="IBS-C vs. HC",
# log_p=-log10(`Raw p`))
# endot_ci <- as.data.frame(mSet_en_ci$analSet$ora.mat) %>%
# rownames_to_column("name") %>%
# mutate(ratio=hits/expected,
# comparison="FC vs. IBS-C",
# log_p=-log10(`Raw p`))
# endot <- rbind(endot_ch[1:10,],endot_ci[1:10,],endot_ih[1:10,])
endot <- endot_ct
endot$name <- capitalize(endot$name)
p2 <- ggplot(endot,aes(x=log_p,y=reorder_within(name,log_p,comparison)))+
geom_point(aes(color=`Raw p`,size=ratio))+
geom_vline(xintercept = -log10(0.05),linetype = "dashed")+
facet_wrap(.~comparison,nrow = 3,strip.position = "right",scales = "free_y")+
labs(x="-log10(pvalue)")+
scale_y_reordered()+
#scale_color_gradientn(colors = color_vector_use,trans="reverse")+
guides(color=guide_colorbar(title = "P value"),
size=guide_legend(title = "Enrichmenr ratio"))+
theme_bw()+
theme(panel.grid.minor = element_blank(),
axis.line.x = element_line(size = 0.3, colour = "black"),#x轴连线
axis.ticks.length.x = unit(-0.20, "cm"),#修改x轴刻度的高度,负号表示向上
axis.text.x = element_text(margin = margin(t = 0.3, unit = "cm")),#线与数字不要重叠
axis.ticks.x = element_line(colour = "black",size = 0.3),#修改x轴刻度的线
axis.ticks.y = element_blank(),
axis.title.y = element_blank())
p2
ggsave("serum_enrichment.pdf",p2,path = "./",units = "mm",width = 210,height = 139)
### Unsaturated fatty acids ###
library(ggpubr)
mypal <- c("#0072B5FF","#BC3C29FF","#E18727FF","#20854EFF")
comparisons <- list(c("CON", "I"),
c("CON","T"),
c("I","T"))
diff_usfa <- cbind(mSet_en$dataSet$url.smp.nms,mSet_en$dataSet$orig.cls,mSet_en$analSet$msea.data) %>%
rename(ID=colnames(.)[1],Group=colnames(.)[2]) %>%
select(ID,Group,mSet_en$analSet$qea.hits$`Biosynthesis of unsaturated fatty acids`) %>%
gather(key = "USFAs",value = "Abundance",3)
diff_usfa$Group <- factor(diff_usfa$Group,levels = c("CON", "I","T"))
diff_usfa <- diff_usfa %>%
filter(Group!="NA")
p3 <- ggboxplot(diff_usfa,x="Group",y="Abundance",color = "Group",
add = "jitter",width = .3,size = .7,palette = mypal)+
labs(x="Group",y="Normalized intensity")+
geom_signif(comparisons = comparisons,
map_signif_level=F,vjust=0.5,color="black",
textsize=5,test=wilcox.test,step_increase=0.1,tip_length = 0.015)+
facet_wrap(.~USFAs,scales = "free_y")+
theme_bw()+
theme(axis.text.x=element_blank(), # remove the x axis scale
axis.ticks.x=element_blank(),
axis.title.x = element_blank(),
legend.title = element_blank(),
legend.key=element_blank(),
legend.text = element_text(color="black",size=8),
legend.spacing.x=unit(0,'cm'),
legend.spacing.y = unit(0,'cm'),
legend.background=element_blank(),
legend.box.background=element_rect(colour = "black"),
legend.box.margin = margin(1,1,1,1),
legend.position = c(0.95,0.55),legend.justification = c(0.95,0.1))
p3
ggsave("diff_usfas.pdf",p3,path = "./",units = "in",width = 4.34,height = 3.22)
dev.off()
###### exercise for FELLA ######
library(FELLA)
# A database built from KEGG has to be regenerated for once
fella_graph <- buildGraphFromKEGGREST(organism = "hsa")
buildDataFromGraph(keggdata.graph = fella_graph,
databaseDir = "./fella_database",
internalDir = FALSE,
matrices = "diffusion",
normality = "diffusion",
niter = 50)
fella_data <- loadKEGGdata(databaseDir = "./fella_database",
internalDir = FALSE,
loadMatrix = "diffusion")
cat(getInfo(fella_data))
# A database built from KEGG has to be regenerated for once
fella_df <- tmp_query$kegg
fella_df
fella_anal <- defineCompounds(compounds = fella_df,
data = fella_data)
getExcluded(fella_anal)
fella_anal <- runDiffusion(object = fella_anal,
data = fella_data,
approx = "normality")
FELLA::plot(fella_anal,
method = "diffusion",
data = fella_data,
nlimit = 150,
vertex.label.cex = 0.5)
exportResults(format = "csv",
file = "./ella_res.csv",
method = "diffusion",
object = fella_anal,
data = fella_data)
########################################################################################
###### exercise for FELLA ######
# ## Correlation analysis
# # Pheno_data
# library(psych)
# library(corrplot)
# cor_cp <- mSet_en$dataSet$norm
# colnames(cor_cp) <- capitalize(colnames(cor_cp))
# colnames(cor_cp)[which(str_detect(colnames(cor_cp),"icosapentaenoic acid"))] <- "Eicosapentaenoic acid"
# cor_cp <- cor_cp %>%
# select(mSet_en$analSet$qea.hits$`Biosynthesis of unsaturated fatty acids`,everything())
# pheno_data <- metadata %>%
# filter(ID %in% rownames(cor_cp)) %>%
# column_to_rownames("ID") %>% select(SBM,BSFS,API)
#
# cor_list <- corr.test(pheno_data, cor_cp, method = "spearman", adjust = "BH", alpha = 0.05)
# r_mat <- cor_list$r
# p_mat <- cor_list$p.adj
#
# corrplot(corr = r_mat,p.mat = p_mat, insig = "label_sig",
# sig.level = c(.001, .01, .05), pch.cex = .6, pch.col = "Black", col = color_vector_use,
# tl.col = "black",tl.cex = 0.5,tl.srt = 45,diag = TRUE, cl.cex = 0.5, cl.length = 5,
# outline = "Black")
#
# # differential taxa
# diff_16S <- readRDS("~/Desktop/2021b/article_metirials/microbiome/diff_16S.rds")
# diff_16S <- diff_16S %>% column_to_rownames("ID") %>% select(-group)
# diff_16S <- log1p(diff_16S)
# d <- merge(cor_cp, diff_16S, by = "row.names", all = FALSE) %>%
# column_to_rownames("Row.names")
# dat1 <- d[,1:47]
# dat2 <- d[,48:54]
#
# cor_list <- corr.test(dat1, dat2, method = "spearman", adjust = "BH", alpha = 0.05)
# r_mat <- cor_list$r
# p_mat <- cor_list$p.adj
#
# corrplot(corr = r_mat,p.mat = p_mat, insig = "label_sig",
# sig.level = c(.001, .01, .05), pch.cex = .6, pch.col = "Black", col = color_vector_use,
# tl.col = "black",tl.cex = 0.6,tl.srt = 45,diag = TRUE, cl.cex = 0.5, outline = "Black")
#
# d2 <- pheno_data[rownames(d),]
#
# cor_list <- corr.test(d2, d, method = "spearman", adjust = "BH", alpha = 0.05)
# r_mat <- cor_list$r
# p_mat <- cor_list$p.adj
#
# corrplot(corr = r_mat,p.mat = p_mat, insig = "label_sig",
# sig.level = c(.001, .01, .05), pch.cex = .6, pch.col = "Black", col = color_vector_use,
# tl.col = "black",tl.cex = 0.6,tl.srt = 45,diag = TRUE, cl.cex = 0.5, cl.length = 5,
# outline = "Black")
#
# save(diff_cp_data,file = "~/Desktop/2021b/article_metirials/diff_serum_cmp.rds")
# serum_norm <- mSet_serum$dataSet$norm
# save(serum_norm,file = "~/Desktop/2021b/article_metirials/serum_norm.rds")
#
# # ggClusterNet
# library(phyloseq)
# library(igraph)
# library(network)
# library(sna)
# library(ggClusterNet)
#
# ps_16S <- readRDS("~/Desktop/2021b/article_metirials/ps_16S.rds")
# ps_16S <- prune_samples(rownames(d), ps_16S)
# ps_16S <- filter_OTU_ps(ps_16S, Top = 200)
# net_grp <- as.data.frame(rep("serum_metabolites",24))
# rownames(net_grp) <- colnames(dat1)
# colnames(net_grp)[1] <- "group"
# dat1 <- as.data.frame(dat1) %>% rownames_to_column("ID")
# cor_net <- corBiostripe(data = dat1, group = net_grp, ps = ps_16S, method = "spearman")
#
# tmp_otu = as.data.frame(vegan_otu(ps_16S))
# tmp_cor <- (dat1[-1])
# row.names(tmp_cor) = dat1[[1]]
# tmp_cor <- t(tmp_cor)
# finaldata <- rbind(tmp_otu,tmp_cor)
# occor = corr.test(dat1,tmp_otu,use="pairwise",method= "spearman",adjust="fdr",alpha=.05,)
# occor.r = occor$r
# occor.p = t(occor$p.adj)
# occor.r[occor.p > 0.05|abs(occor.r)<0.6] = 0
#
# ######################### Abolished case #######################################
# # # PCA
# # library(FactoMineR)
# # library(ggsci)
# # library(scales)
# #
# # show_col(pal_nejm(palette = "default")(8))
# # mypal <- c("#0072B5FF","#BC3C29FF","#E18727FF","#20854EFF")
# #
# # pca_neg_serum <- PCA(neg_serum_norm,graph = FALSE)
# # pca_neg_serum$eig #显示特征值、方差贡献率和累计方差贡献率
# # dat_1 <- as.data.frame(pca_neg_serum$ind$coord) #dat_1为主成分得分(样本坐标)
# # name <- rownames(dat_1)
# # Group_neg_serum <- c(serum_metadata$Group,rep("QC",20))
# # dat_1 <- cbind(dat_1,name,Group_neg_serum) %>% select(-Dim.3,-Dim.4,-Dim.5)
# # colnames(dat_1) <- c("x","y","ID","Group")
# # #dat_2 <- as.data.frame(pca$var$coord) #dat_2为主成分载荷(变量坐标)
# # dat_1$Group <- factor(dat_1$Group,levels = c("Health","Constipation","IBS-C","QC"))
# # dat_1$x<-as.numeric(dat_1$x)
# # dat_1$y<-as.numeric(dat_1$y)
# #
# # p1 <- ggplot(dat_1, aes(x, y,color = Group)) + #映射PC1,PC2
# # geom_point(size = 3, alpha=.7) + #绘制得分散点图
# # labs(title = "PCA biplot") + #图表标题
# # xlab(paste("PC1", paste(round(pca_neg_serum$eig[1,2], 2), "%", sep = ""), sep = " ")) + #x轴标题
# # ylab(paste("PC2", paste(round(pca_neg_serum$eig[2,2], 2), "%", sep = ""), sep = " ")) + #y轴标题
# # stat_ellipse( geom = "polygon", alpha = 0.1, level = 0.95, show.legend = F,
# # col="black",size=1)+
# # stat_ellipse(aes(x,y,color=Group),level = .68,inherit.aes = FALSE,size=.8)+#加置信椭圆
# # geom_vline(xintercept=mean(dat_1$x),lty=1,col="black",lwd=0.5)+#添加横线
# # geom_hline(yintercept =mean(dat_1$y),lty=1,col="black",lwd=0.5)+#添加竖线
# # theme_bw()+ scale_color_manual(values = mypal)+
# # theme( plot.background = element_rect(fill="white"),
# # panel.background = element_rect(fill='white', colour='gray'),
# # text=element_text(family="sans", size=12),
# # plot.title = element_text(hjust = .5))
# # p1
# #
# # pca_pos_serum <- PCA(pos_serum_norm,graph = FALSE)
# # dat_2 <- as.data.frame(pca_pos_serum$ind$coord) #dat_2为主成分得分(样本坐标)
# # name <- rownames(dat_2)
# # Group_pos_serum <- c(serum_metadata$Group,rep("QC",13))
# # dat_2 <- cbind(dat_2,name,Group_pos_serum) %>% select(-Dim.3,-Dim.4,-Dim.5)
# # colnames(dat_2) <- c("x","y","ID","Group")
# # #dat_2 <- as.data.frame(pca$var$coord) #dat_2为主成分载荷(变量坐标)
# # dat_2$Group <- factor(dat_2$Group,levels = c("Health","Constipation","IBS-C","QC"))
# # dat_2$x<-as.numeric(dat_2$x)
# # dat_2$y<-as.numeric(dat_2$y)
# #
# # p2 <- ggplot(dat_2, aes(x, y,color = Group)) + #映射PC1,PC2
# # geom_point(size = 3, alpha=.7) + #绘制得分散点图
# # labs(title = "PCA biplot") + #图表标题
# # xlab(paste("PC1", paste(round(pca_pos_serum$eig[1,2], 2), "%", sep = ""), sep = " ")) + #x轴标题
# # ylab(paste("PC2", paste(round(pca_pos_serum$eig[2,2], 2), "%", sep = ""), sep = " ")) + #y轴标题
# # stat_ellipse( geom = "polygon", alpha = 0.1, level = 0.95, show.legend = F,
# # col="black",size=1)+
# # stat_ellipse(aes(x,y,color=Group),level = .68,inherit.aes = FALSE,size=.8)+#加置信椭圆
# # geom_vline(xintercept=mean(dat_2$x),lty=1,col="black",lwd=0.5)+#添加横线
# # geom_hline(yintercept =mean(dat_2$y),lty=1,col="black",lwd=0.5)+#添加竖线
# # theme_bw()+ scale_color_manual(values = mypal)+
# # theme(plot.background = element_rect(fill="white"),
# # panel.background = element_rect(fill='white', colour='gray'),
# # text=element_text(family="sans", size=12),
# # plot.title = element_text(hjust = .5))
# # p2
# #
# # # OPLS-DA
# # library(ropls)
# # # library(o2plsda)
# # # o2plsda makes no sense, not recommended.
# # # detach("package:o2plsda",unload = TRUE)
# # ###### exercise for o2pls-da ######
# # # cv <- o2cv(X = opl_neg_serum, Y = opl_pos_serum,
# # # nc = 1:5, nx = 1:3, ny = 1:3, group = Group_neg_serum, nr_folds = 10)
# # # fit_serum <- o2pls(X = opl_neg_serum, Y = opl_pos_serum, nc = 5, nx = 2, ny = 3)
# # # summary(fit_serum)
# # # Xl <- loadings(fit_serum,loading="Xjoint")
# # # Xs <- scores(fit_serum,score="Xjoint")
# # # plot(fit_serum,type="score",var="Xjoint", group=Group_serum,repel = FALSE)
# # # plot(fit_serum,type="loading",var="Xjoint", group=Group_serum,repel=F,rotation=TRUE)
# # # res_serum <- oplsda(fit_serum, Group_serum, nc=5)
# # # plot(res_serum, type="score", group=Group_serum)
# # # vip <- vip(res_serum)
# # # plot(res_serum,type="vip", group = Group_serum, repel = FALSE, order=TRUE)
# # ###### exercise for o2pls-da ######
# #
# #
# #
# # Group_serum <- c(serum_metadata$Group)
# # comparisons <- data.frame(c("Health","Constipation","IBS-C"),
# # c("Constipation","IBS-C","Health"))
# #
# # for(comp in 1:nrow(comparisons)){
# # comp_name <- paste(comparisons[comp, 1],
# # comparisons[comp, 2],
# # sep = "vs")
# # tmp_tab <- serum
# # tmp_tab$Group <- Group_serum
# # tmp_tab <- tmp_tab %>% filter(Group %in% c(comparisons[comp,1],comparisons[comp,2]))
# # tmp_grp <- tmp_tab$Group
# # tmp_tab <- tmp_tab %>% select(-Group)
# #
# # comp_name <- opls(tmp_tab, tmp_grp, predI = 1, orthoI = 1, crossvalI =10,
# # scaleC="none")
# #
# # VIP<-comp_name@vipVn
# # t1<-comp_name@orthoScoreMN
# # to1<-comp_name@scoreMN
# #
# # j<-data.frame(t1,to1,colnames(scaledata[,-which(str_detect(colnames(scaledata), "QC"))]),rep(c("A","B"),each=3))
# # colnames(j)<-c("o1","p1","name","group")
# # ggplot(j, aes(p1, o1,group = group,col=group)) + #映射PC1,PC2
# # geom_point(size = 7) + #绘制得分散点图
# # labs(title = "PCA - biplot") + #图表标题
# # xlab(paste("to1")) + #x轴标题
# # ylab(paste("t1")) + #y轴标题
# # stat_ellipse( geom = "polygon", alpha = 0.1, level = 0.95, show.legend = F,
# # size=1)+#加置信椭圆
# # geom_vline(xintercept=mean(j$p1),lty=1,col="black",lwd=0.5) +#添加横线
# # geom_hline(yintercept =mean(j$o1),lty=1,col="black",lwd=0.5)+#添加竖线
# # theme( plot.background = element_rect(fill="white"),
# # panel.background = element_rect(fill='white', colour='gray'),
# # strip.text.x=element_text(size=rel(1.2), family="serif", angle=-90),
# # strip.text.y=element_text(size=rel(1.2), family="serif") ,
# # axis.text.x = element_text(size = 14,color="black"),
# # axis.text.y = element_text(size = 20,color="black"),axis.ticks.x=element_blank()
# # )
# # }
暂无评论