对cd数据查询对应KEGG的ID且与他表取交集
library(KEGGREST)
library(readxl)
library(tidyverse)
library(openxlsx)
library(tidyverse)
library(magrittr)
#######读取数据,mzcloud阈值大于80,
CD_c18_pos<-read_xlsx("CD-c18-pos2.1.xlsx",sheet = "Compounds")
CD_c18_pos <- CD_c18_pos %>%
  filter(!is.na(`mzCloud Best Match`))  %>%
  filter(`mzCloud Best Match` > 80) %>%
  group_by(Name)
CD_c18_pos_max<- CD_c18_pos %>%
             summarise_all(max)
fecal<-CD_c18_pos_max
fecal$Name <- gsub("^\\(\\+\\)-", "", fecal$Name)
fecal$Name <- gsub("^\\(\\-\\)-", "", fecal$Name)
fecal$Name <- gsub("^\\(\\+/-\\)-", "", fecal$Name)
fecal$Name <- gsub("^\\(\\+/-\\)", "", fecal$Name)
fecal$Name <- gsub("^\\(\\±/-\\)", "", fecal$Name)
fecal$Name <- gsub("^\\(±\\)", "", fecal$Name)
fecal$Name <- gsub("β", "beta", fecal$Name)
fecal$Name <- gsub("α", "alpha", fecal$Name)
fecal <- fecal[!grepl("^NP-", fecal$Name), ]
dim(fecal)
###########
get_kegg_id <- function(name) {
  kegg_id <- tryCatch(
    {
      keggFind("compound", name)
    },
    error = function(e) {
      NA
    }
  )
  return(kegg_id) # 返回KEGG ID或NA
}
kegg <- lapply(fecal$Name, get_kegg_id)
split_values <- data.frame(unlist(kegg))
split_values2 <- data.frame(unlist(strsplit(split_values$unlist.kegg., "; ")))
colnames(split_values2)<-"kegg"
data <- inner_join(fecal, split_values2, by = c("Name" = "kegg"))
data<-data[,1]
kegg_ids <- lapply(data$Name, get_kegg_id)
new_df<-c()
for (i in 1:length(kegg_ids)) {
  kegg_id <- kegg_ids[[i]]
  kegg_id_df <- as.data.frame(print(kegg_id))
  kegg_id_df$name <- row.names(kegg_id_df)
  name <- row.names(kegg_id_df)
  colnames(kegg_id_df) <- c("kegg_key", "cpd")
  for (j in 1:nrow(kegg_id_df)) {
    split_values <- unlist(strsplit(kegg_id_df$kegg_key[j], "; "))
    new_rows <- data.frame(kegg_key = split_values, cpd = kegg_id_df$cpd[j])
    new_df <- rbind(new_df, new_rows)
  }
}
df <- unique(new_df[new_df$kegg_key %in% data$Name,])
colnames(df)<-c("Name","cpd")
fecal_kegg<-left_join(df,fecal,by="Name")
distinct_fecal_kegg <- distinct(fecal_kegg, Name, .keep_all = TRUE)
distinct_fecal_kegg$cpd <- gsub("cpd:", "", distinct_fecal_kegg$cpd)
dim(distinct_fecal_kegg)
#> dim(distinct_fecal_kegg)
#[1]  88 100
#即CD-c18-pos2.1.xlsx含KEGG的ID有88
MetDNA_C18_pos<-read_xlsx("MetDNA_C18_pos.xlsx",sheet = "MetDNA_C18_pos")
dim(MetDNA_C18_pos)
#> dim(MetDNA_C18_pos)
#[1] 4489   69
MetDNA_C18_pos<-  MetDNA_C18_pos %>%
          filter(`id_kegg` != "NA")
dim(MetDNA_C18_pos)
#去除id_kegg为NA的行
#> dim(MetDNA_C18_pos)
#[1] 4364   69
inner<-inner_join(distinct_fecal_kegg,MetDNA_C18_pos,by=c("cpd"="id_kegg"))
dim(inner)
inner_bijiao<-inner %>% select(Name, name,confidence_level,everything()) 
dim(inner_bijiao)
#> dim(inner_bijiao)
#[1]  57 168
inner_level1<-inner_bijiao[inner_bijiao$confidence_level=="level1",]
dim(inner_level1)
#> dim(inner_level1)
#[1]  39 168
write.xlsx(inner_level1,"inner_level1.xlsx")
暂无评论

发送评论 编辑评论


				
|´・ω・)ノ
ヾ(≧∇≦*)ゝ
(☆ω☆)
(╯‵□′)╯︵┴─┴
 ̄﹃ ̄
(/ω\)
∠( ᐛ 」∠)_
(๑•̀ㅁ•́ฅ)
→_→
୧(๑•̀⌄•́๑)૭
٩(ˊᗜˋ*)و
(ノ°ο°)ノ
(´இ皿இ`)
⌇●﹏●⌇
(ฅ´ω`ฅ)
(╯°A°)╯︵○○○
φ( ̄∇ ̄o)
ヾ(´・ ・`。)ノ"
( ง ᵒ̌皿ᵒ̌)ง⁼³₌₃
(ó﹏ò。)
Σ(っ °Д °;)っ
( ,,´・ω・)ノ"(´っω・`。)
╮(╯▽╰)╭
o(*////▽////*)q
>﹏<
( ๑´•ω•) "(ㆆᴗㆆ)
😂
😀
😅
😊
🙂
🙃
😌
😍
😘
😜
😝
😏
😒
🙄
😳
😡
😔
😫
😱
😭
💩
👻
🙌
🖕
👍
👫
👬
👭
🌚
🌝
🙈
💊
😶
🙏
🍦
🍉
😣
Source: github.com/k4yt3x/flowerhd
颜文字
Emoji
小恐龙
花!
上一篇
下一篇