# Rename the protein FASTA files
# singularity exec orthomcl_latest.sif orthomclAdjustFasta <file_name> <file_path> 1
# BLAST:
# makeblastdb -in 12.fasta -dbtype prot/nucl -out database
# blastn -query FldC.fasta -out result.txt -db database -outfmt 6 -evalue 1e-5 -max_target_seqs 1
#find ./ -name "*.cds" -print0 | xargs -0 -I {} cp {} ./
# BLAST result columns (-outfmt 6):
# "query","subject","identity","Match_length","Mismatch_length","gap","query_start","query_end","subject_start","subject_end","e-value","bitscore"
#singularity exec blast.sif blastn -query all.search.fasta -out result.txt -db target.fasta -outfmt 6 -evalue 1e-5 -max_target_seqs 1
######################################strain_CagA############
# CagA: from the BLAST table in "result.txt", keep the hits whose subject is
# "CagA26695" and whose query name carries the "[CagA-F]" tag in its 4th
# "_"-separated field, then save them to "strain_CagA_unique.csv".
library(tidyverse)

strain_CagA <- read.table("result.txt", header = FALSE)

# The file has no header row, so name its 12 columns here. The "e-vaule"
# spelling is kept on purpose: it becomes a column header in the exported files.
names(strain_CagA) <- c(
  "query", "subject", "identity", "Match_length", "Mismatch_length", "gap",
  "query_start", "query_end", "subject_start", "subject_end", "e-vaule",
  "bitscore"
)

# Hits with identity above 40 and Match_length above 70, highest identity first.
strain_CagA <- strain_CagA %>%
  as_tibble() %>%
  filter(identity > 40, Match_length > 70) %>%
  arrange(desc(identity))

# Only the hits against the CagA26695 subject.
strain_CagA_count <- strain_CagA %>%
  filter(subject == "CagA26695")

# Split the query name on "_" into columns a-d; column d holds the tag.
strain_CagA_unique <- strain_CagA_count %>%
  separate(col = query, into = c("a", "b", "c", "d"), sep = "_") %>%
  filter(d == "[CagA-F]")

# Show the number of rows and columns that were kept.
dim(strain_CagA_unique)
write_csv(strain_CagA_unique, "strain_CagA_unique.csv")
######################################strain_HopQ############
# HopQ: from the BLAST table in "result.txt", keep the hits whose subject is
# "HopQ26695" and whose query name carries the "[HopQ-F]" tag in its 4th
# "_"-separated field, then save them to "strain_HopQ_unique.csv".
library(tidyverse)

strain_HopQ <- read.table("result.txt", header = FALSE)

# The file has no header row, so name its 12 columns here. The "e-vaule"
# spelling is kept on purpose: it becomes a column header in the exported files.
names(strain_HopQ) <- c(
  "query", "subject", "identity", "Match_length", "Mismatch_length", "gap",
  "query_start", "query_end", "subject_start", "subject_end", "e-vaule",
  "bitscore"
)

# Hits with identity above 40 and Match_length above 70, highest identity first.
strain_HopQ <- strain_HopQ %>%
  as_tibble() %>%
  filter(identity > 40, Match_length > 70) %>%
  arrange(desc(identity))

# Only the hits against the HopQ26695 subject.
strain_HopQ_count <- strain_HopQ %>%
  filter(subject == "HopQ26695")

# Split the query name on "_" into columns a-d; column d holds the tag.
strain_HopQ_unique <- strain_HopQ_count %>%
  separate(col = query, into = c("a", "b", "c", "d"), sep = "_") %>%
  filter(d == "[HopQ-F]")

# Show the number of rows and columns that were kept.
dim(strain_HopQ_unique)
write_csv(strain_HopQ_unique, "strain_HopQ_unique.csv")
######################################strain_HtrA############
# HtrA: from the BLAST table in "result.txt", keep the hits whose subject is
# "HtrA26695" and whose query name carries the "[HtrA-F]" tag in its 4th
# "_"-separated field, then save them to "strain_HtrA_unique.csv".
library(tidyverse)

strain_HtrA <- read.table("result.txt", header = FALSE)

# The file has no header row, so name its 12 columns here. The "e-vaule"
# spelling is kept on purpose: it becomes a column header in the exported files.
names(strain_HtrA) <- c(
  "query", "subject", "identity", "Match_length", "Mismatch_length", "gap",
  "query_start", "query_end", "subject_start", "subject_end", "e-vaule",
  "bitscore"
)

# Hits with identity above 40 and Match_length above 70, highest identity first.
strain_HtrA <- strain_HtrA %>%
  as_tibble() %>%
  filter(identity > 40, Match_length > 70) %>%
  arrange(desc(identity))

# Only the hits against the HtrA26695 subject.
strain_HtrA_count <- strain_HtrA %>%
  filter(subject == "HtrA26695")

# Split the query name on "_" into columns a-d; column d holds the tag.
strain_HtrA_unique <- strain_HtrA_count %>%
  separate(col = query, into = c("a", "b", "c", "d"), sep = "_") %>%
  filter(d == "[HtrA-F]")

# Show the number of rows and columns that were kept.
dim(strain_HtrA_unique)
write_csv(strain_HtrA_unique, "strain_HtrA_unique.csv")
######################################strain_IceA############
# IceA: from the BLAST table in "result.txt", keep the hits whose subject is
# "IceA26695" and whose query name carries the "[IceA-F]" tag in its 4th
# "_"-separated field, then save them to "strain_IceA_unique.csv".
library(tidyverse)

strain_IceA <- read.table("result.txt", header = FALSE)

# The file has no header row, so name its 12 columns here. The "e-vaule"
# spelling is kept on purpose: it becomes a column header in the exported files.
names(strain_IceA) <- c(
  "query", "subject", "identity", "Match_length", "Mismatch_length", "gap",
  "query_start", "query_end", "subject_start", "subject_end", "e-vaule",
  "bitscore"
)

# Hits with identity above 40 and Match_length above 70, highest identity first.
strain_IceA <- strain_IceA %>%
  as_tibble() %>%
  filter(identity > 40, Match_length > 70) %>%
  arrange(desc(identity))

# Only the hits against the IceA26695 subject.
strain_IceA_count <- strain_IceA %>%
  filter(subject == "IceA26695")

# Split the query name on "_" into columns a-d; column d holds the tag.
strain_IceA_unique <- strain_IceA_count %>%
  separate(col = query, into = c("a", "b", "c", "d"), sep = "_") %>%
  filter(d == "[IceA-F]")

# Show the number of rows and columns that were kept.
dim(strain_IceA_unique)
write_csv(strain_IceA_unique, "strain_IceA_unique.csv")
######################################strain_NapA############
# NapA: from the BLAST table in "result.txt", keep the hits whose subject is
# "NapA26695" and whose query name carries the "[NapA-F]" tag in its 4th
# "_"-separated field, then save them to "strain_NapA_unique.csv".
library(tidyverse)

strain_NapA <- read.table("result.txt", header = FALSE)

# The file has no header row, so name its 12 columns here. The "e-vaule"
# spelling is kept on purpose: it becomes a column header in the exported files.
names(strain_NapA) <- c(
  "query", "subject", "identity", "Match_length", "Mismatch_length", "gap",
  "query_start", "query_end", "subject_start", "subject_end", "e-vaule",
  "bitscore"
)

# Hits with identity above 40 and Match_length above 70, highest identity first.
strain_NapA <- strain_NapA %>%
  as_tibble() %>%
  filter(identity > 40, Match_length > 70) %>%
  arrange(desc(identity))

# Only the hits against the NapA26695 subject.
strain_NapA_count <- strain_NapA %>%
  filter(subject == "NapA26695")

# Split the query name on "_" into columns a-d; column d holds the tag.
strain_NapA_unique <- strain_NapA_count %>%
  separate(col = query, into = c("a", "b", "c", "d"), sep = "_") %>%
  filter(d == "[NapA-F]")

# Show the number of rows and columns that were kept.
dim(strain_NapA_unique)
write_csv(strain_NapA_unique, "strain_NapA_unique.csv")
######################################strain_OipA############
# OipA: from the BLAST table in "result.txt", keep the hits whose subject is
# "OipA26695" and whose query name carries the "[OipA-F]" tag in its 4th
# "_"-separated field, then save them to "strain_OipA_unique.csv".
library(tidyverse)

strain_OipA <- read.table("result.txt", header = FALSE)

# The file has no header row, so name its 12 columns here. The "e-vaule"
# spelling is kept on purpose: it becomes a column header in the exported files.
names(strain_OipA) <- c(
  "query", "subject", "identity", "Match_length", "Mismatch_length", "gap",
  "query_start", "query_end", "subject_start", "subject_end", "e-vaule",
  "bitscore"
)

# Hits with identity above 40 and Match_length above 70, highest identity first.
strain_OipA <- strain_OipA %>%
  as_tibble() %>%
  filter(identity > 40, Match_length > 70) %>%
  arrange(desc(identity))

# Only the hits against the OipA26695 subject.
strain_OipA_count <- strain_OipA %>%
  filter(subject == "OipA26695")

# Split the query name on "_" into columns a-d; column d holds the tag.
strain_OipA_unique <- strain_OipA_count %>%
  separate(col = query, into = c("a", "b", "c", "d"), sep = "_") %>%
  filter(d == "[OipA-F]")

# Show the number of rows and columns that were kept.
dim(strain_OipA_unique)
write_csv(strain_OipA_unique, "strain_OipA_unique.csv")
######################################strain_Saba############
# Saba: from the BLAST table in "result.txt", keep the hits whose subject is
# "Saba26695" and whose query name carries the Saba forward tag in its 4th
# "_"-separated field, then save them to "strain_Saba_unique.csv".
library(tidyverse)

strain_Saba <- read.table("result.txt", header = FALSE)

# The file has no header row, so name its 12 columns here. The "e-vaule"
# spelling is kept on purpose: it becomes a column header in the exported files.
names(strain_Saba) <- c(
  "query", "subject", "identity", "Match_length", "Mismatch_length", "gap",
  "query_start", "query_end", "subject_start", "subject_end", "e-vaule",
  "bitscore"
)

# Hits with identity above 40 and Match_length above 70, highest identity first.
strain_Saba <- strain_Saba %>%
  as_tibble() %>%
  filter(identity > 40, Match_length > 70) %>%
  arrange(desc(identity))

# Only the hits against the Saba26695 subject.
strain_Saba_count <- strain_Saba %>%
  filter(subject == "Saba26695")

# Split the query name on "_" into columns a-d; column d holds the tag.
# NOTE(review): every other gene section in this script matches a bracketed
# tag ("[CagA-F]", "[HopQ-F]", ...), but this one originally matched only the
# unbracketed "Saba-F", which yields an empty table if the queries are named
# like the others. Both spellings are accepted here, so every row the old
# filter kept is still kept. Confirm the real tag against the query FASTA.
strain_Saba_unique <- strain_Saba_count %>%
  separate(col = query, into = c("a", "b", "c", "d"), sep = "_") %>%
  filter(d %in% c("[Saba-F]", "Saba-F"))

# Show the number of rows and columns that were kept.
dim(strain_Saba_unique)
write_csv(strain_Saba_unique, "strain_Saba_unique.csv")
######################################strain_VacA############
# VacA: from the BLAST table in "result.txt", keep the hits whose subject is
# "VacA26695" and whose query name carries the "[VacA-F]" tag in its 4th
# "_"-separated field, then save them to "strain_VacA_unique.csv".
library(tidyverse)

strain_VacA <- read.table("result.txt", header = FALSE)

# The file has no header row, so name its 12 columns here. The "e-vaule"
# spelling is kept on purpose: it becomes a column header in the exported files.
names(strain_VacA) <- c(
  "query", "subject", "identity", "Match_length", "Mismatch_length", "gap",
  "query_start", "query_end", "subject_start", "subject_end", "e-vaule",
  "bitscore"
)

# Hits with identity above 40 and Match_length above 70, highest identity first.
strain_VacA <- strain_VacA %>%
  as_tibble() %>%
  filter(identity > 40, Match_length > 70) %>%
  arrange(desc(identity))

# Only the hits against the VacA26695 subject.
strain_VacA_count <- strain_VacA %>%
  filter(subject == "VacA26695")

# Split the query name on "_" into columns a-d; column d holds the tag.
strain_VacA_unique <- strain_VacA_count %>%
  separate(col = query, into = c("a", "b", "c", "d"), sep = "_") %>%
  filter(d == "[VacA-F]")

# Show the number of rows and columns that were kept.
dim(strain_VacA_unique)
write_csv(strain_VacA_unique, "strain_VacA_unique.csv")
######
library(openxlsx)

# Collect the eight per-gene tables into a single Excel workbook; each list
# name below is used as the name of that table's worksheet.
write.xlsx(
  list(
    CagA = strain_CagA_unique,
    HopQ = strain_HopQ_unique,
    HtrA = strain_HtrA_unique,
    IceA = strain_IceA_unique,
    NapA = strain_NapA_unique,
    OipA = strain_OipA_unique,
    Saba = strain_Saba_unique,
    VacA = strain_VacA_unique
  ),
  file = "sample_26695_F.xlsx"
)
# (Web-page residue, "no comments yet" -- not part of the script.)