#找文件
find ./ -name "*.faa" -print0 | xargs -0 -I {} cp {} ./
#改名称与格式化
ls *.faa|awk -F "_" '{print$1}' >junzhunname.txt
ls *.faa >id2
paste junzhunname.txt id2 >idok
cat idok |while read a b
do
echo "orthomclAdjustFasta ${a} ./${b} 1 "
done |bash
mkdir fasta
mv *.fasta ./fasta
cd ./fasta
ls *.fasta |awk -F "." '{print$1}' >name
cat name |while read a
do
echo "mv ${a}.fasta ${a}.faa"
done |bash
#提取单拷贝基因簇的序列为所有菌株共有的序列
ls *.fa >name
cat *.fa >all.faa
cat name |while read a
do
echo "cat ${a} |head -n1 "
done |bash >id.txt
sed 's/>//g' id.txt >id2.txt
cat all.faa |seqkit grep -f id2.txt >new.fa
#查询目的菌株是否含有目的基因
makeblastdb -in target.fasta -dbtype prot/nucl -out database
blastn -query query.fasta -out ncbi120_ABC_out.txt -db database -outfmt 6 -evalue 1e-5 -max_target_seqs 1
#对上述基因进行统计
###########.carbohydrate ncbi120_ABC transporter permease#########
strain_ABC<-read.table("ncbi120_ABC_out.txt",header = F)
library(tidyverse)
strain_ABC<-separate(data=strain_ABC, col=V1, into=c("strain","gene"), sep = "_")
colnames(strain_ABC) <- c('strain', 'query',"subject","identity","Match_length",
"Mismatch_length","gap","query_start","query_end","subject_start",
"subject_end","e-vaule","bitscore")
strain_ABC<-arrange(strain_ABC,desc(identity)) %>% as_tibble() %>% filter(identity > 40 & Match_length >70 )
strain_ABC<-strain_ABC[!duplicated(strain_ABC$query),]
strain_ABC_count = as.data.frame(table(strain_ABC$strain))
colnames(strain_ABC_count)<-c('strain',"count")
strain_ABC_count<-arrange(strain_ABC_count,desc(count)) %>% as_tibble()
strain_ABC_count_12<-filter(strain_ABC_count, strain == "FNXHL2M3" | strain == "FJSNT37M5"
| strain == "A14" | strain == "FFJND7M3"
| strain == "FHuNMY10M3" | strain == "FSHXXA2M9"
| strain == "FFJND17M1" | strain == "FFJNDD6M2"
| strain == "FQHXN83M4" | strain == "FAHBZ9L5"
| strain == "FGSYC12M4" | strain == "FHNXY15M2")
你试试orthofinder吧,看你这个又是PGAP又是 OrthoMCL的,老掉牙了。觉着麻烦的话 PGCGAP一套全跑完。看着你走弯路,帮带带路吧。不过你这个网站拿什么做的?
谢谢评论,这是我用来训练自己的基础命令练习。
你说的,orthofinder方法,在我前面的内容有涉及通过把orthofinder封装成镜像
(https://linguopeng.com/?p=161)在服务器里运算。
网站是通过docker搭建WordPress实现,主题是Argon。