比较基因
#找文件
find ./ -name "*.faa" -print0 | xargs -0 -I {} cp {} ./
#改名称与格式化
ls *.faa|awk -F "_" '{print$1}' >junzhunname.txt
ls *.faa >id2
paste junzhunname.txt id2 >idok
cat idok |while read a b
do
echo "orthomclAdjustFasta ${a} ./${b} 1 "
done |bash 
mkdir fasta
mv *.fasta ./fasta
cd ./fasta
ls *.fasta |awk -F "." '{print$1}' >name
cat name |while read a 
do
echo "mv ${a}.fasta ${a}.faa"
done |bash 
#提取单拷贝基因簇的序列为所有菌株共有的序列
ls *.fa >name
cat *.fa >all.faa
cat name |while read a
do
echo "cat ${a} |head -n1 "
done |bash >id.txt
sed 's/>//g' id.txt >id2.txt
cat all.faa |seqkit grep -f id2.txt >new.fa
#查询目的菌株是否含有目的基因
makeblastdb -in target.fasta -dbtype prot/nucl -out database
blastn -query query.fasta -out ncbi120_ABC_out.txt -db database -outfmt 6 -evalue 1e-5  -max_target_seqs 1
#对上述基因进行统计
###########.carbohydrate ncbi120_ABC transporter permease#########
strain_ABC<-read.table("ncbi120_ABC_out.txt",header = F)
library(tidyverse)
strain_ABC<-separate(data=strain_ABC, col=V1, into=c("strain","gene"), sep = "_")
colnames(strain_ABC) <- c('strain', 'query',"subject","identity","Match_length",
                              "Mismatch_length","gap","query_start","query_end","subject_start",
                              "subject_end","e-vaule","bitscore")
strain_ABC<-arrange(strain_ABC,desc(identity)) %>% as_tibble() %>%  filter(identity > 40 & Match_length >70 ) 
strain_ABC<-strain_ABC[!duplicated(strain_ABC$query),] 
strain_ABC_count = as.data.frame(table(strain_ABC$strain))
colnames(strain_ABC_count)<-c('strain',"count")
strain_ABC_count<-arrange(strain_ABC_count,desc(count)) %>% as_tibble() 
strain_ABC_count_12<-filter(strain_ABC_count, strain == "FNXHL2M3" | strain == "FJSNT37M5"
                            | strain == "A14" | strain == "FFJND7M3"
                            | strain == "FHuNMY10M3" | strain == "FSHXXA2M9"
                            | strain == "FFJND17M1" | strain == "FFJNDD6M2"
                            | strain == "FQHXN83M4" | strain == "FAHBZ9L5"
                            | strain == "FGSYC12M4" | strain == "FHNXY15M2")

评论

  1. lpp1985
    6 月前
    2024-7-09 17:02:15

    你试试orthofinder吧,看你这个又是PGAP又是 OrthoMCL的,老掉牙了。觉着麻烦的话 PGCGAP一套全跑完。看着你走弯路,帮带带路吧。不过你这个网站拿什么做的?

    • admin 博主
      5 月前
      2024-7-30 10:22:25

      谢谢评论,这是我用来训练自己的基础命令练习。
      你说的,orthofinder方法,在我前面的内容有涉及通过把orthofinder封装成镜像
      (https://linguopeng.com/?p=161)在服务器里运算。
      网站是通过docker搭建WordPress实现,主题是Argon。

发送评论 编辑评论


				
|´・ω・)ノ
ヾ(≧∇≦*)ゝ
(☆ω☆)
(╯‵□′)╯︵┴─┴
 ̄﹃ ̄
(/ω\)
∠( ᐛ 」∠)_
(๑•̀ㅁ•́ฅ)
→_→
୧(๑•̀⌄•́๑)૭
٩(ˊᗜˋ*)و
(ノ°ο°)ノ
(´இ皿இ`)
⌇●﹏●⌇
(ฅ´ω`ฅ)
(╯°A°)╯︵○○○
φ( ̄∇ ̄o)
ヾ(´・ ・`。)ノ"
( ง ᵒ̌皿ᵒ̌)ง⁼³₌₃
(ó﹏ò。)
Σ(っ °Д °;)っ
( ,,´・ω・)ノ"(´っω・`。)
╮(╯▽╰)╭
o(*////▽////*)q
>﹏<
( ๑´•ω•) "(ㆆᴗㆆ)
😂
😀
😅
😊
🙂
🙃
😌
😍
😘
😜
😝
😏
😒
🙄
😳
😡
😔
😫
😱
😭
💩
👻
🙌
🖕
👍
👫
👬
👭
🌚
🌝
🙈
💊
😶
🙏
🍦
🍉
😣
Source: github.com/k4yt3x/flowerhd
颜文字
Emoji
小恐龙
花!
上一篇
下一篇