# Reference: https://github.com/qiyunlab/HGTector/
# Zhu Q, Kosoy M, Dittmar K. HGTector: an automated method facilitating genome-wide discovery of putative horizontal gene transfers. BMC Genomics. 2014. 15:717.
# Jianshu write-up: https://www.jianshu.com/p/9c529c9064d8
# Download the NCBI inputs over Aspera (ascp).
# The remote source has to be a single user@host:path word; NCBI's anonymous
# Aspera account is anonftp@ftp.ncbi.nlm.nih.gov.
aspera_key=~/asperaweb_id_dsa.openssh
ncbi_remote=anonftp@ftp.ncbi.nlm.nih.gov

# nr protein FASTA (gzip-compressed).
ascp -v -k 1 -T -l 300m -i "$aspera_key" "${ncbi_remote}:/blast/db/FASTA/nr.gz" ./
# Taxonomy dump archive (provides the names.dmp / nodes.dmp used further down).
ascp -i "$aspera_key" -QTr -l500m "${ncbi_remote}:pub/taxonomy/taxdump.tar.gz" ./
# Protein accession -> taxid mapping table.
# Exactly one -l (transfer-rate cap) is given; passing it twice with different
# values is ambiguous.
ascp -P33001 --mode recv -i "$aspera_key" -QTr -l600m "${ncbi_remote}:pub/taxonomy/accession2taxid/prot.accession2taxid.gz" ./
# Install the csvtk and taxonkit command-line tools.
# Both packages are published on the bioconda channel, so the channels are named
# explicitly instead of depending on what the user's conda configuration lists.
conda install -y -c conda-forge -c bioconda csvtk taxonkit
# Build the protein BLAST database for nr.
# The download step fetches nr.gz while makeblastdb reads nr.fa, so unpack it
# first. A temp file is used so an interrupted run never leaves a truncated
# nr.fa behind that would be mistaken for a complete one.
if [ ! -s nr.fa ]; then
  gunzip -c nr.gz > nr.fa.tmp && mv nr.fa.tmp nr.fa
fi
# The database is written under ./nr/ because the blastdb_aliastool calls
# further down open it as ./nr/nr.
mkdir -p nr
# Runs in the foreground: every following step needs the finished database.
# nohup is kept so that a dropped terminal session does not kill the long build.
nohup makeblastdb -parse_seqids -in nr.fa -dbtype prot -out nr/nr
# List every taxid below each microbial root taxon, one list file per group:
#   10239  Viruses
#   2157   Archaea
#   2      Bacteria
#   4751   Fungi
# taxonkit is pointed at ./taxdump/, so unpack the downloaded taxdump.tar.gz
# there if that has not been done yet.
if [ ! -s taxdump/nodes.dmp ]; then
  mkdir -p taxdump && tar -xzf taxdump.tar.gz -C taxdump
fi
for entry in 10239:Virus 2157:Archaea 2:Bacteria 4751:Fungi; do
  taxid=${entry%%:*}
  group=${entry#*:}
  # --indent "" turns off the tree indentation so each line is a bare taxid.
  taxonkit list -j 45 --ids "$taxid" --indent "" --data-dir ./taxdump/ > "${group}.list"
done
# For each group, keep the rows of the accession -> taxid table whose taxid is
# in the group's list, and write out their accession.version column.
# The table is read from the .gz file as downloaded, and the lists from the
# current directory, which is where the taxonkit commands write them.
for group in Virus Archaea Bacteria Fungi; do
  gzip -dc prot.accession2taxid.gz \
    | csvtk -t grep -f taxid -P "${group}.list" \
    | csvtk -t cut -f accession.version > "${group}.taxid.acc.txt"
done
# Create one alias database per group on top of ./nr/nr, restricted to the
# accessions collected for that group.
# The spelling "nr_virues" is kept as-is because the blastdbcmd step opens the
# database under exactly that name.
for entry in Virus:nr_virues Archaea:nr_archaea Bacteria:nr_bacteria Fungi:nr_fungi; do
  group=${entry%%:*}
  alias_db=${entry#*:}
  blastdb_aliastool -seqidlist "${group}.taxid.acc.txt" -db ./nr/nr -out "$alias_db" -title "$alias_db"
done
##########
#blastdb_aliastool -seqidlist Virus.taxid.acc.txt -db ./nr/nr -out nr_virues -title nr_virues
#Created protein BLAST (alias) database nr_virues with 898896 sequences
#blastdb_aliastool -seqidlist Bacteria.taxid.acc.txt -db ./nr/nr -out nr_bacteria -title nr_bacteria
#Created protein BLAST (alias) database nr_bacteria with 28850848 sequences
#blastdb_aliastool -seqidlist Fungi.taxid.acc.txt -db ./nr/nr -out nr_fungi -title nr_fungi
#Created protein BLAST (alias) database nr_fungi with 2684352 sequences
#blastdb_aliastool -seqidlist Archaea.taxid.acc.txt -db ./nr/nr -out nr_archaea -title nr_archaea
#Created protein BLAST (alias) database nr_archaea with 608013 sequences
###################
# Dump all sequences of each alias database into its own file.
for entry in nr_virues:Virus nr_archaea:Archaea nr_bacteria:Bacteria nr_fungi:Fungi; do
  alias_db=${entry%%:*}
  group=${entry#*:}
  blastdbcmd -db "$alias_db" -entry all -dbtype prot -out "nr_${group}.fa"
done
# Merge the per-group files into a single microbial sequence file.
cat \
  nr_Virus.fa \
  nr_Archaea.fa \
  nr_Bacteria.fa \
  nr_Fungi.fa \
  > nr_B_F_A_V.fa
# Build the DIAMOND database from the merged microbial sequences together with
# the taxonomy files.
# Make sure the output directory exists before DIAMOND writes into it.
mkdir -p diamond
# --taxonmap is given the gzip-compressed mapping table as downloaded; no
# uncompressed prot.accession2taxid is produced anywhere in this script.
diamond makedb --threads 45 --in nr_B_F_A_V.fa --taxonmap prot.accession2taxid.gz --taxonnodes taxdump/nodes.dmp --taxonnames taxdump/names.dmp --db diamond/nr_B_F_A_V
# Run HGTector (activate the hgtector conda environment before this point).
# search: align the query proteins against the DIAMOND database built above.
hgtector search -i FAHBZ9L5_CDS.faa -o search -m diamond -p 45 -d diamond/nr_B_F_A_V -t taxdump
# analyze: works on the search output directory. It uses the same ./taxdump
# directory as the search step, since both run from the same working directory.
hgtector analyze -i search -o analyze -t taxdump
# (Web-page footer residue: "No comments yet" — not part of the pipeline.)