######## meta.slurm (SLURM wrapper) ########
#!/bin/bash
#SBATCH -p batch
#SBATCH -n 30
# sh meta.sh
./meta.sh
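# Submit the wrapper with: sbatch meta.slurm  (30 tasks on the "batch" partition; it then runs ./meta.sh below)

######## meta.sh ########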
#!/bin/bash
# HEADER - Do Not Modify!
set -e
shopt -s expand_aliases
export LC_ALL=C
########
export PATH="/public/software/apps/bin:/public/software/apps/bin/singularity:/public/software/apps/go/bin:$PATH"           
alias fastqc="singularity exec /home/stu_2/linguopeng/sif/MetaGenome.sif fastqc"
alias fastp="singularity exec /home/stu_2/linguopeng/sif/MetaGenome.sif fastp"
alias bowtie2-build="singularity exec /home/stu_2/linguopeng/sif/MetaGenome.sif bowtie2-build"
alias bowtie2="singularity exec /home/stu_2/linguopeng/sif/MetaGenome.sif bowtie2"
alias samtools="singularity exec /home/stu_2/linguopeng/sif/MetaGenome.sif samtools"
alias kraken2="singularity exec /home/stu_2/linguopeng/sif/MetaGenome.sif kraken2"
alias bracken="singularity exec /home/stu_2/linguopeng/sif/MetaGenome.sif bracken"
alias kraken-biom="singularity exec /home/stu_2/linguopeng/sif/MetaGenome.sif kraken-biom"
alias biom="singularity exec /home/stu_2/linguopeng/sif/MetaGenome.sif biom"
alias megahit="singularity exec /home/stu_2/linguopeng/sif/MetaGenome.sif megahit"
alias quast.py="singularity  exec /home/stu_2/linguopeng/sif/MetaGenome.sif quast.py "
alias prokka="singularity  exec /home/stu_2/linguopeng/sif/MetaGenome.sif prokka "
alias seqtk="singularity  exec /home/stu_2/linguopeng/sif/MetaGenome.sif seqtk "
alias cd-hit-est="singularity  exec /home/stu_2/linguopeng/sif/MetaGenome.sif cd-hit-est "
alias perl="singularity  exec /home/stu_2/linguopeng/sif/MetaGenome.sif perl "
alias salmon="singularity  exec /home/stu_2/linguopeng/sif/MetaGenome.sif salmon"
alias emapper.py="singularity  exec /home/stu_2/linguopeng/sif/emapper.sif emapper.py"
alias emapperx.R="singularity  exec /home/stu_2/linguopeng/sif/emapper.sif emapperx.R"
alias diamond="singularity exec /home/stu_2/linguopeng/sif/MetaGenome.sif diamond"
alias salmon="singularity exec /home/stu_2/linguopeng/sif/MetaGenome.sif salmon"
alias Rscript="singularity  exec /home/stu_2/linguopeng/sif/MetaGenome.sif Rscript"
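# Optional sanity check that the container aliases resolve (assumes the .sif images above exist):
# fastqc --version
# samtools --version
# kraken2 --version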
#level=S
# # HEADER END
# cd /home/stu_2/PRL/
# mkdir -p P1.data_filter/01.quality
# cd P1.data_filter/01.quality
# mkdir ./qc
# #########fastqc#######
# for i in `ls /home/stu_2/PRL/cleanreads | grep _clean_r1.gz`
# do
# fastqc  --outdir ./qc  --threads 16  /home/stu_2/PRL/cleanreads/${i%_clean_r1.gz*}_clean_r1.gz  /home/stu_2/PRL/cleanreads/${i%_clean_r1.gz*}_clean_r2.gz 
# done
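# Note: ${i%_clean_r1.gz*} strips the "_clean_r1.gz" suffix to recover the sample name, e.g.
#   i=SampleA_clean_r1.gz; echo "${i%_clean_r1.gz*}"   # -> SampleA   (SampleA is a made-up example name)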
# #########fastp#######
# mkdir ./clean_data
# for i in `ls /home/stu_2/PRL/cleanreads | grep _clean_r1.gz`
# do
# fastp  --thread  16 -i /home/stu_2/PRL/cleanreads/${i%_clean_r1.gz*}_clean_r1.gz -I  /home/stu_2/PRL/cleanreads/${i%_clean_r1.gz*}_clean_r2.gz  -o clean_data/${i%_clean_r1.gz*}_1.fq.gz -O clean_data/${i%_clean_r1.gz*}_2.fq.gz -j  clean_data/${i%_clean_r1.gz*}.fastp.json -h  clean_data/${i%_clean_r1.gz*}.fastp.html
# done
# #########bowtie2#######
# # bowtie2-build  genome.fa genome.db
# # bowtie2-build  c57.fa c57.db 
# # bowtie2-build  hg38.fa hg38.db
# cd ..
# mkdir -p 02.contaminant && cd 02.contaminant
# #########sam#######
# mkdir -p sam
# for i in `ls /home/stu_2/PRL/cleanreads | grep _clean_r1.gz`
# do
# bowtie2 --threads 24 -x /home/stu_2/linguopeng/database/genome/hg38.db  -1 ../01.quality/clean_data/${i%_clean_r1.gz*}_1.fq.gz -2 ../01.quality/clean_data/${i%_clean_r1.gz*}_2.fq.gz -S ./sam/${i%_clean_r1.gz*}.sam 2>./sam/${i%_clean_r1.gz*}.map.log 
# done
# #########bam#######
# mkdir -p bam
# for i in `ls /home/stu_2/PRL/cleanreads | grep _clean_r1.gz`
# do
# samtools  view -@ 24 -b -f 12 ./sam/${i%_clean_r1.gz*}.sam >./bam/${i%_clean_r1.gz*}.unmap.bam 
# done
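# -f 12 keeps only read pairs in which both the read (flag 4) and its mate (flag 8) are unmapped
# to hg38, i.e. the non-host reads that are carried forward into the metagenomic analysis.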
# rm -rf sam
# #########fastq#######
# mkdir clean_unmap
# for i in `ls /home/stu_2/PRL/cleanreads | grep _clean_r1.gz`
# do
# samtools fastq -1 ./clean_unmap/${i%_clean_r1.gz*}_1.clean.fq.gz -2 ./clean_unmap/${i%_clean_r1.gz*}_2.clean.fq.gz -s ./clean_unmap/${i%_clean_r1.gz*}_singleton.clean.fq.gz ./bam/${i%_clean_r1.gz*}.unmap.bam 
# done
# #######kraken2#######
# cd /home/stu_2/PRL/P1.data_filter/02.contaminant
# cd ../../
# mkdir -p P2.Taxonomic_profiling/1.taxon && cd P2.Taxonomic_profiling/1.taxon
# mkdir kraken
# for i in `ls /home/stu_2/PRL/cleanreads | grep _clean_r1.gz`
# do
# kraken2 --threads 24 --paired --db /home/stu_2/linguopeng/database/k2 --report kraken/${i%_clean_r1.gz*}.kreport --output kraken/${i%_clean_r1.gz*}.kraken /home/stu_2/PRL/P1.data_filter/02.contaminant/clean_unmap/${i%_clean_r1.gz*}_1.clean.fq.gz /home/stu_2/PRL/P1.data_filter/02.contaminant/clean_unmap/${i%_clean_r1.gz*}_2.clean.fq.gz 
# done
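# --report writes the per-taxon summary (.kreport) consumed by bracken/kraken-biom below;
# --output writes the per-read classifications (.kraken).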
# ##########bracken#######
# cd /home/stu_2/PRL/P2.Taxonomic_profiling/1.taxon
# # mkdir out_$level
# # for i in `ls /home/stu_2/PRL/cleanreads | grep _clean_r1.gz`
# # do
# # bracken -d /home/stu_2/linguopeng/database/k2 -i  kraken/${i%_clean_r1.gz*}.kreport -o out_$level/${i%_clean_r1.gz*}.bracken.$level -w out_$level/${i%_clean_r1.gz*}.kreport  -l  $level -t 24
# # done
# kraken-biom  kraken/*.kreport --max S  -o  ./out_$level/$level.biom  
# biom  convert -i  ./out_$level/$level.biom -o  ./out_$level/$level.count.tsv.tmp  --to-tsv --header-key taxonomy
# sed 's/; g__\([^;]\+\); s__/; g__\1; s__\1 /'   ./out_$level/$level.count.tsv.tmp >  ./out_$level/$level.taxID.count.tsv
# sed  '/^#/! s/^[0-9]\+\t\(.*[A-Za-z]\+__\([^;]\+\)\)$/\2\t\1/'  ./out_$level/$level.taxID.count.tsv  >  ./out_$level/$level.taxName.count.tsv
# sed -e '1d' -e '2s/^#//' ./out_$level/$level.taxName.count.tsv | awk -F "\t" -v OFS="\t" '{NF--; print}' | sed 's/\t$//' > ./out_$level/$level.count.tsv
# Rscript  /home/stu_2/linguopeng/script/draw_taxonBarplot.R    out_$level/$level.count.tsv  10   out_$level/$level.count.out
# mkdir -p /home/stu_2/PRL/P3.Assembly_annotation/01.megahit && cd /home/stu_2/PRL/P3.Assembly_annotation/01.megahit
# for i in `ls /home/stu_2/PRL/P1.data_filter/02.contaminant/clean_unmap | grep _1.clean.fq.gz`
# do
# megahit \
   # -1 /home/stu_2/PRL/P1.data_filter/02.contaminant/clean_unmap/${i%_1.clean.fq.gz*}_1.clean.fq.gz \
   # -2 /home/stu_2/PRL/P1.data_filter/02.contaminant/clean_unmap/${i%_1.clean.fq.gz*}_2.clean.fq.gz \
   # --min-contig-len 500 \
   # --tmp-dir  ./ \
   # --memory  0.8 \
   # --num-cpu-threads 24 \
   # --out-dir ${i%_1.clean.fq.gz*}_megahit \
   # --out-prefix  ${i%_1.clean.fq.gz*}
# done
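# megahit notes: --min-contig-len 500 drops contigs shorter than 500 bp, --memory 0.8 caps usage
# at ~80% of machine memory, and --tmp-dir ./ keeps temporary files in the working directory.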
# mkdir /home/stu_2/PRL/P3.Assembly_annotation/02.quast  && cd /home/stu_2/PRL/P3.Assembly_annotation/02.quast
# for i in `ls /home/stu_2/PRL/P1.data_filter/02.contaminant/clean_unmap | grep _1.clean.fq.gz`
# do
# ln -s /home/stu_2/PRL/P3.Assembly_annotation/01.megahit/${i%_1.clean.fq.gz*}_megahit/${i%_1.clean.fq.gz*}.contigs.fa  ./${i%_1.clean.fq.gz*}.megahit.fa
# quast.py ./${i%_1.clean.fq.gz*}.megahit.fa
# done
# mkdir -p /home/stu_2/PRL/P3.Assembly_annotation/03.prokka  
# cd /home/stu_2/PRL/P3.Assembly_annotation/03.prokka 
# for i in `ls /home/stu_2/PRL/P1.data_filter/02.contaminant/clean_unmap | grep _1.clean.fq.gz`
# do
# ln -s /home/stu_2/PRL/P3.Assembly_annotation/01.megahit/${i%_1.clean.fq.gz*}_megahit/${i%_1.clean.fq.gz*}.contigs.fa ./${i%_1.clean.fq.gz*}.contigs.fa 
# prokka --outdir ${i%_1.clean.fq.gz*}_prokka --prefix ${i%_1.clean.fq.gz*} --addgenes --addmrna --locustag ${i%_1.clean.fq.gz*} --kingdom Bacteria --metagenome --cpus 24 ./${i%_1.clean.fq.gz*}.contigs.fa 
# done
# mkdir -p /home/stu_2/PRL/P3.Assembly_annotation/04.cdhit 
# cd /home/stu_2/PRL/P3.Assembly_annotation/04.cdhit 
# cat  ../03.prokka/*_prokka/*.ffn > all.trans.fa
# cat  ../03.prokka/*_prokka/*.faa > all.pep.fa
# sed -n "s/^>\(\S\+\).*$/\1/p"  all.pep.fa > all.cds.id
# seqtk subseq  all.trans.fa all.cds.id > all.cds.fa
# cd-hit-est -i all.cds.fa  -o  all.cds.cdhit -c 0.95 -aS 0.9  -M 204800 -T 30 
# cp  all.cds.cdhit  unigene_cds.fasta
# awk '$1 ~/^>/{print $1}'  all.cds.cdhit | sed 's/^>//' > unigene.id
# seqtk  subseq  all.pep.fa unigene.id > unigene_pep.fasta
# awk '{if($1~/^>/){printf $1 $2} else if($NF ~ /*$/){print "\t"$3}}' all.cds.cdhit.clstr |sed 's/>//g; s/...$//'|awk '{print $2"\t"$1}'  > map_id.txt
# perl /home/stu_2/linguopeng/script/map_data_ids  map_id.txt  unigene_cds.fasta 
# perl /home/stu_2/linguopeng/script/map_data_ids  map_id.txt  unigene_pep.fasta
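# cd-hit-est clusters the CDS at >=95% identity (-c 0.95) with the shorter sequence covered over
# >=90% of its length (-aS 0.9); the cluster representatives become the unigene set. map_id.txt pairs
# each representative ID (the member flagged "*" in all.cds.cdhit.clstr) with its cluster number, and
# the local map_data_ids script presumably uses it to rename the unigene FASTA headers.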
# cd  /home/stu_2/PRL/P3.Assembly_annotation && mkdir 05.abundance && cd  05.abundance
# ln  -s   ../04.cdhit/unigene_cds.fasta
# cd  /home/stu_2/PRL/P3.Assembly_annotation/05.abundance
# salmon index -t  unigene_cds.fasta  -i  unigene_index -p 30
#for i in `ls /home/stu_2/PRL/P1.data_filter/02.contaminant/clean_unmap | grep _1.clean.fq.gz`
#do
#salmon  quant --validateMappings  --meta -p 30   -i  unigene_index  -l IU   -1 /home/stu_2/PRL/P1.data_filter/02.contaminant/clean_unmap/${i%_1.clean.fq.gz*}_1.clean.fq.gz -2  /home/stu_2/PRL/P1.data_filter/02.contaminant/clean_unmap/${i%_1.clean.fq.gz*}_2.clean.fq.gz  -o quants/${i%_1.clean.fq.gz*}.quant
#done
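# -l IU declares a paired-end, inward-facing, unstranded library; --validateMappings enables salmon's
# selective alignment, and each sample's abundances land in quants/<sample>.quant/quant.sf.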
# mkdir /home/stu_2/PRL/P3.Assembly_annotation/06.eggnog
# cd /home/stu_2/PRL/P3.Assembly_annotation/06.eggnog
# emapper.py -i unigene_pep.fasta -o my --itype proteins -m diamond --cpu 30 --override 1>emapper.log 2>&1 
# emapperx.R my.emapper.annotations unigene_pep.fasta
# mkdir /home/stu_2/PRL/P3.Assembly_annotation/07.uniprot
# cd /home/stu_2/PRL/P3.Assembly_annotation/07.uniprot
# ln -s ../04.cdhit/unigene_pep.fasta
# diamond blastp  \
 #   --db /home/stu_2/linguopeng/database/uniprot/uniref90 \
  #  --query  unigene_pep.fasta \
   # --out unigene_pep.uniref90.m6 \
   # --threads 30 \
   # --outfmt 6 \
   # --max-target-seqs 1 \
   # --evalue 1e-5
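# diamond notes: --outfmt 6 gives tabular BLAST-style output, --max-target-seqs 1 keeps only the
# best UniRef90 hit per query protein, and --evalue 1e-5 filters out weak hits.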
## Extract GO annotations based on the UniProt idmapping table
# cd /home/stu_2/PRL/P3.Assembly_annotation/07.uniprot
# perl /home/stu_2/linguopeng/script/uniref90_idmapping.pl \
  #  unigene_pep.uniref90.m6 \
 #   /home/stu_2/linguopeng/database/uniprot/idmapping_selected.tab \
 #   > unigene_pep.uniref90.GOanno
# Rscript  /home/stu_2/linguopeng/script/GOmapperx.R  unigene_pep.uniref90.GOanno 
# cd /home/stu_2/PRL/P3.Assembly_annotation/05.abundance
# quants=` ls quants/ | awk '{print "quants/"$1 }' | tr '\n'  ' ' `
# names=` ls quants/ | sed 's/.quant$//'| tr '\n'   ' '  `
# salmon quantmerge --quants $quants --names  $names  --column tpm -o unigenens.tpm
# salmon quantmerge --quants $quants --names  $names  --column numreads -o unigenens.count
# sed '1s/^Name\t//' unigenens.count > unigenens.count.matrix
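# salmon quantmerge collects the per-sample quant.sf files into unigene x sample matrices (TPM and
# raw read counts); the sed strips the leading "Name" header cell, presumably so run_DE_analysis.pl
# can treat the first column as row names.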
# perl  /home/stu_2/linguopeng/script/run_DE_analysis.pl \
# --matrix unigenens.count.matrix  \
# --method DESeq2 \
# --samples_file  sample.txt \
# --contrasts contrasts_FH_SC.txt \
# --output DE_out_FH_vs_SC
# perl  /home/stu_2/linguopeng/script/run_DE_analysis.pl \
# --matrix unigenens.count.matrix  \
# --method DESeq2 \
# --samples_file  sample.txt \
# --contrasts contrasts_SH_SC.txt \
# --output DE_out_SH_vs_SC
# perl  /home/stu_2/linguopeng/script/run_DE_analysis.pl \
# --matrix unigenens.count.matrix  \
# --method DESeq2 \
# --samples_file  sample.txt \
# --contrasts contrasts_SH_FH.txt \
# --output DE_out_SH_vs_FH
# perl  /home/stu_2/linguopeng/script/run_DE_analysis.pl     --matrix unigenens.count.matrix      --method DESeq2     --samples_file  sample.txt     --contrasts contrasts_SH_SC.txt     --output DE_out_SH_SC
# #ln -s  /home/stu_2/PRL/P3.Assembly_annotation/06.eggnog/R_Library/ .
# #ln -s ../06.eggnog/my.emapper.annotations
# Rscript \
# ../../../linguopeng/script/enrich_analysis.R \
# ./DE_out/unigenens.count.matrix.FH_vs_SC.DESeq2.DE_results \
# ./unigene.annotations \
# ./DE_out/unigenens.count.matrix.FH_vs_SC.DESeq2.DE_results.enrich
# 
# cd /home/stu_2/PRL/P3.Assembly_annotation/08.vfdb
# diamond blastp  \
# --db /home/stu_2/PRL/P3.Assembly_annotation/08.vfdb/VFDB_setB \
# --query  unigene_pep.fasta \
# --out unigene_pep.vfdb.m6 \
# --threads 20 \
# --outfmt 6 \
# --max-target-seqs 1 \
# --evalue 1e-5
level=P
cd /home/stu_2/PRL/P2.Taxonomic_profiling/1.taxon
# mkdir out_$level
# for i in `ls /home/stu_2/PRL/cleanreads | grep _clean_r1.gz`
# do
# bracken -d /home/stu_2/linguopeng/database/k2 -i  kraken/${i%_clean_r1.gz*}.kreport -o out_$level/${i%_clean_r1.gz*}.bracken.$level -w out_$level/${i%_clean_r1.gz*}.kreport  -l  $level -t 24
# done
kraken-biom  kraken/*.kreport --max P -o  ./out_$level/$level.biom 
biom  convert -i  ./out_$level/$level.biom -o  ./out_$level/$level.count.tsv.tmp  --to-tsv --header-key taxonomy
sed 's/; g__\([^;]\+\); s__/; g__\1; s__\1 /'   ./out_$level/$level.count.tsv.tmp >  ./out_$level/$level.taxID.count.tsv
sed  '/^#/! s/^[0-9]\+\t\(.*[A-Za-z]\+__\([^;]\+\)\)$/\2\t\1/'  ./out_$level/$level.taxID.count.tsv  >  ./out_$level/$level.taxName.count.tsv
sed -e '1d' -e '2s/^#//' ./out_$level/$level.taxName.count.tsv | awk -F "\t" -v OFS="\t" '{NF--; print}' | sed 's/\t$//' > ./out_$level/$level.count.tsv
Rscript  /home/stu_2/linguopeng/script/draw_taxonBarplot.R    out_$level/$level.count.tsv  10   out_$level/$level.count.out
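# The block above: kraken-biom merges the per-sample .kreport files into one BIOM table; biom convert
# dumps it to TSV with a taxonomy column; the first sed prefixes each species name with its genus
# (producing "Genus species" labels); the second sed moves the last named rank to the front of each row
# in place of the numeric taxID; the final sed/awk drop the comment line, the leading "#", and the
# taxonomy column, leaving a plain taxon x sample count matrix that draw_taxonBarplot.R then plots
# (the "10" argument presumably limits the barplot to the top 10 taxa).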
#test
#ls *.fna |awk -F "_" '{print$1,$2,$3,$4}' >sample.txt
cat sample.txt | while read  group sample   fq1 fq2 
do 
	echo singularity exec ../../software/MetaGenome.sif kraken2  --threads 4 --quick --paired --db ../../Database/K2/  --report $sample.kreport --output $sample.kraken  $fq1 $fq2 
done > step1.run_kraken2.sh 
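# The loop only echoes the commands; step1.run_kraken2.sh now holds one kraken2 command per sample
# and can be executed with e.g.:  bash step1.run_kraken2.sh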
#test
#test
gene=abp1
 # HEADER END
  #########bowtie2#######
  # download the FASTA of the gene cluster of interest (here abp1) from NCBI
  #bowtie2-build  abp1.fa abp1.db
   #########sam#######
   mkdir -p  /home/stu_2/PRL/iald/abp1/sam
   for i in `ls /home/stu_2/PRL/P1.data_filter/02.contaminant/clean_unmap/ | grep _1.clean.fq.gz`
   do
	   bowtie2 --threads 30 -x /home/stu_2/PRL/iald/abp1/${gene}.db  -1 /home/stu_2/PRL/P1.data_filter/02.contaminant/clean_unmap/${i%_1.clean.fq.gz}_1.clean.fq.gz -2 /home/stu_2/PRL/P1.data_filter/02.contaminant/clean_unmap/${i%_1.clean.fq.gz}_2.clean.fq.gz -S /home/stu_2/PRL/iald/abp1/sam/${i%_1.clean.fq.gz}.${gene}.sam 2> /home/stu_2/PRL/iald/abp1/sam/${i%_1.clean.fq.gz}.${gene}.map.log 
   done 
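   # bowtie2 writes its alignment-rate summary to stderr, so the 2> redirects capture each
   # sample's mapping rate against the ${gene} reference in the corresponding .map.log file.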
    #########bam#######
    mkdir -p /home/stu_2/PRL/iald/abp1/bam
    for i in `ls /home/stu_2/PRL/P1.data_filter/02.contaminant/clean_unmap/ | grep _1.clean.fq.gz`
    do
	    samtools  view -@ 24 -F 12 /home/stu_2/PRL/iald/abp1/sam/${i%_1.clean.fq.gz}.${gene}.sam >/home/stu_2/PRL/iald/abp1/bam/${i%_1.clean.fq.gz}.${gene}.map.bam 
    done
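    # -F 12 excludes reads that are unmapped or whose mate is unmapped, keeping pairs mapped to the
    # ${gene} reference; without -b the output is plain SAM text, which is what the wc -l counting
    # below relies on (one line per alignment record).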
    rm -r /home/stu_2/PRL/iald/abp1/sam
    #########count#########
    ls /home/stu_2/PRL/iald/abp1/bam > /home/stu_2/PRL/iald/abp1/bam/name.txt
    for i in `ls /home/stu_2/PRL/P1.data_filter/02.contaminant/clean_unmap/ | grep _1.clean.fq.gz`
    do
	    cat /home/stu_2/PRL/iald/abp1/bam/${i%_1.clean.fq.gz}.${gene}.map.bam | wc -l >> /home/stu_2/PRL/iald/abp1/bam/${gene}.count_map.txt
    done

    for i in `ls /home/stu_2/PRL/P1.data_filter/02.contaminant/clean_unmap/ | grep _1.clean.fq.gz`
    do
	    # total read count per sample (both clean fastq files, 4 lines per read), used as the denominator
	    echo $(( ( $(zcat /home/stu_2/PRL/P1.data_filter/02.contaminant/clean_unmap/${i%_1.clean.fq.gz}_1.clean.fq.gz | wc -l) + $(zcat /home/stu_2/PRL/P1.data_filter/02.contaminant/clean_unmap/${i%_1.clean.fq.gz}_2.clean.fq.gz | wc -l) ) / 4 )) >> /home/stu_2/PRL/iald/abp1/bam/${gene}.count_all.txt
    done
    cat /home/stu_2/PRL/iald/abp1/bam/${gene}.count_map.txt /home/stu_2/PRL/iald/abp1/bam/${gene}.count_all.txt > /home/stu_2/PRL/iald/abp1/bam/${gene}.all.txt
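    # A minimal sketch (assuming both count files have one line per sample, in the same order as the
    # clean_unmap listing) to view sample, mapped and total counts side by side:
    # paste <(ls /home/stu_2/PRL/P1.data_filter/02.contaminant/clean_unmap/ | grep _1.clean.fq.gz | sed 's/_1.clean.fq.gz//') \
    #       /home/stu_2/PRL/iald/abp1/bam/${gene}.count_map.txt \
    #       /home/stu_2/PRL/iald/abp1/bam/${gene}.count_all.txt \
    #       > /home/stu_2/PRL/iald/abp1/bam/${gene}.summary.tsv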
#test
find . -name "*contigs.fa" -print0 | xargs -0 -I {} cp {} ./
find /home/stu_2/PRL/P3.Assembly_annotation/ -name "*contigs.fa" -print0 | xargs -0 -I {} ln {} ./

ln -s /home/stu_2/PRL/P3.Assembly_annotation/01.megahit/${i%_1.clean.fq.gz*}_megahit/${i%_1.clean.fq.gz*}.contigs.fa ./${i%_1.clean.fq.gz*}.contigs.fa
#test