qiime2.slurm
#!/bin/bash
#SBATCH -p batch
#SBATCH -N 1
#SBATCH -n 30
./qiime2.sh
#bac: V3,17, 341F, CCTACGGGNGGCWGCAG
# V4, 20, 806R, GGACTACHVGGGTWTCTAAT
#bif:greol:23,22: TCCGATTACGAYCGYGAGAAGCT/ CSGCYTCGGTSGTCAGGAACAG
#lac:groel:20,20: GCYGGTGCWAACCCNGTTGG/ AANGTNCCVCGVATCTTGTT
#!/bin/bash
# HEADER - Do Not Modify!
set -e
shopt -s expand_aliases
export LC_ALL=C
#########
export PATH="/public/software/apps/bin:/public/software/apps/bin/singularity:/public/software/apps/go/bin:$PATH"
alias qiime="singularity exec /home/stu_2/linguopeng/sif/qiime2_2022.2_240110.sif qiime"
alias biom="singularity exec /home/stu_2/linguopeng/sif/qiime2_2022.2_240110.sif biom"
export lgp=/home/stu_2/linguopeng/qiime2_240314/test
## mkdir -p ${lgp}/seqs
# singularity instance start qiime2_2022.2_240110.sif qiime2
# singularity shell instance://qiime2
mkdir -p ${lgp}/results
mkdir -p ${lgp}/results/taxa
## Samples_list
cd ${lgp}/seqs
echo "sample-id,absolute-filepath,direction" > Samples_list.csv
for i in `ls *fastq.gz | awk '{print $0}'`;
do
ls $i | grep 1.fastq.gz | awk -v a=`echo $i | sed s/_R1.fastq.gz//` -v b=$PWD -v c=$i '{print a","b"/"c",""forward"}' >> Samples_list.csv ;
ls $i | grep 2.fastq.gz | awk -v a=`echo $i | sed s/_R2.fastq.gz//` -v b=$PWD -v c=$i '{print a","b"/"c",""reverse"}' >> Samples_list.csv ;
done
## import data
qiime tools import \
--type 'SampleData[PairedEndSequencesWithQuality]' \
--input-path ${lgp}/seqs/Samples_list.csv \
--output-path ${lgp}/seqs/demux.qza \
--input-format PairedEndFastqManifestPhred33
## demux
qiime demux summarize \
--i-data demux.qza \
--o-visualization demux.qzv
cd ${lgp}/results
qiime dada2 denoise-paired \
--i-demultiplexed-seqs ${lgp}/seqs/demux.qza \
--p-trim-left-f 17 --p-trim-left-r 20 \
--p-trunc-len-f 260 --p-trunc-len-r 260 \
--o-table ${lgp}/results/table.qza \
--o-representative-sequences ${lgp}/results/rep-seqs.qza \
--o-denoising-stats ${lgp}/results/denoising-stats.qza --p-n-threads 30
qiime feature-table summarize \
--i-table ${lgp}/results/table.qza \
--o-visualization ${lgp}/results/table.qzv \
--m-sample-metadata-file ${lgp}/metadata.txt
qiime feature-table tabulate-seqs \
--i-data ${lgp}/results/rep-seqs.qza \
--o-visualization ${lgp}/results/rep-seqs.qzv
## exported-table
qiime tools export \
--input-path ${lgp}/results/table.qza \
--output-path ${lgp}/results/exported-table
## feature-table.tsv
biom convert -i ${lgp}/results/exported-table/feature-table.biom -o ${lgp}/results/exported-table/feature-table.tsv --to-tsv
## rooted-tree.qza
qiime phylogeny align-to-tree-mafft-fasttree \
--i-sequences ${lgp}/results/rep-seqs.qza \
--o-alignment ${lgp}/results/aligned-rep-seqs.qza \
--o-masked-alignment ${lgp}/results/masked-aligned-rep-seqs.qza \
--o-tree ${lgp}/results/unrooted-tree.qza \
--o-rooted-tree ${lgp}/results/rooted-tree.qza
## alpha-rarefaction
qiime diversity alpha-rarefaction \
--i-table ${lgp}/results/table.qza \
--i-phylogeny ${lgp}/results/rooted-tree.qza \
--p-max-depth 100000 \
--m-metadata-file ${lgp}/metadata.txt \
--o-visualization ${lgp}/results/alpha-rarefaction.qzv
## core-metrics-results
qiime diversity core-metrics-phylogenetic \
--i-phylogeny ${lgp}/results/rooted-tree.qza \
--i-table ${lgp}/results/table.qza \
--p-sampling-depth 10000 \
--m-metadata-file ${lgp}/metadata.txt \
--output-dir ${lgp}/results/core-metrics-results
## chao1、simpson、ace、euclidean
qiime diversity alpha \
--i-table ${lgp}/results/table.qza \
--p-metric chao1 \
--o-alpha-diversity ${lgp}/results/core-metrics-results/chao1_vector.qza
qiime diversity alpha \
--i-table ${lgp}/results/table.qza \
--p-metric simpson \
--o-alpha-diversity ${lgp}/results/core-metrics-results/simpson_vector.qza
qiime diversity alpha \
--i-table ${lgp}/results/table.qza \
--p-metric ace \
--o-alpha-diversity ${lgp}/results/core-metrics-results/simpson_vector.qza
qiime diversity beta \
--i-table ${lgp}/results/table.qza \
--p-metric euclidean \
--o-distance-matrix ${lgp}/results/core-metrics-results/euclidean_distance_matrix.qza
## Beta_distance_significance
distance="euclidean jaccard bray_curtis unweighted_unifrac weighted_unifrac"
for i in $distance;
do
for j in `awk 'NR==1{for(k=2;k<=NF;k++) print $k}' ${lgp}/metadata.txt`;
do
qiime diversity beta-group-significance \
--i-distance-matrix ${lgp}/results/core-metrics-results/${i}_distance_matrix.qza \
--m-metadata-file ${lgp}/metadata.txt \
--m-metadata-column ${j} \
--o-visualization ${lgp}/results/core-metrics-results/${i}_${j}_significance.qzv \
--p-pairwise
done
done
# alpha_index_significance
index="observed_features evenness shannon faith_pd chao1 simpson"
for k in $index;
do
qiime diversity alpha-group-significance \
--i-alpha-diversity ${lgp}/results/core-metrics-results/${k}_vector.qza \
--m-metadata-file ${lgp}/metadata.txt \
--o-visualization ${lgp}/results/core-metrics-results/${k}_significance.qzv
done
## export data
mkdir -p ${lgp}/results/core-metrics-results/alpha_diversity ${lgp}/results/core-metrics-results/beta_diversity
for i in $distance;
do
qiime tools export \
--input-path ${lgp}/results/core-metrics-results/${i}_distance_matrix.qza \
--output-path ${lgp}/results/core-metrics-results/beta_diversity/${i}_distance_matrix
done
for k in $index;
do
qiime tools export \
--input-path ${lgp}/results/core-metrics-results/${k}_vector.qza \
--output-path ${lgp}/results/core-metrics-results/alpha_diversity/${k}
done
## annotation 16s
qiime feature-classifier classify-sklearn \
# --i-classifier /database/silva-132-99-nb-341F-806R_classifier.qza \
--i-classifier /home/stu_2/linguopeng/database/silva-132-99-nb-341F-806R_classifier.qza \
--i-reads ${lgp}/results/rep-seqs.qza \
--o-classification ${lgp}/results/taxa/taxonomy.qza --p-n-jobs 30
qiime metadata tabulate \
--m-input-file ${lgp}/results/taxa/taxonomy.qza \
--o-visualization ${lgp}/results/taxa/taxonomy.qzv
## exported-table annotation
qiime tools export \
--input-path ${lgp}/results/taxa/taxonomy.qza \
--output-path ${lgp}/results/exported-table
###############filter-samples
# qiime feature-table filter-samples \
# --i-table ${lgp}/results/table.qza \
# --p-min-frequency 10000 \
# --o-filtered-table ${lgp}/results/table-10k.qza
# qiime taxa barplot \
# --i-table ${lgp}/results/table-10k.qza \
# --i-taxonomy ${lgp}/results/taxa/taxonomy.qza \
# --m-metadata-file ${lgp}/metadata.txt \
# --o-visualization ${lgp}/results/taxa/taxa-bar-plots.qzv
# # filtered-table
# qiime feature-table filter-features \
# --i-table ${lgp}/results/table-10k.qza \
# --p-min-frequency 50 \
# --p-min-samples 4 \
# --o-filtered-table ${lgp}/results/filtered-table.qza
##############un_filter-samples
qiime taxa barplot \
--i-table ${lgp}/results/table.qza \
--i-taxonomy ${lgp}/results/taxa/taxonomy.qza \
--m-metadata-file ${lgp}/metadata.txt \
--o-visualization ${lgp}/results/taxa/taxa-bar-plots.qzv
mv ${lgp}/results/table.qza ${lgp}/results/filtered-table.qza
## rel-abundance
mkdir -p ${lgp}/results/rel-abundance
for i in {2..6};
do
qiime taxa collapse \
--i-table ${lgp}/results/filtered-table.qza \
--i-taxonomy ${lgp}/results/taxa/taxonomy.qza \
--p-level $i \
--o-collapsed-table ${lgp}/results/rel-abundance/table-l${i}.qza
qiime feature-table relative-frequency \
--i-table ${lgp}/results/rel-abundance/table-l${i}.qza \
--o-relative-frequency-table ${lgp}/results/rel-abundance/rel-abun-l${i}.qza
qiime tools export \
--input-path ${lgp}/results/rel-abundance/rel-abun-l${i}.qza \
--output-path ${lgp}/results/rel-abun-l${i}
biom convert -i ${lgp}/results/rel-abun-l${i}/feature-table.biom -o ${lgp}/results/rel-abun-l${i}/rel-abun-l${i}.tsv --to-tsv
done
cd ${lgp}/results/
unzip ${lgp}/results/rep-seqs.qza
mkdir -p ${lgp}/results/picrust2
cp -rf ${lgp}/results/*/data/dna-sequences.fasta ${lgp}/results/picrust2/
cp -rf ${lgp}/results/exported-table/feature-table.biom ${lgp}/results/picrust2/
alias picrust2_pipeline.py="singularity exec /home/stu_2/linguopeng/sif/qiime2_2022.2_240110.sif picrust2_pipeline.py"
alias add_descriptions.py="singularity exec /home/stu_2/linguopeng/sif/qiime2_2022.2_240110.sif add_descriptions.py"
alias pathway_pipeline.py="singularity exec /home/stu_2/linguopeng/sif/qiime2_2022.2_240110.sif add_descriptions.py"
picrust2_pipeline.py -s ${lgp}/results/picrust2/dna-sequences.fasta \
-i ${lgp}/results/picrust2/feature-table.biom \
-o ${lgp}/results/picrust2/picrust2_results \
-p 30
## EC注释结果
add_descriptions.py -i ${lgp}/results/picrust2/picrust2_results/EC_metagenome_out/pred_metagenome_unstrat.tsv.gz -m EC \
-o ${lgp}/results/picrust2/picrust2_results/EC_metagenome_out/pred_metagenome_unstrat_descrip.tsv.gz
## KO添加注释
add_descriptions.py -i ${lgp}/results/picrust2/picrust2_results/KO_metagenome_out/pred_metagenome_unstrat.tsv.gz -m KO \
-o ${lgp}/results/picrust2/picrust2_results/KO_metagenome_out/pred_metagenome_unstrat_descrip.tsv.gz
## pathway添加注释,基于MetaCyc数据库
add_descriptions.py -i ${lgp}/results/picrust2/picrust2_results/pathways_out/path_abun_unstrat.tsv.gz -m METACYC \
-o ${lgp}/results/picrust2/picrust2_results/pathways_out/path_abun_unstrat_descrip.tsv.gz
#
# # #KEGG 交互运行
singularity shell /home/stu_2/linguopeng/sif/qiime2_2022.2_240110.sif
pathway_pipeline.py -i ${lgp}/results/picrust2/picrust2_results/KO_metagenome_out/pred_metagenome_unstrat.tsv.gz \
-o ${lgp}/results/picrust2/picrust2_results/KEGG_pathways \--no_regroup \
--map /home/foodbio/miniconda3/envs/qiime2-2022.2/lib/python3.8/site-packages/picrust2/default_files/pathway_mapfiles/KEGG_pathways_to_KO.tsv
#KEGG通路层级汇总
zcat ${lgp}/results/picrust2/picrust2_results/KO_metagenome_out/pred_metagenome_unstrat.tsv.gz \
> ${lgp}/results/picrust2/picrust2_results/KEGG.KO.txt
python3 /home/foodbio/summarizeAbundance.py \
-i ${lgp}/results/picrust2/picrust2_results/KEGG.KO.txt \
-m /home/foodbio/KO1-4.txt \
-c 2,3,4 -s ',+,+,' -n raw \
-o ${lgp}/results/picrust2/picrust2_results/KEGG
暂无评论