展开

统计和可视化代码 Statistics and visualization script

最后发布时间 : 2022-11-11 21:47:29 浏览量 :

宏基因组统计绘图

# 设置结果目录(通常为项目中的result,此处为演示12个样本结果result12)
wd=/c/meta/result12
# 设置脚本所在目录(Script Directory)
sd=/c/EasyMicrobiome/script
# 进入结果目录
cd $wd

物种Metaphlan2

热图

# 显示脚本帮助 help
Rscript $
/metaphlan_hclust_heatmap.R -h
# 按指定分类汇总、排序并取Top25种绘制热图
# -i输入MetaPhlAn2结果转换的spf文件;
# -t指定分类级别,可选Kingdom/Phylum/Class/Order/Family/Genus/Species/Strain(界门纲目科属种株),推荐门,目,属
# -n 输出物种数量,默认为25,最大值为该类型的数量
# -w、-e指定图片的宽和高,单位为毫米(mm)
# -o输出图pdf、表txt前缀,默认Heatmap+(-t)+(-n)

# 科水平Top25热图
Rscript $sd/metaphlan_hclust_heatmap.R
-i metaphlan2/taxonomy.spf
-t Family -n 25
-w 183 -e 118
-o metaphlan2/HeatmapFamily

# 属水平的Top30
Rscript $sd/metaphlan_hclust_heatmap.R
-i metaphlan2/taxonomy.spf
-t Genus -n 30
-w 183 -e 118
-o metaphlan2/HeatmapGenus

维恩图

### 筛选>0.5%的分类
awk 'BEGIN{OFS=FS="\t"}{if(FNR==1) {for(i=2;i<=NF;i++) a[i]=i;} \ else {for(i=2;i<=NF;i++) if(i>0.5) print 1, a[i];}}' \ metaphlan2/taxonomy.tsv \ > metaphlan2/taxonomy_high.tsv wc -l metaphlan2/taxonomy_high.tsv # 本地、或网络绘制 http://www.ehbio.com/test/venn/#/ # 引文:Tong Chen, Haiyan Zhang, Yu Liu, Yong-Xin Liu & Luqi Huang. (2021). EVenn: Easy to create repeatable and editable Venn diagrams and Venn networks online. Journal of Genetics and Genomics, doi: 10.1016/j.jgg.2021.07.007 # 5组比较:-f输入文件,-a/b/c/d/g分组名,-w/u为宽高英寸,-p输出文件名后缀 bash /sp_vennDiagram.sh -f metaphlan2/taxonomy_high.tsv
-a C1 -b C2 -c N1 -d N2 -g N3
-w 4 -u 4
-p C1_C2_N1_N2_N3

功能HUMAnN2

分组聚类热图

bash sd/sp_pheatmap.sh cut -f 1-2 metadata.txt > group.txt # -f输入文件,-H水平聚类,u/v图片宽/高,-P添加行注释文件,-Q添加列注释 bash sd/sp_pheatmap.sh
-f humann2/pathabundance_relab_unstratified.tsv
-H 'TRUE' -u 20 -v 50
-Q group.txt
# 结果为 输入文件名+pheamap.r/pdf,代码和图片

# 水平标准化,-d row,可选column
bash $sd/sp_pheatmap.sh
-f humann2/pathabundance_relab_unstratified.tsv
-H 'TRUE' -u 20 -v 50
-Q group.txt -d row

物种kraken2

Alpha多样性

# 提取种级别注释并抽平至最小测序量,计算6种alpha多样性指数
# 查看帮助
Rscript sd/kraken2alpha.R -h # -d指定最小样本量,默认0为最小值,抽平文件tax_norm.txt,alpha多样性tax_alpha.txt Rscript sd/kraken2alpha.R
--input kraken2/tax_count.mpa
--depth 0
--species kraken2/tax_count.txt
--normalize kraken2/tax_count.norm
--output kraken2/tax_count.alpha

# 绘制Alpha多样性指数,结果为输入文件+类型richness/chao1/ACE/shannon/simpson/invsimpson
# Rscript sd/alpha_boxplot.R -h # 查看参数 Rscript sd/alpha_boxplot.R
-i kraken2/tax_count.alpha
-a shannon
-d metadata.txt
-n Group
-o kraken2/
-w 89 -e 59
# 批量计算6种指数的箱线图+统计
for i in richness chao1 ACE shannon simpson invsimpson;do
Rscript sd/alpha_boxplot.R -i kraken2/tax_count.alpha -a
-d metadata.txt -n Group -w 89 -e 59
-o kraken2/
done

热图

# 转换为metaphalan2 spf格式,分隔符为下划线“
awk 'BEGIN{OFS=FS="\t"}{delete a; a["d"]="unclassified";a["p"]="unclassified";a["c"]="unclassified";a["o"]="unclassified";a["f"]="unclassified";a["g"]="unclassified";a["s"]="unclassified";
split(1,x,"|");for(i in x){split(x[i],b,"_");a[b[1]]=b[2];} \ print a["d"],a["p"],a["c"],a["o"],a["f"],a["g"],a["s"],0;}'
kraken2/tax_count.txt > temp.txt
cut -f 1-7,10- temp.txt > kraken2/tax_count.spf
sed -i '1 s/unclassified\tunclassified\tunclassified\tunclassified\tunclassified\tunclassified\tunclassified/Domain\tPhylum\tClass\tOrder\tFamily\tGenus\tSpecies/'
kraken2/tax_count.spf
# 绘制热图,可选域、门、纲、目、科、属、种(Domain Phylum Class Order Family Genus Species)
tax=Genus
Rscript sd/metaphlan_hclust_heatmap.R \ -i kraken2/tax_count.spf \ -t
-n 25
-w 118 -e 118
-o kraken2/heatmap
$

箱线图

# 绘制属水平Top30箱线图
Rscript sd/metaphlan_boxplot.R \ -i kraken2/tax_count.spf \ -t Genus \ -n 30 \ -o kraken2/boxplot_Genus # 绘制门水平Top10箱线图 Rscript sd/metaphlan_boxplot.R
-i kraken2/tax_count.spf
-t Phylum
-n 10 -w 6 -e 4
-o kraken2/boxplot_Phylum

物种kraken2-braken2

Alpha多样性

# 提取种级别注释并抽平至最小测序量,计算6种alpha多样性指数
# 查看帮助
Rscript sd/otutab_rare.R -h # -d指定最小样本量,默认0为最小值,抽平文件tax_norm.txt,alpha多样性tax_alpha. tax=S Rscript sd/otutab_rare.R
--input kraken2/bracken.{tax}.txt \ --depth 0 --seed 1 \ --normalize kraken2/bracken..norm
--output kraken2/bracken.$.alpha

# 绘制Alpha多样性指数,结果为输入文件+类型richness/chao1/ACE/shannon/simpson/invsimpson
# Rscript sd/alpha_boxplot.R -h # 查看参数 mkdir -p kraken2/
Rscript sd/alpha_boxplot.R \ -i kraken2/bracken..alpha
-a shannon
-d metadata.txt
-n Group
-o kraken2/{tax} \ -w 89 -e 59 # 批量计算6种指数的箱线图+统计 for i in richness chao1 ACE shannon simpson invsimpson;do Rscript sd/alpha_boxplot.R -i kraken2/bracken.{tax}.alpha -a
-d metadata.txt -n Group -w 89 -e 59
-o kraken2/$
done

Beta多样性

# Beta多样性距离矩阵计算
mkdir -p kraken2/beta/
/c/db/win/usearch -beta_div kraken2/bracken.$
.norm
-filename_prefix kraken2/beta/

# PCoA分析输入文件,选择分组,输出文件,图片尺寸mm,统计见beta_pcoa_stat.txt
# 可选距离有 bray_curtis, euclidean, jaccard, manhattan
dis=bray_curtis
Rscript sd/beta_pcoa.R \ --input kraken2/beta/.txt
--design metadata.txt
--group Group
--width 89 --height 59
--output kraken2/pcoa.$.pdf

堆叠柱状图

# 以门(P)/种(S)水平为例,结果包括output.sample/group.pdf两个文件
tax=P
Rscript {sd}/tax_stackplot.R \ --input kraken2/bracken..txt --design metadata.txt
--group Group --output kraken2/bracken.$.stackplot
--legend 10 --width 89 --height 59

附录

windows换行符处理

# 查看行尾是否有^M
cat -A sd/metaphlan_hclust_heatmap.R | head # 转换Windows为Linux换行符 dos2unix sd/metaphlan_hclust_heatmap.R

表格合并

# 按末列注释,必须名叫KO
Rscript $sd/mat_gene2ko.R
-i temp/dbcan2/gene_fam.count
-o dbcan2/fam_merge.count