物种组成差异分析可视化
最后发布时间 : 2022-11-05 09:57:52
浏览量 :
KO-WT.txt
ID | log2FC | log2CPM | PValue | FDR | level | MeanA | MeanB | KO1 | KO2 | KO3 | KO4 | KO5 | KO6 | WT1 | WT2 | WT3 | WT4 | WT5 | WT6 |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
ASV_111 | -1.989 | 11.002 | 7.02361314832557e-06 | 0.00115187255632539 | Depleted | 0.048 | 0.194 | 0.079 | 0.048 | 0.052 | 0.046 | 0.022 | 0.04 | 0.222 | 0.294 | 0.192 | 0.249 | 0.142 | 0.064 |
ASV_6 | 0.966 | 14.885 | 1.69931029653765e-05 | 0.00139343444316087 | Enriched | 2.607 | 1.36 | 2.665 | 1.868 | 2.288 | 2.349 | 3.554 | 2.915 | 1.409 | 1.477 | 1.285 | 1.476 | 1.244 | 1.271 |
ASV_72 | 1.221 | 12.138 | 0.00037192159067453 | 0.020331713623541 | Enriched | 0.396 | 0.171 | 0.353 | 0.201 | 0.328 | 0.631 | 0.29 | 0.575 | 0.142 | 0.178 | 0.125 | 0.225 | 0.197 | 0.161 |
ASV_38 | 1.018 | 13.06 | 0.00053201617181832 | 0.0218126630445512 | Enriched | 0.736 | 0.366 | 0.759 | 0.451 | 0.447 | 0.798 | 0.827 | 1.134 | 0.364 | 0.394 | 0.275 | 0.417 | 0.409 | 0.338 |
ASV_147 | 1.21 | 11.144 | 0.0008471096494071 | 0.0277851965005532 | Enriched | 0.187 | 0.081 | 0.247 | 0.177 | 0.164 | 0.167 | 0.181 | 0.184 | 0.079 | 0.147 | 0.05 | 0.056 | 0.039 | 0.113 |
ASV_146 | -1.308 | 11.003 | 0.00145432547576994 | 0.0350184329059697 | Depleted | 0.069 | 0.175 | 0.106 | 0.016 | 0.097 | 0.076 | 0.029 | 0.088 | 0.103 | 0.201 | 0.2 | 0.249 | 0.15 | 0.145 |
火山图
diff = read.table("results/compare/KO-WT.txt", header=T, row.names=1, sep="\t", comment.char="")
diffN = as.data.frame(table(diff$level))
ggplot(diff, aes(x=log2FC, y=log2CPM, color=level)) +
geom_point() + xlim(-4, 4) + theme_classic()+
scale_colour_manual(values=c("green","red","grey")) +
labs(x="log2(fold change)", y="log2(count per million)",
title="KO-WT")+
annotate("text",x=-3,y=15,label=table(diff$level)[1])+
annotate("text",x=3,y=15,label=table(diff$level)[2]) +
theme(panel.background = element_blank(),
panel.grid = element_blank(),
axis.line.x = element_line(size = .5, colour = "black"),
axis.line.y = element_line(size = .5, colour = "black"),
axis.ticks = element_line(size = .5, color = "black"),
axis.text = element_text(size = 7, color = "black"),
legend.position = "right",
legend.background = element_blank(),
legend.key = element_blank(),
legend.text = element_text(size = 7),
text = element_text(family = "sans", size = 7))
热图
taxonomy.txt
OTUID | Kingdom | Phylum | Class | Order | Family | Genus | Species |
---|---|---|---|---|---|---|---|
ASV_1 | Bacteria | Actinobacteria | Actinobacteria | Streptosporangiales | Thermomonosporaceae | Actinocorallia | Unassigned |
ASV_2 | Bacteria | Proteobacteria | Betaproteobacteria | Burkholderiales | Comamonadaceae | Pelomonas | Unassigned |
ASV_3 | Bacteria | Proteobacteria | Betaproteobacteria | Burkholderiales | Comamonadaceae | Rhizobacter | Unassigned |
ASV_4 | Bacteria | Proteobacteria | Betaproteobacteria | Burkholderiales | Comamonadaceae | Rhizobacter | Unassigned |
ASV_8 | Bacteria | Actinobacteria | Actinobacteria | Streptomycetales | Streptomycetaceae | Streptomyces | Unassigned |
ASV_6 | Bacteria | Proteobacteria | Betaproteobacteria | Burkholderiales | Comamonadaceae | Piscinibacter | Unassigned |
library(pheatmap)
input = read.table("results/compare/KO-WT.txt", header=T, row.names=1, sep="\t", comment.char="")
input = subset(input, level %in% c("Enriched","Depleted"))
design = read.table("data/metadata.txt", header=T, row.names=1, sep="\t", comment.char="")
taxonomy = read.table("results/taxonomy.txt", sep = "\t", row.names=1, header=T)
design$group = design$Group
norm = input[,-(1:7)]
if (dim(norm)[1]>1){
idx = rownames(design) %in% colnames(norm)
design = design[idx,]
anno_row = data.frame(Level = input$level, row.names = rownames(input))
anno_col = data.frame(Group = design$Group, row.names = rownames(design))
input$Phylum = taxonomy[rownames(input),"Phylum"]
anno_row = data.frame(Level = input$level, Taxonomy = input$Phylum, row.names = rownames(input))
}
pheatmap(norm,
scale = "row",
cutree_rows=2,cutree_cols = 2,
cluster_cols = T,
annotation_col = anno_col,
annotation_row = anno_row,
# filename = paste("${output}", ".heatmap.pdf", sep=""),
# width=$width, height=$height,
annotation_names_row= T,annotation_names_col=T,
show_rownames=T,show_colnames=T,
# fontsize=$text_size,
display_numbers=F)
曼哈顿图
x = read.table("results/compare/KO-WT.txt", header=T, row.names=1, sep="\t", comment.char="")
x = x[,1:7]
x = na.omit(x)
# 添加物种注释
taxonomy = read.table("results/taxonomy.txt", sep = "\t", row.names=1, header=T)
taxonomy = taxonomy[rownames(x),]
x = cbind(x, taxonomy)
# P值求负对数
x$neglogp = -log10(x$PValue)
x$otu=rownames(x)
x = arrange(x, Kingdom, Phylum, Class, Order, Family, Genus, Species)
x$otu = factor(x$otu, levels=x$otu) # set x order
x$num = 1:dim(x)[1]
per= read.delim("results/tax/sum_p.txt", sep = "\t", row.names=1, header=T)
mean = rowMeans(per)
per = as.data.frame(mean[order(mean, decreasing = T)])
top_tax=head(rownames(per), n=9)
x$tax = x$Phylum
if (length(unique(x$tax)) > length(top_tax)){
x[!(x$tax %in% top_tax),]$tax = "Low Abundance" # no level can get value
}
# 调置标签顺序
label = unique(x$tax)
label = label[!(label %in% "Low Abundance")] # 删除低丰度
x$tax = factor(x$tax, levels = c(label, "Low Abundance"))
# 计算标签中位数
temp = x[x$tax %in% label, c("tax","num")]
mat_mean = aggregate(temp[,-1], by=temp[1], FUN=mean) # mean
# 调整过大的负对数
if (max(x$neglogp)>20){
x[x$neglogp>20,]$neglogp = 20
}
colnames(x)[2]="log2CPM"
# Manhattan plot
FDR = min(x$neglogp[x$level!="NotSig"])
main_theme = theme(panel.background=element_blank(), panel.grid=element_blank(),
axis.line.x=element_line(size=.5, colour="black"), axis.line.y=element_line(size=.5, colour="black"),
axis.ticks=element_line(color="black"), axis.text=element_text(color="black", size=7),
legend.position="right", legend.background=element_blank(), legend.key=element_blank(), legend.text= element_text(size=7),
text=element_text(family="sans", size=7))
p = ggplot(x, aes(x=num, y=neglogp, color=tax, size=log2CPM, shape=level)) +
geom_point(alpha=.7) +
geom_hline(yintercept=FDR, linetype=2, color="lightgrey") +
scale_shape_manual(values=c(25, 17, 20))+
scale_size(breaks=c(5, 10, 15)) +
labs(x="Phylum of Features", y="-log10(P)") +
main_theme +
# theme(legend.position="top") +
scale_x_continuous(breaks=mat_mean$x, labels=mat_mean$tax) +
theme(axis.text.x=element_text(angle=45, hjust=1, vjust=1)) # + ylim(0,${ymax})
# ylim保持多组y轴范围一致,一般在最终出版时才使用
p
-
wangyang2022-11-06 22:24:06删除 回复 取消回复
-
wangyang 回复:wangyang2022-11-06 22:59:54删除 回复 取消回复
-
-
wangyang2022-11-06 22:25:03删除 回复 取消回复
-
wangyang 回复:wangyang2022-11-06 22:39:56删除 回复 取消回复
-
wangyang 回复:wangyang2022-11-06 22:51:02删除 回复 取消回复
-
wangyang 回复:wangyang2022-11-06 22:55:31删除 回复 取消回复
-
zhangyudan 回复:wangyang2022-11-06 23:01:17删除 回复 取消回复
-
-
zhangyudan2022-11-06 22:29:55删除 回复 取消回复
-
test 回复:zhangyudan2022-11-06 22:30:25删除 回复 取消回复
-