物种组成差异分析可视化

最后发布时间 : 2022-11-05 09:57:52 浏览量 :

KO-WT.txt

ID	log2FC	log2CPM	PValue	FDR	level	MeanA	MeanB	KO1	KO2	KO3	KO4	KO5	KO6	WT1	WT2	WT3	WT4	WT5	WT6
ASV_111	-1.989	11.002	7.02361314832557e-06	0.00115187255632539	Depleted	0.048	0.194	0.079	0.048	0.052	0.046	0.022	0.04	0.222	0.294	0.192	0.249	0.142	0.064
ASV_6	0.966	14.885	1.69931029653765e-05	0.00139343444316087	Enriched	2.607	1.36	2.665	1.868	2.288	2.349	3.554	2.915	1.409	1.477	1.285	1.476	1.244	1.271
ASV_72	1.221	12.138	0.00037192159067453	0.020331713623541	Enriched	0.396	0.171	0.353	0.201	0.328	0.631	0.29	0.575	0.142	0.178	0.125	0.225	0.197	0.161
ASV_38	1.018	13.06	0.00053201617181832	0.0218126630445512	Enriched	0.736	0.366	0.759	0.451	0.447	0.798	0.827	1.134	0.364	0.394	0.275	0.417	0.409	0.338
ASV_147	1.21	11.144	0.0008471096494071	0.0277851965005532	Enriched	0.187	0.081	0.247	0.177	0.164	0.167	0.181	0.184	0.079	0.147	0.05	0.056	0.039	0.113
ASV_146	-1.308	11.003	0.00145432547576994	0.0350184329059697	Depleted	0.069	0.175	0.106	0.016	0.097	0.076	0.029	0.088	0.103	0.201	0.2	0.249	0.15	0.145

火山图

diff = read.table("results/compare/KO-WT.txt", header=T, row.names=1, sep="\t", comment.char="")
diffN = as.data.frame(table(diff$level))
ggplot(diff, aes(x=log2FC, y=log2CPM, color=level)) +
  geom_point() + xlim(-4, 4) + theme_classic()+
  scale_colour_manual(values=c("green","red","grey")) +
  labs(x="log2(fold change)", y="log2(count per million)",
       title="KO-WT")+
  annotate("text",x=-3,y=15,label=table(diff$level)[1])+
  annotate("text",x=3,y=15,label=table(diff$level)[2]) +
  theme(panel.background = element_blank(),
                   panel.grid = element_blank(),
                   axis.line.x = element_line(size = .5, colour = "black"),
                   axis.line.y = element_line(size = .5, colour = "black"),
                   axis.ticks = element_line(size = .5, color = "black"),
                   axis.text = element_text(size = 7, color = "black"),
                   legend.position = "right",
                   legend.background = element_blank(),
                   legend.key = element_blank(),
                   legend.text = element_text(size = 7),
                   text = element_text(family = "sans", size = 7))

热图

taxonomy.txt

OTUID	Kingdom	Phylum	Class	Order	Family	Genus	Species
ASV_1	Bacteria	Actinobacteria	Actinobacteria	Streptosporangiales	Thermomonosporaceae	Actinocorallia	Unassigned
ASV_2	Bacteria	Proteobacteria	Betaproteobacteria	Burkholderiales	Comamonadaceae	Pelomonas	Unassigned
ASV_3	Bacteria	Proteobacteria	Betaproteobacteria	Burkholderiales	Comamonadaceae	Rhizobacter	Unassigned
ASV_4	Bacteria	Proteobacteria	Betaproteobacteria	Burkholderiales	Comamonadaceae	Rhizobacter	Unassigned
ASV_8	Bacteria	Actinobacteria	Actinobacteria	Streptomycetales	Streptomycetaceae	Streptomyces	Unassigned
ASV_6	Bacteria	Proteobacteria	Betaproteobacteria	Burkholderiales	Comamonadaceae	Piscinibacter	Unassigned

library(pheatmap)
input = read.table("results/compare/KO-WT.txt", header=T, row.names=1, sep="\t", comment.char="")
input = subset(input, level %in% c("Enriched","Depleted"))
design = read.table("data/metadata.txt", header=T, row.names=1, sep="\t", comment.char="")
taxonomy = read.table("results/taxonomy.txt", sep = "\t", row.names=1, header=T)

design$group = design$Group
norm = input[,-(1:7)]
if (dim(norm)[1]>1){
   idx = rownames(design) %in% colnames(norm)
   design = design[idx,]

   anno_row = data.frame(Level = input$level, row.names = rownames(input))
   anno_col = data.frame(Group = design$Group, row.names = rownames(design))
   input$Phylum = taxonomy[rownames(input),"Phylum"]
   anno_row = data.frame(Level = input$level, Taxonomy = input$Phylum, row.names = rownames(input))
}
pheatmap(norm,
         scale = "row",
         cutree_rows=2,cutree_cols = 2,
         cluster_cols = T,
         annotation_col = anno_col,
         annotation_row = anno_row,
      #    filename = paste("${output}", ".heatmap.pdf", sep=""),
      #   width=$width, height=$height,
         annotation_names_row= T,annotation_names_col=T,
         show_rownames=T,show_colnames=T,
      #   fontsize=$text_size,
         display_numbers=F)

曼哈顿图

x = read.table("results/compare/KO-WT.txt", header=T, row.names=1, sep="\t", comment.char="")
x = x[,1:7]
x = na.omit(x)
# 添加物种注释
taxonomy = read.table("results/taxonomy.txt", sep = "\t", row.names=1, header=T)
taxonomy = taxonomy[rownames(x),]
x = cbind(x, taxonomy)

# P值求负对数
x$neglogp = -log10(x$PValue)

x$otu=rownames(x)
x = arrange(x, Kingdom, Phylum, Class, Order, Family, Genus, Species)
x$otu = factor(x$otu, levels=x$otu)   # set x order
x$num = 1:dim(x)[1]

per= read.delim("results/tax/sum_p.txt", sep = "\t", row.names=1, header=T)
mean = rowMeans(per)
per = as.data.frame(mean[order(mean, decreasing = T)])
top_tax=head(rownames(per), n=9)
x$tax = x$Phylum

if (length(unique(x$tax)) > length(top_tax)){
	x[!(x$tax %in% top_tax),]$tax = "Low Abundance" # no level can get value
}

# 调置标签顺序
label = unique(x$tax)
label = label[!(label %in% "Low Abundance")] # 删除低丰度
x$tax = factor(x$tax, levels = c(label, "Low Abundance"))
# 计算标签中位数
temp = x[x$tax %in% label, c("tax","num")]
mat_mean = aggregate(temp[,-1], by=temp[1], FUN=mean) # mean



# 调整过大的负对数
if (max(x$neglogp)>20){
  x[x$neglogp>20,]$neglogp = 20
}

colnames(x)[2]="log2CPM"

# Manhattan plot
FDR = min(x$neglogp[x$level!="NotSig"])

main_theme = theme(panel.background=element_blank(), panel.grid=element_blank(),
	axis.line.x=element_line(size=.5, colour="black"), axis.line.y=element_line(size=.5, colour="black"),
	axis.ticks=element_line(color="black"), axis.text=element_text(color="black", size=7),
	legend.position="right", legend.background=element_blank(), legend.key=element_blank(), legend.text= element_text(size=7),
	text=element_text(family="sans", size=7))

p = ggplot(x, aes(x=num, y=neglogp, color=tax, size=log2CPM, shape=level)) +
  geom_point(alpha=.7) +
  geom_hline(yintercept=FDR, linetype=2, color="lightgrey") +
  scale_shape_manual(values=c(25, 17, 20))+
  scale_size(breaks=c(5, 10, 15)) +
  labs(x="Phylum of Features", y="-log10(P)") +
  main_theme +
  # theme(legend.position="top") +
  scale_x_continuous(breaks=mat_mean$x, labels=mat_mean$tax) +
  theme(axis.text.x=element_text(angle=45, hjust=1, vjust=1)) # + ylim(0,${ymax})
  # ylim保持多组y轴范围一致，一般在最终出版时才使用
p

物种组成可视化 EasyMicroPlot

wangyang

2022-11-06 22:24:06

删除回复取消回复
- wangyang 回复：wangyang
  
  2022-11-06 22:59:54
  
  删除回复取消回复

wangyang

2022-11-06 22:25:03

删除回复取消回复
- wangyang 回复：wangyang
  
  2022-11-06 22:39:56
  
  删除回复取消回复
- wangyang 回复：wangyang
  
  2022-11-06 22:51:02
  
  删除回复取消回复
- wangyang 回复：wangyang
  
  2022-11-06 22:55:31
  
  删除回复取消回复
- zhangyudan 回复：wangyang
  
  2022-11-06 23:01:17
  
  删除回复取消回复

zhangyudan

2022-11-06 22:29:55

删除回复取消回复
- test 回复：zhangyudan
  
  2022-11-06 22:30:25
  
  删除回复取消回复