展开

物种组成差异分析可视化

最后发布时间 : 2022-11-05 09:57:52 浏览量 :

KO-WT.txt

IDlog2FClog2CPMPValueFDRlevelMeanAMeanBKO1KO2KO3KO4KO5KO6WT1WT2WT3WT4WT5WT6
ASV_111-1.98911.0027.02361314832557e-060.00115187255632539Depleted0.0480.1940.0790.0480.0520.0460.0220.040.2220.2940.1920.2490.1420.064
ASV_60.96614.8851.69931029653765e-050.00139343444316087Enriched2.6071.362.6651.8682.2882.3493.5542.9151.4091.4771.2851.4761.2441.271
ASV_721.22112.1380.000371921590674530.020331713623541Enriched0.3960.1710.3530.2010.3280.6310.290.5750.1420.1780.1250.2250.1970.161
ASV_381.01813.060.000532016171818320.0218126630445512Enriched0.7360.3660.7590.4510.4470.7980.8271.1340.3640.3940.2750.4170.4090.338
ASV_1471.2111.1440.00084710964940710.0277851965005532Enriched0.1870.0810.2470.1770.1640.1670.1810.1840.0790.1470.050.0560.0390.113
ASV_146-1.30811.0030.001454325475769940.0350184329059697Depleted0.0690.1750.1060.0160.0970.0760.0290.0880.1030.2010.20.2490.150.145

火山图

diff = read.table("results/compare/KO-WT.txt", header=T, row.names=1, sep="\t", comment.char="")
diffN = as.data.frame(table(diff$level))
ggplot(diff, aes(x=log2FC, y=log2CPM, color=level)) +
  geom_point() + xlim(-4, 4) + theme_classic()+
  scale_colour_manual(values=c("green","red","grey")) +
  labs(x="log2(fold change)", y="log2(count per million)",
       title="KO-WT")+
  annotate("text",x=-3,y=15,label=table(diff$level)[1])+
  annotate("text",x=3,y=15,label=table(diff$level)[2]) +
  theme(panel.background = element_blank(),
                   panel.grid = element_blank(),
                   axis.line.x = element_line(size = .5, colour = "black"),
                   axis.line.y = element_line(size = .5, colour = "black"),
                   axis.ticks = element_line(size = .5, color = "black"),
                   axis.text = element_text(size = 7, color = "black"),
                   legend.position = "right",
                   legend.background = element_blank(),
                   legend.key = element_blank(),
                   legend.text = element_text(size = 7),
                   text = element_text(family = "sans", size = 7))

生信小木屋

热图

taxonomy.txt

OTUIDKingdomPhylumClassOrderFamilyGenusSpecies
ASV_1BacteriaActinobacteriaActinobacteriaStreptosporangialesThermomonosporaceaeActinocoralliaUnassigned
ASV_2BacteriaProteobacteriaBetaproteobacteriaBurkholderialesComamonadaceaePelomonasUnassigned
ASV_3BacteriaProteobacteriaBetaproteobacteriaBurkholderialesComamonadaceaeRhizobacterUnassigned
ASV_4BacteriaProteobacteriaBetaproteobacteriaBurkholderialesComamonadaceaeRhizobacterUnassigned
ASV_8BacteriaActinobacteriaActinobacteriaStreptomycetalesStreptomycetaceaeStreptomycesUnassigned
ASV_6BacteriaProteobacteriaBetaproteobacteriaBurkholderialesComamonadaceaePiscinibacterUnassigned
library(pheatmap)
input = read.table("results/compare/KO-WT.txt", header=T, row.names=1, sep="\t", comment.char="")
input = subset(input, level %in% c("Enriched","Depleted"))
design = read.table("data/metadata.txt", header=T, row.names=1, sep="\t", comment.char="")
taxonomy = read.table("results/taxonomy.txt", sep = "\t", row.names=1, header=T)

design$group = design$Group
norm = input[,-(1:7)]
if (dim(norm)[1]>1){
   idx = rownames(design) %in% colnames(norm)
   design = design[idx,]

   anno_row = data.frame(Level = input$level, row.names = rownames(input))
   anno_col = data.frame(Group = design$Group, row.names = rownames(design))
   input$Phylum = taxonomy[rownames(input),"Phylum"]
   anno_row = data.frame(Level = input$level, Taxonomy = input$Phylum, row.names = rownames(input))
}
pheatmap(norm,
         scale = "row",
         cutree_rows=2,cutree_cols = 2,
         cluster_cols = T,
         annotation_col = anno_col,
         annotation_row = anno_row,
      #    filename = paste("${output}", ".heatmap.pdf", sep=""),
      #   width=$width, height=$height,
         annotation_names_row= T,annotation_names_col=T,
         show_rownames=T,show_colnames=T,
      #   fontsize=$text_size,
         display_numbers=F)

生信小木屋

曼哈顿图

x = read.table("results/compare/KO-WT.txt", header=T, row.names=1, sep="\t", comment.char="")
x = x[,1:7]
x = na.omit(x)
# 添加物种注释
taxonomy = read.table("results/taxonomy.txt", sep = "\t", row.names=1, header=T)
taxonomy = taxonomy[rownames(x),]
x = cbind(x, taxonomy)

# P值求负对数
x$neglogp = -log10(x$PValue)

x$otu=rownames(x)
x = arrange(x, Kingdom, Phylum, Class, Order, Family, Genus, Species)
x$otu = factor(x$otu, levels=x$otu)   # set x order
x$num = 1:dim(x)[1]

per= read.delim("results/tax/sum_p.txt", sep = "\t", row.names=1, header=T)
mean = rowMeans(per)
per = as.data.frame(mean[order(mean, decreasing = T)])
top_tax=head(rownames(per), n=9)
x$tax = x$Phylum

if (length(unique(x$tax)) > length(top_tax)){
	x[!(x$tax %in% top_tax),]$tax = "Low Abundance" # no level can get value
}

# 调置标签顺序
label = unique(x$tax)
label = label[!(label %in% "Low Abundance")] # 删除低丰度
x$tax = factor(x$tax, levels = c(label, "Low Abundance"))
# 计算标签中位数
temp = x[x$tax %in% label, c("tax","num")]
mat_mean = aggregate(temp[,-1], by=temp[1], FUN=mean) # mean



# 调整过大的负对数
if (max(x$neglogp)>20){
  x[x$neglogp>20,]$neglogp = 20
}

colnames(x)[2]="log2CPM"

# Manhattan plot
FDR = min(x$neglogp[x$level!="NotSig"])

main_theme = theme(panel.background=element_blank(), panel.grid=element_blank(),
	axis.line.x=element_line(size=.5, colour="black"), axis.line.y=element_line(size=.5, colour="black"),
	axis.ticks=element_line(color="black"), axis.text=element_text(color="black", size=7),
	legend.position="right", legend.background=element_blank(), legend.key=element_blank(), legend.text= element_text(size=7),
	text=element_text(family="sans", size=7))

p = ggplot(x, aes(x=num, y=neglogp, color=tax, size=log2CPM, shape=level)) +
  geom_point(alpha=.7) +
  geom_hline(yintercept=FDR, linetype=2, color="lightgrey") +
  scale_shape_manual(values=c(25, 17, 20))+
  scale_size(breaks=c(5, 10, 15)) +
  labs(x="Phylum of Features", y="-log10(P)") +
  main_theme +
  # theme(legend.position="top") +
  scale_x_continuous(breaks=mat_mean$x, labels=mat_mean$tax) +
  theme(axis.text.x=element_text(angle=45, hjust=1, vjust=1)) # + ylim(0,${ymax})
  # ylim保持多组y轴范围一致,一般在最终出版时才使用
p

生信小木屋