ORA
最后发布时间 : 2024-11-30 20:07:34
浏览量 :
Biomedical Knowledge Mining using GOSemSim and clusterProfiler
KEGG
devtools::install_github("BioinfoFungi/enrichplot2")
library(clusterProfiler)
library(org.Mm.eg.db)
library(KEGG.db)
annotations_orgDb <- AnnotationDbi::select(org.Mm.eg.db,
keys = rownames(wt_control_deg),
columns = c("ENTREZID","GENENAME"),
keytype = "SYMBOL")
kegg<- clusterProfiler::enrichKEGG(gene =annotations_orgDb$ENTREZID, organism = "mmu",
keyType = "SYMBOL",
pvalueCutoff = 0.05,
pAdjustMethod = "BH",
minGSSize = 10,
maxGSSize = 500,
qvalueCutoff = 10000,
use_internal_data = TRUE)
kegg <- setReadable(kegg, 'org.Mm.eg.db', 'ENTREZID')
png(filename = paste0(wt_control_vs_ko_control,"/KEGG.png"),width = 7,height = 6,res = 300,units = "in")
dotplot(kegg,showCategory=20,color="pvalue")
dev.off()
png(filename = paste0(wt_control_vs_ko_control,"/heat.png"),width = 10,height =5,res = 300,units = "in")
heatplot(kegg,showCategory=20,color="pvalue")
dev.off()
write_tsv(arrange(kegg@result,pvalue), file=paste0(wt_control_vs_ko_control,"/KEGG.tsv"))
注意
pvalueCutoff=10000
,dotplot
默认使用在enrichKEGG
函数中设置的pvalueCutoff = 0.05
筛选显著富集的通路。及使用0.05的阈值筛选矫正后的pvalue,进行可视化的。因此如果基因太少则不能富集到通路,此时可以将改值调大,或者修改dotplot
函数的源码实现根据pvalue进行矫正。有关dotplot
源码相关内容见enrichplot源码阅读。
查看感兴趣基因与通路基因的交集
suppressMessages(library(KEGG.db))
suppressMessages(library(clusterProfiler))
suppressMessages(library(org.Mm.eg.db))
ls("package:KEGG.db")
frame = toTable(KEGGPATHID2EXTID)
x <- as.list(KEGG.db::KEGGPATHID2EXTID)
x["mmu04020"][[1]] |> length()
annotations <- AnnotationDbi::select(org.Mm.eg.db,
keys = x["mmu04020"][[1]] ,
columns = c("ENTREZID","SYMBOL"),
keytype = "ENTREZID") |>
na.omit()
annotations$SYMBOL |> length()
intersect(rownames(deg),annotations_foldchanges$SYMBOL)
ggvenn(
list("Calcium-signaling-pathway" = annotations_foldchanges$SYMBOL ,"mcherry-PKH vs mcherry deg" = rownames(deg) ),
stroke_size = 0.6, set_name_size = 2.8, text_size = 5,show_percentage=F
)
GO KEGG
library(clusterProfiler)
library(org.Hs.eg.db)
entrez_id <- bitr(top20_gene, fromType = "SYMBOL",
toType = c("ENTREZID"),
OrgDb = org.Hs.eg.db)%>%
pull("ENTREZID")%>%unique()
kegg<- clusterProfiler::enrichKEGG(gene =entrez_id,organism = 'hsa',pAdjustMethod="none")
kegg <- setReadable(kegg, 'org.Hs.eg.db', 'ENTREZID')
png(filename = "figure/KEGG.png",width = 7,height = 6,res = 300,units = "in")
dotplot(kegg,showCategory=20)
dev.off()
ego<-enrichGO(gene = entrez_id,OrgDb = org.Hs.eg.db,keyType = 'ENTREZID',ont = 'all',readable = T ,pAdjustMethod="BH")
png(filename = "figure/GO.png",width = 10,height = 9,res = 300,units = "in")
colorSel="qvalue"
dotplot(ego,showCategory = 10,split="ONTOLOGY",orderBy = "GeneRatio", color = colorSel) + facet_grid(ONTOLOGY~., scale='free')
dev.off()
KEGG本地化
https://github.com/YuLab-SMU/createKEGGdb
remotes::install_github("YuLab-SMU/createKEGGdb")
library(createKEGGdb)
species <-c("hsa","rno","mmu")
create_kegg_db(species)
install.packages("./KEGG.db_1.0.tar.gz", repos=NULL)
library(KEGG.db)
library(clusterProfiler)
data(geneList, package="DOSE")
gene <- names(geneList)[abs(geneList) > 2]
kk <- enrichKEGG(gene = gene,
organism = 'hsa',
pvalueCutoff = 0.05,
qvalueCutoff = 0.05,
use_internal_data =T)
合并GO结果
ALL
ONTOLOGY | ID | Description | GeneRatio | BgRatio | pvalue | p.adjust | qvalue | geneID | Count |
---|---|---|---|---|---|---|---|---|---|
BP | GO:0006641 | triglyceride metabolic process | 2/5 | 112/18112 | 3.744105884395701e-4 | 3.744105884395701e-4 | 0.009409082606148922 | Scd/Pnpla2 | 2 |
BP | GO:0006639 | acylglycerol metabolic process | 2/5 | 141/18112 | 5.92595747195959e-4 | 5.92595747195959e-4 | 0.009409082606148922 | Scd/Pnpla2 | 2 |
BP | GO:0006638 | neutral lipid metabolic process | 2/5 | 143/18112 | 6.09451941534646e-4 | 6.09451941534646e-4 | 0.009409082606148922 | Scd/Pnpla2 | 2 |
BP | GO:0055088 | lipid homeostasis | 2/5 | 171/18112 | 8.697839126288131e-4 | 8.697839126288131e-4 | 0.010071182146228362 | Scd/Pnpla2 | 2 |
BP | GO:0010898 | positive regulation of triglyceride catabolic process | 1/5 | 12/18112 | 0.0033086990035544606 | 0.0033086990035544606 | 0.016408231859088875 | Pnpla2 | 1 |
BP | GO:0048733 | sebaceous gland development | 1/5 | 13/18112 | 0.0035840280922274648 | 0.0035840280922274648 | 0.016408231859088875 | Scd | 1 |
BP
ID | Description | GeneRatio | BgRatio | pvalue | p.adjust | qvalue | geneID | Count |
---|---|---|---|---|---|---|---|---|
GO:0006641 | triglyceride metabolic process | 2/5 | 112/18112 | 3.744105884395701e-4 | 3.744105884395701e-4 | 0.009409082606148922 | Scd/Pnpla2 | 2 |
GO:0006639 | acylglycerol metabolic process | 2/5 | 141/18112 | 5.92595747195959e-4 | 5.92595747195959e-4 | 0.009409082606148922 | Scd/Pnpla2 | 2 |
GO:0006638 | neutral lipid metabolic process | 2/5 | 143/18112 | 6.09451941534646e-4 | 6.09451941534646e-4 | 0.009409082606148922 | Scd/Pnpla2 | 2 |
GO:0055088 | lipid homeostasis | 2/5 | 171/18112 | 8.697839126288131e-4 | 8.697839126288131e-4 | 0.010071182146228362 | Scd/Pnpla2 | 2 |
GO:0010898 | positive regulation of triglyceride catabolic process | 1/5 | 12/18112 | 0.0033086990035544606 | 0.0033086990035544606 | 0.016408231859088875 | Pnpla2 | 1 |
GO:0048733 | sebaceous gland development | 1/5 | 13/18112 | 0.0035840280922274648 | 0.0035840280922274648 | 0.016408231859088875 | Scd | 1 |