R语言ceRNA绘图代码
最后发布时间:2022-04-13 15:41:57
浏览量:
ceRNAs network analysis using GDCRNATools
GDCRNATools: integrative analysis of protein coding genes, long non-coding genes, and microRNAs in GDC
github
library(GDCRNATools)
####### ceRNA network analysis #######
deLNC <- c('ENSG00000260920','ENSG00000242125','ENSG00000261211')
dePC <- c('ENSG00000043355','ENSG00000109586','ENSG00000144355')
genes <- c(deLNC, dePC)
samples <- c('TCGA-2F-A9KO-01', 'TCGA-2F-A9KP-01',
'TCGA-2F-A9KQ-01', 'TCGA-2F-A9KR-01',
'TCGA-2F-A9KT-01', 'TCGA-2F-A9KW-01')
rnaExpr <- data.frame(matrix(c(2.7,7.0,4.9,6.9,4.6,2.5,
0.5,2.5,5.7,6.5,4.9,3.8,
2.1,2.9,5.9,5.7,4.5,3.5,
2.7,5.9,4.5,5.8,5.2,3.0,
2.5,2.2,5.3,4.4,4.4,2.9,
2.4,3.8,6.2,3.8,3.8,4.2),6,6),
stringsAsFactors=FALSE)
rownames(rnaExpr) <- genes
colnames(rnaExpr) <- samples
mirExpr <- data.frame(matrix(c(7.7,7.4,7.9,8.9,8.6,9.5,
5.1,4.4,5.5,8.5,4.4,3.5,
4.9,5.5,6.9,6.1,5.5,4.1,
12.4,13.5,15.1,15.4,13.0,12.8,
2.5,2.2,5.3,4.4,4.4,2.9,
2.4,2.7,6.2,1.5,4.4,4.2),6,6),
stringsAsFactors=FALSE)
colnames(mirExpr) <- samples
rownames(mirExpr) <- c('hsa-miR-340-5p','hsa-miR-181b-5p',
'hsa-miR-181a-5p', 'hsa-miR-181c-5p',
'hsa-miR-199b-5p','hsa-miR-182-5p')
rnaExpr[1:3,1:3]
mirExpr[1:3,1:3]
ceOutput <- gdcCEAnalysis(lnc = deLNC,
pc = dePC,
lnc.targets = 'starBase',
pc.targets = 'starBase',
rna.expr = rnaExpr,
mir.expr = mirExpr)
ceOutput
# ceOutput2 <- ceOutput[ceOutput$hyperPValue<0.01 &
# ceOutput$corPValue<0.01 & ceOutput$regSim != 0,]
edges <- gdcExportNetwork(ceNetwork = ceOutput, net = 'edges')
nodes <- gdcExportNetwork(ceNetwork = ceOutput, net = 'nodes')
edges
nodes
library(ggplot2)
library(ggridges)
head(diamonds)
ggplot(diamonds, aes(x = price, y = cut,fill=cut,
group=cut, height=..density..)) +
geom_density_ridges(scale = 4) +
scale_y_discrete(expand = c(0, 0)) + # will generally have to set the `expand` option
scale_x_continuous(expand = c(0, 0)) + # for both axes to remove unneeded padding
coord_cartesian(clip = "off") + # to avoid clipping of the very top of the top ridgeline
theme_ridges()
Venn
png(
file = "figure/ggvenn.png",
width = 15, height = 15, units = "cm", bg = "white", res = 300
)
ggvenn(
list("TCGA Mutation" = TCGA_mutation, "Sequencing Mutation" = my_mutation),
stroke_size = 0.5, set_name_size = 5, text_size = 5
)
dev.off()
################################################################################
#
# 食管癌GEO ceRNA
#
################################################################################
{
library("optparse")
library(tidyverse)
library(tidyverse)
library(ggrepel)
library(edgeR)
library(pheatmap)
library('SpidermiR')
library("ggvenn")
library(clusterProfiler)
library(org.Hs.eg.db)
library(ggpubr)
library(lattice)
library(survivalROC)
option_list = list(make_option(c("-d","--directory"), type = "character", help = "test"))
args <- parse_args(OptionParser(option_list=option_list))
if(!is.null(args$directory)){
if(!file.exists(args$directory)){
message("请确保 ",args$directory," 存在!")
q()
}
setwd(args$directory)
message("切换工作目录到:",args$directory)
}
if(!file.exists("figures")){
dir.create(file.path("figures"))
}
if(!file.exists("figures/GEO_Volcano_heatmap")){
dir.create(file.path("figures/GEO_Volcano_heatmap"))
}
if(!file.exists("figures/venn")){
dir.create(file.path("figures/venn"))
}
if(!file.exists("figures/survival")){
dir.create(file.path("figures/survival"))
}
if(!file.exists("figures/cox")){
dir.create(file.path("figures/cox"))
}
if(!file.exists("result/enrichment")){
dir.create(file.path("result/enrichment"))
}
}
{
IS_DEBUG=T
LOG_FILENAME= "report_download.txt"
IS_EMPTY=T
LOG_FILENAME= source("/home/wangyang/workspace/bioinfo_analysis/Rscript/tools/tools.R")
}
####################################################
## 公共函数
###################################################
{
getDeg <- function(deg){
res <- deg %>% rownames_to_column("symbol") %>%
dplyr::select(symbol,logFC=logFC,Pvalue=adj.P.Val)
return(res)
}
Pheatmap <- function(deg,expr,filename){
deg_gene <- getDeg(deg)%>%
filter(abs(logFC)>2,Pvalue<0.01)%>%
arrange(desc(abs(logFC)))%>%
pull("symbol")
deg_expr <- expr[deg_gene,]%>%
{log10(cpm(.)+0.01)}
#deg_expr <- t(scale(t(deg_expr)))
#deg_expr[deg_expr > 2] = 2
#deg_expr[deg_expr < -2] = -2
col <- colorRampPalette(c("green","white","red"))(256)
pheatmap(deg_expr,col=col,fontsize=8,show_colnames =F,show_rownames = F,
filename = paste0("figures/",filename,".png"))
return(paste0("figures/",filename,".png"))
}
Volcano <-function(deg,filename,title){
# 火山图----------------------
getDeg(deg)%>%
mutate(direction = factor(ifelse(Pvalue < 0.01 & abs(logFC)>2,
ifelse(logFC>1,"Up","Down"),
"NS"),levels = c("Up","Down","NS"))) %>%
na.omit()%>%
ggplot(aes(x=logFC,y=-log10(Pvalue),colour=direction))+
geom_point(alpha=0.6) +
scale_color_manual(values = c("red","green","#000000"))+
xlab(expression(log[2]("Fold Change")))+
ylab(expression(-log[10]("P Value")))+
theme_bw()+
theme(legend.title = element_blank(),
plot.title = element_text(hjust = 0.5,size = 24, face= "bold"),
axis.title = element_text(size=20),
legend.text= element_text(size=20),
aspect.ratio=1)+
geom_vline(xintercept = c(-2,2),lty=2,col="black",lwd=0.6)+
geom_hline(yintercept = -log10(0.01),lty=2,col="black",lwd=0.6) +
labs(title = title)+
geom_text_repel(data=. %>%filter(Pvalue<0.001,abs(logFC)>2),
aes(label=symbol),
size=3,
segment.color="black",
show.legend = F)+
ggsave(filename = paste0("figures/",filename,".png"),width = 20,height = 15,units="cm")
return(paste0("figures/",filename,".png"))
}
get_mircode <- function(){
if(!file.exists("result/mircode.rda")){
mircode <- read.table("/home/wangyang/workspace/bioinfo_analysis/Rscript/data/mircode_highconsfamilies.txt",sep = "\t",header = T)
table(mircode$gene_class)
lncmiRcode = mircode[mircode$gene_class %in%c("lncRNA, intergenic","lncRNA, overlapping"),1:4]
head(lncmiRcode$microrna)
library(stringr)
p1 = str_starts(lncmiRcode$microrna,"miR-")
table(p1)
p2 = str_starts(lncmiRcode$microrna,"let-")
table(p2)
lncmiRcode_let <- lncmiRcode[p2,]%>%
dplyr::select(c("gene_symbol","microrna"))%>%
mutate(miRNA=str_replace_all(microrna,"let-",""))%>%
separate_rows(miRNA,sep = "/",convert = T)%>%
mutate_at(vars(contains("miRNA")), ~ paste0("hsa-let-",.))%>%
mutate(miRNA = str_extract(miRNA,"hsa-let-[0-9]+[a-z]?"))%>%
dplyr::select(c("gene_symbol","miRNA"))
lncmiRcode_miR <- lncmiRcode[p1,]%>%
dplyr::select(c("gene_symbol","microrna"))%>%
mutate(miRNA=str_replace_all(microrna,"miR-",""))%>%
separate_rows(miRNA,sep = "/",convert = T)%>%
mutate_at(vars(contains("miRNA")), ~ paste0("hsa-mir-",.))%>%
mutate(miRNA = str_extract(miRNA,"hsa-mir-[0-9]+[a-z]?"))%>%
dplyr::select(c("gene_symbol","miRNA"))
lncmiRcode_f <- rbind(lncmiRcode_let,lncmiRcode_miR)
saveRDS(lncmiRcode_f,file = "result/mircode.rda")
return(lncmiRcode_f)
}
return(readRDS("result/mircode.rda"))
}
Volcano_sig <-function(deg,title=NULL,filename=NULL,gene=NULL){
# 火山图----------------------
res <- getDeg(deg)%>%
mutate(direction = factor(ifelse(Pvalue < 0.01 & abs(logFC)>2,
ifelse(logFC>1,"Up","Down"),
"NS"),levels = c("Up","Down","NS"))) %>%
na.omit()%>%
ggplot(aes(x=logFC,y=-log10(Pvalue),colour=direction))+
geom_point(alpha=0.6) +
scale_color_manual(values = c("red","green","#000000"))+
xlab(expression(log[2]("Fold Change")))+
ylab(expression(-log[10]("P Value")))+
theme_bw()+
theme(legend.title = element_blank(),
plot.title = element_text(hjust = 0.5,size = 24, face= "bold"),
axis.title = element_text(size=20),
legend.text= element_text(size=20),
aspect.ratio=1)+
geom_vline(xintercept = c(-2,2),lty=2,col="black",lwd=0.6)+
geom_hline(yintercept = -log10(0.01),lty=2,col="black",lwd=0.6)
if(!is.null(title)){
res <- res+labs(title = title)
}
if(!is.null(gene)){
res <- res + geom_text_repel(data=. %>%filter(symbol %in% gene),
aes(label=symbol),
size=3,
segment.color="black",
show.legend = F)
}
if(!is.null(filename)){
res <- res + ggsave(filename = paste0("figures/",filename,".png"),width = 20,height = 15,units="cm")
}
return(res)
}
basesave <- function(expr, filename, print=T) {
#extension
exten = stringr::str_match(filename, "\\.(\\w+)$")[, 2]
switch(exten,
png = {
png(filename)
eval(expr, envir = parent.frame())
dev.off()
},
{stop("filetype not recognized")})
#print?
if (print) eval(expr, envir = parent.frame())
invisible(NULL)
}
}
#####################################################
## 热图火山图
#####################################################
(function(){
deg <- readRDS("result/limma_mRNA_deg.rds")
Volcano(deg,"GEO_Volcano_heatmap/1_mRNA_Volcano",title = "mRNA Volcano")
expr <- readRDS("result/gset_expr_annot_mRNA_GSE89102.rda")
Pheatmap(deg,expr,filename="GEO_Volcano_heatmap/1_mRNA_pheatmap")
deg <- readRDS("result/limma_lnRNA_deg.rds")
Volcano(deg,"GEO_Volcano_heatmap/1_lnRNA_Volcano",title = "lnRNA Volcano")
expr <- readRDS("result/gset_expr_annot_lnRNA_GSE89102.rda")
Pheatmap(deg,expr,filename="GEO_Volcano_heatmap/1_lnRNA_pheatmap")
deg <- readRDS("result/limma_miRNA_deg_2.rds")
Volcano(deg,"GEO_Volcano_heatmap/1_miRNA_Volcano",title = "miRNA Volcano")
if(F){
res <- getDeg(deg)%>%
mutate(direction = factor(ifelse(Pvalue < 0.05 & abs(logFC)>2,
ifelse(logFC>1,"Up","Down"),
"NS"),levels = c("Up","Down","NS"))) %>%
na.omit()%>%
ggplot(aes(x=logFC,y=-log10(Pvalue),colour=direction))+
geom_point(alpha=0.6) +
scale_color_manual(values = c("red","green","#000000"))+
xlab(expression(log[2]("Fold Change")))+
ylab(expression(-log[10]("P Value")))+
theme_bw()+
theme(legend.title = element_blank(),
plot.title = element_text(hjust = 0.5,size = 24, face= "bold"),
axis.title = element_text(size=20),
legend.text= element_text(size=20),
aspect.ratio=1)+
geom_vline(xintercept = c(-2,2),lty=2,col="black",lwd=0.6)+
geom_hline(yintercept = -log10(0.05),lty=2,col="black",lwd=0.6)
print(res)
}
expr <- readRDS("result/gset_expr_annot_miRNA_GSE59973.rda")
Pheatmap(deg,expr,filename="GEO_Volcano_heatmap/1_miRNA_pheatmap")
if(F){
deg_gene <- getDeg(deg)%>%
filter(abs(logFC)>2,Pvalue<0.05)%>%
arrange(desc(abs(logFC)))%>%
pull("symbol")
deg_expr <- expr[deg_gene,]
#deg_expr <- t(scale(t(deg_expr)))
#deg_expr[deg_expr > 2] = 2
#deg_expr[deg_expr < -2] = -2
col <- colorRampPalette(c("green","white","red"))(256)
pheatmap(deg_expr,col=col,fontsize=8,show_colnames =F,show_rownames = F)
# return(paste0("figures/",filename,".png"))
}
})()
if(F){
deg <- readRDS("result/miRNA_iso_expr_DESeq.rda")
Volcano(deg,"1_miRNA_iso_Volcano",title = "miRNA Volcano")
expr <- readRDS("result/miRNA_iso_expr.rda")
Pheatmap(deg,expr,filename="1_miRNA_iso_pheatmap")
deg_gene <- getDeg(deg)%>%
filter(abs(logFC)>1,Pvalue<0.05)%>%
arrange(desc(abs(logFC)))%>%
pull("symbol")
deg_expr <- expr[deg_gene,]%>%
{log10(cpm(.)+0.01)}
#deg_expr <- t(scale(t(deg_expr)))
#deg_expr[deg_expr > 2] = 2
#deg_expr[deg_expr < -2] = -2
col <- colorRampPalette(c("green","white","red"))(256)
pheatmap(deg_expr,col=col,fontsize=8,show_colnames =F,show_rownames = F,
filename = "figures//1_miRNA_iso_pheatmap.png")
}
#############################################
## 差异基因提取 变量
#############################################
miRNA_deg_sig <- getDeg(readRDS("result/limma_miRNA_deg_2.rda"))%>%
filter(abs(logFC)>2,Pvalue<0.01)%>%
mutate(symbol=sub("R","r",symbol))%>%
mutate(symbol= case_when(grepl("hsa-mir",symbol)~str_extract(symbol,"hsa-mir-[0-9]+[a-z]?"),
grepl("hsa-let",symbol)~str_extract(symbol,"hsa-let-[0-9]+[a-z]?")))%>%
arrange(Pvalue)
report("以abs(logFC)>2 & Pvalue<0.01 为条件筛选差异miRNA共 ",length(unique(miRNA_deg_sig$symbol))," 个")
lncRNA_deg_sig <- getDeg(readRDS("result/limma_lnRNA_deg.rda"))%>%
filter(abs(logFC)>2,Pvalue<0.01)
cat("以abs(logFC)>2 & Pvalue<0.01 为条件筛选差异lncRNA共 ",length(unique(lncRNA_deg_sig$symbol))," 个")
mRNA_deg_sig <- getDeg(readRDS("result/limma_mRNA_deg.rda"))%>%
filter(abs(logFC)>2,Pvalue<0.01)
cat("以abs(logFC)>2 & Pvalue<0.01 为条件筛选差异mRNA共 ",length(unique(mRNA_deg_sig$symbol))," 个")
lnRNA_expr <- readRDS("result/lncRNA_expr.rda")%>%
dplyr::select(!matches("TCGA-([A-Z 0-9]*?)-([A-Z 0-9]*?)-(11.*?)-(.*?)"))%>%
rownames_to_column("symbol")%>%
mutate(symbol=str_extract(symbol,"[A-Z 0-9 -]+"))
lnRNA_expr <- lnRNA_expr[!duplicated(lnRNA_expr$symbol),]%>%
rownames_to_column("id")%>%
select(-id)%>%
column_to_rownames("symbol")
sum(rownames(lnRNA_expr) %in% lncRNA_deg_sig$symbol)
lnRNA_expr <- lnRNA_expr[rownames(lnRNA_expr) %in% lncRNA_deg_sig$symbol,]
mRNA_expr <- readRDS("result/mRNA_expr.rda")%>%
dplyr::select(!matches("TCGA-([A-Z 0-9]*?)-([A-Z 0-9]*?)-(11.*?)-(.*?)"))
miRNA_expr <- readRDS("result/miRNA_expr.rda")%>%
dplyr::select(!matches("TCGA-([A-Z 0-9]*?)-([A-Z 0-9]*?)-(11.*?)-(.*?)"))%>%
rownames_to_column("symbol")%>%
mutate(symbol= case_when(grepl("hsa-mir",symbol)~str_extract(symbol,"hsa-mir-[0-9]+[a-z]?"),
grepl("hsa-let",symbol)~str_extract(symbol,"hsa-let-[0-9]+[a-z]?")))%>%
aggregate(.~symbol,.,mean)%>%
column_to_rownames("symbol")
#############################################
## lnRNA ralated miRNA
#############################################
## 获得miRNA与lnRNA对应关系
(function(){
cat("\n***************************lnRNA ralated miRNA***************************")
if(T){
mircode <- read_tsv("/home/wangyang/workspace/pipeline/CANCERjupyter/Common/lncRNA_miRCODE_miRNA.csv")%>%
plyr::rename(c(lncRNA="gene_symbol"))
}else{
mircode <- get_mircode()
}
cat("从miRcode数据库获得lnRNA-miRNA的对应关系:共有 ",dim(mircode)[1]," 个")
lnRNAPrediction <- mircode%>%
filter(gene_symbol %in% rownames(lnRNA_expr))
cat("在miRcode中找到的lnRNA共有 ",length(unique(lnRNAPrediction$gene_symbol))," 个,","对应的miRNA有 ",length(unique(lnRNAPrediction$miRNA))," 个")
miRNA_lnRNA <- lnRNAPrediction%>%
filter(miRNA %in% miRNA_deg_sig$symbol)%>%
plyr::rename(c(gene_symbol="lnRNA"))%>%
dplyr::select(-gene_id)
cat("预测得到的 ",length(unique(lnRNAPrediction$miRNA))," 个miRNA与差异的miRNA取交集, 得到 ",length(unique(miRNA_lnRNA$miRNA)), "个miRNA")
miRNA_lnRNA <- miRNA_lnRNA[!duplicated(miRNA_lnRNA),]
saveRDS(miRNA_lnRNA,file = "result/miRNA_lnRNA.rda")
miRNA_intersect <- unique(miRNA_lnRNA$miRNA)
lnRNA_intersect <- unique(miRNA_lnRNA$lnRNA)
#length(miRNA_intersect)
#length(lnRNA_intersect)
cat("最后得到 ",length(lnRNA_intersect)," 个lncRNA可以与 ",length(miRNA_intersect)," 个miRNA相互作用")
#png(filename="plot_%02d.png")
png(file="figures/venn/ggvenn_lnRNA_prediction_miRNA.png",
width = 15,height = 15,units = "cm",bg = "white",res=300)
res <- ggvenn(
list("lncRNA-ralated miRNA"=lnRNAPrediction$miRNA,"DE miRNAs"=miRNA_deg_sig$symbol),
stroke_size = 0.5, set_name_size = 5,text_size = 5
)
print(res)
dev.off()
cat("lnRNA预测miRNA与差异miRNA Venn 图见figures/ggvenn_lnRNA_prediction_miRNA.png")
cat("***************************lnRNA ralated miRNA***************************")
})()
#############################################
## miRNA ralated mRNA
#############################################
(function(){
cat("\n***************************miRNA ralated mRNA***************************")
miRNA_lnRNA <- readRDS("result/miRNA_lnRNA.rda")
miRNA_intersect <- unique(miRNA_lnRNA$miRNA)
## test start hsa-let-7a-2 预测时去掉 -2
if(F){
miRNAprediction <- SpidermiRdownload_miRNAprediction(mirna_list=c("hsa-let-7a"))
}
## test end
if(!file.exists("result/miRNAprediction.rda")){
miRNAprediction <- SpidermiRdownload_miRNAprediction(mirna_list=miRNA_intersect)
save(miRNAprediction,file = "result/miRNAprediction.rda")
}else{
load("result/miRNAprediction.rda")
}
cat("用 ",length(miRNA_intersect)," 个miRMA使用[R package SpidermiR]预测得到 ",length(miRNAprediction$V2)," 个mRNA")
miRNA_mRNA <- miRNAprediction%>%
plyr::rename(c(V1="miRNA",V2="mRNA"))%>%
filter(mRNA %in% mRNA_deg_sig$symbol)
miRNA_mRNA <- miRNA_mRNA[!duplicated(miRNA_mRNA),]
saveRDS(miRNA_mRNA,file = "result/miRNA_mRNA.rda")
cat("用预测得到mRNA与差异的miRNA去交集, 得到 ",length(unique(miRNA_mRNA$mRNA)), "个mRNA")
miRNA_intersect <- unique(miRNA_mRNA$miRNA)
mRNA_intersect <- unique(miRNA_mRNA$mRNA)
#length(miRNA_intersect)
#length(mRNA_intersect)
cat("最后得到 ",length(miRNA_intersect)," 个miRNA可以靶向 ",length(mRNA_intersect)," 个mRNA相互作用")
library("ggvenn")
png(file="figures/venn/ggvenn_miRNA_predication_mRNA.png",
width = 15,height = 15,units = "cm",bg = "white",res=300)
res <- ggvenn(
list("miRNA-ralated mRNA"=miRNAprediction$V2,"DE mRNAs"=mRNA_deg_sig$symbol),
stroke_size = 0.5, set_name_size = 5,text_size = 5
)
print(res)
dev.off()
cat("miRNA预测mRNA与差异miRNA Venn 图见figures/ggvenn_miRNA_predication_mRNA.png")
cat("***************************lnRNA ralated miRNA***************************")
})()
if(F){
list<-SpidermiRdownload_miRNAvalidate(validated)
list%>%
filter(V1 %in% miRNA_intersect)
org<-SpidermiRquery_species(species)
net_type<-SpidermiRquery_networks_type(organismID=org[6,])
net_shar_prot<-SpidermiRquery_spec_networks(organismID = org[6,]
,network = "pred")
}
#############################################
### 构建ceRNA网络
#############################################
if(F){
getDeg(readRDS("result/miRNA_expr_DESeq.rda"))%>%
mutate(direction = factor(ifelse(Pvalue < 0.01 & abs(logFC)>2,
ifelse(logFC>1,"Up","Down"),
"NS"),levels = c("Up","Down","NS"))) %>%
na.omit()%>%
ggplot(aes(x=logFC,y=-log10(Pvalue),colour=direction))+
geom_point(alpha=0.6) +
scale_color_manual(values = c("red","green","#000000"))+
xlab(expression(log[2]("Fold Change")))+
ylab(expression(-log[10]("P Value")))+
theme_bw()+
theme(legend.title = element_blank(),
plot.title = element_text(hjust = 0.5,size = 24, face= "bold"),
axis.title = element_text(size=20),
legend.text= element_text(size=20),
aspect.ratio=1)+
geom_vline(xintercept = c(-2,2),lty=2,col="black",lwd=0.6)+
geom_hline(yintercept = -log10(0.01),lty=2,col="black",lwd=0.6) +
geom_text_repel(data=. %>%filter(symbol %in% c("hsa-mir-301b")),
aes(label=symbol),
size=3,
segment.color="black",
show.legend = F)
}
report("\n***************************构建ceRNA网络***************************")
miRNA_lnRNA <- readRDS("result/miRNA_lnRNA.rda")
miRNA_mRNA <- readRDS("result/miRNA_mRNA.rda")
lnRNA_miRNA_mRNA <- merge(miRNA_lnRNA,miRNA_mRNA,by="miRNA")
if(F){
miRNA <- unique(lnRNA_miRNA_mRNA$miRNA)
length(miRNA)
#Volcano_sig(readRDS("result/miRNA_expr_DESeq.rda"),gene=miRNA)
lnRNA <- unique(lnRNA_miRNA_mRNA$lnRNA)
length(lnRNA)
#Volcano_sig(readRDS("result/lnRNA_expr_DESeq.rda"),gene=lnRNA)
mRNA <-unique(lnRNA_miRNA_mRNA$mRNA)
length(mRNA)
}
if(T){
lncRNA_deg_ce <- lncRNA_deg_sig%>%
plyr::rename(c(symbol="lnRNA"))%>%
mutate(lnRNA_direction=ifelse(logFC>0,"up","down"))%>%
dplyr::select(lnRNA,lnRNA_direction)
miRNA_deg_ce <- miRNA_deg_sig%>%
plyr::rename(c(symbol="miRNA"))%>%
mutate(miRNA_direction=ifelse(logFC>0,"up","down"))%>%
dplyr::select(miRNA,miRNA_direction)
mRNA_deg_ce <- mRNA_deg_sig%>%
plyr::rename(c(symbol="mRNA"))%>%
mutate(mRNA_direction=ifelse(logFC>0,"up","down"))%>%
dplyr::select(mRNA,mRNA_direction)
lnRNA_miRNA_mRNA_ <- lnRNA_miRNA_mRNA
lnRNA_miRNA_mRNA <- lnRNA_miRNA_mRNA_ %>%
inner_join(lncRNA_deg_ce,by="lnRNA")%>%
inner_join(miRNA_deg_ce,by="miRNA")%>%
inner_join(mRNA_deg_ce,by="mRNA")%>%
filter(lnRNA_direction!=miRNA_direction)%>%
filter(miRNA_direction!=mRNA_direction)
}
#saveRDS(lnRNA_miRNA_mRNA,file = "result/lnRNA_miRNA_mRNA.rda")
lnRNA_miRNA_mRNA <- readRDS("result/lnRNA_miRNA_mRNA.rda")
unique(lnRNA_miRNA_mRNA$miRNA)
length(unique(lnRNA_miRNA_mRNA$lnRNA))
ce_lnRNA <- unique(lnRNA_miRNA_mRNA$lnRNA)
ce_lnRNA <- ce_lnRNA[ce_lnRNA %in% rownames(lnRNA_expr)]
ce_miRNA <- unique(lnRNA_miRNA_mRNA$miRNA)
ce_miRNA <- ce_lnRNA[ce_miRNA %in% rownames(mRNA_expr)]
ce_mRNA <- unique(lnRNA_miRNA_mRNA$mRNA)
ce_mRNA <- ce_lnRNA[ce_miRNA %in% rownames(mRNA_expr)]
cytoscape <- function(lnRNA_miRNA_mRNA,filename){
ce_lnRNA <- unique(lnRNA_miRNA_mRNA$lnRNA)
ce_miRNA <- unique(lnRNA_miRNA_mRNA$miRNA)
ce_mRNA <- unique(lnRNA_miRNA_mRNA$mRNA)
report("ceRNA网络中有: ", length(ce_lnRNA),
" 个lnRNA对应 ",length(ce_miRNA)," 个miRNA, ",
length(ce_miRNA)," 的miRNA对应 ",
length(ce_mRNA), "的mRNA")
miRNA_lnRNA <<- unique(lnRNA_miRNA_mRNA%>%dplyr::select(lnRNA,miRNA))
miRNA_mRNA <<- unique(lnRNA_miRNA_mRNA%>%dplyr::select(miRNA,mRNA))
lncRNA_deg_ce <- lncRNA_deg_sig%>%
plyr::rename(c(symbol="lnRNA"))%>%
filter(lnRNA %in% ce_lnRNA)%>%
mutate(direction=ifelse(logFC>0,"up","down"),type="lncRNA")%>%
dplyr::select(name=lnRNA,direction,type)
miRNA_deg_ce <- miRNA_deg_sig%>%
plyr::rename(c(symbol="miRNA"))%>%
filter(miRNA %in% ce_miRNA)%>%
mutate(direction=ifelse(logFC>0,"up","down"),type="miRNA")%>%
dplyr::select(name=miRNA,direction,type)
mRNA_deg_ce <- mRNA_deg_sig%>%
plyr::rename(c(symbol="mRNA"))%>%
filter(mRNA %in% ce_mRNA)%>%
mutate(direction=ifelse(logFC>0,"up","down"),type="mRNA")%>%
dplyr::select(name=mRNA,direction,type)
cytoscape_type <- bind_rows(mRNA_deg_ce,miRNA_deg_ce,lncRNA_deg_ce)%>%
mutate(type = str_c(direction,type,sep="_"))%>%
dplyr::select(-2)
write.csv(cytoscape_type,file = paste0("result/",filename,"_type.csv"),row.names = F,quote = F)
report("写入网络节点类型到 ",paste0("result/",filename,"_type.csv"), " 共有 ",dim(cytoscape_type)[1]," 个")
ceRAN_pair1 <- miRNA_lnRNA%>%
dplyr::select(miRNA,name=lnRNA)
ceRAN_pair2 <- miRNA_mRNA%>%
dplyr::select(miRNA,name=mRNA)
cytoscape_input <- bind_rows(ceRAN_pair1,ceRAN_pair2)
write.csv(cytoscape_input,file = paste0("result/",filename,"_input.csv"),row.names = F,quote = F)
report("写入网络节点关系:",paste0("result/",filename,"_input.csv")," 共有 ",dim(cytoscape_input)[1]," 个")
}
cytoscape(lnRNA_miRNA_mRNA,filename = "geo__")
a <- unique(lnRNA_miRNA_mRNA$mRNA)
length(a)
write.csv(a,file = "result/gene_all.csv",quote = F,row.names = F)
genes <- "MDM2,CCN1,HIF1A,TGFB1,MYCN,MXD1,NSD3,BTG1,PPP3R1,PPP6C"
genes <- unlist(str_split(genes,pattern = ","))
lnRNA_miRNA_mRNA_top10 <- lnRNA_miRNA_mRNA%>%
filter(mRNA %in% genes)
cytoscape(lnRNA_miRNA_mRNA_top10,filename = "geo_top10")
genes %in% unique(lnRNA_miRNA_mRNA_top10$mRNA)
unique(lnRNA_miRNA_mRNA_top10$mRNA)
unique(lnRNA_miRNA_mRNA_top10$lnRNA)
#Volcano_sig(readRDS("result/mRNA_expr_DESeq.rda"),gene=mRNA)
report("***************************构建ceRNA网络***************************")
#############################################
## lncRNA miRNA mRNA 生存分析
#############################################
report("\n***************************lncRNA miRNA mRNA 生存分析***************************")
library(survival)
library(survminer)
survival_p <- function(data,gene,prefix,pcutoff=0.05){
low_high <- ifelse(data[,gene]<=median(data[,gene]),"Low","Hight")
diff <- survdiff(Surv(futime_year,fustat)~low_high,data = data)
pVal <- 1 -pchisq(diff$chisq,df =1)
pValue <- signif(pVal,4)
if(pValue<pcutoff){
fit <- surv_fit(Surv(futime_year,fustat) ~ low_high, data = data)
ggsurvplot(fit, pval=T, risk.table=F,
legend.title=gene,
risk.table.height = 0.3,data = data)+
ggsave(filename = paste0("figures/survival/",prefix,"-",gene,".png"))
}
return(pValue)
}
Survival <- function(expr,gene,clinical,prefix,pcutoff=0.05){
expr_survival <- expr[gene,]%>%
t()%>%
as.data.frame()%>%
rownames_to_column("TCGA_full_id")%>%
mutate(patient_id =substr(TCGA_full_id,1,12))%>%
inner_join(clinical,by="patient_id")
mRNA_survival <- sapply(gene,
function(x){ survival_p(expr_survival,x,prefix,pcutoff)})
return(mRNA_survival)
}
mRNA_lnRNA_clinical <- readRDS("result/mRNA_lnRNA_clinical.rda")
res <- Survival(mRNA_expr,ce_mRNA,mRNA_lnRNA_clinical,prefix="mRNA")
report("使用[R pacakge Survival]得到 ",length(ce_mRNA)," 个mRNA中(以p<0.05筛选)有 ",sum(res<0.05)," 个mRNA与总生存率相关")
report(paste(names(res[res<0.05]),collapse = ","))
## test start ceRNAw网络mRNA的表达与患者总生存率之间关系
if(F){
mRNA_expr_survival <- mRNA_expr[mRNA,]%>%
t()%>%
as.data.frame()%>%
rownames_to_column("TCGA_full_id")%>%
mutate(patient_id =substr(TCGA_full_id,1,12))%>%
inner_join(mRNA_lnRNA_clinical,by="patient_id")
survival_p(mRNA_expr_survival,"PHOX2B")
mRNA_survival <- sapply(mRNA,
function(x){ survival_p(mRNA_expr_survival,x)})
table(mRNA_survival<0.05)
mRNA_expr_survival <- mRNA_expr_survival%>% filter(futime_year!=0)
low_high <- ifelse(mRNA_expr_survival[,"PHOX2B"] <= median(mRNA_expr_survival[,"PHOX2B"]),"Low","Hight")
mRNA_expr_survival$futime_year <- as.numeric(mRNA_expr_survival$futime_year)
mRNA_expr_survival$fustat <- as.numeric(mRNA_expr_survival$fustat)
fit <- surv_fit(Surv(futime_year,fustat) ~ low_high, data = mRNA_expr_survival)
# pdf(file = paste0("result/PHOX2B.pdf"),width = 5.5,height = 5)
ggsurvplot(fit, pval=T, risk.table=F,
legend.title='PHOX2B',
risk.table.height = 0.3,data = mRNA_expr_survival)+
ggsave(filename = "result/aa.png")
# dev.off()
# ggsurvplot(fit, data = mRNA_expr_survival,
# surv.median.line = "hv", # 增加中位生存时间
# conf.int = TRUE) # 增加置信区间
}
## test End
mRNA_lnRNA_clinical <- readRDS("result/mRNA_lnRNA_clinical.rda")
res <- Survival(lnRNA_expr,ce_lnRNA,mRNA_lnRNA_clinical,prefix="lnRNA")
report("使用[R pacakge Survival]得到 ",length(ce_lnRNA)," 个lnRNA中(以p<0.05筛选)有 ",sum(res<0.05)," 个lnRNA与总生存率相关")
report(paste(names(res[res<0.05]),collapse = ","))
## 由于miRNA存在hsa-let-7a-2 这种形式,预测mRNA时是去掉-2 ,以hsa-let-7a预测
miRNA_clinical <- readRDS("result/miRNA_clinical.rda")
res <- Survival(miRNA_expr,ce_miRNA,miRNA_clinical,prefix="miRNA",pcutoff = 0.09)
report("使用[R pacakge Survival]得到 ",length(ce_miRNA)," 个lnRNA中(以p<0.05筛选)有 ",sum(res<0.09)," 个lnRNA与总生存率相关")
report(paste(names(res[res<0.09]),collapse = ","))
report("***************************lncRNA miRNA mRNA 生存分析***************************")
#############################################
## cox 回归分析
#############################################
library(survivalROC)
library(survival)
library(survminer)
Cox_single <- function(data,gene,filename=NULL){
outTab <- data.frame()
for(i in gene){
cox <- coxph(Surv(futime_year,fustat) ~ data[,i],data=data)
coxSummary <- summary(cox)
outTab <- rbind(outTab,cbind(id=i,
HR=coxSummary$conf.int[,"exp(coef)"],
HR.95L=coxSummary$conf.int[,"lower .95"],
HR.95H=coxSummary$conf.int[,"upper .95"],
pvalue=coxSummary$coefficients[,"Pr(>|z|)"]))
}
if(!is.null(filename)){
write.csv(outTab,file = paste0("result/",filename,".csv"),quote = F,row.names = F)
}
return(outTab)
}
Cox_multi <- function(data,gene,filename=NULL){
if(sum(grepl("-",gene))>0){
gene <- sapply(gene,function(x){sub("-","",x)})
data<- data%>%
rename_all(~sub("-","",.))
}
formula <-as.formula(paste0("Surv(futime_year,fustat)~",paste0(gene,collapse = "+")))
cox <- coxph(formula,data=data)
#cox <- step(cox,direction = "both")
coxSummary <- summary(cox)
outTab <- data.frame()
outTab <- rbind(outTab,cbind(HR=coxSummary$conf.int[,"exp(coef)"],
HR.95L=coxSummary$conf.int[,"lower .95"],
HR.95H=coxSummary$conf.int[,"upper .95"],
pvalue=coxSummary$coefficients[,"Pr(>|z|)"]))
riskScore <- predict(cox,type = "risk",newdata = data)
expr_survival_riskScore <- data %>%
mutate(riskScore = as.vector(ifelse(riskScore>median(riskScore),"high","low")))
fit <- surv_fit(Surv(futime_year,fustat) ~ riskScore, data = expr_survival_riskScore)
res <- ggsurvplot(fit, pval=T, risk.table=F,
risk.table.height = 0.3,data = expr_survival_riskScore)
if(!is.null(filename)){
res+ ggsave(filename = paste0("figures/cox/",filename,".png"))
write.csv(outTab,file = paste0("result/",filename,".csv"),quote = F,row.names = F)
}
return(list(outTab=outTab,image=res))
}
riskScore_pheatmap <- function(data,gene,filename=NULL){
if(sum(grepl("-",gene))>0){
gene <- sapply(gene,function(x){sub("-","",x)})
data<- lnRNA_expr_survival%>%
rename_all(~sub("-","",.))
}
formula <-as.formula(paste0("Surv(futime_year,fustat)~",paste0(gene,collapse = "+")))
cox <- coxph(formula,data=data)
#cox <- step(cox,direction = "both")
riskScore <- predict(cox,type = "risk",newdata = data)
expr_survival_riskScore <- data %>%
mutate(riskScore = as.vector(ifelse(riskScore>median(riskScore),"high","low")))
deg_expr <- expr[deg_gene,]%>%
{log10(cpm(.)+0.01)}
#deg_expr <- t(scale(t(deg_expr)))
#deg_expr[deg_expr > 2] = 2
#deg_expr[deg_expr < -2] = -2
col <- colorRampPalette(c("green","white","red"))(256)
pheatmap(deg_expr,col=col,fontsize=8,show_colnames =F,show_rownames = F,
filename = paste0("figures/cox/",filename,".png"))
return(paste0("figures/",filename,".png"))
}
Cox_multi_ROC <- function(data,gene,filename=NULL,predict.time=5){
if(sum(grepl("-",gene))>0){
gene <- sapply(gene,function(x){sub("-","",x)})
data<- data%>%
rename_all(~sub("-","",.))
}
formula <-as.formula(paste0("Surv(futime_year,fustat)~",paste0(gene,collapse = "+")))
cox <- coxph(formula,data=data)
#message(summary(cox))
#cox <- step(cox,direction = "both")
riskScore <- predict(cox,type = "risk",newdata = data)
roc <- survivalROC(Stime=data$futime_year ,status=data$fustat,marker=riskScore,
predict.time=predict.time,method="KM")
res <- xyplot(TP~FP,roc,main=paste("ROC curve(","AUC=",round(roc$AUC,3),")"),
xlab="False postive rate",ylab="True positive rate",panel = function(x,y){
panel.xyplot(x,y,type="l",lwd=2,cex.main=1.3,cex.lab=1.2,cex.axis=1.2,font=1.2,col="red")
panel.abline(c(0,1))
})
if(!is.null(filename)){
png(filename = paste0("figures/cox/",filename,".png"))
print(res)
dev.off()
}
return(res)
}
lnRNA_expr <- readRDS("result/lncRNA_expr.rda")
mRNA_lnRNA_clinical <- readRDS("result/mRNA_lnRNA_clinical.rda")
lnRNA_expr_survival <- lnRNA_expr[lnRNA_intersect,]%>%
filter()%>%
t()%>%
as.data.frame()%>%
rownames_to_column("TCGA_full_id")%>%
mutate(patient_id =substr(TCGA_full_id,1,12))%>%
inner_join(mRNA_lnRNA_clinical,by="patient_id")
if(F){
lnRNA_expr_ <- lnRNA_expr[ce_lnRNA,]
#lnRNA_expr_ <- log2(lnRNA_expr_+0.01)
par(mfcol=c(2,1))
boxplot(lnRNA_expr_)
boxplot(log2(lnRNA_expr_+1))
boxplot( log2(lnRNA_expr_+0.01))
boxplot( log2(lnRNA_expr_+1))
boxplot(scale(cpm(lnRNA_expr_),center = F, scale = TRUE))
lnRNA_expr_survival <- lnRNA_expr_%>%
t()%>%
as.data.frame()%>%
rownames_to_column("TCGA_full_id")%>%
mutate(patient_id =substr(TCGA_full_id,1,12))%>%
inner_join(mRNA_lnRNA_clinical,by="patient_id")
Cox_single_res <- Cox_single(lnRNA_expr_survival,ce_lnRNA,filename = "singleCox_lnRNA")
genes <- Cox_single_res%>%filter(pvalue<0.09)%>%pull("id")
Cox_multi(lnRNA_expr_survival,genes)$image
Cox_multi_ROC(lnRNA_expr_survival,genes,predict.time=3)
}
## 22个lnRNA 先单因素cox再多因素cox LINC00486,LINC00365
Cox_single_res <- Cox_single(lnRNA_expr_survival,lnRNA_intersect,filename = "singleCox_lnRNA")
genes <- Cox_single_res%>%filter(pvalue<0.09)%>%pull("id")
saveRDS(genes,file = "result/lnRNA_cox_mutil.rda")
res <- Cox_multi(lnRNA_expr_survival,genes,filename = "muitiCox_lnRNA")
res$outTab
print(res$image)
Cox_multi_ROC(lnRNA_expr_survival,genes,predict.time=3,filename = "muitiCox_lnRNA_lnRNA_ROC")
if(F){
cox <- coxph(Surv(futime_year,fustat) ~ lnRNA_expr_survival[,"HOTAIR"],data=lnRNA_expr_survival)
coxSummary <- summary(cox)
outTab <- data.frame()
outTab <- rbind(outTab,cbind(HR=coxSummary$conf.int[,"exp(coef)"],
HR.95L=coxSummary$conf.int[,"lower .95"],
HR.95H=coxSummary$conf.int[,"upper .95"],
pvalue=coxSummary$coefficients[,"Pr(>|z|)"]))
wald.test <- signif(coxSummary$wald["pvalue"],digits = 3)
beta <- signif(coxSummary$wald["test"],digits = 3)
HR <- signif(coxSummary$coef[1],digits = 3)
HR.lower = coxSummary$conf.int[,"lower .95"]
HR.upper = coxSummary$conf.int[,"upper .95"]
}
if(F){
if(sum(grepl("-",genes))>0){
genes <- sapply(genes,function(x){sub("-","",x)})
lnRNA_expr_survival<- lnRNA_expr_survival%>%
rename_all(~sub("-","",.))
}
formula <-as.formula(paste0("Surv(futime_year,fustat)~",paste0(genes,collapse = "+")))
cox <- coxph(formula,data=lnRNA_expr_survival)
cox <- step(cox,direction = "both")
coxSummary <- summary(cox)
outTab <- data.frame()
outTab <- rbind(outTab,cbind(HR=coxSummary$conf.int[,"exp(coef)"],
HR.95L=coxSummary$conf.int[,"lower .95"],
HR.95H=coxSummary$conf.int[,"upper .95"],
pvalue=coxSummary$coefficients[,"Pr(>|z|)"]))
riskScore <- predict(cox,type = "risk",newdata = lnRNA_expr_survival)
lnRNA_expr_survival_riskScore <- lnRNA_expr_survival %>%
mutate(riskScore = as.vector(ifelse(riskScore< median(riskScore),"high","low")))
fit <- surv_fit(Surv(futime_year,fustat) ~ riskScore, data = lnRNA_expr_survival_riskScore)
ggsurvplot(fit, pval=T, risk.table=F,
risk.table.height = 0.3,data = lnRNA_expr_survival_riskScore,xlab="Time(Years)")
roc <- survivalROC(Stime=lnRNA_expr_survival_riskScore$futime_year ,status=lnRNA_expr_survival_riskScore$fustat,marker=riskScore,
predict.time=5,method="KM")
plot(roc$FP,roc$TP,type="l",xlim=c(0,1),ylim=c(0,1),col="red",main=paste("ROC curve(","AUC=",round(roc$AUC,3),")"),
xlab="False postive rate",ylab="True positive rate",
lwd=2,cex.main=1.3,cex.lab=1.2,cex.axis=1.2,font=1.2)
abline(0,1)
lnRNA_expr_survival <- lnRNA_expr_survival%>%
select(c(genes,futime_year,fustat))
library(ggrisk)
ggrisk(lnRNA_expr_survival,time="futime_year",event="fustat",heatmap.genes = genes)
}
if(F){
formula <-as.formula(paste0("Surv(futime_year,fustat)~",paste0(genes,collapse = "+")))
cox <- coxph(formula,data=lnRNA_expr_survival)
riskScore <- predict(cox,type = "risk",newdata = lnRNA_expr_survival)
expr_survival_riskScore <- lnRNA_expr_survival %>%
mutate(riskScore = as.vector(ifelse(riskScore>median(riskScore),"high","low")))
lnRNA_expr_group <- data.frame(row.names = expr_survival_riskScore$TCGA_full_id,group=expr_survival_riskScore$riskScore)%>%
arrange(desc(group))
lnRNA_expr <- lnRNA_expr[,rownames(lnRNA_expr_group)]
identical(rownames(lnRNA_expr_group),colnames(lnRNA_expr))
deg_expr <- lnRNA_expr[genes,]%>%
{log2(cpm(.)+ 1)}
deg_expr <- t(scale(t(deg_expr)))
deg_expr[deg_expr>2] = 2
deg_expr[deg_expr< -2] = -2
col <- colorRampPalette(c("green","white","red"))(256)
pheatmap(deg_expr,col=col,fontsize=8,show_colnames =F,scale = "row",annotation_col=lnRNA_expr_group,cluster_cols=F)
}
if(F){
## 单基因
my_comparisons <- list( c("i", "ii"))
lnRNA_expr_survival%>%
dplyr::select(c(tumor_stage,LINC00365))%>%
filter(tumor_stage!="unknow") %>%
ggplot(aes(x=tumor_stage,y=LINC00365))+
stat_boxplot(geom="errorbar",width=0.15,aes(color=tumor_stage))+
geom_boxplot(aes(fill=tumor_stage))+
ylim(0, 1000)+
stat_compare_means(comparisons =my_comparisons)+
stat_compare_means(method = "kruskal.test",label = "p.format")
}
mRNA_lnRNA_clinical <- readRDS("result/mRNA_lnRNA_clinical.rda")
mRNA_expr_survival <- mRNA_expr[ce_mRNA,]%>%
t()%>%
as.data.frame()%>%
rownames_to_column("TCGA_full_id")%>%
#filter(substr(TCGA_full_id,14,15)!=11)%>%
mutate(patient_id =substr(TCGA_full_id,1,12))%>%
inner_join(mRNA_lnRNA_clinical,by="patient_id")
Cox_single_res <- Cox_single(mRNA_expr_survival,ce_mRNA,filename = "singleCox_mRNA")
genes <- Cox_single_res%>%filter(pvalue<0.05)%>%pull("id")
res <- Cox_multi(mRNA_expr_survival,genes,filename = "muitiCox_mRNA")
res$outTab
#print(res$image)
Cox_multi_ROC(mRNA_expr_survival,genes,predict.time=5,filename = "muitiCox_mRNA_ROC")
if(F){
if(sum(grepl("-",genes))>0){
genes <- sapply(genes,function(x){sub("-","",x)})
data<- mRNA_expr_survival%>%
rename_all(~sub("-","",.))
}
formula <-as.formula(paste0("Surv(futime_year,fustat)~",paste0(genes,collapse = "+")))
cox <- coxph(formula,data=data)
riskScore <- predict(cox,type = "risk",newdata = data)
expr_survival_riskScore <- data %>%
mutate(riskScore = as.vector(ifelse(riskScore>median(riskScore),"high","low")))
lnRNA_expr_group <- data.frame(row.names = expr_survival_riskScore$TCGA_full_id,group=expr_survival_riskScore$riskScore)%>%
arrange(desc(group))
lnRNA_expr <- lnRNA_expr[,rownames(lnRNA_expr_group)]
identical(rownames(lnRNA_expr_group),colnames(lnRNA_expr))
deg_expr <- lnRNA_expr[genes,]%>%
{log2(cpm(.)+ 1)}
deg_expr <- t(scale(t(deg_expr)))
deg_expr[deg_expr>2] = 2
deg_expr[deg_expr< -2] = -2
col <- colorRampPalette(c("green","white","red"))(256)
pheatmap(deg_expr,col=col,fontsize=8,show_colnames =F,scale = "row",annotation_col=lnRNA_expr_group,cluster_cols=F)
}
if(F){
if(sum(grepl("-",genes))>0){
genes <- sapply(genes,function(x){sub("-","",x)})
lnRNA_expr_survival<- lnRNA_expr_survival%>%
rename_all(~sub("-","",.))
}
formula <-as.formula(paste0("Surv(futime_year,fustat)~",paste0(genes,collapse = "+")))
cox <- coxph(formula,data=lnRNA_expr_survival)
cox <- step(cox,direction = "both")
coxSummary <- summary(cox)
outTab <- data.frame()
outTab <- rbind(outTab,cbind(HR=coxSummary$conf.int[,"exp(coef)"],
HR.95L=coxSummary$conf.int[,"lower .95"],
HR.95H=coxSummary$conf.int[,"upper .95"],
pvalue=coxSummary$coefficients[,"Pr(>|z|)"]))
riskScore <- predict(cox,type = "risk",newdata = lnRNA_expr_survival)
lnRNA_expr_survival_riskScore <- lnRNA_expr_survival %>%
mutate(riskScore = as.vector(ifelse(riskScore< median(riskScore),"high","low")))
fit <- surv_fit(Surv(futime_year,fustat) ~ riskScore, data = lnRNA_expr_survival_riskScore)
ggsurvplot(fit, pval=T, risk.table=F,
risk.table.height = 0.3,data = lnRNA_expr_survival_riskScore,xlab="Time(Years)")
roc <- survivalROC(Stime=lnRNA_expr_survival_riskScore$futime_year ,status=lnRNA_expr_survival_riskScore$fustat,marker=riskScore,
predict.time=5,method="KM")
xyplot(TP~FP,roc,main=paste("ROC curve(","AUC=",round(roc$AUC,3),")"),
xlab="False postive rate",ylab="True positive rate",panel = function(x,y){
panel.xyplot(x,y,type="l",lwd=2,cex.main=1.3,cex.lab=1.2,cex.axis=1.2,font=1.2,col="red")
panel.abline(c(0,1))
})
lnRNA_expr_survival <- lnRNA_expr_survival%>%
select(c(genes,futime_year,fustat))
library(ggrisk)
ggrisk(lnRNA_expr_survival,time="futime_year",event="fustat",heatmap.genes = genes)
}
if(F){
my_comparisons <- list( c("i", "ii"))
mRNA_expr_survival%>%
dplyr::select(c(tumor_stage,CNTN3))%>%
filter(tumor_stage!="unknow") %>%
ggplot(aes(x=tumor_stage,y=CNTN3))+
stat_boxplot(geom="errorbar",width=0.15,aes(color=tumor_stage))+
geom_boxplot(aes(fill=tumor_stage))+
stat_compare_means(comparisons =my_comparisons)+
stat_compare_means(method = "kruskal.test",label = "p.format")
}
#############################################
## ceRNA 子网络构建
#############################################
lnRNA_cox_mutil <- readRDS("result/lnRNA_cox_mutil.rda")
lnRNA_miRNA_mRNA <- readRDS("result/lnRNA_miRNA_mRNA.rda")
lnRNA_miRNA_mRNA_sub <- lnRNA_miRNA_mRNA%>%
filter(lnRNA %in% lnRNA_cox_mutil)
cytoscape(lnRNA_miRNA_mRNA_sub,filename = "small")
saveRDS(lnRNA_miRNA_mRNA_sub,file = "result/sub_netwrok_gene.rda")
if(F){
miRNA_lnRNA_clinical <- readRDS("result/mRNA_lnRNA_clinical.rda")
miRNA_expr <- readRDS("result/miRNA_expr.rda")%>%
rownames_to_column("symbol")%>%
mutate(miRNA_= case_when(grepl("hsa-mir",symbol)~str_extract(symbol,"hsa-mir-[0-9]+[a-z]?"),
grepl("hsa-let",symbol)~str_extract(symbol,"hsa-let-[0-9]+[a-z]?")))%>%
filter(miRNA_ %in% miRNA)%>%
dplyr::select(-miRNA_)%>%
column_to_rownames("symbol")
miRNA_expr_survival <- miRNA_expr%>%
t()%>%
as.data.frame()%>%
rownames_to_column("TCGA_full_id")%>%
mutate(patient_id =substr(TCGA_full_id,1,12))%>%
inner_join(mRNA_lnRNA_clinical,by="patient_id")
Cox_single_res <- Cox_single(miRNA_expr_survival, rownames(miRNA_expr))
genes <- Cox_single_res%>%filter(pvalue<0.05)%>%pull("id")
Cox_multi(mRNA_expr_survival,genes,filename = "ce-lnRNA-Cox_multi")
Cox_multi_ROC(miRNA_expr_survival,predict.time=3,filename = "ce-lnRNA-Cox_multi")
}
##############################################
## GO KEGG
##############################################
(function(){
if(!file.exists("enrichment")){
dir.create(file.path("enrichment"))
}
sub_netwrok_gene <- readRDS("result/sub_netwrok_gene.rda")
if(F){
gene <- unique(sub_netwrok_gene[sub_netwrok_gene$miRNA=="hsa-mir-34b",]$mRNA)
write.csv(gene,file = "result/GO_KEKK_GENE.csv",quote = F,row.names = F)
}
gene <- unique(sub_netwrok_gene$mRNA)
entrez_id <- bitr(gene, fromType = "SYMBOL",
toType = c("ENTREZID"),
OrgDb = org.Hs.eg.db)%>%
pull("ENTREZID")%>%unique()
egobp_BP<-enrichGO(gene = entrez_id,OrgDb = org.Hs.eg.db,keyType = 'ENTREZID',ont = 'BP',pAdjustMethod="none") # BP MF CC ALL
#dotplot(egobp_BP,showCategory=20)
egobp_BP@result%>%
arrange(p.adjust)%>%
write.csv(file = "result/enrichment/egobp_BP.csv",quote = F,row.names = F)
write.csv(egobp_BP@result)
egobp_MF<-enrichGO(gene = entrez_id,OrgDb = org.Hs.eg.db,keyType = 'ENTREZID',ont = 'MF',pAdjustMethod="none") # BP MF CC ALL
#dotplot(egobp_MF,showCategory=20)
egobp_MF@result%>%
arrange(p.adjust)%>%
write.csv(file = "result/enrichment/egobp_MF.csv",quote = F,row.names = F)
egobp_CC<-enrichGO(gene = entrez_id,OrgDb = org.Hs.eg.db,keyType = 'ENTREZID',ont = 'CC',pAdjustMethod="none") # BP MF CC ALL
#dotplot(egobp_CC,showCategory=20)
egobp_CC@result%>%
arrange(p.adjust)%>%
write.csv(file = "result/enrichment/egobp_CC.csv",quote = F,row.names = F)
kk <- clusterProfiler::enrichKEGG(gene =entrez_id,organism = 'hsa',pvalueCutoff = 0.4,qvalueCutoff = 0.3,pAdjustMethod="none")
#dotplot(kk,showCategory=20)
kk@result%>%
arrange(p.adjust)%>%
write.csv(file = "result/enrichment/kegg.csv",quote = F,row.names = F)
})()