library(GEOquery) library(limma) library(edgeR) library(tidyverse) (function(){ GSE ="GSE126094" message("开始分析: ",GSE," Hsa_circ_101555 functions as a competing endogenous RNA of miR-597-5p to promote colorectal cancer progression [Circular RNAs]") if(!file.exists(paste0("GEO-COAD/",GSE,".rds"))){ gset <- getGEO(GSE,AnnotGPL=T,destdir = "GEO-COAD") saveRDS(gset,paste0("GEO-COAD/",GSE,".rds")) } gset <- readRDS(paste0("GEO-COAD/",GSE,".rds")) gset <- gset[[1]] gset_expr <- exprs(gset)%>% as.data.frame() message(GSE," 使用GPL平台 ",gset@annotation) #boxplot(log2(gset_expr),outline=FALSE) #title(main=GSE) gset_pd <- pData(gset)%>% dplyr::select(sample_id=geo_accession,sample_name=title)%>% mutate(group=case_when(grepl("normal",sample_name)~"Normal", T ~"Tumor")) gset_annot <- gset@featureData@data gset_expr_annot <- gset_expr %>% mutate(symbol=gset_annot$circRNA)%>% rownames_to_column("id")%>% dplyr::select(-id) %>% filter(symbol!="") %>% stats::aggregate(. ~ symbol, data =., FUN = mean)%>% tibble::column_to_rownames("symbol") saveRDS(gset_expr_annot,paste0("GEO-COAD/gset_expr_annot_circRNA_",GSE,".rda")) saveRDS(gset_pd,paste0("GEO-COAD/metadata_circRNA_",GSE,".rda")) })() (function(){ GSE ="GSE138589" message("开始分析: ",GSE,"circRNA Microarray for human colorectal adenocarcinoma and normal mucosa") if(!file.exists(paste0("GEO-COAD/",GSE,".rds"))){ gset <- getGEO(GSE,AnnotGPL=T,destdir = "GEO-COAD") saveRDS(gset,paste0("GEO-COAD/",GSE,".rds")) } gset <- readRDS(paste0("GEO-COAD/",GSE,".rds")) gset <- gset[[1]] gset_expr <- exprs(gset)%>% as.data.frame() message(GSE," 使用GPL平台 ",gset@annotation) #boxplot(gset_expr,outline=FALSE) #title(main=GSE) gset_pd <- pData(gset)%>% dplyr::select(sample_id=geo_accession,sample_name=title)%>% mutate(group=case_when(grepl("normal",sample_name)~"Normal", T ~"Tumor")) gset_annot <- gset@featureData@data gset_expr_annot <- gset_expr %>% mutate(symbol=gset_annot$circRNA)%>% rownames_to_column("id")%>% dplyr::select(-id) %>% filter(symbol!="") %>% stats::aggregate(. ~ symbol, data =., FUN = mean)%>% tibble::column_to_rownames("symbol") saveRDS(gset_expr_annot,paste0("GEO-COAD/gset_expr_annot_circRNA_",GSE,".rda")) saveRDS(gset_pd,paste0("GEO-COAD/metadata_circRNA_",GSE,".rda")) }) (function(){ GSE ="GSE142837" message("开始分析: ",GSE," circRNA expression data from human colorectal cancer tissues and the correponding non-cancerous tissues") if(!file.exists(paste0("GEO-COAD/",GSE,".rds"))){ gset <- getGEO(GSE,AnnotGPL=T,destdir = "GEO-COAD") saveRDS(gset,paste0("GEO-COAD/",GSE,".rds")) } gset <- readRDS(paste0("GEO-COAD/",GSE,".rds")) gset <- gset[[1]] gset_expr <- exprs(gset)%>% as.data.frame() message(GSE," 使用GPL平台 ",gset@annotation) #boxplot(log2(gset_expr),outline=FALSE) #title(main=GSE) gset_pd <- pData(gset)%>% dplyr::select(sample_id=geo_accession,sample_name=title)%>% mutate(group=case_when(grepl("non-cancerous",sample_name)~"Normal", T ~"Tumor")) gset_annot <- gset@featureData@data gset_expr_annot <- gset_expr %>% mutate(symbol=gset_annot$circRNA)%>% rownames_to_column("id")%>% dplyr::select(-id) %>% filter(symbol!="") %>% stats::aggregate(. ~ symbol, data =., FUN = mean)%>% tibble::column_to_rownames("symbol") saveRDS(gset_expr_annot,paste0("GEO-COAD/gset_expr_annot_circRNA_",GSE,".rda")) saveRDS(gset_pd,paste0("GEO-COAD/metadata_circRNA_",GSE,".rda")) })()
library("maptools") # point labels without overlaps library(umap) (function(){ GEOS <- c("GSE126094","GSE142837") Normalized <- c(F,F) for(i in 1:length(GEOS)){ GEO <- GEOS[i] message("Normalized:",Normalized[i]) message("开始分析:",GEO) expr <- readRDS(paste0("GEO-COAD/gset_expr_annot_circRNA_",GEO,".rda")) metadata <- readRDS(paste0("GEO-COAD/metadata_circRNA_",GEO,".rda")) message("分组信息列名与表达矩阵的行名相同",identical(colnames(expr),rownames(metadata))) group <- factor(metadata$group, levels = c("Normal", "Tumor")) ## 检测是否需要log2转换 qx <- as.numeric(quantile(expr, c(0., 0.25, 0.5, 0.75, 0.99, 1.0), na.rm=T)) LogC <- (qx[5] > 100) || (qx[6]-qx[1] > 50 && qx[2] > 0) if (LogC) { message("##############",GEO,"log2 转换") expr[which(expr <= 0)] <- NaN expr <- log2(expr) } ## 质控 if(T){ par(mfrow=c(1,3),mar=c(7,2,1,6)) ord <- order(group) boxplot(expr[,ord], boxwex=0.6, notch=T, main=GEO, outline=FALSE, las=2, col=group[ord]) plotDensities(expr, group=group, main=GEO, legend ="topright") ump <- umap(t(expr), n_neighbors = 9, random_state = 123) plot(ump$layout, main="UMAP plot, nbrs=9", xlab="", ylab="", col=group, pch=20, cex=1.5) legend("topright", legend=levels(group), pch=20, col=1:nlevels(group), title="Group", pt.cex=1.5,inset=c(-0.6,0)) pointLabel(ump$layout, labels = rownames(ump$layout), method="SANN", cex=0.6) } design <- model.matrix( ~ 0 + group) colnames(design) <- levels(group) message("Tumor-Normal") contrast.matrix <- makeContrasts("Tumor-Normal", levels = design) ## 是否强制标准化 if(!Normalized[i]){ message(GEO," 进行Normalized") if(F){ dge <- DGEList(counts=expr) dge <- calcNormFactors(dge) v <- voom(dge,design,plot=TRUE, normalize="quantile") } if(F){ expr <- expr[complete.cases(expr), ] v <- vooma(log2(expr), design, plot=T) expr <- normalizeBetweenArrays(expr) # normalize data par(mfrow=c(1,1),mar=c(7,3,2,2)) boxplot(log2(v[['E']]), boxwex=0.6, notch=T, main=GEO, outline=FALSE, las=2, col=group[ord]) } expr<- normalizeBetweenArrays(expr) par(mfrow=c(1,3),mar=c(7,2,1,6)) ord <- order(group) boxplot(expr[,ord], boxwex=0.6, notch=T, main=GEO, outline=FALSE, las=2, col=group[ord]) plotDensities(expr, group=group, main=GEO, legend ="topright") ump <- umap(t(expr), n_neighbors = 9, random_state = 123) plot(ump$layout, main="UMAP plot, nbrs=9", xlab="", ylab="", col=group, pch=20, cex=1.5) legend("topright", legend=levels(group), pch=20, col=1:nlevels(group), title="Group", pt.cex=1.5, inset=c(-0.6,0)) pointLabel(ump$layout, labels = rownames(ump$layout), method="SANN", cex=0.6) } fit <- lmFit(expr, design)%>% contrasts.fit(contrast.matrix)%>% eBayes() #contrast <- paste0(rev(levels(group)), collapse = "-") res <- topTable(fit, coef = 1, n = Inf) saveRDS(res,file = paste0("GEO-COAD/limma_",GEO,".rda")) } if(F){ GEO <- GEOS[1] expr <- readRDS(paste0("GEO-COAD/gset_expr_annot_circRNA_",GEO,".rda")) metadata <- readRDS(paste0("GEO-COAD/metadata_circRNA_",GEO,".rda")) message("分组信息列名与表达矩阵的行名相同",identical(colnames(expr),rownames(metadata))) group <- factor(metadata$group, levels = c("Normal", "Tumor")) par(mfrow=c(1,3),mar=c(7,2,1,5)) ord <- order(group) boxplot(log2(expr[,ord]), boxwex=0.6, notch=T, main=GEO, outline=FALSE, las=2, col=group[ord]) plotDensities(log2(expr), group=group, main=GEO, legend ="topright") #par(mfrow=c(1,1),mar=c(2,2,2,2)) ump <- umap(t(log2(expr)), n_neighbors = 9, random_state = 123) #par(mar=c(3,3,2,6), xpd=TRUE) plot(ump$layout, main="UMAP plot, nbrs=9", xlab="", ylab="", col=group, pch=20, cex=1.5) legend("topright", legend=levels(group), pch=20, col=1:nlevels(group), title="Group", pt.cex=1.5, inset=c(-0.3,0)) pointLabel(ump$layout, labels = rownames(ump$layout), method="SANN", cex=0.6) design <- model.matrix( ~ 0 + group) colnames(design) <- levels(group) contrast <- paste0(rev(levels(group)), collapse = "-") message(contrast) contrast.matrix <- makeContrasts(contrast, levels = design) if(F){ dge <- DGEList(counts=log2(expr)) dge <- calcNormFactors(dge) v <- voom(dge,design,plot=TRUE, normalize="quantile") v <- vooma(expr, design, plot=T) par(mfrow=c(1,1),mar=c(7,3,2,2)) boxplot(log2(v[['E']]), boxwex=0.6, notch=T, main=GEO, outline=FALSE, las=2, col=group[ord]) } expr<- normalizeBetweenArrays(log2(expr)) boxplot(expr, boxwex=0.6, notch=T, main=GEO, outline=FALSE, las=2, col=group[ord]) fit <- lmFit(expr, design)%>% contrasts.fit(contrast.matrix)%>% eBayes() res <- topTable(fit, coef = 1, n = Inf) saveRDS(res,file = paste0("GEO-COAD/limma_",GEO,".rda")) } })()