图片alt
library(GEOquery)

# Parse a locally stored series-matrix file. With getGPL = TRUE the platform
# (GPL) record is fetched as well and stored under `destdir`; AnnotGPL = TRUE
# asks for the annotation GPL file first.
expr <- getGEO(
  filename = "data/GSE161784_series_matrix.txt.gz",
  destdir = "data",
  getGPL = TRUE,
  AnnotGPL = TRUE
)
# Download the GEO record for accession `GEO` into `destdir`.
# (Excerpt: this first part covers argument handling and the GDS / GSE
# branches; the function body continues in the following snippets.)
#
# GEO       accession string, e.g. "GSE161784"; its first three letters
#           (upper-cased) select the branch that builds the URL
# destdir   directory in which the downloaded file is stored
# AnnotGPL  for GPL accessions, whether to try the annotation file first
# amount    one of 'full', 'brief', 'quick', 'data'; used as the `view=`
#           query value for the acc.cgi URLs
getGEOfile <- function(GEO, destdir = tempdir(), AnnotGPL = FALSE,
                       amount = c('full', 'brief', 'quick', 'data')) {
  amount <- match.arg(amount)
  geotype <- toupper(substr(GEO, 1, 3)) # e.g. "GSE"
  mode <- 'wb'
  GEO <- toupper(GEO)
  # Replace the trailing one to three digits with "nnn" to get the FTP
  # directory stub, e.g. GSE161784 -> GSE161nnn, GSE2000 -> GSE2nnn
  stub <- gsub('\\d{1,3}$', 'nnn', GEO, perl = TRUE)

  if (geotype == 'GDS') {
    gdsurl <- 'https://ftp.ncbi.nlm.nih.gov/geo/datasets/%s/%s/soft/%s'
    myurl <- sprintf(gdsurl, stub, GEO, paste0(GEO, '.soft.gz'))
    destfile <- file.path(destdir, paste0(GEO, '.soft.gz'))
  }

  if (geotype == 'GSE' & amount == 'full') {
    gseurl <- 'https://ftp.ncbi.nlm.nih.gov/geo/series/%s/%s/soft/%s'
    # e.g. https://ftp.ncbi.nlm.nih.gov/geo/series/GSE161nnn/GSE161784/soft/GSE161784_family.soft.gz
    myurl <- sprintf(gseurl, stub, GEO, paste0(GEO, '_family.soft.gz'))
    # e.g. data/GSE161784.soft.gz
    destfile <- file.path(destdir, paste0(GEO, '.soft.gz'))
  }

  # NOTE(review): 'table' is not one of the values match.arg() accepts above,
  # so the `amount != 'table'` test is always TRUE here.
  if (geotype == 'GSE' & amount != 'full' & amount != 'table') {
    gseurl <- "https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi"
    myurl <- paste0(gseurl, '?targ=self&acc=', GEO, '&form=text&view=', amount)
    destfile <- file.path(destdir, paste0(GEO, '.soft'))
    mode <- 'w' # plain-text response, so switch from 'wb' to text mode
  }
↓
如果 AnnotGPL=T,会先尝试从 https://ftp.ncbi.nlm.nih.gov/geo/platforms/GPL19nnn/GPL19117/annot/GPL19117.annot.gz 下载 GPL19117.annot.gz;如果该链接下不存在 GPL19117.annot.gz(此时会提示 Annotation GPL not available, so will use submitter GPL instead),则改为从 https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?targ=self&acc=GPL19117&form=text&view=full 下载。
AnnotGPL=T
GPL19117.annot.gz
if (geotype == 'GPL') {
  # This branch is reached, for example, from
  # getGEO(filename="data/GSE161784_series_matrix.txt.gz",destdir="data",getGPL=T,AnnotGPL=T)
  # With AnnotGPL the annotation file is tried first, e.g.
  # https://ftp.ncbi.nlm.nih.gov/geo/platforms/GPL19nnn/GPL19117/annot/GPL19117.annot.gz
  if (AnnotGPL) {
    gplurl <- 'https://ftp.ncbi.nlm.nih.gov/geo/platforms/%s/%s/annot/%s'
    myurl <- sprintf(gplurl, stub, GEO, paste0(GEO, '.annot.gz'))
    destfile <- file.path(destdir, paste0(GEO, '.annot.gz'))
    # check to see if Annotation GPL is present. If so,
    # use it, else move on to submitter GPL
    res <- try({
      if (!file.exists(destfile)) {
        downloadFile(myurl, destfile, mode)
        message('File stored at: ')
        message(destfile)
      } else {
        message(sprintf('Using locally cached version of %s found here:\n%s ', GEO, destfile))
      }
    }, silent = TRUE)
    if (!inherits(res, 'try-error')) {
      return(invisible(destfile))
    } else {
      message('Annotation GPL not available, so will use submitter GPL instead')
    }
  }

  # Fallback (or AnnotGPL = FALSE): fetch the submitter GPL record as text.
  gseurl <- "https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi"
  myurl <- paste0(gseurl, '?targ=self&acc=', GEO, '&form=text&view=', amount)
  destfile <- file.path(destdir, paste0(GEO, '.soft.gz'))
  mode <- 'w'
  if (!file.exists(destfile)) {
    # Example values for GPL19117 with destdir = "data":
    #   myurl    = https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?targ=self&acc=GPL19117&form=text&view=full
    #   destfile = data/GPL19117.soft.gz
    #   mode     = w
    #   getOption('download.file.method.GEOquery') = auto
    download.file(myurl, destfile, mode = mode, quiet = TRUE,
                  method = getOption('download.file.method.GEOquery'),
                  headers = c("accept-encoding" = "gzip"))
    message('File stored at: ')
    message(destfile)
  } else {
    message(sprintf('Using locally cached version of %s found here:\n%s ', GEO, destfile))
  }
  return(invisible(destfile))
}
由于 soft 文件通常很大,下载时经常出现下面的错误。解决方法是:在浏览器中打开 https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?targ=self&acc=GPL19117&form=text&view=full,下载得到文件 GPL19117.txt,执行以下命令将其压缩并重命名,然后用生成的文件替换当前目录下的 data/GPL19117.soft.gz。
GPL19117.txt
data/GPL19117.soft.gz
# Compress the manually downloaded record, then give it the cache file name.
gzip GPL19117.txt
mv GPL19117.txt.gz GPL19117.soft.gz
注:GPL文件的下载路径需要使用getGEO的destdir="data"参数指定
destdir="data"
# Fetch the full GPL19117 record straight from the acc.cgi endpoint and store
# it under data/ using the file name expected for the cached platform file.
download.file(
  url = "https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?targ=self&acc=GPL19117&form=text&view=full",
  destfile = "data/GPL19117.soft.gz",
  mode = "w",
  quiet = TRUE,
  method = "auto",
  headers = c("accept-encoding" = "gzip")
)
if (geotype == 'GSM') {
  gseurl <- "https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi"
  myurl <- paste0(gseurl, '?targ=self&acc=', GEO, '&form=text&view=', amount)
  destfile <- file.path(destdir, paste0(GEO, '.soft'))
  mode <- 'w'
}

# Download only when the target file is not already present in destdir.
if (!file.exists(destfile)) {
  downloadFile(myurl, destfile, mode)
  message('File stored at: ')
  message(destfile)
} else {
  message(sprintf('Using locally cached version of %s found here:\n%s ', GEO, destfile))
}
# Return the path invisibly, matching what the GPL branch returns; without
# this the function would return the NULL produced by message().
invisible(destfile)
}
解析data/GSE161784_series_matrix.txt.gz获取GPLXXX号
data/GSE161784_series_matrix.txt.gz
# Abridged excerpt of parseGSEMatrix: the code that reads `fname` and builds
# GPL, sampledat, datamat, fd and ed is omitted from this snippet.
#
# fname     path of the series-matrix file to parse
# AnnotGPL  forwarded to getGEO() when the platform record is fetched
# destdir   forwarded to getGEO() as the download/cache directory
# getGPL    whether to fetch the platform (GPL) record at all
# Returns a list with the platform id (`GPL`) and the ExpressionSet (`eset`).
parseGSEMatrix <- function(fname, AnnotGPL = FALSE, destdir = tempdir(),
                           getGPL = TRUE, parseCharacteristics = TRUE) {
  # The GPL id is parsed from the matrix file (e.g.
  # data/GSE161784_series_matrix.txt.gz); getGEO is then called again with
  # that id, which downloads the platform record from the network.
  if (getGPL) {
    gpl <- getGEO(GPL, AnnotGPL = AnnotGPL, destdir = destdir)
  }
  eset <- new('ExpressionSet',
              phenoData = as(sampledat, 'AnnotatedDataFrame'),
              annotation = GPL,
              featureData = fd,
              experimentData = ed,
              exprs = as.matrix(datamat))
  return(list(
    GPL = as.character(
      sampledat[1, grep('platform_id', colnames(sampledat), ignore.case = TRUE)]
    ),
    eset = eset
  ))
}
# By accession: GSEMatrix = TRUE vs FALSE changes which files are downloaded.
getGEO('GSE161784', GSEMatrix = TRUE, destdir = "data")
getGEO('GSE161784', GSEMatrix = FALSE, destdir = "data")
# From a local series-matrix file: getGPL = TRUE also fetches the platform
# record (annotation GPL first because AnnotGPL = TRUE).
getGEO(filename = "data/GSE161784_series_matrix.txt.gz", destdir = "data", getGPL = TRUE, AnnotGPL = TRUE)
# Same file with getGPL = FALSE: the getGEO(GPL, ...) call is skipped.
getGEO(filename = "data/GSE161784_series_matrix.txt.gz", destdir = "data", getGPL = FALSE, AnnotGPL = TRUE)
不会下载东西
GSEMatrix=T
GSEMatrix=F
下载区别
gse <- getGEO("GSE58469", destdir = "data")

# Submitter GPL record for the platform, cached in the current directory.
gpl97 <- getGEO('GPL8179', destdir = ".")

# Manual download of the same record from
# https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?targ=self&acc=GPL8179&form=text&view=full
download.file(
  "https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?targ=self&acc=GPL8179&form=text&view=full",
  "GPL8179.soft.gz",
  mode = "w",
  quiet = TRUE,
  method = "auto",
  headers = c("accept-encoding" = "gzip")
)

# Asking for the annotation GPL of this platform prints:
#   Annotation GPL not available, so will use submitter GPL instead
# The annotation URL that is tried first:
#   https://ftp.ncbi.nlm.nih.gov/geo/platforms/GPL8nnn/GPL8179/annot/GPL8179.annot.gz
gpl97 <- getGEO('GPL8179', destdir = ".", AnnotGPL = TRUE)