log2(18*1000000/sum(dat[,3])+1)
## 18 -- > 6.459884 ## SS2_15_0048_A5
dat=log2(edgeR::cpm(dat)+1)
##归一化的一种选择,这里是CPM(count-per-million,每百万碱基中每个转录本的count值)
###CPM只对read count相对总reads数做了数量的均一化,去除文库大小差异。
apply(mRNA_count,2,function(x)x/sum(x)*1000000)
以下三者等价
pheatmap(expr_sig,col=col,show_colnames=F,show_rownames=F,scale="row")
pheatmap(t(scale(t(expr_sig))),col=col,show_colnames=F,show_rownames=F)
pheatmap(t(apply(expr_sig,1,function(x){ ( x-mean(x))/sd(x) })),col=col,show_colnames=F,show_rownames=F)
过滤表达量为0的行
dat=a[apply(a,1, function(x) sum(x>1) > floor(ncol(a)/50)),]
scale: 对列进行操作
mat <- matrix(1:12, ncol = 3)
[,1] [,2] [,3]
[1,] 1 5 9
[2,] 2 6 10
[3,] 3 7 11
[4,] 4 8 12
scale(mat,center = T, scale = T)
apply(mat,2,function(x){ ( x-mean(x)) /sd(x) })
[,1] [,2] [,3]
[1,] -1.1618950 -1.1618950 -1.1618950
[2,] -0.3872983 -0.3872983 -0.3872983
[3,] 0.3872983 0.3872983 0.3872983
[4,] 1.1618950 1.1618950 1.1618950
scale默认对行进行归一化(减去均值,除以标准差)
gene_count <- apply(gene_count,1,function(x){ (x-mean(x)) / sd(x)})%>%t()
dge <- DGEList(counts=expr)
dge <- calcNormFactors(dge)
v <- voom(dge,design,plot=TRUE, normalize="quantile")
par(mfrow=c(1,1),mar=c(7,3,2,2))
boxplot(log2(v[['E']]), boxwex=0.6, notch=T, main=GEO, outline=FALSE, las=2, col=group[ord])
expr <- expr[complete.cases(expr), ]
v <- vooma(log2(expr), design, plot=T)
expr <- normalizeBetweenArrays(expr) # normalize data
par(mfrow=c(1,1),mar=c(7,3,2,2))
boxplot(log2(v[['E']]), boxwex=0.6, notch=T, main=GEO, outline=FALSE, las=2, col=group[ord])
qx <- as.numeric(quantile(expr, c(0., 0.25, 0.5, 0.75, 0.99, 1.0), na.rm=T))
LogC <- (qx[5] > 100) ||
(qx[6]-qx[1] > 50 && qx[2] > 0)
if (LogC) {
message("##############",GEO,"log2 转换")
expr[which(expr <= 0)] <- NaN
expr <- log2(expr)
}
expr<- normalizeBetweenArrays(expr)
par(mfrow=c(1,1),mar=c(7,3,2,2))
boxplot(expr, boxwex=0.6, notch=T, main=GEO, outline=FALSE, las=2, col=group[ord])
GSM1062765 GSM1062766 GSM1062767 GSM1062768 GSM1062769
ABCA8 6.782756 7.128726 4.571173 5.631289 7.457750
ABCC3 10.612708 10.651995 9.720073 9.887744 6.458206
ABI3BP 10.810602 9.640165 8.725722 7.912306 8.213121
ACADL 5.396239 7.040040 4.790250 7.120997 5.107281
ADGRF1 5.463918 9.036063 9.297348 10.522882 5.316363
rowSums(t(apply(expr_sig,1,function(x){ ( x-mean(x))/sd(x) })))[1:5]
# ABCA8 ABCC3 ABI3BP ACADL ADGRF1
#-2.681882e-15 2.740169e-14 -1.838980e-14 -1.357768e-14 2.004646e-14
rowSums(t(apply(expr_sig,1,function(x){ ( x/sum(x))*1000 })))[1:5]
# ABCA8 ABCC3 ABI3BP ACADL ADGRF1
# 1000 1000 1000 1000 1000
vsd <- varianceStabilizingTransformation(dds_filt,blind=FALSE)
vsd@assays@data@listData[[1]]
assay(vsd)