单细胞数据分析
最后发布时间:2022-01-17 16:08:25
浏览量:
# 质量控制: quality control
# 标准化: normalization
# 归一化: scaling
# 选取高异质性基因: highly variable genes
# 降维-二维平面可视化: dimension reduction, PCA, tSNE, UMAP
# 分群: clustering
# 定义细胞类型: biomarker
library(Seurat)
packageVersion('Seurat')
library(dplyr)
library(patchwork)
#### load data
count_all<-read.csv('/home/wangyang/workspace/scRNASeq/data/pit_dev_count.csv',header = T,row.names = 1)
pbmc.all <- readRDS("/home/wangyang/workspace/scRNASeq/result/pit_dev_seuratv4_pc15.rds")
#pbmc.all <- FindNeighbors(pbmc.all, dims = 1:15)
#pbmc.all <- FindClusters(pbmc.all, resolution = 0.5)
#pbmc.all <- RunUMAP(pbmc.all, dims = 1:15)
#DimPlot(pbmc.all, reduction = "umap")
DimPlot(pbmc.all, reduction = "umap",label = T)
####
cell.used <- rownames(pbmc.all@meta.data[which(pbmc.all@meta.data$seurat_clusters %in% c(1,6,2,7,3,5)),])
length(cell.used)
pbmc <- CreateSeuratObject(counts =count_all[,cell.used], # pbmc@assays$RNA@counts[1:5,1:5]
project = c("endo"),
min.cells =3,# min cell
min.features = 2000,# cell's gene
names.field = 1,
names.delim = "W")
pbmc # 20660 features across 2879 samples within 1 assay
#### 质量控制: quality control
pbmc[['percent.mt']] <- PercentageFeatureSet(pbmc, pattern = "^MT-")
VlnPlot(pbmc, features = c("nFeature_RNA","nCount_RNA","percent.mt"))
#### 标准化: normalization
pbmc <- NormalizeData(pbmc, normalization.method = c("LogNormalize"),scale.factor = 100000) # pbmc@assays$RNA@data[1:5,1:5]
#### 选取高异质性基因: highly variable genes
pbmc <- FindVariableFeatures(pbmc,selection.method = "vst",nfeatures = 1500)
#### 归一化: scaling
all.genes <- rownames(pbmc)
pbmc <- ScaleData(pbmc, features = all.genes) # pbmc@assays$RNA@scale.data
#### 降维-二维平面可视化: dimension reduction, PCA, tSNE, UMAP
variable_features <- VariableFeatures(object=pbmc)
pbmc <- RunPCA(pbmc, features = variable_features)
DimPlot(pbmc,reduction = "pca",dims = c(1,2), group.by = "orig.ident")
FeaturePlot(pbmc,c("SOX2","TBX19"),reduction = "pca")
pbmc <- JackStraw(pbmc, dims =25,num.replicate = 100)
pbmc <- ScoreJackStraw(pbmc,dims = 1:25)
JackStrawPlot(pbmc,dims = 1:25)
pbmc <- FindNeighbors(pbmc, dims = 1:8)
pbmc<- FindClusters(pbmc, resolution = 0.3) # resolution
pbmc <- RunUMAP(pbmc, dims = 1:8)
DimPlot(pbmc, reduction = "umap",label = T)
pbmc<- FindClusters(pbmc, resolution = 1.8) # resolution
pbmc <- RunUMAP(pbmc, dims = 1:8)
DimPlot(pbmc, reduction = "umap",label = T)
#### 细胞分群 marker gene
pit.tf<-c("PITX1","MKI67","SOX2","PROP1","TBX19","POMC","POU1F1","GH2","PRL","TSHB","GATA2","NR5A1","ISL1","NEUROD1","FSHB","LHB") #"SOX2","LHX3","PROP1","HESX1",,'HES1'
FeaturePlot(pbmc, features = pit.tf,reduction = c("umap"),label.size = 1)
current.cluster.ids <- c(10,17,0,3,5,12,14,15,16,1,2,6,11,7,9,8,4,13)
new.cluster.ids <- c("CC","CC","Stem","Stem","Corticotrope","Pro.PIT1","Pro.PIT1","Pro.PIT1","Pro.PIT1","Somatotrope","Somatotrope", "Somatotrope","Somatotrope","Lactotrope", "Thyrotrope","Pre.Gonado","Gonadotrope","Gonadotrope")
pbmc@active.ident <- plyr::mapvalues(x = pbmc@active.ident, from = current.cluster.ids, to = new.cluster.ids)
pbmc@active.ident<-factor(pbmc@active.ident,levels = unique(new.cluster.ids))
DimPlot(pbmc, reduction = "umap",label = TRUE)
FeaturePlot(pbmc,c("PDGFRA"))