# 质量控制: quality control # 标准化: normalization # 归一化: scaling # 选取高异质性基因: highly variable genes # 降维-二维平面可视化: dimension reduction, PCA, tSNE, UMAP # 分群: clustering # 定义细胞类型: biomarker library(Seurat) packageVersion('Seurat') library(dplyr) library(patchwork) #### load data count_all<-read.csv('/home/wangyang/workspace/scRNASeq/data/pit_dev_count.csv',header = T,row.names = 1) pbmc.all <- readRDS("/home/wangyang/workspace/scRNASeq/result/pit_dev_seuratv4_pc15.rds") #pbmc.all <- FindNeighbors(pbmc.all, dims = 1:15) #pbmc.all <- FindClusters(pbmc.all, resolution = 0.5) #pbmc.all <- RunUMAP(pbmc.all, dims = 1:15) #DimPlot(pbmc.all, reduction = "umap") DimPlot(pbmc.all, reduction = "umap",label = T) #### cell.used <- rownames(pbmc.all@meta.data[which(pbmc.all@meta.data$seurat_clusters %in% c(1,6,2,7,3,5)),]) length(cell.used) pbmc <- CreateSeuratObject(counts =count_all[,cell.used], # pbmc@assays$RNA@counts[1:5,1:5] project = c("endo"), min.cells =3,# min cell min.features = 2000,# cell's gene names.field = 1, names.delim = "W") pbmc # 20660 features across 2879 samples within 1 assay #### 质量控制: quality control pbmc[['percent.mt']] <- PercentageFeatureSet(pbmc, pattern = "^MT-") VlnPlot(pbmc, features = c("nFeature_RNA","nCount_RNA","percent.mt")) #### 标准化: normalization pbmc <- NormalizeData(pbmc, normalization.method = c("LogNormalize"),scale.factor = 100000) # pbmc@assays$RNA@data[1:5,1:5] #### 选取高异质性基因: highly variable genes pbmc <- FindVariableFeatures(pbmc,selection.method = "vst",nfeatures = 1500) #### 归一化: scaling all.genes <- rownames(pbmc) pbmc <- ScaleData(pbmc, features = all.genes) # pbmc@assays$RNA@scale.data #### 降维-二维平面可视化: dimension reduction, PCA, tSNE, UMAP variable_features <- VariableFeatures(object=pbmc) pbmc <- RunPCA(pbmc, features = variable_features) DimPlot(pbmc,reduction = "pca",dims = c(1,2), group.by = "orig.ident") FeaturePlot(pbmc,c("SOX2","TBX19"),reduction = "pca") pbmc <- JackStraw(pbmc, dims =25,num.replicate = 100) pbmc <- ScoreJackStraw(pbmc,dims = 1:25) JackStrawPlot(pbmc,dims = 1:25) pbmc <- FindNeighbors(pbmc, dims = 1:8) pbmc<- FindClusters(pbmc, resolution = 0.3) # resolution pbmc <- RunUMAP(pbmc, dims = 1:8) DimPlot(pbmc, reduction = "umap",label = T) pbmc<- FindClusters(pbmc, resolution = 1.8) # resolution pbmc <- RunUMAP(pbmc, dims = 1:8) DimPlot(pbmc, reduction = "umap",label = T) #### 细胞分群 marker gene pit.tf<-c("PITX1","MKI67","SOX2","PROP1","TBX19","POMC","POU1F1","GH2","PRL","TSHB","GATA2","NR5A1","ISL1","NEUROD1","FSHB","LHB") #"SOX2","LHX3","PROP1","HESX1",,'HES1' FeaturePlot(pbmc, features = pit.tf,reduction = c("umap"),label.size = 1) current.cluster.ids <- c(10,17,0,3,5,12,14,15,16,1,2,6,11,7,9,8,4,13) new.cluster.ids <- c("CC","CC","Stem","Stem","Corticotrope","Pro.PIT1","Pro.PIT1","Pro.PIT1","Pro.PIT1","Somatotrope","Somatotrope", "Somatotrope","Somatotrope","Lactotrope", "Thyrotrope","Pre.Gonado","Gonadotrope","Gonadotrope") pbmc@active.ident <- plyr::mapvalues(x = pbmc@active.ident, from = current.cluster.ids, to = new.cluster.ids) pbmc@active.ident<-factor(pbmc@active.ident,levels = unique(new.cluster.ids)) DimPlot(pbmc, reduction = "umap",label = TRUE) FeaturePlot(pbmc,c("PDGFRA"))