-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathData loading and cell clustering
152 lines (125 loc) · 6.16 KB
/
Data loading and cell clustering
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
library(dplyr)
library(Seurat)
library(ggplot2)
library(stringr)
library(ggrepel)
library(EnhancedVolcano)
library(GOplot)
library(DoubletFinder)
#Load raw matrix data
NPC.data <- Read10X(data.dir = "D:/Prof. Guan's Lab/Lanqi/NPC single-cell/NPC_all_mixed")
NPC <- CreateSeuratObject(counts = NPC.data, project = "NPC", min.cells = 3, min.features = 200)
#Plot matrix parameters
NPC[["percent.mt"]] <- PercentageFeatureSet(NPC, pattern = "^MT-")
VlnPlot(NPC, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
plot1 <- FeatureScatter(NPC, feature1 = "nCount_RNA", feature2 = "percent.mt")
plot2 <- FeatureScatter(NPC, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
CombinePlots(plots = list(plot1, plot2))
#Acquire a subset of the raw data
NPC <- subset(NPC, subset = nFeature_RNA > 200 & nFeature_RNA < 4000 & percent.mt < 15)
#Data normalization
NPC <- NormalizeData(NPC, normalization.method = "LogNormalize", scale.factor = 10000)
NPC <- FindVariableFeatures(NPC, selection.method = "vst", nfeatures = 3000)
# Identify the 15 most highly variable genes
top15 <- head(VariableFeatures(NPC), 15)
# plot variable features with and without labels
plot1 <- VariableFeaturePlot(NPC)
plot2 <- LabelPoints(plot = plot1, points = top15, repel = TRUE)
CombinePlots(plots = list(plot1, plot2))
#Scale data (for large data, may not need to scale all genes) and perform PCA
NPC <- ScaleData(NPC)
NPC <- RunPCA(NPC, features = VariableFeatures(object = NPC))
print(NPC[["pca"]], dims = 1:5, nfeatures = 5)
VizDimLoadings(NPC, dims = 1:2, reduction = "pca")
DimPlot(NPC, reduction = "pca")
DimHeatmap(NPC, dims = 1, cells = 500, balanced = TRUE)
ElbowPlot(NPC,ndims = 50)
#Perform umap reduction
NPC <- FindNeighbors(NPC, dims = 1:25)
NPC <- FindClusters(NPC, resolution = 1)
head(Idents(NPC), 5)
NPC <- RunUMAP(NPC, dims = 1:25)
DimPlot(NPC, reduction = "umap", pt.size = 1, label = TRUE, label.size = 6) + NoLegend()
#Second-round UMAP of T cells
T_cell <- subset(NPC, idents = "T cells")
ElbowPlot(T_cell,ndims = 50)
T_cell <- FindNeighbors(T_cell, dims = 1:25)
T_cell <- FindClusters(T_cell, resolution = 1)
head(Idents(T_cell), 5)
T_cell <- RunUMAP(T_cell, dims = 1:25)
DimPlot(T_cell, reduction = "umap", label = FALSE, pt.size = 1) +NoLegend()
#Identify DEGs
T_markers_TvsN <- FindMarkers(T_cell, ident.1 = "T_tumor", ident.2 = 'T_normal', test.use = "MAST", logfc.threshold = 0, max.cells.per.ident = 250)
#Volcano plot
EnhancedVolcano(T_markers_TvsN,
lab = rownames(T_markers_TvsN),
x = 'avg_logFC',
y = 'adj_p_val',
xlim = c(-1.5, 1.5),
ylim = c(0, 70),
pCutoff = 5e-2,
FCcutoff = 0.5,
legendPosition = 'Right')
#Relative abundance
a<- GetAssayData(object = T_cell, slot = 'scale.data')
s <-sapply(strsplit(as.character(colnames(a)),"-"),function(x)x[[2]])
g <- Idents(T_cell)
table(g,s)
#Identification of marker gene expression across clusters
T_markers <- c('CCR7','LEF1','SELL','TCF7','IFNG','NKG7','GZMA','GZMK','KLRG1','LAG3','PDCD1','HAVCR2')
DoHeatmap(T_cell_avg, features = T_markers, size = 3, draw.lines = FALSE)+ scale_fill_gradientn(colors = c("dodgerblue2",'cornflowerblue', "aliceblue", "darkorange", 'firebrick3'))
#Find correlated genes
gene_data <- GetAssayData(object = subset(T_cell, idents = c('1','10')), slot = 'scale.data')
gene_specific <- gene_data['GZMA',]
result <- NULL
for(i in 1:nrow(gene_data)){
result <-c(result,cor.test(gene_specific, gene_data[i,],method="spearman")$estimate)
cat(i,'out of',nrow(gene_data),'\n')
}
result<-data.frame (genes = rownames(gene_data),cor=result)
#Calculate the parameters for naive score
a <- c('CCR7', 'TCF7', 'LEF1', 'ACTN1','S1PR1', 'MAL', 'IL7R', 'PLAC8', 'SPINT2' ,'BACH2')
gene_specific <- gene_data[a,]
as.character(Idents(T_cell))
y<-rep(0,ncol(gene_specific))
y[as.character(Idents(T_cell))=="CD4+_Naive"|as.character(Idents(T_cell))=="CD8+_Naive"]<-1
cbind(y,as.character(Idents(T_cell)))
y <- as.numeric(y)
b <-glm(y~gene_specific[1,]+gene_specific[2,]+gene_specific[3,]+gene_specific[4,]+
gene_specific[5,]+gene_specific[6,]+gene_specific[7,]+gene_specific[8,]+
gene_specific[9,]+gene_specific[10,], family = "binomial")
summary(b)
#Extract B cell clusters and re-cluster using UMAP
B_cell=subset(NPC, idents = ("B cells"))
B_cell <- FindNeighbors(B_cell, dims = 1:20)
B_cell <- FindClusters(B_cell, resolution = 1)
head(Idents(B_cell), 5)
B_cell <- RunUMAP(B_cell, dims = 1:20)
DimPlot(B_cell, reduction = "umap", pt.size = 1, label = TRUE, label.size = 6) + NoLegend()
#Violin plots for selected genes
VlnPlot(B_cell, features = c('NR4A1'), pt.size = 0) + NoLegend()
VlnPlot(B_cell, features = c('EBI3'), pt.size = 0) + NoLegend()
VlnPlot(B_cell, features = c('CD19'), pt.size = 0) + NoLegend()
VlnPlot(B_cell, features = c('CD27'), pt.size = 0) + NoLegend()
VlnPlot(B_cell, features = c('IGHM'), pt.size = 0) + NoLegend()
VlnPlot(B_cell, features = c('IGHD'), pt.size = 0) + NoLegend()
VlnPlot(B_cell, features = c('MEF2B'), pt.size = 0) + NoLegend()
VlnPlot(B_cell, features = c('FCRL4'), pt.size = 0) + NoLegend()
VlnPlot(B_cell, features = c('MKI67'), pt.size = 0) + NoLegend()
#Calculate the normolized mean expression of genes in each cluster
orig.levels <- levels(B_cell)
Idents(B_cell) <- gsub(pattern = " ", replacement = "_", x = Idents(B_cell_remove_doublet))
orig.levels <- gsub(pattern = " ", replacement = "_", x = orig.levels)
levels(B_cell) <- orig.levels
B_cell_avg <- AverageExpression(B_cell, return.seurat = TRUE)
VlnPlot(B_cell, features = c('IFIT3'), pt.size = 0) + NoLegend()
#Extract myeloid cell clusters and re-cluster using UMAP
myeloid <- subset(NPC, idents = c('Myeloids'))
ElbowPlot(myeloid,ndims = 50)
myeloid <- FindNeighbors(myeloid, dims = 1:22)
myeloid <- FindClusters(myeloid, resolution = 1)
head(Idents(myeloid), 5)
myeloid <- RunUMAP(myeloid, dims = 1:25)
DimPlot(myeloid, reduction = "umap", pt.size = 1, label = TRUE, label.size = 6) + NoLegend()
DotPlot(myeloid, features = c('CXCL10','CD1E', 'LAMP3', 'LGALS2','CD163', 'MS4A4A','LGMN','CD14','FCGR3A',
'S100A8','S100A9','TGFB1', 'VEGFA','IDO1','IL10'), dot.scale = 10, dot.min = 0.1) + RotatedAxis()