-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathClustering.R
112 lines (80 loc) · 4.02 KB
/
Clustering.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
library(umap)
library(Seurat)
library(limma)
##############################################
setwd("C:/Users/micha/OneDrive/Documents/R/atheroexpress/analysis_draft")
#read in read count matrix files
raw.genecounts = read.table (file = "raw_counts.txt.minRib.txt.PC.txtt",header = T,sep = "\t",row.names = 1)
#correct for UMI saturation the log = ln!!!! no log10 no log2 its natural log; there are 4096 possible UMIs
raw.genecounts=round(-4096*(log(1-(raw.genecounts/4096))))
#check
raw.genecounts[1:10,1:10]
#exclude spike-ins
raw.genecounts=raw.genecounts[grep("ERCC",row.names(raw.genecounts),invert=TRUE),]
#exclude unvanted genes - historically those genes have mapping issues
raw.genecounts=raw.genecounts[grep("UGDH.AS1",row.names(raw.genecounts),invert=TRUE),]
raw.genecounts=raw.genecounts[grep("PGM2P2",row.names(raw.genecounts),invert=TRUE),]
raw.genecounts=raw.genecounts[grep("LOC100131257",row.names(raw.genecounts),invert=TRUE),]
raw.genecounts=raw.genecounts[grep("KCNQ1OT1",row.names(raw.genecounts),invert=TRUE),]
raw.genecounts=raw.genecounts[grep("MALAT1",row.names(raw.genecounts),invert=TRUE),]
raw.genecounts=raw.genecounts[grep("PGM5P2",row.names(raw.genecounts),invert=TRUE),]
raw.genecounts=raw.genecounts[grep("MAB21L3",row.names(raw.genecounts),invert=TRUE),]
raw.genecounts=raw.genecounts[grep("EEF1A1",row.names(raw.genecounts),invert=TRUE),]
raw.genecounts=raw.genecounts[grep("PGM5P2",row.names(raw.genecounts),invert=TRUE),]
#Scale before quantile normalization
raw.genecounts=t(t(raw.genecounts)/colSums(raw.genecounts))*100000
#quantile normalize
raw.genecounts=round(limma::normalizeQuantiles(raw.genecounts))
#create seurat object
plaque <- CreateSeuratObject(raw.genecounts, min.cells = 1,min.features = 1,
project = "AE")
#add correlation coefficients into the object
cor.matrix = cor(log2(raw.genecounts+10))
plaque[["mean.cor"]] <- colMeans(cor.matrix)
aa <- cor.matrix[order(row(cor.matrix), -cor.matrix)]
second.best= (matrix(aa, nrow(cor.matrix), byrow = TRUE)[, 2])
tenth.best= (matrix(aa, nrow(cor.matrix), byrow = TRUE)[, 10])
names(second.best)=row.names(cor.matrix)
plaque[["second.best.cor"]] <- second.best
names(tenth.best)=row.names(cor.matrix)
plaque[["tenth.best.cor"]] <- tenth.best
#get percentatge of mito genes
plaque[["percent.mt"]] <- PercentageFeatureSet(plaque, pattern = "^MT-")
percent.mt=PercentageFeatureSet(plaque, pattern = "^MT-")
VlnPlot(plaque, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
VlnPlot(plaque, features = c("second.best.cor", "tenth.best.cor", "mean.cor"), ncol = 3)
plaque<- FindVariableFeatures(plaque, selection.method = "vst", nfeatures = 5000)
top <- head(VariableFeatures(plaque), 35)
#plot variable genes
plot1 <- VariableFeaturePlot(plaque)
plot1
plot2 <- LabelPoints(plot = plot1, points = top, repel = TRUE)
plot2
#scale data based on all genes
all.genes <- rownames(plaque)
plaque <- ScaleData(plaque, features = c(all.genes))
#runPCAS
plaque <- RunPCA(plaque, features = VariableFeatures(object = plaque))
VizDimLoadings(plaque, dims = 1:2, reduction = "pca")
DimPlot(plaque, reduction = "pca")
DimHeatmap(plaque, dims = 1, cells = 500, balanced = TRUE)
DimHeatmap(plaque, dims = 1:9, cells = 500, balanced = TRUE)
plaque <- JackStraw(plaque, num.replicate = 100)
plaque <- ScoreJackStraw(plaque, dims = 1:20)
JackStrawPlot(plaque, dims = 1:20)
ElbowPlot(plaque)
plaque <- NormalizeData(object = plaque, normalization.method = "LogNormalize",
scale.factor = 10000)
#find clusters and make tSNE
plaque <- FindNeighbors(plaque, dims = 1:12)
plaque <- FindClusters(plaque, resolution = 0.5)
head(Idents(plaque), 10)
plaque <- RunTSNE(object = plaque, dims.use = 1:12, do.fast = TRUE)
plaque <- RunUMAP(object = plaque, dims = 1:12)
#DimPlot(plaque)
DimPlot(plaque, reduction = "umap")
DimPlot(plaque, reduction = "pca")
DimPlot(plaque, reduction = "tsne")
TSNEPlot(object = plaque)
plaque.markers <- FindAllMarkers(object = plaque, only.pos = TRUE, min.pct = 0.2,
thresh.use = 0.2)