forked from jmzeng1314/scRNA_10X
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathexe01.R
More file actions
168 lines (145 loc) · 5.2 KB
/
exe01.R
File metadata and controls
168 lines (145 loc) · 5.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
rm(list = ls()) # clear the environment
#load all the necessary libraries
options(warn=-1) # turn off warning message globally
suppressMessages(library(Seurat))
library(ggfortify)
load(file = 'PBMC_RespD376_for_DEG.Rdata')
count_matrix[1:4,1:4]
dim(count_matrix)
table(cluster)
table(cluster)
col=rainbow(2)[as.factor(cluster)]
table(col)
## only tSNE
if(T){
choose_T_counts=count_matrix
choose_T_counts=choose_T_counts[apply(choose_T_counts,1, sd)>0,]
choose_T_counts=choose_T_counts[names(tail(sort(apply(choose_T_counts,1, sd)),1000)),]
pca_dat <- prcomp(t(choose_T_counts), scale. = TRUE)
p=autoplot(pca_dat,col=col) + theme_classic() + ggtitle('PCA plot')
print(p)
str(pca_dat)
pca_dat$x[1:4,1:4]
new_pca <- FactoMineR::PCA(
t(choose_T_counts),
ncp = ncol(choose_T_counts),
graph=FALSE
)
set.seed(42)
# TSNE即t-distributed Stochastic Neighbor Embedding.
library(Rtsne)
tsne_out <- Rtsne(pca_dat$x[,1:5], perplexity = 10,
pca=FALSE,
max_iter=2000,
verbose=TRUE) # Run TSNE
tsnes=tsne_out$Y
colnames(tsnes) <- c("tSNE1", "tSNE2")
ggplot(tsnes, aes(x = tSNE1, y = tSNE2))+ geom_point(col=col)
}
## for seurat 2
if(T){
PBMC <- CreateSeuratObject(raw.data = count_matrix,
min.cells = 1, min.genes = 0,
project = '10x_PBMC') # already normalized
# Add meta.data (nUMI and cluster)
PBMC <- AddMetaData(object = PBMC,
metadata = apply(count_matrix, 2, sum),
col.name = 'nUMI_raw')
PBMC <- AddMetaData(object = PBMC,
metadata = cluster,
col.name = 'cluster')
sce=PBMC
VlnPlot(object = sce,
features.plot = c("nGene", "nUMI"),
group.by = 'cluster', nCol = 2)
tmp=sce@meta.data
# 需要搞清楚去除了什么 ScaleData
PBMC <- ScaleData(object = PBMC,
vars.to.regress = c('nGene',"nUMI"),
model.use = 'linear',
use.umi = FALSE)
PBMC <- FindVariableGenes(object = PBMC,
mean.function = ExpMean,
dispersion.function = LogVMR,
x.low.cutoff = 0.0125,
x.high.cutoff = 3, y.cutoff = 0.5)
length( PBMC@var.genes)
PBMC <- RunPCA(object = PBMC,
pc.genes = PBMC@var.genes)
PBMC <- RunTSNE(object = PBMC, dims.use = 1:20,
perplexity = 25)
TSNEPlot(PBMC, group.by = 'cluster')
TSNEPlot(PBMC )
}
# for monocle
if(T){
## 首先构建 monocle 对象
library(monocle)
expr_matrix <- as.matrix(count_matrix)
sample_sheet <- data.frame(cells=names(count_matrix),
cellType=cluster)
rownames(sample_sheet)<- names(count_matrix)
gene_annotation <- as.data.frame(rownames(count_matrix))
rownames(gene_annotation)<- rownames(count_matrix)
colnames(gene_annotation)<- "genes"
pd <- new("AnnotatedDataFrame", data = sample_sheet)
fd <- new("AnnotatedDataFrame", data = gene_annotation)
# Create a CellDataSet from the relative expression levels
HSMM <- newCellDataSet(
as(expr_matrix, "sparseMatrix"),
phenoData = pd,
featureData = fd,
lowerDetectionLimit=0.5,
expressionFamily=negbinomial.size()
)
HSMM <- detectGenes(HSMM, min_expr = 1)
# HSMM <- HSMM[fData(HSMM)$num_cells_expressed > 5, ]
HSMM <- HSMM[fData(HSMM)$num_cells_expressed > 1, ]
HSMM
HSMM <- estimateSizeFactors(HSMM)
HSMM <- estimateDispersions(HSMM)
HSMM
cds=HSMM
# 单细胞转录组最重要的就是把细胞分群啦,这里可供选择的算法非常多,我们首先演示PCA结果。
# 并不是所有的基因都有作用,所以先进行挑选,合适的基因用来进行聚类。
disp_table <- dispersionTable(cds)
unsup_clustering_genes <- subset(disp_table, mean_expression >= 0.1)
cds <- setOrderingFilter(cds, unsup_clustering_genes$gene_id)
plot_ordering_genes(cds)
# plot_pc_variance_explained(cds, return_all = F) # norm_method='log'
# 其中 num_dim 参数选择基于上面的PCA图
cds <- reduceDimension(cds, max_components = 2, num_dim = 6,
reduction_method = 'tSNE', verbose = T)
cds <- clusterCells(cds, num_clusters = 3)
plot_cell_clusters(cds, 1, 2, color = "cellType")
table(pData(cds)$Cluster,cluster)
plot_cell_clusters(cds, 1, 2 )
}
## for scater
if(T){
library(scater)
ct=count_matrix
pheno_data <- data.frame(cluster)
pheno_data$Cell=rownames(pheno_data)
# data("sc_example_counts")
# data("sc_example_cell_info")
# ct=sc_example_counts
# pheno_data=sc_example_cell_info
# 参考 https://bioconductor.org/packages/release/bioc/vignettes/scater/inst/doc/vignette-intro.R
dim(ct)
dim(pheno_data)
sce <- SingleCellExperiment(
assays = list(counts = as.matrix(ct)),
colData = pheno_data
)
sce
# 举例
exprs(sce) <- log2(
calculateCPM(sce ) + 1)
sce <- runPCA(sce)
# 这里并没有进行任何基因的挑选,就直接进行了PCA,与 seurat包不一样。
reducedDimNames(sce)
## --------------
plotReducedDim(sce, use_dimred = "PCA",
colour_by = "cluster" )
}