Skip to content

018‐ scATAC‐Seq

Alireza Khodadadi-Jamayran edited this page Nov 5, 2025 · 2 revisions

Single cell ATAC sequencing with scRNA-Seq (scATAC-Seq)

library("iCellR")
my.data <- load10x("filtered_gene_bc_matrices/")

# see the row names
row.names(my.data)

# get peak names
ATAC <- grep("^chr",row.names(my.data),value=T)

# get scATAC data
MyATAC <- subset(my.data, row.names(my.data) %in% ATAC)
head(MyATAC)[1:3]
#                   AAACAGCCAAGTGAAC.1 AAACAGCCACTGACCG.1 AAACAGCCATGATTGT.1
#chr1.181218.181695                  0                  0                  1
#chr1.191296.191699                  0                  0                  0
#chr1.629770.630129                  0                  0                  0
#chr1.633806.634251                  0                  0                  0
#chr1.778422.779040                  0                  0                  0
#chr1.827306.827702                  0                  0                  0

dim(MyATAC)
# [1] 21923  6326

# get RNA data
MyRNAs <- subset(my.data, !row.names(my.data) %in% ATAC)
head(MyRNAs)[1:3]
#            AAACAGCCAAGTGAAC.1 AAACAGCCACTGACCG.1 AAACAGCCATGATTGT.1
#MIR1302.2HG                  0                  0                  0
#FAM138A                      0                  0                  0
#OR4F5                        0                  0                  0
#AL627309.1                   0                  0                  0
#AL627309.3                   0                  0                  0
#AL627309.2                   0                  0                  0

dim(MyRNAs)
#[1] 36633  6326

# make iCellR object
my.obj <- make.obj(MyRNAs)

# add ATAC-Seq data
my.obj@atac.raw <- MyATAC
my.obj@atac.main <- MyATAC

# check your object
my.obj


###################################
,--. ,-----.       ,--.,--.,------.
`--''  .--./ ,---. |  ||  ||  .--. '
,--.|  |    | .-. :|  ||  ||  '--'.'
|  |'  '--'\   --. |  ||  ||  |
`--' `-----' `----'`--'`--'`--' '--'
###################################
An object of class iCellR version: 1.6.2
Raw/original data dimentions (rows,columns): 24127,6326
Data conditions: no conditions/single sample
Row names: MIR1302.2HG,TTLL10.AS1,MRPL20.AS1 ...
Columns names: AAACAGCCAAGTGAAC.1,AAACAGCCACTGACCG.1,AAACAGCCATGATTGT.1 ...
###################################
   QC stats performed:FALSE, PCA performed:FALSE
   Clustering performed:FALSE, Number of clusters:0
   tSNE performed:FALSE, UMAP performed:FALSE, DiffMap performed:FALSE
   Main data dimensions (rows,columns): 0,0
   Normalization factors:,...
   Imputed data dimensions (rows,columns):0,0
############## scVDJ-seq ###########
VDJ data dimentions (rows,columns):0,0
############## CITE-seq ############
   ADT raw data  dimensions (rows,columns):0,0
   ADT main data  dimensions (rows,columns):0,0
   ADT columns names:...
   ADT row names:...
############## scATAC-seq ############
   ATAC raw data  dimensions (rows,columns):21923,6326
   ATAC main data  dimensions (rows,columns):21923,6326
   ATAC columns names:AAACAGCCAAGTGAAC.1...
   ATAC row names:chr1.181218.181695...
############## Spatial ###########
Spatial data dimentions (rows,columns):0,0
########### iCellR object ##########

From here do the regular scRNA-seq as expleind above. See example below

# QC
my.obj <- qc.stats(my.obj,
   s.phase.genes = s.phase, 
   g2m.phase.genes = g2m.phase)

# plot as mentioned above

# filter 
my.obj <- cell.filter(my.obj,
   min.mito = 0,
   max.mito = 0.07 ,
   min.genes = 500,
   max.genes = 4000,
   min.umis = 0,
   max.umis = Inf)

# normalize RNA
my.obj <- norm.data(my.obj, norm.method = "ranked.glsf", top.rank = 500) 

# normalize ADT
my.obj <- norm.adt(my.obj)

# gene stats
my.obj <- gene.stats(my.obj, which.data = "main.data")

# find genes for PCA
my.obj <- make.gene.model(my.obj, my.out.put = "data",
   dispersion.limit = 1.5, 
   base.mean.rank = 500, 
   no.mito.model = T, 
   mark.mito = T, 
   interactive = F,
   no.cell.cycle = T,
   out.name = "gene.model")

# run PCA and the rest is as above

my.obj <- run.pca(my.obj, method = "gene.model", gene.list = my.obj@gene.model,data.type = "main")

# tSNE
my.obj <- run.pc.tsne(my.obj, dims = 1:10)

# UMAP
my.obj <- run.umap(my.obj, dims = 1:10)

# KNetL
my.obj <- run.knetl(my.obj, dims = 1:20, zoom = 200, dim.redux = "umap") 

# clustering based on KNetL

my.obj <- iclust(my.obj, sensitivity = 200, data.type = "knetl")

# clustering based on PCA

# my.obj <- iclust(my.obj, sensitivity = 100, data.type = "pca", dims=1:10) 

# check clusters and adjust if needed (optinal)
# cluster.plot(my.obj,plot.type = "knetl",interactive = F,cell.size = 0.5,cell.transparency = 1, anno.clust=T)
# my.obj <- change.clust(my.obj, change.clust = 3, to.clust = 4)
# my.obj <- change.clust(my.obj, change.clust = 3, to.clust = 10)

# order clusters
my.obj <- clust.ord(my.obj,top.rank = 500, how.to.order = "distance")


# plot
A= cluster.plot(my.obj,plot.type = "pca",interactive = F,cell.size = 0.5,cell.transparency = 1, anno.clust=T)
B= cluster.plot(my.obj,plot.type = "umap",interactive = F,cell.size = 0.5,cell.transparency = 1, anno.clust=T)
C= cluster.plot(my.obj,plot.type = "tsne",interactive = F,cell.size = 0.5,cell.transparency = 1, anno.clust=T)
D= cluster.plot(my.obj,plot.type = "knetl",interactive = F,cell.size = 0.5,cell.transparency = 1, anno.clust=T)

library(gridExtra)
png('AllClusts.png', width = 12, height = 10, units = 'in', res = 300)
grid.arrange(A,B,C,D)
dev.off()

# save object
save(my.obj, file = "my.obj.Robj")

# find markers 
marker.genes <- findMarkers(my.obj,
data.type = "main",
fold.change = 2,
padjval = 0.1,
uniq = F,
positive = T)

marker.genes1 <- cbind(row = rownames(marker.genes), marker.genes)
write.table((marker.genes1),file="marker.genes.tsv", sep="\t", row.names =F)

MyGenes <- top.markers(marker.genes, topde = 10, min.base.mean = 0.2, filt.ambig = F)
MyGenes <- unique(MyGenes)

png('heatmap_gg_genes.png', width = 10, height = 10, units = 'in', res = 300)
heatmap.gg.plot(my.obj, gene = MyGenes, interactive = F, cluster.by = "clusters",cell.sort = F, conds.to.plot = NULL)
dev.off()

Work on scATAC data (normalize and find marker peaks for each cluster)

# normalize ACAT
my.obj <- norm.data(my.obj, norm.method = "ranked.glsf", top.rank = 500, ATAC.data = TRUE, ATAC.filter = TRUE) 

marker.peaks <- findMarkers(my.obj,
 data.type = "atac",
 fold.change = 2,
 padjval = 0.1,
 uniq = F,
 positive = T)
 
marker.peaks1 <- cbind(row = rownames(marker.peaks), marker.peaks)
write.table((marker.peaks1),file="marker.peaks.tsv", sep="\t", row.names =F)

head(marker.peaks1)
#                                             row   baseMean     baseSD
#chr17.64986035.64986113   chr17.64986035.64986113 0.01217359 0.18257818
#chr1.26542287.26542678     chr1.26542287.26542678 0.05828764 0.80077656
#chr4.8199063.8199275         chr4.8199063.8199275 0.04280424 0.56205649
#chr20.50274929.50275237   chr20.50274929.50275237 0.04684509 0.63361490
#chr2.218382038.218382236 chr2.218382038.218382236 0.03122394 0.31153105
#chr11.1760469.1760814       chr11.1760469.1760814 0.07050175 0.63322284
#                         AvExpInCluster AvExpInOtherClusters foldChange
#chr17.64986035.64986113      0.07868394          0.002346603  33.530999
#chr1.26542287.26542678       0.33849093          0.016887273  20.044144
#chr4.8199063.8199275         0.24803497          0.012481148  19.872769
#chr20.50274929.50275237      0.27007513          0.013862584  19.482308
#chr2.218382038.218382236     0.17736269          0.009631770  18.414340
#chr11.1760469.1760814        0.39043782          0.023230813  16.806894
#                         log2FoldChange         pval         padj clusters
#chr17.64986035.64986113        5.067424 1.187697e-05 2.875415e-02        1
#chr1.26542287.26542678         4.325109 3.653916e-05 8.539202e-02        1
#chr4.8199063.8199275           4.312721 6.059691e-06 1.489472e-02        1
#chr20.50274929.50275237        4.284093 2.871301e-05 6.779143e-02        1
#chr2.218382038.218382236       4.202758 6.359572e-10 1.677019e-06        1
#chr11.1760469.1760814          4.070981 6.813447e-11 1.800794e-07        1
#                                             gene    cluster_1   cluster_2
#chr17.64986035.64986113   chr17.64986035.64986113 0.0786839378 0.000000000
#chr1.26542287.26542678     chr1.26542287.26542678 0.3384909326 0.008895062
#chr4.8199063.8199275         chr4.8199063.8199275 0.2480349741 0.038672840
#chr20.50274929.50275237   chr20.50274929.50275237 0.2700751295 0.028703704
#chr2.218382038.218382236 chr2.218382038.218382236 0.1773626943 0.000000000
#chr11.1760469.1760814       chr11.1760469.1760814 0.3904378238 0.004537037
#                           cluster_3    cluster_4    cluster_5    cluster_6
#chr17.64986035.64986113  0.000000000 0.0000000000 0.0006934750 0.0010038760
#chr1.26542287.26542678   0.031485714 0.0029244992 0.0052261002 0.0041264535
#chr4.8199063.8199275     0.000000000 0.0007226502 0.0027450683 0.0113212209
#chr20.50274929.50275237  0.027092857 0.0121741140 0.0041820941 0.0051516473
#chr2.218382038.218382236 0.004292857 0.0042095532 0.0006722307 0.0038561047
#chr11.1760469.1760814    0.071678571 0.0177288136 0.0141820941 0.0061099806
#                           cluster_7   cluster_8
#chr17.64986035.64986113  0.003099029 0.009080357
#chr1.26542287.26542678   0.047110680 0.044484375
#chr4.8199063.8199275     0.014819417 0.028651786
#chr20.50274929.50275237  0.025499029 0.028765625
#chr2.218382038.218382236 0.011988350 0.035508929
#chr11.1760469.1760814    0.042093204 0.050708705

MyGenes <- top.markers(marker.peaks, topde = 10, min.base.mean = 0.2, filt.ambig = F)
MyGenes <- unique(MyGenes)

png('heatmap_gg_peaks.png', width = 10, height = 10, units = 'in', res = 300)
heatmap.gg.plot(my.obj, gene = MyGenes, interactive = F, cluster.by = "clusters",cell.sort = F, conds.to.plot = NULL, data.type = "atac")
dev.off()

my.obj <- run.impute(my.obj,data.type = "knetl", nn = 10, ATAC.data = FALSE)


png('heatmap_gg_peaks.png', width = 10, height = 10, units = 'in', res = 300)
heatmap.gg.plot(my.obj, gene = MyGenes, interactive = F, cluster.by = "clusters",cell.sort = F, conds.to.plot = NULL, data.type = "atac.imputed")
dev.off()

## you can also find avarage peak intensity per cluster

my.obj <- clust.avg.exp(my.obj, data.type = "atac")
head(my.obj@clust.avg)

#gene  cluster_1  cluster_2	...
#chr1.100037799.100038931 0.38238731 0.36750000	...
#chr1.100132733.100133298 0.11195725 1.13593827	...
#chr1.100249637.100250160 0.09851425 0.09511728	...
#chr1.100265992.100266479 0.06768394 0.17707407	...
#chr1.10032488.10033387 0.35273705 0.14885802	...
#chr1.100352150.100352921 0.12006088 0.00000000	...

# find out which cluster has the highest number 

dat <- as.data.frame(t((my.obj@clust.avg)[,-1]))
dat <- hto.anno(hto.data = dat)

head(dat$assignment.annotatio)
#[1] cluster_1 cluster_2 cluster_4 cluster_2 cluster_3 cluster_4
#8 Levels: cluster_1 cluster_2 cluster_3 cluster_4 cluster_5 ... cluster_8

Peak analysis

# make a bed file per cluster from the marker.peaks file you made up here
make.bed(marker.peaks)

# load packages 
library(ChIPseeker)
library(clusterProfiler)

# load genome
require(TxDb.Hsapiens.UCSC.hg38.knownGene)
txdb <- TxDb.Hsapiens.UCSC.hg38.knownGene
Anno="org.Hs.eg.db"

# load bed files
Mylist1 = list.files(pattern=".bed")
Mylist1
 
Mylist <- as.list(Mylist1)
NAMES <- gsub('_peaks.bed','',Mylist1)
names(Mylist) <- NAMES
files <- Mylist
files

# perform analysis (example)
promoter <- getPromoters(TxDb=txdb, upstream=3000, downstream=3000)
tagMatrixList <- lapply(files, getTagMatrix, windows=promoter)

pdf("Plot_ProfileLineAll.pdf")
plotAvgProf(tagMatrixList, xlim=c(-3000, 3000))
dev.off()

pdf('Plot_ProfileLine.pdf', width = 8, height = 10)
plotAvgProf(tagMatrixList, xlim=c(-3000, 3000), facet="row")
dev.off()

pdf("Plot_heatmaps.pdf", width = 50, height = 6)
tagHeatmap(tagMatrixList, xlim=c(-3000, 3000), color=NULL)
dev.off()
 
# annotate
peakAnnoList <- lapply(files, annotatePeak, TxDb=txdb,
                       tssRegion=c(-3000, 3000), verbose=FALSE)
# plot annotatin
pdf("Plot_AnnoBar.pdf")
plotAnnoBar(peakAnnoList)
dev.off()

############### peak annotation

peakAnnoList <- lapply(files, annotatePeak, TxDb=txdb,
                       tssRegion=c(-3000, 3000), verbose=FALSE, annoDb=Anno)

capture.output(peakAnnoList, file = "peakAnnoList.txt")

genes = lapply(peakAnnoList, function(i) as.data.frame(i))

lapply(1:length(genes), function(i) write.table(genes[[i]],
                                      file = paste0(names(genes[i]), ".xls"),
                                      row.names = FALSE, sep="\t"))

Merging scATAC files with different intervals (as dipicted in bedtools website)

# Let's say you have 2 files that you need to merege

# example file
C <- load10x("count-JJsn_C_cDNA/",gene.name = 2)
M <- load10x("count-JJsn_M_cDNA/",gene.name = 2)

ATAC.C <- grep("^chr",row.names(C),value=T)
ATAC.M <- grep("^chr",row.names(M),value=T)

MyATAC.C <- subset(C, row.names(C) %in% ATAC.C)
MyATAC.M <- subset(M, row.names(M) %in% ATAC.M)

head(MyATAC.C)[1:3]

C <- MyATAC.C
M <- MyATAC.M

dim(C)
#[1] 58678  4211
dim(M)
#[1] 57776  4736

f1 <- row.names(C)
f2 <- row.names(M)

all.peaks <- c(f1,f2)
length(all.peaks)
#[1] 116454

# make a bed file
chr <- as.character(as.matrix(data.frame(do.call('rbind', strsplit(as.character(all.peaks),'.',fixed=TRUE)))[1]))
start <- data.frame(do.call('rbind', strsplit(as.character(all.peaks),'.',fixed=TRUE)))[2]
end <- data.frame(do.call('rbind', strsplit(as.character(all.peaks),'.',fixed=TRUE)))[3]

DAT <- as.data.frame(chr)
DAT$start <- as.numeric(as.matrix(start))
DAT$end <- as.numeric(as.matrix(end))
head(DAT)
#   chr  start    end
#1 chr1 181218 181695
#2 chr1 191296 191699
#3 chr1 629770 630129
#4 chr1 633806 634251
#5 chr1 778422 779040
#6 chr1 827306 827702

# make Genomic Ranges
library("GenomicRanges")

all.gr <- GRanges(seqnames=DAT$chr,ranges=IRanges(start=DAT$start,end=DAT$end))

all.gr
#GRanges object with ?? ranges and 0 metadata columns:
#       seqnames          ranges strand
#          <Rle>       <IRanges>  <Rle>
#   [1]     chr1   181218-181695      *
#   [2]     chr1   191296-191699      *
#   [3]     chr1   629770-630129      *
#   [4]     chr1   633806-634251      *
#   [5]     chr1   778422-779040      *
#   ...      ...             ...    ...
#  [52]     chr1 1303892-1306216      *
#  [53]     chr1 1307242-1309359      *
#  [54]     chr1 1324425-1325236      *
#  [55]     chr1 1348940-1349958      *
#  [56]     chr1 1372031-1372220      *
#  -------
#  seqinfo: 1 sequence from an unspecified genome; no seqlengths

################## sort and merge the peaks

mrg <- reduce(all.gr)

#Before merge
length(all.gr)
#[1] 116454
#after merge
length(mrg)
#[1] 71426

########################## choose file and give name
MyFile <- f1
name="f1_new.bed"
########################## copy paste the code here to make a new bed file
########################## the new bed has the old and new intervals (new intervals to be replaced with old)

chr <- as.character(as.matrix(data.frame(do.call('rbind', strsplit(as.character(MyFile),'.',fixed=TRUE)))[1]))
start <- data.frame(do.call('rbind', strsplit(as.character(MyFile),'.',fixed=TRUE)))[2]
end <- data.frame(do.call('rbind', strsplit(as.character(MyFile),'.',fixed=TRUE)))[3]
# make a bed file
DAT <- as.data.frame(chr)
DAT$start <- as.numeric(as.matrix(start))
DAT$end <- as.numeric(as.matrix(end))
MyFile <- DAT

# make intrval file to replace to new regions
MyFile.gr <- GRanges(seqnames=MyFile$chr,ranges=IRanges(start=MyFile$start,end=MyFile$end))

OverLap <- findOverlaps(MyFile.gr, mrg)

OLD1 <- (OverLap@from)
NEW1 <- (OverLap@to)

OLD = MyFile.gr[OLD1]
NEW = mrg[NEW1]

chr <- as.character(OLD@seqnames)
DAT <- as.data.frame(chr)
DAT$start <- OLD@ranges@start
DAT$end <- (OLD@ranges@start + OLD@ranges@width) - 1

DAT$new.chr<- as.character(NEW@seqnames)
DAT$new.start <- NEW@ranges@start
DAT$new.end <- (NEW@ranges@start + NEW@ranges@width) - 1

head(DAT)
#   chr   start     end new.chr new.start new.end
#1 chr1 3247563 3248453    chr1   3247563 3248453
#2 chr1 3360706 3361554    chr1   3360706 3361554
#3 chr1 3552372 3553230    chr1   3552372 3553230
#4 chr1 3645171 3646034    chr1   3645093 3646034
#5 chr1 3670318 3671081    chr1   3670318 3671090
#6 chr1 3671326 3672230    chr1   3671314 3672230

dim(DAT)
#58678     6
length(MyFile.gr)
#58678

# diff

have = mrg[unique(NEW1)]
dontHave = mrg[-unique(NEW1)]

ADD <- dontHave
L <- length(as.character(ADD@seqnames))
chr <-rep("NA",L)
DAT1 <- as.data.frame(chr)
DAT1$start <- rep("NA",L)
DAT1$end <- rep("NA",L)
DAT1$new.chr<- as.character(ADD@seqnames)
DAT1$new.start <- ADD@ranges@start
DAT1$new.end <- (ADD@ranges@start + ADD@ranges@width) - 1

Final.DAT <- rbind(DAT,DAT1)

##### Write

write.table(Final.DAT,name,row.names=FALSE,sep="\t", quote = FALSE)

### reapeat this process for f2 (M) as well

# The first 3 columns are the original peaks and the last 3 are the ones that need to be replaced with original one. The NA peaks would also get the new peak ids but in the matrix the cells will have 0 for expressions. To do this use the iCellR function replace.peak.id.

MyATAC.C <- replace.peak.id(atac.data=MyATAC.C, bed.file = Final.DAT.C)
MyATAC.M <- replace.peak.id(atac.data=MyATAC.M, bed.file = Final.DAT.M)

# finally aggregate the samples and add to iCellR object

my.atac.data <- data.aggregation(samples = c("MyATAC1","MyATAC2","MyATAC3"),
	condition.names = c("WT","KO","Ctrl"))
	
# add ATAC-Seq data
my.obj@atac.raw <- my.atac.data
my.obj@atac.main <- my.atac.data

Clone this wiki locally