Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 4 additions & 8 deletions R/readAnnotate.R
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,9 @@ bed12ToExons<-function(ref){
as.integer(unlist(strsplit(as.character(ref$V11),"," ))))
rep.ref = ref[rep(1:nrow(ref),ref[,10]),] # replicate rows occurs as many as its exon number

exon.id = Map(f = function(x, y){ if(x == "+") 1:y else y:1 }, ref[, 6], ref[, 10])
exon.id = unlist(exon.id)

exon.id = unlist( mapply( function(x,y)
if(x=="+"){return(1:y)}
else{return(y:1)},
ref[,6],ref[,10]))
rep.ref$V5=exon.id

rep.ref$V3 = rep.ref$V2+b.start.size[,1]+b.start.size[,2] # calculate exon start and ends
Expand Down Expand Up @@ -69,10 +67,8 @@ bed12ToIntrons<-function(ref){
# replicate rows occurs as many as its exon number
rep.ref = ref[rep(1:nrow(ref),ref[,10]),]

exon.id = unlist( mapply( function(x,y)
if(x=="+"){return(1:y)}
else{return(y:1)} ,
ref[,6],ref[,10] ) )
exon.id = Map(f = function(x, y){ if(x == "+") 1:y else y:1 }, ref[, 6], ref[, 10])
exon.id = unlist(exon.id)
rep.ref$V5 = exon.id

# calculate exon start and ends
Expand Down
28 changes: 17 additions & 11 deletions R/readData.R
Original file line number Diff line number Diff line change
Expand Up @@ -474,45 +474,51 @@ setGeneric("readTranscriptFeatures",
setMethod("readTranscriptFeatures",
signature(location = "character"),
function(location,remove.unusual,up.flank ,down.flank ,unique.prom){
# readBed6

# readBed12
message('Reading the table...\r')
bed=readTableFast(location,header=FALSE,skip="auto")
bed = readTableFast(location, header = FALSE, skip = "auto")

if(ncol(bed) < 12){
stop("readTranscriptFeatures require 12 column bed file, only ",
ncol(bed), " columns found.", call. = FALSE)
}

if(remove.unusual)
bed=bed[grep("_", as.character(bed[,1]),invert=TRUE),]

# introns
message('Calculating intron coordinates...\r')
introns = convertBed2Introns(bed)
# exons
message('Calculating exon coordinates...\r')
exons = convertBed2Exons(bed)

# get the locations of TSSes
message('Calculating TSS coordinates...\r')
tss=bed
# + strand
tss[tss$V6=="+",3] = tss[tss$V6=="+",2]
# - strand
tss[tss$V6=="-",2]=tss[tss$V6=="-",3]

tssg = GRanges(seqnames=as.character(tss$V1),
ranges=IRanges(start=tss$V2, end=tss$V3),
strand=as.character(tss$V6),
score=rep(0,nrow(tss)),
name=tss$V4)

message('Calculating promoter coordinates...\r')
# get the locations of promoters
# + strand
bed[bed$V6=="+",3]=bed[bed$V6=="+",2]+down.flank
bed[bed$V6=="+",2]=bed[bed$V6=="+",2]-up.flank

# - strand
bed[bed$V6=="-",2]=bed[bed$V6=="-",3]-down.flank
bed[bed$V6=="-",3]=bed[bed$V6=="-",3]+up.flank


if(! unique.prom){
prom.df = (bed[,c(1,2,3,4,6)])
prom = GRanges(seqnames=as.character(prom.df$V1),
Expand All @@ -528,7 +534,7 @@ setMethod("readTranscriptFeatures",
score=rep(0,nrow(prom.df)),
name=rep(".",nrow(prom.df)) )
}

message('Outputting the final GRangesList...\r\n')
GRangesList(exons=exons,introns=introns,promoters=prom,TSSes=tssg)
})
Expand Down
2 changes: 2 additions & 0 deletions inst/unitTests/bed12.bed
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
ch1 10000 20000 f1 0 + 10000 20000 0 3 1000,1000,1000, 2000,4000,6000,
ch1 20000 30000 f2 0 - 20000 30000 0 3 1000,1000,1000, 2000,4000,6000,
2 changes: 2 additions & 0 deletions inst/unitTests/bed6.bed
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
ch1 10000 20000 f1 0 +
ch1 20000 30000 f2 0 -
53 changes: 53 additions & 0 deletions inst/unitTests/test_annotateWithGeneParts.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
test_annotateWithGeneParts = function(){
# bed with two features:
# 10 000 - 20 000 (exons: 12 000 - 13 000, 14 000 - 15 000, 16 000 - 17 000)
bed12 = system.file("unitTests/bed12.bed", package = "genomation")
features = suppressMessages(readTranscriptFeatures(bed12))

# 4 features with respect to above bed12:
# intergenic
# promoter
# intronic
# exonic
target = GRanges(
rep('ch1', 4),
IRanges(c(8001, 10001, 13001, 14001), c(8500, 10500, 13500, 14500))
)
result = annotateWithGeneParts(target, features)

members_expected = matrix(
c(0, 0, 0,
1, 0, 0,
0, 0, 1,
0, 1, 0),
ncol = 3, nrow = 4, byrow = TRUE,
dimnames = list(NULL, c("prom", "exon", "intron"))
)
checkIdentical(members_expected, result@members)


# distance is calculated (for both positive strand):
# start/end - TSS - 1
# if > 0, then +2 (last base of codon)
# smaller of abs(dist_start), abs(dist_end)
dist_to_tss_expected = data.frame(
target.row = c(1L, 2L, 3L, 4L),
dist.to.feature = c(-1501, 0, 3002, 4002),
feature.name = rep("f1", 4),
feature.strand = factor("+", levels = c("+", "-", "*")),
row.names = make.unique(rep("1", 4))
)

checkIdentical(dist_to_tss_expected, [email protected])

df = as.data.frame(result)
df_expected = data.frame(
"prom" = c(0, 1, 0, 0),
"exon" = c(0, 0, 0, 1),
"intron" = c(0, 0, 1, 0),
"dist.to.feature" = c(-1501, 0, 3002, 4002),
"feature.name" = rep("f1", 4),
row.names = 1:4
)
checkIdentical(df_expected, df)
}
58 changes: 58 additions & 0 deletions inst/unitTests/test_readTranscriptFeatures.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
test_readTranscriptFeatures = function(){
bed6 = system.file("unitTests/bed6.bed", package = "genomation")
checkException(suppressMessages(readTranscriptFeatures(bed6)))

bed12 = system.file("unitTests/bed12.bed", package = "genomation")
features = suppressMessages(readTranscriptFeatures(bed12))

checkTrue(inherits(features, "GenomicRangesList"))
checkIdentical(c("exons", "introns", "promoters", "TSSes"), names(features))

exons = as.data.frame(features$exons)
exons_expected = data.frame(
seqnames = factor("ch1"),
start = c(12001L, 14001L, 16001L, 22001L, 24001L, 26001L),
end = c(13000L, 15000L, 17000L, 23000L, 25000L, 27000L),
width = 1000L,
strand = factor(c("+", "+", "+", "-", "-", "-"), levels = c("+", "-", "*")),
score = as.numeric(c(1:3, 3:1)),
name = c(rep("f1", 3), rep("f2", 3))
)
checkIdentical(exons_expected, exons)

introns = as.data.frame(features$introns)
introns_expected = data.frame(
seqnames = factor("ch1"),
start = c(13001L, 15001L, 23001L, 25001L),
end = c(14000L, 16000L, 24000L, 26000L),
width = 1000L,
strand = factor(c("+", "+", "-", "-"), levels = c("+", "-", "*")),
score = c(1, 2, 2, 1),
name = c("f1", "f1", "f2", "f2")
)
checkIdentical(introns_expected, introns)

promoters = as.data.frame(features$promoters)
promoters_expected = data.frame(
seqnames = factor("ch1"),
start = c(9000L, 29000L),
end = c(11000L, 31000L),
width = 2001L,
strand = factor(c("+", "-"), levels = c("+", "-", "*")),
score = 0,
name = "."
)
checkIdentical(promoters_expected, promoters)

TSSes = as.data.frame(features$TSSes)
TSSes_expected = data.frame(
seqnames = factor("ch1"),
start = c(10000L, 30000L),
end = c(10000L, 30000L),
width = 1L,
strand = factor(c("+", "-"), levels = c("+", "-", "*")),
score = 0,
name = c("f1", "f2")
)
checkIdentical(TSSes_expected, TSSes)
}