Skip to content

Commit 9247c92

Browse files
authored
Merge pull request #21 from gustaveroussy/winsfix
Winsfix
2 parents abb78d9 + 71d4de2 commit 9247c92

File tree

4 files changed

+56
-24
lines changed

4 files changed

+56
-24
lines changed

DESCRIPTION

+2-2
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,8 @@ Encoding: UTF-8
22
Package: EaCoN
33
Type: Package
44
Title: EaCoN : Easy Copy Number !
5-
Version: 0.3.4-1
6-
Date: 2018-12-10
5+
Version: 0.3.5
6+
Date: 2020-08-17
77
Author: Bastien Job
88
Authors@R: person("Bastien", "Job", email = "[email protected]", role = c("aut", "cre"))
99
Depends: R(>= 3.1.0)

NEWS

+7-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,12 @@
11
EaCoN
22
-----
33

4+
v0.3.5 (20200817)*CloudyMonday*
5+
-----------------
6+
* CORR : Segment.*() : Added a patch to handle the NA behavior in copynumber::winsorize (error raised by the new handling of NA values in runmed). The patch consists of applying winsorization to non-NA values only (whereas all values were passed in earlier versions).
7+
* CORR : WES.Bin() : Better handling of a possible desync in chr names (when a canonical chr had no remaining values, its level was kept, which raised a rare error).
8+
* MOD : Many functions : Fixed calls to the "%do%" and "%dopar%" operators made without loading them.
9+
410
v0.3.4-1 (20181210) *PostRoscovite*
511
-----------------
612
* CORR : SNP6.Process(), CSHD.Process() : Edited code to handle changes in the rcnorm package, to discard the "chromosomes" package dependency.
@@ -83,7 +89,7 @@ v0.3.0 (20180724) *PapoQueen*
8389
* All : Removed "EaCoN." prefix from most functions (less self-centric...)
8490
* All : Took care of vectors and columns that could be converted to factor or integer (to free some RAM up).
8591
* All : Added missing support for manual PELT penalty (only asymptotic mode was considered when SER.value was numeric).
86-
* SNP6 : Revamped BAF homozygous calling and rescaling.
92+
* SNP6 : Revamped BAF homozygous calling and rescaling.
8793
* Defined the novel sets of default parameters for all supported technologies.
8894
* Redacted the README.md
8995

R/EaCoN_functions.R

+43-21
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ Segment.ASCAT <- function(data = NULL, mingap = 5E+06, smooth.k = NULL, BAF.filt
2020
# source("~/git_gustaveroussy/EaCoN/R/mini_functions.R")
2121
# source("~/git_gustaveroussy/EaCoN/R/plot_functions.R")
2222

23+
`%do%` <- foreach::"%do%"
2324

2425
calling.method <- tolower(calling.method)
2526

@@ -93,11 +94,13 @@ Segment.ASCAT <- function(data = NULL, mingap = 5E+06, smooth.k = NULL, BAF.filt
9394

9495
## Winsorization
9596
if(!is.null(smooth.k)) {
96-
tmsg("Smoothing L2R outliers ...")
97+
tmsg("Smoothing L2R outliers ...")
9798
cndf <- data.frame(Chr = rep(unlist(cs$chrom2chr[data$data$chrs]), vapply(data$data$ch, length, 1L)), Position = unlist(data$data$ch), MySample = data$data$Tumor_LogR[[1]], stringsAsFactors = FALSE)
99+
l2r.nona <- !is.na(data$data$Tumor_LogR[[1]])
100+
cndf <- cndf[l2r.nona,]
98101
cndf.wins <- copynumber::winsorize(data = cndf, pos.unit = "bp", method = "mad", k = smooth.k, tau = 1, verbose = FALSE)
99-
data$data$Tumor_LogR[,1] <- cndf.wins[, 3, drop = FALSE]
100-
rm(list = c("cndf", "cndf.wins"))
102+
data$data$Tumor_LogR[l2r.nona,1] <- cndf.wins[, 3, drop = FALSE]
103+
rm(list = c("cndf", "cndf.wins", "l2r.nona"))
101104
}
102105

103106
## BAF filtering
@@ -130,7 +133,8 @@ Segment.ASCAT <- function(data = NULL, mingap = 5E+06, smooth.k = NULL, BAF.filt
130133

131134
## Computing gaps
132135
if (!is.null(mingap)) {
133-
data$data$chr <- foreach(k = data$data$ch, .combine = "c") %do% {
136+
# `%do%` <- foreach::"%do%"
137+
data$data$chr <- foreach::foreach(k = data$data$ch, .combine = "c") %do% {
134138
gapz <- which(diff(data$data$SNPpos$pos[k]) >= mingap)
135139
return(unname(split(k, findInterval(k, k[gapz+1]))))
136140
}
@@ -202,11 +206,13 @@ Segment.ASCAT <- function(data = NULL, mingap = 5E+06, smooth.k = NULL, BAF.filt
202206
## Winsorization (for aesthetics)
203207
tmsg("Smoothing L2R (for plots)...")
204208
cndf <- data.frame(Chr = rep(unlist(cs$chrom2chr[data$data$chrs]), vapply(data$data$ch, length, 1L)), Position = unlist(data$data$ch), MySample = data$data$Tumor_LogR[[1]], stringsAsFactors = FALSE)
209+
l2r.nona <- !is.na(data$data$Tumor_LogR[[1]])
210+
cndf <- cndf[l2r.nona,]
205211
cndf.wins <- copynumber::winsorize(data = cndf, pos.unit = "bp", method = "mad", k = 5, tau = 1, verbose = FALSE)
206-
data$data$Tumor_LogR_wins <- cndf.wins[, 3, drop = FALSE]
212+
data$data$Tumor_LogR_wins <- data$data$Tumor_LogR
213+
data$data$Tumor_LogR_wins[l2r.nona,] <- cndf.wins[, 3, drop = FALSE]
207214
colnames(data$data$Tumor_LogR_wins) <- samplename
208-
rm(list = c("cndf", "cndf.wins"))
209-
215+
rm(list = c("cndf", "cndf.wins", "l2r.nona"))
210216

211217
## PELT rescue
212218
if (!is.null(SER.pen)) {
@@ -239,7 +245,7 @@ Segment.ASCAT <- function(data = NULL, mingap = 5E+06, smooth.k = NULL, BAF.filt
239245
tmsg(paste0(" Found ", length(rescued), "."))
240246
if (length(rescued) > seg.maxn) tmsg("WARNING : Many small events found, profile may be noisy ! Consider using 'smooth.k', or for WES data, strengthen low depth filtering !")
241247
data$meta$eacon[["PELT-nseg"]] <- length(rescued)
242-
`%do%` <- foreach::"%do%"
248+
# `%do%` <- foreach::"%do%"
243249
foreach::foreach(re = rescued, .combine = "c") %do% {
244250
interv <- mydf$idx.ori[seg.start[re]]:mydf$idx.ori[seg.end[re]]
245251
data$data$Tumor_LogR_segmented[interv] <- median(data$data$Tumor_LogR[interv, 1], na.rm = TRUE)
@@ -354,6 +360,7 @@ Segment.ASCAT <- function(data = NULL, mingap = 5E+06, smooth.k = NULL, BAF.filt
354360
Start = as.integer(data$data$SNPpos$pos),
355361
End = as.integer(data$data$SNPpos$pos),
356362
Value = data$data$Tumor_LogR_wins[,1],
363+
# Value = data$data$Tumor_LogR[,1],
357364
stringsAsFactors = FALSE)
358365
baf.value <- data.frame(Chr = l2r.chr,
359366
Start = as.integer(data$data$SNPpos$pos),
@@ -475,12 +482,14 @@ Segment.FACETS <- function(data = NULL, smooth.k = NULL, BAF.filter = .75, homoC
475482
))
476483

477484
## Winsorization
478-
if(!is.null(smooth.k)) {
485+
if(!is.null(smooth.k)) {
479486
tmsg("Smoothing L2R outliers ...")
480487
cndf <- data.frame(Chr = rep(unlist(cs$chrom2chr[data$data$chrs]), vapply(data$data$ch, length, 1L)), Position = unlist(data$data$ch), MySample = data$data$Tumor_LogR[[1]], stringsAsFactors = FALSE)
488+
l2r.nona <- !is.na(data$data$Tumor_LogR[[1]])
489+
cndf <- cndf[l2r.nona,]
481490
cndf.wins <- copynumber::winsorize(data = cndf, pos.unit = "bp", method = "mad", k = smooth.k, tau = 1, verbose = FALSE)
482-
data$data$Tumor_LogR[,1] <- cndf.wins[, 3, drop = FALSE]
483-
rm(list = c("cndf", "cndf.wins"))
491+
data$data$Tumor_LogR[l2r.nona,1] <- cndf.wins[, 3, drop = FALSE]
492+
rm(list = c("cndf", "cndf.wins", "l2r.nona"))
484493
}
485494

486495
## BAF filtering
@@ -624,13 +633,16 @@ Segment.FACETS <- function(data = NULL, smooth.k = NULL, BAF.filter = .75, homoC
624633
tmsg("No recentering.")
625634
} else stop(tmsg("Invalid recentering method called !"), call. = FALSE)
626635

627-
## Winsorization
636+
## Winsorization (for aesthetics)
628637
tmsg("Smoothing L2R (for plots)...")
629638
cndf <- data.frame(Chr = rep(unlist(cs$chrom2chr[data$data$chrs]), vapply(data$data$ch, length, 1L)), Position = unlist(data$data$ch), MySample = data$data$Tumor_LogR[[1]], stringsAsFactors = FALSE)
639+
l2r.nona <- !is.na(data$data$Tumor_LogR[[1]])
640+
cndf <- cndf[l2r.nona,]
630641
cndf.wins <- copynumber::winsorize(data = cndf, pos.unit = "bp", method = "mad", k = 5, tau = 1, verbose = FALSE)
631-
data$data$Tumor_LogR_wins <- cndf.wins[, 3, drop = FALSE]
642+
data$data$Tumor_LogR_wins <- data$data$Tumor_LogR
643+
data$data$Tumor_LogR_wins[l2r.nona,] <- cndf.wins[, 3, drop = FALSE]
632644
colnames(data$data$Tumor_LogR_wins) <- samplename
633-
rm(list = c("cndf", "cndf.wins"))
645+
rm(list = c("cndf", "cndf.wins", "l2r.nona"))
634646

635647

636648
## PELT rescue
@@ -782,6 +794,7 @@ Segment.FACETS <- function(data = NULL, smooth.k = NULL, BAF.filter = .75, homoC
782794
Start = data$data$SNPpos$pos,
783795
End = data$data$SNPpos$pos,
784796
Value = data$data$Tumor_LogR_wins[,1],
797+
# Value = data$data$Tumor_LogR[,1],
785798
stringsAsFactors = FALSE)
786799
# baf.chr <- if(length(grep(pattern = "chr", x = names(cs$chrom2chr), ignore.case = TRUE)) > 0) unlist(cs$chrom2chr[paste0("chr", as.character(data$data$SNPpos$chrs))]) else unlist(cs$chrom2chr[as.character(data$data$SNPpos$chrs)])
787800
baf.value <- data.frame(Chr = l2r.chr,
@@ -844,6 +857,8 @@ Segment.SEQUENZA <- function(data = NULL, smooth.k = NULL, BAF.filter = .75, hom
844857

845858
calling.method <- tolower(calling.method)
846859

860+
`%do%` <- foreach::"%do%"
861+
847862
if (!is.list(data)) stop(tmsg("data should be a list !"), call. = FALSE)
848863
if (!dir.exists(out.dir)) stop(tmsg(paste0("Output directory [", out.dir, "] does not exist !")), call. = FALSE)
849864
if (!(calling.method %in% c("mad", "density"))) stop(tmsg("calling.method should be 'MAD' or 'density' !"), call. = FALSE)
@@ -900,9 +915,11 @@ Segment.SEQUENZA <- function(data = NULL, smooth.k = NULL, BAF.filter = .75, hom
900915
if(!is.null(smooth.k)) {
901916
tmsg("Smoothing L2R outliers ...")
902917
cndf <- data.frame(Chr = rep(unlist(cs$chrom2chr[data$data$chrs]), vapply(data$data$ch, length, 1L)), Position = unlist(data$data$ch), MySample = data$data$Tumor_LogR[[1]], stringsAsFactors = FALSE)
918+
l2r.nona <- !is.na(data$data$Tumor_LogR[[1]])
919+
cndf <- cndf[l2r.nona,]
903920
cndf.wins <- copynumber::winsorize(data = cndf, pos.unit = "bp", method = "mad", k = smooth.k, tau = 1, verbose = FALSE)
904-
data$data$Tumor_LogR[,1] <- cndf.wins[, 3, drop = FALSE]
905-
rm(list = c("cndf", "cndf.wins"))
921+
data$data$Tumor_LogR[l2r.nona,1] <- cndf.wins[, 3, drop = FALSE]
922+
rm(list = c("cndf", "cndf.wins", "l2r.nona"))
906923
}
907924

908925
## BAF filtering
@@ -1059,14 +1076,16 @@ Segment.SEQUENZA <- function(data = NULL, smooth.k = NULL, BAF.filter = .75, hom
10591076
tmsg("No recentering.")
10601077
} else stop(tmsg("Invalid recentering method called !"), call. = FALSE)
10611078

1062-
## Winsorization
1079+
## Winsorization (for aesthetics)
10631080
tmsg("Smoothing L2R (for plots)...")
10641081
cndf <- data.frame(Chr = rep(unlist(cs$chrom2chr[data$data$chrs]), vapply(data$data$ch, length, 1L)), Position = unlist(data$data$ch), MySample = data$data$Tumor_LogR[[1]], stringsAsFactors = FALSE)
1082+
l2r.nona <- !is.na(data$data$Tumor_LogR[[1]])
1083+
cndf <- cndf[l2r.nona,]
10651084
cndf.wins <- copynumber::winsorize(data = cndf, pos.unit = "bp", method = "mad", k = 5, tau = 1, verbose = FALSE)
1066-
data$data$Tumor_LogR_wins <- cndf.wins[, 3, drop = FALSE]
1085+
data$data$Tumor_LogR_wins <- data$data$Tumor_LogR
1086+
data$data$Tumor_LogR_wins[l2r.nona,] <- cndf.wins[, 3, drop = FALSE]
10671087
colnames(data$data$Tumor_LogR_wins) <- samplename
1068-
rm(list = c("cndf", "cndf.wins"))
1069-
1088+
rm(list = c("cndf", "cndf.wins", "l2r.nona"))
10701089

10711090
## PELT rescue
10721091
if (!is.null(SER.pen)) {
@@ -1099,7 +1118,6 @@ Segment.SEQUENZA <- function(data = NULL, smooth.k = NULL, BAF.filter = .75, hom
10991118
tmsg(paste0(" Found ", length(rescued), "."))
11001119
if (length(rescued) > seg.maxn) tmsg("WARNING : Many small events found, profile may be noisy ! Consider using 'smooth.k', or for WES data, strengthen low depth filtering !")
11011120
data$meta$eacon[["PELT-nseg"]] <- length(rescued)
1102-
`%do%` <- foreach::"%do%"
11031121
foreach::foreach(re = rescued, .combine = "c") %do% {
11041122
interv <- mydf$idx.ori[seg.start[re]]:mydf$idx.ori[seg.end[re]]
11051123
data$data$Tumor_LogR_segmented[interv] <- median(data$data$Tumor_LogR[interv, 1], na.rm = TRUE)
@@ -1217,6 +1235,7 @@ Segment.SEQUENZA <- function(data = NULL, smooth.k = NULL, BAF.filter = .75, hom
12171235
Start = data$data$SNPpos$pos,
12181236
End = data$data$SNPpos$pos,
12191237
Value = data$data$Tumor_LogR_wins[,1],
1238+
# Value = data$data$Tumor_LogR[,1],
12201239
stringsAsFactors = FALSE)
12211240
# baf.chr <- if(length(grep(pattern = "chr", x = names(cs$chrom2chr), ignore.case = TRUE)) > 0) unlist(cs$chrom2chr[paste0("chr", as.character(data$data$SNPpos$chrs))]) else unlist(cs$chrom2chr[as.character(data$data$SNPpos$chrs)])
12221241
baf.value <- data.frame(Chr = l2r.chr,
@@ -1334,6 +1353,7 @@ ASCN.ASCAT <- function(data = NULL, gammaRange = c(.35,.95), nsubthread = 1, clu
13341353
cls <- parallel::makeCluster(spec = nsubthread, type = cluster.type, outfile = "")
13351354
doParallel::registerDoParallel(cls)
13361355
gamma <- 0
1356+
`%dopar%` <- foreach::"%dopar%"
13371357
fit.val <- as.data.frame(foreach::foreach(gamma = gammavec, .combine = "rbind", .inorder = TRUE) %dopar% {
13381358
tmsg(paste0(" gamma = ", gamma))
13391359
odirg <- paste0(odir, "/gamma", sprintf("%.2f", gamma))
@@ -2015,6 +2035,8 @@ Annotate <- function(data = NULL, refGene.table = NULL, targets.table = NULL, re
20152035

20162036
oridir <- getwd()
20172037

2038+
`%do%` <- foreach::"%do%"
2039+
20182040
if (!is.list(data)) stop(tmsg("data should be a list !"), call. = FALSE)
20192041

20202042
valid.genomes <- get.valid.genomes()

R/wes_process.R

+4
Original file line numberDiff line numberDiff line change
@@ -338,6 +338,10 @@ WES.Bin <- function(testBAM = NULL, refBAM = NULL, BINpack = NULL, samplename =
338338
meta.w$SNP.tot.count.ref.summary <- my.summary(SNP.all$tot_count.ref[!is.na(SNP.all$tot_count.ref)])
339339
gc()
340340

341+
## Cleaning uncovered chr levels
342+
CN.all$chr <- droplevels(CN.all$chr)
343+
SNP.all$chr <- droplevels(SNP.all$chr)
344+
341345
WESobj <- list(RD = CN.all, SNP = SNP.all, meta = list(basic = meta.b, WES = meta.w))
342346
rm(CN.all, SNP.all)
343347
gc()

0 commit comments

Comments
 (0)