Skip to content

Commit 021f859

Browse files
authored
Merge pull request #8 from gustaveroussy/dev
Dev to masta : v0.3.3-1
2 parents 8961d31 + 82d7975 commit 021f859

12 files changed

+647
-56
lines changed

DESCRIPTION

+2-2
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,8 @@ Encoding: UTF-8
22
Package: EaCoN
33
Type: Package
44
Title: EaCoN : Easy Copy Number !
5-
Version: 0.3.3
6-
Date: 2018-09-11
5+
Version: 0.3.3-1
6+
Date: 2018-09-18
77
Author: Bastien Job
88
Maintainer: Bastien JOB <[email protected]>
99
Depends: R(>= 3.1.0)

NEWS

+14-3
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,20 @@
11
EaCoN
22
-----
33

4+
v0.3.3-1 (20181002) *LittleWomanNoCry*
5+
-----------------
6+
* BUG : Segment.SEQUENZA() : added imputation of NA values in the L2R object, which made copynumber::aspcf() fail (happened with microarrays for flagged probes, not with WES).
7+
* BUG : Segment.SEQUENZA() : BAF filtering wasn't working properly, resulting in wrong BAF segmentation, for all microarrays.
8+
* BUG : CS.Process.Batch() : wrong variable name in header check.
9+
* CORR : OS.Process() : corrected wrong handling of sex.chr : the output was forced to c("X", "Y") instead of using the variable (default c("chrX", "chrY")).
10+
* CORR : Segment.FACETS() Segment.SEQUENZA() : Added missing meta 'BAF.filter' in the object.
11+
* CORR : README.md : fixed a few links to dependencies, corrected default regex.
12+
* MOD : Segment.* : Changed the structure of the profile PNG filename to "[samplename].SEG.[segmenter].png" (to ease the use of regex for further steps in batch mode).
13+
* MOD : ASCN.ff.Batch() Annotate.ff.Batch() : corrected the default regex.
14+
415
v0.3.3 (20180911) *Trinity*
516
-----------------
6-
* NEW : SEQUENZA segmentation plainly implemented, for both L2R+BAF bivariate segmentation [Segment.SEQUENZA()] AND copy number estimation [ASCN.SEQUENZA()].
17+
* NEW : SEQUENZA segmentation plainly implemented, for both L2R+BAF bivariate segmentation Segment.SEQUENZA() AND copy number estimation ASCN.SEQUENZA().
718
* BUG : Segment.ff() : Corrected wrong do.call() call (parameters not given as a list).
819
* CORR : ASCN.ASCAT() : CN output file was badly formatted.
920
* MOD : ASCN.ff() : Suppressed the "segmenter" parameter, which is read from the RDS meta$eacon$segmenter value.
@@ -49,13 +60,13 @@ v0.3.0 (20180724) *PapoQueen*
4960
* All : Removed "EaCoN." prefix from most functions (less self-centric...)
5061
* All : Took care of vectors and columns that could be converted to factor or integer (to free some RAM up).
5162
* All : Added missing support for manual PELT penalty (only asymptotic mode was considered when SER.value was numeric).
52-
* SNP6 : Revamped BAF homozygous calling and rescaling.
63+
* SNP6 : Revamped BAF homozygous calling and rescaling.
5364
* Defined the novel sets of default parameters for all supported technologies.
5465
* Redacted the README.md
5566

5667
v0.2.13 (20180531) *SunIsBack*
5768
------------------
58-
* WES : Added more data to the BIN RDS output (counts with the reference genome nucleotide for both test and ref BAMs). This is in order to 1) filter out on a minimum alternative allele count 2) allow the use of other segmenters that do not rely on BAF but rather on AD (like PSCBS) or logOR (like FACETS).
69+
* WES : Added more data to the BIN RDS output (counts with the reference genome nucleotide for both test and ref BAMs). This is in order to 1) filter out on a minimum alternative allele count 2) allow the use of other segmenters that do not rely on BAF but rather on AD (like PSCBS) or logOR (like FACETS).
5970
* Modified the subthreading scheme for EaCoN.WES.Bin() : now each subthread has its own connection to the BAM files. This allows each thread to work fully (but increases simultaneous IO).
6071
* Now each SNP variant has its corresponding bin index, which will make it possible to perform density-based selection like in FACETS.
6172

R/EaCoN_functions.R

+593-32
Large diffs are not rendered by default.

R/apt_cytoscan_process.R

+8-6
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ CS.Process <- function(CEL = NULL, samplename = NULL, dual.norm = FALSE, normal.
2727
# require(foreach)
2828
# source("~/git_gustaveroussy/EaCoN/R/mini_functions.R")
2929
# source("~/git_gustaveroussy/EaCoN/R/renorm_functions.R")
30-
# source("~/git_gustaveroussy/EaCoN/R/germline_functions.R")
30+
3131

3232

3333
## Early checks
@@ -384,8 +384,9 @@ CS.Process.Batch <- function(CEL.list.file = NULL, nthread = 1, cluster.type = "
384384
if (!file.exists(CEL.list.file)) stop("Could not find CEL.list.file !")
385385
message("Reading and checking CEL.list.file ...")
386386
myCELs <- read.table(file = CEL.list.file, header = TRUE, sep="\t", check.names = FALSE, as.is = TRUE)
387-
head.ok <- c("cel_files", "SampleName")
388-
head.chk <- all(colnames(CEL.list.file) == head.ok)
387+
head.ok <- c("CEL", "SampleName")
388+
head.chk <- all(colnames(myCELs) == head.ok)
389+
389390
if (!head.chk) {
390391
message("Invalid header in CEL.list.file !")
391392
message(paste0("EXPECTED : ", head.ok))
@@ -398,9 +399,10 @@ CS.Process.Batch <- function(CEL.list.file = NULL, nthread = 1, cluster.type = "
398399
message(myCELs$SampleName[which(duplicated(myCELs$SampleName))])
399400
stop("Duplicated SampleNames.")
400401
}
401-
fecheck <- !vapply(myCELs$cel_files, file.exists, TRUE)
402+
403+
fecheck <- !vapply(myCELs$CEL, file.exists, TRUE)
402404
fecheck.pos <- which(fecheck)
403-
if (length(fecheck.pos) > 0) stop(paste0("\n", "CEL file could not be found : ", myCELs$cel_files[fecheck.pos], collapse = ""))
405+
if (length(fecheck.pos) > 0) stop(paste0("\n", "CEL file could not be found : ", myCELs$CEL[fecheck.pos], collapse = ""))
404406

405407
message(paste0("Found ", nrow(myCELs), " samples to process."))
406408

@@ -421,7 +423,7 @@ CS.Process.Batch <- function(CEL.list.file = NULL, nthread = 1, cluster.type = "
421423
p <- 0
422424
csres <- foreach::foreach(p = seq_len(nrow(myCELs)), .inorder = FALSE, .errorhandling = "pass") %dopar% {
423425
EaCoN.set.bitmapType(type = current.bitmapType)
424-
CS.Process(CEL = myCELs$cel_files[p], samplename = myCELs$SampleName[p], ...)
426+
CS.Process(CEL = myCELs$CEL[p], samplename = myCELs$SampleName[p], ...)
425427
}
426428

427429
## Stopping cluster

R/apt_oncoscan_process.R

+1-1
Original file line numberDiff line numberDiff line change
@@ -263,7 +263,7 @@ OS.Process <- function(ATChannelCel = NULL, GCChannelCel = NULL, samplename = NU
263263
chrs = unique(ao.df$chr),
264264
samples = samplename,
265265
gender = as.vector(meta.b$predicted.gender),
266-
sexchromosomes = c("X", "Y"),
266+
sexchromosomes = sex.chr,
267267
failedarrays = NULL
268268
),
269269
meta = list(

README.md

+10-5
Original file line numberDiff line numberDiff line change
@@ -344,10 +344,10 @@ As for the **WES.Normalize.ff.Batch** function, the **Segment.ff.Batch** functio
344344
Here is a synthetic example that will segment our CytoScan HD samples (as defined by the _pattern_ below) using ASCAT :
345345

346346
```R
347-
Segment.ff.Batch(RDS.files = list.files(path = getwd(), pattern = "_CSHD.*_processed.RDS$", full.names = TRUE, recursive = TRUE), segmenter = "ASCAT", smooth.k = 5, SER.pen = 20, nrf = 1.0, nthread = 2)
347+
Segment.ff.Batch(RDS.files = list.files(path = getwd(), pattern = ".*_processed.RDS$", full.names = TRUE, recursive = TRUE), segmenter = "ASCAT", smooth.k = 5, SER.pen = 20, nrf = 1.0, nthread = 2)
348348
```
349349

350-
- To perform the same using the **FACETS** segmenter, just change the value of the _segmenter_ parameter !
350+
- To perform the same using the **FACETS** segmenter, just change the value of the _segmenter_ parameter, but **please remember that FACETS will only work with WES data !**
351351

352352
- I suppose you guessed how to do the same with **SEQUENZA**, right ? ;)
353353

@@ -356,17 +356,22 @@ Segment.ff.Batch(RDS.files = list.files(path = getwd(), pattern = "_CSHD.*_proce
356356
Still the same, with the **ASCN.ff.Batch** :
357357

358358
```R
359-
ASCN.ff.Batch(RDS.files = list.files(path = getwd(), pattern = "_CSHD.*_EaCoN.ASPCF.RDS$", full.names = TRUE, recursive = TRUE), nthread = 2)
359+
ASCN.ff.Batch(RDS.files = list.files(path = getwd(), pattern = "SEG\\.ASCAT\\.RDS$", full.names = TRUE, recursive = TRUE), nthread = 2)
360360
```
361361

362+
- To perform the same on results obtained with the **FACETS** or **SEQUENZA** segmenter, just edit the _pattern_ argument with the name of the corresponding segmenter.
363+
364+
362365
#### **HTML reporting**
363366

364367
And here again with the **Annotate.ff.Batch** :
365368

366369
```R
367-
Annotate.ff.Batch(RDS.files = list.files(path = getwd(), pattern = "_CSHD.*_EaCoN.ASPCF.RDS$", full.names = TRUE, recursive = TRUE), author.name = "Me!")
370+
Annotate.ff.Batch(RDS.files = list.files(path = getwd(), pattern = "SEG\\.ASCAT\\.RDS$", full.names = TRUE, recursive = TRUE), author.name = "Me!")
368371
```
369372

373+
- To perform the same on results obtained with the **FACETS** or **SEQUENZA** segmenter, just edit the _pattern_ argument with the name of the corresponding segmenter.
374+
370375
### **Piped**
371376

372377
EaCoN has been implemented in a way that one can also choose to launch the full workflow in a single command line for a single sample, using pipes from the [magrittr](https://cran.r-project.org/web/packages/magrittr/vignettes/magrittr.html) package. However, this is not recommended as the default use : even though EaCoN is provided with recommendations that should fit most cases, the user may have to deal with particular profiles that would require parameter tweaking, which is not possible in piped mode...
@@ -393,7 +398,7 @@ OS.Process(ATChannelCel = "/home/me/my_project/CEL/SAMPLE1_OncoScan_CNV_A.CEL",
393398

394399
## **GUIDELINES**
395400

396-
### **Segmentation using ASCAT**
401+
### **Segmentation**
397402

398403
- For each step, default values for each data source already correspond to recommendations. However, for the common **segmentation** step using the ASCAT segmenter, adaptation to the data source is recommended, by changing a few parameters :
399404

inst/extdata/html_report.Rmd

+13-1
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ always_allow_html: yes
2121

2222
```{r setup, echo = FALSE, include = FALSE}
2323
`%>%` <- magrittr::"%>%"
24-
show.flag <- if ((data$meta$basic$source == "microarray") & (data$meta$basic$manufacturer == "Affymetrix")) TRUE else FALSE
24+
# show.flag <- if ((data$meta$basic$source == "microarray") & (data$meta$basic$manufacturer == "Affymetrix")) TRUE else FALSE
2525
knitr::opts_knit$set(base.dir = tempdir())
2626
```
2727

@@ -36,6 +36,12 @@ DT::datatable(data = array.df, rownames = FALSE, caption = "", class = "cell-bor
3636
cat("<HR><HR><BR><BR>\n")
3737
```
3838

39+
```{r wes_info, results = "asis", echo = FALSE, eval = as.logical(!show.flag)}
40+
cat('# WES Data Information\n')
41+
## Insert table here !
42+
cat("<HR><HR><BR><BR>\n")
43+
```
44+
3945
<!-- *** -->
4046
<!-- *** -->
4147

@@ -47,6 +53,12 @@ cat(paste0("\n![](", intplotf, ")\n"))
4753
cat("<HR><HR><BR><BR>\n")
4854
```
4955

56+
```{r covplot, results = "asis", fig.height = 10, fig.width = 10, fig.align="center", echo = FALSE, eval = FALSE}
57+
cat("# Coverage Plot\n")
58+
cat(paste0("\n![](", covplotf, ")\n"))
59+
cat("<HR><HR><BR><BR>\n")
60+
```
61+
5062
<!-- *** -->
5163
<!-- *** -->
5264

man/ASCN.ff.Batch.Rd

+1-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
\title{Allele-Specific Copy Number estimation, from RDS files in batch mode, with multithreading.}
44
\usage{
55
ASCN.ff.Batch(RDS.files = list.files(path = getwd(),
6-
pattern = ".EaCoN.ASPCF.RDS$", full.names = TRUE, recursive = TRUE,
6+
pattern = "SEG\\.ASCAT\\.RDS$", full.names = TRUE, recursive = TRUE,
77
ignore.case = TRUE, include.dirs = FALSE), nthread = 1,
88
cluster.type = "PSOCK", ...)
99
}

man/Annotate.ff.Batch.Rd

+1-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
}
77
\usage{
88
Annotate.ff.Batch(RDS.files = list.files(path = getwd(),
9-
pattern = ".EaCoN.ASPCF.RDS$", full.names = TRUE, recursive = TRUE,
9+
pattern = "\\.SEG\\.ASCAT\\.RDS$", full.names = TRUE, recursive = TRUE,
1010
ignore.case = TRUE, include.dirs = FALSE), nthread = 1,
1111
cluster.type = "PSOCK", ...)
1212
}

man/CS.Process.Batch.Rd

+1-1
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
\details{
1717
\code{CEL.list.file} is a tab-separated text file containing 2 columns (header and specified column names are mandatory) :
1818
\itemize{
19-
\item{cel_files : Name (and path) of the CEL file(s)}
19+
\item{CEL : Name (and path) of the CEL file(s)}
2020
\item{SampleName : The output sample name(s)}
2121
}
2222
}

man/Segment.FACETS.Rd

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
\alias{Segment.FACETS}
33
\title{L2R and BAF joint segmentation using FACETS.}
44
\usage{
5-
Segment.FACETS(data = NULL, smooth.k = NULL, BAF.filter = .9, homoCut = .05,
5+
Segment.FACETS(data = NULL, smooth.k = NULL, BAF.filter = .75, homoCut = .05,
66
FACETS.pen = 150, recenter = "l2r.centeredpeak", calling.method = "mad",
77
nrf = .5, SER.pen = 2, out.dir = getwd(), return.data = FALSE,
88
write.data = TRUE, plot = TRUE, force = FALSE)

man/Segment.SEQUENZA.Rd

+2-2
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,9 @@
22
\alias{Segment.SEQUENZA}
33
\title{L2R and BAF joint segmentation using SEQUENZA.}
44
\usage{
5-
Segment.SEQUENZA(data = NULL, smooth.k = NULL, BAF.filter = .9, homoCut = .05,
5+
Segment.SEQUENZA(data = NULL, smooth.k = NULL, BAF.filter = .75, homoCut = .05,
66
SEQUENZA.pen = 50, recenter = "l2r.centeredpeak", calling.method = "mad",
7-
nrf = .5, SER.pen = 2, out.dir = getwd(), return.data = FALSE,
7+
nrf = .5, SER.pen = 40, out.dir = getwd(), return.data = FALSE,
88
write.data = TRUE, plot = TRUE, force = FALSE)
99
}
1010
\arguments{

0 commit comments

Comments
 (0)