Skip to content

Commit cb60532

Browse files
authored
Dev (#18)
* 3DplottingFun * downSampling
1 parent c6b8b02 commit cb60532

10 files changed

Lines changed: 220 additions & 7 deletions

File tree

randPedPCA/DESCRIPTION

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
Package: randPedPCA
22
Type: Package
33
Title: Fast PCA for large pedigrees
4-
Version: 0.9.4
4+
Version: 0.9.6
55
Authors@R: c(
66
person("Hanbin", "Lee", email = "hblee@umich.edu", role = "aut"),
77
person("Hannes", "Becher", email = "h.becher@ed.ac.uk", role = "cre"),
@@ -23,7 +23,8 @@ Depends:
2323
Suggests:
2424
knitr,
2525
rmarkdown,
26-
testthat (>= 3.0.0)
26+
testthat (>= 3.0.0),
27+
rgl
2728
RoxygenNote: 7.3.2
2829
Config/testthat/edition: 3
2930
VignetteBuilder: knitr

randPedPCA/NAMESPACE

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,10 @@ S3method(plot,rppca)
44
S3method(rppca,pedigree)
55
S3method(rppca,spam)
66
S3method(summary,rppca)
7+
export(dspc)
78
export(hutchpp)
89
export(importLinv)
10+
export(plot3D)
911
export(randRangeFinder)
1012
export(randSVD)
1113
export(rppca)

randPedPCA/R/downsample.R

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
2+
3+
4+
#' Add downsampling index to rppca object
5+
#'
6+
#' This index is used by plot.rppca to downsample the col (colour) values. It
7+
#' is stored in the rppca object's ds slot.
8+
#'
9+
#' @param pc an object of class rppca
10+
#' @param to The down-sampling parameter. A numeric > 0 or a vector or NA. Interpreted
11+
#' as a proportion or integer or a index vector, see details.
12+
#'
13+
#' @details
14+
#' The parameter `to` is used to specify and possibly which individuals are sampled.
15+
#' If NA, all individuals are retained. If `to` is of length one and is between 0 and 1,
16+
#' then it is interpreted as a proportion. If it is greater than 1, it is taken to be
17+
#' the number of individuals to be sampled (possibly rounded by sample.int). If
18+
#' `to` is a logical or an integer vector, it is used for logical or integer indexing, respectively.
19+
#' The integer indices of the sample individuals are written to the `ds` slot.
20+
#' If `ds` exists, it is overwritten with a warning.
21+
#'
22+
#' @return An (invisible) object of class `rppca` with a slot `ds` added.
23+
#'
24+
#' @export
25+
dspc <- function(pc, to=10000){
26+
stopifnot(inherits(pc, "rppca"))
27+
28+
nr <- nrow(pc$x)
29+
30+
31+
if(length(to)==1){ # If 'to' is a scalar
32+
if(is.na(to)){ # "sample" all
33+
pc$ds <- 1:nr
34+
return(invisible(pc))
35+
}
36+
37+
stopifnot(to > 0)
38+
if(to <1) n <- ceiling(nr * to) else n <- to
39+
if( n < nr){
40+
n <- min(c(n, nr))
41+
message(paste0("Downsampling to ", n, " individuals."))
42+
ind <- sample.int(nr, n)
43+
} else {
44+
ind <- 1:nr
45+
}
46+
if(!is.null(pc$ds)) warning("The existing downsampling slot was overwritten.")
47+
pc$ds <- ind
48+
return(invisible(pc))
49+
} else { # If 'to' is a vector
50+
pc$ds <- (1:nr)[to]
51+
message(paste0("Downsampling to ", length(pc$ds), " individuals."))
52+
return(invisible(pc))
53+
}
54+
55+
stop("Should never reach this point.")
56+
}

randPedPCA/R/plot.R

Lines changed: 21 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,18 +3,34 @@
33

44
#' @method plot rppca
55
#' @export
6-
plot.rppca <- function(x, dims=c(1,2), ...){
7-
ss <- summary(x)$importance
8-
if(dim(ss)[1] == 3){
6+
plot.rppca <- function(x, dims=c(1,2), to=10000, col=NULL, ...){
7+
ss <- summary(x)$importance # get variance components
8+
9+
if(is.null(x$ds)) x <- dspc(x, to) # add index for downsampling (if not present)
10+
11+
if(!is.null(col)) {
12+
if(length(col)>1){
13+
# warn if col has different length to number of individuals
14+
if(length(col) != nrow(x$x)) warning(paste0("The length of col was ", length(col), " but there were ", nrow(x$x), " individuals."))
15+
16+
message("Downsampling colours.")
17+
cols <- col[x$ds] # downsample colours if any
18+
} else {
19+
cols <- col
20+
}
21+
}
22+
if(dim(ss)[1] == 3){ # add var proportions if any
923
plot(
10-
x$x[,dims],
24+
x$x[x$ds,dims],
1125
xlab=paste0("PC", dims[1], " (", signif(100*ss[2,dims[1]], 3), "%)"),
1226
ylab=paste0("PC", dims[2], " (", signif(100*ss[2,dims[2]], 3), "%)"),
27+
col = cols,
1328
...
1429
)
1530
} else {
1631
plot(
17-
x$x[,dims],
32+
x$x[x$ds,dims],
33+
col = cols,
1834
...
1935
)
2036
}

randPedPCA/R/plot3D.R

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
2+
3+
4+
5+
#' 3D plot using rgl
6+
#'
7+
#' A simple wrapper around rgl's pot3d function.
8+
#'
9+
#' @param x, an rppca object
10+
#'
11+
#' @param dims, vector of length 3 - indices of the PCs to plot
12+
#' @param ... additional arguments passed to rgl::plot3d
13+
#'
14+
#' @details
15+
#' Note, different to `plot.rppca`, which is relatively slow, `plot3D` does
16+
#' not down-sample the principal components and it ignores the `ds` slot of an
17+
#' `rppca` object if present.
18+
#'
19+
#'
20+
#' @export
21+
plot3D <- function(x, dims=c(1,2,3), ...) {
22+
if (!requireNamespace("rgl", quietly = TRUE)) {
23+
stop(
24+
"Package \"rgl\" must be installed to use this function.",
25+
call. = FALSE
26+
)
27+
} else {
28+
ss <- summary(x)$importance
29+
if(dim(ss)[1] == 3){
30+
rgl::plot3d(
31+
x$x[,dims],
32+
xlab=paste0("PC", dims[1], " (", signif(100*ss[2,dims[1]], 3), "%)"),
33+
ylab=paste0("PC", dims[2], " (", signif(100*ss[2,dims[2]], 3), "%)"),
34+
zlab=paste0("PC", dims[3], " (", signif(100*ss[2,dims[3]], 3), "%)"),
35+
...
36+
)
37+
} else {
38+
plot3d(
39+
x$x[,dims],
40+
...
41+
)
42+
}
43+
}
44+
}

randPedPCA/man/dspc.Rd

Lines changed: 30 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

randPedPCA/man/plot3D.Rd

Lines changed: 23 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.
4.33 KB
Binary file not shown.

randPedPCA/tests/testthat/test_rppca.R

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,8 +150,29 @@ test_that("Comparing Hutch++ estimate to inbreeding-based vals (with centring)",
150150

151151

152152

153+
# Subsampling -------------------------------------------------------------
153154

155+
test_that("Sub-sampling", {
156+
pc <- rppca(pedLInv)
154157

158+
expect_error(dspc(1)) # error, input must inherit 'rppca'
155159

160+
# default val of 'to' is 10k, greateer then number of individuals, no downsampling and no message
161+
expect_no_condition(pcd <- dspc(pc))
156162

163+
expect_warning(pcd <- dspc(pcd)) # warning about overwriting existing index
164+
expect_message(dspc(pc, c(T, F))) # message about to which number of individuals we downsampled
165+
expect_message(dspc(pc, c(1,3))) # same here
157166

167+
# though 100000 is greater than the number of individuals in this PCA
168+
# there is no error/warning. This is handled by R. To high index results in NA.
169+
expect_message(dspc(pc, c(1,3, 100000)))
170+
})
171+
172+
173+
# Plotting ----------------------------------------------------------------
174+
test_that("Plotting",{
175+
pc <- rppca(pedLInv, center=T, totVar=2694.038)
176+
# length of 'col' is different from number of individuals in the PCA
177+
expect_warning(plot(pc, col=c("yellow", "green"), to = 0.5))
178+
})

randPedPCA/vignettes/pedigree-pca.Rmd

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -205,6 +205,26 @@ plot(pc06, col=pedMeta$population, main="My pedigree PCA")
205205
plot(pc06, dims=c(1,3), col=pedMeta$population, main="My pedigree PCA,\ncustom PCs shown")
206206
```
207207

208+
## Down-sampling
209+
Because `plot.rppca` relies on R's relatively slow base plotting function, it
210+
is useful to down-sample the individuals to be plotted. Thus, `plot.rppca` will
211+
automatically down-sample to 10,000 individuals if there are more in the dataset.
212+
Technically there is no down-sampling and all the data are retained. Instead, an
213+
integer vector is added in the `ds` slot of the `rppca` object.
214+
That vector is automatically used by `plot.rppca` for individuals and any
215+
colours specified by the `col` parameter. The value of `col` should thus have
216+
the same length as there were individuals in the full dataset.
217+
To adjust the down-sampling, one can use the parameter `to` of `plot.rppca`.
218+
The value of `to` is passed to the function `dspc`, which carries out the down-sampling/vector
219+
generation. See `?dspc` for details. To plot wihthout any down-sampling, do `plot(myPc, to=NA)`.
220+
221+
One potential issue is that every time `plot.rppca` is run, a new set of
222+
individuals is sampled, so that repeated plots of the same dataset will look different.
223+
To retain a specific down-sampling, one can run `dspc` outside of `plot.rppca`,
224+
like `pcDown <- dspc(pc)`. Calling `plot(pcDown)` repeatedly will generate the same plot every time.
225+
226+
227+
208228

209229
# Using one's own data
210230
Users will mainly want to analyse their own empirical or simulated datasets. This

0 commit comments

Comments
 (0)