Skip to content

Commit d6f5a64

Browse files
authored
Enhance test coverage (#18)
* CHG: rename function by camel style; add a new test case for spatpca * CHG: add essential modified files * CHG: add new helper for validating input data; add test cases; modify the manual * CHG: add a new helper function - detrend; add more test cases * CHG: add helper functions for tuning parameter settings; add test cases for them * CHG: add an auxillary function for selecting K with spatpacCV; add test cases * CHG: add a helper function for setting gamma; add test cases * CHG: add a missing maunaul; modify the expected values for testing on higher R version * CHG: correct the expected values for testing spatpca * CHG: correct the expected values for testing spatpca * CHG: correct the expected values for testing spatpca * CHG: correct the expected values for testing spatpca * CHG: correct the expected values for testing spatpca * CHG: correct the expected values for testing spatpca * CHG: add test cases for spatpca and thin-plate splines
1 parent c253ea0 commit d6f5a64

19 files changed

+546
-141
lines changed

.Rbuildignore

+1
Original file line numberDiff line numberDiff line change
@@ -6,3 +6,4 @@
66
.travis.yml
77
^\.github$
88
^codecov\.yml$
9+
^.*\.gcno$

R/SpatPCA.R

+63-86
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,39 @@
11
# This file was generated by Rcpp::compileAttributes
22
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
3+
#'
4+
#' Internal function: M-fold CV of SpatPCA with selecting K
5+
#'
6+
#' @keywords internal
7+
#'
8+
#' @param x Location matrix
9+
#' @param Y Data matrix
10+
#' @param M The number of folds for cross validation; default is 5.
11+
#' @param tau1 Vector of a nonnegative smoothness parameter sequence. If NULL, 10 tau1 values in a range are used.
12+
#' @param tau2 Vector of a nonnegative sparseness parameter sequence. If NULL, none of tau2 is used.
13+
#' @param gamma Vector of a nonnegative hyper parameter sequence for tuning eigenvalues. If NULL, 10 values in a range are used.
14+
#' @param shuffle_split Vector of indeces for random splitting Y into training and test sets
15+
#' @param maxit Maximum number of iterations. Default value is 100.
16+
#' @param thr Threshold for convergence. Default value is \eqn{10^{-4}}.
17+
#' @param l2 Vector of a nonnegative tuning parameter sequence for ADMM use
18+
#' @return A list of objects including
19+
#' \item{cv_result}{A list of resultant objects produced by `spatpcaCV`}
20+
#' \item{selected_K}{Selected K based on CV.}
21+
#'
22+
spatpcaCVWithSelectingK <- function(x, Y, M, tau1, tau2, gamma, shuffle_split, maxit, thr, l2) {
23+
upper_bound <- fetchUpperBoundNumberEigenfunctions(Y, M)
24+
prev_cv_selection <- spatpcaCV(x, Y, M, 1, tau1, tau2, gamma, shuffle_split, maxit, thr, l2)
25+
26+
for (k in 2:upper_bound) {
27+
cv_selection <- spatpcaCV(x, Y, M, k, tau1, tau2, gamma, shuffle_split, maxit, thr, l2)
28+
difference <- prev_cv_selection$selected_gamma - cv_selection$selected_gamma
29+
prev_cv_selection <- cv_selection
30+
if (difference <= 0 || abs(difference) <= 1e-8) {
31+
break
32+
}
33+
}
34+
return(list(cv_result = cv_selection, selected_K = k - 1))
35+
}
36+
337
#'
438
#' @title Regularized PCA for spatial data
539
#'
@@ -12,8 +46,8 @@
1246
#' @param tau1 Optional user-supplied numeric vector of a nonnegative smoothness parameter sequence. If NULL, 10 tau1 values in a range are used.
1347
#' @param tau2 Optional user-supplied numeric vector of a nonnegative sparseness parameter sequence. If NULL, none of tau2 is used.
1448
#' @param gamma Optional user-supplied numeric vector of a nonnegative tuning parameter sequence. If NULL, 10 values in a range are used.
15-
#' @param M Optional number of folds; default is 5.
16-
#' @param is_Y_centered If TRUE, center the columns of Y. Default is FALSE.
49+
#' @param M Optional number of folds for cross validation; default is 5.
50+
#' @param is_Y_detrended If TRUE, center the columns of Y. Default is FALSE.
1751
#' @param maxit Maximum number of iterations. Default value is 100.
1852
#' @param thr Threshold for convergence. Default value is \eqn{10^{-4}}.
1953
#' @param num_cores Number of cores used to parallel computing. Default value is NULL (See `RcppParallel::defaultNumThreads()`)
@@ -32,7 +66,7 @@
3266
#' \item{tau1}{Sequence of tau1-values used in the process.}
3367
#' \item{tau2}{Sequence of tau2-values used in the process.}
3468
#' \item{gamma}{Sequence of gamma-values used in the process.}
35-
#' \item{centered_Y}{If is_Y_centered is TRUE, centered_Y is the centered Y; else, centered_Y is equal to Y.}
69+
#' \item{detrended_Y}{If is_Y_detrended is TRUE, detrended_Y means Y is detrended; else, detrended_Y is equal to Y.}
3670
#' \item{scaled_x}{Input location matrix. Only scale when it is one-dimensional}
3771
#'
3872
#' @details An ADMM form of the proposed objective function is written as
@@ -81,7 +115,7 @@
81115
#' K = cv$selected_K,
82116
#' tau1 = cv$selected_tau1,
83117
#' tau2 = cv$selected_tau2)
84-
#' predicted_eof <- predict_eigenfunction(eof, xx_new)
118+
#' predicted_eof <- predictEigenfunction(eof, xx_new)
85119
#' quilt.plot(xx_new,
86120
#' predicted_eof[,1],
87121
#' nx = new_p,
@@ -115,92 +149,37 @@ spatpca <- function(x,
115149
tau1 = NULL,
116150
tau2 = NULL,
117151
gamma = NULL,
118-
is_Y_centered = FALSE,
152+
is_Y_detrended = FALSE,
119153
maxit = 100,
120154
thr = 1e-04,
121155
num_cores = NULL) {
122156
call2 <- match.call()
157+
checkInputData(Y, x, M)
123158
setCores(num_cores)
124159

160+
# Transform main objects
125161
x <- as.matrix(x)
162+
Y <- detrend(Y, is_Y_detrended)
163+
K <- setNumberEigenfunctions(K, Y, M)
126164
p <- ncol(Y)
127165
n <- nrow(Y)
128-
if (p < 3) {
129-
stop("Number of locations must be larger than 2.")
130-
}
131-
if (nrow(x) != p) {
132-
stop("The number of rows of x should be equal to the number of columns of Y.")
133-
}
134-
if (ncol(x) > 3) {
135-
stop("Dimension of locations must be less than 4.")
136-
}
137-
if (M >= n) {
138-
stop("Number of folds must be less than sample size.")
139-
}
140-
if (!is.null(K)) {
141-
if (K > min(floor(n - n / M), p)) {
142-
K <- min(floor(n - n / M), p)
143-
warning("K must be smaller than min(floor(n - n/M), p).")
144-
}
145-
}
146-
# Remove the mean trend of Y
147-
if (is_Y_centered) {
148-
Y <- Y - apply(Y, 2, "mean")
149-
}
150-
# Initialize candidates of tuning parameters
151-
if (is.null(tau2)) {
152-
tau2 <- 0
153-
num_tau2 <- 1
154-
} else {
155-
num_tau2 <- length(tau2)
156-
}
157-
if (is.null(tau1)) {
158-
num_tau1 <- 11
159-
tau1 <- c(0, exp(seq(log(1e-6), 0, length = num_tau1 - 1)))
160-
} else {
161-
num_tau1 <- length(tau1)
162-
}
163-
164-
if (M < 2 && (num_tau1 > 1 || num_tau2 > 1)) {
165-
num_tau1 <- num_tau2 <- 1
166-
warning("Only produce the result based on the largest tau1 and largest tau2.")
167-
}
168-
169-
stra <- sample(rep(1:M, length.out = nrow(Y)))
170-
if (is.null(gamma)) {
171-
num_gamma <- 11
172-
svd_Y_partial <- svd(Y[which(stra != 1), ])
173-
max_gamma <- svd_Y_partial$d[1]^2 / nrow(Y[which(stra != 1), ])
174-
log_scale_candidates <-
175-
seq(log(max_gamma / 1e4), log(max_gamma), length = num_gamma - 1)
176-
gamma <- c(0, log_scale_candidates)
177-
}
178-
179166
scaled_x <- scaleLocation(x)
167+
shuffle_split <- sample(rep(1:M, length.out = nrow(Y)))
168+
169+
# Initialize candidates of tuning parameters
170+
tau1 <- setTau1(tau1, M)
171+
tau2 <- setTau2(tau2, M)
172+
l2 <- setL2(tau2)
173+
gamma <- setGamma(gamma, Y[which(shuffle_split != 1), ])
180174

181-
if (num_tau2 == 1 && tau2 > 0) {
182-
l2 <- ifelse(tau2 != 0,
183-
c(0, exp(seq(log(tau2 / 1e4), log(tau2), length = 10))),
184-
tau2
185-
)
186-
} else {
187-
l2 <- 1
188-
}
189175

190176
if (is_K_selected) {
191-
cv_result <- spatpcaCV(scaled_x, Y, M, 1, tau1, tau2, gamma, stra, maxit, thr, l2)
192-
for (k in 2:min(floor(n - n / M), p)) {
193-
cv_new_result <- spatpcaCV(scaled_x, Y, M, k, tau1, tau2, gamma, stra, maxit, thr, l2)
194-
difference <- cv_result$selected_gamma - cv_new_result$selected_gamma
195-
if (difference <= 0 || abs(difference) <= 1e-8) {
196-
break
197-
}
198-
cv_result <- cv_new_result
199-
}
200-
selected_K <- k - 1
177+
cv_with_selected_k <- spatpcaCVWithSelectingK(scaled_x, Y, M, tau1, tau2, gamma, shuffle_split, maxit, thr, l2)
178+
cv_result <- cv_with_selected_k$cv_result
179+
selected_K <- cv_with_selected_k$selected_K
201180
}
202181
else {
203-
cv_result <- spatpcaCV(scaled_x, Y, M, K, tau1, tau2, gamma, stra, maxit, thr, l2)
182+
cv_result <- spatpcaCV(scaled_x, Y, M, K, tau1, tau2, gamma, shuffle_split, maxit, thr, l2)
204183
selected_K <- K
205184
}
206185

@@ -225,14 +204,13 @@ spatpca <- function(x,
225204
tau1 = tau1,
226205
tau2 = tau2,
227206
gamma = gamma,
228-
centered_Y = Y,
207+
detrended_Y = Y,
229208
scaled_x = scaled_x
230209
)
231210
class(obj.cv) <- "spatpca"
232211
return(obj.cv)
233212
}
234213

235-
236214
#' @title Spatial dominant patterns on new locations
237215
#'
238216
#' @description Estimate K eigenfunctions on new locations
@@ -252,10 +230,10 @@ spatpca <- function(x,
252230
#' Y_1Drm <- Y_1D[, -rm_loc]
253231
#' x_1Dnew <- as.matrix(seq(-5, 5, length = 20))
254232
#' cv_1D <- spatpca(x = x_1Drm, Y = Y_1Drm, tau2 = 1:100, num_cores = 2)
255-
#' dominant_patterns <- predict_eigenfunction(cv_1D, x_new = x_1Dnew)
233+
#' dominant_patterns <- predictEigenfunction(cv_1D, x_new = x_1Dnew)
256234
#'
257-
predict_eigenfunction <- function(spatpca_object, x_new) {
258-
check_new_locations_for_spatpca_object(spatpca_object, x_new)
235+
predictEigenfunction <- function(spatpca_object, x_new) {
236+
checkNewLocationsForSpatpcaObject(spatpca_object, x_new)
259237
scaled_x_new <- scaleLocation(x_new)
260238

261239
predicted_eigenfn <- eigenFunction(
@@ -286,25 +264,24 @@ predict_eigenfunction <- function(spatpca_object, x_new) {
286264
#' Y_1Drm <- Y_1D[, -rm_loc]
287265
#' x_1Dnew <- as.matrix(seq(-5, 5, length = 20))
288266
#' cv_1D <- spatpca(x = x_1Drm, Y = Y_1Drm, tau2 = 1:100, num_cores = 2)
289-
#' predictions <- predict_on_new_locations(cv_1D, x_new = x_1Dnew)
267+
#' predictions <- predict(cv_1D, x_new = x_1Dnew)
290268
#'
291-
predict_on_new_locations <- function(spatpca_object, x_new, eigen_patterns_on_new_site = NULL) {
292-
check_new_locations_for_spatpca_object(spatpca_object, x_new)
269+
predict <- function(spatpca_object, x_new, eigen_patterns_on_new_site = NULL) {
270+
checkNewLocationsForSpatpcaObject(spatpca_object, x_new)
293271

294272
if (is.null(eigen_patterns_on_new_site)) {
295-
eigen_patterns_on_new_site <- predict_eigenfunction(spatpca_object, x_new)
273+
eigen_patterns_on_new_site <- predictEigenfunction(spatpca_object, x_new)
296274
}
297275

298276
spatial_prediction <- spatialPrediction(
299277
spatpca_object$eigenfn,
300-
spatpca_object$centered_Y,
278+
spatpca_object$detrended_Y,
301279
spatpca_object$selected_gamma,
302280
eigen_patterns_on_new_site
303281
)
304282
return(spatial_prediction$predict)
305283
}
306284

307-
308285
#'
309286
#' @title Display the cross-validation results
310287
#'

0 commit comments

Comments
 (0)