AWF-GAUG · rnuske · Feb 14, 2020 · Feb 14, 2020 · Feb 14, 2020 · Feb 14, 2020
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,24 +1,28 @@
 Package: fisim
 Type: Package
 Title: Forest Inventory Simulator (fisim) is a package for simulating response
-    and samplign designs common in forest inventories.
+    and samplign designs common in forest inventories
 Version: 0.1.0
 Author: Sebastian Schnell, Henning Aberle
 Maintainer: Sebastian Schnell <Sebastian.Schnell@uni-goettingen.de>
 URL: http://github.com/sebschnell/
-Description: The main purpose of the package is to illustrate basic statisitcal
+Description: The main purpose of the package is to illustrate basic statistical
     principles of surveys of environmental resources with a focus on forest
     inventories. The main intention is to use the package in combination with a
     Shiny application to assist teaching in bachelor and master courses on forest
     monitoring.
 License: What license is it under?
 Encoding: UTF-8
 LazyData: true
-Depends: data.table, sp, maptools, spatstat, rgeos
+Depends: R (>= 3.5.0)
 Imports:
-    sp,
+    data.table,
+    sf,
     maptools,
     spatstat,
     RANN,
-    rgeos
-RoxygenNote: 6.0.1
+    stats,
+    utils
+Suggests: 
+    testthat (>= 2.1.0)
+RoxygenNote: 7.0.2
diff --git a/NAMESPACE b/NAMESPACE
@@ -18,6 +18,3 @@ export(sum_data)
 export(tree_pop)
 export(xy_sample)
 import(data.table)
-import(rgeos)
-import(sp)
-import(spatstat)
diff --git a/R/class_definitions.R b/R/class_definitions.R
@@ -45,8 +45,10 @@
 #' @export
 #'
 #' @examples
-#' library(spatstat);
-#' xy_sample(letterR, n = 10, M = 10, method = "random");
+#' library(sf)
+#' poly <- st_polygon(list(rbind(c(0,0), c(1,0), c(3,2), c(2,4), c(1,4), c(0,0)),
+#'                         rbind(c(1,1), c(1,2), c(2,2), c(1,1))))
+#' xy_sample(poly, n = 10, M = 10, method = "random");
 sample_loc <- function(data, n, M, method) {
   if (!is.data.table(data)) {
     stop("'data' is not a data.table object!");
@@ -100,8 +102,8 @@ is.sample_loc <- function(x) {
 #'   individual trees in a forest. Each row represents a tree, while tree
 #'   attributes are stored in columns. A minimum set of attributes with specific
 #'   columns names is required; see Details section.
-#' @param boundary A \code{\link[sp]{SpatialPolygons}} object that defines the
-#'   border of the forest.
+#' @param boundary An \code{\link[sf]{sf}} object of geometry type POLYGON that
+#'   defines the border of the forest.
 #'
 #' @details The following columns are needed as a minimum requirement in
 #'   \code{data}:
@@ -157,25 +159,25 @@ is.sample_loc <- function(x) {
 #'   \enumerate{
 #'   \item \code{$data} A \code{\link[data.table]{data.table}}
 #'   object as described above.
-#'   \item \code{$boundary} A \code{\link[sp]{SpatialPolygons}} object that
-#'   defines the border of the forest.
+#'   \item \code{$boundary} A \code{\link[sf]{sf}} object of geometry type
+#'   POLYGON that defines the border of the forest.
 #'   }
 #' @export
 tree_pop <- function(data, boundary) {
   if (!is.data.table(data)) {
     stop("'data' is not a data.table object!");
   }
-  sp_class <- c("SpatialPolygons", "SpatialPolygonsDataFrame");
-  if (is.na(match(class(boundary), sp_class))) {
-    stop("'boundary' neither of class 'SpatialPolygons' nor of class 'SpatialPolygonsDataFrame'");
+  if (!inherits(boundary, 'sf') ||
+      sf::st_geometry_type(boundary) %in% c("POLYGON", "MULTIPOLYGON")) {
+    stop("'boundary' neither of class 'POLYGON' nor 'MULTIPOLYGON'");
   }
   col_names <- c("id_tree", "id_stem", "x_tree", "y_tree", "is_dead", "dbh");
   if (sum(is.na(match(col_names, names(data)))) > 0) {
     stop("Column names of 'data' do not meet class requirements!");
   }
   x_range <- data[, range(x_tree)];
   y_range <- data[, range(y_tree)];
-  ext <- bbox(boundary);
+  ext <- sf::st_bbox(boundary);
   if (x_range[1] < ext["x", "min"]) {
     stop("x-coordinate in 'data' smaller than extent of 'boundary'");
   }
@@ -219,20 +221,14 @@ is.tree_pop <- function(x) {
 #'
 #' @param data A \code{\link[data.table]{data.table}} object holding identifiers
 #'   for the sample and respective point locations and row indices that indicate
-#'   which population elements are selected into the sample.
+#'   which population elements are selected into the sample. data must have
+#'   the columns "id_sample", "id_point", "s" and "ef".
 #' @param r_design A \code{character} string indicating the type of the response
 #'   design. One of the following: "fixed_area", "k_tree", "angle_count".
 #' @param r_design_parm A response design specific parameter of type
 #'   \code{numeric}. For "fixed_area" the plot radius in meter; for "k_tree" the
 #'   number of trees closest to the sample location; for "angle_count" the basal
 #'   area factor.
-#' @param ef The expansion factor, i.e., the factor that is used to prorate tree
-#'   attributes to per hectare values (the inverse of the inclusion
-#'   probabilities). Provided by the particular response design functions.
-#' @param ef_alt1 Alternative expansion factor that may be applied when using
-#'   k-tree sampling.
-#' @param ef_alt2 A third alternative expansion factor approximation for k-tree
-#'   sampling
 #'
 #' @details The \code{\link[data.table]{data.table}} object (\code{data}) should
 #'   have the following columns:
@@ -281,12 +277,14 @@ is.response <- function(x) {
   inherits(x, "response");
 }
 
+
+#' Tree Sample
+#'
 #' The function \code{tree_sample} creates tree sample objects. Such objects
 #' assign individual tree information from the population to the sample
 #' locations following a particular response design. The function is only used
 #' internally by \code{\link{extract_data}}.
 #'
-#'
 #' @param data A \code{\link[data.table]{data.table}} object holding individual
 #'   tree data together with identifiers that indicate the sample and the point
 #'   location at which trees were selected.
@@ -316,8 +314,8 @@ is.response <- function(x) {
 #'
 #'   Some response designs (currently only \code{\link{k_tree}}), may have
 #'   additional alternative expansion factors as optional approximations, where
-#'   unbiased estimators are missing or difficult to derive. As the column names
-#'   for these alternatives \code{ef_alt1} and \code{ef_alt2} are used.
+#'   unbiased estimators are missing or difficult to derive. These additional
+#'   columns are \code{ef_alt1} and \code{ef_alt2}.
 #'
 #' @return
 #'  \enumerate{

diff --git a/R/data.R b/R/data.R
@@ -43,3 +43,25 @@
 #'     how often a tree is counted during estimation correcting for edge
 #'     effects, unitless}}
 "hberg_beech"
+
+
+#' Full census of a stand of random trees
+#'
+#' A dataset containing tree stem locations and other attributes of 20000 trees
+#' randomly generated (by ...) on a 1000m by 500m rectangular plot.
+#'
+#' @format An object of class \code{\link{tree_pop}}. The data contains 20000
+#'   rows and 9 columns: \describe{
+#'   \item{id_tree}{Stem number - one tree can have mulitple stems}
+#'   \item{id_stem}{Tree number - repetitions possible due to multiple stems}
+#'   \item{x_tree}{Relative position of the stem center in x-direction}
+#'   \item{y_tree}{Relative position of the stem center in y-direction}
+#'   \item{is_dead}{Was the tree alive or dead?}
+#'   \item{dbh}{Diameter of the stem measured at breast height (1.3m) in cm}
+#'   \item{ba}{Cross-sectional area of the stem at breast height in square
+#'     meters}
+#'   \item{n}{Stem count - helper variable for stem number estimation, unitless}
+#'   \item{f_edge}{Edge factor - helper variable for edge_correction, indicates
+#'     how often a tree is counted during estimation correcting for edge
+#'     effects, unitless}}
+"random_tree"
diff --git a/R/estimation_design.R b/R/estimation_design.R
@@ -1,6 +1,6 @@
 #' Simple random sampling estimator of population means
 #'
-#' @param data A data.table object with plot-level information of stand
+#' @param point_data A data.table object with plot-level information of stand
 #'   characteristics as coming from the \code{sum_data} function.
 #' @param est_col A character vector of column names for which estimates are
 #'   produced. Used when there is more than one way to expand observations from
@@ -26,7 +26,7 @@ est_srs <- function(point_data, est_col = NULL) {
   dt_est <- point_data$data[,
                             list(est_appr = names(.SD),
                                  y_hat = sapply(.SD, mean),
-                                 v_hat = sapply(.SD, var)/.N),
+                                 v_hat = sapply(.SD, stats::var)/.N),
                             by = list(id_sample, variable),
                             .SDcols = est_col];
   return(dt_est);

diff --git a/R/fisim_package.R b/R/fisim_package.R
@@ -5,5 +5,10 @@
 #'
 #' @name fisim
 #' @docType package
-#' @import data.table spatstat sp rgeos
 NULL
+
+# avoid R CMD check warnings due to global variables
+utils::globalVariables(c("N", "d", "dbh", "ef", "ef_alt1", "ef_alt2", "f_edge",
+                         "id_point", "id_sample", "id_stem", "id_tree",
+                         "is_edge", "n", "n_corr", "s", "variable", "x_s",
+                         "x_tree", "x_wt", "y_s", "y_tree", "y_wt"))
diff --git a/R/sampling_design.R b/R/sampling_design.R
@@ -1,7 +1,7 @@
 #' Sample locations in a study area of rectangular shape
 #'
-#' @param sp_poly An object of class \code{\link[sp]{SpatialPolygons}} from the
-#'   \code{sp} package.
+#' @param sf_poly An object of class \code{\link[sf]{sf}} and geometry type
+#'   POLYGON from the \code{sf} package.
 #' @param n Sample size, i.e. number of sample locations.
 #' @param M Number of independent samples of size \code{n}.
 #' @param method character string; 'random' for completely random placement of
@@ -15,30 +15,22 @@
 #' @return A \code{\link[data.table]{data.table}} object with \code{M} times
 #'   \code{n} rows holding an identifier and xy-coordinates.
 #' @export
-xy_sample <- function(sp_poly, n, M = 1, method = 'random', ...) {
-  if (tolower(method) == 'random') {
-    return(sample_loc(data = xy_sample_random(sp_poly = sp_poly,
-                                              n = n,
-                                              M = M),
-                      n = n,
-                      M = M,
-                      method = tolower(method)));
-  } else if (tolower(method) == 'regular') {
-    return(sample_loc(data = xy_sample_regular(sp_poly = sp_poly,
-                                               n = n,
-                                               M = M,
-                                               ...),
-                      n = n,
-                      M = M,
-                      method = tolower(method)))
+xy_sample <- function(sf_poly, n, M = 1, method = 'random', ...) {
+  method <- match.arg(tolower(method), c('random', 'regular'))
+  if (method == 'random') {
+    return(sample_loc(data = xy_sample_random(sf_poly = sf_poly, n = n, M = M),
+                      n = n, M = M, method = method));
+  } else if (method == 'regular') {
+    return(sample_loc(data = xy_sample_regular(sf_poly = sf_poly, n = n, M = M, ...),
+                      n = n, M = M, method = method))
   }
 }
 
 
 #' Place a number of points randomly in a polygon of arbitrary shape
 #'
-#' @param sp_poly An object of class \code{\link[sp]{SpatialPolygons}} from the
-#'   \code{sp} package.
+#' @param sf_poly An object of class \code{\link[sf]{sf}} and geometry type
+#'   POLYGON from the \code{sf} package.
 #' @param n Sample size
 #' @param M Number of independent samples of size \code{n}
 #'
@@ -60,17 +52,18 @@ xy_sample <- function(sp_poly, n, M = 1, method = 'random', ...) {
 #'
 #' @return A \code{\link[data.table]{data.table}} object with \code{M} times
 #'   \code{n} rows holding an identifier and xy-coordinates.
-xy_sample_random <- function(sp_poly, n, M = 1) {
-  tri <- spatstat::triangulate.owin(spatstat::as.owin(sp_poly));
+xy_sample_random <- function(sf_poly, n, M = 1) {
+  # as.owin(sf) leads to different order of tiles and thus different sample points
+  tri <- spatstat::triangulate.owin(maptools::as.owin.SpatialPolygons(sf::as_Spatial(sf::st_geometry(sf_poly))));
   a <- unlist(lapply(tri$tiles, spatstat::area));
   w <- a/sum(a);
 
   x <- unlist(lapply(tri$tiles, function(x) x$bdry[[1]]$x));
   y <- unlist(lapply(tri$tiles, function(x) x$bdry[[1]]$y));
 
   s <- sample(length(a), n*M, prob = w, replace = TRUE);
-  r1 <- runif(n*M);
-  r2 <- runif(n*M);
+  r1 <- stats::runif(n*M);
+  r2 <- stats::runif(n*M);
 
   n_vtc <- 3; # Number of vertices in a triangle
   p4_x <- (1 - r1)*x[(s*n_vtc - 2)] + r1*x[(s*n_vtc - 1)];
@@ -88,8 +81,8 @@ xy_sample_random <- function(sp_poly, n, M = 1) {
 
 #' Place a regular sampling grid over a polygon of arbitrary shape
 #'
-#' @param sp_poly An object of class \code{\link[sp]{SpatialPolygons}} from the
-#'   \code{sp} package.
+#' @param sf_poly An object of class \code{\link[sf]{sf}} and geometry type
+#'   POLYGON from the \code{sf} package.
 #' @param n Sample size
 #' @param M Number of independent samples of size \code{n}
 #' @param cell_size If missing, a cell size is derived from the sample size
@@ -112,10 +105,10 @@ xy_sample_random <- function(sp_poly, n, M = 1) {
 #'   computation of the \code{M} samples will take while.
 #' @return A \code{\link[data.table]{data.table}} object with approximately
 #'   \code{M} times \code{n} rows holding an identifier and xy-coordinates.
-xy_sample_regular <- function(sp_poly, n, M = 1, cell_size = NULL, random_rot = TRUE) {
-  e <- sp::bbox(sp_poly);
-  a_ext <- prod(apply(e, 1, diff));
-  a <- sum(extract_area(sp_poly));
+xy_sample_regular <- function(sf_poly, n, M = 1, cell_size = NULL, random_rot = TRUE) {
+  e <- sf::st_bbox(sf_poly)
+  a_ext <- (e$xmax - e$xmin) * (e$ymax - e$ymin)
+  a <- sum(sf::st_area(sf_poly))
 
   if (is.null(cell_size)) {
     n_os <- n*a_ext/a; # Oversampling
@@ -131,19 +124,23 @@ xy_sample_regular <- function(sp_poly, n, M = 1, cell_size = NULL, random_rot =
                        y_s = 0.0);
 
   # Center coordinates
-  e_cent <- e;
-  e_cent["x", ] <- e["x", ] - e["x", "max"]/2;
-  e_cent["y", ] <- e["y", ] - e["y", "max"]/2;
+  #e_cent_x <- e$xmin + (e$xmax - e$xmin)/2
+  #e_cent_y <- e$ymin + (e$ymax - e$ymin)/2
+  e_cent <- e
+  e_cent[c(1, 3)] <- e_cent[c(1, 3)] - e_cent$xmax/2
+  e_cent[c(2, 4)] <- e_cent[c(2, 4)] - e_cent$ymax/2
+
   r_start <- 0L;
   r_end <- 0L;
   if (random_rot) {
-    alpha <- runif(M, min = 0, max = pi);
+    alpha <- stats::runif(M, min = 0, max = pi);
   }
-  u_d <- cbind(runif(M)*d, runif(M)*d);
+
+  u_d <- cbind(stats::runif(M)*d, stats::runif(M)*d);
   for (m in seq.int(M)) {
     # Enlarge grid to account for possible rotation
-    x <- seq(1.5*min(e_cent["x", "min"]) + u_d[m, 1], 1.5*max(e_cent["x", "max"]), d);
-    y <- seq(1.5*min(e_cent["y", "min"]) + u_d[m, 2], 1.5*max(e_cent["y", "max"]), d);
+    x <- seq(1.5 * e_cent$xmin + u_d[m, 1], 1.5 * e_cent$xmax, d);
+    y <- seq(1.5 * e_cent$ymin + u_d[m, 2], 1.5 * e_cent$ymax, d);
     xy <- expand.grid(x, y);
     # Random rotation
     if (random_rot) {
@@ -152,14 +149,15 @@ xy_sample_regular <- function(sp_poly, n, M = 1, cell_size = NULL, random_rot =
                     ncol = 2, byrow = TRUE);
       xy <- t(rot %*% t(as.matrix(xy)));
     }
+
     # Back to original coordinates
-    xy[, 1] <- xy[, 1] + e["x", "max"]/2;
-    xy[, 2] <- xy[, 2] + e["y", "max"]/2;
-    sp_xy <- sp::SpatialPoints(xy);
+    xy[, 1] <- xy[, 1] + e$xmax/2
+    xy[, 2] <- xy[, 2] + e$ymax/2
+    sf_xy <- sf::st_as_sf(as.data.frame(xy), coords=1:2)
 
     # Reject points outside the study area
-    idx_over <- is.na(sp::over(sp_xy, sp_poly)) == FALSE;
-    xy_sub <- sp::coordinates(sp_xy[idx_over]);
+    idx_over <- sf::st_contains(sf_poly, sf_xy, prepared=TRUE, sparse=FALSE)
+    xy_sub <- sf::st_coordinates(sf_xy[idx_over, ]);
 
     # Collect results
     n_m <- nrow(xy_sub);