|
84 | 84 | #' mod.dat <- spat_out$dataset |
85 | 85 | #' } |
86 | 86 | #' |
87 | | -spatial_qaqc <- function(dat, project, spat, lon.dat, lat.dat, lon.spat = NULL, |
88 | | - lat.spat = NULL, id.spat = NULL, epsg = NULL, date = NULL, |
89 | | - group = NULL, filter_dist = NULL) { |
| 87 | +spatial_qaqc <- function(dat, |
| 88 | + project, |
| 89 | + spat, |
| 90 | + lon.dat, |
| 91 | + lat.dat, |
| 92 | + lon.spat = NULL, |
| 93 | + lat.spat = NULL, |
| 94 | + id.spat = NULL, |
| 95 | + epsg = NULL, |
| 96 | + date = NULL, |
| 97 | + group = NULL, |
| 98 | + filter_dist = NULL) { |
90 | 99 |
|
91 | 100 | # Pull primary data and spatial data |
92 | 101 | out <- data_pull(dat, project) |
@@ -424,15 +433,77 @@ spatial_qaqc <- function(dat, project, spat, lon.dat, lat.dat, lon.spat = NULL, |
424 | 433 | type = "error", |
425 | 434 | duration = 60) |
426 | 435 |
|
427 | | - } else if(sum(obs_outside) > 0) { |
| 436 | + } else if (sum(obs_outside) > 0) { |
428 | 437 | # If any observations are outside the zones, calculate distances |
429 | | - # Find nearest feature and get distance |
430 | | - nearest <- sf::st_nearest_feature(dat_sf[obs_outside, ], spatdat) |
431 | | - dist.rec <- sf::st_distance(dat_sf[obs_outside, ], spatdat[nearest, ], |
432 | | - by_element = TRUE) |
| 438 | + |
| 439 | + # Prep data and fix units |
| 440 | + # Project to EPSG:3857 (Web Mercator) to ensure results are in METERS. |
| 441 | + metric_crs <- 3857 |
| 442 | + pts_calc <- sf::st_transform(dat_sf[obs_outside, ], metric_crs) |
| 443 | + |
| 444 | + # Simplify and project spatial data |
| 445 | + spat_simp <- sf::st_simplify(spatdat, preserveTopology = TRUE, dTolerance = 50) |
| 446 | + |
| 447 | + # Handle empty geometries from simplification |
| 448 | + empty_idx <- sf::st_is_empty(spat_simp) |
| 449 | + if (any(empty_idx)) spat_simp[empty_idx, ] <- spatdat[empty_idx, ] |
| 450 | + |
| 451 | + # Transform to metric and cast to boundary lines |
| 452 | + spat_calc <- sf::st_transform(spat_simp, metric_crs) |
| 453 | + spat_calc <- sf::st_boundary(spat_calc) |
| 454 | + |
| 455 | + # Initialize distance vector |
| 456 | + dist_vec <- numeric(nrow(pts_calc)) |
| 457 | + |
| 458 | + # Parallel or serial computation |
| 459 | + # Only pay the "startup cost" of parallel processing if we have many points (> 2000). |
| 460 | + run_parallel <- nrow(pts_calc) > 2000 |
| 461 | + |
| 462 | + if (run_parallel) { |
| 463 | + # --- PARALLEL MODE (For Large Data) --- |
| 464 | + n_cores <- parallel::detectCores(logical = FALSE) - 1 |
| 465 | + if (n_cores < 1) n_cores <- 1 |
| 466 | + |
| 467 | + parts <- split(1:nrow(pts_calc), cut(1:nrow(pts_calc), n_cores)) |
| 468 | + |
| 469 | + calc_dist_chunk <- function(indices, points, polys) { |
| 470 | + # Explicitly ensure sf is loaded inside worker to prevent method errors |
| 471 | + library(sf) |
| 472 | + pts_sub <- points[indices, ] |
| 473 | + near_idx <- sf::st_nearest_feature(pts_sub, polys) |
| 474 | + sf::st_distance(pts_sub, polys[near_idx, ], by_element = TRUE) |
| 475 | + } |
| 476 | + |
| 477 | + cl <- parallel::makeCluster(n_cores) |
| 478 | + on.exit(parallel::stopCluster(cl), add = TRUE) |
| 479 | + |
| 480 | + parallel::clusterEvalQ(cl, { |
| 481 | + library(sf) |
| 482 | + sf::sf_use_s2(FALSE) |
| 483 | + }) |
| 484 | + |
| 485 | + # Use tryCatch to fall back to serial if parallel fails |
| 486 | + tryCatch({ |
| 487 | + dist_list <- parallel::parLapply(cl, parts, calc_dist_chunk, |
| 488 | + points = pts_calc, |
| 489 | + polys = spat_calc) |
| 490 | + dist_vec <- unlist(dist_list) |
| 491 | + }, error = function(e) { |
| 492 | + warning("Parallel processing failed. Falling back to serial calculation.") |
| 493 | + run_parallel <<- FALSE # Fallback flag |
| 494 | + }) |
| 495 | + } |
| 496 | + |
| 497 | + if (!run_parallel) { |
| 498 | + # --- SERIAL MODE (For Unit Tests & Small Data) --- |
| 499 | + # This runs simply in the main process, avoiding firewall checks and method errors. |
| 500 | + nearest <- sf::st_nearest_feature(pts_calc, spat_calc) |
| 501 | + dist_vec <- sf::st_distance(pts_calc, spat_calc[nearest, ], by_element = TRUE) |
| 502 | + } |
433 | 503 |
|
434 | 504 | # Add the distance to the dataset |
435 | | - dataset[obs_outside, "dist"] <- as.numeric(dist.rec) |
| 505 | + dataset[obs_outside, "dist"] <- as.numeric(dist_vec) |
| 506 | + |
436 | 507 | dataset$dist[is.na(dataset$dist)] <- 0 |
437 | 508 | dist_vec <- dataset$dist |
438 | 509 |
|
|
0 commit comments