Skip to content

Commit 786b72a

Browse files
committed
minor updates
1 parent 06d3de1 commit 786b72a

File tree

2 files changed

+63
-26
lines changed

2 files changed

+63
-26
lines changed

R/spatial_qaqc.R

Lines changed: 63 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -434,39 +434,76 @@ spatial_qaqc <- function(dat,
434434
duration = 60)
435435

436436
} else if (sum(obs_outside) > 0) {
437-
is_lonlat <- sf::st_is_longlat(spatdat)
438-
tol_val <- if (is_lonlat) 0.001 else 100
439-
440437
# If any observations are outside the zones, calculate distances
441-
# Find nearest feature and get distance
442-
# Simplify with preserveTopology = TRUE to prevent polygons from vanishing
443-
spat_optim <- sf::st_simplify(spatdat, preserveTopology = TRUE, dTolerance = tol_val)
444438

445-
# SAFETY CHECK: If simplification created empty geometries, revert those specific
446-
# features back to the original spatdat to avoid NA errors.
447-
empty_idx <- sf::st_is_empty(spat_optim)
448-
if (any(empty_idx)) {
449-
spat_optim[empty_idx, ] <- spatdat[empty_idx, ]
450-
}
439+
# Prep data and fix units
440+
# Project to EPSG:3857 (Web Mercator) to ensure results are in METERS.
441+
metric_crs <- 3857
442+
pts_calc <- sf::st_transform(dat_sf[obs_outside, ], metric_crs)
443+
444+
# Simplify and project spatial data
445+
spat_simp <- sf::st_simplify(spatdat, preserveTopology = TRUE, dTolerance = 50)
451446

452-
# Cast to boundary lines for faster distance calculation
453-
# (st_boundary is safer than st_cast for Polygons)
454-
spat_optim <- sf::st_boundary(spat_optim)
447+
# Handle empty geometries from simplification
448+
empty_idx <- sf::st_is_empty(spat_simp)
449+
if (any(empty_idx)) spat_simp[empty_idx, ] <- spatdat[empty_idx, ]
455450

456-
# Find nearest feature using the OPTIMIZED object
457-
nearest <- sf::st_nearest_feature(dat_sf[obs_outside, ], spat_optim)
458-
# nearest <- sf::st_nearest_feature(dat_sf[obs_outside, ], spatdat)
451+
# Transform to metric and cast to boundary lines
452+
spat_calc <- sf::st_transform(spat_simp, metric_crs)
453+
spat_calc <- sf::st_boundary(spat_calc)
459454

460-
# Calculate distance
461-
dist.rec <- sf::st_distance(dat_sf[obs_outside, ],
462-
spat_optim[nearest, ],
463-
by_element = TRUE)
464-
# dist.rec <- sf::st_distance(dat_sf[obs_outside, ],
465-
# spatdat[nearest, ],
466-
# by_element = TRUE)
455+
# Initialize distance vector
456+
dist_vec <- numeric(nrow(pts_calc))
457+
458+
# Parallel or serial computation
459+
# Only pay the "startup cost" of parallel processing if we have many points (> 2000).
460+
run_parallel <- nrow(pts_calc) > 2000
461+
462+
if (run_parallel) {
463+
# --- PARALLEL MODE (For Large Data) ---
464+
n_cores <- parallel::detectCores(logical = FALSE) - 1
465+
if (n_cores < 1) n_cores <- 1
466+
467+
parts <- split(1:nrow(pts_calc), cut(1:nrow(pts_calc), n_cores))
468+
469+
calc_dist_chunk <- function(indices, points, polys) {
470+
# Explicitly ensure sf is loaded inside worker to prevent method errors
471+
library(sf)
472+
pts_sub <- points[indices, ]
473+
near_idx <- sf::st_nearest_feature(pts_sub, polys)
474+
sf::st_distance(pts_sub, polys[near_idx, ], by_element = TRUE)
475+
}
476+
477+
cl <- parallel::makeCluster(n_cores)
478+
on.exit(parallel::stopCluster(cl), add = TRUE)
479+
480+
parallel::clusterEvalQ(cl, {
481+
library(sf)
482+
sf::sf_use_s2(FALSE)
483+
})
484+
485+
# Use tryCatch to fall back to serial if parallel fails
486+
tryCatch({
487+
dist_list <- parallel::parLapply(cl, parts, calc_dist_chunk,
488+
points = pts_calc,
489+
polys = spat_calc)
490+
dist_vec <- unlist(dist_list)
491+
}, error = function(e) {
492+
warning("Parallel processing failed. Falling back to serial calculation.")
493+
run_parallel <<- FALSE # Fallback flag
494+
})
495+
}
496+
497+
if (!run_parallel) {
498+
# --- SERIAL MODE (For Unit Tests & Small Data) ---
499+
# This runs simply in the main process, avoiding firewall checks and method errors.
500+
nearest <- sf::st_nearest_feature(pts_calc, spat_calc)
501+
dist_vec <- sf::st_distance(pts_calc, spat_calc[nearest, ], by_element = TRUE)
502+
}
467503

468504
# Add the distance to the dataset
469-
dataset[obs_outside, "dist"] <- as.numeric(dist.rec)
505+
dataset[obs_outside, "dist"] <- as.numeric(dist_vec)
506+
470507
dataset$dist[is.na(dataset$dist)] <- 0
471508
dist_vec <- dataset$dist
472509

0 Bytes
Binary file not shown.

0 commit comments

Comments
 (0)