@@ -434,39 +434,76 @@ spatial_qaqc <- function(dat,
434434 duration = 60 )
435435
436436 } else if (sum(obs_outside ) > 0 ) {
437- is_lonlat <- sf :: st_is_longlat(spatdat )
438- tol_val <- if (is_lonlat ) 0.001 else 100
439-
440437 # If any observations are outside the zones, calculate distances
441- # Find nearest feature and get distance
442- # Simplify with preserveTopology = TRUE to prevent polygons from vanishing
443- spat_optim <- sf :: st_simplify(spatdat , preserveTopology = TRUE , dTolerance = tol_val )
444438
445- # SAFETY CHECK: If simplification created empty geometries, revert those specific
446- # features back to the original spatdat to avoid NA errors.
447- empty_idx <- sf :: st_is_empty(spat_optim )
448- if (any(empty_idx )) {
449- spat_optim [empty_idx , ] <- spatdat [empty_idx , ]
450- }
439+ # Prep data and fix units
440+ # Project to EPSG:3857 (Web Mercator) to ensure results are in METERS.
441+ metric_crs <- 3857
442+ pts_calc <- sf :: st_transform(dat_sf [obs_outside , ], metric_crs )
443+
444+ # Simplify and project spatial data
445+ spat_simp <- sf :: st_simplify(spatdat , preserveTopology = TRUE , dTolerance = 50 )
451446
452- # Cast to boundary lines for faster distance calculation
453- # (st_boundary is safer than st_cast for Polygons )
454- spat_optim <- sf :: st_boundary( spat_optim )
447+ # Handle empty geometries from simplification
448+ empty_idx <- sf :: st_is_empty( spat_simp )
449+ if (any( empty_idx )) spat_simp [ empty_idx , ] <- spatdat [ empty_idx , ]
455450
456- # Find nearest feature using the OPTIMIZED object
457- nearest <- sf :: st_nearest_feature( dat_sf [ obs_outside , ], spat_optim )
458- # nearest <- sf::st_nearest_feature(dat_sf[obs_outside, ], spatdat )
451+ # Transform to metric and cast to boundary lines
452+ spat_calc <- sf :: st_transform( spat_simp , metric_crs )
453+ spat_calc <- sf :: st_boundary( spat_calc )
459454
460- # Calculate distance
461- dist.rec <- sf :: st_distance(dat_sf [obs_outside , ],
462- spat_optim [nearest , ],
463- by_element = TRUE )
464- # dist.rec <- sf::st_distance(dat_sf[obs_outside, ],
465- # spatdat[nearest, ],
466- # by_element = TRUE)
455+ # Initialize distance vector
456+ dist_vec <- numeric (nrow(pts_calc ))
457+
458+ # Parallel or serial computation
459+ # Only pay the "startup cost" of parallel processing if we have many points (> 2000).
460+ run_parallel <- nrow(pts_calc ) > 2000
461+
462+ if (run_parallel ) {
463+ # --- PARALLEL MODE (For Large Data) ---
464+ n_cores <- parallel :: detectCores(logical = FALSE ) - 1
465+ if (n_cores < 1 ) n_cores <- 1
466+
467+ parts <- split(1 : nrow(pts_calc ), cut(1 : nrow(pts_calc ), n_cores ))
468+
469+ calc_dist_chunk <- function (indices , points , polys ) {
470+ # Explicitly ensure sf is loaded inside worker to prevent method errors
471+ library(sf )
472+ pts_sub <- points [indices , ]
473+ near_idx <- sf :: st_nearest_feature(pts_sub , polys )
474+ sf :: st_distance(pts_sub , polys [near_idx , ], by_element = TRUE )
475+ }
476+
477+ cl <- parallel :: makeCluster(n_cores )
478+ on.exit(parallel :: stopCluster(cl ), add = TRUE )
479+
480+ parallel :: clusterEvalQ(cl , {
481+ library(sf )
482+ sf :: sf_use_s2(FALSE )
483+ })
484+
485+ # Use tryCatch to fall back to serial if parallel fails
486+ tryCatch({
487+ dist_list <- parallel :: parLapply(cl , parts , calc_dist_chunk ,
488+ points = pts_calc ,
489+ polys = spat_calc )
490+ dist_vec <- unlist(dist_list )
491+ }, error = function (e ) {
492+ warning(" Parallel processing failed. Falling back to serial calculation." )
493+ run_parallel <<- FALSE # Fallback flag
494+ })
495+ }
496+
497+ if (! run_parallel ) {
498+ # --- SERIAL MODE (For Unit Tests & Small Data) ---
499+ # This runs simply in the main process, avoiding firewall checks and method errors.
500+ nearest <- sf :: st_nearest_feature(pts_calc , spat_calc )
501+ dist_vec <- sf :: st_distance(pts_calc , spat_calc [nearest , ], by_element = TRUE )
502+ }
467503
468504 # Add the distance to the dataset
469- dataset [obs_outside , " dist" ] <- as.numeric(dist.rec )
505+ dataset [obs_outside , " dist" ] <- as.numeric(dist_vec )
506+
470507 dataset $ dist [is.na(dataset $ dist )] <- 0
471508 dist_vec <- dataset $ dist
472509
0 commit comments