diff --git a/.github/workflows/java-r5rcore-build.yaml b/.github/workflows/java-r5rcore-build.yaml index f284a11b..bd8e6d5f 100644 --- a/.github/workflows/java-r5rcore-build.yaml +++ b/.github/workflows/java-r5rcore-build.yaml @@ -25,6 +25,8 @@ jobs: - name: Install R packages run: | + # not sure why this is necessary now, but it fails without it + options(repos=list(CRAN="https://packagemanager.posit.co/cran/__linux__/noble/latest")) install.packages(c('devtools', 'remotes')) remotes::install_deps(dependencies = TRUE) shell: Rscript {0} diff --git a/java-r5rcore/gradle/wrapper/gradle-wrapper.jar b/java-r5rcore/gradle/wrapper/gradle-wrapper.jar index ccebba77..d64cd491 100644 Binary files a/java-r5rcore/gradle/wrapper/gradle-wrapper.jar and b/java-r5rcore/gradle/wrapper/gradle-wrapper.jar differ diff --git a/java-r5rcore/gradle/wrapper/gradle-wrapper.properties b/java-r5rcore/gradle/wrapper/gradle-wrapper.properties index 3499ded5..1af9e093 100644 --- a/java-r5rcore/gradle/wrapper/gradle-wrapper.properties +++ b/java-r5rcore/gradle/wrapper/gradle-wrapper.properties @@ -2,5 +2,6 @@ distributionBase=GRADLE_USER_HOME distributionPath=wrapper/dists distributionUrl=https\://services.gradle.org/distributions/gradle-8.5-bin.zip networkTimeout=10000 +validateDistributionUrl=true zipStoreBase=GRADLE_USER_HOME zipStorePath=wrapper/dists diff --git a/java-r5rcore/gradlew b/java-r5rcore/gradlew index 79a61d42..1aa94a42 100755 --- a/java-r5rcore/gradlew +++ b/java-r5rcore/gradlew @@ -83,10 +83,8 @@ done # This is normally unused # shellcheck disable=SC2034 APP_BASE_NAME=${0##*/} -APP_HOME=$( cd "${APP_HOME:-./}" && pwd -P ) || exit - -# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. -DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"' +# Discard cd standard output in case $CDPATH is set (https://github.com/gradle/gradle/issues/25036) +APP_HOME=$( cd "${APP_HOME:-./}" > /dev/null && pwd -P ) || exit # Use the maximum available, or set MAX_FD != -1 to use that value. MAX_FD=maximum @@ -133,10 +131,13 @@ location of your Java installation." fi else JAVACMD=java - which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. + if ! command -v java >/dev/null 2>&1 + then + die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. Please set the JAVA_HOME variable in your environment to match the location of your Java installation." + fi fi # Increase the maximum file descriptors if we can. @@ -144,7 +145,7 @@ if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then case $MAX_FD in #( max*) # In POSIX sh, ulimit -H is undefined. That's why the result is checked to see if it worked. - # shellcheck disable=SC3045 + # shellcheck disable=SC2039,SC3045 MAX_FD=$( ulimit -H -n ) || warn "Could not query maximum file descriptor limit" esac @@ -152,7 +153,7 @@ if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then '' | soft) :;; #( *) # In POSIX sh, ulimit -n is undefined. That's why the result is checked to see if it worked. - # shellcheck disable=SC3045 + # shellcheck disable=SC2039,SC3045 ulimit -n "$MAX_FD" || warn "Could not set maximum file descriptor limit to $MAX_FD" esac @@ -197,11 +198,15 @@ if "$cygwin" || "$msys" ; then done fi -# Collect all arguments for the java command; -# * $DEFAULT_JVM_OPTS, $JAVA_OPTS, and $GRADLE_OPTS can contain fragments of -# shell script including quotes and variable substitutions, so put them in -# double quotes to make sure that they get re-expanded; and -# * put everything else in single quotes, so that it's not re-expanded. + +# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. +DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"' + +# Collect all arguments for the java command: +# * DEFAULT_JVM_OPTS, JAVA_OPTS, JAVA_OPTS, and optsEnvironmentVar are not allowed to contain shell fragments, +# and any embedded shellness will be escaped. +# * For example: A user cannot expect ${Hostname} to be expanded, as it is an environment variable and will be +# treated as '${Hostname}' itself on the command line. set -- \ "-Dorg.gradle.appname=$APP_BASE_NAME" \ diff --git a/java-r5rcore/src/org/ipea/r5r/Network/NetworkBuilder.java b/java-r5rcore/src/org/ipea/r5r/Network/NetworkBuilder.java index 5d0631cd..5c1dec71 100644 --- a/java-r5rcore/src/org/ipea/r5r/Network/NetworkBuilder.java +++ b/java-r5rcore/src/org/ipea/r5r/Network/NetworkBuilder.java @@ -2,6 +2,9 @@ import com.conveyal.analysis.datasource.DataSourceException; import com.conveyal.gtfs.GTFSFeed; +import com.conveyal.gtfs.error.GTFSError; +import com.conveyal.gtfs.validator.PostLoadValidator; +import com.conveyal.gtfs.validator.model.Priority; import com.conveyal.osmlib.OSM; import com.conveyal.r5.analyst.scenario.RasterCost; import com.conveyal.r5.kryo.KryoNetworkSerializer; @@ -10,6 +13,7 @@ import com.conveyal.r5.transit.TransportNetwork; import org.apache.commons.io.FilenameUtils; import org.ipea.r5r.R5RCore; +import org.ipea.r5r.RDataFrame; import java.io.File; import java.io.FileWriter; @@ -19,42 +23,50 @@ import java.util.*; import java.util.stream.Collectors; import java.util.stream.Stream; +import java.util.zip.ZipFile; +import java.nio.file.Files; +import java.nio.file.Path; public class NetworkBuilder { - public static boolean useNativeElevation = false; - public static String elevationCostFunction = "NONE"; + public boolean useNativeElevation = false; + public String elevationCostFunction = "NONE"; - private static List gtfsFiles = new ArrayList<>(); - private static String osmFilename; + private List gtfsFiles = new ArrayList<>(); + private String osmFilename; - private static OSM osmFile; - private static Stream gtfsFeeds; - private static String tiffFile = ""; + private OSM osmFile; + private Stream gtfsFeeds; + private String tiffFile = ""; - public static TransportNetwork checkAndLoadR5Network(String dataFolder) throws Exception { + public RDataFrame gtfsErrors = null; + + public boolean highPriorityErrors = false; + + public TransportNetwork checkAndLoadR5Network(String dataFolder) throws Exception { File file = new File(dataFolder, "network.dat"); if (!file.isFile()) { // network.dat file does not exist. create! - NetworkBuilder.createR5Network(dataFolder); + createR5Network(dataFolder); } else { // network.dat file exists // check version if (!NetworkChecker.checkR5NetworkVersion(dataFolder)) { // incompatible versions. try to create a new one // network could not be loaded, probably due to incompatible versions. create a new one - NetworkBuilder.createR5Network(dataFolder); + createR5Network(dataFolder); } } // compatible versions, load network - return NetworkBuilder.loadR5Network(dataFolder); + // if there were high priority errors, though, don't return network - don't even tempt someone to use it. + return highPriorityErrors ? null : loadR5Network(dataFolder); } - public static TransportNetwork loadR5Network(String dataFolder) throws Exception { + public TransportNetwork loadR5Network(String dataFolder) throws Exception { return KryoNetworkSerializer.read(new File(dataFolder, "network.dat")); } - public static void createR5Network(String dataFolder) { + public void createR5Network(String dataFolder) { File dir = new File(dataFolder); cleanUpMapdb(dir); @@ -66,14 +78,18 @@ public static void createR5Network(String dataFolder) { Map networkConfig = buildNetworkConfig(); try { - KryoNetworkSerializer.write(tn, new File(dataFolder, "network.dat")); - writeNetworkSettings(dataFolder, networkConfig); + // if there were high priority errors, network is unusable. Don't even serialize it as that might tempt someone + // to use it. + if (!highPriorityErrors) { + KryoNetworkSerializer.write(tn, new File(dataFolder, "network.dat")); + writeNetworkSettings(dataFolder, networkConfig); + } } catch (IOException e) { e.printStackTrace(); } } - private static void writeNetworkSettings(String dataFolder, Map networkConfig) throws IOException { + private void writeNetworkSettings(String dataFolder, Map networkConfig) throws IOException { String json = "{" + networkConfig.entrySet().stream() .map(e -> "\""+ e.getKey() + "\":\"" + e.getValue() + "\"") .collect(Collectors.joining(", "))+"}"; @@ -84,7 +100,7 @@ private static void writeNetworkSettings(String dataFolder, Map printWriter.close(); } - private static Map buildNetworkConfig() { + private Map buildNetworkConfig() { Map networkConfig = new LinkedHashMap<>(); networkConfig.put("r5_version", R5RCore.R5_VERSION); @@ -105,7 +121,7 @@ private static Map buildNetworkConfig() { return networkConfig; } - private static TransportNetwork createNetwork() { + private TransportNetwork createNetwork() { TransportNetwork network = new TransportNetwork(); network.scenarioId = "r5r"; @@ -167,7 +183,7 @@ private static TransportNetwork createNetwork() { return network; } - public static void loadDirectory(File directory) { + public void loadDirectory(File directory) { osmFilename = ""; tiffFile = ""; gtfsFiles.clear(); @@ -187,11 +203,75 @@ public static void loadDirectory(File directory) { if (name.endsWith(".tif") | name.endsWith(".tiff")) tiffFile = file.getAbsolutePath(); } + initializeGtfsErrors(); + // Supply feeds with a stream, so they do not sit open in memory while other feeds are being processed. - gtfsFeeds = gtfsFiles.stream().map(GTFSFeed::readOnlyTempFileFromGtfs); + gtfsFeeds = gtfsFiles.stream().map(this::readFeed); + } + + // read a feed and store errors + // We can't just use the R5 readOnlyTempFileFromGtfs, because then the errors get lost before + // we have access to the object. + private GTFSFeed readFeed (String feedFile) { + try { + // we use a directory to make sure that all sidecar files get deleted on exit + File tempDir = Files.createTempDirectory("gtfs").toFile(); + File dbFile = new File(tempDir, "gtfs.db"); + File dbpFile = new File(tempDir, "gtfs.db.p"); + + GTFSFeed feed = GTFSFeed.newWritableFile(dbFile); + feed.loadFromFile(new ZipFile(feedFile), null); + + // add additional errors + new PostLoadValidator(feed).validate(); + + // parse errors + parseGtfsErrors(feed); + + feed.close(); + + // clean up + // has to be done first, deleteOnExit deletes in reverse order and directories must be empty + tempDir.deleteOnExit(); + dbFile.deleteOnExit(); + dbpFile.deleteOnExit(); + + // re-open read only (this loses the errors, see the example in R5 BundleController.java) + return GTFSFeed.reopenReadOnly(dbFile); + } catch (Exception e) { + // re-throw as unchecked + throw new RuntimeException(e); + } + } + + private void initializeGtfsErrors() { + gtfsErrors = new RDataFrame(); + gtfsErrors.addStringColumn("file", ""); + // It's a long on the Java side + gtfsErrors.addIntegerColumn("line", -1); + gtfsErrors.addStringColumn("type", ""); + gtfsErrors.addStringColumn("field", ""); + gtfsErrors.addStringColumn("id", ""); + gtfsErrors.addStringColumn("priority", ""); + } + + private void parseGtfsErrors(GTFSFeed feed) { + for (GTFSError error : feed.errors) { + gtfsErrors.append(); + gtfsErrors.set("file", error.file); + gtfsErrors.set("line", (int) error.line); + gtfsErrors.set("type", error.errorType); + gtfsErrors.set("field", error.field); + gtfsErrors.set("id", error.affectedEntityId); + gtfsErrors.set("priority", error.getPriority().name()); + + if (error.getPriority() == Priority.HIGH) { + highPriorityErrors = true; + } + } } - private static void cleanUpMapdb(File dir) { + private void cleanUpMapdb(File dir) { // clean up older mapdb files File[] mapdbFiles = dir.listFiles((d, name) -> name.contains(".mapdb")); diff --git a/java-r5rcore/src/org/ipea/r5r/R5RCore.java b/java-r5rcore/src/org/ipea/r5r/R5RCore.java index f284793f..97ee33d8 100644 --- a/java-r5rcore/src/org/ipea/r5r/R5RCore.java +++ b/java-r5rcore/src/org/ipea/r5r/R5RCore.java @@ -47,6 +47,8 @@ public class R5RCore { private final String dataPath; + public final RDataFrame gtfsErrors; + public double getWalkSpeed() { return this.routingProperties.walkSpeed; } @@ -247,13 +249,18 @@ public R5RCore(String dataFolder, boolean verbose, String nativeElevationFunctio WorkerComponents.fileStorage = new R5RFileStorage(null); nativeElevationFunction = nativeElevationFunction.toUpperCase(); - NetworkBuilder.useNativeElevation = !nativeElevationFunction.equals("NONE"); - NetworkBuilder.elevationCostFunction = nativeElevationFunction; + + NetworkBuilder builder = new NetworkBuilder(); + + builder.useNativeElevation = !nativeElevationFunction.equals("NONE"); + builder.elevationCostFunction = nativeElevationFunction; dataPath = dataFolder; Path path = Paths.get(dataFolder).toAbsolutePath().normalize(); - TransportNetwork network = NetworkBuilder.checkAndLoadR5Network(path.toString()); - this.routingProperties = new RoutingProperties(network); + // network will be null if there were high priority errors + TransportNetwork network = builder.checkAndLoadR5Network(path.toString()); + this.routingProperties = network == null ? null : new RoutingProperties(network); + this.gtfsErrors = builder.gtfsErrors; } // --------------------------------------------------------------------------------------------------- diff --git a/r-package/R/build_network.R b/r-package/R/build_network.R index ff032491..99b0c94e 100644 --- a/r-package/R/build_network.R +++ b/r-package/R/build_network.R @@ -126,9 +126,19 @@ build_network <- function(data_path, )) } - cli::cli_inform(c( - v = "Finished building network at {.path {data_path}}" - )) + errors = java_to_dt(r5r_network$gtfsErrors) + if (any(errors$priority == "HIGH")) { + cli::cli_alert_danger("High priority errors found; network will not be usable. Use gtfs_errors(r5r_network) to see them.") + # TODO some way to make it so the network can't be passed to any analysis functions but gtfs_errors still works? + } else { + if (nrow(errors) > 0) { + cli::cli_alert_warning("{nrow(errors)} errors found in GTFS. Use gtfs_errors(r5r_network) to see them.") + } + + cli::cli_inform(c( + v = "Finished building network at {.path {data_path}}" + )) + } } return(wrap_r5r_network(r5r_network)) diff --git a/r-package/R/gtfs_errors.R b/r-package/R/gtfs_errors.R new file mode 100644 index 00000000..22c503cc --- /dev/null +++ b/r-package/R/gtfs_errors.R @@ -0,0 +1,25 @@ +#' Get GTFS errors encountered in network building +#' +#' This returns a data frame of GTFS errors R5 encountered when building the network. +#' Any high-priority errors will prevent routing from functioning, while other errors +#' may lead to unexpected results. +#' +#' @param r5r_network the R5R network object +#' +#' Since GTFS errors are not stored as part of the cached network, this function will only +#' return results on a newly built network, and will error on a network loaded from a cache. +#' +#' @export +gtfs_errors <- function (r5r_network) { + checkmate::assert_class(r5r_network, "r5r_network") + + jerr = r5r_network@jcore$gtfsErrors + + if (rJava::is.jnull(jerr)) { + stop("Errors are only available when transport network is first built") + } else { + # indirection to make it visible again + dt = java_to_dt(jerr) + return(dt) + } +} \ No newline at end of file diff --git a/r-package/inst/jar/r5r.jar b/r-package/inst/jar/r5r.jar index 716341a7..19cdb461 100644 Binary files a/r-package/inst/jar/r5r.jar and b/r-package/inst/jar/r5r.jar differ