 import com.conveyal.analysis.UserPermissions;
 import com.conveyal.analysis.persistence.AnalysisDB;
 import com.google.common.collect.Lists;
+import com.mongodb.client.FindIterable;
 import com.mongodb.client.MongoCollection;
-import com.mongodb.util.JSON;
-import org.bson.BsonArray;
 import org.bson.Document;
 import org.bson.conversions.Bson;
-import org.bson.json.JsonWriter;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import spark.Request;
 import spark.Response;
 
-import java.io.IOException;
 import java.io.OutputStream;
-import java.io.PrintWriter;
-import java.io.Writer;
 import java.lang.invoke.MethodHandles;
 import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
+import java.util.HashMap;
 import java.util.List;
+import java.util.Map;
 
 import static com.conveyal.analysis.util.JsonUtil.toJson;
+import static com.google.common.base.Preconditions.checkNotNull;
 import static com.mongodb.client.model.Filters.and;
 import static com.mongodb.client.model.Filters.eq;
 
@@ -38,61 +36,72 @@ public class DatabaseController implements HttpController {
 
     private final AnalysisDB database;
 
-    private final MongoCollection<Document> regions;
-    private final MongoCollection<Document> bundles;
+    private final Map<String, MongoCollection<Document>> mongoCollections;
+
+    // Preloading these avoids synchronization while handling HTTP requests, by reading from an immutable map.
+    // TODO verify whether it is threadsafe to reuse a MongoCollection across all threads.
+    // Amazingly there seems to be no documentation on this at all. Drilling down into the function calls, it seems
+    // to create a new session on each find() call, so it should presumably go through synchronization there.
+    // In testing with siege and other HTTP benchmarking tools, reusing the MongoCollection seems to result in much
+    // smoother operation; creating a new MongoCollection on each request seems to jam up after a certain number
+    // of requests (perhaps while waiting for idle MongoCollections to be cleaned up).
+    public Map<String, MongoCollection<Document>> mongoCollectionMap (String... collectionNames) {
+        Map<String, MongoCollection<Document>> map = new HashMap<>();
+        for (String name : collectionNames) {
+            map.put(name, database.getBsonCollection(name));
+        }
+        // Make the map immutable for threadsafe reading, then return it.
+        return Map.copyOf(map);
+    }
 
     public DatabaseController (AnalysisDB database) {
         this.database = database;
-        // TODO verify if it is threadsafe to reuse this collection in all threads
-        // Also verify whether it's any slower to just get the collection on every GET operation.
-        // Testing with Apache bench, retaining and reusing the collection seems much smoother.
-        this.regions = database.getBsonCollection("regions");
-        this.bundles = database.getBsonCollection("bundles");
+        this.mongoCollections = mongoCollectionMap("regions", "bundles");
     }
 
-    /**
-     * Fetch anything from database. Buffers in memory so not suitable for huge responses.
-     * register serialization with sparkService.get("/api/db/:collection", this::getDocuments, toJson);
-     */
-    private Iterable<Document> getDocuments (Request req, Response res) {
+    /** Factored out for experimenting with streaming and non-streaming approaches to serialization. */
+    private FindIterable<Document> getDocuments (Request req) {
         String accessGroup = UserPermissions.from(req).accessGroup;
         final String collectionName = req.params("collection");
-        MongoCollection<Document> collection = collectionName.equals("bundles") ? bundles :
-                database.getBsonCollection(collectionName);
+        MongoCollection<Document> collection = mongoCollections.get(collectionName);
+        checkNotNull(collection, "Collection not available: " + collectionName);
         List<Bson> filters = Lists.newArrayList(eq("accessGroup", accessGroup));
         req.queryMap().toMap().forEach((key, values) -> {
             for (String value : values) {
                 filters.add(eq(key, value));
             }
         });
+        return collection.find(and(filters));
+    }
+
+    /**
+     * Fetch anything from the database. Buffers all documents in memory, so may not be suitable for large responses.
+     * Register result serialization with: sparkService.get("/api/db/:collection", this::getDocuments, toJson);
+     */
+    private Iterable<Document> getDocuments (Request req, Response res) {
+        FindIterable<Document> docs = getDocuments(req);
         List<Document> documents = new ArrayList<>();
-        collection.find(and(filters)).into(documents);
+        docs.into(documents);
         return documents;
     }
 
     /**
      * Fetch anything from database. Streaming processing, no in-memory buffering of the BsonDocuments.
      * The output stream does buffer to some extent but should stream chunks instead of serializing into memory.
+     * Anecdotally, in testing with siege this does seem to almost double the response rate and allow double the
+     * concurrent connections without stalling (though that number is still low at 20, and it eventually does stall).
      */
     private Object getDocumentsStreaming (Request req, Response res) {
-        String accessGroup = UserPermissions.from(req).accessGroup;
-        final String collectionName = req.params("collection");
-        MongoCollection<Document> collection = collectionName.equals("bundles") ? bundles :
-                database.getBsonCollection(collectionName);
-        List<Bson> filters = Lists.newArrayList(eq("accessGroup", accessGroup));
-        req.queryMap().toMap().forEach((key, values) -> {
-            for (String value : values) {
-                filters.add(eq(key, value));
-            }
-        });
+        FindIterable<Document> docs = getDocuments(req);
         // getOutputStream returns a ServletOutputStream, usually the Jetty implementation HttpOutputStream, which
         // buffers the output. doc.toJson() creates a lot of short-lived objects which could be factored out.
         // The Mongo driver says to use JsonWriter or toJson() rather than utility methods:
         // https://github.com/mongodb/mongo-java-driver/commit/63409f9cb3bbd0779dd5139355113d9b227dfa05
-        try (OutputStream out = res.raw().getOutputStream()) {
+        try {
+            OutputStream out = res.raw().getOutputStream();
             out.write('['); // Begin JSON array.
             boolean firstElement = true;
-            for (Document doc : collection.find(and(filters))) {
+            for (Document doc : docs) {
                 if (firstElement) {
                     firstElement = false;
                 } else {
@@ -101,17 +110,16 @@ private Object getDocumentsStreaming (Request req, Response res) {
                 out.write(doc.toJson().getBytes(StandardCharsets.UTF_8));
             }
             out.write(']'); // Close JSON array.
-        } catch (IOException e) {
+            // We do not close the OutputStream, even implicitly with a try-with-resources.
+            // The thinking is that closing the stream might close the underlying connection, which might be keepalive.
+        } catch (Exception e) {
             throw new RuntimeException("Failed to write database records as JSON.", e);
         }
         // Since we're directly writing to the OutputStream, no need to return anything.
         // But do not return null or Spark will complain cryptically.
         return "";
     }
 
-    // Testing with Apache bench shows some stalling
-    // -k keepalive connections fails immediately
-
     @Override
     public void registerEndpoints (spark.Service sparkService) {
         sparkService.get("/api/db/:collection", this::getDocuments, toJson);
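
Not shown in this excerpt: how (or whether) the streaming variant gets registered. As a minimal sketch only, assuming the same Spark service and path, getDocumentsStreaming writes the JSON array directly to the raw servlet OutputStream, so it would presumably be registered without the toJson response transformer:

    // Hypothetical registration of the streaming handler (not part of this commit).
    // No response transformer is passed because the handler serializes to the OutputStream itself.
    sparkService.get("/api/db/:collection", this::getDocumentsStreaming);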
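
For reference, a usage sketch of the filter building shared by both handlers: every query-string parameter is ANDed with the caller's access group before the find(). The collection, field name, and value below are purely illustrative:

    // Hypothetical request: GET /api/db/bundles?regionId=abc123
    // The handler would build a Mongo filter roughly equivalent to:
    Bson filter = and(
            eq("accessGroup", accessGroup),  // always applied, taken from UserPermissions
            eq("regionId", "abc123")         // one eq() clause per query parameter value
    );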