113 changes: 107 additions & 6 deletions docs/src/user-docs/guides-k8s-deployment.md
@@ -200,6 +200,8 @@ clpConfig:
# Use clp-text, instead of clp-json (default)
package:
storage_engine: "clp" # Use "clp-s" for clp-json, "clp" for clp-text

webui:
query_engine: "clp" # Use "clp-s" for clp-json, "clp" for clp-text, "presto" for Presto

# Configure archive output
@@ -246,11 +248,92 @@ helm template clp . -f custom-values.yaml

::::

### Using Presto as the query engine
> **Reviewer comment (Contributor):** this section duplicates with guides-using-presto.md. Shall we use a reference link?

To use [Presto][presto-guide] as the query engine, set `webui.query_engine` to `"presto"` and
configure the Presto-specific settings. The `query_engine` setting controls which search interface
the Web UI displays. Presto runs alongside the existing compression pipeline; setting the clp-s
native query components to `null` is optional but recommended to save resources when you don't need
both query paths:

```{code-block} yaml
:caption: presto-values.yaml

image:
  prestoCoordinator:
    repository: "ghcr.io/y-scope/presto/coordinator"
    tag: "clp-v0.10.0"
  prestoWorker:
    repository: "ghcr.io/y-scope/presto/prestissimo-worker"
    tag: "clp-v0.10.0"

prestoWorker:
  # See the "Worker scheduling" section below for details on configuring Presto worker scheduling.
  replicas: 2

clpConfig:
  webui:
    query_engine: "presto"

  # Optional: Disable the clp-s native query pipeline to save resources.
  # NOTE: The API server depends on the clp-s native query pipeline.
  api_server: null
  query_scheduler: null
  query_worker: null
  reducer: null

  # Disable results cache retention since the Presto integration doesn't yet support garbage
  # collection of search results.
  results_cache:
    retention_period: null

  presto:
    port: 30889
    coordinator:
      logging_level: "INFO"
      query_max_memory_gb: 1
      query_max_memory_per_node_gb: 1
    worker:
      query_memory_gb: 4
      system_memory_gb: 8
    # Split filter config for the Presto CLP connector. For each dataset you want to query, add a
    # filter entry. Replace <dataset> with the dataset name (use "default" if you didn't specify one
    # when compressing) and <timestamp-key> with the timestamp key used during compression.
    # See https://docs.yscope.com/presto/connector/clp.html#split-filter-config-file
    split_filter:
      clp.default.<dataset>:
        - columnName: "<timestamp-key>"
          customOptions:
            rangeMapping:
              lowerBound: "begin_timestamp"
              upperBound: "end_timestamp"
          required: false
```
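
As a concrete illustration, suppose a dataset named `web-logs` was compressed with the timestamp key `ts` (both names are hypothetical, not values from any real deployment); the placeholder entry would then read:

```yaml
split_filter:
  clp.default.web-logs:        # <dataset> replaced with the hypothetical name "web-logs"
    - columnName: "ts"         # <timestamp-key> replaced with the hypothetical key "ts"
      customOptions:
        rangeMapping:
          lowerBound: "begin_timestamp"
          upperBound: "end_timestamp"
      required: false
```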

Install with the Presto values:

```bash
helm install clp clp/clp DOCS_VAR_HELM_VERSION_FLAG -f presto-values.yaml
```

:::{note}
Presto is deployed when `clpConfig.presto` is set to a non-null value. To disable the clp-s native query
components, set their config keys to `null` as shown above.
:::

For more details on querying logs through Presto, see the [Using Presto][presto-guide] guide.

### Worker scheduling

You can control where workers are scheduled using standard Kubernetes scheduling primitives
(`nodeSelector`, `affinity`, `tolerations`, `topologySpreadConstraints`).
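
For example, a sketch combining a node selector with a toleration under a worker's `scheduling:` key (the taint key and value here are hypothetical and must match whatever taint you applied to the node pool; they are not defined by the chart):

```yaml
compressionWorker:
  scheduling:
    nodeSelector:
      yscope.io/nodeType: "compression"
    tolerations:
      # Hypothetical taint; substitute the taint actually applied to your nodes.
      - key: "example.com/dedicated"
        operator: "Equal"
        value: "compression"
        effect: "NoSchedule"
```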

:::{note}
When using Presto as the query engine, use `prestoWorker:` instead of `queryWorker:` and `reducer:`
to configure Presto worker scheduling. The `prestoWorker:` key supports the same `scheduling:`
options.
:::

#### Dedicated node pools

To run compression workers, query workers, and reducers in separate node pools:
@@ -263,6 +346,9 @@

# Label query nodes
kubectl label nodes node3 node4 yscope.io/nodeType=query

# Label Presto nodes (if using Presto as the query engine)
kubectl label nodes node5 node6 yscope.io/nodeType=presto
```

2. Configure scheduling:
@@ -276,19 +362,25 @@
  replicas: 2
  scheduling:
    nodeSelector:
      yscope.io/nodeType: compression
      yscope.io/nodeType: "compression"

queryWorker:
  replicas: 2
  scheduling:
    nodeSelector:
      yscope.io/nodeType: query
      yscope.io/nodeType: "query"

reducer:
  replicas: 2
  scheduling:
    nodeSelector:
      yscope.io/nodeType: query
      yscope.io/nodeType: "query"

prestoWorker:
  replicas: 2
  scheduling:
    nodeSelector:
      yscope.io/nodeType: "presto"
```

3. Install:
@@ -318,7 +410,7 @@ To run all worker types in the same node pool:
  replicas: 2
  scheduling:
    nodeSelector:
      yscope.io/nodeType: compute
      yscope.io/nodeType: "compute"
    topologySpreadConstraints:
      - maxSkew: 1
        topologyKey: "kubernetes.io/hostname"
@@ -331,13 +423,19 @@ To run all worker types in the same node pool:
  replicas: 2
  scheduling:
    nodeSelector:
      yscope.io/nodeType: compute
      yscope.io/nodeType: "compute"

reducer:
  replicas: 2
  scheduling:
    nodeSelector:
      yscope.io/nodeType: compute
      yscope.io/nodeType: "compute"

prestoWorker:
  replicas: 2
  scheduling:
    nodeSelector:
      yscope.io/nodeType: "compute"
```

3. Install:
@@ -542,6 +640,7 @@ To tear down a `kubeadm` cluster:
* [External database setup][external-db-guide]: Using external MariaDB and MongoDB
* [Using object storage][s3-storage]: Configuring S3 storage
* [Configuring retention periods][retention-guide]: Setting up data retention policies
* [Using Presto][presto-guide]: Distributed SQL queries on compressed logs

[admin-tools]: reference-sbin-scripts/admin-tools.md
[aks]: https://azure.microsoft.com/en-us/products/kubernetes-service
@@ -559,6 +658,8 @@
[kind]: https://kind.sigs.k8s.io/
[kubeadm]: https://kubernetes.io/docs/setup/production-environment/tools/kubeadm/install-kubeadm/
[kubectl]: https://kubernetes.io/docs/tasks/tools/
[logging-infra-issue]: https://github.com/y-scope/clp/issues/1760
> **Reviewer comment (CodeRabbit, ⚠️ Potential issue, 🟡 Minor):** Remove the stale link reference. `[logging-infra-issue]` is not referenced anywhere in this document, so markdownlint (MD053, link-image-reference-definitions) will keep flagging it until it is deleted.
> **Reviewer comment (Contributor):** not used

[presto-guide]: guides-using-presto.md
[quick-start]: quick-start/index.md
[retention-guide]: guides-retention.md
[rfc-1918]: https://datatracker.ietf.org/doc/html/rfc1918#section-3
97 changes: 94 additions & 3 deletions docs/src/user-docs/guides-using-presto.md
@@ -14,17 +14,106 @@ maintained in a [fork][yscope-presto] of the Presto project. At some point, these
been merged into the main Presto repository so that you can use official Presto releases with CLP.
:::

## Requirements
## Deployment options

CLP supports Presto through two deployment methods:

* **[Kubernetes (Helm)](#kubernetes-helm)**: Presto is deployed as part of the CLP Helm chart. This
is the simplest option if you are already using the [Kubernetes deployment][k8s-deployment].
* **[Docker Compose](#docker-compose)**: Presto is deployed separately using Docker Compose alongside
a CLP package installation.

## Kubernetes (Helm)

When deploying CLP on Kubernetes using Helm, Presto can be enabled by setting `clpConfig.presto` to
a non-null configuration and `webui.query_engine` to `"presto"`. The `query_engine` setting controls
which search interface the Web UI displays. Presto runs alongside the existing compression pipeline;
the clp-s native query components can optionally be disabled to save resources.

### Requirements

* A running CLP Kubernetes deployment (see the [Kubernetes deployment guide][k8s-deployment])

### Set up

1. Create a values file to enable Presto:

```{code-block} yaml
:caption: presto-values.yaml

clpConfig:
  webui:
    query_engine: "presto"

  # Optional: Disable the clp-s native query pipeline to save resources.
  # NOTE: The API server depends on the clp-s native query pipeline.
  api_server: null
  query_scheduler: null
  query_worker: null
  reducer: null

  # Disable results cache retention since the Presto integration doesn't yet support
  # garbage collection of search results.
  results_cache:
    retention_period: null

  presto:
    port: 30889
    coordinator:
      logging_level: "INFO"
      query_max_memory_gb: 1
      query_max_memory_per_node_gb: 1
    worker:
      query_memory_gb: 4
      system_memory_gb: 8
    # Split filter config for the Presto CLP connector. For each dataset, add a filter entry.
    # Replace <dataset> with the dataset name (use "default" if you didn't specify one when
    # compressing) and <timestamp-key> with the timestamp key used during compression.
    # See https://docs.yscope.com/presto/connector/clp.html#split-filter-config-file
    split_filter:
      clp.default.<dataset>:
        - columnName: "<timestamp-key>"
          customOptions:
            rangeMapping:
              lowerBound: "begin_timestamp"
              upperBound: "end_timestamp"
          required: false
```

2. Install (or upgrade) the Helm chart with the Presto values:

```bash
helm install clp clp/clp DOCS_VAR_HELM_VERSION_FLAG -f presto-values.yaml
```
> **Reviewer comment (CodeRabbit, ⚠️ Potential issue, 🟡 Minor) on lines +83 to +87:** `helm install` will fail for users with an existing CLP deployment. The requirements state the user already has "a running CLP Kubernetes deployment," yet step 2 only shows `helm install`, which would error out with "release already exists". The parenthetical "(or upgrade)" acknowledges the upgrade path but never shows the corresponding command; the suggested fix is to also show `helm upgrade clp clp/clp DOCS_VAR_HELM_VERSION_FLAG -f presto-values.yaml` for existing installations.

3. Verify that the Presto coordinator and worker pods are running:

```bash
kubectl get pods -l "app.kubernetes.io/component in (presto-coordinator, presto-worker)"
```

Once the pods are ready, you can [query your logs through Presto](#querying-your-logs-through-presto)
using CLP's Web UI.

:::{note}
When using Kubernetes, Presto worker scheduling can be configured using the `prestoWorker.scheduling`
key in Helm values. See the [worker scheduling][k8s-scheduling] section of the Kubernetes deployment
guide for details.
:::

## Docker Compose

### Requirements

* [CLP][clp-releases] (clp-json) v0.5.0 or higher
* [Docker] v28 or higher
* [Docker Compose][docker-compose] v2.20.2 or higher
* Python
* python3-venv (for the version of Python installed)

## Set up
### Set up

Using Presto with CLP requires:
Using Presto with CLP via Docker Compose requires:

* [Setting up CLP](#setting-up-clp) and compressing some logs.
* [Setting up Presto](#setting-up-presto) to query CLP's metadata database and archives.
@@ -227,6 +316,8 @@ These limitations will be addressed in a future release of the Presto integration
[clp-releases]: https://github.com/y-scope/clp/releases
[docker-compose]: https://docs.docker.com/compose/install/
[Docker]: https://docs.docker.com/engine/install/
[k8s-deployment]: guides-k8s-deployment.md
[k8s-scheduling]: guides-k8s-deployment.md#worker-scheduling
[postgresql]: https://zenodo.org/records/10516401
[Presto]: https://prestodb.io/
[y-scope/presto#8]: https://github.com/y-scope/presto/issues/8
2 changes: 1 addition & 1 deletion docs/src/user-docs/quick-start/clp-text.md
@@ -100,7 +100,7 @@ helm repo update clp

helm install clp clp/clp DOCS_VAR_HELM_VERSION_FLAG \
  --set clpConfig.package.storage_engine=clp \
  --set clpConfig.package.query_engine=clp \
  --set clpConfig.webui.query_engine=clp \
  --set clpConfig.webui.port="$CLP_WEBUI_PORT" \
  --set clpConfig.results_cache.port="$CLP_RESULTS_CACHE_PORT" \
  --set clpConfig.database.port="$CLP_DATABASE_PORT" \
3 changes: 3 additions & 0 deletions tools/deployment/package-helm/.set-up-common.sh
@@ -131,6 +131,9 @@ nodes:
      - containerPort: 30800
        hostPort: 30800
        protocol: TCP
      - containerPort: 30889
        hostPort: 30889
        protocol: TCP
EOF

for ((i = 0; i < num_workers; i++)); do
2 changes: 1 addition & 1 deletion tools/deployment/package-helm/Chart.yaml
@@ -1,6 +1,6 @@
apiVersion: "v2"
name: "clp"
version: "0.2.1-dev.1"
version: "0.2.1-dev.2"
description: "A Helm chart for CLP's (Compressed Log Processor) package deployment"
type: "application"
appVersion: "0.10.1-dev"
18 changes: 16 additions & 2 deletions tools/deployment/package-helm/set-up-multi-dedicated-test.sh
@@ -10,9 +10,11 @@ CLP_HOME="${CLP_HOME:-/tmp/clp}"
CLUSTER_NAME="${CLUSTER_NAME:-clp-test}"
NUM_COMPRESSION_NODES="${NUM_COMPRESSION_NODES:-2}"
NUM_QUERY_NODES="${NUM_QUERY_NODES:-2}"
NUM_PRESTO_NODES="${NUM_PRESTO_NODES:-2}"
COMPRESSION_WORKER_REPLICAS="${COMPRESSION_WORKER_REPLICAS:-2}"
QUERY_WORKER_REPLICAS="${QUERY_WORKER_REPLICAS:-2}"
REDUCER_REPLICAS="${REDUCER_REPLICAS:-2}"
PRESTO_WORKER_REPLICAS="${PRESTO_WORKER_REPLICAS:-2}"
> **Reviewer comment (CodeRabbit, ⚠️ Potential issue, 🟠 Major) on lines +13 to +17:** The default dedicated test now allocates unused Presto nodes. `NUM_PRESTO_NODES` is included in `total_workers`, but this script still installs the chart without enabling Presto mode, so the default path grows the kind cluster and labels a Presto node pool that no pod can use. Default the Presto node count to 0, or switch the Helm install into Presto mode when these settings are present. Also applies to lines 37-38 and 58-78.

# shellcheck source=.set-up-common.sh
source "${script_dir}/.set-up-common.sh"
@@ -23,14 +25,16 @@ echo "=== Multi-node setup with dedicated worker nodes ==="
echo "Cluster: ${CLUSTER_NAME}"
echo "Compression nodes: ${NUM_COMPRESSION_NODES}"
echo "Query nodes: ${NUM_QUERY_NODES}"
echo "Presto nodes: ${NUM_PRESTO_NODES}"
echo "Compression workers: ${COMPRESSION_WORKER_REPLICAS}"
echo "Query workers: ${QUERY_WORKER_REPLICAS}"
echo "Reducers: ${REDUCER_REPLICAS}"
echo "Presto workers: ${PRESTO_WORKER_REPLICAS}"
echo ""

prepare_environment "${CLUSTER_NAME}"

total_workers=$((NUM_COMPRESSION_NODES + NUM_QUERY_NODES))
total_workers=$((NUM_COMPRESSION_NODES + NUM_QUERY_NODES + NUM_PRESTO_NODES))

echo "Creating kind cluster..."
generate_kind_config "${total_workers}" | kind create cluster --name "${CLUSTER_NAME}" --config=-
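
The cluster-size arithmetic above is just the sum of the three pools; a standalone check (not part of the script) using the script's default pool sizes:

```shell
#!/bin/bash
# Mirror of the total_workers computation with the default pool sizes.
NUM_COMPRESSION_NODES=2
NUM_QUERY_NODES=2
NUM_PRESTO_NODES=2
total_workers=$((NUM_COMPRESSION_NODES + NUM_QUERY_NODES + NUM_PRESTO_NODES))
echo "${total_workers}"  # prints 6
```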
@@ -45,11 +49,18 @@ for ((i = 0; i < NUM_COMPRESSION_NODES; i++)); do
done

# Label query nodes
for ((i = NUM_COMPRESSION_NODES; i < total_workers; i++)); do
query_end=$((NUM_COMPRESSION_NODES + NUM_QUERY_NODES))
for ((i = NUM_COMPRESSION_NODES; i < query_end; i++)); do
    echo "Labeling ${worker_nodes[$i]} as query node"
    kubectl label node "${worker_nodes[$i]}" yscope.io/nodeType=query --overwrite
done

# Label Presto nodes
for ((i = query_end; i < total_workers; i++)); do
    echo "Labeling ${worker_nodes[$i]} as presto node"
    kubectl label node "${worker_nodes[$i]}" yscope.io/nodeType=presto --overwrite
done

echo "Installing Helm chart..."
helm uninstall test --ignore-not-found
sleep 2
@@ -62,6 +73,9 @@ helm install test "${script_dir}" \
    --set "queryWorker.replicas=${QUERY_WORKER_REPLICAS}" \
    --set "queryWorker.scheduling.nodeSelector.yscope\.io/nodeType=query" \
    --set "reducer.replicas=${REDUCER_REPLICAS}" \
    --set "reducer.scheduling.nodeSelector.yscope\.io/nodeType=query" \
    --set "prestoWorker.replicas=${PRESTO_WORKER_REPLICAS}" \
    --set "prestoWorker.scheduling.nodeSelector.yscope\.io/nodeType=presto" \
    $(get_image_helm_args "${CLUSTER_NAME}" "${CLP_PACKAGE_IMAGE}")

wait_for_cluster_ready
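
The two labeling loops in this script partition worker indices contiguously: compression nodes first, then query nodes up to `query_end`, then Presto nodes for the remainder. A self-contained sketch of that mapping, with `echo` standing in for the `kubectl label` calls and the default pool sizes assumed:

```shell
#!/bin/bash
NUM_COMPRESSION_NODES=2
NUM_QUERY_NODES=2
NUM_PRESTO_NODES=2
query_end=$((NUM_COMPRESSION_NODES + NUM_QUERY_NODES))
total_workers=$((query_end + NUM_PRESTO_NODES))

# Map a worker index to the node-pool label the script would assign it.
label_for_index() {
    if (( $1 < NUM_COMPRESSION_NODES )); then
        echo "compression"
    elif (( $1 < query_end )); then
        echo "query"
    else
        echo "presto"
    fi
}

# Indices 0-1 -> compression, 2-3 -> query, 4-5 -> presto.
for ((i = 0; i < total_workers; i++)); do
    echo "worker ${i}: $(label_for_index "${i}")"
done
```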