Skip to content
Open
Show file tree
Hide file tree
Changes from 10 commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
57be8f7
feat(helm): Add Presto query engine support to the Helm chart.
junhaoliao Feb 17, 2026
96e9a45
docs: Remove redundant worker configuration comment in Presto guide
junhaoliao Feb 17, 2026
8089254
chore(helm): Bump chart version to 0.1.4-dev.4
junhaoliao Feb 17, 2026
d389542
Merge branch 'main' into k8s-presto
hoophalab Feb 18, 2026
00d7292
feat(helm): Improve modularity and add Presto support refinements
junhaoliao Feb 18, 2026
ef19fba
Merge branch 'main' into k8s-presto
junhaoliao Mar 10, 2026
9c83eb3
chore(helm): Bump chart version to 0.2.1-dev.1
junhaoliao Mar 10, 2026
5fc55fc
docs: Clarify Helm configuration for enabling Presto and resource opt…
junhaoliao Mar 10, 2026
6611a46
docs: Remove redundant comments on Presto configuration in K8s guide
junhaoliao Mar 10, 2026
4ac6a71
chore(helm): Remove extraneous newline in `values.yaml`
junhaoliao Mar 10, 2026
ef6be81
Merge branch 'main' into k8s-presto
junhaoliao Mar 12, 2026
3ea1670
docs(helm): Clarify Presto SQL configuration in `values.yaml`
junhaoliao Mar 12, 2026
80204d7
chore(helm): Bump chart version to 0.2.1-dev.2
junhaoliao Mar 12, 2026
e0d175a
Merge remote-tracking branch 'origin/main' into k8s-presto
junhaoliao Mar 12, 2026
dda97e2
refactor(helm/docs): Rename `package.query_engine` to `webui.query_en…
junhaoliao Mar 12, 2026
5525e7e
refactor(helm): Use dynamic host and port for CLP metadata DB in Pres…
junhaoliao Mar 16, 2026
55d32d4
refactor(helm): Rename service account for Presto deployments to `ser…
junhaoliao Mar 16, 2026
72160c1
docs: Simplify Presto setup instructions in K8s deployment guide
junhaoliao Mar 16, 2026
4337cd1
docs: Remove dead link to logging infrastructure issue in K8s deploym…
junhaoliao Mar 16, 2026
efb640b
refactor(helm): Replace wget with curl for fetching Presto version in…
junhaoliao Mar 16, 2026
9df3643
refactor(helm): Update Presto version extraction to use `nodeVersion.…
junhaoliao Mar 16, 2026
c8fd988
docs: Fix incorrect property name in Presto setup instructions for K8…
junhaoliao Mar 16, 2026
1939ce1
refactor(helm): Set default Presto replicas to 0 in multi-shared and …
junhaoliao Mar 16, 2026
6688324
Add support for enabling Presto in test setups with configurable Helm…
junhaoliao Mar 16, 2026
f6ff83a
fix(deployment): Pre-create shared-data PVs in `set-up-multi-dedicate…
junhaoliao Mar 16, 2026
eec6709
chore(helm): bump chart version to 0.2.1-dev.2
junhaoliao Mar 16, 2026
48aa730
fix(deployment): correct heredoc identifier in `set-up-multi-dedicate…
junhaoliao Mar 16, 2026
f6d3bc0
update PV configuration in `set-up-multi-dedicated-test.sh` (remove r…
junhaoliao Mar 17, 2026
75626e0
Merge remote-tracking branch 'junhao/fix-helm-pv' into k8s-presto
junhaoliao Mar 17, 2026
ffab28b
fix(deployment): pre-create shared-data directories on nodes in `set-…
junhaoliao Mar 17, 2026
fdac633
fix(deployment): update shared-data directory creation to use `CLP_HO…
junhaoliao Mar 17, 2026
2df432c
Merge remote-tracking branch 'junhao/fix-helm-pv' into k8s-presto
junhaoliao Mar 17, 2026
304e97d
Merge branch 'main' into k8s-presto
junhaoliao Mar 18, 2026
96912c3
bump chart version
junhaoliao Mar 18, 2026
578fb0d
fix(helm): update Presto configurations and add S3 storage support in…
junhaoliao Mar 18, 2026
e144e69
fix(helm): increase liveness probe initial delay to 180s in `_helpers…
junhaoliao Mar 18, 2026
333941c
remove cert hack
junhaoliao Mar 18, 2026
8c5f6d8
fix(deployment): strip whitespace in JSON heredoc for `split_filter` …
junhaoliao Mar 18, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
112 changes: 106 additions & 6 deletions docs/src/user-docs/guides-k8s-deployment.md
Original file line number Diff line number Diff line change
Expand Up @@ -246,11 +246,93 @@ helm template clp . -f custom-values.yaml

::::

### Using Presto as the query engine
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This section duplicates content in guides-using-presto.md. Shall we link to that guide instead?


To use [Presto][presto-guide] as the query engine, set `query_engine` to `"presto"` and configure
the Presto-specific settings. The `query_engine` setting controls which search interface the Web UI
displays. Presto runs alongside the existing compression pipeline; setting the clp-s native query
components to `null` is optional but recommended to save resources when you don't need both query
paths:

```{code-block} yaml
:caption: presto-values.yaml

image:
prestoCoordinator:
repository: "ghcr.io/y-scope/presto/coordinator"
tag: "dev"
prestoWorker:
repository: "ghcr.io/y-scope/presto/prestissimo-worker"
tag: "dev"

prestoWorker:
# See the "Worker scheduling" section below for details on configuring Presto worker scheduling.
replicas: 2

clpConfig:
package:
storage_engine: "clp-s"
query_engine: "presto"

# Optional: Disable the clp-s native query pipeline to save resources.
# NOTE: The API server depends on the clp-s native query pipeline.
api_server: null
query_scheduler: null
query_worker: null
reducer: null

# Disable results cache retention since the Presto integration doesn't yet support garbage
# collection of search results.
results_cache:
retention_period: null

presto:
port: 30889
coordinator:
logging_level: "INFO"
query_max_memory_gb: 1
query_max_memory_per_node_gb: 1
worker:
query_memory_gb: 4
system_memory_gb: 8
# Split filter config for the Presto CLP connector. For each dataset you want to query, add a
# filter entry. Replace <dataset> with the dataset name (use "default" if you didn't specify one
# when compressing) and <timestamp-key> with the timestamp key used during compression.
# See https://docs.yscope.com/presto/connector/clp.html#split-filter-config-file
split_filter:
clp.default.<dataset>:
- columnName: "<timestamp-key>"
customOptions:
rangeMapping:
lowerBound: "begin_timestamp"
upperBound: "end_timestamp"
required: false
```

Install with the Presto values:

```bash
helm install clp clp/clp DOCS_VAR_HELM_VERSION_FLAG -f presto-values.yaml
```

:::{note}
Presto is deployed when `clpConfig.presto` is set to a non-null value. To disable the clp-s native query
components, set their config keys to `null` as shown above.
:::

For more details on querying logs through Presto, see the [Using Presto][presto-guide] guide.

### Worker scheduling

You can control where workers are scheduled using standard Kubernetes scheduling primitives
(`nodeSelector`, `affinity`, `tolerations`, `topologySpreadConstraints`).

:::{note}
When using Presto as the query engine, use `prestoWorker:` instead of `queryWorker:` and `reducer:`
to configure Presto worker scheduling. The `prestoWorker:` key supports the same `scheduling:`
options.
:::

#### Dedicated node pools

To run compression workers, query workers, and reducers in separate node pools:
Expand All @@ -263,6 +345,9 @@ To run compression workers, query workers, and reducers in separate node pools:

# Label query nodes
kubectl label nodes node3 node4 yscope.io/nodeType=query

# Label Presto nodes (if using Presto as the query engine)
kubectl label nodes node5 node6 yscope.io/nodeType=presto
```

2. Configure scheduling:
Expand All @@ -276,19 +361,25 @@ To run compression workers, query workers, and reducers in separate node pools:
replicas: 2
scheduling:
nodeSelector:
yscope.io/nodeType: compression
yscope.io/nodeType: "compression"

queryWorker:
replicas: 2
scheduling:
nodeSelector:
yscope.io/nodeType: query
yscope.io/nodeType: "query"

reducer:
replicas: 2
scheduling:
nodeSelector:
yscope.io/nodeType: query
yscope.io/nodeType: "query"

prestoWorker:
replicas: 2
scheduling:
nodeSelector:
yscope.io/nodeType: "presto"
```

3. Install:
Expand Down Expand Up @@ -318,7 +409,7 @@ To run all worker types in the same node pool:
replicas: 2
scheduling:
nodeSelector:
yscope.io/nodeType: compute
yscope.io/nodeType: "compute"
topologySpreadConstraints:
- maxSkew: 1
topologyKey: "kubernetes.io/hostname"
Expand All @@ -331,13 +422,19 @@ To run all worker types in the same node pool:
replicas: 2
scheduling:
nodeSelector:
yscope.io/nodeType: compute
yscope.io/nodeType: "compute"

reducer:
replicas: 2
scheduling:
nodeSelector:
yscope.io/nodeType: compute
yscope.io/nodeType: "compute"

prestoWorker:
replicas: 2
scheduling:
nodeSelector:
yscope.io/nodeType: "compute"
```

3. Install:
Expand Down Expand Up @@ -542,6 +639,7 @@ To tear down a `kubeadm` cluster:
* [External database setup][external-db-guide]: Using external MariaDB and MongoDB
* [Using object storage][s3-storage]: Configuring S3 storage
* [Configuring retention periods][retention-guide]: Setting up data retention policies
* [Using Presto][presto-guide]: Distributed SQL queries on compressed logs

[admin-tools]: reference-sbin-scripts/admin-tools.md
[aks]: https://azure.microsoft.com/en-us/products/kubernetes-service
Expand All @@ -559,6 +657,8 @@ To tear down a `kubeadm` cluster:
[kind]: https://kind.sigs.k8s.io/
[kubeadm]: https://kubernetes.io/docs/setup/production-environment/tools/kubeadm/install-kubeadm/
[kubectl]: https://kubernetes.io/docs/tasks/tools/
[logging-infra-issue]: https://github.com/y-scope/clp/issues/1760
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

not used

[presto-guide]: guides-using-presto.md
[quick-start]: quick-start/index.md
[retention-guide]: guides-retention.md
[rfc-1918]: https://datatracker.ietf.org/doc/html/rfc1918#section-3
Expand Down
97 changes: 94 additions & 3 deletions docs/src/user-docs/guides-using-presto.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,17 +14,106 @@ maintained in a [fork][yscope-presto] of the Presto project. At some point, thes
been merged into the main Presto repository so that you can use official Presto releases with CLP.
:::

## Requirements
## Deployment options

CLP supports Presto through two deployment methods:

* **[Kubernetes (Helm)](#kubernetes-helm)**: Presto is deployed as part of the CLP Helm chart. This
is the simplest option if you are already using the [Kubernetes deployment][k8s-deployment].
* **[Docker Compose](#docker-compose)**: Presto is deployed separately using Docker Compose alongside
a CLP package installation.

## Kubernetes (Helm)

When deploying CLP on Kubernetes using Helm, Presto can be enabled by setting `clpConfig.presto` to
a non-null configuration and `query_engine` to `"presto"`. The `query_engine` setting controls which
search interface the Web UI displays. Presto runs alongside the existing compression pipeline; the
clp-s native query components can optionally be disabled to save resources.

### Requirements

* A running CLP Kubernetes deployment (see the [Kubernetes deployment guide][k8s-deployment])

### Set up

1. Create a values file to enable Presto:

```{code-block} yaml
:caption: presto-values.yaml

clpConfig:
package:
query_engine: "presto"

# Optional: Disable the clp-s native query pipeline to save resources.
# NOTE: The API server depends on the clp-s native query pipeline.
api_server: null
query_scheduler: null
query_worker: null
reducer: null

# Disable results cache retention since the Presto integration doesn't yet support
# garbage collection of search results.
results_cache:
retention_period: null

presto:
port: 30889
coordinator:
logging_level: "INFO"
query_max_memory_gb: 1
query_max_memory_per_node_gb: 1
worker:
query_memory_gb: 4
system_memory_gb: 8
# Split filter config for the Presto CLP connector. For each dataset, add a filter entry.
# Replace <dataset> with the dataset name (use "default" if you didn't specify one when
# compressing) and <timestamp-key> with the timestamp key used during compression.
# See https://docs.yscope.com/presto/connector/clp.html#split-filter-config-file
split_filter:
clp.default.<dataset>:
- columnName: "<timestamp-key>"
customOptions:
rangeMapping:
lowerBound: "begin_timestamp"
upperBound: "end_timestamp"
required: false
```

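2. Install the Helm chart with the Presto values:

```bash
helm install clp clp/clp DOCS_VAR_HELM_VERSION_FLAG -f presto-values.yaml
```

If CLP is already installed, upgrade the existing release instead:

```bash
helm upgrade clp clp/clp DOCS_VAR_HELM_VERSION_FLAG -f presto-values.yaml
```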
Comment on lines +83 to +87
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

helm install will fail for users with an existing CLP deployment.

The requirement at line 33 explicitly states the user already has "a running CLP Kubernetes deployment," yet step 2 only shows helm install, which would error out with release already exists. The parenthetical "(or upgrade)" acknowledges the upgrade path but never shows the corresponding command.

💡 Suggested fix
-2. Install (or upgrade) the Helm chart with the Presto values:
+2. If you haven't installed CLP yet, install it with Presto enabled:
 
    ```bash
    helm install clp clp/clp DOCS_VAR_HELM_VERSION_FLAG -f presto-values.yaml
    ```
+
+   If CLP is already installed, upgrade the release instead:
+
+   ```bash
+   helm upgrade clp clp/clp DOCS_VAR_HELM_VERSION_FLAG -f presto-values.yaml
+   ```
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
2. Install (or upgrade) the Helm chart with the Presto values:
```bash
helm install clp clp/clp DOCS_VAR_HELM_VERSION_FLAG -f presto-values.yaml
```
2. If you haven't installed CLP yet, install it with Presto enabled:
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@docs/src/user-docs/guides-using-presto.md` around lines 66 - 70, The docs
show only a helm install command which will fail when CLP is already deployed;
update the step under the Presto install to include the alternative helm upgrade
invocation so users can perform an upgrade when a release exists—specifically
add guidance to run `helm upgrade clp clp/clp DOCS_VAR_HELM_VERSION_FLAG -f
presto-values.yaml` as the alternative to `helm install` (referencing the
existing `helm install clp clp/clp DOCS_VAR_HELM_VERSION_FLAG -f
presto-values.yaml` and the `presto-values.yaml` values file).


3. Verify that the Presto coordinator and worker pods are running:

```bash
kubectl get pods -l "app.kubernetes.io/component in (presto-coordinator, presto-worker)"
```

Once the pods are ready, you can [query your logs through Presto](#querying-your-logs-through-presto)
using CLP's Web UI.

:::{note}
When using Kubernetes, Presto worker scheduling can be configured using the `prestoWorker.scheduling`
key in Helm values. See the [worker scheduling][k8s-scheduling] section of the Kubernetes deployment
guide for details.
:::

## Docker Compose

### Requirements

* [CLP][clp-releases] (clp-json) v0.5.0 or higher
* [Docker] v28 or higher
* [Docker Compose][docker-compose] v2.20.2 or higher
* Python
* python3-venv (for the version of Python installed)

## Set up
### Set up

Using Presto with CLP requires:
Using Presto with CLP via Docker Compose requires:

* [Setting up CLP](#setting-up-clp) and compressing some logs.
* [Setting up Presto](#setting-up-presto) to query CLP's metadata database and archives.
Expand Down Expand Up @@ -227,6 +316,8 @@ These limitations will be addressed in a future release of the Presto integratio
[clp-releases]: https://github.com/y-scope/clp/releases
[docker-compose]: https://docs.docker.com/compose/install/
[Docker]: https://docs.docker.com/engine/install/
[k8s-deployment]: guides-k8s-deployment.md
[k8s-scheduling]: guides-k8s-deployment.md#worker-scheduling
[postgresql]: https://zenodo.org/records/10516401
[Presto]: https://prestodb.io/
[y-scope/presto#8]: https://github.com/y-scope/presto/issues/8
Expand Down
3 changes: 3 additions & 0 deletions tools/deployment/package-helm/.set-up-common.sh
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,9 @@ nodes:
- containerPort: 30800
hostPort: 30800
protocol: TCP
- containerPort: 30889
hostPort: 30889
protocol: TCP
EOF

for ((i = 0; i < num_workers; i++)); do
Expand Down
2 changes: 1 addition & 1 deletion tools/deployment/package-helm/Chart.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
apiVersion: "v2"
name: "clp"
version: "0.2.1-dev.0"
version: "0.2.1-dev.1"
description: "A Helm chart for CLP's (Compressed Log Processor) package deployment"
type: "application"
appVersion: "0.10.1-dev"
Expand Down
18 changes: 16 additions & 2 deletions tools/deployment/package-helm/set-up-multi-dedicated-test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,11 @@ CLP_HOME="${CLP_HOME:-/tmp/clp}"
CLUSTER_NAME="${CLUSTER_NAME:-clp-test}"
NUM_COMPRESSION_NODES="${NUM_COMPRESSION_NODES:-2}"
NUM_QUERY_NODES="${NUM_QUERY_NODES:-2}"
NUM_PRESTO_NODES="${NUM_PRESTO_NODES:-2}"
COMPRESSION_WORKER_REPLICAS="${COMPRESSION_WORKER_REPLICAS:-2}"
QUERY_WORKER_REPLICAS="${QUERY_WORKER_REPLICAS:-2}"
REDUCER_REPLICAS="${REDUCER_REPLICAS:-2}"
PRESTO_WORKER_REPLICAS="${PRESTO_WORKER_REPLICAS:-2}"

# shellcheck source=.set-up-common.sh
source "${script_dir}/.set-up-common.sh"
Expand All @@ -23,14 +25,16 @@ echo "=== Multi-node setup with dedicated worker nodes ==="
echo "Cluster: ${CLUSTER_NAME}"
echo "Compression nodes: ${NUM_COMPRESSION_NODES}"
echo "Query nodes: ${NUM_QUERY_NODES}"
echo "Presto nodes: ${NUM_PRESTO_NODES}"
echo "Compression workers: ${COMPRESSION_WORKER_REPLICAS}"
echo "Query workers: ${QUERY_WORKER_REPLICAS}"
echo "Reducers: ${REDUCER_REPLICAS}"
echo "Presto workers: ${PRESTO_WORKER_REPLICAS}"
echo ""

prepare_environment "${CLUSTER_NAME}"

total_workers=$((NUM_COMPRESSION_NODES + NUM_QUERY_NODES))
total_workers=$((NUM_COMPRESSION_NODES + NUM_QUERY_NODES + NUM_PRESTO_NODES))

echo "Creating kind cluster..."
generate_kind_config "${total_workers}" | kind create cluster --name "${CLUSTER_NAME}" --config=-
Expand All @@ -45,11 +49,18 @@ for ((i = 0; i < NUM_COMPRESSION_NODES; i++)); do
done

# Label query nodes
for ((i = NUM_COMPRESSION_NODES; i < total_workers; i++)); do
query_end=$((NUM_COMPRESSION_NODES + NUM_QUERY_NODES))
for ((i = NUM_COMPRESSION_NODES; i < query_end; i++)); do
echo "Labeling ${worker_nodes[$i]} as query node"
kubectl label node "${worker_nodes[$i]}" yscope.io/nodeType=query --overwrite
done

# Label Presto nodes
for ((i = query_end; i < total_workers; i++)); do
echo "Labeling ${worker_nodes[$i]} as presto node"
kubectl label node "${worker_nodes[$i]}" yscope.io/nodeType=presto --overwrite
done

echo "Installing Helm chart..."
helm uninstall test --ignore-not-found
sleep 2
Expand All @@ -62,6 +73,9 @@ helm install test "${script_dir}" \
--set "queryWorker.replicas=${QUERY_WORKER_REPLICAS}" \
--set "queryWorker.scheduling.nodeSelector.yscope\.io/nodeType=query" \
--set "reducer.replicas=${REDUCER_REPLICAS}" \
--set "reducer.scheduling.nodeSelector.yscope\.io/nodeType=query" \
--set "prestoWorker.replicas=${PRESTO_WORKER_REPLICAS}" \
--set "prestoWorker.scheduling.nodeSelector.yscope\.io/nodeType=presto" \
$(get_image_helm_args "${CLUSTER_NAME}" "${CLP_PACKAGE_IMAGE}")

wait_for_cluster_ready
Loading
Loading