Skip to content

Commit 0f6a775

Browse files
committed
docs: update examples to use UDS tokenizer Docker image (llm-d#471)
* update examples
  Signed-off-by: Sage Ahrac <sagiahrak@gmail.com>
* cache image
  Signed-off-by: Sage Ahrac <sagiahrak@gmail.com>
* run examples
  Signed-off-by: Sage Ahrac <sagiahrak@gmail.com>
* remove build-embedded CI job from examples workflow
  Signed-off-by: Sage Ahrac <sagiahrak@gmail.com>
* fix examples path exclusion in golangci-lint config
  Signed-off-by: Sage Ahrac <sagiahrak@gmail.com>
* lint
  Signed-off-by: Sage Ahrac <sagiahrak@gmail.com>
* address review: fix lint, container cleanup, exit codes, and port config
  Signed-off-by: Sage Ahrac <sagiahrak@gmail.com>
* build fix
  Signed-off-by: Sage Ahrac <sagiahrak@gmail.com>
* decoder only model name for examples
  Signed-off-by: Sage Ahrac <sagiahrak@gmail.com>
* Examples docs update
  Signed-off-by: Sage Ahrac <sagiahrak@gmail.com>
* compute blocks fix
  Signed-off-by: Sage Ahrac <sagiahrak@gmail.com>
* simplify verify examples and rm -f container
  Signed-off-by: Sage Ahrac <sagiahrak@gmail.com>
* makefile dep
  Signed-off-by: Sage Ahrac <sagiahrak@gmail.com>
* rm flag
  Signed-off-by: Sage Ahrac <sagiahrak@gmail.com>
* update examples helper with extra keys
  Signed-off-by: Sage Ahrac <sagiahrak@gmail.com>
* reduce git diff
  Signed-off-by: Sage Ahrac <sagiahrak@gmail.com>
* Revert "reduce git diff"
  This reverts commit f2eb22f.
  Signed-off-by: Sage Ahrac <sagiahrak@gmail.com>
* Fix TCP detection + export ApplyTokenizerEndpoint in helper
  Signed-off-by: Sage Ahrac <sagiahrak@gmail.com>
* Split run-example Makefile target + fix verify-examples.sh
  Signed-off-by: Sage Ahrac <sagiahrak@gmail.com>
* fix trap
  Signed-off-by: Sage Ahrac <sagiahrak@gmail.com>
* reuse run-example-only in run-example
  Signed-off-by: Sage Ahrac <sagiahrak@gmail.com>
---------
Signed-off-by: Sage Ahrac <sagiahrak@gmail.com>
1 parent 22a7d1d commit 0f6a775

File tree

16 files changed

+150
-374
lines changed

16 files changed

+150
-374
lines changed

.github/workflows/ci-examples.yaml

Lines changed: 10 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -11,24 +11,6 @@ jobs:
1111
steps:
1212
- name: Checkout source
1313
uses: actions/checkout@v6
14-
15-
- name: Cache system (apt) dependencies
16-
uses: actions/cache@v4
17-
with:
18-
path: apt-archives
19-
key: ${{ runner.os }}-apt-${{ hashFiles('**/.github/workflows/ci-examples.yaml') }}
20-
restore-keys: |
21-
${{ runner.os }}-apt-
22-
23-
- name: Install system dependencies
24-
run: |
25-
mkdir -p apt-archives
26-
sudo cp -a apt-archives /var/cache/apt/archives # workaround for apt-get cache https://github.com/actions/cache/issues/324#issuecomment-1816908646
27-
sudo apt-get update
28-
sudo add-apt-repository ppa:deadsnakes/ppa -y
29-
sudo apt-get install -y libzmq3-dev pkg-config python3.12 python3.12-dev python3.12-venv clang-format
30-
cp -a /var/cache/apt/archives/*.deb apt-archives 2>/dev/null || true
31-
3214
- name: Extract Go version from go.mod
3315
run: sed -En 's/^go (.*)$/GO_VERSION=\1/p' go.mod >> $GITHUB_ENV
3416

@@ -41,23 +23,17 @@ jobs:
4123
- name: Download Go dependencies
4224
run: go mod download
4325

44-
- name: Cache Python (pip) dependencies
45-
uses: actions/cache@v4
46-
with:
47-
path: |
48-
~/.cache/pip
49-
build/venv
50-
# This key is based ONLY on the requirements file
51-
key: ${{ runner.os }}-pip-${{ hashFiles('**/.github/workflows/ci-examples.yaml') }}-${{ hashFiles('**/requirements.txt') }}
52-
restore-keys: |
53-
${{ runner.os }}-pip-
54-
55-
# Set up the Python virtual environment (includes Python config verification)
56-
- name: Run setup-venv
57-
run: make setup-venv
26+
- name: Set up Docker Buildx
27+
uses: docker/setup-buildx-action@v3
5828

59-
- name: Install Python dependencies
60-
run: make install-python-deps
29+
- name: Build UDS tokenizer image
30+
uses: docker/build-push-action@v6
31+
with:
32+
context: services/uds_tokenizer
33+
tags: llm-d-uds-tokenizer:e2e-test
34+
load: true
35+
cache-from: type=gha
36+
cache-to: type=gha,mode=max
6137

6238
- name: Make verify-examples.sh executable
6339
run: chmod +x hack/verify-examples.sh

.github/workflows/ci-test.yaml

Lines changed: 11 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -78,59 +78,17 @@ jobs:
7878
- name: Install dependencies
7979
run: go mod download
8080

81-
- name: Run make e2e-test
82-
run: make e2e-test
83-
84-
build-embedded:
85-
runs-on: ubuntu-latest
86-
steps:
87-
- name: Checkout source
88-
uses: actions/checkout@v6
89-
90-
- name: Cache system (apt) dependencies
91-
uses: actions/cache@v4
92-
with:
93-
path: apt-archives
94-
key: ${{ runner.os }}-apt-build-embedded-${{ hashFiles('**/.github/workflows/ci-test.yaml') }}
95-
restore-keys: |
96-
${{ runner.os }}-apt-
97-
98-
- name: Install system dependencies
99-
run: |
100-
mkdir -p apt-archives
101-
sudo cp -a apt-archives /var/cache/apt/archives # workaround for apt-get cache https://github.com/actions/cache/issues/324#issuecomment-1816908646
102-
sudo apt-get update
103-
sudo apt-get install -y libzmq3-dev pkg-config python3.12-dev python3.12-venv
104-
cp -a /var/cache/apt/archives/*.deb apt-archives 2>/dev/null || true
105-
106-
- name: Extract Go version from go.mod
107-
run: sed -En 's/^go (.*)$/GO_VERSION=\1/p' go.mod >> "$GITHUB_ENV"
81+
- name: Set up Docker Buildx
82+
uses: docker/setup-buildx-action@v3
10883

109-
- name: Set up Go with cache
110-
uses: actions/setup-go@v5
84+
- name: Build UDS tokenizer image
85+
uses: docker/build-push-action@v6
11186
with:
112-
go-version: "${{ env.GO_VERSION }}"
113-
cache-dependency-path: ./go.sum
87+
context: services/uds_tokenizer
88+
tags: llm-d-uds-tokenizer:e2e-test
89+
load: true
90+
cache-from: type=gha
91+
cache-to: type=gha,mode=max
11492

115-
- name: Cache Python (pip) dependencies
116-
uses: actions/cache@v4
117-
with:
118-
path: |
119-
~/.cache/pip
120-
build/venv
121-
# This key is based ONLY on the requirements file
122-
key: ${{ runner.os }}-pip-${{ hashFiles('**/.github/workflows/ci-test.yaml') }}-${{ hashFiles('**/requirements.txt') }}
123-
restore-keys: |
124-
${{ runner.os }}-pip-
125-
126-
# Set up the Python virtual environment (includes Python config verification)
127-
- name: Run setup-venv
128-
run: make setup-venv
129-
130-
- name: Install dependencies
131-
run: |
132-
go mod download
133-
make install-python-deps
134-
135-
- name: Run make build-embedded
136-
run: make build-embedded
93+
- name: Run make e2e-test
94+
run: make e2e-test

Makefile

Lines changed: 40 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -622,12 +622,15 @@ download-zmq: ## Install ZMQ dependencies based on OS/ARCH
622622

623623
##@ Examples
624624

625+
UDS_TOKENIZER_GRPC_PORT ?= 50051
626+
UDS_TOKENIZER_HEALTH_PORT ?= 8082
627+
625628
# Define a template for building examples
626629
define BUILD_EXAMPLE_TEMPLATE
627-
$(1): $$(SRC) | check-go install-python-deps download-zmq
630+
$(1): $$(SRC) | check-go
628631
@echo "Building $$@..."
629632
@mkdir -p $$(dir $$@)
630-
@go build -tags $(EMBEDDED_TAGS) -o $$@ $(2)
633+
@go build -o $$@ $(2)
631634
@echo "✅ Built $$@"
632635
endef
633636

@@ -659,8 +662,39 @@ EXAMPLE_SHORTS := offline online valkey kv_cache_index kv_cache_index_service
659662
.PHONY: $(EXAMPLE_SHORTS)
660663
$(EXAMPLE_SHORTS):
661664

662-
.PHONY: run-example
663-
run-example: $(EXAMPLE) ## Run the example locally (e.g., make run-example offline)
665+
.PHONY: start-tokenizer
666+
start-tokenizer: check-container-tool ## Start the UDS tokenizer container; requires image-build-uds to have been run first
667+
@printf "\033[33;1m==== Starting UDS tokenizer container ====\033[0m\n"
668+
@$(CONTAINER_TOOL) run -d --rm --name uds-tokenizer-example --network host \
669+
-e GRPC_PORT=$(UDS_TOKENIZER_GRPC_PORT) \
670+
-e PROBE_PORT=$(UDS_TOKENIZER_HEALTH_PORT) \
671+
$(UDS_TOKENIZER_IMAGE)
672+
@printf "Waiting for tokenizer to be ready"
673+
@for i in $$(seq 1 30); do \
674+
if curl -sf http://localhost:$(UDS_TOKENIZER_HEALTH_PORT)/healthz > /dev/null 2>&1; then \
675+
printf " ready!\n"; break; \
676+
fi; \
677+
if [ $$i -eq 30 ]; then \
678+
printf " timeout!\n"; \
679+
$(CONTAINER_TOOL) stop uds-tokenizer-example 2>/dev/null || true; \
680+
exit 1; \
681+
fi; \
682+
printf "."; sleep 2; \
683+
done
684+
685+
.PHONY: stop-tokenizer
686+
stop-tokenizer: ## Stop and remove the UDS tokenizer container
687+
@$(CONTAINER_TOOL) stop uds-tokenizer-example 2>/dev/null || true
688+
@$(CONTAINER_TOOL) rm -f uds-tokenizer-example 2>/dev/null || true
689+
690+
.PHONY: run-example-only
691+
run-example-only: $(EXAMPLE) ## Run the example binary only (tokenizer must already be running via start-tokenizer)
664692
@printf "\033[33;1m==== Running example $(EXAMPLE) ====\033[0m\n"
665-
@echo "Using PYTHONPATH=$(PYTHONPATH)"
666-
@./$(EXAMPLE)
693+
@TOKENIZER_ENDPOINT=localhost:$(UDS_TOKENIZER_GRPC_PORT) ./$(EXAMPLE)
694+
695+
.PHONY: run-example
696+
run-example: ## Run the example with UDS tokenizer in Docker (e.g., make run-example offline); requires image-build-uds to have been run first
697+
@$(MAKE) --no-print-directory start-tokenizer
698+
@$(MAKE) --no-print-directory run-example-only; status=$$?; \
699+
$(MAKE) --no-print-directory stop-tokenizer; \
700+
exit $$status

examples/helper/events.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ func SimulateProduceEvent(ctx context.Context, publisher *Publisher) error {
5050
nil, // LoraID
5151
medium, // Medium
5252
nil, // LoraName
53+
nil, // ExtraKeys (MM extra keys, added in vLLM multi-modal support)
5354
}
5455

5556
//nolint // won't fail

examples/helper/indexer.go

Lines changed: 24 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
//go:build embedded_tokenizers
2-
31
// Copyright 2025 The llm-d Authors.
42
//
53
// Licensed under the Apache License, Version 2.0 (the "License");
@@ -18,32 +16,49 @@ package helper
1816

1917
import (
2018
"context"
19+
"net"
2120
"os"
2221

2322
"github.com/llm-d/llm-d-kv-cache/examples/testdata"
2423
"github.com/llm-d/llm-d-kv-cache/pkg/kvcache"
2524
"github.com/llm-d/llm-d-kv-cache/pkg/kvcache/kvblock"
25+
"github.com/llm-d/llm-d-kv-cache/pkg/tokenization"
2626
"sigs.k8s.io/controller-runtime/pkg/log"
2727
)
2828

2929
const (
30-
envHFToken = "HF_TOKEN"
30+
// EnvTokenizerEndpoint is the env var for the UDS tokenizer socket path or TCP address.
31+
// Use a path (e.g. /tmp/tokenizer/tokenizer-uds.socket) for UDS mode,
32+
// or host:port (e.g. localhost:50051) for TCP mode.
33+
EnvTokenizerEndpoint = "TOKENIZER_ENDPOINT" //nolint:gosec // env var name, not a credential
3134
)
3235

36+
func isTCPAddr(s string) bool {
37+
host, port, err := net.SplitHostPort(s)
38+
return err == nil && host != "" && port != ""
39+
}
40+
41+
// ApplyTokenizerEndpoint reads TOKENIZER_ENDPOINT and sets UDS config on the given config.
42+
func ApplyTokenizerEndpoint(config *kvcache.Config) {
43+
endpoint := os.Getenv(EnvTokenizerEndpoint)
44+
if endpoint == "" {
45+
return
46+
}
47+
config.TokenizersPoolConfig.UdsTokenizerConfig = &tokenization.UdsTokenizerConfig{
48+
SocketFile: endpoint,
49+
UseTCP: isTCPAddr(endpoint),
50+
}
51+
}
52+
3353
func getKVCacheIndexerConfig() (*kvcache.Config, error) {
3454
config, err := kvcache.NewDefaultConfig()
3555
if err != nil {
3656
return nil, err
3757
}
3858

3959
config.TokenizersPoolConfig.ModelName = testdata.ModelName
60+
ApplyTokenizerEndpoint(config)
4061

41-
huggingFaceToken := os.Getenv(envHFToken)
42-
if huggingFaceToken != "" {
43-
config.TokenizersPoolConfig.HFTokenizerConfig.HuggingFaceToken = huggingFaceToken
44-
}
45-
46-
config.TokenizersPoolConfig.ModelName = testdata.ModelName
4762
return config, nil
4863
}
4964

examples/kv_cache_aware_scorer/kvcache_aware_scorer.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
//go:build exclude && embedded_tokenizers
1+
//go:build exclude
22

33
/*
44
Copyright 2025 The llm-d Authors.

examples/kv_cache_index/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ make run-example kv_cache_index
3333

3434
```
3535
I... Created Indexer
36-
I... Started Indexer {"model": "bert-base-uncased"}
36+
I... Started Indexer {"model": "Qwen/Qwen2-VL-7B-Instruct"}
3737
I... Got pods {"pods": {}}
3838
I... Got pods {"pods": {"pod1":4}}
3939
```

examples/kv_cache_index/main.go

Lines changed: 7 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
//go:build embedded_tokenizers
2-
31
/*
42
Copyright 2025 The llm-d Authors.
53
@@ -20,16 +18,15 @@ package main
2018

2119
import (
2220
"context"
23-
_ "embed"
2421
"fmt"
2522
"os"
2623
"time"
2724

28-
"github.com/llm-d/llm-d-kv-cache/pkg/utils"
2925
"github.com/redis/go-redis/v9"
3026
"sigs.k8s.io/controller-runtime/pkg/log"
3127
"sigs.k8s.io/controller-runtime/pkg/log/zap"
3228

29+
"github.com/llm-d/llm-d-kv-cache/examples/helper"
3330
"github.com/llm-d/llm-d-kv-cache/examples/testdata"
3431
"github.com/llm-d/llm-d-kv-cache/pkg/kvcache"
3532
"github.com/llm-d/llm-d-kv-cache/pkg/kvcache/kvblock"
@@ -39,7 +36,6 @@ const (
3936
defaultModelName = testdata.ModelName
4037

4138
envRedisAddr = "REDIS_ADDR"
42-
envHFToken = "HF_TOKEN"
4339
envModelName = "MODEL_NAME"
4440
)
4541

@@ -51,10 +47,7 @@ func getKVCacheIndexerConfig() (*kvcache.Config, error) {
5147

5248
config.TokenizersPoolConfig.ModelName = getModelName()
5349

54-
huggingFaceToken := os.Getenv(envHFToken)
55-
if huggingFaceToken != "" && config.TokenizersPoolConfig.HFTokenizerConfig != nil {
56-
config.TokenizersPoolConfig.HFTokenizerConfig.HuggingFaceToken = huggingFaceToken
57-
}
50+
helper.ApplyTokenizerEndpoint(config)
5851

5952
redisAddr := os.Getenv(envRedisAddr)
6053
if redisAddr != "" {
@@ -105,8 +98,6 @@ func setupKVCacheIndexer(ctx context.Context) (*kvcache.Indexer, error) {
10598
return nil, err
10699
}
107100

108-
config.TokenizersPoolConfig.ModelName = testdata.ModelName
109-
110101
tokenProcessor, err := kvblock.NewChunkedTokenDatabase(&kvblock.TokenProcessorConfig{
111102
BlockSize: 256,
112103
})
@@ -143,10 +134,11 @@ func runPrompts(ctx context.Context, kvCacheIndexer *kvcache.Indexer) error {
143134
// Print the pods - should be empty because no tokenization
144135
logger.Info("Got pods", "pods", pods)
145136

146-
// Add entries in kvblock.Index manually
147-
engineKeys := utils.SliceMap(testdata.PromptHashes, func(h uint64) kvblock.BlockHash {
148-
return kvblock.BlockHash(h)
149-
})
137+
// Compute block keys from the actual prompt so they match what GetPodScores will look up.
138+
engineKeys, err := kvCacheIndexer.ComputeBlockKeys(ctx, testdata.RenderReq, testdata.Prompt, modelName)
139+
if err != nil {
140+
return err
141+
}
150142
// For this simple example, requestKeys == engineKeys
151143
requestKeys := engineKeys
152144

examples/kv_cache_index_service/server/main.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
//go:build embedded_tokenizers
2-
31
// Copyright 2025 The llm-d Authors.
42
//
53
// Licensed under the Apache License, Version 2.0 (the "License");
@@ -66,6 +64,7 @@ func main() {
6664
lis, err := lc.Listen(ctx, "tcp", servicerAddr)
6765
if err != nil {
6866
logger.Error(err, fmt.Sprintf("Failed to listen: %v", servicerAddr))
67+
return
6968
}
7069

7170
// Setup ZMQ publisher to simulate vLLM engines
@@ -79,6 +78,7 @@ func main() {
7978
indexerSvc, err := setupIndexerService(ctx)
8079
if err != nil {
8180
logger.Error(err, "failed to create indexer service")
81+
return
8282
}
8383

8484
// Initial query - should be empty since no events have been published

0 commit comments

Comments
 (0)