Skip to content

Commit 07a7a52

Browse files
Merge pull request #145 from zdtsw/cherrypick-upstream-prs
[Cherrypick] upstream PR from 0.5.1 to 0.6.0
2 parents 740c775 + 6335602 commit 07a7a52

28 files changed

Lines changed: 1025 additions & 117 deletions

.github/actions/trivy-scan/action.yml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,14 @@ description: Scan container image with Trivy
33
inputs:
44
image:
55
required: true
6+
description: Image to scan
67
runs:
78
using: "composite"
89
steps:
910
- name: Install Trivy
1011
run: |
11-
wget https://github.com/aquasecurity/trivy/releases/download/v0.44.1/trivy_0.44.1_Linux-64bit.deb
12-
sudo dpkg -i trivy_0.44.1_Linux-64bit.deb
12+
wget https://github.com/aquasecurity/trivy/releases/download/v0.69.2/trivy_0.69.2_Linux-64bit.deb
13+
sudo dpkg -i trivy_0.69.2_Linux-64bit.deb
1314
shell: bash
1415

1516

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
name: Build and Push Container Images
2+
3+
on:
4+
workflow_call:
5+
inputs:
6+
epp-image-name:
7+
required: true
8+
type: string
9+
sidecar-image-name:
10+
required: true
11+
type: string
12+
tag:
13+
required: true
14+
type: string
15+
prerelease:
16+
required: true
17+
type: string
18+
secrets:
19+
GHCR_TOKEN:
20+
required: true
21+
22+
jobs:
23+
docker-build-and-push:
24+
runs-on: ubuntu-latest
25+
steps:
26+
- name: Free Disk Space (Ubuntu)
27+
uses: jlumbroso/free-disk-space@main
28+
with:
29+
tool-cache: false
30+
31+
- name: Checkout source
32+
uses: actions/checkout@v6
33+
34+
- name: Build and push EPP image
35+
uses: ./.github/actions/docker-build-and-push
36+
with:
37+
docker-file: Dockerfile.epp
38+
tag: ${{ inputs.tag }}
39+
image-name: ${{ inputs.epp-image-name }}
40+
registry: ghcr.io/llm-d
41+
github-token: ${{ secrets.GHCR_TOKEN }}
42+
prerelease: ${{ inputs.prerelease }}
43+
44+
- name: Build and push sidecar image
45+
uses: ./.github/actions/docker-build-and-push
46+
with:
47+
docker-file: Dockerfile.sidecar
48+
tag: ${{ inputs.tag }}
49+
image-name: ${{ inputs.sidecar-image-name }}
50+
registry: ghcr.io/llm-d
51+
github-token: ${{ secrets.GHCR_TOKEN }}
52+
prerelease: ${{ inputs.prerelease }}
53+
54+
- name: Run Trivy scan on EPP image
55+
uses: ./.github/actions/trivy-scan
56+
with:
57+
image: ghcr.io/llm-d/${{ inputs.epp-image-name }}:${{ inputs.tag }}
58+
59+
- name: Run Trivy scan on sidecar image
60+
uses: ./.github/actions/trivy-scan
61+
with:
62+
image: ghcr.io/llm-d/${{ inputs.sidecar-image-name }}:${{ inputs.tag }}

.github/workflows/ci-dev.yaml

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
name: CI - Dev - Docker Container Image
2+
3+
on:
4+
push:
5+
branches:
6+
- main
7+
- 'release-*'
8+
9+
jobs:
10+
set-params:
11+
runs-on: ubuntu-latest
12+
outputs:
13+
project_name: ${{ steps.version.outputs.project_name }}
14+
sidecar_name: ${{ steps.version.outputs.sidecar_name }}
15+
tag: ${{ steps.tag.outputs.tag }}
16+
steps:
17+
- name: Set image names
18+
id: version
19+
run: |
20+
repo="${GITHUB_REPOSITORY##*/}"
21+
echo "project_name=${repo}-dev" >> "$GITHUB_OUTPUT"
22+
echo "sidecar_name=llm-d-routing-sidecar-dev" >> "$GITHUB_OUTPUT"
23+
24+
- name: Set branch name as tag
25+
id: tag
26+
run: |
27+
echo "tag=${GITHUB_REF_NAME}" >> "$GITHUB_OUTPUT"
28+
29+
build-and-push:
30+
needs: set-params
31+
uses: ./.github/workflows/ci-build-images.yaml
32+
with:
33+
epp-image-name: ${{ needs.set-params.outputs.project_name }}
34+
sidecar-image-name: ${{ needs.set-params.outputs.sidecar_name }}
35+
tag: ${{ needs.set-params.outputs.tag }}
36+
prerelease: "true"
37+
secrets:
38+
GHCR_TOKEN: ${{ secrets.GHCR_TOKEN }}

.github/workflows/ci-release.yaml

Lines changed: 16 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -8,27 +8,21 @@ on:
88
types: [published] # Also runs when a GitHub release is published
99

1010
jobs:
11-
docker-build-and-push:
11+
set-params:
1212
runs-on: ubuntu-latest
13+
outputs:
14+
project_name: ${{ steps.version.outputs.project_name }}
15+
sidecar_name: ${{ steps.version.outputs.sidecar_name }}
16+
tag: ${{ steps.tag.outputs.tag }}
17+
prerelease: ${{ steps.tag.outputs.prerelease }}
1318
steps:
14-
- name: Free Disk Space (Ubuntu)
15-
uses: jlumbroso/free-disk-space@main
16-
with:
17-
tool-cache: false
18-
19-
- name: Checkout source
20-
uses: actions/checkout@v6
21-
2219
- name: Set image names
2320
id: version
2421
run: |
2522
repo="${GITHUB_REPOSITORY##*/}"
2623
echo "project_name=$repo" >> "$GITHUB_OUTPUT"
2724
echo "sidecar_name=llm-d-routing-sidecar" >> "$GITHUB_OUTPUT"
2825
29-
- name: Print project name
30-
run: echo "Project is ${{ steps.version.outputs.project_name }}"
31-
3226
- name: Determine tag name
3327
id: tag
3428
run: |
@@ -43,32 +37,13 @@ jobs:
4337
echo "prerelease=${PRE_RELEASE}" >> "$GITHUB_OUTPUT"
4438
shell: bash
4539

46-
- name: Build and push EPP image
47-
uses: ./.github/actions/docker-build-and-push
48-
with:
49-
docker-file: Dockerfile.epp
50-
tag: ${{ steps.tag.outputs.tag }}
51-
image-name: ${{ steps.version.outputs.project_name }}
52-
registry: ghcr.io/llm-d
53-
github-token: ${{ secrets.GHCR_TOKEN }}
54-
prerelease: ${{ steps.tag.outputs.prerelease }}
55-
56-
- name: Build and push sidecar image
57-
uses: ./.github/actions/docker-build-and-push
58-
with:
59-
docker-file: Dockerfile.sidecar
60-
tag: ${{ steps.tag.outputs.tag }}
61-
image-name: ${{ steps.version.outputs.sidecar_name }}
62-
registry: ghcr.io/llm-d
63-
github-token: ${{ secrets.GHCR_TOKEN }}
64-
prerelease: ${{ steps.tag.outputs.prerelease }}
65-
66-
- name: Run Trivy scan on EPP image
67-
uses: ./.github/actions/trivy-scan
68-
with:
69-
image: ghcr.io/llm-d/${{ steps.version.outputs.project_name }}:${{ steps.tag.outputs.tag }}
70-
71-
- name: Run Trivy scan on sidecar image
72-
uses: ./.github/actions/trivy-scan
73-
with:
74-
image: ghcr.io/llm-d/${{ steps.version.outputs.sidecar_name }}:${{ steps.tag.outputs.tag }}
40+
build-and-push:
41+
needs: set-params
42+
uses: ./.github/workflows/ci-build-images.yaml
43+
with:
44+
epp-image-name: ${{ needs.set-params.outputs.project_name }}
45+
sidecar-image-name: ${{ needs.set-params.outputs.sidecar_name }}
46+
tag: ${{ needs.set-params.outputs.tag }}
47+
prerelease: ${{ needs.set-params.outputs.prerelease }}
48+
secrets:
49+
GHCR_TOKEN: ${{ secrets.GHCR_TOKEN }}

DEVELOPMENT.md

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -8,15 +8,15 @@ Documentation for developing the inference scheduler.
88
- [Golang] `v1.24`+
99
- [Docker] (or [Podman])
1010
- [Kubernetes in Docker (KIND)]
11-
- [Kustomize]
11+
- [Kubectl] `v1.14`+
1212
- [ZeroMQ]
1313

1414
[Make]:https://www.gnu.org/software/make/
1515
[Golang]:https://go.dev/
1616
[Docker]:https://www.docker.com/
1717
[Podman]:https://podman.io/
1818
[Kubernetes in Docker (KIND)]:https://github.com/kubernetes-sigs/kind
19-
[Kustomize]:https://kubectl.docs.kubernetes.io/installation/kustomize/
19+
[Kubectl]:https://kubectl.docs.kubernetes.io/installation/kubectl/
2020
[ZeroMQ]:https://zeromq.org/
2121

2222
> [!NOTE]
@@ -59,30 +59,30 @@ There are several ways to access the gateway:
5959
**Port forward**:
6060

6161
```bash
62-
$ kubectl --context llm-d-inference-scheduler-dev port-forward service/inference-gateway 8080:80
62+
kubectl --context kind-llm-d-inference-scheduler-dev port-forward service/inference-gateway-istio 8080:80
6363
```
6464

6565
**NodePort**
6666

6767
```bash
6868
# Determine the k8s node address
69-
$ kubectl --context llm-d-inference-scheduler-dev get node -o yaml | grep address
69+
kubectl --context kind-llm-d-inference-scheduler-dev get node -o yaml | grep address
7070
# The service is accessible over port 80 of the worker IP address.
7171
```
7272

7373
**LoadBalancer**
7474

7575
```bash
7676
# Install and run cloud-provider-kind:
77-
$ go install sigs.k8s.io/cloud-provider-kind@latest && cloud-provider-kind &
78-
$ kubectl --context llm-d-inference-scheduler-dev get service inference-gateway
77+
go install sigs.k8s.io/cloud-provider-kind@latest && cloud-provider-kind &
78+
kubectl --context kind-llm-d-inference-scheduler-dev get service inference-gateway-istio
7979
# Wait for the LoadBalancer External-IP to become available. The service is accessible over port 80.
8080
```
8181

8282
You can now make requests using the IP:port of one of the access modes above:
8383

8484
```bash
85-
$ curl -s -w '\n' http://<IP:port>/v1/completions -H 'Content-Type: application/json' -d '{"model":"food-review","prompt":"hi","max_tokens":10,"temperature":0}' | jq
85+
curl -s -w '\n' http://<IP:port>/v1/completions -H 'Content-Type: application/json' -d '{"model":"food-review","prompt":"hi","max_tokens":10,"temperature":0}' | jq
8686
```
8787

8888
By default, the created inference gateway can be accessed on port 30080. This can

Dockerfile.sidecar

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ RUN go mod download
1717
COPY cmd/pd-sidecar/main.go cmd/cmd.go
1818
COPY pkg/sidecar pkg/sidecar
1919
COPY pkg/common pkg/common
20+
COPY pkg/telemetry pkg/telemetry
2021

2122
# Build
2223
# GOARCH has no default value, so that the binary is built according to the host where the command

Makefile

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -190,12 +190,12 @@ image-build-uds-tokenizer: check-container-tool ## Build UDS tokenizer image fro
190190
if [ -z "$$KV_CACHE_PATH_CHECK" ]; then \
191191
echo "Error: Could not find kv-cache module even after download."; \
192192
exit 1; \
193-
fi
193+
fi; \
194194
$(CONTAINER_RUNTIME) build \
195195
--platform linux/$(TARGETARCH) \
196196
-t $(UDS_TOKENIZER_IMAGE) \
197-
-f $(KV_CACHE_PATH)/services/uds_tokenizer/Dockerfile \
198-
$(KV_CACHE_PATH)/services/uds_tokenizer
197+
-f $$KV_CACHE_PATH_CHECK/services/uds_tokenizer/Dockerfile \
198+
$$KV_CACHE_PATH_CHECK/services/uds_tokenizer
199199

200200
.PHONY: image-build-%
201201
image-build-%: check-container-tool ## Build Container image using $(CONTAINER_RUNTIME)

cmd/epp/main.go

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,15 +32,40 @@ import (
3232

3333
"github.com/llm-d/llm-d-inference-scheduler/pkg/metrics"
3434
"github.com/llm-d/llm-d-inference-scheduler/pkg/plugins"
35+
"github.com/llm-d/llm-d-inference-scheduler/pkg/telemetry"
3536
)
3637

3738
func main() {
39+
os.Exit(run())
40+
}
41+
42+
func run() int {
43+
ctx := ctrl.SetupSignalHandler()
44+
45+
// Initialize tracing before creating any spans
46+
shutdownTracing, err := telemetry.InitTracing(ctx)
47+
if err != nil {
48+
// Log error but don't fail - tracing is optional
49+
ctrl.Log.Error(err, "Failed to initialize tracing")
50+
}
51+
if shutdownTracing != nil {
52+
defer func() {
53+
if err := shutdownTracing(ctx); err != nil {
54+
ctrl.Log.Error(err, "Failed to shutdown tracing")
55+
}
56+
}()
57+
}
58+
3859
// Register llm-d-inference-scheduler plugins
3960
plugins.RegisterAllPlugins()
4061

62+
// Note: GIE built-in plugins are automatically registered by the runner
63+
// when it processes configuration in runner.parsePluginsConfiguration()
64+
4165
if err := runner.NewRunner().
4266
WithCustomCollectors(metrics.GetCollectors()...).
43-
Run(ctrl.SetupSignalHandler()); err != nil {
44-
os.Exit(1)
67+
Run(ctx); err != nil {
68+
return 1
4569
}
70+
return 0
4671
}

cmd/pd-sidecar/main.go

Lines changed: 18 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@ limitations under the License.
1616
package main
1717

1818
import (
19-
"crypto/tls"
2019
"flag"
2120
"net/url"
2221
"os"
@@ -29,6 +28,7 @@ import (
2928

3029
"github.com/llm-d/llm-d-inference-scheduler/pkg/sidecar/proxy"
3130
"github.com/llm-d/llm-d-inference-scheduler/pkg/sidecar/version"
31+
"github.com/llm-d/llm-d-inference-scheduler/pkg/telemetry"
3232
)
3333

3434
var (
@@ -70,6 +70,20 @@ func main() {
7070
ctx := ctrl.SetupSignalHandler()
7171
log.IntoContext(ctx, logger)
7272

73+
// Initialize tracing before creating any spans
74+
shutdownTracing, err := telemetry.InitTracing(ctx)
75+
if err != nil {
76+
// Log error but don't fail - tracing is optional
77+
logger.Error(err, "Failed to initialize tracing")
78+
}
79+
if shutdownTracing != nil {
80+
defer func() {
81+
if err := shutdownTracing(ctx); err != nil {
82+
logger.Error(err, "Failed to shutdown tracing")
83+
}
84+
}()
85+
}
86+
7387
logger.Info("Proxy starting", "Built on", version.BuildRef, "From Git SHA", version.CommitSHA)
7488

7589
// Validate connector
@@ -111,28 +125,15 @@ func main() {
111125
return
112126
}
113127

114-
var cert *tls.Certificate
115-
if *secureProxy {
116-
var tempCert tls.Certificate
117-
if *certPath != "" {
118-
tempCert, err = tls.LoadX509KeyPair(*certPath+"/tls.crt", *certPath+"/tls.key")
119-
} else {
120-
tempCert, err = proxy.CreateSelfSignedTLSCertificate()
121-
}
122-
if err != nil {
123-
logger.Error(err, "failed to create TLS certificate")
124-
return
125-
}
126-
cert = &tempCert
127-
}
128-
129128
config := proxy.Config{
130129
Connector: *connector,
131130
PrefillerUseTLS: *prefillerUseTLS,
132131
PrefillerInsecureSkipVerify: *prefillerInsecureSkipVerify,
133132
DecoderInsecureSkipVerify: *decoderInsecureSkipVerify,
134133
DataParallelSize: *vLLMDataParallelSize,
135134
EnablePrefillerSampling: *enablePrefillerSampling,
135+
SecureServing: *secureProxy,
136+
CertPath: *certPath,
136137
}
137138

138139
// Create SSRF protection validator
@@ -144,7 +145,7 @@ func main() {
144145

145146
proxyServer := proxy.NewProxy(*port, targetURL, config)
146147

147-
if err := proxyServer.Start(ctx, cert, validator); err != nil {
148+
if err := proxyServer.Start(ctx, validator); err != nil {
148149
logger.Error(err, "failed to start proxy server")
149150
}
150151
}

0 commit comments

Comments
 (0)