Skip to content

Commit 6212044

Browse files
committed
bump of configs to match current running env
Signed-off-by: Ryan Cook <rcook@redhat.com>
1 parent c9a5b2f commit 6212044

2 files changed

Lines changed: 64 additions & 19 deletions

File tree

kubernetes/llama-stack/configmap.yaml

Lines changed: 30 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
1-
kind: ConfigMap
21
apiVersion: v1
2+
kind: ConfigMap
33
metadata:
4+
labels:
5+
app.kubernetes.io/instance: llama-stack
46
name: run-config
57
data:
68
config.yaml: |
@@ -18,10 +20,24 @@ data:
1820
- vector_io
1921
providers:
2022
inference:
21-
- provider_id: vllm-inference
23+
- provider_id: llama-3b
24+
provider_type: remote::vllm
25+
config:
26+
url: ${env.LLAMA3B_URL}
27+
max_tokens: 128000
28+
api_token: fake
29+
tls_verify: false
30+
- provider_id: llama-70b
31+
provider_type: remote::vllm
32+
config:
33+
url: ${env.LLAMA70B_URL}
34+
max_tokens: 128000
35+
api_token: fake
36+
tls_verify: false
37+
- provider_id: granite
2238
provider_type: remote::vllm
2339
config:
24-
url: ${env.VLLM_URL}
40+
url: ${env.GRANITE_URL}
2541
max_tokens: 128000
2642
api_token: fake
2743
tls_verify: false
@@ -92,7 +108,7 @@ data:
92108
provider_type: inline::meta-reference
93109
config:
94110
service_name: ${env.OTEL_SERVICE_NAME:llama-stack}
95-
sinks: ${env.TELEMETRY_SINKS:console, otel_trace, otel_metric, sqlite}
111+
sinks: ${env.TELEMETRY_SINKS:console, otel_trace, sqlite}
96112
otel_trace_endpoint: ${env.OTEL_TRACE_ENDPOINT:}
97113
otel_metric_endpoint: ${env.OTEL_METRIC_ENDPOINT:}
98114
sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/remote-vllm/trace_store.db}
@@ -121,8 +137,16 @@ data:
121137
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/registry.db
122138
models:
123139
- metadata: {}
124-
model_id: ${env.INFERENCE_MODEL}
125-
provider_id: vllm-inference
140+
model_id: ${env.LLAMA3B_MODEL}
141+
provider_id: llama-3b
142+
model_type: llm
143+
- metadata: {}
144+
model_id: ${env.LLAMA70B_MODEL}
145+
provider_id: llama-70b
146+
model_type: llm
147+
- metadata: {}
148+
model_id: ${env.GRANITE_MODEL}
149+
provider_id: granite
126150
model_type: llm
127151
- metadata: {}
128152
model_id: ${env.SAFETY_MODEL}
Lines changed: 34 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,40 +1,57 @@
11
apiVersion: apps/v1
22
kind: Deployment
33
metadata:
4+
labels:
5+
app.kubernetes.io/instance: llama-stack
46
name: llamastack-deployment
57
spec:
8+
progressDeadlineSeconds: 600
69
replicas: 1
10+
revisionHistoryLimit: 10
711
selector:
812
matchLabels:
913
app: llamastack
14+
strategy:
15+
rollingUpdate:
16+
maxSurge: 25%
17+
maxUnavailable: 25%
18+
type: RollingUpdate
1019
template:
1120
metadata:
1221
labels:
1322
app: llamastack
1423
spec:
1524
containers:
1625
- args:
17-
- --yaml-config
26+
- --config
1827
- /app-config/config.yaml
1928
env:
29+
- name: MAX_TOKENS
30+
value: "128000"
2031
- name: VLLM_MAX_TOKENS
2132
value: "128000"
22-
- name: INFERENCE_MODEL
33+
- name: LLAMA3B_MODEL
2334
value: meta-llama/Llama-3.2-3B-Instruct
24-
- name: VLLM_URL
25-
value: http://vllm:8000/v1
35+
- name: LLAMA70B_MODEL
36+
value: meta-llama/Llama-3.3-70B-Instruct
37+
- name: GRANITE_URL
38+
value: https://granite-8b-llama-serve.apps.ocp-beta-test.nerc.mghpcc.org/v1
39+
- name: GRANITE_MODEL
40+
value: ibm-granite/granite-3.2-8b-instruct
41+
- name: LLAMA3B_URL
42+
value: https://llama32-3b-llama-serve.apps.ocp-beta-test.nerc.mghpcc.org/v1
43+
- name: LLAMA70B_URL
44+
value: https://llama33-70b-llama-serve.apps.ocp-beta-test.nerc.mghpcc.org/v1
2645
- name: VLLM_API_TOKEN
2746
value: fake
28-
- name: SAFETY_MODEL
29-
value: meta-llama/Llama-Guard-3-8B
30-
- name: SAFETY_VLLM_URL
31-
value: http://safety.llama-serve.svc.cluster.local:8000/v1
3247
- name: OTEL_TRACE_ENDPOINT
3348
value: http://otel-collector-collector.observability-hub.svc.cluster.local:4318/v1/traces
34-
- name: OTEL_METRIC_ENDPOINT
35-
value: http://otel-collector-collector.observability-hub.svc.cluster.local:4318/v1/metrics
49+
- name: SAFETY_MODEL
50+
value: meta-llama/Llama-Guard-3-8B
51+
- name: SAFETY_VLLM_URL
52+
value: http://safety.llama-serve.svc.cluster.local:8000/v1
3653
- name: MILVUS_DB_PATH
37-
value: 'milvus.db'
54+
value: milvus.db
3855
image: quay.io/redhat-et/llama:vllm-0.1.9
3956
imagePullPolicy: Always
4057
name: llamastack
@@ -45,6 +62,8 @@ spec:
4562
terminationMessagePath: /dev/termination-log
4663
terminationMessagePolicy: File
4764
volumeMounts:
65+
- mountPath: /pythainlp-data
66+
name: pythain
4867
- mountPath: /app-config
4968
name: run-config-volume
5069
- mountPath: /.llama
@@ -61,8 +80,10 @@ spec:
6180
defaultMode: 420
6281
name: run-config
6382
name: run-config-volume
64-
- persistentVolumeClaim:
83+
- name: llama-persist
84+
persistentVolumeClaim:
6585
claimName: llama-persist
66-
name: llama-persist
6786
- emptyDir: {}
6887
name: cache
88+
- emptyDir: {}
89+
name: pythain

0 commit comments

Comments (0)