# model.yaml
---
# LLMInferenceService named "simulated".
# The workload container runs the llm-d inference simulator image rather than
# a real model server, listening on port 8000 with TLS enabled.
apiVersion: serving.kserve.io/v1alpha1
kind: LLMInferenceService
metadata:
  name: simulated
spec:
  # Model identity advertised by this service.
  model:
    uri: hf://facebook/opt-125m
    name: facebook/opt-125m
  replicas: 1
  router:
    # Empty mapping: no explicit route settings are given here.
    route: {}
    # Connect to MaaS-enabled gateway
    gateway:
      refs:
        - name: maas-default-gateway
          namespace: openshift-ingress
  template:
    containers:
      - name: main
        image: "ghcr.io/llm-d/llm-d-inference-sim:v0.5.1"
        imagePullPolicy: Always
        command: ["/app/llm-d-inference-sim"]
        args:
          # Must match containerPort below; quoted so it stays a string arg.
          - --port
          - "8000"
          # NOTE(review): this value differs from spec.model.name
          # (facebook/opt-125m) -- confirm the mismatch is intended.
          - --model
          - facebook-opt-125m-simulated
          - --mode
          - random
          # NOTE(review): no volume/volumeMount providing these files is
          # visible in this manifest -- presumably supplied elsewhere; confirm.
          - --ssl-certfile
          - /etc/ssl/certs/tls.crt
          - --ssl-keyfile
          - /etc/ssl/certs/tls.key
        env:
          # Expose the pod's own name and namespace via the downward API.
          - name: POD_NAME
            valueFrom:
              fieldRef:
                apiVersion: v1
                fieldPath: metadata.name
          - name: POD_NAMESPACE
            valueFrom:
              fieldRef:
                apiVersion: v1
                fieldPath: metadata.namespace
        ports:
          # Named port; the probes below refer to it by this name.
          - name: https
            containerPort: 8000
            protocol: TCP
        # Both probes use scheme HTTPS against the named "https" port,
        # consistent with the TLS cert/key arguments passed above.
        livenessProbe:
          httpGet:
            path: /health
            port: https
            scheme: HTTPS
        readinessProbe:
          httpGet:
            path: /ready
            port: https
            scheme: HTTPS