@@ -53,6 +53,7 @@ func CreateLlmdSimDeployment(namespace, deployName, modelName, appLabel, port st
5353 "--enable-kvcache" ,
5454 "--kv-cache-size=1024" ,
5555 "--block-size=16" ,
56+ "--tokenizers-cache-dir=/tmp" ,
5657 },
5758 Env : []corev1.EnvVar {
5859 {Name : "POD_NAME" , ValueFrom : & corev1.EnvVarSource {
@@ -67,6 +68,12 @@ func CreateLlmdSimDeployment(namespace, deployName, modelName, appLabel, port st
6768 FieldPath : "metadata.namespace" ,
6869 },
6970 }},
71+ {Name : "POD_IP" , ValueFrom : & corev1.EnvVarSource {
72+ FieldRef : & corev1.ObjectFieldSelector {
73+ APIVersion : "v1" ,
74+ FieldPath : "status.podIP" ,
75+ },
76+ }},
7077 },
7178 Ports : []corev1.ContainerPort {
7279 {ContainerPort : 8000 , Name : "http" , Protocol : corev1 .ProtocolTCP },
@@ -105,6 +112,7 @@ func CreateLlmdSimDeploymentWithGPU(namespace, deployName, modelName, appLabel,
105112 "--enable-kvcache" ,
106113 "--kv-cache-size=1024" ,
107114 "--block-size=16" ,
115+ "--tokenizers-cache-dir=/tmp" ,
108116 },
109117 Env : []corev1.EnvVar {
110118 {Name : "POD_NAME" , ValueFrom : & corev1.EnvVarSource {
@@ -119,6 +127,12 @@ func CreateLlmdSimDeploymentWithGPU(namespace, deployName, modelName, appLabel,
119127 FieldPath : "metadata.namespace" ,
120128 },
121129 }},
130+ {Name : "POD_IP" , ValueFrom : & corev1.EnvVarSource {
131+ FieldRef : & corev1.ObjectFieldSelector {
132+ APIVersion : "v1" ,
133+ FieldPath : "status.podIP" ,
134+ },
135+ }},
122136 },
123137 Ports : []corev1.ContainerPort {
124138 {ContainerPort : 8000 , Name : "http" , Protocol : corev1 .ProtocolTCP },
@@ -185,6 +199,19 @@ func CreateLlmdSimDeploymentWithGPUAndNodeSelector(
185199
186200 if len (nodeSelector ) > 0 {
187201 deployment .Spec .Template .Spec .NodeSelector = nodeSelector
202+ // Tolerate the control-plane (and legacy master) NoSchedule taints, since in kind-emulator the H100 nodes might be control-plane nodes
203+ deployment .Spec .Template .Spec .Tolerations = []corev1.Toleration {
204+ {
205+ Key : "node-role.kubernetes.io/control-plane" ,
206+ Operator : corev1 .TolerationOpExists ,
207+ Effect : corev1 .TaintEffectNoSchedule ,
208+ },
209+ {
210+ Key : "node-role.kubernetes.io/master" ,
211+ Operator : corev1 .TolerationOpExists ,
212+ Effect : corev1 .TaintEffectNoSchedule ,
213+ },
214+ }
188215 }
189216
190217 return deployment
0 commit comments