Skip to content

Commit 3051935

Browse files
authored
Add modelArtifacts.readOnly option for PVC model mounts (#249)
The chart hardcodes readOnly: true on PVC model mounts, which breaks pvc+hf:// deployments where Hugging Face Hub needs to write cache metadata. Add a modelArtifacts.readOnly field (default true) so users can set it to false when write access is needed. Clarify OCI vs PVC read-only behavior in values, helpers, and PVC examples.

Signed-off-by: Kay Yan <kay.yan@daocloud.io>
1 parent fa2972b commit 3051935

18 files changed

Lines changed: 74 additions & 43 deletions

charts/llm-d-modelservice/Chart.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ type: application
1313
# This is the chart version. This version number should be incremented each time you make changes
1414
# to the chart and its templates, including the app version.
1515
# Versions are expected to follow Semantic Versioning (https://semver.org/)
16-
version: "v0.4.11"
16+
version: "v0.4.12"
1717
# This is the version number of the application being deployed. This version number should be
1818
# incremented each time you make changes to the application. Versions are not expected to
1919
# follow Semantic Versioning. They should reflect the version the application is using.

charts/llm-d-modelservice/templates/_helpers.tpl

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -373,7 +373,7 @@ Context is .Values.modelArtifacts
373373
- name: model-storage
374374
persistentVolumeClaim:
375375
claimName: {{ $claim }}
376-
readOnly: true
376+
readOnly: {{ .readOnly }}
377377
{{- else if eq $protocol "oci" }}
378378
- name: model-storage
379379
image:
@@ -398,12 +398,13 @@ volumeMounts:
398398
{{- if .container.mountModelVolume }}
399399
- name: model-storage
400400
mountPath: {{ .Values.modelArtifacts.mountPath }}
401-
{{- /* enforce readOnly volumeMounts for OCI and PVCs */}}
401+
{{- /* OCI always readOnly; PVC variants use modelArtifacts.readOnly */}}
402402
{{- $parsedArtifacts := regexSplit "://" .Values.modelArtifacts.uri -1 -}}
403403
{{- $protocol := first $parsedArtifacts -}}
404-
{{- $path := last $parsedArtifacts -}}
405-
{{- if or (eq $protocol "oci") (eq $protocol "pvc") }}
404+
{{- if eq $protocol "oci" }}
406405
readOnly: true
406+
{{- else if hasPrefix "pvc" $protocol }}
407+
readOnly: {{ .Values.modelArtifacts.readOnly }}
407408
{{- end -}}
408409
{{- end }}
409410
{{- end }}

charts/llm-d-modelservice/values.schema.json

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1966,6 +1966,12 @@
19661966
"required": [],
19671967
"title": "name"
19681968
},
1969+
"readOnly": {
1970+
"default": true,
1971+
"description": "Whether PVC-backed model mounts (pvc:// and pvc+hf://) should be read-only. Set to false for pvc+hf:// when Hugging Face cache writes are needed; other URI schemes ignore this setting.",
1972+
"title": "readOnly",
1973+
"type": "boolean"
1974+
},
19691975
"size": {
19701976
"default": "5Mi",
19711977
"description": "size of volume to create to hold the model",

charts/llm-d-modelservice/values.schema.tmpl.json

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -652,6 +652,12 @@
652652
"required": [],
653653
"title": "name"
654654
},
655+
"readOnly": {
656+
"default": true,
657+
"description": "Whether PVC-backed model mounts (pvc:// and pvc+hf://) should be read-only. Set to false for pvc+hf:// when Hugging Face cache writes are needed; other URI schemes ignore this setting.",
658+
"title": "readOnly",
659+
"type": "boolean"
660+
},
655661
"size": {
656662
"default": "5Mi",
657663
"description": "size of volume to create to hold the model",

charts/llm-d-modelservice/values.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,12 @@ modelArtifacts:
7676
authSecretName: ""
7777
# location where model volume will be mounted (used when mountModelVolume: true)
7878
mountPath: /model-cache
79+
# @schema
80+
# type: boolean
81+
# @schema
82+
# Whether PVC-backed model mounts (pvc:// and pvc+hf://) should be read-only.
83+
# Set to false for pvc+hf:// when Hugging Face cache writes are needed; other URI schemes ignore this setting.
84+
readOnly: true
7985

8086
# When true, a LeaderWorkerSet is used instead of a Deployment
8187
multinode: false

examples/output-cpu.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ kind: ServiceAccount
66
metadata:
77
name: cpu-sim-llm-d-modelservice
88
labels:
9-
helm.sh/chart: llm-d-modelservice-v0.4.11
9+
helm.sh/chart: llm-d-modelservice-v0.4.12
1010
app.kubernetes.io/version: "v0.4.0"
1111
app.kubernetes.io/managed-by: Helm
1212
---
@@ -16,7 +16,7 @@ kind: Deployment
1616
metadata:
1717
name: cpu-sim-llm-d-modelservice-decode
1818
labels:
19-
helm.sh/chart: llm-d-modelservice-v0.4.11
19+
helm.sh/chart: llm-d-modelservice-v0.4.12
2020
app.kubernetes.io/version: "v0.4.0"
2121
app.kubernetes.io/managed-by: Helm
2222
spec:
@@ -106,7 +106,7 @@ kind: Deployment
106106
metadata:
107107
name: cpu-sim-llm-d-modelservice-prefill
108108
labels:
109-
helm.sh/chart: llm-d-modelservice-v0.4.11
109+
helm.sh/chart: llm-d-modelservice-v0.4.12
110110
app.kubernetes.io/version: "v0.4.0"
111111
app.kubernetes.io/managed-by: Helm
112112
spec:

examples/output-dra.yaml

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ kind: ServiceAccount
66
metadata:
77
name: dra-llm-d-modelservice
88
labels:
9-
helm.sh/chart: llm-d-modelservice-v0.4.11
9+
helm.sh/chart: llm-d-modelservice-v0.4.12
1010
app.kubernetes.io/version: "v0.4.0"
1111
app.kubernetes.io/managed-by: Helm
1212
---
@@ -16,7 +16,7 @@ kind: Deployment
1616
metadata:
1717
name: dra-llm-d-modelservice-decode
1818
labels:
19-
helm.sh/chart: llm-d-modelservice-v0.4.11
19+
helm.sh/chart: llm-d-modelservice-v0.4.12
2020
app.kubernetes.io/version: "v0.4.0"
2121
app.kubernetes.io/managed-by: Helm
2222
spec:
@@ -108,14 +108,15 @@ spec:
108108
volumeMounts:
109109
- name: model-storage
110110
mountPath: /model-cache
111+
readOnly: true
111112
---
112113
# Source: llm-d-modelservice/templates/resource-claim-template.yaml
113114
apiVersion: resource.k8s.io/v1
114115
kind: ResourceClaimTemplate
115116
metadata:
116117
name: intel-gaudi-claim-template-decode
117118
labels:
118-
helm.sh/chart: llm-d-modelservice-v0.4.11
119+
helm.sh/chart: llm-d-modelservice-v0.4.12
119120
app.kubernetes.io/version: "v0.4.0"
120121
app.kubernetes.io/managed-by: Helm
121122
llm-d.ai/role: decode

examples/output-gaudi.yaml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ kind: ServiceAccount
66
metadata:
77
name: gaudi-llm-d-modelservice
88
labels:
9-
helm.sh/chart: llm-d-modelservice-v0.4.11
9+
helm.sh/chart: llm-d-modelservice-v0.4.12
1010
app.kubernetes.io/version: "v0.4.0"
1111
app.kubernetes.io/managed-by: Helm
1212
---
@@ -16,7 +16,7 @@ kind: Deployment
1616
metadata:
1717
name: gaudi-llm-d-modelservice-decode
1818
labels:
19-
helm.sh/chart: llm-d-modelservice-v0.4.11
19+
helm.sh/chart: llm-d-modelservice-v0.4.12
2020
app.kubernetes.io/version: "v0.4.0"
2121
app.kubernetes.io/managed-by: Helm
2222
spec:
@@ -101,3 +101,4 @@ spec:
101101
volumeMounts:
102102
- name: model-storage
103103
mountPath: /model-cache
104+
readOnly: true

examples/output-heterogeneous-pd.yaml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ kind: ServiceAccount
66
metadata:
77
name: heterogeneous-pd-llm-d-modelservice
88
labels:
9-
helm.sh/chart: llm-d-modelservice-v0.4.11
9+
helm.sh/chart: llm-d-modelservice-v0.4.12
1010
app.kubernetes.io/version: "v0.4.0"
1111
app.kubernetes.io/managed-by: Helm
1212
---
@@ -16,7 +16,7 @@ kind: Deployment
1616
metadata:
1717
name: heterogeneous-pd-llm-d-modelservice-decode
1818
labels:
19-
helm.sh/chart: llm-d-modelservice-v0.4.11
19+
helm.sh/chart: llm-d-modelservice-v0.4.12
2020
app.kubernetes.io/version: "v0.4.0"
2121
app.kubernetes.io/managed-by: Helm
2222
spec:
@@ -132,7 +132,7 @@ kind: Deployment
132132
metadata:
133133
name: heterogeneous-pd-llm-d-modelservice-prefill
134134
labels:
135-
helm.sh/chart: llm-d-modelservice-v0.4.11
135+
helm.sh/chart: llm-d-modelservice-v0.4.12
136136
app.kubernetes.io/version: "v0.4.0"
137137
app.kubernetes.io/managed-by: Helm
138138
spec:
@@ -225,7 +225,7 @@ kind: ResourceClaimTemplate
225225
metadata:
226226
name: nvidia-claim-template-decode
227227
labels:
228-
helm.sh/chart: llm-d-modelservice-v0.4.11
228+
helm.sh/chart: llm-d-modelservice-v0.4.12
229229
app.kubernetes.io/version: "v0.4.0"
230230
app.kubernetes.io/managed-by: Helm
231231
llm-d.ai/role: decode

examples/output-pd-mnnvl.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ kind: ServiceAccount
66
metadata:
77
name: pd-mnnvl-llm-d-modelservice
88
labels:
9-
helm.sh/chart: llm-d-modelservice-v0.4.11
9+
helm.sh/chart: llm-d-modelservice-v0.4.12
1010
app.kubernetes.io/version: "v0.4.0"
1111
app.kubernetes.io/managed-by: Helm
1212
---
@@ -16,7 +16,7 @@ kind: Deployment
1616
metadata:
1717
name: pd-mnnvl-llm-d-modelservice-decode
1818
labels:
19-
helm.sh/chart: llm-d-modelservice-v0.4.11
19+
helm.sh/chart: llm-d-modelservice-v0.4.12
2020
app.kubernetes.io/version: "v0.4.0"
2121
app.kubernetes.io/managed-by: Helm
2222
spec:
@@ -132,7 +132,7 @@ kind: Deployment
132132
metadata:
133133
name: pd-mnnvl-llm-d-modelservice-prefill
134134
labels:
135-
helm.sh/chart: llm-d-modelservice-v0.4.11
135+
helm.sh/chart: llm-d-modelservice-v0.4.12
136136
app.kubernetes.io/version: "v0.4.0"
137137
app.kubernetes.io/managed-by: Helm
138138
spec:

0 commit comments

Comments
 (0)