Skip to content

Commit 1552b73

Browse files
Update max_completion_tokens and add security context to VLM inferenc… (#370)
1 parent 02f1d94 commit 1552b73

5 files changed

Lines changed: 32 additions & 5 deletions

File tree

microservices/vlm-openvino-serving/compose.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ services:
2121
VLM_DEVICE: ${VLM_DEVICE}
2222
VLM_SEED: ${VLM_SEED}
2323
WORKERS: ${WORKERS:-1}
24+
VLM_MAX_COMPLETION_TOKENS: ${VLM_MAX_COMPLETION_TOKENS}
2425
restart: unless-stopped
2526
devices:
2627
- /dev/dri:/dev/dri

microservices/vlm-openvino-serving/setup.sh

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,11 @@ export RENDER_GROUP_ID=$(getent group render | awk -F: '{printf "%s\n", $3}')
4040
export VLM_SERVICE_PORT=9764
4141
export VLM_SEED=42
4242

43+
# By default, VLM_MAX_COMPLETION_TOKENS is unset (which results in None in Python)
44+
# To set a specific value, uncomment and modify the following line AND remove the `unset` line below it (otherwise the value is cleared again):
45+
# export VLM_MAX_COMPLETION_TOKENS=1000
46+
unset VLM_MAX_COMPLETION_TOKENS
47+
4348
# Check if VLM_MODEL_NAME is not defined or empty
4449
if [ -z "$VLM_MODEL_NAME" ]; then
4550
echo -e "ERROR: VLM_MODEL_NAME is not set in your shell environment."

microservices/vlm-openvino-serving/src/utils/common.py

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,9 @@
55
import os
66

77
from dotenv import load_dotenv
8-
from pydantic import Field
8+
from pydantic import Field, field_validator
99
from pydantic_settings import BaseSettings
10+
from typing import Optional, Any
1011

1112
# Configure logger
1213
logging.basicConfig(
@@ -48,6 +49,20 @@ class Settings(BaseSettings):
4849
)
4950
VLM_DEVICE: str = Field(default="CPU", json_schema_extra={"env": "VLM_DEVICE"})
5051
SEED: int = Field(default=42, json_schema_extra={"env": "SEED"})
52+
VLM_MAX_COMPLETION_TOKENS: Optional[int] = Field(
53+
default=None,
54+
json_schema_extra={"env": "VLM_MAX_COMPLETION_TOKENS"},
55+
)
56+
57+
@field_validator("VLM_MAX_COMPLETION_TOKENS", mode="before")
@classmethod
def validate_max_completion_tokens(cls, v: Any) -> Optional[int]:
    """Coerce the raw VLM_MAX_COMPLETION_TOKENS value to an int or None.

    Registered in "before" mode, so it receives the raw input value
    before pydantic applies its own ``Optional[int]`` validation.

    Args:
        v: Raw value for the setting. ``None`` and the empty string are
            both treated as "not configured".

    Returns:
        The value converted with ``int()``, or ``None`` when the value is
        missing, empty, or cannot be converted.
    """
    if v is None or v == "":
        return None
    try:
        return int(v)
    except (ValueError, TypeError):
        # Keep the best-effort fallback to None, but surface the bad
        # value instead of discarding it silently.
        logging.getLogger(__name__).warning(
            "Ignoring invalid VLM_MAX_COMPLETION_TOKENS value %r; falling back to None",
            v,
        )
        return None
5166

5267

5368
class ErrorMessages:

microservices/vlm-openvino-serving/src/utils/data_models.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55

66
from pydantic import BaseModel, Field
77

8+
from .common import settings
9+
810

911
class MessageContentText(BaseModel):
1012
"""
@@ -116,7 +118,7 @@ class ChatRequest(BaseModel):
116118
None, json_schema_extra={"example": 1.15}
117119
)
118120
max_completion_tokens: Optional[int] = Field(
119-
None, json_schema_extra={"example": 1000}
121+
settings.VLM_MAX_COMPLETION_TOKENS, json_schema_extra={"example": 1000}
120122
)
121123
temperature: Optional[float] = Field(None, json_schema_extra={"example": 0.3})
122124
top_p: Optional[float] = Field(None, json_schema_extra={"example": 0.5})

sample-applications/video-search-and-summarization/chart/subchart/vlminference/templates/vlm-inference-deployment.yaml

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,10 @@ spec:
1414
labels:
1515
app: {{ .Values.vlminference.name }}
1616
spec:
17+
securityContext:
18+
runAsUser: 1000
19+
runAsGroup: 1000
20+
fsGroup: 1000
1721
containers:
1822
- name: {{ .Values.vlminference.name }}
1923
image: "{{ .Values.vlminference.image.repository }}:{{ .Values.vlminference.image.tag }}"
@@ -48,9 +52,9 @@ spec:
4852
volumeMounts:
4953
- name: workspace
5054
mountPath: /app/ov-model
55+
securityContext:
56+
readOnlyRootFilesystem: false
5157
volumes:
5258
- name: workspace
5359
persistentVolumeClaim:
54-
claimName: {{ .Values.vlminference.volumeMounts.pvcName }}
55-
56-
60+
claimName: {{ .Values.vlminference.volumeMounts.pvcName }}

0 commit comments

Comments
 (0)