Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changes/11648.fix.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Wrap legacy string `start_command` in model definitions as `[shell, '-c', value]` to preserve shell semantics, with a data migration that repairs previously-broken single-token argv rows
89 changes: 47 additions & 42 deletions src/ai/backend/common/config.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from __future__ import annotations

import os
import shlex
import sys
from collections.abc import Mapping, MutableMapping
from pathlib import Path
Expand All @@ -14,7 +13,7 @@
AliasChoices,
ConfigDict,
Field,
field_validator,
model_validator,
)

from . import validators as tx
Expand Down Expand Up @@ -144,46 +143,52 @@ def snake_to_kebab_case(string: str) -> str:
agent_selector_config_iv = t.Dict({}) | agent_selector_globalconfig_iv


def _normalize_start_command(value: Any) -> Any:
"""Coerce legacy ``str`` ``start_command`` into argv list via
:func:`shlex.split`. Lists, ``None``, and other types pass through so
the schema's strict check rejects them.
"""
if isinstance(value, str):
return shlex.split(value)
return value
DEFAULT_SHELL = "/bin/bash"


def _wrap_str_start_command_into_argv(service: Any) -> Any:
if not isinstance(service, dict):
return service
sc = service.get("start_command")
if not isinstance(sc, str):
return service
shell = service.get("shell") or DEFAULT_SHELL
# key override with the wrapped value, preserving other keys if present.
return {**service, "start_command": [shell, "-c", sc]}
Comment thread
seedspirit marked this conversation as resolved.
Comment thread
seedspirit marked this conversation as resolved.
Comment thread
seedspirit marked this conversation as resolved.


model_definition_iv = t.Dict({
t.Key("models"): t.List(
t.Dict({
t.Key("name"): t.String,
t.Key("model_path"): t.String,
t.Key("service", default=None): t.Null
| t.Dict({
# ai.backend.kernel.service.ServiceParser.start_service()
# ai.backend.kernel.service_actions
t.Key("pre_start_actions", default=[]): t.Null
| t.List(
t.Dict({
t.Key("action"): t.String,
t.Key("args"): t.Dict().allow_extra("*"),
})
),
t.Key("start_command", default=None): (t.Null | t.String | t.List(t.String))
>> _normalize_start_command,
t.Key("shell", default="/bin/bash"): t.String,
t.Key("port"): t.ToInt[1:],
t.Key("health_check", default=None): t.Null
t.Key("service", default=None): (
t.Null
| t.Dict({
t.Key("interval", default=10): t.Null | t.ToFloat[0:],
t.Key("path"): t.String,
t.Key("max_retries", default=10): t.Null | t.ToInt[1:],
t.Key("max_wait_time", default=15): t.Null | t.ToFloat[0:],
t.Key("expected_status_code", default=200): t.Null | t.ToInt[100:],
t.Key("initial_delay", default=60): t.Null | t.ToFloat[0:],
}),
}),
# ai.backend.kernel.service.ServiceParser.start_service()
# ai.backend.kernel.service_actions
t.Key("pre_start_actions", default=[]): t.Null
| t.List(
t.Dict({
t.Key("action"): t.String,
t.Key("args"): t.Dict().allow_extra("*"),
})
),
t.Key("start_command", default=None): t.Null | t.String | t.List(t.String),
t.Key("shell", default=DEFAULT_SHELL): t.String,
t.Key("port"): t.ToInt[1:],
t.Key("health_check", default=None): t.Null
| t.Dict({
t.Key("interval", default=10): t.Null | t.ToFloat[0:],
t.Key("path"): t.String,
t.Key("max_retries", default=10): t.Null | t.ToInt[1:],
t.Key("max_wait_time", default=15): t.Null | t.ToFloat[0:],
t.Key("expected_status_code", default=200): t.Null | t.ToInt[100:],
t.Key("initial_delay", default=60): t.Null | t.ToFloat[0:],
}),
})
)
>> _wrap_str_start_command_into_argv,
t.Key("metadata", default=None): t.Null
| t.Dict({
t.Key("author", default=None): t.Null | t.String(allow_blank=True),
Expand Down Expand Up @@ -268,9 +273,9 @@ class ModelServiceConfig(BaseConfigModel):
examples=[["python", "service.py"], ["vllm", "serve", "{model_path}"]],
)
shell: str = Field(
default="/bin/bash",
default=DEFAULT_SHELL,
description="Shell configured for the model service.",
examples=["/bin/bash"],
examples=[DEFAULT_SHELL],
)
port: int = Field(
description="Port number for the model service. Must be greater than 1.",
Expand All @@ -282,10 +287,10 @@ class ModelServiceConfig(BaseConfigModel):
description="Health check configuration for the model service.",
)

@field_validator("start_command", mode="before")
@model_validator(mode="before")
@classmethod
def _coerce_start_command(cls, value: Any) -> Any:
return _normalize_start_command(value)
def _wrap_str_start_command(cls, data: Any) -> Any:
return _wrap_str_start_command_into_argv(data)


class ModelMetadata(BaseConfigModel):
Expand Down Expand Up @@ -553,10 +558,10 @@ class ModelServiceConfigDraft(BaseConfigModel):
port: int | None = None
health_check: ModelHealthCheckDraft | None = None

@field_validator("start_command", mode="before")
@model_validator(mode="before")
@classmethod
def _coerce_start_command(cls, value: Any) -> Any:
return _normalize_start_command(value)
def _wrap_str_start_command(cls, data: Any) -> Any:
return _wrap_str_start_command_into_argv(data)
Comment thread
seedspirit marked this conversation as resolved.

def to_resolved(self) -> ModelServiceConfig:
# Drop unset (None) scalars so the strict type's ``Field(default=...)``
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
"""rewrap legacy string start_command via shell -c

Migration ``8c1f7d3a9e2b`` (Part of: 26.5.0) wrapped any legacy ``str``
``start_command`` as a one-item argv list (e.g. ``"python service.py"``
became ``["python service.py"]``). That value is meaningless at exec
time -- ``execve`` would look for a binary literally named
``"python service.py"`` -- and it loses the shell semantics the user
originally intended.

This migration repairs those rows by replacing single-element argv
lists whose only token still looks like a shell command (contains
whitespace) with ``[shell, "-c", token]``, where ``shell`` comes from
the sibling ``service.shell`` field and falls back to ``/bin/bash`` to
match ``ai.backend.common.config.DEFAULT_SHELL``. Multi-token argv
lists and single-token lists without whitespace are left untouched,
so the migration is safe to re-apply.

Revision ID: 70ffcaaa5f5c
Revises: ba42cb865efe
Create Date: 2026-05-18

"""

import json

import sqlalchemy as sa
from alembic import op
from sqlalchemy.engine import Connection

# revision identifiers, used by Alembic.
revision = "70ffcaaa5f5c"
down_revision = "ba42cb865efe"
# Part of: 26.5.1
branch_labels = None
depends_on = None

DEFAULT_SHELL = "/bin/bash"


def _rewrap_model_definition(conn: Connection, table: str, column: str) -> None:
rows = conn.execute(
sa.text(f"SELECT id, {column} FROM {table} WHERE {column} IS NOT NULL")
).fetchall()

for row_id, model_definition in rows:
changed = False
for model in model_definition.get("models") or []:
service = model.get("service") or {}
start_command = service.get("start_command")
if (
isinstance(start_command, list)
and len(start_command) == 1
and " " in start_command[0]
Comment thread
seedspirit marked this conversation as resolved.
Comment thread
seedspirit marked this conversation as resolved.
):
# Case when the start_command is a single string that looks like a shell command.
# e.g. ["python service.py"] -> ["/bin/bash", "-c", "python service.py"]
shell = service.get("shell") or DEFAULT_SHELL
service["start_command"] = [shell, "-c", start_command[0]]
changed = True

if changed:
conn.execute(
sa.text(f"UPDATE {table} SET {column} = CAST(:definition AS JSONB) WHERE id = :id"),
{"definition": json.dumps(model_definition), "id": row_id},
)


def upgrade() -> None:
conn = op.get_bind()
_rewrap_model_definition(conn, "deployment_revisions", "model_definition")
_rewrap_model_definition(conn, "deployment_revision_presets", "model_definition")
_rewrap_model_definition(conn, "runtime_variants", "default_model_definition")


def downgrade() -> None:
# No-op: the ``[shell, "-c", token]`` form is also valid argv under
# any prior schema.
pass
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,11 @@
``model_definition`` validators.

PR #11402 narrowed ``ModelServiceConfig.start_command`` from
``str | list[str]`` to ``list[str] | None``. To preserve backward
compatibility for users who still author the field as a shell string,
the validators normalize ``str`` input via :func:`shlex.split`.
``str | list[str]`` to ``list[str] | None``. Legacy ``str`` inputs are
preserved by wrapping them as ``[shell, "-c", start_command]`` so the
original shell semantics (pipes, redirections, env expansion,
multiline) survive. ``shell`` falls back to ``/bin/bash`` when the
input omits it.
"""

from __future__ import annotations
Expand All @@ -26,27 +28,38 @@
START_COMMAND_CASES = [
pytest.param(
"python service.py",
["python", "service.py"],
id="simple-split",
None,
["/bin/bash", "-c", "python service.py"],
id="simple-default-shell",
),
pytest.param(
'python -c "import x; x.run()"',
["python", "-c", "import x; x.run()"],
id="shlex-preserves-quoted",
None,
["/bin/bash", "-c", 'python -c "import x; x.run()"'],
id="quoted-default-shell",
),
pytest.param(
"echo $HOME && exec python serve.py",
"/bin/zsh",
["/bin/zsh", "-c", "echo $HOME && exec python serve.py"],
id="shell-semantics-custom-shell",
),
]


def _wrap_definition(start_command: str) -> dict[str, Any]:
def _wrap_definition(start_command: str, shell: str | None = None) -> dict[str, Any]:
service: dict[str, Any] = {
"start_command": start_command,
"port": 8080,
}
if shell is not None:
service["shell"] = shell
return {
"models": [
{
"name": "model",
"model_path": "/models/x",
"service": {
"start_command": start_command,
"port": 8080,
},
"service": service,
}
]
}
Expand All @@ -55,30 +68,31 @@ def _wrap_definition(start_command: str) -> dict[str, Any]:
class TestPydanticInputCompat:
"""REST/GQL input → ``ModelServiceConfigDraft`` Pydantic validator."""

@pytest.mark.parametrize(("raw", "expected"), START_COMMAND_CASES)
def test_str_input_is_normalized(self, raw: str, expected: list[str]) -> None:
draft = ModelServiceConfigDraft.model_validate({
"start_command": raw,
"port": 8080,
})
@pytest.mark.parametrize(("raw", "shell", "expected"), START_COMMAND_CASES)
def test_str_input_is_wrapped(self, raw: str, shell: str | None, expected: list[str]) -> None:
payload: dict[str, Any] = {"start_command": raw, "port": 8080}
if shell is not None:
payload["shell"] = shell
draft = ModelServiceConfigDraft.model_validate(payload)
assert draft.start_command == expected


class TestTrafaretInputCompat:
"""vfolder YAML scan → ``model_definition_iv`` trafaret validator."""

@pytest.mark.parametrize(("raw", "expected"), START_COMMAND_CASES)
def test_str_input_is_normalized(self, raw: str, expected: list[str]) -> None:
result = model_definition_iv.check(_wrap_definition(raw))
@pytest.mark.parametrize(("raw", "shell", "expected"), START_COMMAND_CASES)
def test_str_input_is_wrapped(self, raw: str, shell: str | None, expected: list[str]) -> None:
result = model_definition_iv.check(_wrap_definition(raw, shell))
assert result["models"][0]["service"]["start_command"] == expected


class TestYAMLInputCompat:
"""End-to-end vfolder scan path: a user-authored ``model-definition.yaml``
is parsed by ruamel.yaml and fed to ``model_definition_iv``. Confirms that
every YAML notation a user might write — legacy shell string, inline flow
sequence, hyphenated block sequence — resolves to the same canonical
``list[str]``.
sequence, hyphenated block sequence — resolves to the expected
``list[str]``. Legacy shell strings are wrapped via ``shell -c``; explicit
argv lists pass through unchanged.
"""

@pytest.mark.parametrize(
Expand All @@ -93,8 +107,21 @@ class TestYAMLInputCompat:
start_command: python service.py
port: 8080
"""),
["python", "service.py"],
id="legacy-shell-string",
["/bin/bash", "-c", "python service.py"],
id="legacy-shell-string-default-shell",
),
pytest.param(
textwrap.dedent("""\
models:
- name: model
model_path: /models/x
service:
start_command: echo $HOME | tee /tmp/out
shell: /bin/zsh
port: 8080
"""),
["/bin/zsh", "-c", "echo $HOME | tee /tmp/out"],
id="legacy-shell-string-custom-shell",
),
pytest.param(
textwrap.dedent("""\
Expand Down Expand Up @@ -138,10 +165,10 @@ class TestPydanticColumnReadCompat:
read → to_data flow.
"""

@pytest.mark.parametrize(("raw", "expected"), START_COMMAND_CASES)
def test_legacy_str_is_normalized(self, raw: str, expected: list[str]) -> None:
@pytest.mark.parametrize(("raw", "shell", "expected"), START_COMMAND_CASES)
def test_legacy_str_is_wrapped(self, raw: str, shell: str | None, expected: list[str]) -> None:
column = PydanticColumn(ModelDefinition)
resolved = column.process_result_value(_wrap_definition(raw), DefaultDialect())
resolved = column.process_result_value(_wrap_definition(raw, shell), DefaultDialect())
assert resolved is not None
assert resolved.models[0].service is not None
assert resolved.models[0].service.start_command == expected
Loading