fix(BA-5983): accept partial ModelDefinition input in deployment API (#11531)

jopemachine · claude · lablup-octodog · web-flow · commit bbb8e0203232 · 2026-05-11T13:41:15.000+09:00
Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
Co-authored-by: octodog <mu001@lablup.com>
diff --git a/changes/11531.fix.md b/changes/11531.fix.md
@@ -0,0 +1 @@
+Make ModelConfig / ModelDefinition / ModelServiceConfig / ModelHealthCheck GraphQL input fields optional so addModelRevision can inherit values from the runtime variant, model-definition.yaml, or revision preset.
diff --git a/docs/manager/graphql-reference/supergraph.graphql b/docs/manager/graphql-reference/supergraph.graphql
@@ -9235,10 +9235,10 @@ input ModelConfigInput
   @join__type(graph: STRAWBERRY)
 {
   """Name of the model."""
-  name: String!
+  name: String = null
 
   """Path to the model file."""
-  modelPath: String!
+  modelPath: String = null
 
   """Configuration for the model service."""
   service: ModelServiceConfigInput = null
@@ -9264,7 +9264,7 @@ input ModelDefinitionInput
   @join__type(graph: STRAWBERRY)
 {
   """List of models in the model definition."""
-  models: [ModelConfigInput!]!
+  models: [ModelConfigInput!] = null
 }
 
 """
@@ -9431,22 +9431,22 @@ input ModelHealthCheckInput
   @join__type(graph: STRAWBERRY)
 {
   """Interval in seconds between health checks."""
-  interval: Float! = 10
+  interval: Float = null
 
   """Path to check for health status."""
-  path: String!
+  path: String = null
 
   """Maximum number of retries for health check."""
-  maxRetries: Int! = 10
+  maxRetries: Int = null
 
   """Maximum time in seconds to wait for a health check response."""
-  maxWaitTime: Float! = 15
+  maxWaitTime: Float = null
 
   """Expected HTTP status code for a healthy response."""
-  expectedStatusCode: Int! = 200
+  expectedStatusCode: Int = null
 
   """Initial delay in seconds before the first health check."""
-  initialDelay: Float! = 60
+  initialDelay: Float = null
 }
 
 """Added in 26.4.2. Metadata describing a model entry."""
@@ -9808,16 +9808,16 @@ input ModelServiceConfigInput
   """
   List of pre-start actions to execute before starting the model service.
   """
-  preStartActions: [PreStartActionInput!]!
+  preStartActions: [PreStartActionInput!] = null
 
   """Command to start the model service."""
   startCommand: [String!] = null
 
   """Shell configured for the model service."""
-  shell: String! = "/bin/bash"
+  shell: String = null
 
-  """Port number for the model service. Must be greater than 1."""
-  port: Int!
+  """Port number for the model service."""
+  port: Int = null
 
   """Health check configuration for the model service."""
   healthCheck: ModelHealthCheckInput = null
diff --git a/docs/manager/graphql-reference/v2-schema.graphql b/docs/manager/graphql-reference/v2-schema.graphql
@@ -6045,10 +6045,10 @@ Added in 26.4.0. Configuration for a single model within a model definition.
 """
 input ModelConfigInput {
   """Name of the model."""
-  name: String!
+  name: String = null
 
   """Path to the model file."""
-  modelPath: String!
+  modelPath: String = null
 
   """Configuration for the model service."""
   service: ModelServiceConfigInput = null
@@ -6070,7 +6070,7 @@ Added in 26.4.0. Model definition containing a list of model configurations.
 """
 input ModelDefinitionInput {
   """List of models in the model definition."""
-  models: [ModelConfigInput!]!
+  models: [ModelConfigInput!] = null
 }
 
 """
@@ -6218,22 +6218,22 @@ type ModelHealthCheck {
 """Added in 26.4.0. Health check configuration for a model service."""
 input ModelHealthCheckInput {
   """Interval in seconds between health checks."""
-  interval: Float! = 10
+  interval: Float = null
 
   """Path to check for health status."""
-  path: String!
+  path: String = null
 
   """Maximum number of retries for health check."""
-  maxRetries: Int! = 10
+  maxRetries: Int = null
 
   """Maximum time in seconds to wait for a health check response."""
-  maxWaitTime: Float! = 15
+  maxWaitTime: Float = null
 
   """Expected HTTP status code for a healthy response."""
-  expectedStatusCode: Int! = 200
+  expectedStatusCode: Int = null
 
   """Initial delay in seconds before the first health check."""
-  initialDelay: Float! = 60
+  initialDelay: Float = null
 }
 
 """Added in 26.4.2. Metadata describing a model entry."""
@@ -6559,16 +6559,16 @@ input ModelServiceConfigInput {
   """
   List of pre-start actions to execute before starting the model service.
   """
-  preStartActions: [PreStartActionInput!]!
+  preStartActions: [PreStartActionInput!] = null
 
   """Command to start the model service."""
   startCommand: [String!] = null
 
   """Shell configured for the model service."""
-  shell: String! = "/bin/bash"
+  shell: String = null
 
-  """Port number for the model service. Must be greater than 1."""
-  port: Int!
+  """Port number for the model service."""
+  port: Int = null
 
   """Health check configuration for the model service."""
   healthCheck: ModelHealthCheckInput = null
diff --git a/src/ai/backend/common/config.py b/src/ai/backend/common/config.py
@@ -534,16 +534,9 @@ class ModelHealthCheckDraft(BaseConfigModel):
     def to_resolved(self) -> ModelHealthCheck:
         if self.path is None:
             raise ValueError("ModelHealthCheck.path is required")
-        return ModelHealthCheck(
-            interval=self.interval if self.interval is not None else 10.0,
-            path=self.path,
-            max_retries=self.max_retries if self.max_retries is not None else 10,
-            max_wait_time=self.max_wait_time if self.max_wait_time is not None else 15.0,
-            expected_status_code=(
-                self.expected_status_code if self.expected_status_code is not None else 200
-            ),
-            initial_delay=self.initial_delay if self.initial_delay is not None else 60.0,
-        )
+        # Drop unset (None) fields so the strict type's ``Field(default=...)``
+        # declarations remain the single source of truth for default values.
+        return ModelHealthCheck.model_validate(self.model_dump(exclude_none=True))
 
 
 class ModelServiceConfigDraft(BaseConfigModel):
@@ -561,12 +554,13 @@ def _coerce_start_command(cls, value: Any) -> Any:
     def to_resolved(self) -> ModelServiceConfig:
         if self.port is None:
             raise ValueError("ModelServiceConfig.port is required")
+        # Drop unset (None) scalars so the strict type's ``Field(default=...)``
+        # declarations remain the single source of truth for default values;
+        # resolve the nested ``health_check`` draft explicitly so its own
+        # required-field check (``path``) fires with a clear error message.
         return ModelServiceConfig(
-            pre_start_actions=self.pre_start_actions or [],
-            start_command=self.start_command,
-            shell=self.shell if self.shell is not None else "/bin/bash",
-            port=self.port,
-            health_check=(self.health_check.to_resolved() if self.health_check else None),
+            **self.model_dump(exclude_none=True, exclude={"health_check"}),
+            health_check=self.health_check.to_resolved() if self.health_check else None,
         )
 
 
diff --git a/src/ai/backend/common/dto/manager/v2/deployment/request.py b/src/ai/backend/common/dto/manager/v2/deployment/request.py
@@ -13,7 +13,10 @@
 from pydantic import Field, field_validator
 
 from ai.backend.common.api_handlers import SENTINEL, BaseRequestModel, Sentinel
-from ai.backend.common.config import ModelDefinitionDraft
+from ai.backend.common.config import (
+    ModelDefinitionDraft,
+    PreStartAction,
+)
 from ai.backend.common.data.model_deployment.types import (
     DeploymentStrategy,
     RouteHealthStatus,
@@ -80,10 +83,15 @@
     "EnvironmentVariablesInput",
     "ExtraVFolderMountInput",
     "ImageInput",
+    "ModelConfigInput",
+    "ModelDefinitionInput",
     "ModelDeploymentMetadataInput",
     "ModelDeploymentNetworkAccessInput",
+    "ModelHealthCheckInput",
+    "ModelMetadataInput",
     "ModelMountConfigInput",
     "ModelRuntimeConfigInput",
+    "ModelServiceConfigInput",
     "ReplicaFilter",
     "ReplicaOrder",
     "ReplicaStatusFilter",
@@ -116,6 +124,67 @@
 )
 
 
+class ModelHealthCheckInput(BaseRequestModel):
+    interval: float | None = None
+    path: str | None = None
+    max_retries: int | None = None
+    max_wait_time: float | None = None
+    expected_status_code: int | None = None
+    initial_delay: float | None = None
+
+
+class ModelMetadataInput(BaseRequestModel):
+    author: str | None = None
+    title: str | None = None
+    version: str | None = None
+    created: str | None = None
+    last_modified: str | None = None
+    description: str | None = None
+    task: str | None = None
+    category: str | None = None
+    architecture: str | None = None
+    framework: list[str] | None = None
+    label: list[str] | None = None
+    license: str | None = None
+    min_resource: dict[str, Any] | None = None
+
+
+class ModelServiceConfigInput(BaseRequestModel):
+    pre_start_actions: list[PreStartAction] | None = None
+    start_command: list[str] | None = None
+    shell: str | None = None
+    port: int | None = None
+    health_check: ModelHealthCheckInput | None = None
+
+
+class ModelConfigInput(BaseRequestModel):
+    name: str | None = None
+    model_path: str | None = None
+    service: ModelServiceConfigInput | None = None
+    metadata: ModelMetadataInput | None = None
+
+
+class ModelDefinitionInput(BaseRequestModel):
+    """All-optional v2 input mirror of :class:`ModelDefinitionDraft`.
+
+    Fields a request omits are filled by lower-priority sources in the
+    revision merge chain (runtime variant baseline, revision preset,
+    vfolder ``model-definition.yaml``, ``model_mount_destination``
+    default). Required-field enforcement happens later in
+    ``ModelDefinitionDraft.to_resolved`` after the merge.
+    """
+
+    models: list[ModelConfigInput] | None = None
+
+    def to_draft(self) -> ModelDefinitionDraft:
+        # ``exclude_unset=True`` keeps the resulting draft's
+        # ``model_fields_set`` aligned with what the caller actually
+        # provided. Without it, every field would appear "explicitly
+        # set" (to ``None``) and clobber lower-priority sources during
+        # the revision merge.
+        return ModelDefinitionDraft.model_validate(self.model_dump(exclude_unset=True))
+
+
 class ClusterConfigInput(BaseRequestModel):
     """Cluster configuration input for a revision."""
 
@@ -240,7 +309,7 @@ class CreateRevisionInputDTO(BaseRequestModel):
     image: ImageInput = Field(description="Container image")
     model_runtime_config: ModelRuntimeConfigInput = Field(description="Runtime configuration")
     model_mount_config: ModelMountConfigInput = Field(description="Model mount configuration")
-    model_definition: ModelDefinitionDraft | None = Field(
+    model_definition: ModelDefinitionInput | None = Field(
         default=None,
         description="Model definition to override the default values generated by the server",
     )
@@ -276,7 +345,7 @@ class AddRevisionGQLInputDTO(BaseRequestModel):
     image: ImageInput = Field(description="Container image")
     model_runtime_config: ModelRuntimeConfigInput = Field(description="Runtime configuration")
     model_mount_config: ModelMountConfigInput = Field(description="Model mount configuration")
-    model_definition: ModelDefinitionDraft | None = Field(
+    model_definition: ModelDefinitionInput | None = Field(
         default=None,
         description="Model definition to override the default values generated by the server",
     )
@@ -403,7 +472,7 @@ class RevisionInput(BaseRequestModel):
         default="/models", description="Mount destination for model vfolder"
     )
     model_definition_path: str = Field(description="Path to model definition file")
-    model_definition: ModelDefinitionDraft | None = Field(
+    model_definition: ModelDefinitionInput | None = Field(
         default=None,
         description="Model definition to override the default values generated by the server",
     )
diff --git a/src/ai/backend/manager/api/adapters/deployment/adapter.py b/src/ai/backend/manager/api/adapters/deployment/adapter.py
@@ -506,7 +506,9 @@ async def create(
                     else None,
                 ),
                 mounts=mounts_creator,
-                model_definition=initial_revision.model_definition,
+                model_definition=initial_revision.model_definition.to_draft()
+                if initial_revision.model_definition is not None
+                else None,
                 revision_preset_id=initial_revision.revision_preset_id,
                 execution=ExecutionSpec(
                     runtime_variant_id=initial_revision.model_runtime_config.runtime_variant_id,
@@ -1111,7 +1113,9 @@ async def add_revision(
                 else None,
                 inference_runtime_config=input.model_runtime_config.inference_runtime_config,
             ),
-            model_definition=input.model_definition,
+            model_definition=input.model_definition.to_draft()
+            if input.model_definition is not None
+            else None,
             revision_preset_id=input.revision_preset_id,
         )
         action_result = await self._processors.deployment.add_model_revision.wait_for_complete(
diff --git a/src/ai/backend/manager/api/gql/deployment/types/revision.py b/src/ai/backend/manager/api/gql/deployment/types/revision.py
diff --git a/src/ai/backend/manager/sokovan/deployment/revision_draft/reader.py b/src/ai/backend/manager/sokovan/deployment/revision_draft/reader.py
diff --git a/tests/unit/common/dto/manager/v2/deployment/test_request.py b/tests/unit/common/dto/manager/v2/deployment/test_request.py

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	`+Make ModelConfig / ModelDefinition / ModelServiceConfig / ModelHealthCheck GraphQL input fields optional so addModelRevision can inherit values from the runtime variant, model-definition.yaml, or revision preset.`