-
Notifications
You must be signed in to change notification settings - Fork 175
Expand file tree
/
Copy pathrequest.py
More file actions
1016 lines (781 loc) · 39.7 KB
/
request.py
File metadata and controls
1016 lines (781 loc) · 39.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
"""
Request DTOs for Deployment DTO v2.
"""
from __future__ import annotations
from collections.abc import Mapping
from datetime import datetime
from decimal import Decimal
from typing import Any
from uuid import UUID
from pydantic import Field, field_validator
from ai.backend.common.api_handlers import SENTINEL, BaseRequestModel, Sentinel
from ai.backend.common.config import (
ModelDefinitionDraft,
PreStartAction,
)
from ai.backend.common.data.model_deployment.types import (
DeploymentStrategy,
RouteHealthStatus,
RouteStatus,
RouteTrafficStatus,
)
from ai.backend.common.dto.manager.query import (
DateTimeFilter,
IntFilter,
NullableDateTimeFilter,
StringFilter,
UUIDFilter,
)
from ai.backend.common.dto.manager.v2.common import ResourceSlotEntryInput
from ai.backend.common.dto.manager.v2.deployment.types import (
AccessTokenOrderField,
AutoScalingRuleOrderField,
DeploymentOrderField,
IntOrPercent,
OrderDirection,
ReplicaOrderField,
RevisionOrderField,
RouteOrderField,
)
from ai.backend.common.dto.manager.v2.deployment_options import DeploymentOptionsInput
from ai.backend.common.dto.manager.v2.resource_slot.types import ResourceOptsDTOInput
from ai.backend.common.identifier.deployment import DeploymentID
from ai.backend.common.identifier.deployment_preset import DeploymentPresetID
from ai.backend.common.identifier.image import ImageID
from ai.backend.common.identifier.runtime_variant import RuntimeVariantID
from ai.backend.common.identifier.vfolder import VFolderUUID
from ai.backend.common.types import (
AutoScalingMetricSource,
ClusterMode,
MountPermission,
)
from ai.backend.common.utils import dedent_strip
# Public names re-exported by ``from <this module> import *``.
# Kept in alphabetical order. ``AddRevisionOptions`` is listed because it is
# the declared type of ``AddRevisionGQLInputDTO.options``, so callers that
# build that DTO need to be able to import it alongside the DTO itself.
__all__ = (
    "AccessTokenFilter",
    "AccessTokenOrder",
    "ActivateDeploymentInput",
    "ActivateRevisionInput",
    "AddRevisionGQLInputDTO",
    "AddRevisionInput",
    "AddRevisionOptions",
    "AdminSearchDeploymentsInput",
    "AdminSearchRevisionsInput",
    "AutoScalingRuleFilter",
    "AutoScalingRuleOrder",
    "BlueGreenConfigInput",
    "ClusterConfigInput",
    "CreateAccessTokenInput",
    "CreateAutoScalingRuleInput",
    "CreateDeploymentInput",
    "CreateRevisionInputDTO",
    "DeleteAutoScalingRuleInput",
    "DeleteDeploymentInput",
    "DeploymentFilter",
    "DeploymentOrder",
    "DeploymentPolicyFilter",
    "DeploymentStatusFilter",
    "DeploymentStrategyInput",
    "EnvironmentVariableEntryInput",
    "EnvironmentVariablesInput",
    "ExtraVFolderMountInput",
    "ImageInput",
    "ModelConfigInput",
    "ModelDefinitionInput",
    "ModelDeploymentMetadataInput",
    "ModelDeploymentNetworkAccessInput",
    "ModelHealthCheckInput",
    "ModelMetadataInput",
    "ModelMountConfigInput",
    "ModelRuntimeConfigInput",
    "ModelServiceConfigInput",
    "ReplaceDeploymentOptionsGQLInput",
    "ReplaceDeploymentOptionsInput",
    "ReplicaFilter",
    "ReplicaOrder",
    "ReplicaStatusFilter",
    "ReplicaTrafficStatusFilter",
    "ResourceConfigInput",
    "ResourceGroupInput",
    "ResourceSlotEntryInput",
    "ResourceSlotInput",
    "RevisionFilter",
    "RevisionInput",
    "RevisionOrder",
    "RollingUpdateConfigInput",
    "RouteFilter",
    "RouteOrder",
    "RouteStatusFilter",
    "RouteTrafficStatusFilter",
    "ScaleDeploymentInput",
    "SearchAccessTokensInput",
    "SearchAutoScalingRulesInput",
    "SearchDeploymentPoliciesInput",
    "SearchReplicasInput",
    "SearchRoutesInput",
    "SyncReplicaInput",
    "UpdateAutoScalingRuleInput",
    "UpdateDeploymentInput",
    "UpdateRouteTrafficStatusInput",
    "UpsertDeploymentPolicyInput",
)
class ModelHealthCheckInput(BaseRequestModel):
    # Health-check settings nested under ``ModelServiceConfigInput.health_check``.
    # Every field is optional and defaults to ``None``; this class declares no
    # range checks or fallback values of its own.

    interval: float | None = None
    path: str | None = None
    max_retries: int | None = None
    max_wait_time: float | None = None
    expected_status_code: int | None = None
    initial_delay: float | None = None
class ModelMetadataInput(BaseRequestModel):
    # Descriptive metadata nested under ``ModelConfigInput.metadata``.
    # All fields are optional and default to ``None``.

    author: str | None = None
    title: str | None = None
    version: str | None = None
    # ``created`` / ``last_modified`` are plain strings here; no date parsing
    # is performed by this class.
    created: str | None = None
    last_modified: str | None = None
    description: str | None = None
    task: str | None = None
    category: str | None = None
    architecture: str | None = None
    framework: list[str] | None = None
    label: list[str] | None = None
    license: str | None = None
    # Free-form mapping; its keys and value types are not constrained here.
    min_resource: dict[str, Any] | None = None
class ModelServiceConfigInput(BaseRequestModel):
    # Service-launch settings nested under ``ModelConfigInput.service``.
    # All fields are optional and default to ``None``.

    pre_start_actions: list[PreStartAction] | None = None
    start_command: list[str] | None = None
    shell: str | None = None
    port: int | None = None
    health_check: ModelHealthCheckInput | None = None
class ModelConfigInput(BaseRequestModel):
    # One entry of ``ModelDefinitionInput.models``.
    # All fields are optional and default to ``None``.

    name: str | None = None
    model_path: str | None = None
    service: ModelServiceConfigInput | None = None
    metadata: ModelMetadataInput | None = None
class ModelDefinitionInput(BaseRequestModel):
    """All-optional v2 input mirror of :class:`ModelDefinitionDraft`.

    Fields a request omits are filled by lower-priority sources in the
    revision merge chain (runtime variant baseline, revision preset,
    vfolder ``model-definition.yaml``, ``model_mount_destination``
    default). Required-field enforcement happens later in
    ``ModelDefinitionDraft.to_resolved`` after the merge.
    """

    models: list[ModelConfigInput] | None = None

    def to_draft(self) -> ModelDefinitionDraft:
        # Dump only the fields the caller actually supplied. Dumping unset
        # fields as well would mark each of them as explicitly set (to
        # ``None``) on the resulting draft, and those ``None`` values would
        # then override lower-priority sources during the revision merge.
        provided_only = self.model_dump(exclude_unset=True)
        return ModelDefinitionDraft.model_validate(provided_only)
class ClusterConfigInput(BaseRequestModel):
    """Cluster configuration input for a revision."""

    mode: ClusterMode = Field(description="Cluster mode")
    # A cluster must contain at least one node. ``ge=1`` matches the bound
    # that ``RevisionInput.cluster_size`` declares in this module, so zero or
    # negative sizes are rejected at request validation in both input shapes.
    size: int = Field(ge=1, description="Cluster size (number of nodes)")
class ResourceGroupInput(BaseRequestModel):
    """Resource group input for a revision."""

    # Required; the group is identified by name only.
    name: str = Field(
        description="Resource group name",
    )
class ResourceSlotInput(BaseRequestModel):
    """Collection of compute resource allocations."""

    # Required list of slot entries; an empty list is not rejected here.
    entries: list[ResourceSlotEntryInput] = Field(
        description="List of resource allocations",
    )
class ResourceConfigInput(BaseRequestModel):
    """Resource configuration input for a revision."""

    # ``resource_group`` and ``resource_slots`` are required; only
    # ``resource_opts`` may be omitted.
    resource_group: ResourceGroupInput = Field(
        description="Resource group",
    )
    resource_slots: ResourceSlotInput = Field(
        description="Resource slot allocations",
    )
    resource_opts: ResourceOptsDTOInput | None = Field(
        default=None,
        description="Additional resource options",
    )
class ImageInput(BaseRequestModel):
    """Container image input for a revision."""

    # Required typed identifier of the image to run.
    id: ImageID = Field(
        description="Container image ID",
    )
class EnvironmentVariableEntryInput(BaseRequestModel):
    """A single environment variable entry with name and value.

    .. deprecated::
        Retained only for legacy deployment/session DTOs that already expose ``name``.
        New code should use
        :class:`ai.backend.common.dto.manager.v2.common.EnvironmentVariableEntryInput`
        (``key``/``value``) instead.
    """

    # Both parts of the pair are required strings.
    name: str = Field(
        description="Environment variable name",
    )
    value: str = Field(
        description="Environment variable value",
    )
class EnvironmentVariablesInput(BaseRequestModel):
    """A collection of environment variable entries.

    .. deprecated::
        Retained only for legacy deployment/session DTOs.
        New code should use
        :class:`ai.backend.common.dto.manager.v2.common.EnvironmentVariablesInput`.
    """

    # Required list; duplicate names are not detected by this class.
    entries: list[EnvironmentVariableEntryInput] = Field(
        description="List of environment variable entries",
    )
class ModelRuntimeConfigInput(BaseRequestModel):
    """Runtime configuration input for a revision."""

    # Only the runtime variant id is required; the two remaining fields
    # default to ``None``.
    runtime_variant_id: RuntimeVariantID = Field(
        description=(
            "Runtime variant ID (UUID). "
            "Internal v2 adapters consume the id directly; "
            "legacy REST handlers resolve name→id via the RuntimeVariant resolver service "
            "before invoking internal flows."
        ),
    )
    # Free-form mapping; its schema is not constrained by this class.
    inference_runtime_config: dict[str, Any] | None = Field(
        default=None,
        description="Framework-specific inference runtime configuration",
    )
    environ: EnvironmentVariablesInput | None = Field(
        default=None,
        description="Environment variables for the service",
    )
class ModelMountConfigInput(BaseRequestModel):
    """Model mount configuration input for a revision."""

    # All three fields are required in this nested input shape.
    vfolder_id: VFolderUUID = Field(
        description="VFolder ID for the model",
    )
    mount_destination: str = Field(
        description="Mount destination path inside container",
    )
    definition_path: str = Field(
        description="Path to model definition file",
    )
class ExtraVFolderMountInput(BaseRequestModel):
    """Input for an extra vfolder mount."""

    # Only the vfolder id is required; destination and permission default to
    # ``None``.
    vfolder_id: VFolderUUID = Field(
        description="VFolder ID to mount",
    )
    mount_destination: str | None = Field(
        default=None,
        description="Mount destination path",
    )
    mount_perm: MountPermission | None = Field(
        default=None,
        description=(
            "Optional permission override. "
            "``null`` (default) uses the vfolder's own stored permission; "
            "a concrete value (e.g. ``ro``) forces that permission "
            "regardless of what the vfolder grants."
        ),
    )
class CreateRevisionInputDTO(BaseRequestModel):
    """Input for a deployment revision (nested structure matching GQL CreateRevisionInput)."""

    # --- optional identification -------------------------------------------
    name: str | None = Field(
        default=None,
        description="Revision name",
    )
    revision_preset_id: DeploymentPresetID | None = Field(
        default=None,
        description=(
            "DeploymentRevisionPreset ID. "
            "When specified, preset values are used as defaults "
            "and can be overridden by explicitly provided fields."
        ),
    )

    # --- required nested configuration -------------------------------------
    cluster_config: ClusterConfigInput = Field(
        description="Cluster configuration",
    )
    resource_config: ResourceConfigInput = Field(
        description="Resource configuration",
    )
    image: ImageInput = Field(
        description="Container image",
    )
    model_runtime_config: ModelRuntimeConfigInput = Field(
        description="Runtime configuration",
    )
    model_mount_config: ModelMountConfigInput = Field(
        description="Model mount configuration",
    )

    # --- optional overrides and extras -------------------------------------
    model_definition: ModelDefinitionInput | None = Field(
        default=None,
        description="Model definition to override the default values generated by the server",
    )
    extra_mounts: list[ExtraVFolderMountInput] | None = Field(
        default=None,
        description="Additional vfolder mounts",
    )
    # Activation is opt-in: the flag is ``False`` unless the caller sets it.
    auto_activate: bool = Field(
        default=False,
        description="If true, automatically activate this revision after creation.",
    )
class AddRevisionOptions(BaseRequestModel):
    """Options for the add revision operation."""

    # Used as the type of ``AddRevisionGQLInputDTO.options``.
    # Activation is opt-in: the flag is ``False`` unless the caller sets it.
    auto_activate: bool = Field(
        default=False,
        description=(
            "When true, automatically activate the newly added revision "
            "immediately after creation."
        ),
    )
class AddRevisionGQLInputDTO(BaseRequestModel):
    """Input for adding a revision. Used by both GQL and REST v2 APIs."""

    # --- optional identification -------------------------------------------
    name: str | None = Field(
        default=None,
        description="Revision name",
    )
    revision_preset_id: DeploymentPresetID | None = Field(
        default=None,
        description=(
            "DeploymentRevisionPreset ID. "
            "When specified, preset values are used as defaults "
            "and can be overridden by explicitly provided fields."
        ),
    )

    # --- required target and nested configuration --------------------------
    deployment_id: UUID = Field(
        description="Deployment ID",
    )
    cluster_config: ClusterConfigInput = Field(
        description="Cluster configuration",
    )
    resource_config: ResourceConfigInput = Field(
        description="Resource configuration",
    )
    image: ImageInput = Field(
        description="Container image",
    )
    model_runtime_config: ModelRuntimeConfigInput = Field(
        description="Runtime configuration",
    )
    model_mount_config: ModelMountConfigInput = Field(
        description="Model mount configuration",
    )

    # --- optional overrides and extras -------------------------------------
    model_definition: ModelDefinitionInput | None = Field(
        default=None,
        description="Model definition to override the default values generated by the server",
    )
    extra_mounts: list[ExtraVFolderMountInput] | None = Field(
        default=None,
        description="Additional vfolder mounts",
    )
    # Here auto-activation lives inside the ``options`` object rather than as
    # a top-level flag.
    options: AddRevisionOptions | None = Field(
        default=None,
        description="Additional options for the add revision operation.",
    )
class ModelDeploymentMetadataInput(BaseRequestModel):
    """Metadata input for creating a model deployment."""

    project_id: UUID = Field(
        description="Project ID",
    )
    domain_name: str = Field(
        description="Domain name",
    )
    # Optional; when given it must be 1..256 characters long (declared bounds)
    # and must additionally pass the whitespace check in the validator below.
    name: str | None = Field(
        default=None,
        min_length=1,
        max_length=256,
        description="Deployment name",
    )
    tags: list[str] | None = Field(
        default=None,
        description="Deployment tags",
    )

    @field_validator("name")
    @classmethod
    def name_must_not_be_blank(cls, v: str | None) -> str | None:
        # ``None`` (name omitted) passes through untouched. Any other value is
        # returned with surrounding whitespace removed, and is rejected when
        # nothing remains after trimming.
        if v is None:
            return None
        trimmed = v.strip()
        if trimmed:
            return trimmed
        raise ValueError("name must not be blank or whitespace-only")
class ModelDeploymentNetworkAccessInput(BaseRequestModel):
    """Network access configuration input for a deployment."""

    preferred_domain_name: str | None = Field(
        default=None,
        description="Preferred domain name for URL",
    )
    # Public exposure is opt-in: the flag is ``False`` unless the caller sets it.
    open_to_public: bool = Field(
        default=False,
        description="Whether the deployment is public",
    )
class RollingUpdateConfigInput(BaseRequestModel):
    """Input for rolling update configuration.

    ``max_surge`` and ``max_unavailable`` are :class:`IntOrPercent` objects (oneOf):

    - ``{"count": 2}`` — absolute replica count
    - ``{"percent": 0.25}`` — fraction of desired replicas (0.0-1.0)
    """

    # ``default_factory`` builds a new ``IntOrPercent`` for every model
    # instance instead of sharing one default object between instances.
    max_surge: IntOrPercent = Field(
        default_factory=lambda: IntOrPercent(percent=0.5),
        description=dedent_strip(
            """
            Maximum number of extra replicas that can be created
            during a rolling update.
            Defaults to 50% of desired replicas.
            """
        ),
        examples=[
            {"count": 2},
            {"percent": 0.25},
        ],
    )
    max_unavailable: IntOrPercent = Field(
        default_factory=lambda: IntOrPercent(percent=0.0),
        description=dedent_strip(
            """
            Maximum number of replicas that can be unavailable
            during a rolling update.
            Defaults to 0%.
            """
        ),
        examples=[
            {"count": 0},
            {"percent": 0.0},
        ],
    )
class BlueGreenConfigInput(BaseRequestModel):
    """Input for blue/green deployment configuration."""

    # Promotion is manual unless the caller opts in.
    auto_promote: bool = Field(
        default=False,
        description="Automatically promote new revision",
    )
    # Non-negative (``ge=0``); the default of 0 means no delay.
    promote_delay_seconds: int = Field(
        default=0,
        ge=0,
        description="Delay in seconds before promotion",
    )
class DeploymentStrategyInput(BaseRequestModel):
    """Deployment strategy input with type discriminator."""

    type: DeploymentStrategy = Field(
        description="Deployment strategy type",
    )
    # NOTE(review): the descriptions below call each config "required" for
    # its strategy, but this class declares no validator tying ``type`` to the
    # matching config, so both may be ``None`` here. Confirm where (or
    # whether) that pairing is enforced.
    rolling_update: RollingUpdateConfigInput | None = Field(
        default=None,
        description="Rolling update config (required for ROLLING strategy)",
    )
    blue_green: BlueGreenConfigInput | None = Field(
        default=None,
        description="Blue/green config (required for BLUE_GREEN strategy)",
    )
class RevisionInput(BaseRequestModel):
    """Input for a deployment revision."""

    # Flat counterpart of the nested revision inputs in this module: cluster,
    # resource, runtime and mount settings are top-level fields here.

    # --- optional identification -------------------------------------------
    name: str | None = Field(
        default=None,
        description="Revision name",
    )
    revision_preset_id: DeploymentPresetID | None = Field(
        default=None,
        description=(
            "DeploymentRevisionPreset ID. "
            "When specified, preset values are used as defaults "
            "and can be overridden by explicitly provided fields."
        ),
    )

    # --- image and cluster --------------------------------------------------
    image_id: UUID = Field(
        description="Container image ID",
    )
    cluster_mode: ClusterMode = Field(
        description="Cluster mode for the revision",
    )
    # Defaults to a single node and may not go below one (``ge=1``).
    cluster_size: int = Field(
        default=1,
        ge=1,
        description="Number of nodes in the cluster",
    )

    # --- resources ----------------------------------------------------------
    resource_group: str = Field(
        description="Resource group for allocation",
    )
    # Free-form mappings; keys and value types are not constrained here.
    resource_slots: Mapping[str, Any] = Field(
        description="Resource slot requirements",
    )
    resource_opts: Mapping[str, Any] | None = Field(
        default=None,
        description="Optional resource options",
    )

    # --- runtime ------------------------------------------------------------
    runtime_variant_id: RuntimeVariantID = Field(
        description="Runtime variant ID (UUID)",
    )
    inference_runtime_config: dict[str, Any] | None = Field(
        default=None,
        description="Framework-specific inference runtime configuration",
    )

    # --- model mount --------------------------------------------------------
    model_vfolder_id: VFolderUUID = Field(
        description="Model VFolder ID",
    )
    model_mount_destination: str = Field(
        default="/models",
        description="Mount destination for model vfolder",
    )
    model_definition_path: str = Field(
        description="Path to model definition file",
    )
    model_definition: ModelDefinitionInput | None = Field(
        default=None,
        description="Model definition to override the default values generated by the server",
    )

    # --- extras -------------------------------------------------------------
    extra_mounts: list[ExtraVFolderMountInput] | None = Field(
        default=None,
        description="Additional vfolder mounts",
    )
    environ: Mapping[str, str] | None = Field(
        default=None,
        description="Environment variables",
    )
class CreateDeploymentInput(BaseRequestModel):
    """Input for creating a deployment."""

    metadata: ModelDeploymentMetadataInput = Field(
        description="Deployment metadata",
    )
    network_access: ModelDeploymentNetworkAccessInput = Field(
        description="Network access configuration",
    )
    default_deployment_strategy: DeploymentStrategyInput = Field(
        description="Deployment strategy configuration",
    )
    # Required and non-negative (``ge=0``), so zero replicas is accepted.
    replica_count: int = Field(
        ge=0,
        description="Number of replicas",
    )
    # The only optional field of this input.
    initial_revision: CreateRevisionInputDTO | None = Field(
        default=None,
        description=(
            "Initial revision configuration. "
            "If omitted, deployment is created without a revision "
            "and must be added later via add_revision."
        ),
    )
class UpdateDeploymentInput(BaseRequestModel):
    """Input for updating a deployment."""

    # Every field is optional. Except for ``tags``, the default is ``None``.

    # NOTE(review): unlike ``ModelDeploymentMetadataInput.name`` this field
    # declares no ``min_length``/``max_length``; only the whitespace check in
    # the validator below applies. Confirm whether the 256-character bound
    # used at creation should also apply on update.
    name: str | None = Field(
        default=None,
        description="Updated deployment name",
    )
    replica_count: int | None = Field(
        default=None,
        ge=0,
        description="Updated replica count",
    )
    # NOTE(review): ``tags`` is the only field whose default is ``SENTINEL``
    # rather than ``None``, which makes an omitted value distinguishable from
    # an explicit ``null``. The description says SENTINEL clears the tags,
    # yet SENTINEL is also what an omitted field yields; confirm which of
    # SENTINEL / ``None`` means "no change" and which means "clear".
    tags: list[str] | Sentinel | None = Field(
        default=SENTINEL,
        description="Updated tags. Use SENTINEL to clear.",
    )
    open_to_public: bool | None = Field(
        default=None,
        description="Updated network visibility. None means no change.",
    )
    preferred_domain_name: str | None = Field(
        default=None,
        description="Updated preferred domain name. None means no change.",
    )
    default_deployment_strategy: DeploymentStrategyInput | None = Field(
        default=None,
        description="Updated deployment strategy. None means no change.",
    )

    @field_validator("name")
    @classmethod
    def name_must_not_be_blank(cls, v: str | None) -> str | None:
        # ``None`` (name omitted) passes through untouched. Any other value is
        # returned with surrounding whitespace removed, and is rejected when
        # nothing remains after trimming.
        if v is None:
            return None
        trimmed = v.strip()
        if trimmed:
            return trimmed
        raise ValueError("name must not be blank or whitespace-only")
class DeleteDeploymentInput(BaseRequestModel):
    """Input for deleting a deployment."""

    # Required; the deployment is addressed by UUID only.
    id: UUID = Field(
        description="Deployment ID to delete",
    )
class ActivateDeploymentInput(BaseRequestModel):
    """Input for activating a deployment."""

    # Required; the deployment is addressed by UUID only.
    id: UUID = Field(
        description="Deployment ID to activate",
    )
class ScaleDeploymentInput(BaseRequestModel):
    """Input for scaling a deployment."""

    id: UUID = Field(
        description="Deployment ID to scale",
    )
    # Required and non-negative (``ge=0``), so scaling to zero is accepted.
    replicas: int = Field(
        ge=0,
        description="Target replica count",
    )
class AddRevisionInput(BaseRequestModel):
    """Input for adding a revision to a deployment."""

    # Pairs the target deployment with a flat ``RevisionInput`` payload; both
    # are required.
    deployment_id: UUID = Field(
        description="Deployment ID",
    )
    revision: RevisionInput = Field(
        description="Revision configuration",
    )
# ---------------------------------------------------------------------------
# Filter types
# ---------------------------------------------------------------------------
class DeploymentStatusFilter(BaseRequestModel):
    """Filter for deployment status."""

    # Status values are plain strings in this filter (the route and replica
    # status filters in this module use enum types instead).
    equals: str | None = Field(
        default=None,
        description="Exact status match",
    )
    # ``in`` is a Python keyword, so the attribute is named ``in_`` and the
    # external name is supplied through ``alias="in"``.
    in_: list[str] | None = Field(
        default=None,
        alias="in",
        description="Status is in list",
    )
    not_equals: str | None = Field(
        default=None,
        description="Excludes exact status match",
    )
    not_in: list[str] | None = Field(
        default=None,
        description="Status is not in list",
    )
class RouteStatusFilter(BaseRequestModel):
    """Filter for route status."""

    equals: RouteStatus | None = Field(
        default=None,
        description="Exact status match",
    )
    # ``in`` is a Python keyword, so the attribute is named ``in_`` and the
    # external name is supplied through ``alias="in"``.
    in_: list[RouteStatus] | None = Field(
        default=None,
        alias="in",
        description="Status is in list",
    )
    not_equals: RouteStatus | None = Field(
        default=None,
        description="Excludes exact status match",
    )
    not_in: list[RouteStatus] | None = Field(
        default=None,
        description="Status is not in list",
    )
class RouteTrafficStatusFilter(BaseRequestModel):
    """Filter for route traffic status."""

    equals: RouteTrafficStatus | None = Field(
        default=None,
        description="Exact status match",
    )
    # ``in`` is a Python keyword, so the attribute is named ``in_`` and the
    # external name is supplied through ``alias="in"``.
    in_: list[RouteTrafficStatus] | None = Field(
        default=None,
        alias="in",
        description="Status is in list",
    )
    not_equals: RouteTrafficStatus | None = Field(
        default=None,
        description="Excludes exact status match",
    )
    not_in: list[RouteTrafficStatus] | None = Field(
        default=None,
        description="Status is not in list",
    )
class ReplicaStatusFilter(BaseRequestModel):
    """Filter for replica (route) status."""

    # Replica status is expressed with the ``RouteStatus`` enum.
    equals: RouteStatus | None = Field(
        default=None,
        description="Exact status match",
    )
    # ``in`` is a Python keyword, so the attribute is named ``in_`` and the
    # external name is supplied through ``alias="in"``.
    in_: list[RouteStatus] | None = Field(
        default=None,
        alias="in",
        description="Status is in list",
    )
    not_equals: RouteStatus | None = Field(
        default=None,
        description="Excludes exact status match",
    )
    not_in: list[RouteStatus] | None = Field(
        default=None,
        description="Status is not in list",
    )
class ReplicaTrafficStatusFilter(BaseRequestModel):
    """Filter for replica traffic status."""

    # Replica traffic status is expressed with the ``RouteTrafficStatus`` enum.
    equals: RouteTrafficStatus | None = Field(
        default=None,
        description="Exact traffic status match",
    )
    # ``in`` is a Python keyword, so the attribute is named ``in_`` and the
    # external name is supplied through ``alias="in"``.
    in_: list[RouteTrafficStatus] | None = Field(
        default=None,
        alias="in",
        description="Traffic status is in list",
    )
    not_equals: RouteTrafficStatus | None = Field(
        default=None,
        description="Excludes exact traffic status match",
    )
    not_in: list[RouteTrafficStatus] | None = Field(
        default=None,
        description="Traffic status is not in list",
    )
class DeploymentFilter(BaseRequestModel):
    """Filter for deployments."""

    # Every criterion is optional and defaults to ``None``.

    # --- scalar / string criteria ------------------------------------------
    name: StringFilter | None = Field(
        default=None,
        description="Name filter",
    )
    status: DeploymentStatusFilter | None = Field(
        default=None,
        description="Status filter",
    )
    open_to_public: bool | None = Field(
        default=None,
        description="Public access filter",
    )
    tags: StringFilter | None = Field(
        default=None,
        description="Tags filter",
    )
    endpoint_url: StringFilter | None = Field(
        default=None,
        description="Endpoint URL filter",
    )
    domain_name: StringFilter | None = Field(
        default=None,
        description="Domain name filter",
    )
    project_id: UUIDFilter | None = Field(
        default=None,
        description="Filter by project ID",
    )
    resource_group: StringFilter | None = Field(
        default=None,
        description="Resource group name filter",
    )
    created_user_id: UUIDFilter | None = Field(
        default=None,
        description="Filter by the user who created the deployment",
    )

    # --- time criteria -------------------------------------------------------
    created_at: DateTimeFilter | None = Field(
        default=None,
        description="Creation datetime filter",
    )
    destroyed_at: NullableDateTimeFilter | None = Field(
        default=None,
        description="Destruction datetime filter (supports is_null)",
    )

    # --- logical combinators: nested filters of this same type --------------
    AND: list[DeploymentFilter] | None = Field(
        default=None,
        description="AND conjunction",
    )
    OR: list[DeploymentFilter] | None = Field(
        default=None,
        description="OR conjunction",
    )
    NOT: list[DeploymentFilter] | None = Field(
        default=None,
        description="NOT negation",
    )


# AND/OR/NOT refer to the class from inside its own body, so the model is
# rebuilt once the name exists to resolve those self-references.
DeploymentFilter.model_rebuild()
class RevisionFilter(BaseRequestModel):
    """Filter for deployment revisions."""

    # Every criterion is optional and defaults to ``None``.
    revision_number: IntFilter | None = Field(
        default=None,
        description="Filter by revision number",
    )
    # A bare UUID (exact value), not a ``UUIDFilter`` object.
    deployment_id: UUID | None = Field(
        default=None,
        description="Filter by deployment ID",
    )
    image_id: UUIDFilter | None = Field(
        default=None,
        description="Filter by container image ID",
    )
    model_vfolder_id: UUIDFilter | None = Field(
        default=None,
        description="Filter by model VFolder ID",
    )
    resource_group: StringFilter | None = Field(
        default=None,
        description="Resource group name filter",
    )
    cluster_mode: StringFilter | None = Field(
        default=None,
        description="Cluster mode filter",
    )
    created_at: DateTimeFilter | None = Field(
        default=None,
        description="Creation datetime filter",
    )

    # --- logical combinators: nested filters of this same type --------------
    AND: list[RevisionFilter] | None = Field(
        default=None,
        description="AND conjunction",
    )
    OR: list[RevisionFilter] | None = Field(
        default=None,
        description="OR conjunction",
    )
    NOT: list[RevisionFilter] | None = Field(
        default=None,
        description="NOT negation",
    )


# AND/OR/NOT refer to the class from inside its own body, so the model is
# rebuilt once the name exists to resolve those self-references.
RevisionFilter.model_rebuild()
class RouteFilter(BaseRequestModel):
    """Filter for deployment routes."""

    # Every criterion is optional and defaults to ``None``.
    # A bare UUID (exact value), not a ``UUIDFilter`` object.
    deployment_id: UUID | None = Field(
        default=None,
        description="Filter by deployment ID",
    )
    # The three status criteria are plain lists of enum members rather than
    # the equals/in/not_in filter objects defined elsewhere in this module.
    status: list[RouteStatus] | None = Field(
        default=None,
        description="Route lifecycle status filter",
    )
    health_status: list[RouteHealthStatus] | None = Field(
        default=None,
        description="Route health status filter",
    )
    traffic_status: list[RouteTrafficStatus] | None = Field(
        default=None,
        description="Traffic status filter",
    )

    # --- logical combinators: nested filters of this same type --------------
    AND: list[RouteFilter] | None = Field(
        default=None,
        description="AND conjunction",
    )
    OR: list[RouteFilter] | None = Field(
        default=None,
        description="OR conjunction",
    )
    NOT: list[RouteFilter] | None = Field(
        default=None,
        description="NOT negation",
    )


# AND/OR/NOT refer to the class from inside its own body, so the model is
# rebuilt once the name exists to resolve those self-references.
RouteFilter.model_rebuild()
class AccessTokenFilter(BaseRequestModel):
    """Filter for access tokens."""

    # Every criterion is optional and defaults to ``None``.
    # A bare UUID (exact value), not a ``UUIDFilter`` object.
    deployment_id: UUID | None = Field(
        default=None,
        description="Filter by deployment ID",
    )
    token: StringFilter | None = Field(
        default=None,
        description="Token value filter",
    )
    expires_at: DateTimeFilter | None = Field(
        default=None,
        description="Expiration datetime filter",
    )
    created_at: DateTimeFilter | None = Field(
        default=None,
        description="Creation datetime filter",
    )

    # --- logical combinators: nested filters of this same type --------------
    AND: list[AccessTokenFilter] | None = Field(
        default=None,
        description="AND conjunction",
    )
    OR: list[AccessTokenFilter] | None = Field(
        default=None,
        description="OR conjunction",
    )
    NOT: list[AccessTokenFilter] | None = Field(
        default=None,
        description="NOT negation",
    )


# AND/OR/NOT refer to the class from inside its own body, so the model is
# rebuilt once the name exists to resolve those self-references.
AccessTokenFilter.model_rebuild()
class AutoScalingRuleFilter(BaseRequestModel):
    """Filter for auto-scaling rules."""

    # Every criterion is optional and defaults to ``None``.
    # A bare UUID (exact value), not a ``UUIDFilter`` object.
    deployment_id: UUID | None = Field(
        default=None,
        description="Filter by deployment ID",
    )
    created_at: DateTimeFilter | None = Field(
        default=None,
        description="Creation datetime filter",
    )
    # Uses the nullable datetime filter type, unlike ``created_at``.
    last_triggered_at: NullableDateTimeFilter | None = Field(
        default=None,
        description="Last triggered datetime filter",
    )

    # --- logical combinators: nested filters of this same type --------------
    AND: list[AutoScalingRuleFilter] | None = Field(
        default=None,
        description="AND conjunction",
    )
    OR: list[AutoScalingRuleFilter] | None = Field(
        default=None,
        description="OR conjunction",
    )
    NOT: list[AutoScalingRuleFilter] | None = Field(
        default=None,
        description="NOT negation",
    )


# AND/OR/NOT refer to the class from inside its own body, so the model is
# rebuilt once the name exists to resolve those self-references.
AutoScalingRuleFilter.model_rebuild()
class ReplicaFilter(BaseRequestModel):
    """Filter for deployment replicas."""

    # Every criterion is optional and defaults to ``None``.
    # A bare UUID (exact value), not a ``UUIDFilter`` object.
    deployment_id: UUID | None = Field(
        default=None,
        description="Filter by deployment ID",
    )
    status: ReplicaStatusFilter | None = Field(
        default=None,
        description="Replica status filter",
    )
    traffic_status: ReplicaTrafficStatusFilter | None = Field(
        default=None,
        description="Replica traffic status filter",
    )

    # --- logical combinators: nested filters of this same type --------------
    AND: list[ReplicaFilter] | None = Field(
        default=None,
        description="AND conjunction",
    )
    OR: list[ReplicaFilter] | None = Field(
        default=None,
        description="OR conjunction",
    )
    NOT: list[ReplicaFilter] | None = Field(
        default=None,
        description="NOT negation",
    )


# AND/OR/NOT refer to the class from inside its own body, so the model is
# rebuilt once the name exists to resolve those self-references.
ReplicaFilter.model_rebuild()
class DeploymentPolicyFilter(BaseRequestModel):
    """Filter for deployment policies."""

    # Single optional criterion; this filter declares no AND/OR/NOT
    # combinators, unlike the other filters in this module.
    deployment_id: UUID | None = Field(
        default=None,
        description="Filter by deployment ID",
    )
# ---------------------------------------------------------------------------
# Order types
# ---------------------------------------------------------------------------
class DeploymentOrder(BaseRequestModel):
    """Ordering specification for deployments."""

    # The sort field is required.
    field: DeploymentOrderField
    # Falls back to ``OrderDirection.DESC`` when the caller gives no direction.
    direction: OrderDirection = OrderDirection.DESC
class RevisionOrder(BaseRequestModel):
    """Ordering specification for revisions."""

    # The sort field is required.
    field: RevisionOrderField
    # Falls back to ``OrderDirection.DESC`` when the caller gives no direction.
    direction: OrderDirection = OrderDirection.DESC
class RouteOrder(BaseRequestModel):
    """Ordering specification for routes."""

    # The sort field is required.
    field: RouteOrderField
    # Falls back to ``OrderDirection.DESC`` when the caller gives no direction.
    direction: OrderDirection = OrderDirection.DESC
class AccessTokenOrder(BaseRequestModel):
    """Ordering specification for access tokens."""

    # The sort field is required.
    field: AccessTokenOrderField
    # Falls back to ``OrderDirection.DESC`` when the caller gives no direction.
    direction: OrderDirection = OrderDirection.DESC
class AutoScalingRuleOrder(BaseRequestModel):
    """Ordering specification for auto-scaling rules."""

    # The sort field is required.
    field: AutoScalingRuleOrderField
    # Falls back to ``OrderDirection.DESC`` when the caller gives no direction.
    direction: OrderDirection = OrderDirection.DESC
class ReplicaOrder(BaseRequestModel):
    """Ordering specification for deployment replicas."""

    # The sort field is required.
    field: ReplicaOrderField
    # Falls back to ``OrderDirection.DESC`` when the caller gives no direction.
    direction: OrderDirection = OrderDirection.DESC
# ---------------------------------------------------------------------------
# Search input types
# ---------------------------------------------------------------------------
class AdminSearchDeploymentsInput(BaseRequestModel):
    """Input for searching deployments (admin, no scope)."""

    # --- what to return ------------------------------------------------------
    filter: DeploymentFilter | None = Field(
        default=None,
        description="Filter criteria",
    )
    order: list[DeploymentOrder] | None = Field(
        default=None,
        description="Sort order",
    )

    # --- pagination ----------------------------------------------------------
    # Three groups of paging fields are accepted: first/after, last/before and
    # limit/offset. This class declares no cross-field validation, so
    # combining them is not rejected here.
    first: int | None = Field(
        default=None,
        ge=1,
        description="Cursor-forward page size",
    )
    after: str | None = Field(
        default=None,
        description="Cursor-forward start cursor",
    )
    last: int | None = Field(
        default=None,
        ge=1,
        description="Cursor-backward page size",
    )
    before: str | None = Field(
        default=None,
        description="Cursor-backward end cursor",
    )
    limit: int | None = Field(
        default=None,
        ge=1,
        description="Max results per page (offset)",
    )
    offset: int | None = Field(
        default=None,
        ge=0,
        description="Pagination offset",
    )
class AdminSearchRevisionsInput(BaseRequestModel):
    """Search parameters for listing deployment revisions (admin, no scope)."""

    # Every field is optional and defaults to None. No cross-field check
    # (e.g. cursor paging vs. offset paging) is declared in this class.

    # --- matching and sorting ---
    filter: RevisionFilter | None = Field(
        default=None,
        description="Filter criteria",
    )
    order: list[RevisionOrder] | None = Field(
        default=None,
        description="Sort order",
    )

    # --- cursor paging, forward ---
    first: int | None = Field(
        default=None,
        ge=1,
        description="Cursor-forward page size",
    )
    after: str | None = Field(
        default=None,
        description="Cursor-forward start cursor",
    )

    # --- cursor paging, backward ---
    last: int | None = Field(
        default=None,
        ge=1,
        description="Cursor-backward page size",
    )
    before: str | None = Field(
        default=None,
        description="Cursor-backward end cursor",
    )

    # --- offset paging ---
    limit: int | None = Field(
        default=None,
        ge=1,
        description="Max results per page (offset)",
    )
    offset: int | None = Field(
        default=None,
        ge=0,
        description="Pagination offset",
    )
class SearchRoutesInput(BaseRequestModel):
    """Search parameters for listing deployment routes."""

    # Every field is optional and defaults to None. No cross-field check
    # (e.g. cursor paging vs. offset paging) is declared in this class.

    # --- matching and sorting ---
    filter: RouteFilter | None = Field(
        default=None,
        description="Filter criteria",
    )
    order: list[RouteOrder] | None = Field(
        default=None,
        description="Sort order",
    )

    # --- cursor paging, forward ---
    first: int | None = Field(
        default=None,
        ge=1,
        description="Cursor-forward page size",
    )
    after: str | None = Field(
        default=None,
        description="Cursor-forward start cursor",
    )

    # --- cursor paging, backward ---
    last: int | None = Field(
        default=None,
        ge=1,
        description="Cursor-backward page size",
    )
    before: str | None = Field(
        default=None,
        description="Cursor-backward end cursor",
    )

    # --- offset paging ---
    limit: int | None = Field(
        default=None,
        ge=1,
        description="Max results per page (offset)",
    )
    offset: int | None = Field(
        default=None,
        ge=0,
        description="Pagination offset",
    )
class SearchAccessTokensInput(BaseRequestModel):
    """Search parameters for listing access tokens."""

    # Every field is optional and defaults to None. No cross-field check
    # (e.g. cursor paging vs. offset paging) is declared in this class.

    # --- matching and sorting ---
    filter: AccessTokenFilter | None = Field(
        default=None,
        description="Filter criteria",
    )
    order: list[AccessTokenOrder] | None = Field(
        default=None,
        description="Sort order",
    )

    # --- cursor paging, forward ---
    first: int | None = Field(
        default=None,
        ge=1,
        description="Cursor-forward page size",
    )
    after: str | None = Field(
        default=None,
        description="Cursor-forward start cursor",
    )

    # --- cursor paging, backward ---
    last: int | None = Field(
        default=None,
        ge=1,
        description="Cursor-backward page size",
    )
    before: str | None = Field(
        default=None,
        description="Cursor-backward end cursor",
    )

    # --- offset paging ---
    limit: int | None = Field(
        default=None,
        ge=1,
        description="Max results per page (offset)",
    )
    offset: int | None = Field(
        default=None,
        ge=0,
        description="Pagination offset",
    )
class SearchAutoScalingRulesInput(BaseRequestModel):
    """Search parameters for listing auto-scaling rules."""

    # Every field is optional and defaults to None. No cross-field check
    # (e.g. cursor paging vs. offset paging) is declared in this class.

    # --- matching and sorting ---
    filter: AutoScalingRuleFilter | None = Field(
        default=None,
        description="Filter criteria",
    )
    order: list[AutoScalingRuleOrder] | None = Field(
        default=None,
        description="Sort order",
    )

    # --- cursor paging, forward ---
    first: int | None = Field(
        default=None,
        ge=1,
        description="Cursor-forward page size",
    )
    after: str | None = Field(
        default=None,
        description="Cursor-forward start cursor",
    )

    # --- cursor paging, backward ---
    last: int | None = Field(
        default=None,
        ge=1,
        description="Cursor-backward page size",
    )
    before: str | None = Field(
        default=None,
        description="Cursor-backward end cursor",
    )

    # --- offset paging ---
    limit: int | None = Field(
        default=None,
        ge=1,
        description="Max results per page (offset)",
    )
    offset: int | None = Field(
        default=None,
        ge=0,
        description="Pagination offset",
    )
class SearchReplicasInput(BaseRequestModel):
    """Search parameters for listing deployment replicas."""

    # Every field is optional and defaults to None. No cross-field check
    # (e.g. cursor paging vs. offset paging) is declared in this class.

    # --- matching and sorting ---
    filter: ReplicaFilter | None = Field(
        default=None,
        description="Filter criteria",
    )
    order: list[ReplicaOrder] | None = Field(
        default=None,
        description="Sort order",
    )

    # --- cursor paging, forward ---
    first: int | None = Field(
        default=None,
        ge=1,
        description="Cursor-forward page size",
    )
    after: str | None = Field(
        default=None,
        description="Cursor-forward start cursor",
    )

    # --- cursor paging, backward ---
    last: int | None = Field(
        default=None,
        ge=1,
        description="Cursor-backward page size",
    )
    before: str | None = Field(
        default=None,
        description="Cursor-backward end cursor",
    )

    # --- offset paging ---
    limit: int | None = Field(
        default=None,
        ge=1,
        description="Max results per page (offset)",
    )
    offset: int | None = Field(
        default=None,
        ge=0,
        description="Pagination offset",
    )
class SearchDeploymentPoliciesInput(BaseRequestModel):
    """Search parameters for listing deployment policies."""

    # This input declares only a filter and offset paging; it has no
    # ``order`` field and no cursor fields. All fields default to None.
    filter: DeploymentPolicyFilter | None = Field(
        default=None,
        description="Filter criteria",
    )
    limit: int | None = Field(
        default=None,
        ge=1,
        description="Max results per page",
    )
    offset: int | None = Field(
        default=None,
        ge=0,
        description="Pagination offset",
    )
# ---------------------------------------------------------------------------
# Sub-entity mutation inputs
# ---------------------------------------------------------------------------
class CreateAccessTokenInput(BaseRequestModel):
    """Request payload for creating an access token."""

    # Both fields are required (neither declares a default).
    model_deployment_id: UUID = Field(
        description="Model deployment ID",
    )
    # Deliberately left without a default so the caller picks the lifetime.
    expires_at: datetime = Field(
        description=(
            "Token expiration timestamp. Required: there is no safe default — "
            "callers must decide the token lifetime themselves."
        ),
    )
class DeleteAccessTokenInput(BaseRequestModel):
    """Request payload for deleting a single access token."""

    # Required: identifies the token to delete.
    id: UUID = Field(
        description="Access token ID",
    )
class BulkDeleteAccessTokensInput(BaseRequestModel):
    """Request payload for deleting several access tokens at once."""

    # Required list; no length constraint is declared on it in this class.
    ids: list[UUID] = Field(
        description="List of access token UUIDs to delete.",
    )
class CreateAutoScalingRuleInput(BaseRequestModel):
    """Request payload for creating an auto-scaling rule."""

    # --- required: target deployment and the metric being watched ---
    deployment_id: UUID = Field(
        description="Deployment ID",
    )
    metric_source: AutoScalingMetricSource = Field(
        description="Metric source",
    )
    metric_name: str = Field(
        description="Metric name",
    )

    # --- optional thresholds (default None) ---
    # No min/max ordering check is declared in this class.
    min_threshold: Decimal | None = Field(
        default=None,
        description="Minimum threshold",
    )
    max_threshold: Decimal | None = Field(
        default=None,
        description="Maximum threshold",
    )

    # --- required scaling parameters, each at least 1 ---
    step_size: int = Field(
        ge=1,
        description="Scale step size",
    )
    time_window: int = Field(
        ge=1,
        description="Time window in seconds",
    )

    # --- optional replica bounds (default None) ---
    # The lower bound may be 0, the upper bound must be at least 1.
    min_replicas: int | None = Field(
        default=None,
        ge=0,
        description="Minimum replicas",
    )
    max_replicas: int | None = Field(
        default=None,
        ge=1,
        description="Maximum replicas",
    )
class UpdateAutoScalingRuleInput(BaseRequestModel):
    """Request payload for updating an auto-scaling rule (all fields are optional)."""

    # Every field defaults to None, which the descriptions define as
    # "no change".
    # NOTE(review): this class carries no rule ID; presumably the target rule
    # is identified outside this body -- confirm against the caller.

    # --- metric being watched ---
    metric_source: AutoScalingMetricSource | None = Field(
        default=None,
        description="Metric source (None = no change)",
    )
    metric_name: str | None = Field(
        default=None,
        description="Metric name (None = no change)",
    )

    # --- thresholds ---
    min_threshold: Decimal | None = Field(
        default=None,
        description="Minimum threshold (None = no change)",
    )
    max_threshold: Decimal | None = Field(
        default=None,
        description="Maximum threshold (None = no change)",
    )

    # --- scaling parameters, each at least 1 when given ---
    step_size: int | None = Field(
        default=None,
        ge=1,
        description="Scale step size (None = no change)",
    )
    time_window: int | None = Field(
        default=None,
        ge=1,
        description="Time window in seconds (None = no change)",
    )

    # --- replica bounds ---
    min_replicas: int | None = Field(
        default=None,
        ge=0,
        description="Minimum replicas (None = no change)",
    )
    max_replicas: int | None = Field(
        default=None,
        ge=1,
        description="Maximum replicas (None = no change)",
    )
class DeleteAutoScalingRuleInput(BaseRequestModel):
    """Request payload for deleting a single auto-scaling rule."""

    # Required: identifies the rule to delete.
    id: UUID = Field(
        description="Auto-scaling rule ID",
    )
class BulkDeleteAutoScalingRulesInput(BaseRequestModel):
    """Request payload for deleting several auto-scaling rules at once."""

    # Required list; no length constraint is declared on it in this class.
    ids: list[UUID] = Field(
        description="List of auto-scaling rule UUIDs to delete.",
    )
class UpsertDeploymentPolicyInput(BaseRequestModel):
    """Request payload for creating or updating a deployment policy."""

    # --- required: target deployment and chosen strategy ---
    deployment_id: UUID = Field(
        description="Deployment ID",
    )
    strategy: DeploymentStrategy = Field(
        description="Deployment strategy",
    )

    # --- strategy-specific configuration (both default to None) ---
    # NOTE(review): the descriptions call each config "required" for its
    # strategy, but no validator enforcing that is declared in this class.
    rolling_update: RollingUpdateConfigInput | None = Field(
        default=None,
        description="Rolling update config (required for ROLLING strategy)",
    )
    blue_green: BlueGreenConfigInput | None = Field(
        default=None,
        description="Blue/green config (required for BLUE_GREEN strategy)",
    )
class SyncReplicaInput(BaseRequestModel):
    """Request payload for syncing the replicas of one deployment."""

    # Required: the deployment whose replicas should be synced.
    model_deployment_id: UUID = Field(
        description="Deployment ID to sync replicas for",
    )
class ActivateRevisionInput(BaseRequestModel):
    """Request payload for making a revision the deployment's current revision."""

    # Both identifiers are required.
    deployment_id: UUID = Field(
        description="Deployment ID",
    )
    revision_id: UUID = Field(
        description="Revision ID to activate",
    )
class UpdateRouteTrafficStatusInput(BaseRequestModel):
    """Request payload for changing the traffic status of a route."""

    # Both fields are required.
    route_id: UUID = Field(
        description="Route ID to update",
    )
    traffic_status: RouteTrafficStatus = Field(
        description="New traffic status (ACTIVE/INACTIVE)",
    )
class ReplaceDeploymentOptionsInput(BaseRequestModel):
"""REST body for fully replacing a deployment's ``options`` surface.
Replace semantics — the supplied payload is the complete new value
(partial updates are not supported here).
"""
options: DeploymentOptionsInput = Field(