@@ -155,6 +155,26 @@ class ModelDeploymentInfrastructure(Builder):
155155 CONST_PRIVATE_ENDPOINT_ID = "privateEndpointId"
156156 CONST_CAPACITY_RESERVATION_IDS = "capacityReservationIds"
157157
# Autoscaling config (builder-only; used when constructing `scalingPolicy` payload).
# This can be applied to both SINGLE_MODEL and MODEL_GROUP deployments.
#
# CONST_AUTO_SCALING is the spec key under which `with_auto_scaling` stores its
# configuration dict; the constants that follow are the keys inside that dict.
CONST_AUTO_SCALING = "autoScaling"
CONST_SCALING_TYPE = "scalingType"
CONST_MINIMUM_INSTANCE_COUNT = "minimumInstanceCount"
CONST_MAXIMUM_INSTANCE_COUNT = "maximumInstanceCount"
CONST_INITIAL_INSTANCE_COUNT = "initialInstanceCount"
CONST_SCALE_IN_THRESHOLD = "scaleInThreshold"
CONST_SCALE_OUT_THRESHOLD = "scaleOutThreshold"
# Only written by `with_auto_scaling` when a cooldown value is supplied.
CONST_COOL_DOWN_IN_SECONDS = "coolDownInSeconds"
CONST_IS_ENABLED = "isEnabled"

# Autoscaling constants (for `with_auto_scaling`).
# `with_auto_scaling` lower-cases its `scaling_type` argument and raises
# ValueError unless the result is a member of CONST_SUPPORTED_AUTO_SCALING_TYPES.
CONST_SCALING_TYPE_CPU_UTILIZATION = "cpu_utilization"
CONST_SCALING_TYPE_MEMORY_UTILIZATION = "memory_utilization"
CONST_SUPPORTED_AUTO_SCALING_TYPES = (
    CONST_SCALING_TYPE_CPU_UTILIZATION ,
    CONST_SCALING_TYPE_MEMORY_UTILIZATION ,
)
177+
158178 attribute_map = {
159179 CONST_PROJECT_ID : "project_id" ,
160180 CONST_COMPARTMENT_ID : "compartment_id" ,
@@ -172,6 +192,7 @@ class ModelDeploymentInfrastructure(Builder):
172192 CONST_SUBNET_ID : "subnet_id" ,
173193 CONST_PRIVATE_ENDPOINT_ID : "private_endpoint_id" ,
174194 CONST_CAPACITY_RESERVATION_IDS : "capacity_reservation_ids" ,
195+ CONST_AUTO_SCALING : "auto_scaling" ,
175196 }
176197
177198 shape_config_details_attribute_map = {
@@ -720,6 +741,83 @@ def with_capacity_reservation_ids(
720741 self .CONST_CAPACITY_RESERVATION_IDS , capacity_reservation_ids
721742 )
722743
@property
def auto_scaling(self) -> Dict:
    """The autoscaling settings stored on this infrastructure spec.

    The value is whatever was saved under the ``autoScaling`` spec key
    (for example by ``with_auto_scaling``); it is consumed when the
    deployment payload is built to produce an `AUTOSCALING` scaling policy.

    Returns
    -------
    Dict
        The stored autoscaling configuration, or an empty dict when
        nothing has been configured.
    """
    spec_key = self.CONST_AUTO_SCALING
    fallback = {}
    return self.get_spec(spec_key, fallback)
757+
def with_auto_scaling(
    self,
    scaling_type: str,
    minimum_instance_count: int = 1,
    maximum_instance_count: int = 3,
    initial_instance_count: int = None,
    scale_in_threshold: int = 30,
    scale_out_threshold: int = 70,
    cool_down_in_seconds: int = None,
    is_enabled: bool = True,
) -> "ModelDeploymentInfrastructure":
    """Enables threshold-based autoscaling.

    The validated settings are stored as a dict under the ``autoScaling``
    spec key.

    Parameters
    ----------
    scaling_type: str
        One of ["cpu_utilization", "memory_utilization"] (case-insensitive).
    minimum_instance_count: int
        Minimum number of instances (default: 1).
    maximum_instance_count: int
        Maximum number of instances (default: 3).
    initial_instance_count: int
        Initial number of instances.
        Defaults to `replica` if set, otherwise `minimum_instance_count`;
        the defaulted value is clamped into
        [`minimum_instance_count`, `maximum_instance_count`].
    scale_in_threshold: int
        Threshold for scaling in (default: 30).
    scale_out_threshold: int
        Threshold for scaling out (default: 70).
    cool_down_in_seconds: int
        Optional cooldown period. Omitted from the stored config when None.
    is_enabled: bool
        Whether autoscaling is enabled (default: True).

    Returns
    -------
    ModelDeploymentInfrastructure
        The ModelDeploymentInfrastructure instance (self).

    Raises
    ------
    ValueError
        If `scaling_type` is not supported, if `minimum_instance_count`
        exceeds `maximum_instance_count`, if an explicitly supplied
        `initial_instance_count` lies outside the min/max bounds, or if
        `scale_in_threshold` is not lower than `scale_out_threshold`.
    """
    scaling_type = str(scaling_type or "").lower()
    if scaling_type not in self.CONST_SUPPORTED_AUTO_SCALING_TYPES:
        raise ValueError(
            "Invalid scaling_type: {}. Allowed values: {}.".format(
                scaling_type, list(self.CONST_SUPPORTED_AUTO_SCALING_TYPES)
            )
        )

    # Fail fast on contradictory bounds instead of storing a config that
    # can never describe a valid scaling policy.
    if minimum_instance_count > maximum_instance_count:
        raise ValueError(
            "minimum_instance_count ({}) must not exceed "
            "maximum_instance_count ({}).".format(
                minimum_instance_count, maximum_instance_count
            )
        )

    if scale_in_threshold >= scale_out_threshold:
        raise ValueError(
            "scale_in_threshold ({}) must be lower than "
            "scale_out_threshold ({}).".format(
                scale_in_threshold, scale_out_threshold
            )
        )

    if initial_instance_count is None:
        # The replica count is set independently of the autoscaling bounds,
        # so clamp it rather than let an unrelated setting produce an
        # out-of-range initial count.
        preferred = self.replica or minimum_instance_count
        initial_instance_count = min(
            max(preferred, minimum_instance_count), maximum_instance_count
        )
    elif not (
        minimum_instance_count <= initial_instance_count <= maximum_instance_count
    ):
        raise ValueError(
            "initial_instance_count ({}) must be between "
            "minimum_instance_count ({}) and maximum_instance_count ({}).".format(
                initial_instance_count,
                minimum_instance_count,
                maximum_instance_count,
            )
        )

    config = {
        self.CONST_SCALING_TYPE: scaling_type,
        self.CONST_MINIMUM_INSTANCE_COUNT: minimum_instance_count,
        self.CONST_MAXIMUM_INSTANCE_COUNT: maximum_instance_count,
        self.CONST_INITIAL_INSTANCE_COUNT: initial_instance_count,
        self.CONST_SCALE_IN_THRESHOLD: scale_in_threshold,
        self.CONST_SCALE_OUT_THRESHOLD: scale_out_threshold,
        self.CONST_IS_ENABLED: bool(is_enabled),
    }
    # The cooldown key is only emitted when the caller supplied a value.
    if cool_down_in_seconds is not None:
        config[self.CONST_COOL_DOWN_IN_SECONDS] = cool_down_in_seconds

    return self.set_spec(self.CONST_AUTO_SCALING, config)
820+
723821 def init (self , ** kwargs ) -> "ModelDeploymentInfrastructure" :
724822 """Initializes a starter specification for the ModelDeploymentInfrastructure.
725823
0 commit comments