@@ -53,11 +53,35 @@ class ModelDeploymentProperties(
5353 Return an instance of CreateModelDeploymentDetails for creating the deployment.
5454 """
5555
56+ # Autoscaling constants (for `with_instance_configuration`).
57+ CONST_SCALING_TYPE_FIXED = "fixed"
58+ CONST_SCALING_TYPE_CPU_UTILIZATION = "cpu_utilization"
59+ CONST_SCALING_TYPE_MEMORY_UTILIZATION = "memory_utilization"
60+ CONST_SUPPORTED_AUTO_SCALING_TYPES = (
61+ CONST_SCALING_TYPE_CPU_UTILIZATION ,
62+ CONST_SCALING_TYPE_MEMORY_UTILIZATION ,
63+ )
64+ CONST_SUPPORTED_SCALING_TYPES = (
65+ CONST_SCALING_TYPE_FIXED ,
66+ * CONST_SUPPORTED_AUTO_SCALING_TYPES ,
67+ )
68+
5669 # These properties are supported by ModelDeploymentProperties but are not top-level attributes of ModelDeployment
5770 sub_properties = [
5871 "instance_shape" ,
5972 "instance_count" ,
6073 "bandwidth_mbps" ,
74+ # Autoscaling-related keys (used by `with_instance_configuration`).
75+ "scaling_type" ,
76+ "cpu_utilization" ,
77+ "memory_utilization" ,
78+ "minimum_instance_count" ,
79+ "maximum_instance_count" ,
80+ "initial_instance_count" ,
81+ "scale_in_threshold" ,
82+ "scale_out_threshold" ,
83+ "cool_down_in_seconds" ,
84+ "is_enabled" ,
6185 "access_log_group_id" ,
6286 "access_log_id" ,
6387 "predict_log_group_id" ,
@@ -205,6 +229,17 @@ def __init__(
205229 "bandwidth_mbps" ,
206230 "memory_in_gbs" ,
207231 "ocpus" ,
232+ # Autoscaling-related keys.
233+ "scaling_type" ,
234+ "cpu_utilization" ,
235+ "memory_utilization" ,
236+ "minimum_instance_count" ,
237+ "maximum_instance_count" ,
238+ "initial_instance_count" ,
239+ "scale_in_threshold" ,
240+ "scale_out_threshold" ,
241+ "cool_down_in_seconds" ,
242+ "is_enabled" ,
208243 ]:
209244 if key in kwargs :
210245 instance_config [key ] = kwargs [key ]
@@ -243,6 +278,22 @@ def with_instance_configuration(self, config):
243278 - memory_in_gbs: float,
244279 - ocpus: float
245280
281+ In addition, this method supports autoscaling for SINGLE_MODEL deployments.
282+ Set `scaling_type` to choose the scaling policy ("fixed" keeps a fixed-size deployment; the other values enable autoscaling):
283+
284+ - scaling_type: str. One of ["fixed", "cpu_utilization", "memory_utilization"]. Defaults to "fixed".
285+
286+ For `cpu_utilization` / `memory_utilization`, the following keys are supported
287+ (all optional, sensible defaults will be applied):
288+
289+ - minimum_instance_count: int (default: 1)
290+ - maximum_instance_count: int (default: 3)
291+ - initial_instance_count: int (default: instance_count or minimum_instance_count)
292+ - scale_in_threshold: int (default: 30)
293+ - scale_out_threshold: int (default: 70)
294+ - cool_down_in_seconds: int (no default; only set on the policy when provided)
295+ - is_enabled: bool (default: True)
296+
246297 The instance_shape and instance_count are required when creating a new deployment.
247298 They are optional when updating an existing deployment.
248299
@@ -284,11 +335,86 @@ def with_instance_configuration(self, config):
284335 instance_configuration_object
285336 )
286337
287- # scaling_policy is required even though it can be initialized with empty values
288- scaling_policy_object = data_science_models .FixedSizeScalingPolicy ()
289- if "instance_count" in config :
290- scaling_policy_object .instance_count = int (config ["instance_count" ])
291- model_configuration_details_object .scaling_policy = scaling_policy_object
338+ # scaling_policy is required even though it can be initialized with empty values.
339+ # Keep backward compatible behaviour (FixedSizeScalingPolicy) as default,
340+ # and enable threshold-based autoscaling for model deployments.
341+
342+ scaling_type = config .get ("scaling_type" , None )
343+
344+ if not scaling_type :
345+ scaling_type = self .CONST_SCALING_TYPE_FIXED
346+
347+
348+ scaling_type = str (scaling_type ).lower ()
349+ if scaling_type not in self .CONST_SUPPORTED_SCALING_TYPES :
350+ raise ValueError (
351+ "Invalid scaling_type: {}. Allowed values: {}." .format (
352+ scaling_type , list (self .CONST_SUPPORTED_SCALING_TYPES )
353+ )
354+ )
355+
356+ if scaling_type == self .CONST_SCALING_TYPE_FIXED :
357+ scaling_policy_object = data_science_models .FixedSizeScalingPolicy ()
358+ if "instance_count" in config and config .get ("instance_count" ) is not None :
359+ scaling_policy_object .instance_count = int (config ["instance_count" ])
360+ model_configuration_details_object .scaling_policy = scaling_policy_object
361+ else :
362+ # The metric type is the upper-cased scaling_type value.
363+ # Example: cpu_utilization -> CPU_UTILIZATION
364+ metric_type = scaling_type .upper ()
365+
366+ minimum_instance_count = utils .parse_int (
367+ config .get ("minimum_instance_count" , config .get ("min_instance_count" )),
368+ 1 ,
369+ )
370+ maximum_instance_count = utils .parse_int (
371+ config .get ("maximum_instance_count" , config .get ("max_instance_count" )),
372+ 3 ,
373+ )
374+ # Backward compatibility: allow instance_count to act as initial_instance_count.
375+
376+ initial_instance_count = utils .parse_int (
377+ config .get (
378+ "initial_instance_count" ,
379+ config .get ("instance_count" , minimum_instance_count ),
380+ ),
381+ minimum_instance_count ,
382+ )
383+
384+ scale_in_threshold = utils .parse_int (config .get ("scale_in_threshold" ), 30 )
385+ scale_out_threshold = utils .parse_int (config .get ("scale_out_threshold" ), 70 )
386+ is_enabled = config .get ("is_enabled" , True )
387+ cool_down_in_seconds = config .get ("cool_down_in_seconds" , None )
388+
389+ threshold_details = data_science_models .ThresholdBasedAutoScalingPolicyDetails (
390+ auto_scaling_policy_type = "THRESHOLD" ,
391+ maximum_instance_count = maximum_instance_count ,
392+ minimum_instance_count = minimum_instance_count ,
393+ initial_instance_count = initial_instance_count ,
394+ rules = [
395+ data_science_models .PredefinedMetricExpressionRule (
396+ metric_type = metric_type ,
397+ scale_in_configuration = data_science_models .PredefinedExpressionThresholdScalingConfiguration (
398+ threshold = scale_in_threshold
399+ ),
400+ scale_out_configuration = data_science_models .PredefinedExpressionThresholdScalingConfiguration (
401+ threshold = scale_out_threshold
402+ ),
403+ )
404+ ],
405+ )
406+
407+ auto_scaling_policy = data_science_models .AutoScalingPolicy (
408+ policy_type = "AUTOSCALING" ,
409+ is_enabled = bool (is_enabled ),
410+ auto_scaling_policies = [threshold_details ],
411+ )
412+ if cool_down_in_seconds is not None :
413+ auto_scaling_policy .cool_down_in_seconds = utils .parse_int (
414+ cool_down_in_seconds
415+ )
416+
417+ model_configuration_details_object .scaling_policy = auto_scaling_policy
292418
293419 if "bandwidth_mbps" in config :
294420 model_configuration_details_object .bandwidth_mbps = config ["bandwidth_mbps" ]
0 commit comments