Skip to content

Commit 9751808

Browse files
HarrySnart and mrDzurb
authored
add auto-scaling to ads deployments (#1342)
Co-authored-by: Dmitrii Cherkasov <dmitrii.cherkasov@oracle.com>
1 parent d6c386c commit 9751808

File tree

6 files changed

+404
-10
lines changed

6 files changed

+404
-10
lines changed

ads/common/utils.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -237,6 +237,31 @@ def parse_bool(value: Any) -> bool:
237237
return bool(value)
238238

239239

240+
def parse_int(value: Any, default: Optional[int] = None) -> Optional[int]:
    """Converts a value to int.

    Parameters
    ----------
    value: Any
        The value to convert.
    default: Optional[int]
        The value to return if `value` is None.

    Returns
    -------
    Optional[int]
        The int value or `default`.

    Raises
    ------
    ValueError
        If `value` cannot be converted to int. This includes values of
        unsupported types (for example lists or dicts).
    """
    if value is None:
        return default
    try:
        return int(value)
    except TypeError as ex:
        # int() raises TypeError (not ValueError) for unsupported types such as
        # lists or dicts. Normalize it so the documented contract holds and
        # callers only need to handle ValueError.
        raise ValueError("Cannot convert {!r} to int.".format(value)) from ex
263+
264+
240265
def read_file(file_path: str, **kwargs) -> str:
241266
try:
242267
with fsspec.open(file_path, "r", **kwargs.get("auth", {})) as f:

ads/model/deployment/model_deployment.py

Lines changed: 62 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1717,11 +1717,68 @@ def _build_model_deployment_configuration_details(self) -> Dict:
17171717
infrastructure.capacity_reservation_ids
17181718
)
17191719

1720-
scaling_policy = {
1721-
infrastructure.CONST_POLICY_TYPE: "FIXED_SIZE",
1722-
infrastructure.CONST_INSTANCE_COUNT: infrastructure.replica
1723-
or DEFAULT_REPLICA,
1724-
}
1720+
def _drop_none_values(d: Dict) -> Dict:
1721+
"""Drops keys with None values from the provided dict."""
1722+
return {k: v for k, v in d.items() if v is not None}
1723+
1724+
# Fixed-size is the default. If autoscaling is configured on infrastructure,
1725+
# emit an AUTOSCALING policy (supported for both SINGLE_MODEL and MODEL_GROUP).
1726+
auto_scaling = getattr(infrastructure, "auto_scaling", None) or {}
1727+
if auto_scaling:
1728+
scaling_type = str(auto_scaling.get("scalingType", "") or "").lower()
1729+
metric_type = scaling_type.upper()
1730+
1731+
scaling_policy = {
1732+
infrastructure.CONST_POLICY_TYPE: "AUTOSCALING",
1733+
"isEnabled": auto_scaling.get("isEnabled", True),
1734+
"coolDownInSeconds": auto_scaling.get("coolDownInSeconds", None),
1735+
"autoScalingPolicies": [
1736+
_drop_none_values(
1737+
{
1738+
"autoScalingPolicyType": "THRESHOLD",
1739+
"maximumInstanceCount": auto_scaling.get(
1740+
"maximumInstanceCount", 3
1741+
),
1742+
"minimumInstanceCount": auto_scaling.get(
1743+
"minimumInstanceCount", 1
1744+
),
1745+
"initialInstanceCount": auto_scaling.get(
1746+
"initialInstanceCount",
1747+
infrastructure.replica or DEFAULT_REPLICA,
1748+
),
1749+
"rules": [
1750+
{
1751+
"metricExpressionRuleType": "PREDEFINED_EXPRESSION",
1752+
"metricType": metric_type,
1753+
"scaleInConfiguration": _drop_none_values(
1754+
{
1755+
"scalingConfigurationType": "THRESHOLD",
1756+
"threshold": auto_scaling.get(
1757+
"scaleInThreshold", 30
1758+
),
1759+
}
1760+
),
1761+
"scaleOutConfiguration": _drop_none_values(
1762+
{
1763+
"scalingConfigurationType": "THRESHOLD",
1764+
"threshold": auto_scaling.get(
1765+
"scaleOutThreshold", 70
1766+
),
1767+
}
1768+
),
1769+
}
1770+
],
1771+
}
1772+
)
1773+
],
1774+
}
1775+
scaling_policy = _drop_none_values(scaling_policy)
1776+
else:
1777+
scaling_policy = {
1778+
infrastructure.CONST_POLICY_TYPE: "FIXED_SIZE",
1779+
infrastructure.CONST_INSTANCE_COUNT: infrastructure.replica
1780+
or DEFAULT_REPLICA,
1781+
}
17251782

17261783
if not (runtime.model_uri or runtime.model_group_id):
17271784
raise ValueError(

ads/model/deployment/model_deployment_infrastructure.py

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,26 @@ class ModelDeploymentInfrastructure(Builder):
155155
CONST_PRIVATE_ENDPOINT_ID = "privateEndpointId"
156156
CONST_CAPACITY_RESERVATION_IDS = "capacityReservationIds"
157157

158+
# Autoscaling config (builder-only; used when constructing `scalingPolicy` payload).
159+
# This can be applied to both SINGLE_MODEL and MODEL_GROUP deployments.
160+
CONST_AUTO_SCALING = "autoScaling"
161+
CONST_SCALING_TYPE = "scalingType"
162+
CONST_MINIMUM_INSTANCE_COUNT = "minimumInstanceCount"
163+
CONST_MAXIMUM_INSTANCE_COUNT = "maximumInstanceCount"
164+
CONST_INITIAL_INSTANCE_COUNT = "initialInstanceCount"
165+
CONST_SCALE_IN_THRESHOLD = "scaleInThreshold"
166+
CONST_SCALE_OUT_THRESHOLD = "scaleOutThreshold"
167+
CONST_COOL_DOWN_IN_SECONDS = "coolDownInSeconds"
168+
CONST_IS_ENABLED = "isEnabled"
169+
170+
# Autoscaling constants (for `with_auto_scaling`).
171+
CONST_SCALING_TYPE_CPU_UTILIZATION = "cpu_utilization"
172+
CONST_SCALING_TYPE_MEMORY_UTILIZATION = "memory_utilization"
173+
CONST_SUPPORTED_AUTO_SCALING_TYPES = (
174+
CONST_SCALING_TYPE_CPU_UTILIZATION,
175+
CONST_SCALING_TYPE_MEMORY_UTILIZATION,
176+
)
177+
158178
attribute_map = {
159179
CONST_PROJECT_ID: "project_id",
160180
CONST_COMPARTMENT_ID: "compartment_id",
@@ -172,6 +192,7 @@ class ModelDeploymentInfrastructure(Builder):
172192
CONST_SUBNET_ID: "subnet_id",
173193
CONST_PRIVATE_ENDPOINT_ID: "private_endpoint_id",
174194
CONST_CAPACITY_RESERVATION_IDS: "capacity_reservation_ids",
195+
CONST_AUTO_SCALING: "auto_scaling",
175196
}
176197

177198
shape_config_details_attribute_map = {
@@ -720,6 +741,83 @@ def with_capacity_reservation_ids(
720741
self.CONST_CAPACITY_RESERVATION_IDS, capacity_reservation_ids
721742
)
722743

744+
@property
def auto_scaling(self) -> Dict:
    """The autoscaling configuration stored on this infrastructure.

    The value is looked up in the spec under `CONST_AUTO_SCALING`, with an
    empty dict passed as the default to `get_spec`. The deployment payload
    builder uses this configuration to generate an `AUTOSCALING` scaling
    policy.

    Returns
    -------
    Dict
        Autoscaling configuration.
    """
    spec_key = self.CONST_AUTO_SCALING
    return self.get_spec(spec_key, {})
757+
758+
def with_auto_scaling(
    self,
    scaling_type: str,
    minimum_instance_count: int = 1,
    maximum_instance_count: int = 3,
    initial_instance_count: int = None,
    scale_in_threshold: int = 30,
    scale_out_threshold: int = 70,
    cool_down_in_seconds: int = None,
    is_enabled: bool = True,
) -> "ModelDeploymentInfrastructure":
    """Enables threshold-based autoscaling.

    Parameters
    ----------
    scaling_type: str
        One of ["cpu_utilization", "memory_utilization"] (case-insensitive).
    minimum_instance_count: int
        Minimum number of instances (default: 1).
    maximum_instance_count: int
        Maximum number of instances (default: 3).
    initial_instance_count: int, optional
        Initial number of instances.
        Defaults to `replica` if set, otherwise `minimum_instance_count`.
    scale_in_threshold: int
        Threshold for scaling in (default: 30).
    scale_out_threshold: int
        Threshold for scaling out (default: 70).
    cool_down_in_seconds: int, optional
        Optional cooldown period. Omitted from the stored config when None.
    is_enabled: bool
        Whether autoscaling is enabled (default: True).

    Returns
    -------
    ModelDeploymentInfrastructure
        The ModelDeploymentInfrastructure instance (self).

    Raises
    ------
    ValueError
        If `scaling_type` is not supported, or if `minimum_instance_count`
        is greater than `maximum_instance_count`.
    """
    # Normalize so that e.g. "CPU_UTILIZATION" and None are handled uniformly.
    scaling_type = str(scaling_type or "").lower()
    if scaling_type not in self.CONST_SUPPORTED_AUTO_SCALING_TYPES:
        raise ValueError(
            "Invalid scaling_type: {}. Allowed values: {}.".format(
                scaling_type, list(self.CONST_SUPPORTED_AUTO_SCALING_TYPES)
            )
        )

    # Fail fast on an impossible instance range instead of deferring the
    # error to the service call. Only int values are compared so that other
    # inputs keep their previous pass-through behaviour.
    if (
        isinstance(minimum_instance_count, int)
        and isinstance(maximum_instance_count, int)
        and minimum_instance_count > maximum_instance_count
    ):
        raise ValueError(
            "minimum_instance_count ({}) cannot be greater than "
            "maximum_instance_count ({}).".format(
                minimum_instance_count, maximum_instance_count
            )
        )

    if initial_instance_count is None:
        initial_instance_count = self.replica or minimum_instance_count

    config = {
        self.CONST_SCALING_TYPE: scaling_type,
        self.CONST_MINIMUM_INSTANCE_COUNT: minimum_instance_count,
        self.CONST_MAXIMUM_INSTANCE_COUNT: maximum_instance_count,
        self.CONST_INITIAL_INSTANCE_COUNT: initial_instance_count,
        self.CONST_SCALE_IN_THRESHOLD: scale_in_threshold,
        self.CONST_SCALE_OUT_THRESHOLD: scale_out_threshold,
        self.CONST_IS_ENABLED: bool(is_enabled),
    }
    # The cooldown key is only stored when explicitly provided.
    if cool_down_in_seconds is not None:
        config[self.CONST_COOL_DOWN_IN_SECONDS] = cool_down_in_seconds

    return self.set_spec(self.CONST_AUTO_SCALING, config)
820+
723821
def init(self, **kwargs) -> "ModelDeploymentInfrastructure":
724822
"""Initializes a starter specification for the ModelDeploymentInfrastructure.
725823

ads/model/deployment/model_deployment_properties.py

Lines changed: 131 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -53,11 +53,35 @@ class ModelDeploymentProperties(
5353
Return an instance of CreateModelDeploymentDetails for creating the deployment.
5454
"""
5555

56+
# Autoscaling constants (for `with_instance_configuration`).
57+
CONST_SCALING_TYPE_FIXED = "fixed"
58+
CONST_SCALING_TYPE_CPU_UTILIZATION = "cpu_utilization"
59+
CONST_SCALING_TYPE_MEMORY_UTILIZATION = "memory_utilization"
60+
CONST_SUPPORTED_AUTO_SCALING_TYPES = (
61+
CONST_SCALING_TYPE_CPU_UTILIZATION,
62+
CONST_SCALING_TYPE_MEMORY_UTILIZATION,
63+
)
64+
CONST_SUPPORTED_SCALING_TYPES = (
65+
CONST_SCALING_TYPE_FIXED,
66+
*CONST_SUPPORTED_AUTO_SCALING_TYPES,
67+
)
68+
5669
# These properties are supported by ModelDeploymentProperties but are not top-level attributes of ModelDeployment
5770
sub_properties = [
5871
"instance_shape",
5972
"instance_count",
6073
"bandwidth_mbps",
74+
# Autoscaling-related keys (used by `with_instance_configuration`).
75+
"scaling_type",
76+
"cpu_utilization",
77+
"memory_utilization",
78+
"minimum_instance_count",
79+
"maximum_instance_count",
80+
"initial_instance_count",
81+
"scale_in_threshold",
82+
"scale_out_threshold",
83+
"cool_down_in_seconds",
84+
"is_enabled",
6185
"access_log_group_id",
6286
"access_log_id",
6387
"predict_log_group_id",
@@ -205,6 +229,17 @@ def __init__(
205229
"bandwidth_mbps",
206230
"memory_in_gbs",
207231
"ocpus",
232+
# Autoscaling-related keys.
233+
"scaling_type",
234+
"cpu_utilization",
235+
"memory_utilization",
236+
"minimum_instance_count",
237+
"maximum_instance_count",
238+
"initial_instance_count",
239+
"scale_in_threshold",
240+
"scale_out_threshold",
241+
"cool_down_in_seconds",
242+
"is_enabled",
208243
]:
209244
if key in kwargs:
210245
instance_config[key] = kwargs[key]
@@ -243,6 +278,22 @@ def with_instance_configuration(self, config):
243278
- memory_in_gbs: float,
244279
- ocpus: float
245280
281+
In addition, this method supports autoscaling for SINGLE_MODEL deployments.
282+
To enable autoscaling, set `scaling_type` to one of:
283+
284+
- scaling_type: str. One of ["fixed", "cpu_utilization", "memory_utilization"]. Defaults to "fixed".
285+
286+
For `cpu_utilization` / `memory_utilization`, the following keys are supported
287+
(all optional, sensible defaults will be applied):
288+
289+
- minimum_instance_count: int (default: 1)
290+
- maximum_instance_count: int (default: 3)
291+
- initial_instance_count: int (default: instance_count or minimum_instance_count)
292+
- scale_in_threshold: int (default: 30)
293+
- scale_out_threshold: int (default: 70)
294+
- cool_down_in_seconds: int (optional)
295+
- is_enabled: bool (default: True)
296+
246297
The instance_shape and instance_count are required when creating a new deployment.
247298
They are optional when updating an existing deployment.
248299
@@ -284,11 +335,86 @@ def with_instance_configuration(self, config):
284335
instance_configuration_object
285336
)
286337

287-
# scaling_policy is required even though it can be initialized with empty values
288-
scaling_policy_object = data_science_models.FixedSizeScalingPolicy()
289-
if "instance_count" in config:
290-
scaling_policy_object.instance_count = int(config["instance_count"])
291-
model_configuration_details_object.scaling_policy = scaling_policy_object
338+
# scaling_policy is required even though it can be initialized with empty values.
339+
# Keep backward compatible behaviour (FixedSizeScalingPolicy) as default,
340+
# and enable threshold-based autoscaling for model deployments.
341+
342+
scaling_type = config.get("scaling_type", None)
343+
344+
if not scaling_type:
345+
scaling_type=self.CONST_SCALING_TYPE_FIXED
346+
347+
348+
scaling_type = str(scaling_type).lower()
349+
if scaling_type not in self.CONST_SUPPORTED_SCALING_TYPES:
350+
raise ValueError(
351+
"Invalid scaling_type: {}. Allowed values: {}.".format(
352+
scaling_type, list(self.CONST_SUPPORTED_SCALING_TYPES)
353+
)
354+
)
355+
356+
if scaling_type == self.CONST_SCALING_TYPE_FIXED:
357+
scaling_policy_object = data_science_models.FixedSizeScalingPolicy()
358+
if "instance_count" in config and config.get("instance_count") is not None:
359+
scaling_policy_object.instance_count = int(config["instance_count"])
360+
model_configuration_details_object.scaling_policy = scaling_policy_object
361+
else:
362+
# Metric type is the upper of the scaling_type kwarg.
363+
# Example: cpu_utilization -> CPU_UTILIZATION
364+
metric_type = scaling_type.upper()
365+
366+
minimum_instance_count = utils.parse_int(
367+
config.get("minimum_instance_count", config.get("min_instance_count")),
368+
1,
369+
)
370+
maximum_instance_count = utils.parse_int(
371+
config.get("maximum_instance_count", config.get("max_instance_count")),
372+
3,
373+
)
374+
# Backward compatibility: allow instance_count to act as initial_instance_count.
375+
376+
initial_instance_count = utils.parse_int(
377+
config.get(
378+
"initial_instance_count",
379+
config.get("instance_count", minimum_instance_count),
380+
),
381+
minimum_instance_count,
382+
)
383+
384+
scale_in_threshold = utils.parse_int(config.get("scale_in_threshold"), 30)
385+
scale_out_threshold = utils.parse_int(config.get("scale_out_threshold"), 70)
386+
is_enabled = config.get("is_enabled", True)
387+
cool_down_in_seconds = config.get("cool_down_in_seconds", None)
388+
389+
threshold_details = data_science_models.ThresholdBasedAutoScalingPolicyDetails(
390+
auto_scaling_policy_type="THRESHOLD",
391+
maximum_instance_count=maximum_instance_count,
392+
minimum_instance_count=minimum_instance_count,
393+
initial_instance_count=initial_instance_count,
394+
rules=[
395+
data_science_models.PredefinedMetricExpressionRule(
396+
metric_type=metric_type,
397+
scale_in_configuration=data_science_models.PredefinedExpressionThresholdScalingConfiguration(
398+
threshold=scale_in_threshold
399+
),
400+
scale_out_configuration=data_science_models.PredefinedExpressionThresholdScalingConfiguration(
401+
threshold=scale_out_threshold
402+
),
403+
)
404+
],
405+
)
406+
407+
auto_scaling_policy = data_science_models.AutoScalingPolicy(
408+
policy_type="AUTOSCALING",
409+
is_enabled=bool(is_enabled),
410+
auto_scaling_policies=[threshold_details],
411+
)
412+
if cool_down_in_seconds is not None:
413+
auto_scaling_policy.cool_down_in_seconds = utils.parse_int(
414+
cool_down_in_seconds
415+
)
416+
417+
model_configuration_details_object.scaling_policy = auto_scaling_policy
292418

293419
if "bandwidth_mbps" in config:
294420
model_configuration_details_object.bandwidth_mbps = config["bandwidth_mbps"]

0 commit comments

Comments
 (0)