Skip to content

Commit cacc99e

Browse files
authored
Merge branch 'main' into fix_operators_copytree
2 parents fe7197c + 779de5c commit cacc99e

File tree

19 files changed

+906
-22
lines changed

19 files changed

+906
-22
lines changed

CODEOWNERS

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
* @darenr @mayoor @mrDzurb @VipulMascarenhas @qiuosier @ahosler
1+
* @mayoor @mrDzurb @VipulMascarenhas @lu-ohai @smfirmin @sambitkumohanty

ads/common/utils.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -237,6 +237,31 @@ def parse_bool(value: Any) -> bool:
237237
return bool(value)
238238

239239

240+
def parse_int(value: Any, default: Optional[int] = None) -> Optional[int]:
    """Converts a value to int.

    Parameters
    ----------
    value: Any
        The value to convert.
    default: Optional[int]
        The value to return if `value` is None.

    Returns
    -------
    Optional[int]
        The int value or `default`.

    Raises
    ------
    ValueError
        If `value` cannot be converted to int.
    """
    if value is None:
        return default
    try:
        return int(value)
    except (TypeError, OverflowError) as ex:
        # `int()` raises TypeError for unsupported types (e.g. list, dict) and
        # OverflowError for infinite floats. Normalize both to ValueError so
        # the documented contract holds for every non-convertible input.
        raise ValueError(f"Cannot convert {value!r} to int.") from ex
263+
264+
240265
def read_file(file_path: str, **kwargs) -> str:
241266
try:
242267
with fsspec.open(file_path, "r", **kwargs.get("auth", {})) as f:

ads/model/datascience_model_group.py

Lines changed: 93 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,17 @@
11
#!/usr/bin/env python
22

3-
# Copyright (c) 2025 Oracle and/or its affiliates.
3+
# Copyright (c) 2025, 2026 Oracle and/or its affiliates.
44
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
55

66
import copy
7+
import logging
8+
import os
79
from typing import Dict, List, Union
810

911
from ads.common.utils import batch_convert_case
1012
from ads.config import COMPARTMENT_OCID, PROJECT_OCID
1113
from ads.jobs.builders.base import Builder
14+
from ads.model.artifact import _validate_artifact_dir
1215
from ads.model.model_metadata import ModelCustomMetadata
1316
from ads.model.service.oci_datascience_model_group import OCIDataScienceModelGroup
1417

@@ -36,6 +39,16 @@
3639
ALLOWED_CREATE_TYPES = ["CREATE", "CLONE"]
3740
MODEL_GROUP_KIND = "datascienceModelGroup"
3841

42+
logger = logging.getLogger(__name__)
43+
44+
45+
class ModelGroupArtifactNotFoundError(Exception):  # pragma: no cover
    """Signals that a model group artifact path does not exist."""
47+
48+
49+
class ModelGroupArtifactValidationError(ValueError):  # pragma: no cover
    """Signals that a model group artifact directory failed validation."""
51+
3952

4053
class DataScienceModelGroup(Builder):
4154
"""Represents a Data Science Model Group.
@@ -153,6 +166,7 @@ class DataScienceModelGroup(Builder):
153166
CONST_CREATED_BY = "createdBy"
154167
CONST_VERSION_LABEL = "versionLabel"
155168
CONST_VERSION_ID = "versionId"
169+
CONST_ARTIFACT = "artifact"
156170

157171
attribute_map = {
158172
CONST_ID: "id",
@@ -201,6 +215,28 @@ def __init__(self, spec=None, **kwargs):
201215
super().__init__(spec, **kwargs)
202216
self.dsc_model_group = OCIDataScienceModelGroup()
203217

218+
@property
def artifact(self) -> str:
    """Location of the model group artifact, as stored in the spec.

    The value is a path to either a folder or a zip archive. For
    homogeneous model groups the artifact is expected to be a standard
    model deployment runtime artifact with (at minimum) `score.py` and
    `runtime.yaml` at the top level.
    """
    location = self.get_spec(self.CONST_ARTIFACT)
    return location
228+
229+
def with_artifact(self, uri: str) -> "DataScienceModelGroup":
    """Stores the location of the model group artifact in the spec.

    Parameters
    ----------
    uri: str
        Path to artifact directory or to the ZIP archive.

    Returns
    -------
    DataScienceModelGroup
        The result of `set_spec` (presumably this instance, so calls can be
        chained -- confirm against `Builder.set_spec`).
    """
    spec_key = self.CONST_ARTIFACT
    return self.set_spec(spec_key, uri)
239+
204240
@property
205241
def kind(self) -> str:
206242
"""The kind of the model group as showing in a YAML."""
@@ -508,7 +544,61 @@ def create(
508544
poll_interval=poll_interval,
509545
)
510546

511-
return self._update_from_oci_model(response)
547+
self._update_from_oci_model(response)
548+
549+
# Upload artifact for homogeneous groups only.
550+
if not self.base_model_id and self.artifact:
551+
self.upload_artifact()
552+
553+
return self
554+
555+
def upload_artifact(self) -> None:
    """Checks the configured artifact and uploads it to the model group.

    The artifact location is expanded (`~`) and made absolute. When it
    points to a directory, the directory is checked with
    `_validate_artifact_dir` before the upload; any other existing path is
    handed to the upload call unchanged.

    Notes
    -----
    This currently supports homogeneous model groups only.

    Raises
    ------
    ValueError
        If the model group has no id yet (it has not been created).
    ModelGroupArtifactNotFoundError
        If the resolved artifact path does not exist.
    ModelGroupArtifactValidationError
        If the artifact directory does not pass validation.
    RuntimeError
        If an `AttributeError` is raised while loading the model group or
        uploading the artifact.
    """
    if not self.id:
        raise ValueError(
            "Model group needs to be created before uploading artifacts."
        )

    location = self.artifact
    if not location:
        # Nothing to upload; tell the user how to configure an artifact.
        logger.info(
            "Model group artifact location not provided. "
            "Use `.with_artifact(<path>)` to upload a deployment runtime artifact."
        )
        return

    expanded = os.path.expanduser(str(location))
    local_path = os.path.abspath(expanded)

    if not os.path.exists(local_path):
        raise ModelGroupArtifactNotFoundError(
            f"The artifact path `{location}` does not exist."
        )

    # Only directories are structurally validated; archives are uploaded as is.
    if os.path.isdir(local_path):
        try:
            _validate_artifact_dir(local_path)
        except Exception as err:
            raise ModelGroupArtifactValidationError(
                f"Invalid model group artifact directory structure at `{local_path}`. "
                f"Expected top-level `score.py` and `runtime.yaml`. See: {err}"
            ) from err

    # The upload itself is implemented in OCIDataScienceModelGroup.
    try:
        self.dsc_model_group = OCIDataScienceModelGroup.from_id(self.id)
        self.dsc_model_group.create_model_group_artifact(local_path)
        logger.info("Model group artifact upload succeeded.")
    except AttributeError as err:
        raise RuntimeError(
            "Model group artifact upload requires an OCI SDK that supports "
            f"`create_model_group_artifact`. Please upgrade `oci` package. See: {err}"
        ) from err
512602

513603
def _build_model_group_details(self) -> dict:
514604
"""Builds model group details dict for creating or updating oci model group."""
@@ -544,6 +634,7 @@ def _build_model_group_details(self) -> dict:
544634
build_model_group_details.pop(self.CONST_CUSTOM_METADATA_LIST, None)
545635
build_model_group_details.pop(self.CONST_MEMBER_MODELS, None)
546636
build_model_group_details.pop(self.CONST_BASE_MODEL_ID, None)
637+
build_model_group_details.pop(self.CONST_ARTIFACT, None)
547638
build_model_group_details.update(
548639
{
549640
self.CONST_COMPARTMENT_ID: self.compartment_id or COMPARTMENT_OCID,

ads/model/deployment/model_deployment.py

Lines changed: 62 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1717,11 +1717,68 @@ def _build_model_deployment_configuration_details(self) -> Dict:
17171717
infrastructure.capacity_reservation_ids
17181718
)
17191719

1720-
scaling_policy = {
1721-
infrastructure.CONST_POLICY_TYPE: "FIXED_SIZE",
1722-
infrastructure.CONST_INSTANCE_COUNT: infrastructure.replica
1723-
or DEFAULT_REPLICA,
1724-
}
1720+
def _drop_none_values(d: Dict) -> Dict:
1721+
"""Drops keys with None values from the provided dict."""
1722+
return {k: v for k, v in d.items() if v is not None}
1723+
1724+
# Fixed-size is the default. If autoscaling is configured on infrastructure,
1725+
# emit an AUTOSCALING policy (supported for both SINGLE_MODEL and MODEL_GROUP).
1726+
auto_scaling = getattr(infrastructure, "auto_scaling", None) or {}
1727+
if auto_scaling:
1728+
scaling_type = str(auto_scaling.get("scalingType", "") or "").lower()
1729+
metric_type = scaling_type.upper()
1730+
1731+
scaling_policy = {
1732+
infrastructure.CONST_POLICY_TYPE: "AUTOSCALING",
1733+
"isEnabled": auto_scaling.get("isEnabled", True),
1734+
"coolDownInSeconds": auto_scaling.get("coolDownInSeconds", None),
1735+
"autoScalingPolicies": [
1736+
_drop_none_values(
1737+
{
1738+
"autoScalingPolicyType": "THRESHOLD",
1739+
"maximumInstanceCount": auto_scaling.get(
1740+
"maximumInstanceCount", 3
1741+
),
1742+
"minimumInstanceCount": auto_scaling.get(
1743+
"minimumInstanceCount", 1
1744+
),
1745+
"initialInstanceCount": auto_scaling.get(
1746+
"initialInstanceCount",
1747+
infrastructure.replica or DEFAULT_REPLICA,
1748+
),
1749+
"rules": [
1750+
{
1751+
"metricExpressionRuleType": "PREDEFINED_EXPRESSION",
1752+
"metricType": metric_type,
1753+
"scaleInConfiguration": _drop_none_values(
1754+
{
1755+
"scalingConfigurationType": "THRESHOLD",
1756+
"threshold": auto_scaling.get(
1757+
"scaleInThreshold", 30
1758+
),
1759+
}
1760+
),
1761+
"scaleOutConfiguration": _drop_none_values(
1762+
{
1763+
"scalingConfigurationType": "THRESHOLD",
1764+
"threshold": auto_scaling.get(
1765+
"scaleOutThreshold", 70
1766+
),
1767+
}
1768+
),
1769+
}
1770+
],
1771+
}
1772+
)
1773+
],
1774+
}
1775+
scaling_policy = _drop_none_values(scaling_policy)
1776+
else:
1777+
scaling_policy = {
1778+
infrastructure.CONST_POLICY_TYPE: "FIXED_SIZE",
1779+
infrastructure.CONST_INSTANCE_COUNT: infrastructure.replica
1780+
or DEFAULT_REPLICA,
1781+
}
17251782

17261783
if not (runtime.model_uri or runtime.model_group_id):
17271784
raise ValueError(

ads/model/deployment/model_deployment_infrastructure.py

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,26 @@ class ModelDeploymentInfrastructure(Builder):
155155
CONST_PRIVATE_ENDPOINT_ID = "privateEndpointId"
156156
CONST_CAPACITY_RESERVATION_IDS = "capacityReservationIds"
157157

158+
# Autoscaling config (builder-only; used when constructing `scalingPolicy` payload).
159+
# This can be applied to both SINGLE_MODEL and MODEL_GROUP deployments.
160+
CONST_AUTO_SCALING = "autoScaling"
161+
CONST_SCALING_TYPE = "scalingType"
162+
CONST_MINIMUM_INSTANCE_COUNT = "minimumInstanceCount"
163+
CONST_MAXIMUM_INSTANCE_COUNT = "maximumInstanceCount"
164+
CONST_INITIAL_INSTANCE_COUNT = "initialInstanceCount"
165+
CONST_SCALE_IN_THRESHOLD = "scaleInThreshold"
166+
CONST_SCALE_OUT_THRESHOLD = "scaleOutThreshold"
167+
CONST_COOL_DOWN_IN_SECONDS = "coolDownInSeconds"
168+
CONST_IS_ENABLED = "isEnabled"
169+
170+
# Autoscaling constants (for `with_auto_scaling`).
171+
CONST_SCALING_TYPE_CPU_UTILIZATION = "cpu_utilization"
172+
CONST_SCALING_TYPE_MEMORY_UTILIZATION = "memory_utilization"
173+
CONST_SUPPORTED_AUTO_SCALING_TYPES = (
174+
CONST_SCALING_TYPE_CPU_UTILIZATION,
175+
CONST_SCALING_TYPE_MEMORY_UTILIZATION,
176+
)
177+
158178
attribute_map = {
159179
CONST_PROJECT_ID: "project_id",
160180
CONST_COMPARTMENT_ID: "compartment_id",
@@ -172,6 +192,7 @@ class ModelDeploymentInfrastructure(Builder):
172192
CONST_SUBNET_ID: "subnet_id",
173193
CONST_PRIVATE_ENDPOINT_ID: "private_endpoint_id",
174194
CONST_CAPACITY_RESERVATION_IDS: "capacity_reservation_ids",
195+
CONST_AUTO_SCALING: "auto_scaling",
175196
}
176197

177198
shape_config_details_attribute_map = {
@@ -720,6 +741,83 @@ def with_capacity_reservation_ids(
720741
self.CONST_CAPACITY_RESERVATION_IDS, capacity_reservation_ids
721742
)
722743

744+
@property
def auto_scaling(self) -> Dict:
    """Autoscaling settings stored on this infrastructure.

    The settings are read when the deployment payload is built in order to
    produce an `AUTOSCALING` scaling policy.

    Returns
    -------
    Dict
        The stored autoscaling configuration, or an empty dict when none
        has been set.
    """
    spec_key = self.CONST_AUTO_SCALING
    return self.get_spec(spec_key, {})
757+
758+
def with_auto_scaling(
    self,
    scaling_type: str,
    minimum_instance_count: int = 1,
    maximum_instance_count: int = 3,
    initial_instance_count: int = None,
    scale_in_threshold: int = 30,
    scale_out_threshold: int = 70,
    cool_down_in_seconds: int = None,
    is_enabled: bool = True,
) -> "ModelDeploymentInfrastructure":
    """Enables threshold-based autoscaling.

    Parameters
    ----------
    scaling_type: str
        One of ["cpu_utilization", "memory_utilization"]. Matching is
        case-insensitive and ignores surrounding whitespace.
    minimum_instance_count: int
        Minimum number of instances (default: 1).
    maximum_instance_count: int
        Maximum number of instances (default: 3).
    initial_instance_count: int
        Initial number of instances.
        Defaults to `replica` if set, otherwise `minimum_instance_count`.
    scale_in_threshold: int
        Threshold for scaling in (default: 30).
    scale_out_threshold: int
        Threshold for scaling out (default: 70).
    cool_down_in_seconds: int
        Optional cooldown period. Omitted from the stored configuration
        when not provided.
    is_enabled: bool
        Whether autoscaling is enabled (default: True).

    Returns
    -------
    ModelDeploymentInfrastructure
        The ModelDeploymentInfrastructure instance (self).

    Raises
    ------
    ValueError
        If `scaling_type` is not supported, or if `minimum_instance_count`
        is greater than `maximum_instance_count`.
    """
    scaling_type = str(scaling_type or "").strip().lower()
    if scaling_type not in self.CONST_SUPPORTED_AUTO_SCALING_TYPES:
        raise ValueError(
            "Invalid scaling_type: {}. Allowed values: {}.".format(
                scaling_type, list(self.CONST_SUPPORTED_AUTO_SCALING_TYPES)
            )
        )

    # Fail fast on an impossible instance range instead of storing a
    # configuration that can only be rejected later at deployment time.
    if (
        minimum_instance_count is not None
        and maximum_instance_count is not None
        and minimum_instance_count > maximum_instance_count
    ):
        raise ValueError(
            "Invalid instance range: minimum_instance_count ({}) must not be "
            "greater than maximum_instance_count ({}).".format(
                minimum_instance_count, maximum_instance_count
            )
        )

    if initial_instance_count is None:
        initial_instance_count = self.replica or minimum_instance_count

    config = {
        self.CONST_SCALING_TYPE: scaling_type,
        self.CONST_MINIMUM_INSTANCE_COUNT: minimum_instance_count,
        self.CONST_MAXIMUM_INSTANCE_COUNT: maximum_instance_count,
        self.CONST_INITIAL_INSTANCE_COUNT: initial_instance_count,
        self.CONST_SCALE_IN_THRESHOLD: scale_in_threshold,
        self.CONST_SCALE_OUT_THRESHOLD: scale_out_threshold,
        self.CONST_IS_ENABLED: bool(is_enabled),
    }
    # The cooldown key is only stored when the caller supplied a value.
    if cool_down_in_seconds is not None:
        config[self.CONST_COOL_DOWN_IN_SECONDS] = cool_down_in_seconds

    return self.set_spec(self.CONST_AUTO_SCALING, config)
820+
723821
def init(self, **kwargs) -> "ModelDeploymentInfrastructure":
724822
"""Initializes a starter specification for the ModelDeploymentInfrastructure.
725823

0 commit comments

Comments
 (0)