From 2fb8a7d8b18b169418f669e3866ce848fe3c2a7d Mon Sep 17 00:00:00 2001 From: Safoine El khabich <34200873+safoinme@users.noreply.github.com> Date: Mon, 3 Jun 2024 12:58:48 +0100 Subject: [PATCH 01/43] initial commit on vertex ai deployer and model registry --- src/zenml/integrations/gcp/__init__.py | 4 + .../integrations/gcp/flavors/__init__.py | 6 + .../flavors/vertex_model_deployer_flavor.py | 149 ++++++++++ .../gcp/model_deployers/__init__.py | 20 ++ .../model_deployers/vertex_model_deployer.py | 242 ++++++++++++++++ .../integrations/gcp/services/__init__.py | 19 ++ .../gcp/services/vertex_deployment.py | 263 ++++++++++++++++++ 7 files changed, 703 insertions(+) create mode 100644 src/zenml/integrations/gcp/flavors/vertex_model_deployer_flavor.py create mode 100644 src/zenml/integrations/gcp/model_deployers/__init__.py create mode 100644 src/zenml/integrations/gcp/model_deployers/vertex_model_deployer.py create mode 100644 src/zenml/integrations/gcp/services/__init__.py create mode 100644 src/zenml/integrations/gcp/services/vertex_deployment.py diff --git a/src/zenml/integrations/gcp/__init__.py b/src/zenml/integrations/gcp/__init__.py index 0c955c8eeec..5d5da217977 100644 --- a/src/zenml/integrations/gcp/__init__.py +++ b/src/zenml/integrations/gcp/__init__.py @@ -33,6 +33,10 @@ GCP_VERTEX_ORCHESTRATOR_FLAVOR = "vertex" GCP_VERTEX_STEP_OPERATOR_FLAVOR = "vertex" +# Model deployer constants +VERTEX_MODEL_DEPLOYER_FLAVOR = "vertex" +VERTEX_SERVICE_ARTIFACT = "vertex_deployment_service" + # Service connector constants GCP_CONNECTOR_TYPE = "gcp" GCP_RESOURCE_TYPE = "gcp-generic" diff --git a/src/zenml/integrations/gcp/flavors/__init__.py b/src/zenml/integrations/gcp/flavors/__init__.py index 73bb6259aa5..1328ec75b2d 100644 --- a/src/zenml/integrations/gcp/flavors/__init__.py +++ b/src/zenml/integrations/gcp/flavors/__init__.py @@ -29,6 +29,10 @@ VertexStepOperatorConfig, VertexStepOperatorFlavor, ) +from zenml.integrations.gcp.flavors.vertex_model_deployer_flavor import ( + VertexModelDeployerConfig, + VertexModelDeployerFlavor, +) __all__ = [ "GCPArtifactStoreFlavor", @@ -39,4 +43,6 @@ "VertexOrchestratorConfig", "VertexStepOperatorFlavor", "VertexStepOperatorConfig", + "VertexModelDeployerFlavor", + "VertexModelDeployerConfig", ] diff --git a/src/zenml/integrations/gcp/flavors/vertex_model_deployer_flavor.py b/src/zenml/integrations/gcp/flavors/vertex_model_deployer_flavor.py new file mode 100644 index 00000000000..cb798cc19c8 --- /dev/null +++ b/src/zenml/integrations/gcp/flavors/vertex_model_deployer_flavor.py @@ -0,0 +1,149 @@ +# Copyright (c) ZenML GmbH 2024. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing +# permissions and limitations under the License. 
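The constants added above (`VERTEX_MODEL_DEPLOYER_FLAVOR`, `VERTEX_SERVICE_ARTIFACT`) are how the rest of ZenML discovers the new flavor. A minimal sketch of wiring a stack component to it once the integration is installed — the component name and configuration keys here are illustrative assumptions, with the authoritative field list defined by `VertexModelDeployerConfig` below:

```python
from zenml.client import Client
from zenml.enums import StackComponentType

client = Client()
# "vertex" matches the VERTEX_MODEL_DEPLOYER_FLAVOR constant added above.
client.create_stack_component(
    name="vertex-deployer",
    flavor="vertex",
    component_type=StackComponentType.MODEL_DEPLOYER,
    configuration={"location": "europe-west4"},  # illustrative values
)
```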
+"""Vertex AI model deployer flavor.""" + +from typing import TYPE_CHECKING, Any, Dict, Optional, Sequence, Type + +from pydantic import BaseModel + +from zenml.integrations.gcp import VERTEX_MODEL_DEPLOYER_FLAVOR +from zenml.model_deployers.base_model_deployer import ( + BaseModelDeployerConfig, + BaseModelDeployerFlavor, +) +from zenml.utils.secret_utils import SecretField + +if TYPE_CHECKING: + from zenml.integrations.gcp.model_deployers.vertex_model_deployer import ( + VertexModelDeployer, + ) + + +class VertexBaseConfig(BaseModel): + """Vertex AI Inference Endpoint configuration.""" + + location: Optional[str] = None + version: Optional[str] = None + serving_container_image_uri: Optional[str] = None + artifact_uri: Optional[str] = None + model_id: Optional[str] = None + is_default_version: Optional[bool] = None + serving_container_command: Optional[Sequence[str]] = None, + serving_container_args: Optional[Sequence[str]] = None, + serving_container_environment_variables: Optional[ + Dict[str, str] + ] = None, + serving_container_ports: Optional[Sequence[int]] = None, + serving_container_grpc_ports: Optional[Sequence[int]] = None, + deployed_model_display_name: Optional[str] = None + traffic_percentage: Optional[int] = 0 + traffic_split: Optional[Dict[str, int]] = None + machine_type: Optional[str] = None + accelerator_type: Optional[str] = None + accelerator_count: Optional[int] = None + min_replica_count: Optional[int] = None + max_replica_count: Optional[int] = None + service_account: Optional[str] = None + metadata: Optional[Dict[str, str]] = None + network: Optional[str] = None + encryption_spec_key_name: Optional[str] = None + sync=True, + deploy_request_timeout: Optional[int] = None + autoscaling_target_cpu_utilization: Optional[float] = None + autoscaling_target_accelerator_duty_cycle: Optional[float] = None + enable_access_logging: Optional[bool] = None + disable_container_logging: Optional[bool] = None + + + + + +class VertexModelDeployerConfig( + BaseModelDeployerConfig, VertexBaseConfig +): + """Configuration for the Vertex AI model deployer. + + Attributes: + model_name: The name of the model. + project_id: The project ID. + location: The location of the model. + version: The version of the model. + """ + + # The namespace to list endpoints for. Set to `"*"` to list all endpoints + # from all namespaces (i.e. personal namespace and all orgs the user belongs to). + model_name: str + + + +class VertexModelDeployerFlavor(BaseModelDeployerFlavor): + """Vertex AI Endpoint model deployer flavor.""" + + @property + def name(self) -> str: + """Name of the flavor. + + Returns: + The name of the flavor. + """ + return VERTEX_MODEL_DEPLOYER_FLAVOR + + @property + def docs_url(self) -> Optional[str]: + """A url to point at docs explaining this flavor. + + Returns: + A flavor docs url. + """ + return self.generate_default_docs_url() + + @property + def sdk_docs_url(self) -> Optional[str]: + """A url to point at SDK docs explaining this flavor. + + Returns: + A flavor SDK docs url. + """ + return self.generate_default_sdk_docs_url() + + @property + def logo_url(self) -> str: + """A url to represent the flavor in the dashboard. + + Returns: + The flavor logo. + """ + return "https://public-flavor-logos.s3.eu-central-1.amazonaws.com/model_registry/vertexai.png" + + @property + def config_class(self) -> Type[VertexModelDeployerConfig]: + """Returns `VertexModelDeployerConfig` config class. + + Returns: + The config class. 
+ """ + return VertexModelDeployerConfig + + @property + def implementation_class(self) -> Type["VertexModelDeployer"]: + """Implementation class for this flavor. + + Returns: + The implementation class. + """ + from zenml.integrations.gcp.model_deployers.vertex_model_deployer import ( + VertexModelDeployer, + ) + + return VertexModelDeployer diff --git a/src/zenml/integrations/gcp/model_deployers/__init__.py b/src/zenml/integrations/gcp/model_deployers/__init__.py new file mode 100644 index 00000000000..99ee319f891 --- /dev/null +++ b/src/zenml/integrations/gcp/model_deployers/__init__.py @@ -0,0 +1,20 @@ +# Copyright (c) ZenML GmbH 2023. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing +# permissions and limitations under the License. +"""Initialization of the Vertex AI model deployers.""" + +from zenml.integrations.gcp.model_deployers.vertex_model_deployer import ( # noqa + VertexMdelDeployer, +) + +__all__ = ["VertexMdelDeployer"] diff --git a/src/zenml/integrations/gcp/model_deployers/vertex_model_deployer.py b/src/zenml/integrations/gcp/model_deployers/vertex_model_deployer.py new file mode 100644 index 00000000000..35a20890a0e --- /dev/null +++ b/src/zenml/integrations/gcp/model_deployers/vertex_model_deployer.py @@ -0,0 +1,242 @@ +# Copyright (c) ZenML GmbH 2024. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing +# permissions and limitations under the License. 
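Note that `implementation_class` performs its import inside the property, so merely registering the flavor never pulls in the GCP SDK. A short sketch of that resolution path, assuming the integration is importable:

```python
from zenml.integrations.gcp.flavors import VertexModelDeployerFlavor

flavor = VertexModelDeployerFlavor()
# The GCP-dependent deployer module is only imported here, on demand.
deployer_cls = flavor.implementation_class
assert flavor.name == "vertex"
```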
+"""Implementation of the Vertex AI Model Deployer.""" + +from typing import ClassVar, Dict, Optional, Tuple, Type, cast +from uuid import UUID + +from zenml.analytics.enums import AnalyticsEvent +from zenml.analytics.utils import track_handler +from zenml.client import Client +from zenml.integrations.gcp import VERTEX_SERVICE_ARTIFACT +from zenml.integrations.gcp.flavors.vertex_model_deployer_flavor import ( + VertexModelDeployerConfig, + VertexModelDeployerFlavor, +) +from zenml.integrations.gcp.google_credentials_mixin import GoogleCredentialsMixin +from zenml.integrations.gcp.services.vertex_deployment import ( + VertexDeploymentService, + VertexServiceConfig, +) +from zenml.logger import get_logger +from zenml.model_deployers import BaseModelDeployer +from zenml.model_deployers.base_model_deployer import ( + DEFAULT_DEPLOYMENT_START_STOP_TIMEOUT, + BaseModelDeployerFlavor, +) +from zenml.services import BaseService, ServiceConfig +from zenml.stack.stack import Stack +from zenml.stack.stack_validator import StackValidator + +logger = get_logger(__name__) + +class VertexModelDeployer(BaseModelDeployer, GoogleCredentialsMixin): + """Vertex implementation of the BaseModelDeployer.""" + + @property + def config(self) -> VertexModelDeployerConfig: + """Config class for the Vertex AI Model deployer settings class. + + Returns: + The configuration. + """ + return cast(VertexModelDeployerConfig, self._config) + + @property + def validator(self) -> Optional[StackValidator]: + """Validates the stack. + + Returns: + A validator that checks that the stack contains a remote artifact + store. + """ + + def _validate_if_secret_or_token_is_present( + stack: "Stack", + ) -> Tuple[bool, str]: + """Check if secret or token is present in the stack. + + Args: + stack: The stack to validate. + + Returns: + A tuple with a boolean indicating whether the stack is valid + and a message describing the validation result. + """ + return bool(self.config.token or self.config.secret_name), ( + "The Vertex AI model deployer requires either a secret name" + " or a token to be present in the stack." + ) + + return StackValidator( + custom_validation_function=_validate_if_secret_or_token_is_present, + ) + + def _create_new_service( + self, id: UUID, timeout: int, config: VertexServiceConfig + ) -> VertexDeploymentService: + """Creates a new VertexDeploymentService. + + Args: + id: the UUID of the model to be deployed with Vertex AI model deployer. + timeout: the timeout in seconds to wait for the Vertex AI inference endpoint + to be provisioned and successfully started or updated. + config: the configuration of the model to be deployed with Vertex AI model deployer. + + Returns: + The VertexServiceConfig object that can be used to interact + with the Vertex AI inference endpoint. + """ + # create a new service for the new model + service = VertexDeploymentService(uuid=id, config=config) + + logger.info( + f"Creating an artifact {VERTEX_SERVICE_ARTIFACT} with service instance attached as metadata." + " If there's an active pipeline and/or model this artifact will be associated with it." + ) + service.start(timeout=timeout) + return service + + def _clean_up_existing_service( + self, + timeout: int, + force: bool, + existing_service: VertexDeploymentService, + ) -> None: + """Stop existing services. + + Args: + timeout: the timeout in seconds to wait for the Vertex AI + deployment to be stopped. 
+ force: if True, force the service to stop + existing_service: Existing Vertex AI deployment service + """ + # stop the older service + existing_service.stop(timeout=timeout, force=force) + + def perform_deploy_model( + self, + id: UUID, + config: ServiceConfig, + timeout: int = DEFAULT_DEPLOYMENT_START_STOP_TIMEOUT, + ) -> BaseService: + """Create a new Vertex AI deployment service or update an existing one. + + This should serve the supplied model and deployment configuration. + + Args: + id: the UUID of the model to be deployed with Vertex AI. + config: the configuration of the model to be deployed with Vertex AI. + timeout: the timeout in seconds to wait for the Vertex AI endpoint + to be provisioned and successfully started or updated. If set + to 0, the method will return immediately after the Vertex AI + server is provisioned, without waiting for it to fully start. + + Returns: + The ZenML Vertex AI deployment service object that can be used to + interact with the remote Vertex AI inference endpoint server. + """ + with track_handler(AnalyticsEvent.MODEL_DEPLOYED) as analytics_handler: + config = cast(VertexServiceConfig, config) + # create a new VertexDeploymentService instance + service = self._create_new_service( + id=id, timeout=timeout, config=config + ) + logger.info( + f"Creating a new Vertex AI inference endpoint service: {service}" + ) + # Add telemetry with metadata that gets the stack metadata and + # differentiates between pure model and custom code deployments + stack = Client().active_stack + stack_metadata = { + component_type.value: component.flavor + for component_type, component in stack.components.items() + } + analytics_handler.metadata = { + "store_type": Client().zen_store.type.value, + **stack_metadata, + } + + return service + + def perform_stop_model( + self, + service: BaseService, + timeout: int = DEFAULT_DEPLOYMENT_START_STOP_TIMEOUT, + force: bool = False, + ) -> BaseService: + """Method to stop a model server. + + Args: + service: The service to stop. + timeout: Timeout in seconds to wait for the service to stop. + force: If True, force the service to stop. + + Returns: + The stopped service. + """ + service.stop(timeout=timeout, force=force) + return service + + def perform_start_model( + self, + service: BaseService, + timeout: int = DEFAULT_DEPLOYMENT_START_STOP_TIMEOUT, + ) -> BaseService: + """Method to start a model server. + + Args: + service: The service to start. + timeout: Timeout in seconds to wait for the service to start. + + Returns: + The started service. + """ + service.start(timeout=timeout) + return service + + def perform_delete_model( + self, + service: BaseService, + timeout: int = DEFAULT_DEPLOYMENT_START_STOP_TIMEOUT, + force: bool = False, + ) -> None: + """Method to delete all configuration of a model server. + + Args: + service: The service to delete. + timeout: Timeout in seconds to wait for the service to stop. + force: If True, force the service to stop. + """ + service = cast(VertexDeploymentService, service) + self._clean_up_existing_service( + existing_service=service, timeout=timeout, force=force + ) + + @staticmethod + def get_model_server_info( # type: ignore[override] + service_instance: "VertexDeploymentService", + ) -> Dict[str, Optional[str]]: + """Return implementation specific information that might be relevant to the user. + + Args: + service_instance: Instance of a VertexDeploymentService + + Returns: + Model server information. 
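`get_model_server_info` is what surfaces endpoint details to callers. A rough end-to-end sketch of the service lifecycle as defined in this patch — the config values are placeholders, and the credentials tuple is resolved via Application Default Credentials:

```python
from uuid import uuid4

import google.auth

# ADC yields exactly the (credentials, project) pair the service expects.
credentials, project = google.auth.default()

config = VertexServiceConfig(service_name="vertex-demo")  # illustrative
service = VertexDeploymentService(
    uuid=uuid4(),
    config=config,
    credentials=(credentials, project),
)
service.start(timeout=300)
print(VertexModelDeployer.get_model_server_info(service))
service.stop(timeout=300)
```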
+        """
+        return {
+            "PREDICTION_URL": service_instance.get_prediction_url(),
+            "HEALTH_CHECK_URL": service_instance.get_healthcheck_url(),
+        }
diff --git a/src/zenml/integrations/gcp/services/__init__.py b/src/zenml/integrations/gcp/services/__init__.py
new file mode 100644
index 00000000000..b9f858b5302
--- /dev/null
+++ b/src/zenml/integrations/gcp/services/__init__.py
@@ -0,0 +1,19 @@
+#  Copyright (c) ZenML GmbH 2023. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at:
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+#  or implied. See the License for the specific language governing
+#  permissions and limitations under the License.
+"""Initialization of the Vertex AI services."""
+
+from zenml.integrations.gcp.services.vertex_deployment import (  # noqa
+    VertexDeploymentService,
+    VertexServiceConfig,
+)
diff --git a/src/zenml/integrations/gcp/services/vertex_deployment.py b/src/zenml/integrations/gcp/services/vertex_deployment.py
new file mode 100644
index 00000000000..98df9d28e46
--- /dev/null
+++ b/src/zenml/integrations/gcp/services/vertex_deployment.py
@@ -0,0 +1,263 @@
+# Copyright (c) ZenML GmbH 2024. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+#       https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+# or implied. See the License for the specific language governing
+# permissions and limitations under the License.
+"""Implementation of the Vertex AI Deployment service."""
+
+from typing import TYPE_CHECKING, Any, Generator, Optional, Tuple, cast
+
+from pydantic import Field
+
+from google.cloud import aiplatform
+
+from zenml.client import Client
+from zenml.integrations.gcp.flavors.vertex_model_deployer_flavor import (
+    VertexBaseConfig,
+)
+from zenml.logger import get_logger
+from zenml.services import ServiceState, ServiceStatus, ServiceType
+from zenml.services.service import BaseDeploymentService, ServiceConfig
+
+if TYPE_CHECKING:
+    from google.auth.credentials import Credentials
+
+logger = get_logger(__name__)
+
+POLLING_TIMEOUT = 1200
+UUID_SLICE_LENGTH: int = 8
+
+
+class VertexServiceConfig(VertexBaseConfig, ServiceConfig):
+    """Vertex AI service configurations."""
+
+
+class VertexServiceStatus(ServiceStatus):
+    """Vertex AI service status."""
+
+
+class VertexDeploymentService(BaseDeploymentService):
+    """Vertex AI model deployment service.
+
+    Attributes:
+        SERVICE_TYPE: a service type descriptor with information describing
+            the Vertex AI deployment service class
+        config: service configuration
+    """
+
+    SERVICE_TYPE = ServiceType(
+        name="vertex-deployment",
+        type="model-serving",
+        flavor="vertex",
+        description="Vertex AI inference endpoint prediction service",
+    )
+    config: VertexServiceConfig
+    status: VertexServiceStatus = Field(
+        default_factory=lambda: VertexServiceStatus()
+    )
+
+    def __init__(self, config: VertexServiceConfig, credentials: Tuple["Credentials", str], **attrs: Any):
+        """Initialize the Vertex AI deployment service.
+
+        Args:
+            config: service configuration
+            attrs: additional attributes to set on the service
+        """
+        super().__init__(config=config, **attrs)
+        self._config = config
+        self._credentials, self._project = credentials  # unpack the (credentials, project ID) tuple
+
+    @property
+    def config(self) -> VertexServiceConfig:
+        """Returns the config of the deployment service.
+
+        Returns:
+            The config of the deployment service.
+        """
+        return cast(VertexServiceConfig, self._config)
+
+    def get_token(self) -> str:
+        """Get the Vertex AI token.
+
+        Raises:
+            ValueError: If token not found.
+
+        Returns:
+            Vertex AI token.
+        """
+        client = Client()
+        token = None
+        if self.config.secret_name:
+            secret = client.get_secret(self.config.secret_name)
+            token = secret.secret_values["token"]
+        else:
+            from zenml.integrations.gcp.model_deployers.vertex_model_deployer import (
+                VertexModelDeployer,
+            )
+
+            model_deployer = client.active_stack.model_deployer
+            if not isinstance(model_deployer, VertexModelDeployer):
+                raise ValueError(
+                    "VertexModelDeployer is not active in the stack."
+                )
+            token = model_deployer.config.token or None
+        if not token:
+            raise ValueError("Token not found.")
+        return token
+
+    @property
+    def vertex_model(self) -> aiplatform.Model:
+        """Get the Vertex AI model resource for this service.
+
+        Returns:
+            The Vertex AI model resource.
+        """
+        return aiplatform.Model(f"projects/{self._project}/locations/{self.config.location}/models/{self.config.model_id}")
+
+    @property
+    def prediction_url(self) -> Optional[str]:
+        """The prediction URI exposed by the prediction service.
+
+        Returns:
+            The prediction URI exposed by the prediction service, or None if
+            the service is not yet ready.
+        """
+        return self.hf_endpoint.url if self.is_running else None
+
+    def provision(self) -> None:
+        """Provision or update remote Vertex AI deployment instance.
+
+        Raises:
+            Exception: If any unexpected error while creating inference endpoint.
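`get_token` mirrors the stack validator on the deployer side: it looks for a ZenML secret first and falls back to the deployer config. A sketch of providing that secret — the secret name is illustrative, and `Client.create_secret` is assumed to be the available secret-store API:

```python
from zenml.client import Client

# get_token() above reads secret.secret_values["token"], so the secret
# must carry a "token" key.
Client().create_secret(
    name="vertex-token", values={"token": "<gcp-access-token>"}
)
```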
+        """
+        try:
+            # Attempt to create and wait for the inference endpoint
+            vertex_endpoint = self.vertex_model.deploy(
+                deployed_model_display_name=self.config.deployed_model_display_name,
+                traffic_percentage=self.config.traffic_percentage,
+                traffic_split=self.config.traffic_split,
+                machine_type=self.config.machine_type,
+                min_replica_count=self.config.min_replica_count,
+                max_replica_count=self.config.max_replica_count,
+                accelerator_type=self.config.accelerator_type,
+                accelerator_count=self.config.accelerator_count,
+                service_account=self.config.service_account,
+                metadata=self.config.metadata,
+                deploy_request_timeout=self.config.deploy_request_timeout,
+                autoscaling_target_cpu_utilization=self.config.autoscaling_target_cpu_utilization,
+                autoscaling_target_accelerator_duty_cycle=self.config.autoscaling_target_accelerator_duty_cycle,
+                enable_access_logging=self.config.enable_access_logging,
+                disable_container_logging=self.config.disable_container_logging,
+                encryption_spec_key_name=self.config.encryption_spec_key_name,
+                network=self.config.network,
+            )
+
+        except Exception as e:
+            self.status.update_state(
+                new_state=ServiceState.ERROR, error=str(e)
+            )
+            # Catch-all for any other unexpected errors
+            raise Exception(
+                f"An unexpected error occurred while provisioning the Vertex AI inference endpoint: {e}"
+            )
+
+        # Check if the endpoint URL is available after provisioning
+        if hf_endpoint.url:
+            logger.info(
+                f"Vertex AI inference endpoint successfully deployed and available. Endpoint URL: {hf_endpoint.url}"
+            )
+        else:
+            logger.error(
+                "Failed to start Vertex AI inference endpoint service: No URL available, please check the Vertex AI console for more details."
+            )
+
+    def check_status(self) -> Tuple[ServiceState, str]:
+        """Check the current operational state of the Vertex AI deployment.
+
+        Returns:
+            The operational state of the Vertex AI deployment and a message
+            providing additional information about that state (e.g. a
+            description of the error, if one is encountered).
+        """
+        pass
+
+    def deprovision(self, force: bool = False) -> None:
+        """Deprovision the remote Vertex AI deployment instance.
+
+        Args:
+            force: if True, the remote deployment instance will be
+                forcefully deprovisioned.
+        """
+        try:
+            self.vertex_model.undeploy()
+        except Exception:
+            logger.error(
+                "Vertex AI Inference Endpoint is deleted or cannot be found."
+            )
+
+    def predict(self, data: "Any", max_new_tokens: int) -> "Any":
+        """Make a prediction using the service.
+
+        Args:
+            data: input data
+            max_new_tokens: Number of new tokens to generate
+
+        Returns:
+            The prediction result.
+
+        Raises:
+            Exception: if the service is not running
+            NotImplementedError: if task is not supported.
+        """
+        if not self.is_running:
+            raise Exception(
+                "Vertex AI endpoint inference service is not running. "
+                "Please start the service before making predictions."
+            )
+        if self.prediction_url is not None:
+            if self.hf_endpoint.task == "text-generation":
+                result = self.inference_client.task_generation(
+                    data, max_new_tokens=max_new_tokens
+                )
+            else:
+                # TODO: Add support for all different supported tasks
+                raise NotImplementedError(
+                    "Tasks other than text-generation are not implemented."
+                )
+        return result
+
+    def get_logs(
+        self, follow: bool = False, tail: Optional[int] = None
+    ) -> Generator[str, bool, None]:
+        """Retrieve the service logs.
+
+        Args:
+            follow: if True, the logs will be streamed as they are written
+            tail: only retrieve the last NUM lines of log output.
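As an aside to `get_logs` above, endpoint logs can also be pulled programmatically from Cloud Logging rather than the console UI — a sketch with illustrative project and endpoint IDs:

```python
from google.cloud import logging as gcp_logging

client = gcp_logging.Client(project="my-gcp-project")
log_filter = (
    'resource.type="aiplatform.googleapis.com/Endpoint" '
    'resource.labels.endpoint_id="1234567890"'
)
for entry in client.list_entries(filter_=log_filter, max_results=20):
    print(entry.timestamp, entry.payload)
```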
+ + Returns: + A generator that can be accessed to get the service logs. + """ + logger.info( + "Vertex AI Endpoints provides access to the logs of " + "your Endpoints through the UI in the “Logs” tab of your Endpoint" + ) + return # type: ignore + + def _generate_an_endpoint_name(self) -> str: + """Generate a unique name for the Vertex AI Inference Endpoint. + + Returns: + A unique name for the Vertex AI Inference Endpoint. + """ + return ( + f"{self.config.service_name}-{str(self.uuid)[:UUID_SLICE_LENGTH]}" + ) From c03f2a01a0a50a536195c4d0386936a1a08dcbce Mon Sep 17 00:00:00 2001 From: Safoine El khabich Date: Thu, 6 Jun 2024 08:06:03 +0100 Subject: [PATCH 02/43] vertex model --- .../model_registries/vertex_model_registry.py | 314 ++++++++++++++++++ 1 file changed, 314 insertions(+) create mode 100644 src/zenml/integrations/gcp/model_registries/vertex_model_registry.py diff --git a/src/zenml/integrations/gcp/model_registries/vertex_model_registry.py b/src/zenml/integrations/gcp/model_registries/vertex_model_registry.py new file mode 100644 index 00000000000..48403965ca5 --- /dev/null +++ b/src/zenml/integrations/gcp/model_registries/vertex_model_registry.py @@ -0,0 +1,314 @@ +from datetime import datetime +from typing import Any, Dict, List, Optional, Tuple, cast + +from google.cloud import aiplatform +from google.cloud.aiplatform import Model, ModelRegistry, ModelVersion +from google.cloud.aiplatform.exceptions import NotFound + +from zenml.enums import StackComponentType +from zenml.stack import Flavor, StackComponent +from zenml.stack.stack_component import StackComponentConfig +from zenml.model_registries.base_model_registry import ( + BaseModelRegistry, + ModelRegistryModelMetadata, + ModelVersionStage, + RegisteredModel, + RegistryModelVersion, +) +from zenml.stack.stack_validator import StackValidator +from zenml.logger import get_logger + +logger = get_logger(__name__) + +class VertexAIModelRegistry(BaseModelRegistry): + """Register models using Vertex AI.""" + + def __init__(self): + super().__init__() + aiplatform.init() # Initialize the Vertex AI SDK + + @property + def config(self) -> StackComponentConfig: + """Returns the config of the model registries.""" + return cast(StackComponentConfig, self._config) + + def register_model( + self, + name: str, + description: Optional[str] = None, + metadata: Optional[Dict[str, str]] = None, + ) -> RegisteredModel: + """Register a model to the Vertex AI model registry.""" + try: + model = Model( + display_name=name, + description=description, + labels=metadata + ) + model.upload() + return RegisteredModel(name=name, description=description, metadata=metadata) + except Exception as e: + raise RuntimeError(f"Failed to register model: {str(e)}") + + def delete_model( + self, + name: str, + ) -> None: + """Delete a model from the Vertex AI model registry.""" + try: + model = Model(model_name=name) + model.delete() + except NotFound: + raise KeyError(f"Model with name {name} does not exist.") + except Exception as e: + raise RuntimeError(f"Failed to delete model: {str(e)}") + + def update_model( + self, + name: str, + description: Optional[str] = None, + metadata: Optional[Dict[str, str]] = None, + remove_metadata: Optional[List[str]] = None, + ) -> RegisteredModel: + """Update a model in the Vertex AI model registry.""" + try: + model = Model(model_name=name) + if description: + model.update(description=description) + if metadata: + for key, value in metadata.items(): + model.labels[key] = value + if remove_metadata: + for key in 
remove_metadata: + if key in model.labels: + del model.labels[key] + model.update() + return self.get_model(name) + except NotFound: + raise KeyError(f"Model with name {name} does not exist.") + except Exception as e: + raise RuntimeError(f"Failed to update model: {str(e)}") + + def get_model(self, name: str) -> RegisteredModel: + """Get a model from the Vertex AI model registry.""" + try: + model = Model(model_name=name) + model_resource = model.gca_resource + return RegisteredModel( + name=model_resource.display_name, + description=model_resource.description, + metadata=model_resource.labels + ) + except NotFound: + raise KeyError(f"Model with name {name} does not exist.") + except Exception as e: + raise RuntimeError(f"Failed to get model: {str(e)}") + + def list_models( + self, + name: Optional[str] = None, + metadata: Optional[Dict[str, str]] = None, + ) -> List[RegisteredModel]: + """List models in the Vertex AI model registry.""" + filter_expression = "" + if name: + filter_expression += f"display_name={name}" + if metadata: + for key, value in metadata.items(): + filter_expression += f"labels.{key}={value} " + try: + models = Model.list(filter=filter_expression) + return [ + RegisteredModel( + name=model.display_name, + description=model.description, + metadata=model.labels + ) + for model in models + ] + except Exception as e: + raise RuntimeError(f"Failed to list models: {str(e)}") + + def register_model_version( + self, + name: str, + version: Optional[str] = None, + model_source_uri: Optional[str] = None, + description: Optional[str] = None, + metadata: Optional[ModelRegistryModelMetadata] = None, + **kwargs: Any, + ) -> RegistryModelVersion: + """Register a model version to the Vertex AI model registry.""" + try: + model = Model(model_name=name) + version_info = model.upload_version( + display_name=version, + description=description, + artifact_uri=model_source_uri, + labels=metadata.dict() if metadata else None + ) + return RegistryModelVersion( + version=version_info.version_id, + model_source_uri=model_source_uri, + model_format="Custom", + registered_model=self.get_model(name), + description=description, + created_at=version_info.create_time, + last_updated_at=version_info.update_time, + stage=ModelVersionStage.NONE, + metadata=metadata + ) + except NotFound: + raise KeyError(f"Model with name {name} does not exist.") + except Exception as e: + raise RuntimeError(f"Failed to register model version: {str(e)}") + + def delete_model_version( + self, + name: str, + version: str, + ) -> None: + """Delete a model version from the Vertex AI model registry.""" + try: + model = Model(model_name=name) + version_info = ModelVersion(model_name=f"{name}@{version}") + version_info.delete() + except NotFound: + raise KeyError(f"Model version {version} of model {name} does not exist.") + except Exception as e: + raise RuntimeError(f"Failed to delete model version: {str(e)}") + + def update_model_version( + self, + name: str, + version: str, + description: Optional[str] = None, + metadata: Optional[ModelRegistryModelMetadata] = None, + remove_metadata: Optional[List[str]] = None, + stage: Optional[ModelVersionStage] = None, + ) -> RegistryModelVersion: + """Update a model version in the Vertex AI model registry.""" + try: + model_version = ModelVersion(model_name=f"{name}@{version}") + if description: + model_version.update(description=description) + if metadata: + for key, value in metadata.dict().items(): + model_version.labels[key] = value + if remove_metadata: + for key in 
remove_metadata: + if key in model_version.labels: + del model_version.labels[key] + model_version.update() + if stage: + # Handle stage update if needed + pass + return self.get_model_version(name, version) + except NotFound: + raise KeyError(f"Model version {version} of model {name} does not exist.") + except Exception as e: + raise RuntimeError(f"Failed to update model version: {str(e)}") + + def get_model_version( + self, name: str, version: str + ) -> RegistryModelVersion: + """Get a model version from the Vertex AI model registry.""" + try: + model_version = ModelVersion(model_name=f"{name}@{version}") + return RegistryModelVersion( + version=model_version.version_id, + model_source_uri=model_version.gca_resource.artifact_uri, + model_format="Custom", + registered_model=self.get_model(name), + description=model_version.description, + created_at=model_version.create_time, + last_updated_at=model_version.update_time, + stage=ModelVersionStage.NONE, + metadata=ModelRegistryModelMetadata(**model_version.labels) + ) + except NotFound: + raise KeyError(f"Model version {version} of model {name} does not exist.") + except Exception as e: + raise RuntimeError(f"Failed to get model version: {str(e)}") + + def list_model_versions( + self, + name: Optional[str] = None, + model_source_uri: Optional[str] = None, + metadata: Optional[ModelRegistryModelMetadata] = None, + stage: Optional[ModelVersionStage] = None, + count: Optional[int] = None, + created_after: Optional[datetime] = None, + created_before: Optional[datetime] = None, + order_by_date: Optional[str] = None, + **kwargs: Any, + ) -> List[RegistryModelVersion]: + """List model versions from the Vertex AI model registry.""" + filter_expression = "" + if name: + filter_expression += f"display_name={name}" + if metadata: + for key, value in metadata.dict().items(): + filter_expression += f"labels.{key}={value} " + try: + model = Model(model_name=name) + versions = model.list_versions(filter=filter_expression) + return [ + RegistryModelVersion( + version=v.version_id, + model_source_uri=v.artifact_uri, + model_format="Custom", + registered_model=self.get_model(name), + description=v.description, + created_at=v.create_time, + + + last_updated_at=v.update_time, + stage=ModelVersionStage.NONE, + metadata=ModelRegistryModelMetadata(**v.labels) + ) + for v in versions + ] + except Exception as e: + raise RuntimeError(f"Failed to list model versions: {str(e)}") + + def load_model_version( + self, + name: str, + version: str, + **kwargs: Any, + ) -> Any: + """Load a model version from the Vertex AI model registry.""" + try: + model_version = ModelVersion(model_name=f"{name}@{version}") + return model_version + except NotFound: + raise KeyError(f"Model version {version} of model {name} does not exist.") + except Exception as e: + raise RuntimeError(f"Failed to load model version: {str(e)}") + + def get_model_uri_artifact_store( + self, + model_version: RegistryModelVersion, + ) -> str: + """Get the model URI artifact store.""" + return model_version.model_source_uri + + +class VertexAIModelRegistryFlavor(Flavor): + """Base class for all ZenML model registry flavors.""" + + @property + def type(self) -> StackComponentType: + """Type of the flavor.""" + return StackComponentType.MODEL_REGISTRY + + @property + def config_class(self) -> Type[StackComponentConfig]: + """Config class for this flavor.""" + return StackComponentConfig + + @property + def implementation_class(self) -> Type[StackComponent]: + """Returns the implementation class for this 
flavor.""" + return VertexAIModelRegistry From 4eeeb277cd2d76cfbdc178234e37cf22ce8acf75 Mon Sep 17 00:00:00 2001 From: Safoine El khabich Date: Mon, 15 Jul 2024 13:52:27 +0100 Subject: [PATCH 03/43] vertex deployer --- .../integrations/gcp/model_deployers/vertex_model_deployer.py | 4 +++- src/zenml/integrations/gcp/services/vertex_deployment.py | 4 ++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/zenml/integrations/gcp/model_deployers/vertex_model_deployer.py b/src/zenml/integrations/gcp/model_deployers/vertex_model_deployer.py index 35a20890a0e..f16167c25a8 100644 --- a/src/zenml/integrations/gcp/model_deployers/vertex_model_deployer.py +++ b/src/zenml/integrations/gcp/model_deployers/vertex_model_deployer.py @@ -24,7 +24,9 @@ VertexModelDeployerConfig, VertexModelDeployerFlavor, ) -from zenml.integrations.gcp.google_credentials_mixin import GoogleCredentialsMixin +from zenml.integrations.gcp.google_credentials_mixin import ( + GoogleCredentialsMixin, +) from zenml.integrations.gcp.services.vertex_deployment import ( VertexDeploymentService, VertexServiceConfig, diff --git a/src/zenml/integrations/gcp/services/vertex_deployment.py b/src/zenml/integrations/gcp/services/vertex_deployment.py index 98df9d28e46..07fd8ed2260 100644 --- a/src/zenml/integrations/gcp/services/vertex_deployment.py +++ b/src/zenml/integrations/gcp/services/vertex_deployment.py @@ -170,7 +170,7 @@ def provision(self) -> None: ) # Check if the endpoint URL is available after provisioning - if hf_endpoint.url: + if vertex_endpoint. logger.info( f"Vertex AI inference endpoint successfully deployed and available. Endpoint URL: {hf_endpoint.url}" ) @@ -260,4 +260,4 @@ def _generate_an_endpoint_name(self) -> str: """ return ( f"{self.config.service_name}-{str(self.uuid)[:UUID_SLICE_LENGTH]}" - ) + ) \ No newline at end of file From 7c0ca3f15eda0ae8db3324f251eb1569b19a6fdf Mon Sep 17 00:00:00 2001 From: Safoine El khabich Date: Wed, 18 Sep 2024 13:26:07 +0100 Subject: [PATCH 04/43] vertex registry code --- src/zenml/integrations/gcp/__init__.py | 5 + .../integrations/gcp/flavors/__init__.py | 6 + .../flavors/vertex_model_deployer_flavor.py | 32 +- .../flavors/vertex_model_registry_flavor.py | 130 +++++++ .../gcp/model_deployers/__init__.py | 4 +- .../model_deployers/vertex_model_deployer.py | 174 ++++++---- .../gcp/model_registries/__init__.py | 20 ++ .../model_registries/vertex_model_registry.py | 239 +++++++------ .../integrations/gcp/services/__init__.py | 8 +- .../gcp/services/vertex_deployment.py | 322 +++++++++++------- 10 files changed, 600 insertions(+), 340 deletions(-) create mode 100644 src/zenml/integrations/gcp/flavors/vertex_model_registry_flavor.py create mode 100644 src/zenml/integrations/gcp/model_registries/__init__.py diff --git a/src/zenml/integrations/gcp/__init__.py b/src/zenml/integrations/gcp/__init__.py index 43bf09edda8..3c9de9a9348 100644 --- a/src/zenml/integrations/gcp/__init__.py +++ b/src/zenml/integrations/gcp/__init__.py @@ -34,6 +34,7 @@ GCP_VERTEX_STEP_OPERATOR_FLAVOR = "vertex" # Model deployer constants +VERTEX_MODEL_REGISTRY_FLAVOR = "vertex" VERTEX_MODEL_DEPLOYER_FLAVOR = "vertex" VERTEX_SERVICE_ARTIFACT = "vertex_deployment_service" @@ -76,6 +77,8 @@ def flavors(cls) -> List[Type[Flavor]]: GCPImageBuilderFlavor, VertexOrchestratorFlavor, VertexStepOperatorFlavor, + VertexModelDeployerFlavor, + VertexAIModelRegistryFlavor, ) return [ @@ -83,6 +86,8 @@ def flavors(cls) -> List[Type[Flavor]]: GCPImageBuilderFlavor, VertexOrchestratorFlavor, VertexStepOperatorFlavor, 
+ VertexAIModelRegistryFlavor, + VertexModelDeployerFlavor, ] diff --git a/src/zenml/integrations/gcp/flavors/__init__.py b/src/zenml/integrations/gcp/flavors/__init__.py index 1328ec75b2d..cecf637cefd 100644 --- a/src/zenml/integrations/gcp/flavors/__init__.py +++ b/src/zenml/integrations/gcp/flavors/__init__.py @@ -33,6 +33,10 @@ VertexModelDeployerConfig, VertexModelDeployerFlavor, ) +from zenml.integrations.gcp.flavors.vertex_model_registry_flavor import ( + VertexAIModelRegistryConfig, + VertexAIModelRegistryFlavor, +) __all__ = [ "GCPArtifactStoreFlavor", @@ -45,4 +49,6 @@ "VertexStepOperatorConfig", "VertexModelDeployerFlavor", "VertexModelDeployerConfig", + "VertexAIModelRegistryFlavor", + "VertexAIModelRegistryConfig", ] diff --git a/src/zenml/integrations/gcp/flavors/vertex_model_deployer_flavor.py b/src/zenml/integrations/gcp/flavors/vertex_model_deployer_flavor.py index cb798cc19c8..85d4bd52485 100644 --- a/src/zenml/integrations/gcp/flavors/vertex_model_deployer_flavor.py +++ b/src/zenml/integrations/gcp/flavors/vertex_model_deployer_flavor.py @@ -13,7 +13,7 @@ # permissions and limitations under the License. """Vertex AI model deployer flavor.""" -from typing import TYPE_CHECKING, Any, Dict, Optional, Sequence, Type +from typing import TYPE_CHECKING, Dict, Optional, Sequence, Type from pydantic import BaseModel @@ -22,7 +22,6 @@ BaseModelDeployerConfig, BaseModelDeployerFlavor, ) -from zenml.utils.secret_utils import SecretField if TYPE_CHECKING: from zenml.integrations.gcp.model_deployers.vertex_model_deployer import ( @@ -39,13 +38,11 @@ class VertexBaseConfig(BaseModel): artifact_uri: Optional[str] = None model_id: Optional[str] = None is_default_version: Optional[bool] = None - serving_container_command: Optional[Sequence[str]] = None, - serving_container_args: Optional[Sequence[str]] = None, - serving_container_environment_variables: Optional[ - Dict[str, str] - ] = None, - serving_container_ports: Optional[Sequence[int]] = None, - serving_container_grpc_ports: Optional[Sequence[int]] = None, + serving_container_command: Optional[Sequence[str]] = None + serving_container_args: Optional[Sequence[str]] = None + serving_container_environment_variables: Optional[Dict[str, str]] = None + serving_container_ports: Optional[Sequence[int]] = None + serving_container_grpc_ports: Optional[Sequence[int]] = None deployed_model_display_name: Optional[str] = None traffic_percentage: Optional[int] = 0 traffic_split: Optional[Dict[str, int]] = None @@ -58,33 +55,26 @@ class VertexBaseConfig(BaseModel): metadata: Optional[Dict[str, str]] = None network: Optional[str] = None encryption_spec_key_name: Optional[str] = None - sync=True, + sync: Optional[bool] = True deploy_request_timeout: Optional[int] = None autoscaling_target_cpu_utilization: Optional[float] = None autoscaling_target_accelerator_duty_cycle: Optional[float] = None enable_access_logging: Optional[bool] = None disable_container_logging: Optional[bool] = None - - - -class VertexModelDeployerConfig( - BaseModelDeployerConfig, VertexBaseConfig -): +class VertexModelDeployerConfig(BaseModelDeployerConfig, VertexBaseConfig): """Configuration for the Vertex AI model deployer. Attributes: - model_name: The name of the model. project_id: The project ID. location: The location of the model. - version: The version of the model. """ # The namespace to list endpoints for. Set to `"*"` to list all endpoints # from all namespaces (i.e. personal namespace and all orgs the user belongs to). 
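With the field definitions corrected (and the required fields swapped to `project_id`/`location` just below), building a deployer config becomes straightforward. An illustrative sketch of the post-fix configuration — all values are placeholders:

```python
from zenml.integrations.gcp.flavors.vertex_model_deployer_flavor import (
    VertexModelDeployerConfig,
)

config = VertexModelDeployerConfig(
    project_id="my-gcp-project",
    location="europe-west4",
    machine_type="n1-standard-4",
    min_replica_count=1,
    max_replica_count=2,
    sync=True,
)
```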
- model_name: str - + project_id: str + location: Optional[str] = None class VertexModelDeployerFlavor(BaseModelDeployerFlavor): @@ -124,7 +114,7 @@ def logo_url(self) -> str: Returns: The flavor logo. """ - return "https://public-flavor-logos.s3.eu-central-1.amazonaws.com/model_registry/vertexai.png" + return "https://public-flavor-logos.s3.eu-central-1.amazonaws.com/artifact_store/gcp.png" @property def config_class(self) -> Type[VertexModelDeployerConfig]: diff --git a/src/zenml/integrations/gcp/flavors/vertex_model_registry_flavor.py b/src/zenml/integrations/gcp/flavors/vertex_model_registry_flavor.py new file mode 100644 index 00000000000..1c13e95a95e --- /dev/null +++ b/src/zenml/integrations/gcp/flavors/vertex_model_registry_flavor.py @@ -0,0 +1,130 @@ +# Copyright (c) ZenML GmbH 2023. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing +# permissions and limitations under the License. +"""VertexAI model registry flavor.""" + +from typing import TYPE_CHECKING, Optional, Type + +from zenml.config.base_settings import BaseSettings +from zenml.integrations.gcp import ( + GCP_RESOURCE_TYPE, + VERTEX_MODEL_REGISTRY_FLAVOR +) +from zenml.integrations.gcp.flavors.vertex_model_deployer_flavor import ( + VertexBaseConfig, +) +from zenml.integrations.gcp.google_credentials_mixin import ( + GoogleCredentialsConfigMixin, +) +from zenml.models import ServiceConnectorRequirements +from zenml.model_registries.base_model_registry import ( + BaseModelRegistryConfig, + BaseModelRegistryFlavor, +) + +if TYPE_CHECKING: + from zenml.integrations.gcp.model_registries import ( + VertexAIModelRegistry, + ) + +class VertexAIModelRegistrySettings(BaseSettings): + """Settings for the VertexAI model registry.""" + + location: str + + +class VertexAIModelRegistryConfig( + BaseModelRegistryConfig, + GoogleCredentialsConfigMixin, + VertexAIModelRegistrySettings +): + """Configuration for the VertexAI model registry.""" + + +class VertexAIModelRegistryFlavor(BaseModelRegistryFlavor): + """Model registry flavor for VertexAI models.""" + + @property + def name(self) -> str: + """Name of the flavor. + + Returns: + The name of the flavor. + """ + return VERTEX_MODEL_REGISTRY_FLAVOR + + @property + def service_connector_requirements( + self, + ) -> Optional[ServiceConnectorRequirements]: + """Service connector resource requirements for service connectors. + + Specifies resource requirements that are used to filter the available + service connector types that are compatible with this flavor. + + Returns: + Requirements for compatible service connectors, if a service + connector is required for this flavor. + """ + return ServiceConnectorRequirements( + resource_type=GCP_RESOURCE_TYPE, + ) + + @property + def docs_url(self) -> Optional[str]: + """A url to point at docs explaining this flavor. + + Returns: + A flavor docs url. + """ + return self.generate_default_docs_url() + + @property + def sdk_docs_url(self) -> Optional[str]: + """A url to point at SDK docs explaining this flavor. + + Returns: + A flavor SDK docs url. 
+ """ + return self.generate_default_sdk_docs_url() + + @property + def logo_url(self) -> str: + """A url to represent the flavor in the dashboard. + + Returns: + The flavor logo. + """ + return "https://public-flavor-logos.s3.eu-central-1.amazonaws.com/artifact_store/gcp.png" + + @property + def config_class(self) -> Type[VertexAIModelRegistryConfig]: + """Returns `VertexAIModelRegistryConfig` config class. + + Returns: + The config class. + """ + return VertexAIModelRegistryConfig + + @property + def implementation_class(self) -> Type["VertexAIModelRegistry"]: + """Implementation class for this flavor. + + Returns: + The implementation class. + """ + from zenml.integrations.gcp.model_registries import ( + VertexAIModelRegistry, + ) + + return VertexAIModelRegistry diff --git a/src/zenml/integrations/gcp/model_deployers/__init__.py b/src/zenml/integrations/gcp/model_deployers/__init__.py index 99ee319f891..203f57c096f 100644 --- a/src/zenml/integrations/gcp/model_deployers/__init__.py +++ b/src/zenml/integrations/gcp/model_deployers/__init__.py @@ -14,7 +14,7 @@ """Initialization of the Vertex AI model deployers.""" from zenml.integrations.gcp.model_deployers.vertex_model_deployer import ( # noqa - VertexMdelDeployer, + VertexModelDeployer, ) -__all__ = ["VertexMdelDeployer"] +__all__ = ["VertexModelDeployer"] diff --git a/src/zenml/integrations/gcp/model_deployers/vertex_model_deployer.py b/src/zenml/integrations/gcp/model_deployers/vertex_model_deployer.py index f16167c25a8..17ad388588d 100644 --- a/src/zenml/integrations/gcp/model_deployers/vertex_model_deployer.py +++ b/src/zenml/integrations/gcp/model_deployers/vertex_model_deployer.py @@ -13,7 +13,7 @@ # permissions and limitations under the License. """Implementation of the Vertex AI Model Deployer.""" -from typing import ClassVar, Dict, Optional, Tuple, Type, cast +from typing import ClassVar, Dict, List, Optional, Tuple, Type, cast from uuid import UUID from zenml.analytics.enums import AnalyticsEvent @@ -24,9 +24,6 @@ VertexModelDeployerConfig, VertexModelDeployerFlavor, ) -from zenml.integrations.gcp.google_credentials_mixin import ( - GoogleCredentialsMixin, -) from zenml.integrations.gcp.services.vertex_deployment import ( VertexDeploymentService, VertexServiceConfig, @@ -43,12 +40,18 @@ logger = get_logger(__name__) -class VertexModelDeployer(BaseModelDeployer, GoogleCredentialsMixin): - """Vertex implementation of the BaseModelDeployer.""" + +class VertexModelDeployer(BaseModelDeployer): + """Vertex AI endpoint model deployer.""" + + NAME: ClassVar[str] = "Vertex AI" + FLAVOR: ClassVar[Type["BaseModelDeployerFlavor"]] = ( + VertexModelDeployerFlavor + ) @property def config(self) -> VertexModelDeployerConfig: - """Config class for the Vertex AI Model deployer settings class. + """Returns the `VertexModelDeployerConfig` config. Returns: The configuration. @@ -60,14 +63,13 @@ def validator(self) -> Optional[StackValidator]: """Validates the stack. Returns: - A validator that checks that the stack contains a remote artifact - store. + A validator that checks that the stack contains required GCP components. """ - def _validate_if_secret_or_token_is_present( + def _validate_gcp_stack( stack: "Stack", ) -> Tuple[bool, str]: - """Check if secret or token is present in the stack. + """Check if GCP components are properly configured in the stack. Args: stack: The stack to validate. 
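Under the hood, the deployer's job reduces to a pair of `google-cloud-aiplatform` calls. A self-contained sketch of that flow — resource names and instance payloads are illustrative:

```python
from google.cloud import aiplatform

aiplatform.init(project="my-gcp-project", location="europe-west4")

model = aiplatform.Model(
    "projects/my-gcp-project/locations/europe-west4/models/1234567890"
)
endpoint = model.deploy(
    machine_type="n1-standard-4",
    min_replica_count=1,
    max_replica_count=1,
    sync=True,
)
print(endpoint.predict(instances=[[1.0, 2.0, 3.0]]).predictions)
```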
@@ -76,33 +78,34 @@ def _validate_if_secret_or_token_is_present(
                 A tuple with a boolean indicating whether the stack is valid
                 and a message describing the validation result.
             """
-            return bool(self.config.token or self.config.secret_name), (
-                "The Vertex AI model deployer requires either a secret name"
-                " or a token to be present in the stack."
-            )
+            if not self.config.project_id or not self.config.location:
+                return False, (
+                    "The Vertex AI model deployer requires a GCP project and "
+                    "location to be specified in the configuration."
+                )
+            return True, "Stack is valid for Vertex AI model deployment."
 
         return StackValidator(
-            custom_validation_function=_validate_if_secret_or_token_is_present,
+            custom_validation_function=_validate_gcp_stack,
         )
 
-    def _create_new_service(
-        self, id: UUID, timeout: int, config: VertexServiceConfig
+    def _create_deployment_service(
+        self, id: UUID, timeout: int, config: VertexModelDeployerConfig
     ) -> VertexDeploymentService:
-        """Creates a new VertexDeploymentService.
+        """Creates a new VertexDeploymentService.
 
         Args:
-            id: the UUID of the model to be deployed with Vertex AI model deployer.
-            timeout: the timeout in seconds to wait for the Vertex AI inference endpoint
+            id: the UUID of the model to be deployed with the Vertex AI model deployer.
+            timeout: the timeout in seconds to wait for the Vertex AI inference endpoint
                 to be provisioned and successfully started or updated.
-            config: the configuration of the model to be deployed with Vertex AI model deployer.
+            config: the configuration of the model to be deployed with the Vertex AI model deployer.
 
         Returns:
-            The VertexServiceConfig object that can be used to interact
-            with the Vertex AI inference endpoint.
+            The VertexDeploymentService object that can be used to interact
+            with the Vertex AI inference endpoint.
         """
         # create a new service for the new model
         service = VertexDeploymentService(uuid=id, config=config)
-
         logger.info(
             f"Creating an artifact {VERTEX_SERVICE_ARTIFACT} with service instance attached as metadata."
             " If there's an active pipeline and/or model this artifact will be associated with it."
@@ -110,66 +113,48 @@ def _create_new_service(
         service.start(timeout=timeout)
         return service
 
-    def _clean_up_existing_service(
-        self,
-        timeout: int,
-        force: bool,
-        existing_service: VertexDeploymentService,
-    ) -> None:
-        """Stop existing services.
-
-        Args:
-            timeout: the timeout in seconds to wait for the Vertex AI
-                deployment to be stopped.
-            force: if True, force the service to stop
-            existing_service: Existing Vertex AI deployment service
-        """
-        # stop the older service
-        existing_service.stop(timeout=timeout, force=force)
-
     def perform_deploy_model(
         self,
         id: UUID,
         config: ServiceConfig,
         timeout: int = DEFAULT_DEPLOYMENT_START_STOP_TIMEOUT,
     ) -> BaseService:
-        """Create a new Vertex AI deployment service or update an existing one.
-
-        This should serve the supplied model and deployment configuration.
+        """Deploy a model to Vertex AI.
 
         Args:
-            id: the UUID of the model to be deployed with Vertex AI.
-            config: the configuration of the model to be deployed with Vertex AI.
-            timeout: the timeout in seconds to wait for the Vertex AI endpoint
-                to be provisioned and successfully started or updated. If set
-                to 0, the method will return immediately after the Vertex AI
-                server is provisioned, without waiting for it to fully start.
+            id: the UUID of the service to be created.
+            config: the configuration of the model to be deployed.
+            timeout: the timeout for the deployment operation.
         Returns:
-            The ZenML Vertex AI deployment service object that can be used to
-            interact with the remote Vertex AI inference endpoint server.
+            The ZenML Vertex AI deployment service object.
         """
         with track_handler(AnalyticsEvent.MODEL_DEPLOYED) as analytics_handler:
             config = cast(VertexServiceConfig, config)
-            # create a new VertexDeploymentService instance
-            service = self._create_new_service(
-                id=id, timeout=timeout, config=config
-            )
+            service = self._create_deployment_service(id=id, timeout=timeout, config=config)
             logger.info(
-                f"Creating a new Vertex AI inference endpoint service: {service}"
+                f"Creating a new Vertex AI deployment service: {service}"
             )
-            # Add telemetry with metadata that gets the stack metadata and
-            # differentiates between pure model and custom code deployments
-            stack = Client().active_stack
+            service.start(timeout=timeout)
+
+            client = Client()
+            stack = client.active_stack
             stack_metadata = {
                 component_type.value: component.flavor
                 for component_type, component in stack.components.items()
             }
             analytics_handler.metadata = {
-                "store_type": Client().zen_store.type.value,
+                "store_type": client.zen_store.type.value,
                 **stack_metadata,
             }
 
+            # Create a service artifact
+            client.create_artifact(
+                name=VERTEX_SERVICE_ARTIFACT,
+                artifact_store_id=client.active_stack.artifact_store.id,
+                producer=service,
+            )
+
         return service
 
     def perform_stop_model(
         self,
         service: BaseService,
         timeout: int = DEFAULT_DEPLOYMENT_START_STOP_TIMEOUT,
         force: bool = False,
     ) -> BaseService:
-        """Method to stop a model server.
+        """Stop a Vertex AI deployment service.
 
         Args:
             service: The service to stop.
@@ -196,7 +181,7 @@ def perform_start_model(
         service: BaseService,
         timeout: int = DEFAULT_DEPLOYMENT_START_STOP_TIMEOUT,
     ) -> BaseService:
-        """Method to start a model server.
+        """Start a Vertex AI deployment service.
 
         Args:
             service: The service to start.
@@ -214,7 +199,7 @@ def perform_delete_model(
         timeout: int = DEFAULT_DEPLOYMENT_START_STOP_TIMEOUT,
         force: bool = False,
     ) -> None:
-        """Method to delete all configuration of a model server.
+        """Delete a Vertex AI deployment service.
 
         Args:
             service: The service to delete.
@@ -222,23 +207,66 @@ def perform_delete_model(
             force: If True, force the service to stop.
         """
         service = cast(VertexDeploymentService, service)
-        self._clean_up_existing_service(
-            existing_service=service, timeout=timeout, force=force
-        )
+        service.stop(timeout=timeout, force=force)
+        service.delete()
 
     @staticmethod
-    def get_model_server_info(  # type: ignore[override]
+    def get_model_server_info(
         service_instance: "VertexDeploymentService",
     ) -> Dict[str, Optional[str]]:
-        """Return implementation specific information that might be relevant to the user.
+        """Get information about the deployed model server.
 
         Args:
-            service_instance: Instance of a VertexDeploymentService
+            service_instance: The VertexDeploymentService instance.
 
         Returns:
-            Model server information.
+            A dictionary containing information about the model server.
""" return { - "PREDICTION_URL": service_instance.get_prediction_url(), + "PREDICTION_URL": service_instance.prediction_url, "HEALTH_CHECK_URL": service_instance.get_healthcheck_url(), } + + def find_model_server( + self, + running: Optional[bool] = None, + service_uuid: Optional[UUID] = None, + pipeline_name: Optional[str] = None, + run_name: Optional[str] = None, + pipeline_step_name: Optional[str] = None, + model_name: Optional[str] = None, + model_uri: Optional[str] = None, + model_version: Optional[str] = None, + ) -> List[BaseService]: + """Find deployed model servers in Vertex AI. + + Args: + running: Filter by running status. + service_uuid: Filter by service UUID. + pipeline_name: Filter by pipeline name. + run_name: Filter by run name. + pipeline_step_name: Filter by pipeline step name. + model_name: Filter by model name. + model_uri: Filter by model URI. + model_version: Filter by model version. + + Returns: + A list of services matching the given criteria. + """ + client = Client() + services = client.list_services( + service_type=VertexDeploymentService.SERVICE_TYPE, + running=running, + service_uuid=service_uuid, + pipeline_name=pipeline_name, + run_name=run_name, + pipeline_step_name=pipeline_step_name, + model_name=model_name, + model_uri=model_uri, + model_version=model_version, + ) + + return [ + VertexDeploymentService.from_model(service_model) + for service_model in services + ] diff --git a/src/zenml/integrations/gcp/model_registries/__init__.py b/src/zenml/integrations/gcp/model_registries/__init__.py new file mode 100644 index 00000000000..38622ef0da3 --- /dev/null +++ b/src/zenml/integrations/gcp/model_registries/__init__.py @@ -0,0 +1,20 @@ +# Copyright (c) ZenML GmbH 2023. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing +# permissions and limitations under the License. +"""Initialization of the Vertex AI model deployers.""" + +from zenml.integrations.gcp.model_registries.vertex_model_registry import ( + VertexAIModelRegistry +) + +__all__ = ["VertexAIModelRegistry"] diff --git a/src/zenml/integrations/gcp/model_registries/vertex_model_registry.py b/src/zenml/integrations/gcp/model_registries/vertex_model_registry.py index 48403965ca5..f3fd31d84d7 100644 --- a/src/zenml/integrations/gcp/model_registries/vertex_model_registry.py +++ b/src/zenml/integrations/gcp/model_registries/vertex_model_registry.py @@ -1,13 +1,30 @@ +# Copyright (c) ZenML GmbH 2023. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing +# permissions and limitations under the License. 
+"""Vertex AI model registry integration for ZenML.""" + from datetime import datetime -from typing import Any, Dict, List, Optional, Tuple, cast +from typing import Any, Dict, List, Optional, cast from google.cloud import aiplatform -from google.cloud.aiplatform import Model, ModelRegistry, ModelVersion -from google.cloud.aiplatform.exceptions import NotFound -from zenml.enums import StackComponentType -from zenml.stack import Flavor, StackComponent -from zenml.stack.stack_component import StackComponentConfig +from zenml.integrations.gcp.flavors.vertex_model_registry_flavor import ( + VertexAIModelRegistryConfig, +) +from zenml.integrations.gcp.google_credentials_mixin import ( + GoogleCredentialsMixin, +) +from zenml.logger import get_logger from zenml.model_registries.base_model_registry import ( BaseModelRegistry, ModelRegistryModelMetadata, @@ -15,22 +32,29 @@ RegisteredModel, RegistryModelVersion, ) -from zenml.stack.stack_validator import StackValidator -from zenml.logger import get_logger +from zenml.stack.stack_component import StackComponentConfig logger = get_logger(__name__) -class VertexAIModelRegistry(BaseModelRegistry): - """Register models using Vertex AI.""" - def __init__(self): - super().__init__() - aiplatform.init() # Initialize the Vertex AI SDK +class VertexAIModelRegistry(BaseModelRegistry, GoogleCredentialsMixin): + """Register models using Vertex AI.""" @property - def config(self) -> StackComponentConfig: - """Returns the config of the model registries.""" - return cast(StackComponentConfig, self._config) + def config(self) -> VertexAIModelRegistryConfig: + """Returns the config of the model registry. + + Returns: + The configuration. + """ + return cast(VertexAIModelRegistryConfig, self._config) + + def setup_aiplatform(self) -> None: + """Setup the Vertex AI platform.""" + credentials, project_id = self._get_authentication() + aiplatform.init( + project=project_id, location=self.config.location, credentials=credentials + ) def register_model( self, @@ -40,13 +64,16 @@ def register_model( ) -> RegisteredModel: """Register a model to the Vertex AI model registry.""" try: - model = Model( + model = aiplatform.Model.upload( display_name=name, description=description, - labels=metadata + labels=metadata, + serving_container_image_uri="us-docker.pkg.dev/vertex-ai/prediction/sklearn-cpu.1-0:latest", # Placeholder + ) + breakpoint() + return RegisteredModel( + name=name, description=description, metadata=metadata ) - model.upload() - return RegisteredModel(name=name, description=description, metadata=metadata) except Exception as e: raise RuntimeError(f"Failed to register model: {str(e)}") @@ -56,10 +83,8 @@ def delete_model( ) -> None: """Delete a model from the Vertex AI model registry.""" try: - model = Model(model_name=name) + model = aiplatform.Model(model_name=name) model.delete() - except NotFound: - raise KeyError(f"Model with name {name} does not exist.") except Exception as e: raise RuntimeError(f"Failed to delete model: {str(e)}") @@ -72,35 +97,28 @@ def update_model( ) -> RegisteredModel: """Update a model in the Vertex AI model registry.""" try: - model = Model(model_name=name) + model = aiplatform.Model(model_name=name) if description: - model.update(description=description) + model.description = description if metadata: - for key, value in metadata.items(): - model.labels[key] = value + model.labels.update(metadata) if remove_metadata: for key in remove_metadata: - if key in model.labels: - del model.labels[key] + model.labels.pop(key, None) 
model.update() return self.get_model(name) - except NotFound: - raise KeyError(f"Model with name {name} does not exist.") except Exception as e: raise RuntimeError(f"Failed to update model: {str(e)}") def get_model(self, name: str) -> RegisteredModel: """Get a model from the Vertex AI model registry.""" try: - model = Model(model_name=name) - model_resource = model.gca_resource + model = aiplatform.Model(model_name=name) return RegisteredModel( - name=model_resource.display_name, - description=model_resource.description, - metadata=model_resource.labels + name=model.name, + description=model.description, + metadata=model.labels, ) - except NotFound: - raise KeyError(f"Model with name {name} does not exist.") except Exception as e: raise RuntimeError(f"Failed to get model: {str(e)}") @@ -110,19 +128,22 @@ def list_models( metadata: Optional[Dict[str, str]] = None, ) -> List[RegisteredModel]: """List models in the Vertex AI model registry.""" - filter_expression = "" + filter_expr = [] if name: - filter_expression += f"display_name={name}" + filter_expr.append(f"display_name={name}") if metadata: for key, value in metadata.items(): - filter_expression += f"labels.{key}={value} " + filter_expr.append(f"labels.{key}={value}") + + filter_str = " AND ".join(filter_expr) if filter_expr else None + try: - models = Model.list(filter=filter_expression) + models = aiplatform.Model.list(filter=filter_str) return [ RegisteredModel( name=model.display_name, description=model.description, - metadata=model.labels + metadata=model.labels, ) for model in models ] @@ -139,27 +160,32 @@ def register_model_version( **kwargs: Any, ) -> RegistryModelVersion: """Register a model version to the Vertex AI model registry.""" + metadata_dict = metadata.model_dump() if metadata else {} + serving_container_image_uri = metadata_dict.get( + "serving_container_image_uri", None + ) + is_default_version = metadata_dict.get("is_default_version", False) + self.setup_aiplatform() try: - model = Model(model_name=name) - version_info = model.upload_version( - display_name=version, - description=description, + version_info = aiplatform.Model.upload( artifact_uri=model_source_uri, - labels=metadata.dict() if metadata else None + display_name=f"{name}_{version}", + serving_container_image_uri="europe-docker.pkg.dev/vertex-ai/prediction/sklearn-cpu.1-3:latest", + description=description, + is_default_version=is_default_version, + labels=metadata_dict, ) return RegistryModelVersion( version=version_info.version_id, model_source_uri=model_source_uri, - model_format="Custom", - registered_model=self.get_model(name), + model_format="Custom", # Vertex AI doesn't provide this info directly + registered_model=self.get_model(version_info.name), description=description, created_at=version_info.create_time, last_updated_at=version_info.update_time, - stage=ModelVersionStage.NONE, - metadata=metadata + stage=ModelVersionStage.NONE, # Vertex AI doesn't have built-in stages + metadata=metadata, ) - except NotFound: - raise KeyError(f"Model with name {name} does not exist.") except Exception as e: raise RuntimeError(f"Failed to register model version: {str(e)}") @@ -170,11 +196,10 @@ def delete_model_version( ) -> None: """Delete a model version from the Vertex AI model registry.""" try: - model = Model(model_name=name) - version_info = ModelVersion(model_name=f"{name}@{version}") - version_info.delete() - except NotFound: - raise KeyError(f"Model version {version} of model {name} does not exist.") + model_version = aiplatform.ModelVersion( + 
model_name=f"{name}@{version}" + ) + model_version.delete() except Exception as e: raise RuntimeError(f"Failed to delete model version: {str(e)}") @@ -189,23 +214,19 @@ def update_model_version( ) -> RegistryModelVersion: """Update a model version in the Vertex AI model registry.""" try: - model_version = ModelVersion(model_name=f"{name}@{version}") + model_version = aiplatform.ModelVersion( + model_name=f"{name}@{version}" + ) if description: - model_version.update(description=description) + model_version.description = description if metadata: - for key, value in metadata.dict().items(): - model_version.labels[key] = value + model_version.labels.update(metadata.dict()) if remove_metadata: for key in remove_metadata: - if key in model_version.labels: - del model_version.labels[key] + model_version.labels.pop(key, None) model_version.update() - if stage: - # Handle stage update if needed - pass + # Note: Vertex AI doesn't have built-in stages, so we ignore the 'stage' parameter return self.get_model_version(name, version) - except NotFound: - raise KeyError(f"Model version {version} of model {name} does not exist.") except Exception as e: raise RuntimeError(f"Failed to update model version: {str(e)}") @@ -214,20 +235,20 @@ def get_model_version( ) -> RegistryModelVersion: """Get a model version from the Vertex AI model registry.""" try: - model_version = ModelVersion(model_name=f"{name}@{version}") + model_version = aiplatform.ModelVersion( + model_name=f"{name}@{version}" + ) return RegistryModelVersion( version=model_version.version_id, - model_source_uri=model_version.gca_resource.artifact_uri, - model_format="Custom", + model_source_uri=model_version.artifact_uri, + model_format="Custom", # Vertex AI doesn't provide this info directly registered_model=self.get_model(name), description=model_version.description, created_at=model_version.create_time, last_updated_at=model_version.update_time, - stage=ModelVersionStage.NONE, - metadata=ModelRegistryModelMetadata(**model_version.labels) + stage=ModelVersionStage.NONE, # Vertex AI doesn't have built-in stages + metadata=ModelRegistryModelMetadata(**model_version.labels), ) - except NotFound: - raise KeyError(f"Model version {version} of model {name} does not exist.") except Exception as e: raise RuntimeError(f"Failed to get model version: {str(e)}") @@ -244,31 +265,48 @@ def list_model_versions( **kwargs: Any, ) -> List[RegistryModelVersion]: """List model versions from the Vertex AI model registry.""" - filter_expression = "" + filter_expr = [] if name: - filter_expression += f"display_name={name}" + filter_expr.append(f"display_name={name}") if metadata: for key, value in metadata.dict().items(): - filter_expression += f"labels.{key}={value} " + filter_expr.append(f"labels.{key}={value}") + if created_after: + filter_expr.append(f"create_time>{created_after.isoformat()}") + if created_before: + filter_expr.append(f"create_time<{created_before.isoformat()}") + + filter_str = " AND ".join(filter_expr) if filter_expr else None + try: - model = Model(model_name=name) - versions = model.list_versions(filter=filter_expression) - return [ + model = aiplatform.Model(model_name=name) + versions = model.list_versions(filter=filter_str) + + results = [ RegistryModelVersion( version=v.version_id, model_source_uri=v.artifact_uri, - model_format="Custom", + model_format="Custom", # Vertex AI doesn't provide this info directly registered_model=self.get_model(name), description=v.description, created_at=v.create_time, - - last_updated_at=v.update_time, - 
stage=ModelVersionStage.NONE, - metadata=ModelRegistryModelMetadata(**v.labels) + stage=ModelVersionStage.NONE, # Vertex AI doesn't have built-in stages + metadata=ModelRegistryModelMetadata(**v.labels), ) for v in versions ] + + if order_by_date: + results.sort( + key=lambda x: x.created_at, + reverse=(order_by_date.lower() == "desc"), + ) + + if count: + results = results[:count] + + return results except Exception as e: raise RuntimeError(f"Failed to list model versions: {str(e)}") @@ -280,10 +318,10 @@ def load_model_version( ) -> Any: """Load a model version from the Vertex AI model registry.""" try: - model_version = ModelVersion(model_name=f"{name}@{version}") + model_version = aiplatform.ModelVersion( + model_name=f"{name}@{version}" + ) return model_version - except NotFound: - raise KeyError(f"Model version {version} of model {name} does not exist.") except Exception as e: raise RuntimeError(f"Failed to load model version: {str(e)}") @@ -293,22 +331,3 @@ def get_model_uri_artifact_store( ) -> str: """Get the model URI artifact store.""" return model_version.model_source_uri - - -class VertexAIModelRegistryFlavor(Flavor): - """Base class for all ZenML model registry flavors.""" - - @property - def type(self) -> StackComponentType: - """Type of the flavor.""" - return StackComponentType.MODEL_REGISTRY - - @property - def config_class(self) -> Type[StackComponentConfig]: - """Config class for this flavor.""" - return StackComponentConfig - - @property - def implementation_class(self) -> Type[StackComponent]: - """Returns the implementation class for this flavor.""" - return VertexAIModelRegistry diff --git a/src/zenml/integrations/gcp/services/__init__.py b/src/zenml/integrations/gcp/services/__init__.py index b9f858b5302..a1b89b40ea7 100644 --- a/src/zenml/integrations/gcp/services/__init__.py +++ b/src/zenml/integrations/gcp/services/__init__.py @@ -13,7 +13,9 @@ # permissions and limitations under the License. """Initialization of the MLflow Service.""" -from zenml.integrations.mlflow.services.mlflow_deployment import ( # noqa - MLFlowDeploymentConfig, - MLFlowDeploymentService, +from zenml.integrations.gcp.services.vertex_deployment import ( # noqa + VertexServiceConfig, + VertexDeploymentService, ) + +__all__ = ["VertexServiceConfig", "VertexDeploymentService"] \ No newline at end of file diff --git a/src/zenml/integrations/gcp/services/vertex_deployment.py b/src/zenml/integrations/gcp/services/vertex_deployment.py index 07fd8ed2260..93c69512173 100644 --- a/src/zenml/integrations/gcp/services/vertex_deployment.py +++ b/src/zenml/integrations/gcp/services/vertex_deployment.py @@ -13,36 +13,77 @@ # permissions and limitations under the License. 
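+# Lifecycle sketch (illustrative; the field values are placeholders and
+# `model_name` is assumed to come from the base `ServiceConfig`):
+#
+#     config = VertexAIDeploymentConfig(
+#         model_name="my-model",
+#         location="europe-west1",
+#     )
+#     service = VertexDeploymentService(uuid=uuid4(), config=config)
+#     service.start()  # expected to call provision() and poll check_status()
+#     service.predict(...)  # arguments elided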
"""Implementation of the Vertex AI Deployment service.""" -from typing import TYPE_CHECKING, Any, Generator, Optional, Tuple, cast - -from pydantic import Field +import re +from typing import Any, Dict, Generator, List, Optional, Tuple +from google.api_core import exceptions from google.cloud import aiplatform +from pydantic import BaseModel, Field -from zenml.client import Client -from zenml.integrations.gcp.flavors.vertex_model_deployer_flavor import ( - VertexBaseConfig, -) +from zenml.integrations.gcp.flavors.vertex_model_deployer_flavor import VertexBaseConfig from zenml.logger import get_logger from zenml.services import ServiceState, ServiceStatus, ServiceType from zenml.services.service import BaseDeploymentService, ServiceConfig -if TYPE_CHECKING: - from google.auth.credentials import Credentials - logger = get_logger(__name__) POLLING_TIMEOUT = 1200 UUID_SLICE_LENGTH: int = 8 +def sanitize_labels(labels: Dict[str, str]) -> None: + """Update the label values to be valid Kubernetes labels. + + See: + https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/#syntax-and-character-set -class VertexServiceConfig(VertexBaseConfig, ServiceConfig): + Args: + labels: the labels to sanitize. + """ + for key, value in labels.items(): + # Kubernetes labels must be alphanumeric, no longer than + # 63 characters, and must begin and end with an alphanumeric + # character ([a-z0-9A-Z]) + labels[key] = re.sub(r"[^0-9a-zA-Z-_\.]+", "_", value)[:63].strip( + "-_." + ) + +class VertexAIDeploymentConfig(VertexBaseConfig, ServiceConfig): """Vertex AI service configurations.""" + def get_vertex_deployment_labels(self) -> Dict[str, str]: + """Generate labels for the VertexAI deployment from the service configuration. + + These labels are attached to the VertexAI deployment resource + and may be used as label selectors in lookup operations. + + Returns: + The labels for the VertexAI deployment. + """ + labels = {} + if self.pipeline_name: + labels["zenml_pipeline_name"] = self.pipeline_name + if self.pipeline_step_name: + labels["zenml_pipeline_step_name"] = self.pipeline_step_name + if self.model_name: + labels["zenml_model_name"] = self.model_name + if self.model_uri: + labels["zenml_model_uri"] = self.model_uri + sanitize_labels(labels) + return labels + + +class VertexPredictionServiceEndpoint(BaseModel): + """Vertex AI Prediction Service Endpoint.""" + + endpoint_name: str + endpoint_url: Optional[str] = None + class VertexServiceStatus(ServiceStatus): """Vertex AI service status.""" + endpoint: Optional[VertexPredictionServiceEndpoint] = None + class VertexDeploymentService(BaseDeploymentService): """Vertex AI model deployment service. @@ -59,12 +100,12 @@ class VertexDeploymentService(BaseDeploymentService): flavor="vertex", description="Vertex AI inference endpoint prediction service", ) - config: VertexServiceConfig + config: VertexAIDeploymentConfig status: VertexServiceStatus = Field( default_factory=lambda: VertexServiceStatus() ) - def __init__(self, config: VertexServiceConfig, credentials: Tuple["Credentials", str], **attrs: Any): + def __init__(self, config: VertexAIDeploymentConfig, **attrs: Any): """Initialize the Vertex AI deployment service. 
Args: @@ -72,55 +113,7 @@ def __init__(self, config: VertexServiceConfig, credentials: Tuple["Credentials" attrs: additional attributes to set on the service """ super().__init__(config=config, **attrs) - self._config = config - self._project, self._credentials = credentials # Store credentials as a private attribute - - @property - def config(self) -> VertexServiceConfig: - """Returns the config of the deployment service. - - Returns: - The config of the deployment service. - """ - return cast(VertexServiceConfig, self._config) - - def get_token(self) -> str: - """Get the Vertex AI token. - - Raises: - ValueError: If token not found. - - Returns: - Vertex AI token. - """ - client = Client() - token = None - if self.config.secret_name: - secret = client.get_secret(self.config.secret_name) - token = secret.secret_values["token"] - else: - from zenml.integrations.gcp.model_deployers.vertex_model_deployer import ( - VertexModelDeployer, - ) - - model_deployer = client.active_stack.model_deployer - if not isinstance(model_deployer, VertexModelDeployer): - raise ValueError( - "VertexModelDeployer is not active in the stack." - ) - token = model_deployer.config.token or None - if not token: - raise ValueError("Token not found.") - return token - - @property - def vertex_model(self) -> aiplatform.Model: - """Get the deployed Vertex AI inference endpoint. - - Returns: - Vertex AI inference endpoint. - """ - return aiplatform.Model(f"projects/{self.__project}/locations/{self.config.location}/models/{self.config.model_id}") + aiplatform.init(project=config.project, location=config.location) @property def prediction_url(self) -> Optional[str]: @@ -130,64 +123,67 @@ def prediction_url(self) -> Optional[str]: The prediction URI exposed by the prediction service, or None if the service is not yet ready. """ - return self.hf_endpoint.url if self.is_running else None + return ( + self.status.endpoint.endpoint_url if self.status.endpoint else None + ) - def provision(self) -> None: - """Provision or update remote Vertex AI deployment instance. + def get_endpoints(self) -> List[aiplatform.Endpoint]: + """Get all endpoints for the current project and location.""" + return aiplatform.Endpoint.list() - Raises: - Exception: If any unexpected error while creating inference endpoint. + def _generate_endpoint_name(self) -> str: + """Generate a unique name for the Vertex AI Inference Endpoint. + + Returns: + A unique name for the Vertex AI Inference Endpoint. 
""" + return f"{self.config.model_name}-{str(self.uuid)[:UUID_SLICE_LENGTH]}" + + def provision(self) -> None: + """Provision or update remote Vertex AI deployment instance.""" try: - # Attempt to create and wait for the inference endpoint - vertex_endpoint = self.vertex_model.deploy( - deployed_model_display_name=self.config.deployed_model_display_name, - traffic_percentage=self.config.traffic_percentage, - traffic_split=self.config.traffic_split, + model = aiplatform.Model( + model_name=self.config.model_name, + version=self.config.model_version, + ) + + endpoint = aiplatform.Endpoint.create( + display_name=self._generate_endpoint_name() + ) + + deployment = endpoint.deploy( + model=model, machine_type=self.config.machine_type, min_replica_count=self.config.min_replica_count, max_replica_count=self.config.max_replica_count, accelerator_type=self.config.accelerator_type, accelerator_count=self.config.accelerator_count, service_account=self.config.service_account, - metadata=self.config.metadata, - deploy_request_timeout=self.config.deploy_request_timeout, - autoscaling_target_cpu_utilization=self.config.autoscaling_target_cpu_utilization, - autoscaling_target_accelerator_duty_cycle=self.config.autoscaling_target_accelerator_duty_cycle, - enable_access_logging=self.config.enable_access_logging, - disable_container_logging=self.config.disable_container_logging, + network=self.config.network, encryption_spec_key_name=self.config.encryption_spec_key_name, - deploy_request_timeout=self.config.deploy_request_timeout, + explanation_metadata=self.config.explanation_metadata, + explanation_parameters=self.config.explanation_parameters, + sync=True, ) - except Exception as e: - self.status.update_state( - new_state=ServiceState.ERROR, error=str(e) - ) - # Catch-all for any other unexpected errors - raise Exception( - f"An unexpected error occurred while provisioning the Vertex AI inference endpoint: {e}" + self.status.endpoint = VertexPredictionServiceEndpoint( + endpoint_name=endpoint.resource_name, + endpoint_url=endpoint.resource_name, ) + self.status.update_state(ServiceState.ACTIVE) - # Check if the endpoint URL is available after provisioning - if vertex_endpoint. logger.info( - f"Vertex AI inference endpoint successfully deployed and available. Endpoint URL: {hf_endpoint.url}" + f"Vertex AI inference endpoint successfully deployed. " + f"Endpoint: {endpoint.resource_name}" ) - else: - logger.error( - "Failed to start Vertex AI inference endpoint service: No URL available, please check the Vertex AI console for more details." - ) - - def check_status(self) -> Tuple[ServiceState, str]: - """Check the the current operational state of the Vertex AI deployment. - Returns: - The operational state of the Vertex AI deployment and a message - providing additional information about that state (e.g. a - description of the error, if one is encountered). - """ - pass + except Exception as e: + self.status.update_state( + new_state=ServiceState.ERROR, error=str(e) + ) + raise RuntimeError( + f"An error occurred while provisioning the Vertex AI inference endpoint: {e}" + ) def deprovision(self, force: bool = False) -> None: """Deprovision the remote Vertex AI deployment instance. @@ -196,43 +192,97 @@ def deprovision(self, force: bool = False) -> None: force: if True, the remote deployment instance will be forcefully deprovisioned. 
""" + if self.status.endpoint: + try: + endpoint = aiplatform.Endpoint( + endpoint_name=self.status.endpoint.endpoint_name + ) + endpoint.undeploy_all() + endpoint.delete(force=force) + self.status.endpoint = None + self.status.update_state(ServiceState.INACTIVE) + logger.info( + f"Vertex AI Inference Endpoint {self.status.endpoint.endpoint_name} has been deprovisioned." + ) + except exceptions.NotFound: + logger.warning( + f"Vertex AI Inference Endpoint {self.status.endpoint.endpoint_name} not found. It may have been already deleted." + ) + except Exception as e: + raise RuntimeError( + f"Failed to deprovision Vertex AI Inference Endpoint: {e}" + ) + + def check_status(self) -> Tuple[ServiceState, str]: + """Check the current operational state of the Vertex AI deployment. + + Returns: + The operational state of the Vertex AI deployment and a message + providing additional information about that state. + """ + if not self.status.endpoint: + return ServiceState.INACTIVE, "Endpoint not provisioned" + try: - self.vertex_model.undeploy() - except HfHubHTTPError: - logger.error( - "Vertex AI Inference Endpoint is deleted or cannot be found." + endpoint = aiplatform.Endpoint( + endpoint_name=self.status.endpoint.endpoint_name ) + deployments = endpoint.list_deployments() + + if not deployments: + return ServiceState.INACTIVE, "No active deployments" + + # Check the state of all deployments + for deployment in deployments: + if deployment.state == "ACTIVE": + return ServiceState.ACTIVE, "Deployment is active" + elif deployment.state == "DEPLOYING": + return ( + ServiceState.PENDING_STARTUP, + "Deployment is in progress", + ) + elif deployment.state in ["FAILED", "DELETING"]: + return ( + ServiceState.ERROR, + f"Deployment is in {deployment.state} state", + ) + + return ServiceState.INACTIVE, "No active deployments found" + + except exceptions.NotFound: + return ServiceState.INACTIVE, "Endpoint not found" + except Exception as e: + return ServiceState.ERROR, f"Error checking status: {str(e)}" - def predict(self, data: "Any", max_new_tokens: int) -> "Any": + def predict(self, instances: List[Any]) -> List[Any]: """Make a prediction using the service. Args: - data: input data - max_new_tokens: Number of new tokens to generate + instances: List of instances to predict. Returns: - The prediction result. + The prediction results. Raises: - Exception: if the service is not running - NotImplementedError: if task is not supported. + Exception: if the service is not running or prediction fails. """ if not self.is_running: raise Exception( "Vertex AI endpoint inference service is not running. " "Please start the service before making predictions." ) - if self.prediction_url is not None: - if self.hf_endpoint.task == "text-generation": - result = self.inference_client.task_generation( - data, max_new_tokens=max_new_tokens - ) - else: - # TODO: Add support for all different supported tasks - raise NotImplementedError( - "Tasks other than text-generation is not implemented." + + if not self.status.endpoint: + raise Exception("Endpoint information is missing.") + + try: + endpoint = aiplatform.Endpoint( + endpoint_name=self.status.endpoint.endpoint_name ) - return result + response = endpoint.predict(instances=instances) + return response.predictions + except Exception as e: + raise RuntimeError(f"Prediction failed: {str(e)}") def get_logs( self, follow: bool = False, tail: Optional[int] = None @@ -247,17 +297,27 @@ def get_logs( A generator that can be accessed to get the service logs. 
""" logger.info( - "Vertex AI Endpoints provides access to the logs of " - "your Endpoints through the UI in the “Logs” tab of your Endpoint" + "Vertex AI Endpoints provides access to the logs through " + "Cloud Logging. Please check the Google Cloud Console for detailed logs." ) - return # type: ignore + yield "Logs are available in Google Cloud Console." - def _generate_an_endpoint_name(self) -> str: - """Generate a unique name for the Vertex AI Inference Endpoint. + @property + def is_running(self) -> bool: + """Check if the service is running. Returns: - A unique name for the Vertex AI Inference Endpoint. + True if the service is in the ACTIVE state, False otherwise. """ - return ( - f"{self.config.service_name}-{str(self.uuid)[:UUID_SLICE_LENGTH]}" - ) \ No newline at end of file + state, _ = self.check_status() + return state == ServiceState.ACTIVE + + def start(self) -> None: + """Start the Vertex AI deployment service.""" + if not self.is_running: + self.provision() + + def stop(self) -> None: + """Stop the Vertex AI deployment service.""" + if self.is_running: + self.deprovision() From 6769b6c98afb7fdb7bd9e945439ef77b1ceb2eec Mon Sep 17 00:00:00 2001 From: Safoine El khabich Date: Wed, 18 Sep 2024 13:26:22 +0100 Subject: [PATCH 05/43] format --- .../flavors/vertex_model_registry_flavor.py | 20 +++++++++---------- .../model_registries/vertex_model_registry.py | 9 +++++---- .../gcp/services/vertex_deployment.py | 8 ++++++-- .../materializers/cloudpickle_materializer.py | 2 +- 4 files changed, 21 insertions(+), 18 deletions(-) diff --git a/src/zenml/integrations/gcp/flavors/vertex_model_registry_flavor.py b/src/zenml/integrations/gcp/flavors/vertex_model_registry_flavor.py index 1c13e95a95e..22adc0f6a5d 100644 --- a/src/zenml/integrations/gcp/flavors/vertex_model_registry_flavor.py +++ b/src/zenml/integrations/gcp/flavors/vertex_model_registry_flavor.py @@ -18,35 +18,33 @@ from zenml.config.base_settings import BaseSettings from zenml.integrations.gcp import ( GCP_RESOURCE_TYPE, - VERTEX_MODEL_REGISTRY_FLAVOR -) -from zenml.integrations.gcp.flavors.vertex_model_deployer_flavor import ( - VertexBaseConfig, + VERTEX_MODEL_REGISTRY_FLAVOR, ) from zenml.integrations.gcp.google_credentials_mixin import ( GoogleCredentialsConfigMixin, ) -from zenml.models import ServiceConnectorRequirements from zenml.model_registries.base_model_registry import ( BaseModelRegistryConfig, BaseModelRegistryFlavor, ) +from zenml.models import ServiceConnectorRequirements if TYPE_CHECKING: from zenml.integrations.gcp.model_registries import ( VertexAIModelRegistry, ) + class VertexAIModelRegistrySettings(BaseSettings): """Settings for the VertexAI model registry.""" - + location: str - + class VertexAIModelRegistryConfig( - BaseModelRegistryConfig, - GoogleCredentialsConfigMixin, - VertexAIModelRegistrySettings + BaseModelRegistryConfig, + GoogleCredentialsConfigMixin, + VertexAIModelRegistrySettings, ): """Configuration for the VertexAI model registry.""" @@ -79,7 +77,7 @@ def service_connector_requirements( return ServiceConnectorRequirements( resource_type=GCP_RESOURCE_TYPE, ) - + @property def docs_url(self) -> Optional[str]: """A url to point at docs explaining this flavor. 
diff --git a/src/zenml/integrations/gcp/model_registries/vertex_model_registry.py b/src/zenml/integrations/gcp/model_registries/vertex_model_registry.py index f3fd31d84d7..c46bb3956c2 100644 --- a/src/zenml/integrations/gcp/model_registries/vertex_model_registry.py +++ b/src/zenml/integrations/gcp/model_registries/vertex_model_registry.py @@ -32,7 +32,6 @@ RegisteredModel, RegistryModelVersion, ) -from zenml.stack.stack_component import StackComponentConfig logger = get_logger(__name__) @@ -43,17 +42,19 @@ class VertexAIModelRegistry(BaseModelRegistry, GoogleCredentialsMixin): @property def config(self) -> VertexAIModelRegistryConfig: """Returns the config of the model registry. - + Returns: The configuration. """ return cast(VertexAIModelRegistryConfig, self._config) - + def setup_aiplatform(self) -> None: """Setup the Vertex AI platform.""" credentials, project_id = self._get_authentication() aiplatform.init( - project=project_id, location=self.config.location, credentials=credentials + project=project_id, + location=self.config.location, + credentials=credentials, ) def register_model( diff --git a/src/zenml/integrations/gcp/services/vertex_deployment.py b/src/zenml/integrations/gcp/services/vertex_deployment.py index 93c69512173..ccebdc09e75 100644 --- a/src/zenml/integrations/gcp/services/vertex_deployment.py +++ b/src/zenml/integrations/gcp/services/vertex_deployment.py @@ -20,7 +20,9 @@ from google.cloud import aiplatform from pydantic import BaseModel, Field -from zenml.integrations.gcp.flavors.vertex_model_deployer_flavor import VertexBaseConfig +from zenml.integrations.gcp.flavors.vertex_model_deployer_flavor import ( + VertexBaseConfig, +) from zenml.logger import get_logger from zenml.services import ServiceState, ServiceStatus, ServiceType from zenml.services.service import BaseDeploymentService, ServiceConfig @@ -30,6 +32,7 @@ POLLING_TIMEOUT = 1200 UUID_SLICE_LENGTH: int = 8 + def sanitize_labels(labels: Dict[str, str]) -> None: """Update the label values to be valid Kubernetes labels. @@ -46,7 +49,8 @@ def sanitize_labels(labels: Dict[str, str]) -> None: labels[key] = re.sub(r"[^0-9a-zA-Z-_\.]+", "_", value)[:63].strip( "-_." 
) - + + class VertexAIDeploymentConfig(VertexBaseConfig, ServiceConfig): """Vertex AI service configurations.""" diff --git a/src/zenml/materializers/cloudpickle_materializer.py b/src/zenml/materializers/cloudpickle_materializer.py index 399ca7f2336..a6813cb4191 100644 --- a/src/zenml/materializers/cloudpickle_materializer.py +++ b/src/zenml/materializers/cloudpickle_materializer.py @@ -29,7 +29,7 @@ logger = get_logger(__name__) -DEFAULT_FILENAME = "artifact.pkl" +DEFAULT_FILENAME = "model.pkl" DEFAULT_PYTHON_VERSION_FILENAME = "python_version.txt" From 9a03f34522da92382f7048cf213fb56102d2d20e Mon Sep 17 00:00:00 2001 From: Safoine El khabich Date: Fri, 20 Sep 2024 14:37:04 +0100 Subject: [PATCH 06/43] Refactor model registration and add URI parameter --- .../promotion/promote_with_metric_compare.py | 14 +++++++ examples/e2e/steps/training/model_trainer.py | 40 +++++++++---------- 2 files changed, 33 insertions(+), 21 deletions(-) diff --git a/examples/e2e/steps/promotion/promote_with_metric_compare.py b/examples/e2e/steps/promotion/promote_with_metric_compare.py index d68409d2d54..fcd12935905 100644 --- a/examples/e2e/steps/promotion/promote_with_metric_compare.py +++ b/examples/e2e/steps/promotion/promote_with_metric_compare.py @@ -18,6 +18,7 @@ from utils import promote_in_model_registry from zenml import Model, get_step_context, step +from zenml.client import Client from zenml.logger import get_logger logger = get_logger(__name__) @@ -29,6 +30,7 @@ def promote_with_metric_compare( current_metric: float, mlflow_model_name: str, target_env: str, + uri: str, ) -> None: """Try to promote trained model. @@ -57,6 +59,18 @@ def promote_with_metric_compare( ### ADD YOUR OWN CODE HERE - THIS IS JUST AN EXAMPLE ### should_promote = True + model_registry = Client().active_stack.model_registry + + # Register model version + model_version = model_registry.register_model_version( + name=mlflow_model_name, + version="1", + model_source_uri=uri, + description="test_register_model_version", + ) + + breakpoint() + # Get model version numbers from Model Control Plane latest_version = get_step_context().model current_version = Model(name=latest_version.name, version=target_env) diff --git a/examples/e2e/steps/training/model_trainer.py b/examples/e2e/steps/training/model_trainer.py index 87a695f5695..43e8c3f4402 100644 --- a/examples/e2e/steps/training/model_trainer.py +++ b/examples/e2e/steps/training/model_trainer.py @@ -14,6 +14,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # +from typing import Tuple import mlflow import pandas as pd @@ -25,9 +26,10 @@ from zenml.integrations.mlflow.experiment_trackers import ( MLFlowExperimentTracker, ) -from zenml.integrations.mlflow.steps.mlflow_registry import ( - mlflow_register_model_step, -) + +# from zenml.integrations.mlflow.steps.mlflow_registry import ( +# mlflow_register_model_step, +# ) from zenml.logger import get_logger logger = get_logger(__name__) @@ -49,8 +51,11 @@ def model_trainer( model: ClassifierMixin, target: str, name: str, -) -> Annotated[ - ClassifierMixin, ArtifactConfig(name="model", is_model_artifact=True) +) -> Tuple[ + Annotated[ + ClassifierMixin, ArtifactConfig(name="model", is_model_artifact=True) + ], + Annotated[str, "uri"], ]: """Configure and train a model on the training dataset. @@ -82,6 +87,9 @@ def model_trainer( Returns: The trained model artifact. """ + step_context = get_step_context() + # Get the URI where the output will be saved. 
+ uri = step_context.get_output_artifact_uri(output_name="model") ### ADD YOUR OWN CODE HERE - THIS IS JUST AN EXAMPLE ### # Initialize the model with the hyperparameters indicated in the step @@ -94,19 +102,9 @@ def model_trainer( ) # register mlflow model - mlflow_register_model_step.entrypoint( - model, - name=name, - ) - # keep track of mlflow version for future use - model_registry = Client().active_stack.model_registry - if model_registry: - version = model_registry.get_latest_model_version( - name=name, stage=None - ) - if version: - model_ = get_step_context().model - model_.log_metadata({"model_registry_version": version.version}) - ### YOUR CODE ENDS HERE ### - - return model + # mlflow_register_model_step.entrypoint( + # model, + # name=name, + # ) + + return model, uri From afc5c2b220c39902e567ea309abae90132de459f Mon Sep 17 00:00:00 2001 From: Safoine El khabich Date: Sat, 21 Sep 2024 13:51:48 +0100 Subject: [PATCH 07/43] Refactor model registration and add URI parameter --- .../model_registries/vertex_model_registry.py | 81 ++++++------------- .../model_registries/base_model_registry.py | 4 +- 2 files changed, 28 insertions(+), 57 deletions(-) diff --git a/src/zenml/integrations/gcp/model_registries/vertex_model_registry.py b/src/zenml/integrations/gcp/model_registries/vertex_model_registry.py index c46bb3956c2..97582074842 100644 --- a/src/zenml/integrations/gcp/model_registries/vertex_model_registry.py +++ b/src/zenml/integrations/gcp/model_registries/vertex_model_registry.py @@ -64,19 +64,9 @@ def register_model( metadata: Optional[Dict[str, str]] = None, ) -> RegisteredModel: """Register a model to the Vertex AI model registry.""" - try: - model = aiplatform.Model.upload( - display_name=name, - description=description, - labels=metadata, - serving_container_image_uri="us-docker.pkg.dev/vertex-ai/prediction/sklearn-cpu.1-0:latest", # Placeholder - ) - breakpoint() - return RegisteredModel( - name=name, description=description, metadata=metadata - ) - except Exception as e: - raise RuntimeError(f"Failed to register model: {str(e)}") + raise NotImplementedError( + "Vertex AI does not support registering models, you can only register model versions, skipping model registration..." + ) def delete_model( self, @@ -97,19 +87,9 @@ def update_model( remove_metadata: Optional[List[str]] = None, ) -> RegisteredModel: """Update a model in the Vertex AI model registry.""" - try: - model = aiplatform.Model(model_name=name) - if description: - model.description = description - if metadata: - model.labels.update(metadata) - if remove_metadata: - for key in remove_metadata: - model.labels.pop(key, None) - model.update() - return self.get_model(name) - except Exception as e: - raise RuntimeError(f"Failed to update model: {str(e)}") + raise NotImplementedError( + "Vertex AI does not support updating models, you can only update model versions, skipping model registration..." 
+ ) def get_model(self, name: str) -> RegisteredModel: """Get a model from the Vertex AI model registry.""" @@ -129,17 +109,14 @@ def list_models( metadata: Optional[Dict[str, str]] = None, ) -> List[RegisteredModel]: """List models in the Vertex AI model registry.""" - filter_expr = [] + filter_expr = 'labels.managed_by="ZenML"' if name: - filter_expr.append(f"display_name={name}") + filter_expr = filter_expr + f' AND display_name="{name}"' if metadata: for key, value in metadata.items(): - filter_expr.append(f"labels.{key}={value}") - - filter_str = " AND ".join(filter_expr) if filter_expr else None - + filter_expr = filter_expr + f' AND labels.{key}="{value}"' try: - models = aiplatform.Model.list(filter=filter_str) + models = aiplatform.Model.list(filter=filter_expr) return [ RegisteredModel( name=model.display_name, @@ -163,7 +140,9 @@ def register_model_version( """Register a model version to the Vertex AI model registry.""" metadata_dict = metadata.model_dump() if metadata else {} serving_container_image_uri = metadata_dict.get( - "serving_container_image_uri", None + "serving_container_image_uri", + None + or "europe-docker.pkg.dev/vertex-ai/prediction/sklearn-cpu.1-3:latest", ) is_default_version = metadata_dict.get("is_default_version", False) self.setup_aiplatform() @@ -171,7 +150,7 @@ def register_model_version( version_info = aiplatform.Model.upload( artifact_uri=model_source_uri, display_name=f"{name}_{version}", - serving_container_image_uri="europe-docker.pkg.dev/vertex-ai/prediction/sklearn-cpu.1-3:latest", + serving_container_image_uri=serving_container_image_uri, description=description, is_default_version=is_default_version, labels=metadata_dict, @@ -215,18 +194,16 @@ def update_model_version( ) -> RegistryModelVersion: """Update a model version in the Vertex AI model registry.""" try: - model_version = aiplatform.ModelVersion( - model_name=f"{name}@{version}" - ) - if description: - model_version.description = description + model_version = aiplatform.Model(model_name=f"{name}@{version}") + labels = model_version.labels if metadata: - model_version.labels.update(metadata.dict()) + metadata_dict = metadata.model_dump() if metadata else {} + for key, value in metadata_dict.items(): + labels[key] = value if remove_metadata: for key in remove_metadata: - model_version.labels.pop(key, None) - model_version.update() - # Note: Vertex AI doesn't have built-in stages, so we ignore the 'stage' parameter + labels.pop(key, None) + model_version.update(description=description, labels=labels) return self.get_model_version(name, version) except Exception as e: raise RuntimeError(f"Failed to update model version: {str(e)}") @@ -236,14 +213,12 @@ def get_model_version( ) -> RegistryModelVersion: """Get a model version from the Vertex AI model registry.""" try: - model_version = aiplatform.ModelVersion( - model_name=f"{name}@{version}" - ) + model_version = aiplatform.Model(model_name=f"{name}@{version}") return RegistryModelVersion( version=model_version.version_id, model_source_uri=model_version.artifact_uri, model_format="Custom", # Vertex AI doesn't provide this info directly - registered_model=self.get_model(name), + registered_model=self.get_model(model_version.name), description=model_version.description, created_at=model_version.create_time, last_updated_at=model_version.update_time, @@ -288,7 +263,7 @@ def list_model_versions( version=v.version_id, model_source_uri=v.artifact_uri, model_format="Custom", # Vertex AI doesn't provide this info directly - 
registered_model=self.get_model(name), + registered_model=self.get_model(v.name), description=v.description, created_at=v.create_time, last_updated_at=v.update_time, @@ -297,13 +272,7 @@ def list_model_versions( ) for v in versions ] - - if order_by_date: - results.sort( - key=lambda x: x.created_at, - reverse=(order_by_date.lower() == "desc"), - ) - + if count: results = results[:count] diff --git a/src/zenml/model_registries/base_model_registry.py b/src/zenml/model_registries/base_model_registry.py index 578d97d396c..727632eaa15 100644 --- a/src/zenml/model_registries/base_model_registry.py +++ b/src/zenml/model_registries/base_model_registry.py @@ -20,6 +20,7 @@ from pydantic import BaseModel, ConfigDict +from zenml import __version__ from zenml.enums import StackComponentType from zenml.stack import Flavor, StackComponent from zenml.stack.stack_component import StackComponentConfig @@ -62,7 +63,8 @@ class ModelRegistryModelMetadata(BaseModel): model and its development process. """ - zenml_version: Optional[str] = None + managed_by: str = "ZenML" + zenml_version: str = __version__ zenml_run_name: Optional[str] = None zenml_pipeline_name: Optional[str] = None zenml_pipeline_uuid: Optional[str] = None From 2dc0d2d49ec043b71bd8a3f8065e94c90b12eea5 Mon Sep 17 00:00:00 2001 From: Safoine El khabich Date: Sat, 21 Sep 2024 14:06:18 +0100 Subject: [PATCH 08/43] Refactor model registration and remove unnecessary code --- .../integrations/gcp/model_registries/vertex_model_registry.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/zenml/integrations/gcp/model_registries/vertex_model_registry.py b/src/zenml/integrations/gcp/model_registries/vertex_model_registry.py index 97582074842..873f22b54e6 100644 --- a/src/zenml/integrations/gcp/model_registries/vertex_model_registry.py +++ b/src/zenml/integrations/gcp/model_registries/vertex_model_registry.py @@ -272,7 +272,7 @@ def list_model_versions( ) for v in versions ] - + if count: results = results[:count] From 54b6748f2a642585854a8d29b6dde193f6ba262f Mon Sep 17 00:00:00 2001 From: Safoine El khabich Date: Fri, 25 Oct 2024 15:37:50 +0100 Subject: [PATCH 09/43] Refactor GCP service and flavor classes for Vertex AI deployment --- .../flavors/vertex_model_deployer_flavor.py | 46 ++++-- .../model_deployers/vertex_model_deployer.py | 132 +++++++++--------- .../model_registries/vertex_model_registry.py | 10 +- .../integrations/gcp/services/__init__.py | 4 +- .../gcp/services/vertex_deployment.py | 33 +++-- 5 files changed, 128 insertions(+), 97 deletions(-) diff --git a/src/zenml/integrations/gcp/flavors/vertex_model_deployer_flavor.py b/src/zenml/integrations/gcp/flavors/vertex_model_deployer_flavor.py index 85d4bd52485..f225847a5df 100644 --- a/src/zenml/integrations/gcp/flavors/vertex_model_deployer_flavor.py +++ b/src/zenml/integrations/gcp/flavors/vertex_model_deployer_flavor.py @@ -17,11 +17,20 @@ from pydantic import BaseModel -from zenml.integrations.gcp import VERTEX_MODEL_DEPLOYER_FLAVOR +from zenml.integrations.gcp import ( + GCP_RESOURCE_TYPE, + VERTEX_MODEL_DEPLOYER_FLAVOR, +) +from zenml.integrations.gcp.google_credentials_mixin import ( + GoogleCredentialsConfigMixin, +) from zenml.model_deployers.base_model_deployer import ( BaseModelDeployerConfig, BaseModelDeployerFlavor, ) +from zenml.models.v2.misc.service_connector_type import ( + ServiceConnectorRequirements, +) if TYPE_CHECKING: from zenml.integrations.gcp.model_deployers.vertex_model_deployer import ( @@ -61,20 +70,14 @@ class VertexBaseConfig(BaseModel): 
autoscaling_target_accelerator_duty_cycle: Optional[float] = None enable_access_logging: Optional[bool] = None disable_container_logging: Optional[bool] = None + explanation_metadata: Optional[Dict[str, str]] = None + explanation_parameters: Optional[Dict[str, str]] = None -class VertexModelDeployerConfig(BaseModelDeployerConfig, VertexBaseConfig): - """Configuration for the Vertex AI model deployer. - - Attributes: - project_id: The project ID. - location: The location of the model. - """ - - # The namespace to list endpoints for. Set to `"*"` to list all endpoints - # from all namespaces (i.e. personal namespace and all orgs the user belongs to). - project_id: str - location: Optional[str] = None +class VertexModelDeployerConfig( + BaseModelDeployerConfig, VertexBaseConfig, GoogleCredentialsConfigMixin +): + """Configuration for the Vertex AI model deployer.""" class VertexModelDeployerFlavor(BaseModelDeployerFlavor): @@ -89,6 +92,23 @@ def name(self) -> str: """ return VERTEX_MODEL_DEPLOYER_FLAVOR + @property + def service_connector_requirements( + self, + ) -> Optional[ServiceConnectorRequirements]: + """Service connector resource requirements for service connectors. + + Specifies resource requirements that are used to filter the available + service connector types that are compatible with this flavor. + + Returns: + Requirements for compatible service connectors, if a service + connector is required for this flavor. + """ + return ServiceConnectorRequirements( + resource_type=GCP_RESOURCE_TYPE, + ) + @property def docs_url(self) -> Optional[str]: """A url to point at docs explaining this flavor. diff --git a/src/zenml/integrations/gcp/model_deployers/vertex_model_deployer.py b/src/zenml/integrations/gcp/model_deployers/vertex_model_deployer.py index 17ad388588d..472c3666eeb 100644 --- a/src/zenml/integrations/gcp/model_deployers/vertex_model_deployer.py +++ b/src/zenml/integrations/gcp/model_deployers/vertex_model_deployer.py @@ -13,20 +13,31 @@ # permissions and limitations under the License. 
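+# Deployment-flow sketch (illustrative; the UUID and timeout are placeholders
+# and the cast assumes this deployer is the active stack's model deployer):
+#
+#     deployer = cast(VertexModelDeployer, Client().active_stack.model_deployer)
+#     service = deployer.perform_deploy_model(
+#         id=uuid4(), config=deployment_config, timeout=1800
+#     )
+#     print(deployer.get_model_server_info(service)["PREDICTION_URL"])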
"""Implementation of the Vertex AI Model Deployer.""" -from typing import ClassVar, Dict, List, Optional, Tuple, Type, cast +from typing import ClassVar, Dict, Optional, Tuple, Type, cast from uuid import UUID +from google.cloud import aiplatform + from zenml.analytics.enums import AnalyticsEvent from zenml.analytics.utils import track_handler from zenml.client import Client -from zenml.integrations.gcp import VERTEX_SERVICE_ARTIFACT +from zenml.enums import StackComponentType +from zenml.integrations.gcp import ( + VERTEX_SERVICE_ARTIFACT, +) from zenml.integrations.gcp.flavors.vertex_model_deployer_flavor import ( VertexModelDeployerConfig, VertexModelDeployerFlavor, ) +from zenml.integrations.gcp.google_credentials_mixin import ( + GoogleCredentialsMixin, +) +from zenml.integrations.gcp.model_registries.vertex_model_registry import ( + VertexAIModelRegistry, +) from zenml.integrations.gcp.services.vertex_deployment import ( + VertexAIDeploymentConfig, VertexDeploymentService, - VertexServiceConfig, ) from zenml.logger import get_logger from zenml.model_deployers import BaseModelDeployer @@ -41,7 +52,7 @@ logger = get_logger(__name__) -class VertexModelDeployer(BaseModelDeployer): +class VertexModelDeployer(BaseModelDeployer, GoogleCredentialsMixin): """Vertex AI endpoint model deployer.""" NAME: ClassVar[str] = "Vertex AI" @@ -58,35 +69,72 @@ def config(self) -> VertexModelDeployerConfig: """ return cast(VertexModelDeployerConfig, self._config) + def setup_aiplatform(self) -> None: + """Setup the Vertex AI platform.""" + credentials, project_id = self._get_authentication() + aiplatform.init( + project=project_id, + location=self.config.location, + credentials=credentials, + ) + @property def validator(self) -> Optional[StackValidator]: - """Validates the stack. + """Validates that the stack contains a model registry. + + Also validates that the artifact store is not local. Returns: - A validator that checks that the stack contains required GCP components. + A StackValidator instance. """ - def _validate_gcp_stack( - stack: "Stack", - ) -> Tuple[bool, str]: - """Check if GCP components are properly configured in the stack. + def _validate_stack_requirements(stack: "Stack") -> Tuple[bool, str]: + """Validates that all the stack components are not local. Args: stack: The stack to validate. Returns: - A tuple with a boolean indicating whether the stack is valid - and a message describing the validation result. + A tuple of (is_valid, error_message). """ - if not self.config.project_id or not self.config.location: + # Validate that the container registry is not local. + model_registry = stack.model_registry + if not model_registry and isinstance( + model_registry, VertexAIModelRegistry + ): return False, ( - "The Vertex AI model deployer requires a GCP project and " - "location to be specified in the configuration." + "The Vertex AI model deployer requires a Vertex AI model " + "registry to be present in the stack. Please add a Vertex AI " + "model registry to the stack." ) - return True, "Stack is valid for Vertex AI model deployment." + + # Validate that the rest of the components are not local. 
+ for stack_comp in stack.components.values(): + # For Forward compatibility a list of components is returned, + # but only the first item is relevant for now + # TODO: [server] make sure the ComponentModel actually has + # a local_path property or implement similar check + local_path = stack_comp.local_path + if not local_path: + continue + return False, ( + f"The '{stack_comp.name}' {stack_comp.type.value} is a " + f"local stack component. The Vertex AI Pipelines " + f"orchestrator requires that all the components in the " + f"stack used to execute the pipeline have to be not local, " + f"because there is no way for Vertex to connect to your " + f"local machine. You should use a flavor of " + f"{stack_comp.type.value} other than '" + f"{stack_comp.flavor}'." + ) + + return True, "" return StackValidator( - custom_validation_function=_validate_gcp_stack, + required_components={ + StackComponentType.MODEL_REGISTRY, + }, + custom_validation_function=_validate_stack_requirements, ) def _create_deployment_service( @@ -130,8 +178,10 @@ def perform_deploy_model( The ZenML Vertex AI deployment service object. """ with track_handler(AnalyticsEvent.MODEL_DEPLOYED) as analytics_handler: - config = cast(VertexServiceConfig, config) - service = self._create_deployment_service(id=id, config=config) + config = cast(VertexAIDeploymentConfig, config) + service = self._create_deployment_service( + id=id, config=config, timeout=timeout + ) logger.info( f"Creating a new Vertex AI deployment service: {service}" ) @@ -226,47 +276,3 @@ def get_model_server_info( "PREDICTION_URL": service_instance.prediction_url, "HEALTH_CHECK_URL": service_instance.get_healthcheck_url(), } - - def find_model_server( - self, - running: Optional[bool] = None, - service_uuid: Optional[UUID] = None, - pipeline_name: Optional[str] = None, - run_name: Optional[str] = None, - pipeline_step_name: Optional[str] = None, - model_name: Optional[str] = None, - model_uri: Optional[str] = None, - model_version: Optional[str] = None, - ) -> List[BaseService]: - """Find deployed model servers in Vertex AI. - - Args: - running: Filter by running status. - service_uuid: Filter by service UUID. - pipeline_name: Filter by pipeline name. - run_name: Filter by run name. - pipeline_step_name: Filter by pipeline step name. - model_name: Filter by model name. - model_uri: Filter by model URI. - model_version: Filter by model version. - - Returns: - A list of services matching the given criteria. 
- """ - client = Client() - services = client.list_services( - service_type=VertexDeploymentService.SERVICE_TYPE, - running=running, - service_uuid=service_uuid, - pipeline_name=pipeline_name, - run_name=run_name, - pipeline_step_name=pipeline_step_name, - model_name=model_name, - model_uri=model_uri, - model_version=model_version, - ) - - return [ - VertexDeploymentService.from_model(service_model) - for service_model in services - ] diff --git a/src/zenml/integrations/gcp/model_registries/vertex_model_registry.py b/src/zenml/integrations/gcp/model_registries/vertex_model_registry.py index 873f22b54e6..7f0332f3631 100644 --- a/src/zenml/integrations/gcp/model_registries/vertex_model_registry.py +++ b/src/zenml/integrations/gcp/model_registries/vertex_model_registry.py @@ -109,7 +109,8 @@ def list_models( metadata: Optional[Dict[str, str]] = None, ) -> List[RegisteredModel]: """List models in the Vertex AI model registry.""" - filter_expr = 'labels.managed_by="ZenML"' + self.setup_aiplatform() + filter_expr = 'labels.managed_by="zenml"' if name: filter_expr = filter_expr + f' AND display_name="{name}"' if metadata: @@ -138,6 +139,7 @@ def register_model_version( **kwargs: Any, ) -> RegistryModelVersion: """Register a model version to the Vertex AI model registry.""" + self.setup_aiplatform() metadata_dict = metadata.model_dump() if metadata else {} serving_container_image_uri = metadata_dict.get( "serving_container_image_uri", @@ -145,7 +147,7 @@ def register_model_version( or "europe-docker.pkg.dev/vertex-ai/prediction/sklearn-cpu.1-3:latest", ) is_default_version = metadata_dict.get("is_default_version", False) - self.setup_aiplatform() + metadata_dict["managed_by"] = "zenml" try: version_info = aiplatform.Model.upload( artifact_uri=model_source_uri, @@ -175,6 +177,7 @@ def delete_model_version( version: str, ) -> None: """Delete a model version from the Vertex AI model registry.""" + self.setup_aiplatform() try: model_version = aiplatform.ModelVersion( model_name=f"{name}@{version}" @@ -193,6 +196,7 @@ def update_model_version( stage: Optional[ModelVersionStage] = None, ) -> RegistryModelVersion: """Update a model version in the Vertex AI model registry.""" + self.setup_aiplatform() try: model_version = aiplatform.Model(model_name=f"{name}@{version}") labels = model_version.labels @@ -212,6 +216,7 @@ def get_model_version( self, name: str, version: str ) -> RegistryModelVersion: """Get a model version from the Vertex AI model registry.""" + self.setup_aiplatform() try: model_version = aiplatform.Model(model_name=f"{name}@{version}") return RegistryModelVersion( @@ -241,6 +246,7 @@ def list_model_versions( **kwargs: Any, ) -> List[RegistryModelVersion]: """List model versions from the Vertex AI model registry.""" + self.setup_aiplatform() filter_expr = [] if name: filter_expr.append(f"display_name={name}") diff --git a/src/zenml/integrations/gcp/services/__init__.py b/src/zenml/integrations/gcp/services/__init__.py index a1b89b40ea7..be8c6508a37 100644 --- a/src/zenml/integrations/gcp/services/__init__.py +++ b/src/zenml/integrations/gcp/services/__init__.py @@ -14,8 +14,8 @@ """Initialization of the MLflow Service.""" from zenml.integrations.gcp.services.vertex_deployment import ( # noqa - VertexServiceConfig, + VertexAIDeploymentConfig, VertexDeploymentService, ) -__all__ = ["VertexServiceConfig", "VertexDeploymentService"] \ No newline at end of file +__all__ = ["VertexAIDeploymentConfig", "VertexDeploymentService"] \ No newline at end of file diff --git 
a/src/zenml/integrations/gcp/services/vertex_deployment.py b/src/zenml/integrations/gcp/services/vertex_deployment.py index ccebdc09e75..72069a1f181 100644 --- a/src/zenml/integrations/gcp/services/vertex_deployment.py +++ b/src/zenml/integrations/gcp/services/vertex_deployment.py @@ -20,6 +20,7 @@ from google.cloud import aiplatform from pydantic import BaseModel, Field +from zenml.client import Client from zenml.integrations.gcp.flavors.vertex_model_deployer_flavor import ( VertexBaseConfig, ) @@ -70,8 +71,6 @@ def get_vertex_deployment_labels(self) -> Dict[str, str]: labels["zenml_pipeline_step_name"] = self.pipeline_step_name if self.model_name: labels["zenml_model_name"] = self.model_name - if self.model_uri: - labels["zenml_model_uri"] = self.model_uri sanitize_labels(labels) return labels @@ -117,7 +116,6 @@ def __init__(self, config: VertexAIDeploymentConfig, **attrs: Any): attrs: additional attributes to set on the service """ super().__init__(config=config, **attrs) - aiplatform.init(project=config.project, location=config.location) @property def prediction_url(self) -> Optional[str]: @@ -145,17 +143,28 @@ def _generate_endpoint_name(self) -> str: def provision(self) -> None: """Provision or update remote Vertex AI deployment instance.""" + from zenml.integrations.gcp.model_deployers.vertex_model_deployer import ( + VertexModelDeployer, + ) + + zenml_client = Client() + model_deployer = zenml_client.active_stack.model_deployer + if isinstance(model_deployer, VertexModelDeployer): + model_deployer.setup_aiplatform() + else: + raise ValueError("Model deployer is not VertexModelDeployer") try: + breakpoint() model = aiplatform.Model( model_name=self.config.model_name, version=self.config.model_version, ) - + breakpoint() endpoint = aiplatform.Endpoint.create( display_name=self._generate_endpoint_name() ) - - deployment = endpoint.deploy( + breakpoint() + endpoint.deploy( model=model, machine_type=self.config.machine_type, min_replica_count=self.config.min_replica_count, @@ -169,7 +178,7 @@ def provision(self) -> None: explanation_parameters=self.config.explanation_parameters, sync=True, ) - + breakpoint() self.status.endpoint = VertexPredictionServiceEndpoint( endpoint_name=endpoint.resource_name, endpoint_url=endpoint.resource_name, @@ -315,13 +324,3 @@ def is_running(self) -> bool: """ state, _ = self.check_status() return state == ServiceState.ACTIVE - - def start(self) -> None: - """Start the Vertex AI deployment service.""" - if not self.is_running: - self.provision() - - def stop(self) -> None: - """Stop the Vertex AI deployment service.""" - if self.is_running: - self.deprovision() From a9804493162723fde5db745be6ecd395a39df9fa Mon Sep 17 00:00:00 2001 From: Safoine El khabich Date: Thu, 31 Oct 2024 12:01:54 +0100 Subject: [PATCH 10/43] Refactor Vertex AI model registry and deployer configurations - Update model source URI retrieval in VertexAIModelRegistry. - Enhance BaseModelDeployer to check and start inactive services. - Set default replica counts to 1 and sync to False in VertexBaseConfig. - Rename and update documentation for deployment service creation in VertexModelDeployer. 
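Illustratively, the new defaults mean an unconfigured deployment now resolves to a single, asynchronously created replica:

    VertexBaseConfig()  # min_replica_count=1, max_replica_count=1, sync=False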
--- .../flavors/vertex_model_deployer_flavor.py | 7 +- .../model_deployers/vertex_model_deployer.py | 24 +- .../model_registries/vertex_model_registry.py | 2 +- .../gcp/services/vertex_deployment.py | 373 ++++++++++-------- .../model_deployers/base_model_deployer.py | 7 + 5 files changed, 229 insertions(+), 184 deletions(-) diff --git a/src/zenml/integrations/gcp/flavors/vertex_model_deployer_flavor.py b/src/zenml/integrations/gcp/flavors/vertex_model_deployer_flavor.py index f225847a5df..5c8041c94e5 100644 --- a/src/zenml/integrations/gcp/flavors/vertex_model_deployer_flavor.py +++ b/src/zenml/integrations/gcp/flavors/vertex_model_deployer_flavor.py @@ -58,13 +58,13 @@ class VertexBaseConfig(BaseModel): machine_type: Optional[str] = None accelerator_type: Optional[str] = None accelerator_count: Optional[int] = None - min_replica_count: Optional[int] = None - max_replica_count: Optional[int] = None + min_replica_count: Optional[int] = 1 + max_replica_count: Optional[int] = 1 service_account: Optional[str] = None metadata: Optional[Dict[str, str]] = None network: Optional[str] = None encryption_spec_key_name: Optional[str] = None - sync: Optional[bool] = True + sync: Optional[bool] = False deploy_request_timeout: Optional[int] = None autoscaling_target_cpu_utilization: Optional[float] = None autoscaling_target_accelerator_duty_cycle: Optional[float] = None @@ -72,6 +72,7 @@ class VertexBaseConfig(BaseModel): disable_container_logging: Optional[bool] = None explanation_metadata: Optional[Dict[str, str]] = None explanation_parameters: Optional[Dict[str, str]] = None + existing_endpoint: Optional[str] = None class VertexModelDeployerConfig( diff --git a/src/zenml/integrations/gcp/model_deployers/vertex_model_deployer.py b/src/zenml/integrations/gcp/model_deployers/vertex_model_deployer.py index 472c3666eeb..82ab4aca520 100644 --- a/src/zenml/integrations/gcp/model_deployers/vertex_model_deployer.py +++ b/src/zenml/integrations/gcp/model_deployers/vertex_model_deployer.py @@ -138,19 +138,19 @@ def _validate_stack_requirements(stack: "Stack") -> Tuple[bool, str]: ) def _create_deployment_service( - self, id: UUID, timeout: int, config: VertexModelDeployerConfig + self, id: UUID, timeout: int, config: VertexAIDeploymentConfig ) -> VertexDeploymentService: - """Creates a new DatabricksDeploymentService. + """Creates a new VertexAIDeploymentService. Args: - id: the UUID of the model to be deployed with Databricks model deployer. - timeout: the timeout in seconds to wait for the Databricks inference endpoint + id: the UUID of the model to be deployed with Vertex model deployer. + timeout: the timeout in seconds to wait for the Vertex inference endpoint to be provisioned and successfully started or updated. - config: the configuration of the model to be deployed with Databricks model deployer. + config: the configuration of the model to be deployed with Vertex model deployer. Returns: The VertexModelDeployerConfig object that can be used to interact - with the Databricks inference endpoint. + with the Vertex inference endpoint. 
""" # create a new service for the new model service = VertexDeploymentService(uuid=id, config=config) @@ -197,14 +197,6 @@ def perform_deploy_model( "store_type": client.zen_store.type.value, **stack_metadata, } - - # Create a service artifact - client.create_artifact( - name=VERTEX_SERVICE_ARTIFACT, - artifact_store_id=client.active_stack.artifact_store.id, - producer=service, - ) - return service def perform_stop_model( @@ -258,10 +250,10 @@ def perform_delete_model( """ service = cast(VertexDeploymentService, service) service.stop(timeout=timeout, force=force) - service.delete() + service.stop() @staticmethod - def get_model_server_info( + def get_model_server_info( # type: ignore[override] service_instance: "VertexDeploymentService", ) -> Dict[str, Optional[str]]: """Get information about the deployed model server. diff --git a/src/zenml/integrations/gcp/model_registries/vertex_model_registry.py b/src/zenml/integrations/gcp/model_registries/vertex_model_registry.py index 7f0332f3631..66b7465bbaa 100644 --- a/src/zenml/integrations/gcp/model_registries/vertex_model_registry.py +++ b/src/zenml/integrations/gcp/model_registries/vertex_model_registry.py @@ -159,7 +159,7 @@ def register_model_version( ) return RegistryModelVersion( version=version_info.version_id, - model_source_uri=model_source_uri, + model_source_uri=version_info.resource_name, model_format="Custom", # Vertex AI doesn't provide this info directly registered_model=self.get_model(version_info.name), description=description, diff --git a/src/zenml/integrations/gcp/services/vertex_deployment.py b/src/zenml/integrations/gcp/services/vertex_deployment.py index 72069a1f181..2353f0d8d47 100644 --- a/src/zenml/integrations/gcp/services/vertex_deployment.py +++ b/src/zenml/integrations/gcp/services/vertex_deployment.py @@ -1,16 +1,3 @@ -# Copyright (c) ZenML GmbH 2024. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at: -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express -# or implied. See the License for the specific language governing -# permissions and limitations under the License. """Implementation of the Vertex AI Deployment service.""" import re @@ -30,7 +17,10 @@ logger = get_logger(__name__) -POLLING_TIMEOUT = 1200 +# Increase timeout for long-running operations +POLLING_TIMEOUT = ( + 1800 # Increased from 1200 to allow for longer deployment times +) UUID_SLICE_LENGTH: int = 8 @@ -52,26 +42,45 @@ def sanitize_labels(labels: Dict[str, str]) -> None: ) +def sanitize_vertex_label(value: str) -> str: + """Sanitize a label value to comply with Vertex AI requirements. 
+ + Args: + value: The label value to sanitize + + Returns: + Sanitized label value + """ + # Convert to lowercase + value = value.lower() + # Replace any character that's not lowercase letter, number, dash or underscore + value = re.sub(r"[^a-z0-9\-_]", "-", value) + # Ensure it starts with a letter/number by prepending 'x' if needed + if not value[0].isalnum(): + value = f"x{value}" + # Truncate to 63 chars to stay under limit + return value[:63] + + class VertexAIDeploymentConfig(VertexBaseConfig, ServiceConfig): """Vertex AI service configurations.""" def get_vertex_deployment_labels(self) -> Dict[str, str]: - """Generate labels for the VertexAI deployment from the service configuration. - - These labels are attached to the VertexAI deployment resource - and may be used as label selectors in lookup operations. + """Generate labels for the VertexAI deployment from the service configuration.""" + labels = { + "managed-by": "zenml", # Changed from managed_by to managed-by + } - Returns: - The labels for the VertexAI deployment. - """ - labels = {} if self.pipeline_name: - labels["zenml_pipeline_name"] = self.pipeline_name + labels["pipeline-name"] = sanitize_vertex_label(self.pipeline_name) if self.pipeline_step_name: - labels["zenml_pipeline_step_name"] = self.pipeline_step_name + labels["step-name"] = sanitize_vertex_label( + self.pipeline_step_name + ) if self.model_name: - labels["zenml_model_name"] = self.model_name - sanitize_labels(labels) + labels["model-name"] = sanitize_vertex_label(self.model_name) + if self.service_name: + labels["service-name"] = sanitize_vertex_label(self.service_name) return labels @@ -80,6 +89,9 @@ class VertexPredictionServiceEndpoint(BaseModel): endpoint_name: str endpoint_url: Optional[str] = None + deployed_model_id: Optional[str] = ( + None # Added to track specific model deployment + ) class VertexServiceStatus(ServiceStatus): @@ -89,13 +101,7 @@ class VertexServiceStatus(ServiceStatus): class VertexDeploymentService(BaseDeploymentService): - """Vertex AI model deployment service. - - Attributes: - SERVICE_TYPE: a service type descriptor with information describing - the Vertex AI deployment service class - config: service configuration - """ + """Vertex AI model deployment service.""" SERVICE_TYPE = ServiceType( name="vertex-deployment", @@ -109,158 +115,202 @@ class VertexDeploymentService(BaseDeploymentService): ) def __init__(self, config: VertexAIDeploymentConfig, **attrs: Any): - """Initialize the Vertex AI deployment service. - - Args: - config: service configuration - attrs: additional attributes to set on the service - """ + """Initialize the Vertex AI deployment service.""" super().__init__(config=config, **attrs) + # Initialize aiplatform with project and location + from zenml.integrations.gcp.model_deployers.vertex_model_deployer import ( + VertexModelDeployer, + ) + + zenml_client = Client() + model_deployer = zenml_client.active_stack.model_deployer + if not isinstance(model_deployer, VertexModelDeployer): + raise ValueError("Model deployer is not VertexModelDeployer") + + model_deployer.setup_aiplatform() + @property def prediction_url(self) -> Optional[str]: - """The prediction URI exposed by the prediction service. - - Returns: - The prediction URI exposed by the prediction service, or None if - the service is not yet ready. 
- """ - return ( - self.status.endpoint.endpoint_url if self.status.endpoint else None - ) + """The prediction URI exposed by the prediction service.""" + if not self.status.endpoint or not self.status.endpoint.endpoint_url: + return None + + # Construct proper prediction URL + return f"https://{self.config.location}-aiplatform.googleapis.com/v1/{self.status.endpoint.endpoint_url}" def get_endpoints(self) -> List[aiplatform.Endpoint]: """Get all endpoints for the current project and location.""" - return aiplatform.Endpoint.list() + try: + # Use proper filtering and pagination + return list( + aiplatform.Endpoint.list( + filter='labels.managed_by="zenml"', + location=self.config.location, + ) + ) + except Exception as e: + logger.error(f"Failed to list endpoints: {e}") + return [] def _generate_endpoint_name(self) -> str: - """Generate a unique name for the Vertex AI Inference Endpoint. - - Returns: - A unique name for the Vertex AI Inference Endpoint. - """ - return f"{self.config.model_name}-{str(self.uuid)[:UUID_SLICE_LENGTH]}" + """Generate a unique name for the Vertex AI Inference Endpoint.""" + # Make name more descriptive and conformant to Vertex AI naming rules + sanitized_model_name = re.sub( + r"[^a-zA-Z0-9-]", "-", self.config.model_name.lower() + ) + return f"{sanitized_model_name}-{str(self.uuid)[:UUID_SLICE_LENGTH]}" def provision(self) -> None: """Provision or update remote Vertex AI deployment instance.""" - from zenml.integrations.gcp.model_deployers.vertex_model_deployer import ( - VertexModelDeployer, - ) - - zenml_client = Client() - model_deployer = zenml_client.active_stack.model_deployer - if isinstance(model_deployer, VertexModelDeployer): - model_deployer.setup_aiplatform() - else: - raise ValueError("Model deployer is not VertexModelDeployer") try: - breakpoint() + if self.config.existing_endpoint: + # Use the existing endpoint + endpoint = aiplatform.Endpoint( + endpoint_name=self.config.existing_endpoint, + location=self.config.location, + ) + logger.info( + f"Using existing Vertex AI inference endpoint: {endpoint.resource_name}" + ) + else: + # Create the endpoint + endpoint_name = self._generate_endpoint_name() + endpoint = aiplatform.Endpoint.create( + display_name=endpoint_name, + location=self.config.location, + encryption_spec_key_name=self.config.encryption_spec_key_name, + labels=self.config.get_vertex_deployment_labels(), + ) + logger.info( + f"Vertex AI inference endpoint created: {endpoint.resource_name}" + ) + + # Then get the model model = aiplatform.Model( - model_name=self.config.model_name, - version=self.config.model_version, + model_name=self.config.model_id, + location=self.config.location, ) - breakpoint() - endpoint = aiplatform.Endpoint.create( - display_name=self._generate_endpoint_name() + logger.info( + f"Found existing model to deploy: {model.resource_name} to the endpoint." 
) - breakpoint() + + # Deploy the model to the endpoint endpoint.deploy( model=model, + deployed_model_display_name=f"{endpoint_name}-deployment", machine_type=self.config.machine_type, min_replica_count=self.config.min_replica_count, max_replica_count=self.config.max_replica_count, accelerator_type=self.config.accelerator_type, accelerator_count=self.config.accelerator_count, service_account=self.config.service_account, - network=self.config.network, - encryption_spec_key_name=self.config.encryption_spec_key_name, explanation_metadata=self.config.explanation_metadata, explanation_parameters=self.config.explanation_parameters, - sync=True, + sync=self.config.sync, + ) + logger.info( + f"Model {model.resource_name} successfully deployed to endpoint {endpoint.resource_name}" ) - breakpoint() + + # Store both endpoint and deployment information self.status.endpoint = VertexPredictionServiceEndpoint( endpoint_name=endpoint.resource_name, endpoint_url=endpoint.resource_name, + deployed_model_id=model.resource_name, ) - self.status.update_state(ServiceState.ACTIVE) + self.status.update_state(ServiceState.PENDING_STARTUP) logger.info( - f"Vertex AI inference endpoint successfully deployed. " - f"Endpoint: {endpoint.resource_name}" + f"Vertex AI inference endpoint successfully deployed. Pending startup" + f"Endpoint: {endpoint.resource_name}, " ) except Exception as e: self.status.update_state( - new_state=ServiceState.ERROR, error=str(e) + new_state=ServiceState.ERROR, + error=f"Deployment failed: {str(e)}", ) raise RuntimeError( f"An error occurred while provisioning the Vertex AI inference endpoint: {e}" ) def deprovision(self, force: bool = False) -> None: - """Deprovision the remote Vertex AI deployment instance. - - Args: - force: if True, the remote deployment instance will be - forcefully deprovisioned. - """ - if self.status.endpoint: - try: - endpoint = aiplatform.Endpoint( - endpoint_name=self.status.endpoint.endpoint_name - ) - endpoint.undeploy_all() - endpoint.delete(force=force) - self.status.endpoint = None - self.status.update_state(ServiceState.INACTIVE) - logger.info( - f"Vertex AI Inference Endpoint {self.status.endpoint.endpoint_name} has been deprovisioned." - ) - except exceptions.NotFound: - logger.warning( - f"Vertex AI Inference Endpoint {self.status.endpoint.endpoint_name} not found. It may have been already deleted." - ) - except Exception as e: - raise RuntimeError( - f"Failed to deprovision Vertex AI Inference Endpoint: {e}" - ) + """Deprovision the remote Vertex AI deployment instance.""" + if not self.status.endpoint: + return - def check_status(self) -> Tuple[ServiceState, str]: - """Check the current operational state of the Vertex AI deployment. + try: + endpoint = aiplatform.Endpoint( + endpoint_name=self.status.endpoint.endpoint_name, + location=self.config.location, + ) + + # First undeploy the specific model if we have its ID + if self.status.endpoint.deployed_model_id: + try: + endpoint.undeploy( + deployed_model_id=self.status.endpoint.deployed_model_id, + sync=self.config.sync, + ) + except exceptions.NotFound: + logger.warning("Deployed model already undeployed") + + # Then delete the endpoint + endpoint.delete(force=force, sync=self.config.sync) + + self.status.endpoint = None + self.status.update_state(ServiceState.INACTIVE) + + logger.info("Vertex AI Inference Endpoint has been deprovisioned.") + + except exceptions.NotFound: + logger.warning( + "Vertex AI Inference Endpoint not found. It may have been already deleted." 
+ ) + self.status.update_state(ServiceState.INACTIVE) + except Exception as e: + error_msg = ( + f"Failed to deprovision Vertex AI Inference Endpoint: {e}" + ) + logger.error(error_msg) + if not force: + raise RuntimeError(error_msg) - Returns: - The operational state of the Vertex AI deployment and a message - providing additional information about that state. - """ + def check_status(self) -> Tuple[ServiceState, str]: + """Check the current operational state of the Vertex AI deployment.""" if not self.status.endpoint: return ServiceState.INACTIVE, "Endpoint not provisioned" - try: + logger.info( + f"Checking status of Vertex AI Inference Endpoint: {self.status.endpoint.endpoint_name}" + ) endpoint = aiplatform.Endpoint( - endpoint_name=self.status.endpoint.endpoint_name + endpoint_name=self.status.endpoint.endpoint_name, + location=self.config.location, ) - deployments = endpoint.list_deployments() - - if not deployments: - return ServiceState.INACTIVE, "No active deployments" - - # Check the state of all deployments - for deployment in deployments: - if deployment.state == "ACTIVE": - return ServiceState.ACTIVE, "Deployment is active" - elif deployment.state == "DEPLOYING": - return ( - ServiceState.PENDING_STARTUP, - "Deployment is in progress", - ) - elif deployment.state in ["FAILED", "DELETING"]: - return ( - ServiceState.ERROR, - f"Deployment is in {deployment.state} state", + + # Get detailed deployment status + deployment = None + if self.status.endpoint.deployed_model_id: + deployments = [ + d + for d in endpoint.list_models() + if d.model == self.status.endpoint.deployed_model_id + ] + if deployments: + deployment = deployments[0] + logger.info( + f"Model {self.status.endpoint.deployed_model_id} was deployed to the endpoint" ) - return ServiceState.INACTIVE, "No active deployments found" + if not deployment: + logger.warning( + "No matching deployment found, endpoint may be inactive or failed to deploy" + ) + return ServiceState.INACTIVE, "No matching deployment found" + + return ServiceState.ACTIVE, "Deployment is ready" except exceptions.NotFound: return ServiceState.INACTIVE, "Endpoint not found" @@ -268,17 +318,7 @@ def check_status(self) -> Tuple[ServiceState, str]: return ServiceState.ERROR, f"Error checking status: {str(e)}" def predict(self, instances: List[Any]) -> List[Any]: - """Make a prediction using the service. - - Args: - instances: List of instances to predict. - - Returns: - The prediction results. - - Raises: - Exception: if the service is not running or prediction fails. - """ + """Make a prediction using the service.""" if not self.is_running: raise Exception( "Vertex AI endpoint inference service is not running. 
" @@ -290,37 +330,42 @@ def predict(self, instances: List[Any]) -> List[Any]: try: endpoint = aiplatform.Endpoint( - endpoint_name=self.status.endpoint.endpoint_name + endpoint_name=self.status.endpoint.endpoint_name, + location=self.config.location, ) - response = endpoint.predict(instances=instances) - return response.predictions + + # Add proper prediction parameters and handle sync/async + predictions = endpoint.predict( + instances=instances, + deployed_model_id=self.status.endpoint.deployed_model_id.split( + "/" + )[-1], + timeout=30, # Add reasonable timeout + ) + + if not predictions: + raise RuntimeError("No predictions returned") + except Exception as e: + logger.error(f"Prediction failed: {e}") raise RuntimeError(f"Prediction failed: {str(e)}") + return [predictions] + def get_logs( self, follow: bool = False, tail: Optional[int] = None ) -> Generator[str, bool, None]: - """Retrieve the service logs. - - Args: - follow: if True, the logs will be streamed as they are written - tail: only retrieve the last NUM lines of log output. - - Returns: - A generator that can be accessed to get the service logs. - """ + """Retrieve the service logs.""" + # Note: Could be enhanced to actually fetch logs from Cloud Logging logger.info( "Vertex AI Endpoints provides access to the logs through " - "Cloud Logging. Please check the Google Cloud Console for detailed logs." + "Cloud Logging. Please check the Google Cloud Console for detailed logs. " + f"Location: {self.config.location}" ) yield "Logs are available in Google Cloud Console." @property def is_running(self) -> bool: - """Check if the service is running. - - Returns: - True if the service is in the ACTIVE state, False otherwise. - """ - state, _ = self.check_status() - return state == ServiceState.ACTIVE + """Check if the service is running.""" + self.update_status() + return self.status.state == ServiceState.ACTIVE diff --git a/src/zenml/model_deployers/base_model_deployer.py b/src/zenml/model_deployers/base_model_deployer.py index 40a65128f26..814e4f28175 100644 --- a/src/zenml/model_deployers/base_model_deployer.py +++ b/src/zenml/model_deployers/base_model_deployer.py @@ -32,6 +32,7 @@ from zenml.logger import get_logger from zenml.services import BaseService, ServiceConfig from zenml.services.service import BaseDeploymentService +from zenml.services.service_status import ServiceState from zenml.services.service_type import ServiceType from zenml.stack import StackComponent from zenml.stack.flavor import Flavor @@ -180,6 +181,12 @@ def deploy_model( logger.info( f"Existing model server found for {config.name or config.model_name} with the exact same configuration. Returning the existing service named {services[0].config.service_name}." ) + status, _ = services[0].check_status() + if status != ServiceState.ACTIVE: + logger.info( + f"Service found for {config.name or config.model_name} is not active. Starting the service." 
+ ) + services[0].start(timeout=timeout) return services[0] else: # Find existing model server From 0a13214bbc5e094a820b18907d6edba508a4c48b Mon Sep 17 00:00:00 2001 From: Safoine El khabich Date: Thu, 31 Oct 2024 13:44:54 +0100 Subject: [PATCH 11/43] Refactor model deployer configurations and add VertexAI model deployer --- .../component-guide/model-deployers/vertex.md | 179 ++++++++++++++++++ .../model-registries/vertex.md | 150 +++++++++++++++ docs/book/toc.md | 2 + 3 files changed, 331 insertions(+) create mode 100644 docs/book/component-guide/model-deployers/vertex.md create mode 100644 docs/book/component-guide/model-registries/vertex.md diff --git a/docs/book/component-guide/model-deployers/vertex.md b/docs/book/component-guide/model-deployers/vertex.md new file mode 100644 index 00000000000..812271da6e9 --- /dev/null +++ b/docs/book/component-guide/model-deployers/vertex.md @@ -0,0 +1,179 @@ +# Vertex AI Model Deployer + +[Vertex AI](https://cloud.google.com/vertex-ai) provides managed infrastructure for deploying machine learning models at scale. The Vertex AI Model Deployer in ZenML allows you to deploy models to Vertex AI endpoints, providing a scalable and managed solution for model serving. + +## When to use it? + +You should use the Vertex AI Model Deployer when: + +* You're already using Google Cloud Platform (GCP) and want to leverage its native ML infrastructure +* You need enterprise-grade model serving capabilities with autoscaling +* You want a fully managed solution for hosting ML models +* You need to handle high-throughput prediction requests +* You want to deploy models with GPU acceleration +* You need to monitor and track your model deployments + +This is particularly useful in the following scenarios: +* Deploying models to production with high availability requirements +* Serving models that need GPU acceleration +* Handling varying prediction workloads with autoscaling +* Integrating model serving with other GCP services + +{% hint style="warning" %} +The Vertex AI Model Deployer requires a Vertex AI Model Registry to be present in your stack. Make sure you have configured both components properly. +{% endhint %} + +## How to deploy it? + +The Vertex AI Model Deployer is provided by the GCP ZenML integration. First, install the integration: + +```shell +zenml integration install gcp -y +``` + +### Authentication and Service Connector Configuration + +The Vertex AI Model Deployer requires proper GCP authentication. 
The recommended way to configure this is using the ZenML Service Connector functionality: + +```shell +# Register the service connector with a service account key +zenml service-connector register vertex_deployer_connector \ + --type gcp \ + --auth-method=service-account \ + --project_id= \ + --service_account_json=@vertex-deployer-sa.json \ + --resource-type gcp-generic + +# Register the model deployer +zenml model-deployer register vertex_deployer \ + --flavor=vertex \ + --location=us-central1 + +# Connect the model deployer to the service connector +zenml model-deployer connect vertex_deployer --connector vertex_deployer_connector +``` + +{% hint style="info" %} +The service account needs the following permissions: +- `Vertex AI User` role for deploying models +- `Vertex AI Service Agent` role for managing model endpoints +{% endhint %} + +## How to use it + +### Deploy a model in a pipeline + +Here's an example of how to use the Vertex AI Model Deployer in a ZenML pipeline: + +```python +from typing_extensions import Annotated +from zenml import ArtifactConfig, get_step_context, step +from zenml.client import Client +from zenml.integrations.gcp.services.vertex_deployment import ( + VertexAIDeploymentConfig, + VertexDeploymentService, +) + +@step(enable_cache=False) +def model_deployer( + model_registry_uri: str, +) -> Annotated[ + VertexDeploymentService, + ArtifactConfig(name="vertex_deployment", is_deployment_artifact=True) +]: + """Model deployer step.""" + zenml_client = Client() + current_model = get_step_context().model + model_deployer = zenml_client.active_stack.model_deployer + + # Configure the deployment + vertex_deployment_config = VertexAIDeploymentConfig( + location="europe-west1", + name="zenml-vertex-quickstart", + model_name=current_model.name, + description="Vertex AI model deployment example", + model_id=model_registry_uri, + machine_type="n1-standard-4", # Optional: specify machine type + min_replica_count=1, # Optional: minimum number of replicas + max_replica_count=3, # Optional: maximum number of replicas + ) + + # Deploy the model + service = model_deployer.deploy_model( + config=vertex_deployment_config, + service_type=VertexDeploymentService.SERVICE_TYPE, + ) + + return service +``` + +### Configuration Options + +The Vertex AI Model Deployer accepts a rich set of configuration options through `VertexAIDeploymentConfig`: + +* Basic Configuration: + * `location`: GCP region for deployment (e.g., "us-central1") + * `name`: Name for the deployment endpoint + * `model_name`: Name of the model being deployed + * `model_id`: Model ID from the Vertex AI Model Registry + +* Infrastructure Configuration: + * `machine_type`: Type of machine to use (e.g., "n1-standard-4") + * `accelerator_type`: GPU accelerator type if needed + * `accelerator_count`: Number of GPUs per replica + * `min_replica_count`: Minimum number of serving replicas + * `max_replica_count`: Maximum number of serving replicas + +* Advanced Configuration: + * `service_account`: Custom service account for the deployment + * `network`: VPC network configuration + * `encryption_spec_key_name`: Customer-managed encryption key + * `enable_access_logging`: Enable detailed access logging + * `explanation_metadata`: Model explanation configuration + * `autoscaling_target_cpu_utilization`: Target CPU utilization for autoscaling + +### Running Predictions + +Once a model is deployed, you can run predictions using the service: + +```python +from zenml.integrations.gcp.model_deployers import VertexModelDeployer 
+from zenml.services import ServiceState + +# Get the deployed service +model_deployer = VertexModelDeployer.get_active_model_deployer() +services = model_deployer.find_model_server( + pipeline_name="deployment_pipeline", + pipeline_step_name="model_deployer", + model_name="my_model", +) + +if services: + service = services[0] + if service.is_running: + # Run prediction + prediction = service.predict( + instances=[{"feature1": 1.0, "feature2": 2.0}] + ) + print(f"Prediction: {prediction}") +``` + +### Limitations and Considerations + +1. **Stack Requirements**: + - Requires a Vertex AI Model Registry in the stack + - All stack components must be non-local + +2. **Authentication**: + - Requires proper GCP credentials with Vertex AI permissions + - Best practice is to use service connectors for authentication + +3. **Costs**: + - Vertex AI endpoints incur costs based on machine type and uptime + - Consider using autoscaling to optimize costs + +4. **Region Availability**: + - Service availability depends on Vertex AI regional availability + - Model and endpoint must be in the same region + +Check out the [SDK docs](https://sdkdocs.zenml.io) for more detailed information about the implementation. \ No newline at end of file diff --git a/docs/book/component-guide/model-registries/vertex.md b/docs/book/component-guide/model-registries/vertex.md new file mode 100644 index 00000000000..41a29ffb11d --- /dev/null +++ b/docs/book/component-guide/model-registries/vertex.md @@ -0,0 +1,150 @@ +# Vertex AI Model Registry + +[Vertex AI](https://cloud.google.com/vertex-ai) is Google Cloud's unified ML platform that helps you build, deploy, and scale ML models. The Vertex AI Model Registry is a centralized repository for managing your ML models throughout their lifecycle. ZenML's Vertex AI Model Registry integration allows you to register, version, and manage your models using Vertex AI's infrastructure. + +## When would you want to use it? + +You should consider using the Vertex AI Model Registry when: + +* You're already using Google Cloud Platform (GCP) and want to leverage its native ML infrastructure +* You need enterprise-grade model management capabilities with fine-grained access control +* You want to track model lineage and metadata in a centralized location +* You're building ML pipelines that need to integrate with other Vertex AI services +* You need to manage model deployment across different GCP environments + +This is particularly useful in the following scenarios: + +* Building production ML pipelines that need to integrate with GCP services +* Managing multiple versions of models across development and production environments +* Tracking model artifacts and metadata in a centralized location +* Deploying models to Vertex AI endpoints for serving + +{% hint style="warning" %} +Important: The Vertex AI Model Registry implementation only supports the model version interface, not the model interface. This means you cannot register, delete, or update models directly - you can only work with model versions. Operations like `register_model()`, `delete_model()`, and `update_model()` are not supported. +{% endhint %} + +## How do you deploy it? + +The Vertex AI Model Registry flavor is provided by the GCP ZenML integration. First, install the integration: + +```shell +zenml integration install gcp -y +``` + +### Authentication and Service Connector Configuration + +The Vertex AI Model Registry requires proper GCP authentication. 
The recommended way to configure this is using the ZenML Service Connector functionality. You have several options for authentication: + +1. Using a GCP Service Connector with a dedicated service account (Recommended): +```shell +# Register the service connector with a service account key +zenml service-connector register vertex_registry_connector \ + --type gcp \ + --auth-method=service-account \ + --project_id= \ + --service_account_json=@vertex-registry-sa.json \ + --resource-type gcp-generic + +# Register the model registry +zenml model-registry register vertex_registry \ + --flavor=vertex \ + --location=us-central1 + +# Connect the model registry to the service connector +zenml model-registry connect vertex_registry --connector vertex_registry_connector +``` + +2. Using local gcloud credentials: +```shell +# Register the model registry using local gcloud auth +zenml model-registry register vertex_registry \ + --flavor=vertex \ + --location=us-central1 +``` + +{% hint style="info" %} +The service account used needs the following permissions: +- `Vertex AI User` role for creating and managing model versions +- `Storage Object Viewer` role if accessing models stored in Google Cloud Storage +{% endhint %} + +## How do you use it? + +### Register models inside a pipeline + +Here's an example of how to use the Vertex AI Model Registry in your ZenML pipeline using the provided model registration step: + +```python +from typing_extensions import Annotated +from zenml import ArtifactConfig, get_step_context, step +from zenml.client import Client +from zenml.logger import get_logger + +logger = get_logger(__name__) + +@step(enable_cache=False) +def model_register() -> Annotated[str, ArtifactConfig(name="model_registry_uri")]: + """Model registration step.""" + # Get the current model from the context + current_model = get_step_context().model + + client = Client() + model_registry = client.active_stack.model_registry + model_version = model_registry.register_model_version( + name=current_model.name, + version=str(current_model.version), + model_source_uri=current_model.get_model_artifact("sklearn_classifier").uri, + description="ZenML model registered after promotion", + ) + logger.info( + f"Model version {model_version.version} registered in Model Registry" + ) + + return model_version.model_source_uri +``` + +### Configuration Options + +The Vertex AI Model Registry accepts the following configuration options: + +* `location`: The GCP region where the model registry will be created (e.g., "us-central1") +* `project_id`: (Optional) The GCP project ID. If not specified, will use the default project +* `credentials`: (Optional) GCP credentials configuration + +### Working with Model Versions + +Since the Vertex AI Model Registry only supports version-level operations, here's how to work with model versions: + +```shell +# List all model versions +zenml model-registry models list-versions + +# Get details of a specific model version +zenml model-registry models get-version -v + +# Delete a model version +zenml model-registry models delete-version -v +``` + +### Key Differences from MLflow Model Registry + +Unlike the MLflow Model Registry, the Vertex AI implementation has some important differences: + +1. **Version-Only Interface**: Vertex AI only supports model version operations. You cannot register, delete, or update models directly - only their versions. +2. **Authentication**: Uses GCP service connectors for authentication, similar to other Vertex AI services in ZenML. +3. 
**Staging Levels**: Vertex AI doesn't have built-in staging levels (like Production, Staging, etc.) - these are handled through metadata. +4. **Default Container Images**: Vertex AI requires a serving container image URI, which defaults to the scikit-learn prediction container if not specified. +5. **Managed Service**: As a fully managed service, you don't need to worry about infrastructure management, but you need valid GCP credentials. + +### Limitations + +Based on the implementation, there are some limitations to be aware of: + +1. The `register_model()`, `update_model()`, and `delete_model()` methods are not implemented as Vertex AI only supports registering model versions +2. Model stage transitions (Production, Staging, etc.) are not natively supported +3. Models must have a serving container image URI specified or will use the default scikit-learn image +4. All registered models are automatically labeled with `managed_by="zenml"` for tracking purposes + +Check out the [SDK docs](https://sdkdocs.zenml.io/latest/integration\_code\_docs/integrations-gcp/#zenml.integrations.gcp.model\_registry) to see more about the interface and implementation. + +
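To tie the two guides together, here is a minimal sketch of how a registered version's source URI can feed the deployer configuration. It assumes a stack containing both the `vertex` model registry and model deployer; the model name and version below are illustrative placeholders, not values from this PR:

```python
# Sketch: look up a registered version and reuse its source URI for deployment.
# "my_model" and "1" are hypothetical placeholders.
from zenml.client import Client

client = Client()
model_registry = client.active_stack.model_registry

# The Vertex AI registry exposes a version-only interface
model_version = model_registry.get_model_version(name="my_model", version="1")

# After registration, model_source_uri holds the Vertex AI model resource
# name, which can be passed as `model_id` in the deployer configuration
# shown in the model deployer guide above.
print(model_version.model_source_uri)
```
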
\ No newline at end of file diff --git a/docs/book/toc.md b/docs/book/toc.md index 28b24626f03..e42a1b1516c 100644 --- a/docs/book/toc.md +++ b/docs/book/toc.md @@ -260,6 +260,7 @@ * [Develop a custom experiment tracker](component-guide/experiment-trackers/custom.md) * [Model Deployers](component-guide/model-deployers/model-deployers.md) * [MLflow](component-guide/model-deployers/mlflow.md) + * [VertexAI](component-guide/model-deployers/vertex.md) * [Seldon](component-guide/model-deployers/seldon.md) * [BentoML](component-guide/model-deployers/bentoml.md) * [Hugging Face](component-guide/model-deployers/huggingface.md) @@ -289,6 +290,7 @@ * [Develop a Custom Annotator](component-guide/annotators/custom.md) * [Model Registries](component-guide/model-registries/model-registries.md) * [MLflow Model Registry](component-guide/model-registries/mlflow.md) + * [VertexAI](component-guide/model-registries/vertex.md) * [Develop a Custom Model Registry](component-guide/model-registries/custom.md) * [Feature Stores](component-guide/feature-stores/feature-stores.md) * [Feast](component-guide/feature-stores/feast.md) From 53da68dd6fcb41099db959a98139cb4ef7ceb15d Mon Sep 17 00:00:00 2001 From: Safoine El khabich Date: Thu, 31 Oct 2024 13:49:36 +0100 Subject: [PATCH 12/43] Refactor model deployer configurations and add VertexAI model deployer --- .../promotion/promote_with_metric_compare.py | 14 ------- examples/e2e/steps/training/model_trainer.py | 40 ++++++++++--------- 2 files changed, 21 insertions(+), 33 deletions(-) diff --git a/examples/e2e/steps/promotion/promote_with_metric_compare.py b/examples/e2e/steps/promotion/promote_with_metric_compare.py index 7affe356f44..038d219d32d 100644 --- a/examples/e2e/steps/promotion/promote_with_metric_compare.py +++ b/examples/e2e/steps/promotion/promote_with_metric_compare.py @@ -18,7 +18,6 @@ from utils import promote_in_model_registry from zenml import Model, get_step_context, step -from zenml.client import Client from zenml.logger import get_logger logger = get_logger(__name__) @@ -30,7 +29,6 @@ def promote_with_metric_compare( current_metric: float, mlflow_model_name: str, target_env: str, - uri: str, ) -> None: """Try to promote trained model. @@ -59,18 +57,6 @@ def promote_with_metric_compare( ### ADD YOUR OWN CODE HERE - THIS IS JUST AN EXAMPLE ### should_promote = True - model_registry = Client().active_stack.model_registry - - # Register model version - model_version = model_registry.register_model_version( - name=mlflow_model_name, - version="1", - model_source_uri=uri, - description="test_register_model_version", - ) - - breakpoint() - # Get model version numbers from Model Control Plane latest_version = get_step_context().model current_version = Model(name=latest_version.name, version=target_env) diff --git a/examples/e2e/steps/training/model_trainer.py b/examples/e2e/steps/training/model_trainer.py index 43e8c3f4402..87a695f5695 100644 --- a/examples/e2e/steps/training/model_trainer.py +++ b/examples/e2e/steps/training/model_trainer.py @@ -14,7 +14,6 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# -from typing import Tuple import mlflow import pandas as pd @@ -26,10 +25,9 @@ from zenml.integrations.mlflow.experiment_trackers import ( MLFlowExperimentTracker, ) - -# from zenml.integrations.mlflow.steps.mlflow_registry import ( -# mlflow_register_model_step, -# ) +from zenml.integrations.mlflow.steps.mlflow_registry import ( + mlflow_register_model_step, +) from zenml.logger import get_logger logger = get_logger(__name__) @@ -51,11 +49,8 @@ def model_trainer( model: ClassifierMixin, target: str, name: str, -) -> Tuple[ - Annotated[ - ClassifierMixin, ArtifactConfig(name="model", is_model_artifact=True) - ], - Annotated[str, "uri"], +) -> Annotated[ + ClassifierMixin, ArtifactConfig(name="model", is_model_artifact=True) ]: """Configure and train a model on the training dataset. @@ -87,9 +82,6 @@ def model_trainer( Returns: The trained model artifact. """ - step_context = get_step_context() - # Get the URI where the output will be saved. - uri = step_context.get_output_artifact_uri(output_name="model") ### ADD YOUR OWN CODE HERE - THIS IS JUST AN EXAMPLE ### # Initialize the model with the hyperparameters indicated in the step @@ -102,9 +94,19 @@ def model_trainer( ) # register mlflow model - # mlflow_register_model_step.entrypoint( - # model, - # name=name, - # ) - - return model, uri + mlflow_register_model_step.entrypoint( + model, + name=name, + ) + # keep track of mlflow version for future use + model_registry = Client().active_stack.model_registry + if model_registry: + version = model_registry.get_latest_model_version( + name=name, stage=None + ) + if version: + model_ = get_step_context().model + model_.log_metadata({"model_registry_version": version.version}) + ### YOUR CODE ENDS HERE ### + + return model From ce2019d32bed9c5c6dd70fab79397c61ea595765 Mon Sep 17 00:00:00 2001 From: Safoine El khabich Date: Thu, 7 Nov 2024 14:45:21 +0100 Subject: [PATCH 13/43] Rename VertexAI model registry classes and update documentation for consistency --- .../component-guide/model-deployers/vertex.md | 6 +-- src/zenml/integrations/gcp/__init__.py | 4 +- .../integrations/gcp/flavors/__init__.py | 4 +- .../flavors/vertex_model_deployer_flavor.py | 3 +- .../flavors/vertex_model_registry_flavor.py | 2 +- .../model_deployers/vertex_model_deployer.py | 17 ++------ .../gcp/model_registries/__init__.py | 2 +- .../model_registries/vertex_model_registry.py | 1 - .../integrations/gcp/services/__init__.py | 6 +-- .../gcp/services/vertex_deployment.py | 43 ++++++++----------- .../model_registries/base_model_registry.py | 11 ++++- 11 files changed, 46 insertions(+), 53 deletions(-) diff --git a/docs/book/component-guide/model-deployers/vertex.md b/docs/book/component-guide/model-deployers/vertex.md index 812271da6e9..98453df6744 100644 --- a/docs/book/component-guide/model-deployers/vertex.md +++ b/docs/book/component-guide/model-deployers/vertex.md @@ -70,7 +70,7 @@ from typing_extensions import Annotated from zenml import ArtifactConfig, get_step_context, step from zenml.client import Client from zenml.integrations.gcp.services.vertex_deployment import ( - VertexAIDeploymentConfig, + VertexDeploymentConfig, VertexDeploymentService, ) @@ -87,7 +87,7 @@ def model_deployer( model_deployer = zenml_client.active_stack.model_deployer # Configure the deployment - vertex_deployment_config = VertexAIDeploymentConfig( + vertex_deployment_config = VertexDeploymentConfig( location="europe-west1", name="zenml-vertex-quickstart", model_name=current_model.name, @@ -109,7 +109,7 @@ def model_deployer( ### 
Configuration Options -The Vertex AI Model Deployer accepts a rich set of configuration options through `VertexAIDeploymentConfig`: +The Vertex AI Model Deployer accepts a rich set of configuration options through `VertexDeploymentConfig`: * Basic Configuration: * `location`: GCP region for deployment (e.g., "us-central1") diff --git a/src/zenml/integrations/gcp/__init__.py b/src/zenml/integrations/gcp/__init__.py index 3c9de9a9348..e25d9441427 100644 --- a/src/zenml/integrations/gcp/__init__.py +++ b/src/zenml/integrations/gcp/__init__.py @@ -78,7 +78,7 @@ def flavors(cls) -> List[Type[Flavor]]: VertexOrchestratorFlavor, VertexStepOperatorFlavor, VertexModelDeployerFlavor, - VertexAIModelRegistryFlavor, + VertexModelRegistryFlavor, ) return [ @@ -86,7 +86,7 @@ def flavors(cls) -> List[Type[Flavor]]: GCPImageBuilderFlavor, VertexOrchestratorFlavor, VertexStepOperatorFlavor, - VertexAIModelRegistryFlavor, + VertexModelRegistryFlavor, VertexModelDeployerFlavor, ] diff --git a/src/zenml/integrations/gcp/flavors/__init__.py b/src/zenml/integrations/gcp/flavors/__init__.py index cecf637cefd..25067703d55 100644 --- a/src/zenml/integrations/gcp/flavors/__init__.py +++ b/src/zenml/integrations/gcp/flavors/__init__.py @@ -35,7 +35,7 @@ ) from zenml.integrations.gcp.flavors.vertex_model_registry_flavor import ( VertexAIModelRegistryConfig, - VertexAIModelRegistryFlavor, + VertexModelRegistryFlavor, ) __all__ = [ @@ -49,6 +49,6 @@ "VertexStepOperatorConfig", "VertexModelDeployerFlavor", "VertexModelDeployerConfig", - "VertexAIModelRegistryFlavor", + "VertexModelRegistryFlavor", "VertexAIModelRegistryConfig", ] diff --git a/src/zenml/integrations/gcp/flavors/vertex_model_deployer_flavor.py b/src/zenml/integrations/gcp/flavors/vertex_model_deployer_flavor.py index 5c8041c94e5..1b526cf0f2f 100644 --- a/src/zenml/integrations/gcp/flavors/vertex_model_deployer_flavor.py +++ b/src/zenml/integrations/gcp/flavors/vertex_model_deployer_flavor.py @@ -73,6 +73,7 @@ class VertexBaseConfig(BaseModel): explanation_metadata: Optional[Dict[str, str]] = None explanation_parameters: Optional[Dict[str, str]] = None existing_endpoint: Optional[str] = None + labels: Optional[Dict[str, str]] = None class VertexModelDeployerConfig( @@ -82,7 +83,7 @@ class VertexModelDeployerConfig( class VertexModelDeployerFlavor(BaseModelDeployerFlavor): - """Vertex AI Endpoint model deployer flavor.""" + """Vertex AI model deployer flavor.""" @property def name(self) -> str: diff --git a/src/zenml/integrations/gcp/flavors/vertex_model_registry_flavor.py b/src/zenml/integrations/gcp/flavors/vertex_model_registry_flavor.py index 22adc0f6a5d..e16cf548685 100644 --- a/src/zenml/integrations/gcp/flavors/vertex_model_registry_flavor.py +++ b/src/zenml/integrations/gcp/flavors/vertex_model_registry_flavor.py @@ -49,7 +49,7 @@ class VertexAIModelRegistryConfig( """Configuration for the VertexAI model registry.""" -class VertexAIModelRegistryFlavor(BaseModelRegistryFlavor): +class VertexModelRegistryFlavor(BaseModelRegistryFlavor): """Model registry flavor for VertexAI models.""" @property diff --git a/src/zenml/integrations/gcp/model_deployers/vertex_model_deployer.py b/src/zenml/integrations/gcp/model_deployers/vertex_model_deployer.py index 82ab4aca520..b152839356b 100644 --- a/src/zenml/integrations/gcp/model_deployers/vertex_model_deployer.py +++ b/src/zenml/integrations/gcp/model_deployers/vertex_model_deployer.py @@ -36,7 +36,7 @@ VertexAIModelRegistry, ) from zenml.integrations.gcp.services.vertex_deployment import ( - 
VertexAIDeploymentConfig, + VertexDeploymentConfig, VertexDeploymentService, ) from zenml.logger import get_logger @@ -97,11 +97,8 @@ def _validate_stack_requirements(stack: "Stack") -> Tuple[bool, str]: Returns: A tuple of (is_valid, error_message). """ - # Validate that the container registry is not local. model_registry = stack.model_registry - if not model_registry and isinstance( - model_registry, VertexAIModelRegistry - ): + if not isinstance(model_registry, VertexAIModelRegistry): return False, ( "The Vertex AI model deployer requires a Vertex AI model " "registry to be present in the stack. Please add a Vertex AI " @@ -110,10 +107,6 @@ def _validate_stack_requirements(stack: "Stack") -> Tuple[bool, str]: # Validate that the rest of the components are not local. for stack_comp in stack.components.values(): - # For Forward compatibility a list of components is returned, - # but only the first item is relevant for now - # TODO: [server] make sure the ComponentModel actually has - # a local_path property or implement similar check local_path = stack_comp.local_path if not local_path: continue @@ -138,7 +131,7 @@ def _validate_stack_requirements(stack: "Stack") -> Tuple[bool, str]: ) def _create_deployment_service( - self, id: UUID, timeout: int, config: VertexAIDeploymentConfig + self, id: UUID, timeout: int, config: VertexDeploymentConfig ) -> VertexDeploymentService: """Creates a new VertexAIDeploymentService. @@ -178,14 +171,13 @@ def perform_deploy_model( The ZenML Vertex AI deployment service object. """ with track_handler(AnalyticsEvent.MODEL_DEPLOYED) as analytics_handler: - config = cast(VertexAIDeploymentConfig, config) + config = cast(VertexDeploymentConfig, config) service = self._create_deployment_service( id=id, config=config, timeout=timeout ) logger.info( f"Creating a new Vertex AI deployment service: {service}" ) - service.start(timeout=timeout) client = Client() stack = client.active_stack @@ -250,7 +242,6 @@ def perform_delete_model( """ service = cast(VertexDeploymentService, service) service.stop(timeout=timeout, force=force) - service.stop() @staticmethod def get_model_server_info( # type: ignore[override] diff --git a/src/zenml/integrations/gcp/model_registries/__init__.py b/src/zenml/integrations/gcp/model_registries/__init__.py index 38622ef0da3..672c7c19619 100644 --- a/src/zenml/integrations/gcp/model_registries/__init__.py +++ b/src/zenml/integrations/gcp/model_registries/__init__.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express # or implied. See the License for the specific language governing # permissions and limitations under the License. 
-"""Initialization of the Vertex AI model deployers.""" +"""Initialization of the Vertex AI model registry.""" from zenml.integrations.gcp.model_registries.vertex_model_registry import ( VertexAIModelRegistry diff --git a/src/zenml/integrations/gcp/model_registries/vertex_model_registry.py b/src/zenml/integrations/gcp/model_registries/vertex_model_registry.py index 66b7465bbaa..fcc57001867 100644 --- a/src/zenml/integrations/gcp/model_registries/vertex_model_registry.py +++ b/src/zenml/integrations/gcp/model_registries/vertex_model_registry.py @@ -147,7 +147,6 @@ def register_model_version( or "europe-docker.pkg.dev/vertex-ai/prediction/sklearn-cpu.1-3:latest", ) is_default_version = metadata_dict.get("is_default_version", False) - metadata_dict["managed_by"] = "zenml" try: version_info = aiplatform.Model.upload( artifact_uri=model_source_uri, diff --git a/src/zenml/integrations/gcp/services/__init__.py b/src/zenml/integrations/gcp/services/__init__.py index be8c6508a37..392a48e9694 100644 --- a/src/zenml/integrations/gcp/services/__init__.py +++ b/src/zenml/integrations/gcp/services/__init__.py @@ -11,11 +11,11 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express # or implied. See the License for the specific language governing # permissions and limitations under the License. -"""Initialization of the MLflow Service.""" +"""Initialization of the Vertex Service.""" from zenml.integrations.gcp.services.vertex_deployment import ( # noqa - VertexAIDeploymentConfig, + VertexDeploymentConfig, VertexDeploymentService, ) -__all__ = ["VertexAIDeploymentConfig", "VertexDeploymentService"] \ No newline at end of file +__all__ = ["VertexDeploymentConfig", "VertexDeploymentService"] \ No newline at end of file diff --git a/src/zenml/integrations/gcp/services/vertex_deployment.py b/src/zenml/integrations/gcp/services/vertex_deployment.py index 2353f0d8d47..32e8a9722f7 100644 --- a/src/zenml/integrations/gcp/services/vertex_deployment.py +++ b/src/zenml/integrations/gcp/services/vertex_deployment.py @@ -1,3 +1,16 @@ +# Copyright (c) ZenML GmbH 2023. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing +# permissions and limitations under the License. """Implementation of the Vertex AI Deployment service.""" import re @@ -24,24 +37,6 @@ UUID_SLICE_LENGTH: int = 8 -def sanitize_labels(labels: Dict[str, str]) -> None: - """Update the label values to be valid Kubernetes labels. - - See: - https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/#syntax-and-character-set - - Args: - labels: the labels to sanitize. - """ - for key, value in labels.items(): - # Kubernetes labels must be alphanumeric, no longer than - # 63 characters, and must begin and end with an alphanumeric - # character ([a-z0-9A-Z]) - labels[key] = re.sub(r"[^0-9a-zA-Z-_\.]+", "_", value)[:63].strip( - "-_." - ) - - def sanitize_vertex_label(value: str) -> str: """Sanitize a label value to comply with Vertex AI requirements. 
@@ -62,15 +57,13 @@ def sanitize_vertex_label(value: str) -> str: return value[:63] -class VertexAIDeploymentConfig(VertexBaseConfig, ServiceConfig): +class VertexDeploymentConfig(VertexBaseConfig, ServiceConfig): """Vertex AI service configurations.""" def get_vertex_deployment_labels(self) -> Dict[str, str]: """Generate labels for the VertexAI deployment from the service configuration.""" - labels = { - "managed-by": "zenml", # Changed from managed_by to managed-by - } - + labels = self.labels or {} + labels["managed_by"] = "zenml" if self.pipeline_name: labels["pipeline-name"] = sanitize_vertex_label(self.pipeline_name) if self.pipeline_step_name: @@ -109,12 +102,12 @@ class VertexDeploymentService(BaseDeploymentService): flavor="vertex", description="Vertex AI inference endpoint prediction service", ) - config: VertexAIDeploymentConfig + config: VertexDeploymentConfig status: VertexServiceStatus = Field( default_factory=lambda: VertexServiceStatus() ) - def __init__(self, config: VertexAIDeploymentConfig, **attrs: Any): + def __init__(self, config: VertexDeploymentConfig, **attrs: Any): """Initialize the Vertex AI deployment service.""" super().__init__(config=config, **attrs) diff --git a/src/zenml/model_registries/base_model_registry.py b/src/zenml/model_registries/base_model_registry.py index 727632eaa15..ffab13f974a 100644 --- a/src/zenml/model_registries/base_model_registry.py +++ b/src/zenml/model_registries/base_model_registry.py @@ -63,7 +63,7 @@ class ModelRegistryModelMetadata(BaseModel): model and its development process. """ - managed_by: str = "ZenML" + _managed_by: str = "zenml" zenml_version: str = __version__ zenml_run_name: Optional[str] = None zenml_pipeline_name: Optional[str] = None @@ -72,6 +72,15 @@ class ModelRegistryModelMetadata(BaseModel): zenml_step_name: Optional[str] = None zenml_workspace: Optional[str] = None + @property + def managed_by(self) -> str: + """Returns the managed_by attribute. + + Returns: + The managed_by attribute. + """ + return self._managed_by + @property def custom_attributes(self) -> Dict[str, str]: """Returns a dictionary of custom attributes. 
From 14f299837d25b3af60fbc5a305c987bc97050e69 Mon Sep 17 00:00:00 2001 From: GitHub Actions Date: Thu, 7 Nov 2024 13:47:34 +0000 Subject: [PATCH 14/43] Auto-update of LLM Finetuning template --- examples/llm_finetuning/.copier-answers.yml | 2 +- examples/llm_finetuning/steps/log_metadata.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/llm_finetuning/.copier-answers.yml b/examples/llm_finetuning/.copier-answers.yml index 09cb600a5fa..386863f54e8 100644 --- a/examples/llm_finetuning/.copier-answers.yml +++ b/examples/llm_finetuning/.copier-answers.yml @@ -1,5 +1,5 @@ # Changes here will be overwritten by Copier -_commit: 2024.09.24 +_commit: 2024.08.29-1-g7af7693 _src_path: gh:zenml-io/template-llm-finetuning bf16: true cuda_version: cuda11.8 diff --git a/examples/llm_finetuning/steps/log_metadata.py b/examples/llm_finetuning/steps/log_metadata.py index 14371b78b6e..645f98cc8ea 100644 --- a/examples/llm_finetuning/steps/log_metadata.py +++ b/examples/llm_finetuning/steps/log_metadata.py @@ -34,7 +34,7 @@ def log_metadata_from_step_artifact( context = get_step_context() metadata_dict: Dict[str, Any] = ( - context.pipeline_run.steps[step_name].outputs[artifact_name].load() + context.pipeline_run.steps[step_name].outputs[artifact_name][0].load() ) metadata = {artifact_name: metadata_dict} From 0b30a61ddbb2b5a0c703cfc1a7f40e8a12a7bb03 Mon Sep 17 00:00:00 2001 From: GitHub Actions Date: Thu, 7 Nov 2024 13:47:55 +0000 Subject: [PATCH 15/43] Auto-update of Starter template --- examples/mlops_starter/.copier-answers.yml | 2 +- examples/mlops_starter/quickstart.ipynb | 4 ++-- examples/mlops_starter/run.py | 4 ++-- examples/mlops_starter/steps/model_promoter.py | 8 +++----- 4 files changed, 8 insertions(+), 10 deletions(-) diff --git a/examples/mlops_starter/.copier-answers.yml b/examples/mlops_starter/.copier-answers.yml index 8b1fb8187ed..e17f27ee551 100644 --- a/examples/mlops_starter/.copier-answers.yml +++ b/examples/mlops_starter/.copier-answers.yml @@ -1,5 +1,5 @@ # Changes here will be overwritten by Copier -_commit: 2024.09.24 +_commit: 2024.10.21 _src_path: gh:zenml-io/template-starter email: info@zenml.io full_name: ZenML GmbH diff --git a/examples/mlops_starter/quickstart.ipynb b/examples/mlops_starter/quickstart.ipynb index df8c010b5ea..6fba7a0e8cc 100644 --- a/examples/mlops_starter/quickstart.ipynb +++ b/examples/mlops_starter/quickstart.ipynb @@ -994,8 +994,8 @@ "@pipeline\n", "def inference(preprocess_pipeline_id: UUID):\n", " \"\"\"Model batch inference pipeline\"\"\"\n", - " # random_state = client.get_artifact_version(name_id_or_prefix=preprocess_pipeline_id).metadata[\"random_state\"].value\n", - " # target = client.get_artifact_version(name_id_or_prefix=preprocess_pipeline_id).run_metadata['target'].value\n", + " # random_state = client.get_artifact_version(name_id_or_prefix=preprocess_pipeline_id).metadata[\"random_state\"]\n", + " # target = client.get_artifact_version(name_id_or_prefix=preprocess_pipeline_id).run_metadata['target']\n", " random_state = 42\n", " target = \"target\"\n", "\n", diff --git a/examples/mlops_starter/run.py b/examples/mlops_starter/run.py index d7b1a7f11b2..16a352588d6 100644 --- a/examples/mlops_starter/run.py +++ b/examples/mlops_starter/run.py @@ -239,8 +239,8 @@ def main( # to get the random state and target column random_state = preprocess_pipeline_artifact.run_metadata[ "random_state" - ].value - target = preprocess_pipeline_artifact.run_metadata["target"].value + ] + target = 
preprocess_pipeline_artifact.run_metadata["target"] run_args_inference["random_state"] = random_state run_args_inference["target"] = target diff --git a/examples/mlops_starter/steps/model_promoter.py b/examples/mlops_starter/steps/model_promoter.py index 52040638496..43d43ceac1f 100644 --- a/examples/mlops_starter/steps/model_promoter.py +++ b/examples/mlops_starter/steps/model_promoter.py @@ -58,11 +58,9 @@ def model_promoter(accuracy: float, stage: str = "production") -> bool: try: stage_model = client.get_model_version(current_model.name, stage) # We compare their metrics - prod_accuracy = ( - stage_model.get_artifact("sklearn_classifier") - .run_metadata["test_accuracy"] - .value - ) + prod_accuracy = stage_model.get_artifact( + "sklearn_classifier" + ).run_metadata["test_accuracy"] if float(accuracy) > float(prod_accuracy): # If current model has better metrics, we promote it is_promoted = True From 83dfe315977eab7a895f9afddc6f6360e7f83641 Mon Sep 17 00:00:00 2001 From: GitHub Actions Date: Thu, 7 Nov 2024 13:54:11 +0000 Subject: [PATCH 16/43] Auto-update of E2E template --- examples/e2e/.copier-answers.yml | 2 +- examples/e2e/steps/deployment/deployment_deploy.py | 2 +- examples/e2e/steps/hp_tuning/hp_tuning_select_best_model.py | 2 +- examples/e2e/steps/promotion/promote_with_metric_compare.py | 6 +++--- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/examples/e2e/.copier-answers.yml b/examples/e2e/.copier-answers.yml index 74cc33d8594..b008b2c1e99 100644 --- a/examples/e2e/.copier-answers.yml +++ b/examples/e2e/.copier-answers.yml @@ -1,5 +1,5 @@ # Changes here will be overwritten by Copier -_commit: 2024.10.10 +_commit: 2024.10.21 _src_path: gh:zenml-io/template-e2e-batch data_quality_checks: true email: info@zenml.io diff --git a/examples/e2e/steps/deployment/deployment_deploy.py b/examples/e2e/steps/deployment/deployment_deploy.py index 3fb0d879f3f..dad351e45be 100644 --- a/examples/e2e/steps/deployment/deployment_deploy.py +++ b/examples/e2e/steps/deployment/deployment_deploy.py @@ -67,7 +67,7 @@ def deployment_deploy() -> ( registry_model_name=model.name, registry_model_version=model.run_metadata[ "model_registry_version" - ].value, + ], replace_existing=True, ) else: diff --git a/examples/e2e/steps/hp_tuning/hp_tuning_select_best_model.py b/examples/e2e/steps/hp_tuning/hp_tuning_select_best_model.py index 7d5a6bc33ea..65e524ecd98 100644 --- a/examples/e2e/steps/hp_tuning/hp_tuning_select_best_model.py +++ b/examples/e2e/steps/hp_tuning/hp_tuning_select_best_model.py @@ -50,7 +50,7 @@ def hp_tuning_select_best_model( hp_output = model.get_data_artifact("hp_result") model_: ClassifierMixin = hp_output.load() # fetch metadata we attached earlier - metric = float(hp_output.run_metadata["metric"].value) + metric = float(hp_output.run_metadata["metric"]) if best_model is None or best_metric < metric: best_model = model_ ### YOUR CODE ENDS HERE ### diff --git a/examples/e2e/steps/promotion/promote_with_metric_compare.py b/examples/e2e/steps/promotion/promote_with_metric_compare.py index 038d219d32d..6bc580f47ba 100644 --- a/examples/e2e/steps/promotion/promote_with_metric_compare.py +++ b/examples/e2e/steps/promotion/promote_with_metric_compare.py @@ -92,14 +92,14 @@ def promote_with_metric_compare( # Promote in Model Registry latest_version_model_registry_number = latest_version.run_metadata[ "model_registry_version" - ].value + ] if current_version_number is None: current_version_model_registry_number = ( latest_version_model_registry_number ) else: 
current_version_model_registry_number = ( - current_version.run_metadata["model_registry_version"].value + current_version.run_metadata["model_registry_version"] ) promote_in_model_registry( latest_version=latest_version_model_registry_number, @@ -111,7 +111,7 @@ def promote_with_metric_compare( else: promoted_version = current_version.run_metadata[ "model_registry_version" - ].value + ] logger.info( f"Current model version in `{target_env}` is `{promoted_version}` registered in Model Registry" From 7888717a96b3c5cec491872e0e1595ba658be61f Mon Sep 17 00:00:00 2001 From: GitHub Actions Date: Thu, 7 Nov 2024 13:57:27 +0000 Subject: [PATCH 17/43] Auto-update of NLP template --- examples/e2e_nlp/.copier-answers.yml | 2 +- examples/e2e_nlp/gradio/requirements.txt | 2 +- .../e2e_nlp/steps/promotion/promote_get_metrics.py | 12 ++++-------- 3 files changed, 6 insertions(+), 10 deletions(-) diff --git a/examples/e2e_nlp/.copier-answers.yml b/examples/e2e_nlp/.copier-answers.yml index 3ca2ba198fe..e509aae2760 100644 --- a/examples/e2e_nlp/.copier-answers.yml +++ b/examples/e2e_nlp/.copier-answers.yml @@ -1,5 +1,5 @@ # Changes here will be overwritten by Copier -_commit: 2024.09.23 +_commit: 2024.10.21 _src_path: gh:zenml-io/template-nlp accelerator: cpu cloud_of_choice: aws diff --git a/examples/e2e_nlp/gradio/requirements.txt b/examples/e2e_nlp/gradio/requirements.txt index 1bddfdfb85b..b53f1df9e62 100644 --- a/examples/e2e_nlp/gradio/requirements.txt +++ b/examples/e2e_nlp/gradio/requirements.txt @@ -9,4 +9,4 @@ pandas==1.5.3 session_info==1.0.0 scikit-learn==1.5.0 transformers==4.28.1 -IPython==7.34.0 \ No newline at end of file +IPython==8.10.0 \ No newline at end of file diff --git a/examples/e2e_nlp/steps/promotion/promote_get_metrics.py b/examples/e2e_nlp/steps/promotion/promote_get_metrics.py index 7f2951a5865..b24ac42245c 100644 --- a/examples/e2e_nlp/steps/promotion/promote_get_metrics.py +++ b/examples/e2e_nlp/steps/promotion/promote_get_metrics.py @@ -56,9 +56,7 @@ def promote_get_metrics() -> ( # Get current model version metric in current run model = get_step_context().model - current_metrics = ( - model.get_model_artifact("model").run_metadata["metrics"].value - ) + current_metrics = model.get_model_artifact("model").run_metadata["metrics"] logger.info(f"Current model version metrics are {current_metrics}") # Get latest saved model version metric in target environment @@ -72,11 +70,9 @@ def promote_get_metrics() -> ( except KeyError: latest_version = None if latest_version: - latest_metrics = ( - latest_version.get_model_artifact("model") - .run_metadata["metrics"] - .value - ) + latest_metrics = latest_version.get_model_artifact( + "model" + ).run_metadata["metrics"] logger.info(f"Latest model version metrics are {latest_metrics}") else: logger.info("No currently promoted model version found.") From 70cc4a97c62b296b816da04a36b497ca374f69ea Mon Sep 17 00:00:00 2001 From: GitHub Actions Date: Thu, 7 Nov 2024 15:07:24 +0000 Subject: [PATCH 18/43] Auto-update of LLM Finetuning template --- examples/llm_finetuning/.copier-answers.yml | 2 +- examples/llm_finetuning/steps/log_metadata.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/llm_finetuning/.copier-answers.yml b/examples/llm_finetuning/.copier-answers.yml index dd85e236760..4004897928b 100644 --- a/examples/llm_finetuning/.copier-answers.yml +++ b/examples/llm_finetuning/.copier-answers.yml @@ -1,5 +1,5 @@ # Changes here will be overwritten by Copier -_commit: 2024.09.24-1-g378145b +_commit: 2024.10.30 
_src_path: gh:zenml-io/template-llm-finetuning bf16: true cuda_version: cuda11.8 diff --git a/examples/llm_finetuning/steps/log_metadata.py b/examples/llm_finetuning/steps/log_metadata.py index 645f98cc8ea..14371b78b6e 100644 --- a/examples/llm_finetuning/steps/log_metadata.py +++ b/examples/llm_finetuning/steps/log_metadata.py @@ -34,7 +34,7 @@ def log_metadata_from_step_artifact( context = get_step_context() metadata_dict: Dict[str, Any] = ( - context.pipeline_run.steps[step_name].outputs[artifact_name][0].load() + context.pipeline_run.steps[step_name].outputs[artifact_name].load() ) metadata = {artifact_name: metadata_dict} From fcdec6eab8913c24f6c93f94c696b2ea7995f75b Mon Sep 17 00:00:00 2001 From: GitHub Actions Date: Thu, 7 Nov 2024 15:07:44 +0000 Subject: [PATCH 19/43] Auto-update of Starter template --- examples/mlops_starter/.copier-answers.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/mlops_starter/.copier-answers.yml b/examples/mlops_starter/.copier-answers.yml index 21ba51bc459..fd6b937c7c9 100644 --- a/examples/mlops_starter/.copier-answers.yml +++ b/examples/mlops_starter/.copier-answers.yml @@ -1,5 +1,5 @@ # Changes here will be overwritten by Copier -_commit: 2024.09.24-3-g2c1a682 +_commit: 2024.10.30 _src_path: gh:zenml-io/template-starter email: info@zenml.io full_name: ZenML GmbH From 0108c0f2b96b9ff333ad7380ac94a5a02373622a Mon Sep 17 00:00:00 2001 From: GitHub Actions Date: Thu, 7 Nov 2024 15:14:35 +0000 Subject: [PATCH 20/43] Auto-update of E2E template --- examples/e2e/.copier-answers.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/e2e/.copier-answers.yml b/examples/e2e/.copier-answers.yml index 04c970cb9a4..cd687be59df 100644 --- a/examples/e2e/.copier-answers.yml +++ b/examples/e2e/.copier-answers.yml @@ -1,5 +1,5 @@ # Changes here will be overwritten by Copier -_commit: 2024.10.10-5-g6edd133 +_commit: 2024.10.30 _src_path: gh:zenml-io/template-e2e-batch data_quality_checks: true email: info@zenml.io From 0c33f82ed1295c0838676ecb3f023fe294c83058 Mon Sep 17 00:00:00 2001 From: GitHub Actions Date: Thu, 7 Nov 2024 15:17:27 +0000 Subject: [PATCH 21/43] Auto-update of NLP template --- examples/e2e_nlp/.copier-answers.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/e2e_nlp/.copier-answers.yml b/examples/e2e_nlp/.copier-answers.yml index a78ae8bac68..e13858e7da1 100644 --- a/examples/e2e_nlp/.copier-answers.yml +++ b/examples/e2e_nlp/.copier-answers.yml @@ -1,5 +1,5 @@ # Changes here will be overwritten by Copier -_commit: 2024.10.21-3-ge37d83a +_commit: 2024.10.30 _src_path: gh:zenml-io/template-nlp accelerator: cpu cloud_of_choice: aws From 012cd6e1f6ed8c314e7113bc760d57639550c13d Mon Sep 17 00:00:00 2001 From: Safoine El khabich Date: Tue, 12 Nov 2024 15:32:58 +0100 Subject: [PATCH 22/43] Update default filenames and improve backward compatibility for sklearn materializer --- .../component-guide/model-deployers/vertex.md | 2 +- .../model-registries/vertex.md | 12 +- src/zenml/cli/model_registry.py | 3 +- .../model_deployers/vertex_model_deployer.py | 10 +- .../gcp/services/vertex_deployment.py | 103 +++++++++++++++--- .../materializers/sklearn_materializer.py | 84 +++++++++++--- .../materializers/cloudpickle_materializer.py | 2 +- .../model_registries/base_model_registry.py | 3 +- src/zenml/services/service.py | 3 + 9 files changed, 176 insertions(+), 46 deletions(-) diff --git a/docs/book/component-guide/model-deployers/vertex.md 
b/docs/book/component-guide/model-deployers/vertex.md
index 98453df6744..727f2d763fe 100644
--- a/docs/book/component-guide/model-deployers/vertex.md
+++ b/docs/book/component-guide/model-deployers/vertex.md
@@ -76,7 +76,6 @@ from zenml.integrations.gcp.services.vertex_deployment import (
 
 @step(enable_cache=False)
 def model_deployer(
-    model_registry_uri: str,
 ) -> Annotated[
     VertexDeploymentService,
     ArtifactConfig(name="vertex_deployment", is_deployment_artifact=True)
@@ -84,6 +83,7 @@ def model_deployer(
     """Model deployer step."""
     zenml_client = Client()
     current_model = get_step_context().model
+    model_registry_uri = current_model.get_model_artifact("THE_MODEL_ARTIFACT_NAME_GIVEN_IN_TRAINING_STEP").uri
     model_deployer = zenml_client.active_stack.model_deployer
 
     # Configure the deployment
diff --git a/docs/book/component-guide/model-registries/vertex.md b/docs/book/component-guide/model-registries/vertex.md
index 41a29ffb11d..eef9096ce62 100644
--- a/docs/book/component-guide/model-registries/vertex.md
+++ b/docs/book/component-guide/model-registries/vertex.md
@@ -21,6 +21,12 @@ This is particularly useful in the following scenarios:
 
 {% hint style="warning" %}
 Important: The Vertex AI Model Registry implementation only supports the model version interface, not the model interface. This means you cannot register, delete, or update models directly - you can only work with model versions. Operations like `register_model()`, `delete_model()`, and `update_model()` are not supported.
+
+Unlike platforms like MLflow, where you first create a model container and then add versions to it, Vertex AI combines model registration and versioning into a single operation:
+
+- When you upload a model, it automatically creates both the model and its first version
+- Each subsequent upload with the same display name creates a new version
+- You cannot create an empty model container without a version
 {% endhint %}
 
 ## How do you deploy it?
@@ -141,9 +147,9 @@ Unlike the MLflow Model Registry, the Vertex AI implementation has some importan
 Based on the implementation, there are some limitations to be aware of:
 
 1. The `register_model()`, `update_model()`, and `delete_model()` methods are not implemented as Vertex AI only supports registering model versions
-2. Model stage transitions (Production, Staging, etc.) are not natively supported
-3. Models must have a serving container image URI specified or will use the default scikit-learn image
-4. All registered models are automatically labeled with `managed_by="zenml"` for tracking purposes
+2. Models should be given an explicit serving container image URI, both to avoid falling back to the default scikit-learn prediction container and to ensure compatibility with Vertex AI endpoints when deploying models
+3. All models registered by the integration are automatically labeled with `managed_by="zenml"` for tracking purposes
 
 Check out the [SDK docs](https://sdkdocs.zenml.io/latest/integration\_code\_docs/integrations-gcp/#zenml.integrations.gcp.model\_registry) to see more about the interface and implementation.
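
The registration semantics described in the doc above map onto the `google-cloud-aiplatform` SDK, where a single `Model.upload()` call creates the model together with its first version, and later uploads against the same parent become new versions. A hedged sketch of that flow; project, bucket, and container image are placeholders:

```python
from google.cloud import aiplatform

aiplatform.init(project="my-gcp-project", location="us-central1")

# First upload: creates the model *and* version 1 in one operation.
model_v1 = aiplatform.Model.upload(
    display_name="my-classifier",
    artifact_uri="gs://my-bucket/model/",
    serving_container_image_uri=(
        "us-docker.pkg.dev/vertex-ai/prediction/sklearn-cpu.1-0:latest"
    ),
)

# A subsequent upload against the same parent registers version 2.
model_v2 = aiplatform.Model.upload(
    parent_model=model_v1.resource_name,
    artifact_uri="gs://my-bucket/model-v2/",
    serving_container_image_uri=(
        "us-docker.pkg.dev/vertex-ai/prediction/sklearn-cpu.1-0:latest"
    ),
)
print(model_v2.version_id)
```
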
diff --git a/src/zenml/cli/model_registry.py b/src/zenml/cli/model_registry.py
index 93c55391817..838132b9e9a 100644
--- a/src/zenml/cli/model_registry.py
+++ b/src/zenml/cli/model_registry.py
@@ -18,6 +18,7 @@
 
 import click
 
+from zenml import __version__
 from zenml.cli import utils as cli_utils
 from zenml.cli.cli import TagGroup, cli
 from zenml.enums import StackComponentType
@@ -643,7 +644,7 @@ def register_model_version(
     # Parse metadata
     metadata = dict(metadata) if metadata else {}
     registered_metadata = ModelRegistryModelMetadata(**dict(metadata))
-    registered_metadata.zenml_version = zenml_version
+    registered_metadata.zenml_version = zenml_version or __version__
    registered_metadata.zenml_run_name = zenml_run_name
     registered_metadata.zenml_pipeline_name = zenml_pipeline_name
     registered_metadata.zenml_step_name = zenml_step_name
diff --git a/src/zenml/integrations/gcp/model_deployers/vertex_model_deployer.py b/src/zenml/integrations/gcp/model_deployers/vertex_model_deployer.py
index b152839356b..3b6d31820cc 100644
--- a/src/zenml/integrations/gcp/model_deployers/vertex_model_deployer.py
+++ b/src/zenml/integrations/gcp/model_deployers/vertex_model_deployer.py
@@ -22,9 +22,6 @@
 from zenml.analytics.utils import track_handler
 from zenml.client import Client
 from zenml.enums import StackComponentType
-from zenml.integrations.gcp import (
-    VERTEX_SERVICE_ARTIFACT,
-)
 from zenml.integrations.gcp.flavors.vertex_model_deployer_flavor import (
     VertexModelDeployerConfig,
     VertexModelDeployerFlavor,
@@ -142,14 +139,15 @@ def _create_deployment_service(
             config: the configuration of the model to be deployed with Vertex model deployer.
 
         Returns:
-            The VertexModelDeployerConfig object that can be used to interact
+            The VertexDeploymentService object that can be used to interact
             with the Vertex inference endpoint.
         """
         # create a new service for the new model
         service = VertexDeploymentService(uuid=id, config=config)
         logger.info(
-            f"Creating an artifact {VERTEX_SERVICE_ARTIFACT} with service instance attached as metadata."
-            " If there's an active pipeline and/or model this artifact will be associated with it."
+            "Creating an artifact with service instance attached as metadata."
+            "
If there's an active pipeline and/or model, " + "this artifact will be associated with it.", ) service.start(timeout=timeout) return service diff --git a/src/zenml/integrations/gcp/services/vertex_deployment.py b/src/zenml/integrations/gcp/services/vertex_deployment.py index 32e8a9722f7..c8a4e02f0a5 100644 --- a/src/zenml/integrations/gcp/services/vertex_deployment.py +++ b/src/zenml/integrations/gcp/services/vertex_deployment.py @@ -14,10 +14,12 @@ """Implementation of the Vertex AI Deployment service.""" import re +import time from typing import Any, Dict, Generator, List, Optional, Tuple from google.api_core import exceptions from google.cloud import aiplatform +from google.cloud import logging as vertex_logging from pydantic import BaseModel, Field from zenml.client import Client @@ -46,6 +48,10 @@ def sanitize_vertex_label(value: str) -> str: Returns: Sanitized label value """ + # Handle empty string + if not value: + return "" + # Convert to lowercase value = value.lower() # Replace any character that's not lowercase letter, number, dash or underscore @@ -81,10 +87,8 @@ class VertexPredictionServiceEndpoint(BaseModel): """Vertex AI Prediction Service Endpoint.""" endpoint_name: str + deployed_model_id: str endpoint_url: Optional[str] = None - deployed_model_id: Optional[str] = ( - None # Added to track specific model deployment - ) class VertexServiceStatus(ServiceStatus): @@ -107,10 +111,8 @@ class VertexDeploymentService(BaseDeploymentService): default_factory=lambda: VertexServiceStatus() ) - def __init__(self, config: VertexDeploymentConfig, **attrs: Any): - """Initialize the Vertex AI deployment service.""" - super().__init__(config=config, **attrs) - + def _initialize_gcp_clients(self) -> None: + """Initialize GCP clients with consistent credentials.""" # Initialize aiplatform with project and location from zenml.integrations.gcp.model_deployers.vertex_model_deployer import ( VertexModelDeployer, @@ -119,9 +121,29 @@ def __init__(self, config: VertexDeploymentConfig, **attrs: Any): zenml_client = Client() model_deployer = zenml_client.active_stack.model_deployer if not isinstance(model_deployer, VertexModelDeployer): - raise ValueError("Model deployer is not VertexModelDeployer") + raise RuntimeError( + "Active model deployer must be Vertex AI Model Deployer" + ) - model_deployer.setup_aiplatform() + # get credentials from model deployer + credentials, project_id = model_deployer._get_authentication() + + # Initialize aiplatform + aiplatform.init( + project=project_id, + location=self.config.location, + credentials=credentials, + ) + + # Initialize logging client + self.logging_client = vertex_logging.Client( + project=project_id, credentials=credentials + ) + + def __init__(self, config: VertexDeploymentConfig, **attrs: Any): + """Initialize the Vertex AI deployment service.""" + super().__init__(config=config, **attrs) + self._initialize_gcp_clients() @property def prediction_url(self) -> Optional[str]: @@ -187,6 +209,10 @@ def provision(self) -> None: logger.info( f"Found existing model to deploy: {model.resource_name} to the endpoint." ) + if not model: + raise RuntimeError( + f"Model {self.config.model_id} not found in the project." 
+ ) # Deploy the model to the endpoint endpoint.deploy( @@ -332,7 +358,9 @@ def predict(self, instances: List[Any]) -> List[Any]: instances=instances, deployed_model_id=self.status.endpoint.deployed_model_id.split( "/" - )[-1], + )[-1] + if self.status.endpoint.deployed_model_id + else None, timeout=30, # Add reasonable timeout ) @@ -348,14 +376,53 @@ def predict(self, instances: List[Any]) -> List[Any]: def get_logs( self, follow: bool = False, tail: Optional[int] = None ) -> Generator[str, bool, None]: - """Retrieve the service logs.""" - # Note: Could be enhanced to actually fetch logs from Cloud Logging - logger.info( - "Vertex AI Endpoints provides access to the logs through " - "Cloud Logging. Please check the Google Cloud Console for detailed logs. " - f"Location: {self.config.location}" - ) - yield "Logs are available in Google Cloud Console." + """Retrieve the service logs from Cloud Logging. + + Args: + follow: If True, continuously yield new logs + tail: Number of most recent logs to return + """ + if not self.status.endpoint: + yield "No endpoint deployed yet" + return + + try: + # Create filter for Vertex AI endpoint logs + endpoint_id = self.status.endpoint.endpoint_name.split("/")[-1] + filter_str = ( + f'resource.type="aiplatform.googleapis.com/Endpoint" ' + f'resource.labels.endpoint_id="{endpoint_id}" ' + f'resource.labels.location="{self.config.location}"' + ) + + # Set time range for logs + if tail: + filter_str += f" limit {tail}" + + # Get log iterator + iterator = self.logging_client.list_entries( + filter_=filter_str, order_by=vertex_logging.DESCENDING + ) + + # Yield historical logs + for entry in iterator: + yield f"[{entry.timestamp}] {entry.severity}: {entry.payload.get('message', '')}" + + # If following logs, continue to stream new entries + if follow: + while True: + time.sleep(2) # Poll every 2 seconds + for entry in self.logging_client.list_entries( + filter_=filter_str, + order_by=vertex_logging.DESCENDING, + page_size=1, + ): + yield f"[{entry.timestamp}] {entry.severity}: {entry.payload.get('message', '')}" + + except Exception as e: + error_msg = f"Failed to retrieve logs: {str(e)}" + logger.error(error_msg) + yield error_msg @property def is_running(self) -> bool: diff --git a/src/zenml/integrations/sklearn/materializers/sklearn_materializer.py b/src/zenml/integrations/sklearn/materializers/sklearn_materializer.py index b11f7fe7080..d0b22d99e83 100644 --- a/src/zenml/integrations/sklearn/materializers/sklearn_materializer.py +++ b/src/zenml/integrations/sklearn/materializers/sklearn_materializer.py @@ -1,20 +1,9 @@ -# Copyright (c) ZenML GmbH 2021. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at: -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express -# or implied. See the License for the specific language governing -# permissions and limitations under the License. 
"""Implementation of the sklearn materializer.""" +import os from typing import Any, ClassVar, Tuple, Type +import cloudpickle from sklearn.base import ( BaseEstimator, BiclusterMixin, @@ -29,13 +18,20 @@ ) from zenml.enums import ArtifactType +from zenml.environment import Environment +from zenml.logger import get_logger from zenml.materializers.cloudpickle_materializer import ( + DEFAULT_FILENAME, CloudpickleMaterializer, ) +logger = get_logger(__name__) + +SKLEARN_MODEL_FILENAME = "model.pkl" + class SklearnMaterializer(CloudpickleMaterializer): - """Materializer to read data to and from sklearn.""" + """Materializer to read data to and from sklearn with backward compatibility.""" ASSOCIATED_TYPES: ClassVar[Tuple[Type[Any], ...]] = ( BaseEstimator, @@ -50,3 +46,63 @@ class SklearnMaterializer(CloudpickleMaterializer): TransformerMixin, ) ASSOCIATED_ARTIFACT_TYPE: ClassVar[ArtifactType] = ArtifactType.MODEL + + def load(self, data_type: Type[Any]) -> Any: + """Reads a sklearn model from pickle file with backward compatibility. + + Args: + data_type: The data type of the artifact. + + Returns: + The loaded sklearn model. + """ + # First try to load from model.pkl + model_filepath = os.path.join(self.uri, SKLEARN_MODEL_FILENAME) + artifact_filepath = os.path.join(self.uri, DEFAULT_FILENAME) + + # Check which file exists and load accordingly + if self.artifact_store.exists(model_filepath): + filepath = model_filepath + elif self.artifact_store.exists(artifact_filepath): + logger.info( + f"Loading from legacy filepath {artifact_filepath}. Future saves " + f"will use {model_filepath}" + ) + filepath = artifact_filepath + else: + raise FileNotFoundError( + f"Neither {model_filepath} nor {artifact_filepath} found in artifact store" + ) + + # validate python version before loading + source_python_version = self._load_python_version() + current_python_version = Environment().python_version() + if ( + source_python_version != "unknown" + and source_python_version != current_python_version + ): + logger.warning( + f"Your artifact was materialized under Python version " + f"'{source_python_version}' but you are currently using " + f"'{current_python_version}'. This might cause unexpected " + "behavior since pickle is not reproducible across Python " + "versions. Attempting to load anyway..." + ) + + # Load the model + with self.artifact_store.open(filepath, "rb") as fid: + return cloudpickle.load(fid) + + def save(self, data: Any) -> None: + """Saves a sklearn model to pickle file using the new filename. + + Args: + data: The sklearn model to save. 
+ """ + # Save python version for validation on loading + self._save_python_version() + + # Save using the new filename + filepath = os.path.join(self.uri, SKLEARN_MODEL_FILENAME) + with self.artifact_store.open(filepath, "wb") as fid: + cloudpickle.dump(data, fid) diff --git a/src/zenml/materializers/cloudpickle_materializer.py b/src/zenml/materializers/cloudpickle_materializer.py index a6813cb4191..399ca7f2336 100644 --- a/src/zenml/materializers/cloudpickle_materializer.py +++ b/src/zenml/materializers/cloudpickle_materializer.py @@ -29,7 +29,7 @@ logger = get_logger(__name__) -DEFAULT_FILENAME = "model.pkl" +DEFAULT_FILENAME = "artifact.pkl" DEFAULT_PYTHON_VERSION_FILENAME = "python_version.txt" diff --git a/src/zenml/model_registries/base_model_registry.py b/src/zenml/model_registries/base_model_registry.py index ffab13f974a..3e3019dfb6f 100644 --- a/src/zenml/model_registries/base_model_registry.py +++ b/src/zenml/model_registries/base_model_registry.py @@ -20,7 +20,6 @@ from pydantic import BaseModel, ConfigDict -from zenml import __version__ from zenml.enums import StackComponentType from zenml.stack import Flavor, StackComponent from zenml.stack.stack_component import StackComponentConfig @@ -64,7 +63,7 @@ class ModelRegistryModelMetadata(BaseModel): """ _managed_by: str = "zenml" - zenml_version: str = __version__ + zenml_version: Optional[str] = None zenml_run_name: Optional[str] = None zenml_pipeline_name: Optional[str] = None zenml_pipeline_uuid: Optional[str] = None diff --git a/src/zenml/services/service.py b/src/zenml/services/service.py index 7b607aae611..0077a3a945f 100644 --- a/src/zenml/services/service.py +++ b/src/zenml/services/service.py @@ -35,6 +35,7 @@ from zenml.console import console from zenml.logger import get_logger +from zenml.model.model import Model from zenml.services.service_endpoint import BaseServiceEndpoint from zenml.services.service_monitor import HTTPEndpointHealthMonitor from zenml.services.service_status import ServiceState, ServiceStatus @@ -109,6 +110,7 @@ class ServiceConfig(BaseTypedModel): pipeline_name: name of the pipeline that spun up the service pipeline_step_name: name of the pipeline step that spun up the service run_name: name of the pipeline run that spun up the service. + zenml_model: the ZenML model object to be deployed. """ name: str = "" @@ -118,6 +120,7 @@ class ServiceConfig(BaseTypedModel): model_name: str = "" model_version: str = "" service_name: str = "" + zenml_model: Optional[Model] = None # TODO: In Pydantic v2, the `model_` is a protected namespaces for all # fields defined under base models. If not handled, this raises a warning. 
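
The sklearn materializer change in the patch above hinges on a two-path lookup: prefer the new `model.pkl` filename, fall back to the legacy `artifact.pkl`, and fail loudly when neither exists. A self-contained sketch of just that fallback logic; `exists` is a hypothetical stand-in for the artifact store's existence check (the real implementation lives in `SklearnMaterializer.load`):

```python
import os
from typing import Callable

SKLEARN_MODEL_FILENAME = "model.pkl"
LEGACY_FILENAME = "artifact.pkl"


def resolve_model_path(uri: str, exists: Callable[[str], bool]) -> str:
    """Return the path to load from, preferring the new filename."""
    new_path = os.path.join(uri, SKLEARN_MODEL_FILENAME)
    legacy_path = os.path.join(uri, LEGACY_FILENAME)
    if exists(new_path):
        return new_path
    if exists(legacy_path):
        # Legacy artifacts keep loading; future saves use the new name.
        return legacy_path
    raise FileNotFoundError(
        f"Neither {new_path} nor {legacy_path} found in artifact store"
    )


# Example: an artifact saved before the rename still resolves.
store = {"/artifacts/42/artifact.pkl"}
assert resolve_model_path("/artifacts/42", store.__contains__).endswith(
    "artifact.pkl"
)
```
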
From ac2e69a9de424e17ae78bd42367d7b673a958c86 Mon Sep 17 00:00:00 2001 From: GitHub Actions Date: Tue, 12 Nov 2024 14:35:13 +0000 Subject: [PATCH 23/43] Auto-update of LLM Finetuning template --- examples/llm_finetuning/.copier-answers.yml | 2 +- examples/llm_finetuning/requirements.txt | 2 +- examples/llm_finetuning/steps/log_metadata.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/llm_finetuning/.copier-answers.yml b/examples/llm_finetuning/.copier-answers.yml index 4004897928b..2c547f98d61 100644 --- a/examples/llm_finetuning/.copier-answers.yml +++ b/examples/llm_finetuning/.copier-answers.yml @@ -1,5 +1,5 @@ # Changes here will be overwritten by Copier -_commit: 2024.10.30 +_commit: 2024.11.08 _src_path: gh:zenml-io/template-llm-finetuning bf16: true cuda_version: cuda11.8 diff --git a/examples/llm_finetuning/requirements.txt b/examples/llm_finetuning/requirements.txt index ac6d8625411..23d38eef649 100644 --- a/examples/llm_finetuning/requirements.txt +++ b/examples/llm_finetuning/requirements.txt @@ -1,6 +1,6 @@ zenml torch>=2.2.0 -datasets +datasets>=2.15 transformers peft bitsandbytes>=0.41.3 diff --git a/examples/llm_finetuning/steps/log_metadata.py b/examples/llm_finetuning/steps/log_metadata.py index 14371b78b6e..645f98cc8ea 100644 --- a/examples/llm_finetuning/steps/log_metadata.py +++ b/examples/llm_finetuning/steps/log_metadata.py @@ -34,7 +34,7 @@ def log_metadata_from_step_artifact( context = get_step_context() metadata_dict: Dict[str, Any] = ( - context.pipeline_run.steps[step_name].outputs[artifact_name].load() + context.pipeline_run.steps[step_name].outputs[artifact_name][0].load() ) metadata = {artifact_name: metadata_dict} From 3194db31410c98a05b78aea5eb1a754c0651d284 Mon Sep 17 00:00:00 2001 From: Safoine El khabich Date: Tue, 12 Nov 2024 16:16:25 +0100 Subject: [PATCH 24/43] Enhance Vertex AI Model Registry with model conversion utility and documentation update --- .../component-guide/model-deployers/vertex.md | 2 + .../model_registries/vertex_model_registry.py | 49 ++++++++++--------- 2 files changed, 28 insertions(+), 23 deletions(-) diff --git a/docs/book/component-guide/model-deployers/vertex.md b/docs/book/component-guide/model-deployers/vertex.md index 727f2d763fe..2938f45541e 100644 --- a/docs/book/component-guide/model-deployers/vertex.md +++ b/docs/book/component-guide/model-deployers/vertex.md @@ -61,6 +61,8 @@ The service account needs the following permissions: ## How to use it +A full project example is available in the [ZenML Examples repository](https://github.com/zenml-io/zenml-projects/tree/main/vertex-registry-and-deployer). 
+ ### Deploy a model in a pipeline Here's an example of how to use the Vertex AI Model Deployer in a ZenML pipeline: diff --git a/src/zenml/integrations/gcp/model_registries/vertex_model_registry.py b/src/zenml/integrations/gcp/model_registries/vertex_model_registry.py index fcc57001867..a733934abb0 100644 --- a/src/zenml/integrations/gcp/model_registries/vertex_model_registry.py +++ b/src/zenml/integrations/gcp/model_registries/vertex_model_registry.py @@ -148,7 +148,7 @@ def register_model_version( ) is_default_version = metadata_dict.get("is_default_version", False) try: - version_info = aiplatform.Model.upload( + model_version = aiplatform.Model.upload( artifact_uri=model_source_uri, display_name=f"{name}_{version}", serving_container_image_uri=serving_container_image_uri, @@ -156,17 +156,7 @@ def register_model_version( is_default_version=is_default_version, labels=metadata_dict, ) - return RegistryModelVersion( - version=version_info.version_id, - model_source_uri=version_info.resource_name, - model_format="Custom", # Vertex AI doesn't provide this info directly - registered_model=self.get_model(version_info.name), - description=description, - created_at=version_info.create_time, - last_updated_at=version_info.update_time, - stage=ModelVersionStage.NONE, # Vertex AI doesn't have built-in stages - metadata=metadata, - ) + return self._vertex_model_to_registry_version(model_version) except Exception as e: raise RuntimeError(f"Failed to register model version: {str(e)}") @@ -218,17 +208,7 @@ def get_model_version( self.setup_aiplatform() try: model_version = aiplatform.Model(model_name=f"{name}@{version}") - return RegistryModelVersion( - version=model_version.version_id, - model_source_uri=model_version.artifact_uri, - model_format="Custom", # Vertex AI doesn't provide this info directly - registered_model=self.get_model(model_version.name), - description=model_version.description, - created_at=model_version.create_time, - last_updated_at=model_version.update_time, - stage=ModelVersionStage.NONE, # Vertex AI doesn't have built-in stages - metadata=ModelRegistryModelMetadata(**model_version.labels), - ) + return self._vertex_model_to_registry_version(model_version) except Exception as e: raise RuntimeError(f"Failed to get model version: {str(e)}") @@ -306,3 +286,26 @@ def get_model_uri_artifact_store( ) -> str: """Get the model URI artifact store.""" return model_version.model_source_uri + + def _vertex_model_to_registry_version( + self, model: "aiplatform.Model" + ) -> RegistryModelVersion: + """Convert Vertex AI model to RegistryModelVersion. 
+ + Args: + model: Vertex AI model instance + + Returns: + RegistryModelVersion object + """ + return RegistryModelVersion( + version=model.version_id, + model_source_uri=model.resource_name, + model_format="Custom", # Vertex AI doesn't provide this info directly + registered_model=self.get_model(model.name), + description=model.description, + created_at=model.create_time, + last_updated_at=model.update_time, + stage=ModelVersionStage.NONE, # Vertex AI doesn't have built-in stages + metadata=ModelRegistryModelMetadata(**model.labels), + ) From 373177b38fd5d55fae8d7dff029f1f7272df34c5 Mon Sep 17 00:00:00 2001 From: Safoine El khabich Date: Mon, 3 Feb 2025 19:24:03 +0100 Subject: [PATCH 25/43] Refactor Vertex AI model registry and deployer configurations to enhance clarity and maintainability --- .../component-guide/model-deployers/vertex.md | 65 ++- .../gcp/flavors/vertex_base_config.py | 199 ++++++++ .../flavors/vertex_model_deployer_flavor.py | 57 +-- .../flavors/vertex_model_registry_flavor.py | 20 +- .../model_deployers/vertex_model_deployer.py | 120 ++--- .../model_registries/vertex_model_registry.py | 456 +++++++++++++---- .../gcp/services/vertex_deployment.py | 468 ++++++++++-------- .../materializers/sklearn_materializer.py | 13 + .../model_registries/base_model_registry.py | 11 +- 9 files changed, 967 insertions(+), 442 deletions(-) create mode 100644 src/zenml/integrations/gcp/flavors/vertex_base_config.py diff --git a/docs/book/component-guide/model-deployers/vertex.md b/docs/book/component-guide/model-deployers/vertex.md index 2938f45541e..cc7f3154969 100644 --- a/docs/book/component-guide/model-deployers/vertex.md +++ b/docs/book/component-guide/model-deployers/vertex.md @@ -12,15 +12,18 @@ You should use the Vertex AI Model Deployer when: * You need to handle high-throughput prediction requests * You want to deploy models with GPU acceleration * You need to monitor and track your model deployments +* You want to integrate with other GCP services like Cloud Logging, IAM, and VPC This is particularly useful in the following scenarios: * Deploying models to production with high availability requirements * Serving models that need GPU acceleration * Handling varying prediction workloads with autoscaling -* Integrating model serving with other GCP services +* Building end-to-end ML pipelines on GCP -{% hint style="warning" %} -The Vertex AI Model Deployer requires a Vertex AI Model Registry to be present in your stack. Make sure you have configured both components properly. +{% hint style="info" %} +The Vertex AI Model Deployer works best with a Vertex AI Model Registry in your stack, as this enables seamless model versioning and deployment. However, it can also work with other model registries or directly with model artifacts. + +The deployer can be used with both local and remote orchestrators, making it flexible for different development and production scenarios. {% endhint %} ## How to deploy it? 
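
For orientation, the deployer documented here ultimately drives plain Vertex AI SDK operations: look up a registered model, create an endpoint, and deploy with replica bounds. A hedged sketch of those raw calls; resource names, regions, and machine shapes are placeholders:

```python
from google.cloud import aiplatform

aiplatform.init(project="my-gcp-project", location="us-central1")

# Look up an already-registered model by its full resource name.
model = aiplatform.Model(
    "projects/123456/locations/us-central1/models/my-model"
)

# Create an endpoint and deploy the model with basic scaling bounds.
endpoint = aiplatform.Endpoint.create(display_name="my-endpoint")
endpoint.deploy(
    model=model,
    machine_type="n1-standard-4",
    min_replica_count=1,
    max_replica_count=3,
    traffic_percentage=100,
)

# Online prediction against the deployed model.
print(endpoint.predict(instances=[[5.1, 3.5, 1.4, 0.2]]).predictions)
```
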
@@ -44,19 +47,18 @@ zenml service-connector register vertex_deployer_connector \ --service_account_json=@vertex-deployer-sa.json \ --resource-type gcp-generic -# Register the model deployer +# Register the model deployer and connect it to the service connector zenml model-deployer register vertex_deployer \ --flavor=vertex \ - --location=us-central1 - -# Connect the model deployer to the service connector -zenml model-deployer connect vertex_deployer --connector vertex_deployer_connector + --location=us-central1 \ + --connector vertex_deployer_connector ``` {% hint style="info" %} The service account needs the following permissions: - `Vertex AI User` role for deploying models - `Vertex AI Service Agent` role for managing model endpoints +- `Storage Object Viewer` role if accessing models stored in Google Cloud Storage {% endhint %} ## How to use it @@ -85,7 +87,7 @@ def model_deployer( """Model deployer step.""" zenml_client = Client() current_model = get_step_context().model - model_registry_uri = current_model.get_model_artifact("THE_MODEL_ARTIFACT_NAME_GIVEN_IN_TRAINING_STEP").uri + model_registry_uri = current_model.get_model_artifact("model").uri model_deployer = zenml_client.active_stack.model_deployer # Configure the deployment @@ -95,9 +97,23 @@ def model_deployer( model_name=current_model.name, description="Vertex AI model deployment example", model_id=model_registry_uri, - machine_type="n1-standard-4", # Optional: specify machine type - min_replica_count=1, # Optional: minimum number of replicas - max_replica_count=3, # Optional: maximum number of replicas + machine_type="n1-standard-4", + min_replica_count=1, + max_replica_count=3, + # Optional advanced settings + container=VertexAIContainerSpec( + image_uri="your-custom-image:latest", + ports=[8080], + env={"ENV_VAR": "value"} + ), + resources=VertexAIResourceSpec( + accelerator_type="NVIDIA_TESLA_T4", + accelerator_count=1 + ), + explanation=VertexAIExplanationSpec( + metadata={"method": "integrated-gradients"}, + parameters={"num_integral_steps": 50} + ) ) # Deploy the model @@ -111,7 +127,7 @@ def model_deployer( ### Configuration Options -The Vertex AI Model Deployer accepts a rich set of configuration options through `VertexDeploymentConfig`: +The Vertex AI Model Deployer uses a comprehensive configuration system that includes: * Basic Configuration: * `location`: GCP region for deployment (e.g., "us-central1") @@ -119,9 +135,16 @@ The Vertex AI Model Deployer accepts a rich set of configuration options through * `model_name`: Name of the model being deployed * `model_id`: Model ID from the Vertex AI Model Registry -* Infrastructure Configuration: +* Container Configuration (`VertexAIContainerSpec`): + * `image_uri`: Custom serving container image + * `ports`: Container ports to expose + * `env`: Environment variables + * `predict_route`: Custom prediction HTTP path + * `health_route`: Custom health check path + +* Resource Configuration (`VertexAIResourceSpec`): * `machine_type`: Type of machine to use (e.g., "n1-standard-4") - * `accelerator_type`: GPU accelerator type if needed + * `accelerator_type`: GPU accelerator type * `accelerator_count`: Number of GPUs per replica * `min_replica_count`: Minimum number of serving replicas * `max_replica_count`: Maximum number of serving replicas @@ -131,8 +154,8 @@ The Vertex AI Model Deployer accepts a rich set of configuration options through * `network`: VPC network configuration * `encryption_spec_key_name`: Customer-managed encryption key * `enable_access_logging`: Enable 
detailed access logging - * `explanation_metadata`: Model explanation configuration - * `autoscaling_target_cpu_utilization`: Target CPU utilization for autoscaling + * `explanation`: Model explanation configuration + * `labels`: Custom resource labels ### Running Predictions @@ -163,19 +186,23 @@ if services: ### Limitations and Considerations 1. **Stack Requirements**: - - Requires a Vertex AI Model Registry in the stack - - All stack components must be non-local + - Works best with a Vertex AI Model Registry but can function without it + - Compatible with both local and remote orchestrators + - Requires valid GCP credentials and permissions 2. **Authentication**: - Requires proper GCP credentials with Vertex AI permissions - Best practice is to use service connectors for authentication + - Supports multiple authentication methods (service account, user account, workload identity) 3. **Costs**: - Vertex AI endpoints incur costs based on machine type and uptime - Consider using autoscaling to optimize costs + - Monitor usage through GCP Cloud Monitoring 4. **Region Availability**: - Service availability depends on Vertex AI regional availability - Model and endpoint must be in the same region + - Consider data residency requirements when choosing regions Check out the [SDK docs](https://sdkdocs.zenml.io) for more detailed information about the implementation. \ No newline at end of file diff --git a/src/zenml/integrations/gcp/flavors/vertex_base_config.py b/src/zenml/integrations/gcp/flavors/vertex_base_config.py new file mode 100644 index 00000000000..e2872411ba6 --- /dev/null +++ b/src/zenml/integrations/gcp/flavors/vertex_base_config.py @@ -0,0 +1,199 @@ +# Copyright (c) ZenML GmbH 2024. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing +# permissions and limitations under the License. 
+"""Shared configuration classes for Vertex AI components.""" + +from typing import Any, Dict, Optional, Sequence + +from pydantic import BaseModel, Field + +from zenml.config.base_settings import BaseSettings + + +class VertexAIContainerSpec(BaseModel): + """Container specification for Vertex AI models and endpoints.""" + + image_uri: Optional[str] = Field( + None, description="Docker image URI for model serving" + ) + command: Optional[Sequence[str]] = Field( + None, description="Container command to run" + ) + args: Optional[Sequence[str]] = Field( + None, description="Container command arguments" + ) + env: Optional[Dict[str, str]] = Field( + None, description="Environment variables" + ) + ports: Optional[Sequence[int]] = Field( + None, description="Container ports to expose" + ) + predict_route: Optional[str] = Field( + None, description="HTTP path for prediction requests" + ) + health_route: Optional[str] = Field( + None, description="HTTP path for health check requests" + ) + + +class VertexAIResourceSpec(BaseModel): + """Resource specification for Vertex AI deployments.""" + + machine_type: Optional[str] = Field( + None, description="Compute instance machine type" + ) + accelerator_type: Optional[str] = Field( + None, description="Hardware accelerator type" + ) + accelerator_count: Optional[int] = Field( + None, description="Number of accelerators" + ) + min_replica_count: Optional[int] = Field( + 1, description="Minimum number of replicas" + ) + max_replica_count: Optional[int] = Field( + 1, description="Maximum number of replicas" + ) + + +class VertexAIExplanationSpec(BaseModel): + """Explanation configuration for Vertex AI models.""" + + metadata: Optional[Dict[str, Any]] = Field( + None, description="Explanation metadata" + ) + parameters: Optional[Dict[str, Any]] = Field( + None, description="Explanation parameters" + ) + + +class VertexAIBaseConfig(BaseModel): + """Base configuration shared by Vertex AI components. 
+ + Reference: + - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models + - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.endpoints + """ + + # Basic settings + location: str = Field( + "us-central1", description="GCP region for Vertex AI resources" + ) + project_id: Optional[str] = Field( + None, description="Optional project ID override" + ) + + # Container configuration + container: Optional[VertexAIContainerSpec] = Field( + None, description="Container configuration" + ) + + # Resource configuration + resources: Optional[VertexAIResourceSpec] = Field( + None, description="Resource configuration" + ) + + # Service configuration + service_account: Optional[str] = Field( + None, description="Service account email" + ) + network: Optional[str] = Field(None, description="VPC network") + + # Security + encryption_spec_key_name: Optional[str] = Field( + None, description="Customer-managed encryption key" + ) + + # Monitoring and logging + enable_access_logging: Optional[bool] = Field( + None, description="Enable access logging" + ) + disable_container_logging: Optional[bool] = Field( + None, description="Disable container logging" + ) + + # Model explanation + explanation: Optional[VertexAIExplanationSpec] = Field( + None, description="Model explanation configuration" + ) + + # Labels and metadata + labels: Optional[Dict[str, str]] = Field( + None, description="Resource labels" + ) + metadata: Optional[Dict[str, str]] = Field( + None, description="Custom metadata" + ) + + +class VertexAIModelConfig(VertexAIBaseConfig): + """Configuration specific to Vertex AI Models.""" + + # Model metadata + display_name: Optional[str] = None + description: Optional[str] = None + version_description: Optional[str] = None + version_aliases: Optional[Sequence[str]] = None + + # Model artifacts + artifact_uri: Optional[str] = None + model_source_spec: Optional[Dict[str, Any]] = None + + # Model versioning + is_default_version: Optional[bool] = None + + # Model formats + supported_deployment_resources_types: Optional[Sequence[str]] = None + supported_input_storage_formats: Optional[Sequence[str]] = None + supported_output_storage_formats: Optional[Sequence[str]] = None + + # Training metadata + training_pipeline_display_name: Optional[str] = None + training_pipeline_id: Optional[str] = None + + # Model optimization + model_source_info: Optional[Dict[str, str]] = None + original_model_info: Optional[Dict[str, str]] = None + containerized_model_optimization: Optional[Dict[str, Any]] = None + + +class VertexAIEndpointConfig(VertexAIBaseConfig): + """Configuration specific to Vertex AI Endpoints.""" + + # Endpoint metadata + display_name: Optional[str] = None + description: Optional[str] = None + + # Traffic configuration + traffic_split: Optional[Dict[str, int]] = None + traffic_percentage: Optional[int] = 0 + + # Autoscaling + autoscaling_target_cpu_utilization: Optional[float] = None + autoscaling_target_accelerator_duty_cycle: Optional[float] = None + + # Deployment + sync: Optional[bool] = False + deploy_request_timeout: Optional[int] = None + existing_endpoint: Optional[str] = None + + +class VertexAIBaseSettings(BaseSettings): + """Base settings for Vertex AI components.""" + + location: str = Field( + "us-central1", description="Default GCP region for Vertex AI resources" + ) + project_id: Optional[str] = Field( + None, description="Optional project ID override" + ) diff --git a/src/zenml/integrations/gcp/flavors/vertex_model_deployer_flavor.py 
b/src/zenml/integrations/gcp/flavors/vertex_model_deployer_flavor.py index 1b526cf0f2f..7c450f51b09 100644 --- a/src/zenml/integrations/gcp/flavors/vertex_model_deployer_flavor.py +++ b/src/zenml/integrations/gcp/flavors/vertex_model_deployer_flavor.py @@ -13,14 +13,15 @@ # permissions and limitations under the License. """Vertex AI model deployer flavor.""" -from typing import TYPE_CHECKING, Dict, Optional, Sequence, Type - -from pydantic import BaseModel +from typing import TYPE_CHECKING, Optional, Type from zenml.integrations.gcp import ( GCP_RESOURCE_TYPE, VERTEX_MODEL_DEPLOYER_FLAVOR, ) +from zenml.integrations.gcp.flavors.vertex_base_config import ( + VertexAIEndpointConfig, +) from zenml.integrations.gcp.google_credentials_mixin import ( GoogleCredentialsConfigMixin, ) @@ -38,48 +39,18 @@ ) -class VertexBaseConfig(BaseModel): - """Vertex AI Inference Endpoint configuration.""" - - location: Optional[str] = None - version: Optional[str] = None - serving_container_image_uri: Optional[str] = None - artifact_uri: Optional[str] = None - model_id: Optional[str] = None - is_default_version: Optional[bool] = None - serving_container_command: Optional[Sequence[str]] = None - serving_container_args: Optional[Sequence[str]] = None - serving_container_environment_variables: Optional[Dict[str, str]] = None - serving_container_ports: Optional[Sequence[int]] = None - serving_container_grpc_ports: Optional[Sequence[int]] = None - deployed_model_display_name: Optional[str] = None - traffic_percentage: Optional[int] = 0 - traffic_split: Optional[Dict[str, int]] = None - machine_type: Optional[str] = None - accelerator_type: Optional[str] = None - accelerator_count: Optional[int] = None - min_replica_count: Optional[int] = 1 - max_replica_count: Optional[int] = 1 - service_account: Optional[str] = None - metadata: Optional[Dict[str, str]] = None - network: Optional[str] = None - encryption_spec_key_name: Optional[str] = None - sync: Optional[bool] = False - deploy_request_timeout: Optional[int] = None - autoscaling_target_cpu_utilization: Optional[float] = None - autoscaling_target_accelerator_duty_cycle: Optional[float] = None - enable_access_logging: Optional[bool] = None - disable_container_logging: Optional[bool] = None - explanation_metadata: Optional[Dict[str, str]] = None - explanation_parameters: Optional[Dict[str, str]] = None - existing_endpoint: Optional[str] = None - labels: Optional[Dict[str, str]] = None - - class VertexModelDeployerConfig( - BaseModelDeployerConfig, VertexBaseConfig, GoogleCredentialsConfigMixin + BaseModelDeployerConfig, + GoogleCredentialsConfigMixin, + VertexAIEndpointConfig, ): - """Configuration for the Vertex AI model deployer.""" + """Configuration for the Vertex AI model deployer. 
+
+    This configuration combines:
+    - Base model deployer configuration
+    - Google Cloud authentication
+    - Vertex AI endpoint configuration
+    """
 
 
 class VertexModelDeployerFlavor(BaseModelDeployerFlavor):
diff --git a/src/zenml/integrations/gcp/flavors/vertex_model_registry_flavor.py b/src/zenml/integrations/gcp/flavors/vertex_model_registry_flavor.py
index e16cf548685..8524d407e2f 100644
--- a/src/zenml/integrations/gcp/flavors/vertex_model_registry_flavor.py
+++ b/src/zenml/integrations/gcp/flavors/vertex_model_registry_flavor.py
@@ -15,11 +15,13 @@
 
 from typing import TYPE_CHECKING, Optional, Type
 
-from zenml.config.base_settings import BaseSettings
 from zenml.integrations.gcp import (
     GCP_RESOURCE_TYPE,
     VERTEX_MODEL_REGISTRY_FLAVOR,
 )
+from zenml.integrations.gcp.flavors.vertex_base_config import (
+    VertexAIModelConfig,
+)
 from zenml.integrations.gcp.google_credentials_mixin import (
     GoogleCredentialsConfigMixin,
 )
@@ -35,18 +37,18 @@
 )
 
 
-class VertexAIModelRegistrySettings(BaseSettings):
-    """Settings for the VertexAI model registry."""
-
-    location: str
-
-
 class VertexAIModelRegistryConfig(
     BaseModelRegistryConfig,
     GoogleCredentialsConfigMixin,
-    VertexAIModelRegistrySettings,
+    VertexAIModelConfig,
 ):
-    """Configuration for the VertexAI model registry."""
+    """Configuration for the VertexAI model registry.
+
+    This configuration combines:
+    - Base model registry configuration
+    - Google Cloud authentication
+    - Vertex AI model configuration
+    """
 
 
 class VertexModelRegistryFlavor(BaseModelRegistryFlavor):
diff --git a/src/zenml/integrations/gcp/model_deployers/vertex_model_deployer.py b/src/zenml/integrations/gcp/model_deployers/vertex_model_deployer.py
index 3b6d31820cc..e8e6df120dd 100644
--- a/src/zenml/integrations/gcp/model_deployers/vertex_model_deployer.py
+++ b/src/zenml/integrations/gcp/model_deployers/vertex_model_deployer.py
@@ -13,7 +13,7 @@
 # permissions and limitations under the License.
 """Implementation of the Vertex AI Model Deployer."""
 
-from typing import ClassVar, Dict, Optional, Tuple, Type, cast
+from typing import Any, ClassVar, Dict, Optional, Tuple, Type, cast
 from uuid import UUID
 
 from google.cloud import aiplatform
@@ -66,9 +66,22 @@ def config(self) -> VertexModelDeployerConfig:
         """
         return cast(VertexModelDeployerConfig, self._config)
 
-    def setup_aiplatform(self) -> None:
-        """Setup the Vertex AI platform."""
-        credentials, project_id = self._get_authentication()
+    def _init_vertex_client(
+        self,
+        credentials: Optional[Any] = None,
+    ) -> None:
+        """Initialize Vertex AI client with proper credentials.
+
+        Args:
+            credentials: Optional credentials to use
+        """
+        # Always resolve the project ID; fall back to the configured
+        # credentials when the caller does not supply any.
+        default_credentials, project_id = self._get_authentication()
+        if not credentials:
+            credentials = default_credentials
+
+        # Initialize with per-instance credentials
         aiplatform.init(
             project=project_id,
             location=self.config.location,
@@ -77,16 +90,14 @@ def setup_aiplatform(self) -> None:
 
     @property
     def validator(self) -> Optional[StackValidator]:
-        """Validates that the stack contains a model registry.
-
-        Also validates that the artifact store is not local.
+        """Validates that the stack contains a Vertex AI model registry.
 
         Returns:
             A StackValidator instance.
         """
 
         def _validate_stack_requirements(stack: "Stack") -> Tuple[bool, str]:
-            """Validates that all the stack components are not local.
+            """Validates stack requirements.
 
             Args:
                 stack: The stack to validate.
@@ -102,22 +113,6 @@ def _validate_stack_requirements(stack: "Stack") -> Tuple[bool, str]:
                     "model registry to the stack."
                 )
 
-        # Validate that the rest of the components are not local.
- for stack_comp in stack.components.values(): - local_path = stack_comp.local_path - if not local_path: - continue - return False, ( - f"The '{stack_comp.name}' {stack_comp.type.value} is a " - f"local stack component. The Vertex AI Pipelines " - f"orchestrator requires that all the components in the " - f"stack used to execute the pipeline have to be not local, " - f"because there is no way for Vertex to connect to your " - f"local machine. You should use a flavor of " - f"{stack_comp.type.value} other than '" - f"{stack_comp.flavor}'." - ) - return True, "" return StackValidator( @@ -133,22 +125,21 @@ def _create_deployment_service( """Creates a new VertexAIDeploymentService. Args: - id: the UUID of the model to be deployed with Vertex model deployer. - timeout: the timeout in seconds to wait for the Vertex inference endpoint - to be provisioned and successfully started or updated. - config: the configuration of the model to be deployed with Vertex model deployer. + id: the UUID of the model to be deployed + timeout: timeout in seconds for deployment operations + config: deployment configuration Returns: - The VertexDeploymentService object that can be used to interact - with the Vertex inference endpoint. + The VertexDeploymentService instance """ - # create a new service for the new model + # Initialize client with fresh credentials + self._init_vertex_client() + + # Create service instance service = VertexDeploymentService(uuid=id, config=config) - logger.info( - "Creating an artifact %s with service instance attached as metadata.", - "attached as metadata. If there's an active pipeline and/or model, " - "this artifact will be associated with it.", - ) + logger.info("Creating Vertex AI deployment service with ID %s", id) + + # Start the service service.start(timeout=timeout) return service @@ -161,22 +152,22 @@ def perform_deploy_model( """Deploy a model to Vertex AI. Args: - id: the UUID of the service to be created. - config: the configuration of the model to be deployed. - timeout: the timeout for the deployment operation. + id: the UUID of the service to be created + config: deployment configuration + timeout: timeout for deployment operations Returns: - The ZenML Vertex AI deployment service object. + The deployment service instance """ with track_handler(AnalyticsEvent.MODEL_DEPLOYED) as analytics_handler: config = cast(VertexDeploymentConfig, config) + + # Create and start deployment service service = self._create_deployment_service( id=id, config=config, timeout=timeout ) - logger.info( - f"Creating a new Vertex AI deployment service: {service}" - ) + # Track analytics client = Client() stack = client.active_stack stack_metadata = { @@ -187,7 +178,8 @@ def perform_deploy_model( "store_type": client.zen_store.type.value, **stack_metadata, } - return service + + return service def perform_stop_model( self, @@ -198,13 +190,16 @@ def perform_stop_model( """Stop a Vertex AI deployment service. Args: - service: The service to stop. - timeout: Timeout in seconds to wait for the service to stop. - force: If True, force the service to stop. + service: The service to stop + timeout: Timeout for stop operation + force: Whether to force stop Returns: - The stopped service. + The stopped service """ + # Initialize client with fresh credentials + self._init_vertex_client() + service.stop(timeout=timeout, force=force) return service @@ -216,12 +211,15 @@ def perform_start_model( """Start a Vertex AI deployment service. Args: - service: The service to start. 
-            timeout: Timeout in seconds to wait for the service to start.
+            service: The service to start
+            timeout: Timeout for start operation
 
         Returns:
-            The started service.
+            The started service
         """
+        # Initialize client with fresh credentials
+        self._init_vertex_client()
+
         service.start(timeout=timeout)
         return service
 
@@ -234,10 +232,13 @@ def perform_delete_model(
         """Delete a Vertex AI deployment service.
 
         Args:
-            service: The service to delete.
-            timeout: Timeout in seconds to wait for the service to stop.
-            force: If True, force the service to stop.
+            service: The service to delete
+            timeout: Timeout for delete operation
+            force: Whether to force delete
         """
+        # Initialize client with fresh credentials
+        self._init_vertex_client()
+
         service = cast(VertexDeploymentService, service)
         service.stop(timeout=timeout, force=force)
 
@@ -248,12 +249,15 @@ def get_model_server_info(  # type: ignore[override]
         """Get information about the deployed model server.
 
         Args:
-            service_instance: The VertexDeploymentService instance.
+            service_instance: The deployment service instance
 
         Returns:
-            A dictionary containing information about the model server.
+            Dict containing server information
         """
         return {
-            "PREDICTION_URL": service_instance.prediction_url,
-            "HEALTH_CHECK_URL": service_instance.get_healthcheck_url(),
+            "prediction_url": service_instance.prediction_url,
+            "status": service_instance.status.state.value,
+            "endpoint_id": service_instance.status.endpoint.endpoint_name
+            if service_instance.status.endpoint
+            else None,
         }
diff --git a/src/zenml/integrations/gcp/model_registries/vertex_model_registry.py b/src/zenml/integrations/gcp/model_registries/vertex_model_registry.py
index a733934abb0..64dff0a3592 100644
--- a/src/zenml/integrations/gcp/model_registries/vertex_model_registry.py
+++ b/src/zenml/integrations/gcp/model_registries/vertex_model_registry.py
@@ -13,11 +13,14 @@
 # permissions and limitations under the License.
 """Vertex AI model registry integration for ZenML."""
 
+import base64
 from datetime import datetime
-from typing import Any, Dict, List, Optional, cast
+from typing import Any, Dict, List, Optional, Tuple, cast
 
+from google.api_core import exceptions
 from google.cloud import aiplatform
 
+from zenml.client import Client
 from zenml.integrations.gcp.flavors.vertex_model_registry_flavor import (
     VertexAIModelRegistryConfig,
 )
@@ -35,6 +38,12 @@
 
 logger = get_logger(__name__)
 
+# Constants for Vertex AI limitations
+MAX_LABEL_COUNT = 64
+MAX_LABEL_KEY_LENGTH = 63
+MAX_LABEL_VALUE_LENGTH = 63
+MAX_DISPLAY_NAME_LENGTH = 128
+
 
 class VertexAIModelRegistry(BaseModelRegistry, GoogleCredentialsMixin):
     """Register models using Vertex AI."""
@@ -48,14 +57,164 @@ def config(self) -> VertexAIModelRegistryConfig:
         """
         return cast(VertexAIModelRegistryConfig, self._config)
 
-    def setup_aiplatform(self) -> None:
-        """Setup the Vertex AI platform."""
-        credentials, project_id = self._get_authentication()
-        aiplatform.init(
-            project=project_id,
-            location=self.config.location,
-            credentials=credentials,
+    def _sanitize_label(self, value: str) -> str:
+        """Sanitize a label value to comply with Vertex AI requirements. 
+ + Args: + value: The label value to sanitize + + Returns: + Sanitized label value + """ + if not value: + return "" + # Convert to lowercase and replace invalid chars + value = value.lower() + value = "".join( + c if c.isalnum() or c in ["-", "_"] else "-" for c in value ) + # Ensure starts with letter/number + if not value[0].isalnum(): + value = f"x{value}" + return value[:MAX_LABEL_KEY_LENGTH] + + def _get_tenant_id(self) -> str: + """Get the current ZenML server/tenant ID for multi-tenancy support. + + Returns: + The tenant ID string + """ + client = Client() + return str(client.active_stack_model.id) + + def _encode_name_version(self, name: str, version: str) -> str: + """Encode model name and version into a Vertex AI compatible format. + + Args: + name: Model name + version: Model version + + Returns: + Encoded string suitable for Vertex AI + """ + # Base64 encode to handle special characters while preserving uniqueness + encoded = base64.b64encode(f"{name}:{version}".encode()).decode() + # Make it URL and label safe + encoded = encoded.replace("+", "-").replace("/", "_").replace("=", "") + return encoded[:MAX_DISPLAY_NAME_LENGTH] + + def _decode_name_version(self, encoded: str) -> Tuple[str, str]: + """Decode model name and version from encoded format. + + Args: + encoded: The encoded string + + Returns: + Tuple of (name, version) + """ + # Add back padding + padding = 4 - (len(encoded) % 4) + if padding != 4: + encoded += "=" * padding + # Restore special chars + encoded = encoded.replace("-", "+").replace("_", "/") + try: + decoded = base64.b64decode(encoded).decode() + name, version = decoded.split(":", 1) + return name, version + except Exception as e: + logger.warning( + f"Failed to decode name/version from {encoded}: {e}" + ) + return encoded, "unknown" + + def _prepare_labels( + self, + metadata: Optional[Dict[str, str]] = None, + stage: Optional[ModelVersionStage] = None, + ) -> Dict[str, str]: + """Prepare labels for Vertex AI, including internal ZenML metadata.""" + labels = {} + + # Add internal ZenML labels + labels["managed_by"] = "zenml" + tenant_id = self._sanitize_label(self._get_tenant_id()) + labels["tenant_id"] = tenant_id + + if stage: + labels["stage"] = stage.value.lower() + + # Merge user metadata with sanitization + if metadata: + remaining_slots = MAX_LABEL_COUNT - len(labels) + for i, (key, value) in enumerate(metadata.items()): + if i >= remaining_slots: + logger.warning( + f"Exceeded maximum label count ({MAX_LABEL_COUNT}), " + f"dropping remaining metadata" + ) + break + safe_key = self._sanitize_label(str(key)) + safe_value = self._sanitize_label(str(value)) + labels[safe_key] = safe_value + + return labels + + def _get_model_id(self, name: str) -> str: + """Get the full Vertex AI model ID. + + Args: + name: Model name + + Returns: + Full model ID in format: projects/{project}/locations/{location}/models/{model} + """ + _, project_id = self._get_authentication() + return f"projects/{project_id}/locations/{self.config.location}/models/{name}" + + def _get_model_version_id(self, model_id: str, version: str) -> str: + """Get the full Vertex AI model version ID. 
+
+        Args:
+            model_id: Full model ID
+            version: Version string
+
+        Returns:
+            Full model version ID in format: {model_id}/versions/{version}
+        """
+        return f"{model_id}/versions/{version}"
+
+    def _init_vertex_model(
+        self,
+        name: Optional[str] = None,
+        version: Optional[str] = None,
+        credentials: Optional[Any] = None,
+    ) -> aiplatform.Model:
+        """Initialize a Vertex AI model with proper credentials.
+
+        Args:
+            name: Optional model name
+            version: Optional version
+            credentials: Optional credentials
+
+        Returns:
+            Vertex AI Model instance
+        """
+        if not credentials:
+            credentials, _ = self._get_authentication()
+
+        kwargs = {
+            "location": self.config.location,
+            "credentials": credentials,
+        }
+
+        if name:
+            model_id = self._get_model_id(name)
+            if version:
+                model_id = self._get_model_version_id(model_id, version)
+            # `aiplatform.Model` expects the resource name as `model_name`.
+            kwargs["model_name"] = model_id
+
+        return aiplatform.Model(**kwargs)
 
     def register_model(
         self,
@@ -72,10 +231,16 @@ def delete_model(
         self,
         name: str,
     ) -> None:
-        """Delete a model from the Vertex AI model registry."""
+        """Delete a model and all of its versions from the Vertex AI model registry."""
         try:
-            model = aiplatform.Model(model_name=name)
+            model = self._init_vertex_model(name=name)
+            # List and delete all model versions first
+            versions = model.list_versions()
+            for version in versions:
+                version.delete()
+            # Then delete the parent model
             model.delete()
+            logger.info(f"Deleted model '{name}' and all its versions.")
         except Exception as e:
             raise RuntimeError(f"Failed to delete model: {str(e)}")
 
@@ -92,11 +257,12 @@ def update_model(
         )
 
     def get_model(self, name: str) -> RegisteredModel:
-        """Get a model from the Vertex AI model registry."""
+        """Get a model from the Vertex AI model registry by name without needing a version."""
         try:
-            model = aiplatform.Model(model_name=name)
+            # Fetch by display_name: `aiplatform.Model` only accepts a
+            # resource name, so look the model up through a list filter.
+            credentials, _ = self._get_authentication()
+            models = aiplatform.Model.list(
+                filter=f'display_name="{name}"',
+                location=self.config.location,
+                credentials=credentials,
+            )
+            if not models:
+                raise RuntimeError(f"Model '{name}' not found.")
+            model = models[0]
             return RegisteredModel(
-                name=model.name,
+                name=model.display_name,
                 description=model.description,
                 metadata=model.labels,
             )
@@ -109,22 +275,29 @@ def list_models(
         metadata: Optional[Dict[str, str]] = None,
     ) -> List[RegisteredModel]:
         """List models in the Vertex AI model registry."""
-        self.setup_aiplatform()
-        filter_expr = 'labels.managed_by="zenml"'
+        credentials, _ = self._get_authentication()
+        # Always filter with ZenML-specific labels (including tenant id for multi-tenancy)
+        tenant_label = self._sanitize_label(self._get_tenant_id())
+        filter_expr = (
+            f"labels.managed_by='zenml' AND labels.tenant_id='{tenant_label}'"
+        )
+
        if name:
-            filter_expr = filter_expr + f' AND display_name="{name}"'
+            filter_expr += f" AND display_name='{name}'"
        if metadata:
            for key, value in metadata.items():
-                filter_expr = filter_expr + f' AND labels.{key}="{value}"'
+                filter_expr += f" AND labels.{key}='{value}'"
        try:
-            models = aiplatform.Model.list(filter=filter_expr)
+            all_models = aiplatform.Model.list(
+                filter=filter_expr,
+                location=self.config.location,
+                credentials=credentials,
+            )
+            # Deduplicate by display_name so only one entry per "logical" model is returned. 
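+            # (Vertex AI allows several Model resources to share a display
+            # name, so the plain list can contain duplicates; the dict below
+            # keeps the last resource returned for each display name.)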
+            unique_models = {model.display_name: model for model in all_models}
             return [
                 RegisteredModel(
-                    name=model.display_name,
-                    description=model.description,
-                    metadata=model.labels,
+                    name=parent_model.display_name,
+                    description=parent_model.description,
+                    metadata=parent_model.labels,
                 )
-                for model in models
+                for parent_model in unique_models.values()
             ]
         except Exception as e:
             raise RuntimeError(f"Failed to list models: {str(e)}")
@@ -138,40 +311,103 @@ def register_model_version(
         metadata: Optional[ModelRegistryModelMetadata] = None,
         **kwargs: Any,
     ) -> RegistryModelVersion:
-        """Register a model version to the Vertex AI model registry."""
-        self.setup_aiplatform()
+        """Register a model version to the Vertex AI model registry.
+
+        Args:
+            name: Model name
+            version: Model version
+            model_source_uri: URI to model artifacts
+            description: Model description
+            metadata: Model metadata
+            **kwargs: Additional arguments
+
+        Returns:
+            RegistryModelVersion instance
+        """
+        credentials, _ = self._get_authentication()
+
+        # Prepare labels with internal ZenML metadata, ensuring they are sanitized
         metadata_dict = metadata.model_dump() if metadata else {}
-        serving_container_image_uri = metadata_dict.get(
-            "serving_container_image_uri",
-            None
-            or "europe-docker.pkg.dev/vertex-ai/prediction/sklearn-cpu.1-3:latest",
-        )
-        is_default_version = metadata_dict.get("is_default_version", False)
+        labels = self._prepare_labels(metadata_dict)
+        if version:
+            labels["user_version"] = self._sanitize_label(version)
+
+        # Get container image from config if available, otherwise from metadata with a default
+        if (
+            hasattr(self.config, "container")
+            and self.config.container
+            and self.config.container.image_uri
+        ):
+            serving_container_image_uri = self.config.container.image_uri
+        else:
+            serving_container_image_uri = metadata_dict.get(
+                "serving_container_image_uri",
+                "europe-docker.pkg.dev/vertex-ai/prediction/sklearn-cpu.1-3:latest",
+            )
+
+        # Optionally add additional parameters from the config resources
+        if hasattr(self.config, "resources") and self.config.resources:
+            if self.config.resources.machine_type:
+                metadata_dict.setdefault(
+                    "machine_type", self.config.resources.machine_type
+                )
+            if self.config.resources.min_replica_count is not None:
+                metadata_dict.setdefault(
+                    "min_replica_count",
+                    str(self.config.resources.min_replica_count),
+                )
+            if self.config.resources.max_replica_count is not None:
+                metadata_dict.setdefault(
+                    "max_replica_count",
+                    str(self.config.resources.max_replica_count),
+                )
+
+        # Use a consistently sanitized display name instead of flat "name_version"
+        model_display_name = self._sanitize_model_display_name(name)
+
         try:
-            model_version = aiplatform.Model.upload(
+            # Attempt to get the parent model (by name only)
+            parent_model = self._init_vertex_model(name=name)
+            logger.info(f"Found existing model: {name}")
+        except exceptions.NotFound:
+            # Create the parent model if it doesn't exist
+            parent_model = aiplatform.Model.upload(
+                display_name=model_display_name,
                 artifact_uri=model_source_uri,
-                display_name=f"{name}_{version}",
                 serving_container_image_uri=serving_container_image_uri,
                 description=description,
-                is_default_version=is_default_version,
-                labels=metadata_dict,
+                labels=labels,
+                credentials=credentials,
+                location=self.config.location,
             )
-            return self._vertex_model_to_registry_version(model_version)
-        except Exception as e:
-            raise RuntimeError(f"Failed to register model version: {str(e)}")
+            logger.info(f"Created new model: {name}")
+
+        # Create a new 
version for the model. Note that we keep the display name intact. + model_version = parent_model.create_version( + artifact_uri=model_source_uri, + serving_container_image_uri=serving_container_image_uri, + description=description, + labels=labels, + ) + logger.info(f"Created new version with labels: {model_version.labels}") + + return self._vertex_model_to_registry_version(model_version) def delete_model_version( self, name: str, version: str, ) -> None: - """Delete a model version from the Vertex AI model registry.""" - self.setup_aiplatform() + """Delete a model version from the Vertex AI model registry. + + Args: + name: Model name + version: Version string + """ try: - model_version = aiplatform.ModelVersion( - model_name=f"{name}@{version}" - ) - model_version.delete() + model = self._init_vertex_model(name=name, version=version) + model.delete() + logger.info(f"Deleted model version: {name} version {version}") except Exception as e: raise RuntimeError(f"Failed to delete model version: {str(e)}") @@ -185,18 +421,31 @@ def update_model_version( stage: Optional[ModelVersionStage] = None, ) -> RegistryModelVersion: """Update a model version in the Vertex AI model registry.""" - self.setup_aiplatform() try: - model_version = aiplatform.Model(model_name=f"{name}@{version}") - labels = model_version.labels + parent_model = self._init_vertex_model(name=name) + sanitized_version = self._sanitize_label(version) + target_version = None + for v in parent_model.list_versions(): + if v.labels.get("user_version") == sanitized_version: + target_version = v + break + if target_version is None: + raise RuntimeError( + f"Model version '{version}' for '{name}' not found." + ) + labels = target_version.labels or {} if metadata: - metadata_dict = metadata.model_dump() if metadata else {} + metadata_dict = metadata.model_dump() for key, value in metadata_dict.items(): - labels[key] = value + labels[self._sanitize_label(key)] = self._sanitize_label( + str(value) + ) if remove_metadata: for key in remove_metadata: - labels.pop(key, None) - model_version.update(description=description, labels=labels) + labels.pop(self._sanitize_label(key), None) + if stage: + labels["stage"] = stage.value.lower() + target_version.update(description=description, labels=labels) return self.get_model_version(name, version) except Exception as e: raise RuntimeError(f"Failed to update model version: {str(e)}") @@ -204,11 +453,16 @@ def update_model_version( def get_model_version( self, name: str, version: str ) -> RegistryModelVersion: - """Get a model version from the Vertex AI model registry.""" - self.setup_aiplatform() + """Get a model version from the Vertex AI model registry using the version label.""" try: - model_version = aiplatform.Model(model_name=f"{name}@{version}") - return self._vertex_model_to_registry_version(model_version) + parent_model = self._init_vertex_model(name=name) + sanitized_version = self._sanitize_label(version) + for v in parent_model.list_versions(): + if v.labels.get("user_version") == sanitized_version: + return self._vertex_model_to_registry_version(v) + raise RuntimeError( + f"Model '{name}' with version '{version}' not found." 
+ ) except Exception as e: raise RuntimeError(f"Failed to get model version: {str(e)}") @@ -225,13 +479,16 @@ def list_model_versions( **kwargs: Any, ) -> List[RegistryModelVersion]: """List model versions from the Vertex AI model registry.""" - self.setup_aiplatform() filter_expr = [] if name: - filter_expr.append(f"display_name={name}") + filter_expr.append( + f"display_name={self._sanitize_model_display_name(name)}" + ) if metadata: for key, value in metadata.dict().items(): - filter_expr.append(f"labels.{key}={value}") + filter_expr.append( + f"labels.{self._sanitize_label(key)}={self._sanitize_label(str(value))}" + ) if created_after: filter_expr.append(f"create_time>{created_after.isoformat()}") if created_before: @@ -240,27 +497,13 @@ def list_model_versions( filter_str = " AND ".join(filter_expr) if filter_expr else None try: - model = aiplatform.Model(model_name=name) - versions = model.list_versions(filter=filter_str) - + parent_model = self._init_vertex_model(name=name) + versions = parent_model.list_versions(filter=filter_str) results = [ - RegistryModelVersion( - version=v.version_id, - model_source_uri=v.artifact_uri, - model_format="Custom", # Vertex AI doesn't provide this info directly - registered_model=self.get_model(v.name), - description=v.description, - created_at=v.create_time, - last_updated_at=v.update_time, - stage=ModelVersionStage.NONE, # Vertex AI doesn't have built-in stages - metadata=ModelRegistryModelMetadata(**v.labels), - ) - for v in versions + self._vertex_model_to_registry_version(v) for v in versions ] - if count: results = results[:count] - return results except Exception as e: raise RuntimeError(f"Failed to list model versions: {str(e)}") @@ -271,12 +514,16 @@ def load_model_version( version: str, **kwargs: Any, ) -> Any: - """Load a model version from the Vertex AI model registry.""" + """Load a model version from the Vertex AI model registry using label-based lookup.""" try: - model_version = aiplatform.ModelVersion( - model_name=f"{name}@{version}" + parent_model = self._init_vertex_model(name=name) + sanitized_version = self._sanitize_label(version) + for v in parent_model.list_versions(): + if v.labels.get("user_version") == sanitized_version: + return v + raise RuntimeError( + f"Model version '{version}' for '{name}' not found." ) - return model_version except Exception as e: raise RuntimeError(f"Failed to load model version: {str(e)}") @@ -288,24 +535,59 @@ def get_model_uri_artifact_store( return model_version.model_source_uri def _vertex_model_to_registry_version( - self, model: "aiplatform.Model" + self, model: aiplatform.Model ) -> RegistryModelVersion: - """Convert Vertex AI model to RegistryModelVersion. + """Convert Vertex AI model to ZenML RegistryModelVersion. 

         Args:
-            model: Vertex AI model instance
+            model: Vertex AI Model instance
 
         Returns:
-            RegistryModelVersion object
+            RegistryModelVersion instance
         """
+        # Extract stage from labels if present
+        stage = ModelVersionStage.NONE
+        if model.labels and "stage" in model.labels:
+            try:
+                stage = ModelVersionStage(model.labels["stage"].upper())
+            except ValueError:
+                pass
+
+        # Get parent model for registered_model field
+        try:
+            model_id = model.resource_name.split("/versions/")[0]
+            # `model_id` is already a full resource name, so build the model
+            # directly; `_init_vertex_model` would prepend the project and
+            # location prefix a second time.
+            credentials, _ = self._get_authentication()
+            parent_model = aiplatform.Model(
+                model_name=model_id,
+                location=self.config.location,
+                credentials=credentials,
+            )
+            registered_model = RegisteredModel(
+                name=parent_model.display_name,
+                description=parent_model.description,
+                metadata=parent_model.labels,
+            )
+        except Exception:
+            logger.warning(
+                f"Failed to get parent model for version: {model.resource_name}"
+            )
+            registered_model = None
+
         return RegistryModelVersion(
+            registered_model=registered_model,
             version=model.version_id,
-            model_source_uri=model.resource_name,
-            model_format="Custom",  # Vertex AI doesn't provide this info directly
-            registered_model=self.get_model(model.name),
+            model_source_uri=model.artifact_uri,
+            model_format="Custom",  # Vertex AI doesn't provide format info
             description=model.description,
+            metadata=model.labels,
             created_at=model.create_time,
             last_updated_at=model.update_time,
-            stage=ModelVersionStage.NONE,  # Vertex AI doesn't have built-in stages
-            metadata=ModelRegistryModelMetadata(**model.labels),
+            stage=stage,
         )
+
+    def _sanitize_model_display_name(self, name: str) -> str:
+        """Sanitize the model display name to conform to Vertex AI limits."""
+        # Use our existing sanitizer (which converts to lowercase, replaces invalid characters, etc.)
+        name = self._sanitize_label(name)
+        if len(name) > MAX_DISPLAY_NAME_LENGTH:
+            logger.warning(
+                f"Model name '{name}' exceeds {MAX_DISPLAY_NAME_LENGTH} characters; truncating." 
+ ) + name = name[:MAX_DISPLAY_NAME_LENGTH] + return name diff --git a/src/zenml/integrations/gcp/services/vertex_deployment.py b/src/zenml/integrations/gcp/services/vertex_deployment.py index c8a4e02f0a5..2b71ea5f71d 100644 --- a/src/zenml/integrations/gcp/services/vertex_deployment.py +++ b/src/zenml/integrations/gcp/services/vertex_deployment.py @@ -14,17 +14,16 @@ """Implementation of the Vertex AI Deployment service.""" import re -import time -from typing import Any, Dict, Generator, List, Optional, Tuple +from datetime import datetime +from typing import Any, Dict, Generator, List, Optional, cast -from google.api_core import exceptions +from google.api_core import exceptions, retry from google.cloud import aiplatform -from google.cloud import logging as vertex_logging -from pydantic import BaseModel, Field +from pydantic import BaseModel, Field, PrivateAttr from zenml.client import Client -from zenml.integrations.gcp.flavors.vertex_model_deployer_flavor import ( - VertexBaseConfig, +from zenml.integrations.gcp.flavors.vertex_base_config import ( + VertexAIEndpointConfig, ) from zenml.logger import get_logger from zenml.services import ServiceState, ServiceStatus, ServiceType @@ -32,12 +31,20 @@ logger = get_logger(__name__) -# Increase timeout for long-running operations -POLLING_TIMEOUT = ( - 1800 # Increased from 1200 to allow for longer deployment times -) +# Constants +POLLING_TIMEOUT = 1800 # 30 minutes +RETRY_DEADLINE = 600 # 10 minutes UUID_SLICE_LENGTH: int = 8 +# Retry configuration for transient errors +retry_config = retry.Retry( + initial=1.0, # Initial delay in seconds + maximum=60.0, # Maximum delay + multiplier=2.0, # Delay multiplier + deadline=RETRY_DEADLINE, + predicate=retry.if_transient_error, +) + def sanitize_vertex_label(value: str) -> str: """Sanitize a label value to comply with Vertex AI requirements. 
@@ -48,7 +55,6 @@ def sanitize_vertex_label(value: str) -> str: Returns: Sanitized label value """ - # Handle empty string if not value: return "" @@ -63,7 +69,7 @@ def sanitize_vertex_label(value: str) -> str: return value[:63] -class VertexDeploymentConfig(VertexBaseConfig, ServiceConfig): +class VertexDeploymentConfig(VertexAIEndpointConfig, ServiceConfig): """Vertex AI service configurations.""" def get_vertex_deployment_labels(self) -> Dict[str, str]: @@ -89,6 +95,9 @@ class VertexPredictionServiceEndpoint(BaseModel): endpoint_name: str deployed_model_id: str endpoint_url: Optional[str] = None + created_at: Optional[datetime] = None + updated_at: Optional[datetime] = None + state: Optional[str] = None class VertexServiceStatus(ServiceStatus): @@ -110,34 +119,22 @@ class VertexDeploymentService(BaseDeploymentService): status: VertexServiceStatus = Field( default_factory=lambda: VertexServiceStatus() ) + _project_id: Optional[str] = PrivateAttr(default=None) + _credentials: Optional[Any] = PrivateAttr(default=None) def _initialize_gcp_clients(self) -> None: """Initialize GCP clients with consistent credentials.""" - # Initialize aiplatform with project and location from zenml.integrations.gcp.model_deployers.vertex_model_deployer import ( VertexModelDeployer, ) - zenml_client = Client() - model_deployer = zenml_client.active_stack.model_deployer - if not isinstance(model_deployer, VertexModelDeployer): - raise RuntimeError( - "Active model deployer must be Vertex AI Model Deployer" - ) - - # get credentials from model deployer - credentials, project_id = model_deployer._get_authentication() - - # Initialize aiplatform - aiplatform.init( - project=project_id, - location=self.config.location, - credentials=credentials, + model_deployer = cast( + VertexModelDeployer, Client().active_stack.model_deployer ) - # Initialize logging client - self.logging_client = vertex_logging.Client( - project=project_id, credentials=credentials + # Get credentials from model deployer + self._credentials, self._project_id = ( + model_deployer._get_authentication() ) def __init__(self, config: VertexDeploymentConfig, **attrs: Any): @@ -151,17 +148,21 @@ def prediction_url(self) -> Optional[str]: if not self.status.endpoint or not self.status.endpoint.endpoint_url: return None - # Construct proper prediction URL return f"https://{self.config.location}-aiplatform.googleapis.com/v1/{self.status.endpoint.endpoint_url}" def get_endpoints(self) -> List[aiplatform.Endpoint]: - """Get all endpoints for the current project and location.""" + """Get all endpoints for the current project and location. + + Returns: + List of Vertex AI endpoints + """ try: # Use proper filtering and pagination return list( aiplatform.Endpoint.list( filter='labels.managed_by="zenml"', location=self.config.location, + credentials=self._credentials, ) ) except Exception as e: @@ -169,260 +170,295 @@ def get_endpoints(self) -> List[aiplatform.Endpoint]: return [] def _generate_endpoint_name(self) -> str: - """Generate a unique name for the Vertex AI Inference Endpoint.""" - # Make name more descriptive and conformant to Vertex AI naming rules - sanitized_model_name = re.sub( - r"[^a-zA-Z0-9-]", "-", self.config.model_name.lower() + """Generate a unique name for the Vertex AI Inference Endpoint. 
+ + Returns: + Generated endpoint name + """ + from zenml.integrations.gcp.model_deployers.vertex_model_deployer import ( + VertexModelDeployer, + ) + + # Include tenant ID in name for multi-tenancy support + model_deployer = cast( + VertexModelDeployer, Client().active_stack.model_deployer ) - return f"{sanitized_model_name}-{str(self.uuid)[:UUID_SLICE_LENGTH]}" + + # Make name more descriptive and conformant + sanitized_model_name = sanitize_vertex_label(self.config.model_name) + return f"{sanitized_model_name}-{model_deployer.id}-{str(self.uuid)[:UUID_SLICE_LENGTH]}" + + def _get_model_id(self, name: str) -> str: + """Helper to construct a full model ID from a given model name.""" + return f"projects/{self._project_id}/locations/{self.config.location}/models/{name}" + + def _verify_model_exists(self) -> aiplatform.Model: + """Verify the model exists and return it. + + Returns: + Vertex AI Model instance + + Raises: + RuntimeError: If model not found + """ + try: + model = aiplatform.Model( + model_name=self._get_model_id(self.config.model_name), + location=self.config.location, + credentials=self._credentials, + ) + logger.info(f"Found model to deploy: {model.resource_name}") + return model + except exceptions.NotFound: + raise RuntimeError( + f"Model {self._get_model_id(self.config.model_name)} not found in project {self._project_id}" + ) + + def _deploy_model(self) -> Any: + """Deploy model to Vertex AI endpoint.""" + # Initialize endpoint + if self.config.existing_endpoint: + endpoint = aiplatform.Endpoint( + endpoint_name=self.config.existing_endpoint, + project=self._project_id, + location=self.config.location, + credentials=self._credentials, + ) + else: + endpoint = aiplatform.Endpoint.create( + display_name=self.config.name, + project=self._project_id, + location=self.config.location, + credentials=self._credentials, + labels=self.config.get_vertex_deployment_labels(), + ) + + # Prepare deployment configuration + deploy_kwargs = { + "model_display_name": self.config.model_name, + "deployed_model_display_name": self.config.name, + "sync": False, + } + + # Add container configuration if specified + if self.config.container: + deploy_kwargs.update( + { + "container_image_uri": self.config.container.image_uri, + "container_ports": self.config.container.ports, + "container_predict_route": self.config.container.predict_route, + "container_health_route": self.config.container.health_route, + "container_env": self.config.container.env, + } + ) + + # Add resource configuration if specified + if self.config.resources: + deploy_kwargs.update( + { + "machine_type": self.config.resources.machine_type, + "min_replica_count": self.config.resources.min_replica_count, + "max_replica_count": self.config.resources.max_replica_count, + "accelerator_type": self.config.resources.accelerator_type, + "accelerator_count": self.config.resources.accelerator_count, + } + ) + + # Add explanation configuration if specified + if self.config.explanation: + deploy_kwargs.update( + { + "explanation_metadata": self.config.explanation.metadata, + "explanation_parameters": self.config.explanation.parameters, + } + ) + + # Add service account if specified + if self.config.service_account: + deploy_kwargs["service_account"] = self.config.service_account + + # Add network configuration if specified + if self.config.network: + deploy_kwargs["network"] = self.config.network + + # Add encryption key if specified + if self.config.encryption_spec_key_name: + deploy_kwargs["encryption_spec_key_name"] = ( + 
self.config.encryption_spec_key_name + ) + + # Deploy model + operation = endpoint.deploy(**deploy_kwargs) + return operation def provision(self) -> None: """Provision or update remote Vertex AI deployment instance.""" try: + # First verify model exists + model = self._verify_model_exists() + + # Get or create endpoint if self.config.existing_endpoint: - # Use the existing endpoint endpoint = aiplatform.Endpoint( endpoint_name=self.config.existing_endpoint, location=self.config.location, + credentials=self._credentials, ) logger.info( - f"Using existing Vertex AI inference endpoint: {endpoint.resource_name}" + f"Using existing endpoint: {endpoint.resource_name}" ) else: - # Create the endpoint endpoint_name = self._generate_endpoint_name() endpoint = aiplatform.Endpoint.create( display_name=endpoint_name, location=self.config.location, encryption_spec_key_name=self.config.encryption_spec_key_name, labels=self.config.get_vertex_deployment_labels(), + credentials=self._credentials, ) + logger.info(f"Created new endpoint: {endpoint.resource_name}") + + # Deploy model with retries for transient errors + try: + deploy_op = self._deploy_model() + + # Wait for deployment + deploy_op.result(timeout=POLLING_TIMEOUT) + logger.info( - f"Vertex AI inference endpoint created: {endpoint.resource_name}" + f"Model {model.resource_name} deployed to endpoint {endpoint.resource_name}" ) - - # Then get the model - model = aiplatform.Model( - model_name=self.config.model_id, - location=self.config.location, - ) - logger.info( - f"Found existing model to deploy: {model.resource_name} to the endpoint." - ) - if not model: - raise RuntimeError( - f"Model {self.config.model_id} not found in the project." + except Exception as e: + self.status.update_state( + ServiceState.ERROR, f"Deployment failed: {str(e)}" ) + raise - # Deploy the model to the endpoint - endpoint.deploy( - model=model, - deployed_model_display_name=f"{endpoint_name}-deployment", - machine_type=self.config.machine_type, - min_replica_count=self.config.min_replica_count, - max_replica_count=self.config.max_replica_count, - accelerator_type=self.config.accelerator_type, - accelerator_count=self.config.accelerator_count, - service_account=self.config.service_account, - explanation_metadata=self.config.explanation_metadata, - explanation_parameters=self.config.explanation_parameters, - sync=self.config.sync, - ) - logger.info( - f"Model {model.resource_name} successfully deployed to endpoint {endpoint.resource_name}" - ) - - # Store both endpoint and deployment information + # Update status self.status.endpoint = VertexPredictionServiceEndpoint( endpoint_name=endpoint.resource_name, endpoint_url=endpoint.resource_name, deployed_model_id=model.resource_name, + created_at=datetime.utcnow(), + updated_at=datetime.utcnow(), + state="DEPLOYED", ) - self.status.update_state(ServiceState.PENDING_STARTUP) + self.status.update_state(ServiceState.ACTIVE) logger.info( - f"Vertex AI inference endpoint successfully deployed. Pending startup" - f"Endpoint: {endpoint.resource_name}, " + f"Deployment completed successfully. 
" + f"Endpoint: {endpoint.resource_name}" ) except Exception as e: - self.status.update_state( - new_state=ServiceState.ERROR, - error=f"Deployment failed: {str(e)}", - ) - raise RuntimeError( - f"An error occurred while provisioning the Vertex AI inference endpoint: {e}" - ) + error_msg = f"Failed to provision deployment: {str(e)}" + logger.error(error_msg) + self.status.update_state(ServiceState.ERROR, error_msg) + raise RuntimeError(error_msg) def deprovision(self, force: bool = False) -> None: - """Deprovision the remote Vertex AI deployment instance.""" + """Deprovision the Vertex AI deployment. + + Args: + force: Whether to force deprovision + """ if not self.status.endpoint: + logger.warning("No endpoint to deprovision") return try: endpoint = aiplatform.Endpoint( endpoint_name=self.status.endpoint.endpoint_name, location=self.config.location, + credentials=self._credentials, ) - # First undeploy the specific model if we have its ID - if self.status.endpoint.deployed_model_id: - try: - endpoint.undeploy( - deployed_model_id=self.status.endpoint.deployed_model_id, - sync=self.config.sync, - ) - except exceptions.NotFound: - logger.warning("Deployed model already undeployed") + # Undeploy model + endpoint.undeploy_all() + + # Delete endpoint if we created it + if not self.config.existing_endpoint: + endpoint.delete() - # Then delete the endpoint - endpoint.delete(force=force, sync=self.config.sync) + logger.info(f"Deprovisioned endpoint: {endpoint.resource_name}") self.status.endpoint = None self.status.update_state(ServiceState.INACTIVE) - logger.info("Vertex AI Inference Endpoint has been deprovisioned.") - - except exceptions.NotFound: - logger.warning( - "Vertex AI Inference Endpoint not found. It may have been already deleted." - ) - self.status.update_state(ServiceState.INACTIVE) except Exception as e: - error_msg = ( - f"Failed to deprovision Vertex AI Inference Endpoint: {e}" - ) - logger.error(error_msg) + error_msg = f"Failed to deprovision deployment: {str(e)}" if not force: + logger.error(error_msg) + self.status.update_state(ServiceState.ERROR, error_msg) raise RuntimeError(error_msg) - - def check_status(self) -> Tuple[ServiceState, str]: - """Check the current operational state of the Vertex AI deployment.""" - if not self.status.endpoint: - return ServiceState.INACTIVE, "Endpoint not provisioned" - try: - logger.info( - f"Checking status of Vertex AI Inference Endpoint: {self.status.endpoint.endpoint_name}" - ) - endpoint = aiplatform.Endpoint( - endpoint_name=self.status.endpoint.endpoint_name, - location=self.config.location, - ) - - # Get detailed deployment status - deployment = None - if self.status.endpoint.deployed_model_id: - deployments = [ - d - for d in endpoint.list_models() - if d.model == self.status.endpoint.deployed_model_id - ] - if deployments: - deployment = deployments[0] - logger.info( - f"Model {self.status.endpoint.deployed_model_id} was deployed to the endpoint" - ) - - if not deployment: + else: logger.warning( - "No matching deployment found, endpoint may be inactive or failed to deploy" + f"Error during forced deprovision (ignoring): {error_msg}" ) - return ServiceState.INACTIVE, "No matching deployment found" + self.status.update_state(ServiceState.INACTIVE) - return ServiceState.ACTIVE, "Deployment is ready" + def start_deployment( + self, timeout: int = POLLING_TIMEOUT + ) -> aiplatform.Endpoint: + """Start the Vertex AI deployment and wait until it's ready. 
- except exceptions.NotFound: - return ServiceState.INACTIVE, "Endpoint not found" - except Exception as e: - return ServiceState.ERROR, f"Error checking status: {str(e)}" - - def predict(self, instances: List[Any]) -> List[Any]: - """Make a prediction using the service.""" - if not self.is_running: - raise Exception( - "Vertex AI endpoint inference service is not running. " - "Please start the service before making predictions." - ) + This method initiates the deployment (via a helper, e.g. _deploy_model()) and then + blocks until the underlying operation is completed using wait(). - if not self.status.endpoint: - raise Exception("Endpoint information is missing.") + Args: + timeout: Maximum time (in seconds) to wait for deployment readiness. + + Returns: + The deployed Vertex AI Endpoint object. + Raises: + RuntimeError: If the deployment operation fails. + """ try: + # _deploy_model() is assumed to initiate deployment and return an operation object. + # The operation object has a wait() method. + operation = ( + self._deploy_model() + ) # <-- your deployment call; adjust as needed + logger.info( + "Deployment operation initiated. Waiting for deployment to be ready..." + ) + operation.wait(timeout=timeout) + + # After waiting, retrieve the endpoint object. endpoint = aiplatform.Endpoint( - endpoint_name=self.status.endpoint.endpoint_name, + endpoint_name=operation.resource.name, location=self.config.location, + credentials=self._credentials, ) - # Add proper prediction parameters and handle sync/async - predictions = endpoint.predict( - instances=instances, - deployed_model_id=self.status.endpoint.deployed_model_id.split( - "/" - )[-1] - if self.status.endpoint.deployed_model_id - else None, - timeout=30, # Add reasonable timeout + self.status.endpoint = endpoint + self.status.update_state(ServiceState.ACTIVE) + logger.info( + f"Deployment is ready at endpoint: {endpoint.resource_name}" ) - - if not predictions: - raise RuntimeError("No predictions returned") - + return endpoint except Exception as e: - logger.error(f"Prediction failed: {e}") - raise RuntimeError(f"Prediction failed: {str(e)}") - - return [predictions] + logger.error(f"Deployment failed: {e}") + self.status.update_state(ServiceState.ERROR, str(e)) + raise RuntimeError(f"Deployment failed: {e}") def get_logs( - self, follow: bool = False, tail: Optional[int] = None + self, + follow: bool = False, + tail: Optional[int] = None, ) -> Generator[str, bool, None]: - """Retrieve the service logs from Cloud Logging. + """Retrieve logs for the Vertex AI deployment (not supported). - Args: - follow: If True, continuously yield new logs - tail: Number of most recent logs to return + Yields: + Log entries as strings, but logs are not supported for Vertex AI. 
""" - if not self.status.endpoint: - yield "No endpoint deployed yet" - return - - try: - # Create filter for Vertex AI endpoint logs - endpoint_id = self.status.endpoint.endpoint_name.split("/")[-1] - filter_str = ( - f'resource.type="aiplatform.googleapis.com/Endpoint" ' - f'resource.labels.endpoint_id="{endpoint_id}" ' - f'resource.labels.location="{self.config.location}"' - ) + logger.warning("Logs are not supported for Vertex AI") + yield from () - # Set time range for logs - if tail: - filter_str += f" limit {tail}" - - # Get log iterator - iterator = self.logging_client.list_entries( - filter_=filter_str, order_by=vertex_logging.DESCENDING - ) - - # Yield historical logs - for entry in iterator: - yield f"[{entry.timestamp}] {entry.severity}: {entry.payload.get('message', '')}" - - # If following logs, continue to stream new entries - if follow: - while True: - time.sleep(2) # Poll every 2 seconds - for entry in self.logging_client.list_entries( - filter_=filter_str, - order_by=vertex_logging.DESCENDING, - page_size=1, - ): - yield f"[{entry.timestamp}] {entry.severity}: {entry.payload.get('message', '')}" - - except Exception as e: - error_msg = f"Failed to retrieve logs: {str(e)}" - logger.error(error_msg) - yield error_msg + def check_status(self) -> None: + """Check the status of the deployment (no-op implementation).""" + return @property def is_running(self) -> bool: diff --git a/src/zenml/integrations/sklearn/materializers/sklearn_materializer.py b/src/zenml/integrations/sklearn/materializers/sklearn_materializer.py index d0b22d99e83..df8cf57f304 100644 --- a/src/zenml/integrations/sklearn/materializers/sklearn_materializer.py +++ b/src/zenml/integrations/sklearn/materializers/sklearn_materializer.py @@ -1,3 +1,16 @@ +# Copyright (c) ZenML GmbH 2021. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing +# permissions and limitations under the License. """Implementation of the sklearn materializer.""" import os diff --git a/src/zenml/model_registries/base_model_registry.py b/src/zenml/model_registries/base_model_registry.py index 3e3019dfb6f..72fdd123b89 100644 --- a/src/zenml/model_registries/base_model_registry.py +++ b/src/zenml/model_registries/base_model_registry.py @@ -62,7 +62,7 @@ class ModelRegistryModelMetadata(BaseModel): model and its development process. """ - _managed_by: str = "zenml" + managed_by: str = "zenml" zenml_version: Optional[str] = None zenml_run_name: Optional[str] = None zenml_pipeline_name: Optional[str] = None @@ -71,15 +71,6 @@ class ModelRegistryModelMetadata(BaseModel): zenml_step_name: Optional[str] = None zenml_workspace: Optional[str] = None - @property - def managed_by(self) -> str: - """Returns the managed_by attribute. - - Returns: - The managed_by attribute. - """ - return self._managed_by - @property def custom_attributes(self) -> Dict[str, str]: """Returns a dictionary of custom attributes. 
From 8f074efcfb92949814d645692600c4dd65521a04 Mon Sep 17 00:00:00 2001 From: Safoine El khabich Date: Tue, 4 Feb 2025 15:47:40 +0100 Subject: [PATCH 26/43] refactor: remove direct attribute from ModelRegistryModelMetadata and implement managed_by as a property --- .../component-guide/model-deployers/vertex.md | 245 ++++++------ .../model-registries/vertex.md | 239 +++++++----- .../model_deployers/vertex_model_deployer.py | 3 - .../model_registries/vertex_model_registry.py | 358 ++++++++++------- .../gcp/services/vertex_deployment.py | 360 +++++++++--------- .../model_registries/base_model_registry.py | 10 +- 6 files changed, 656 insertions(+), 559 deletions(-) diff --git a/docs/book/component-guide/model-deployers/vertex.md b/docs/book/component-guide/model-deployers/vertex.md index cc7f3154969..df8fa1c2ff0 100644 --- a/docs/book/component-guide/model-deployers/vertex.md +++ b/docs/book/component-guide/model-deployers/vertex.md @@ -1,34 +1,26 @@ # Vertex AI Model Deployer -[Vertex AI](https://cloud.google.com/vertex-ai) provides managed infrastructure for deploying machine learning models at scale. The Vertex AI Model Deployer in ZenML allows you to deploy models to Vertex AI endpoints, providing a scalable and managed solution for model serving. +[Vertex AI](https://cloud.google.com/vertex-ai) provides managed infrastructure for deploying machine learning models at scale. The Vertex AI Model Deployer in ZenML allows you to deploy models to Vertex AI endpoints, providing a scalable and fully managed solution for model serving. ## When to use it? -You should use the Vertex AI Model Deployer when: +Use the Vertex AI Model Deployer when: -* You're already using Google Cloud Platform (GCP) and want to leverage its native ML infrastructure -* You need enterprise-grade model serving capabilities with autoscaling -* You want a fully managed solution for hosting ML models -* You need to handle high-throughput prediction requests -* You want to deploy models with GPU acceleration -* You need to monitor and track your model deployments -* You want to integrate with other GCP services like Cloud Logging, IAM, and VPC +- You are leveraging Google Cloud Platform (GCP) and wish to integrate with its native ML serving infrastructure. +- You need enterprise-grade model serving capabilities complete with autoscaling and GPU acceleration. +- You require a fully managed solution that abstracts away the operational overhead of serving models. +- You need to deploy models directly from your Vertex AI Model Registry—or even from other registries or artifacts. +- You want seamless integration with GCP services like Cloud Logging, IAM, and VPC. -This is particularly useful in the following scenarios: -* Deploying models to production with high availability requirements -* Serving models that need GPU acceleration -* Handling varying prediction workloads with autoscaling -* Building end-to-end ML pipelines on GCP +This deployer is especially useful for production deployments, high-availability serving, and dynamic scaling based on workloads. {% hint style="info" %} -The Vertex AI Model Deployer works best with a Vertex AI Model Registry in your stack, as this enables seamless model versioning and deployment. However, it can also work with other model registries or directly with model artifacts. - -The deployer can be used with both local and remote orchestrators, making it flexible for different development and production scenarios. 
+For best results, the Vertex AI Model Deployer works with a Vertex AI Model Registry in your ZenML stack. This allows you to register models with detailed metadata and configuration and then deploy a specific version seamlessly. {% endhint %} ## How to deploy it? -The Vertex AI Model Deployer is provided by the GCP ZenML integration. First, install the integration: +The Vertex AI Model Deployer is enabled via the ZenML GCP integration. First, install the integration: ```shell zenml integration install gcp -y @@ -36,7 +28,7 @@ zenml integration install gcp -y ### Authentication and Service Connector Configuration -The Vertex AI Model Deployer requires proper GCP authentication. The recommended way to configure this is using the ZenML Service Connector functionality: +The deployer requires proper GCP authentication. The recommended approach is to use the ZenML Service Connector: ```shell # Register the service connector with a service account key @@ -55,19 +47,24 @@ zenml model-deployer register vertex_deployer \ ``` {% hint style="info" %} -The service account needs the following permissions: -- `Vertex AI User` role for deploying models -- `Vertex AI Service Agent` role for managing model endpoints -- `Storage Object Viewer` role if accessing models stored in Google Cloud Storage +The service account used for deployment must have the following permissions: +- `Vertex AI User` to enable model deployments +- `Vertex AI Service Agent` for model endpoint management +- `Storage Object Viewer` if the model artifacts reside in Google Cloud Storage {% endhint %} ## How to use it -A full project example is available in the [ZenML Examples repository](https://github.com/zenml-io/zenml-projects/tree/main/vertex-registry-and-deployer). +A complete usage example is available in the [ZenML Examples repository](https://github.com/zenml-io/zenml-projects/tree/main/vertex-registry-and-deployer). + +### Deploying a Model in a Pipeline -### Deploy a model in a pipeline +Below is an example of a deployment step that uses the updated configuration options. In this example, the deployment configuration supports: -Here's an example of how to use the Vertex AI Model Deployer in a ZenML pipeline: +- **Model versioning**: Explicitly provide the model version (using the full resource name from the model registry). +- **Display name and Sync mode**: Fields such as `display_name` (for a friendly endpoint name) and `sync` (to wait for deployment completion) are now available. +- **Traffic configuration**: Route a certain percentage (e.g., 100%) of traffic to this deployment. +- **Advanced options**: You can still specify custom container settings, resource specifications (including GPU options), and explanation configuration via shared classes from `vertex_base_config.py`. 
```python from typing_extensions import Annotated @@ -80,129 +77,111 @@ from zenml.integrations.gcp.services.vertex_deployment import ( @step(enable_cache=False) def model_deployer( + model_registry_uri: str, + is_promoted: bool = False, ) -> Annotated[ - VertexDeploymentService, - ArtifactConfig(name="vertex_deployment", is_deployment_artifact=True) + VertexDeploymentService, + ArtifactConfig(name="vertex_deployment", is_deployment_artifact=True), ]: - """Model deployer step.""" - zenml_client = Client() - current_model = get_step_context().model - model_registry_uri = current_model.get_model_artifact("model").uri - model_deployer = zenml_client.active_stack.model_deployer - - # Configure the deployment - vertex_deployment_config = VertexDeploymentConfig( - location="europe-west1", - name="zenml-vertex-quickstart", - model_name=current_model.name, - description="Vertex AI model deployment example", - model_id=model_registry_uri, - machine_type="n1-standard-4", - min_replica_count=1, - max_replica_count=3, - # Optional advanced settings - container=VertexAIContainerSpec( - image_uri="your-custom-image:latest", - ports=[8080], - env={"ENV_VAR": "value"} - ), - resources=VertexAIResourceSpec( - accelerator_type="NVIDIA_TESLA_T4", - accelerator_count=1 - ), - explanation=VertexAIExplanationSpec( - metadata={"method": "integrated-gradients"}, - parameters={"num_integral_steps": 50} + """Model deployer step. + + Args: + model_registry_uri: The full resource name of the model in the registry. + is_promoted: Flag indicating if the model is promoted to production. + + Returns: + The deployed model service. + """ + if not is_promoted: + # Skip deployment if the model is not promoted. + return None + else: + zenml_client = Client() + current_model = get_step_context().model + model_deployer = zenml_client.active_stack.model_deployer + + # Create deployment configuration with advanced options. + vertex_deployment_config = VertexDeploymentConfig( + location="europe-west1", + name=current_model.name, # Unique endpoint name in Vertex AI. + display_name="zenml-vertex-quickstart", + model_name=model_registry_uri, # Fully qualified model name (from model registry). + model_version=current_model.version, # Specify the model version explicitly. + description="An example of deploying a model using the Vertex AI Model Deployer", + sync=True, # Wait for deployment to complete before proceeding. + traffic_percentage=100, # Route 100% of traffic to this model version. 
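+            # When traffic_percentage is below 100, the remaining share of
+            # requests keeps being served by models already deployed on the
+            # endpoint, which enables canary-style rollouts.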
+ # (Optional) Advanced configurations: + # container=VertexAIContainerSpec( + # image_uri="your-custom-image:latest", + # ports=[8080], + # env={"ENV_VAR": "value"} + # ), + # resources=VertexAIResourceSpec( + # accelerator_type="NVIDIA_TESLA_T4", + # accelerator_count=1, + # machine_type="n1-standard-4", + # min_replica_count=1, + # max_replica_count=3, + # ), + # explanation=VertexAIExplanationSpec( + # metadata={"method": "integrated-gradients"}, + # parameters={"num_integral_steps": 50} + # ) ) - ) - - # Deploy the model - service = model_deployer.deploy_model( - config=vertex_deployment_config, - service_type=VertexDeploymentService.SERVICE_TYPE, - ) - - return service -``` -### Configuration Options - -The Vertex AI Model Deployer uses a comprehensive configuration system that includes: + service = model_deployer.deploy_model( + config=vertex_deployment_config, + service_type=VertexDeploymentService.SERVICE_TYPE, + ) -* Basic Configuration: - * `location`: GCP region for deployment (e.g., "us-central1") - * `name`: Name for the deployment endpoint - * `model_name`: Name of the model being deployed - * `model_id`: Model ID from the Vertex AI Model Registry + return service +``` -* Container Configuration (`VertexAIContainerSpec`): - * `image_uri`: Custom serving container image - * `ports`: Container ports to expose - * `env`: Environment variables - * `predict_route`: Custom prediction HTTP path - * `health_route`: Custom health check path +*Example: [`model_deployer.py`](../../examples/vertex-registry-and-deployer/steps/model_deployer.py)* -* Resource Configuration (`VertexAIResourceSpec`): - * `machine_type`: Type of machine to use (e.g., "n1-standard-4") - * `accelerator_type`: GPU accelerator type - * `accelerator_count`: Number of GPUs per replica - * `min_replica_count`: Minimum number of serving replicas - * `max_replica_count`: Maximum number of serving replicas +### Configuration Options -* Advanced Configuration: - * `service_account`: Custom service account for the deployment - * `network`: VPC network configuration - * `encryption_spec_key_name`: Customer-managed encryption key - * `enable_access_logging`: Enable detailed access logging - * `explanation`: Model explanation configuration - * `labels`: Custom resource labels +The Vertex AI Model Deployer leverages a comprehensive configuration system defined in the shared base configuration and deployer-specific settings: -### Running Predictions +- **Basic Settings:** + - `location`: The GCP region for deployment (e.g., "us-central1" or "europe-west1"). + - `name`: Unique identifier for the deployed endpoint. + - `display_name`: A human-friendly name for the endpoint. + - `model_name`: The fully qualified model name from the model registry. + - `model_version`: The version of the model to deploy. + - `description`: A textual description of the deployment. + - `sync`: A flag to indicate whether the deployment should wait until completion. + - `traffic_percentage`: The percentage of incoming traffic to route to this deployment. -Once a model is deployed, you can run predictions using the service: +- **Container and Resource Configuration:** + - Configurations provided via [VertexAIContainerSpec](../../integrations/gcp/flavors/vertex_base_config.py) allow you to specify a custom serving container image, HTTP routes (`predict_route`, `health_route`), environment variables, and port exposure. 
+ - [VertexAIResourceSpec](../../integrations/gcp/flavors/vertex_base_config.py) lets you override the default machine type, number of replicas, and even GPU options. -```python -from zenml.integrations.gcp.model_deployers import VertexModelDeployer -from zenml.services import ServiceState - -# Get the deployed service -model_deployer = VertexModelDeployer.get_active_model_deployer() -services = model_deployer.find_model_server( - pipeline_name="deployment_pipeline", - pipeline_step_name="model_deployer", - model_name="my_model", -) +- **Advanced Settings:** + - Service account, network configuration, and customer-managed encryption keys. + - Model explanation settings via `VertexAIExplanationSpec` if you need integrated model interpretability. -if services: - service = services[0] - if service.is_running: - # Run prediction - prediction = service.predict( - instances=[{"feature1": 1.0, "feature2": 2.0}] - ) - print(f"Prediction: {prediction}") -``` +These options are defined across the [Vertex AI Base Config](../../integrations/gcp/flavors/vertex_base_config.py) and the deployer–specific configuration in [VertexModelDeployerFlavor](../../integrations/gcp/flavors/vertex_model_deployer_flavor.py). ### Limitations and Considerations -1. **Stack Requirements**: - - Works best with a Vertex AI Model Registry but can function without it - - Compatible with both local and remote orchestrators - - Requires valid GCP credentials and permissions +1. **Stack Requirements:** + - It is recommended to pair the deployer with a Vertex AI Model Registry in your stack. + - Compatible with both local and remote orchestrators. + - Requires valid GCP credentials and permissions. -2. **Authentication**: - - Requires proper GCP credentials with Vertex AI permissions - - Best practice is to use service connectors for authentication - - Supports multiple authentication methods (service account, user account, workload identity) +2. **Authentication:** + - Best practice is to use service connectors for secure and managed authentication. + - Supports multiple authentication methods (service accounts, local credentials). -3. **Costs**: - - Vertex AI endpoints incur costs based on machine type and uptime - - Consider using autoscaling to optimize costs - - Monitor usage through GCP Cloud Monitoring +3. **Costs:** + - Vertex AI endpoints will incur costs based on machine type and uptime. + - Utilize autoscaling (via configured `min_replica_count` and `max_replica_count`) to manage cost. -4. **Region Availability**: - - Service availability depends on Vertex AI regional availability - - Model and endpoint must be in the same region - - Consider data residency requirements when choosing regions +4. **Region Consistency:** + - Ensure that the model and deployment are created in the same GCP region. -Check out the [SDK docs](https://sdkdocs.zenml.io) for more detailed information about the implementation. 
\ No newline at end of file +For more details, please refer to the [SDK docs](https://sdkdocs.zenml.io) and the relevant implementation files: +- [`vertex_model_deployer.py`](../../integrations/gcp/model_deployers/vertex_model_deployer.py) +- [`vertex_base_config.py`](../../integrations/gcp/flavors/vertex_base_config.py) +- [`vertex_model_deployer_flavor.py`](../../integrations/gcp/flavors/vertex_model_deployer_flavor.py) \ No newline at end of file diff --git a/docs/book/component-guide/model-registries/vertex.md b/docs/book/component-guide/model-registries/vertex.md index eef9096ce62..f4e32ffb514 100644 --- a/docs/book/component-guide/model-registries/vertex.md +++ b/docs/book/component-guide/model-registries/vertex.md @@ -1,37 +1,29 @@ # Vertex AI Model Registry -[Vertex AI](https://cloud.google.com/vertex-ai) is Google Cloud's unified ML platform that helps you build, deploy, and scale ML models. The Vertex AI Model Registry is a centralized repository for managing your ML models throughout their lifecycle. ZenML's Vertex AI Model Registry integration allows you to register, version, and manage your models using Vertex AI's infrastructure. +[Vertex AI](https://cloud.google.com/vertex-ai) is Google Cloud's unified ML platform that helps you build, deploy, and scale ML models. The Vertex AI Model Registry is a centralized repository for managing your ML models throughout their lifecycle. With ZenML's Vertex AI Model Registry integration, you can register model versions—with extended configuration options—track metadata, and seamlessly deploy your models using Vertex AI's managed infrastructure. ## When would you want to use it? You should consider using the Vertex AI Model Registry when: -* You're already using Google Cloud Platform (GCP) and want to leverage its native ML infrastructure -* You need enterprise-grade model management capabilities with fine-grained access control -* You want to track model lineage and metadata in a centralized location -* You're building ML pipelines that need to integrate with other Vertex AI services -* You need to manage model deployment across different GCP environments +- You're already using Google Cloud Platform (GCP) and want to leverage its native ML infrastructure. +- You need enterprise-grade model management with fine-grained access control. +- You want to track model lineage and metadata in a centralized location. +- You're building ML pipelines that integrate with other Vertex AI services. +- You need to deploy models with custom configurations such as defined container images, resource specifications, and additional metadata. -This is particularly useful in the following scenarios: - -* Building production ML pipelines that need to integrate with GCP services -* Managing multiple versions of models across development and production environments -* Tracking model artifacts and metadata in a centralized location -* Deploying models to Vertex AI endpoints for serving +This registry is particularly useful in scenarios where you: +- Build production ML pipelines that require deployment to Vertex AI endpoints. +- Manage multiple versions of models across development, staging, and production. +- Need to register model versions with detailed configuration for robust deployment. {% hint style="warning" %} -Important: The Vertex AI Model Registry implementation only supports the model version interface, not the model interface. This means you cannot register, delete, or update models directly - you can only work with model versions. 
Operations like `register_model()`, `delete_model()`, and `update_model()` are not supported. - -Unlike platforms like MLflow where you first create a model container and then add versions to it, Vertex AI combines model registration and versioning into a single operation: - -- When you upload a model, it automatically creates both the model and its first version -- Each subsequent upload with the same display name creates a new version -- You cannot create an empty model container without a version +**Important:** The Vertex AI Model Registry implementation only supports the model **version** interface—not the model interface. This means that you cannot directly register, update, or delete models; you only have operations for model versions. A model container is automatically created with the first version, and subsequent uploads with the same display name create new versions. {% endhint %} ## How do you deploy it? -The Vertex AI Model Registry flavor is provided by the GCP ZenML integration. First, install the integration: +The Vertex AI Model Registry flavor is enabled through the ZenML GCP integration. First, install the integration: ```shell zenml integration install gcp -y @@ -39,87 +31,132 @@ zenml integration install gcp -y ### Authentication and Service Connector Configuration -The Vertex AI Model Registry requires proper GCP authentication. The recommended way to configure this is using the ZenML Service Connector functionality. You have several options for authentication: - -1. Using a GCP Service Connector with a dedicated service account (Recommended): -```shell -# Register the service connector with a service account key -zenml service-connector register vertex_registry_connector \ - --type gcp \ - --auth-method=service-account \ - --project_id= \ - --service_account_json=@vertex-registry-sa.json \ - --resource-type gcp-generic - -# Register the model registry -zenml model-registry register vertex_registry \ - --flavor=vertex \ - --location=us-central1 - -# Connect the model registry to the service connector -zenml model-registry connect vertex_registry --connector vertex_registry_connector -``` - -2. Using local gcloud credentials: -```shell -# Register the model registry using local gcloud auth -zenml model-registry register vertex_registry \ - --flavor=vertex \ - --location=us-central1 -``` +Vertex AI requires proper GCP authentication. The recommended configuration is via the ZenML Service Connector, which supports both service-account-based authentication and local gcloud credentials. + +1. **Using a GCP Service Connector with a service account (Recommended):** + ```shell + # Register the service connector with a service account key + zenml service-connector register vertex_registry_connector \ + --type gcp \ + --auth-method=service-account \ + --project_id= \ + --service_account_json=@vertex-registry-sa.json \ + --resource-type gcp-generic + + # Register the model registry + zenml model-registry register vertex_registry \ + --flavor=vertex \ + --location=us-central1 + + # Connect the model registry to the service connector + zenml model-registry connect vertex_registry --connector vertex_registry_connector + ``` +2. 
**Using local gcloud credentials:** + ```shell + # Register the model registry using local gcloud auth + zenml model-registry register vertex_registry \ + --flavor=vertex \ + --location=us-central1 + ``` {% hint style="info" %} -The service account used needs the following permissions: -- `Vertex AI User` role for creating and managing model versions -- `Storage Object Viewer` role if accessing models stored in Google Cloud Storage +The service account needs the following permissions: +- `Vertex AI User` role for creating and managing model versions. +- `Storage Object Viewer` role if accessing models stored in Google Cloud Storage. {% endhint %} ## How do you use it? -### Register models inside a pipeline +### Registering Models inside a Pipeline with Extended Configuration + +The Vertex AI Model Registry supports extended configuration options via the `VertexAIModelConfig` class (defined in the [vertex_base_config.py](../../integrations/gcp/flavors/vertex_base_config.py) file). This means you can specify additional details for your deployments such as: -Here's an example of how to use the Vertex AI Model Registry in your ZenML pipeline using the provided model registration step: +- **Container configuration**: Use the `VertexAIContainerSpec` to define a custom serving container (e.g., specifying the `image_uri`, `predict_route`, `health_route`, and exposed ports). +- **Resource configuration**: Use the `VertexAIResourceSpec` to specify compute resources like `machine_type`, `min_replica_count`, and `max_replica_count`. +- **Additional metadata and labels**: Annotate your model registrations with pipeline details, stage information, and custom labels. + +Below is an example of how you might register a model version in your ZenML pipeline: ```python from typing_extensions import Annotated + from zenml import ArtifactConfig, get_step_context, step from zenml.client import Client +from zenml.integrations.gcp.flavors.vertex_base_config import ( + VertexAIContainerSpec, + VertexAIModelConfig, + VertexAIResourceSpec, +) from zenml.logger import get_logger +from zenml.model_registries.base_model_registry import ( + ModelRegistryModelMetadata, +) logger = get_logger(__name__) + @step(enable_cache=False) -def model_register() -> Annotated[str, ArtifactConfig(name="model_registry_uri")]: - """Model registration step.""" - # Get the current model from the context - current_model = get_step_context().model - - client = Client() - model_registry = client.active_stack.model_registry - model_version = model_registry.register_model_version( - name=current_model.name, - version=str(current_model.version), - model_source_uri=current_model.get_model_artifact("sklearn_classifier").uri, - description="ZenML model registered after promotion", - ) - logger.info( - f"Model version {model_version.version} registered in Model Registry" - ) - - return model_version.model_source_uri +def model_register( + is_promoted: bool = False, +) -> Annotated[str, ArtifactConfig(name="model_registry_uri")]: + """Model registration step. + + Registers a model version in the Vertex AI Model Registry with extended configuration + and returns the full resource name of the registered model. + + Extended configuration includes settings for container, resources, and metadata which can then be reused in + subsequent model deployments. 
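+
+    Args:
+        is_promoted: Whether the model was approved by an upstream promotion
+            step; when False, registration is skipped and an empty string is
+            returned.
+
+    Returns:
+        The full resource name of the registered model version, or an empty
+        string if registration was skipped.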
+ """ + if is_promoted: + # Get the current model from the step context + current_model = get_step_context().model + + client = Client() + model_registry = client.active_stack.model_registry + # Create an extended model configuration using Vertex AI base settings + model_config = VertexAIModelConfig( + location="europe-west1", + container=VertexAIContainerSpec( + image_uri="europe-docker.pkg.dev/vertex-ai/prediction/sklearn-cpu.1-5:latest", + predict_route="predict", + health_route="health", + ports=[8080], + ), + resources=VertexAIResourceSpec( + machine_type="n1-standard-4", + min_replica_count=1, + max_replica_count=1, + ), + labels={"env": "production"}, + description="Extended model configuration for Vertex AI", + ) + + # Register the model version with the extended configuration as metadata + model_version = model_registry.register_model_version( + name=current_model.name, + version=str(current_model.version), + model_source_uri=current_model.get_model_artifact("sklearn_classifier").uri, + description="ZenML model version registered with extended configuration", + metadata=ModelRegistryModelMetadata( + zenml_pipeline_name=get_step_context().pipeline.name, + zenml_pipeline_run_uuid=str(get_step_context().pipeline_run.id), + zenml_step_name=get_step_context().step_run.name, + ), + config=model_config, + ) + logger.info(f"Model version {model_version.version} registered in Model Registry") + + # Return the full resource name of the registered model + return model_version.registered_model.name + else: + return "" ``` -### Configuration Options - -The Vertex AI Model Registry accepts the following configuration options: - -* `location`: The GCP region where the model registry will be created (e.g., "us-central1") -* `project_id`: (Optional) The GCP project ID. If not specified, will use the default project -* `credentials`: (Optional) GCP credentials configuration +*Example: [`model_register.py`](../../examples/vertex-registry-and-deployer/steps/model_register.py)* ### Working with Model Versions -Since the Vertex AI Model Registry only supports version-level operations, here's how to work with model versions: +Since the Vertex AI Model Registry supports only version-level operations, here are some commands to manage model versions: ```shell # List all model versions @@ -132,25 +169,39 @@ zenml model-registry models get-version -v zenml model-registry models delete-version -v ``` -### Key Differences from MLflow Model Registry +### Configuration Options + +The Vertex AI Model Registry accepts several configuration options, now enriched with extended settings: + +- **location**: The GCP region where your resources will be created (e.g., "us-central1" or "europe-west1"). +- **project_id**: (Optional) A GCP project ID override. +- **credentials**: (Optional) GCP credentials configuration. +- **container**: (Optional) Detailed container settings (defined via `VertexAIContainerSpec`) for the model's serving container such as: + - `image_uri` + - `predict_route` + - `health_route` + - `ports` +- **resources**: (Optional) Compute resource settings (using `VertexAIResourceSpec`) like `machine_type`, `min_replica_count`, and `max_replica_count`. +- **labels** and **metadata**: Additional annotation data for organizing and tracking your model versions. 
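+
+As a quick illustration, these options can be combined into a single configuration object (a sketch reusing the classes from the registration example above; all field values are illustrative):
+
+```python
+from zenml.integrations.gcp.flavors.vertex_base_config import (
+    VertexAIContainerSpec,
+    VertexAIModelConfig,
+    VertexAIResourceSpec,
+)
+
+model_config = VertexAIModelConfig(
+    location="us-central1",
+    container=VertexAIContainerSpec(
+        image_uri="europe-docker.pkg.dev/vertex-ai/prediction/sklearn-cpu.1-5:latest",
+        ports=[8080],
+    ),
+    resources=VertexAIResourceSpec(
+        machine_type="n1-standard-4",
+        min_replica_count=1,
+        max_replica_count=2,
+    ),
+    labels={"env": "staging"},
+)
+```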
-Unlike the MLflow Model Registry, the Vertex AI implementation has some important differences: +These configuration options are specified in the [Vertex AI Base Config](../../integrations/gcp/flavors/vertex_base_config.py) and further extended in the [Vertex AI Model Registry Flavor](../../integrations/gcp/flavors/vertex_model_registry_flavor.py). -1. **Version-Only Interface**: Vertex AI only supports model version operations. You cannot register, delete, or update models directly - only their versions. -2. **Authentication**: Uses GCP service connectors for authentication, similar to other Vertex AI services in ZenML. -3. **Staging Levels**: Vertex AI doesn't have built-in staging levels (like Production, Staging, etc.) - these are handled through metadata. -4. **Default Container Images**: Vertex AI requires a serving container image URI, which defaults to the scikit-learn prediction container if not specified. -5. **Managed Service**: As a fully managed service, you don't need to worry about infrastructure management, but you need valid GCP credentials. +### Key Differences from Other Model Registries -### Limitations +1. **Version-Only Interface**: Vertex AI only supports version-level operations for model registration. +2. **Authentication**: Uses GCP service connectors and local credentials integrated via ZenML. +3. **Extended Configuration**: Register model versions with detailed settings for container, resources, and metadata through `VertexAIModelConfig`. +4. **Managed Service**: As a fully managed service, Vertex AI handles infrastructure management while you focus on your ML models. -Based on the implementation, there are some limitations to be aware of: +## Limitations -1. The `register_model()`, `update_model()`, and `delete_model()` methods are not implemented as Vertex AI only supports registering model versions -3. It's preferable for the models to be given a serving container image URI specified to avoid using the default scikit-learn prediction container and to ensure compatibility with Vertex AI endpoints -when deploying models. -4. All registered models by the integration are automatically labeled with `managed_by="zenml"` for tracking purposes +- The methods `register_model()`, `update_model()`, and `delete_model()` are not implemented; you can only work with model versions. +- It is recommended to specify a serving container image URI rather than rely on the default scikit-learn container to ensure compatibility with Vertex AI endpoints. +- All models registered through this integration are automatically labeled with `managed_by="zenml"` for consistent tracking. -Check out the [SDK docs](https://sdkdocs.zenml.io/latest/integration\_code\_docs/integrations-gcp/#zenml.integrations.gcp.model\_registry) to see more about the interface and implementation. +For more detailed information, check out the [SDK docs](https://sdkdocs.zenml.io/latest/integration_code_docs/integrations-gcp/#zenml.integrations.gcp.model_registry). -
<figure><img alt="ZenML Scarf"></figure>
\ No newline at end of file +
+ ZenML Scarf +
<figure><img alt="ZenML in action"></figure>
+
\ No newline at end of file diff --git a/src/zenml/integrations/gcp/model_deployers/vertex_model_deployer.py b/src/zenml/integrations/gcp/model_deployers/vertex_model_deployer.py index e8e6df120dd..0256dde9368 100644 --- a/src/zenml/integrations/gcp/model_deployers/vertex_model_deployer.py +++ b/src/zenml/integrations/gcp/model_deployers/vertex_model_deployer.py @@ -257,7 +257,4 @@ def get_model_server_info( # type: ignore[override] return { "prediction_url": service_instance.get_prediction_url(), "status": service_instance.status.state.value, - "endpoint_id": service_instance.status.endpoint.endpoint_name - if service_instance.status.endpoint - else None, } diff --git a/src/zenml/integrations/gcp/model_registries/vertex_model_registry.py b/src/zenml/integrations/gcp/model_registries/vertex_model_registry.py index 64dff0a3592..3fa8eb25e2b 100644 --- a/src/zenml/integrations/gcp/model_registries/vertex_model_registry.py +++ b/src/zenml/integrations/gcp/model_registries/vertex_model_registry.py @@ -14,10 +14,10 @@ """Vertex AI model registry integration for ZenML.""" import base64 +import re from datetime import datetime from typing import Any, Dict, List, Optional, Tuple, cast -from google.api_core import exceptions from google.cloud import aiplatform from zenml.client import Client @@ -68,24 +68,35 @@ def _sanitize_label(self, value: str) -> str: """ if not value: return "" - # Convert to lowercase and replace invalid chars + + # Convert to lowercase value = value.lower() - value = "".join( - c if c.isalnum() or c in ["-", "_"] else "-" for c in value - ) - # Ensure starts with letter/number + + # Replace any character that's not lowercase letter, number, dash or underscore + value = re.sub(r"[^a-z0-9\-_]", "-", value) + + # Ensure it starts with a letter/number by prepending 'x' if needed if not value[0].isalnum(): value = f"x{value}" - return value[:MAX_LABEL_KEY_LENGTH] - def _get_tenant_id(self) -> str: - """Get the current ZenML server/tenant ID for multi-tenancy support. + # Truncate to 63 chars to stay under limit + return value[:63] + + def _get_deployer_id(self) -> str: + """Get the current ZenML server/deployer ID for multi-tenancy support. Returns: - The tenant ID string + The deployer ID string """ + from zenml.integrations.gcp.model_deployers.vertex_model_deployer import ( + VertexModelDeployer, + ) + client = Client() - return str(client.active_stack_model.id) + model_deployer = client.active_stack.model_deployer + if not isinstance(model_deployer, VertexModelDeployer): + raise ValueError("VertexModelDeployer is not active in the stack.") + return str(model_deployer.id) def _encode_name_version(self, name: str, version: str) -> str: """Encode model name and version into a Vertex AI compatible format. @@ -133,30 +144,66 @@ def _prepare_labels( metadata: Optional[Dict[str, str]] = None, stage: Optional[ModelVersionStage] = None, ) -> Dict[str, str]: - """Prepare labels for Vertex AI, including internal ZenML metadata.""" + """Prepare labels for Vertex AI model. 
+ + Args: + metadata: Optional metadata to include as labels + stage: Optional model version stage + + Returns: + Dictionary of sanitized labels + """ labels = {} - # Add internal ZenML labels + # Add base labels labels["managed_by"] = "zenml" - tenant_id = self._sanitize_label(self._get_tenant_id()) - labels["tenant_id"] = tenant_id + labels["deployer_id"] = self._sanitize_label(self._get_deployer_id()) + # Add stage if provided if stage: - labels["stage"] = stage.value.lower() + labels["stage"] = self._sanitize_label(stage.value) - # Merge user metadata with sanitization + # Process metadata if provided if metadata: - remaining_slots = MAX_LABEL_COUNT - len(labels) - for i, (key, value) in enumerate(metadata.items()): - if i >= remaining_slots: - logger.warning( - f"Exceeded maximum label count ({MAX_LABEL_COUNT}), " - f"dropping remaining metadata" + # If metadata is not a dict (e.g. a pydantic model), convert it using .dict() + if not isinstance(metadata, dict): + try: + metadata = metadata.dict() + except Exception as e: + logger.warning(f"Unable to convert metadata to dict: {e}") + metadata = {} + for key, value in metadata.items(): + # Skip None values + if value is None: + continue + # Convert complex objects to string + if isinstance(value, (dict, list)): + value = ( + "x" # Simplify complex objects to avoid length issues ) - break - safe_key = self._sanitize_label(str(key)) - safe_value = self._sanitize_label(str(value)) - labels[safe_key] = safe_value + # Sanitize both key and value + sanitized_key = self._sanitize_label(str(key)) + sanitized_value = self._sanitize_label(str(value)) + # Only add if both key and value are valid + if sanitized_key and sanitized_value: + labels[sanitized_key] = sanitized_value + + # Ensure we don't exceed 64 labels + if len(labels) > 64: + # Keep essential labels and truncate the rest + essential_labels = { + k: labels[k] + for k in ["managed_by", "deployer_id", "stage"] + if k in labels + } + # Add remaining labels up to limit + remaining_slots = 64 - len(essential_labels) + other_labels = { + k: v + for i, (k, v) in enumerate(labels.items()) + if k not in essential_labels and i < remaining_slots + } + labels = {**essential_labels, **other_labels} return labels @@ -185,36 +232,49 @@ def _get_model_version_id(self, model_id: str, version: str) -> str: return f"{model_id}/versions/{version}" def _init_vertex_model( - self, - name: Optional[str] = None, - version: Optional[str] = None, - credentials: Optional[Any] = None, - ) -> aiplatform.Model: - """Initialize a Vertex AI model with proper credentials. + self, name: str, version: Optional[str] = None + ) -> Optional[aiplatform.Model]: + """Initialize a single Vertex AI model with proper credentials. + + This method returns one Vertex AI model based on the given name (and optional version). Args: - name: Optional model name - version: Optional version - credentials: Optional credentials + name: The model name. + version: The model version (optional). Returns: - Vertex AI Model instance + A single Vertex AI model instance or None if initialization fails. 
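+
+        Example (illustrative; assumes an authenticated GCP environment
+        and an existing model registry instance):
+
+            >>> model = registry._init_vertex_model("my-model", version="2")  # doctest: +SKIP
+            >>> print(model.resource_name if model else "not found")  # doctest: +SKIP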
""" - if not credentials: - credentials, _ = self._get_authentication() - + credentials, project_id = self._get_authentication() + location = self.config.location kwargs = { - "location": self.config.location, + "location": location, + "project": project_id, "credentials": credentials, } - if name: - model_id = self._get_model_id(name) - if version: - model_id = self._get_model_version_id(model_id, version) - kwargs["name"] = model_id + if name.startswith("projects/"): + kwargs["model_name"] = name + else: + # Attempt to find an existing model by display_name + existing_models = aiplatform.Model.list( + filter=f"display_name={name}", + project=self.config.project_id or project_id, + location=location, + ) + if existing_models: + kwargs["model_name"] = existing_models[0].resource_name + else: + model_id = self._get_model_id(name) + if version: + model_id = self._get_model_version_id(model_id, version) + kwargs["model_name"] = model_id - return aiplatform.Model(**kwargs) + try: + return aiplatform.Model(**kwargs) + except Exception as e: + logger.warning(f"Failed to initialize model: {e}") + return None def register_model( self, @@ -234,12 +294,8 @@ def delete_model( """Delete a model and all of its versions from the Vertex AI model registry.""" try: model = self._init_vertex_model(name=name) - # List and delete all model versions first - versions = model.list_versions() - for version in versions: - version.delete() - # Then delete the parent model - model.delete() + if isinstance(model, aiplatform.Model): + model.delete() logger.info(f"Deleted model '{name}' and all its versions.") except Exception as e: raise RuntimeError(f"Failed to delete model: {str(e)}") @@ -275,20 +331,23 @@ def list_models( metadata: Optional[Dict[str, str]] = None, ) -> List[RegisteredModel]: """List models in the Vertex AI model registry.""" - _ = self._init_vertex_model(name=name) - # Always filter with ZenML-specific labels (including tenant id for multi-tenancy) - tenant_label = self._sanitize_label(self._get_tenant_id()) - filter_expr = ( - f"labels.managed_by='zenml' AND labels.tenant_id='{tenant_label}'" - ) + credentials, project_id = self._get_authentication() + location = self.config.location + # Always filter with ZenML-specific labels (including deployer id for multi-tenancy) + filter_expr = "labels.managed_by=zenml" if name: - filter_expr += f" AND display_name='{name}'" + filter_expr += f" AND display_name={name}" if metadata: for key, value in metadata.items(): - filter_expr += f" AND labels.{key}='{value}'" + filter_expr += f" AND labels.{key}={value}" try: - all_models = aiplatform.Model.list(filter=filter_expr) + all_models = aiplatform.Model.list( + project=project_id, + location=location, + filter=filter_expr, + credentials=credentials, + ) # Deduplicate by display_name so only one entry per "logical" model is returned. 
unique_models = {model.display_name: model for model in all_models} return [ @@ -318,21 +377,20 @@ def register_model_version( version: Model version model_source_uri: URI to model artifacts description: Model description - metadata: Model metadata + metadata: Model metadata (expected to be a ModelRegistryModelMetadata or + equivalent serializable dict) **kwargs: Additional arguments Returns: RegistryModelVersion instance """ - credentials, _ = self._get_authentication() - # Prepare labels with internal ZenML metadata, ensuring they are sanitized metadata_dict = metadata.model_dump() if metadata else {} labels = self._prepare_labels(metadata_dict) if version: labels["user_version"] = self._sanitize_label(version) - # Get container image from config if available, otherwise from metadata with a default + # Get the container image from the config if available, otherwise fallback to metadata if ( hasattr(self.config, "container") and self.config.container @@ -345,53 +403,75 @@ def register_model_version( "europe-docker.pkg.dev/vertex-ai/prediction/sklearn-cpu.1-3:latest", ) - # Optionally add additional parameters from the config resources - if hasattr(self.config, "resources") and self.config.resources: - if self.config.resources.machine_type: - metadata_dict.setdefault( - "machine_type", self.config.resources.machine_type - ) - if self.config.resources.min_replica_count is not None: - metadata_dict.setdefault( - "min_replica_count", - str(self.config.resources.min_replica_count), - ) - if self.config.resources.max_replica_count is not None: - metadata_dict.setdefault( - "max_replica_count", - str(self.config.resources.max_replica_count), - ) - - # Use a consistently sanitized display name instead of flat "name_version" + # Use a consistently sanitized display name instead of the raw model name model_display_name = self._sanitize_model_display_name(name) - try: - # Attempt to get the parent model (by name only) - parent_model = self._init_vertex_model(name=name) - logger.info(f"Found existing model: {name}") - except exceptions.NotFound: - # Create the parent model if it doesn"t exist - parent_model = aiplatform.Model.upload( - display_name=model_display_name, - artifact_uri=model_source_uri, - serving_container_image_uri=serving_container_image_uri, - description=description, - labels=labels, - credentials=credentials, - location=self.config.location, + # Build extended upload arguments for vertex.Model.upload, + # leveraging extra settings from self.config. 
+ upload_arguments = { + "serving_container_image_uri": serving_container_image_uri, + "artifact_uri": model_source_uri or self.config.artifact_uri, + "is_default_version": self.config.is_default_version + if self.config.is_default_version is not None + else True, + "version_aliases": self.config.version_aliases, + "version_description": self.config.version_description, + "serving_container_predict_route": self.config.container.predict_route + if self.config.container + else None, + "serving_container_health_route": self.config.container.health_route + if self.config.container + else None, + "description": description or self.config.description, + "serving_container_command": self.config.container.command + if self.config.container + else None, + "serving_container_args": self.config.container.args + if self.config.container + else None, + "serving_container_environment_variables": self.config.container.env + if self.config.container + else None, + "serving_container_ports": self.config.container.ports + if self.config.container + else None, + "display_name": self.config.display_name or model_display_name, + "project": self.config.project_id, + "location": self.config.location, + "labels": labels, + "encryption_spec_key_name": self.config.encryption_spec_key_name, + } + + # Include explanation settings if provided in the config. + if self.config.explanation: + upload_arguments["explanation_metadata"] = ( + self.config.explanation.metadata + ) + upload_arguments["explanation_parameters"] = ( + self.config.explanation.parameters + ) + + # Remove any parameters that are None to avoid passing them to upload. + upload_arguments = { + k: v for k, v in upload_arguments.items() if v is not None + } + + parent_model = self._init_vertex_model(name=name, version=version) + assert isinstance(parent_model, aiplatform.Model) + if parent_model and parent_model.uri == model_source_uri: + logger.info( + f"Model version {version} already exists, skipping upload..." ) - logger.info(f"Created new model: {name}") - - # Create a new version for the model. Note that we keep the display name intact. - model_version = parent_model.create_version( - artifact_uri=model_source_uri, - serving_container_image_uri=serving_container_image_uri, - description=description, - labels=labels, + return self._vertex_model_to_registry_version(parent_model) + # Always call model.upload (even if a parent model already exists), since Vertex AI + # expects a full upload for each version. 
+ upload_arguments["parent_model"] = ( + parent_model.resource_name if parent_model else None ) - logger.info(f"Created new version with labels: {model_version.labels}") + model = aiplatform.Model.upload(**upload_arguments) + logger.info(f"Uploaded new model version with labels: {model.labels}") - return self._vertex_model_to_registry_version(model_version) + return self._vertex_model_to_registry_version(model) def delete_model_version( self, @@ -406,7 +486,8 @@ def delete_model_version( """ try: model = self._init_vertex_model(name=name, version=version) - model.delete() + assert isinstance(model, aiplatform.Model) + model.versioning_registry.delete_version(version) logger.info(f"Deleted model version: {name} version {version}") except Exception as e: raise RuntimeError(f"Failed to delete model version: {str(e)}") @@ -422,10 +503,11 @@ def update_model_version( ) -> RegistryModelVersion: """Update a model version in the Vertex AI model registry.""" try: - parent_model = self._init_vertex_model(name=name) + parent_model = self._init_vertex_model(name=name, version=version) + assert isinstance(parent_model, aiplatform.Model) sanitized_version = self._sanitize_label(version) target_version = None - for v in parent_model.list_versions(): + for v in parent_model.list(): if v.labels.get("user_version") == sanitized_version: target_version = v break @@ -455,14 +537,9 @@ def get_model_version( ) -> RegistryModelVersion: """Get a model version from the Vertex AI model registry using the version label.""" try: - parent_model = self._init_vertex_model(name=name) - sanitized_version = self._sanitize_label(version) - for v in parent_model.list_versions(): - if v.labels.get("user_version") == sanitized_version: - return self._vertex_model_to_registry_version(v) - raise RuntimeError( - f"Model '{name}' with version '{version}' not found." - ) + parent_model = self._init_vertex_model(name=name, version=version) + assert isinstance(parent_model, aiplatform.Model) + return self._vertex_model_to_registry_version(parent_model) except Exception as e: raise RuntimeError(f"Failed to get model version: {str(e)}") @@ -479,6 +556,8 @@ def list_model_versions( **kwargs: Any, ) -> List[RegistryModelVersion]: """List model versions from the Vertex AI model registry.""" + credentials, project_id = self._get_authentication() + location = self.config.location filter_expr = [] if name: filter_expr.append( @@ -497,8 +576,13 @@ def list_model_versions( filter_str = " AND ".join(filter_expr) if filter_expr else None try: - parent_model = self._init_vertex_model(name=name) - versions = parent_model.list_versions(filter=filter_str) + model = aiplatform.Model( + project=project_id, + location=location, + filter=filter_str, + credentials=credentials, + ) + versions = model.versioning_registry.list_versions() results = [ self._vertex_model_to_registry_version(v) for v in versions ] @@ -516,14 +600,9 @@ def load_model_version( ) -> Any: """Load a model version from the Vertex AI model registry using label-based lookup.""" try: - parent_model = self._init_vertex_model(name=name) - sanitized_version = self._sanitize_label(version) - for v in parent_model.list_versions(): - if v.labels.get("user_version") == sanitized_version: - return v - raise RuntimeError( - f"Model version '{version}' for '{name}' not found." 
- ) + parent_model = self._init_vertex_model(name=name, version=version) + assert isinstance(parent_model, aiplatform.Model) + return parent_model except Exception as e: raise RuntimeError(f"Failed to load model version: {str(e)}") @@ -554,25 +633,26 @@ def _vertex_model_to_registry_version( pass # Get parent model for registered_model field - parent_model = None try: - model_id = model.resource_name.split("/versions/")[0] - parent_model = self._init_vertex_model(name=model_id) registered_model = RegisteredModel( - name=parent_model.display_name, - description=parent_model.description, - metadata=parent_model.labels, + name=model.display_name, + description=model.description, + metadata=model.labels, ) - except Exception: + except Exception as e: logger.warning( - f"Failed to get parent model for version: {model.resource_name}" + f"Failed to get parent model for version: {model.resource_name}: {e}" + ) + registered_model = RegisteredModel( + name=model.display_name if model.display_name else "unknown", + description=model.description if model.description else "", + metadata=model.labels if model.labels else {}, ) - registered_model = None return RegistryModelVersion( registered_model=registered_model, version=model.version_id, - model_source_uri=model.artifact_uri, + model_source_uri=model.uri, model_format="Custom", # Vertex AI doesn't provide format info description=model.description, metadata=model.labels, diff --git a/src/zenml/integrations/gcp/services/vertex_deployment.py b/src/zenml/integrations/gcp/services/vertex_deployment.py index 2b71ea5f71d..cad3f694744 100644 --- a/src/zenml/integrations/gcp/services/vertex_deployment.py +++ b/src/zenml/integrations/gcp/services/vertex_deployment.py @@ -15,11 +15,11 @@ import re from datetime import datetime -from typing import Any, Dict, Generator, List, Optional, cast +from typing import Any, Dict, Generator, List, Optional, Tuple, cast -from google.api_core import exceptions, retry +from google.api_core import retry from google.cloud import aiplatform -from pydantic import BaseModel, Field, PrivateAttr +from pydantic import Field, PrivateAttr from zenml.client import Client from zenml.integrations.gcp.flavors.vertex_base_config import ( @@ -28,6 +28,10 @@ from zenml.logger import get_logger from zenml.services import ServiceState, ServiceStatus, ServiceType from zenml.services.service import BaseDeploymentService, ServiceConfig +from zenml.services.service_endpoint import ( + BaseServiceEndpoint, + ServiceEndpointConfig, +) logger = get_logger(__name__) @@ -86,14 +90,18 @@ def get_vertex_deployment_labels(self) -> Dict[str, str]: labels["model-name"] = sanitize_vertex_label(self.model_name) if self.service_name: labels["service-name"] = sanitize_vertex_label(self.service_name) + if self.display_name: + labels["display-name"] = sanitize_vertex_label( + self.display_name + ) or sanitize_vertex_label(self.name) return labels -class VertexPredictionServiceEndpoint(BaseModel): +class VertexPredictionServiceEndpointConfig(ServiceEndpointConfig): """Vertex AI Prediction Service Endpoint.""" - endpoint_name: str - deployed_model_id: str + endpoint_name: Optional[str] = None + deployed_model_id: Optional[str] = None endpoint_url: Optional[str] = None created_at: Optional[datetime] = None updated_at: Optional[datetime] = None @@ -103,7 +111,11 @@ class VertexPredictionServiceEndpoint(BaseModel): class VertexServiceStatus(ServiceStatus): """Vertex AI service status.""" - endpoint: Optional[VertexPredictionServiceEndpoint] = None + +class 
VertexPredictionServiceEndpoint(BaseServiceEndpoint): + """Vertex AI Prediction Service Endpoint.""" + + config: VertexPredictionServiceEndpointConfig class VertexDeploymentService(BaseDeploymentService): @@ -145,10 +157,11 @@ def __init__(self, config: VertexDeploymentConfig, **attrs: Any): @property def prediction_url(self) -> Optional[str]: """The prediction URI exposed by the prediction service.""" - if not self.status.endpoint or not self.status.endpoint.endpoint_url: + endpoints = self.get_endpoints() + if not endpoints: return None - - return f"https://{self.config.location}-aiplatform.googleapis.com/v1/{self.status.endpoint.endpoint_url}" + endpoint = endpoints[0] + return f"https://{self.config.location}-aiplatform.googleapis.com/v1/{endpoint.resource_name}" def get_endpoints(self) -> List[aiplatform.Endpoint]: """Get all endpoints for the current project and location. @@ -158,9 +171,13 @@ def get_endpoints(self) -> List[aiplatform.Endpoint]: """ try: # Use proper filtering and pagination + display_name = self.config.name or self.config.display_name + assert display_name is not None + display_name = sanitize_vertex_label(display_name) return list( aiplatform.Endpoint.list( - filter='labels.managed_by="zenml"', + filter=f"labels.managed_by=zenml AND labels.display-name={display_name}", + project=self._project_id, location=self.config.location, credentials=self._credentials, ) @@ -175,18 +192,11 @@ def _generate_endpoint_name(self) -> str: Returns: Generated endpoint name """ - from zenml.integrations.gcp.model_deployers.vertex_model_deployer import ( - VertexModelDeployer, - ) - - # Include tenant ID in name for multi-tenancy support - model_deployer = cast( - VertexModelDeployer, Client().active_stack.model_deployer - ) - # Make name more descriptive and conformant - sanitized_model_name = sanitize_vertex_label(self.config.model_name) - return f"{sanitized_model_name}-{model_deployer.id}-{str(self.uuid)[:UUID_SLICE_LENGTH]}" + sanitized_model_name = sanitize_vertex_label( + self.config.display_name or self.config.name + ) + return f"{sanitized_model_name}-{str(self.uuid)[:UUID_SLICE_LENGTH]}" def _get_model_id(self, name: str) -> str: """Helper to construct a full model ID from a given model name.""" @@ -201,45 +211,38 @@ def _verify_model_exists(self) -> aiplatform.Model: Raises: RuntimeError: If model not found """ - try: - model = aiplatform.Model( - model_name=self._get_model_id(self.config.model_name), - location=self.config.location, - credentials=self._credentials, - ) - logger.info(f"Found model to deploy: {model.resource_name}") - return model - except exceptions.NotFound: - raise RuntimeError( - f"Model {self._get_model_id(self.config.model_name)} not found in project {self._project_id}" - ) - - def _deploy_model(self) -> Any: - """Deploy model to Vertex AI endpoint.""" - # Initialize endpoint - if self.config.existing_endpoint: - endpoint = aiplatform.Endpoint( - endpoint_name=self.config.existing_endpoint, - project=self._project_id, - location=self.config.location, - credentials=self._credentials, - ) + if self.config.model_name.startswith("projects/"): + model_name = self.config.model_name else: - endpoint = aiplatform.Endpoint.create( - display_name=self.config.name, - project=self._project_id, - location=self.config.location, - credentials=self._credentials, - labels=self.config.get_vertex_deployment_labels(), - ) + model_name = self._get_model_id(self.config.model_name) + # Remove version suffix if present + if "@" in model_name: + model_name = 
model_name.split("@")[0] + logger.info(f"Model name: {model_name}") + model = aiplatform.Model( + model_name=model_name, + project=self._project_id, + location=self.config.location, + credentials=self._credentials, + ) + logger.info(f"Found model to deploy: {model.resource_name}") + return model + def _deploy_model( + self, model: aiplatform.Model, endpoint: aiplatform.Endpoint + ) -> None: + """Deploy model to Vertex AI endpoint.""" # Prepare deployment configuration deploy_kwargs = { - "model_display_name": self.config.model_name, - "deployed_model_display_name": self.config.name, + "model": model, + "deployed_model_display_name": self.config.display_name + or self.config.name, + "traffic_percentage": 100, "sync": False, } - + logger.info( + f"Deploying model to endpoint with kwargs: {deploy_kwargs}" + ) # Add container configuration if specified if self.config.container: deploy_kwargs.update( @@ -288,73 +291,53 @@ def _deploy_model(self) -> Any: ) # Deploy model - operation = endpoint.deploy(**deploy_kwargs) - return operation + logger.info( + f"Deploying model to endpoint with kwargs: {deploy_kwargs}" + ) + endpoint.deploy(**deploy_kwargs) def provision(self) -> None: """Provision or update remote Vertex AI deployment instance.""" - try: - # First verify model exists - model = self._verify_model_exists() - - # Get or create endpoint - if self.config.existing_endpoint: - endpoint = aiplatform.Endpoint( - endpoint_name=self.config.existing_endpoint, - location=self.config.location, - credentials=self._credentials, - ) - logger.info( - f"Using existing endpoint: {endpoint.resource_name}" - ) - else: - endpoint_name = self._generate_endpoint_name() - endpoint = aiplatform.Endpoint.create( - display_name=endpoint_name, - location=self.config.location, - encryption_spec_key_name=self.config.encryption_spec_key_name, - labels=self.config.get_vertex_deployment_labels(), - credentials=self._credentials, - ) - logger.info(f"Created new endpoint: {endpoint.resource_name}") - - # Deploy model with retries for transient errors - try: - deploy_op = self._deploy_model() - - # Wait for deployment - deploy_op.result(timeout=POLLING_TIMEOUT) - - logger.info( - f"Model {model.resource_name} deployed to endpoint {endpoint.resource_name}" - ) - except Exception as e: - self.status.update_state( - ServiceState.ERROR, f"Deployment failed: {str(e)}" - ) - raise - - # Update status - self.status.endpoint = VertexPredictionServiceEndpoint( - endpoint_name=endpoint.resource_name, - endpoint_url=endpoint.resource_name, - deployed_model_id=model.resource_name, - created_at=datetime.utcnow(), - updated_at=datetime.utcnow(), - state="DEPLOYED", + # First verify model exists + model = self._verify_model_exists() + logger.info(f"Found model to deploy: {model.resource_name}") + # Get or create endpoint + if self.config.existing_endpoint: + endpoint = aiplatform.Endpoint( + endpoint_name=self.config.existing_endpoint, + location=self.config.location, + credentials=self._credentials, ) - self.status.update_state(ServiceState.ACTIVE) + logger.info(f"Using existing endpoint: {endpoint.resource_name}") + else: + endpoint_name = self._generate_endpoint_name() + endpoint = aiplatform.Endpoint.create( + display_name=endpoint_name, + location=self.config.location, + encryption_spec_key_name=self.config.encryption_spec_key_name, + labels=self.config.get_vertex_deployment_labels(), + credentials=self._credentials, + ) + logger.info(f"Created new endpoint: {endpoint.resource_name}") + # Deploy model with retries for transient 
errors + try: + self._deploy_model(model, endpoint) logger.info( - f"Deployment completed successfully. " - f"Endpoint: {endpoint.resource_name}" + f"Model {model.resource_name} deployed to endpoint {endpoint.resource_name}" ) - except Exception as e: - error_msg = f"Failed to provision deployment: {str(e)}" - logger.error(error_msg) - self.status.update_state(ServiceState.ERROR, error_msg) - raise RuntimeError(error_msg) + self.status.update_state( + ServiceState.ERROR, f"Deployment failed: {str(e)}" + ) + raise + + self.status.update_state(ServiceState.ACTIVE) + + logger.info( + f"Deployment completed successfully. " + f"Endpoint: {endpoint.resource_name}" + ) def deprovision(self, force: bool = False) -> None: """Deprovision the Vertex AI deployment. @@ -362,86 +345,53 @@ def deprovision(self, force: bool = False) -> None: Args: force: Whether to force deprovision """ - if not self.status.endpoint: - logger.warning("No endpoint to deprovision") - return - - try: - endpoint = aiplatform.Endpoint( - endpoint_name=self.status.endpoint.endpoint_name, - location=self.config.location, - credentials=self._credentials, - ) - - # Undeploy model - endpoint.undeploy_all() - - # Delete endpoint if we created it - if not self.config.existing_endpoint: + endpoints = self.get_endpoints() + if endpoints: + try: + endpoint = endpoints[0] + endpoint.undeploy_all() endpoint.delete() - - logger.info(f"Deprovisioned endpoint: {endpoint.resource_name}") - - self.status.endpoint = None - self.status.update_state(ServiceState.INACTIVE) - - except Exception as e: - error_msg = f"Failed to deprovision deployment: {str(e)}" - if not force: - logger.error(error_msg) - self.status.update_state(ServiceState.ERROR, error_msg) - raise RuntimeError(error_msg) - else: - logger.warning( - f"Error during forced deprovision (ignoring): {error_msg}" + logger.info( + f"Deprovisioned endpoint: {endpoint.resource_name}" ) self.status.update_state(ServiceState.INACTIVE) + except Exception as e: + logger.error(f"Failed to deprovision endpoint: {e}") + self.status.update_state( + ServiceState.ERROR, f"Failed to deprovision endpoint: {e}" + ) + else: + try: + endpoint = aiplatform.Endpoint( + endpoint_name=self._generate_endpoint_name(), + location=self.config.location, + credentials=self._credentials, + ) - def start_deployment( - self, timeout: int = POLLING_TIMEOUT - ) -> aiplatform.Endpoint: - """Start the Vertex AI deployment and wait until it's ready. - - This method initiates the deployment (via a helper, e.g. _deploy_model()) and then - blocks until the underlying operation is completed using wait(). - - Args: - timeout: Maximum time (in seconds) to wait for deployment readiness. + # Undeploy model + endpoint.undeploy_all() - Returns: - The deployed Vertex AI Endpoint object. + # Delete endpoint if we created it + if not self.config.existing_endpoint: + endpoint.delete() - Raises: - RuntimeError: If the deployment operation fails. - """ - try: - # _deploy_model() is assumed to initiate deployment and return an operation object. - # The operation object has a wait() method. - operation = ( - self._deploy_model() - ) # <-- your deployment call; adjust as needed - logger.info( - "Deployment operation initiated. Waiting for deployment to be ready..." - ) - operation.wait(timeout=timeout) + logger.info( + f"Deprovisioned endpoint: {endpoint.resource_name}" + ) - # After waiting, retrieve the endpoint object. 
- endpoint = aiplatform.Endpoint( - endpoint_name=operation.resource.name, - location=self.config.location, - credentials=self._credentials, - ) + self.status.update_state(ServiceState.INACTIVE) - self.status.endpoint = endpoint - self.status.update_state(ServiceState.ACTIVE) - logger.info( - f"Deployment is ready at endpoint: {endpoint.resource_name}" - ) - return endpoint - except Exception as e: - logger.error(f"Deployment failed: {e}") - self.status.update_state(ServiceState.ERROR, str(e)) - raise RuntimeError(f"Deployment failed: {e}") + except Exception as e: + error_msg = f"Failed to deprovision deployment: {str(e)}" + if not force: + logger.error(error_msg) + self.status.update_state(ServiceState.ERROR, error_msg) + raise RuntimeError(error_msg) + else: + logger.warning( + f"Error during forced deprovision (ignoring): {error_msg}" + ) + self.status.update_state(ServiceState.INACTIVE) def get_logs( self, @@ -456,9 +406,41 @@ def get_logs( logger.warning("Logs are not supported for Vertex AI") yield from () - def check_status(self) -> None: - """Check the status of the deployment (no-op implementation).""" - return + def check_status(self) -> Tuple[ServiceState, str]: + """Check the status of the deployment by validating if an endpoint exists and if it has deployed models. + + Returns: + A tuple containing the deployment's state and a status message. + """ + try: + endpoints = self.get_endpoints() + if not endpoints: + return ServiceState.INACTIVE, "No endpoint found." + + endpoint = endpoints[0] + try: + endpoint.reload() + except Exception as e: + logger.warning(f"Failed to reload endpoint: {e}") + + deployed_models = [] + if hasattr(endpoint, "list_models"): + try: + deployed_models = endpoint.list_models() + except Exception as e: + logger.warning(f"Failed to list models for endpoint: {e}") + elif hasattr(endpoint, "deployed_models"): + deployed_models = endpoint.deployed_models or [] + + if deployed_models and len(deployed_models) > 0: + return ServiceState.ACTIVE, "" + else: + return ( + ServiceState.PENDING_STARTUP, + "Endpoint deployment is in progress.", + ) + except Exception as e: + return ServiceState.ERROR, f"Deployment check failed: {e}" @property def is_running(self) -> bool: diff --git a/src/zenml/model_registries/base_model_registry.py b/src/zenml/model_registries/base_model_registry.py index 72fdd123b89..b2da8c358e2 100644 --- a/src/zenml/model_registries/base_model_registry.py +++ b/src/zenml/model_registries/base_model_registry.py @@ -62,7 +62,6 @@ class ModelRegistryModelMetadata(BaseModel): model and its development process. """ - managed_by: str = "zenml" zenml_version: Optional[str] = None zenml_run_name: Optional[str] = None zenml_pipeline_name: Optional[str] = None @@ -71,6 +70,15 @@ class ModelRegistryModelMetadata(BaseModel): zenml_step_name: Optional[str] = None zenml_workspace: Optional[str] = None + @property + def managed_by(self) -> str: + """Returns the managed by attribute. + + Returns: + The managed by attribute. + """ + return "zenml" + @property def custom_attributes(self) -> Dict[str, str]: """Returns a dictionary of custom attributes. 
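The `managed_by` change in `base_model_registry.py` above is subtle: moving the attribute from a Pydantic field to a property removes it from serialization, so callers can no longer set or override it, while readers still see the constant value. A minimal sketch of the difference (illustrative, plain Pydantic v2, not ZenML code):

```python
from typing import Optional

from pydantic import BaseModel


class WithField(BaseModel):
    managed_by: str = "zenml"
    zenml_version: Optional[str] = None


class WithProperty(BaseModel):
    zenml_version: Optional[str] = None

    @property
    def managed_by(self) -> str:
        return "zenml"


# The field variant is serialized and accepts overrides; the property is not.
print(WithField(managed_by="someone-else").model_dump())  # {'managed_by': 'someone-else', 'zenml_version': None}
print(WithProperty().model_dump())                        # {'zenml_version': None}
print(WithProperty().managed_by)                          # zenml
```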
From 0792f75bcf0446c3c48611b55d1a1ce680df7a50 Mon Sep 17 00:00:00 2001 From: Safoine El khabich Date: Mon, 24 Mar 2025 00:28:35 +0000 Subject: [PATCH 27/43] Refactor Vertex AI model upload logic and enhance logging for deployment service --- .../model_registries/vertex_model_registry.py | 16 +++++++++------- .../gcp/services/vertex_deployment.py | 8 +++----- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/zenml/integrations/gcp/model_registries/vertex_model_registry.py b/src/zenml/integrations/gcp/model_registries/vertex_model_registry.py index 3fa8eb25e2b..c88923b3d3c 100644 --- a/src/zenml/integrations/gcp/model_registries/vertex_model_registry.py +++ b/src/zenml/integrations/gcp/model_registries/vertex_model_registry.py @@ -269,7 +269,6 @@ def _init_vertex_model( if version: model_id = self._get_model_version_id(model_id, version) kwargs["model_name"] = model_id - try: return aiplatform.Model(**kwargs) except Exception as e: @@ -456,18 +455,21 @@ def register_model_version( k: v for k, v in upload_arguments.items() if v is not None } + # Try to get existing parent model, but don't fail if it doesn't exist parent_model = self._init_vertex_model(name=name, version=version) - assert isinstance(parent_model, aiplatform.Model) + + # If parent model exists and has same URI, return existing version if parent_model and parent_model.uri == model_source_uri: logger.info( f"Model version {version} already exists, skipping upload..." ) return self._vertex_model_to_registry_version(parent_model) - # Always call model.upload (even if a parent model already exists), since Vertex AI - # expects a full upload for each version. - upload_arguments["parent_model"] = ( - parent_model.resource_name if parent_model else None - ) + + # Set parent model resource name if it exists + if parent_model: + upload_arguments["parent_model"] = parent_model.resource_name + + # Upload the model model = aiplatform.Model.upload(**upload_arguments) logger.info(f"Uploaded new model version with labels: {model.labels}") diff --git a/src/zenml/integrations/gcp/services/vertex_deployment.py b/src/zenml/integrations/gcp/services/vertex_deployment.py index cad3f694744..a9bcb10ca0e 100644 --- a/src/zenml/integrations/gcp/services/vertex_deployment.py +++ b/src/zenml/integrations/gcp/services/vertex_deployment.py @@ -219,6 +219,9 @@ def _verify_model_exists(self) -> aiplatform.Model: if "@" in model_name: model_name = model_name.split("@")[0] logger.info(f"Model name: {model_name}") + logger.info(f"Project ID: {self._project_id}") + logger.info(f"Location: {self.config.location}") + logger.info(f"Credentials: {self._credentials}") model = aiplatform.Model( model_name=model_name, project=self._project_id, @@ -418,11 +421,6 @@ def check_status(self) -> Tuple[ServiceState, str]: return ServiceState.INACTIVE, "No endpoint found." 
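+            # NOTE: get_endpoints() filters on the ZenML-managed labels
+            # (managed_by and display-name), so the first match is the
+            # endpoint belonging to this service.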
endpoint = endpoints[0] - try: - endpoint.reload() - except Exception as e: - logger.warning(f"Failed to reload endpoint: {e}") - deployed_models = [] if hasattr(endpoint, "list_models"): try: From 147658c1c3969cfb81e8cefbe0e05c39b812b8eb Mon Sep 17 00:00:00 2001 From: Safoine El khabich Date: Thu, 27 Mar 2025 02:01:07 +0000 Subject: [PATCH 28/43] Enhance Vertex AI model registry metadata handling and improve logging in deployment service --- .../gcp/model_registries/vertex_model_registry.py | 6 ++++-- src/zenml/integrations/gcp/services/vertex_deployment.py | 1 + 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/zenml/integrations/gcp/model_registries/vertex_model_registry.py b/src/zenml/integrations/gcp/model_registries/vertex_model_registry.py index c88923b3d3c..82f23198a78 100644 --- a/src/zenml/integrations/gcp/model_registries/vertex_model_registry.py +++ b/src/zenml/integrations/gcp/model_registries/vertex_model_registry.py @@ -457,7 +457,7 @@ def register_model_version( # Try to get existing parent model, but don't fail if it doesn't exist parent_model = self._init_vertex_model(name=name, version=version) - + # If parent model exists and has same URI, return existing version if parent_model and parent_model.uri == model_source_uri: logger.info( @@ -651,13 +651,15 @@ def _vertex_model_to_registry_version( metadata=model.labels if model.labels else {}, ) + model_version_metadata = model.labels + model_version_metadata["resource_name"] = model.resource_name return RegistryModelVersion( registered_model=registered_model, version=model.version_id, model_source_uri=model.uri, model_format="Custom", # Vertex AI doesn't provide format info description=model.description, - metadata=model.labels, + metadata=model_version_metadata, created_at=model.create_time, last_updated_at=model.update_time, stage=stage, diff --git a/src/zenml/integrations/gcp/services/vertex_deployment.py b/src/zenml/integrations/gcp/services/vertex_deployment.py index a9bcb10ca0e..3f3187aeafc 100644 --- a/src/zenml/integrations/gcp/services/vertex_deployment.py +++ b/src/zenml/integrations/gcp/services/vertex_deployment.py @@ -304,6 +304,7 @@ def provision(self) -> None: # First verify model exists model = self._verify_model_exists() logger.info(f"Found model to deploy: {model.resource_name}") + # Get or create endpoint if self.config.existing_endpoint: endpoint = aiplatform.Endpoint( From 06b1f5f01da4384291b307756253662ced73162b Mon Sep 17 00:00:00 2001 From: GitHub Actions Date: Tue, 8 Apr 2025 07:24:15 +0000 Subject: [PATCH 29/43] Auto-update of NLP template --- examples/e2e_nlp/.copier-answers.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/e2e_nlp/.copier-answers.yml b/examples/e2e_nlp/.copier-answers.yml index 6d8e693457c..01d4cfb1fc0 100644 --- a/examples/e2e_nlp/.copier-answers.yml +++ b/examples/e2e_nlp/.copier-answers.yml @@ -1,5 +1,5 @@ # Changes here will be overwritten by Copier -_commit: 2025.01.08-1-gd1ba11d +_commit: 2025.04.07 _src_path: gh:zenml-io/template-nlp accelerator: cpu cloud_of_choice: aws From 97c96c4f7255a9f16b0d232c8ef2dc4df1392925 Mon Sep 17 00:00:00 2001 From: Safoine El khabich Date: Tue, 8 Apr 2025 09:12:32 +0100 Subject: [PATCH 30/43] fix links --- docs/book/component-guide/model-deployers/vertex.md | 13 ++++--------- .../book/component-guide/model-registries/vertex.md | 8 ++------ 2 files changed, 6 insertions(+), 15 deletions(-) diff --git a/docs/book/component-guide/model-deployers/vertex.md 
b/docs/book/component-guide/model-deployers/vertex.md
index df8fa1c2ff0..0bd4ab175b5 100644
--- a/docs/book/component-guide/model-deployers/vertex.md
+++ b/docs/book/component-guide/model-deployers/vertex.md
@@ -137,8 +137,6 @@ def model_deployer(
     return service
 ```
 
-*Example: [`model_deployer.py`](../../examples/vertex-registry-and-deployer/steps/model_deployer.py)*
-
 ### Configuration Options
 
 The Vertex AI Model Deployer leverages a comprehensive configuration system defined in the shared base configuration and deployer-specific settings:
@@ -154,14 +152,14 @@ The Vertex AI Model Deployer leverages a comprehensive configuration system defi
   - `traffic_percentage`: The percentage of incoming traffic to route to this deployment.
 
 - **Container and Resource Configuration:**
-  - Configurations provided via [VertexAIContainerSpec](../../integrations/gcp/flavors/vertex_base_config.py) allow you to specify a custom serving container image, HTTP routes (`predict_route`, `health_route`), environment variables, and port exposure.
-  - [VertexAIResourceSpec](../../integrations/gcp/flavors/vertex_base_config.py) lets you override the default machine type, number of replicas, and even GPU options.
+  - Configurations provided via VertexAIContainerSpec allow you to specify a custom serving container image, HTTP routes (`predict_route`, `health_route`), environment variables, and port exposure.
+  - VertexAIResourceSpec lets you override the default machine type, number of replicas, and even GPU options.
 
 - **Advanced Settings:**
   - Service account, network configuration, and customer-managed encryption keys.
   - Model explanation settings via `VertexAIExplanationSpec` if you need integrated model interpretability.
 
-These options are defined across the [Vertex AI Base Config](../../integrations/gcp/flavors/vertex_base_config.py) and the deployer–specific configuration in [VertexModelDeployerFlavor](../../integrations/gcp/flavors/vertex_model_deployer_flavor.py).
+These options are defined across the Vertex AI Base Config and the deployer-specific configuration in VertexModelDeployerFlavor.
 
 ### Limitations and Considerations
 
@@ -181,7 +179,4 @@ These options are defined across the [Vertex AI Base Config](../../integrations/
 4. **Region Consistency:**
    - Ensure that the model and deployment are created in the same GCP region.
 
-For more details, please refer to the [SDK docs](https://sdkdocs.zenml.io) and the relevant implementation files:
-- [`vertex_model_deployer.py`](../../integrations/gcp/model_deployers/vertex_model_deployer.py)
-- [`vertex_base_config.py`](../../integrations/gcp/flavors/vertex_base_config.py)
-- [`vertex_model_deployer_flavor.py`](../../integrations/gcp/flavors/vertex_model_deployer_flavor.py)
\ No newline at end of file
+For more details, please refer to the [SDK docs](https://sdkdocs.zenml.io).
\ No newline at end of file
diff --git a/docs/book/component-guide/model-registries/vertex.md b/docs/book/component-guide/model-registries/vertex.md
index f4e32ffb514..4a401a93595 100644
--- a/docs/book/component-guide/model-registries/vertex.md
+++ b/docs/book/component-guide/model-registries/vertex.md
@@ -69,7 +69,7 @@ The service account needs the following permissions:
 
 ### Registering Models inside a Pipeline with Extended Configuration
 
-The Vertex AI Model Registry supports extended configuration options via the `VertexAIModelConfig` class (defined in the [vertex_base_config.py](../../integrations/gcp/flavors/vertex_base_config.py) file).
This means you can specify additional details for your deployments such as: +The Vertex AI Model Registry supports extended configuration options via the `VertexAIModelConfig` class. This means you can specify additional details for your deployments such as: - **Container configuration**: Use the `VertexAIContainerSpec` to define a custom serving container (e.g., specifying the `image_uri`, `predict_route`, `health_route`, and exposed ports). - **Resource configuration**: Use the `VertexAIResourceSpec` to specify compute resources like `machine_type`, `min_replica_count`, and `max_replica_count`. @@ -152,8 +152,6 @@ def model_register( return "" ``` -*Example: [`model_register.py`](../../examples/vertex-registry-and-deployer/steps/model_register.py)* - ### Working with Model Versions Since the Vertex AI Model Registry supports only version-level operations, here are some commands to manage model versions: @@ -184,8 +182,6 @@ The Vertex AI Model Registry accepts several configuration options, now enriched - **resources**: (Optional) Compute resource settings (using `VertexAIResourceSpec`) like `machine_type`, `min_replica_count`, and `max_replica_count`. - **labels** and **metadata**: Additional annotation data for organizing and tracking your model versions. -These configuration options are specified in the [Vertex AI Base Config](../../integrations/gcp/flavors/vertex_base_config.py) and further extended in the [Vertex AI Model Registry Flavor](../../integrations/gcp/flavors/vertex_model_registry_flavor.py). - ### Key Differences from Other Model Registries 1. **Version-Only Interface**: Vertex AI only supports version-level operations for model registration. @@ -199,7 +195,7 @@ These configuration options are specified in the [Vertex AI Base Config](../../i - It is recommended to specify a serving container image URI rather than rely on the default scikit-learn container to ensure compatibility with Vertex AI endpoints. - All models registered through this integration are automatically labeled with `managed_by="zenml"` for consistent tracking. -For more detailed information, check out the [SDK docs](https://sdkdocs.zenml.io/latest/integration_code_docs/integrations-gcp/#zenml.integrations.gcp.model_registry). +For more detailed information, check out the [SDK docs](https://sdkdocs.zenml.io/0.80.1/integration_code_docs/integrations-gcp.html#zenml.integrations.gcp).
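To make the documented flow above concrete, here is a minimal sketch of a registration step (hypothetical step and model names; it assumes the active stack's model registry is this Vertex AI flavor and relies on the `register_model_version` signature shown in the diffs, while the `ModelRegistryModelMetadata` field used is an assumption rather than something taken from this series):

```python
from typing_extensions import Annotated

from zenml import step
from zenml.client import Client
from zenml.model_registries.base_model_registry import (
    ModelRegistryModelMetadata,
)


@step
def model_register(model_uri: str) -> Annotated[str, "model_version"]:
    """Register a trained model artifact as a new Vertex AI model version."""
    model_registry = Client().active_stack.model_registry
    model_version = model_registry.register_model_version(
        name="my-classifier",        # hypothetical model name
        version="1",                 # surfaced as the `user_version` label
        model_source_uri=model_uri,  # GCS URI of the model artifact
        description="Version registered from a ZenML pipeline",
        metadata=ModelRegistryModelMetadata(
            zenml_pipeline_name="training_pipeline",  # assumed field
        ),
    )
    return model_version.version
```

The step returns the Vertex-assigned version ID, which a downstream step can hand to the model deployer.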
ZenML Scarf From 5d3822814fa7e4ffec1de2d0efbab442eb20e466 Mon Sep 17 00:00:00 2001 From: Safoine El khabich Date: Tue, 8 Apr 2025 09:29:28 +0100 Subject: [PATCH 31/43] Refactor imports in vertex_deployment and base_model_deployer for clarity and consistency --- src/zenml/integrations/gcp/services/vertex_deployment.py | 4 +++- src/zenml/model_deployers/base_model_deployer.py | 3 +-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/zenml/integrations/gcp/services/vertex_deployment.py b/src/zenml/integrations/gcp/services/vertex_deployment.py index 3f3187aeafc..162c96ef0b9 100644 --- a/src/zenml/integrations/gcp/services/vertex_deployment.py +++ b/src/zenml/integrations/gcp/services/vertex_deployment.py @@ -22,11 +22,13 @@ from pydantic import Field, PrivateAttr from zenml.client import Client +from zenml.enums import ServiceState from zenml.integrations.gcp.flavors.vertex_base_config import ( VertexAIEndpointConfig, ) from zenml.logger import get_logger -from zenml.services import ServiceState, ServiceStatus, ServiceType +from zenml.models.v2.misc.service import ServiceType +from zenml.services import ServiceStatus from zenml.services.service import BaseDeploymentService, ServiceConfig from zenml.services.service_endpoint import ( BaseServiceEndpoint, diff --git a/src/zenml/model_deployers/base_model_deployer.py b/src/zenml/model_deployers/base_model_deployer.py index 6ed6024b390..70a1bc7caad 100644 --- a/src/zenml/model_deployers/base_model_deployer.py +++ b/src/zenml/model_deployers/base_model_deployer.py @@ -28,12 +28,11 @@ from uuid import UUID from zenml.client import Client -from zenml.enums import StackComponentType +from zenml.enums import ServiceState, StackComponentType from zenml.logger import get_logger from zenml.models.v2.misc.service import ServiceType from zenml.services import BaseService, ServiceConfig from zenml.services.service import BaseDeploymentService -from zenml.services.service_status import ServiceState from zenml.stack import StackComponent from zenml.stack.flavor import Flavor from zenml.stack.stack_component import StackComponentConfig From e91a3ebccdc2af71b07cd3b2393c46f261c7a20c Mon Sep 17 00:00:00 2001 From: Safoine El khabich Date: Tue, 8 Apr 2025 12:14:12 +0100 Subject: [PATCH 32/43] Update TestService configuration to include zenml_model and adjust assertions in tests --- tests/unit/services/test_service.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/tests/unit/services/test_service.py b/tests/unit/services/test_service.py index 080d53c081c..065d9c33468 100644 --- a/tests/unit/services/test_service.py +++ b/tests/unit/services/test_service.py @@ -62,7 +62,9 @@ def base_service(): return TestService( uuid=UUID("12345678-1234-5678-1234-567812345678"), admin_state=ServiceState.ACTIVE, - config=ServiceConfig(name="test_service", param1="value1", param2=2), + config=ServiceConfig( + name="test_service", param1="value1", param2=2, zenml_model=None + ), status=ServiceStatus( state=ServiceState.ACTIVE, last_error="", @@ -78,13 +80,15 @@ def test_from_model(service_response): assert isinstance(service, TestService) assert service.uuid == service_response.id assert service.admin_state == service_response.admin_state - assert dict(service.config) == service_response.config + assert ( + service.config.model_dump(exclude_unset=True) + == service_response.config + ) assert dict(service.status) == service_response.status assert service.SERVICE_TYPE["type"] == service_response.service_type.type assert ( 
service.SERVICE_TYPE["flavor"] == service_response.service_type.flavor ) - assert service.endpoint == service_response.endpoint def test_update_status(base_service, monkeypatch): From c59a5f22262ce9f90825fe7ff364005e696b1108 Mon Sep 17 00:00:00 2001 From: Safoine El khabich Date: Tue, 8 Apr 2025 12:41:27 +0100 Subject: [PATCH 33/43] Enhance docstrings in VertexDeployment classes for improved clarity and completeness --- .../model_registries/vertex_model_registry.py | 149 ++++++++++++++++-- .../gcp/services/vertex_deployment.py | 56 +++++-- 2 files changed, 183 insertions(+), 22 deletions(-) diff --git a/src/zenml/integrations/gcp/model_registries/vertex_model_registry.py b/src/zenml/integrations/gcp/model_registries/vertex_model_registry.py index 82f23198a78..91a82bb26bd 100644 --- a/src/zenml/integrations/gcp/model_registries/vertex_model_registry.py +++ b/src/zenml/integrations/gcp/model_registries/vertex_model_registry.py @@ -87,6 +87,9 @@ def _get_deployer_id(self) -> str: Returns: The deployer ID string + + Raises: + ValueError: If VertexModelDeployer is not active in the stack """ from zenml.integrations.gcp.model_deployers.vertex_model_deployer import ( VertexModelDeployer, @@ -281,7 +284,20 @@ def register_model( description: Optional[str] = None, metadata: Optional[Dict[str, str]] = None, ) -> RegisteredModel: - """Register a model to the Vertex AI model registry.""" + """Register a model to the Vertex AI model registry. + + Args: + name: The name of the model. + description: The description of the model. + metadata: The metadata of the model. + + Returns: + The registered model. + + Raises: + NotImplementedError: Vertex AI does not support registering models, you can only register model versions, skipping model registration... + + """ raise NotImplementedError( "Vertex AI does not support registering models, you can only register model versions, skipping model registration..." ) @@ -290,7 +306,14 @@ def delete_model( self, name: str, ) -> None: - """Delete a model and all of its versions from the Vertex AI model registry.""" + """Delete a model and all of its versions from the Vertex AI model registry. + + Args: + name: The name of the model. + + Raises: + NotImplementedError: Vertex AI does not support deleting models, skipping model deletion... + """ try: model = self._init_vertex_model(name=name) if isinstance(model, aiplatform.Model): @@ -306,13 +329,33 @@ def update_model( metadata: Optional[Dict[str, str]] = None, remove_metadata: Optional[List[str]] = None, ) -> RegisteredModel: - """Update a model in the Vertex AI model registry.""" + """Update a model in the Vertex AI model registry. + + Args: + name: The name of the model. + description: The description of the model. + metadata: The metadata of the model. + remove_metadata: The metadata to remove from the model. + + Returns: + The updated model. + + Raises: + NotImplementedError: Vertex AI does not support updating models, you can only update model versions, skipping model registration... + """ raise NotImplementedError( "Vertex AI does not support updating models, you can only update model versions, skipping model registration..." ) def get_model(self, name: str) -> RegisteredModel: - """Get a model from the Vertex AI model registry by name without needing a version.""" + """Get a model from the Vertex AI model registry by name without needing a version. + + Args: + name: The name of the model. + + Returns: + The registered model. 
+ """ try: # Fetch by display_name, and use unique labels to ensure multi-tenancy model = aiplatform.Model(display_name=name) @@ -329,7 +372,18 @@ def list_models( name: Optional[str] = None, metadata: Optional[Dict[str, str]] = None, ) -> List[RegisteredModel]: - """List models in the Vertex AI model registry.""" + """List models in the Vertex AI model registry. + + Args: + name: The name of the model. + metadata: The metadata of the model. + + Returns: + The registered models. + + Raises: + RuntimeError: If the models are not found + """ credentials, project_id = self._get_authentication() location = self.config.location # Always filter with ZenML-specific labels (including deployer id for multi-tenancy) @@ -485,6 +539,9 @@ def delete_model_version( Args: name: Model name version: Version string + + Raises: + RuntimeError: If the model version is not found """ try: model = self._init_vertex_model(name=name, version=version) @@ -503,7 +560,22 @@ def update_model_version( remove_metadata: Optional[List[str]] = None, stage: Optional[ModelVersionStage] = None, ) -> RegistryModelVersion: - """Update a model version in the Vertex AI model registry.""" + """Update a model version in the Vertex AI model registry. + + Args: + name: The name of the model. + version: The version of the model. + description: The description of the model. + metadata: The metadata of the model. + remove_metadata: The metadata to remove from the model. + stage: The stage of the model. + + Returns: + The updated model version. + + Raises: + RuntimeError: If the model version is not found + """ try: parent_model = self._init_vertex_model(name=name, version=version) assert isinstance(parent_model, aiplatform.Model) @@ -537,7 +609,18 @@ def update_model_version( def get_model_version( self, name: str, version: str ) -> RegistryModelVersion: - """Get a model version from the Vertex AI model registry using the version label.""" + """Get a model version from the Vertex AI model registry using the version label. + + Args: + name: The name of the model. + version: The version of the model. + + Returns: + The registered model version. + + Raises: + RuntimeError: If the model version is not found + """ try: parent_model = self._init_vertex_model(name=name, version=version) assert isinstance(parent_model, aiplatform.Model) @@ -557,7 +640,25 @@ def list_model_versions( order_by_date: Optional[str] = None, **kwargs: Any, ) -> List[RegistryModelVersion]: - """List model versions from the Vertex AI model registry.""" + """List model versions from the Vertex AI model registry. + + Args: + name: The name of the model. + model_source_uri: The URI of the model source. + metadata: The metadata of the model. + stage: The stage of the model. + count: The number of model versions to return. + created_after: The date after which the model versions were created. + created_before: The date before which the model versions were created. + order_by_date: The date to order the model versions by. + **kwargs: Additional arguments + + Returns: + The registered model versions. + + Raises: + RuntimeError: If the model versions are not found + """ credentials, project_id = self._get_authentication() location = self.config.location filter_expr = [] @@ -600,7 +701,19 @@ def load_model_version( version: str, **kwargs: Any, ) -> Any: - """Load a model version from the Vertex AI model registry using label-based lookup.""" + """Load a model version from the Vertex AI model registry using label-based lookup. + + Args: + name: The name of the model. 
+ version: The version of the model. + **kwargs: Additional arguments + + Returns: + The loaded model version. + + Raises: + RuntimeError: If the model version is not found + """ try: parent_model = self._init_vertex_model(name=name, version=version) assert isinstance(parent_model, aiplatform.Model) @@ -612,7 +725,14 @@ def get_model_uri_artifact_store( self, model_version: RegistryModelVersion, ) -> str: - """Get the model URI artifact store.""" + """Get the model URI artifact store. + + Args: + model_version: The model version. + + Returns: + The model URI artifact store. + """ return model_version.model_source_uri def _vertex_model_to_registry_version( @@ -666,7 +786,14 @@ def _vertex_model_to_registry_version( ) def _sanitize_model_display_name(self, name: str) -> str: - """Sanitize the model display name to conform to Vertex AI limits.""" + """Sanitize the model display name to conform to Vertex AI limits. + + Args: + name: The name of the model. + + Returns: + The sanitized model name. + """ # Use our existing sanitizer (which converts to lowercase, replaces invalid characters, etc.) name = self._sanitize_label(name) if len(name) > MAX_DISPLAY_NAME_LENGTH: diff --git a/src/zenml/integrations/gcp/services/vertex_deployment.py b/src/zenml/integrations/gcp/services/vertex_deployment.py index 162c96ef0b9..332260a2539 100644 --- a/src/zenml/integrations/gcp/services/vertex_deployment.py +++ b/src/zenml/integrations/gcp/services/vertex_deployment.py @@ -79,7 +79,11 @@ class VertexDeploymentConfig(VertexAIEndpointConfig, ServiceConfig): """Vertex AI service configurations.""" def get_vertex_deployment_labels(self) -> Dict[str, str]: - """Generate labels for the VertexAI deployment from the service configuration.""" + """Generate labels for the VertexAI deployment from the service configuration. + + Returns: + A dictionary of labels for the VertexAI deployment. + """ labels = self.labels or {} labels["managed_by"] = "zenml" if self.pipeline_name: @@ -152,13 +156,22 @@ def _initialize_gcp_clients(self) -> None: ) def __init__(self, config: VertexDeploymentConfig, **attrs: Any): - """Initialize the Vertex AI deployment service.""" + """Initialize the Vertex AI deployment service. + + Args: + config: The configuration for the Vertex AI deployment service. + **attrs: Additional attributes for the service. + """ super().__init__(config=config, **attrs) self._initialize_gcp_clients() @property def prediction_url(self) -> Optional[str]: - """The prediction URI exposed by the prediction service.""" + """The prediction URI exposed by the prediction service. + + Returns: + The prediction URI exposed by the prediction service. + """ endpoints = self.get_endpoints() if not endpoints: return None @@ -201,17 +214,21 @@ def _generate_endpoint_name(self) -> str: return f"{sanitized_model_name}-{str(self.uuid)[:UUID_SLICE_LENGTH]}" def _get_model_id(self, name: str) -> str: - """Helper to construct a full model ID from a given model name.""" + """Helper to construct a full model ID from a given model name. + + Args: + name: The name of the model. + + Returns: + The full model ID. + """ return f"projects/{self._project_id}/locations/{self.config.location}/models/{name}" - + def _verify_model_exists(self) -> aiplatform.Model: """Verify the model exists and return it. 
Returns: Vertex AI Model instance - - Raises: - RuntimeError: If model not found """ if self.config.model_name.startswith("projects/"): model_name = self.config.model_name @@ -236,7 +253,12 @@ def _verify_model_exists(self) -> aiplatform.Model: def _deploy_model( self, model: aiplatform.Model, endpoint: aiplatform.Endpoint ) -> None: - """Deploy model to Vertex AI endpoint.""" + """Deploy model to Vertex AI endpoint. + + Args: + model: The model to deploy. + endpoint: The endpoint to deploy the model to. + """ # Prepare deployment configuration deploy_kwargs = { "model": model, @@ -302,7 +324,11 @@ def _deploy_model( endpoint.deploy(**deploy_kwargs) def provision(self) -> None: - """Provision or update remote Vertex AI deployment instance.""" + """Provision or update remote Vertex AI deployment instance. + + Raises: + RuntimeError: If model not found + """ # First verify model exists model = self._verify_model_exists() logger.info(f"Found model to deploy: {model.resource_name}") @@ -405,6 +431,10 @@ def get_logs( tail: Optional[int] = None, ) -> Generator[str, bool, None]: """Retrieve logs for the Vertex AI deployment (not supported). + + Args: + follow: Whether to follow the logs. + tail: The number of lines to tail. Yields: Log entries as strings, but logs are not supported for Vertex AI. @@ -445,6 +475,10 @@ def check_status(self) -> Tuple[ServiceState, str]: @property def is_running(self) -> bool: - """Check if the service is running.""" + """Check if the service is running. + + Returns: + True if the service is running, False otherwise. + """ self.update_status() return self.status.state == ServiceState.ACTIVE From c7a3e7ef709d68dce54e531f1416228edfdc2119 Mon Sep 17 00:00:00 2001 From: Safoine El khabich Date: Tue, 8 Apr 2025 14:03:59 +0100 Subject: [PATCH 34/43] Refactor docstrings in VertexAIModelRegistry and VertexDeploymentService for improved readability and consistency --- .../model_registries/vertex_model_registry.py | 48 +++++++++---------- .../gcp/services/vertex_deployment.py | 33 ++++++------- 2 files changed, 41 insertions(+), 40 deletions(-) diff --git a/src/zenml/integrations/gcp/model_registries/vertex_model_registry.py b/src/zenml/integrations/gcp/model_registries/vertex_model_registry.py index 91a82bb26bd..1d2505a87e3 100644 --- a/src/zenml/integrations/gcp/model_registries/vertex_model_registry.py +++ b/src/zenml/integrations/gcp/model_registries/vertex_model_registry.py @@ -87,7 +87,7 @@ def _get_deployer_id(self) -> str: Returns: The deployer ID string - + Raises: ValueError: If VertexModelDeployer is not active in the stack """ @@ -285,7 +285,7 @@ def register_model( metadata: Optional[Dict[str, str]] = None, ) -> RegisteredModel: """Register a model to the Vertex AI model registry. - + Args: name: The name of the model. description: The description of the model. @@ -293,10 +293,10 @@ def register_model( Returns: The registered model. - + Raises: NotImplementedError: Vertex AI does not support registering models, you can only register model versions, skipping model registration... - + """ raise NotImplementedError( "Vertex AI does not support registering models, you can only register model versions, skipping model registration..." @@ -307,7 +307,7 @@ def delete_model( name: str, ) -> None: """Delete a model and all of its versions from the Vertex AI model registry. - + Args: name: The name of the model. @@ -330,7 +330,7 @@ def update_model( remove_metadata: Optional[List[str]] = None, ) -> RegisteredModel: """Update a model in the Vertex AI model registry. 
- + Args: name: The name of the model. description: The description of the model. @@ -349,7 +349,7 @@ def update_model( def get_model(self, name: str) -> RegisteredModel: """Get a model from the Vertex AI model registry by name without needing a version. - + Args: name: The name of the model. @@ -373,14 +373,14 @@ def list_models( metadata: Optional[Dict[str, str]] = None, ) -> List[RegisteredModel]: """List models in the Vertex AI model registry. - + Args: name: The name of the model. metadata: The metadata of the model. Returns: The registered models. - + Raises: RuntimeError: If the models are not found """ @@ -497,12 +497,12 @@ def register_model_version( # Include explanation settings if provided in the config. if self.config.explanation: - upload_arguments["explanation_metadata"] = ( - self.config.explanation.metadata - ) - upload_arguments["explanation_parameters"] = ( - self.config.explanation.parameters - ) + upload_arguments[ + "explanation_metadata" + ] = self.config.explanation.metadata + upload_arguments[ + "explanation_parameters" + ] = self.config.explanation.parameters # Remove any parameters that are None to avoid passing them to upload. upload_arguments = { @@ -539,7 +539,7 @@ def delete_model_version( Args: name: Model name version: Version string - + Raises: RuntimeError: If the model version is not found """ @@ -561,7 +561,7 @@ def update_model_version( stage: Optional[ModelVersionStage] = None, ) -> RegistryModelVersion: """Update a model version in the Vertex AI model registry. - + Args: name: The name of the model. version: The version of the model. @@ -610,14 +610,14 @@ def get_model_version( self, name: str, version: str ) -> RegistryModelVersion: """Get a model version from the Vertex AI model registry using the version label. - + Args: name: The name of the model. version: The version of the model. Returns: The registered model version. - + Raises: RuntimeError: If the model version is not found """ @@ -641,7 +641,7 @@ def list_model_versions( **kwargs: Any, ) -> List[RegistryModelVersion]: """List model versions from the Vertex AI model registry. - + Args: name: The name of the model. model_source_uri: The URI of the model source. @@ -655,7 +655,7 @@ def list_model_versions( Returns: The registered model versions. - + Raises: RuntimeError: If the model versions are not found """ @@ -702,7 +702,7 @@ def load_model_version( **kwargs: Any, ) -> Any: """Load a model version from the Vertex AI model registry using label-based lookup. - + Args: name: The name of the model. version: The version of the model. @@ -726,7 +726,7 @@ def get_model_uri_artifact_store( model_version: RegistryModelVersion, ) -> str: """Get the model URI artifact store. - + Args: model_version: The model version. @@ -787,7 +787,7 @@ def _vertex_model_to_registry_version( def _sanitize_model_display_name(self, name: str) -> str: """Sanitize the model display name to conform to Vertex AI limits. - + Args: name: The name of the model. diff --git a/src/zenml/integrations/gcp/services/vertex_deployment.py b/src/zenml/integrations/gcp/services/vertex_deployment.py index 332260a2539..d134155cbc4 100644 --- a/src/zenml/integrations/gcp/services/vertex_deployment.py +++ b/src/zenml/integrations/gcp/services/vertex_deployment.py @@ -80,7 +80,7 @@ class VertexDeploymentConfig(VertexAIEndpointConfig, ServiceConfig): def get_vertex_deployment_labels(self) -> Dict[str, str]: """Generate labels for the VertexAI deployment from the service configuration. 
- + Returns: A dictionary of labels for the VertexAI deployment. """ @@ -151,13 +151,14 @@ def _initialize_gcp_clients(self) -> None: ) # Get credentials from model deployer - self._credentials, self._project_id = ( - model_deployer._get_authentication() - ) + ( + self._credentials, + self._project_id, + ) = model_deployer._get_authentication() def __init__(self, config: VertexDeploymentConfig, **attrs: Any): """Initialize the Vertex AI deployment service. - + Args: config: The configuration for the Vertex AI deployment service. **attrs: Additional attributes for the service. @@ -168,7 +169,7 @@ def __init__(self, config: VertexDeploymentConfig, **attrs: Any): @property def prediction_url(self) -> Optional[str]: """The prediction URI exposed by the prediction service. - + Returns: The prediction URI exposed by the prediction service. """ @@ -215,15 +216,15 @@ def _generate_endpoint_name(self) -> str: def _get_model_id(self, name: str) -> str: """Helper to construct a full model ID from a given model name. - + Args: name: The name of the model. - + Returns: The full model ID. """ return f"projects/{self._project_id}/locations/{self.config.location}/models/{name}" - + def _verify_model_exists(self) -> aiplatform.Model: """Verify the model exists and return it. @@ -254,7 +255,7 @@ def _deploy_model( self, model: aiplatform.Model, endpoint: aiplatform.Endpoint ) -> None: """Deploy model to Vertex AI endpoint. - + Args: model: The model to deploy. endpoint: The endpoint to deploy the model to. @@ -313,9 +314,9 @@ def _deploy_model( # Add encryption key if specified if self.config.encryption_spec_key_name: - deploy_kwargs["encryption_spec_key_name"] = ( - self.config.encryption_spec_key_name - ) + deploy_kwargs[ + "encryption_spec_key_name" + ] = self.config.encryption_spec_key_name # Deploy model logger.info( @@ -325,7 +326,7 @@ def _deploy_model( def provision(self) -> None: """Provision or update remote Vertex AI deployment instance. - + Raises: RuntimeError: If model not found """ @@ -431,7 +432,7 @@ def get_logs( tail: Optional[int] = None, ) -> Generator[str, bool, None]: """Retrieve logs for the Vertex AI deployment (not supported). - + Args: follow: Whether to follow the logs. tail: The number of lines to tail. @@ -476,7 +477,7 @@ def check_status(self) -> Tuple[ServiceState, str]: @property def is_running(self) -> bool: """Check if the service is running. - + Returns: True if the service is running, False otherwise. """ From ec0ae9c0b364767017680545a730a1f916be9867 Mon Sep 17 00:00:00 2001 From: Safoine El khabich Date: Tue, 8 Apr 2025 14:17:38 +0100 Subject: [PATCH 35/43] format --- .../gcp/model_registries/vertex_model_registry.py | 12 ++++++------ .../integrations/gcp/services/vertex_deployment.py | 6 +++--- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/zenml/integrations/gcp/model_registries/vertex_model_registry.py b/src/zenml/integrations/gcp/model_registries/vertex_model_registry.py index 1d2505a87e3..c64725fc06f 100644 --- a/src/zenml/integrations/gcp/model_registries/vertex_model_registry.py +++ b/src/zenml/integrations/gcp/model_registries/vertex_model_registry.py @@ -497,12 +497,12 @@ def register_model_version( # Include explanation settings if provided in the config. 
if self.config.explanation: - upload_arguments[ - "explanation_metadata" - ] = self.config.explanation.metadata - upload_arguments[ - "explanation_parameters" - ] = self.config.explanation.parameters + upload_arguments["explanation_metadata"] = ( + self.config.explanation.metadata + ) + upload_arguments["explanation_parameters"] = ( + self.config.explanation.parameters + ) # Remove any parameters that are None to avoid passing them to upload. upload_arguments = { diff --git a/src/zenml/integrations/gcp/services/vertex_deployment.py b/src/zenml/integrations/gcp/services/vertex_deployment.py index d134155cbc4..9f6d4c0596e 100644 --- a/src/zenml/integrations/gcp/services/vertex_deployment.py +++ b/src/zenml/integrations/gcp/services/vertex_deployment.py @@ -314,9 +314,9 @@ def _deploy_model( # Add encryption key if specified if self.config.encryption_spec_key_name: - deploy_kwargs[ - "encryption_spec_key_name" - ] = self.config.encryption_spec_key_name + deploy_kwargs["encryption_spec_key_name"] = ( + self.config.encryption_spec_key_name + ) # Deploy model logger.info( From d52729383afdea086832c8ebfceb7988354eb009 Mon Sep 17 00:00:00 2001 From: Safoine El khabich Date: Wed, 9 Apr 2025 14:35:44 +0100 Subject: [PATCH 36/43] Refactor model ID generation and improve error handling in VertexAIModelRegistry and VertexDeploymentService --- .../model_registries/vertex_model_registry.py | 33 +++++++++---------- .../gcp/services/vertex_deployment.py | 8 +++-- 2 files changed, 22 insertions(+), 19 deletions(-) diff --git a/src/zenml/integrations/gcp/model_registries/vertex_model_registry.py b/src/zenml/integrations/gcp/model_registries/vertex_model_registry.py index c64725fc06f..44e22e40432 100644 --- a/src/zenml/integrations/gcp/model_registries/vertex_model_registry.py +++ b/src/zenml/integrations/gcp/model_registries/vertex_model_registry.py @@ -217,10 +217,11 @@ def _get_model_id(self, name: str) -> str: name: Model name Returns: - Full model ID in format: projects/{project}/locations/{location}/models/{model} + str: Full model ID in format: projects/{project}/locations/{location}/models/{model} """ _, project_id = self._get_authentication() - return f"projects/{project_id}/locations/{self.config.location}/models/{name}" + model_id = f"projects/{project_id}/locations/{self.config.location}/models/{name}" + return model_id def _get_model_version_id(self, model_id: str, version: str) -> str: """Get the full Vertex AI model version ID. @@ -230,9 +231,10 @@ def _get_model_version_id(self, model_id: str, version: str) -> str: version: Version string Returns: - Full model version ID in format: {model_id}/versions/{version} + str: Full model version ID in format: {model_id}/versions/{version} """ - return f"{model_id}/versions/{version}" + model_version_id = f"{model_id}/versions/{version}" + return model_version_id def _init_vertex_model( self, name: str, version: Optional[str] = None @@ -291,9 +293,6 @@ def register_model( description: The description of the model. metadata: The metadata of the model. - Returns: - The registered model. - Raises: NotImplementedError: Vertex AI does not support registering models, you can only register model versions, skipping model registration... @@ -312,7 +311,7 @@ def delete_model( name: The name of the model. Raises: - NotImplementedError: Vertex AI does not support deleting models, skipping model deletion... 
+            RuntimeError: if model deletion fails
         """
         try:
             model = self._init_vertex_model(name=name)
             if isinstance(model, aiplatform.Model):
@@ -337,9 +336,6 @@ def update_model(
             metadata: The metadata of the model.
             remove_metadata: The metadata to remove from the model.
 
-        Returns:
-            The updated model.
-
         Raises:
             NotImplementedError: Vertex AI does not support updating models, you can only update model versions, skipping model registration...
         """
@@ -355,17 +351,20 @@ def get_model(self, name: str) -> RegisteredModel:
 
         Returns:
             The registered model.
+
+        Raises:
+            RuntimeError: if model retrieval fails
         """
         try:
             # Fetch by display_name, and use unique labels to ensure multi-tenancy
             model = aiplatform.Model(display_name=name)
-            return RegisteredModel(
-                name=model.display_name,
-                description=model.description,
-                metadata=model.labels,
-            )
         except Exception as e:
             raise RuntimeError(f"Failed to get model: {str(e)}")
+        return RegisteredModel(
+            name=model.display_name,
+            description=model.description,
+            metadata=model.labels,
+        )
 
     def list_models(
         self,
@@ -602,9 +601,9 @@ def update_model_version(
             if stage:
                 labels["stage"] = stage.value.lower()
             target_version.update(description=description, labels=labels)
-            return self.get_model_version(name, version)
         except Exception as e:
             raise RuntimeError(f"Failed to update model version: {str(e)}")
+        return self.get_model_version(name, version)
 
     def get_model_version(
diff --git a/src/zenml/integrations/gcp/services/vertex_deployment.py b/src/zenml/integrations/gcp/services/vertex_deployment.py
index 9f6d4c0596e..c025da216a2 100644
--- a/src/zenml/integrations/gcp/services/vertex_deployment.py
+++ b/src/zenml/integrations/gcp/services/vertex_deployment.py
@@ -223,7 +223,8 @@ def _get_model_id(self, name: str) -> str:
         Returns:
             The full model ID.
         """
-        return f"projects/{self._project_id}/locations/{self.config.location}/models/{name}"
+        model_id = f"projects/{self._project_id}/locations/{self.config.location}/models/{name}"
+        return model_id
 
     def _verify_model_exists(self) -> aiplatform.Model:
         """Verify the model exists and return it.
@@ -328,7 +329,7 @@ def provision(self) -> None:
         """Provision or update remote Vertex AI deployment instance.
 
         Raises:
-            RuntimeError: If model not found
+            Exception: if model deployment fails
         """
         # First verify model exists
         model = self._verify_model_exists()
@@ -377,6 +378,9 @@ def deprovision(self, force: bool = False) -> None:
 
         Args:
             force: Whether to force deprovision
+
+        Raises:
+            RuntimeError: if endpoint deletion fails
         """
         endpoints = self.get_endpoints()
         if endpoints:

From b2da984bf7ac3e3e73920c29bf39bd266663868d Mon Sep 17 00:00:00 2001
From: Safoine El khabich
Date: Wed, 9 Apr 2025 18:56:50 +0100
Subject: [PATCH 37/43] fix sklearn docstring

---
 .../integrations/sklearn/materializers/sklearn_materializer.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/zenml/integrations/sklearn/materializers/sklearn_materializer.py b/src/zenml/integrations/sklearn/materializers/sklearn_materializer.py
index df8cf57f304..3a436fb996e 100644
--- a/src/zenml/integrations/sklearn/materializers/sklearn_materializer.py
+++ b/src/zenml/integrations/sklearn/materializers/sklearn_materializer.py
@@ -68,6 +68,9 @@ def load(self, data_type: Type[Any]) -> Any:
 
         Returns:
             The loaded sklearn model.
+ + Raises: + FileNotFoundError: if model file not found """ # First try to load from model.pkl model_filepath = os.path.join(self.uri, SKLEARN_MODEL_FILENAME) From ba602221a398097d1a896eb425d8aabfcbf24ccf Mon Sep 17 00:00:00 2001 From: Safoine El khabich Date: Wed, 9 Apr 2025 19:09:00 +0100 Subject: [PATCH 38/43] fix: remove unnecessary blank line in docstring of SklearnMaterializer --- .../integrations/sklearn/materializers/sklearn_materializer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/zenml/integrations/sklearn/materializers/sklearn_materializer.py b/src/zenml/integrations/sklearn/materializers/sklearn_materializer.py index 3a436fb996e..a796deb4863 100644 --- a/src/zenml/integrations/sklearn/materializers/sklearn_materializer.py +++ b/src/zenml/integrations/sklearn/materializers/sklearn_materializer.py @@ -68,7 +68,7 @@ def load(self, data_type: Type[Any]) -> Any: Returns: The loaded sklearn model. - + Raises: FileNotFoundError: if model file not found """ From f0632b2bbc82d6f15e66bbf676a3542f85ea9975 Mon Sep 17 00:00:00 2001 From: Safoine El Khabich <34200873+safoinme@users.noreply.github.com> Date: Wed, 23 Apr 2025 20:51:25 +0100 Subject: [PATCH 39/43] Update src/zenml/services/service.py Co-authored-by: Stefan Nica --- src/zenml/services/service.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/zenml/services/service.py b/src/zenml/services/service.py index 9ae968292d4..e32cf32d231 100644 --- a/src/zenml/services/service.py +++ b/src/zenml/services/service.py @@ -121,7 +121,6 @@ class ServiceConfig(BaseTypedModel): model_name: str = "" model_version: str = "" service_name: str = "" - zenml_model: Optional[Model] = None # TODO: In Pydantic v2, the `model_` is a protected namespaces for all # fields defined under base models. If not handled, this raises a warning. 
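As an aside on the `model_` TODO kept in the context above: in Pydantic v2, field names beginning with `model_` clash with the protected `model_` namespace and emit a warning at class-definition time unless the namespace is cleared. A minimal stand-in showing the behavior the comment alludes to (the class below is illustrative, not the actual `ServiceConfig`):

```python
from pydantic import BaseModel, ConfigDict


class DemoServiceConfig(BaseModel):
    # Clearing the protected namespaces suppresses the
    # 'Field "model_name" has conflict with protected namespace' warning.
    model_config = ConfigDict(protected_namespaces=())

    model_name: str = ""
    model_version: str = ""
```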
From 1cbf2c09ac6a4903f6eaaafed4035141312b3e32 Mon Sep 17 00:00:00 2001 From: Safoine El Khabich <34200873+safoinme@users.noreply.github.com> Date: Wed, 23 Apr 2025 20:51:35 +0100 Subject: [PATCH 40/43] Update tests/unit/services/test_service.py Co-authored-by: Stefan Nica --- tests/unit/services/test_service.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/services/test_service.py b/tests/unit/services/test_service.py index 065d9c33468..4c6f47e3894 100644 --- a/tests/unit/services/test_service.py +++ b/tests/unit/services/test_service.py @@ -63,7 +63,7 @@ def base_service(): uuid=UUID("12345678-1234-5678-1234-567812345678"), admin_state=ServiceState.ACTIVE, config=ServiceConfig( - name="test_service", param1="value1", param2=2, zenml_model=None + name="test_service", param1="value1", param2=2 ), status=ServiceStatus( state=ServiceState.ACTIVE, From 2f241bb09ac6706b00abb2a0269bd604141e95ed Mon Sep 17 00:00:00 2001 From: Safoine El Khabich <34200873+safoinme@users.noreply.github.com> Date: Wed, 23 Apr 2025 20:54:30 +0100 Subject: [PATCH 41/43] Update src/zenml/integrations/gcp/services/vertex_deployment.py Co-authored-by: Stefan Nica --- src/zenml/integrations/gcp/services/vertex_deployment.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/zenml/integrations/gcp/services/vertex_deployment.py b/src/zenml/integrations/gcp/services/vertex_deployment.py index c025da216a2..8833d34d562 100644 --- a/src/zenml/integrations/gcp/services/vertex_deployment.py +++ b/src/zenml/integrations/gcp/services/vertex_deployment.py @@ -242,7 +242,6 @@ def _verify_model_exists(self) -> aiplatform.Model: logger.info(f"Model name: {model_name}") logger.info(f"Project ID: {self._project_id}") logger.info(f"Location: {self.config.location}") - logger.info(f"Credentials: {self._credentials}") model = aiplatform.Model( model_name=model_name, project=self._project_id, From a3adaa66b1765e10f1329f856e005f7feb20c492 Mon Sep 17 00:00:00 2001 From: Safoine El Khabich <34200873+safoinme@users.noreply.github.com> Date: Wed, 23 Apr 2025 21:11:28 +0100 Subject: [PATCH 42/43] Update src/zenml/integrations/gcp/model_registries/vertex_model_registry.py Co-authored-by: Stefan Nica --- .../model_registries/vertex_model_registry.py | 40 ------------------- 1 file changed, 40 deletions(-) diff --git a/src/zenml/integrations/gcp/model_registries/vertex_model_registry.py b/src/zenml/integrations/gcp/model_registries/vertex_model_registry.py index 44e22e40432..78180572cda 100644 --- a/src/zenml/integrations/gcp/model_registries/vertex_model_registry.py +++ b/src/zenml/integrations/gcp/model_registries/vertex_model_registry.py @@ -101,46 +101,6 @@ def _get_deployer_id(self) -> str: raise ValueError("VertexModelDeployer is not active in the stack.") return str(model_deployer.id) - def _encode_name_version(self, name: str, version: str) -> str: - """Encode model name and version into a Vertex AI compatible format. - - Args: - name: Model name - version: Model version - - Returns: - Encoded string suitable for Vertex AI - """ - # Base64 encode to handle special characters while preserving uniqueness - encoded = base64.b64encode(f"{name}:{version}".encode()).decode() - # Make it URL and label safe - encoded = encoded.replace("+", "-").replace("/", "_").replace("=", "") - return encoded[:MAX_DISPLAY_NAME_LENGTH] - - def _decode_name_version(self, encoded: str) -> Tuple[str, str]: - """Decode model name and version from encoded format. 
- - Args: - encoded: The encoded string - - Returns: - Tuple of (name, version) - """ - # Add back padding - padding = 4 - (len(encoded) % 4) - if padding != 4: - encoded += "=" * padding - # Restore special chars - encoded = encoded.replace("-", "+").replace("_", "/") - try: - decoded = base64.b64decode(encoded).decode() - name, version = decoded.split(":", 1) - return name, version - except Exception as e: - logger.warning( - f"Failed to decode name/version from {encoded}: {e}" - ) - return encoded, "unknown" def _prepare_labels( self, From c92c99150f37e3b089b87c6e99331e698a81fa35 Mon Sep 17 00:00:00 2001 From: Safoine El khabich Date: Mon, 28 Apr 2025 13:44:23 +0200 Subject: [PATCH 43/43] stefan's review --- .../flavors/vertex_model_deployer_flavor.py | 6 +- .../flavors/vertex_model_registry_flavor.py | 6 +- .../model_deployers/vertex_model_deployer.py | 12 - .../model_registries/vertex_model_registry.py | 330 +++++++++++------- .../gcp/services/vertex_deployment.py | 25 +- src/zenml/integrations/gcp/utils.py | 42 +++ 6 files changed, 251 insertions(+), 170 deletions(-) create mode 100644 src/zenml/integrations/gcp/utils.py diff --git a/src/zenml/integrations/gcp/flavors/vertex_model_deployer_flavor.py b/src/zenml/integrations/gcp/flavors/vertex_model_deployer_flavor.py index 7c450f51b09..afd249385f3 100644 --- a/src/zenml/integrations/gcp/flavors/vertex_model_deployer_flavor.py +++ b/src/zenml/integrations/gcp/flavors/vertex_model_deployer_flavor.py @@ -20,7 +20,7 @@ VERTEX_MODEL_DEPLOYER_FLAVOR, ) from zenml.integrations.gcp.flavors.vertex_base_config import ( - VertexAIEndpointConfig, + VertexAIBaseSettings, ) from zenml.integrations.gcp.google_credentials_mixin import ( GoogleCredentialsConfigMixin, @@ -42,14 +42,14 @@ class VertexModelDeployerConfig( BaseModelDeployerConfig, GoogleCredentialsConfigMixin, - VertexAIEndpointConfig, + VertexAIBaseSettings, ): """Configuration for the Vertex AI model deployer. This configuration combines: - Base model deployer configuration - Google Cloud authentication - - Vertex AI endpoint configuration + - Vertex AI Base configuration """ diff --git a/src/zenml/integrations/gcp/flavors/vertex_model_registry_flavor.py b/src/zenml/integrations/gcp/flavors/vertex_model_registry_flavor.py index 8524d407e2f..5055d3ae5dc 100644 --- a/src/zenml/integrations/gcp/flavors/vertex_model_registry_flavor.py +++ b/src/zenml/integrations/gcp/flavors/vertex_model_registry_flavor.py @@ -20,7 +20,7 @@ VERTEX_MODEL_REGISTRY_FLAVOR, ) from zenml.integrations.gcp.flavors.vertex_base_config import ( - VertexAIModelConfig, + VertexAIBaseSettings, ) from zenml.integrations.gcp.google_credentials_mixin import ( GoogleCredentialsConfigMixin, @@ -40,14 +40,14 @@ class VertexAIModelRegistryConfig( BaseModelRegistryConfig, GoogleCredentialsConfigMixin, - VertexAIModelConfig, + VertexAIBaseSettings, ): """Configuration for the VertexAI model registry. 
This configuration combines: - Base model registry configuration - Google Cloud authentication - - Vertex AI model configuration + - Vertex AI Base configuration """ diff --git a/src/zenml/integrations/gcp/model_deployers/vertex_model_deployer.py b/src/zenml/integrations/gcp/model_deployers/vertex_model_deployer.py index 0256dde9368..d31e5bc6ee7 100644 --- a/src/zenml/integrations/gcp/model_deployers/vertex_model_deployer.py +++ b/src/zenml/integrations/gcp/model_deployers/vertex_model_deployer.py @@ -132,9 +132,6 @@ def _create_deployment_service( Returns: The VertexDeploymentService instance """ - # Initialize client with fresh credentials - self._init_vertex_client() - # Create service instance service = VertexDeploymentService(uuid=id, config=config) logger.info("Creating Vertex AI deployment service with ID %s", id) @@ -197,9 +194,6 @@ def perform_stop_model( Returns: The stopped service """ - # Initialize client with fresh credentials - self._init_vertex_client() - service.stop(timeout=timeout, force=force) return service @@ -217,9 +211,6 @@ def perform_start_model( Returns: The started service """ - # Initialize client with fresh credentials - self._init_vertex_client() - service.start(timeout=timeout) return service @@ -236,9 +227,6 @@ def perform_delete_model( timeout: Timeout for delete operation force: Whether to force delete """ - # Initialize client with fresh credentials - self._init_vertex_client() - service = cast(VertexDeploymentService, service) service.stop(timeout=timeout, force=force) diff --git a/src/zenml/integrations/gcp/model_registries/vertex_model_registry.py b/src/zenml/integrations/gcp/model_registries/vertex_model_registry.py index 44e22e40432..0c9530905ac 100644 --- a/src/zenml/integrations/gcp/model_registries/vertex_model_registry.py +++ b/src/zenml/integrations/gcp/model_registries/vertex_model_registry.py @@ -14,19 +14,21 @@ """Vertex AI model registry integration for ZenML.""" import base64 -import re from datetime import datetime from typing import Any, Dict, List, Optional, Tuple, cast from google.cloud import aiplatform -from zenml.client import Client +from zenml.integrations.gcp.flavors.vertex_base_config import ( + VertexAIModelConfig, +) from zenml.integrations.gcp.flavors.vertex_model_registry_flavor import ( VertexAIModelRegistryConfig, ) from zenml.integrations.gcp.google_credentials_mixin import ( GoogleCredentialsMixin, ) +from zenml.integrations.gcp.utils import sanitize_vertex_label from zenml.logger import get_logger from zenml.model_registries.base_model_registry import ( BaseModelRegistry, @@ -45,6 +47,14 @@ MAX_DISPLAY_NAME_LENGTH = 128 +# Helper function to safely get values from metadata dict +def _get_metadata_value( + metadata: Dict[str, Any], key: str, default: Any = None +) -> Any: + """Safely retrieves a value from a dictionary.""" + return metadata.get(key, default) + + class VertexAIModelRegistry(BaseModelRegistry, GoogleCredentialsMixin): """Register models using Vertex AI.""" @@ -57,50 +67,6 @@ def config(self) -> VertexAIModelRegistryConfig: """ return cast(VertexAIModelRegistryConfig, self._config) - def _sanitize_label(self, value: str) -> str: - """Sanitize a label value to comply with Vertex AI requirements. 
- - Args: - value: The label value to sanitize - - Returns: - Sanitized label value - """ - if not value: - return "" - - # Convert to lowercase - value = value.lower() - - # Replace any character that's not lowercase letter, number, dash or underscore - value = re.sub(r"[^a-z0-9\-_]", "-", value) - - # Ensure it starts with a letter/number by prepending 'x' if needed - if not value[0].isalnum(): - value = f"x{value}" - - # Truncate to 63 chars to stay under limit - return value[:63] - - def _get_deployer_id(self) -> str: - """Get the current ZenML server/deployer ID for multi-tenancy support. - - Returns: - The deployer ID string - - Raises: - ValueError: If VertexModelDeployer is not active in the stack - """ - from zenml.integrations.gcp.model_deployers.vertex_model_deployer import ( - VertexModelDeployer, - ) - - client = Client() - model_deployer = client.active_stack.model_deployer - if not isinstance(model_deployer, VertexModelDeployer): - raise ValueError("VertexModelDeployer is not active in the stack.") - return str(model_deployer.id) - def _encode_name_version(self, name: str, version: str) -> str: """Encode model name and version into a Vertex AI compatible format. @@ -160,33 +126,16 @@ def _prepare_labels( # Add base labels labels["managed_by"] = "zenml" - labels["deployer_id"] = self._sanitize_label(self._get_deployer_id()) - # Add stage if provided if stage: - labels["stage"] = self._sanitize_label(stage.value) + labels["stage"] = sanitize_vertex_label(stage.value) # Process metadata if provided if metadata: - # If metadata is not a dict (e.g. a pydantic model), convert it using .dict() - if not isinstance(metadata, dict): - try: - metadata = metadata.dict() - except Exception as e: - logger.warning(f"Unable to convert metadata to dict: {e}") - metadata = {} for key, value in metadata.items(): - # Skip None values - if value is None: - continue - # Convert complex objects to string - if isinstance(value, (dict, list)): - value = ( - "x" # Simplify complex objects to avoid length issues - ) # Sanitize both key and value - sanitized_key = self._sanitize_label(str(key)) - sanitized_value = self._sanitize_label(str(value)) + sanitized_key = sanitize_vertex_label(str(key)) + sanitized_value = sanitize_vertex_label(str(value)) # Only add if both key and value are valid if sanitized_key and sanitized_value: labels[sanitized_key] = sanitized_value @@ -195,9 +144,7 @@ def _prepare_labels( if len(labels) > 64: # Keep essential labels and truncate the rest essential_labels = { - k: labels[k] - for k in ["managed_by", "deployer_id", "stage"] - if k in labels + k: labels[k] for k in ["managed_by", "stage"] if k in labels } # Add remaining labels up to limit remaining_slots = 64 - len(essential_labels) @@ -264,7 +211,7 @@ def _init_vertex_model( # Attempt to find an existing model by display_name existing_models = aiplatform.Model.list( filter=f"display_name={name}", - project=self.config.project_id or project_id, + project=self.config.project or project_id, location=location, ) if existing_models: @@ -413,6 +360,77 @@ def list_models( except Exception as e: raise RuntimeError(f"Failed to list models: {str(e)}") + def _extract_vertex_config_from_metadata( + self, metadata: Dict[str, Any] + ) -> "VertexAIModelConfig": + """Extracts Vertex AI specific configuration from metadata dictionary. + + Args: + metadata: The metadata dictionary potentially containing config overrides. + + Returns: + A VertexAIModelConfig instance populated from metadata. 
+        """
+        # Use the module-level helper function
+        container_config_dict = _get_metadata_value(metadata, "container", {})
+        container_config = None
+        if isinstance(container_config_dict, dict) and container_config_dict:
+            from zenml.integrations.gcp.flavors.vertex_base_config import (
+                VertexAIContainerSpec,
+            )
+
+            container_config = VertexAIContainerSpec(**container_config_dict)
+
+        explanation_config_dict = _get_metadata_value(
+            metadata, "explanation", {}
+        )
+        explanation_config = None
+        if (
+            isinstance(explanation_config_dict, dict)
+            and explanation_config_dict
+        ):
+            from zenml.integrations.gcp.flavors.vertex_base_config import (
+                VertexAIExplanationSpec,
+            )
+
+            explanation_config = VertexAIExplanationSpec(
+                **explanation_config_dict
+            )
+
+        # Build the model config from the metadata overrides
+        return VertexAIModelConfig(
+            # Model metadata overrides
+            display_name=_get_metadata_value(metadata, "display_name"),
+            description=_get_metadata_value(metadata, "description"),
+            version_description=_get_metadata_value(
+                metadata, "version_description"
+            ),
+            version_aliases=_get_metadata_value(metadata, "version_aliases"),
+            # Model artifacts overrides
+            artifact_uri=_get_metadata_value(metadata, "artifact_uri"),
+            # Model versioning overrides
+            is_default_version=_get_metadata_value(
+                metadata, "is_default_version"
+            ),
+            # Model formats overrides (less likely used here, but for completeness)
+            supported_deployment_resources_types=_get_metadata_value(
+                metadata, "supported_deployment_resources_types"
+            ),
+            supported_input_storage_formats=_get_metadata_value(
+                metadata, "supported_input_storage_formats"
+            ),
+            supported_output_storage_formats=_get_metadata_value(
+                metadata, "supported_output_storage_formats"
+            ),
+            # Container and Explanation config (parsed above)
+            container=container_config,
+            explanation=explanation_config,
+            # Encryption override (sourced from metadata)
+            encryption_spec_key_name=_get_metadata_value(
+                metadata, "encryption_spec_key_name"
+            ),
+        )
+
     def register_model_version(
         self,
         name: str,
@@ -427,10 +445,13 @@ def register_model_version(
         Args:
             name: Model name
             version: Model version
-            model_source_uri: URI to model artifacts
-            description: Model description
+            model_source_uri: URI to model artifacts (overrides metadata if provided)
+            description: Model description (overrides metadata if provided)
             metadata: Model metadata (expected to be a ModelRegistryModelMetadata or
-                equivalent serializable dict)
+                equivalent serializable dict). Can contain overrides for
+                Vertex AI model parameters like 'display_name', 'artifact_uri',
+                'version_description', 'container', 'explanation', or
+                'encryption_spec_key_name'.
**kwargs: Additional arguments Returns: @@ -440,67 +461,89 @@ def register_model_version( metadata_dict = metadata.model_dump() if metadata else {} labels = self._prepare_labels(metadata_dict) if version: - labels["user_version"] = self._sanitize_label(version) + labels["user_version"] = sanitize_vertex_label(version) - # Get the container image from the config if available, otherwise fallback to metadata - if ( - hasattr(self.config, "container") - and self.config.container - and self.config.container.image_uri - ): - serving_container_image_uri = self.config.container.image_uri - else: - serving_container_image_uri = metadata_dict.get( - "serving_container_image_uri", - "europe-docker.pkg.dev/vertex-ai/prediction/sklearn-cpu.1-3:latest", + # Extract Vertex AI specific config overrides from metadata + vertex_config = self._extract_vertex_config_from_metadata( + metadata_dict + ) + + # Use a consistently sanitized display name. Prioritize metadata, then name arg. + model_display_name_override = vertex_config.display_name + model_display_name = ( + model_display_name_override + or self._sanitize_model_display_name(name) + ) + + # Determine serving container image URI: prioritize metadata container config, + # then metadata direct key, then default. + serving_container_image_uri = "europe-docker.pkg.dev/vertex-ai/prediction/sklearn-cpu.1-3:latest" # Default + if "serving_container_image_uri" in metadata_dict: + serving_container_image_uri = metadata_dict[ + "serving_container_image_uri" + ] + if vertex_config.container and vertex_config.container.image_uri: + serving_container_image_uri = vertex_config.container.image_uri + + # Determine artifact URI: prioritize direct argument, then metadata, then log warning. + final_artifact_uri = model_source_uri or vertex_config.artifact_uri + if not final_artifact_uri: + logger.warning( + "No 'artifact_uri' provided in function arguments or metadata. " + "Model registration might fail or use an unexpected artifact source." ) - # Use a consistently sanitized display name instead of the raw model name - model_display_name = self._sanitize_model_display_name(name) + # Determine description: prioritize direct argument, then metadata. + final_description = description or vertex_config.description # Build extended upload arguments for vertex.Model.upload, - # leveraging extra settings from self.config. + # leveraging extracted config from metadata and component config for core details. 
upload_arguments = { - "serving_container_image_uri": serving_container_image_uri, - "artifact_uri": model_source_uri or self.config.artifact_uri, - "is_default_version": self.config.is_default_version - if self.config.is_default_version is not None + # Core GCP config from component + "project": self.config.project_id or self.config.project, + "location": self.config.location or vertex_config.location, + # Model identification and artifacts + "display_name": model_display_name, + "artifact_uri": final_artifact_uri, + # Description and Versioning - prioritize metadata + "description": final_description, + "version_description": vertex_config.version_description, + "version_aliases": vertex_config.version_aliases, + "is_default_version": vertex_config.is_default_version + if vertex_config.is_default_version is not None else True, - "version_aliases": self.config.version_aliases, - "version_description": self.config.version_description, - "serving_container_predict_route": self.config.container.predict_route - if self.config.container + # Container configuration from metadata + "serving_container_image_uri": serving_container_image_uri, + "serving_container_predict_route": vertex_config.container.predict_route + if vertex_config.container else None, - "serving_container_health_route": self.config.container.health_route - if self.config.container + "serving_container_health_route": vertex_config.container.health_route + if vertex_config.container else None, - "description": description or self.config.description, - "serving_container_command": self.config.container.command - if self.config.container + "serving_container_command": vertex_config.container.command + if vertex_config.container else None, - "serving_container_args": self.config.container.args - if self.config.container + "serving_container_args": vertex_config.container.args + if vertex_config.container else None, - "serving_container_environment_variables": self.config.container.env - if self.config.container + "serving_container_environment_variables": vertex_config.container.env + if vertex_config.container else None, - "serving_container_ports": self.config.container.ports - if self.config.container + "serving_container_ports": vertex_config.container.ports + if vertex_config.container else None, - "display_name": self.config.display_name or model_display_name, - "project": self.config.project_id, - "location": self.config.location, + # Labels and Encryption "labels": labels, - "encryption_spec_key_name": self.config.encryption_spec_key_name, + "encryption_spec_key_name": vertex_config.encryption_spec_key_name, } - # Include explanation settings if provided in the config. - if self.config.explanation: + # Include explanation settings if provided in metadata config. + if vertex_config.explanation: upload_arguments["explanation_metadata"] = ( - self.config.explanation.metadata + vertex_config.explanation.metadata ) upload_arguments["explanation_parameters"] = ( - self.config.explanation.parameters + vertex_config.explanation.parameters ) # Remove any parameters that are None to avoid passing them to upload. 
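# A minimal sketch of the precedence implemented above, with illustrative
# values; `model_registry` stands in for the active Vertex AI model registry
# component, and passing extra keys through `ModelRegistryModelMetadata` is
# an assumption of this example:
#
#     metadata = ModelRegistryModelMetadata(
#         serving_container_image_uri=(
#             "europe-docker.pkg.dev/vertex-ai/prediction/xgboost-cpu.1-7:latest"
#         ),
#     )
#     model_registry.register_model_version(
#         name="churn-model",
#         version="7",
#         model_source_uri="gs://my-bucket/models/churn/7",
#         metadata=metadata,
#     )
#
# The explicit `model_source_uri` wins over any `artifact_uri` in the
# metadata, the container image from the metadata replaces the sklearn
# default, and arguments left as `None` are dropped before calling
# `aiplatform.Model.upload` so the Vertex AI SDK defaults still apply.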
@@ -509,22 +552,49 @@ def register_model_version( } # Try to get existing parent model, but don't fail if it doesn't exist + # Use the actual model name `name` for lookup, not the potentially overridden display name parent_model = self._init_vertex_model(name=name, version=version) # If parent model exists and has same URI, return existing version - if parent_model and parent_model.uri == model_source_uri: + # Check against final_artifact_uri used for upload + if parent_model and parent_model.uri == final_artifact_uri: logger.info( - f"Model version {version} already exists, skipping upload..." + f"Model version {version} targeting artifact URI " + f"'{final_artifact_uri}' already exists, skipping upload..." ) return self._vertex_model_to_registry_version(parent_model) # Set parent model resource name if it exists if parent_model: + # Ensure the display_name matches the parent model if it exists, + # otherwise upload might create a *new* model instead of a version. + # Use the parent model's display name for the upload. + upload_arguments["display_name"] = parent_model.display_name upload_arguments["parent_model"] = parent_model.resource_name + logger.info( + f"Found existing parent model '{parent_model.display_name}' " + f"({parent_model.resource_name}). Uploading as a new version." + ) + else: + logger.info( + f"No existing parent model found for name '{name}'. " + f"A new model named '{upload_arguments['display_name']}' will be created." + ) # Upload the model - model = aiplatform.Model.upload(**upload_arguments) - logger.info(f"Uploaded new model version with labels: {model.labels}") + try: + logger.info( + f"Uploading model to Vertex AI with arguments: { {k: v for k, v in upload_arguments.items() if k != 'labels'} }" + ) # Don't log potentially large labels dict + model = aiplatform.Model.upload(**upload_arguments) + logger.info( + f"Uploaded new model version with labels: {model.labels}" + ) + except Exception as e: + logger.error(f"Failed to upload model to Vertex AI: {e}") + # Log the arguments again on failure for easier debugging + logger.error(f"Failed upload arguments: {upload_arguments}") + raise return self._vertex_model_to_registry_version(model) @@ -544,7 +614,10 @@ def delete_model_version( """ try: model = self._init_vertex_model(name=name, version=version) - assert isinstance(model, aiplatform.Model) + if model is None: + raise RuntimeError( + f"Model version '{version}' for '{name}' not found." 
+                    )
                 model.versioning_registry.delete_version(version)
                 logger.info(f"Deleted model version: {name} version {version}")
             except Exception as e:
@@ -577,8 +650,11 @@ def update_model_version(
         """
         try:
             parent_model = self._init_vertex_model(name=name, version=version)
-            assert isinstance(parent_model, aiplatform.Model)
-            sanitized_version = self._sanitize_label(version)
+            if parent_model is None:
+                raise RuntimeError(
+                    f"Model version '{version}' for '{name}' not found."
+                )
+            sanitized_version = sanitize_vertex_label(version)
             target_version = None
             for v in parent_model.list():
                 if v.labels.get("user_version") == sanitized_version:
@@ -592,12 +668,12 @@ def update_model_version(
             if metadata:
                 metadata_dict = metadata.model_dump()
                 for key, value in metadata_dict.items():
-                    labels[self._sanitize_label(key)] = self._sanitize_label(
+                    labels[sanitize_vertex_label(key)] = sanitize_vertex_label(
                         str(value)
                     )
             if remove_metadata:
                 for key in remove_metadata:
-                    labels.pop(self._sanitize_label(key), None)
+                    labels.pop(sanitize_vertex_label(key), None)
             if stage:
                 labels["stage"] = stage.value.lower()
             target_version.update(description=description, labels=labels)
@@ -622,7 +698,10 @@ def get_model_version(
         """
         try:
             parent_model = self._init_vertex_model(name=name, version=version)
-            assert isinstance(parent_model, aiplatform.Model)
+            if parent_model is None:
+                raise RuntimeError(
+                    f"Model version '{version}' for '{name}' not found."
+                )
             return self._vertex_model_to_registry_version(parent_model)
         except Exception as e:
             raise RuntimeError(f"Failed to get model version: {str(e)}")
@@ -668,7 +747,7 @@ def list_model_versions(
             if metadata:
                 for key, value in metadata.dict().items():
                     filter_expr.append(
-                        f"labels.{self._sanitize_label(key)}={self._sanitize_label(str(value))}"
+                        f"labels.{sanitize_vertex_label(key)}={sanitize_vertex_label(str(value))}"
                     )
             if created_after:
                 filter_expr.append(f"create_time>{created_after.isoformat()}")
@@ -793,8 +872,7 @@ def _sanitize_model_display_name(self, name: str) -> str:
         Returns:
             The sanitized model name.
         """
-        # Use our existing sanitizer (which converts to lowercase, replaces invalid characters, etc.)
-        name = self._sanitize_label(name)
+        name = sanitize_vertex_label(name)
         if len(name) > MAX_DISPLAY_NAME_LENGTH:
             logger.warning(
                 f"Model name '{name}' exceeds {MAX_DISPLAY_NAME_LENGTH} characters; truncating."
diff --git a/src/zenml/integrations/gcp/services/vertex_deployment.py b/src/zenml/integrations/gcp/services/vertex_deployment.py
index c025da216a2..6bd9e1fd2e5 100644
--- a/src/zenml/integrations/gcp/services/vertex_deployment.py
+++ b/src/zenml/integrations/gcp/services/vertex_deployment.py
@@ -13,7 +13,6 @@
 # permissions and limitations under the License.
 """Implementation of the Vertex AI Deployment service."""
 
-import re
 from datetime import datetime
 from typing import Any, Dict, Generator, List, Optional, Tuple, cast
 
@@ -26,6 +25,7 @@
 from zenml.integrations.gcp.flavors.vertex_base_config import (
     VertexAIEndpointConfig,
 )
+from zenml.integrations.gcp.utils import sanitize_vertex_label
 from zenml.logger import get_logger
 from zenml.models.v2.misc.service import ServiceType
 from zenml.services import ServiceStatus
@@ -52,29 +52,6 @@
 )
 
 
-def sanitize_vertex_label(value: str) -> str:
-    """Sanitize a label value to comply with Vertex AI requirements.
-
-    Args:
-        value: The label value to sanitize
-
-    Returns:
-        Sanitized label value
-    """
-    if not value:
-        return ""
-
-    # Convert to lowercase
-    value = value.lower()
-    # Replace any character that's not lowercase letter, number, dash or underscore
-    value = re.sub(r"[^a-z0-9\-_]", "-", value)
-    # Ensure it starts with a letter/number by prepending 'x' if needed
-    if not value[0].isalnum():
-        value = f"x{value}"
-    # Truncate to 63 chars to stay under limit
-    return value[:63]
-
-
 class VertexDeploymentConfig(VertexAIEndpointConfig, ServiceConfig):
     """Vertex AI service configurations."""
 
diff --git a/src/zenml/integrations/gcp/utils.py b/src/zenml/integrations/gcp/utils.py
new file mode 100644
index 00000000000..cbdae568b36
--- /dev/null
+++ b/src/zenml/integrations/gcp/utils.py
@@ -0,0 +1,42 @@
+# Copyright (c) ZenML GmbH 2024. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+#       https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+# or implied. See the License for the specific language governing
+# permissions and limitations under the License.
+"""GCP utils."""
+
+import re
+
+
+def sanitize_vertex_label(value: str) -> str:
+    """Sanitize a label value to comply with Vertex AI requirements.
+
+    Args:
+        value: The label value to sanitize
+
+    Returns:
+        Sanitized label value
+    """
+    if not value:
+        return ""
+
+    # Convert to lowercase
+    value = value.lower()
+
+    # Replace any character that's not lowercase letter, number, dash or underscore
+    value = re.sub(r"[^a-z0-9\-_]", "-", value)
+
+    # Ensure it starts with a letter/number by prepending 'x' if needed
+    if not value[0].isalnum():
+        value = f"x{value}"
+
+    # Truncate to 63 chars to stay under limit
+    return value[:63]
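A minimal usage sketch of `sanitize_vertex_label`, with expected outputs
worked out from the implementation above:

    from zenml.integrations.gcp.utils import sanitize_vertex_label

    # Mixed case, spaces and dots all collapse to lowercase and dashes.
    assert sanitize_vertex_label("My Model v1.2") == "my-model-v1-2"

    # A leading underscore survives the regex but fails isalnum(),
    # so an "x" is prepended.
    assert sanitize_vertex_label("_staging") == "x_staging"

    # Values are truncated to the 63-character Vertex AI label limit.
    assert len(sanitize_vertex_label("a" * 100)) == 63

    # Empty input short-circuits to an empty string.
    assert sanitize_vertex_label("") == ""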