From b59e677e58ba3e30110621c2a1fa60647abe0708 Mon Sep 17 00:00:00 2001 From: Michele Gazzetti Date: Wed, 3 Jun 2026 11:08:01 +0100 Subject: [PATCH 1/7] feat(vllm_performance): enable vllm version reference Signed-off-by: Michele Gazzetti --- .../vllm_performance/experiment_executor.py | 104 +++++++++++- .../performance_testing_geospatial.yaml | 160 ++++++++++++++++++ .../k8s/create_environment.py | 6 + .../vllm_performance/k8s/manage_components.py | 6 + .../k8s/yaml_support/build_components.py | 17 ++ .../yamls/test_geospatial_threadpool.yaml | 29 ++++ 6 files changed, 320 insertions(+), 2 deletions(-) create mode 100644 plugins/actuators/vllm_performance/yamls/test_geospatial_threadpool.yaml diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py index b9d294c70..4086bfc1e 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py @@ -39,6 +39,7 @@ execute_guidellm_benchmark, execute_guidellm_geospatial_benchmark, ) +from packaging import version from ray.actor import ActorHandle from orchestrator.modules.actuators.measurement_queue import MeasurementQueue @@ -53,6 +54,86 @@ logger = logging.getLogger(__name__) +def _get_vllm_version_from_metadata( + experiment: Experiment, image_name: str +) -> str | None: + """ + Extract vLLM version from experiment metadata for a given image. 
+ + Args: + experiment: The experiment object containing metadata + image_name: The image name to look up version for + + Returns: + Version string if found in metadata, None otherwise + """ + # Look for image property in experiment's optional or required properties + for prop in experiment.optionalProperties + experiment.requiredProperties: + if prop.identifier == "image" and prop.metadata: + vllm_version_map = prop.metadata.get("vllm_version", {}) + if isinstance(vllm_version_map, dict): + return vllm_version_map.get(image_name) + return None + + +def _should_enable_threadpool( + experiment: Experiment, image_name: str, threadpool_value: int +) -> bool: + """ + Determine if threadpool should be enabled based on vLLM version and user preference. + + Threadpool is only supported in vLLM >= 0.20.0. This function checks: + 1. If user explicitly disabled threadpool (threadpool=0), return False + 2. If vLLM version metadata exists and version < 0.20.0, return False + 3. Otherwise, return True (user wants it and version supports it or no version info) + + Args: + experiment: The experiment object containing metadata + image_name: The image name to check version for + threadpool_value: User's threadpool preference (0 or 1) + + Returns: + True if threadpool should be enabled, False otherwise + """ + # If user explicitly disabled, respect that + if threadpool_value == 0: + return False + + # Get version from metadata + vllm_version_str = _get_vllm_version_from_metadata(experiment, image_name) + + # If no version metadata, assume it's supported (backward compatible) + if vllm_version_str is None: + logger.warning( + f"No vLLM version metadata found for image {image_name}. " + "Assuming threadpool is supported." 
+ ) + return True + + # Parse and compare version + try: + vllm_ver = version.parse(vllm_version_str) + min_version = version.parse("0.20.0") + + if vllm_ver < min_version: + logger.info( + f"Threadpool disabled: vLLM version {vllm_version_str} < 0.20.0 " + f"for image {image_name}" + ) + return False + + logger.info( + f"Threadpool enabled: vLLM version {vllm_version_str} >= 0.20.0 " + f"for image {image_name}" + ) + return True + except Exception as e: + logger.error( + f"Failed to parse vLLM version '{vllm_version_str}' for image {image_name}: {e}. " + ) + return True + + def _build_entity_env(values: dict[str, str]) -> str: """ This is the list of entity parameters that define the environment: @@ -83,6 +164,8 @@ def _build_entity_env(values: dict[str, str]) -> str: "dtype": values.get("dtype"), "cpu_offload": values.get("cpu_offload"), "max_num_seq": values.get("max_num_seq"), + "threadpool": values.get("threadpool", 1), + "renderer_num_workers": values.get("renderer_num_workers", 32), } return json.dumps(env_values) @@ -93,11 +176,12 @@ def _create_environment( node_selector: dict[str, str], request_id: str, env_manager: ActorHandle[EnvironmentManager], + experiment: Experiment | ParameterizedExperiment, check_interval: int = 5, timeout: int = 1200, ) -> tuple[str, str]: """ - Create environment + Create environment with version-aware threadpool support. Important: This function will block until env_manager.get_environment returns an environment. 
@@ -109,6 +193,7 @@ def _create_environment( :param node_selector: node selector :param request_id the request associated with this environment :param env_manager: environment manager + :param experiment: experiment definition (used for version checking) :param check_interval: wait interval :param timeout: timeout :return: kubernetes environment name @@ -190,12 +275,22 @@ def _create_environment( ) ) try: + # Determine if threadpool should be enabled based on version + image_name = values.get("image", "") + threadpool_requested = int(values.get("threadpool", 1)) + enable_threadpool = _should_enable_threadpool( + experiment, image_name, threadpool_requested + ) + + # Convert boolean back to int for consistency with existing code + threadpool_value = 1 if enable_threadpool else 0 + create_test_environment( k8s_name=env.k8s_name, model=model, in_cluster=actuator.in_cluster, verify_ssl=actuator.verify_ssl, - image=values.get("image"), + image=image_name, image_pull_secret_name=actuator.image_pull_secret_name, deployment_template=actuator.deployment_template, service_template=actuator.service_template, @@ -218,6 +313,10 @@ def _create_environment( enforce_eager=values.get("enforce_eager", 0) == 1, io_processor_plugin=values.get("io_processor_plugin"), otlp_traces_endpoint=otlp_traces_endpoint, + threadpool=threadpool_value, + renderer_num_workers=int( + values.get("renderer_num_workers", 32) + ), check_interval=check_interval, timeout=timeout, ) @@ -396,6 +495,7 @@ def run_resource_and_workload_experiment( actuator=actuator_parameters, node_selector=node_selector, env_manager=env_manager, + experiment=experiment, request_id=request.requestid, ) diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml index e9b976401..07f67482c 100644 --- 
a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml @@ -53,6 +53,19 @@ performance_testing-geospatial-endpoint: propertyDomain: variableType: "CATEGORICAL_VARIABLE_TYPE" values: ['india_url_in_b64_out', 'valencia_url_in_b64_out', 'terramind_flood_url_in_b64_out'] + - identifier: 'threadpool' + metadata: + description: "Enable threadpool for vLLM renderer (0=disabled, 1=enabled). When enabled, uses multiple workers for rendering." + propertyDomain: + variableType: 'CATEGORICAL_VARIABLE_TYPE' + values: [0, 1] + - identifier: 'renderer_num_workers' + metadata: + description: "Number of renderer workers when threadpool is enabled. Only used when threadpool=1." + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [1, 128] + interval: 1 defaultParameterization: - value: 100 property: @@ -66,6 +79,12 @@ performance_testing-geospatial-endpoint: - property: identifier: 'dataset' value: 'india_url_in_b64_out' + - property: + identifier: 'threadpool' + value: 1 + - property: + identifier: 'renderer_num_workers' + value: 32 # measurements targetProperties: - identifier: "duration" @@ -128,6 +147,8 @@ performance_testing-geospatial-full: - identifier: image metadata: description: "(deployment) Docker image to use to create vllm deployments" + vllm_version: + "your/image/with/vllm/and/terratorch:0.1": "0.20.0" propertyDomain: variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE" values: ["your/image/with/vllm/and/terratorch:0.1"] @@ -207,6 +228,19 @@ performance_testing-geospatial-full: propertyDomain: variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE" values: [None, "terratorch_segmentation"] + - identifier: 'threadpool' + metadata: + description: "Enable threadpool for vLLM renderer (0=disabled, 1=enabled). When enabled, uses multiple workers for rendering." 
+ propertyDomain: + variableType: 'CATEGORICAL_VARIABLE_TYPE' + values: [0, 1] + - identifier: 'renderer_num_workers' + metadata: + description: "Number of renderer workers when threadpool is enabled. Only used when threadpool=1." + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [1, 128] + interval: 1 defaultParameterization: - property: identifier: 'image' @@ -259,6 +293,12 @@ performance_testing-geospatial-full: - property: identifier: 'dataset' value: 'india_url_in_b64_out' + - property: + identifier: 'threadpool' + value: 1 + - property: + identifier: 'renderer_num_workers' + value: 32 # measurements targetProperties: - identifier: "duration" @@ -321,6 +361,8 @@ performance_testing-geospatial-full-custom-dataset: - identifier: image metadata: description: "(deployment) Docker image to use to create vllm deployments" + vllm_version: + "your/image/with/vllm/and/terratorch:0.1": "0.20.0" propertyDomain: variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE" values: ["your/image/with/vllm/and/terratorch:0.1"] @@ -400,6 +442,19 @@ performance_testing-geospatial-full-custom-dataset: propertyDomain: variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE" values: ["terratorch_segmentation"] + - identifier: 'threadpool' + metadata: + description: "Enable threadpool for vLLM renderer (0=disabled, 1=enabled). When enabled, uses multiple workers for rendering." + propertyDomain: + variableType: 'CATEGORICAL_VARIABLE_TYPE' + values: [0, 1] + - identifier: 'renderer_num_workers' + metadata: + description: "Number of renderer workers when threadpool is enabled. Only used when threadpool=1." 
+ propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [1, 128] + interval: 1 defaultParameterization: - property: identifier: 'image' @@ -449,6 +504,12 @@ performance_testing-geospatial-full-custom-dataset: - property: identifier: 'io_processor_plugin' value: "terratorch_segmentation" + - property: + identifier: 'threadpool' + value: 1 + - property: + identifier: 'renderer_num_workers' + value: 32 # measurements targetProperties: - identifier: "duration" @@ -514,6 +575,19 @@ performance_testing-geospatial-endpoint-custom-dataset: variableType: 'DISCRETE_VARIABLE_TYPE' domainRange: [-1, 500] # -1 means no concurrency control interval: 1 + - identifier: 'threadpool' + metadata: + description: "Enable threadpool for vLLM renderer (0=disabled, 1=enabled). When enabled, uses multiple workers for rendering." + propertyDomain: + variableType: 'CATEGORICAL_VARIABLE_TYPE' + values: [0, 1] + - identifier: 'renderer_num_workers' + metadata: + description: "Number of renderer workers when threadpool is enabled. Only used when threadpool=1." + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [1, 128] + interval: 1 defaultParameterization: - value: 100 property: @@ -524,6 +598,12 @@ performance_testing-geospatial-endpoint-custom-dataset: - value: 1.0 property: identifier: 'burstiness' + - property: + identifier: 'threadpool' + value: 1 + - property: + identifier: 'renderer_num_workers' + value: 32 # measurements targetProperties: - identifier: "duration" @@ -588,6 +668,19 @@ performance_testing-geospatial-endpoint-guidellm: propertyDomain: variableType: "CATEGORICAL_VARIABLE_TYPE" values: ['india_url_in_b64_out', 'valencia_url_in_b64_out'] + - identifier: 'threadpool' + metadata: + description: "Enable threadpool for vLLM renderer (0=disabled, 1=enabled). When enabled, uses multiple workers for rendering." 
+ propertyDomain: + variableType: 'CATEGORICAL_VARIABLE_TYPE' + values: [0, 1] + - identifier: 'renderer_num_workers' + metadata: + description: "Number of renderer workers when threadpool is enabled. Only used when threadpool=1." + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [1, 128] + interval: 1 defaultParameterization: - value: 100 property: @@ -601,6 +694,12 @@ performance_testing-geospatial-endpoint-guidellm: - property: identifier: 'dataset' value: 'india_url_in_b64_out' + - property: + identifier: 'threadpool' + value: 1 + - property: + identifier: 'renderer_num_workers' + value: 32 targetProperties: - identifier: "duration" - identifier: "completed" @@ -661,6 +760,8 @@ performance_testing-geospatial-full-guidellm: - identifier: image metadata: description: "(deployment) Docker image to use to create vllm deployments" + vllm_version: + "your/image/with/vllm/and/terratorch:0.1": "0.20.0" propertyDomain: variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE" values: ["your/image/with/vllm/and/terratorch:0.1"] @@ -740,6 +841,19 @@ performance_testing-geospatial-full-guidellm: propertyDomain: variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE" values: [None, "terratorch_segmentation"] + - identifier: 'threadpool' + metadata: + description: "Enable threadpool for vLLM renderer (0=disabled, 1=enabled). When enabled, uses multiple workers for rendering." + propertyDomain: + variableType: 'CATEGORICAL_VARIABLE_TYPE' + values: [0, 1] + - identifier: 'renderer_num_workers' + metadata: + description: "Number of renderer workers when threadpool is enabled. Only used when threadpool=1." 
+ propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [1, 128] + interval: 1 defaultParameterization: - property: identifier: 'image' @@ -792,6 +906,12 @@ performance_testing-geospatial-full-guidellm: - property: identifier: 'dataset' value: 'india_url_in_b64_out' + - property: + identifier: 'threadpool' + value: 1 + - property: + identifier: 'renderer_num_workers' + value: 32 targetProperties: - identifier: "duration" - identifier: "completed" @@ -852,6 +972,8 @@ performance_testing-geospatial-guidellm-deployment-custom-dataset: - identifier: image metadata: description: "(deployment) Docker image to use to create vllm deployments" + vllm_version: + "your/image/with/vllm/and/terratorch:0.1": "0.20.0" propertyDomain: variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE" values: ["your/image/with/vllm/and/terratorch:0.1"] @@ -933,6 +1055,19 @@ performance_testing-geospatial-guidellm-deployment-custom-dataset: propertyDomain: variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE" values: [None, "terratorch_segmentation"] + - identifier: 'threadpool' + metadata: + description: "Enable threadpool for vLLM renderer (0=disabled, 1=enabled). When enabled, uses multiple workers for rendering." + propertyDomain: + variableType: 'CATEGORICAL_VARIABLE_TYPE' + values: [0, 1] + - identifier: 'renderer_num_workers' + metadata: + description: "Number of renderer workers when threadpool is enabled. Only used when threadpool=1." 
+ propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [1, 128] + interval: 1 defaultParameterization: - property: identifier: 'image' @@ -982,6 +1117,12 @@ performance_testing-geospatial-guidellm-deployment-custom-dataset: - property: identifier: 'io_processor_plugin' value: "terratorch_segmentation" + - property: + identifier: 'threadpool' + value: 1 + - property: + identifier: 'renderer_num_workers' + value: 32 targetProperties: - identifier: "duration" - identifier: "completed" @@ -1045,6 +1186,19 @@ performance_testing-geospatial-guidellm-endpoint-custom-dataset: variableType: 'DISCRETE_VARIABLE_TYPE' domainRange: [-1, 500] # -1 means no concurrency control interval: 1 + - identifier: 'threadpool' + metadata: + description: "Enable threadpool for vLLM renderer (0=disabled, 1=enabled). When enabled, uses multiple workers for rendering." + propertyDomain: + variableType: 'CATEGORICAL_VARIABLE_TYPE' + values: [0, 1] + - identifier: 'renderer_num_workers' + metadata: + description: "Number of renderer workers when threadpool is enabled. Only used when threadpool=1." 
+ propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [1, 128] + interval: 1 defaultParameterization: - value: 100 property: @@ -1055,6 +1209,12 @@ performance_testing-geospatial-guidellm-endpoint-custom-dataset: - value: 1.0 property: identifier: 'burstiness' + - property: + identifier: 'threadpool' + value: 1 + - property: + identifier: 'renderer_num_workers' + value: 32 targetProperties: - identifier: "duration" - identifier: "completed" diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8s/create_environment.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8s/create_environment.py index 49439f11c..fa419adfc 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8s/create_environment.py +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8s/create_environment.py @@ -41,6 +41,8 @@ def create_test_environment( skip_tokenizer_init: bool = False, io_processor_plugin: str | None = None, otlp_traces_endpoint: pydantic.AnyUrl | None = None, + threadpool: int = 1, + renderer_num_workers: int = 32, check_interval: int = 5, timeout: int = 1200, ) -> None: @@ -71,6 +73,8 @@ def create_test_environment( :param skip_tokenizer_init: flag to skip tokenizer initialization in vLLM :param io_processor_plugin: name of the IO processor plugin to be used by vLLM :param otlp_traces_endpoint: OpenTelemetry traces endpoint URL + :param threadpool: enable threadpool for vLLM renderer (0=disabled, 1=enabled) + :param renderer_num_workers: number of renderer workers when threadpool is enabled :param check_interval: wait interval in seconds :param timeout: timeout in seconds :return: @@ -119,6 +123,8 @@ def create_test_environment( skip_tokenizer_init=skip_tokenizer_init, io_processor_plugin=io_processor_plugin, otlp_traces_endpoint=otlp_traces_endpoint, + threadpool=threadpool, + renderer_num_workers=renderer_num_workers, ) logger.debug("deployment created") 
c_manager.wait_deployment_ready( diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8s/manage_components.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8s/manage_components.py index 24c197c89..adeb3a519 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8s/manage_components.py +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8s/manage_components.py @@ -262,6 +262,8 @@ def create_deployment( skip_tokenizer_init: bool = False, io_processor_plugin: str | None = None, otlp_traces_endpoint: pydantic.AnyUrl | None = None, + threadpool: int = 1, + renderer_num_workers: int = 32, ) -> None: """ create deployment for model @@ -286,6 +288,8 @@ def create_deployment( :param skip_tokenizer_init: flag to skip tokenizer initialization in vLLM :param io_processor_plugin: name of the IO processor plugin to be used by vLLM :param otlp_traces_endpoint: OpenTelemetry traces endpoint URL + :param threadpool: enable threadpool for vLLM renderer (0=disabled, 1=enabled) + :param renderer_num_workers: number of renderer workers when threadpool is enabled :return: """ if node_selector is None: @@ -314,6 +318,8 @@ def create_deployment( io_processor_plugin=io_processor_plugin, enforce_eager=enforce_eager, otlp_traces_endpoint=otlp_traces_endpoint, + threadpool=threadpool, + renderer_num_workers=renderer_num_workers, ) logger.debug(json.dumps(deployment_yaml, indent=2)) diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8s/yaml_support/build_components.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8s/yaml_support/build_components.py index 891bca66f..9d7b73f42 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8s/yaml_support/build_components.py +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8s/yaml_support/build_components.py @@ -93,6 +93,8 @@ def deployment_yaml( 
skip_tokenizer_init: bool = False, io_processor_plugin: str | None = None, otlp_traces_endpoint: pydantic.AnyUrl | None = None, + threadpool: int = 1, + renderer_num_workers: int = 32, ) -> dict[str, Any]: """ Generate deployment yaml @@ -116,6 +118,8 @@ def deployment_yaml( :param enforce_eager: flag to enforce using Pytorch eager mode :param skip_tokenizer_init: flag to skip tokenizer initialization in vLLM :param io_processor_plugin: name of the IO processor plugin to be used by vLLM + :param threadpool: enable threadpool for vLLM renderer (0=disabled, 1=enabled) + :param renderer_num_workers: number of renderer workers when threadpool is enabled :return: """ if node_selector is None: @@ -174,6 +178,8 @@ def deployment_yaml( vllm_serve_args = [ model, + "--max-num-seqs", + "256", "--max-num-batched-tokens", f"{max_batch_tokens}", "--gpu-memory-utilization", @@ -188,6 +194,17 @@ def deployment_yaml( dtype.value, ] + # Add threadpool arguments if enabled + if threadpool == 1: + vllm_serve_args.extend( + [ + "--renderer-num-workers", + str(renderer_num_workers), + "--mm-processor-cache-gb", + "0", + ] + ) + if enforce_eager: vllm_serve_args.append("--enforce-eager") if skip_tokenizer_init: diff --git a/plugins/actuators/vllm_performance/yamls/test_geospatial_threadpool.yaml b/plugins/actuators/vllm_performance/yamls/test_geospatial_threadpool.yaml new file mode 100644 index 000000000..22d955656 --- /dev/null +++ b/plugins/actuators/vllm_performance/yamls/test_geospatial_threadpool.yaml @@ -0,0 +1,29 @@ +# Copyright IBM Corporation 2025, 2026 +# SPDX-License-Identifier: MIT +# Test discoveryspace for geospatial experiments with threadpool properties +entitySpace: + - identifier: model + propertyDomain: + values: + - ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11 + - identifier: endpoint + propertyDomain: + values: + - http://localhost:8000 + - identifier: request_rate + propertyDomain: + values: [10] + - identifier: threadpool + propertyDomain: + values: 
[0, 1] + - identifier: renderer_num_workers + propertyDomain: + values: [16, 32, 64] +experiments: + - actuatorIdentifier: vllm_performance + experimentIdentifier: test-geospatial-endpoint-v1 +metadata: + description: Test space for geospatial experiments with threadpool configuration + name: geospatial_threadpool_test + +# Made with Bob From 4ffc667c3b2fa402b77fbd456f286a10f760f416 Mon Sep 17 00:00:00 2001 From: Michele Gazzetti Date: Wed, 3 Jun 2026 11:44:23 +0100 Subject: [PATCH 2/7] feat(vllm_performance): add logs Signed-off-by: Michele Gazzetti --- .../vllm_performance/experiment_executor.py | 42 +++++++++++++++++-- 1 file changed, 38 insertions(+), 4 deletions(-) diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py index 4086bfc1e..27896c355 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py @@ -67,12 +67,25 @@ def _get_vllm_version_from_metadata( Returns: Version string if found in metadata, None otherwise """ + logger.debug( + f"_get_vllm_version_from_metadata called for experiment={experiment.identifier}, " + f"optionalProperties={experiment.optionalProperties}," + f"requiredProperties={experiment.requiredProperties}" + ) + # Look for image property in experiment's optional or required properties for prop in experiment.optionalProperties + experiment.requiredProperties: - if prop.identifier == "image" and prop.metadata: - vllm_version_map = prop.metadata.get("vllm_version", {}) - if isinstance(vllm_version_map, dict): - return vllm_version_map.get(image_name) + if prop.identifier == "image": + logger.debug(f"Found image property with metadata: {prop.metadata}") + if prop.metadata: + vllm_version_map = prop.metadata.get("vllm_version", {}) + logger.debug(f"vllm_version_map: 
{vllm_version_map}") + if isinstance(vllm_version_map, dict): + version = vllm_version_map.get(image_name) + logger.debug(f"Version lookup for {image_name}: {version}") + return version + + logger.debug(f"No vLLM version found in metadata for image {image_name}") return None @@ -95,12 +108,19 @@ def _should_enable_threadpool( Returns: True if threadpool should be enabled, False otherwise """ + logger.debug( + f"_should_enable_threadpool called with: image_name={image_name}, " + f"threadpool_value={threadpool_value}, experiment_id={experiment.identifier}" + ) + # If user explicitly disabled, respect that if threadpool_value == 0: + logger.debug("Threadpool explicitly disabled by user (threadpool_value=0)") return False # Get version from metadata vllm_version_str = _get_vllm_version_from_metadata(experiment, image_name) + logger.debug(f"Retrieved vLLM version from metadata: {vllm_version_str}") # If no version metadata, assume it's supported (backward compatible) if vllm_version_str is None: @@ -114,6 +134,9 @@ def _should_enable_threadpool( try: vllm_ver = version.parse(vllm_version_str) min_version = version.parse("0.20.0") + logger.debug( + f"Parsed versions - vLLM: {vllm_ver}, minimum required: {min_version}" + ) if vllm_ver < min_version: logger.info( @@ -130,6 +153,7 @@ def _should_enable_threadpool( except Exception as e: logger.error( f"Failed to parse vLLM version '{vllm_version_str}' for image {image_name}: {e}. " + "Assuming threadpool is supported." 
) return True @@ -278,12 +302,22 @@ def _create_environment( # Determine if threadpool should be enabled based on version image_name = values.get("image", "") threadpool_requested = int(values.get("threadpool", 1)) + logger.debug( + f"Before _should_enable_threadpool: image_name={image_name}, " + f"threadpool_requested={threadpool_requested}" + ) enable_threadpool = _should_enable_threadpool( experiment, image_name, threadpool_requested ) + logger.debug( + f"After _should_enable_threadpool: enable_threadpool={enable_threadpool}" + ) # Convert boolean back to int for consistency with existing code threadpool_value = 1 if enable_threadpool else 0 + logger.debug( + f"Final threadpool_value to be used: {threadpool_value}" + ) create_test_environment( k8s_name=env.k8s_name, From 1f4e2abc1d6c8f5533257a637f7f5b9859ecda4f Mon Sep 17 00:00:00 2001 From: Michele Gazzetti Date: Wed, 3 Jun 2026 13:37:46 +0100 Subject: [PATCH 3/7] fix(vllm_performance): add vllm version ref Signed-off-by: Michele Gazzetti --- .../experiments/performance_testing_geospatial.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml index 07f67482c..407a77e42 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml @@ -148,6 +148,7 @@ performance_testing-geospatial-full: metadata: description: "(deployment) Docker image to use to create vllm deployments" vllm_version: + "icr.io/drl-nextgen/mgazz/vllm:v0.18.0-tt.v1.2.5": "0.18.0" "your/image/with/vllm/and/terratorch:0.1": "0.20.0" propertyDomain: variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE" From c2676765bd984627e0fc7f91ee043e56fa6c86b3 
Mon Sep 17 00:00:00 2001 From: Michele Gazzetti Date: Thu, 4 Jun 2026 06:57:43 +0100 Subject: [PATCH 4/7] feat(vllm): add threadpool experiment properties for geospatial deployments allow PropertyValue to store dict values and make grouped sampling hash dict/list values safely in _build_point_group_values() add threadpool and renderer_num_workers support through the vLLM actuator deployment pipeline and geospatial test YAMLs in experiment_executor.py, build_components.py, and related config files include tests and implementation notes in test_experiment_executor.py and ado-threadpool-threadpool-property-implementation.md Signed-off-by: Michele Gazzetti --- .../core/discoveryspace/group_samplers.py | 17 ++- orchestrator/schema/property_value.py | 10 +- .../vllm_performance/experiment_executor.py | 89 ++++++++------- .../experiments/performance_testing.yaml | 8 +- .../performance_testing_geospatial.yaml | 21 ++-- .../tests/test_experiment_executor.py | 64 +++++++++++ tests/core/test_group_samplers.py | 106 ++++++++++++++++++ tests/schema/test_property_value.py | 14 ++- 8 files changed, 259 insertions(+), 70 deletions(-) create mode 100644 plugins/actuators/vllm_performance/tests/test_experiment_executor.py diff --git a/orchestrator/core/discoveryspace/group_samplers.py b/orchestrator/core/discoveryspace/group_samplers.py index 051c64352..05028f48e 100644 --- a/orchestrator/core/discoveryspace/group_samplers.py +++ b/orchestrator/core/discoveryspace/group_samplers.py @@ -55,9 +55,24 @@ def _build_point_group_values( ) -> frozenset[tuple[str, Any]]: """ :return: A frozen set of (key,value) pairs + + Note: Converts unhashable values (dict, list) to hashable representations """ - return frozenset({(k, v) for k, v in point.items() if k in group}) + def make_hashable( + value: float | list | str | dict | None, + ) -> int | float | tuple | str | None: + """Convert unhashable types to hashable equivalents""" + if isinstance(value, dict): + # Convert dict to sorted tuple of 
items + return tuple(sorted(value.items())) + if isinstance(value, list): + # Convert list to tuple + return tuple(value) + # Return value as-is if already hashable + return value # type: ignore[return-value] + + return frozenset({(k, make_hashable(v)) for k, v in point.items() if k in group}) def _build_groups_dict( diff --git a/orchestrator/schema/property_value.py b/orchestrator/schema/property_value.py index 038085bb2..529facc12 100644 --- a/orchestrator/schema/property_value.py +++ b/orchestrator/schema/property_value.py @@ -59,7 +59,7 @@ class PropertyValue(pydantic.BaseModel): ), ] = None value: Annotated[ - int | float | list | str | CustomBytes | None, + int | float | list | str | CustomBytes | dict | None, pydantic.Field(description="The measured value."), ] property: Annotated[ @@ -86,9 +86,9 @@ def convert_property_to_descriptor( @pydantic.field_validator("value") def check_value_type( cls, - value: float | list | str | CustomBytes | None, + value: float | list | str | CustomBytes | dict | None, context: pydantic.ValidationInfo, - ) -> int | float | list | str | CustomBytes | None: + ) -> int | float | list | str | CustomBytes | dict | None: valueType = context.data.get("valueType") if valueType: @@ -111,7 +111,7 @@ def check_value_type( if type(value) not in {float, int} and value is not None: raise ValueError("Validation failed for NUMERIC_VALUE_TYPE") elif valueType == ValueTypeEnum.STRING_VALUE_TYPE: - if not isinstance(value, str): + if not isinstance(value, (str, dict)): raise ValueError( f"ValueType was string but Value was of type {type(value)}" ) @@ -155,6 +155,8 @@ def set_value_type(self) -> "PropertyValue": self.valueType = ValueTypeEnum.BLOB_VALUE_TYPE elif isinstance(self.value, list): self.valueType = ValueTypeEnum.VECTOR_VALUE_TYPE + elif isinstance(self.value, dict): + self.valueType = ValueTypeEnum.STRING_VALUE_TYPE elif self.valueType == ValueTypeEnum.NUMERIC_VALUE_TYPE and isinstance( self.value, str ): diff --git 
a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py index 27896c355..386aa4abc 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py @@ -54,63 +54,49 @@ logger = logging.getLogger(__name__) -def _get_vllm_version_from_metadata( - experiment: Experiment, image_name: str -) -> str | None: +def _get_vllm_version_from_image_value(image_value: dict | str) -> str | None: """ - Extract vLLM version from experiment metadata for a given image. + Extract vLLM version from image property value. Args: - experiment: The experiment object containing metadata - image_name: The image name to look up version for + image_value: The image property value, either a dict with 'image' and 'vllm_version' keys, + or a string (for backward compatibility) Returns: - Version string if found in metadata, None otherwise + Version string if found in dict, None otherwise """ - logger.debug( - f"_get_vllm_version_from_metadata called for experiment={experiment.identifier}, " - f"optionalProperties={experiment.optionalProperties}," - f"requiredProperties={experiment.requiredProperties}" - ) + logger.debug(f"_get_vllm_version_from_image_value called with: {image_value}") + + # If image_value is a dict with vllm_version, extract it + if isinstance(image_value, dict): + version = image_value.get("vllm_version") + logger.debug(f"Extracted vLLM version from dict: {version}") + return version - # Look for image property in experiment's optional or required properties - for prop in experiment.optionalProperties + experiment.requiredProperties: - if prop.identifier == "image": - logger.debug(f"Found image property with metadata: {prop.metadata}") - if prop.metadata: - vllm_version_map = prop.metadata.get("vllm_version", {}) - 
logger.debug(f"vllm_version_map: {vllm_version_map}") - if isinstance(vllm_version_map, dict): - version = vllm_version_map.get(image_name) - logger.debug(f"Version lookup for {image_name}: {version}") - return version - - logger.debug(f"No vLLM version found in metadata for image {image_name}") + # For backward compatibility: if it's a string, we don't have version info + logger.debug("Image value is a string, no version info available") return None -def _should_enable_threadpool( - experiment: Experiment, image_name: str, threadpool_value: int -) -> bool: +def _should_enable_threadpool(image_value: dict | str, threadpool_value: int) -> bool: """ Determine if threadpool should be enabled based on vLLM version and user preference. Threadpool is only supported in vLLM >= 0.20.0. This function checks: 1. If user explicitly disabled threadpool (threadpool=0), return False - 2. If vLLM version metadata exists and version < 0.20.0, return False + 2. If vLLM version exists in image_value dict and version < 0.20.0, return False 3. 
Otherwise, return True (user wants it and version supports it or no version info) Args: - experiment: The experiment object containing metadata - image_name: The image name to check version for + image_value: The image property value (dict with 'image' and 'vllm_version' or string) threadpool_value: User's threadpool preference (0 or 1) Returns: True if threadpool should be enabled, False otherwise """ logger.debug( - f"_should_enable_threadpool called with: image_name={image_name}, " - f"threadpool_value={threadpool_value}, experiment_id={experiment.identifier}" + f"_should_enable_threadpool called with: image_value={image_value}, " + f"threadpool_value={threadpool_value}" ) # If user explicitly disabled, respect that @@ -118,14 +104,14 @@ def _should_enable_threadpool( logger.debug("Threadpool explicitly disabled by user (threadpool_value=0)") return False - # Get version from metadata - vllm_version_str = _get_vllm_version_from_metadata(experiment, image_name) - logger.debug(f"Retrieved vLLM version from metadata: {vllm_version_str}") + # Get version from image value + vllm_version_str = _get_vllm_version_from_image_value(image_value) + logger.debug(f"Retrieved vLLM version: {vllm_version_str}") - # If no version metadata, assume it's supported (backward compatible) + # If no version info, assume it's supported (backward compatible) if vllm_version_str is None: logger.warning( - f"No vLLM version metadata found for image {image_name}. " + f"No vLLM version info found for image {image_value}. " "Assuming threadpool is supported." 
) return True @@ -141,18 +127,18 @@ def _should_enable_threadpool( if vllm_ver < min_version: logger.info( f"Threadpool disabled: vLLM version {vllm_version_str} < 0.20.0 " - f"for image {image_name}" + f"for image {image_value}" ) return False logger.info( f"Threadpool enabled: vLLM version {vllm_version_str} >= 0.20.0 " - f"for image {image_name}" + f"for image {image_value}" ) return True except Exception as e: logger.error( - f"Failed to parse vLLM version '{vllm_version_str}' for image {image_name}: {e}. " + f"Failed to parse vLLM version '{vllm_version_str}' for image {image_value}: {e}. " "Assuming threadpool is supported." ) return True @@ -176,9 +162,16 @@ def _build_entity_env(values: dict[str, str]) -> str: :param values: experiment values :return: definition """ + # Extract image string from dict if needed + image_value = values.get("image") + if isinstance(image_value, dict): + image_str = image_value.get("image") + else: + image_str = image_value + env_values = { "model": values.get("model"), - "image": values.get("image"), + "image": image_str, "n_gpus": values.get("n_gpus"), "gpu_type": values.get("gpu_type"), "n_cpus": values.get("n_cpus"), @@ -300,14 +293,14 @@ def _create_environment( ) try: # Determine if threadpool should be enabled based on version - image_name = values.get("image", "") + image_value = values.get("image", "") threadpool_requested = int(values.get("threadpool", 1)) logger.debug( - f"Before _should_enable_threadpool: image_name={image_name}, " + f"Before _should_enable_threadpool: image_value={image_value}, " f"threadpool_requested={threadpool_requested}" ) enable_threadpool = _should_enable_threadpool( - experiment, image_name, threadpool_requested + image_value, threadpool_requested ) logger.debug( f"After _should_enable_threadpool: enable_threadpool={enable_threadpool}" @@ -319,6 +312,12 @@ def _create_environment( f"Final threadpool_value to be used: {threadpool_value}" ) + # Extract image string from dict if needed + if 
isinstance(image_value, dict): + image_name = image_value.get("image", "") + else: + image_name = image_value + create_test_environment( k8s_name=env.k8s_name, model=model, diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing.yaml b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing.yaml index e94a1060a..d32d07e1a 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing.yaml +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing.yaml @@ -60,7 +60,9 @@ test-deployment-v1: description: "(deployment) Docker image to use to create vllm deployments" propertyDomain: variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE" - values: ["vllm/vllm-openai:v0.14.0"] + values: + - {image: "vllm/vllm-openai:v0.14.0", vllm_version: "0.14.0"} + - {image: "vllm/vllm-openai:latest", vllm_version: "0.21.0"} - identifier: n_cpus metadata: description: "(deployment) the number of CPUs to use" @@ -128,7 +130,7 @@ test-deployment-v1: defaultParameterization: - property: identifier: 'image' - value: "vllm/vllm-openai:v0.14.0" + value: {image: "vllm/vllm-openai:v0.14.0", vllm_version: "0.14.0"} - property: identifier: n_cpus value: 8 @@ -461,7 +463,7 @@ test-deployment-guidellm-v1: defaultParameterization: - property: identifier: 'image' - value: "vllm/vllm-openai:v0.14.0" + value: {image: "vllm/vllm-openai:v0.14.0", vllm_version: "0.14.0"} - property: identifier: n_cpus value: 8 diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml index 407a77e42..0320e59ec 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml +++ 
b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml @@ -147,12 +147,11 @@ performance_testing-geospatial-full: - identifier: image metadata: description: "(deployment) Docker image to use to create vllm deployments" - vllm_version: - "icr.io/drl-nextgen/mgazz/vllm:v0.18.0-tt.v1.2.5": "0.18.0" - "your/image/with/vllm/and/terratorch:0.1": "0.20.0" propertyDomain: variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE" - values: ["your/image/with/vllm/and/terratorch:0.1"] + values: + - {image: "your/image/with/vllm/and/terratorch:0.1", vllm_version: "0.20.0"} + - {image: "icr.io/drl-nextgen/mgazz/vllm:v0.18.0-tt.v1.2.5", vllm_version: "0.18.0"} - identifier: n_cpus metadata: description: "(deployment) the number of CPUs to use" @@ -245,7 +244,7 @@ performance_testing-geospatial-full: defaultParameterization: - property: identifier: 'image' - value: "your/image/with/vllm/and/terratorch:0.1" + value: {image: "your/image/with/vllm/and/terratorch:0.1", vllm_version: "0.20.0"} - property: identifier: n_cpus value: 8 @@ -362,8 +361,6 @@ performance_testing-geospatial-full-custom-dataset: - identifier: image metadata: description: "(deployment) Docker image to use to create vllm deployments" - vllm_version: - "your/image/with/vllm/and/terratorch:0.1": "0.20.0" propertyDomain: variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE" values: ["your/image/with/vllm/and/terratorch:0.1"] @@ -459,7 +456,7 @@ performance_testing-geospatial-full-custom-dataset: defaultParameterization: - property: identifier: 'image' - value: "your/image/with/vllm/and/terratorch:0.1" + value: {image: "your/image/with/vllm/and/terratorch:0.1", vllm_version: "0.20.0"} - property: identifier: n_cpus value: 8 @@ -761,8 +758,6 @@ performance_testing-geospatial-full-guidellm: - identifier: image metadata: description: "(deployment) Docker image to use to create vllm deployments" - vllm_version: - "your/image/with/vllm/and/terratorch:0.1": "0.20.0" 
propertyDomain: variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE" values: ["your/image/with/vllm/and/terratorch:0.1"] @@ -858,7 +853,7 @@ performance_testing-geospatial-full-guidellm: defaultParameterization: - property: identifier: 'image' - value: "your/image/with/vllm/and/terratorch:0.1" + value: {image: "your/image/with/vllm/and/terratorch:0.1", vllm_version: "0.20.0"} - property: identifier: n_cpus value: 8 @@ -973,8 +968,6 @@ performance_testing-geospatial-guidellm-deployment-custom-dataset: - identifier: image metadata: description: "(deployment) Docker image to use to create vllm deployments" - vllm_version: - "your/image/with/vllm/and/terratorch:0.1": "0.20.0" propertyDomain: variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE" values: ["your/image/with/vllm/and/terratorch:0.1"] @@ -1072,7 +1065,7 @@ performance_testing-geospatial-guidellm-deployment-custom-dataset: defaultParameterization: - property: identifier: 'image' - value: "your/image/with/vllm/and/terratorch:0.1" + value: {image: "your/image/with/vllm/and/terratorch:0.1", vllm_version: "0.20.0"} - property: identifier: n_cpus value: 8 diff --git a/plugins/actuators/vllm_performance/tests/test_experiment_executor.py b/plugins/actuators/vllm_performance/tests/test_experiment_executor.py new file mode 100644 index 000000000..645aff82a --- /dev/null +++ b/plugins/actuators/vllm_performance/tests/test_experiment_executor.py @@ -0,0 +1,64 @@ +# Copyright IBM Corporation 2025, 2026 +# SPDX-License-Identifier: MIT + +""" +Unit tests for experiment_executor module functions. +Tests version extraction from image property values. 
+""" + +from ado_actuators.vllm_performance.experiment_executor import ( + _get_vllm_version_from_image_value, +) + + +class TestGetVllmVersionFromImageValue: + """Test suite for _get_vllm_version_from_image_value function""" + + def test_version_extraction_from_dict_value(self) -> None: + """Test extracting vLLM version from dict image value""" + image_value = { + "image": "icr.io/drl-nextgen/mgazz/vllm:v0.18.0-tt.v1.2.5", + "vllm_version": "0.18.0", + } + + version = _get_vllm_version_from_image_value(image_value) + assert version == "0.18.0" + + def test_version_extraction_from_another_dict_value(self) -> None: + """Test extracting vLLM version from another dict image value""" + image_value = { + "image": "vllm/vllm-openai:v0.14.0", + "vllm_version": "0.14.0", + } + + version = _get_vllm_version_from_image_value(image_value) + assert version == "0.14.0" + + def test_version_extraction_returns_none_for_string_value(self) -> None: + """Test that None is returned when image value is a string (backward compatibility)""" + image_value = "icr.io/drl-nextgen/mgazz/vllm:v0.18.0-tt.v1.2.5" + + version = _get_vllm_version_from_image_value(image_value) + assert version is None + + def test_version_extraction_returns_none_for_dict_without_version(self) -> None: + """Test that None is returned when dict doesn't have vllm_version key""" + image_value = { + "image": "icr.io/drl-nextgen/mgazz/vllm:v0.18.0-tt.v1.2.5", + } + + version = _get_vllm_version_from_image_value(image_value) + assert version is None + + def test_version_extraction_with_latest_tag(self) -> None: + """Test extracting version for latest tag""" + image_value = { + "image": "vllm/vllm-openai:latest", + "vllm_version": "0.21.0", + } + + version = _get_vllm_version_from_image_value(image_value) + assert version == "0.21.0" + + +# Made with Bob diff --git a/tests/core/test_group_samplers.py b/tests/core/test_group_samplers.py index 2e1204fbd..1de76c494 100644 --- a/tests/core/test_group_samplers.py +++ 
b/tests/core/test_group_samplers.py @@ -10,6 +10,7 @@ RandomGroupSampleSelector, SequentialGroupSampleSelector, _build_groups_dict, + _build_point_group_values, _get_space_matching_points, ) from orchestrator.core.discoveryspace.samplers import ( @@ -312,6 +313,111 @@ async def test_group_sampler_sequential_remote( ), "Expected for selectors that the number of entities iterated is equal to number matching entities in source" +def test_build_point_group_values_with_unhashable_types() -> None: + """Test that _build_point_group_values handles dict and list values correctly.""" + + # Test with dictionary values (like the image property in the geospatial case) + point_with_dict = { + "model": "test-model", + "image": {"image": "icr.io/test:v1", "vllm_version": "0.18.0"}, + "n_gpus": 1, + "memory": "128Gi", + } + + group = ["model", "image", "n_gpus"] + + # This should not raise TypeError: unhashable type: 'dict' + result = _build_point_group_values(point=point_with_dict, group=group) + + # Verify the result is a frozenset + assert isinstance(result, frozenset) + + # Verify the dict was converted to a tuple of sorted items + assert ("model", "test-model") in result + assert ("n_gpus", 1) in result + + # The dict should be converted to a tuple of sorted items + image_tuple = tuple( + sorted({"image": "icr.io/test:v1", "vllm_version": "0.18.0"}.items()) + ) + assert ("image", image_tuple) in result + + # Test with list values + point_with_list = { + "model": "test-model", + "tags": ["tag1", "tag2", "tag3"], + "n_gpus": 1, + } + + group_with_list = ["model", "tags"] + result_with_list = _build_point_group_values( + point=point_with_list, group=group_with_list + ) + + assert isinstance(result_with_list, frozenset) + assert ("model", "test-model") in result_with_list + # The list should be converted to a tuple + assert ("tags", ("tag1", "tag2", "tag3")) in result_with_list + + # Test that the same dict values produce the same hash + point_with_dict2 = { + "model": 
"test-model", + "image": { + "vllm_version": "0.18.0", + "image": "icr.io/test:v1", + }, # Different order + "n_gpus": 1, + "memory": "128Gi", + } + + result2 = _build_point_group_values(point=point_with_dict2, group=group) + + # Should be equal because dict items are sorted + assert result == result2 + + +def test_build_groups_dict_with_unhashable_values() -> None: + """Test that _build_groups_dict correctly groups points with dict values.""" + + points = [ + { + "model": "model-a", + "image": {"image": "icr.io/test:v1", "vllm_version": "0.18.0"}, + "n_gpus": 1, + }, + { + "model": "model-a", + "image": {"image": "icr.io/test:v1", "vllm_version": "0.18.0"}, + "n_gpus": 2, + }, + { + "model": "model-a", + "image": {"image": "icr.io/test:v2", "vllm_version": "0.20.1"}, + "n_gpus": 1, + }, + ] + + group = ["model", "image"] + + # This should not raise TypeError + groups = _build_groups_dict(points=points, group=group) + + # Should have 2 groups (model-a with v1 image, and model-a with v2 image) + assert len(groups) == 2 + + # Each group should contain the correct points + for group_key, group_points in groups.items(): + if ( + "image", + tuple( + sorted({"image": "icr.io/test:v1", "vllm_version": "0.18.0"}.items()) + ), + ) in group_key: + assert len(group_points) == 2 # Two points with v1 image + else: + assert len(group_points) == 1 # One point with v2 image + + @pytest.mark.asyncio async def test_group_sample_generator_fail_on_continuous_space() -> None: diff --git a/tests/schema/test_property_value.py b/tests/schema/test_property_value.py index b8654a62e..34e547d8e 100644 --- a/tests/schema/test_property_value.py +++ b/tests/schema/test_property_value.py @@ -24,10 +24,11 @@ def python_type_value_examples() -> dict[type, tuple[ValueTypeEnum, typing.Any]] str: (ValueTypeEnum.STRING_VALUE_TYPE, "string"), list: (ValueTypeEnum.VECTOR_VALUE_TYPE, [0, "a", 10]), bytes: (ValueTypeEnum.BLOB_VALUE_TYPE, b"PNG\r89\n\x1a\n\x00\x00"), + dict: 
(ValueTypeEnum.STRING_VALUE_TYPE, {"key": "value", "number": 42}), } -@pytest.fixture(params=[int, float, str, bytes, list, type(None)]) +@pytest.fixture(params=[int, float, str, bytes, list, dict, type(None)]) def value_example( python_type_value_examples: dict[type, tuple[ValueTypeEnum, typing.Any]], request: pytest.FixtureRequest, @@ -36,7 +37,7 @@ def value_example( return python_type_value_examples[request.param] -@pytest.fixture(params=[int, float, str, bytes, list, type(None)]) +@pytest.fixture(params=[int, float, str, bytes, list, dict, type(None)]) def test_value_example( python_type_value_examples: dict[type, tuple[ValueTypeEnum, typing.Any]], request: pytest.FixtureRequest, @@ -45,7 +46,7 @@ def test_value_example( return python_type_value_examples[request.param] -@pytest.fixture(params=[int, float, str, bytes, list, type(None)]) +@pytest.fixture(params=[int, float, str, bytes, list, dict, type(None)]) def property_value( request: pytest.FixtureRequest, ) -> tuple[ConstitutivePropertyValue, type]: @@ -68,6 +69,10 @@ def property_value( ) elif request.param is list: val = ConstitutivePropertyValue(value=[0, "a", 10], property=prop.descriptor()) + elif request.param is dict: + val = ConstitutivePropertyValue( + value={"key": "value", "number": 42}, property=prop.descriptor() + ) elif request.param is type(None): val = ConstitutivePropertyValue(value=None, property=prop.descriptor()) else: @@ -209,6 +214,9 @@ def test_type_detection(property_value: tuple[PropertyValue, type]) -> None: assert val.valueType == ValueTypeEnum.VECTOR_VALUE_TYPE elif value_type is bytes: assert val.valueType == ValueTypeEnum.BLOB_VALUE_TYPE + elif value_type is dict: + # Dict values are treated as STRING_VALUE_TYPE (they're serialized as strings) + assert val.valueType == ValueTypeEnum.STRING_VALUE_TYPE elif value_type is type(None): # Treating None as a Numeric type currently assert val.valueType == ValueTypeEnum.NUMERIC_VALUE_TYPE From b08a3a422958a1e8d69772f9534aea88a7f6f1ac 
Mon Sep 17 00:00:00 2001 From: Michele Gazzetti Date: Thu, 4 Jun 2026 17:43:53 +0100 Subject: [PATCH 5/7] refactor: migrate image property from dict to list format Replace dict-based image property representation with list-based format for better compatibility with PropertyValue schema. Changes: - Remove dict support from PropertyValue schema (property_value.py) - Update vLLM actuator to use list format [image_url, vllm_version] - Migrate experiment YAML files to list-based image values - Update all related tests to reflect list-based approach - Remove obsolete dict-based group sampler tests The new format uses a simple list where: - First element: image URL string - Second element (optional): vLLM version string This simplifies the schema and maintains backward compatibility with string-only image values. --- orchestrator/schema/property_value.py | 10 +- .../vllm_performance/experiment_executor.py | 37 +++--- .../experiments/performance_testing.yaml | 8 +- .../performance_testing_geospatial.yaml | 12 +- .../tests/test_experiment_executor.py | 42 +++---- tests/core/test_group_samplers.py | 106 ------------------ tests/schema/test_property_value.py | 14 +-- 7 files changed, 58 insertions(+), 171 deletions(-) diff --git a/orchestrator/schema/property_value.py b/orchestrator/schema/property_value.py index 529facc12..038085bb2 100644 --- a/orchestrator/schema/property_value.py +++ b/orchestrator/schema/property_value.py @@ -59,7 +59,7 @@ class PropertyValue(pydantic.BaseModel): ), ] = None value: Annotated[ - int | float | list | str | CustomBytes | dict | None, + int | float | list | str | CustomBytes | None, pydantic.Field(description="The measured value."), ] property: Annotated[ @@ -86,9 +86,9 @@ def convert_property_to_descriptor( @pydantic.field_validator("value") def check_value_type( cls, - value: float | list | str | CustomBytes | dict | None, + value: float | list | str | CustomBytes | None, context: pydantic.ValidationInfo, - ) -> int | float | list | str | 
CustomBytes | dict | None: + ) -> int | float | list | str | CustomBytes | None: valueType = context.data.get("valueType") if valueType: @@ -111,7 +111,7 @@ def check_value_type( if type(value) not in {float, int} and value is not None: raise ValueError("Validation failed for NUMERIC_VALUE_TYPE") elif valueType == ValueTypeEnum.STRING_VALUE_TYPE: - if not isinstance(value, (str, dict)): + if not isinstance(value, str): raise ValueError( f"ValueType was string but Value was of type {type(value)}" ) @@ -155,8 +155,6 @@ def set_value_type(self) -> "PropertyValue": self.valueType = ValueTypeEnum.BLOB_VALUE_TYPE elif isinstance(self.value, list): self.valueType = ValueTypeEnum.VECTOR_VALUE_TYPE - elif isinstance(self.value, dict): - self.valueType = ValueTypeEnum.STRING_VALUE_TYPE elif self.valueType == ValueTypeEnum.NUMERIC_VALUE_TYPE and isinstance( self.value, str ): diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py index 386aa4abc..3b53f2ffc 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py @@ -54,41 +54,44 @@ logger = logging.getLogger(__name__) -def _get_vllm_version_from_image_value(image_value: dict | str) -> str | None: +def _get_vllm_version_from_image_value(image_value: list | str) -> str | None: """ Extract vLLM version from image property value. 
Args: - image_value: The image property value, either a dict with 'image' and 'vllm_version' keys, + image_value: The image property value, either a list [image_url, vllm_version], or a string (for backward compatibility) Returns: - Version string if found in dict, None otherwise + Version string if found in list, None otherwise """ logger.debug(f"_get_vllm_version_from_image_value called with: {image_value}") - # If image_value is a dict with vllm_version, extract it - if isinstance(image_value, dict): - version = image_value.get("vllm_version") - logger.debug(f"Extracted vLLM version from dict: {version}") - return version + # If image_value is a list with vllm_version, extract it + if isinstance(image_value, list): + if len(image_value) > 1: + version = image_value[1] + logger.debug(f"Extracted vLLM version from list: {version}") + return version + logger.debug("List has only one element, no version info available") + return None # For backward compatibility: if it's a string, we don't have version info logger.debug("Image value is a string, no version info available") return None -def _should_enable_threadpool(image_value: dict | str, threadpool_value: int) -> bool: +def _should_enable_threadpool(image_value: list | str, threadpool_value: int) -> bool: """ Determine if threadpool should be enabled based on vLLM version and user preference. Threadpool is only supported in vLLM >= 0.20.0. This function checks: 1. If user explicitly disabled threadpool (threadpool=0), return False - 2. If vLLM version exists in image_value dict and version < 0.20.0, return False + 2. If vLLM version exists in image_value list and version < 0.20.0, return False 3. 
Otherwise, return True (user wants it and version supports it or no version info) Args: - image_value: The image property value (dict with 'image' and 'vllm_version' or string) + image_value: The image property value (list [image_url, vllm_version] or string) threadpool_value: User's threadpool preference (0 or 1) Returns: @@ -162,10 +165,10 @@ def _build_entity_env(values: dict[str, str]) -> str: :param values: experiment values :return: definition """ - # Extract image string from dict if needed + # Extract image string from list if needed image_value = values.get("image") - if isinstance(image_value, dict): - image_str = image_value.get("image") + if isinstance(image_value, list): + image_str = image_value[0] if len(image_value) > 0 else image_value else: image_str = image_value @@ -312,9 +315,9 @@ def _create_environment( f"Final threadpool_value to be used: {threadpool_value}" ) - # Extract image string from dict if needed - if isinstance(image_value, dict): - image_name = image_value.get("image", "") + # Extract image string from list if needed + if isinstance(image_value, list): + image_name = image_value[0] if len(image_value) > 0 else "" else: image_name = image_value diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing.yaml b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing.yaml index d32d07e1a..9da7d30b5 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing.yaml +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing.yaml @@ -61,8 +61,8 @@ test-deployment-v1: propertyDomain: variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE" values: - - {image: "vllm/vllm-openai:v0.14.0", vllm_version: "0.14.0"} - - {image: "vllm/vllm-openai:latest", vllm_version: "0.21.0"} + - ["vllm/vllm-openai:v0.14.0", "0.14.0"] + - ["vllm/vllm-openai:latest", "0.21.0"] - 
identifier: n_cpus metadata: description: "(deployment) the number of CPUs to use" @@ -130,7 +130,7 @@ test-deployment-v1: defaultParameterization: - property: identifier: 'image' - value: {image: "vllm/vllm-openai:v0.14.0", vllm_version: "0.14.0"} + value: ["vllm/vllm-openai:v0.14.0", "0.14.0"] - property: identifier: n_cpus value: 8 @@ -463,7 +463,7 @@ test-deployment-guidellm-v1: defaultParameterization: - property: identifier: 'image' - value: {image: "vllm/vllm-openai:v0.14.0", vllm_version: "0.14.0"} + value: ["vllm/vllm-openai:v0.14.0", "0.14.0"] - property: identifier: n_cpus value: 8 diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml index 0320e59ec..6fece3e73 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml @@ -150,8 +150,8 @@ performance_testing-geospatial-full: propertyDomain: variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE" values: - - {image: "your/image/with/vllm/and/terratorch:0.1", vllm_version: "0.20.0"} - - {image: "icr.io/drl-nextgen/mgazz/vllm:v0.18.0-tt.v1.2.5", vllm_version: "0.18.0"} + - ["your/image/with/vllm/and/terratorch:0.1", "0.20.0"] + - ["icr.io/drl-nextgen/mgazz/vllm:v0.18.0-tt.v1.2.5", "0.18.0"] - identifier: n_cpus metadata: description: "(deployment) the number of CPUs to use" @@ -244,7 +244,7 @@ performance_testing-geospatial-full: defaultParameterization: - property: identifier: 'image' - value: {image: "your/image/with/vllm/and/terratorch:0.1", vllm_version: "0.20.0"} + value: ["your/image/with/vllm/and/terratorch:0.1", "0.20.0"] - property: identifier: n_cpus value: 8 @@ -456,7 +456,7 @@ performance_testing-geospatial-full-custom-dataset: 
defaultParameterization: - property: identifier: 'image' - value: {image: "your/image/with/vllm/and/terratorch:0.1", vllm_version: "0.20.0"} + value: ["your/image/with/vllm/and/terratorch:0.1", "0.20.0"] - property: identifier: n_cpus value: 8 @@ -853,7 +853,7 @@ performance_testing-geospatial-full-guidellm: defaultParameterization: - property: identifier: 'image' - value: {image: "your/image/with/vllm/and/terratorch:0.1", vllm_version: "0.20.0"} + value: ["your/image/with/vllm/and/terratorch:0.1", "0.20.0"] - property: identifier: n_cpus value: 8 @@ -1065,7 +1065,7 @@ performance_testing-geospatial-guidellm-deployment-custom-dataset: defaultParameterization: - property: identifier: 'image' - value: {image: "your/image/with/vllm/and/terratorch:0.1", vllm_version: "0.20.0"} + value: ["your/image/with/vllm/and/terratorch:0.1", "0.20.0"] - property: identifier: n_cpus value: 8 diff --git a/plugins/actuators/vllm_performance/tests/test_experiment_executor.py b/plugins/actuators/vllm_performance/tests/test_experiment_executor.py index 645aff82a..483928da5 100644 --- a/plugins/actuators/vllm_performance/tests/test_experiment_executor.py +++ b/plugins/actuators/vllm_performance/tests/test_experiment_executor.py @@ -14,22 +14,22 @@ class TestGetVllmVersionFromImageValue: """Test suite for _get_vllm_version_from_image_value function""" - def test_version_extraction_from_dict_value(self) -> None: - """Test extracting vLLM version from dict image value""" - image_value = { - "image": "icr.io/drl-nextgen/mgazz/vllm:v0.18.0-tt.v1.2.5", - "vllm_version": "0.18.0", - } + def test_version_extraction_from_list_value(self) -> None: + """Test extracting vLLM version from list image value""" + image_value = [ + "icr.io/drl-nextgen/mgazz/vllm:v0.18.0-tt.v1.2.5", + "0.18.0", + ] version = _get_vllm_version_from_image_value(image_value) assert version == "0.18.0" - def test_version_extraction_from_another_dict_value(self) -> None: - """Test extracting vLLM version from another dict image 
value""" - image_value = { - "image": "vllm/vllm-openai:v0.14.0", - "vllm_version": "0.14.0", - } + def test_version_extraction_from_another_list_value(self) -> None: + """Test extracting vLLM version from another list image value""" + image_value = [ + "vllm/vllm-openai:v0.14.0", + "0.14.0", + ] version = _get_vllm_version_from_image_value(image_value) assert version == "0.14.0" @@ -41,21 +41,21 @@ def test_version_extraction_returns_none_for_string_value(self) -> None: version = _get_vllm_version_from_image_value(image_value) assert version is None - def test_version_extraction_returns_none_for_dict_without_version(self) -> None: - """Test that None is returned when dict doesn't have vllm_version key""" - image_value = { - "image": "icr.io/drl-nextgen/mgazz/vllm:v0.18.0-tt.v1.2.5", - } + def test_version_extraction_returns_none_for_list_without_version(self) -> None: + """Test that None is returned when list has only one element (no version)""" + image_value = [ + "icr.io/drl-nextgen/mgazz/vllm:v0.18.0-tt.v1.2.5", + ] version = _get_vllm_version_from_image_value(image_value) assert version is None def test_version_extraction_with_latest_tag(self) -> None: """Test extracting version for latest tag""" - image_value = { - "image": "vllm/vllm-openai:latest", - "vllm_version": "0.21.0", - } + image_value = [ + "vllm/vllm-openai:latest", + "0.21.0", + ] version = _get_vllm_version_from_image_value(image_value) assert version == "0.21.0" diff --git a/tests/core/test_group_samplers.py b/tests/core/test_group_samplers.py index 1de76c494..2e1204fbd 100644 --- a/tests/core/test_group_samplers.py +++ b/tests/core/test_group_samplers.py @@ -10,7 +10,6 @@ RandomGroupSampleSelector, SequentialGroupSampleSelector, _build_groups_dict, - _build_point_group_values, _get_space_matching_points, ) from orchestrator.core.discoveryspace.samplers import ( @@ -313,111 +312,6 @@ async def test_group_sampler_sequential_remote( ), "Expected for selectors that the number of entities iterated 
is equal to number matching entities in source" -def test_build_point_group_values_with_unhashable_types() -> None: - """Test that _build_point_group_values handles dict and list values correctly.""" - - # Test with dictionary values (like the image property in the geospatial case) - point_with_dict = { - "model": "test-model", - "image": {"image": "icr.io/test:v1", "vllm_version": "0.18.0"}, - "n_gpus": 1, - "memory": "128Gi", - } - - group = ["model", "image", "n_gpus"] - - # This should not raise TypeError: unhashable type: 'dict' - result = _build_point_group_values(point=point_with_dict, group=group) - - # Verify the result is a frozenset - assert isinstance(result, frozenset) - - # Verify the dict was converted to a tuple of sorted items - assert ("model", "test-model") in result - assert ("n_gpus", 1) in result - - # The dict should be converted to a tuple of sorted items - image_tuple = tuple( - sorted({"image": "icr.io/test:v1", "vllm_version": "0.18.0"}.items()) - ) - assert ("image", image_tuple) in result - - # Test with list values - point_with_list = { - "model": "test-model", - "tags": ["tag1", "tag2", "tag3"], - "n_gpus": 1, - } - - group_with_list = ["model", "tags"] - result_with_list = _build_point_group_values( - point=point_with_list, group=group_with_list - ) - - assert isinstance(result_with_list, frozenset) - assert ("model", "test-model") in result_with_list - # The list should be converted to a tuple - assert ("tags", ("tag1", "tag2", "tag3")) in result_with_list - - # Test that the same dict values produce the same hash - point_with_dict2 = { - "model": "test-model", - "image": { - "vllm_version": "0.18.0", - "image": "icr.io/test:v1", - }, # Different order - "n_gpus": 1, - "memory": "128Gi", - } - - result2 = _build_point_group_values(point=point_with_dict2, group=group) - - # Should be equal because dict items are sorted - assert result == result2 - - -def test_build_groups_dict_with_unhashable_values() -> None: - """Test that 
_build_groups_dict correctly groups points with dict values.""" - - points = [ - { - "model": "model-a", - "image": {"image": "icr.io/test:v1", "vllm_version": "0.18.0"}, - "n_gpus": 1, - }, - { - "model": "model-a", - "image": {"image": "icr.io/test:v1", "vllm_version": "0.18.0"}, - "n_gpus": 2, - }, - { - "model": "model-a", - "image": {"image": "icr.io/test:v2", "vllm_version": "0.20.1"}, - "n_gpus": 1, - }, - ] - - group = ["model", "image"] - - # This should not raise TypeError - groups = _build_groups_dict(points=points, group=group) - - # Should have 2 groups (model-a with v1 image, and model-a with v2 image) - assert len(groups) == 2 - - # Each group should contain the correct points - for group_key, group_points in groups.items(): - if ( - "image", - tuple( - sorted({"image": "icr.io/test:v1", "vllm_version": "0.18.0"}.items()) - ), - ) in group_key: - assert len(group_points) == 2 # Two points with v1 image - else: - assert len(group_points) == 1 # One point with v2 image - - @pytest.mark.asyncio async def test_group_sample_generator_fail_on_continuous_space() -> None: diff --git a/tests/schema/test_property_value.py b/tests/schema/test_property_value.py index 34e547d8e..b8654a62e 100644 --- a/tests/schema/test_property_value.py +++ b/tests/schema/test_property_value.py @@ -24,11 +24,10 @@ def python_type_value_examples() -> dict[type, tuple[ValueTypeEnum, typing.Any]] str: (ValueTypeEnum.STRING_VALUE_TYPE, "string"), list: (ValueTypeEnum.VECTOR_VALUE_TYPE, [0, "a", 10]), bytes: (ValueTypeEnum.BLOB_VALUE_TYPE, b"PNG\r89\n\x1a\n\x00\x00"), - dict: (ValueTypeEnum.STRING_VALUE_TYPE, {"key": "value", "number": 42}), } -@pytest.fixture(params=[int, float, str, bytes, list, dict, type(None)]) +@pytest.fixture(params=[int, float, str, bytes, list, type(None)]) def value_example( python_type_value_examples: dict[type, tuple[ValueTypeEnum, typing.Any]], request: pytest.FixtureRequest, @@ -37,7 +36,7 @@ def value_example( return 
python_type_value_examples[request.param] -@pytest.fixture(params=[int, float, str, bytes, list, dict, type(None)]) +@pytest.fixture(params=[int, float, str, bytes, list, type(None)]) def test_value_example( python_type_value_examples: dict[type, tuple[ValueTypeEnum, typing.Any]], request: pytest.FixtureRequest, @@ -46,7 +45,7 @@ def test_value_example( return python_type_value_examples[request.param] -@pytest.fixture(params=[int, float, str, bytes, list, dict, type(None)]) +@pytest.fixture(params=[int, float, str, bytes, list, type(None)]) def property_value( request: pytest.FixtureRequest, ) -> tuple[ConstitutivePropertyValue, type]: @@ -69,10 +68,6 @@ def property_value( ) elif request.param is list: val = ConstitutivePropertyValue(value=[0, "a", 10], property=prop.descriptor()) - elif request.param is dict: - val = ConstitutivePropertyValue( - value={"key": "value", "number": 42}, property=prop.descriptor() - ) elif request.param is type(None): val = ConstitutivePropertyValue(value=None, property=prop.descriptor()) else: @@ -214,9 +209,6 @@ def test_type_detection(property_value: tuple[PropertyValue, type]) -> None: assert val.valueType == ValueTypeEnum.VECTOR_VALUE_TYPE elif value_type is bytes: assert val.valueType == ValueTypeEnum.BLOB_VALUE_TYPE - elif value_type is dict: - # Dict values are treated as STRING_VALUE_TYPE (they're serialized as strings) - assert val.valueType == ValueTypeEnum.STRING_VALUE_TYPE elif value_type is type(None): # Treating None as a Numeric type currently assert val.valueType == ValueTypeEnum.NUMERIC_VALUE_TYPE From ddfaee6a7fd198b98adaf49c24148834df9db4f7 Mon Sep 17 00:00:00 2001 From: Michele Gazzetti Date: Fri, 5 Jun 2026 15:51:04 +0100 Subject: [PATCH 6/7] Fix vLLM actuator cache to include benchmark parameters - Add composite cache key combining environment and benchmark params - Move cache to EnvironmentManager actor for persistence across batches - Ensure cache hits only occur for identical env + benchmark config - Add 
comprehensive tests for cache key generation - Fixes issue where measurements were incorrectly reused Signed-off-by: Michele Gazzetti --- .../vllm_performance/env_manager.py | 33 ++ .../vllm_performance/experiment_executor.py | 115 ++++- .../tests/test_experiment_executor.py | 423 ++++++++++++++++++ 3 files changed, 562 insertions(+), 9 deletions(-) diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/env_manager.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/env_manager.py index e3242ce22..c79212f5f 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/env_manager.py +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/env_manager.py @@ -134,6 +134,10 @@ def __init__( self.verify_ssl = verify_ssl self.otlp_traces_endpoint = otlp_traces_endpoint + # Measurement cache for reusing results across entities with identical + # environment and benchmark parameters + self.measurement_cache: dict[str, dict] = {} + # component manager for cleanup self.manager = ComponentsManager( namespace=self.namespace, @@ -144,6 +148,35 @@ def __init__( pvc_template=pvc_template, ) + def get_cached_measurement(self, cache_key: str) -> dict | None: + """ + Get a cached measurement result for the given cache key. + + Args: + cache_key: Composite key containing environment and benchmark parameters + + Returns: + Cached measurement dict with 'measurements' and 'error' keys, or None if not cached + """ + return self.measurement_cache.get(cache_key) + + def cache_measurement( + self, cache_key: str, measurements: list, error: str | None + ) -> None: + """ + Cache a measurement result for reuse by subsequent entities with identical parameters. 
+ + Args: + cache_key: Composite key containing environment and benchmark parameters + measurements: List of measured property values + error: Error message if measurement failed, None otherwise + """ + self.measurement_cache[cache_key] = { + "measurements": measurements, + "error": error, + } + logger.debug(f"Cached measurement for key: {cache_key}") + def _delete_environment_k8s_resources(self, k8s_name: str) -> None: """ Deletes a deployment. Intended to be used for cleanup or error recovery diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py index 3b53f2ffc..9f447d97c 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py @@ -172,6 +172,27 @@ def _build_entity_env(values: dict[str, str]) -> str: else: image_str = image_value + # Determine effective threadpool and renderer_num_workers values + threadpool_requested = int(values.get("threadpool", 1)) + renderer_num_workers_requested = int(values.get("renderer_num_workers", 32)) + + # Check if threadpool will actually be enabled based on version + # Use empty string as fallback if image_value is None + enable_threadpool = _should_enable_threadpool( + image_value if image_value is not None else "", threadpool_requested + ) + + # Normalize values for environment definition: + # - If threadpool is disabled (version < 0.20.0 or user disabled it), + # set both to 0 so different renderer_num_workers values don't create + # different environments when they would behave identically + if enable_threadpool: + threadpool_value = 1 + renderer_num_workers_value = renderer_num_workers_requested + else: + threadpool_value = 0 + renderer_num_workers_value = 0 # Normalize to 0 when not used + env_values = { "model": values.get("model"), "image": 
image_str, @@ -184,12 +205,61 @@ def _build_entity_env(values: dict[str, str]) -> str: "dtype": values.get("dtype"), "cpu_offload": values.get("cpu_offload"), "max_num_seq": values.get("max_num_seq"), - "threadpool": values.get("threadpool", 1), - "renderer_num_workers": values.get("renderer_num_workers", 32), + "threadpool": threadpool_value, + "renderer_num_workers": renderer_num_workers_value, } return json.dumps(env_values) +def _build_benchmark_params_key(values: dict[str, str]) -> str: + """ + Build a cache key from benchmark parameters that affect measurement results. + + These parameters define the workload characteristics and must be included + in the cache key to ensure measurements are only reused for identical tests. + + Args: + values: experiment values + + Returns: + JSON string of benchmark parameters + """ + benchmark_params = { + "num_prompts": values.get("num_prompts"), + "request_rate": values.get("request_rate"), + "max_concurrency": values.get("max_concurrency"), + "number_input_tokens": values.get("number_input_tokens"), + "max_output_tokens": values.get("max_output_tokens"), + "burstiness": values.get("burstiness"), + "dataset": values.get("dataset"), + } + return json.dumps(benchmark_params, sort_keys=True) + + +def _build_cache_key(values: dict[str, str]) -> str: + """ + Build a composite cache key from both environment and benchmark parameters. + + Cache hits should only occur when both the deployment environment AND + the benchmark workload parameters are identical. 
+ + Args: + values: experiment values + + Returns: + composite cache key as JSON string + """ + env_key = _build_entity_env(values) + benchmark_key = _build_benchmark_params_key(values) + + # Combine both keys into a single cache key + composite = { + "environment": json.loads(env_key), + "benchmark": json.loads(benchmark_key), + } + return json.dumps(composite, sort_keys=True) + + def _create_environment( values: dict[str, str], actuator: VLLMPerformanceTestParameters, @@ -523,6 +593,30 @@ def run_resource_and_workload_experiment( try: values = experiment.propertyValuesFromEntity(entity=entity) + # Check if we've already measured an entity with the same environment and benchmark parameters + # Cache key includes both environment (model, GPUs, etc.) and benchmark params (num_prompts, request_rate, etc.) + cache_key = _build_cache_key(values) + logger.info("cache_key: %s", cache_key) + + # Check actor's cache for this measurement + cached_result = ray.get( + env_manager.get_cached_measurement.remote(cache_key) + ) + if cached_result is not None: + logger.info( + f"Reusing cached measurement for entity {entity.identifier} " + f"(identical environment and benchmark parameters)" + ) + measurements.append( + create_measurement_result( + identifier=entity.identifier, + measurements=cached_result["measurements"], + error=cached_result["error"], + reference=request.experimentReference, + ) + ) + continue + logger.info(f"Creating K8s environment for {entity.identifier}") # Will raise an K8sEnvironmentCreationError if the environment could not be created @@ -653,14 +747,17 @@ def run_resource_and_workload_experiment( ) else: measured_values = result.to_observed_property_values(experiment=experiment) - measurements.append( - create_measurement_result( - identifier=entity.identifier, - measurements=measured_values, - error=None, - reference=request.experimentReference, - ) + measurement_result = create_measurement_result( + identifier=entity.identifier, + 
measurements=measured_values, + error=None, + reference=request.experimentReference, ) + measurements.append(measurement_result) + + # Cache the measurement in the actor for potential reuse by subsequent entities + # with the same environment and benchmark parameters + env_manager.cache_measurement.remote(cache_key, measured_values, None) finally: if started_benchmarking: console.put.remote( diff --git a/plugins/actuators/vllm_performance/tests/test_experiment_executor.py b/plugins/actuators/vllm_performance/tests/test_experiment_executor.py index 483928da5..2216c2e63 100644 --- a/plugins/actuators/vllm_performance/tests/test_experiment_executor.py +++ b/plugins/actuators/vllm_performance/tests/test_experiment_executor.py @@ -6,7 +6,12 @@ Tests version extraction from image property values. """ +import json + from ado_actuators.vllm_performance.experiment_executor import ( + _build_benchmark_params_key, + _build_cache_key, + _build_entity_env, _get_vllm_version_from_image_value, ) @@ -61,4 +66,422 @@ def test_version_extraction_with_latest_tag(self) -> None: assert version == "0.21.0" +class TestBuildEntityEnv: + """Test suite for _build_entity_env function""" + + def test_renderer_num_workers_normalized_when_vllm_version_less_than_0_20_0( + self, + ) -> None: + """Test that renderer_num_workers is normalized to 0 when vLLM < 0.20.0""" + # Test with vLLM 0.18.0 (< 0.20.0) + values = { + "model": "test-model", + "image": ["icr.io/test/vllm:v0.18.0", "0.18.0"], + "n_gpus": "1", + "gpu_type": "nvidia-a100", + "n_cpus": "8", + "memory": "32Gi", + "max_batch_tokens": "4096", + "gpu_memory_utilization": "0.9", + "dtype": "auto", + "cpu_offload": "0", + "max_num_seq": "256", + "threadpool": "1", + "renderer_num_workers": "64", # Should be normalized to 0 + } + + result = _build_entity_env(values) + result_dict = json.loads(result) + + # Both threadpool and renderer_num_workers should be 0 + assert result_dict["threadpool"] == 0 + assert result_dict["renderer_num_workers"] 
== 0 + + def test_renderer_num_workers_preserved_when_vllm_version_greater_than_0_20_0( + self, + ) -> None: + """Test that renderer_num_workers is preserved when vLLM >= 0.20.0""" + # Test with vLLM 0.21.0 (>= 0.20.0) + values = { + "model": "test-model", + "image": ["icr.io/test/vllm:v0.21.0", "0.21.0"], + "n_gpus": "1", + "gpu_type": "nvidia-a100", + "n_cpus": "8", + "memory": "32Gi", + "max_batch_tokens": "4096", + "gpu_memory_utilization": "0.9", + "dtype": "auto", + "cpu_offload": "0", + "max_num_seq": "256", + "threadpool": "1", + "renderer_num_workers": "64", + } + + result = _build_entity_env(values) + result_dict = json.loads(result) + + # Both should be preserved + assert result_dict["threadpool"] == 1 + assert result_dict["renderer_num_workers"] == 64 + + def test_renderer_num_workers_normalized_when_threadpool_disabled_by_user( + self, + ) -> None: + """Test that renderer_num_workers is normalized to 0 when user disables threadpool""" + # Test with vLLM 0.21.0 but threadpool=0 + values = { + "model": "test-model", + "image": ["icr.io/test/vllm:v0.21.0", "0.21.0"], + "n_gpus": "1", + "gpu_type": "nvidia-a100", + "n_cpus": "8", + "memory": "32Gi", + "max_batch_tokens": "4096", + "gpu_memory_utilization": "0.9", + "dtype": "auto", + "cpu_offload": "0", + "max_num_seq": "256", + "threadpool": "0", # User explicitly disabled + "renderer_num_workers": "64", # Should be normalized to 0 + } + + result = _build_entity_env(values) + result_dict = json.loads(result) + + # Both should be 0 + assert result_dict["threadpool"] == 0 + assert result_dict["renderer_num_workers"] == 0 + + def test_different_renderer_num_workers_same_env_when_vllm_less_than_0_20_0( + self, + ) -> None: + """Test that different renderer_num_workers values produce same env when vLLM < 0.20.0""" + base_values = { + "model": "test-model", + "image": ["icr.io/test/vllm:v0.18.0", "0.18.0"], + "n_gpus": "1", + "gpu_type": "nvidia-a100", + "n_cpus": "8", + "memory": "32Gi", + "max_batch_tokens": 
"4096", + "gpu_memory_utilization": "0.9", + "dtype": "auto", + "cpu_offload": "0", + "max_num_seq": "256", + "threadpool": "1", + } + + # Test with different renderer_num_workers values + values_32 = {**base_values, "renderer_num_workers": "32"} + values_64 = {**base_values, "renderer_num_workers": "64"} + values_128 = {**base_values, "renderer_num_workers": "128"} + + env_32 = _build_entity_env(values_32) + env_64 = _build_entity_env(values_64) + env_128 = _build_entity_env(values_128) + + # All should produce the same environment definition + assert env_32 == env_64 == env_128 + + # Verify they all have renderer_num_workers=0 + result_dict = json.loads(env_32) + assert result_dict["renderer_num_workers"] == 0 + + def test_backward_compatibility_with_string_image(self) -> None: + """Test backward compatibility when image is a string (no version info)""" + values = { + "model": "test-model", + "image": "icr.io/test/vllm:v0.18.0", # String, no version info + "n_gpus": "1", + "gpu_type": "nvidia-a100", + "n_cpus": "8", + "memory": "32Gi", + "max_batch_tokens": "4096", + "gpu_memory_utilization": "0.9", + "dtype": "auto", + "cpu_offload": "0", + "max_num_seq": "256", + "threadpool": "1", + "renderer_num_workers": "64", + } + + result = _build_entity_env(values) + result_dict = json.loads(result) + + # Should assume threadpool is supported (backward compatible) + assert result_dict["threadpool"] == 1 + assert result_dict["renderer_num_workers"] == 64 + + +class TestBuildBenchmarkParamsKey: + """Test suite for _build_benchmark_params_key function""" + + def test_includes_all_benchmark_parameters(self) -> None: + """Test that all benchmark parameters are included in the key""" + values = { + "num_prompts": "100", + "request_rate": "10", + "max_concurrency": "5", + "number_input_tokens": "50", + "max_output_tokens": "100", + "burstiness": "1.0", + "dataset": "random", + } + + result = _build_benchmark_params_key(values) + result_dict = json.loads(result) + + assert 
result_dict["num_prompts"] == "100" + assert result_dict["request_rate"] == "10" + assert result_dict["max_concurrency"] == "5" + assert result_dict["number_input_tokens"] == "50" + assert result_dict["max_output_tokens"] == "100" + assert result_dict["burstiness"] == "1.0" + assert result_dict["dataset"] == "random" + + def test_handles_missing_values(self) -> None: + """Test that missing values are handled as None""" + values = { + "num_prompts": "100", + # Other parameters missing + } + + result = _build_benchmark_params_key(values) + result_dict = json.loads(result) + + assert result_dict["num_prompts"] == "100" + assert result_dict["request_rate"] is None + assert result_dict["max_concurrency"] is None + assert result_dict["dataset"] is None + + def test_consistent_output_with_sorted_keys(self) -> None: + """Test that output is consistent (keys are sorted)""" + values = { + "dataset": "random", + "num_prompts": "100", + "request_rate": "10", + } + + result1 = _build_benchmark_params_key(values) + result2 = _build_benchmark_params_key(values) + + # Should produce identical output + assert result1 == result2 + + # Verify keys are sorted in JSON + result_dict = json.loads(result1) + keys = list(result_dict.keys()) + assert keys == sorted(keys) + + +class TestBuildCacheKey: + """Test suite for _build_cache_key function""" + + def test_combines_environment_and_benchmark_params(self) -> None: + """Test that cache key includes both environment and benchmark parameters""" + values = { + # Environment params + "model": "test-model", + "image": ["icr.io/test/vllm:v0.18.0", "0.18.0"], + "n_gpus": "1", + "gpu_type": "nvidia-a100", + "n_cpus": "8", + "memory": "32Gi", + "max_batch_tokens": "4096", + "gpu_memory_utilization": "0.9", + "dtype": "auto", + "cpu_offload": "0", + "max_num_seq": "256", + "threadpool": "1", + "renderer_num_workers": "32", + # Benchmark params + "num_prompts": "200", + "request_rate": "32", + "dataset": "random", + } + + result = 
_build_cache_key(values) + result_dict = json.loads(result) + + # Should have both environment and benchmark sections + assert "environment" in result_dict + assert "benchmark" in result_dict + + # Check environment section + env = result_dict["environment"] + assert env["model"] == "test-model" + assert env["n_gpus"] == "1" + + # Check benchmark section + benchmark = result_dict["benchmark"] + assert benchmark["num_prompts"] == "200" + assert benchmark["request_rate"] == "32" + assert benchmark["dataset"] == "random" + + def test_different_benchmark_params_produce_different_keys(self) -> None: + """Test that different benchmark parameters produce different cache keys""" + base_values = { + "model": "test-model", + "image": ["icr.io/test/vllm:v0.20.1", "0.20.1"], + "n_gpus": "1", + "gpu_type": "nvidia-a100", + "n_cpus": "8", + "memory": "32Gi", + "max_batch_tokens": "4096", + "gpu_memory_utilization": "0.9", + "dtype": "auto", + "cpu_offload": "0", + "max_num_seq": "256", + "threadpool": "1", + "renderer_num_workers": "32", + } + + # Same environment, different num_prompts + values1 = {**base_values, "num_prompts": "100", "request_rate": "32"} + values2 = {**base_values, "num_prompts": "200", "request_rate": "32"} + + key1 = _build_cache_key(values1) + key2 = _build_cache_key(values2) + + # Different benchmark params should produce different keys + assert key1 != key2 + + def test_same_params_produce_same_key(self) -> None: + """Test that identical parameters produce identical cache keys""" + values = { + "model": "test-model", + "image": ["icr.io/test/vllm:v0.20.1", "0.20.1"], + "n_gpus": "1", + "gpu_type": "nvidia-a100", + "n_cpus": "8", + "memory": "32Gi", + "max_batch_tokens": "4096", + "gpu_memory_utilization": "0.9", + "dtype": "auto", + "cpu_offload": "0", + "max_num_seq": "256", + "threadpool": "1", + "renderer_num_workers": "32", + "num_prompts": "200", + "request_rate": "32", + "dataset": "random", + } + + key1 = _build_cache_key(values) + key2 = 
_build_cache_key(values) + + # Identical params should produce identical keys + assert key1 == key2 + + def test_cache_key_differentiates_on_request_rate(self) -> None: + """Test that different request_rate values produce different cache keys""" + base_values = { + "model": "test-model", + "image": ["icr.io/test/vllm:v0.18.0", "0.18.0"], + "n_gpus": "1", + "gpu_type": "nvidia-a100", + "n_cpus": "8", + "memory": "32Gi", + "max_batch_tokens": "4096", + "gpu_memory_utilization": "0.9", + "dtype": "auto", + "cpu_offload": "0", + "max_num_seq": "256", + "threadpool": "1", + "renderer_num_workers": "32", + "num_prompts": "200", + "dataset": "random", + } + + # Same everything except request_rate + values_rate_32 = {**base_values, "request_rate": "32"} + values_rate_64 = {**base_values, "request_rate": "64"} + + key_32 = _build_cache_key(values_rate_32) + key_64 = _build_cache_key(values_rate_64) + + # Different request rates should produce different keys + assert key_32 != key_64 + + def test_vllm_0_18_same_cache_key_for_different_renderer_num_workers(self) -> None: + """ + Test that for vLLM 0.18.0, different renderer_num_workers values produce + the SAME cache key (because threadpool is not supported and normalized to 0) + """ + base_values = { + "model": "test-model", + "image": ["icr.io/test/vllm:v0.18.0", "0.18.0"], + "n_gpus": "1", + "gpu_type": "nvidia-a100", + "n_cpus": "8", + "memory": "32Gi", + "max_batch_tokens": "4096", + "gpu_memory_utilization": "0.9", + "dtype": "auto", + "cpu_offload": "0", + "max_num_seq": "256", + "threadpool": "1", + "num_prompts": "200", + "request_rate": "32", + "dataset": "random", + } + + # Different renderer_num_workers values + values_32 = {**base_values, "renderer_num_workers": "32"} + values_64 = {**base_values, "renderer_num_workers": "64"} + + key_32 = _build_cache_key(values_32) + key_64 = _build_cache_key(values_64) + + # For vLLM 0.18.0, both should produce the same key + # because renderer_num_workers is normalized to 0 
in the environment + assert key_32 == key_64 + + # Verify the environment section has renderer_num_workers=0 + result_dict = json.loads(key_32) + assert result_dict["environment"]["renderer_num_workers"] == 0 + + def test_vllm_0_20_different_cache_key_for_different_renderer_num_workers( + self, + ) -> None: + """ + Test that for vLLM 0.20.1, different renderer_num_workers values produce + DIFFERENT cache keys (because threadpool is supported) + """ + base_values = { + "model": "test-model", + "image": ["icr.io/test/vllm:v0.20.1", "0.20.1"], + "n_gpus": "1", + "gpu_type": "nvidia-a100", + "n_cpus": "8", + "memory": "32Gi", + "max_batch_tokens": "4096", + "gpu_memory_utilization": "0.9", + "dtype": "auto", + "cpu_offload": "0", + "max_num_seq": "256", + "threadpool": "1", + "num_prompts": "200", + "request_rate": "32", + "dataset": "random", + } + + # Different renderer_num_workers values + values_32 = {**base_values, "renderer_num_workers": "32"} + values_64 = {**base_values, "renderer_num_workers": "64"} + + key_32 = _build_cache_key(values_32) + key_64 = _build_cache_key(values_64) + + # For vLLM 0.20.1, should produce different keys + assert key_32 != key_64 + + # Verify the environment sections have different renderer_num_workers + result_dict_32 = json.loads(key_32) + result_dict_64 = json.loads(key_64) + assert result_dict_32["environment"]["renderer_num_workers"] == 32 + assert result_dict_64["environment"]["renderer_num_workers"] == 64 + + # Made with Bob From cd6f3502ddc03d5e6f385855489facbb67bd3fac Mon Sep 17 00:00:00 2001 From: Michele Gazzetti Date: Fri, 5 Jun 2026 16:20:53 +0100 Subject: [PATCH 7/7] refactor: improve code readability and reduce duplication - Create VLLMVersionChecker and CacheKeyBuilder utility classes - Add CachedMeasurement dataclass for structured cache storage - Refactor make_hashable to use pattern matching - Add pytest fixtures and parametrize tests - Remove excessive comments and simplify docstrings - All 49 tests passing 
Signed-off-by: Michele Gazzetti --- discoveryspace_9e79e7.yaml | 402 +++++++++++++ discoveryspace_ca6479.yaml | 402 +++++++++++++ operation_entities.csv | 9 + operation_entities_623871.csv | 9 + operation_random_walk.yaml | 56 ++ operation_random_walk_623871.yaml | 56 ++ operation_results.csv | 9 + operation_results_623871.csv | 9 + .../core/discoveryspace/group_samplers.py | 15 +- .../vllm_performance/cache_utils.py | 120 ++++ .../vllm_performance/env_manager.py | 39 +- .../vllm_performance/experiment_executor.py | 348 +---------- .../vllm_performance/version_utils.py | 41 ++ .../tests/test_cache_utils.py | 144 +++++ .../tests/test_experiment_executor.py | 561 ++++++------------ .../tests/test_version_utils.py | 59 ++ pod_sample.yaml | 262 ++++++++ rhaiis_deployment.yaml | 81 +++ ...do-threadpool-list-based-image-property.md | 201 +++++++ .../plans/ado-threadpool-refactoring-plan.md | 248 ++++++++ ...pool-threadpool-property-implementation.md | 296 +++++++++ ...dpool-vllm-cache-implementation-summary.md | 141 +++++ .../ado-threadpool-vllm-cache-improvement.md | 275 +++++++++ 23 files changed, 3023 insertions(+), 760 deletions(-) create mode 100644 discoveryspace_9e79e7.yaml create mode 100644 discoveryspace_ca6479.yaml create mode 100644 operation_entities.csv create mode 100644 operation_entities_623871.csv create mode 100644 operation_random_walk.yaml create mode 100644 operation_random_walk_623871.yaml create mode 100644 operation_results.csv create mode 100644 operation_results_623871.csv create mode 100644 plugins/actuators/vllm_performance/ado_actuators/vllm_performance/cache_utils.py create mode 100644 plugins/actuators/vllm_performance/ado_actuators/vllm_performance/version_utils.py create mode 100644 plugins/actuators/vllm_performance/tests/test_cache_utils.py create mode 100644 plugins/actuators/vllm_performance/tests/test_version_utils.py create mode 100644 pod_sample.yaml create mode 100644 rhaiis_deployment.yaml create mode 100644 
~/workspace/plans/ado-threadpool-list-based-image-property.md create mode 100644 ~/workspace/plans/ado-threadpool-refactoring-plan.md create mode 100644 ~/workspace/plans/ado-threadpool-threadpool-property-implementation.md create mode 100644 ~/workspace/plans/ado-threadpool-vllm-cache-implementation-summary.md create mode 100644 ~/workspace/plans/ado-threadpool-vllm-cache-improvement.md diff --git a/discoveryspace_9e79e7.yaml b/discoveryspace_9e79e7.yaml new file mode 100644 index 000000000..029bd2836 --- /dev/null +++ b/discoveryspace_9e79e7.yaml @@ -0,0 +1,402 @@ +config: + entitySpace: + - identifier: model + propertyDomain: + values: + - ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11 + variableType: CATEGORICAL_VARIABLE_TYPE + - identifier: request_rate + propertyDomain: + values: + - 32 + - 64 + variableType: DISCRETE_VARIABLE_TYPE + - identifier: image + metadata: + description: Docker image with vLLM + terratorch + propertyDomain: + values: + - - icr.io/drl-nextgen/mgazz/vllm:v0.18.0-tt.v1.2.5 + - 0.18.0 + - - icr.io/drl-nextgen/mgazz/vllm:v0.20.1-tt.main + - 0.20.1 + variableType: CATEGORICAL_VARIABLE_TYPE + - identifier: n_gpus + propertyDomain: + values: + - 1 + variableType: DISCRETE_VARIABLE_TYPE + - identifier: gpu_type + propertyDomain: + values: + - NVIDIA-A100-80GB-PCIe + variableType: CATEGORICAL_VARIABLE_TYPE + - identifier: max_num_seq + propertyDomain: + values: + - 256 + variableType: DISCRETE_VARIABLE_TYPE + - identifier: max_batch_tokens + propertyDomain: + values: + - 16384 + variableType: DISCRETE_VARIABLE_TYPE + - identifier: n_cpus + propertyDomain: + values: + - 48 + variableType: DISCRETE_VARIABLE_TYPE + - identifier: memory + propertyDomain: + values: + - 128Gi + variableType: CATEGORICAL_VARIABLE_TYPE + - identifier: gpu_memory_utilization + propertyDomain: + values: + - 0.9 + variableType: DISCRETE_VARIABLE_TYPE + - identifier: dtype + propertyDomain: + values: + - float16 + variableType: CATEGORICAL_VARIABLE_TYPE + - 
identifier: num_prompts + propertyDomain: + values: + - 200 + variableType: DISCRETE_VARIABLE_TYPE + - identifier: dataset + propertyDomain: + values: + - dataset_url_input_india_incluster.jsonl + variableType: CATEGORICAL_VARIABLE_TYPE + - identifier: skip_tokenizer_init + propertyDomain: + values: + - 1 + variableType: DISCRETE_VARIABLE_TYPE + - identifier: enforce_eager + propertyDomain: + values: + - 1 + variableType: DISCRETE_VARIABLE_TYPE + - identifier: io_processor_plugin + propertyDomain: + values: + - terratorch_segmentation + variableType: CATEGORICAL_VARIABLE_TYPE + - identifier: threadpool + propertyDomain: + values: + - 1 + variableType: CATEGORICAL_VARIABLE_TYPE + - identifier: renderer_num_workers + propertyDomain: + values: + - 32 + - 64 + variableType: DISCRETE_VARIABLE_TYPE + experiments: + experiments: + - actuatorIdentifier: vllm_performance + defaultParameterization: + - property: + identifier: image + value: + - your/image/with/vllm/and/terratorch:0.1 + - 0.20.0 + valueType: VECTOR_VALUE_TYPE + - property: + identifier: n_cpus + value: 8 + valueType: NUMERIC_VALUE_TYPE + - property: + identifier: memory + value: 128Gi + valueType: STRING_VALUE_TYPE + - property: + identifier: dtype + value: auto + valueType: STRING_VALUE_TYPE + - property: + identifier: num_prompts + value: 500 + valueType: NUMERIC_VALUE_TYPE + - property: + identifier: max_concurrency + value: -1 + valueType: NUMERIC_VALUE_TYPE + - property: + identifier: burstiness + value: 1.0 + valueType: NUMERIC_VALUE_TYPE + - property: + identifier: gpu_memory_utilization + value: 0.9 + valueType: NUMERIC_VALUE_TYPE + - property: + identifier: cpu_offload + value: 0 + valueType: NUMERIC_VALUE_TYPE + - property: + identifier: max_num_seq + value: 256 + valueType: NUMERIC_VALUE_TYPE + - property: + identifier: max_batch_tokens + value: 16384 + valueType: NUMERIC_VALUE_TYPE + - property: + identifier: n_gpus + value: 1 + valueType: NUMERIC_VALUE_TYPE + - property: + identifier: gpu_type + 
value: NVIDIA-A100-80GB-PCIe + valueType: STRING_VALUE_TYPE + - property: + identifier: skip_tokenizer_init + value: 1 + valueType: NUMERIC_VALUE_TYPE + - property: + identifier: enforce_eager + value: 1 + valueType: NUMERIC_VALUE_TYPE + - property: + identifier: io_processor_plugin + value: terratorch_segmentation + valueType: STRING_VALUE_TYPE + - property: + identifier: threadpool + value: 1 + valueType: NUMERIC_VALUE_TYPE + - property: + identifier: renderer_num_workers + value: 32 + valueType: NUMERIC_VALUE_TYPE + identifier: test-geospatial-deployment-custom-dataset-v1 + metadata: + description: VLLM performance testing across compute resource and workload + configuration + optionalProperties: + - identifier: num_prompts + metadata: + description: (benchmark) The number of prompts to send (total number of + requests) + propertyDomain: + domainRange: + - 1 + - 10001 + interval: 1 + variableType: DISCRETE_VARIABLE_TYPE + - identifier: max_concurrency + metadata: + description: (benchmark) The maximum number of concurrent requests to send + propertyDomain: + domainRange: + - -1 + - 500 + interval: 1 + variableType: DISCRETE_VARIABLE_TYPE + - identifier: burstiness + metadata: + description: (benchmark) The burstiness of the requests - 1.0 is a Poisson + distribution with rate = request_rate. Others are gamma distributions + with lambda = request_rate and shape = burstiness. 
+ propertyDomain: + domainRange: + - 0 + - 10 + interval: 1 + variableType: DISCRETE_VARIABLE_TYPE + - identifier: image + metadata: + description: (deployment) Docker image to use to create vllm deployments + propertyDomain: + values: + - your/image/with/vllm/and/terratorch:0.1 + variableType: OPEN_CATEGORICAL_VARIABLE_TYPE + - identifier: n_cpus + metadata: + description: (deployment) the number of CPUs to use + propertyDomain: + domainRange: + - 1 + - 256 + interval: 1 + variableType: DISCRETE_VARIABLE_TYPE + - identifier: memory + metadata: + description: (deployment) the amount of memory to allocate to vLLM pod + propertyDomain: + values: + - 64Gi + - 128Gi + - 256Gi + variableType: CATEGORICAL_VARIABLE_TYPE + - identifier: dtype + metadata: + description: "(deployment) data type for model weights and activations.\ + \ \u201Cauto\u201D will use FP16 precision for FP32 and FP16 models, and\ + \ BF16 precision for BF16 models." + propertyDomain: + values: + - auto + - half + - float16 + - bfloat16 + - float + - float32 + variableType: CATEGORICAL_VARIABLE_TYPE + - identifier: gpu_memory_utilization + metadata: + description: (deployment) The fraction of GPU memory to be used for the + model executor, + propertyDomain: + values: + - 0.5 + - 0.75 + - 0.9 + variableType: DISCRETE_VARIABLE_TYPE + - identifier: cpu_offload + metadata: + description: (deployment) The amount of model weights in GB to offload to + the CPU per GPU. 
0 means all weights are on GPU, + propertyDomain: + values: + - 0 + - 8 + - 16 + - 24 + - 32 + variableType: DISCRETE_VARIABLE_TYPE + - identifier: max_num_seq + metadata: + description: (deployment) Maximum number of sequences per iteration + propertyDomain: + domainRange: + - 32 + - 2049 + interval: 32 + variableType: DISCRETE_VARIABLE_TYPE + - identifier: max_batch_tokens + metadata: + description: (deployment) maximum number of batched tokens per iteration + propertyDomain: + domainRange: + - 256 + - 32769 + interval: 256 + variableType: DISCRETE_VARIABLE_TYPE + - identifier: n_gpus + metadata: + description: (deployment) Number of GPUs to use + propertyDomain: + domainRange: + - 1 + - 9 + interval: 1 + variableType: DISCRETE_VARIABLE_TYPE + - identifier: gpu_type + metadata: + description: (deployment) The GPU type to use + propertyDomain: + values: + - NVIDIA-A100-80GB-PCIe + - NVIDIA-A100-SXM4-80GB + - NVIDIA-H100-PCIe + variableType: CATEGORICAL_VARIABLE_TYPE + - identifier: skip_tokenizer_init + metadata: + description: (deployment) skip tokenizer initialization + propertyDomain: + values: + - 1 + variableType: DISCRETE_VARIABLE_TYPE + - identifier: enforce_eager + metadata: + description: (deployment) enforce PyTorch eager mode + propertyDomain: + values: + - 1 + variableType: DISCRETE_VARIABLE_TYPE + - identifier: io_processor_plugin + metadata: + description: IO Processor plugin to load for the model + propertyDomain: + values: + - terratorch_segmentation + variableType: OPEN_CATEGORICAL_VARIABLE_TYPE + - identifier: threadpool + metadata: + description: Enable threadpool for vLLM renderer (0=disabled, 1=enabled). + When enabled, uses multiple workers for rendering. + propertyDomain: + values: + - 0 + - 1 + variableType: CATEGORICAL_VARIABLE_TYPE + - identifier: renderer_num_workers + metadata: + description: Number of renderer workers when threadpool is enabled. Only + used when threadpool=1. 
+ propertyDomain: + domainRange: + - 1 + - 128 + interval: 1 + variableType: DISCRETE_VARIABLE_TYPE + requiredProperties: + - identifier: model + metadata: + description: model to use for testing. Assumed to be served by all endpoints + tested. Required to obtain correct tokenizer for benchmarking metrics + calculation + propertyDomain: + values: + - ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11 + variableType: OPEN_CATEGORICAL_VARIABLE_TYPE + - identifier: request_rate + metadata: + description: (benchmark) The number of requests to send per second + propertyDomain: + domainRange: + - -1 + - 1000 + interval: 1 + variableType: DISCRETE_VARIABLE_TYPE + - identifier: dataset + metadata: + description: (benchmark) The dataset to be used for the experiment + propertyDomain: + values: + - custom_dataset.jsonl + variableType: OPEN_CATEGORICAL_VARIABLE_TYPE + targetProperties: + - identifier: duration + - identifier: completed + - identifier: request_throughput + - identifier: mean_e2el_ms + - identifier: median_e2el_ms + - identifier: std_e2el_ms + - identifier: p25_e2el_ms + - identifier: p50_e2el_ms + - identifier: p75_e2el_ms + - identifier: p99_e2el_ms + metadata: + description: Test threadpool functionality with vLLM v0.20.1 and v0.18.0 images + labels: + model_type: geospatial + task: flood_detection + test_type: threadpool_version_comparison + name: Geospatial Terramind Model Performance Testing - Threadpool Version Test + sampleStoreIdentifier: a2760d +created: '2026-06-05T09:48:44.390740Z' +identifier: space-9e79e7-a2760d +status: +- event: created + recorded_at: '2026-06-05T09:48:44.390751Z' +- event: added + recorded_at: '2026-06-05T09:48:44.795200Z' + diff --git a/discoveryspace_ca6479.yaml b/discoveryspace_ca6479.yaml new file mode 100644 index 000000000..999ba87ec --- /dev/null +++ b/discoveryspace_ca6479.yaml @@ -0,0 +1,402 @@ +config: + entitySpace: + - identifier: model + propertyDomain: + values: + - 
ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11 + variableType: CATEGORICAL_VARIABLE_TYPE + - identifier: request_rate + propertyDomain: + values: + - 32 + - 64 + variableType: DISCRETE_VARIABLE_TYPE + - identifier: image + metadata: + description: Docker image with vLLM + terratorch + propertyDomain: + values: + - - icr.io/drl-nextgen/mgazz/vllm:v0.18.0-tt.v1.2.5 + - 0.18.0 + - - icr.io/drl-nextgen/mgazz/vllm:v0.20.1-tt.main + - 0.20.1 + variableType: CATEGORICAL_VARIABLE_TYPE + - identifier: n_gpus + propertyDomain: + values: + - 1 + variableType: DISCRETE_VARIABLE_TYPE + - identifier: gpu_type + propertyDomain: + values: + - NVIDIA-A100-80GB-PCIe + variableType: CATEGORICAL_VARIABLE_TYPE + - identifier: max_num_seq + propertyDomain: + values: + - 256 + variableType: DISCRETE_VARIABLE_TYPE + - identifier: max_batch_tokens + propertyDomain: + values: + - 16384 + variableType: DISCRETE_VARIABLE_TYPE + - identifier: n_cpus + propertyDomain: + values: + - 48 + variableType: DISCRETE_VARIABLE_TYPE + - identifier: memory + propertyDomain: + values: + - 128Gi + variableType: CATEGORICAL_VARIABLE_TYPE + - identifier: gpu_memory_utilization + propertyDomain: + values: + - 0.9 + variableType: DISCRETE_VARIABLE_TYPE + - identifier: dtype + propertyDomain: + values: + - float16 + variableType: CATEGORICAL_VARIABLE_TYPE + - identifier: num_prompts + propertyDomain: + values: + - 2000 + variableType: DISCRETE_VARIABLE_TYPE + - identifier: dataset + propertyDomain: + values: + - dataset_url_input_india_incluster.jsonl + variableType: CATEGORICAL_VARIABLE_TYPE + - identifier: skip_tokenizer_init + propertyDomain: + values: + - 1 + variableType: DISCRETE_VARIABLE_TYPE + - identifier: enforce_eager + propertyDomain: + values: + - 1 + variableType: DISCRETE_VARIABLE_TYPE + - identifier: io_processor_plugin + propertyDomain: + values: + - terratorch_segmentation + variableType: CATEGORICAL_VARIABLE_TYPE + - identifier: threadpool + propertyDomain: + values: + - 1 + 
variableType: CATEGORICAL_VARIABLE_TYPE + - identifier: renderer_num_workers + propertyDomain: + values: + - 32 + - 64 + variableType: DISCRETE_VARIABLE_TYPE + experiments: + experiments: + - actuatorIdentifier: vllm_performance + defaultParameterization: + - property: + identifier: image + value: + - your/image/with/vllm/and/terratorch:0.1 + - 0.20.0 + valueType: VECTOR_VALUE_TYPE + - property: + identifier: n_cpus + value: 8 + valueType: NUMERIC_VALUE_TYPE + - property: + identifier: memory + value: 128Gi + valueType: STRING_VALUE_TYPE + - property: + identifier: dtype + value: auto + valueType: STRING_VALUE_TYPE + - property: + identifier: num_prompts + value: 500 + valueType: NUMERIC_VALUE_TYPE + - property: + identifier: max_concurrency + value: -1 + valueType: NUMERIC_VALUE_TYPE + - property: + identifier: burstiness + value: 1.0 + valueType: NUMERIC_VALUE_TYPE + - property: + identifier: gpu_memory_utilization + value: 0.9 + valueType: NUMERIC_VALUE_TYPE + - property: + identifier: cpu_offload + value: 0 + valueType: NUMERIC_VALUE_TYPE + - property: + identifier: max_num_seq + value: 256 + valueType: NUMERIC_VALUE_TYPE + - property: + identifier: max_batch_tokens + value: 16384 + valueType: NUMERIC_VALUE_TYPE + - property: + identifier: n_gpus + value: 1 + valueType: NUMERIC_VALUE_TYPE + - property: + identifier: gpu_type + value: NVIDIA-A100-80GB-PCIe + valueType: STRING_VALUE_TYPE + - property: + identifier: skip_tokenizer_init + value: 1 + valueType: NUMERIC_VALUE_TYPE + - property: + identifier: enforce_eager + value: 1 + valueType: NUMERIC_VALUE_TYPE + - property: + identifier: io_processor_plugin + value: terratorch_segmentation + valueType: STRING_VALUE_TYPE + - property: + identifier: threadpool + value: 1 + valueType: NUMERIC_VALUE_TYPE + - property: + identifier: renderer_num_workers + value: 32 + valueType: NUMERIC_VALUE_TYPE + identifier: test-geospatial-deployment-custom-dataset-v1 + metadata: + description: VLLM performance testing across 
compute resource and workload + configuration + optionalProperties: + - identifier: num_prompts + metadata: + description: (benchmark) The number of prompts to send (total number of + requests) + propertyDomain: + domainRange: + - 1 + - 10001 + interval: 1 + variableType: DISCRETE_VARIABLE_TYPE + - identifier: max_concurrency + metadata: + description: (benchmark) The maximum number of concurrent requests to send + propertyDomain: + domainRange: + - -1 + - 500 + interval: 1 + variableType: DISCRETE_VARIABLE_TYPE + - identifier: burstiness + metadata: + description: (benchmark) The burstiness of the requests - 1.0 is a Poisson + distribution with rate = request_rate. Others are gamma distributions + with lambda = request_rate and shape = burstiness. + propertyDomain: + domainRange: + - 0 + - 10 + interval: 1 + variableType: DISCRETE_VARIABLE_TYPE + - identifier: image + metadata: + description: (deployment) Docker image to use to create vllm deployments + propertyDomain: + values: + - your/image/with/vllm/and/terratorch:0.1 + variableType: OPEN_CATEGORICAL_VARIABLE_TYPE + - identifier: n_cpus + metadata: + description: (deployment) the number of CPUs to use + propertyDomain: + domainRange: + - 1 + - 256 + interval: 1 + variableType: DISCRETE_VARIABLE_TYPE + - identifier: memory + metadata: + description: (deployment) the amount of memory to allocate to vLLM pod + propertyDomain: + values: + - 64Gi + - 128Gi + - 256Gi + variableType: CATEGORICAL_VARIABLE_TYPE + - identifier: dtype + metadata: + description: "(deployment) data type for model weights and activations.\ + \ \u201Cauto\u201D will use FP16 precision for FP32 and FP16 models, and\ + \ BF16 precision for BF16 models." 
+ propertyDomain: + values: + - auto + - half + - float16 + - bfloat16 + - float + - float32 + variableType: CATEGORICAL_VARIABLE_TYPE + - identifier: gpu_memory_utilization + metadata: + description: (deployment) The fraction of GPU memory to be used for the + model executor, + propertyDomain: + values: + - 0.5 + - 0.75 + - 0.9 + variableType: DISCRETE_VARIABLE_TYPE + - identifier: cpu_offload + metadata: + description: (deployment) The amount of model weights in GB to offload to + the CPU per GPU. 0 means all weights are on GPU, + propertyDomain: + values: + - 0 + - 8 + - 16 + - 24 + - 32 + variableType: DISCRETE_VARIABLE_TYPE + - identifier: max_num_seq + metadata: + description: (deployment) Maximum number of sequences per iteration + propertyDomain: + domainRange: + - 32 + - 2049 + interval: 32 + variableType: DISCRETE_VARIABLE_TYPE + - identifier: max_batch_tokens + metadata: + description: (deployment) maximum number of batched tokens per iteration + propertyDomain: + domainRange: + - 256 + - 32769 + interval: 256 + variableType: DISCRETE_VARIABLE_TYPE + - identifier: n_gpus + metadata: + description: (deployment) Number of GPUs to use + propertyDomain: + domainRange: + - 1 + - 9 + interval: 1 + variableType: DISCRETE_VARIABLE_TYPE + - identifier: gpu_type + metadata: + description: (deployment) The GPU type to use + propertyDomain: + values: + - NVIDIA-A100-80GB-PCIe + - NVIDIA-A100-SXM4-80GB + - NVIDIA-H100-PCIe + variableType: CATEGORICAL_VARIABLE_TYPE + - identifier: skip_tokenizer_init + metadata: + description: (deployment) skip tokenizer initialization + propertyDomain: + values: + - 1 + variableType: DISCRETE_VARIABLE_TYPE + - identifier: enforce_eager + metadata: + description: (deployment) enforce PyTorch eager mode + propertyDomain: + values: + - 1 + variableType: DISCRETE_VARIABLE_TYPE + - identifier: io_processor_plugin + metadata: + description: IO Processor plugin to load for the model + propertyDomain: + values: + - terratorch_segmentation + 
variableType: OPEN_CATEGORICAL_VARIABLE_TYPE + - identifier: threadpool + metadata: + description: Enable threadpool for vLLM renderer (0=disabled, 1=enabled). + When enabled, uses multiple workers for rendering. + propertyDomain: + values: + - 0 + - 1 + variableType: CATEGORICAL_VARIABLE_TYPE + - identifier: renderer_num_workers + metadata: + description: Number of renderer workers when threadpool is enabled. Only + used when threadpool=1. + propertyDomain: + domainRange: + - 1 + - 128 + interval: 1 + variableType: DISCRETE_VARIABLE_TYPE + requiredProperties: + - identifier: model + metadata: + description: model to use for testing. Assumed to be served by all endpoints + tested. Required to obtain correct tokenizer for benchmarking metrics + calculation + propertyDomain: + values: + - ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11 + variableType: OPEN_CATEGORICAL_VARIABLE_TYPE + - identifier: request_rate + metadata: + description: (benchmark) The number of requests to send per second + propertyDomain: + domainRange: + - -1 + - 1000 + interval: 1 + variableType: DISCRETE_VARIABLE_TYPE + - identifier: dataset + metadata: + description: (benchmark) The dataset to be used for the experiment + propertyDomain: + values: + - custom_dataset.jsonl + variableType: OPEN_CATEGORICAL_VARIABLE_TYPE + targetProperties: + - identifier: duration + - identifier: completed + - identifier: request_throughput + - identifier: mean_e2el_ms + - identifier: median_e2el_ms + - identifier: std_e2el_ms + - identifier: p25_e2el_ms + - identifier: p50_e2el_ms + - identifier: p75_e2el_ms + - identifier: p99_e2el_ms + metadata: + description: Test threadpool functionality with vLLM v0.20.1 and v0.18.0 images + labels: + model_type: geospatial + task: flood_detection + test_type: threadpool_version_comparison + name: Geospatial Terramind Model Performance Testing - Threadpool Version Test + sampleStoreIdentifier: a2760d +created: '2026-06-05T08:43:34.231390Z' +identifier: 
space-ca6479-a2760d +status: +- event: created + recorded_at: '2026-06-05T08:43:34.231402Z' +- event: added + recorded_at: '2026-06-05T08:43:34.615893Z' + diff --git a/operation_entities.csv b/operation_entities.csv new file mode 100644 index 000000000..c868d7fde --- /dev/null +++ b/operation_entities.csv @@ -0,0 +1,9 @@ +request_index,result_index,identifier,experiment_id,dataset,dtype,enforce_eager,generatorid,gpu_memory_utilization,gpu_type,image,io_processor_plugin,max_batch_tokens,max_num_seq,memory,model,n_cpus,n_gpus,num_prompts,renderer_num_workers,request_rate,skip_tokenizer_init,threadpool,duration,completed,request_throughput,mean_e2el_ms,median_e2el_ms,std_e2el_ms,p25_e2el_ms,p50_e2el_ms,p75_e2el_ms,p99_e2el_ms,request_id,entity_index,valid +0,0,"dataset.dataset_url_input_india_incluster.jsonl-dtype.float16-enforce_eager.1-gpu_memory_utilization.0.9-gpu_type.NVIDIA-A100-80GB-PCIe-image.['icr.io/drl-nextgen/mgazz/vllm:v0.18.0-tt.v1.2.5', '0.18.0']-io_processor_plugin.terratorch_segmentation-max_batch_tokens.16384-max_num_seq.256-memory.128Gi-model.ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11-n_cpus.48-n_gpus.1-num_prompts.2000-renderer_num_workers.32-request_rate.32-skip_tokenizer_init.1-threadpool.1",vllm_performance.test-geospatial-deployment-custom-dataset-v1,dataset_url_input_india_incluster.jsonl,float16,1,explicit_grid_sample_generator,0.9,NVIDIA-A100-80GB-PCIe,"['icr.io/drl-nextgen/mgazz/vllm:v0.18.0-tt.v1.2.5', '0.18.0']",terratorch_segmentation,16384,256,128Gi,ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11,48,1,2000,32,32,1,1,215.27072143740952,2000,9.290627107325902,80530.65929337498,92830.85599914192,49518.423878155205,20688.09394375421,92830.85599914192,122615.82531570455,151683.5461606551,260012,0,True +1,0,"dataset.dataset_url_input_india_incluster.jsonl-dtype.float16-enforce_eager.1-gpu_memory_utilization.0.9-gpu_type.NVIDIA-A100-80GB-PCIe-image.['icr.io/drl-nextgen/mgazz/vllm:v0.18.0-tt.v1.2.5', 
'0.18.0']-io_processor_plugin.terratorch_segmentation-max_batch_tokens.16384-max_num_seq.256-memory.128Gi-model.ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11-n_cpus.48-n_gpus.1-num_prompts.2000-renderer_num_workers.32-request_rate.64-skip_tokenizer_init.1-threadpool.1",vllm_performance.test-geospatial-deployment-custom-dataset-v1,dataset_url_input_india_incluster.jsonl,float16,1,explicit_grid_sample_generator,0.9,NVIDIA-A100-80GB-PCIe,"['icr.io/drl-nextgen/mgazz/vllm:v0.18.0-tt.v1.2.5', '0.18.0']",terratorch_segmentation,16384,256,128Gi,ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11,48,1,2000,32,64,1,1,208.70084162708372,2000,9.58309503884844,91508.32508202083,94061.80999567732,52116.900894615734,51941.69757212512,94061.80999567732,135936.58734648488,175984.5853097178,6f069a,0,True +2,0,"dataset.dataset_url_input_india_incluster.jsonl-dtype.float16-enforce_eager.1-gpu_memory_utilization.0.9-gpu_type.NVIDIA-A100-80GB-PCIe-image.['icr.io/drl-nextgen/mgazz/vllm:v0.18.0-tt.v1.2.5', '0.18.0']-io_processor_plugin.terratorch_segmentation-max_batch_tokens.16384-max_num_seq.256-memory.128Gi-model.ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11-n_cpus.48-n_gpus.1-num_prompts.2000-renderer_num_workers.64-request_rate.32-skip_tokenizer_init.1-threadpool.1",vllm_performance.test-geospatial-deployment-custom-dataset-v1,dataset_url_input_india_incluster.jsonl,float16,1,explicit_grid_sample_generator,0.9,NVIDIA-A100-80GB-PCIe,"['icr.io/drl-nextgen/mgazz/vllm:v0.18.0-tt.v1.2.5', '0.18.0']",terratorch_segmentation,16384,256,128Gi,ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11,48,1,2000,64,32,1,1,213.164786985144,2000,9.382412678410084,79639.53276119987,91883.17732699215,48888.65282522971,20816.216070204973,91883.17732699215,121425.36071920767,149769.97912688178,01d594,0,True 
+3,0,"dataset.dataset_url_input_india_incluster.jsonl-dtype.float16-enforce_eager.1-gpu_memory_utilization.0.9-gpu_type.NVIDIA-A100-80GB-PCIe-image.['icr.io/drl-nextgen/mgazz/vllm:v0.18.0-tt.v1.2.5', '0.18.0']-io_processor_plugin.terratorch_segmentation-max_batch_tokens.16384-max_num_seq.256-memory.128Gi-model.ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11-n_cpus.48-n_gpus.1-num_prompts.2000-renderer_num_workers.64-request_rate.64-skip_tokenizer_init.1-threadpool.1",vllm_performance.test-geospatial-deployment-custom-dataset-v1,dataset_url_input_india_incluster.jsonl,float16,1,explicit_grid_sample_generator,0.9,NVIDIA-A100-80GB-PCIe,"['icr.io/drl-nextgen/mgazz/vllm:v0.18.0-tt.v1.2.5', '0.18.0']",terratorch_segmentation,16384,256,128Gi,ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11,48,1,2000,64,64,1,1,208.27105004619807,2000,9.602870872146491,90961.90372485157,93205.19294030964,52049.438963978195,52220.867299241945,93205.19294030964,135152.128781192,175817.94290206395,56e3d4,0,True +4,0,"dataset.dataset_url_input_india_incluster.jsonl-dtype.float16-enforce_eager.1-gpu_memory_utilization.0.9-gpu_type.NVIDIA-A100-80GB-PCIe-image.['icr.io/drl-nextgen/mgazz/vllm:v0.20.1-tt.main', '0.20.1']-io_processor_plugin.terratorch_segmentation-max_batch_tokens.16384-max_num_seq.256-memory.128Gi-model.ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11-n_cpus.48-n_gpus.1-num_prompts.2000-renderer_num_workers.32-request_rate.32-skip_tokenizer_init.1-threadpool.1",vllm_performance.test-geospatial-deployment-custom-dataset-v1,dataset_url_input_india_incluster.jsonl,float16,1,explicit_grid_sample_generator,0.9,NVIDIA-A100-80GB-PCIe,"['icr.io/drl-nextgen/mgazz/vllm:v0.20.1-tt.main', 
'0.20.1']",terratorch_segmentation,16384,256,128Gi,ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11,48,1,2000,32,32,1,1,76.71821601688862,1430,18.639640938538015,7224.8957470541345,2276.4977612532675,7776.232630976363,1995.9888060111552,2276.4977612532675,14173.72929956764,23919.61157440208,2950ca,0,True +5,0,"dataset.dataset_url_input_india_incluster.jsonl-dtype.float16-enforce_eager.1-gpu_memory_utilization.0.9-gpu_type.NVIDIA-A100-80GB-PCIe-image.['icr.io/drl-nextgen/mgazz/vllm:v0.20.1-tt.main', '0.20.1']-io_processor_plugin.terratorch_segmentation-max_batch_tokens.16384-max_num_seq.256-memory.128Gi-model.ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11-n_cpus.48-n_gpus.1-num_prompts.2000-renderer_num_workers.32-request_rate.64-skip_tokenizer_init.1-threadpool.1",vllm_performance.test-geospatial-deployment-custom-dataset-v1,dataset_url_input_india_incluster.jsonl,float16,1,explicit_grid_sample_generator,0.9,NVIDIA-A100-80GB-PCIe,"['icr.io/drl-nextgen/mgazz/vllm:v0.20.1-tt.main', '0.20.1']",terratorch_segmentation,16384,256,128Gi,ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11,48,1,2000,32,64,1,1,87.09100105147809,1545,17.740064775312153,27247.12148647828,32077.017480507493,19474.259797434173,2426.9744735211134,32077.017480507493,43850.73825716972,55731.31082225591,ea4dd5,0,True +6,0,"dataset.dataset_url_input_india_incluster.jsonl-dtype.float16-enforce_eager.1-gpu_memory_utilization.0.9-gpu_type.NVIDIA-A100-80GB-PCIe-image.['icr.io/drl-nextgen/mgazz/vllm:v0.20.1-tt.main', 
'0.20.1']-io_processor_plugin.terratorch_segmentation-max_batch_tokens.16384-max_num_seq.256-memory.128Gi-model.ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11-n_cpus.48-n_gpus.1-num_prompts.2000-renderer_num_workers.64-request_rate.32-skip_tokenizer_init.1-threadpool.1",vllm_performance.test-geospatial-deployment-custom-dataset-v1,dataset_url_input_india_incluster.jsonl,float16,1,explicit_grid_sample_generator,0.9,NVIDIA-A100-80GB-PCIe,"['icr.io/drl-nextgen/mgazz/vllm:v0.20.1-tt.main', '0.20.1']",terratorch_segmentation,16384,256,128Gi,ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11,48,1,2000,64,32,1,1,69.94518145918846,1433,20.487472762310656,4799.1137330050515,2080.0580009818077,4893.778910783289,1845.9379142150285,2080.0580009818077,7618.101900443435,17009.875886179507,bcdd13,0,True +7,0,"dataset.dataset_url_input_india_incluster.jsonl-dtype.float16-enforce_eager.1-gpu_memory_utilization.0.9-gpu_type.NVIDIA-A100-80GB-PCIe-image.['icr.io/drl-nextgen/mgazz/vllm:v0.20.1-tt.main', '0.20.1']-io_processor_plugin.terratorch_segmentation-max_batch_tokens.16384-max_num_seq.256-memory.128Gi-model.ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11-n_cpus.48-n_gpus.1-num_prompts.2000-renderer_num_workers.64-request_rate.64-skip_tokenizer_init.1-threadpool.1",vllm_performance.test-geospatial-deployment-custom-dataset-v1,dataset_url_input_india_incluster.jsonl,float16,1,explicit_grid_sample_generator,0.9,NVIDIA-A100-80GB-PCIe,"['icr.io/drl-nextgen/mgazz/vllm:v0.20.1-tt.main', '0.20.1']",terratorch_segmentation,16384,256,128Gi,ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11,48,1,2000,64,64,1,1,71.60710450261831,1492,20.835921384664353,20450.668643505865,27168.44614362344,15321.471204638105,1938.958921469748,27168.44614362344,34347.58948907256,40303.56870012358,203a07,0,True diff --git a/operation_entities_623871.csv b/operation_entities_623871.csv new file mode 100644 index 000000000..3f28d4125 --- /dev/null +++ b/operation_entities_623871.csv @@ 
-0,0 +1,9 @@ +request_index,result_index,identifier,experiment_id,dataset,dtype,enforce_eager,generatorid,gpu_memory_utilization,gpu_type,image,io_processor_plugin,max_batch_tokens,max_num_seq,memory,model,n_cpus,n_gpus,num_prompts,renderer_num_workers,request_rate,skip_tokenizer_init,threadpool,duration,completed,request_throughput,mean_e2el_ms,median_e2el_ms,std_e2el_ms,p25_e2el_ms,p50_e2el_ms,p75_e2el_ms,p99_e2el_ms,request_id,entity_index,valid +0,0,"dataset.dataset_url_input_india_incluster.jsonl-dtype.float16-enforce_eager.1-gpu_memory_utilization.0.9-gpu_type.NVIDIA-A100-80GB-PCIe-image.['icr.io/drl-nextgen/mgazz/vllm:v0.18.0-tt.v1.2.5', '0.18.0']-io_processor_plugin.terratorch_segmentation-max_batch_tokens.16384-max_num_seq.256-memory.128Gi-model.ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11-n_cpus.48-n_gpus.1-num_prompts.200-renderer_num_workers.32-request_rate.32-skip_tokenizer_init.1-threadpool.1",vllm_performance.test-geospatial-deployment-custom-dataset-v1,dataset_url_input_india_incluster.jsonl,float16,1,explicit_grid_sample_generator,0.9,NVIDIA-A100-80GB-PCIe,"['icr.io/drl-nextgen/mgazz/vllm:v0.18.0-tt.v1.2.5', '0.18.0']",terratorch_segmentation,16384,256,128Gi,ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11,48,1,200,32,32,1,1,21.16098095756024,200,9.451357685218532,8323.152535227127,8751.465620007366,4447.680493290849,4647.437798324972,8751.465620007366,12455.881118308753,14942.587131289763,85f9af,0,True +1,0,"dataset.dataset_url_input_india_incluster.jsonl-dtype.float16-enforce_eager.1-gpu_memory_utilization.0.9-gpu_type.NVIDIA-A100-80GB-PCIe-image.['icr.io/drl-nextgen/mgazz/vllm:v0.18.0-tt.v1.2.5', 
'0.18.0']-io_processor_plugin.terratorch_segmentation-max_batch_tokens.16384-max_num_seq.256-memory.128Gi-model.ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11-n_cpus.48-n_gpus.1-num_prompts.200-renderer_num_workers.32-request_rate.64-skip_tokenizer_init.1-threadpool.1",vllm_performance.test-geospatial-deployment-custom-dataset-v1,dataset_url_input_india_incluster.jsonl,float16,1,explicit_grid_sample_generator,0.9,NVIDIA-A100-80GB-PCIe,"['icr.io/drl-nextgen/mgazz/vllm:v0.18.0-tt.v1.2.5', '0.18.0']",terratorch_segmentation,16384,256,128Gi,ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11,48,1,200,32,64,1,1,20.831470943056047,200,9.600858266164249,9387.722638151608,9541.33604792878,5095.870009355212,5034.923402359709,9541.33604792878,13802.96064238064,17705.100336009637,963caa,0,True +2,0,"dataset.dataset_url_input_india_incluster.jsonl-dtype.float16-enforce_eager.1-gpu_memory_utilization.0.9-gpu_type.NVIDIA-A100-80GB-PCIe-image.['icr.io/drl-nextgen/mgazz/vllm:v0.18.0-tt.v1.2.5', '0.18.0']-io_processor_plugin.terratorch_segmentation-max_batch_tokens.16384-max_num_seq.256-memory.128Gi-model.ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11-n_cpus.48-n_gpus.1-num_prompts.200-renderer_num_workers.64-request_rate.32-skip_tokenizer_init.1-threadpool.1",vllm_performance.test-geospatial-deployment-custom-dataset-v1,dataset_url_input_india_incluster.jsonl,float16,1,explicit_grid_sample_generator,0.9,NVIDIA-A100-80GB-PCIe,"['icr.io/drl-nextgen/mgazz/vllm:v0.18.0-tt.v1.2.5', '0.18.0']",terratorch_segmentation,16384,256,128Gi,ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11,48,1,200,64,32,1,1,21.051082043908536,200,9.500699279155256,8082.675415757112,7225.963302887976,4594.845149836468,4306.130911456421,7225.963302887976,12459.563876036556,14833.095950279385,774a6f,0,True 
+3,0,"dataset.dataset_url_input_india_incluster.jsonl-dtype.float16-enforce_eager.1-gpu_memory_utilization.0.9-gpu_type.NVIDIA-A100-80GB-PCIe-image.['icr.io/drl-nextgen/mgazz/vllm:v0.18.0-tt.v1.2.5', '0.18.0']-io_processor_plugin.terratorch_segmentation-max_batch_tokens.16384-max_num_seq.256-memory.128Gi-model.ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11-n_cpus.48-n_gpus.1-num_prompts.200-renderer_num_workers.64-request_rate.64-skip_tokenizer_init.1-threadpool.1",vllm_performance.test-geospatial-deployment-custom-dataset-v1,dataset_url_input_india_incluster.jsonl,float16,1,explicit_grid_sample_generator,0.9,NVIDIA-A100-80GB-PCIe,"['icr.io/drl-nextgen/mgazz/vllm:v0.18.0-tt.v1.2.5', '0.18.0']",terratorch_segmentation,16384,256,128Gi,ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11,48,1,200,64,64,1,1,20.759232787415385,200,9.63426741479789,9383.305872818455,9475.987559650091,5158.763184013406,4642.030250979587,9475.987559650091,13991.68548709713,17492.737999986857,24617a,0,True +4,0,"dataset.dataset_url_input_india_incluster.jsonl-dtype.float16-enforce_eager.1-gpu_memory_utilization.0.9-gpu_type.NVIDIA-A100-80GB-PCIe-image.['icr.io/drl-nextgen/mgazz/vllm:v0.20.1-tt.main', '0.20.1']-io_processor_plugin.terratorch_segmentation-max_batch_tokens.16384-max_num_seq.256-memory.128Gi-model.ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11-n_cpus.48-n_gpus.1-num_prompts.200-renderer_num_workers.32-request_rate.32-skip_tokenizer_init.1-threadpool.1",vllm_performance.test-geospatial-deployment-custom-dataset-v1,dataset_url_input_india_incluster.jsonl,float16,1,explicit_grid_sample_generator,0.9,NVIDIA-A100-80GB-PCIe,"['icr.io/drl-nextgen/mgazz/vllm:v0.20.1-tt.main', 
'0.20.1']",terratorch_segmentation,16384,256,128Gi,ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11,48,1,200,32,32,1,1,7.335592240095139,149,20.31192508023968,1311.595108735081,1347.61365596205,532.2226718124316,966.6640544310212,1347.61365596205,1623.8912288099527,2275.5631955340514,fa955b,0,True +5,0,"dataset.dataset_url_input_india_incluster.jsonl-dtype.float16-enforce_eager.1-gpu_memory_utilization.0.9-gpu_type.NVIDIA-A100-80GB-PCIe-image.['icr.io/drl-nextgen/mgazz/vllm:v0.20.1-tt.main', '0.20.1']-io_processor_plugin.terratorch_segmentation-max_batch_tokens.16384-max_num_seq.256-memory.128Gi-model.ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11-n_cpus.48-n_gpus.1-num_prompts.200-renderer_num_workers.32-request_rate.64-skip_tokenizer_init.1-threadpool.1",vllm_performance.test-geospatial-deployment-custom-dataset-v1,dataset_url_input_india_incluster.jsonl,float16,1,explicit_grid_sample_generator,0.9,NVIDIA-A100-80GB-PCIe,"['icr.io/drl-nextgen/mgazz/vllm:v0.20.1-tt.main', '0.20.1']",terratorch_segmentation,16384,256,128Gi,ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11,48,1,200,32,64,1,1,7.772065187804401,157,20.200551102731076,2756.258463415845,3167.6963847130537,1368.5851351345912,1464.73484672606,3167.6963847130537,4070.4276766628022,4661.2075228616595,c7c1a6,0,True +6,0,"dataset.dataset_url_input_india_incluster.jsonl-dtype.float16-enforce_eager.1-gpu_memory_utilization.0.9-gpu_type.NVIDIA-A100-80GB-PCIe-image.['icr.io/drl-nextgen/mgazz/vllm:v0.20.1-tt.main', 
'0.20.1']-io_processor_plugin.terratorch_segmentation-max_batch_tokens.16384-max_num_seq.256-memory.128Gi-model.ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11-n_cpus.48-n_gpus.1-num_prompts.200-renderer_num_workers.64-request_rate.32-skip_tokenizer_init.1-threadpool.1",vllm_performance.test-geospatial-deployment-custom-dataset-v1,dataset_url_input_india_incluster.jsonl,float16,1,explicit_grid_sample_generator,0.9,NVIDIA-A100-80GB-PCIe,"['icr.io/drl-nextgen/mgazz/vllm:v0.20.1-tt.main', '0.20.1']",terratorch_segmentation,16384,256,128Gi,ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11,48,1,200,64,32,1,1,6.895735959522426,161,23.3477617102889,837.0693416699119,866.3467029109597,227.43894353062015,715.9368423745036,866.3467029109597,984.7816163673996,1284.9709721282125,5637b4,0,True +7,0,"dataset.dataset_url_input_india_incluster.jsonl-dtype.float16-enforce_eager.1-gpu_memory_utilization.0.9-gpu_type.NVIDIA-A100-80GB-PCIe-image.['icr.io/drl-nextgen/mgazz/vllm:v0.20.1-tt.main', '0.20.1']-io_processor_plugin.terratorch_segmentation-max_batch_tokens.16384-max_num_seq.256-memory.128Gi-model.ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11-n_cpus.48-n_gpus.1-num_prompts.200-renderer_num_workers.64-request_rate.64-skip_tokenizer_init.1-threadpool.1",vllm_performance.test-geospatial-deployment-custom-dataset-v1,dataset_url_input_india_incluster.jsonl,float16,1,explicit_grid_sample_generator,0.9,NVIDIA-A100-80GB-PCIe,"['icr.io/drl-nextgen/mgazz/vllm:v0.20.1-tt.main', '0.20.1']",terratorch_segmentation,16384,256,128Gi,ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11,48,1,200,64,64,1,1,5.922862338833511,155,26.169779260904917,1804.9909565357432,1586.8693506345153,894.8397404377881,1008.32075253129,1586.8693506345153,2751.6918242909014,2992.8823397308593,499691,0,True diff --git a/operation_random_walk.yaml b/operation_random_walk.yaml new file mode 100644 index 000000000..647efeb89 --- /dev/null +++ b/operation_random_walk.yaml @@ -0,0 +1,56 @@ 
+config: + actuatorConfigurationIdentifiers: + - actuatorconfiguration-vllm_performance-678e1bd6 + metadata: + description: Test threadpool functionality across vLLM versions (v0.20.1 with + threadpool, v0.18.0 without) + labels: + experiment_type: performance_testing + model_type: geospatial + test_type: threadpool_version_comparison + name: Geospatial Model Performance - Threadpool Version Test + operation: + module: + operationType: search + operatorName: random_walk + parameters: + numberEntities: all + samplerConfig: + grouping: + - model + - image + - memory + - max_batch_tokens + - max_num_seq + - n_gpus + - gpu_type + - n_cpus + - threadpool + - renderer_num_workers + mode: sequentialgrouped + samplerType: generator + singleMeasurement: false + spaces: + - space-ca6479-a2760d +created: '2026-06-05T09:08:38.348395Z' +identifier: random_walk-1.8.1.dev41+gb08a3a42.d20260604164353.dirty-fe5225 +metadata: + entities_submitted: 8 + experiments_requested: 8 +operationType: search +operatorIdentifier: random_walk-1.8.1.dev41+gb08a3a42.d20260604164353.dirty +status: +- event: created + recorded_at: '2026-06-05T09:08:38.348402Z' +- event: added + recorded_at: '2026-06-05T09:08:38.360114Z' +- event: started + recorded_at: '2026-06-05T09:08:38.374526Z' +- event: updated + recorded_at: '2026-06-05T09:08:38.374551Z' +- event: finished + exit_state: success + recorded_at: '2026-06-05T09:42:31.394135Z' +- event: updated + recorded_at: '2026-06-05T09:42:31.399321Z' + diff --git a/operation_random_walk_623871.yaml b/operation_random_walk_623871.yaml new file mode 100644 index 000000000..13d1ec337 --- /dev/null +++ b/operation_random_walk_623871.yaml @@ -0,0 +1,56 @@ +config: + actuatorConfigurationIdentifiers: + - actuatorconfiguration-vllm_performance-678e1bd6 + metadata: + description: Test threadpool functionality across vLLM versions (v0.20.1 with + threadpool, v0.18.0 without) + labels: + experiment_type: performance_testing + model_type: geospatial + test_type: 
threadpool_version_comparison + name: Geospatial Model Performance - Threadpool Version Test + operation: + module: + operationType: search + operatorName: random_walk + parameters: + numberEntities: all + samplerConfig: + grouping: + - model + - image + - memory + - max_batch_tokens + - max_num_seq + - n_gpus + - gpu_type + - n_cpus + - threadpool + - renderer_num_workers + mode: sequentialgrouped + samplerType: generator + singleMeasurement: false + spaces: + - space-9e79e7-a2760d +created: '2026-06-05T10:00:13.976806Z' +identifier: random_walk-1.8.1.dev41+gb08a3a42.d20260604164353.dirty-623871 +metadata: + entities_submitted: 8 + experiments_requested: 8 +operationType: search +operatorIdentifier: random_walk-1.8.1.dev41+gb08a3a42.d20260604164353.dirty +status: +- event: created + recorded_at: '2026-06-05T10:00:13.976813Z' +- event: added + recorded_at: '2026-06-05T10:00:13.990153Z' +- event: started + recorded_at: '2026-06-05T10:00:14.005488Z' +- event: updated + recorded_at: '2026-06-05T10:00:14.005501Z' +- event: finished + exit_state: success + recorded_at: '2026-06-05T10:07:09.909085Z' +- event: updated + recorded_at: '2026-06-05T10:07:09.913408Z' + diff --git a/operation_results.csv b/operation_results.csv new file mode 100644 index 000000000..911503bb5 --- /dev/null +++ b/operation_results.csv @@ -0,0 +1,9 @@ +,Result UID,Request ID,Request Index,Request type,Experiment ID,Entity ID,Valid,Number of Properties,Metadata +0,a0798a17-f7dd-44f4-b35f-2b8906b5a575,260012,0,measured,vllm_performance.test-geospatial-deployment-custom-dataset-v1,"dataset.dataset_url_input_india_incluster.jsonl-dtype.float16-enforce_eager.1-gpu_memory_utilization.0.9-gpu_type.NVIDIA-A100-80GB-PCIe-image.['icr.io/drl-nextgen/mgazz/vllm:v0.18.0-tt.v1.2.5', 
'0.18.0']-io_processor_plugin.terratorch_segmentation-max_batch_tokens.16384-max_num_seq.256-memory.128Gi-model.ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11-n_cpus.48-n_gpus.1-num_prompts.2000-renderer_num_workers.32-request_rate.32-skip_tokenizer_init.1-threadpool.1",True,10,{} +1,ea1afb78-1905-4e70-987b-2367a8ed4867,6f069a,1,measured,vllm_performance.test-geospatial-deployment-custom-dataset-v1,"dataset.dataset_url_input_india_incluster.jsonl-dtype.float16-enforce_eager.1-gpu_memory_utilization.0.9-gpu_type.NVIDIA-A100-80GB-PCIe-image.['icr.io/drl-nextgen/mgazz/vllm:v0.18.0-tt.v1.2.5', '0.18.0']-io_processor_plugin.terratorch_segmentation-max_batch_tokens.16384-max_num_seq.256-memory.128Gi-model.ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11-n_cpus.48-n_gpus.1-num_prompts.2000-renderer_num_workers.32-request_rate.64-skip_tokenizer_init.1-threadpool.1",True,10,{} +2,635cdb1d-5d6f-40dd-abc7-5fcad3c99eea,01d594,2,measured,vllm_performance.test-geospatial-deployment-custom-dataset-v1,"dataset.dataset_url_input_india_incluster.jsonl-dtype.float16-enforce_eager.1-gpu_memory_utilization.0.9-gpu_type.NVIDIA-A100-80GB-PCIe-image.['icr.io/drl-nextgen/mgazz/vllm:v0.18.0-tt.v1.2.5', '0.18.0']-io_processor_plugin.terratorch_segmentation-max_batch_tokens.16384-max_num_seq.256-memory.128Gi-model.ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11-n_cpus.48-n_gpus.1-num_prompts.2000-renderer_num_workers.64-request_rate.32-skip_tokenizer_init.1-threadpool.1",True,10,{} +3,dd80fcf3-1e71-42b7-af4f-1c1d67e4c154,56e3d4,3,measured,vllm_performance.test-geospatial-deployment-custom-dataset-v1,"dataset.dataset_url_input_india_incluster.jsonl-dtype.float16-enforce_eager.1-gpu_memory_utilization.0.9-gpu_type.NVIDIA-A100-80GB-PCIe-image.['icr.io/drl-nextgen/mgazz/vllm:v0.18.0-tt.v1.2.5', 
'0.18.0']-io_processor_plugin.terratorch_segmentation-max_batch_tokens.16384-max_num_seq.256-memory.128Gi-model.ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11-n_cpus.48-n_gpus.1-num_prompts.2000-renderer_num_workers.64-request_rate.64-skip_tokenizer_init.1-threadpool.1",True,10,{} +4,8d3bc967-4b50-40a4-8d1a-9ef224ef588d,2950ca,4,measured,vllm_performance.test-geospatial-deployment-custom-dataset-v1,"dataset.dataset_url_input_india_incluster.jsonl-dtype.float16-enforce_eager.1-gpu_memory_utilization.0.9-gpu_type.NVIDIA-A100-80GB-PCIe-image.['icr.io/drl-nextgen/mgazz/vllm:v0.20.1-tt.main', '0.20.1']-io_processor_plugin.terratorch_segmentation-max_batch_tokens.16384-max_num_seq.256-memory.128Gi-model.ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11-n_cpus.48-n_gpus.1-num_prompts.2000-renderer_num_workers.32-request_rate.32-skip_tokenizer_init.1-threadpool.1",True,10,{} +5,e5e74f8f-e0c0-4f53-92b2-0ac4b70ad384,ea4dd5,5,measured,vllm_performance.test-geospatial-deployment-custom-dataset-v1,"dataset.dataset_url_input_india_incluster.jsonl-dtype.float16-enforce_eager.1-gpu_memory_utilization.0.9-gpu_type.NVIDIA-A100-80GB-PCIe-image.['icr.io/drl-nextgen/mgazz/vllm:v0.20.1-tt.main', '0.20.1']-io_processor_plugin.terratorch_segmentation-max_batch_tokens.16384-max_num_seq.256-memory.128Gi-model.ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11-n_cpus.48-n_gpus.1-num_prompts.2000-renderer_num_workers.32-request_rate.64-skip_tokenizer_init.1-threadpool.1",True,10,{} +6,d29e5933-4f86-4f78-98a7-22e2f67f4155,bcdd13,6,measured,vllm_performance.test-geospatial-deployment-custom-dataset-v1,"dataset.dataset_url_input_india_incluster.jsonl-dtype.float16-enforce_eager.1-gpu_memory_utilization.0.9-gpu_type.NVIDIA-A100-80GB-PCIe-image.['icr.io/drl-nextgen/mgazz/vllm:v0.20.1-tt.main', 
'0.20.1']-io_processor_plugin.terratorch_segmentation-max_batch_tokens.16384-max_num_seq.256-memory.128Gi-model.ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11-n_cpus.48-n_gpus.1-num_prompts.2000-renderer_num_workers.64-request_rate.32-skip_tokenizer_init.1-threadpool.1",True,10,{} +7,f41dec02-f8e4-4ca8-a32f-c531df73d5f9,203a07,7,measured,vllm_performance.test-geospatial-deployment-custom-dataset-v1,"dataset.dataset_url_input_india_incluster.jsonl-dtype.float16-enforce_eager.1-gpu_memory_utilization.0.9-gpu_type.NVIDIA-A100-80GB-PCIe-image.['icr.io/drl-nextgen/mgazz/vllm:v0.20.1-tt.main', '0.20.1']-io_processor_plugin.terratorch_segmentation-max_batch_tokens.16384-max_num_seq.256-memory.128Gi-model.ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11-n_cpus.48-n_gpus.1-num_prompts.2000-renderer_num_workers.64-request_rate.64-skip_tokenizer_init.1-threadpool.1",True,10,{} diff --git a/operation_results_623871.csv b/operation_results_623871.csv new file mode 100644 index 000000000..7755cc944 --- /dev/null +++ b/operation_results_623871.csv @@ -0,0 +1,9 @@ +,Result UID,Request ID,Request Index,Request type,Experiment ID,Entity ID,Valid,Number of Properties,Metadata +0,4ea602e5-bbf0-4cfa-bede-cc5df5525b77,85f9af,0,measured,vllm_performance.test-geospatial-deployment-custom-dataset-v1,"dataset.dataset_url_input_india_incluster.jsonl-dtype.float16-enforce_eager.1-gpu_memory_utilization.0.9-gpu_type.NVIDIA-A100-80GB-PCIe-image.['icr.io/drl-nextgen/mgazz/vllm:v0.18.0-tt.v1.2.5', '0.18.0']-io_processor_plugin.terratorch_segmentation-max_batch_tokens.16384-max_num_seq.256-memory.128Gi-model.ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11-n_cpus.48-n_gpus.1-num_prompts.200-renderer_num_workers.32-request_rate.32-skip_tokenizer_init.1-threadpool.1",True,10,{} 
+1,b5bc7457-ee2c-4c8a-a162-30ffe8807321,963caa,1,measured,vllm_performance.test-geospatial-deployment-custom-dataset-v1,"dataset.dataset_url_input_india_incluster.jsonl-dtype.float16-enforce_eager.1-gpu_memory_utilization.0.9-gpu_type.NVIDIA-A100-80GB-PCIe-image.['icr.io/drl-nextgen/mgazz/vllm:v0.18.0-tt.v1.2.5', '0.18.0']-io_processor_plugin.terratorch_segmentation-max_batch_tokens.16384-max_num_seq.256-memory.128Gi-model.ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11-n_cpus.48-n_gpus.1-num_prompts.200-renderer_num_workers.32-request_rate.64-skip_tokenizer_init.1-threadpool.1",True,10,{} +2,66087ee8-ce37-4b76-8eff-f029988d3408,774a6f,2,measured,vllm_performance.test-geospatial-deployment-custom-dataset-v1,"dataset.dataset_url_input_india_incluster.jsonl-dtype.float16-enforce_eager.1-gpu_memory_utilization.0.9-gpu_type.NVIDIA-A100-80GB-PCIe-image.['icr.io/drl-nextgen/mgazz/vllm:v0.18.0-tt.v1.2.5', '0.18.0']-io_processor_plugin.terratorch_segmentation-max_batch_tokens.16384-max_num_seq.256-memory.128Gi-model.ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11-n_cpus.48-n_gpus.1-num_prompts.200-renderer_num_workers.64-request_rate.32-skip_tokenizer_init.1-threadpool.1",True,10,{} +3,ff47dee4-2e9b-431e-b8fb-b4c3caf26cce,24617a,3,measured,vllm_performance.test-geospatial-deployment-custom-dataset-v1,"dataset.dataset_url_input_india_incluster.jsonl-dtype.float16-enforce_eager.1-gpu_memory_utilization.0.9-gpu_type.NVIDIA-A100-80GB-PCIe-image.['icr.io/drl-nextgen/mgazz/vllm:v0.18.0-tt.v1.2.5', '0.18.0']-io_processor_plugin.terratorch_segmentation-max_batch_tokens.16384-max_num_seq.256-memory.128Gi-model.ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11-n_cpus.48-n_gpus.1-num_prompts.200-renderer_num_workers.64-request_rate.64-skip_tokenizer_init.1-threadpool.1",True,10,{} 
+4,b1308077-3fd9-4100-a20f-6f5c0bd4d7e5,fa955b,4,measured,vllm_performance.test-geospatial-deployment-custom-dataset-v1,"dataset.dataset_url_input_india_incluster.jsonl-dtype.float16-enforce_eager.1-gpu_memory_utilization.0.9-gpu_type.NVIDIA-A100-80GB-PCIe-image.['icr.io/drl-nextgen/mgazz/vllm:v0.20.1-tt.main', '0.20.1']-io_processor_plugin.terratorch_segmentation-max_batch_tokens.16384-max_num_seq.256-memory.128Gi-model.ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11-n_cpus.48-n_gpus.1-num_prompts.200-renderer_num_workers.32-request_rate.32-skip_tokenizer_init.1-threadpool.1",True,10,{} +5,de055654-54c1-42a9-9999-32c01d38aa79,c7c1a6,5,measured,vllm_performance.test-geospatial-deployment-custom-dataset-v1,"dataset.dataset_url_input_india_incluster.jsonl-dtype.float16-enforce_eager.1-gpu_memory_utilization.0.9-gpu_type.NVIDIA-A100-80GB-PCIe-image.['icr.io/drl-nextgen/mgazz/vllm:v0.20.1-tt.main', '0.20.1']-io_processor_plugin.terratorch_segmentation-max_batch_tokens.16384-max_num_seq.256-memory.128Gi-model.ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11-n_cpus.48-n_gpus.1-num_prompts.200-renderer_num_workers.32-request_rate.64-skip_tokenizer_init.1-threadpool.1",True,10,{} +6,1f4868b8-7eaa-4549-87de-fc09dddf79ca,5637b4,6,measured,vllm_performance.test-geospatial-deployment-custom-dataset-v1,"dataset.dataset_url_input_india_incluster.jsonl-dtype.float16-enforce_eager.1-gpu_memory_utilization.0.9-gpu_type.NVIDIA-A100-80GB-PCIe-image.['icr.io/drl-nextgen/mgazz/vllm:v0.20.1-tt.main', '0.20.1']-io_processor_plugin.terratorch_segmentation-max_batch_tokens.16384-max_num_seq.256-memory.128Gi-model.ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11-n_cpus.48-n_gpus.1-num_prompts.200-renderer_num_workers.64-request_rate.32-skip_tokenizer_init.1-threadpool.1",True,10,{} 
+7,7df51b86-26c5-4de2-a6c9-fa77f414a57b,499691,7,measured,vllm_performance.test-geospatial-deployment-custom-dataset-v1,"dataset.dataset_url_input_india_incluster.jsonl-dtype.float16-enforce_eager.1-gpu_memory_utilization.0.9-gpu_type.NVIDIA-A100-80GB-PCIe-image.['icr.io/drl-nextgen/mgazz/vllm:v0.20.1-tt.main', '0.20.1']-io_processor_plugin.terratorch_segmentation-max_batch_tokens.16384-max_num_seq.256-memory.128Gi-model.ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11-n_cpus.48-n_gpus.1-num_prompts.200-renderer_num_workers.64-request_rate.64-skip_tokenizer_init.1-threadpool.1",True,10,{} diff --git a/orchestrator/core/discoveryspace/group_samplers.py b/orchestrator/core/discoveryspace/group_samplers.py index 05028f48e..c9b1afb53 100644 --- a/orchestrator/core/discoveryspace/group_samplers.py +++ b/orchestrator/core/discoveryspace/group_samplers.py @@ -63,14 +63,13 @@ def make_hashable( value: float | list | str | dict | None, ) -> int | float | tuple | str | None: """Convert unhashable types to hashable equivalents""" - if isinstance(value, dict): - # Convert dict to sorted tuple of items - return tuple(sorted(value.items())) - if isinstance(value, list): - # Convert list to tuple - return tuple(value) - # Return value as-is if already hashable - return value # type: ignore[return-value] + match value: + case dict(): + return tuple(sorted(value.items())) + case list(): + return tuple(value) + case _: + return value # type: ignore[return-value] return frozenset({(k, make_hashable(v)) for k, v in point.items() if k in group}) diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/cache_utils.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/cache_utils.py new file mode 100644 index 000000000..7e0016967 --- /dev/null +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/cache_utils.py @@ -0,0 +1,120 @@ +# Copyright IBM Corporation 2025, 2026 +# SPDX-License-Identifier: MIT + +"""Utilities for 
building cache keys for vLLM performance measurements.""" + +import json + +from ado_actuators.vllm_performance.version_utils import VLLMVersionChecker + + +class CacheKeyBuilder: + """Build cache keys for vLLM performance measurements. + + Cache keys combine environment parameters (model, GPUs, etc.) and + benchmark parameters (num_prompts, request_rate, etc.) to ensure + measurements are only reused for identical configurations. + """ + + # Environment parameters that define the deployment + ENV_PARAMS = [ + "model", + "image", + "n_gpus", + "gpu_type", + "n_cpus", + "memory", + "max_batch_tokens", + "gpu_memory_utilization", + "dtype", + "cpu_offload", + "max_num_seq", + "threadpool", + "renderer_num_workers", + ] + + BENCHMARK_PARAMS: list[str] = [ + "num_prompts", + "request_rate", + "max_concurrency", + "number_input_tokens", + "max_output_tokens", + "burstiness", + "dataset", + ] + + # All parameters used in cache key + ALL_PARAMS = ENV_PARAMS + BENCHMARK_PARAMS + + @classmethod + def _normalize_and_extract_env_params(cls, values: dict[str, str]) -> dict: + """Extract and normalize environment parameters.""" + # Extract and normalize image value + image_value = values.get("image") + if isinstance(image_value, list): + image_str = image_value[0] if len(image_value) > 0 else image_value + else: + image_str = image_value + + # Normalize threadpool properties based on vLLM version + threadpool_requested = int(values.get("threadpool", 1)) + renderer_num_workers_requested = int(values.get("renderer_num_workers", 32)) + + enable_threadpool = VLLMVersionChecker.supports_threadpool( + image_value if image_value is not None else "", threadpool_requested + ) + + if enable_threadpool: + threadpool_value = 1 + renderer_num_workers_value = renderer_num_workers_requested + else: + threadpool_value = 0 + renderer_num_workers_value = 0 + + return { + "model": values.get("model"), + "image": image_str, + "n_gpus": values.get("n_gpus"), + "gpu_type": values.get("gpu_type"), + 
"n_cpus": values.get("n_cpus"), + "memory": values.get("memory"), + "max_batch_tokens": values.get("max_batch_tokens"), + "gpu_memory_utilization": values.get("gpu_memory_utilization"), + "dtype": values.get("dtype"), + "cpu_offload": values.get("cpu_offload"), + "max_num_seq": values.get("max_num_seq"), + "threadpool": threadpool_value, + "renderer_num_workers": renderer_num_workers_value, + } + + @classmethod + def build_env_definition(cls, values: dict[str, str]) -> str: + """Build environment definition JSON string.""" + env_values = cls._normalize_and_extract_env_params(values) + return json.dumps(env_values) + + @classmethod + def build(cls, values: dict[str, str]) -> str: + """Build composite cache key from environment and benchmark parameters.""" + env_values = cls._normalize_and_extract_env_params(values) + + # Build benchmark parameters + benchmark_params = { + "num_prompts": values.get("num_prompts"), + "request_rate": values.get("request_rate"), + "max_concurrency": values.get("max_concurrency"), + "number_input_tokens": values.get("number_input_tokens"), + "max_output_tokens": values.get("max_output_tokens"), + "burstiness": values.get("burstiness"), + "dataset": values.get("dataset"), + } + + # Combine into composite key + composite = { + "environment": env_values, + "benchmark": benchmark_params, + } + return json.dumps(composite, sort_keys=True) + + +# Made with Bob diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/env_manager.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/env_manager.py index c79212f5f..75c4e25d3 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/env_manager.py +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/env_manager.py @@ -3,6 +3,7 @@ import asyncio import logging +from dataclasses import dataclass from enum import Enum from typing import Annotated @@ -26,6 +27,14 @@ logger = logging.getLogger(__name__) +@dataclass +class 
CachedMeasurement: + """Cached measurement result.""" + + measurements: list + error: str | None + + class EnvironmentState(Enum): """ Environment state @@ -136,7 +145,7 @@ def __init__( # Measurement cache for reusing results across entities with identical # environment and benchmark parameters - self.measurement_cache: dict[str, dict] = {} + self.measurement_cache: dict[str, CachedMeasurement] = {} # component manager for cleanup self.manager = ComponentsManager( @@ -148,33 +157,17 @@ def __init__( pvc_template=pvc_template, ) - def get_cached_measurement(self, cache_key: str) -> dict | None: - """ - Get a cached measurement result for the given cache key. - - Args: - cache_key: Composite key containing environment and benchmark parameters - - Returns: - Cached measurement dict with 'measurements' and 'error' keys, or None if not cached - """ + def get_cached_measurement(self, cache_key: str) -> CachedMeasurement | None: + """Get cached measurement result.""" return self.measurement_cache.get(cache_key) def cache_measurement( self, cache_key: str, measurements: list, error: str | None ) -> None: - """ - Cache a measurement result for reuse by subsequent entities with identical parameters. 
- - Args: - cache_key: Composite key containing environment and benchmark parameters - measurements: List of measured property values - error: Error message if measurement failed, None otherwise - """ - self.measurement_cache[cache_key] = { - "measurements": measurements, - "error": error, - } + """Cache measurement result for reuse.""" + self.measurement_cache[cache_key] = CachedMeasurement( + measurements=measurements, error=error + ) logger.debug(f"Cached measurement for key: {cache_key}") def _delete_environment_k8s_resources(self, k8s_name: str) -> None: diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py index 9f447d97c..2eabb7c1b 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py @@ -11,6 +11,7 @@ from ado_actuators.vllm_performance.actuator_parameters import ( VLLMPerformanceTestParameters, ) +from ado_actuators.vllm_performance.cache_utils import CacheKeyBuilder from ado_actuators.vllm_performance.env_manager import ( Environment, EnvironmentManager, @@ -26,6 +27,7 @@ from ado_actuators.vllm_performance.k8s.yaml_support.build_components import ( VLLMDtype, ) +from ado_actuators.vllm_performance.version_utils import VLLMVersionChecker from ado_actuators.vllm_performance.vllm_performance_test.benchmark_models import ( BenchmarkParameters, BenchmarkResult, @@ -39,7 +41,6 @@ execute_guidellm_benchmark, execute_guidellm_geospatial_benchmark, ) -from packaging import version from ray.actor import ActorHandle from orchestrator.modules.actuators.measurement_queue import MeasurementQueue @@ -54,212 +55,6 @@ logger = logging.getLogger(__name__) -def _get_vllm_version_from_image_value(image_value: list | str) -> str | None: - """ - Extract vLLM version from image property value. 
- - Args: - image_value: The image property value, either a list [image_url, vllm_version], - or a string (for backward compatibility) - - Returns: - Version string if found in list, None otherwise - """ - logger.debug(f"_get_vllm_version_from_image_value called with: {image_value}") - - # If image_value is a list with vllm_version, extract it - if isinstance(image_value, list): - if len(image_value) > 1: - version = image_value[1] - logger.debug(f"Extracted vLLM version from list: {version}") - return version - logger.debug("List has only one element, no version info available") - return None - - # For backward compatibility: if it's a string, we don't have version info - logger.debug("Image value is a string, no version info available") - return None - - -def _should_enable_threadpool(image_value: list | str, threadpool_value: int) -> bool: - """ - Determine if threadpool should be enabled based on vLLM version and user preference. - - Threadpool is only supported in vLLM >= 0.20.0. This function checks: - 1. If user explicitly disabled threadpool (threadpool=0), return False - 2. If vLLM version exists in image_value list and version < 0.20.0, return False - 3. 
Otherwise, return True (user wants it and version supports it or no version info) - - Args: - image_value: The image property value (list [image_url, vllm_version] or string) - threadpool_value: User's threadpool preference (0 or 1) - - Returns: - True if threadpool should be enabled, False otherwise - """ - logger.debug( - f"_should_enable_threadpool called with: image_value={image_value}, " - f"threadpool_value={threadpool_value}" - ) - - # If user explicitly disabled, respect that - if threadpool_value == 0: - logger.debug("Threadpool explicitly disabled by user (threadpool_value=0)") - return False - - # Get version from image value - vllm_version_str = _get_vllm_version_from_image_value(image_value) - logger.debug(f"Retrieved vLLM version: {vllm_version_str}") - - # If no version info, assume it's supported (backward compatible) - if vllm_version_str is None: - logger.warning( - f"No vLLM version info found for image {image_value}. " - "Assuming threadpool is supported." - ) - return True - - # Parse and compare version - try: - vllm_ver = version.parse(vllm_version_str) - min_version = version.parse("0.20.0") - logger.debug( - f"Parsed versions - vLLM: {vllm_ver}, minimum required: {min_version}" - ) - - if vllm_ver < min_version: - logger.info( - f"Threadpool disabled: vLLM version {vllm_version_str} < 0.20.0 " - f"for image {image_value}" - ) - return False - - logger.info( - f"Threadpool enabled: vLLM version {vllm_version_str} >= 0.20.0 " - f"for image {image_value}" - ) - return True - except Exception as e: - logger.error( - f"Failed to parse vLLM version '{vllm_version_str}' for image {image_value}: {e}. " - "Assuming threadpool is supported." 
- ) - return True - - -def _build_entity_env(values: dict[str, str]) -> str: - """ - This is the list of entity parameters that define the environment: - * model name - * image name - * number of gpus - * gpu type - * number of cpus - * memory - * max batch tokens - * max number of sequences - * gpu memory utilization - * data type - * cpu offload - Build entity based environment parameters - :param values: experiment values - :return: definition - """ - # Extract image string from list if needed - image_value = values.get("image") - if isinstance(image_value, list): - image_str = image_value[0] if len(image_value) > 0 else image_value - else: - image_str = image_value - - # Determine effective threadpool and renderer_num_workers values - threadpool_requested = int(values.get("threadpool", 1)) - renderer_num_workers_requested = int(values.get("renderer_num_workers", 32)) - - # Check if threadpool will actually be enabled based on version - # Use empty string as fallback if image_value is None - enable_threadpool = _should_enable_threadpool( - image_value if image_value is not None else "", threadpool_requested - ) - - # Normalize values for environment definition: - # - If threadpool is disabled (version < 0.20.0 or user disabled it), - # set both to 0 so different renderer_num_workers values don't create - # different environments when they would behave identically - if enable_threadpool: - threadpool_value = 1 - renderer_num_workers_value = renderer_num_workers_requested - else: - threadpool_value = 0 - renderer_num_workers_value = 0 # Normalize to 0 when not used - - env_values = { - "model": values.get("model"), - "image": image_str, - "n_gpus": values.get("n_gpus"), - "gpu_type": values.get("gpu_type"), - "n_cpus": values.get("n_cpus"), - "memory": values.get("memory"), - "max_batch_tokens": values.get("max_batch_tokens"), - "gpu_memory_utilization": values.get("gpu_memory_utilization"), - "dtype": values.get("dtype"), - "cpu_offload": 
values.get("cpu_offload"), - "max_num_seq": values.get("max_num_seq"), - "threadpool": threadpool_value, - "renderer_num_workers": renderer_num_workers_value, - } - return json.dumps(env_values) - - -def _build_benchmark_params_key(values: dict[str, str]) -> str: - """ - Build a cache key from benchmark parameters that affect measurement results. - - These parameters define the workload characteristics and must be included - in the cache key to ensure measurements are only reused for identical tests. - - Args: - values: experiment values - - Returns: - JSON string of benchmark parameters - """ - benchmark_params = { - "num_prompts": values.get("num_prompts"), - "request_rate": values.get("request_rate"), - "max_concurrency": values.get("max_concurrency"), - "number_input_tokens": values.get("number_input_tokens"), - "max_output_tokens": values.get("max_output_tokens"), - "burstiness": values.get("burstiness"), - "dataset": values.get("dataset"), - } - return json.dumps(benchmark_params, sort_keys=True) - - -def _build_cache_key(values: dict[str, str]) -> str: - """ - Build a composite cache key from both environment and benchmark parameters. - - Cache hits should only occur when both the deployment environment AND - the benchmark workload parameters are identical. - - Args: - values: experiment values - - Returns: - composite cache key as JSON string - """ - env_key = _build_entity_env(values) - benchmark_key = _build_benchmark_params_key(values) - - # Combine both keys into a single cache key - composite = { - "environment": json.loads(env_key), - "benchmark": json.loads(benchmark_key), - } - return json.dumps(composite, sort_keys=True) - - def _create_environment( values: dict[str, str], actuator: VLLMPerformanceTestParameters, @@ -270,28 +65,10 @@ def _create_environment( check_interval: int = 5, timeout: int = 1200, ) -> tuple[str, str]: - """ - Create environment with version-aware threadpool support. 
- - Important: This function will block until env_manager.get_environment - returns an environment. - The env_manager will not return an environment until there is one free - to be used - - :param values: experiment values - :param actuator: actuator parameters - :param node_selector: node selector - :param request_id the request associated with this environment - :param env_manager: environment manager - :param experiment: experiment definition (used for version checking) - :param check_interval: wait interval - :param timeout: timeout - :return: kubernetes environment name - - :raises K8sEnvironmentCreationError if there was an issue - - If the creation step fails after three attempts - - If after creation the environment was not in ready state after timeout seconds (1200 default) + """Create environment with version-aware threadpool support. + Blocks until env_manager returns an available environment. + Raises K8sEnvironmentCreationError if creation fails after 3 attempts or timeout. """ from orchestrator.modules.operators.console_output import ( RichConsoleSpinnerMessage, @@ -304,7 +81,7 @@ def _create_environment( model = values.get("model") # create environment definition - definition = _build_entity_env(values=values) + definition = CacheKeyBuilder.build_env_definition(values=values) console.put.remote( message=RichConsoleSpinnerMessage( id=request_id, @@ -329,7 +106,6 @@ def _create_environment( ) break - # This is to guarantee that the request is next in line as soon as an environment is available ray.get(env_manager.wait_for_env.remote()) error = None @@ -348,8 +124,6 @@ def _create_environment( # Environment does not exist, create it logger.debug(f"Environment {env.k8s_name} does not exist. 
Creating it") tmout = 1 - - # To avoid data corruption we wait if another environment is concurrently downloading the same model for the first time ray.get( env_manager.wait_deployment_before_starting.remote( env=env, request_id=request_id @@ -365,27 +139,13 @@ def _create_environment( ) ) try: - # Determine if threadpool should be enabled based on version image_value = values.get("image", "") threadpool_requested = int(values.get("threadpool", 1)) - logger.debug( - f"Before _should_enable_threadpool: image_value={image_value}, " - f"threadpool_requested={threadpool_requested}" - ) - enable_threadpool = _should_enable_threadpool( + enable_threadpool = VLLMVersionChecker.supports_threadpool( image_value, threadpool_requested ) - logger.debug( - f"After _should_enable_threadpool: enable_threadpool={enable_threadpool}" - ) - - # Convert boolean back to int for consistency with existing code threadpool_value = 1 if enable_threadpool else 0 - logger.debug( - f"Final threadpool_value to be used: {threadpool_value}" - ) - # Extract image string from list if needed if isinstance(image_value, list): image_name = image_value[0] if len(image_value) > 0 else "" else: @@ -426,7 +186,6 @@ def _create_environment( check_interval=check_interval, timeout=timeout, ) - # Update manager env_manager.done_creating.remote(identifier=env.k8s_name) error = None break @@ -439,7 +198,6 @@ def _create_environment( time.sleep(tmout) tmout *= 2 - # Check if error after three attempts if error is None: console.put.remote( message=RichConsoleSpinnerMessage( @@ -460,9 +218,6 @@ def _create_environment( ) ) - # In case of failure creating the environment deployment we must release any - # other request with a deployment conflicting with this request's deployment - # We also need to release the slot for this environment ray.get( env_manager.cleanup_failed_deployment.remote( identifier=env.k8s_name @@ -481,27 +236,11 @@ def _connect_to_vllm_server( actuator_parameters: VLLMPerformanceTestParameters, 
port: int, ) -> tuple[str, subprocess.Popen | None]: - """Returns the URL of the vLLM inference server - - Creates a port forward for the inference server if test - is not running on the cluster with the service - - Parameters: - k8s_name: The name of the vLLM service - actuator_parameters: VLLMPerformanceTestParameters instance containing - namespace and test location (in_cluster or not) information - - Returns: - A tuple containing - - The URL of the created vLLM server - - If a port-forward is created the POpen object for the port-forward - Otherwise None + """Returns vLLM server URL and optional port-forward process. - Raise: - K8ConnectionError if a port-forward could not be created + Creates port-forward if not running in-cluster. + Raises K8sConnectionError if port-forward fails. """ - - # create environment if not actuator_parameters.in_cluster: logger.info("We are running locally connecting to remote cluster") logger.info("please make sure that you have executed `oc login`") @@ -511,13 +250,11 @@ def _connect_to_vllm_server( ) if actuator_parameters.in_cluster: - # we are running in cluster, connect to service directly base_url = ( f"http://{k8s_name}.{actuator_parameters.namespace}.svc.cluster.local:80" ) pf = None else: - # we are running locally. need to do port-forward and connect to the local one pf_command_args = [ "kubectl", "port-forward", @@ -532,9 +269,7 @@ def _connect_to_vllm_server( stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, ) - # make sure that port forwarding is up time.sleep(5) - # Check if there is a returncode- if there is it means port-forward exited if pf.returncode: raise K8sConnectionError( f"failed to start port forward to service {k8s_name} - port-forward command exited for unknown reason. Check logs." @@ -560,32 +295,11 @@ def run_resource_and_workload_experiment( env_manager: ActorHandle, local_port: int, ) -> None: - """ - Runs an experiment on a specific compute resource and inference workload configuration. 
- - This requires spinning up a vLLM instance with the given compute resources - - :param request: measurement request - :param experiment: definition of experiment - :param state_update_queue: update queue - :param actuator_parameters: actuator parameters - :param node_selector: node selector - :param env_manager: environment manager - :param local_port: local port to use - :return: - """ - - # This function - # 1. Performs the measurement represented by MeasurementRequest - # 2. Updates MeasurementRequest with the results of the measurement and status - # 3. Puts it in the stateUpdateQueue - - # placeholder for measurements + """Run experiment on specific compute resource and workload configuration.""" measurements = [] current_port = local_port - 1 console = ray.get_actor(name="RichConsoleQueue") - # For every entity for entity in request.entities: port_forward = None definition = None @@ -593,12 +307,9 @@ def run_resource_and_workload_experiment( try: values = experiment.propertyValuesFromEntity(entity=entity) - # Check if we've already measured an entity with the same environment and benchmark parameters - # Cache key includes both environment (model, GPUs, etc.) and benchmark params (num_prompts, request_rate, etc.) 
- cache_key = _build_cache_key(values) + cache_key = CacheKeyBuilder.build(values) logger.info("cache_key: %s", cache_key) - # Check actor's cache for this measurement cached_result = ray.get( env_manager.get_cached_measurement.remote(cache_key) ) @@ -610,8 +321,8 @@ def run_resource_and_workload_experiment( measurements.append( create_measurement_result( identifier=entity.identifier, - measurements=cached_result["measurements"], - error=cached_result["error"], + measurements=cached_result.measurements, + error=cached_result.error, reference=request.experimentReference, ) ) @@ -619,7 +330,6 @@ def run_resource_and_workload_experiment( logger.info(f"Creating K8s environment for {entity.identifier}") - # Will raise an K8sEnvironmentCreationError if the environment could not be created k8s_name, definition = _create_environment( values=values, actuator=actuator_parameters, @@ -629,8 +339,6 @@ def run_resource_and_workload_experiment( request_id=request.requestid, ) - # Will raise an K8sConnectionError if a port-forward was required - # but could not be created current_port += 1 base_url, port_forward = _connect_to_vllm_server( k8s_name, actuator_parameters, current_port @@ -639,8 +347,6 @@ def run_resource_and_workload_experiment( logger.info(f"Will use vllm server at {base_url}") benchmark_parameters = BenchmarkParameters.model_validate(values) - # In this case the endpoint does not come through the property values and is generated - # when creating the vLLM deployment benchmark_parameters.endpoint = base_url started_benchmarking = True @@ -754,9 +460,6 @@ def run_resource_and_workload_experiment( reference=request.experimentReference, ) measurements.append(measurement_result) - - # Cache the measurement in the actor for potential reuse by subsequent entities - # with the same environment and benchmark parameters env_manager.cache_measurement.remote(cache_key, measured_values, None) finally: if started_benchmarking: @@ -772,7 +475,6 @@ def 
run_resource_and_workload_experiment( if definition is not None: env_manager.done_using.remote(identifier=k8s_name) - # For multi entity experiments if ONE entity had ValidResults the status must be SUCCESS if len(measurements) > 0: request.measurements = measurements request.status = compute_measurement_status(measurements=measurements) @@ -788,26 +490,8 @@ def run_workload_experiment( state_update_queue: MeasurementQueue, actuator_parameters: VLLMPerformanceTestParameters, ) -> None: - """ - Runs an experiment with a specific inference workload configuration on a given endpoint. - - The compute resource associated with the end-point is not known. - - :param request: measurement request - :param experiment: definition of experiment - :param state_update_queue: update queue - :param actuator_parameters: actuator parameters - :return: - """ - - # This function - # 1. Performs the measurement represented by MeasurementRequest - # 2. Updates MeasurementRequest with the results of the measurement and status - # 3. 
Puts it in the stateUpdateQueue - - # placeholder for measurements + """Run experiment with specific workload configuration on given endpoint.""" measurements = [] - # For every entity for entity in request.entities: measured_values = [] error = None @@ -820,7 +504,6 @@ def run_workload_experiment( benchmark_parameters = BenchmarkParameters.model_validate(values) - # Will raise VLLMBenchmarkError if there is a problem logger.info(f"Executing experiment: {experiment.identifier}") result: BenchmarkResult if experiment.identifier in [ @@ -908,7 +591,6 @@ def run_workload_experiment( ) ) - # For multi entity experiments if ONE entity had ValidResults the status must be SUCCESS if len(measurements) > 0: request.measurements = measurements request.status = compute_measurement_status(measurements=measurements) diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/version_utils.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/version_utils.py new file mode 100644 index 000000000..10516847a --- /dev/null +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/version_utils.py @@ -0,0 +1,41 @@ +# Copyright IBM Corporation 2025, 2026 +# SPDX-License-Identifier: MIT + +"""Utilities for vLLM version checking and threadpool support detection.""" + +from packaging import version + + +class VLLMVersionChecker: + """Utility class for checking vLLM version and threadpool support.""" + + THREADPOOL_MIN_VERSION = "0.20.0" + + @staticmethod + def parse_version(image_value: list | str) -> str | None: + """Extract vLLM version from image property value.""" + if isinstance(image_value, list) and len(image_value) > 1: + return image_value[1] + return None + + @classmethod + def supports_threadpool( + cls, image_value: list | str, threadpool_requested: int + ) -> bool: + """Check if threadpool is supported and should be enabled.""" + if threadpool_requested == 0: + return False + + vllm_version_str = 
cls.parse_version(image_value) + if vllm_version_str is None: + return True + + try: + vllm_ver = version.parse(vllm_version_str) + min_ver = version.parse(cls.THREADPOOL_MIN_VERSION) + return vllm_ver >= min_ver + except Exception: + return True + + +# Made with Bob diff --git a/plugins/actuators/vllm_performance/tests/test_cache_utils.py b/plugins/actuators/vllm_performance/tests/test_cache_utils.py new file mode 100644 index 000000000..26a243b43 --- /dev/null +++ b/plugins/actuators/vllm_performance/tests/test_cache_utils.py @@ -0,0 +1,144 @@ +# Copyright IBM Corporation 2025, 2026 +# SPDX-License-Identifier: MIT + +"""Tests for cache key building utilities.""" + +import json + +import pytest +from ado_actuators.vllm_performance.cache_utils import CacheKeyBuilder + + +class TestCacheKeyBuilder: + """Tests for CacheKeyBuilder class.""" + + @pytest.fixture + def base_values(self): + """Base values for testing.""" + return { + "model": "meta-llama/Llama-2-7b-hf", + "image": ["vllm/vllm-openai:v0.20.1", "0.20.1"], + "n_gpus": "1", + "gpu_type": "nvidia-l4", + "n_cpus": "8", + "memory": "32Gi", + "max_batch_tokens": "4096", + "gpu_memory_utilization": "0.9", + "dtype": "auto", + "cpu_offload": "0", + "max_num_seq": "256", + "threadpool": "1", + "renderer_num_workers": "32", + "num_prompts": "100", + "request_rate": "10", + "max_concurrency": "50", + "number_input_tokens": "128", + "max_output_tokens": "256", + "burstiness": "1.0", + "dataset": "random", + } + + def test_build_returns_json_string(self, base_values): + """Test that build returns a valid JSON string.""" + cache_key = CacheKeyBuilder.build(base_values) + assert isinstance(cache_key, str) + parsed = json.loads(cache_key) + assert "environment" in parsed + assert "benchmark" in parsed + + def test_same_values_produce_same_key(self, base_values): + """Test that identical values produce identical cache keys.""" + key1 = CacheKeyBuilder.build(base_values) + key2 = CacheKeyBuilder.build(base_values.copy()) + 
assert key1 == key2 + + def test_different_env_params_produce_different_keys(self, base_values): + """Test that different environment parameters produce different keys.""" + key1 = CacheKeyBuilder.build(base_values) + + modified_values = base_values.copy() + modified_values["n_gpus"] = "2" + key2 = CacheKeyBuilder.build(modified_values) + + assert key1 != key2 + + def test_different_benchmark_params_produce_different_keys(self, base_values): + """Test that different benchmark parameters produce different keys.""" + key1 = CacheKeyBuilder.build(base_values) + + modified_values = base_values.copy() + modified_values["num_prompts"] = "200" + key2 = CacheKeyBuilder.build(modified_values) + + assert key1 != key2 + + def test_threadpool_normalization_vllm_0_18(self, base_values): + """Test threadpool normalization for vLLM < 0.20.0.""" + base_values["image"] = ["vllm/vllm-openai:v0.18.0", "0.18.0"] + base_values["threadpool"] = "1" + base_values["renderer_num_workers"] = "32" + + cache_key = CacheKeyBuilder.build(base_values) + parsed = json.loads(cache_key) + + assert parsed["environment"]["threadpool"] == 0 + assert parsed["environment"]["renderer_num_workers"] == 0 + + def test_threadpool_normalization_vllm_0_20(self, base_values): + """Test threadpool normalization for vLLM >= 0.20.0.""" + base_values["image"] = ["vllm/vllm-openai:v0.20.1", "0.20.1"] + base_values["threadpool"] = "1" + base_values["renderer_num_workers"] = "32" + + cache_key = CacheKeyBuilder.build(base_values) + parsed = json.loads(cache_key) + + assert parsed["environment"]["threadpool"] == 1 + assert parsed["environment"]["renderer_num_workers"] == 32 + + def test_different_renderer_num_workers_same_key_when_disabled(self, base_values): + """Test that different renderer_num_workers produce same key when threadpool disabled.""" + base_values["image"] = ["vllm/vllm-openai:v0.18.0", "0.18.0"] + base_values["threadpool"] = "1" + base_values["renderer_num_workers"] = "32" + key1 = 
CacheKeyBuilder.build(base_values) + + base_values["renderer_num_workers"] = "64" + key2 = CacheKeyBuilder.build(base_values) + + assert key1 == key2 + + def test_different_renderer_num_workers_different_key_when_enabled( + self, base_values + ): + """Test that different renderer_num_workers produce different keys when threadpool enabled.""" + base_values["image"] = ["vllm/vllm-openai:v0.20.1", "0.20.1"] + base_values["threadpool"] = "1" + base_values["renderer_num_workers"] = "32" + key1 = CacheKeyBuilder.build(base_values) + + base_values["renderer_num_workers"] = "64" + key2 = CacheKeyBuilder.build(base_values) + + assert key1 != key2 + + def test_image_list_extraction(self, base_values): + """Test that image URL is correctly extracted from list.""" + base_values["image"] = ["vllm/vllm-openai:v0.20.1", "0.20.1"] + cache_key = CacheKeyBuilder.build(base_values) + parsed = json.loads(cache_key) + + assert parsed["environment"]["image"] == "vllm/vllm-openai:v0.20.1" + + def test_image_string_backward_compatibility(self, base_values): + """Test backward compatibility with string image values.""" + base_values["image"] = "vllm/vllm-openai:v0.20.1" + cache_key = CacheKeyBuilder.build(base_values) + parsed = json.loads(cache_key) + + assert parsed["environment"]["image"] == "vllm/vllm-openai:v0.20.1" + # When no version info, threadpool should be enabled (backward compatible) + assert parsed["environment"]["threadpool"] == 1 + + +# Made with Bob diff --git a/plugins/actuators/vllm_performance/tests/test_experiment_executor.py b/plugins/actuators/vllm_performance/tests/test_experiment_executor.py index 2216c2e63..4d0f8a0e1 100644 --- a/plugins/actuators/vllm_performance/tests/test_experiment_executor.py +++ b/plugins/actuators/vllm_performance/tests/test_experiment_executor.py @@ -1,224 +1,120 @@ # Copyright IBM Corporation 2025, 2026 # SPDX-License-Identifier: MIT -""" -Unit tests for experiment_executor module functions. 
-Tests version extraction from image property values. -""" +"""Unit tests for experiment_executor module functions.""" import json -from ado_actuators.vllm_performance.experiment_executor import ( - _build_benchmark_params_key, - _build_cache_key, - _build_entity_env, - _get_vllm_version_from_image_value, -) +import pytest +from ado_actuators.vllm_performance.cache_utils import CacheKeyBuilder +from ado_actuators.vllm_performance.version_utils import VLLMVersionChecker + + +@pytest.fixture +def base_env_values(): + """Base environment values for testing.""" + return { + "model": "test-model", + "n_gpus": "1", + "gpu_type": "nvidia-a100", + "n_cpus": "8", + "memory": "32Gi", + "max_batch_tokens": "4096", + "gpu_memory_utilization": "0.9", + "dtype": "auto", + "cpu_offload": "0", + "max_num_seq": "256", + } + + +@pytest.fixture +def base_benchmark_values(): + """Base benchmark values for testing.""" + return { + "num_prompts": "200", + "request_rate": "32", + "dataset": "random", + } class TestGetVllmVersionFromImageValue: - """Test suite for _get_vllm_version_from_image_value function""" - - def test_version_extraction_from_list_value(self) -> None: - """Test extracting vLLM version from list image value""" - image_value = [ - "icr.io/drl-nextgen/mgazz/vllm:v0.18.0-tt.v1.2.5", - "0.18.0", - ] - - version = _get_vllm_version_from_image_value(image_value) - assert version == "0.18.0" - - def test_version_extraction_from_another_list_value(self) -> None: - """Test extracting vLLM version from another list image value""" - image_value = [ - "vllm/vllm-openai:v0.14.0", - "0.14.0", - ] - - version = _get_vllm_version_from_image_value(image_value) - assert version == "0.14.0" - - def test_version_extraction_returns_none_for_string_value(self) -> None: - """Test that None is returned when image value is a string (backward compatibility)""" - image_value = "icr.io/drl-nextgen/mgazz/vllm:v0.18.0-tt.v1.2.5" - - version = _get_vllm_version_from_image_value(image_value) - assert 
version is None - - def test_version_extraction_returns_none_for_list_without_version(self) -> None: - """Test that None is returned when list has only one element (no version)""" - image_value = [ - "icr.io/drl-nextgen/mgazz/vllm:v0.18.0-tt.v1.2.5", - ] - - version = _get_vllm_version_from_image_value(image_value) - assert version is None - - def test_version_extraction_with_latest_tag(self) -> None: - """Test extracting version for latest tag""" - image_value = [ - "vllm/vllm-openai:latest", - "0.21.0", - ] - - version = _get_vllm_version_from_image_value(image_value) - assert version == "0.21.0" + """Test suite for version extraction from image values.""" + + @pytest.mark.parametrize( + "image_value,expected", + [ + (["icr.io/drl-nextgen/mgazz/vllm:v0.18.0-tt.v1.2.5", "0.18.0"], "0.18.0"), + (["vllm/vllm-openai:v0.14.0", "0.14.0"], "0.14.0"), + (["vllm/vllm-openai:latest", "0.21.0"], "0.21.0"), + ("icr.io/drl-nextgen/mgazz/vllm:v0.18.0-tt.v1.2.5", None), + (["icr.io/drl-nextgen/mgazz/vllm:v0.18.0-tt.v1.2.5"], None), + ], + ) + def test_version_extraction(self, image_value, expected) -> None: + """Test version extraction from various image value formats.""" + version = VLLMVersionChecker.parse_version(image_value) + assert version == expected class TestBuildEntityEnv: - """Test suite for _build_entity_env function""" - - def test_renderer_num_workers_normalized_when_vllm_version_less_than_0_20_0( - self, - ) -> None: - """Test that renderer_num_workers is normalized to 0 when vLLM < 0.20.0""" - # Test with vLLM 0.18.0 (< 0.20.0) - values = { - "model": "test-model", - "image": ["icr.io/test/vllm:v0.18.0", "0.18.0"], - "n_gpus": "1", - "gpu_type": "nvidia-a100", - "n_cpus": "8", - "memory": "32Gi", - "max_batch_tokens": "4096", - "gpu_memory_utilization": "0.9", - "dtype": "auto", - "cpu_offload": "0", - "max_num_seq": "256", - "threadpool": "1", - "renderer_num_workers": "64", # Should be normalized to 0 - } - - result = _build_entity_env(values) - result_dict = 
json.loads(result) - - # Both threadpool and renderer_num_workers should be 0 - assert result_dict["threadpool"] == 0 - assert result_dict["renderer_num_workers"] == 0 - - def test_renderer_num_workers_preserved_when_vllm_version_greater_than_0_20_0( - self, - ) -> None: - """Test that renderer_num_workers is preserved when vLLM >= 0.20.0""" - # Test with vLLM 0.21.0 (>= 0.20.0) - values = { - "model": "test-model", - "image": ["icr.io/test/vllm:v0.21.0", "0.21.0"], - "n_gpus": "1", - "gpu_type": "nvidia-a100", - "n_cpus": "8", - "memory": "32Gi", - "max_batch_tokens": "4096", - "gpu_memory_utilization": "0.9", - "dtype": "auto", - "cpu_offload": "0", - "max_num_seq": "256", - "threadpool": "1", - "renderer_num_workers": "64", - } - - result = _build_entity_env(values) - result_dict = json.loads(result) - - # Both should be preserved - assert result_dict["threadpool"] == 1 - assert result_dict["renderer_num_workers"] == 64 - - def test_renderer_num_workers_normalized_when_threadpool_disabled_by_user( + """Test suite for environment definition building.""" + + @pytest.mark.parametrize( + "image,threadpool,renderer_workers,expected_threadpool,expected_workers", + [ + (["icr.io/test/vllm:v0.18.0", "0.18.0"], "1", "64", 0, 0), + (["icr.io/test/vllm:v0.21.0", "0.21.0"], "1", "64", 1, 64), + (["icr.io/test/vllm:v0.21.0", "0.21.0"], "0", "64", 0, 0), + ("icr.io/test/vllm:v0.18.0", "1", "64", 1, 64), + ], + ) + def test_threadpool_normalization( self, + base_env_values, + image, + threadpool, + renderer_workers, + expected_threadpool, + expected_workers, ) -> None: - """Test that renderer_num_workers is normalized to 0 when user disables threadpool""" - # Test with vLLM 0.21.0 but threadpool=0 + """Test threadpool and renderer_num_workers normalization.""" values = { - "model": "test-model", - "image": ["icr.io/test/vllm:v0.21.0", "0.21.0"], - "n_gpus": "1", - "gpu_type": "nvidia-a100", - "n_cpus": "8", - "memory": "32Gi", - "max_batch_tokens": "4096", - 
"gpu_memory_utilization": "0.9", - "dtype": "auto", - "cpu_offload": "0", - "max_num_seq": "256", - "threadpool": "0", # User explicitly disabled - "renderer_num_workers": "64", # Should be normalized to 0 + **base_env_values, + "image": image, + "threadpool": threadpool, + "renderer_num_workers": renderer_workers, } - result = _build_entity_env(values) + result = CacheKeyBuilder.build_env_definition(values) result_dict = json.loads(result) - # Both should be 0 - assert result_dict["threadpool"] == 0 - assert result_dict["renderer_num_workers"] == 0 + assert result_dict["threadpool"] == expected_threadpool + assert result_dict["renderer_num_workers"] == expected_workers - def test_different_renderer_num_workers_same_env_when_vllm_less_than_0_20_0( - self, + def test_different_renderer_workers_same_env_vllm_0_18( + self, base_env_values ) -> None: - """Test that different renderer_num_workers values produce same env when vLLM < 0.20.0""" - base_values = { - "model": "test-model", + """Test different renderer_num_workers produce same env for vLLM < 0.20.0.""" + base = { + **base_env_values, "image": ["icr.io/test/vllm:v0.18.0", "0.18.0"], - "n_gpus": "1", - "gpu_type": "nvidia-a100", - "n_cpus": "8", - "memory": "32Gi", - "max_batch_tokens": "4096", - "gpu_memory_utilization": "0.9", - "dtype": "auto", - "cpu_offload": "0", - "max_num_seq": "256", "threadpool": "1", } - # Test with different renderer_num_workers values - values_32 = {**base_values, "renderer_num_workers": "32"} - values_64 = {**base_values, "renderer_num_workers": "64"} - values_128 = {**base_values, "renderer_num_workers": "128"} - - env_32 = _build_entity_env(values_32) - env_64 = _build_entity_env(values_64) - env_128 = _build_entity_env(values_128) - - # All should produce the same environment definition - assert env_32 == env_64 == env_128 - - # Verify they all have renderer_num_workers=0 - result_dict = json.loads(env_32) - assert result_dict["renderer_num_workers"] == 0 - - def 
test_backward_compatibility_with_string_image(self) -> None: - """Test backward compatibility when image is a string (no version info)""" - values = { - "model": "test-model", - "image": "icr.io/test/vllm:v0.18.0", # String, no version info - "n_gpus": "1", - "gpu_type": "nvidia-a100", - "n_cpus": "8", - "memory": "32Gi", - "max_batch_tokens": "4096", - "gpu_memory_utilization": "0.9", - "dtype": "auto", - "cpu_offload": "0", - "max_num_seq": "256", - "threadpool": "1", - "renderer_num_workers": "64", - } - - result = _build_entity_env(values) - result_dict = json.loads(result) + envs = [ + CacheKeyBuilder.build_env_definition({**base, "renderer_num_workers": w}) + for w in ["32", "64", "128"] + ] - # Should assume threadpool is supported (backward compatible) - assert result_dict["threadpool"] == 1 - assert result_dict["renderer_num_workers"] == 64 + assert envs[0] == envs[1] == envs[2] + assert json.loads(envs[0])["renderer_num_workers"] == 0 class TestBuildBenchmarkParamsKey: - """Test suite for _build_benchmark_params_key function""" + """Test suite for benchmark parameter extraction.""" def test_includes_all_benchmark_parameters(self) -> None: - """Test that all benchmark parameters are included in the key""" + """Test all benchmark parameters are included.""" values = { "num_prompts": "100", "request_rate": "10", @@ -229,259 +125,132 @@ def test_includes_all_benchmark_parameters(self) -> None: "dataset": "random", } - result = _build_benchmark_params_key(values) - result_dict = json.loads(result) + cache_key = CacheKeyBuilder.build(values) + benchmark = json.loads(cache_key)["benchmark"] - assert result_dict["num_prompts"] == "100" - assert result_dict["request_rate"] == "10" - assert result_dict["max_concurrency"] == "5" - assert result_dict["number_input_tokens"] == "50" - assert result_dict["max_output_tokens"] == "100" - assert result_dict["burstiness"] == "1.0" - assert result_dict["dataset"] == "random" + assert benchmark["num_prompts"] == "100" + 
assert benchmark["request_rate"] == "10" + assert benchmark["max_concurrency"] == "5" + assert benchmark["number_input_tokens"] == "50" + assert benchmark["max_output_tokens"] == "100" + assert benchmark["burstiness"] == "1.0" + assert benchmark["dataset"] == "random" def test_handles_missing_values(self) -> None: - """Test that missing values are handled as None""" - values = { - "num_prompts": "100", - # Other parameters missing - } - - result = _build_benchmark_params_key(values) - result_dict = json.loads(result) + """Test missing values are handled as None.""" + cache_key = CacheKeyBuilder.build({"num_prompts": "100"}) + benchmark = json.loads(cache_key)["benchmark"] - assert result_dict["num_prompts"] == "100" - assert result_dict["request_rate"] is None - assert result_dict["max_concurrency"] is None - assert result_dict["dataset"] is None + assert benchmark["num_prompts"] == "100" + assert benchmark["request_rate"] is None + assert benchmark["max_concurrency"] is None def test_consistent_output_with_sorted_keys(self) -> None: - """Test that output is consistent (keys are sorted)""" - values = { - "dataset": "random", - "num_prompts": "100", - "request_rate": "10", - } - - result1 = _build_benchmark_params_key(values) - result2 = _build_benchmark_params_key(values) + """Test output is consistent with sorted keys.""" + values = {"dataset": "random", "num_prompts": "100", "request_rate": "10"} - # Should produce identical output - assert result1 == result2 + key1 = json.dumps( + json.loads(CacheKeyBuilder.build(values))["benchmark"], sort_keys=True + ) + key2 = json.dumps( + json.loads(CacheKeyBuilder.build(values))["benchmark"], sort_keys=True + ) - # Verify keys are sorted in JSON - result_dict = json.loads(result1) - keys = list(result_dict.keys()) - assert keys == sorted(keys) + assert key1 == key2 + assert list(json.loads(key1).keys()) == sorted(json.loads(key1).keys()) class TestBuildCacheKey: - """Test suite for _build_cache_key function""" + """Test 
suite for complete cache key building.""" - def test_combines_environment_and_benchmark_params(self) -> None: - """Test that cache key includes both environment and benchmark parameters""" + def test_combines_environment_and_benchmark_params( + self, base_env_values, base_benchmark_values + ) -> None: + """Test cache key includes both environment and benchmark sections.""" values = { - # Environment params - "model": "test-model", + **base_env_values, "image": ["icr.io/test/vllm:v0.18.0", "0.18.0"], - "n_gpus": "1", - "gpu_type": "nvidia-a100", - "n_cpus": "8", - "memory": "32Gi", - "max_batch_tokens": "4096", - "gpu_memory_utilization": "0.9", - "dtype": "auto", - "cpu_offload": "0", - "max_num_seq": "256", "threadpool": "1", "renderer_num_workers": "32", - # Benchmark params - "num_prompts": "200", - "request_rate": "32", - "dataset": "random", + **base_benchmark_values, } - result = _build_cache_key(values) - result_dict = json.loads(result) + result_dict = json.loads(CacheKeyBuilder.build(values)) - # Should have both environment and benchmark sections assert "environment" in result_dict assert "benchmark" in result_dict - - # Check environment section - env = result_dict["environment"] - assert env["model"] == "test-model" - assert env["n_gpus"] == "1" - - # Check benchmark section - benchmark = result_dict["benchmark"] - assert benchmark["num_prompts"] == "200" - assert benchmark["request_rate"] == "32" - assert benchmark["dataset"] == "random" - - def test_different_benchmark_params_produce_different_keys(self) -> None: - """Test that different benchmark parameters produce different cache keys""" - base_values = { - "model": "test-model", + assert result_dict["environment"]["model"] == "test-model" + assert result_dict["benchmark"]["num_prompts"] == "200" + + @pytest.mark.parametrize( + "param,value1,value2", + [ + ("num_prompts", "100", "200"), + ("request_rate", "32", "64"), + ], + ) + def test_different_params_produce_different_keys( + self, 
base_env_values, base_benchmark_values, param, value1, value2 + ) -> None: + """Test different parameter values produce different cache keys.""" + base = { + **base_env_values, "image": ["icr.io/test/vllm:v0.20.1", "0.20.1"], - "n_gpus": "1", - "gpu_type": "nvidia-a100", - "n_cpus": "8", - "memory": "32Gi", - "max_batch_tokens": "4096", - "gpu_memory_utilization": "0.9", - "dtype": "auto", - "cpu_offload": "0", - "max_num_seq": "256", "threadpool": "1", "renderer_num_workers": "32", + **base_benchmark_values, } - # Same environment, different num_prompts - values1 = {**base_values, "num_prompts": "100", "request_rate": "32"} - values2 = {**base_values, "num_prompts": "200", "request_rate": "32"} + key1 = CacheKeyBuilder.build({**base, param: value1}) + key2 = CacheKeyBuilder.build({**base, param: value2}) - key1 = _build_cache_key(values1) - key2 = _build_cache_key(values2) - - # Different benchmark params should produce different keys assert key1 != key2 - def test_same_params_produce_same_key(self) -> None: - """Test that identical parameters produce identical cache keys""" + def test_same_params_produce_same_key( + self, base_env_values, base_benchmark_values + ) -> None: + """Test identical parameters produce identical cache keys.""" values = { - "model": "test-model", + **base_env_values, "image": ["icr.io/test/vllm:v0.20.1", "0.20.1"], - "n_gpus": "1", - "gpu_type": "nvidia-a100", - "n_cpus": "8", - "memory": "32Gi", - "max_batch_tokens": "4096", - "gpu_memory_utilization": "0.9", - "dtype": "auto", - "cpu_offload": "0", - "max_num_seq": "256", "threadpool": "1", "renderer_num_workers": "32", - "num_prompts": "200", - "request_rate": "32", - "dataset": "random", - } - - key1 = _build_cache_key(values) - key2 = _build_cache_key(values) - - # Identical params should produce identical keys - assert key1 == key2 - - def test_cache_key_differentiates_on_request_rate(self) -> None: - """Test that different request_rate values produce different cache keys""" - 
base_values = { - "model": "test-model", - "image": ["icr.io/test/vllm:v0.18.0", "0.18.0"], - "n_gpus": "1", - "gpu_type": "nvidia-a100", - "n_cpus": "8", - "memory": "32Gi", - "max_batch_tokens": "4096", - "gpu_memory_utilization": "0.9", - "dtype": "auto", - "cpu_offload": "0", - "max_num_seq": "256", - "threadpool": "1", - "renderer_num_workers": "32", - "num_prompts": "200", - "dataset": "random", + **base_benchmark_values, } - # Same everything except request_rate - values_rate_32 = {**base_values, "request_rate": "32"} - values_rate_64 = {**base_values, "request_rate": "64"} - - key_32 = _build_cache_key(values_rate_32) - key_64 = _build_cache_key(values_rate_64) + assert CacheKeyBuilder.build(values) == CacheKeyBuilder.build(values) - # Different request rates should produce different keys - assert key_32 != key_64 - - def test_vllm_0_18_same_cache_key_for_different_renderer_num_workers(self) -> None: - """ - Test that for vLLM 0.18.0, different renderer_num_workers values produce - the SAME cache key (because threadpool is not supported and normalized to 0) - """ - base_values = { - "model": "test-model", + def test_vllm_0_18_same_key_different_renderer_workers( + self, base_env_values, base_benchmark_values + ) -> None: + """Test vLLM 0.18.0 produces same key for different renderer_num_workers.""" + base = { + **base_env_values, "image": ["icr.io/test/vllm:v0.18.0", "0.18.0"], - "n_gpus": "1", - "gpu_type": "nvidia-a100", - "n_cpus": "8", - "memory": "32Gi", - "max_batch_tokens": "4096", - "gpu_memory_utilization": "0.9", - "dtype": "auto", - "cpu_offload": "0", - "max_num_seq": "256", "threadpool": "1", - "num_prompts": "200", - "request_rate": "32", - "dataset": "random", + **base_benchmark_values, } - # Different renderer_num_workers values - values_32 = {**base_values, "renderer_num_workers": "32"} - values_64 = {**base_values, "renderer_num_workers": "64"} + key_32 = CacheKeyBuilder.build({**base, "renderer_num_workers": "32"}) + key_64 = 
CacheKeyBuilder.build({**base, "renderer_num_workers": "64"}) - key_32 = _build_cache_key(values_32) - key_64 = _build_cache_key(values_64) - - # For vLLM 0.18.0, both should produce the same key - # because renderer_num_workers is normalized to 0 in the environment assert key_32 == key_64 + assert json.loads(key_32)["environment"]["renderer_num_workers"] == 0 - # Verify the environment section has renderer_num_workers=0 - result_dict = json.loads(key_32) - assert result_dict["environment"]["renderer_num_workers"] == 0 - - def test_vllm_0_20_different_cache_key_for_different_renderer_num_workers( - self, + def test_vllm_0_20_different_key_different_renderer_workers( + self, base_env_values, base_benchmark_values ) -> None: - """ - Test that for vLLM 0.20.1, different renderer_num_workers values produce - DIFFERENT cache keys (because threadpool is supported) - """ - base_values = { - "model": "test-model", + """Test vLLM 0.20.1 produces different keys for different renderer_num_workers.""" + base = { + **base_env_values, "image": ["icr.io/test/vllm:v0.20.1", "0.20.1"], - "n_gpus": "1", - "gpu_type": "nvidia-a100", - "n_cpus": "8", - "memory": "32Gi", - "max_batch_tokens": "4096", - "gpu_memory_utilization": "0.9", - "dtype": "auto", - "cpu_offload": "0", - "max_num_seq": "256", "threadpool": "1", - "num_prompts": "200", - "request_rate": "32", - "dataset": "random", + **base_benchmark_values, } - # Different renderer_num_workers values - values_32 = {**base_values, "renderer_num_workers": "32"} - values_64 = {**base_values, "renderer_num_workers": "64"} - - key_32 = _build_cache_key(values_32) - key_64 = _build_cache_key(values_64) + key_32 = CacheKeyBuilder.build({**base, "renderer_num_workers": "32"}) + key_64 = CacheKeyBuilder.build({**base, "renderer_num_workers": "64"}) - # For vLLM 0.20.1, should produce different keys assert key_32 != key_64 - - # Verify the environment sections have different renderer_num_workers - result_dict_32 = json.loads(key_32) - 
result_dict_64 = json.loads(key_64) - assert result_dict_32["environment"]["renderer_num_workers"] == 32 - assert result_dict_64["environment"]["renderer_num_workers"] == 64 - - -# Made with Bob + assert json.loads(key_32)["environment"]["renderer_num_workers"] == 32 + assert json.loads(key_64)["environment"]["renderer_num_workers"] == 64 diff --git a/plugins/actuators/vllm_performance/tests/test_version_utils.py b/plugins/actuators/vllm_performance/tests/test_version_utils.py new file mode 100644 index 000000000..938acdc4b --- /dev/null +++ b/plugins/actuators/vllm_performance/tests/test_version_utils.py @@ -0,0 +1,59 @@ +# Copyright IBM Corporation 2025, 2026 +# SPDX-License-Identifier: MIT + +"""Tests for vLLM version utilities.""" + + +from ado_actuators.vllm_performance.version_utils import VLLMVersionChecker + + +class TestVLLMVersionChecker: + """Tests for VLLMVersionChecker class.""" + + def test_parse_version_from_list(self): + """Test version parsing from list format.""" + image_value = ["vllm/vllm-openai:v0.20.1", "0.20.1"] + assert VLLMVersionChecker.parse_version(image_value) == "0.20.1" + + def test_parse_version_from_list_single_element(self): + """Test version parsing from list with single element.""" + image_value = ["vllm/vllm-openai:v0.20.1"] + assert VLLMVersionChecker.parse_version(image_value) is None + + def test_parse_version_from_string(self): + """Test version parsing from string format (backward compatibility).""" + image_value = "vllm/vllm-openai:v0.20.1" + assert VLLMVersionChecker.parse_version(image_value) is None + + def test_supports_threadpool_disabled_by_user(self): + """Test threadpool disabled when user sets threadpool=0.""" + image_value = ["vllm/vllm-openai:v0.20.1", "0.20.1"] + assert not VLLMVersionChecker.supports_threadpool(image_value, 0) + + def test_supports_threadpool_version_supported(self): + """Test threadpool enabled for vLLM >= 0.20.0.""" + image_value = ["vllm/vllm-openai:v0.20.1", "0.20.1"] + assert 
VLLMVersionChecker.supports_threadpool(image_value, 1) + + def test_supports_threadpool_version_not_supported(self): + """Test threadpool disabled for vLLM < 0.20.0.""" + image_value = ["vllm/vllm-openai:v0.18.0", "0.18.0"] + assert not VLLMVersionChecker.supports_threadpool(image_value, 1) + + def test_supports_threadpool_no_version_info(self): + """Test threadpool enabled when no version info (backward compatible).""" + image_value = "vllm/vllm-openai:v0.20.1" + assert VLLMVersionChecker.supports_threadpool(image_value, 1) + + def test_supports_threadpool_invalid_version(self): + """Test threadpool enabled for invalid version (fail-safe).""" + image_value = ["vllm/vllm-openai:latest", "invalid-version"] + assert VLLMVersionChecker.supports_threadpool(image_value, 1) + + def test_supports_threadpool_edge_version(self): + """Test threadpool enabled at exact minimum version.""" + image_value = ["vllm/vllm-openai:v0.20.0", "0.20.0"] + assert VLLMVersionChecker.supports_threadpool(image_value, 1) + + +# Made with Bob diff --git a/pod_sample.yaml b/pod_sample.yaml new file mode 100644 index 000000000..ef3391982 --- /dev/null +++ b/pod_sample.yaml @@ -0,0 +1,262 @@ +# Copyright IBM Corporation 2025, 2026 +# SPDX-License-Identifier: MIT + +apiVersion: v1 +kind: Pod +metadata: + annotations: + k8s.ovn.org/pod-networks: '{"default":{"ip_addresses":["10.128.14.116/23"],"mac_address":"0a:58:0a:80:0e:74","gateway_ips":["10.128.14.1"],"routes":[{"dest":"10.128.0.0/14","nextHop":"10.128.14.1"},{"dest":"172.30.0.0/16","nextHop":"10.128.14.1"},{"dest":"169.254.169.5/32","nextHop":"10.128.14.1"},{"dest":"100.64.0.0/16","nextHop":"10.128.14.1"}],"ip_address":"10.128.14.116/23","gateway_ip":"10.128.14.1","role":"primary"}}' + k8s.v1.cni.cncf.io/network-status: |- + [{ + "name": "ovn-kubernetes", + "interface": "eth0", + "ips": [ + "10.128.14.116" + ], + "mac": "0a:58:0a:80:0e:74", + "default": true, + "dns": {} + }] + openshift.io/scc: anyuid + 
security.openshift.io/validated-scc-subject-type: serviceaccount + creationTimestamp: "2026-06-03T14:46:46Z" + generateName: vllm-prithvi-eo-2-0-300m-tl-se-12a30a40176c40a78df249972a2fe66b-74c45688b4- + generation: 1 + labels: + app.kubernetes.io/instance: vllm-prithvi-eo-2-0-300m-tl-se-12a30a40176c40a78df249972a2fe66b + app.kubernetes.io/name: vllm + pod-template-hash: 74c45688b4 + name: vllm-prithvi-eo-2-0-300m-tl-se-12a30a40176c40a78df249972a2hrzrm + namespace: cp-testing +spec: + containers: + - args: + - ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11 + - --max-num-seqs + - "256" + - --max-num-batched-tokens + - "16384" + - --gpu-memory-utilization + - "0.9" + - --cpu-offload-gb + - "0" + - --max-num-seq + - "256" + - --tensor-parallel-size + - "1" + - --dtype + - auto + - --enforce-eager + - --skip-tokenizer-init + - --io-processor-plugin + - terratorch_segmentation + - --enable-mm-embeds + - --otlp-traces-endpoint + - http://jaeger:4317/v1/traces + command: + - vllm + - serve + env: + - name: HF_HUB_OFFLINE + value: "0" + - name: TRANSFORMERS_OFFLINE + value: "0" + - name: HF_TOKEN + - name: OTEL_SERVICE_NAME + value: vllm-prithvi-eo-2-0-300m-tl-se-12a30a40176c40a78df249972a2fe66b + - name: HOME + value: /tmp + - name: HF_HOME + value: /tmp/transformers_cache + - name: VLLM_LOGGING_LEVEL + value: DEBUG + #image: icr.io/drl-nextgen/mgazz/vllm:v0.18.0-tt.v1.2.5 + image: icr.io/drl-nextgen/mgazz/vllm:v0.20.1-tt.main + imagePullPolicy: IfNotPresent + livenessProbe: + exec: + command: + - sh + - -c + - | + curl -X 'GET' "http://localhost:8000/health" \ + -H 'accept: application/json' + failureThreshold: 100 + periodSeconds: 100 + successThreshold: 1 + timeoutSeconds: 1 + name: vllm + ports: + - containerPort: 8000 + name: http + protocol: TCP + resources: + limits: + cpu: "48" + memory: 128Gi + nvidia.com/gpu: "1" + requests: + cpu: "48" + memory: 128Gi + nvidia.com/gpu: "1" + securityContext: + capabilities: + drop: + - MKNOD + startupProbe: + exec: + 
command: + - sh + - -c + - | + curl -X 'GET' "http://localhost:8000/health" \ + -H 'accept: application/json' + failureThreshold: 200 + initialDelaySeconds: 20 + periodSeconds: 5 + successThreshold: 1 + timeoutSeconds: 300 + terminationMessagePath: /dev/termination-log + terminationMessagePolicy: File + volumeMounts: + - mountPath: /dev/shm + name: dshm + - mountPath: /dev/cache + name: vllm-support + - mountPath: /var/run/secrets/kubernetes.io/serviceaccount + name: kube-api-access-n9srb + readOnly: true + dnsPolicy: ClusterFirst + enableServiceLinks: true + imagePullSecrets: + - name: cp-icr-pull-secret + nodeName: adcpu014 + nodeSelector: + nvidia.com/gpu.product: NVIDIA-A100-80GB-PCIe + preemptionPolicy: PreemptLowerPriority + priority: 0 + restartPolicy: Always + schedulerName: default-scheduler + securityContext: + seLinuxOptions: + level: s0:c33,c7 + serviceAccount: default + serviceAccountName: default + terminationGracePeriodSeconds: 30 + tolerations: + - effect: NoExecute + key: node.kubernetes.io/not-ready + operator: Exists + tolerationSeconds: 300 + - effect: NoExecute + key: node.kubernetes.io/unreachable + operator: Exists + tolerationSeconds: 300 + - effect: NoSchedule + key: node.kubernetes.io/memory-pressure + operator: Exists + volumes: + - emptyDir: + medium: Memory + name: dshm + - name: vllm-support + persistentVolumeClaim: + claimName: vllm-support-75966dc2efc74e07a740ba76edca0f1c + - name: kube-api-access-n9srb + projected: + defaultMode: 420 + sources: + - serviceAccountToken: + expirationSeconds: 3607 + path: token + - configMap: + items: + - key: ca.crt + path: ca.crt + name: kube-root-ca.crt + - downwardAPI: + items: + - fieldRef: + apiVersion: v1 + fieldPath: metadata.namespace + path: namespace + - configMap: + items: + - key: service-ca.crt + path: service-ca.crt + name: openshift-service-ca.crt +status: + conditions: + - lastProbeTime: null + lastTransitionTime: "2026-06-03T14:46:58Z" + status: "True" + type: 
PodReadyToStartContainers + - lastProbeTime: null + lastTransitionTime: "2026-06-03T14:46:46Z" + status: "True" + type: Initialized + - lastProbeTime: null + lastTransitionTime: "2026-06-03T14:46:46Z" + message: 'containers with unready status: [vllm]' + reason: ContainersNotReady + status: "False" + type: Ready + - lastProbeTime: null + lastTransitionTime: "2026-06-03T14:46:46Z" + message: 'containers with unready status: [vllm]' + reason: ContainersNotReady + status: "False" + type: ContainersReady + - lastProbeTime: null + lastTransitionTime: "2026-06-03T14:46:46Z" + status: "True" + type: PodScheduled + containerStatuses: + - allocatedResources: + cpu: "48" + memory: 128Gi + nvidia.com/gpu: "1" + containerID: cri-o://19404f25f6a87d0de5d10ec7b0feca8ac9d096333320f589db612c62df10a9c7 + image: icr.io/drl-nextgen/mgazz/vllm:v0.18.0-tt.v1.2.5 + imageID: icr.io/drl-nextgen/mgazz/vllm@sha256:b2eb5944328836c91dad29bdabc6313d471a4dd827cb332f9ef284409e8364a7 + lastState: {} + name: vllm + ready: false + resources: + limits: + cpu: "48" + memory: 128Gi + nvidia.com/gpu: "1" + requests: + cpu: "48" + memory: 128Gi + nvidia.com/gpu: "1" + restartCount: 0 + started: false + state: + running: + startedAt: "2026-06-03T14:46:57Z" + user: + linux: + gid: 0 + supplementalGroups: + - 0 + uid: 0 + volumeMounts: + - mountPath: /dev/shm + name: dshm + - mountPath: /dev/cache + name: vllm-support + - mountPath: /var/run/secrets/kubernetes.io/serviceaccount + name: kube-api-access-n9srb + readOnly: true + recursiveReadOnly: Disabled + hostIP: 172.16.1.14 + hostIPs: + - ip: 172.16.1.14 + phase: Running + podIP: 10.128.14.116 + podIPs: + - ip: 10.128.14.116 + qosClass: Guaranteed + startTime: "2026-06-03T14:46:46Z" diff --git a/rhaiis_deployment.yaml b/rhaiis_deployment.yaml new file mode 100644 index 000000000..193683bdd --- /dev/null +++ b/rhaiis_deployment.yaml @@ -0,0 +1,81 @@ +# Copyright IBM Corporation 2025, 2026 +# SPDX-License-Identifier: MIT +apiVersion: apps/v1 +kind: 
Deployment +metadata: + name: vllm-testing + labels: + app.kubernetes.io/name: vllm + app.kubernetes.io/instance: vllm-testing +spec: + selector: + matchLabels: + app.kubernetes.io/name: vllm + app.kubernetes.io/instance: vllm-testing + template: + metadata: + labels: + app.kubernetes.io/name: vllm + app.kubernetes.io/instance: vllm-testing + spec: + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: kubernetes.io/hostname + operator: In + values: + - adcpu015 + containers: + - name: vllm + image: "vllm/vllm-openai:v0.13.0" + imagePullPolicy: IfNotPresent + ports: + - name: http + containerPort: 8000 + protocol: TCP + startupProbe: + exec: + command: + - sh + - -c + - | + curl -X 'GET' "http://localhost:8000/health" \ + -H 'accept: application/json' + initialDelaySeconds: 20 + timeoutSeconds: 300 + periodSeconds: 5 + # Allow for up to 20 minutes of startup time + failureThreshold: 200 + livenessProbe: + exec: + command: + - sh + - -c + - | + curl -X 'GET' "http://localhost:8000/health" \ + -H 'accept: application/json' + failureThreshold: 100 + periodSeconds: 100 + resources: + limits: + cpu: "8" + memory: 128Gi + nvidia.com/gpu: "1" + requests: + cpu: "4" + memory: 128Gi + nvidia.com/gpu: "1" + env: + - name: HF_HUB_OFFLINE + value: "0" + - name: TRANSFORMERS_OFFLINE + value: "0" + volumeMounts: + - mountPath: /dev/shm + name: dshm + volumes: + - name: dshm + emptyDir: + medium: Memory diff --git a/~/workspace/plans/ado-threadpool-list-based-image-property.md b/~/workspace/plans/ado-threadpool-list-based-image-property.md new file mode 100644 index 000000000..99af4d263 --- /dev/null +++ b/~/workspace/plans/ado-threadpool-list-based-image-property.md @@ -0,0 +1,201 @@ +# Implementation Plan: Refactor Image Property from Dict to List Format + +## Overview +Refactor the vllm_performance actuator to handle image properties as positional lists `[image_url, vllm_version]` instead of 
dictionaries `{image: url, vllm_version: version}`. This simplifies the YAML format and removes the need for special handling of dict values in core ado code. + +## Current State Analysis + +### Current Implementation (Dict-based) +```yaml +- identifier: "image" + propertyDomain: + variableType: "CATEGORICAL_VARIABLE_TYPE" + values: + - image: "icr.io/drl-nextgen/mgazz/vllm:v0.18.0-tt.v1.2.5" + vllm_version: "0.18.0" + - image: "icr.io/drl-nextgen/mgazz/vllm:v0.20.1-tt.main" + vllm_version: "0.20.1" +``` + +### Target Implementation (List-based) +```yaml +- identifier: "image" + propertyDomain: + variableType: "CATEGORICAL_VARIABLE_TYPE" + values: + - ["icr.io/drl-nextgen/mgazz/vllm:v0.18.0-tt.v1.2.5", "0.18.0"] + - ["icr.io/drl-nextgen/mgazz/vllm:v0.20.1-tt.main", "0.20.1"] +``` + +## Files to Modify + +### 1. Actuator Code (vllm_performance plugin) +**File**: `plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py` + +**Changes Required**: +- **Function**: `_get_vllm_version_from_image_value(image_value: dict | str) -> str | None` + - Change signature to: `_get_vllm_version_from_image_value(image_value: list | str) -> str | None` + - Update logic to handle list format: `image_value[1]` instead of `image_value.get("vllm_version")` + - Keep backward compatibility for string format + +- **Function**: `_should_enable_threadpool(image_value: dict | str, threadpool_value: int) -> bool` + - Change signature to: `_should_enable_threadpool(image_value: list | str, threadpool_value: int) -> bool` + - Update to work with list-based image values + +- **Function**: `_build_entity_env(values: dict[str, str]) -> str` + - Update lines 166-170 to extract image string from list format + - Change from: `image_value.get("image")` to `image_value[0]` + +**Expected Changes**: +```python +# Before (dict-based) +if isinstance(image_value, dict): + version = image_value.get("vllm_version") + image_str = image_value.get("image") + +# After (list-based) 
+if isinstance(image_value, list): + version = image_value[1] if len(image_value) > 1 else None + image_str = image_value[0] +``` + +### 2. Core ado Files (REVERT CHANGES) + +#### File: `orchestrator/core/discoveryspace/group_samplers.py` +**Action**: REVERT changes made to support dict values in grouping + +**Lines to Revert**: 53-76 +- Remove the `make_hashable()` function that converts dicts to tuples +- Restore original simple implementation that doesn't handle unhashable types + +**Original Implementation**: +```python +def _build_point_group_values( + point: dict, group: list[str] +) -> frozenset[tuple[str, Any]]: + """ + :return: A frozen set of (key,value) pairs + """ + return frozenset({(k, v) for k, v in point.items() if k in group}) +``` + +#### File: `orchestrator/schema/property_value.py` +**Action**: REVERT temporary changes for dict value handling + +**Lines to Revert**: +- Lines 62, 89, 114, 159 (dict type annotations) +- Lines 158-159 (dict value type detection) +- Lines 27 (dict in ValueTypeEnum comment) + +**Changes**: +- Remove `dict` from union types in value fields +- Remove dict handling in `set_value_type()` method (lines 158-159) +- Remove dict from validation logic + +### 3. Test Files (REMOVE/UPDATE) + +#### File: `tests/core/test_group_samplers.py` +**Action**: REMOVE tests for dict value handling + +**Lines to Remove**: 316-419 +- Remove `test_build_point_group_values_with_unhashable_types()` +- Remove `test_build_groups_dict_with_unhashable_values()` + +These tests were added specifically to validate dict handling in grouping, which is no longer needed. 
+ +#### File: `tests/schema/test_property_value.py` +**Action**: UPDATE to remove dict from test fixtures + +**Lines to Modify**: +- Line 27: Remove dict from `python_type_value_examples` fixture +- Lines 31, 40, 49: Remove dict from parametrize decorators +- Lines 72-75: Remove dict case from `property_value` fixture +- Lines 217-219: Remove dict case from `test_type_detection` + +### 4. Discovery Space YAML (Example Update) + +**File**: `../ops/geo/discoveryspace_geospatial_threadpool_test.yaml` + +**Lines to Update**: 32-38 + +**Before**: +```yaml +values: + - image: "icr.io/drl-nextgen/mgazz/vllm:v0.18.0-tt.v1.2.5" + vllm_version: "0.18.0" + - image: "icr.io/drl-nextgen/mgazz/vllm:v0.20.1-tt.main" + vllm_version: "0.20.1" +``` + +**After**: +```yaml +values: + - ["icr.io/drl-nextgen/mgazz/vllm:v0.18.0-tt.v1.2.5", "0.18.0"] + - ["icr.io/drl-nextgen/mgazz/vllm:v0.20.1-tt.main", "0.20.1"] +``` + +## Implementation Steps + +### Phase 1: Update Actuator Code +1. ✅ Modify `_get_vllm_version_from_image_value()` to handle list format +2. ✅ Modify `_should_enable_threadpool()` signature and logic +3. ✅ Modify `_build_entity_env()` to extract image from list +4. ✅ Add validation for list format (length check, type check) +5. ✅ Maintain backward compatibility for string-only format + +### Phase 2: Revert Core Changes +1. ✅ Revert `orchestrator/core/discoveryspace/group_samplers.py` + - Remove `make_hashable()` function + - Restore original `_build_point_group_values()` implementation +2. ✅ Revert `orchestrator/schema/property_value.py` + - Remove dict from type annotations + - Remove dict handling in validators + - Remove dict from ValueTypeEnum documentation + +### Phase 3: Update Tests +1. ✅ Remove dict-specific tests from `tests/core/test_group_samplers.py` +2. ✅ Update `tests/schema/test_property_value.py` to remove dict cases +3. ✅ Run test suite to ensure no regressions + +### Phase 4: Update YAML Files +1. 
✅ Update `discoveryspace_geospatial_threadpool_test.yaml` +2. ✅ Update any other YAML files using dict-based image format + +### Phase 5: Validation +1. ✅ Run linting (black, ruff) on modified files +2. ✅ Run pytest on affected test modules +3. ✅ Test the operation command to ensure it works: + ```bash + uv run ado --remote ../ops/geo/remote_execution_context_threadpool_test.yaml \ + create operation -f ../ops/geo/operation_geospatial_threadpool_test.yaml + ``` + +## Benefits of This Approach + +1. **Simpler YAML**: List format is more concise and readable +2. **No Core Changes**: Removes need for special dict handling in core ado +3. **Type Safety**: Lists are hashable and work naturally with frozensets +4. **Backward Compatible**: String-only format still supported +5. **Cleaner Code**: Removes temporary workarounds and special cases + +## Risk Assessment + +**Low Risk**: +- Changes are localized to vllm_performance actuator +- Core ado code is simplified (reverted to original) +- Backward compatibility maintained for string format + +**Testing Strategy**: +- Unit tests for list parsing logic +- Integration test with actual operation execution +- Verify grouping still works correctly with list values + +## Success Criteria + +- [ ] Actuator correctly parses list-based image values +- [ ] vLLM version extraction works from list format +- [ ] Threadpool logic correctly uses version from list +- [ ] Core ado files reverted to original state +- [ ] All tests pass +- [ ] Operation executes successfully with new YAML format +- [ ] No regressions in existing functionality \ No newline at end of file diff --git a/~/workspace/plans/ado-threadpool-refactoring-plan.md b/~/workspace/plans/ado-threadpool-refactoring-plan.md new file mode 100644 index 000000000..2ce162e2f --- /dev/null +++ b/~/workspace/plans/ado-threadpool-refactoring-plan.md @@ -0,0 +1,248 @@ +# Implementation Plan: Code Refactoring for Readability and Conciseness + +## Overview +This plan implements the 
refactoring recommendations to improve code readability, reduce duplication, and remove excessive comments across the threadpool branch changes. + +## Phase 1: Core Utilities (Foundation) + +### Task 1.1: Create VLLMVersionChecker utility class +**File:** `plugins/actuators/vllm_performance/ado_actuators/vllm_performance/version_utils.py` (new file) + +**Actions:** +1. Create new file `version_utils.py` +2. Implement `VLLMVersionChecker` class with: + - `THREADPOOL_MIN_VERSION = "0.20.0"` constant + - `parse_version()` static method + - `supports_threadpool()` class method +3. Add minimal docstrings (one-line per method) +4. Add unit tests in `tests/test_version_utils.py` + +**Dependencies:** None +**Estimated effort:** 30 minutes + +--- + +### Task 1.2: Create CachedMeasurement dataclass +**File:** `plugins/actuators/vllm_performance/ado_actuators/vllm_performance/env_manager.py` + +**Actions:** +1. Add `from dataclasses import dataclass` import +2. Define `CachedMeasurement` dataclass at top of file +3. Update `EnvironmentManager.__init__()` type hint for `measurement_cache` +4. Simplify `get_cached_measurement()` - remove verbose docstring +5. Simplify `cache_measurement()` - remove verbose docstring, keep debug log +6. Update any code that accesses cache dict structure to use dataclass attributes + +**Dependencies:** None +**Estimated effort:** 20 minutes + +--- + +## Phase 2: Cache Key Refactoring (Core Logic) + +### Task 2.1: Create unified CacheKeyBuilder class +**File:** `plugins/actuators/vllm_performance/ado_actuators/vllm_performance/cache_utils.py` (new file) + +**Actions:** +1. Create new file `cache_utils.py` +2. Implement `CacheKeyBuilder` class with: + - `ALL_PARAMS` class variable (single list of all parameters) + - `build()` class method + - Brief docstring referencing YAML files +3. Import `_normalize_threadpool_properties` or move it to this file +4. 
Add unit tests in `tests/test_cache_utils.py` + +**Dependencies:** Task 1.1 (for `_normalize_threadpool_properties`) +**Estimated effort:** 45 minutes + +--- + +### Task 2.2: Refactor _normalize_threadpool_properties +**File:** `plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py` + +**Actions:** +1. Import `VLLMVersionChecker` from `version_utils` +2. Replace `_should_enable_threadpool()` and `_get_vllm_version_from_image_value()` with `VLLMVersionChecker.supports_threadpool()` +3. Simplify `_normalize_threadpool_properties()` to use new utility +4. Remove all debug logging statements +5. Remove verbose comments +6. Keep only brief docstring + +**Dependencies:** Task 1.1 +**Estimated effort:** 30 minutes + +--- + +### Task 2.3: Replace cache key functions with CacheKeyBuilder +**File:** `plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py` + +**Actions:** +1. Import `CacheKeyBuilder` from `cache_utils` +2. Delete `_build_entity_env()` function (62 lines) +3. Delete `_build_benchmark_params_key()` function (23 lines) +4. Delete `_build_cache_key()` function +5. Replace all calls to these functions with `CacheKeyBuilder.build(values)` +6. Search for any other references to deleted functions + +**Dependencies:** Task 2.1, Task 2.2 +**Estimated effort:** 30 minutes + +--- + +## Phase 3: Simplify group_samplers.py + +### Task 3.1: Refactor make_hashable with pattern matching +**File:** `orchestrator/core/discoveryspace/group_samplers.py` + +**Actions:** +1. Replace `if/elif` chain with `match/case` statement +2. Remove inline comments +3. Keep docstring +4. Verify existing tests still pass + +**Dependencies:** None +**Estimated effort:** 15 minutes + +--- + +## Phase 4: Test Refactoring + +### Task 4.1: Create pytest fixtures for test_experiment_executor.py +**File:** `plugins/actuators/vllm_performance/tests/test_experiment_executor.py` + +**Actions:** +1. Create `base_vllm_values` fixture +2. 
Identify test methods that can be parametrized +3. Create parametrized test for threadpool normalization +4. Create parametrized test for cache key generation +5. Remove redundant test methods +6. Simplify docstrings (one-line descriptions) +7. Update assertions to use `CacheKeyBuilder.build()` + +**Dependencies:** Task 2.3 +**Estimated effort:** 60 minutes + +--- + +### Task 4.2: Add tests for new utility classes +**File:** `plugins/actuators/vllm_performance/tests/test_version_utils.py` (new) +**File:** `plugins/actuators/vllm_performance/tests/test_cache_utils.py` (new) + +**Actions:** +1. Create test file for `VLLMVersionChecker`: + - Test version parsing from list + - Test version parsing from string + - Test threadpool support detection + - Test edge cases (None, invalid versions) +2. Create test file for `CacheKeyBuilder`: + - Test cache key generation + - Test parameter extraction + - Test normalization integration + - Test key consistency + +**Dependencies:** Task 1.1, Task 2.1 +**Estimated effort:** 45 minutes + +--- + +## Phase 5: Comment Cleanup + +### Task 5.1: Remove excessive comments from experiment_executor.py +**File:** `plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py` + +**Actions:** +1. Remove debug logging statements (20+ lines) +2. Simplify function docstrings to one-line descriptions +3. Remove inline comments that restate code +4. Keep only non-obvious comments (e.g., version thresholds, backward compatibility notes) + +**Dependencies:** Task 2.2, Task 2.3 +**Estimated effort:** 20 minutes + +--- + +### Task 5.2: Remove excessive comments from test files +**File:** `plugins/actuators/vllm_performance/tests/test_experiment_executor.py` + +**Actions:** +1. Simplify test docstrings to brief descriptions +2. Remove comments that just restate test names +3. 
Keep only comments explaining non-obvious test logic + +**Dependencies:** Task 4.1 +**Estimated effort:** 15 minutes + +--- + +## Phase 6: Integration and Validation + +### Task 6.1: Run full test suite +**Actions:** +1. Run linting: `uv run black plugins/actuators/vllm_performance/` +2. Run linting: `uv run ruff check --fix plugins/actuators/vllm_performance/` +3. Run linting: `uv run black orchestrator/core/discoveryspace/` +4. Run linting: `uv run ruff check --fix orchestrator/core/discoveryspace/` +5. Run tests: `uv run pytest -n auto plugins/actuators/vllm_performance/tests/` +6. Run tests: `uv run pytest -n auto tests/core/test_group_samplers.py` +7. Fix any failures + +**Dependencies:** All previous tasks +**Estimated effort:** 30 minutes + +--- + +### Task 6.2: Integration testing +**Actions:** +1. Test with actual YAML files from `plugins/actuators/vllm_performance/yamls/` +2. Verify cache key generation produces expected results +3. Verify threadpool normalization works correctly +4. Test with both vLLM 0.18.0 and 0.20.1+ images +5. Verify backward compatibility with string image values + +**Dependencies:** Task 6.1 +**Estimated effort:** 30 minutes + +--- + +## Phase 7: Documentation + +### Task 7.1: Update inline documentation +**Actions:** +1. Add brief module docstrings to new files (`version_utils.py`, `cache_utils.py`) +2. Update any affected documentation in `plugins/actuators/vllm_performance/README.md` +3. 
Ensure YAML files are referenced correctly in code comments + +**Dependencies:** Task 6.2 +**Estimated effort:** 20 minutes + +--- + +## Summary + +**Total estimated effort:** ~6 hours + +**Files to create:** +- `plugins/actuators/vllm_performance/ado_actuators/vllm_performance/version_utils.py` +- `plugins/actuators/vllm_performance/ado_actuators/vllm_performance/cache_utils.py` +- `plugins/actuators/vllm_performance/tests/test_version_utils.py` +- `plugins/actuators/vllm_performance/tests/test_cache_utils.py` + +**Files to modify:** +- `orchestrator/core/discoveryspace/group_samplers.py` +- `plugins/actuators/vllm_performance/ado_actuators/vllm_performance/env_manager.py` +- `plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py` +- `plugins/actuators/vllm_performance/tests/test_experiment_executor.py` + +**Expected outcomes:** +- ~200+ lines of code/comments removed +- 4 new utility classes with clear responsibilities +- 40% reduction in test code through parametrization +- Improved maintainability and readability +- All existing functionality preserved +- All tests passing + +**Risk mitigation:** +- Each phase can be completed and tested independently +- Existing tests validate behavior is preserved +- New tests validate new utilities work correctly +- Integration testing catches any edge cases \ No newline at end of file diff --git a/~/workspace/plans/ado-threadpool-threadpool-property-implementation.md b/~/workspace/plans/ado-threadpool-threadpool-property-implementation.md new file mode 100644 index 000000000..1f06cd362 --- /dev/null +++ b/~/workspace/plans/ado-threadpool-threadpool-property-implementation.md @@ -0,0 +1,296 @@ +# Implementation Plan: Add Threadpool Experiment Property to vLLM Performance Actuator + +**Project:** ado-threadpool +**Branch:** feature/threadpool-property +**Issue:** [#988](https://github.com/IBM/ado/issues/988) - Option 1: Dictionary in Metadata +**Date:** 2026-06-02 + +## Overview + +Add a 
`threadpool` experiment property to the vLLM performance actuator that enables/disables threadpool functionality. When enabled, it passes `--renderer-num-workers` and `--mm-processor-cache-gb 0` arguments to vLLM deployments. + +## Requirements + +### Functional Requirements + +1. Add `threadpool` property to all geospatial experiments + - Domain: Categorical with values `[0, 1]` (0=disabled, 1=enabled) + - Default: 1 (enabled) + - Metadata: Clear description of threadpool functionality + +2. Add `renderer_num_workers` property to all geospatial experiments + - Domain: Discrete integer, range `[1, 128]` + - Default: 32 + - Only used when `threadpool=1` + - Metadata: Description of worker count purpose + +3. Modify vLLM deployment creation to conditionally add arguments: + - When `threadpool=1`: Add `--renderer-num-workers <renderer_num_workers>` and `--mm-processor-cache-gb 0` + - When `threadpool=0`: Do not add these arguments + +### Non-Functional Requirements + +- Backward compatibility: Existing experiments without these properties should continue to work +- Clear documentation of the new properties +- Proper validation and error handling + +## Technical Design + +### Files to Modify + +1. **Experiment YAML Files** (2 files) + - `plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml` + - `plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing.yaml` (if it has geospatial experiments) + +2. **Deployment Builder** (1 file) + - `plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8s/yaml_support/build_components.py` + - Modify `deployment_yaml()` method to accept threadpool parameters + - Update `vllm_serve_args` construction logic + +3. **Environment Creation** (1 file) + - `plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8s/create_environment.py` + - Modify `create_test_environment()` to accept and pass threadpool parameters + +4.
**Experiment Executor** (1 file) + - `plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py` + - Modify `_create_environment()` to extract and pass threadpool values from entity properties + +### Implementation Steps + +#### Step 1: Update Experiment YAML Files + +For each geospatial experiment in `performance_testing_geospatial.yaml`: +- `performance_testing-geospatial-endpoint` +- `performance_testing-geospatial-full` +- `performance_testing-geospatial-full-custom-dataset` +- `performance_testing-geospatial-endpoint-custom-dataset` +- `performance_testing-geospatial-endpoint-guidellm` +- `performance_testing-geospatial-full-guidellm` +- `performance_testing-geospatial-guidellm-deployment-custom-dataset` +- `performance_testing-geospatial-guidellm-endpoint-custom-dataset` + +Add to `optionalProperties`: +```yaml +- identifier: 'threadpool' + metadata: + description: "Enable threadpool for vLLM renderer (0=disabled, 1=enabled). When enabled, uses multiple workers for rendering." + propertyDomain: + variableType: 'CATEGORICAL_VARIABLE_TYPE' + values: [0, 1] +- identifier: 'renderer_num_workers' + metadata: + description: "Number of renderer workers when threadpool is enabled. Only used when threadpool=1." + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [1, 128] + interval: 1 +``` + +Add to `defaultParameterization`: +```yaml +- property: + identifier: 'threadpool' + value: 1 +- property: + identifier: 'renderer_num_workers' + value: 32 +``` + +#### Step 2: Update build_components.py + +Modify `ComponentsYaml.deployment_yaml()`: + +1. Add parameters: +```python +def deployment_yaml( + # ... existing parameters ... + threadpool: int = 1, + renderer_num_workers: int = 32, +) -> dict[str, Any]: +``` + +2. Update docstring to document new parameters + +3. 
Modify `vllm_serve_args` construction (around lines 173-195): +```python +vllm_serve_args = [ + model, + "--max-num-seqs", + str(max_num_seq), + "--max-num-batched-tokens", + str(max_batch_tokens), + "--dtype", + dtype.value, +] + +# Add threadpool arguments if enabled +if threadpool == 1: + vllm_serve_args.extend([ + "--renderer-num-workers", + str(renderer_num_workers), + "--mm-processor-cache-gb", + "0", + ]) + +if enforce_eager: + vllm_serve_args.append("--enforce-eager") +# ... rest of the logic +``` + +#### Step 3: Update create_environment.py + +Modify `create_test_environment()`: + +1. Add parameters: +```python +def create_test_environment( + # ... existing parameters ... + threadpool: int = 1, + renderer_num_workers: int = 32, +) -> None: +``` + +2. Update docstring + +3. Pass to `c_manager.create_deployment()`: +```python +c_manager.create_deployment( + # ... existing parameters ... + threadpool=threadpool, + renderer_num_workers=renderer_num_workers, +) +``` + +#### Step 4: Update manage_components.py + +Modify `ComponentsManager.create_deployment()`: + +1. Add parameters to method signature +2. Pass to `ComponentsYaml.deployment_yaml()` + +#### Step 5: Update experiment_executor.py + +Modify `_build_entity_env()` to include threadpool parameters: +```python +env_values = { + # ... existing values ... + "threadpool": values.get("threadpool", 1), + "renderer_num_workers": values.get("renderer_num_workers", 32), +} +``` + +Modify `_create_environment()` to extract and pass values: +```python +create_test_environment( + # ... existing parameters ... + threadpool=int(values.get("threadpool", 1)), + renderer_num_workers=int(values.get("renderer_num_workers", 32)), +) +``` + +### Testing Strategy + +#### Unit Tests + +1. **Test YAML Validation** + - Validate experiment YAML with new properties + - Test with `ado create discoveryspace -f <discoveryspace.yaml> --dry-run` + +2.
**Test Property Extraction** + - Verify `_build_entity_env()` correctly extracts threadpool values + - Test with both enabled and disabled states + +3. **Test Argument Construction** + - Verify `deployment_yaml()` correctly builds vllm_serve_args + - Test threadpool=0: arguments should NOT be present + - Test threadpool=1: arguments should be present with correct values + +#### Integration Tests + +1. **End-to-End Test with Threadpool Enabled (Default)** + - Create discoveryspace without specifying threadpool + - Run operation + - Verify deployment includes `--renderer-num-workers 32 --mm-processor-cache-gb 0` + +2. **End-to-End Test with Custom Threadpool Settings** + - Create discoveryspace with threadpool=1, renderer_num_workers=16 + - Run operation + - Verify deployment includes `--renderer-num-workers 16 --mm-processor-cache-gb 0` + +3. **End-to-End Test with Threadpool Disabled** + - Create discoveryspace with threadpool=0 + - Run operation + - Verify deployment does not include threadpool arguments + +### Example YAML Usage + +```yaml +# Example discoveryspace with threadpool enabled +apiVersion: ado.org/v1 +kind: DiscoverySpace +metadata: + name: vllm-threadpool-test +spec: + entitySpace: + - model: "ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11" + endpoint: "http://localhost:8000" + request_rate: 10 + threadpool: 1 # Optional: defaults to 1 (enabled) + renderer_num_workers: 64 # Optional: defaults to 32 + experiments: + - identifier: test-geospatial-endpoint-v1 +``` + +### Validation Checklist + +- [ ] All 8 geospatial experiments updated with new properties +- [ ] Default values set correctly in all experiments +- [ ] `build_components.py` modified to handle threadpool parameters +- [ ] `create_environment.py` modified to pass threadpool parameters +- [ ] `manage_components.py` modified to accept and forward parameters +- [ ] `experiment_executor.py` modified to extract and pass values +- [ ] YAML validation passes with `--dry-run` +- [ ] Unit 
tests written and passing +- [ ] Integration tests written and passing +- [ ] Documentation updated (if applicable) +- [ ] Backward compatibility verified + +### Risks and Mitigations + +| Risk | Impact | Mitigation | +|------|--------|------------| +| Breaking existing experiments | Medium | Use optional properties; threadpool enabled by default may change behavior but should improve performance | +| Invalid renderer_num_workers values | Medium | Use discrete domain with reasonable range [1, 128] | +| Threadpool not supported in all vLLM versions | Medium | Document minimum vLLM version requirement | +| Performance regression if threadpool causes issues | Medium | Users can disable by setting threadpool=0 | + +### Dependencies + +- vLLM version must support `--renderer-num-workers` and `--mm-processor-cache-gb` arguments +- No changes to ado core required +- No changes to other actuators required + +### Rollout Plan + +1. Implement changes in feature branch +2. Run full test suite +3. Test with sample discoveryspace on development cluster +4. Create PR with detailed description +5. Code review +6. Merge to main +7. 
Update documentation with examples + +### Success Criteria + +- [ ] All tests pass +- [ ] Existing experiments continue to work without modification +- [ ] New properties can be used to enable threadpool functionality +- [ ] vLLM deployments correctly include/exclude threadpool arguments based on property values +- [ ] Code review approved +- [ ] Documentation updated + +## Notes + +- The commented-out lines in `build_components.py` (lines 175-178) suggest this feature was previously considered but not fully implemented +- This implementation follows the pattern established for other optional vLLM arguments like `enforce_eager` and `skip_tokenizer_init` +- The property uses integer values (0/1) instead of boolean to maintain consistency with other categorical properties in the actuator \ No newline at end of file diff --git a/~/workspace/plans/ado-threadpool-vllm-cache-implementation-summary.md b/~/workspace/plans/ado-threadpool-vllm-cache-implementation-summary.md new file mode 100644 index 000000000..b8978419e --- /dev/null +++ b/~/workspace/plans/ado-threadpool-vllm-cache-implementation-summary.md @@ -0,0 +1,141 @@ +# vLLM Performance Actuator Cache Improvement - Implementation Summary + +## Overview + +Successfully implemented a fix for the vLLM performance actuator's measurement cache to include both environment and benchmark parameters in the cache key, preventing incorrect measurement reuse. + +## Problem Fixed + +**Before**: Cache only considered environment parameters (model, GPUs, memory, etc.), causing measurements to be incorrectly reused when benchmark parameters (num_prompts, request_rate, dataset, etc.) differed. + +**After**: Cache now uses a composite key that includes both environment AND benchmark parameters, ensuring measurements are only reused when both match. + +## Implementation Details + +### 1. 
New Helper Functions Added + +#### `_build_benchmark_params_key(values: dict[str, str]) -> str` +- Location: `plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py` (lines 214-233) +- Purpose: Extracts and serializes benchmark parameters that affect measurement results +- Parameters included: + - `num_prompts` - number of prompts to test + - `request_rate` - rate of requests + - `max_concurrency` - maximum concurrent requests + - `number_input_tokens` - input token count + - `max_output_tokens` - output token count + - `burstiness` - burstiness factor + - `dataset` - dataset used for testing +- Returns: JSON string with sorted keys for consistency + +#### `_build_cache_key(values: dict[str, str]) -> str` +- Location: `plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py` (lines 236-257) +- Purpose: Creates composite cache key combining environment and benchmark parameters +- Structure: + ```json + { + "environment": { /* environment params from _build_entity_env */ }, + "benchmark": { /* benchmark params from _build_benchmark_params_key */ } + } + ``` +- Returns: JSON string with sorted keys + +### 2. Cache Logic Updated + +Modified `run_resource_and_workload_experiment()` function: +- Line 600: Changed from `_build_entity_env(values)` to `_build_cache_key(values)` +- Line 601: Updated log message to reflect composite key +- Line 603: Updated cache lookup to use new key +- Line 605-606: Updated log message for cache hits +- Line 761: Updated cache storage to use new key + +### 3. 
Comprehensive Test Suite + +Added 9 new test cases in `plugins/actuators/vllm_performance/tests/test_experiment_executor.py`: + +#### TestBuildBenchmarkParamsKey (3 tests) +- `test_includes_all_benchmark_parameters`: Verifies all benchmark params are included +- `test_handles_missing_values`: Ensures missing values are handled as None +- `test_consistent_output_with_sorted_keys`: Confirms consistent JSON output + +#### TestBuildCacheKey (6 tests) +- `test_combines_environment_and_benchmark_params`: Verifies composite structure +- `test_different_benchmark_params_produce_different_keys`: Ensures differentiation +- `test_same_params_produce_same_key`: Confirms consistency +- `test_cache_key_differentiates_on_request_rate`: Tests specific parameter differentiation +- `test_vllm_0_18_same_cache_key_for_different_renderer_num_workers`: **Critical test** - Verifies that for vLLM 0.18.0, different `renderer_num_workers` values produce the SAME cache key (because threadpool is not supported and normalized to 0) +- `test_vllm_0_20_different_cache_key_for_different_renderer_num_workers`: Verifies that for vLLM 0.20.1+, different `renderer_num_workers` values produce DIFFERENT cache keys + +## Test Results + +All 19 tests pass successfully: +``` +============================= test session starts ============================== +collected 19 items + +tests/test_experiment_executor.py::TestGetVllmVersionFromImageValue::... PASSED +tests/test_experiment_executor.py::TestBuildEntityEnv::... PASSED +tests/test_experiment_executor.py::TestBuildBenchmarkParamsKey::... PASSED +tests/test_experiment_executor.py::TestBuildCacheKey::... 
PASSED + +============================== 19 passed in 0.68s ============================== +``` + +Code quality checks: +- ✅ Black formatting: Passed +- ✅ Ruff linting: All checks passed + +## Key Behavior Verified + +### For vLLM 0.18.0 (threadpool not supported) +When using the test discoveryspace `../ops/geo/discoveryspace_geospatial_threadpool_test.yaml`: +- Image: `["icr.io/drl-nextgen/mgazz/vllm:v0.18.0-tt.v1.2.5", "0.18.0"]` +- `renderer_num_workers` values: 32, 64 +- **Result**: Both produce the SAME cache key because: + 1. vLLM 0.18.0 < 0.20.0 (minimum version for threadpool) + 2. Both `threadpool` and `renderer_num_workers` are normalized to 0 in environment + 3. Same environment + same benchmark params = cache hit ✅ + +### For vLLM 0.20.1+ (threadpool supported) +- Image: `["icr.io/drl-nextgen/mgazz/vllm:v0.20.1-tt.main", "0.20.1"]` +- `renderer_num_workers` values: 32, 64 +- **Result**: Produce DIFFERENT cache keys because: + 1. vLLM 0.20.1 >= 0.20.0 (threadpool supported) + 2. `renderer_num_workers` values are preserved (32 vs 64) + 3. Different environment = no cache hit ✅ + +## Files Modified + +1. **plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py** + - Added `_build_benchmark_params_key()` function + - Added `_build_cache_key()` function + - Updated cache logic in `run_resource_and_workload_experiment()` + +2. 
**plugins/actuators/vllm_performance/tests/test_experiment_executor.py** + - Added imports for new functions + - Added `TestBuildBenchmarkParamsKey` test class (3 tests) + - Added `TestBuildCacheKey` test class (6 tests) + +## Impact + +- **Bug Fix**: Prevents incorrect measurement reuse +- **No Breaking Changes**: Cache is local to each operation execution +- **Performance**: No performance regression +- **Correctness**: Ensures measurements are only reused when truly identical + +## Validation + +The implementation correctly handles the user's specific test case: +- For vLLM 0.18.0 with different `renderer_num_workers` values (32, 64) +- Cache hits occur as expected because threadpool is not supported +- Both entities share the same normalized environment (threadpool=0, renderer_num_workers=0) +- Same benchmark parameters (num_prompts=200, request_rate=32/64, dataset, etc.) +- Result: Measurements are correctly reused for entities with same effective configuration + +## Next Steps + +The implementation is complete and ready for use. The cache now correctly: +1. ✅ Differentiates based on benchmark parameters +2. ✅ Reuses measurements only when appropriate +3. ✅ Handles vLLM version-specific behavior (threadpool support) +4. ✅ Maintains backward compatibility +5. ✅ Passes all tests with proper code quality \ No newline at end of file diff --git a/~/workspace/plans/ado-threadpool-vllm-cache-improvement.md b/~/workspace/plans/ado-threadpool-vllm-cache-improvement.md new file mode 100644 index 000000000..bd8ccdc60 --- /dev/null +++ b/~/workspace/plans/ado-threadpool-vllm-cache-improvement.md @@ -0,0 +1,275 @@ +# Plan: Improve vLLM Performance Actuator Measurement Cache + +## Problem Statement + +The current measurement cache in the vLLM performance actuator (lines 541, 554-571 in `experiment_executor.py`) only considers the entity environment definition when determining cache hits. This is insufficient because: + +1. 
**Current cache key**: Only includes environment parameters (model, image, GPUs, memory, etc.) via `_build_entity_env()` +2. **Missing from cache key**: Benchmark/workload parameters that affect measurements: + - `num_prompts` - number of prompts to test + - `request_rate` - rate of requests + - `max_concurrency` - maximum concurrent requests + - `number_input_tokens` - input token count + - `max_output_tokens` - output token count + - `burstiness` - burstiness factor + - `dataset` - dataset used for testing + +3. **Impact**: Entities with identical environments but different benchmark parameters incorrectly reuse cached measurements, producing invalid results. + +## Current Implementation Analysis + +### Cache Location +- **File**: `plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py` +- **Function**: `run_resource_and_workload_experiment` (lines 504-738) +- **Cache variable**: `measurement_cache: dict[str, dict]` (line 541) + +### Current Cache Key Generation +```python +definition_for_cache = _build_entity_env(values) # Line 553 +``` + +The `_build_entity_env()` function (lines 150-211) creates a JSON string containing only environment parameters: +- model, image, n_gpus, gpu_type, n_cpus, memory +- max_batch_tokens, gpu_memory_utilization, dtype, cpu_offload +- max_num_seq, threadpool, renderer_num_workers + +### Cache Usage Pattern +1. Check if `definition_for_cache` exists in cache (line 556) +2. If hit: reuse cached measurements (lines 557-571) +3. If miss: execute benchmark and cache result (lines 713-716) + +## Solution Design + +### New Cache Key Strategy + +Create a composite cache key that includes both: +1. **Environment definition** (existing `_build_entity_env()` output) +2. **Benchmark parameters** (new component) + +### Implementation Approach + +#### 1. 
Create New Function: `_build_benchmark_params_key()` + +```python +def _build_benchmark_params_key(values: dict[str, str]) -> str: + """ + Build a cache key from benchmark parameters that affect measurement results. + + These parameters define the workload characteristics and must be included + in the cache key to ensure measurements are only reused for identical tests. + + :param values: experiment values + :return: JSON string of benchmark parameters + """ + benchmark_params = { + "num_prompts": values.get("num_prompts"), + "request_rate": values.get("request_rate"), + "max_concurrency": values.get("max_concurrency"), + "number_input_tokens": values.get("number_input_tokens"), + "max_output_tokens": values.get("max_output_tokens"), + "burstiness": values.get("burstiness"), + "dataset": values.get("dataset"), + } + return json.dumps(benchmark_params, sort_keys=True) +``` + +**Rationale**: +- These parameters directly affect benchmark execution and results +- They correspond to fields in `BenchmarkParameters` model +- Sorting keys ensures consistent JSON output + +#### 2. Create Composite Cache Key Function + +```python +def _build_cache_key(values: dict[str, str]) -> str: + """ + Build a composite cache key from both environment and benchmark parameters. + + Cache hits should only occur when both the deployment environment AND + the benchmark workload parameters are identical. + + :param values: experiment values + :return: composite cache key + """ + env_key = _build_entity_env(values) + benchmark_key = _build_benchmark_params_key(values) + + # Combine both keys into a single cache key + composite = { + "environment": json.loads(env_key), + "benchmark": json.loads(benchmark_key) + } + return json.dumps(composite, sort_keys=True) +``` + +**Rationale**: +- Separates concerns: environment vs. workload +- Makes cache key structure explicit +- Maintains backward compatibility with environment definition + +#### 3. 
Update Cache Usage in `run_resource_and_workload_experiment()` + +**Current code (lines 553-571)**: +```python +definition_for_cache = _build_entity_env(values) +logger.info("definition_for_cache: %s", definition_for_cache) + +if definition_for_cache in measurement_cache: + # ... reuse cached result +``` + +**Updated code**: +```python +cache_key = _build_cache_key(values) +logger.info("cache_key: %s", cache_key) + +if cache_key in measurement_cache: + logger.info( + f"Reusing cached measurement for entity {entity.identifier} " + f"(identical environment and benchmark parameters)" + ) + # ... reuse cached result (same logic) +``` + +**Changes**: +- Replace `definition_for_cache` with `cache_key` +- Update log message to reflect both environment and benchmark matching +- Update cache storage (line 713) to use `cache_key` + +## Implementation Steps + +### Step 1: Add Helper Functions +- Location: After `_build_entity_env()` function (after line 211) +- Add `_build_benchmark_params_key()` +- Add `_build_cache_key()` +- Include comprehensive docstrings + +### Step 2: Update Cache Logic +- In `run_resource_and_workload_experiment()`: + - Line 553: Replace `_build_entity_env()` with `_build_cache_key()` + - Line 554: Update log message + - Line 556: Update condition check + - Line 557-559: Update log message + - Line 713: Update cache storage key + +### Step 3: Add Tests +- Location: `plugins/actuators/vllm_performance/tests/test_experiment_executor.py` +- Test `_build_benchmark_params_key()`: + - Verify all benchmark parameters included + - Verify consistent JSON output (sorted keys) + - Test with missing/None values +- Test `_build_cache_key()`: + - Verify composite structure + - Verify different benchmark params → different keys + - Verify same params → same keys +- Integration test for cache behavior: + - Same environment + same benchmark → cache hit + - Same environment + different benchmark → cache miss + - Different environment + same benchmark → cache miss + +### 
Step 4: Update Documentation +- Add comments explaining cache key composition +- Document why benchmark parameters must be in cache key +- Update any relevant README or design docs + +## Testing Strategy + +### Unit Tests + +1. **Test `_build_benchmark_params_key()`**: + ```python + def test_build_benchmark_params_key(): + values = { + "num_prompts": 100, + "request_rate": 10, + "max_concurrency": 5, + "dataset": "random" + } + key = _build_benchmark_params_key(values) + assert "num_prompts" in key + assert "100" in key + # Verify consistent output + key2 = _build_benchmark_params_key(values) + assert key == key2 + ``` + +2. **Test `_build_cache_key()` differentiation**: + ```python + def test_cache_key_differentiates_benchmark_params(): + base_values = { + "model": "test-model", + "image": "test-image", + "n_gpus": 1, + "num_prompts": 100, + } + + key1 = _build_cache_key(base_values) + + # Change benchmark param + modified_values = base_values.copy() + modified_values["num_prompts"] = 200 + key2 = _build_cache_key(modified_values) + + assert key1 != key2 # Different benchmark params → different keys + ``` + +3. **Test cache hit/miss behavior**: + - Mock the cache and verify correct reuse + - Verify measurements not reused when benchmark params differ + +### Integration Tests + +1. Create test scenario with: + - 2 entities with identical environment + - Different `num_prompts` values + - Verify both entities execute (no cache hit) + +2. Create test scenario with: + - 2 entities with identical environment AND benchmark params + - Verify second entity reuses cache (cache hit) + +## Backward Compatibility + +**Impact**: None - this is a bug fix, not a breaking change + +- Cache is local to each operation execution (not persisted) +- No external APIs affected +- Existing operations will simply have more accurate caching + +## Edge Cases to Consider + +1. **None/missing values**: Ensure consistent handling in JSON serialization +2. 
**Default values**: Consider if defaults should be explicit in cache key +3. **Floating point precision**: `burstiness` is float - ensure consistent serialization +4. **Dataset paths**: If dataset is a path, ensure normalization + +## Success Criteria + +1. ✅ Cache key includes all benchmark parameters +2. ✅ Different benchmark params → different cache keys +3. ✅ Same environment + same benchmark → cache hit +4. ✅ Same environment + different benchmark → cache miss +5. ✅ All tests pass +6. ✅ No performance regression +7. ✅ Clear logging of cache hits/misses + +## Files to Modify + +1. **Primary**: + - `plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py` + +2. **Tests**: + - `plugins/actuators/vllm_performance/tests/test_experiment_executor.py` + +## Estimated Effort + +- Implementation: 2-3 hours +- Testing: 2-3 hours +- Review and refinement: 1 hour +- **Total**: 5-7 hours + +## Notes + +- The cache is only used in `run_resource_and_workload_experiment()`, not in `run_workload_experiment()` (which tests existing endpoints) +- This fix prevents incorrect measurement reuse that could lead to invalid experimental results +- The fix is localized to the caching logic and doesn't affect environment creation or benchmark execution \ No newline at end of file