From b59e677e58ba3e30110621c2a1fa60647abe0708 Mon Sep 17 00:00:00 2001
From: Michele Gazzetti <michele.gazzetti1@ibm.com>
Date: Wed, 3 Jun 2026 11:08:01 +0100
Subject: [PATCH 1/7] feat(vllm_performance): enable vllm version reference

Signed-off-by: Michele Gazzetti <michele.gazzetti1@ibm.com>
---
 .../vllm_performance/experiment_executor.py   | 104 +++++++++++-
 .../performance_testing_geospatial.yaml       | 160 ++++++++++++++++++
 .../k8s/create_environment.py                 |   6 +
 .../vllm_performance/k8s/manage_components.py |   6 +
 .../k8s/yaml_support/build_components.py      |  17 ++
 .../yamls/test_geospatial_threadpool.yaml     |  29 ++++
 6 files changed, 320 insertions(+), 2 deletions(-)
 create mode 100644 plugins/actuators/vllm_performance/yamls/test_geospatial_threadpool.yaml

diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py
index b9d294c70..4086bfc1e 100644
--- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py
+++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py
@@ -39,6 +39,7 @@
     execute_guidellm_benchmark,
     execute_guidellm_geospatial_benchmark,
 )
+from packaging import version
 from ray.actor import ActorHandle
 
 from orchestrator.modules.actuators.measurement_queue import MeasurementQueue
@@ -53,6 +54,86 @@
 logger = logging.getLogger(__name__)
 
 
+def _get_vllm_version_from_metadata(
+    experiment: Experiment, image_name: str
+) -> str | None:
+    """
+    Look up the vLLM version declared for an image in the experiment metadata.
+
+    Args:
+        experiment: Experiment whose "image" property metadata may hold a "vllm_version" map
+        image_name: The image name used as the key into that map
+
+    Returns:
+        Version string if found in metadata, None otherwise
+    """
+    # Look for image property in experiment's optional or required properties
+    for prop in experiment.optionalProperties + experiment.requiredProperties:
+        if prop.identifier == "image" and prop.metadata:
+            vllm_version_map = prop.metadata.get("vllm_version", {})
+            if isinstance(vllm_version_map, dict):
+                return vllm_version_map.get(image_name)
+    return None
+
+
+def _should_enable_threadpool(
+    experiment: Experiment, image_name: str, threadpool_value: int
+) -> bool:
+    """
+    Determine if threadpool should be enabled based on vLLM version and user preference.
+
+    Threadpool is only supported in vLLM >= 0.20.0. This function checks:
+    1. If user explicitly disabled threadpool (threadpool=0), return False
+    2. If vLLM version metadata exists and version < 0.20.0, return False
+    3. Otherwise, return True (supported version, no version info, or unparsable version)
+
+    Args:
+        experiment: The experiment object containing metadata
+        image_name: The image name to check version for
+        threadpool_value: User's threadpool preference (0 disables; any other value requests it)
+
+    Returns:
+        True if threadpool should be enabled, False otherwise
+    """
+    # If user explicitly disabled, respect that
+    if threadpool_value == 0:
+        return False
+
+    # Get version from metadata
+    vllm_version_str = _get_vllm_version_from_metadata(experiment, image_name)
+
+    # If no version metadata, assume it's supported (backward compatible)
+    if vllm_version_str is None:
+        logger.warning(
+            f"No vLLM version metadata found for image {image_name}. "
+            "Assuming threadpool is supported."
+        )
+        return True
+
+    # Parse and compare version
+    try:
+        vllm_ver = version.parse(vllm_version_str)
+        min_version = version.parse("0.20.0")
+
+        if vllm_ver < min_version:
+            logger.info(
+                f"Threadpool disabled: vLLM version {vllm_version_str} < 0.20.0 "
+                f"for image {image_name}"
+            )
+            return False
+
+        logger.info(
+            f"Threadpool enabled: vLLM version {vllm_version_str} >= 0.20.0 "
+            f"for image {image_name}"
+        )
+        return True
+    except Exception as e:
+        logger.error(
+            f"Failed to parse vLLM version '{vllm_version_str}' for image {image_name}: {e}. "
+        )
+        return True
+
+
 def _build_entity_env(values: dict[str, str]) -> str:
     """
     This is the list of entity parameters that define the environment:
@@ -83,6 +164,8 @@ def _build_entity_env(values: dict[str, str]) -> str:
         "dtype": values.get("dtype"),
         "cpu_offload": values.get("cpu_offload"),
         "max_num_seq": values.get("max_num_seq"),
+        "threadpool": values.get("threadpool", 1),
+        "renderer_num_workers": values.get("renderer_num_workers", 32),
     }
     return json.dumps(env_values)
 
@@ -93,11 +176,12 @@ def _create_environment(
     node_selector: dict[str, str],
     request_id: str,
     env_manager: ActorHandle[EnvironmentManager],
+    experiment: Experiment | ParameterizedExperiment,
     check_interval: int = 5,
     timeout: int = 1200,
 ) -> tuple[str, str]:
     """
-     Create environment
+     Create environment with version-aware threadpool support.
 
      Important: This function will block until env_manager.get_environment
      returns an environment.
@@ -109,6 +193,7 @@ def _create_environment(
      :param node_selector: node selector
      :param request_id the request associated with this environment
      :param env_manager: environment manager
+     :param experiment: experiment definition (used for version checking)
      :param check_interval: wait interval
      :param timeout: timeout
     :return: kubernetes environment name
@@ -190,12 +275,22 @@ def _create_environment(
                     )
                 )
                 try:
+                    # Determine if threadpool should be enabled based on version
+                    image_name = values.get("image", "")
+                    threadpool_requested = int(values.get("threadpool", 1))
+                    enable_threadpool = _should_enable_threadpool(
+                        experiment, image_name, threadpool_requested
+                    )
+
+                    # Convert boolean back to int for consistency with existing code
+                    threadpool_value = 1 if enable_threadpool else 0
+
                     create_test_environment(
                         k8s_name=env.k8s_name,
                         model=model,
                         in_cluster=actuator.in_cluster,
                         verify_ssl=actuator.verify_ssl,
-                        image=values.get("image"),
+                        image=image_name,
                         image_pull_secret_name=actuator.image_pull_secret_name,
                         deployment_template=actuator.deployment_template,
                         service_template=actuator.service_template,
@@ -218,6 +313,10 @@ def _create_environment(
                         enforce_eager=values.get("enforce_eager", 0) == 1,
                         io_processor_plugin=values.get("io_processor_plugin"),
                         otlp_traces_endpoint=otlp_traces_endpoint,
+                        threadpool=threadpool_value,
+                        renderer_num_workers=int(
+                            values.get("renderer_num_workers", 32)
+                        ),
                         check_interval=check_interval,
                         timeout=timeout,
                     )
@@ -396,6 +495,7 @@ def run_resource_and_workload_experiment(
                 actuator=actuator_parameters,
                 node_selector=node_selector,
                 env_manager=env_manager,
+                experiment=experiment,
                 request_id=request.requestid,
             )
 
diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml
index e9b976401..07f67482c 100644
--- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml
+++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml
@@ -53,6 +53,19 @@ performance_testing-geospatial-endpoint:
       propertyDomain:
         variableType: "CATEGORICAL_VARIABLE_TYPE"
         values: ['india_url_in_b64_out', 'valencia_url_in_b64_out', 'terramind_flood_url_in_b64_out']
+    - identifier: 'threadpool'
+      metadata:
+        description: "Enable threadpool for vLLM renderer (0=disabled, 1=enabled). When enabled, uses multiple workers for rendering."
+      propertyDomain:
+        variableType: 'CATEGORICAL_VARIABLE_TYPE'
+        values: [0, 1]
+    - identifier: 'renderer_num_workers'
+      metadata:
+        description: "Number of renderer workers when threadpool is enabled. Only used when threadpool=1."
+      propertyDomain:
+        variableType: 'DISCRETE_VARIABLE_TYPE'
+        domainRange: [1, 128]
+        interval: 1
   defaultParameterization:
     - value: 100
       property:
@@ -66,6 +79,12 @@ performance_testing-geospatial-endpoint:
     - property:
         identifier: 'dataset'
       value: 'india_url_in_b64_out'
+    - property:
+        identifier: 'threadpool'
+      value: 1
+    - property:
+        identifier: 'renderer_num_workers'
+      value: 32
   # measurements
   targetProperties:
     - identifier: "duration"
@@ -128,6 +147,8 @@ performance_testing-geospatial-full:
     - identifier: image
       metadata:
         description: "(deployment) Docker image to use to create vllm deployments"
+        vllm_version:
+          "your/image/with/vllm/and/terratorch:0.1": "0.20.0"
       propertyDomain:
         variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE"
         values: ["your/image/with/vllm/and/terratorch:0.1"]
@@ -207,6 +228,19 @@ performance_testing-geospatial-full:
       propertyDomain:
         variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE"
         values: [None, "terratorch_segmentation"]
+    - identifier: 'threadpool'
+      metadata:
+        description: "Enable threadpool for vLLM renderer (0=disabled, 1=enabled). When enabled, uses multiple workers for rendering."
+      propertyDomain:
+        variableType: 'CATEGORICAL_VARIABLE_TYPE'
+        values: [0, 1]
+    - identifier: 'renderer_num_workers'
+      metadata:
+        description: "Number of renderer workers when threadpool is enabled. Only used when threadpool=1."
+      propertyDomain:
+        variableType: 'DISCRETE_VARIABLE_TYPE'
+        domainRange: [1, 128]
+        interval: 1
   defaultParameterization:
     - property:
         identifier: 'image'
@@ -259,6 +293,12 @@ performance_testing-geospatial-full:
     - property:
         identifier: 'dataset'
       value: 'india_url_in_b64_out'
+    - property:
+        identifier: 'threadpool'
+      value: 1
+    - property:
+        identifier: 'renderer_num_workers'
+      value: 32
   # measurements
   targetProperties:
     - identifier: "duration"
@@ -321,6 +361,8 @@ performance_testing-geospatial-full-custom-dataset:
     - identifier: image
       metadata:
         description: "(deployment) Docker image to use to create vllm deployments"
+        vllm_version:
+          "your/image/with/vllm/and/terratorch:0.1": "0.20.0"
       propertyDomain:
         variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE"
         values: ["your/image/with/vllm/and/terratorch:0.1"]
@@ -400,6 +442,19 @@ performance_testing-geospatial-full-custom-dataset:
       propertyDomain:
         variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE"
         values: ["terratorch_segmentation"]
+    - identifier: 'threadpool'
+      metadata:
+        description: "Enable threadpool for vLLM renderer (0=disabled, 1=enabled). When enabled, uses multiple workers for rendering."
+      propertyDomain:
+        variableType: 'CATEGORICAL_VARIABLE_TYPE'
+        values: [0, 1]
+    - identifier: 'renderer_num_workers'
+      metadata:
+        description: "Number of renderer workers when threadpool is enabled. Only used when threadpool=1."
+      propertyDomain:
+        variableType: 'DISCRETE_VARIABLE_TYPE'
+        domainRange: [1, 128]
+        interval: 1
   defaultParameterization:
     - property:
         identifier: 'image'
@@ -449,6 +504,12 @@ performance_testing-geospatial-full-custom-dataset:
     - property:
         identifier: 'io_processor_plugin'
       value: "terratorch_segmentation"
+    - property:
+        identifier: 'threadpool'
+      value: 1
+    - property:
+        identifier: 'renderer_num_workers'
+      value: 32
   # measurements
   targetProperties:
     - identifier: "duration"
@@ -514,6 +575,19 @@ performance_testing-geospatial-endpoint-custom-dataset:
         variableType: 'DISCRETE_VARIABLE_TYPE'
         domainRange: [-1, 500] # -1 means no concurrency control
         interval: 1
+    - identifier: 'threadpool'
+      metadata:
+        description: "Enable threadpool for vLLM renderer (0=disabled, 1=enabled). When enabled, uses multiple workers for rendering."
+      propertyDomain:
+        variableType: 'CATEGORICAL_VARIABLE_TYPE'
+        values: [0, 1]
+    - identifier: 'renderer_num_workers'
+      metadata:
+        description: "Number of renderer workers when threadpool is enabled. Only used when threadpool=1."
+      propertyDomain:
+        variableType: 'DISCRETE_VARIABLE_TYPE'
+        domainRange: [1, 128]
+        interval: 1
   defaultParameterization:
     - value: 100
       property:
@@ -524,6 +598,12 @@ performance_testing-geospatial-endpoint-custom-dataset:
     - value: 1.0
       property:
         identifier: 'burstiness'
+    - property:
+        identifier: 'threadpool'
+      value: 1
+    - property:
+        identifier: 'renderer_num_workers'
+      value: 32
   # measurements
   targetProperties:
     - identifier: "duration"
@@ -588,6 +668,19 @@ performance_testing-geospatial-endpoint-guidellm:
       propertyDomain:
         variableType: "CATEGORICAL_VARIABLE_TYPE"
         values: ['india_url_in_b64_out', 'valencia_url_in_b64_out']
+    - identifier: 'threadpool'
+      metadata:
+        description: "Enable threadpool for vLLM renderer (0=disabled, 1=enabled). When enabled, uses multiple workers for rendering."
+      propertyDomain:
+        variableType: 'CATEGORICAL_VARIABLE_TYPE'
+        values: [0, 1]
+    - identifier: 'renderer_num_workers'
+      metadata:
+        description: "Number of renderer workers when threadpool is enabled. Only used when threadpool=1."
+      propertyDomain:
+        variableType: 'DISCRETE_VARIABLE_TYPE'
+        domainRange: [1, 128]
+        interval: 1
   defaultParameterization:
     - value: 100
       property:
@@ -601,6 +694,12 @@ performance_testing-geospatial-endpoint-guidellm:
     - property:
         identifier: 'dataset'
       value: 'india_url_in_b64_out'
+    - property:
+        identifier: 'threadpool'
+      value: 1
+    - property:
+        identifier: 'renderer_num_workers'
+      value: 32
   targetProperties:
     - identifier: "duration"
     - identifier: "completed"
@@ -661,6 +760,8 @@ performance_testing-geospatial-full-guidellm:
     - identifier: image
       metadata:
         description: "(deployment) Docker image to use to create vllm deployments"
+        vllm_version:
+          "your/image/with/vllm/and/terratorch:0.1": "0.20.0"
       propertyDomain:
         variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE"
         values: ["your/image/with/vllm/and/terratorch:0.1"]
@@ -740,6 +841,19 @@ performance_testing-geospatial-full-guidellm:
       propertyDomain:
         variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE"
         values: [None, "terratorch_segmentation"]
+    - identifier: 'threadpool'
+      metadata:
+        description: "Enable threadpool for vLLM renderer (0=disabled, 1=enabled). When enabled, uses multiple workers for rendering."
+      propertyDomain:
+        variableType: 'CATEGORICAL_VARIABLE_TYPE'
+        values: [0, 1]
+    - identifier: 'renderer_num_workers'
+      metadata:
+        description: "Number of renderer workers when threadpool is enabled. Only used when threadpool=1."
+      propertyDomain:
+        variableType: 'DISCRETE_VARIABLE_TYPE'
+        domainRange: [1, 128]
+        interval: 1
   defaultParameterization:
     - property:
         identifier: 'image'
@@ -792,6 +906,12 @@ performance_testing-geospatial-full-guidellm:
     - property:
         identifier: 'dataset'
       value: 'india_url_in_b64_out'
+    - property:
+        identifier: 'threadpool'
+      value: 1
+    - property:
+        identifier: 'renderer_num_workers'
+      value: 32
   targetProperties:
     - identifier: "duration"
     - identifier: "completed"
@@ -852,6 +972,8 @@ performance_testing-geospatial-guidellm-deployment-custom-dataset:
     - identifier: image
       metadata:
         description: "(deployment) Docker image to use to create vllm deployments"
+        vllm_version:
+          "your/image/with/vllm/and/terratorch:0.1": "0.20.0"
       propertyDomain:
         variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE"
         values: ["your/image/with/vllm/and/terratorch:0.1"]
@@ -933,6 +1055,19 @@ performance_testing-geospatial-guidellm-deployment-custom-dataset:
       propertyDomain:
         variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE"
         values: [None, "terratorch_segmentation"]
+    - identifier: 'threadpool'
+      metadata:
+        description: "Enable threadpool for vLLM renderer (0=disabled, 1=enabled). When enabled, uses multiple workers for rendering."
+      propertyDomain:
+        variableType: 'CATEGORICAL_VARIABLE_TYPE'
+        values: [0, 1]
+    - identifier: 'renderer_num_workers'
+      metadata:
+        description: "Number of renderer workers when threadpool is enabled. Only used when threadpool=1."
+      propertyDomain:
+        variableType: 'DISCRETE_VARIABLE_TYPE'
+        domainRange: [1, 128]
+        interval: 1
   defaultParameterization:
     - property:
         identifier: 'image'
@@ -982,6 +1117,12 @@ performance_testing-geospatial-guidellm-deployment-custom-dataset:
     - property:
         identifier: 'io_processor_plugin'
       value: "terratorch_segmentation"
+    - property:
+        identifier: 'threadpool'
+      value: 1
+    - property:
+        identifier: 'renderer_num_workers'
+      value: 32
   targetProperties:
     - identifier: "duration"
     - identifier: "completed"
@@ -1045,6 +1186,19 @@ performance_testing-geospatial-guidellm-endpoint-custom-dataset:
         variableType: 'DISCRETE_VARIABLE_TYPE'
         domainRange: [-1, 500] # -1 means no concurrency control
         interval: 1
+    - identifier: 'threadpool'
+      metadata:
+        description: "Enable threadpool for vLLM renderer (0=disabled, 1=enabled). When enabled, uses multiple workers for rendering."
+      propertyDomain:
+        variableType: 'CATEGORICAL_VARIABLE_TYPE'
+        values: [0, 1]
+    - identifier: 'renderer_num_workers'
+      metadata:
+        description: "Number of renderer workers when threadpool is enabled. Only used when threadpool=1."
+      propertyDomain:
+        variableType: 'DISCRETE_VARIABLE_TYPE'
+        domainRange: [1, 128]
+        interval: 1
   defaultParameterization:
     - value: 100
       property:
@@ -1055,6 +1209,12 @@ performance_testing-geospatial-guidellm-endpoint-custom-dataset:
     - value: 1.0
       property:
         identifier: 'burstiness'
+    - property:
+        identifier: 'threadpool'
+      value: 1
+    - property:
+        identifier: 'renderer_num_workers'
+      value: 32
   targetProperties:
     - identifier: "duration"
     - identifier: "completed"
diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8s/create_environment.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8s/create_environment.py
index 49439f11c..fa419adfc 100644
--- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8s/create_environment.py
+++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8s/create_environment.py
@@ -41,6 +41,8 @@ def create_test_environment(
     skip_tokenizer_init: bool = False,
     io_processor_plugin: str | None = None,
     otlp_traces_endpoint: pydantic.AnyUrl | None = None,
+    threadpool: int = 1,
+    renderer_num_workers: int = 32,
     check_interval: int = 5,
     timeout: int = 1200,
 ) -> None:
@@ -71,6 +73,8 @@ def create_test_environment(
     :param skip_tokenizer_init: flag to skip tokenizer initialization in vLLM
     :param io_processor_plugin: name of the IO processor plugin to be used by vLLM
     :param otlp_traces_endpoint: OpenTelemetry traces endpoint URL
+    :param threadpool: enable threadpool for vLLM renderer (0=disabled, 1=enabled)
+    :param renderer_num_workers: number of renderer workers when threadpool is enabled
     :param check_interval: wait interval in seconds
     :param timeout: timeout in seconds
     :return:
@@ -119,6 +123,8 @@ def create_test_environment(
         skip_tokenizer_init=skip_tokenizer_init,
         io_processor_plugin=io_processor_plugin,
         otlp_traces_endpoint=otlp_traces_endpoint,
+        threadpool=threadpool,
+        renderer_num_workers=renderer_num_workers,
     )
     logger.debug("deployment created")
     c_manager.wait_deployment_ready(
diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8s/manage_components.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8s/manage_components.py
index 24c197c89..adeb3a519 100644
--- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8s/manage_components.py
+++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8s/manage_components.py
@@ -262,6 +262,8 @@ def create_deployment(
         skip_tokenizer_init: bool = False,
         io_processor_plugin: str | None = None,
         otlp_traces_endpoint: pydantic.AnyUrl | None = None,
+        threadpool: int = 1,
+        renderer_num_workers: int = 32,
     ) -> None:
         """
         create deployment for model
@@ -286,6 +288,8 @@ def create_deployment(
         :param skip_tokenizer_init: flag to skip tokenizer initialization in vLLM
         :param io_processor_plugin: name of the IO processor plugin to be used by vLLM
         :param otlp_traces_endpoint: OpenTelemetry traces endpoint URL
+        :param threadpool: enable threadpool for vLLM renderer (0=disabled, 1=enabled)
+        :param renderer_num_workers: number of renderer workers when threadpool is enabled
         :return:
         """
         if node_selector is None:
@@ -314,6 +318,8 @@ def create_deployment(
             io_processor_plugin=io_processor_plugin,
             enforce_eager=enforce_eager,
             otlp_traces_endpoint=otlp_traces_endpoint,
+            threadpool=threadpool,
+            renderer_num_workers=renderer_num_workers,
         )
         logger.debug(json.dumps(deployment_yaml, indent=2))
 
diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8s/yaml_support/build_components.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8s/yaml_support/build_components.py
index 891bca66f..9d7b73f42 100644
--- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8s/yaml_support/build_components.py
+++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8s/yaml_support/build_components.py
@@ -93,6 +93,8 @@ def deployment_yaml(
         skip_tokenizer_init: bool = False,
         io_processor_plugin: str | None = None,
         otlp_traces_endpoint: pydantic.AnyUrl | None = None,
+        threadpool: int = 1,
+        renderer_num_workers: int = 32,
     ) -> dict[str, Any]:
         """
         Generate deployment yaml
@@ -116,6 +118,8 @@ def deployment_yaml(
         :param enforce_eager: flag to enforce using Pytorch eager mode
         :param skip_tokenizer_init: flag to skip tokenizer initialization in vLLM
         :param io_processor_plugin: name of the IO processor plugin to be used by vLLM
+        :param threadpool: enable threadpool for vLLM renderer (0=disabled, 1=enabled)
+        :param renderer_num_workers: number of renderer workers when threadpool is enabled
         :return:
         """
         if node_selector is None:
@@ -174,6 +178,8 @@ def deployment_yaml(
 
         vllm_serve_args = [
             model,
+            "--max-num-seqs",
+            "256",
             "--max-num-batched-tokens",
             f"{max_batch_tokens}",
             "--gpu-memory-utilization",
@@ -188,6 +194,17 @@ def deployment_yaml(
             dtype.value,
         ]
 
+        # Add threadpool arguments if enabled
+        if threadpool == 1:
+            vllm_serve_args.extend(
+                [
+                    "--renderer-num-workers",
+                    str(renderer_num_workers),
+                    "--mm-processor-cache-gb",
+                    "0",
+                ]
+            )
+
         if enforce_eager:
             vllm_serve_args.append("--enforce-eager")
         if skip_tokenizer_init:
diff --git a/plugins/actuators/vllm_performance/yamls/test_geospatial_threadpool.yaml b/plugins/actuators/vllm_performance/yamls/test_geospatial_threadpool.yaml
new file mode 100644
index 000000000..22d955656
--- /dev/null
+++ b/plugins/actuators/vllm_performance/yamls/test_geospatial_threadpool.yaml
@@ -0,0 +1,29 @@
+# Copyright IBM Corporation 2025, 2026
+# SPDX-License-Identifier: MIT
+# Test discoveryspace for geospatial experiments with threadpool properties
+entitySpace:
+  - identifier: model
+    propertyDomain:
+      values:
+        - ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11
+  - identifier: endpoint
+    propertyDomain:
+      values:
+        - http://localhost:8000
+  - identifier: request_rate
+    propertyDomain:
+      values: [10]
+  - identifier: threadpool
+    propertyDomain:
+      values: [0, 1]
+  - identifier: renderer_num_workers
+    propertyDomain:
+      values: [16, 32, 64]
+experiments:
+  - actuatorIdentifier: vllm_performance
+    experimentIdentifier: test-geospatial-endpoint-v1
+metadata:
+  description: Test space for geospatial experiments with threadpool configuration
+  name: geospatial_threadpool_test
+
+# Made with Bob

From 4ffc667c3b2fa402b77fbd456f286a10f760f416 Mon Sep 17 00:00:00 2001
From: Michele Gazzetti <michele.gazzetti1@ibm.com>
Date: Wed, 3 Jun 2026 11:44:23 +0100
Subject: [PATCH 2/7] feat(vllm_performance): add logs

Signed-off-by: Michele Gazzetti <michele.gazzetti1@ibm.com>
---
 .../vllm_performance/experiment_executor.py   | 42 +++++++++++++++++--
 1 file changed, 38 insertions(+), 4 deletions(-)

diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py
index 4086bfc1e..27896c355 100644
--- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py
+++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py
@@ -67,12 +67,25 @@ def _get_vllm_version_from_metadata(
     Returns:
         Version string if found in metadata, None otherwise
     """
+    logger.debug(
+        f"_get_vllm_version_from_metadata called for experiment={experiment.identifier}, "
+        f"optionalProperties={experiment.optionalProperties},"
+        f"requiredProperties={experiment.requiredProperties}"
+    )
+
     # Look for image property in experiment's optional or required properties
     for prop in experiment.optionalProperties + experiment.requiredProperties:
-        if prop.identifier == "image" and prop.metadata:
-            vllm_version_map = prop.metadata.get("vllm_version", {})
-            if isinstance(vllm_version_map, dict):
-                return vllm_version_map.get(image_name)
+        if prop.identifier == "image":
+            logger.debug(f"Found image property with metadata: {prop.metadata}")
+            if prop.metadata:
+                vllm_version_map = prop.metadata.get("vllm_version", {})
+                logger.debug(f"vllm_version_map: {vllm_version_map}")
+                if isinstance(vllm_version_map, dict):
+                    vllm_version = vllm_version_map.get(image_name)
+                    logger.debug(f"Version lookup for {image_name}: {vllm_version}")
+                    return vllm_version
+
+    logger.debug(f"No vLLM version found in metadata for image {image_name}")
     return None
 
 
@@ -95,12 +108,19 @@ def _should_enable_threadpool(
     Returns:
         True if threadpool should be enabled, False otherwise
     """
+    logger.debug(
+        f"_should_enable_threadpool called with: image_name={image_name}, "
+        f"threadpool_value={threadpool_value}, experiment_id={experiment.identifier}"
+    )
+
     # If user explicitly disabled, respect that
     if threadpool_value == 0:
+        logger.debug("Threadpool explicitly disabled by user (threadpool_value=0)")
         return False
 
     # Get version from metadata
     vllm_version_str = _get_vllm_version_from_metadata(experiment, image_name)
+    logger.debug(f"Retrieved vLLM version from metadata: {vllm_version_str}")
 
     # If no version metadata, assume it's supported (backward compatible)
     if vllm_version_str is None:
@@ -114,6 +134,9 @@ def _should_enable_threadpool(
     try:
         vllm_ver = version.parse(vllm_version_str)
         min_version = version.parse("0.20.0")
+        logger.debug(
+            f"Parsed versions - vLLM: {vllm_ver}, minimum required: {min_version}"
+        )
 
         if vllm_ver < min_version:
             logger.info(
@@ -130,6 +153,7 @@ def _should_enable_threadpool(
     except Exception as e:
         logger.error(
             f"Failed to parse vLLM version '{vllm_version_str}' for image {image_name}: {e}. "
+            "Assuming threadpool is supported."
         )
         return True
 
@@ -278,12 +302,22 @@ def _create_environment(
                     # Determine if threadpool should be enabled based on version
                     image_name = values.get("image", "")
                     threadpool_requested = int(values.get("threadpool", 1))
+                    logger.debug(
+                        f"Before _should_enable_threadpool: image_name={image_name}, "
+                        f"threadpool_requested={threadpool_requested}"
+                    )
                     enable_threadpool = _should_enable_threadpool(
                         experiment, image_name, threadpool_requested
                     )
+                    logger.debug(
+                        f"After _should_enable_threadpool: enable_threadpool={enable_threadpool}"
+                    )
 
                     # Convert boolean back to int for consistency with existing code
                     threadpool_value = 1 if enable_threadpool else 0
+                    logger.debug(
+                        f"Final threadpool_value to be used: {threadpool_value}"
+                    )
 
                     create_test_environment(
                         k8s_name=env.k8s_name,

From 1f4e2abc1d6c8f5533257a637f7f5b9859ecda4f Mon Sep 17 00:00:00 2001
From: Michele Gazzetti <michele.gazzetti1@ibm.com>
Date: Wed, 3 Jun 2026 13:37:46 +0100
Subject: [PATCH 3/7] fix(vllm_performance): add vllm version ref

Signed-off-by: Michele Gazzetti <michele.gazzetti1@ibm.com>
---
 .../experiments/performance_testing_geospatial.yaml              | 1 +
 1 file changed, 1 insertion(+)

diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml
index 07f67482c..407a77e42 100644
--- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml
+++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml
@@ -148,6 +148,7 @@ performance_testing-geospatial-full:
       metadata:
         description: "(deployment) Docker image to use to create vllm deployments"
         vllm_version:
+          "icr.io/drl-nextgen/mgazz/vllm:v0.18.0-tt.v1.2.5": "0.18.0"
           "your/image/with/vllm/and/terratorch:0.1": "0.20.0"
       propertyDomain:
         variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE"

From c2676765bd984627e0fc7f91ee043e56fa6c86b3 Mon Sep 17 00:00:00 2001
From: Michele Gazzetti <michele.gazzetti1@ibm.com>
Date: Thu, 4 Jun 2026 06:57:43 +0100
Subject: [PATCH 4/7] feat(vllm): add threadpool experiment properties for
 geospatial deployments

Allow PropertyValue to store dict values, and make _build_point_group_values() convert dict/list values to hashable tuples so that grouped sampling can hash points containing them.

Add threadpool and renderer_num_workers support through the vLLM actuator deployment pipeline and the geospatial test YAMLs (experiment_executor.py, build_components.py, and related config files).

include tests and implementation notes in test_experiment_executor.py and ado-threadpool-threadpool-property-implementation.md

Signed-off-by: Michele Gazzetti <michele.gazzetti1@ibm.com>
---
 .../core/discoveryspace/group_samplers.py     |  17 ++-
 orchestrator/schema/property_value.py         |  10 +-
 .../vllm_performance/experiment_executor.py   |  89 ++++++++-------
 .../experiments/performance_testing.yaml      |   8 +-
 .../performance_testing_geospatial.yaml       |  21 ++--
 .../tests/test_experiment_executor.py         |  64 +++++++++++
 tests/core/test_group_samplers.py             | 106 ++++++++++++++++++
 tests/schema/test_property_value.py           |  14 ++-
 8 files changed, 259 insertions(+), 70 deletions(-)
 create mode 100644 plugins/actuators/vllm_performance/tests/test_experiment_executor.py

diff --git a/orchestrator/core/discoveryspace/group_samplers.py b/orchestrator/core/discoveryspace/group_samplers.py
index 051c64352..05028f48e 100644
--- a/orchestrator/core/discoveryspace/group_samplers.py
+++ b/orchestrator/core/discoveryspace/group_samplers.py
@@ -55,9 +55,24 @@ def _build_point_group_values(
 ) -> frozenset[tuple[str, Any]]:
     """
     :return: A frozen set of (key,value) pairs
+
+    Note: Converts unhashable values (dict, list) to hashable representations
     """
 
-    return frozenset({(k, v) for k, v in point.items() if k in group})
+    def make_hashable(
+        value: float | list | str | dict | None,
+    ) -> int | float | tuple | str | None:
+        """Convert unhashable types to hashable equivalents"""
+        if isinstance(value, dict):
+            # Convert dict to sorted tuple of items
+            return tuple(sorted(value.items()))
+        if isinstance(value, list):
+            # Convert list to tuple
+            return tuple(value)
+        # Return value as-is if already hashable
+        return value  # type: ignore[return-value]
+
+    return frozenset({(k, make_hashable(v)) for k, v in point.items() if k in group})
 
 
 def _build_groups_dict(
diff --git a/orchestrator/schema/property_value.py b/orchestrator/schema/property_value.py
index 038085bb2..529facc12 100644
--- a/orchestrator/schema/property_value.py
+++ b/orchestrator/schema/property_value.py
@@ -59,7 +59,7 @@ class PropertyValue(pydantic.BaseModel):
         ),
     ] = None
     value: Annotated[
-        int | float | list | str | CustomBytes | None,
+        int | float | list | str | CustomBytes | dict | None,
         pydantic.Field(description="The measured value."),
     ]
     property: Annotated[
@@ -86,9 +86,9 @@ def convert_property_to_descriptor(
     @pydantic.field_validator("value")
     def check_value_type(
         cls,
-        value: float | list | str | CustomBytes | None,
+        value: float | list | str | CustomBytes | dict | None,
         context: pydantic.ValidationInfo,
-    ) -> int | float | list | str | CustomBytes | None:
+    ) -> int | float | list | str | CustomBytes | dict | None:
 
         valueType = context.data.get("valueType")
         if valueType:
@@ -111,7 +111,7 @@ def check_value_type(
                     if type(value) not in {float, int} and value is not None:
                         raise ValueError("Validation failed for NUMERIC_VALUE_TYPE")
             elif valueType == ValueTypeEnum.STRING_VALUE_TYPE:
-                if not isinstance(value, str):
+                if not isinstance(value, (str, dict)):
                     raise ValueError(
                         f"ValueType was string but Value was of type {type(value)}"
                     )
@@ -155,6 +155,8 @@ def set_value_type(self) -> "PropertyValue":
                 self.valueType = ValueTypeEnum.BLOB_VALUE_TYPE
             elif isinstance(self.value, list):
                 self.valueType = ValueTypeEnum.VECTOR_VALUE_TYPE
+            elif isinstance(self.value, dict):
+                self.valueType = ValueTypeEnum.STRING_VALUE_TYPE
         elif self.valueType == ValueTypeEnum.NUMERIC_VALUE_TYPE and isinstance(
             self.value, str
         ):
diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py
index 27896c355..386aa4abc 100644
--- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py
+++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py
@@ -54,63 +54,49 @@
 logger = logging.getLogger(__name__)
 
 
-def _get_vllm_version_from_metadata(
-    experiment: Experiment, image_name: str
-) -> str | None:
+def _get_vllm_version_from_image_value(image_value: dict | str) -> str | None:
     """
-    Extract vLLM version from experiment metadata for a given image.
+    Extract vLLM version from image property value.
 
     Args:
-        experiment: The experiment object containing metadata
-        image_name: The image name to look up version for
+        image_value: The image property value, either a dict with 'image' and 'vllm_version' keys,
+                    or a string (for backward compatibility)
 
     Returns:
-        Version string if found in metadata, None otherwise
+        Version string if found in dict, None otherwise
     """
-    logger.debug(
-        f"_get_vllm_version_from_metadata called for experiment={experiment.identifier}, "
-        f"optionalProperties={experiment.optionalProperties},"
-        f"requiredProperties={experiment.requiredProperties}"
-    )
+    logger.debug(f"_get_vllm_version_from_image_value called with: {image_value}")
+
+    # If image_value is a dict with vllm_version, extract it
+    if isinstance(image_value, dict):
+        version = image_value.get("vllm_version")
+        logger.debug(f"Extracted vLLM version from dict: {version}")
+        return version
 
-    # Look for image property in experiment's optional or required properties
-    for prop in experiment.optionalProperties + experiment.requiredProperties:
-        if prop.identifier == "image":
-            logger.debug(f"Found image property with metadata: {prop.metadata}")
-            if prop.metadata:
-                vllm_version_map = prop.metadata.get("vllm_version", {})
-                logger.debug(f"vllm_version_map: {vllm_version_map}")
-                if isinstance(vllm_version_map, dict):
-                    version = vllm_version_map.get(image_name)
-                    logger.debug(f"Version lookup for {image_name}: {version}")
-                    return version
-
-    logger.debug(f"No vLLM version found in metadata for image {image_name}")
+    # For backward compatibility: if it's a string, we don't have version info
+    logger.debug("Image value is a string, no version info available")
     return None
 
 
-def _should_enable_threadpool(
-    experiment: Experiment, image_name: str, threadpool_value: int
-) -> bool:
+def _should_enable_threadpool(image_value: dict | str, threadpool_value: int) -> bool:
     """
     Determine if threadpool should be enabled based on vLLM version and user preference.
 
     Threadpool is only supported in vLLM >= 0.20.0. This function checks:
     1. If user explicitly disabled threadpool (threadpool=0), return False
-    2. If vLLM version metadata exists and version < 0.20.0, return False
+    2. If vLLM version exists in image_value dict and version < 0.20.0, return False
     3. Otherwise, return True (user wants it and version supports it or no version info)
 
     Args:
-        experiment: The experiment object containing metadata
-        image_name: The image name to check version for
+        image_value: The image property value (dict with 'image' and 'vllm_version' or string)
         threadpool_value: User's threadpool preference (0 or 1)
 
     Returns:
         True if threadpool should be enabled, False otherwise
     """
     logger.debug(
-        f"_should_enable_threadpool called with: image_name={image_name}, "
-        f"threadpool_value={threadpool_value}, experiment_id={experiment.identifier}"
+        f"_should_enable_threadpool called with: image_value={image_value}, "
+        f"threadpool_value={threadpool_value}"
     )
 
     # If user explicitly disabled, respect that
@@ -118,14 +104,14 @@ def _should_enable_threadpool(
         logger.debug("Threadpool explicitly disabled by user (threadpool_value=0)")
         return False
 
-    # Get version from metadata
-    vllm_version_str = _get_vllm_version_from_metadata(experiment, image_name)
-    logger.debug(f"Retrieved vLLM version from metadata: {vllm_version_str}")
+    # Get version from image value
+    vllm_version_str = _get_vllm_version_from_image_value(image_value)
+    logger.debug(f"Retrieved vLLM version: {vllm_version_str}")
 
-    # If no version metadata, assume it's supported (backward compatible)
+    # If no version info, assume it's supported (backward compatible)
     if vllm_version_str is None:
         logger.warning(
-            f"No vLLM version metadata found for image {image_name}. "
+            f"No vLLM version info found for image {image_value}. "
             "Assuming threadpool is supported."
         )
         return True
@@ -141,18 +127,18 @@ def _should_enable_threadpool(
         if vllm_ver < min_version:
             logger.info(
                 f"Threadpool disabled: vLLM version {vllm_version_str} < 0.20.0 "
-                f"for image {image_name}"
+                f"for image {image_value}"
             )
             return False
 
         logger.info(
             f"Threadpool enabled: vLLM version {vllm_version_str} >= 0.20.0 "
-            f"for image {image_name}"
+            f"for image {image_value}"
         )
         return True
     except Exception as e:
         logger.error(
-            f"Failed to parse vLLM version '{vllm_version_str}' for image {image_name}: {e}. "
+            f"Failed to parse vLLM version '{vllm_version_str}' for image {image_value}: {e}. "
             "Assuming threadpool is supported."
         )
         return True
@@ -176,9 +162,16 @@ def _build_entity_env(values: dict[str, str]) -> str:
     :param values: experiment values
     :return: definition
     """
+    # Extract image string from dict if needed
+    image_value = values.get("image")
+    if isinstance(image_value, dict):
+        image_str = image_value.get("image")
+    else:
+        image_str = image_value
+
     env_values = {
         "model": values.get("model"),
-        "image": values.get("image"),
+        "image": image_str,
         "n_gpus": values.get("n_gpus"),
         "gpu_type": values.get("gpu_type"),
         "n_cpus": values.get("n_cpus"),
@@ -300,14 +293,14 @@ def _create_environment(
                 )
                 try:
                     # Determine if threadpool should be enabled based on version
-                    image_name = values.get("image", "")
+                    image_value = values.get("image", "")
                     threadpool_requested = int(values.get("threadpool", 1))
                     logger.debug(
-                        f"Before _should_enable_threadpool: image_name={image_name}, "
+                        f"Before _should_enable_threadpool: image_value={image_value}, "
                         f"threadpool_requested={threadpool_requested}"
                     )
                     enable_threadpool = _should_enable_threadpool(
-                        experiment, image_name, threadpool_requested
+                        image_value, threadpool_requested
                     )
                     logger.debug(
                         f"After _should_enable_threadpool: enable_threadpool={enable_threadpool}"
@@ -319,6 +312,12 @@ def _create_environment(
                         f"Final threadpool_value to be used: {threadpool_value}"
                     )
 
+                    # Extract image string from dict if needed
+                    if isinstance(image_value, dict):
+                        image_name = image_value.get("image", "")
+                    else:
+                        image_name = image_value
+
                     create_test_environment(
                         k8s_name=env.k8s_name,
                         model=model,
diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing.yaml b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing.yaml
index e94a1060a..d32d07e1a 100644
--- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing.yaml
+++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing.yaml
@@ -60,7 +60,9 @@ test-deployment-v1:
         description: "(deployment) Docker image to use to create vllm deployments"
       propertyDomain:
         variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE"
-        values: ["vllm/vllm-openai:v0.14.0"]
+        values:
+          - {image: "vllm/vllm-openai:v0.14.0", vllm_version: "0.14.0"}
+          - {image: "vllm/vllm-openai:latest", vllm_version: "0.21.0"}
     - identifier: n_cpus
       metadata:
         description: "(deployment) the number of CPUs to use"
@@ -128,7 +130,7 @@ test-deployment-v1:
   defaultParameterization:
     - property:
         identifier: 'image'
-      value: "vllm/vllm-openai:v0.14.0"
+      value: {image: "vllm/vllm-openai:v0.14.0", vllm_version: "0.14.0"}
     - property:
         identifier: n_cpus
       value: 8
@@ -461,7 +463,7 @@ test-deployment-guidellm-v1:
   defaultParameterization:
     - property:
         identifier: 'image'
-      value: "vllm/vllm-openai:v0.14.0"
+      value: {image: "vllm/vllm-openai:v0.14.0", vllm_version: "0.14.0"}
     - property:
         identifier: n_cpus
       value: 8
diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml
index 407a77e42..0320e59ec 100644
--- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml
+++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml
@@ -147,12 +147,11 @@ performance_testing-geospatial-full:
     - identifier: image
       metadata:
         description: "(deployment) Docker image to use to create vllm deployments"
-        vllm_version:
-          "icr.io/drl-nextgen/mgazz/vllm:v0.18.0-tt.v1.2.5": "0.18.0"
-          "your/image/with/vllm/and/terratorch:0.1": "0.20.0"
       propertyDomain:
         variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE"
-        values: ["your/image/with/vllm/and/terratorch:0.1"]
+        values:
+          - {image: "your/image/with/vllm/and/terratorch:0.1", vllm_version: "0.20.0"}
+          - {image: "icr.io/drl-nextgen/mgazz/vllm:v0.18.0-tt.v1.2.5", vllm_version: "0.18.0"}
     - identifier: n_cpus
       metadata:
         description: "(deployment) the number of CPUs to use"
@@ -245,7 +244,7 @@ performance_testing-geospatial-full:
   defaultParameterization:
     - property:
         identifier: 'image'
-      value: "your/image/with/vllm/and/terratorch:0.1"
+      value: {image: "your/image/with/vllm/and/terratorch:0.1", vllm_version: "0.20.0"}
     - property:
         identifier: n_cpus
       value: 8
@@ -362,8 +361,6 @@ performance_testing-geospatial-full-custom-dataset:
     - identifier: image
       metadata:
         description: "(deployment) Docker image to use to create vllm deployments"
-        vllm_version:
-          "your/image/with/vllm/and/terratorch:0.1": "0.20.0"
       propertyDomain:
         variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE"
         values: ["your/image/with/vllm/and/terratorch:0.1"]
@@ -459,7 +456,7 @@ performance_testing-geospatial-full-custom-dataset:
   defaultParameterization:
     - property:
         identifier: 'image'
-      value: "your/image/with/vllm/and/terratorch:0.1"
+      value: {image: "your/image/with/vllm/and/terratorch:0.1", vllm_version: "0.20.0"}
     - property:
         identifier: n_cpus
       value: 8
@@ -761,8 +758,6 @@ performance_testing-geospatial-full-guidellm:
     - identifier: image
       metadata:
         description: "(deployment) Docker image to use to create vllm deployments"
-        vllm_version:
-          "your/image/with/vllm/and/terratorch:0.1": "0.20.0"
       propertyDomain:
         variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE"
         values: ["your/image/with/vllm/and/terratorch:0.1"]
@@ -858,7 +853,7 @@ performance_testing-geospatial-full-guidellm:
   defaultParameterization:
     - property:
         identifier: 'image'
-      value: "your/image/with/vllm/and/terratorch:0.1"
+      value: {image: "your/image/with/vllm/and/terratorch:0.1", vllm_version: "0.20.0"}
     - property:
         identifier: n_cpus
       value: 8
@@ -973,8 +968,6 @@ performance_testing-geospatial-guidellm-deployment-custom-dataset:
     - identifier: image
       metadata:
         description: "(deployment) Docker image to use to create vllm deployments"
-        vllm_version:
-          "your/image/with/vllm/and/terratorch:0.1": "0.20.0"
       propertyDomain:
         variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE"
         values: ["your/image/with/vllm/and/terratorch:0.1"]
@@ -1072,7 +1065,7 @@ performance_testing-geospatial-guidellm-deployment-custom-dataset:
   defaultParameterization:
     - property:
         identifier: 'image'
-      value: "your/image/with/vllm/and/terratorch:0.1"
+      value: {image: "your/image/with/vllm/and/terratorch:0.1", vllm_version: "0.20.0"}
     - property:
         identifier: n_cpus
       value: 8
diff --git a/plugins/actuators/vllm_performance/tests/test_experiment_executor.py b/plugins/actuators/vllm_performance/tests/test_experiment_executor.py
new file mode 100644
index 000000000..645aff82a
--- /dev/null
+++ b/plugins/actuators/vllm_performance/tests/test_experiment_executor.py
@@ -0,0 +1,64 @@
+# Copyright IBM Corporation 2025, 2026
+# SPDX-License-Identifier: MIT
+
+"""
+Unit tests for experiment_executor module functions.
+Tests version extraction from image property values.
+"""
+
+from ado_actuators.vllm_performance.experiment_executor import (
+    _get_vllm_version_from_image_value,
+)
+
+
+class TestGetVllmVersionFromImageValue:
+    """Test suite for _get_vllm_version_from_image_value function"""
+
+    def test_version_extraction_from_dict_value(self) -> None:
+        """Test extracting vLLM version from dict image value"""
+        image_value = {
+            "image": "icr.io/drl-nextgen/mgazz/vllm:v0.18.0-tt.v1.2.5",
+            "vllm_version": "0.18.0",
+        }
+
+        version = _get_vllm_version_from_image_value(image_value)
+        assert version == "0.18.0"
+
+    def test_version_extraction_from_another_dict_value(self) -> None:
+        """Test extracting vLLM version from another dict image value"""
+        image_value = {
+            "image": "vllm/vllm-openai:v0.14.0",
+            "vllm_version": "0.14.0",
+        }
+
+        version = _get_vllm_version_from_image_value(image_value)
+        assert version == "0.14.0"
+
+    def test_version_extraction_returns_none_for_string_value(self) -> None:
+        """Test that None is returned when image value is a string (backward compatibility)"""
+        image_value = "icr.io/drl-nextgen/mgazz/vllm:v0.18.0-tt.v1.2.5"
+
+        version = _get_vllm_version_from_image_value(image_value)
+        assert version is None
+
+    def test_version_extraction_returns_none_for_dict_without_version(self) -> None:
+        """Test that None is returned when dict doesn't have vllm_version key"""
+        image_value = {
+            "image": "icr.io/drl-nextgen/mgazz/vllm:v0.18.0-tt.v1.2.5",
+        }
+
+        version = _get_vllm_version_from_image_value(image_value)
+        assert version is None
+
+    def test_version_extraction_with_latest_tag(self) -> None:
+        """Test extracting version for latest tag"""
+        image_value = {
+            "image": "vllm/vllm-openai:latest",
+            "vllm_version": "0.21.0",
+        }
+
+        version = _get_vllm_version_from_image_value(image_value)
+        assert version == "0.21.0"
+
+
+# Made with Bob
diff --git a/tests/core/test_group_samplers.py b/tests/core/test_group_samplers.py
index 2e1204fbd..1de76c494 100644
--- a/tests/core/test_group_samplers.py
+++ b/tests/core/test_group_samplers.py
@@ -10,6 +10,7 @@
     RandomGroupSampleSelector,
     SequentialGroupSampleSelector,
     _build_groups_dict,
+    _build_point_group_values,
     _get_space_matching_points,
 )
 from orchestrator.core.discoveryspace.samplers import (
@@ -312,6 +313,111 @@ async def test_group_sampler_sequential_remote(
         ), "Expected for selectors that the number of entities iterated is equal to number matching entities in source"
 
 
+def test_build_point_group_values_with_unhashable_types() -> None:
+    """Test that _build_point_group_values handles dict and list values correctly."""
+
+    # Test with dictionary values (like the image property in the geospatial case)
+    point_with_dict = {
+        "model": "test-model",
+        "image": {"image": "icr.io/test:v1", "vllm_version": "0.18.0"},
+        "n_gpus": 1,
+        "memory": "128Gi",
+    }
+
+    group = ["model", "image", "n_gpus"]
+
+    # This should not raise TypeError: unhashable type: 'dict'
+    result = _build_point_group_values(point=point_with_dict, group=group)
+
+    # Verify the result is a frozenset
+    assert isinstance(result, frozenset)
+
+    # Verify the dict was converted to a tuple of sorted items
+    assert ("model", "test-model") in result
+    assert ("n_gpus", 1) in result
+
+    # The dict should be converted to a tuple of sorted items
+    image_tuple = tuple(
+        sorted({"image": "icr.io/test:v1", "vllm_version": "0.18.0"}.items())
+    )
+    assert ("image", image_tuple) in result
+
+    # Test with list values
+    point_with_list = {
+        "model": "test-model",
+        "tags": ["tag1", "tag2", "tag3"],
+        "n_gpus": 1,
+    }
+
+    group_with_list = ["model", "tags"]
+    result_with_list = _build_point_group_values(
+        point=point_with_list, group=group_with_list
+    )
+
+    assert isinstance(result_with_list, frozenset)
+    assert ("model", "test-model") in result_with_list
+    # The list should be converted to a tuple
+    assert ("tags", ("tag1", "tag2", "tag3")) in result_with_list
+
+    # Test that the same dict values produce the same hash
+    point_with_dict2 = {
+        "model": "test-model",
+        "image": {
+            "vllm_version": "0.18.0",
+            "image": "icr.io/test:v1",
+        },  # Different order
+        "n_gpus": 1,
+        "memory": "128Gi",
+    }
+
+    result2 = _build_point_group_values(point=point_with_dict2, group=group)
+
+    # Should be equal because dict items are sorted
+    assert result == result2
+
+
+def test_build_groups_dict_with_unhashable_values() -> None:
+    """Test that _build_groups_dict correctly groups points with dict values."""
+
+    points = [
+        {
+            "model": "model-a",
+            "image": {"image": "icr.io/test:v1", "vllm_version": "0.18.0"},
+            "n_gpus": 1,
+        },
+        {
+            "model": "model-a",
+            "image": {"image": "icr.io/test:v1", "vllm_version": "0.18.0"},
+            "n_gpus": 2,
+        },
+        {
+            "model": "model-a",
+            "image": {"image": "icr.io/test:v2", "vllm_version": "0.20.1"},
+            "n_gpus": 1,
+        },
+    ]
+
+    group = ["model", "image"]
+
+    # This should not raise TypeError
+    groups = _build_groups_dict(points=points, group=group)
+
+    # Should have 2 groups (model-a with v1 image, and model-a with v2 image)
+    assert len(groups) == 2
+
+    # Each group should contain the correct points
+    for group_key, group_points in groups.items():
+        if (
+            "image",
+            tuple(
+                sorted({"image": "icr.io/test:v1", "vllm_version": "0.18.0"}.items())
+            ),
+        ) in group_key:
+            assert len(group_points) == 2  # Two points with v1 image
+        else:
+            assert len(group_points) == 1  # One point with v2 image
+
+
 @pytest.mark.asyncio
 async def test_group_sample_generator_fail_on_continuous_space() -> None:
 
diff --git a/tests/schema/test_property_value.py b/tests/schema/test_property_value.py
index b8654a62e..34e547d8e 100644
--- a/tests/schema/test_property_value.py
+++ b/tests/schema/test_property_value.py
@@ -24,10 +24,11 @@ def python_type_value_examples() -> dict[type, tuple[ValueTypeEnum, typing.Any]]
         str: (ValueTypeEnum.STRING_VALUE_TYPE, "string"),
         list: (ValueTypeEnum.VECTOR_VALUE_TYPE, [0, "a", 10]),
         bytes: (ValueTypeEnum.BLOB_VALUE_TYPE, b"PNG\r89\n\x1a\n\x00\x00"),
+        dict: (ValueTypeEnum.STRING_VALUE_TYPE, {"key": "value", "number": 42}),
     }
 
 
-@pytest.fixture(params=[int, float, str, bytes, list, type(None)])
+@pytest.fixture(params=[int, float, str, bytes, list, dict, type(None)])
 def value_example(
     python_type_value_examples: dict[type, tuple[ValueTypeEnum, typing.Any]],
     request: pytest.FixtureRequest,
@@ -36,7 +37,7 @@ def value_example(
     return python_type_value_examples[request.param]
 
 
-@pytest.fixture(params=[int, float, str, bytes, list, type(None)])
+@pytest.fixture(params=[int, float, str, bytes, list, dict, type(None)])
 def test_value_example(
     python_type_value_examples: dict[type, tuple[ValueTypeEnum, typing.Any]],
     request: pytest.FixtureRequest,
@@ -45,7 +46,7 @@ def test_value_example(
     return python_type_value_examples[request.param]
 
 
-@pytest.fixture(params=[int, float, str, bytes, list, type(None)])
+@pytest.fixture(params=[int, float, str, bytes, list, dict, type(None)])
 def property_value(
     request: pytest.FixtureRequest,
 ) -> tuple[ConstitutivePropertyValue, type]:
@@ -68,6 +69,10 @@ def property_value(
         )
     elif request.param is list:
         val = ConstitutivePropertyValue(value=[0, "a", 10], property=prop.descriptor())
+    elif request.param is dict:
+        val = ConstitutivePropertyValue(
+            value={"key": "value", "number": 42}, property=prop.descriptor()
+        )
     elif request.param is type(None):
         val = ConstitutivePropertyValue(value=None, property=prop.descriptor())
     else:
@@ -209,6 +214,9 @@ def test_type_detection(property_value: tuple[PropertyValue, type]) -> None:
         assert val.valueType == ValueTypeEnum.VECTOR_VALUE_TYPE
     elif value_type is bytes:
         assert val.valueType == ValueTypeEnum.BLOB_VALUE_TYPE
+    elif value_type is dict:
+        # Dict values are treated as STRING_VALUE_TYPE (they're serialized as strings)
+        assert val.valueType == ValueTypeEnum.STRING_VALUE_TYPE
     elif value_type is type(None):
         # Treating None as a Numeric type currently
         assert val.valueType == ValueTypeEnum.NUMERIC_VALUE_TYPE

From b08a3a422958a1e8d69772f9534aea88a7f6f1ac Mon Sep 17 00:00:00 2001
From: Michele Gazzetti <michele.gazzetti1@ibm.com>
Date: Thu, 4 Jun 2026 17:43:53 +0100
Subject: [PATCH 5/7] refactor: migrate image property from dict to list format

Replace dict-based image property representation with list-based format
for better compatibility with PropertyValue schema.

Changes:
- Remove dict support from PropertyValue schema (property_value.py)
- Update vLLM actuator to use list format [image_url, vllm_version]
- Migrate experiment YAML files to list-based image values
- Update all related tests to reflect list-based approach
- Remove obsolete dict-based group sampler tests

The new format uses a simple list where:
- First element: image URL string
- Second element (optional): vLLM version string

This simplifies the schema and maintains backward compatibility
with string-only image values.
---
 orchestrator/schema/property_value.py         |  10 +-
 .../vllm_performance/experiment_executor.py   |  37 +++---
 .../experiments/performance_testing.yaml      |   8 +-
 .../performance_testing_geospatial.yaml       |  12 +-
 .../tests/test_experiment_executor.py         |  42 +++----
 tests/core/test_group_samplers.py             | 106 ------------------
 tests/schema/test_property_value.py           |  14 +--
 7 files changed, 58 insertions(+), 171 deletions(-)

diff --git a/orchestrator/schema/property_value.py b/orchestrator/schema/property_value.py
index 529facc12..038085bb2 100644
--- a/orchestrator/schema/property_value.py
+++ b/orchestrator/schema/property_value.py
@@ -59,7 +59,7 @@ class PropertyValue(pydantic.BaseModel):
         ),
     ] = None
     value: Annotated[
-        int | float | list | str | CustomBytes | dict | None,
+        int | float | list | str | CustomBytes | None,
         pydantic.Field(description="The measured value."),
     ]
     property: Annotated[
@@ -86,9 +86,9 @@ def convert_property_to_descriptor(
     @pydantic.field_validator("value")
     def check_value_type(
         cls,
-        value: float | list | str | CustomBytes | dict | None,
+        value: float | list | str | CustomBytes | None,
         context: pydantic.ValidationInfo,
-    ) -> int | float | list | str | CustomBytes | dict | None:
+    ) -> int | float | list | str | CustomBytes | None:
 
         valueType = context.data.get("valueType")
         if valueType:
@@ -111,7 +111,7 @@ def check_value_type(
                     if type(value) not in {float, int} and value is not None:
                         raise ValueError("Validation failed for NUMERIC_VALUE_TYPE")
             elif valueType == ValueTypeEnum.STRING_VALUE_TYPE:
-                if not isinstance(value, (str, dict)):
+                if not isinstance(value, str):
                     raise ValueError(
                         f"ValueType was string but Value was of type {type(value)}"
                     )
@@ -155,8 +155,6 @@ def set_value_type(self) -> "PropertyValue":
                 self.valueType = ValueTypeEnum.BLOB_VALUE_TYPE
             elif isinstance(self.value, list):
                 self.valueType = ValueTypeEnum.VECTOR_VALUE_TYPE
-            elif isinstance(self.value, dict):
-                self.valueType = ValueTypeEnum.STRING_VALUE_TYPE
         elif self.valueType == ValueTypeEnum.NUMERIC_VALUE_TYPE and isinstance(
             self.value, str
         ):
diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py
index 386aa4abc..3b53f2ffc 100644
--- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py
+++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py
@@ -54,41 +54,44 @@
 logger = logging.getLogger(__name__)
 
 
-def _get_vllm_version_from_image_value(image_value: dict | str) -> str | None:
+def _get_vllm_version_from_image_value(image_value: list | str) -> str | None:
     """
     Extract vLLM version from image property value.
 
     Args:
-        image_value: The image property value, either a dict with 'image' and 'vllm_version' keys,
+        image_value: The image property value, either a list [image_url, vllm_version],
                     or a string (for backward compatibility)
 
     Returns:
-        Version string if found in dict, None otherwise
+        Version string if found in list, None otherwise
     """
     logger.debug(f"_get_vllm_version_from_image_value called with: {image_value}")
 
-    # If image_value is a dict with vllm_version, extract it
-    if isinstance(image_value, dict):
-        version = image_value.get("vllm_version")
-        logger.debug(f"Extracted vLLM version from dict: {version}")
-        return version
+    # If image_value is a list with vllm_version, extract it
+    if isinstance(image_value, list):
+        if len(image_value) > 1:
+            version = image_value[1]
+            logger.debug(f"Extracted vLLM version from list: {version}")
+            return version
+        logger.debug("List has only one element, no version info available")
+        return None
 
     # For backward compatibility: if it's a string, we don't have version info
     logger.debug("Image value is a string, no version info available")
     return None
 
 
-def _should_enable_threadpool(image_value: dict | str, threadpool_value: int) -> bool:
+def _should_enable_threadpool(image_value: list | str, threadpool_value: int) -> bool:
     """
     Determine if threadpool should be enabled based on vLLM version and user preference.
 
     Threadpool is only supported in vLLM >= 0.20.0. This function checks:
     1. If user explicitly disabled threadpool (threadpool=0), return False
-    2. If vLLM version exists in image_value dict and version < 0.20.0, return False
+    2. If vLLM version exists in image_value list and version < 0.20.0, return False
     3. Otherwise, return True (user wants it and version supports it or no version info)
 
     Args:
-        image_value: The image property value (dict with 'image' and 'vllm_version' or string)
+        image_value: The image property value (list [image_url, vllm_version] or string)
         threadpool_value: User's threadpool preference (0 or 1)
 
     Returns:
@@ -162,10 +165,10 @@ def _build_entity_env(values: dict[str, str]) -> str:
     :param values: experiment values
     :return: definition
     """
-    # Extract image string from dict if needed
+    # Extract image string from list if needed
     image_value = values.get("image")
-    if isinstance(image_value, dict):
-        image_str = image_value.get("image")
+    if isinstance(image_value, list):
+        image_str = image_value[0] if len(image_value) > 0 else image_value
     else:
         image_str = image_value
 
@@ -312,9 +315,9 @@ def _create_environment(
                         f"Final threadpool_value to be used: {threadpool_value}"
                     )
 
-                    # Extract image string from dict if needed
-                    if isinstance(image_value, dict):
-                        image_name = image_value.get("image", "")
+                    # Extract image string from list if needed
+                    if isinstance(image_value, list):
+                        image_name = image_value[0] if len(image_value) > 0 else ""
                     else:
                         image_name = image_value
 
diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing.yaml b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing.yaml
index d32d07e1a..9da7d30b5 100644
--- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing.yaml
+++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing.yaml
@@ -61,8 +61,8 @@ test-deployment-v1:
       propertyDomain:
         variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE"
         values:
-          - {image: "vllm/vllm-openai:v0.14.0", vllm_version: "0.14.0"}
-          - {image: "vllm/vllm-openai:latest", vllm_version: "0.21.0"}
+          - ["vllm/vllm-openai:v0.14.0", "0.14.0"]
+          - ["vllm/vllm-openai:latest", "0.21.0"]
     - identifier: n_cpus
       metadata:
         description: "(deployment) the number of CPUs to use"
@@ -130,7 +130,7 @@ test-deployment-v1:
   defaultParameterization:
     - property:
         identifier: 'image'
-      value: {image: "vllm/vllm-openai:v0.14.0", vllm_version: "0.14.0"}
+      value: ["vllm/vllm-openai:v0.14.0", "0.14.0"]
     - property:
         identifier: n_cpus
       value: 8
@@ -463,7 +463,7 @@ test-deployment-guidellm-v1:
   defaultParameterization:
     - property:
         identifier: 'image'
-      value: {image: "vllm/vllm-openai:v0.14.0", vllm_version: "0.14.0"}
+      value: ["vllm/vllm-openai:v0.14.0", "0.14.0"]
     - property:
         identifier: n_cpus
       value: 8
diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml
index 0320e59ec..6fece3e73 100644
--- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml
+++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml
@@ -150,8 +150,8 @@ performance_testing-geospatial-full:
       propertyDomain:
         variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE"
         values:
-          - {image: "your/image/with/vllm/and/terratorch:0.1", vllm_version: "0.20.0"}
-          - {image: "icr.io/drl-nextgen/mgazz/vllm:v0.18.0-tt.v1.2.5", vllm_version: "0.18.0"}
+          - ["your/image/with/vllm/and/terratorch:0.1", "0.20.0"]
+          - ["icr.io/drl-nextgen/mgazz/vllm:v0.18.0-tt.v1.2.5", "0.18.0"]
     - identifier: n_cpus
       metadata:
         description: "(deployment) the number of CPUs to use"
@@ -244,7 +244,7 @@ performance_testing-geospatial-full:
   defaultParameterization:
     - property:
         identifier: 'image'
-      value: {image: "your/image/with/vllm/and/terratorch:0.1", vllm_version: "0.20.0"}
+      value: ["your/image/with/vllm/and/terratorch:0.1", "0.20.0"]
     - property:
         identifier: n_cpus
       value: 8
@@ -456,7 +456,7 @@ performance_testing-geospatial-full-custom-dataset:
   defaultParameterization:
     - property:
         identifier: 'image'
-      value: {image: "your/image/with/vllm/and/terratorch:0.1", vllm_version: "0.20.0"}
+      value: ["your/image/with/vllm/and/terratorch:0.1", "0.20.0"]
     - property:
         identifier: n_cpus
       value: 8
@@ -853,7 +853,7 @@ performance_testing-geospatial-full-guidellm:
   defaultParameterization:
     - property:
         identifier: 'image'
-      value: {image: "your/image/with/vllm/and/terratorch:0.1", vllm_version: "0.20.0"}
+      value: ["your/image/with/vllm/and/terratorch:0.1", "0.20.0"]
     - property:
         identifier: n_cpus
       value: 8
@@ -1065,7 +1065,7 @@ performance_testing-geospatial-guidellm-deployment-custom-dataset:
   defaultParameterization:
     - property:
         identifier: 'image'
-      value: {image: "your/image/with/vllm/and/terratorch:0.1", vllm_version: "0.20.0"}
+      value: ["your/image/with/vllm/and/terratorch:0.1", "0.20.0"]
     - property:
         identifier: n_cpus
       value: 8
diff --git a/plugins/actuators/vllm_performance/tests/test_experiment_executor.py b/plugins/actuators/vllm_performance/tests/test_experiment_executor.py
index 645aff82a..483928da5 100644
--- a/plugins/actuators/vllm_performance/tests/test_experiment_executor.py
+++ b/plugins/actuators/vllm_performance/tests/test_experiment_executor.py
@@ -14,22 +14,22 @@
 class TestGetVllmVersionFromImageValue:
     """Test suite for _get_vllm_version_from_image_value function"""
 
-    def test_version_extraction_from_dict_value(self) -> None:
-        """Test extracting vLLM version from dict image value"""
-        image_value = {
-            "image": "icr.io/drl-nextgen/mgazz/vllm:v0.18.0-tt.v1.2.5",
-            "vllm_version": "0.18.0",
-        }
+    def test_version_extraction_from_list_value(self) -> None:
+        """Test extracting vLLM version from list image value"""
+        image_value = [
+            "icr.io/drl-nextgen/mgazz/vllm:v0.18.0-tt.v1.2.5",
+            "0.18.0",
+        ]
 
         version = _get_vllm_version_from_image_value(image_value)
         assert version == "0.18.0"
 
-    def test_version_extraction_from_another_dict_value(self) -> None:
-        """Test extracting vLLM version from another dict image value"""
-        image_value = {
-            "image": "vllm/vllm-openai:v0.14.0",
-            "vllm_version": "0.14.0",
-        }
+    def test_version_extraction_from_another_list_value(self) -> None:
+        """Test extracting vLLM version from another list image value"""
+        image_value = [
+            "vllm/vllm-openai:v0.14.0",
+            "0.14.0",
+        ]
 
         version = _get_vllm_version_from_image_value(image_value)
         assert version == "0.14.0"
@@ -41,21 +41,21 @@ def test_version_extraction_returns_none_for_string_value(self) -> None:
         version = _get_vllm_version_from_image_value(image_value)
         assert version is None
 
-    def test_version_extraction_returns_none_for_dict_without_version(self) -> None:
-        """Test that None is returned when dict doesn't have vllm_version key"""
-        image_value = {
-            "image": "icr.io/drl-nextgen/mgazz/vllm:v0.18.0-tt.v1.2.5",
-        }
+    def test_version_extraction_returns_none_for_list_without_version(self) -> None:
+        """Test that None is returned when list has only one element (no version)"""
+        image_value = [
+            "icr.io/drl-nextgen/mgazz/vllm:v0.18.0-tt.v1.2.5",
+        ]
 
         version = _get_vllm_version_from_image_value(image_value)
         assert version is None
 
     def test_version_extraction_with_latest_tag(self) -> None:
         """Test extracting version for latest tag"""
-        image_value = {
-            "image": "vllm/vllm-openai:latest",
-            "vllm_version": "0.21.0",
-        }
+        image_value = [
+            "vllm/vllm-openai:latest",
+            "0.21.0",
+        ]
 
         version = _get_vllm_version_from_image_value(image_value)
         assert version == "0.21.0"
diff --git a/tests/core/test_group_samplers.py b/tests/core/test_group_samplers.py
index 1de76c494..2e1204fbd 100644
--- a/tests/core/test_group_samplers.py
+++ b/tests/core/test_group_samplers.py
@@ -10,7 +10,6 @@
     RandomGroupSampleSelector,
     SequentialGroupSampleSelector,
     _build_groups_dict,
-    _build_point_group_values,
     _get_space_matching_points,
 )
 from orchestrator.core.discoveryspace.samplers import (
@@ -313,111 +312,6 @@ async def test_group_sampler_sequential_remote(
         ), "Expected for selectors that the number of entities iterated is equal to number matching entities in source"
 
 
-def test_build_point_group_values_with_unhashable_types() -> None:
-    """Test that _build_point_group_values handles dict and list values correctly."""
-
-    # Test with dictionary values (like the image property in the geospatial case)
-    point_with_dict = {
-        "model": "test-model",
-        "image": {"image": "icr.io/test:v1", "vllm_version": "0.18.0"},
-        "n_gpus": 1,
-        "memory": "128Gi",
-    }
-
-    group = ["model", "image", "n_gpus"]
-
-    # This should not raise TypeError: unhashable type: 'dict'
-    result = _build_point_group_values(point=point_with_dict, group=group)
-
-    # Verify the result is a frozenset
-    assert isinstance(result, frozenset)
-
-    # Verify the dict was converted to a tuple of sorted items
-    assert ("model", "test-model") in result
-    assert ("n_gpus", 1) in result
-
-    # The dict should be converted to a tuple of sorted items
-    image_tuple = tuple(
-        sorted({"image": "icr.io/test:v1", "vllm_version": "0.18.0"}.items())
-    )
-    assert ("image", image_tuple) in result
-
-    # Test with list values
-    point_with_list = {
-        "model": "test-model",
-        "tags": ["tag1", "tag2", "tag3"],
-        "n_gpus": 1,
-    }
-
-    group_with_list = ["model", "tags"]
-    result_with_list = _build_point_group_values(
-        point=point_with_list, group=group_with_list
-    )
-
-    assert isinstance(result_with_list, frozenset)
-    assert ("model", "test-model") in result_with_list
-    # The list should be converted to a tuple
-    assert ("tags", ("tag1", "tag2", "tag3")) in result_with_list
-
-    # Test that the same dict values produce the same hash
-    point_with_dict2 = {
-        "model": "test-model",
-        "image": {
-            "vllm_version": "0.18.0",
-            "image": "icr.io/test:v1",
-        },  # Different order
-        "n_gpus": 1,
-        "memory": "128Gi",
-    }
-
-    result2 = _build_point_group_values(point=point_with_dict2, group=group)
-
-    # Should be equal because dict items are sorted
-    assert result == result2
-
-
-def test_build_groups_dict_with_unhashable_values() -> None:
-    """Test that _build_groups_dict correctly groups points with dict values."""
-
-    points = [
-        {
-            "model": "model-a",
-            "image": {"image": "icr.io/test:v1", "vllm_version": "0.18.0"},
-            "n_gpus": 1,
-        },
-        {
-            "model": "model-a",
-            "image": {"image": "icr.io/test:v1", "vllm_version": "0.18.0"},
-            "n_gpus": 2,
-        },
-        {
-            "model": "model-a",
-            "image": {"image": "icr.io/test:v2", "vllm_version": "0.20.1"},
-            "n_gpus": 1,
-        },
-    ]
-
-    group = ["model", "image"]
-
-    # This should not raise TypeError
-    groups = _build_groups_dict(points=points, group=group)
-
-    # Should have 2 groups (model-a with v1 image, and model-a with v2 image)
-    assert len(groups) == 2
-
-    # Each group should contain the correct points
-    for group_key, group_points in groups.items():
-        if (
-            "image",
-            tuple(
-                sorted({"image": "icr.io/test:v1", "vllm_version": "0.18.0"}.items())
-            ),
-        ) in group_key:
-            assert len(group_points) == 2  # Two points with v1 image
-        else:
-            assert len(group_points) == 1  # One point with v2 image
-
-
 @pytest.mark.asyncio
 async def test_group_sample_generator_fail_on_continuous_space() -> None:
 
diff --git a/tests/schema/test_property_value.py b/tests/schema/test_property_value.py
index 34e547d8e..b8654a62e 100644
--- a/tests/schema/test_property_value.py
+++ b/tests/schema/test_property_value.py
@@ -24,11 +24,10 @@ def python_type_value_examples() -> dict[type, tuple[ValueTypeEnum, typing.Any]]
         str: (ValueTypeEnum.STRING_VALUE_TYPE, "string"),
         list: (ValueTypeEnum.VECTOR_VALUE_TYPE, [0, "a", 10]),
         bytes: (ValueTypeEnum.BLOB_VALUE_TYPE, b"PNG\r89\n\x1a\n\x00\x00"),
-        dict: (ValueTypeEnum.STRING_VALUE_TYPE, {"key": "value", "number": 42}),
     }
 
 
-@pytest.fixture(params=[int, float, str, bytes, list, dict, type(None)])
+@pytest.fixture(params=[int, float, str, bytes, list, type(None)])
 def value_example(
     python_type_value_examples: dict[type, tuple[ValueTypeEnum, typing.Any]],
     request: pytest.FixtureRequest,
@@ -37,7 +36,7 @@ def value_example(
     return python_type_value_examples[request.param]
 
 
-@pytest.fixture(params=[int, float, str, bytes, list, dict, type(None)])
+@pytest.fixture(params=[int, float, str, bytes, list, type(None)])
 def test_value_example(
     python_type_value_examples: dict[type, tuple[ValueTypeEnum, typing.Any]],
     request: pytest.FixtureRequest,
@@ -46,7 +45,7 @@ def test_value_example(
     return python_type_value_examples[request.param]
 
 
-@pytest.fixture(params=[int, float, str, bytes, list, dict, type(None)])
+@pytest.fixture(params=[int, float, str, bytes, list, type(None)])
 def property_value(
     request: pytest.FixtureRequest,
 ) -> tuple[ConstitutivePropertyValue, type]:
@@ -69,10 +68,6 @@ def property_value(
         )
     elif request.param is list:
         val = ConstitutivePropertyValue(value=[0, "a", 10], property=prop.descriptor())
-    elif request.param is dict:
-        val = ConstitutivePropertyValue(
-            value={"key": "value", "number": 42}, property=prop.descriptor()
-        )
     elif request.param is type(None):
         val = ConstitutivePropertyValue(value=None, property=prop.descriptor())
     else:
@@ -214,9 +209,6 @@ def test_type_detection(property_value: tuple[PropertyValue, type]) -> None:
         assert val.valueType == ValueTypeEnum.VECTOR_VALUE_TYPE
     elif value_type is bytes:
         assert val.valueType == ValueTypeEnum.BLOB_VALUE_TYPE
-    elif value_type is dict:
-        # Dict values are treated as STRING_VALUE_TYPE (they're serialized as strings)
-        assert val.valueType == ValueTypeEnum.STRING_VALUE_TYPE
     elif value_type is type(None):
         # Treating None as a Numeric type currently
         assert val.valueType == ValueTypeEnum.NUMERIC_VALUE_TYPE

From ddfaee6a7fd198b98adaf49c24148834df9db4f7 Mon Sep 17 00:00:00 2001
From: Michele Gazzetti <michele.gazzetti1@ibm.com>
Date: Fri, 5 Jun 2026 15:51:04 +0100
Subject: [PATCH 6/7] Fix vLLM actuator cache to include benchmark parameters

- Add composite cache key combining environment and benchmark params
- Move cache to EnvironmentManager actor for persistence across batches
- Ensure cache hits only occur for identical env + benchmark config
- Add comprehensive tests for cache key generation
- Fix issue where measurements were incorrectly reused across entities whose benchmark parameters differed

Signed-off-by: Michele Gazzetti <michele.gazzetti1@ibm.com>
---
 .../vllm_performance/env_manager.py           |  33 ++
 .../vllm_performance/experiment_executor.py   | 115 ++++-
 .../tests/test_experiment_executor.py         | 423 ++++++++++++++++++
 3 files changed, 562 insertions(+), 9 deletions(-)

diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/env_manager.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/env_manager.py
index e3242ce22..c79212f5f 100644
--- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/env_manager.py
+++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/env_manager.py
@@ -134,6 +134,10 @@ def __init__(
         self.verify_ssl = verify_ssl
         self.otlp_traces_endpoint = otlp_traces_endpoint
 
+        # Measurement cache for reusing results across entities with identical
+        # environment and benchmark parameters
+        self.measurement_cache: dict[str, dict] = {}
+
         # component manager for cleanup
         self.manager = ComponentsManager(
             namespace=self.namespace,
@@ -144,6 +148,35 @@ def __init__(
             pvc_template=pvc_template,
         )
 
+    def get_cached_measurement(self, cache_key: str) -> dict | None:
+        """
+        Look up the measurement result stored in measurement_cache for the given cache key.
+
+        Args:
+            cache_key: Composite key containing environment and benchmark parameters
+
+        Returns:
+            Cached measurement dict with 'measurements' and 'error' keys, or None if not cached
+        """
+        return self.measurement_cache.get(cache_key)
+
+    def cache_measurement(
+        self, cache_key: str, measurements: list, error: str | None
+    ) -> None:
+        """
+        Cache a measurement result for reuse by subsequent entities, replacing any existing entry for cache_key.
+
+        Args:
+            cache_key: Composite key containing environment and benchmark parameters
+            measurements: List of measured property values
+            error: Error message if measurement failed, None otherwise
+        """
+        self.measurement_cache[cache_key] = {
+            "measurements": measurements,
+            "error": error,
+        }
+        logger.debug(f"Cached measurement for key: {cache_key}")
+
     def _delete_environment_k8s_resources(self, k8s_name: str) -> None:
         """
         Deletes a deployment. Intended to be used for cleanup or error recovery
diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py
index 3b53f2ffc..9f447d97c 100644
--- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py
+++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py
@@ -172,6 +172,27 @@ def _build_entity_env(values: dict[str, str]) -> str:
     else:
         image_str = image_value
 
+    # Determine effective threadpool and renderer_num_workers values
+    threadpool_requested = int(values.get("threadpool", 1))
+    renderer_num_workers_requested = int(values.get("renderer_num_workers", 32))
+
+    # Check if threadpool will actually be enabled based on version
+    # Use empty string as fallback if image_value is None
+    enable_threadpool = _should_enable_threadpool(
+        image_value if image_value is not None else "", threadpool_requested
+    )
+
+    # Normalize values for environment definition:
+    # - If threadpool is disabled (version < 0.20.0 or user disabled it),
+    #   set both to 0 so different renderer_num_workers values don't create
+    #   different environments when they would behave identically
+    if enable_threadpool:
+        threadpool_value = 1
+        renderer_num_workers_value = renderer_num_workers_requested
+    else:
+        threadpool_value = 0
+        renderer_num_workers_value = 0  # Normalize to 0 when not used
+
     env_values = {
         "model": values.get("model"),
         "image": image_str,
@@ -184,12 +205,61 @@ def _build_entity_env(values: dict[str, str]) -> str:
         "dtype": values.get("dtype"),
         "cpu_offload": values.get("cpu_offload"),
         "max_num_seq": values.get("max_num_seq"),
-        "threadpool": values.get("threadpool", 1),
-        "renderer_num_workers": values.get("renderer_num_workers", 32),
+        "threadpool": threadpool_value,
+        "renderer_num_workers": renderer_num_workers_value,
     }
     return json.dumps(env_values)
 
 
+def _build_benchmark_params_key(values: dict[str, str]) -> str:
+    """
+    Build a cache key from benchmark parameters that affect measurement results.
+
+    These parameters define the workload characteristics and must be included
+    in the cache key to ensure measurements are only reused for identical tests.
+
+    Args:
+        values: experiment values; any parameter missing from it is recorded as null
+
+    Returns:
+        JSON string of benchmark parameters, serialized with sorted keys
+    """
+    benchmark_params = {
+        "num_prompts": values.get("num_prompts"),
+        "request_rate": values.get("request_rate"),
+        "max_concurrency": values.get("max_concurrency"),
+        "number_input_tokens": values.get("number_input_tokens"),
+        "max_output_tokens": values.get("max_output_tokens"),
+        "burstiness": values.get("burstiness"),
+        "dataset": values.get("dataset"),
+    }
+    return json.dumps(benchmark_params, sort_keys=True)
+
+
+def _build_cache_key(values: dict[str, str]) -> str:
+    """
+    Build a composite cache key from both environment and benchmark parameters.
+
+    Cache hits should only occur when both the deployment environment AND
+    the benchmark workload parameters are identical.
+
+    Args:
+        values: experiment values
+
+    Returns:
+        JSON string with "environment" and "benchmark" objects, serialized with sorted keys
+    """
+    env_key = _build_entity_env(values)
+    benchmark_key = _build_benchmark_params_key(values)
+
+    # Parse both JSON strings back so they nest as objects in the composite key
+    composite = {
+        "environment": json.loads(env_key),
+        "benchmark": json.loads(benchmark_key),
+    }
+    return json.dumps(composite, sort_keys=True)
+
+
 def _create_environment(
     values: dict[str, str],
     actuator: VLLMPerformanceTestParameters,
@@ -523,6 +593,30 @@ def run_resource_and_workload_experiment(
         try:
             values = experiment.propertyValuesFromEntity(entity=entity)
 
+            # Check if we've already measured an entity with the same environment and benchmark parameters
+            # Cache key includes both environment (model, GPUs, etc.) and benchmark params (num_prompts, request_rate, etc.)
+            cache_key = _build_cache_key(values)
+            logger.info("cache_key: %s", cache_key)
+
+            # Check actor's cache for this measurement
+            cached_result = ray.get(
+                env_manager.get_cached_measurement.remote(cache_key)
+            )
+            if cached_result is not None:
+                logger.info(
+                    f"Reusing cached measurement for entity {entity.identifier} "
+                    f"(identical environment and benchmark parameters)"
+                )
+                measurements.append(
+                    create_measurement_result(
+                        identifier=entity.identifier,
+                        measurements=cached_result["measurements"],
+                        error=cached_result["error"],
+                        reference=request.experimentReference,
+                    )
+                )
+                continue
+
             logger.info(f"Creating K8s environment for {entity.identifier}")
 
             # Will raise an K8sEnvironmentCreationError if the environment could not be created
@@ -653,14 +747,17 @@ def run_resource_and_workload_experiment(
             )
         else:
             measured_values = result.to_observed_property_values(experiment=experiment)
-            measurements.append(
-                create_measurement_result(
-                    identifier=entity.identifier,
-                    measurements=measured_values,
-                    error=None,
-                    reference=request.experimentReference,
-                )
+            measurement_result = create_measurement_result(
+                identifier=entity.identifier,
+                measurements=measured_values,
+                error=None,
+                reference=request.experimentReference,
             )
+            measurements.append(measurement_result)
+
+            # Cache the measurement in the actor for potential reuse by subsequent entities
+            # with the same environment and benchmark parameters
+            env_manager.cache_measurement.remote(cache_key, measured_values, None)
         finally:
             if started_benchmarking:
                 console.put.remote(
diff --git a/plugins/actuators/vllm_performance/tests/test_experiment_executor.py b/plugins/actuators/vllm_performance/tests/test_experiment_executor.py
index 483928da5..2216c2e63 100644
--- a/plugins/actuators/vllm_performance/tests/test_experiment_executor.py
+++ b/plugins/actuators/vllm_performance/tests/test_experiment_executor.py
@@ -6,7 +6,12 @@
 Tests version extraction from image property values.
 """
 
+import json
+
 from ado_actuators.vllm_performance.experiment_executor import (
+    _build_benchmark_params_key,
+    _build_cache_key,
+    _build_entity_env,
     _get_vllm_version_from_image_value,
 )
 
@@ -61,4 +66,422 @@ def test_version_extraction_with_latest_tag(self) -> None:
         assert version == "0.21.0"
 
 
+class TestBuildEntityEnv:
+    """Test suite for _build_entity_env function"""
+
+    def test_renderer_num_workers_normalized_when_vllm_version_less_than_0_20_0(
+        self,
+    ) -> None:
+        """Test that renderer_num_workers is normalized to 0 when vLLM < 0.20.0"""
+        # Test with vLLM 0.18.0 (< 0.20.0)
+        values = {
+            "model": "test-model",
+            "image": ["icr.io/test/vllm:v0.18.0", "0.18.0"],
+            "n_gpus": "1",
+            "gpu_type": "nvidia-a100",
+            "n_cpus": "8",
+            "memory": "32Gi",
+            "max_batch_tokens": "4096",
+            "gpu_memory_utilization": "0.9",
+            "dtype": "auto",
+            "cpu_offload": "0",
+            "max_num_seq": "256",
+            "threadpool": "1",
+            "renderer_num_workers": "64",  # Should be normalized to 0
+        }
+
+        result = _build_entity_env(values)
+        result_dict = json.loads(result)
+
+        # Both threadpool and renderer_num_workers should be 0
+        assert result_dict["threadpool"] == 0
+        assert result_dict["renderer_num_workers"] == 0
+
+    def test_renderer_num_workers_preserved_when_vllm_version_greater_than_0_20_0(
+        self,
+    ) -> None:
+        """Test that renderer_num_workers is preserved when vLLM >= 0.20.0"""
+        # Test with vLLM 0.21.0 (>= 0.20.0)
+        values = {
+            "model": "test-model",
+            "image": ["icr.io/test/vllm:v0.21.0", "0.21.0"],
+            "n_gpus": "1",
+            "gpu_type": "nvidia-a100",
+            "n_cpus": "8",
+            "memory": "32Gi",
+            "max_batch_tokens": "4096",
+            "gpu_memory_utilization": "0.9",
+            "dtype": "auto",
+            "cpu_offload": "0",
+            "max_num_seq": "256",
+            "threadpool": "1",
+            "renderer_num_workers": "64",
+        }
+
+        result = _build_entity_env(values)
+        result_dict = json.loads(result)
+
+        # Both should be preserved
+        assert result_dict["threadpool"] == 1
+        assert result_dict["renderer_num_workers"] == 64
+
+    def test_renderer_num_workers_normalized_when_threadpool_disabled_by_user(
+        self,
+    ) -> None:
+        """Test that renderer_num_workers is normalized to 0 when user disables threadpool"""
+        # Test with vLLM 0.21.0 but threadpool=0
+        values = {
+            "model": "test-model",
+            "image": ["icr.io/test/vllm:v0.21.0", "0.21.0"],
+            "n_gpus": "1",
+            "gpu_type": "nvidia-a100",
+            "n_cpus": "8",
+            "memory": "32Gi",
+            "max_batch_tokens": "4096",
+            "gpu_memory_utilization": "0.9",
+            "dtype": "auto",
+            "cpu_offload": "0",
+            "max_num_seq": "256",
+            "threadpool": "0",  # User explicitly disabled
+            "renderer_num_workers": "64",  # Should be normalized to 0
+        }
+
+        result = _build_entity_env(values)
+        result_dict = json.loads(result)
+
+        # Both should be 0
+        assert result_dict["threadpool"] == 0
+        assert result_dict["renderer_num_workers"] == 0
+
+    def test_different_renderer_num_workers_same_env_when_vllm_less_than_0_20_0(
+        self,
+    ) -> None:
+        """Test that different renderer_num_workers values produce same env when vLLM < 0.20.0"""
+        base_values = {
+            "model": "test-model",
+            "image": ["icr.io/test/vllm:v0.18.0", "0.18.0"],
+            "n_gpus": "1",
+            "gpu_type": "nvidia-a100",
+            "n_cpus": "8",
+            "memory": "32Gi",
+            "max_batch_tokens": "4096",
+            "gpu_memory_utilization": "0.9",
+            "dtype": "auto",
+            "cpu_offload": "0",
+            "max_num_seq": "256",
+            "threadpool": "1",
+        }
+
+        # Test with different renderer_num_workers values
+        values_32 = {**base_values, "renderer_num_workers": "32"}
+        values_64 = {**base_values, "renderer_num_workers": "64"}
+        values_128 = {**base_values, "renderer_num_workers": "128"}
+
+        env_32 = _build_entity_env(values_32)
+        env_64 = _build_entity_env(values_64)
+        env_128 = _build_entity_env(values_128)
+
+        # All should produce the same environment definition
+        assert env_32 == env_64 == env_128
+
+        # Verify they all have renderer_num_workers=0
+        result_dict = json.loads(env_32)
+        assert result_dict["renderer_num_workers"] == 0
+
+    def test_backward_compatibility_with_string_image(self) -> None:
+        """Test backward compatibility when image is a string (no version info)"""
+        values = {
+            "model": "test-model",
+            "image": "icr.io/test/vllm:v0.18.0",  # String, no version info
+            "n_gpus": "1",
+            "gpu_type": "nvidia-a100",
+            "n_cpus": "8",
+            "memory": "32Gi",
+            "max_batch_tokens": "4096",
+            "gpu_memory_utilization": "0.9",
+            "dtype": "auto",
+            "cpu_offload": "0",
+            "max_num_seq": "256",
+            "threadpool": "1",
+            "renderer_num_workers": "64",
+        }
+
+        result = _build_entity_env(values)
+        result_dict = json.loads(result)
+
+        # Should assume threadpool is supported (backward compatible)
+        assert result_dict["threadpool"] == 1
+        assert result_dict["renderer_num_workers"] == 64
+
+
+class TestBuildBenchmarkParamsKey:
+    """Test suite for _build_benchmark_params_key function"""
+
+    def test_includes_all_benchmark_parameters(self) -> None:
+        """Test that all benchmark parameters are included in the key"""
+        values = {
+            "num_prompts": "100",
+            "request_rate": "10",
+            "max_concurrency": "5",
+            "number_input_tokens": "50",
+            "max_output_tokens": "100",
+            "burstiness": "1.0",
+            "dataset": "random",
+        }
+
+        result = _build_benchmark_params_key(values)
+        result_dict = json.loads(result)
+
+        assert result_dict["num_prompts"] == "100"
+        assert result_dict["request_rate"] == "10"
+        assert result_dict["max_concurrency"] == "5"
+        assert result_dict["number_input_tokens"] == "50"
+        assert result_dict["max_output_tokens"] == "100"
+        assert result_dict["burstiness"] == "1.0"
+        assert result_dict["dataset"] == "random"
+
+    def test_handles_missing_values(self) -> None:
+        """Test that missing values are handled as None"""
+        values = {
+            "num_prompts": "100",
+            # Other parameters missing
+        }
+
+        result = _build_benchmark_params_key(values)
+        result_dict = json.loads(result)
+
+        assert result_dict["num_prompts"] == "100"
+        assert result_dict["request_rate"] is None
+        assert result_dict["max_concurrency"] is None
+        assert result_dict["dataset"] is None
+
+    def test_consistent_output_with_sorted_keys(self) -> None:
+        """Test that output is consistent (keys are sorted)"""
+        values = {
+            "dataset": "random",
+            "num_prompts": "100",
+            "request_rate": "10",
+        }
+
+        result1 = _build_benchmark_params_key(values)
+        result2 = _build_benchmark_params_key(values)
+
+        # Should produce identical output
+        assert result1 == result2
+
+        # Verify keys are sorted in JSON
+        result_dict = json.loads(result1)
+        keys = list(result_dict.keys())
+        assert keys == sorted(keys)
+
+
+class TestBuildCacheKey:
+    """Test suite for _build_cache_key function"""
+
+    def test_combines_environment_and_benchmark_params(self) -> None:
+        """Test that cache key includes both environment and benchmark parameters"""
+        values = {
+            # Environment params
+            "model": "test-model",
+            "image": ["icr.io/test/vllm:v0.18.0", "0.18.0"],
+            "n_gpus": "1",
+            "gpu_type": "nvidia-a100",
+            "n_cpus": "8",
+            "memory": "32Gi",
+            "max_batch_tokens": "4096",
+            "gpu_memory_utilization": "0.9",
+            "dtype": "auto",
+            "cpu_offload": "0",
+            "max_num_seq": "256",
+            "threadpool": "1",
+            "renderer_num_workers": "32",
+            # Benchmark params
+            "num_prompts": "200",
+            "request_rate": "32",
+            "dataset": "random",
+        }
+
+        result = _build_cache_key(values)
+        result_dict = json.loads(result)
+
+        # Should have both environment and benchmark sections
+        assert "environment" in result_dict
+        assert "benchmark" in result_dict
+
+        # Check environment section
+        env = result_dict["environment"]
+        assert env["model"] == "test-model"
+        assert env["n_gpus"] == "1"
+
+        # Check benchmark section
+        benchmark = result_dict["benchmark"]
+        assert benchmark["num_prompts"] == "200"
+        assert benchmark["request_rate"] == "32"
+        assert benchmark["dataset"] == "random"
+
+    def test_different_benchmark_params_produce_different_keys(self) -> None:
+        """Test that different benchmark parameters produce different cache keys"""
+        base_values = {
+            "model": "test-model",
+            "image": ["icr.io/test/vllm:v0.20.1", "0.20.1"],
+            "n_gpus": "1",
+            "gpu_type": "nvidia-a100",
+            "n_cpus": "8",
+            "memory": "32Gi",
+            "max_batch_tokens": "4096",
+            "gpu_memory_utilization": "0.9",
+            "dtype": "auto",
+            "cpu_offload": "0",
+            "max_num_seq": "256",
+            "threadpool": "1",
+            "renderer_num_workers": "32",
+        }
+
+        # Same environment, different num_prompts
+        values1 = {**base_values, "num_prompts": "100", "request_rate": "32"}
+        values2 = {**base_values, "num_prompts": "200", "request_rate": "32"}
+
+        key1 = _build_cache_key(values1)
+        key2 = _build_cache_key(values2)
+
+        # Different benchmark params should produce different keys
+        assert key1 != key2
+
+    def test_same_params_produce_same_key(self) -> None:
+        """Test that identical parameters produce identical cache keys"""
+        values = {
+            "model": "test-model",
+            "image": ["icr.io/test/vllm:v0.20.1", "0.20.1"],
+            "n_gpus": "1",
+            "gpu_type": "nvidia-a100",
+            "n_cpus": "8",
+            "memory": "32Gi",
+            "max_batch_tokens": "4096",
+            "gpu_memory_utilization": "0.9",
+            "dtype": "auto",
+            "cpu_offload": "0",
+            "max_num_seq": "256",
+            "threadpool": "1",
+            "renderer_num_workers": "32",
+            "num_prompts": "200",
+            "request_rate": "32",
+            "dataset": "random",
+        }
+
+        key1 = _build_cache_key(values)
+        key2 = _build_cache_key(values)
+
+        # Identical params should produce identical keys
+        assert key1 == key2
+
+    def test_cache_key_differentiates_on_request_rate(self) -> None:
+        """Test that different request_rate values produce different cache keys"""
+        base_values = {
+            "model": "test-model",
+            "image": ["icr.io/test/vllm:v0.18.0", "0.18.0"],
+            "n_gpus": "1",
+            "gpu_type": "nvidia-a100",
+            "n_cpus": "8",
+            "memory": "32Gi",
+            "max_batch_tokens": "4096",
+            "gpu_memory_utilization": "0.9",
+            "dtype": "auto",
+            "cpu_offload": "0",
+            "max_num_seq": "256",
+            "threadpool": "1",
+            "renderer_num_workers": "32",
+            "num_prompts": "200",
+            "dataset": "random",
+        }
+
+        # Same everything except request_rate
+        values_rate_32 = {**base_values, "request_rate": "32"}
+        values_rate_64 = {**base_values, "request_rate": "64"}
+
+        key_32 = _build_cache_key(values_rate_32)
+        key_64 = _build_cache_key(values_rate_64)
+
+        # Different request rates should produce different keys
+        assert key_32 != key_64
+
+    def test_vllm_0_18_same_cache_key_for_different_renderer_num_workers(self) -> None:
+        """
+        Test that for vLLM 0.18.0, different renderer_num_workers values produce
+        the SAME cache key (because threadpool is not supported and normalized to 0)
+        """
+        base_values = {
+            "model": "test-model",
+            "image": ["icr.io/test/vllm:v0.18.0", "0.18.0"],
+            "n_gpus": "1",
+            "gpu_type": "nvidia-a100",
+            "n_cpus": "8",
+            "memory": "32Gi",
+            "max_batch_tokens": "4096",
+            "gpu_memory_utilization": "0.9",
+            "dtype": "auto",
+            "cpu_offload": "0",
+            "max_num_seq": "256",
+            "threadpool": "1",
+            "num_prompts": "200",
+            "request_rate": "32",
+            "dataset": "random",
+        }
+
+        # Different renderer_num_workers values
+        values_32 = {**base_values, "renderer_num_workers": "32"}
+        values_64 = {**base_values, "renderer_num_workers": "64"}
+
+        key_32 = _build_cache_key(values_32)
+        key_64 = _build_cache_key(values_64)
+
+        # For vLLM 0.18.0, both should produce the same key
+        # because renderer_num_workers is normalized to 0 in the environment
+        assert key_32 == key_64
+
+        # Verify the environment section has renderer_num_workers=0
+        result_dict = json.loads(key_32)
+        assert result_dict["environment"]["renderer_num_workers"] == 0
+
+    def test_vllm_0_20_different_cache_key_for_different_renderer_num_workers(
+        self,
+    ) -> None:
+        """
+        Test that for vLLM 0.20.1, different renderer_num_workers values produce
+        DIFFERENT cache keys (because threadpool is supported)
+        """
+        base_values = {
+            "model": "test-model",
+            "image": ["icr.io/test/vllm:v0.20.1", "0.20.1"],
+            "n_gpus": "1",
+            "gpu_type": "nvidia-a100",
+            "n_cpus": "8",
+            "memory": "32Gi",
+            "max_batch_tokens": "4096",
+            "gpu_memory_utilization": "0.9",
+            "dtype": "auto",
+            "cpu_offload": "0",
+            "max_num_seq": "256",
+            "threadpool": "1",
+            "num_prompts": "200",
+            "request_rate": "32",
+            "dataset": "random",
+        }
+
+        # Different renderer_num_workers values
+        values_32 = {**base_values, "renderer_num_workers": "32"}
+        values_64 = {**base_values, "renderer_num_workers": "64"}
+
+        key_32 = _build_cache_key(values_32)
+        key_64 = _build_cache_key(values_64)
+
+        # For vLLM 0.20.1, should produce different keys
+        assert key_32 != key_64
+
+        # Verify the environment sections have different renderer_num_workers
+        result_dict_32 = json.loads(key_32)
+        result_dict_64 = json.loads(key_64)
+        assert result_dict_32["environment"]["renderer_num_workers"] == 32
+        assert result_dict_64["environment"]["renderer_num_workers"] == 64
+
+
 # Made with Bob

From cd6f3502ddc03d5e6f385855489facbb67bd3fac Mon Sep 17 00:00:00 2001
From: Michele Gazzetti <michele.gazzetti1@ibm.com>
Date: Fri, 5 Jun 2026 16:20:53 +0100
Subject: [PATCH 7/7] refactor: improve code readability and reduce duplication

- Create VLLMVersionChecker and CacheKeyBuilder utility classes
- Add CachedMeasurement dataclass for structured cache storage
- Refactor make_hashable to use pattern matching
- Add pytest fixtures and parametrize tests
- Remove excessive comments and simplify docstrings
- All 49 tests passing

Signed-off-by: Michele Gazzetti <michele.gazzetti1@ibm.com>
---
 discoveryspace_9e79e7.yaml                    | 402 +++++++++++++
 discoveryspace_ca6479.yaml                    | 402 +++++++++++++
 operation_entities.csv                        |   9 +
 operation_entities_623871.csv                 |   9 +
 operation_random_walk.yaml                    |  56 ++
 operation_random_walk_623871.yaml             |  56 ++
 operation_results.csv                         |   9 +
 operation_results_623871.csv                  |   9 +
 .../core/discoveryspace/group_samplers.py     |  15 +-
 .../vllm_performance/cache_utils.py           | 120 ++++
 .../vllm_performance/env_manager.py           |  39 +-
 .../vllm_performance/experiment_executor.py   | 348 +----------
 .../vllm_performance/version_utils.py         |  41 ++
 .../tests/test_cache_utils.py                 | 144 +++++
 .../tests/test_experiment_executor.py         | 561 ++++++------------
 .../tests/test_version_utils.py               |  59 ++
 pod_sample.yaml                               | 262 ++++++++
 rhaiis_deployment.yaml                        |  81 +++
 ...do-threadpool-list-based-image-property.md | 201 +++++++
 .../plans/ado-threadpool-refactoring-plan.md  | 248 ++++++++
 ...pool-threadpool-property-implementation.md | 296 +++++++++
 ...dpool-vllm-cache-implementation-summary.md | 141 +++++
 .../ado-threadpool-vllm-cache-improvement.md  | 275 +++++++++
 23 files changed, 3023 insertions(+), 760 deletions(-)
 create mode 100644 discoveryspace_9e79e7.yaml
 create mode 100644 discoveryspace_ca6479.yaml
 create mode 100644 operation_entities.csv
 create mode 100644 operation_entities_623871.csv
 create mode 100644 operation_random_walk.yaml
 create mode 100644 operation_random_walk_623871.yaml
 create mode 100644 operation_results.csv
 create mode 100644 operation_results_623871.csv
 create mode 100644 plugins/actuators/vllm_performance/ado_actuators/vllm_performance/cache_utils.py
 create mode 100644 plugins/actuators/vllm_performance/ado_actuators/vllm_performance/version_utils.py
 create mode 100644 plugins/actuators/vllm_performance/tests/test_cache_utils.py
 create mode 100644 plugins/actuators/vllm_performance/tests/test_version_utils.py
 create mode 100644 pod_sample.yaml
 create mode 100644 rhaiis_deployment.yaml
 create mode 100644 ~/workspace/plans/ado-threadpool-list-based-image-property.md
 create mode 100644 ~/workspace/plans/ado-threadpool-refactoring-plan.md
 create mode 100644 ~/workspace/plans/ado-threadpool-threadpool-property-implementation.md
 create mode 100644 ~/workspace/plans/ado-threadpool-vllm-cache-implementation-summary.md
 create mode 100644 ~/workspace/plans/ado-threadpool-vllm-cache-improvement.md

diff --git a/discoveryspace_9e79e7.yaml b/discoveryspace_9e79e7.yaml
new file mode 100644
index 000000000..029bd2836
--- /dev/null
+++ b/discoveryspace_9e79e7.yaml
@@ -0,0 +1,402 @@
+config:
+  entitySpace:
+  - identifier: model
+    propertyDomain:
+      values:
+      - ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11
+      variableType: CATEGORICAL_VARIABLE_TYPE
+  - identifier: request_rate
+    propertyDomain:
+      values:
+      - 32
+      - 64
+      variableType: DISCRETE_VARIABLE_TYPE
+  - identifier: image
+    metadata:
+      description: Docker image with vLLM + terratorch
+    propertyDomain:
+      values:
+      - - icr.io/drl-nextgen/mgazz/vllm:v0.18.0-tt.v1.2.5
+        - 0.18.0
+      - - icr.io/drl-nextgen/mgazz/vllm:v0.20.1-tt.main
+        - 0.20.1
+      variableType: CATEGORICAL_VARIABLE_TYPE
+  - identifier: n_gpus
+    propertyDomain:
+      values:
+      - 1
+      variableType: DISCRETE_VARIABLE_TYPE
+  - identifier: gpu_type
+    propertyDomain:
+      values:
+      - NVIDIA-A100-80GB-PCIe
+      variableType: CATEGORICAL_VARIABLE_TYPE
+  - identifier: max_num_seq
+    propertyDomain:
+      values:
+      - 256
+      variableType: DISCRETE_VARIABLE_TYPE
+  - identifier: max_batch_tokens
+    propertyDomain:
+      values:
+      - 16384
+      variableType: DISCRETE_VARIABLE_TYPE
+  - identifier: n_cpus
+    propertyDomain:
+      values:
+      - 48
+      variableType: DISCRETE_VARIABLE_TYPE
+  - identifier: memory
+    propertyDomain:
+      values:
+      - 128Gi
+      variableType: CATEGORICAL_VARIABLE_TYPE
+  - identifier: gpu_memory_utilization
+    propertyDomain:
+      values:
+      - 0.9
+      variableType: DISCRETE_VARIABLE_TYPE
+  - identifier: dtype
+    propertyDomain:
+      values:
+      - float16
+      variableType: CATEGORICAL_VARIABLE_TYPE
+  - identifier: num_prompts
+    propertyDomain:
+      values:
+      - 200
+      variableType: DISCRETE_VARIABLE_TYPE
+  - identifier: dataset
+    propertyDomain:
+      values:
+      - dataset_url_input_india_incluster.jsonl
+      variableType: CATEGORICAL_VARIABLE_TYPE
+  - identifier: skip_tokenizer_init
+    propertyDomain:
+      values:
+      - 1
+      variableType: DISCRETE_VARIABLE_TYPE
+  - identifier: enforce_eager
+    propertyDomain:
+      values:
+      - 1
+      variableType: DISCRETE_VARIABLE_TYPE
+  - identifier: io_processor_plugin
+    propertyDomain:
+      values:
+      - terratorch_segmentation
+      variableType: CATEGORICAL_VARIABLE_TYPE
+  - identifier: threadpool
+    propertyDomain:
+      values:
+      - 1
+      variableType: CATEGORICAL_VARIABLE_TYPE
+  - identifier: renderer_num_workers
+    propertyDomain:
+      values:
+      - 32
+      - 64
+      variableType: DISCRETE_VARIABLE_TYPE
+  experiments:
+    experiments:
+    - actuatorIdentifier: vllm_performance
+      defaultParameterization:
+      - property:
+          identifier: image
+        value:
+        - your/image/with/vllm/and/terratorch:0.1
+        - 0.20.0
+        valueType: VECTOR_VALUE_TYPE
+      - property:
+          identifier: n_cpus
+        value: 8
+        valueType: NUMERIC_VALUE_TYPE
+      - property:
+          identifier: memory
+        value: 128Gi
+        valueType: STRING_VALUE_TYPE
+      - property:
+          identifier: dtype
+        value: auto
+        valueType: STRING_VALUE_TYPE
+      - property:
+          identifier: num_prompts
+        value: 500
+        valueType: NUMERIC_VALUE_TYPE
+      - property:
+          identifier: max_concurrency
+        value: -1
+        valueType: NUMERIC_VALUE_TYPE
+      - property:
+          identifier: burstiness
+        value: 1.0
+        valueType: NUMERIC_VALUE_TYPE
+      - property:
+          identifier: gpu_memory_utilization
+        value: 0.9
+        valueType: NUMERIC_VALUE_TYPE
+      - property:
+          identifier: cpu_offload
+        value: 0
+        valueType: NUMERIC_VALUE_TYPE
+      - property:
+          identifier: max_num_seq
+        value: 256
+        valueType: NUMERIC_VALUE_TYPE
+      - property:
+          identifier: max_batch_tokens
+        value: 16384
+        valueType: NUMERIC_VALUE_TYPE
+      - property:
+          identifier: n_gpus
+        value: 1
+        valueType: NUMERIC_VALUE_TYPE
+      - property:
+          identifier: gpu_type
+        value: NVIDIA-A100-80GB-PCIe
+        valueType: STRING_VALUE_TYPE
+      - property:
+          identifier: skip_tokenizer_init
+        value: 1
+        valueType: NUMERIC_VALUE_TYPE
+      - property:
+          identifier: enforce_eager
+        value: 1
+        valueType: NUMERIC_VALUE_TYPE
+      - property:
+          identifier: io_processor_plugin
+        value: terratorch_segmentation
+        valueType: STRING_VALUE_TYPE
+      - property:
+          identifier: threadpool
+        value: 1
+        valueType: NUMERIC_VALUE_TYPE
+      - property:
+          identifier: renderer_num_workers
+        value: 32
+        valueType: NUMERIC_VALUE_TYPE
+      identifier: test-geospatial-deployment-custom-dataset-v1
+      metadata:
+        description: VLLM performance testing across compute resource and workload
+          configuration
+      optionalProperties:
+      - identifier: num_prompts
+        metadata:
+          description: (benchmark) The number of prompts to send (total number of
+            requests)
+        propertyDomain:
+          domainRange:
+          - 1
+          - 10001
+          interval: 1
+          variableType: DISCRETE_VARIABLE_TYPE
+      - identifier: max_concurrency
+        metadata:
+          description: (benchmark) The maximum number of concurrent requests to send
+        propertyDomain:
+          domainRange:
+          - -1
+          - 500
+          interval: 1
+          variableType: DISCRETE_VARIABLE_TYPE
+      - identifier: burstiness
+        metadata:
+          description: (benchmark) The burstiness of the requests - 1.0 is a Poisson
+            distribution with rate = request_rate. Others are gamma distributions
+            with lambda = request_rate and shape = burstiness.
+        propertyDomain:
+          domainRange:
+          - 0
+          - 10
+          interval: 1
+          variableType: DISCRETE_VARIABLE_TYPE
+      - identifier: image
+        metadata:
+          description: (deployment) Docker image to use to create vllm deployments
+        propertyDomain:
+          values:
+          - your/image/with/vllm/and/terratorch:0.1
+          variableType: OPEN_CATEGORICAL_VARIABLE_TYPE
+      - identifier: n_cpus
+        metadata:
+          description: (deployment) the number of CPUs to use
+        propertyDomain:
+          domainRange:
+          - 1
+          - 256
+          interval: 1
+          variableType: DISCRETE_VARIABLE_TYPE
+      - identifier: memory
+        metadata:
+          description: (deployment) the amount of memory to allocate to the vLLM pod
+        propertyDomain:
+          values:
+          - 64Gi
+          - 128Gi
+          - 256Gi
+          variableType: CATEGORICAL_VARIABLE_TYPE
+      - identifier: dtype
+        metadata:
+          description: "(deployment) data type for model weights and activations.\
+            \ \u201Cauto\u201D will use FP16 precision for FP32 and FP16 models, and\
+            \ BF16 precision for BF16 models."
+        propertyDomain:
+          values:
+          - auto
+          - half
+          - float16
+          - bfloat16
+          - float
+          - float32
+          variableType: CATEGORICAL_VARIABLE_TYPE
+      - identifier: gpu_memory_utilization
+        metadata:
+          description: (deployment) The fraction of GPU memory to be used for the
+            model executor.
+        propertyDomain:
+          values:
+          - 0.5
+          - 0.75
+          - 0.9
+          variableType: DISCRETE_VARIABLE_TYPE
+      - identifier: cpu_offload
+        metadata:
+          description: (deployment) The amount of model weights in GB to offload to
+            the CPU per GPU. 0 means all weights are on GPU.
+        propertyDomain:
+          values:
+          - 0
+          - 8
+          - 16
+          - 24
+          - 32
+          variableType: DISCRETE_VARIABLE_TYPE
+      - identifier: max_num_seq
+        metadata:
+          description: (deployment) Maximum number of sequences per iteration
+        propertyDomain:
+          domainRange:
+          - 32
+          - 2049
+          interval: 32
+          variableType: DISCRETE_VARIABLE_TYPE
+      - identifier: max_batch_tokens
+        metadata:
+          description: (deployment) maximum number of batched tokens per iteration
+        propertyDomain:
+          domainRange:
+          - 256
+          - 32769
+          interval: 256
+          variableType: DISCRETE_VARIABLE_TYPE
+      - identifier: n_gpus
+        metadata:
+          description: (deployment) Number of GPUs to use
+        propertyDomain:
+          domainRange:
+          - 1
+          - 9
+          interval: 1
+          variableType: DISCRETE_VARIABLE_TYPE
+      - identifier: gpu_type
+        metadata:
+          description: (deployment) The GPU type to use
+        propertyDomain:
+          values:
+          - NVIDIA-A100-80GB-PCIe
+          - NVIDIA-A100-SXM4-80GB
+          - NVIDIA-H100-PCIe
+          variableType: CATEGORICAL_VARIABLE_TYPE
+      - identifier: skip_tokenizer_init
+        metadata:
+          description: (deployment) skip tokenizer initialization
+        propertyDomain:
+          values:
+          - 1
+          variableType: DISCRETE_VARIABLE_TYPE
+      - identifier: enforce_eager
+        metadata:
+          description: (deployment) enforce PyTorch eager mode
+        propertyDomain:
+          values:
+          - 1
+          variableType: DISCRETE_VARIABLE_TYPE
+      - identifier: io_processor_plugin
+        metadata:
+          description: IO Processor plugin to load for the model
+        propertyDomain:
+          values:
+          - terratorch_segmentation
+          variableType: OPEN_CATEGORICAL_VARIABLE_TYPE
+      - identifier: threadpool
+        metadata:
+          description: Enable threadpool for vLLM renderer (0=disabled, 1=enabled).
+            When enabled, uses multiple workers for rendering.
+        propertyDomain:
+          values:
+          - 0
+          - 1
+          variableType: CATEGORICAL_VARIABLE_TYPE
+      - identifier: renderer_num_workers
+        metadata:
+          description: Number of renderer workers when threadpool is enabled. Only
+            used when threadpool=1.
+        propertyDomain:
+          domainRange:
+          - 1
+          - 128
+          interval: 1
+          variableType: DISCRETE_VARIABLE_TYPE
+      requiredProperties:
+      - identifier: model
+        metadata:
+          description: model to use for testing. Assumed to be served by all endpoints
+            tested. Required to obtain correct tokenizer for benchmarking metrics
+            calculation
+        propertyDomain:
+          values:
+          - ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11
+          variableType: OPEN_CATEGORICAL_VARIABLE_TYPE
+      - identifier: request_rate
+        metadata:
+          description: (benchmark) The number of requests to send per second
+        propertyDomain:
+          domainRange:
+          - -1
+          - 1000
+          interval: 1
+          variableType: DISCRETE_VARIABLE_TYPE
+      - identifier: dataset
+        metadata:
+          description: (benchmark) The dataset to be used for the experiment
+        propertyDomain:
+          values:
+          - custom_dataset.jsonl
+          variableType: OPEN_CATEGORICAL_VARIABLE_TYPE
+      targetProperties:
+      - identifier: duration
+      - identifier: completed
+      - identifier: request_throughput
+      - identifier: mean_e2el_ms
+      - identifier: median_e2el_ms
+      - identifier: std_e2el_ms
+      - identifier: p25_e2el_ms
+      - identifier: p50_e2el_ms
+      - identifier: p75_e2el_ms
+      - identifier: p99_e2el_ms
+  metadata:
+    description: Test threadpool functionality with vLLM v0.20.1 and v0.18.0 images
+    labels:
+      model_type: geospatial
+      task: flood_detection
+      test_type: threadpool_version_comparison
+    name: Geospatial Terramind Model Performance Testing - Threadpool Version Test
+  sampleStoreIdentifier: a2760d
+created: '2026-06-05T09:48:44.390740Z'
+identifier: space-9e79e7-a2760d
+status:
+- event: created
+  recorded_at: '2026-06-05T09:48:44.390751Z'
+- event: added
+  recorded_at: '2026-06-05T09:48:44.795200Z'
+
diff --git a/discoveryspace_ca6479.yaml b/discoveryspace_ca6479.yaml
new file mode 100644
index 000000000..999ba87ec
--- /dev/null
+++ b/discoveryspace_ca6479.yaml
@@ -0,0 +1,402 @@
+config:
+  entitySpace:
+  - identifier: model
+    propertyDomain:
+      values:
+      - ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11
+      variableType: CATEGORICAL_VARIABLE_TYPE
+  - identifier: request_rate
+    propertyDomain:
+      values:
+      - 32
+      - 64
+      variableType: DISCRETE_VARIABLE_TYPE
+  - identifier: image
+    metadata:
+      description: Docker image with vLLM + terratorch
+    propertyDomain:
+      values:
+      - - icr.io/drl-nextgen/mgazz/vllm:v0.18.0-tt.v1.2.5
+        - 0.18.0
+      - - icr.io/drl-nextgen/mgazz/vllm:v0.20.1-tt.main
+        - 0.20.1
+      variableType: CATEGORICAL_VARIABLE_TYPE
+  - identifier: n_gpus
+    propertyDomain:
+      values:
+      - 1
+      variableType: DISCRETE_VARIABLE_TYPE
+  - identifier: gpu_type
+    propertyDomain:
+      values:
+      - NVIDIA-A100-80GB-PCIe
+      variableType: CATEGORICAL_VARIABLE_TYPE
+  - identifier: max_num_seq
+    propertyDomain:
+      values:
+      - 256
+      variableType: DISCRETE_VARIABLE_TYPE
+  - identifier: max_batch_tokens
+    propertyDomain:
+      values:
+      - 16384
+      variableType: DISCRETE_VARIABLE_TYPE
+  - identifier: n_cpus
+    propertyDomain:
+      values:
+      - 48
+      variableType: DISCRETE_VARIABLE_TYPE
+  - identifier: memory
+    propertyDomain:
+      values:
+      - 128Gi
+      variableType: CATEGORICAL_VARIABLE_TYPE
+  - identifier: gpu_memory_utilization
+    propertyDomain:
+      values:
+      - 0.9
+      variableType: DISCRETE_VARIABLE_TYPE
+  - identifier: dtype
+    propertyDomain:
+      values:
+      - float16
+      variableType: CATEGORICAL_VARIABLE_TYPE
+  - identifier: num_prompts
+    propertyDomain:
+      values:
+      - 2000
+      variableType: DISCRETE_VARIABLE_TYPE
+  - identifier: dataset
+    propertyDomain:
+      values:
+      - dataset_url_input_india_incluster.jsonl
+      variableType: CATEGORICAL_VARIABLE_TYPE
+  - identifier: skip_tokenizer_init
+    propertyDomain:
+      values:
+      - 1
+      variableType: DISCRETE_VARIABLE_TYPE
+  - identifier: enforce_eager
+    propertyDomain:
+      values:
+      - 1
+      variableType: DISCRETE_VARIABLE_TYPE
+  - identifier: io_processor_plugin
+    propertyDomain:
+      values:
+      - terratorch_segmentation
+      variableType: CATEGORICAL_VARIABLE_TYPE
+  - identifier: threadpool
+    propertyDomain:
+      values:
+      - 1
+      variableType: CATEGORICAL_VARIABLE_TYPE
+  - identifier: renderer_num_workers
+    propertyDomain:
+      values:
+      - 32
+      - 64
+      variableType: DISCRETE_VARIABLE_TYPE
+  experiments:
+    experiments:
+    - actuatorIdentifier: vllm_performance
+      defaultParameterization:
+      - property:
+          identifier: image
+        value:
+        - your/image/with/vllm/and/terratorch:0.1
+        - 0.20.0
+        valueType: VECTOR_VALUE_TYPE
+      - property:
+          identifier: n_cpus
+        value: 8
+        valueType: NUMERIC_VALUE_TYPE
+      - property:
+          identifier: memory
+        value: 128Gi
+        valueType: STRING_VALUE_TYPE
+      - property:
+          identifier: dtype
+        value: auto
+        valueType: STRING_VALUE_TYPE
+      - property:
+          identifier: num_prompts
+        value: 500
+        valueType: NUMERIC_VALUE_TYPE
+      - property:
+          identifier: max_concurrency
+        value: -1
+        valueType: NUMERIC_VALUE_TYPE
+      - property:
+          identifier: burstiness
+        value: 1.0
+        valueType: NUMERIC_VALUE_TYPE
+      - property:
+          identifier: gpu_memory_utilization
+        value: 0.9
+        valueType: NUMERIC_VALUE_TYPE
+      - property:
+          identifier: cpu_offload
+        value: 0
+        valueType: NUMERIC_VALUE_TYPE
+      - property:
+          identifier: max_num_seq
+        value: 256
+        valueType: NUMERIC_VALUE_TYPE
+      - property:
+          identifier: max_batch_tokens
+        value: 16384
+        valueType: NUMERIC_VALUE_TYPE
+      - property:
+          identifier: n_gpus
+        value: 1
+        valueType: NUMERIC_VALUE_TYPE
+      - property:
+          identifier: gpu_type
+        value: NVIDIA-A100-80GB-PCIe
+        valueType: STRING_VALUE_TYPE
+      - property:
+          identifier: skip_tokenizer_init
+        value: 1
+        valueType: NUMERIC_VALUE_TYPE
+      - property:
+          identifier: enforce_eager
+        value: 1
+        valueType: NUMERIC_VALUE_TYPE
+      - property:
+          identifier: io_processor_plugin
+        value: terratorch_segmentation
+        valueType: STRING_VALUE_TYPE
+      - property:
+          identifier: threadpool
+        value: 1
+        valueType: NUMERIC_VALUE_TYPE
+      - property:
+          identifier: renderer_num_workers
+        value: 32
+        valueType: NUMERIC_VALUE_TYPE
+      identifier: test-geospatial-deployment-custom-dataset-v1
+      metadata:
+        description: VLLM performance testing across compute resource and workload
+          configuration
+      optionalProperties:
+      - identifier: num_prompts
+        metadata:
+          description: (benchmark) The number of prompts to send (total number of
+            requests)
+        propertyDomain:
+          domainRange:
+          - 1
+          - 10001
+          interval: 1
+          variableType: DISCRETE_VARIABLE_TYPE
+      - identifier: max_concurrency
+        metadata:
+          description: (benchmark) The maximum number of concurrent requests to send
+        propertyDomain:
+          domainRange:
+          - -1
+          - 500
+          interval: 1
+          variableType: DISCRETE_VARIABLE_TYPE
+      - identifier: burstiness
+        metadata:
+          description: (benchmark) The burstiness of the requests - 1.0 is a Poisson
+            distribution with rate = request_rate. Others are gamma distributions
+            with lambda = request_rate and shape = burstiness.
+        propertyDomain:
+          domainRange:
+          - 0
+          - 10
+          interval: 1
+          variableType: DISCRETE_VARIABLE_TYPE
+      - identifier: image
+        metadata:
+          description: (deployment) Docker image to use to create vllm deployments
+        propertyDomain:
+          values:
+          - your/image/with/vllm/and/terratorch:0.1
+          variableType: OPEN_CATEGORICAL_VARIABLE_TYPE
+      - identifier: n_cpus
+        metadata:
+          description: (deployment) the number of CPUs to use
+        propertyDomain:
+          domainRange:
+          - 1
+          - 256
+          interval: 1
+          variableType: DISCRETE_VARIABLE_TYPE
+      - identifier: memory
+        metadata:
+          description: (deployment) the amount of memory to allocate to vLLM pod
+        propertyDomain:
+          values:
+          - 64Gi
+          - 128Gi
+          - 256Gi
+          variableType: CATEGORICAL_VARIABLE_TYPE
+      - identifier: dtype
+        metadata:
+          description: "(deployment) data type for model weights and activations.\
+            \ \u201Cauto\u201D will use FP16 precision for FP32 and FP16 models, and\
+            \ BF16 precision for BF16 models."
+        propertyDomain:
+          values:
+          - auto
+          - half
+          - float16
+          - bfloat16
+          - float
+          - float32
+          variableType: CATEGORICAL_VARIABLE_TYPE
+      - identifier: gpu_memory_utilization
+        metadata:
+          description: (deployment) The fraction of GPU memory to be used for the
+            model executor,
+        propertyDomain:
+          values:
+          - 0.5
+          - 0.75
+          - 0.9
+          variableType: DISCRETE_VARIABLE_TYPE
+      - identifier: cpu_offload
+        metadata:
+          description: (deployment) The amount of model weights in GB to offload to
+            the CPU per GPU. 0 means all weights are on GPU,
+        propertyDomain:
+          values:
+          - 0
+          - 8
+          - 16
+          - 24
+          - 32
+          variableType: DISCRETE_VARIABLE_TYPE
+      - identifier: max_num_seq
+        metadata:
+          description: (deployment) Maximum number of sequences per iteration
+        propertyDomain:
+          domainRange:
+          - 32
+          - 2049
+          interval: 32
+          variableType: DISCRETE_VARIABLE_TYPE
+      - identifier: max_batch_tokens
+        metadata:
+          description: (deployment) maximum number of batched tokens per iteration
+        propertyDomain:
+          domainRange:
+          - 256
+          - 32769
+          interval: 256
+          variableType: DISCRETE_VARIABLE_TYPE
+      - identifier: n_gpus
+        metadata:
+          description: (deployment) Number of GPUs to use
+        propertyDomain:
+          domainRange:
+          - 1
+          - 9
+          interval: 1
+          variableType: DISCRETE_VARIABLE_TYPE
+      - identifier: gpu_type
+        metadata:
+          description: (deployment) The GPU type to use
+        propertyDomain:
+          values:
+          - NVIDIA-A100-80GB-PCIe
+          - NVIDIA-A100-SXM4-80GB
+          - NVIDIA-H100-PCIe
+          variableType: CATEGORICAL_VARIABLE_TYPE
+      - identifier: skip_tokenizer_init
+        metadata:
+          description: (deployment) skip tokenizer initialization
+        propertyDomain:
+          values:
+          - 1
+          variableType: DISCRETE_VARIABLE_TYPE
+      - identifier: enforce_eager
+        metadata:
+          description: (deployment) enforce PyTorch eager mode
+        propertyDomain:
+          values:
+          - 1
+          variableType: DISCRETE_VARIABLE_TYPE
+      - identifier: io_processor_plugin
+        metadata:
+          description: IO Processor plugin to load for the model
+        propertyDomain:
+          values:
+          - terratorch_segmentation
+          variableType: OPEN_CATEGORICAL_VARIABLE_TYPE
+      - identifier: threadpool
+        metadata:
+          description: Enable threadpool for vLLM renderer (0=disabled, 1=enabled).
+            When enabled, uses multiple workers for rendering.
+        propertyDomain:
+          values:
+          - 0
+          - 1
+          variableType: CATEGORICAL_VARIABLE_TYPE
+      - identifier: renderer_num_workers
+        metadata:
+          description: Number of renderer workers when threadpool is enabled. Only
+            used when threadpool=1.
+        propertyDomain:
+          domainRange:
+          - 1
+          - 128
+          interval: 1
+          variableType: DISCRETE_VARIABLE_TYPE
+      requiredProperties:
+      - identifier: model
+        metadata:
+          description: model to use for testing. Assumed to be served by all endpoints
+            tested. Required to obtain correct tokenizer for benchmarking metrics
+            calculation
+        propertyDomain:
+          values:
+          - ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11
+          variableType: OPEN_CATEGORICAL_VARIABLE_TYPE
+      - identifier: request_rate
+        metadata:
+          description: (benchmark) The number of requests to send per second
+        propertyDomain:
+          domainRange:
+          - -1
+          - 1000
+          interval: 1
+          variableType: DISCRETE_VARIABLE_TYPE
+      - identifier: dataset
+        metadata:
+          description: (benchmark) The dataset to be used for the experiment
+        propertyDomain:
+          values:
+          - custom_dataset.jsonl
+          variableType: OPEN_CATEGORICAL_VARIABLE_TYPE
+      targetProperties:
+      - identifier: duration
+      - identifier: completed
+      - identifier: request_throughput
+      - identifier: mean_e2el_ms
+      - identifier: median_e2el_ms
+      - identifier: std_e2el_ms
+      - identifier: p25_e2el_ms
+      - identifier: p50_e2el_ms
+      - identifier: p75_e2el_ms
+      - identifier: p99_e2el_ms
+  metadata:
+    description: Test threadpool functionality with vLLM v0.20.1 and v0.18.0 images
+    labels:
+      model_type: geospatial
+      task: flood_detection
+      test_type: threadpool_version_comparison
+    name: Geospatial Terramind Model Performance Testing - Threadpool Version Test
+  sampleStoreIdentifier: a2760d
+created: '2026-06-05T08:43:34.231390Z'
+identifier: space-ca6479-a2760d
+status:
+- event: created
+  recorded_at: '2026-06-05T08:43:34.231402Z'
+- event: added
+  recorded_at: '2026-06-05T08:43:34.615893Z'
+
diff --git a/operation_entities.csv b/operation_entities.csv
new file mode 100644
index 000000000..c868d7fde
--- /dev/null
+++ b/operation_entities.csv
@@ -0,0 +1,9 @@
+request_index,result_index,identifier,experiment_id,dataset,dtype,enforce_eager,generatorid,gpu_memory_utilization,gpu_type,image,io_processor_plugin,max_batch_tokens,max_num_seq,memory,model,n_cpus,n_gpus,num_prompts,renderer_num_workers,request_rate,skip_tokenizer_init,threadpool,duration,completed,request_throughput,mean_e2el_ms,median_e2el_ms,std_e2el_ms,p25_e2el_ms,p50_e2el_ms,p75_e2el_ms,p99_e2el_ms,request_id,entity_index,valid
+0,0,"dataset.dataset_url_input_india_incluster.jsonl-dtype.float16-enforce_eager.1-gpu_memory_utilization.0.9-gpu_type.NVIDIA-A100-80GB-PCIe-image.['icr.io/drl-nextgen/mgazz/vllm:v0.18.0-tt.v1.2.5', '0.18.0']-io_processor_plugin.terratorch_segmentation-max_batch_tokens.16384-max_num_seq.256-memory.128Gi-model.ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11-n_cpus.48-n_gpus.1-num_prompts.2000-renderer_num_workers.32-request_rate.32-skip_tokenizer_init.1-threadpool.1",vllm_performance.test-geospatial-deployment-custom-dataset-v1,dataset_url_input_india_incluster.jsonl,float16,1,explicit_grid_sample_generator,0.9,NVIDIA-A100-80GB-PCIe,"['icr.io/drl-nextgen/mgazz/vllm:v0.18.0-tt.v1.2.5', '0.18.0']",terratorch_segmentation,16384,256,128Gi,ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11,48,1,2000,32,32,1,1,215.27072143740952,2000,9.290627107325902,80530.65929337498,92830.85599914192,49518.423878155205,20688.09394375421,92830.85599914192,122615.82531570455,151683.5461606551,260012,0,True
+1,0,"dataset.dataset_url_input_india_incluster.jsonl-dtype.float16-enforce_eager.1-gpu_memory_utilization.0.9-gpu_type.NVIDIA-A100-80GB-PCIe-image.['icr.io/drl-nextgen/mgazz/vllm:v0.18.0-tt.v1.2.5', '0.18.0']-io_processor_plugin.terratorch_segmentation-max_batch_tokens.16384-max_num_seq.256-memory.128Gi-model.ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11-n_cpus.48-n_gpus.1-num_prompts.2000-renderer_num_workers.32-request_rate.64-skip_tokenizer_init.1-threadpool.1",vllm_performance.test-geospatial-deployment-custom-dataset-v1,dataset_url_input_india_incluster.jsonl,float16,1,explicit_grid_sample_generator,0.9,NVIDIA-A100-80GB-PCIe,"['icr.io/drl-nextgen/mgazz/vllm:v0.18.0-tt.v1.2.5', '0.18.0']",terratorch_segmentation,16384,256,128Gi,ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11,48,1,2000,32,64,1,1,208.70084162708372,2000,9.58309503884844,91508.32508202083,94061.80999567732,52116.900894615734,51941.69757212512,94061.80999567732,135936.58734648488,175984.5853097178,6f069a,0,True
+2,0,"dataset.dataset_url_input_india_incluster.jsonl-dtype.float16-enforce_eager.1-gpu_memory_utilization.0.9-gpu_type.NVIDIA-A100-80GB-PCIe-image.['icr.io/drl-nextgen/mgazz/vllm:v0.18.0-tt.v1.2.5', '0.18.0']-io_processor_plugin.terratorch_segmentation-max_batch_tokens.16384-max_num_seq.256-memory.128Gi-model.ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11-n_cpus.48-n_gpus.1-num_prompts.2000-renderer_num_workers.64-request_rate.32-skip_tokenizer_init.1-threadpool.1",vllm_performance.test-geospatial-deployment-custom-dataset-v1,dataset_url_input_india_incluster.jsonl,float16,1,explicit_grid_sample_generator,0.9,NVIDIA-A100-80GB-PCIe,"['icr.io/drl-nextgen/mgazz/vllm:v0.18.0-tt.v1.2.5', '0.18.0']",terratorch_segmentation,16384,256,128Gi,ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11,48,1,2000,64,32,1,1,213.164786985144,2000,9.382412678410084,79639.53276119987,91883.17732699215,48888.65282522971,20816.216070204973,91883.17732699215,121425.36071920767,149769.97912688178,01d594,0,True
+3,0,"dataset.dataset_url_input_india_incluster.jsonl-dtype.float16-enforce_eager.1-gpu_memory_utilization.0.9-gpu_type.NVIDIA-A100-80GB-PCIe-image.['icr.io/drl-nextgen/mgazz/vllm:v0.18.0-tt.v1.2.5', '0.18.0']-io_processor_plugin.terratorch_segmentation-max_batch_tokens.16384-max_num_seq.256-memory.128Gi-model.ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11-n_cpus.48-n_gpus.1-num_prompts.2000-renderer_num_workers.64-request_rate.64-skip_tokenizer_init.1-threadpool.1",vllm_performance.test-geospatial-deployment-custom-dataset-v1,dataset_url_input_india_incluster.jsonl,float16,1,explicit_grid_sample_generator,0.9,NVIDIA-A100-80GB-PCIe,"['icr.io/drl-nextgen/mgazz/vllm:v0.18.0-tt.v1.2.5', '0.18.0']",terratorch_segmentation,16384,256,128Gi,ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11,48,1,2000,64,64,1,1,208.27105004619807,2000,9.602870872146491,90961.90372485157,93205.19294030964,52049.438963978195,52220.867299241945,93205.19294030964,135152.128781192,175817.94290206395,56e3d4,0,True
+4,0,"dataset.dataset_url_input_india_incluster.jsonl-dtype.float16-enforce_eager.1-gpu_memory_utilization.0.9-gpu_type.NVIDIA-A100-80GB-PCIe-image.['icr.io/drl-nextgen/mgazz/vllm:v0.20.1-tt.main', '0.20.1']-io_processor_plugin.terratorch_segmentation-max_batch_tokens.16384-max_num_seq.256-memory.128Gi-model.ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11-n_cpus.48-n_gpus.1-num_prompts.2000-renderer_num_workers.32-request_rate.32-skip_tokenizer_init.1-threadpool.1",vllm_performance.test-geospatial-deployment-custom-dataset-v1,dataset_url_input_india_incluster.jsonl,float16,1,explicit_grid_sample_generator,0.9,NVIDIA-A100-80GB-PCIe,"['icr.io/drl-nextgen/mgazz/vllm:v0.20.1-tt.main', '0.20.1']",terratorch_segmentation,16384,256,128Gi,ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11,48,1,2000,32,32,1,1,76.71821601688862,1430,18.639640938538015,7224.8957470541345,2276.4977612532675,7776.232630976363,1995.9888060111552,2276.4977612532675,14173.72929956764,23919.61157440208,2950ca,0,True
+5,0,"dataset.dataset_url_input_india_incluster.jsonl-dtype.float16-enforce_eager.1-gpu_memory_utilization.0.9-gpu_type.NVIDIA-A100-80GB-PCIe-image.['icr.io/drl-nextgen/mgazz/vllm:v0.20.1-tt.main', '0.20.1']-io_processor_plugin.terratorch_segmentation-max_batch_tokens.16384-max_num_seq.256-memory.128Gi-model.ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11-n_cpus.48-n_gpus.1-num_prompts.2000-renderer_num_workers.32-request_rate.64-skip_tokenizer_init.1-threadpool.1",vllm_performance.test-geospatial-deployment-custom-dataset-v1,dataset_url_input_india_incluster.jsonl,float16,1,explicit_grid_sample_generator,0.9,NVIDIA-A100-80GB-PCIe,"['icr.io/drl-nextgen/mgazz/vllm:v0.20.1-tt.main', '0.20.1']",terratorch_segmentation,16384,256,128Gi,ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11,48,1,2000,32,64,1,1,87.09100105147809,1545,17.740064775312153,27247.12148647828,32077.017480507493,19474.259797434173,2426.9744735211134,32077.017480507493,43850.73825716972,55731.31082225591,ea4dd5,0,True
+6,0,"dataset.dataset_url_input_india_incluster.jsonl-dtype.float16-enforce_eager.1-gpu_memory_utilization.0.9-gpu_type.NVIDIA-A100-80GB-PCIe-image.['icr.io/drl-nextgen/mgazz/vllm:v0.20.1-tt.main', '0.20.1']-io_processor_plugin.terratorch_segmentation-max_batch_tokens.16384-max_num_seq.256-memory.128Gi-model.ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11-n_cpus.48-n_gpus.1-num_prompts.2000-renderer_num_workers.64-request_rate.32-skip_tokenizer_init.1-threadpool.1",vllm_performance.test-geospatial-deployment-custom-dataset-v1,dataset_url_input_india_incluster.jsonl,float16,1,explicit_grid_sample_generator,0.9,NVIDIA-A100-80GB-PCIe,"['icr.io/drl-nextgen/mgazz/vllm:v0.20.1-tt.main', '0.20.1']",terratorch_segmentation,16384,256,128Gi,ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11,48,1,2000,64,32,1,1,69.94518145918846,1433,20.487472762310656,4799.1137330050515,2080.0580009818077,4893.778910783289,1845.9379142150285,2080.0580009818077,7618.101900443435,17009.875886179507,bcdd13,0,True
+7,0,"dataset.dataset_url_input_india_incluster.jsonl-dtype.float16-enforce_eager.1-gpu_memory_utilization.0.9-gpu_type.NVIDIA-A100-80GB-PCIe-image.['icr.io/drl-nextgen/mgazz/vllm:v0.20.1-tt.main', '0.20.1']-io_processor_plugin.terratorch_segmentation-max_batch_tokens.16384-max_num_seq.256-memory.128Gi-model.ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11-n_cpus.48-n_gpus.1-num_prompts.2000-renderer_num_workers.64-request_rate.64-skip_tokenizer_init.1-threadpool.1",vllm_performance.test-geospatial-deployment-custom-dataset-v1,dataset_url_input_india_incluster.jsonl,float16,1,explicit_grid_sample_generator,0.9,NVIDIA-A100-80GB-PCIe,"['icr.io/drl-nextgen/mgazz/vllm:v0.20.1-tt.main', '0.20.1']",terratorch_segmentation,16384,256,128Gi,ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11,48,1,2000,64,64,1,1,71.60710450261831,1492,20.835921384664353,20450.668643505865,27168.44614362344,15321.471204638105,1938.958921469748,27168.44614362344,34347.58948907256,40303.56870012358,203a07,0,True
diff --git a/operation_entities_623871.csv b/operation_entities_623871.csv
new file mode 100644
index 000000000..3f28d4125
--- /dev/null
+++ b/operation_entities_623871.csv
@@ -0,0 +1,9 @@
+request_index,result_index,identifier,experiment_id,dataset,dtype,enforce_eager,generatorid,gpu_memory_utilization,gpu_type,image,io_processor_plugin,max_batch_tokens,max_num_seq,memory,model,n_cpus,n_gpus,num_prompts,renderer_num_workers,request_rate,skip_tokenizer_init,threadpool,duration,completed,request_throughput,mean_e2el_ms,median_e2el_ms,std_e2el_ms,p25_e2el_ms,p50_e2el_ms,p75_e2el_ms,p99_e2el_ms,request_id,entity_index,valid
+0,0,"dataset.dataset_url_input_india_incluster.jsonl-dtype.float16-enforce_eager.1-gpu_memory_utilization.0.9-gpu_type.NVIDIA-A100-80GB-PCIe-image.['icr.io/drl-nextgen/mgazz/vllm:v0.18.0-tt.v1.2.5', '0.18.0']-io_processor_plugin.terratorch_segmentation-max_batch_tokens.16384-max_num_seq.256-memory.128Gi-model.ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11-n_cpus.48-n_gpus.1-num_prompts.200-renderer_num_workers.32-request_rate.32-skip_tokenizer_init.1-threadpool.1",vllm_performance.test-geospatial-deployment-custom-dataset-v1,dataset_url_input_india_incluster.jsonl,float16,1,explicit_grid_sample_generator,0.9,NVIDIA-A100-80GB-PCIe,"['icr.io/drl-nextgen/mgazz/vllm:v0.18.0-tt.v1.2.5', '0.18.0']",terratorch_segmentation,16384,256,128Gi,ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11,48,1,200,32,32,1,1,21.16098095756024,200,9.451357685218532,8323.152535227127,8751.465620007366,4447.680493290849,4647.437798324972,8751.465620007366,12455.881118308753,14942.587131289763,85f9af,0,True
+1,0,"dataset.dataset_url_input_india_incluster.jsonl-dtype.float16-enforce_eager.1-gpu_memory_utilization.0.9-gpu_type.NVIDIA-A100-80GB-PCIe-image.['icr.io/drl-nextgen/mgazz/vllm:v0.18.0-tt.v1.2.5', '0.18.0']-io_processor_plugin.terratorch_segmentation-max_batch_tokens.16384-max_num_seq.256-memory.128Gi-model.ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11-n_cpus.48-n_gpus.1-num_prompts.200-renderer_num_workers.32-request_rate.64-skip_tokenizer_init.1-threadpool.1",vllm_performance.test-geospatial-deployment-custom-dataset-v1,dataset_url_input_india_incluster.jsonl,float16,1,explicit_grid_sample_generator,0.9,NVIDIA-A100-80GB-PCIe,"['icr.io/drl-nextgen/mgazz/vllm:v0.18.0-tt.v1.2.5', '0.18.0']",terratorch_segmentation,16384,256,128Gi,ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11,48,1,200,32,64,1,1,20.831470943056047,200,9.600858266164249,9387.722638151608,9541.33604792878,5095.870009355212,5034.923402359709,9541.33604792878,13802.96064238064,17705.100336009637,963caa,0,True
+2,0,"dataset.dataset_url_input_india_incluster.jsonl-dtype.float16-enforce_eager.1-gpu_memory_utilization.0.9-gpu_type.NVIDIA-A100-80GB-PCIe-image.['icr.io/drl-nextgen/mgazz/vllm:v0.18.0-tt.v1.2.5', '0.18.0']-io_processor_plugin.terratorch_segmentation-max_batch_tokens.16384-max_num_seq.256-memory.128Gi-model.ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11-n_cpus.48-n_gpus.1-num_prompts.200-renderer_num_workers.64-request_rate.32-skip_tokenizer_init.1-threadpool.1",vllm_performance.test-geospatial-deployment-custom-dataset-v1,dataset_url_input_india_incluster.jsonl,float16,1,explicit_grid_sample_generator,0.9,NVIDIA-A100-80GB-PCIe,"['icr.io/drl-nextgen/mgazz/vllm:v0.18.0-tt.v1.2.5', '0.18.0']",terratorch_segmentation,16384,256,128Gi,ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11,48,1,200,64,32,1,1,21.051082043908536,200,9.500699279155256,8082.675415757112,7225.963302887976,4594.845149836468,4306.130911456421,7225.963302887976,12459.563876036556,14833.095950279385,774a6f,0,True
+3,0,"dataset.dataset_url_input_india_incluster.jsonl-dtype.float16-enforce_eager.1-gpu_memory_utilization.0.9-gpu_type.NVIDIA-A100-80GB-PCIe-image.['icr.io/drl-nextgen/mgazz/vllm:v0.18.0-tt.v1.2.5', '0.18.0']-io_processor_plugin.terratorch_segmentation-max_batch_tokens.16384-max_num_seq.256-memory.128Gi-model.ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11-n_cpus.48-n_gpus.1-num_prompts.200-renderer_num_workers.64-request_rate.64-skip_tokenizer_init.1-threadpool.1",vllm_performance.test-geospatial-deployment-custom-dataset-v1,dataset_url_input_india_incluster.jsonl,float16,1,explicit_grid_sample_generator,0.9,NVIDIA-A100-80GB-PCIe,"['icr.io/drl-nextgen/mgazz/vllm:v0.18.0-tt.v1.2.5', '0.18.0']",terratorch_segmentation,16384,256,128Gi,ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11,48,1,200,64,64,1,1,20.759232787415385,200,9.63426741479789,9383.305872818455,9475.987559650091,5158.763184013406,4642.030250979587,9475.987559650091,13991.68548709713,17492.737999986857,24617a,0,True
+4,0,"dataset.dataset_url_input_india_incluster.jsonl-dtype.float16-enforce_eager.1-gpu_memory_utilization.0.9-gpu_type.NVIDIA-A100-80GB-PCIe-image.['icr.io/drl-nextgen/mgazz/vllm:v0.20.1-tt.main', '0.20.1']-io_processor_plugin.terratorch_segmentation-max_batch_tokens.16384-max_num_seq.256-memory.128Gi-model.ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11-n_cpus.48-n_gpus.1-num_prompts.200-renderer_num_workers.32-request_rate.32-skip_tokenizer_init.1-threadpool.1",vllm_performance.test-geospatial-deployment-custom-dataset-v1,dataset_url_input_india_incluster.jsonl,float16,1,explicit_grid_sample_generator,0.9,NVIDIA-A100-80GB-PCIe,"['icr.io/drl-nextgen/mgazz/vllm:v0.20.1-tt.main', '0.20.1']",terratorch_segmentation,16384,256,128Gi,ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11,48,1,200,32,32,1,1,7.335592240095139,149,20.31192508023968,1311.595108735081,1347.61365596205,532.2226718124316,966.6640544310212,1347.61365596205,1623.8912288099527,2275.5631955340514,fa955b,0,True
+5,0,"dataset.dataset_url_input_india_incluster.jsonl-dtype.float16-enforce_eager.1-gpu_memory_utilization.0.9-gpu_type.NVIDIA-A100-80GB-PCIe-image.['icr.io/drl-nextgen/mgazz/vllm:v0.20.1-tt.main', '0.20.1']-io_processor_plugin.terratorch_segmentation-max_batch_tokens.16384-max_num_seq.256-memory.128Gi-model.ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11-n_cpus.48-n_gpus.1-num_prompts.200-renderer_num_workers.32-request_rate.64-skip_tokenizer_init.1-threadpool.1",vllm_performance.test-geospatial-deployment-custom-dataset-v1,dataset_url_input_india_incluster.jsonl,float16,1,explicit_grid_sample_generator,0.9,NVIDIA-A100-80GB-PCIe,"['icr.io/drl-nextgen/mgazz/vllm:v0.20.1-tt.main', '0.20.1']",terratorch_segmentation,16384,256,128Gi,ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11,48,1,200,32,64,1,1,7.772065187804401,157,20.200551102731076,2756.258463415845,3167.6963847130537,1368.5851351345912,1464.73484672606,3167.6963847130537,4070.4276766628022,4661.2075228616595,c7c1a6,0,True
+6,0,"dataset.dataset_url_input_india_incluster.jsonl-dtype.float16-enforce_eager.1-gpu_memory_utilization.0.9-gpu_type.NVIDIA-A100-80GB-PCIe-image.['icr.io/drl-nextgen/mgazz/vllm:v0.20.1-tt.main', '0.20.1']-io_processor_plugin.terratorch_segmentation-max_batch_tokens.16384-max_num_seq.256-memory.128Gi-model.ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11-n_cpus.48-n_gpus.1-num_prompts.200-renderer_num_workers.64-request_rate.32-skip_tokenizer_init.1-threadpool.1",vllm_performance.test-geospatial-deployment-custom-dataset-v1,dataset_url_input_india_incluster.jsonl,float16,1,explicit_grid_sample_generator,0.9,NVIDIA-A100-80GB-PCIe,"['icr.io/drl-nextgen/mgazz/vllm:v0.20.1-tt.main', '0.20.1']",terratorch_segmentation,16384,256,128Gi,ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11,48,1,200,64,32,1,1,6.895735959522426,161,23.3477617102889,837.0693416699119,866.3467029109597,227.43894353062015,715.9368423745036,866.3467029109597,984.7816163673996,1284.9709721282125,5637b4,0,True
+7,0,"dataset.dataset_url_input_india_incluster.jsonl-dtype.float16-enforce_eager.1-gpu_memory_utilization.0.9-gpu_type.NVIDIA-A100-80GB-PCIe-image.['icr.io/drl-nextgen/mgazz/vllm:v0.20.1-tt.main', '0.20.1']-io_processor_plugin.terratorch_segmentation-max_batch_tokens.16384-max_num_seq.256-memory.128Gi-model.ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11-n_cpus.48-n_gpus.1-num_prompts.200-renderer_num_workers.64-request_rate.64-skip_tokenizer_init.1-threadpool.1",vllm_performance.test-geospatial-deployment-custom-dataset-v1,dataset_url_input_india_incluster.jsonl,float16,1,explicit_grid_sample_generator,0.9,NVIDIA-A100-80GB-PCIe,"['icr.io/drl-nextgen/mgazz/vllm:v0.20.1-tt.main', '0.20.1']",terratorch_segmentation,16384,256,128Gi,ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11,48,1,200,64,64,1,1,5.922862338833511,155,26.169779260904917,1804.9909565357432,1586.8693506345153,894.8397404377881,1008.32075253129,1586.8693506345153,2751.6918242909014,2992.8823397308593,499691,0,True
diff --git a/operation_random_walk.yaml b/operation_random_walk.yaml
new file mode 100644
index 000000000..647efeb89
--- /dev/null
+++ b/operation_random_walk.yaml
@@ -0,0 +1,56 @@
+config:
+  actuatorConfigurationIdentifiers:
+  - actuatorconfiguration-vllm_performance-678e1bd6
+  metadata:
+    description: Test threadpool functionality across vLLM versions (v0.20.1 with
+      threadpool, v0.18.0 without)
+    labels:
+      experiment_type: performance_testing
+      model_type: geospatial
+      test_type: threadpool_version_comparison
+    name: Geospatial Model Performance - Threadpool Version Test
+  operation:
+    module:
+      operationType: search
+      operatorName: random_walk
+    parameters:
+      numberEntities: all
+      samplerConfig:
+        grouping:
+        - model
+        - image
+        - memory
+        - max_batch_tokens
+        - max_num_seq
+        - n_gpus
+        - gpu_type
+        - n_cpus
+        - threadpool
+        - renderer_num_workers
+        mode: sequentialgrouped
+        samplerType: generator
+      singleMeasurement: false
+  spaces:
+  - space-ca6479-a2760d
+created: '2026-06-05T09:08:38.348395Z'
+identifier: random_walk-1.8.1.dev41+gb08a3a42.d20260604164353.dirty-fe5225
+metadata:
+  entities_submitted: 8
+  experiments_requested: 8
+operationType: search
+operatorIdentifier: random_walk-1.8.1.dev41+gb08a3a42.d20260604164353.dirty
+status:
+- event: created
+  recorded_at: '2026-06-05T09:08:38.348402Z'
+- event: added
+  recorded_at: '2026-06-05T09:08:38.360114Z'
+- event: started
+  recorded_at: '2026-06-05T09:08:38.374526Z'
+- event: updated
+  recorded_at: '2026-06-05T09:08:38.374551Z'
+- event: finished
+  exit_state: success
+  recorded_at: '2026-06-05T09:42:31.394135Z'
+- event: updated
+  recorded_at: '2026-06-05T09:42:31.399321Z'
+
diff --git a/operation_random_walk_623871.yaml b/operation_random_walk_623871.yaml
new file mode 100644
index 000000000..13d1ec337
--- /dev/null
+++ b/operation_random_walk_623871.yaml
@@ -0,0 +1,56 @@
+config:
+  actuatorConfigurationIdentifiers:
+  - actuatorconfiguration-vllm_performance-678e1bd6
+  metadata:
+    description: Test threadpool functionality across vLLM versions (v0.20.1 with
+      threadpool, v0.18.0 without)
+    labels:
+      experiment_type: performance_testing
+      model_type: geospatial
+      test_type: threadpool_version_comparison
+    name: Geospatial Model Performance - Threadpool Version Test
+  operation:
+    module:
+      operationType: search
+      operatorName: random_walk
+    parameters:
+      numberEntities: all
+      samplerConfig:
+        grouping:
+        - model
+        - image
+        - memory
+        - max_batch_tokens
+        - max_num_seq
+        - n_gpus
+        - gpu_type
+        - n_cpus
+        - threadpool
+        - renderer_num_workers
+        mode: sequentialgrouped
+        samplerType: generator
+      singleMeasurement: false
+  spaces:
+  - space-9e79e7-a2760d
+created: '2026-06-05T10:00:13.976806Z'
+identifier: random_walk-1.8.1.dev41+gb08a3a42.d20260604164353.dirty-623871
+metadata:
+  entities_submitted: 8
+  experiments_requested: 8
+operationType: search
+operatorIdentifier: random_walk-1.8.1.dev41+gb08a3a42.d20260604164353.dirty
+status:
+- event: created
+  recorded_at: '2026-06-05T10:00:13.976813Z'
+- event: added
+  recorded_at: '2026-06-05T10:00:13.990153Z'
+- event: started
+  recorded_at: '2026-06-05T10:00:14.005488Z'
+- event: updated
+  recorded_at: '2026-06-05T10:00:14.005501Z'
+- event: finished
+  exit_state: success
+  recorded_at: '2026-06-05T10:07:09.909085Z'
+- event: updated
+  recorded_at: '2026-06-05T10:07:09.913408Z'
+
diff --git a/operation_results.csv b/operation_results.csv
new file mode 100644
index 000000000..911503bb5
--- /dev/null
+++ b/operation_results.csv
@@ -0,0 +1,9 @@
+,Result UID,Request ID,Request Index,Request type,Experiment ID,Entity ID,Valid,Number of Properties,Metadata
+0,a0798a17-f7dd-44f4-b35f-2b8906b5a575,260012,0,measured,vllm_performance.test-geospatial-deployment-custom-dataset-v1,"dataset.dataset_url_input_india_incluster.jsonl-dtype.float16-enforce_eager.1-gpu_memory_utilization.0.9-gpu_type.NVIDIA-A100-80GB-PCIe-image.['icr.io/drl-nextgen/mgazz/vllm:v0.18.0-tt.v1.2.5', '0.18.0']-io_processor_plugin.terratorch_segmentation-max_batch_tokens.16384-max_num_seq.256-memory.128Gi-model.ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11-n_cpus.48-n_gpus.1-num_prompts.2000-renderer_num_workers.32-request_rate.32-skip_tokenizer_init.1-threadpool.1",True,10,{}
+1,ea1afb78-1905-4e70-987b-2367a8ed4867,6f069a,1,measured,vllm_performance.test-geospatial-deployment-custom-dataset-v1,"dataset.dataset_url_input_india_incluster.jsonl-dtype.float16-enforce_eager.1-gpu_memory_utilization.0.9-gpu_type.NVIDIA-A100-80GB-PCIe-image.['icr.io/drl-nextgen/mgazz/vllm:v0.18.0-tt.v1.2.5', '0.18.0']-io_processor_plugin.terratorch_segmentation-max_batch_tokens.16384-max_num_seq.256-memory.128Gi-model.ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11-n_cpus.48-n_gpus.1-num_prompts.2000-renderer_num_workers.32-request_rate.64-skip_tokenizer_init.1-threadpool.1",True,10,{}
+2,635cdb1d-5d6f-40dd-abc7-5fcad3c99eea,01d594,2,measured,vllm_performance.test-geospatial-deployment-custom-dataset-v1,"dataset.dataset_url_input_india_incluster.jsonl-dtype.float16-enforce_eager.1-gpu_memory_utilization.0.9-gpu_type.NVIDIA-A100-80GB-PCIe-image.['icr.io/drl-nextgen/mgazz/vllm:v0.18.0-tt.v1.2.5', '0.18.0']-io_processor_plugin.terratorch_segmentation-max_batch_tokens.16384-max_num_seq.256-memory.128Gi-model.ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11-n_cpus.48-n_gpus.1-num_prompts.2000-renderer_num_workers.64-request_rate.32-skip_tokenizer_init.1-threadpool.1",True,10,{}
+3,dd80fcf3-1e71-42b7-af4f-1c1d67e4c154,56e3d4,3,measured,vllm_performance.test-geospatial-deployment-custom-dataset-v1,"dataset.dataset_url_input_india_incluster.jsonl-dtype.float16-enforce_eager.1-gpu_memory_utilization.0.9-gpu_type.NVIDIA-A100-80GB-PCIe-image.['icr.io/drl-nextgen/mgazz/vllm:v0.18.0-tt.v1.2.5', '0.18.0']-io_processor_plugin.terratorch_segmentation-max_batch_tokens.16384-max_num_seq.256-memory.128Gi-model.ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11-n_cpus.48-n_gpus.1-num_prompts.2000-renderer_num_workers.64-request_rate.64-skip_tokenizer_init.1-threadpool.1",True,10,{}
+4,8d3bc967-4b50-40a4-8d1a-9ef224ef588d,2950ca,4,measured,vllm_performance.test-geospatial-deployment-custom-dataset-v1,"dataset.dataset_url_input_india_incluster.jsonl-dtype.float16-enforce_eager.1-gpu_memory_utilization.0.9-gpu_type.NVIDIA-A100-80GB-PCIe-image.['icr.io/drl-nextgen/mgazz/vllm:v0.20.1-tt.main', '0.20.1']-io_processor_plugin.terratorch_segmentation-max_batch_tokens.16384-max_num_seq.256-memory.128Gi-model.ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11-n_cpus.48-n_gpus.1-num_prompts.2000-renderer_num_workers.32-request_rate.32-skip_tokenizer_init.1-threadpool.1",True,10,{}
+5,e5e74f8f-e0c0-4f53-92b2-0ac4b70ad384,ea4dd5,5,measured,vllm_performance.test-geospatial-deployment-custom-dataset-v1,"dataset.dataset_url_input_india_incluster.jsonl-dtype.float16-enforce_eager.1-gpu_memory_utilization.0.9-gpu_type.NVIDIA-A100-80GB-PCIe-image.['icr.io/drl-nextgen/mgazz/vllm:v0.20.1-tt.main', '0.20.1']-io_processor_plugin.terratorch_segmentation-max_batch_tokens.16384-max_num_seq.256-memory.128Gi-model.ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11-n_cpus.48-n_gpus.1-num_prompts.2000-renderer_num_workers.32-request_rate.64-skip_tokenizer_init.1-threadpool.1",True,10,{}
+6,d29e5933-4f86-4f78-98a7-22e2f67f4155,bcdd13,6,measured,vllm_performance.test-geospatial-deployment-custom-dataset-v1,"dataset.dataset_url_input_india_incluster.jsonl-dtype.float16-enforce_eager.1-gpu_memory_utilization.0.9-gpu_type.NVIDIA-A100-80GB-PCIe-image.['icr.io/drl-nextgen/mgazz/vllm:v0.20.1-tt.main', '0.20.1']-io_processor_plugin.terratorch_segmentation-max_batch_tokens.16384-max_num_seq.256-memory.128Gi-model.ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11-n_cpus.48-n_gpus.1-num_prompts.2000-renderer_num_workers.64-request_rate.32-skip_tokenizer_init.1-threadpool.1",True,10,{}
+7,f41dec02-f8e4-4ca8-a32f-c531df73d5f9,203a07,7,measured,vllm_performance.test-geospatial-deployment-custom-dataset-v1,"dataset.dataset_url_input_india_incluster.jsonl-dtype.float16-enforce_eager.1-gpu_memory_utilization.0.9-gpu_type.NVIDIA-A100-80GB-PCIe-image.['icr.io/drl-nextgen/mgazz/vllm:v0.20.1-tt.main', '0.20.1']-io_processor_plugin.terratorch_segmentation-max_batch_tokens.16384-max_num_seq.256-memory.128Gi-model.ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11-n_cpus.48-n_gpus.1-num_prompts.2000-renderer_num_workers.64-request_rate.64-skip_tokenizer_init.1-threadpool.1",True,10,{}
diff --git a/operation_results_623871.csv b/operation_results_623871.csv
new file mode 100644
index 000000000..7755cc944
--- /dev/null
+++ b/operation_results_623871.csv
@@ -0,0 +1,9 @@
+,Result UID,Request ID,Request Index,Request type,Experiment ID,Entity ID,Valid,Number of Properties,Metadata
+0,4ea602e5-bbf0-4cfa-bede-cc5df5525b77,85f9af,0,measured,vllm_performance.test-geospatial-deployment-custom-dataset-v1,"dataset.dataset_url_input_india_incluster.jsonl-dtype.float16-enforce_eager.1-gpu_memory_utilization.0.9-gpu_type.NVIDIA-A100-80GB-PCIe-image.['icr.io/drl-nextgen/mgazz/vllm:v0.18.0-tt.v1.2.5', '0.18.0']-io_processor_plugin.terratorch_segmentation-max_batch_tokens.16384-max_num_seq.256-memory.128Gi-model.ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11-n_cpus.48-n_gpus.1-num_prompts.200-renderer_num_workers.32-request_rate.32-skip_tokenizer_init.1-threadpool.1",True,10,{}
+1,b5bc7457-ee2c-4c8a-a162-30ffe8807321,963caa,1,measured,vllm_performance.test-geospatial-deployment-custom-dataset-v1,"dataset.dataset_url_input_india_incluster.jsonl-dtype.float16-enforce_eager.1-gpu_memory_utilization.0.9-gpu_type.NVIDIA-A100-80GB-PCIe-image.['icr.io/drl-nextgen/mgazz/vllm:v0.18.0-tt.v1.2.5', '0.18.0']-io_processor_plugin.terratorch_segmentation-max_batch_tokens.16384-max_num_seq.256-memory.128Gi-model.ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11-n_cpus.48-n_gpus.1-num_prompts.200-renderer_num_workers.32-request_rate.64-skip_tokenizer_init.1-threadpool.1",True,10,{}
+2,66087ee8-ce37-4b76-8eff-f029988d3408,774a6f,2,measured,vllm_performance.test-geospatial-deployment-custom-dataset-v1,"dataset.dataset_url_input_india_incluster.jsonl-dtype.float16-enforce_eager.1-gpu_memory_utilization.0.9-gpu_type.NVIDIA-A100-80GB-PCIe-image.['icr.io/drl-nextgen/mgazz/vllm:v0.18.0-tt.v1.2.5', '0.18.0']-io_processor_plugin.terratorch_segmentation-max_batch_tokens.16384-max_num_seq.256-memory.128Gi-model.ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11-n_cpus.48-n_gpus.1-num_prompts.200-renderer_num_workers.64-request_rate.32-skip_tokenizer_init.1-threadpool.1",True,10,{}
+3,ff47dee4-2e9b-431e-b8fb-b4c3caf26cce,24617a,3,measured,vllm_performance.test-geospatial-deployment-custom-dataset-v1,"dataset.dataset_url_input_india_incluster.jsonl-dtype.float16-enforce_eager.1-gpu_memory_utilization.0.9-gpu_type.NVIDIA-A100-80GB-PCIe-image.['icr.io/drl-nextgen/mgazz/vllm:v0.18.0-tt.v1.2.5', '0.18.0']-io_processor_plugin.terratorch_segmentation-max_batch_tokens.16384-max_num_seq.256-memory.128Gi-model.ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11-n_cpus.48-n_gpus.1-num_prompts.200-renderer_num_workers.64-request_rate.64-skip_tokenizer_init.1-threadpool.1",True,10,{}
+4,b1308077-3fd9-4100-a20f-6f5c0bd4d7e5,fa955b,4,measured,vllm_performance.test-geospatial-deployment-custom-dataset-v1,"dataset.dataset_url_input_india_incluster.jsonl-dtype.float16-enforce_eager.1-gpu_memory_utilization.0.9-gpu_type.NVIDIA-A100-80GB-PCIe-image.['icr.io/drl-nextgen/mgazz/vllm:v0.20.1-tt.main', '0.20.1']-io_processor_plugin.terratorch_segmentation-max_batch_tokens.16384-max_num_seq.256-memory.128Gi-model.ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11-n_cpus.48-n_gpus.1-num_prompts.200-renderer_num_workers.32-request_rate.32-skip_tokenizer_init.1-threadpool.1",True,10,{}
+5,de055654-54c1-42a9-9999-32c01d38aa79,c7c1a6,5,measured,vllm_performance.test-geospatial-deployment-custom-dataset-v1,"dataset.dataset_url_input_india_incluster.jsonl-dtype.float16-enforce_eager.1-gpu_memory_utilization.0.9-gpu_type.NVIDIA-A100-80GB-PCIe-image.['icr.io/drl-nextgen/mgazz/vllm:v0.20.1-tt.main', '0.20.1']-io_processor_plugin.terratorch_segmentation-max_batch_tokens.16384-max_num_seq.256-memory.128Gi-model.ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11-n_cpus.48-n_gpus.1-num_prompts.200-renderer_num_workers.32-request_rate.64-skip_tokenizer_init.1-threadpool.1",True,10,{}
+6,1f4868b8-7eaa-4549-87de-fc09dddf79ca,5637b4,6,measured,vllm_performance.test-geospatial-deployment-custom-dataset-v1,"dataset.dataset_url_input_india_incluster.jsonl-dtype.float16-enforce_eager.1-gpu_memory_utilization.0.9-gpu_type.NVIDIA-A100-80GB-PCIe-image.['icr.io/drl-nextgen/mgazz/vllm:v0.20.1-tt.main', '0.20.1']-io_processor_plugin.terratorch_segmentation-max_batch_tokens.16384-max_num_seq.256-memory.128Gi-model.ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11-n_cpus.48-n_gpus.1-num_prompts.200-renderer_num_workers.64-request_rate.32-skip_tokenizer_init.1-threadpool.1",True,10,{}
+7,7df51b86-26c5-4de2-a6c9-fa77f414a57b,499691,7,measured,vllm_performance.test-geospatial-deployment-custom-dataset-v1,"dataset.dataset_url_input_india_incluster.jsonl-dtype.float16-enforce_eager.1-gpu_memory_utilization.0.9-gpu_type.NVIDIA-A100-80GB-PCIe-image.['icr.io/drl-nextgen/mgazz/vllm:v0.20.1-tt.main', '0.20.1']-io_processor_plugin.terratorch_segmentation-max_batch_tokens.16384-max_num_seq.256-memory.128Gi-model.ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11-n_cpus.48-n_gpus.1-num_prompts.200-renderer_num_workers.64-request_rate.64-skip_tokenizer_init.1-threadpool.1",True,10,{}
diff --git a/orchestrator/core/discoveryspace/group_samplers.py b/orchestrator/core/discoveryspace/group_samplers.py
index 05028f48e..c9b1afb53 100644
--- a/orchestrator/core/discoveryspace/group_samplers.py
+++ b/orchestrator/core/discoveryspace/group_samplers.py
@@ -63,14 +63,13 @@ def make_hashable(
         value: float | list | str | dict | None,
     ) -> int | float | tuple | str | None:
         """Convert unhashable types to hashable equivalents"""
-        if isinstance(value, dict):
-            # Convert dict to sorted tuple of items
-            return tuple(sorted(value.items()))
-        if isinstance(value, list):
-            # Convert list to tuple
-            return tuple(value)
-        # Return value as-is if already hashable
-        return value  # type: ignore[return-value]
+        match value:
+            case dict():
+                return tuple(sorted(value.items()))
+            case list():
+                return tuple(value)
+            case _:
+                return value  # type: ignore[return-value]
 
     return frozenset({(k, make_hashable(v)) for k, v in point.items() if k in group})
 
diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/cache_utils.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/cache_utils.py
new file mode 100644
index 000000000..7e0016967
--- /dev/null
+++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/cache_utils.py
@@ -0,0 +1,120 @@
+# Copyright IBM Corporation 2025, 2026
+# SPDX-License-Identifier: MIT
+
+"""Utilities for building cache keys for vLLM performance measurements."""
+
+import json
+
+from ado_actuators.vllm_performance.version_utils import VLLMVersionChecker
+
+
+class CacheKeyBuilder:
+    """Build cache keys for vLLM performance measurements.
+
+    Keys are JSON strings combining environment parameters (model, GPUs,
+    etc.) and benchmark parameters (num_prompts, request_rate, etc.), so
+    measurements are only reused for identical configurations.
+    """
+
+    # Environment parameters that define the deployment (the env dict's keys)
+    ENV_PARAMS = [
+        "model",
+        "image",
+        "n_gpus",
+        "gpu_type",
+        "n_cpus",
+        "memory",
+        "max_batch_tokens",
+        "gpu_memory_utilization",
+        "dtype",
+        "cpu_offload",
+        "max_num_seq",
+        "threadpool",
+        "renderer_num_workers",
+    ]
+
+    BENCHMARK_PARAMS: list[str] = [  # workload parameters read by build()
+        "num_prompts",
+        "request_rate",
+        "max_concurrency",
+        "number_input_tokens",
+        "max_output_tokens",
+        "burstiness",
+        "dataset",
+    ]
+
+    # All parameters used in cache key (not referenced inside this class)
+    ALL_PARAMS = ENV_PARAMS + BENCHMARK_PARAMS
+
+    @classmethod
+    def _normalize_and_extract_env_params(cls, values: dict[str, str]) -> dict:
+        """Return the environment parameters with image and threadpool normalized."""
+        # Use the first element of a non-empty list image; anything else is kept as-is
+        image_value = values.get("image")
+        if isinstance(image_value, list):
+            image_str = image_value[0] if len(image_value) > 0 else image_value
+        else:
+            image_str = image_value
+
+        # Normalize threadpool properties based on vLLM version (defaults: 1 and 32)
+        threadpool_requested = int(values.get("threadpool", 1))
+        renderer_num_workers_requested = int(values.get("renderer_num_workers", 32))
+
+        enable_threadpool = VLLMVersionChecker.supports_threadpool(
+            image_value if image_value is not None else "", threadpool_requested
+        )
+
+        if enable_threadpool:
+            threadpool_value = 1
+            renderer_num_workers_value = renderer_num_workers_requested
+        else:
+            threadpool_value = 0
+            renderer_num_workers_value = 0  # requested worker count is ignored here
+
+        return {
+            "model": values.get("model"),
+            "image": image_str,
+            "n_gpus": values.get("n_gpus"),
+            "gpu_type": values.get("gpu_type"),
+            "n_cpus": values.get("n_cpus"),
+            "memory": values.get("memory"),
+            "max_batch_tokens": values.get("max_batch_tokens"),
+            "gpu_memory_utilization": values.get("gpu_memory_utilization"),
+            "dtype": values.get("dtype"),
+            "cpu_offload": values.get("cpu_offload"),
+            "max_num_seq": values.get("max_num_seq"),
+            "threadpool": threadpool_value,
+            "renderer_num_workers": renderer_num_workers_value,
+        }
+
+    @classmethod
+    def build_env_definition(cls, values: dict[str, str]) -> str:
+        """Return the normalized environment parameters as a JSON string."""
+        env_values = cls._normalize_and_extract_env_params(values)
+        return json.dumps(env_values)  # insertion order; unlike build(), not sorted
+
+    @classmethod
+    def build(cls, values: dict[str, str]) -> str:
+        """Return a JSON cache key combining environment and benchmark parameters."""
+        env_values = cls._normalize_and_extract_env_params(values)
+
+        # Benchmark parameters; keys absent from values are stored as None
+        benchmark_params = {
+            "num_prompts": values.get("num_prompts"),
+            "request_rate": values.get("request_rate"),
+            "max_concurrency": values.get("max_concurrency"),
+            "number_input_tokens": values.get("number_input_tokens"),
+            "max_output_tokens": values.get("max_output_tokens"),
+            "burstiness": values.get("burstiness"),
+            "dataset": values.get("dataset"),
+        }
+
+        # Combine into composite key; sort_keys makes the key order deterministic
+        composite = {
+            "environment": env_values,
+            "benchmark": benchmark_params,
+        }
+        return json.dumps(composite, sort_keys=True)
+
+
+# Made with Bob
diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/env_manager.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/env_manager.py
index c79212f5f..75c4e25d3 100644
--- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/env_manager.py
+++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/env_manager.py
@@ -3,6 +3,7 @@
 
 import asyncio
 import logging
+from dataclasses import dataclass
 from enum import Enum
 from typing import Annotated
 
@@ -26,6 +27,14 @@
 logger = logging.getLogger(__name__)
 
 
+@dataclass
+class CachedMeasurement:
+    """Cached measurement result, stored per cache key for later reuse."""
+
+    measurements: list  # measured property values
+    error: str | None  # error message if the measurement failed, else None
+
+
 class EnvironmentState(Enum):
     """
     Environment state
@@ -136,7 +145,7 @@ def __init__(
 
         # Measurement cache for reusing results across entities with identical
         # environment and benchmark parameters
-        self.measurement_cache: dict[str, dict] = {}
+        self.measurement_cache: dict[str, CachedMeasurement] = {}
 
         # component manager for cleanup
         self.manager = ComponentsManager(
@@ -148,33 +157,17 @@ def __init__(
             pvc_template=pvc_template,
         )
 
-    def get_cached_measurement(self, cache_key: str) -> dict | None:
-        """
-        Get a cached measurement result for the given cache key.
-
-        Args:
-            cache_key: Composite key containing environment and benchmark parameters
-
-        Returns:
-            Cached measurement dict with 'measurements' and 'error' keys, or None if not cached
-        """
+    def get_cached_measurement(self, cache_key: str) -> CachedMeasurement | None:
+        """Get cached measurement result."""
         return self.measurement_cache.get(cache_key)
 
     def cache_measurement(
         self, cache_key: str, measurements: list, error: str | None
     ) -> None:
-        """
-        Cache a measurement result for reuse by subsequent entities with identical parameters.
-
-        Args:
-            cache_key: Composite key containing environment and benchmark parameters
-            measurements: List of measured property values
-            error: Error message if measurement failed, None otherwise
-        """
-        self.measurement_cache[cache_key] = {
-            "measurements": measurements,
-            "error": error,
-        }
+        """Cache measurement result for reuse."""
+        self.measurement_cache[cache_key] = CachedMeasurement(
+            measurements=measurements, error=error
+        )
         logger.debug(f"Cached measurement for key: {cache_key}")
 
     def _delete_environment_k8s_resources(self, k8s_name: str) -> None:
diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py
index 9f447d97c..2eabb7c1b 100644
--- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py
+++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py
@@ -11,6 +11,7 @@
 from ado_actuators.vllm_performance.actuator_parameters import (
     VLLMPerformanceTestParameters,
 )
+from ado_actuators.vllm_performance.cache_utils import CacheKeyBuilder
 from ado_actuators.vllm_performance.env_manager import (
     Environment,
     EnvironmentManager,
@@ -26,6 +27,7 @@
 from ado_actuators.vllm_performance.k8s.yaml_support.build_components import (
     VLLMDtype,
 )
+from ado_actuators.vllm_performance.version_utils import VLLMVersionChecker
 from ado_actuators.vllm_performance.vllm_performance_test.benchmark_models import (
     BenchmarkParameters,
     BenchmarkResult,
@@ -39,7 +41,6 @@
     execute_guidellm_benchmark,
     execute_guidellm_geospatial_benchmark,
 )
-from packaging import version
 from ray.actor import ActorHandle
 
 from orchestrator.modules.actuators.measurement_queue import MeasurementQueue
@@ -54,212 +55,6 @@
 logger = logging.getLogger(__name__)
 
 
-def _get_vllm_version_from_image_value(image_value: list | str) -> str | None:
-    """
-    Extract vLLM version from image property value.
-
-    Args:
-        image_value: The image property value, either a list [image_url, vllm_version],
-                    or a string (for backward compatibility)
-
-    Returns:
-        Version string if found in list, None otherwise
-    """
-    logger.debug(f"_get_vllm_version_from_image_value called with: {image_value}")
-
-    # If image_value is a list with vllm_version, extract it
-    if isinstance(image_value, list):
-        if len(image_value) > 1:
-            version = image_value[1]
-            logger.debug(f"Extracted vLLM version from list: {version}")
-            return version
-        logger.debug("List has only one element, no version info available")
-        return None
-
-    # For backward compatibility: if it's a string, we don't have version info
-    logger.debug("Image value is a string, no version info available")
-    return None
-
-
-def _should_enable_threadpool(image_value: list | str, threadpool_value: int) -> bool:
-    """
-    Determine if threadpool should be enabled based on vLLM version and user preference.
-
-    Threadpool is only supported in vLLM >= 0.20.0. This function checks:
-    1. If user explicitly disabled threadpool (threadpool=0), return False
-    2. If vLLM version exists in image_value list and version < 0.20.0, return False
-    3. Otherwise, return True (user wants it and version supports it or no version info)
-
-    Args:
-        image_value: The image property value (list [image_url, vllm_version] or string)
-        threadpool_value: User's threadpool preference (0 or 1)
-
-    Returns:
-        True if threadpool should be enabled, False otherwise
-    """
-    logger.debug(
-        f"_should_enable_threadpool called with: image_value={image_value}, "
-        f"threadpool_value={threadpool_value}"
-    )
-
-    # If user explicitly disabled, respect that
-    if threadpool_value == 0:
-        logger.debug("Threadpool explicitly disabled by user (threadpool_value=0)")
-        return False
-
-    # Get version from image value
-    vllm_version_str = _get_vllm_version_from_image_value(image_value)
-    logger.debug(f"Retrieved vLLM version: {vllm_version_str}")
-
-    # If no version info, assume it's supported (backward compatible)
-    if vllm_version_str is None:
-        logger.warning(
-            f"No vLLM version info found for image {image_value}. "
-            "Assuming threadpool is supported."
-        )
-        return True
-
-    # Parse and compare version
-    try:
-        vllm_ver = version.parse(vllm_version_str)
-        min_version = version.parse("0.20.0")
-        logger.debug(
-            f"Parsed versions - vLLM: {vllm_ver}, minimum required: {min_version}"
-        )
-
-        if vllm_ver < min_version:
-            logger.info(
-                f"Threadpool disabled: vLLM version {vllm_version_str} < 0.20.0 "
-                f"for image {image_value}"
-            )
-            return False
-
-        logger.info(
-            f"Threadpool enabled: vLLM version {vllm_version_str} >= 0.20.0 "
-            f"for image {image_value}"
-        )
-        return True
-    except Exception as e:
-        logger.error(
-            f"Failed to parse vLLM version '{vllm_version_str}' for image {image_value}: {e}. "
-            "Assuming threadpool is supported."
-        )
-        return True
-
-
-def _build_entity_env(values: dict[str, str]) -> str:
-    """
-    This is the list of entity parameters that define the environment:
-        * model name
-        * image name
-        * number of gpus
-        * gpu type
-        * number of cpus
-        * memory
-        * max batch tokens
-        * max number of sequences
-        * gpu memory utilization
-        * data type
-        * cpu offload
-    Build entity based environment parameters
-    :param values: experiment values
-    :return: definition
-    """
-    # Extract image string from list if needed
-    image_value = values.get("image")
-    if isinstance(image_value, list):
-        image_str = image_value[0] if len(image_value) > 0 else image_value
-    else:
-        image_str = image_value
-
-    # Determine effective threadpool and renderer_num_workers values
-    threadpool_requested = int(values.get("threadpool", 1))
-    renderer_num_workers_requested = int(values.get("renderer_num_workers", 32))
-
-    # Check if threadpool will actually be enabled based on version
-    # Use empty string as fallback if image_value is None
-    enable_threadpool = _should_enable_threadpool(
-        image_value if image_value is not None else "", threadpool_requested
-    )
-
-    # Normalize values for environment definition:
-    # - If threadpool is disabled (version < 0.20.0 or user disabled it),
-    #   set both to 0 so different renderer_num_workers values don't create
-    #   different environments when they would behave identically
-    if enable_threadpool:
-        threadpool_value = 1
-        renderer_num_workers_value = renderer_num_workers_requested
-    else:
-        threadpool_value = 0
-        renderer_num_workers_value = 0  # Normalize to 0 when not used
-
-    env_values = {
-        "model": values.get("model"),
-        "image": image_str,
-        "n_gpus": values.get("n_gpus"),
-        "gpu_type": values.get("gpu_type"),
-        "n_cpus": values.get("n_cpus"),
-        "memory": values.get("memory"),
-        "max_batch_tokens": values.get("max_batch_tokens"),
-        "gpu_memory_utilization": values.get("gpu_memory_utilization"),
-        "dtype": values.get("dtype"),
-        "cpu_offload": values.get("cpu_offload"),
-        "max_num_seq": values.get("max_num_seq"),
-        "threadpool": threadpool_value,
-        "renderer_num_workers": renderer_num_workers_value,
-    }
-    return json.dumps(env_values)
-
-
-def _build_benchmark_params_key(values: dict[str, str]) -> str:
-    """
-    Build a cache key from benchmark parameters that affect measurement results.
-
-    These parameters define the workload characteristics and must be included
-    in the cache key to ensure measurements are only reused for identical tests.
-
-    Args:
-        values: experiment values
-
-    Returns:
-        JSON string of benchmark parameters
-    """
-    benchmark_params = {
-        "num_prompts": values.get("num_prompts"),
-        "request_rate": values.get("request_rate"),
-        "max_concurrency": values.get("max_concurrency"),
-        "number_input_tokens": values.get("number_input_tokens"),
-        "max_output_tokens": values.get("max_output_tokens"),
-        "burstiness": values.get("burstiness"),
-        "dataset": values.get("dataset"),
-    }
-    return json.dumps(benchmark_params, sort_keys=True)
-
-
-def _build_cache_key(values: dict[str, str]) -> str:
-    """
-    Build a composite cache key from both environment and benchmark parameters.
-
-    Cache hits should only occur when both the deployment environment AND
-    the benchmark workload parameters are identical.
-
-    Args:
-        values: experiment values
-
-    Returns:
-        composite cache key as JSON string
-    """
-    env_key = _build_entity_env(values)
-    benchmark_key = _build_benchmark_params_key(values)
-
-    # Combine both keys into a single cache key
-    composite = {
-        "environment": json.loads(env_key),
-        "benchmark": json.loads(benchmark_key),
-    }
-    return json.dumps(composite, sort_keys=True)
-
-
 def _create_environment(
     values: dict[str, str],
     actuator: VLLMPerformanceTestParameters,
@@ -270,28 +65,10 @@ def _create_environment(
     check_interval: int = 5,
     timeout: int = 1200,
 ) -> tuple[str, str]:
-    """
-     Create environment with version-aware threadpool support.
-
-     Important: This function will block until env_manager.get_environment
-     returns an environment.
-     The env_manager will not return an environment until there is one free
-     to be used
-
-     :param values: experiment values
-     :param actuator: actuator parameters
-     :param node_selector: node selector
-     :param request_id the request associated with this environment
-     :param env_manager: environment manager
-     :param experiment: experiment definition (used for version checking)
-     :param check_interval: wait interval
-     :param timeout: timeout
-    :return: kubernetes environment name
-
-    :raises K8sEnvironmentCreationError if there was an issue
-    - If the creation step fails after three attempts
-    - If after creation the environment was not in ready state after timeout seconds (1200 default)
+    """Create environment with version-aware threadpool support.
 
+    Blocks until env_manager returns an available environment.
+    Raises K8sEnvironmentCreationError if creation fails after 3 attempts or timeout.
     """
     from orchestrator.modules.operators.console_output import (
         RichConsoleSpinnerMessage,
@@ -304,7 +81,7 @@ def _create_environment(
     model = values.get("model")
 
     # create environment definition
-    definition = _build_entity_env(values=values)
+    definition = CacheKeyBuilder.build_env_definition(values=values)
     console.put.remote(
         message=RichConsoleSpinnerMessage(
             id=request_id,
@@ -329,7 +106,6 @@ def _create_environment(
             )
             break
 
-        # This is to guarantee that the request is next in line as soon as an environment is available
         ray.get(env_manager.wait_for_env.remote())
 
     error = None
@@ -348,8 +124,6 @@ def _create_environment(
             # Environment does not exist, create it
             logger.debug(f"Environment {env.k8s_name} does not exist. Creating it")
             tmout = 1
-
-            # To avoid data corruption we wait if another environment is concurrently downloading the same model for the first time
             ray.get(
                 env_manager.wait_deployment_before_starting.remote(
                     env=env, request_id=request_id
@@ -365,27 +139,13 @@ def _create_environment(
                     )
                 )
                 try:
-                    # Determine if threadpool should be enabled based on version
                     image_value = values.get("image", "")
                     threadpool_requested = int(values.get("threadpool", 1))
-                    logger.debug(
-                        f"Before _should_enable_threadpool: image_value={image_value}, "
-                        f"threadpool_requested={threadpool_requested}"
-                    )
-                    enable_threadpool = _should_enable_threadpool(
+                    enable_threadpool = VLLMVersionChecker.supports_threadpool(
                         image_value, threadpool_requested
                     )
-                    logger.debug(
-                        f"After _should_enable_threadpool: enable_threadpool={enable_threadpool}"
-                    )
-
-                    # Convert boolean back to int for consistency with existing code
                     threadpool_value = 1 if enable_threadpool else 0
-                    logger.debug(
-                        f"Final threadpool_value to be used: {threadpool_value}"
-                    )
 
-                    # Extract image string from list if needed
                     if isinstance(image_value, list):
                         image_name = image_value[0] if len(image_value) > 0 else ""
                     else:
@@ -426,7 +186,6 @@ def _create_environment(
                         check_interval=check_interval,
                         timeout=timeout,
                     )
-                    # Update manager
                     env_manager.done_creating.remote(identifier=env.k8s_name)
                     error = None
                     break
@@ -439,7 +198,6 @@ def _create_environment(
                     time.sleep(tmout)
                     tmout *= 2
 
-            # Check if error after three attempts
             if error is None:
                 console.put.remote(
                     message=RichConsoleSpinnerMessage(
@@ -460,9 +218,6 @@ def _create_environment(
                     )
                 )
 
-                # In case of failure creating the environment deployment we must release any
-                # other request with a deployment conflicting with this request's deployment
-                # We also need to release the slot for this environment
                 ray.get(
                     env_manager.cleanup_failed_deployment.remote(
                         identifier=env.k8s_name
@@ -481,27 +236,11 @@ def _connect_to_vllm_server(
     actuator_parameters: VLLMPerformanceTestParameters,
     port: int,
 ) -> tuple[str, subprocess.Popen | None]:
-    """Returns the URL of the vLLM inference server
-
-    Creates a port forward for the inference server if test
-    is not running on the cluster with the service
-
-    Parameters:
-        k8s_name: The name of the vLLM service
-        actuator_parameters: VLLMPerformanceTestParameters instance containing
-            namespace and test location (in_cluster or not) information
-
-    Returns:
-        A tuple containing
-        - The URL of the created vLLM server
-        - If a port-forward is created the POpen object for the port-forward
-          Otherwise None
+    """Returns vLLM server URL and optional port-forward process.
 
-    Raise:
-        K8ConnectionError if a port-forward could not be created
+    Creates port-forward if not running in-cluster.
+    Raises K8sConnectionError if port-forward fails.
     """
-
-    # create environment
     if not actuator_parameters.in_cluster:
         logger.info("We are running locally connecting to remote cluster")
         logger.info("please make sure that you have executed `oc login`")
@@ -511,13 +250,11 @@ def _connect_to_vllm_server(
         )
 
     if actuator_parameters.in_cluster:
-        # we are running in cluster, connect to service directly
         base_url = (
             f"http://{k8s_name}.{actuator_parameters.namespace}.svc.cluster.local:80"
         )
         pf = None
     else:
-        # we are running locally. need to do port-forward and connect to the local one
         pf_command_args = [
             "kubectl",
             "port-forward",
@@ -532,9 +269,7 @@ def _connect_to_vllm_server(
                 stdout=subprocess.DEVNULL,
                 stderr=subprocess.DEVNULL,
             )
-            # make sure that port forwarding is up
             time.sleep(5)
-            # Check if there is a returncode- if there is it means port-forward exited
             if pf.returncode:
                 raise K8sConnectionError(
                     f"failed to start port forward to service {k8s_name} - port-forward command exited for unknown reason. Check logs."
@@ -560,32 +295,11 @@ def run_resource_and_workload_experiment(
     env_manager: ActorHandle,
     local_port: int,
 ) -> None:
-    """
-    Runs an experiment on a specific compute resource and inference workload configuration.
-
-    This requires spinning up a vLLM instance with the given compute resources
-
-    :param request: measurement request
-    :param experiment: definition of experiment
-    :param state_update_queue: update queue
-    :param actuator_parameters: actuator parameters
-    :param node_selector: node selector
-    :param env_manager: environment manager
-    :param local_port: local port to use
-    :return:
-    """
-
-    # This function
-    # 1. Performs the measurement represented by MeasurementRequest
-    # 2. Updates MeasurementRequest with the results of the measurement and status
-    # 3. Puts it in the stateUpdateQueue
-
-    # placeholder for measurements
+    """Run experiment on specific compute resource and workload configuration."""
     measurements = []
     current_port = local_port - 1
     console = ray.get_actor(name="RichConsoleQueue")
 
-    # For every entity
     for entity in request.entities:
         port_forward = None
         definition = None
@@ -593,12 +307,9 @@ def run_resource_and_workload_experiment(
         try:
             values = experiment.propertyValuesFromEntity(entity=entity)
 
-            # Check if we've already measured an entity with the same environment and benchmark parameters
-            # Cache key includes both environment (model, GPUs, etc.) and benchmark params (num_prompts, request_rate, etc.)
-            cache_key = _build_cache_key(values)
+            cache_key = CacheKeyBuilder.build(values)
             logger.info("cache_key: %s", cache_key)
 
-            # Check actor's cache for this measurement
             cached_result = ray.get(
                 env_manager.get_cached_measurement.remote(cache_key)
             )
@@ -610,8 +321,8 @@ def run_resource_and_workload_experiment(
                 measurements.append(
                     create_measurement_result(
                         identifier=entity.identifier,
-                        measurements=cached_result["measurements"],
-                        error=cached_result["error"],
+                        measurements=cached_result.measurements,
+                        error=cached_result.error,
                         reference=request.experimentReference,
                     )
                 )
@@ -619,7 +330,6 @@ def run_resource_and_workload_experiment(
 
             logger.info(f"Creating K8s environment for {entity.identifier}")
 
-            # Will raise an K8sEnvironmentCreationError if the environment could not be created
             k8s_name, definition = _create_environment(
                 values=values,
                 actuator=actuator_parameters,
@@ -629,8 +339,6 @@ def run_resource_and_workload_experiment(
                 request_id=request.requestid,
             )
 
-            # Will raise an K8sConnectionError if a port-forward was required
-            # but could not be created
             current_port += 1
             base_url, port_forward = _connect_to_vllm_server(
                 k8s_name, actuator_parameters, current_port
@@ -639,8 +347,6 @@ def run_resource_and_workload_experiment(
             logger.info(f"Will use vllm server at {base_url}")
 
             benchmark_parameters = BenchmarkParameters.model_validate(values)
-            # In this case the endpoint does not come through the property values and is generated
-            # when creating the vLLM deployment
             benchmark_parameters.endpoint = base_url
 
             started_benchmarking = True
@@ -754,9 +460,6 @@ def run_resource_and_workload_experiment(
                 reference=request.experimentReference,
             )
             measurements.append(measurement_result)
-
-            # Cache the measurement in the actor for potential reuse by subsequent entities
-            # with the same environment and benchmark parameters
             env_manager.cache_measurement.remote(cache_key, measured_values, None)
         finally:
             if started_benchmarking:
@@ -772,7 +475,6 @@ def run_resource_and_workload_experiment(
             if definition is not None:
                 env_manager.done_using.remote(identifier=k8s_name)
 
-    # For multi entity experiments if ONE entity had ValidResults the status must be SUCCESS
     if len(measurements) > 0:
         request.measurements = measurements
     request.status = compute_measurement_status(measurements=measurements)
@@ -788,26 +490,8 @@ def run_workload_experiment(
     state_update_queue: MeasurementQueue,
     actuator_parameters: VLLMPerformanceTestParameters,
 ) -> None:
-    """
-    Runs an experiment with a specific inference workload configuration on a given endpoint.
-
-    The compute resource associated with the end-point is not known.
-
-    :param request: measurement request
-    :param experiment: definition of experiment
-    :param state_update_queue: update queue
-    :param actuator_parameters: actuator parameters
-    :return:
-    """
-
-    # This function
-    # 1. Performs the measurement represented by MeasurementRequest
-    # 2. Updates MeasurementRequest with the results of the measurement and status
-    # 3. Puts it in the stateUpdateQueue
-
-    # placeholder for measurements
+    """Run experiment with specific workload configuration on given endpoint."""
     measurements = []
-    # For every entity
     for entity in request.entities:
         measured_values = []
         error = None
@@ -820,7 +504,6 @@ def run_workload_experiment(
 
             benchmark_parameters = BenchmarkParameters.model_validate(values)
 
-            # Will raise VLLMBenchmarkError if there is a problem
             logger.info(f"Executing experiment: {experiment.identifier}")
             result: BenchmarkResult
             if experiment.identifier in [
@@ -908,7 +591,6 @@ def run_workload_experiment(
                 )
             )
 
-    # For multi entity experiments if ONE entity had ValidResults the status must be SUCCESS
     if len(measurements) > 0:
         request.measurements = measurements
     request.status = compute_measurement_status(measurements=measurements)
diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/version_utils.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/version_utils.py
new file mode 100644
index 000000000..10516847a
--- /dev/null
+++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/version_utils.py
@@ -0,0 +1,41 @@
+# Copyright IBM Corporation 2025, 2026
+# SPDX-License-Identifier: MIT
+
+"""Utilities for vLLM version checking and threadpool support detection."""
+
+from packaging import version
+
+
+class VLLMVersionChecker:
+    """Decide whether a vLLM image is recent enough to run with the threadpool."""
+
+    # Oldest vLLM release on which the threadpool is enabled.
+    THREADPOOL_MIN_VERSION = "0.20.0"
+
+    @staticmethod
+    def parse_version(image_value: list | str) -> str | None:
+        """Return the second element of a list image value, or None otherwise."""
+        if isinstance(image_value, list) and len(image_value) > 1:
+            return image_value[1]
+        return None
+
+    @classmethod
+    def supports_threadpool(
+        cls, image_value: list | str, threadpool_requested: int
+    ) -> bool:
+        """Return True if threadpool was requested and the vLLM version allows it.
+
+        A missing or unparsable version leaves the threadpool enabled.
+        """
+        if threadpool_requested == 0:
+            return False
+        declared = cls.parse_version(image_value)
+        if declared is None:
+            return True
+        try:
+            return version.parse(declared) >= version.parse(cls.THREADPOOL_MIN_VERSION)
+        except (version.InvalidVersion, TypeError):
+            return True
+
+
+# Made with Bob
diff --git a/plugins/actuators/vllm_performance/tests/test_cache_utils.py b/plugins/actuators/vllm_performance/tests/test_cache_utils.py
new file mode 100644
index 000000000..26a243b43
--- /dev/null
+++ b/plugins/actuators/vllm_performance/tests/test_cache_utils.py
@@ -0,0 +1,144 @@
+# Copyright IBM Corporation 2025, 2026
+# SPDX-License-Identifier: MIT
+
+"""Tests for cache key building utilities."""
+
+import json
+
+import pytest
+from ado_actuators.vllm_performance.cache_utils import CacheKeyBuilder
+
+
+class TestCacheKeyBuilder:
+    """Tests for the JSON cache keys produced by CacheKeyBuilder.build."""
+
+    @pytest.fixture
+    def base_values(self):
+        """Return property values covering environment and benchmark settings."""
+        return {
+            "model": "meta-llama/Llama-2-7b-hf",
+            "image": ["vllm/vllm-openai:v0.20.1", "0.20.1"],
+            "n_gpus": "1",
+            "gpu_type": "nvidia-l4",
+            "n_cpus": "8",
+            "memory": "32Gi",
+            "max_batch_tokens": "4096",
+            "gpu_memory_utilization": "0.9",
+            "dtype": "auto",
+            "cpu_offload": "0",
+            "max_num_seq": "256",
+            "threadpool": "1",
+            "renderer_num_workers": "32",
+            "num_prompts": "100",
+            "request_rate": "10",
+            "max_concurrency": "50",
+            "number_input_tokens": "128",
+            "max_output_tokens": "256",
+            "burstiness": "1.0",
+            "dataset": "random",
+        }
+
+    def test_build_returns_json_string(self, base_values):
+        """Test that build returns JSON with 'environment' and 'benchmark' keys."""
+        cache_key = CacheKeyBuilder.build(base_values)
+        assert isinstance(cache_key, str)
+        parsed = json.loads(cache_key)
+        assert "environment" in parsed
+        assert "benchmark" in parsed
+
+    def test_same_values_produce_same_key(self, base_values):
+        """Test that a dict and its copy produce identical cache keys."""
+        key1 = CacheKeyBuilder.build(base_values)
+        key2 = CacheKeyBuilder.build(base_values.copy())
+        assert key1 == key2
+
+    def test_different_env_params_produce_different_keys(self, base_values):
+        """Test that changing n_gpus changes the cache key."""
+        key1 = CacheKeyBuilder.build(base_values)
+
+        modified_values = base_values.copy()
+        modified_values["n_gpus"] = "2"
+        key2 = CacheKeyBuilder.build(modified_values)
+
+        assert key1 != key2
+
+    def test_different_benchmark_params_produce_different_keys(self, base_values):
+        """Test that changing num_prompts changes the cache key."""
+        key1 = CacheKeyBuilder.build(base_values)
+
+        modified_values = base_values.copy()
+        modified_values["num_prompts"] = "200"
+        key2 = CacheKeyBuilder.build(modified_values)
+
+        assert key1 != key2
+
+    def test_threadpool_normalization_vllm_0_18(self, base_values):
+        """Test threadpool and renderer_num_workers are zeroed for vLLM < 0.20.0."""
+        base_values["image"] = ["vllm/vllm-openai:v0.18.0", "0.18.0"]
+        base_values["threadpool"] = "1"
+        base_values["renderer_num_workers"] = "32"
+
+        cache_key = CacheKeyBuilder.build(base_values)
+        parsed = json.loads(cache_key)
+
+        assert parsed["environment"]["threadpool"] == 0
+        assert parsed["environment"]["renderer_num_workers"] == 0
+
+    def test_threadpool_normalization_vllm_0_20(self, base_values):
+        """Test threadpool and renderer_num_workers are kept for vLLM >= 0.20.0."""
+        base_values["image"] = ["vllm/vllm-openai:v0.20.1", "0.20.1"]
+        base_values["threadpool"] = "1"
+        base_values["renderer_num_workers"] = "32"
+
+        cache_key = CacheKeyBuilder.build(base_values)
+        parsed = json.loads(cache_key)
+
+        assert parsed["environment"]["threadpool"] == 1
+        assert parsed["environment"]["renderer_num_workers"] == 32
+
+    def test_different_renderer_num_workers_same_key_when_disabled(self, base_values):
+        """Test renderer_num_workers does not change the key for vLLM < 0.20.0."""
+        base_values["image"] = ["vllm/vllm-openai:v0.18.0", "0.18.0"]
+        base_values["threadpool"] = "1"
+        base_values["renderer_num_workers"] = "32"
+        key1 = CacheKeyBuilder.build(base_values)
+
+        base_values["renderer_num_workers"] = "64"
+        key2 = CacheKeyBuilder.build(base_values)
+
+        assert key1 == key2
+
+    def test_different_renderer_num_workers_different_key_when_enabled(
+        self, base_values
+    ):
+        """Test renderer_num_workers changes the key for vLLM >= 0.20.0."""
+        base_values["image"] = ["vllm/vllm-openai:v0.20.1", "0.20.1"]
+        base_values["threadpool"] = "1"
+        base_values["renderer_num_workers"] = "32"
+        key1 = CacheKeyBuilder.build(base_values)
+
+        base_values["renderer_num_workers"] = "64"
+        key2 = CacheKeyBuilder.build(base_values)
+
+        assert key1 != key2
+
+    def test_image_list_extraction(self, base_values):
+        """Test that the image URL is taken from the first element of a list value."""
+        base_values["image"] = ["vllm/vllm-openai:v0.20.1", "0.20.1"]
+        cache_key = CacheKeyBuilder.build(base_values)
+        parsed = json.loads(cache_key)
+
+        assert parsed["environment"]["image"] == "vllm/vllm-openai:v0.20.1"
+
+    def test_image_string_backward_compatibility(self, base_values):
+        """Test backward compatibility with plain string image values."""
+        base_values["image"] = "vllm/vllm-openai:v0.20.1"
+        cache_key = CacheKeyBuilder.build(base_values)
+        parsed = json.loads(cache_key)
+
+        assert parsed["environment"]["image"] == "vllm/vllm-openai:v0.20.1"
+        # Without version info, threadpool stays enabled (backward compatible)
+        assert parsed["environment"]["threadpool"] == 1
+
+
+# Made with Bob
diff --git a/plugins/actuators/vllm_performance/tests/test_experiment_executor.py b/plugins/actuators/vllm_performance/tests/test_experiment_executor.py
index 2216c2e63..4d0f8a0e1 100644
--- a/plugins/actuators/vllm_performance/tests/test_experiment_executor.py
+++ b/plugins/actuators/vllm_performance/tests/test_experiment_executor.py
@@ -1,224 +1,120 @@
 # Copyright IBM Corporation 2025, 2026
 # SPDX-License-Identifier: MIT
 
-"""
-Unit tests for experiment_executor module functions.
-Tests version extraction from image property values.
-"""
+"""Unit tests for experiment_executor module functions."""
 
 import json
 
-from ado_actuators.vllm_performance.experiment_executor import (
-    _build_benchmark_params_key,
-    _build_cache_key,
-    _build_entity_env,
-    _get_vllm_version_from_image_value,
-)
+import pytest
+from ado_actuators.vllm_performance.cache_utils import CacheKeyBuilder
+from ado_actuators.vllm_performance.version_utils import VLLMVersionChecker
+
+
+@pytest.fixture
+def base_env_values():
+    """Base environment values for testing."""
+    return {
+        "model": "test-model",
+        "n_gpus": "1",
+        "gpu_type": "nvidia-a100",
+        "n_cpus": "8",
+        "memory": "32Gi",
+        "max_batch_tokens": "4096",
+        "gpu_memory_utilization": "0.9",
+        "dtype": "auto",
+        "cpu_offload": "0",
+        "max_num_seq": "256",
+    }
+
+
+@pytest.fixture
+def base_benchmark_values():
+    """Base benchmark values for testing."""
+    return {
+        "num_prompts": "200",
+        "request_rate": "32",
+        "dataset": "random",
+    }
 
 
 class TestGetVllmVersionFromImageValue:
-    """Test suite for _get_vllm_version_from_image_value function"""
-
-    def test_version_extraction_from_list_value(self) -> None:
-        """Test extracting vLLM version from list image value"""
-        image_value = [
-            "icr.io/drl-nextgen/mgazz/vllm:v0.18.0-tt.v1.2.5",
-            "0.18.0",
-        ]
-
-        version = _get_vllm_version_from_image_value(image_value)
-        assert version == "0.18.0"
-
-    def test_version_extraction_from_another_list_value(self) -> None:
-        """Test extracting vLLM version from another list image value"""
-        image_value = [
-            "vllm/vllm-openai:v0.14.0",
-            "0.14.0",
-        ]
-
-        version = _get_vllm_version_from_image_value(image_value)
-        assert version == "0.14.0"
-
-    def test_version_extraction_returns_none_for_string_value(self) -> None:
-        """Test that None is returned when image value is a string (backward compatibility)"""
-        image_value = "icr.io/drl-nextgen/mgazz/vllm:v0.18.0-tt.v1.2.5"
-
-        version = _get_vllm_version_from_image_value(image_value)
-        assert version is None
-
-    def test_version_extraction_returns_none_for_list_without_version(self) -> None:
-        """Test that None is returned when list has only one element (no version)"""
-        image_value = [
-            "icr.io/drl-nextgen/mgazz/vllm:v0.18.0-tt.v1.2.5",
-        ]
-
-        version = _get_vllm_version_from_image_value(image_value)
-        assert version is None
-
-    def test_version_extraction_with_latest_tag(self) -> None:
-        """Test extracting version for latest tag"""
-        image_value = [
-            "vllm/vllm-openai:latest",
-            "0.21.0",
-        ]
-
-        version = _get_vllm_version_from_image_value(image_value)
-        assert version == "0.21.0"
+    """Test suite for version extraction from image values."""
+
+    @pytest.mark.parametrize(
+        "image_value,expected",
+        [
+            (["icr.io/drl-nextgen/mgazz/vllm:v0.18.0-tt.v1.2.5", "0.18.0"], "0.18.0"),
+            (["vllm/vllm-openai:v0.14.0", "0.14.0"], "0.14.0"),
+            (["vllm/vllm-openai:latest", "0.21.0"], "0.21.0"),
+            ("icr.io/drl-nextgen/mgazz/vllm:v0.18.0-tt.v1.2.5", None),
+            (["icr.io/drl-nextgen/mgazz/vllm:v0.18.0-tt.v1.2.5"], None),
+        ],
+    )
+    def test_version_extraction(self, image_value, expected) -> None:
+        """Test version extraction from various image value formats."""
+        version = VLLMVersionChecker.parse_version(image_value)
+        assert version == expected
 
 
 class TestBuildEntityEnv:
-    """Test suite for _build_entity_env function"""
-
-    def test_renderer_num_workers_normalized_when_vllm_version_less_than_0_20_0(
-        self,
-    ) -> None:
-        """Test that renderer_num_workers is normalized to 0 when vLLM < 0.20.0"""
-        # Test with vLLM 0.18.0 (< 0.20.0)
-        values = {
-            "model": "test-model",
-            "image": ["icr.io/test/vllm:v0.18.0", "0.18.0"],
-            "n_gpus": "1",
-            "gpu_type": "nvidia-a100",
-            "n_cpus": "8",
-            "memory": "32Gi",
-            "max_batch_tokens": "4096",
-            "gpu_memory_utilization": "0.9",
-            "dtype": "auto",
-            "cpu_offload": "0",
-            "max_num_seq": "256",
-            "threadpool": "1",
-            "renderer_num_workers": "64",  # Should be normalized to 0
-        }
-
-        result = _build_entity_env(values)
-        result_dict = json.loads(result)
-
-        # Both threadpool and renderer_num_workers should be 0
-        assert result_dict["threadpool"] == 0
-        assert result_dict["renderer_num_workers"] == 0
-
-    def test_renderer_num_workers_preserved_when_vllm_version_greater_than_0_20_0(
-        self,
-    ) -> None:
-        """Test that renderer_num_workers is preserved when vLLM >= 0.20.0"""
-        # Test with vLLM 0.21.0 (>= 0.20.0)
-        values = {
-            "model": "test-model",
-            "image": ["icr.io/test/vllm:v0.21.0", "0.21.0"],
-            "n_gpus": "1",
-            "gpu_type": "nvidia-a100",
-            "n_cpus": "8",
-            "memory": "32Gi",
-            "max_batch_tokens": "4096",
-            "gpu_memory_utilization": "0.9",
-            "dtype": "auto",
-            "cpu_offload": "0",
-            "max_num_seq": "256",
-            "threadpool": "1",
-            "renderer_num_workers": "64",
-        }
-
-        result = _build_entity_env(values)
-        result_dict = json.loads(result)
-
-        # Both should be preserved
-        assert result_dict["threadpool"] == 1
-        assert result_dict["renderer_num_workers"] == 64
-
-    def test_renderer_num_workers_normalized_when_threadpool_disabled_by_user(
+    """Test suite for environment definition building."""
+
+    @pytest.mark.parametrize(
+        "image,threadpool,renderer_workers,expected_threadpool,expected_workers",
+        [
+            (["icr.io/test/vllm:v0.18.0", "0.18.0"], "1", "64", 0, 0),
+            (["icr.io/test/vllm:v0.21.0", "0.21.0"], "1", "64", 1, 64),
+            (["icr.io/test/vllm:v0.21.0", "0.21.0"], "0", "64", 0, 0),
+            ("icr.io/test/vllm:v0.18.0", "1", "64", 1, 64),
+        ],
+    )
+    def test_threadpool_normalization(
         self,
+        base_env_values,
+        image,
+        threadpool,
+        renderer_workers,
+        expected_threadpool,
+        expected_workers,
     ) -> None:
-        """Test that renderer_num_workers is normalized to 0 when user disables threadpool"""
-        # Test with vLLM 0.21.0 but threadpool=0
+        """Test threadpool and renderer_num_workers normalization."""
         values = {
-            "model": "test-model",
-            "image": ["icr.io/test/vllm:v0.21.0", "0.21.0"],
-            "n_gpus": "1",
-            "gpu_type": "nvidia-a100",
-            "n_cpus": "8",
-            "memory": "32Gi",
-            "max_batch_tokens": "4096",
-            "gpu_memory_utilization": "0.9",
-            "dtype": "auto",
-            "cpu_offload": "0",
-            "max_num_seq": "256",
-            "threadpool": "0",  # User explicitly disabled
-            "renderer_num_workers": "64",  # Should be normalized to 0
+            **base_env_values,
+            "image": image,
+            "threadpool": threadpool,
+            "renderer_num_workers": renderer_workers,
         }
 
-        result = _build_entity_env(values)
+        result = CacheKeyBuilder.build_env_definition(values)
         result_dict = json.loads(result)
 
-        # Both should be 0
-        assert result_dict["threadpool"] == 0
-        assert result_dict["renderer_num_workers"] == 0
+        assert result_dict["threadpool"] == expected_threadpool
+        assert result_dict["renderer_num_workers"] == expected_workers
 
-    def test_different_renderer_num_workers_same_env_when_vllm_less_than_0_20_0(
-        self,
+    def test_different_renderer_workers_same_env_vllm_0_18(
+        self, base_env_values
     ) -> None:
-        """Test that different renderer_num_workers values produce same env when vLLM < 0.20.0"""
-        base_values = {
-            "model": "test-model",
+        """Test different renderer_num_workers produce same env for vLLM < 0.20.0."""
+        base = {
+            **base_env_values,
             "image": ["icr.io/test/vllm:v0.18.0", "0.18.0"],
-            "n_gpus": "1",
-            "gpu_type": "nvidia-a100",
-            "n_cpus": "8",
-            "memory": "32Gi",
-            "max_batch_tokens": "4096",
-            "gpu_memory_utilization": "0.9",
-            "dtype": "auto",
-            "cpu_offload": "0",
-            "max_num_seq": "256",
             "threadpool": "1",
         }
 
-        # Test with different renderer_num_workers values
-        values_32 = {**base_values, "renderer_num_workers": "32"}
-        values_64 = {**base_values, "renderer_num_workers": "64"}
-        values_128 = {**base_values, "renderer_num_workers": "128"}
-
-        env_32 = _build_entity_env(values_32)
-        env_64 = _build_entity_env(values_64)
-        env_128 = _build_entity_env(values_128)
-
-        # All should produce the same environment definition
-        assert env_32 == env_64 == env_128
-
-        # Verify they all have renderer_num_workers=0
-        result_dict = json.loads(env_32)
-        assert result_dict["renderer_num_workers"] == 0
-
-    def test_backward_compatibility_with_string_image(self) -> None:
-        """Test backward compatibility when image is a string (no version info)"""
-        values = {
-            "model": "test-model",
-            "image": "icr.io/test/vllm:v0.18.0",  # String, no version info
-            "n_gpus": "1",
-            "gpu_type": "nvidia-a100",
-            "n_cpus": "8",
-            "memory": "32Gi",
-            "max_batch_tokens": "4096",
-            "gpu_memory_utilization": "0.9",
-            "dtype": "auto",
-            "cpu_offload": "0",
-            "max_num_seq": "256",
-            "threadpool": "1",
-            "renderer_num_workers": "64",
-        }
-
-        result = _build_entity_env(values)
-        result_dict = json.loads(result)
+        envs = [
+            CacheKeyBuilder.build_env_definition({**base, "renderer_num_workers": w})
+            for w in ["32", "64", "128"]
+        ]
 
-        # Should assume threadpool is supported (backward compatible)
-        assert result_dict["threadpool"] == 1
-        assert result_dict["renderer_num_workers"] == 64
+        assert envs[0] == envs[1] == envs[2]
+        assert json.loads(envs[0])["renderer_num_workers"] == 0
 
 
 class TestBuildBenchmarkParamsKey:
-    """Test suite for _build_benchmark_params_key function"""
+    """Test suite for benchmark parameter extraction."""
 
     def test_includes_all_benchmark_parameters(self) -> None:
-        """Test that all benchmark parameters are included in the key"""
+        """Test all benchmark parameters are included."""
         values = {
             "num_prompts": "100",
             "request_rate": "10",
@@ -229,259 +125,132 @@ def test_includes_all_benchmark_parameters(self) -> None:
             "dataset": "random",
         }
 
-        result = _build_benchmark_params_key(values)
-        result_dict = json.loads(result)
+        cache_key = CacheKeyBuilder.build(values)
+        benchmark = json.loads(cache_key)["benchmark"]
 
-        assert result_dict["num_prompts"] == "100"
-        assert result_dict["request_rate"] == "10"
-        assert result_dict["max_concurrency"] == "5"
-        assert result_dict["number_input_tokens"] == "50"
-        assert result_dict["max_output_tokens"] == "100"
-        assert result_dict["burstiness"] == "1.0"
-        assert result_dict["dataset"] == "random"
+        assert benchmark["num_prompts"] == "100"
+        assert benchmark["request_rate"] == "10"
+        assert benchmark["max_concurrency"] == "5"
+        assert benchmark["number_input_tokens"] == "50"
+        assert benchmark["max_output_tokens"] == "100"
+        assert benchmark["burstiness"] == "1.0"
+        assert benchmark["dataset"] == "random"
 
     def test_handles_missing_values(self) -> None:
-        """Test that missing values are handled as None"""
-        values = {
-            "num_prompts": "100",
-            # Other parameters missing
-        }
-
-        result = _build_benchmark_params_key(values)
-        result_dict = json.loads(result)
+        """Test missing values are handled as None."""
+        cache_key = CacheKeyBuilder.build({"num_prompts": "100"})
+        benchmark = json.loads(cache_key)["benchmark"]
 
-        assert result_dict["num_prompts"] == "100"
-        assert result_dict["request_rate"] is None
-        assert result_dict["max_concurrency"] is None
-        assert result_dict["dataset"] is None
+        assert benchmark["num_prompts"] == "100"
+        assert benchmark["request_rate"] is None
+        assert benchmark["max_concurrency"] is None
 
     def test_consistent_output_with_sorted_keys(self) -> None:
-        """Test that output is consistent (keys are sorted)"""
-        values = {
-            "dataset": "random",
-            "num_prompts": "100",
-            "request_rate": "10",
-        }
-
-        result1 = _build_benchmark_params_key(values)
-        result2 = _build_benchmark_params_key(values)
+        """Test output is consistent with sorted keys."""
+        values = {"dataset": "random", "num_prompts": "100", "request_rate": "10"}
 
-        # Should produce identical output
-        assert result1 == result2
+        key1 = json.dumps(
+            json.loads(CacheKeyBuilder.build(values))["benchmark"], sort_keys=True
+        )
+        key2 = json.dumps(
+            json.loads(CacheKeyBuilder.build(values))["benchmark"], sort_keys=True
+        )
 
-        # Verify keys are sorted in JSON
-        result_dict = json.loads(result1)
-        keys = list(result_dict.keys())
-        assert keys == sorted(keys)
+        assert key1 == key2
+        assert list(json.loads(key1).keys()) == sorted(json.loads(key1).keys())
 
 
 class TestBuildCacheKey:
-    """Test suite for _build_cache_key function"""
+    """Test suite for complete cache key building."""
 
-    def test_combines_environment_and_benchmark_params(self) -> None:
-        """Test that cache key includes both environment and benchmark parameters"""
+    def test_combines_environment_and_benchmark_params(
+        self, base_env_values, base_benchmark_values
+    ) -> None:
+        """Test cache key includes both environment and benchmark sections."""
         values = {
-            # Environment params
-            "model": "test-model",
+            **base_env_values,
             "image": ["icr.io/test/vllm:v0.18.0", "0.18.0"],
-            "n_gpus": "1",
-            "gpu_type": "nvidia-a100",
-            "n_cpus": "8",
-            "memory": "32Gi",
-            "max_batch_tokens": "4096",
-            "gpu_memory_utilization": "0.9",
-            "dtype": "auto",
-            "cpu_offload": "0",
-            "max_num_seq": "256",
             "threadpool": "1",
             "renderer_num_workers": "32",
-            # Benchmark params
-            "num_prompts": "200",
-            "request_rate": "32",
-            "dataset": "random",
+            **base_benchmark_values,
         }
 
-        result = _build_cache_key(values)
-        result_dict = json.loads(result)
+        result_dict = json.loads(CacheKeyBuilder.build(values))
 
-        # Should have both environment and benchmark sections
         assert "environment" in result_dict
         assert "benchmark" in result_dict
-
-        # Check environment section
-        env = result_dict["environment"]
-        assert env["model"] == "test-model"
-        assert env["n_gpus"] == "1"
-
-        # Check benchmark section
-        benchmark = result_dict["benchmark"]
-        assert benchmark["num_prompts"] == "200"
-        assert benchmark["request_rate"] == "32"
-        assert benchmark["dataset"] == "random"
-
-    def test_different_benchmark_params_produce_different_keys(self) -> None:
-        """Test that different benchmark parameters produce different cache keys"""
-        base_values = {
-            "model": "test-model",
+        assert result_dict["environment"]["model"] == "test-model"
+        assert result_dict["benchmark"]["num_prompts"] == "200"
+
+    @pytest.mark.parametrize(
+        "param,value1,value2",
+        [
+            ("num_prompts", "100", "200"),
+            ("request_rate", "32", "64"),
+        ],
+    )
+    def test_different_params_produce_different_keys(
+        self, base_env_values, base_benchmark_values, param, value1, value2
+    ) -> None:
+        """Test different parameter values produce different cache keys."""
+        base = {
+            **base_env_values,
             "image": ["icr.io/test/vllm:v0.20.1", "0.20.1"],
-            "n_gpus": "1",
-            "gpu_type": "nvidia-a100",
-            "n_cpus": "8",
-            "memory": "32Gi",
-            "max_batch_tokens": "4096",
-            "gpu_memory_utilization": "0.9",
-            "dtype": "auto",
-            "cpu_offload": "0",
-            "max_num_seq": "256",
             "threadpool": "1",
             "renderer_num_workers": "32",
+            **base_benchmark_values,
         }
 
-        # Same environment, different num_prompts
-        values1 = {**base_values, "num_prompts": "100", "request_rate": "32"}
-        values2 = {**base_values, "num_prompts": "200", "request_rate": "32"}
+        key1 = CacheKeyBuilder.build({**base, param: value1})
+        key2 = CacheKeyBuilder.build({**base, param: value2})
 
-        key1 = _build_cache_key(values1)
-        key2 = _build_cache_key(values2)
-
-        # Different benchmark params should produce different keys
         assert key1 != key2
 
-    def test_same_params_produce_same_key(self) -> None:
-        """Test that identical parameters produce identical cache keys"""
+    def test_same_params_produce_same_key(
+        self, base_env_values, base_benchmark_values
+    ) -> None:
+        """Test identical parameters produce identical cache keys."""
         values = {
-            "model": "test-model",
+            **base_env_values,
             "image": ["icr.io/test/vllm:v0.20.1", "0.20.1"],
-            "n_gpus": "1",
-            "gpu_type": "nvidia-a100",
-            "n_cpus": "8",
-            "memory": "32Gi",
-            "max_batch_tokens": "4096",
-            "gpu_memory_utilization": "0.9",
-            "dtype": "auto",
-            "cpu_offload": "0",
-            "max_num_seq": "256",
             "threadpool": "1",
             "renderer_num_workers": "32",
-            "num_prompts": "200",
-            "request_rate": "32",
-            "dataset": "random",
-        }
-
-        key1 = _build_cache_key(values)
-        key2 = _build_cache_key(values)
-
-        # Identical params should produce identical keys
-        assert key1 == key2
-
-    def test_cache_key_differentiates_on_request_rate(self) -> None:
-        """Test that different request_rate values produce different cache keys"""
-        base_values = {
-            "model": "test-model",
-            "image": ["icr.io/test/vllm:v0.18.0", "0.18.0"],
-            "n_gpus": "1",
-            "gpu_type": "nvidia-a100",
-            "n_cpus": "8",
-            "memory": "32Gi",
-            "max_batch_tokens": "4096",
-            "gpu_memory_utilization": "0.9",
-            "dtype": "auto",
-            "cpu_offload": "0",
-            "max_num_seq": "256",
-            "threadpool": "1",
-            "renderer_num_workers": "32",
-            "num_prompts": "200",
-            "dataset": "random",
+            **base_benchmark_values,
         }
 
-        # Same everything except request_rate
-        values_rate_32 = {**base_values, "request_rate": "32"}
-        values_rate_64 = {**base_values, "request_rate": "64"}
-
-        key_32 = _build_cache_key(values_rate_32)
-        key_64 = _build_cache_key(values_rate_64)
+        assert CacheKeyBuilder.build(values) == CacheKeyBuilder.build(values)
 
-        # Different request rates should produce different keys
-        assert key_32 != key_64
-
-    def test_vllm_0_18_same_cache_key_for_different_renderer_num_workers(self) -> None:
-        """
-        Test that for vLLM 0.18.0, different renderer_num_workers values produce
-        the SAME cache key (because threadpool is not supported and normalized to 0)
-        """
-        base_values = {
-            "model": "test-model",
+    def test_vllm_0_18_same_key_different_renderer_workers(
+        self, base_env_values, base_benchmark_values
+    ) -> None:
+        """Test vLLM 0.18.0 produces same key for different renderer_num_workers."""
+        base = {
+            **base_env_values,
             "image": ["icr.io/test/vllm:v0.18.0", "0.18.0"],
-            "n_gpus": "1",
-            "gpu_type": "nvidia-a100",
-            "n_cpus": "8",
-            "memory": "32Gi",
-            "max_batch_tokens": "4096",
-            "gpu_memory_utilization": "0.9",
-            "dtype": "auto",
-            "cpu_offload": "0",
-            "max_num_seq": "256",
             "threadpool": "1",
-            "num_prompts": "200",
-            "request_rate": "32",
-            "dataset": "random",
+            **base_benchmark_values,
         }
 
-        # Different renderer_num_workers values
-        values_32 = {**base_values, "renderer_num_workers": "32"}
-        values_64 = {**base_values, "renderer_num_workers": "64"}
+        key_32 = CacheKeyBuilder.build({**base, "renderer_num_workers": "32"})
+        key_64 = CacheKeyBuilder.build({**base, "renderer_num_workers": "64"})
 
-        key_32 = _build_cache_key(values_32)
-        key_64 = _build_cache_key(values_64)
-
-        # For vLLM 0.18.0, both should produce the same key
-        # because renderer_num_workers is normalized to 0 in the environment
         assert key_32 == key_64
+        assert json.loads(key_32)["environment"]["renderer_num_workers"] == 0
 
-        # Verify the environment section has renderer_num_workers=0
-        result_dict = json.loads(key_32)
-        assert result_dict["environment"]["renderer_num_workers"] == 0
-
-    def test_vllm_0_20_different_cache_key_for_different_renderer_num_workers(
-        self,
+    def test_vllm_0_20_different_key_different_renderer_workers(
+        self, base_env_values, base_benchmark_values
     ) -> None:
-        """
-        Test that for vLLM 0.20.1, different renderer_num_workers values produce
-        DIFFERENT cache keys (because threadpool is supported)
-        """
-        base_values = {
-            "model": "test-model",
+        """Test vLLM 0.20.1 produces different keys for different renderer_num_workers."""
+        base = {
+            **base_env_values,
             "image": ["icr.io/test/vllm:v0.20.1", "0.20.1"],
-            "n_gpus": "1",
-            "gpu_type": "nvidia-a100",
-            "n_cpus": "8",
-            "memory": "32Gi",
-            "max_batch_tokens": "4096",
-            "gpu_memory_utilization": "0.9",
-            "dtype": "auto",
-            "cpu_offload": "0",
-            "max_num_seq": "256",
             "threadpool": "1",
-            "num_prompts": "200",
-            "request_rate": "32",
-            "dataset": "random",
+            **base_benchmark_values,
         }
 
-        # Different renderer_num_workers values
-        values_32 = {**base_values, "renderer_num_workers": "32"}
-        values_64 = {**base_values, "renderer_num_workers": "64"}
-
-        key_32 = _build_cache_key(values_32)
-        key_64 = _build_cache_key(values_64)
+        key_32 = CacheKeyBuilder.build({**base, "renderer_num_workers": "32"})
+        key_64 = CacheKeyBuilder.build({**base, "renderer_num_workers": "64"})
 
-        # For vLLM 0.20.1, should produce different keys
         assert key_32 != key_64
-
-        # Verify the environment sections have different renderer_num_workers
-        result_dict_32 = json.loads(key_32)
-        result_dict_64 = json.loads(key_64)
-        assert result_dict_32["environment"]["renderer_num_workers"] == 32
-        assert result_dict_64["environment"]["renderer_num_workers"] == 64
-
-
-# Made with Bob
+        assert json.loads(key_32)["environment"]["renderer_num_workers"] == 32
+        assert json.loads(key_64)["environment"]["renderer_num_workers"] == 64
diff --git a/plugins/actuators/vllm_performance/tests/test_version_utils.py b/plugins/actuators/vllm_performance/tests/test_version_utils.py
new file mode 100644
index 000000000..938acdc4b
--- /dev/null
+++ b/plugins/actuators/vllm_performance/tests/test_version_utils.py
@@ -0,0 +1,59 @@
+# Copyright IBM Corporation 2025, 2026
+# SPDX-License-Identifier: MIT
+
+"""Tests for vLLM version utilities."""
+
+
+from ado_actuators.vllm_performance.version_utils import VLLMVersionChecker
+
+
+class TestVLLMVersionChecker:
+    """Tests for VLLMVersionChecker class."""
+
+    def test_parse_version_from_list(self):
+        """Test version parsing from list format."""
+        image_value = ["vllm/vllm-openai:v0.20.1", "0.20.1"]
+        assert VLLMVersionChecker.parse_version(image_value) == "0.20.1"
+
+    def test_parse_version_from_list_single_element(self):
+        """Test version parsing from list with single element."""
+        image_value = ["vllm/vllm-openai:v0.20.1"]
+        assert VLLMVersionChecker.parse_version(image_value) is None
+
+    def test_parse_version_from_string(self):
+        """Test version parsing from string format (backward compatibility)."""
+        image_value = "vllm/vllm-openai:v0.20.1"
+        assert VLLMVersionChecker.parse_version(image_value) is None
+
+    def test_supports_threadpool_disabled_by_user(self):
+        """Test threadpool disabled when user sets threadpool=0."""
+        image_value = ["vllm/vllm-openai:v0.20.1", "0.20.1"]
+        assert not VLLMVersionChecker.supports_threadpool(image_value, 0)
+
+    def test_supports_threadpool_version_supported(self):
+        """Test threadpool enabled for vLLM >= 0.20.0."""
+        image_value = ["vllm/vllm-openai:v0.20.1", "0.20.1"]
+        assert VLLMVersionChecker.supports_threadpool(image_value, 1)
+
+    def test_supports_threadpool_version_not_supported(self):
+        """Test threadpool disabled for vLLM < 0.20.0."""
+        image_value = ["vllm/vllm-openai:v0.18.0", "0.18.0"]
+        assert not VLLMVersionChecker.supports_threadpool(image_value, 1)
+
+    def test_supports_threadpool_no_version_info(self):
+        """Test threadpool enabled when no version info (backward compatible)."""
+        image_value = "vllm/vllm-openai:v0.20.1"
+        assert VLLMVersionChecker.supports_threadpool(image_value, 1)
+
+    def test_supports_threadpool_invalid_version(self):
+        """Test threadpool enabled for invalid version (fail-safe)."""
+        image_value = ["vllm/vllm-openai:latest", "invalid-version"]
+        assert VLLMVersionChecker.supports_threadpool(image_value, 1)
+
+    def test_supports_threadpool_edge_version(self):
+        """Test threadpool enabled at exact minimum version."""
+        image_value = ["vllm/vllm-openai:v0.20.0", "0.20.0"]
+        assert VLLMVersionChecker.supports_threadpool(image_value, 1)
+
+
+# Made with Bob
diff --git a/pod_sample.yaml b/pod_sample.yaml
new file mode 100644
index 000000000..ef3391982
--- /dev/null
+++ b/pod_sample.yaml
@@ -0,0 +1,262 @@
+# Copyright IBM Corporation 2025, 2026
+# SPDX-License-Identifier: MIT
+
+apiVersion: v1
+kind: Pod
+metadata:
+  annotations:
+    k8s.ovn.org/pod-networks: '{"default":{"ip_addresses":["10.128.14.116/23"],"mac_address":"0a:58:0a:80:0e:74","gateway_ips":["10.128.14.1"],"routes":[{"dest":"10.128.0.0/14","nextHop":"10.128.14.1"},{"dest":"172.30.0.0/16","nextHop":"10.128.14.1"},{"dest":"169.254.169.5/32","nextHop":"10.128.14.1"},{"dest":"100.64.0.0/16","nextHop":"10.128.14.1"}],"ip_address":"10.128.14.116/23","gateway_ip":"10.128.14.1","role":"primary"}}'
+    k8s.v1.cni.cncf.io/network-status: |-
+      [{
+          "name": "ovn-kubernetes",
+          "interface": "eth0",
+          "ips": [
+              "10.128.14.116"
+          ],
+          "mac": "0a:58:0a:80:0e:74",
+          "default": true,
+          "dns": {}
+      }]
+    openshift.io/scc: anyuid
+    security.openshift.io/validated-scc-subject-type: serviceaccount
+  creationTimestamp: "2026-06-03T14:46:46Z"
+  generateName: vllm-prithvi-eo-2-0-300m-tl-se-12a30a40176c40a78df249972a2fe66b-74c45688b4-
+  generation: 1
+  labels:
+    app.kubernetes.io/instance: vllm-prithvi-eo-2-0-300m-tl-se-12a30a40176c40a78df249972a2fe66b
+    app.kubernetes.io/name: vllm
+    pod-template-hash: 74c45688b4
+  name: vllm-prithvi-eo-2-0-300m-tl-se-12a30a40176c40a78df249972a2hrzrm
+  namespace: cp-testing
+spec:
+  containers:
+  - args:
+    - ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11
+    - --max-num-seqs
+    - "256"
+    - --max-num-batched-tokens
+    - "16384"
+    - --gpu-memory-utilization
+    - "0.9"
+    - --cpu-offload-gb
+    - "0"
+    - --max-num-seq
+    - "256"
+    - --tensor-parallel-size
+    - "1"
+    - --dtype
+    - auto
+    - --enforce-eager
+    - --skip-tokenizer-init
+    - --io-processor-plugin
+    - terratorch_segmentation
+    - --enable-mm-embeds
+    - --otlp-traces-endpoint
+    - http://jaeger:4317/v1/traces
+    command:
+    - vllm
+    - serve
+    env:
+    - name: HF_HUB_OFFLINE
+      value: "0"
+    - name: TRANSFORMERS_OFFLINE
+      value: "0"
+    - name: HF_TOKEN
+    - name: OTEL_SERVICE_NAME
+      value: vllm-prithvi-eo-2-0-300m-tl-se-12a30a40176c40a78df249972a2fe66b
+    - name: HOME
+      value: /tmp
+    - name: HF_HOME
+      value: /tmp/transformers_cache
+    - name: VLLM_LOGGING_LEVEL
+      value: DEBUG
+    #image: icr.io/drl-nextgen/mgazz/vllm:v0.18.0-tt.v1.2.5
+    image: icr.io/drl-nextgen/mgazz/vllm:v0.20.1-tt.main
+    imagePullPolicy: IfNotPresent
+    livenessProbe:
+      exec:
+        command:
+        - sh
+        - -c
+        - |
+          curl -X 'GET' "http://localhost:8000/health" \
+            -H 'accept: application/json'
+      failureThreshold: 100
+      periodSeconds: 100
+      successThreshold: 1
+      timeoutSeconds: 1
+    name: vllm
+    ports:
+    - containerPort: 8000
+      name: http
+      protocol: TCP
+    resources:
+      limits:
+        cpu: "48"
+        memory: 128Gi
+        nvidia.com/gpu: "1"
+      requests:
+        cpu: "48"
+        memory: 128Gi
+        nvidia.com/gpu: "1"
+    securityContext:
+      capabilities:
+        drop:
+        - MKNOD
+    startupProbe:
+      exec:
+        command:
+        - sh
+        - -c
+        - |
+          curl -X 'GET' "http://localhost:8000/health" \
+            -H 'accept: application/json'
+      failureThreshold: 200
+      initialDelaySeconds: 20
+      periodSeconds: 5
+      successThreshold: 1
+      timeoutSeconds: 300
+    terminationMessagePath: /dev/termination-log
+    terminationMessagePolicy: File
+    volumeMounts:
+    - mountPath: /dev/shm
+      name: dshm
+    - mountPath: /dev/cache
+      name: vllm-support
+    - mountPath: /var/run/secrets/kubernetes.io/serviceaccount
+      name: kube-api-access-n9srb
+      readOnly: true
+  dnsPolicy: ClusterFirst
+  enableServiceLinks: true
+  imagePullSecrets:
+  - name: cp-icr-pull-secret
+  nodeName: adcpu014
+  nodeSelector:
+    nvidia.com/gpu.product: NVIDIA-A100-80GB-PCIe
+  preemptionPolicy: PreemptLowerPriority
+  priority: 0
+  restartPolicy: Always
+  schedulerName: default-scheduler
+  securityContext:
+    seLinuxOptions:
+      level: s0:c33,c7
+  serviceAccount: default
+  serviceAccountName: default
+  terminationGracePeriodSeconds: 30
+  tolerations:
+  - effect: NoExecute
+    key: node.kubernetes.io/not-ready
+    operator: Exists
+    tolerationSeconds: 300
+  - effect: NoExecute
+    key: node.kubernetes.io/unreachable
+    operator: Exists
+    tolerationSeconds: 300
+  - effect: NoSchedule
+    key: node.kubernetes.io/memory-pressure
+    operator: Exists
+  volumes:
+  - emptyDir:
+      medium: Memory
+    name: dshm
+  - name: vllm-support
+    persistentVolumeClaim:
+      claimName: vllm-support-75966dc2efc74e07a740ba76edca0f1c
+  - name: kube-api-access-n9srb
+    projected:
+      defaultMode: 420
+      sources:
+      - serviceAccountToken:
+          expirationSeconds: 3607
+          path: token
+      - configMap:
+          items:
+          - key: ca.crt
+            path: ca.crt
+          name: kube-root-ca.crt
+      - downwardAPI:
+          items:
+          - fieldRef:
+              apiVersion: v1
+              fieldPath: metadata.namespace
+            path: namespace
+      - configMap:
+          items:
+          - key: service-ca.crt
+            path: service-ca.crt
+          name: openshift-service-ca.crt
+status:
+  conditions:
+  - lastProbeTime: null
+    lastTransitionTime: "2026-06-03T14:46:58Z"
+    status: "True"
+    type: PodReadyToStartContainers
+  - lastProbeTime: null
+    lastTransitionTime: "2026-06-03T14:46:46Z"
+    status: "True"
+    type: Initialized
+  - lastProbeTime: null
+    lastTransitionTime: "2026-06-03T14:46:46Z"
+    message: 'containers with unready status: [vllm]'
+    reason: ContainersNotReady
+    status: "False"
+    type: Ready
+  - lastProbeTime: null
+    lastTransitionTime: "2026-06-03T14:46:46Z"
+    message: 'containers with unready status: [vllm]'
+    reason: ContainersNotReady
+    status: "False"
+    type: ContainersReady
+  - lastProbeTime: null
+    lastTransitionTime: "2026-06-03T14:46:46Z"
+    status: "True"
+    type: PodScheduled
+  containerStatuses:
+  - allocatedResources:
+      cpu: "48"
+      memory: 128Gi
+      nvidia.com/gpu: "1"
+    containerID: cri-o://19404f25f6a87d0de5d10ec7b0feca8ac9d096333320f589db612c62df10a9c7
+    image: icr.io/drl-nextgen/mgazz/vllm:v0.18.0-tt.v1.2.5
+    imageID: icr.io/drl-nextgen/mgazz/vllm@sha256:b2eb5944328836c91dad29bdabc6313d471a4dd827cb332f9ef284409e8364a7
+    lastState: {}
+    name: vllm
+    ready: false
+    resources:
+      limits:
+        cpu: "48"
+        memory: 128Gi
+        nvidia.com/gpu: "1"
+      requests:
+        cpu: "48"
+        memory: 128Gi
+        nvidia.com/gpu: "1"
+    restartCount: 0
+    started: false
+    state:
+      running:
+        startedAt: "2026-06-03T14:46:57Z"
+    user:
+      linux:
+        gid: 0
+        supplementalGroups:
+        - 0
+        uid: 0
+    volumeMounts:
+    - mountPath: /dev/shm
+      name: dshm
+    - mountPath: /dev/cache
+      name: vllm-support
+    - mountPath: /var/run/secrets/kubernetes.io/serviceaccount
+      name: kube-api-access-n9srb
+      readOnly: true
+      recursiveReadOnly: Disabled
+  hostIP: 172.16.1.14
+  hostIPs:
+  - ip: 172.16.1.14
+  phase: Running
+  podIP: 10.128.14.116
+  podIPs:
+  - ip: 10.128.14.116
+  qosClass: Guaranteed
+  startTime: "2026-06-03T14:46:46Z"
diff --git a/rhaiis_deployment.yaml b/rhaiis_deployment.yaml
new file mode 100644
index 000000000..193683bdd
--- /dev/null
+++ b/rhaiis_deployment.yaml
@@ -0,0 +1,81 @@
+# Copyright IBM Corporation 2025, 2026
+# SPDX-License-Identifier: MIT
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: vllm-testing
+  labels:
+    app.kubernetes.io/name: vllm
+    app.kubernetes.io/instance: vllm-testing
+spec:
+  selector:
+    matchLabels:
+      app.kubernetes.io/name: vllm
+      app.kubernetes.io/instance: vllm-testing
+  template:
+    metadata:
+      labels:
+        app.kubernetes.io/name: vllm
+        app.kubernetes.io/instance: vllm-testing
+    spec:
+      affinity:
+        nodeAffinity:
+          requiredDuringSchedulingIgnoredDuringExecution:
+            nodeSelectorTerms:
+              - matchExpressions:
+                  - key: kubernetes.io/hostname
+                    operator: In
+                    values:
+                      - adcpu015
+      containers:
+        - name: vllm
+          image: "vllm/vllm-openai:v0.13.0"
+          imagePullPolicy: IfNotPresent
+          ports:
+            - name: http
+              containerPort: 8000
+              protocol: TCP
+          startupProbe:
+            exec:
+              command:
+                - sh
+                - -c
+                - |
+                  curl -X 'GET' "http://localhost:8000/health" \
+                    -H 'accept: application/json'
+            initialDelaySeconds: 20
+            timeoutSeconds: 300
+            periodSeconds: 5
+            # Allow roughly 17 minutes of startup time (failureThreshold 200 x periodSeconds 5, after the 20s initial delay)
+            failureThreshold: 200
+          livenessProbe:
+            exec:
+              command:
+                - sh
+                - -c
+                - |
+                  curl -X 'GET' "http://localhost:8000/health" \
+                    -H 'accept: application/json'
+            failureThreshold: 100
+            periodSeconds: 100
+          resources:
+            limits:
+              cpu: "8"
+              memory: 128Gi
+              nvidia.com/gpu: "1"
+            requests:
+              cpu: "4"
+              memory: 128Gi
+              nvidia.com/gpu: "1"
+          env:
+            - name: HF_HUB_OFFLINE
+              value: "0"
+            - name: TRANSFORMERS_OFFLINE
+              value: "0"
+          volumeMounts:
+            - mountPath: /dev/shm
+              name: dshm
+      volumes:
+        - name: dshm
+          emptyDir:
+            medium: Memory
diff --git a/~/workspace/plans/ado-threadpool-list-based-image-property.md b/~/workspace/plans/ado-threadpool-list-based-image-property.md
new file mode 100644
index 000000000..99af4d263
--- /dev/null
+++ b/~/workspace/plans/ado-threadpool-list-based-image-property.md
@@ -0,0 +1,201 @@
+# Implementation Plan: Refactor Image Property from Dict to List Format
+
+## Overview
+Refactor the vllm_performance actuator to handle image properties as positional lists `[image_url, vllm_version]` instead of dictionaries `{image: url, vllm_version: version}`. This simplifies the YAML format and removes the need for special handling of dict values in core ado code.
+
+## Current State Analysis
+
+### Current Implementation (Dict-based)
+```yaml
+- identifier: "image"
+  propertyDomain:
+    variableType: "CATEGORICAL_VARIABLE_TYPE"
+    values: 
+      - image: "icr.io/drl-nextgen/mgazz/vllm:v0.18.0-tt.v1.2.5"
+        vllm_version: "0.18.0"
+      - image: "icr.io/drl-nextgen/mgazz/vllm:v0.20.1-tt.main"
+        vllm_version: "0.20.1"
+```
+
+### Target Implementation (List-based)
+```yaml
+- identifier: "image"
+  propertyDomain:
+    variableType: "CATEGORICAL_VARIABLE_TYPE"
+    values: 
+      - ["icr.io/drl-nextgen/mgazz/vllm:v0.18.0-tt.v1.2.5", "0.18.0"]
+      - ["icr.io/drl-nextgen/mgazz/vllm:v0.20.1-tt.main", "0.20.1"]
+```
+
+## Files to Modify
+
+### 1. Actuator Code (vllm_performance plugin)
+**File**: `plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py`
+
+**Changes Required**:
+- **Function**: `_get_vllm_version_from_image_value(image_value: dict | str) -> str | None`
+  - Change signature to: `_get_vllm_version_from_image_value(image_value: list | str) -> str | None`
+  - Update logic to handle list format: `image_value[1]` instead of `image_value.get("vllm_version")`
+  - Keep backward compatibility for string format
+
+- **Function**: `_should_enable_threadpool(image_value: dict | str, threadpool_value: int) -> bool`
+  - Change signature to: `_should_enable_threadpool(image_value: list | str, threadpool_value: int) -> bool`
+  - Update to work with list-based image values
+
+- **Function**: `_build_entity_env(values: dict[str, str]) -> str`
+  - Update lines 166-170 to extract image string from list format
+  - Change from: `image_value.get("image")` to `image_value[0]`
+
+**Expected Changes**:
+```python
+# Before (dict-based)
+if isinstance(image_value, dict):
+    version = image_value.get("vllm_version")
+    image_str = image_value.get("image")
+
+# After (list-based)
+if isinstance(image_value, list):
+    version = image_value[1] if len(image_value) > 1 else None
+    image_str = image_value[0]
+```
+
+### 2. Core ado Files (REVERT CHANGES)
+
+#### File: `orchestrator/core/discoveryspace/group_samplers.py`
+**Action**: REVERT changes made to support dict values in grouping
+
+**Lines to Revert**: 53-76
+- Remove the `make_hashable()` function that converts dicts to tuples
+- Restore original simple implementation that doesn't handle unhashable types
+
+**Original Implementation**:
+```python
+def _build_point_group_values(
+    point: dict, group: list[str]
+) -> frozenset[tuple[str, Any]]:
+    """
+    :return: A frozen set of (key,value) pairs
+    """
+    return frozenset({(k, v) for k, v in point.items() if k in group})
+```
+
+#### File: `orchestrator/schema/property_value.py`
+**Action**: REVERT temporary changes for dict value handling
+
+**Lines to Revert**: 
+- Lines 62, 89, 114, 159 (dict type annotations)
+- Lines 158-159 (dict value type detection)
+- Line 27 (dict in ValueTypeEnum comment)
+
+**Changes**:
+- Remove `dict` from union types in value fields
+- Remove dict handling in `set_value_type()` method (lines 158-159)
+- Remove dict from validation logic
+
+### 3. Test Files (REMOVE/UPDATE)
+
+#### File: `tests/core/test_group_samplers.py`
+**Action**: REMOVE tests for dict value handling
+
+**Lines to Remove**: 316-419
+- Remove `test_build_point_group_values_with_unhashable_types()`
+- Remove `test_build_groups_dict_with_unhashable_values()`
+
+These tests were added specifically to validate dict handling in grouping, which is no longer needed.
+
+#### File: `tests/schema/test_property_value.py`
+**Action**: UPDATE to remove dict from test fixtures
+
+**Lines to Modify**:
+- Line 27: Remove dict from `python_type_value_examples` fixture
+- Lines 31, 40, 49: Remove dict from parametrize decorators
+- Lines 72-75: Remove dict case from `property_value` fixture
+- Lines 217-219: Remove dict case from `test_type_detection`
+
+### 4. Discovery Space YAML (Example Update)
+
+**File**: `../ops/geo/discoveryspace_geospatial_threadpool_test.yaml`
+
+**Lines to Update**: 32-38
+
+**Before**:
+```yaml
+values: 
+  - image: "icr.io/drl-nextgen/mgazz/vllm:v0.18.0-tt.v1.2.5"
+    vllm_version: "0.18.0"
+  - image: "icr.io/drl-nextgen/mgazz/vllm:v0.20.1-tt.main"
+    vllm_version: "0.20.1"
+```
+
+**After**:
+```yaml
+values: 
+  - ["icr.io/drl-nextgen/mgazz/vllm:v0.18.0-tt.v1.2.5", "0.18.0"]
+  - ["icr.io/drl-nextgen/mgazz/vllm:v0.20.1-tt.main", "0.20.1"]
+```
+
+## Implementation Steps
+
+### Phase 1: Update Actuator Code
+1. ✅ Modify `_get_vllm_version_from_image_value()` to handle list format
+2. ✅ Modify `_should_enable_threadpool()` signature and logic
+3. ✅ Modify `_build_entity_env()` to extract image from list
+4. ✅ Add validation for list format (length check, type check)
+5. ✅ Maintain backward compatibility for string-only format
+
+### Phase 2: Revert Core Changes
+1. ✅ Revert `orchestrator/core/discoveryspace/group_samplers.py`
+   - Remove `make_hashable()` function
+   - Restore original `_build_point_group_values()` implementation
+2. ✅ Revert `orchestrator/schema/property_value.py`
+   - Remove dict from type annotations
+   - Remove dict handling in validators
+   - Remove dict from ValueTypeEnum documentation
+
+### Phase 3: Update Tests
+1. ✅ Remove dict-specific tests from `tests/core/test_group_samplers.py`
+2. ✅ Update `tests/schema/test_property_value.py` to remove dict cases
+3. ✅ Run test suite to ensure no regressions
+
+### Phase 4: Update YAML Files
+1. ✅ Update `discoveryspace_geospatial_threadpool_test.yaml`
+2. ✅ Update any other YAML files using dict-based image format
+
+### Phase 5: Validation
+1. ✅ Run linting (black, ruff) on modified files
+2. ✅ Run pytest on affected test modules
+3. ✅ Test the operation command to ensure it works:
+   ```bash
+   uv run ado --remote ../ops/geo/remote_execution_context_threadpool_test.yaml \
+     create operation -f ../ops/geo/operation_geospatial_threadpool_test.yaml
+   ```
+
+## Benefits of This Approach
+
+1. **Simpler YAML**: List format is more concise and readable
+2. **No Core Changes**: Removes need for special dict handling in core ado
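+3. **Type Safety**: Positional `[image_url, vllm_version]` values need no dict-specific handling; note that Python lists are themselves unhashable, so they must be converted to tuples before being placed in frozensets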
+4. **Backward Compatible**: String-only format still supported
+5. **Cleaner Code**: Removes temporary workarounds and special cases
+
+## Risk Assessment
+
+**Low Risk**:
+- Changes are localized to vllm_performance actuator
+- Core ado code is simplified (reverted to original)
+- Backward compatibility maintained for string format
+
+**Testing Strategy**:
+- Unit tests for list parsing logic
+- Integration test with actual operation execution
+- Verify grouping still works correctly with list values
+
+## Success Criteria
+
+- [ ] Actuator correctly parses list-based image values
+- [ ] vLLM version extraction works from list format
+- [ ] Threadpool logic correctly uses version from list
+- [ ] Core ado files reverted to original state
+- [ ] All tests pass
+- [ ] Operation executes successfully with new YAML format
+- [ ] No regressions in existing functionality
\ No newline at end of file
diff --git a/~/workspace/plans/ado-threadpool-refactoring-plan.md b/~/workspace/plans/ado-threadpool-refactoring-plan.md
new file mode 100644
index 000000000..2ce162e2f
--- /dev/null
+++ b/~/workspace/plans/ado-threadpool-refactoring-plan.md
@@ -0,0 +1,248 @@
+# Implementation Plan: Code Refactoring for Readability and Conciseness
+
+## Overview
+This plan implements the refactoring recommendations to improve code readability, reduce duplication, and remove excessive comments across the threadpool branch changes.
+
+## Phase 1: Core Utilities (Foundation)
+
+### Task 1.1: Create VLLMVersionChecker utility class
+**File:** `plugins/actuators/vllm_performance/ado_actuators/vllm_performance/version_utils.py` (new file)
+
+**Actions:**
+1. Create new file `version_utils.py`
+2. Implement `VLLMVersionChecker` class with:
+   - `THREADPOOL_MIN_VERSION = "0.20.0"` constant
+   - `parse_version()` static method
+   - `supports_threadpool()` class method
+3. Add minimal docstrings (one-line per method)
+4. Add unit tests in `tests/test_version_utils.py`
+
+**Dependencies:** None
+**Estimated effort:** 30 minutes
+
+---
+
+### Task 1.2: Create CachedMeasurement dataclass
+**File:** `plugins/actuators/vllm_performance/ado_actuators/vllm_performance/env_manager.py`
+
+**Actions:**
+1. Add `from dataclasses import dataclass` import
+2. Define `CachedMeasurement` dataclass at top of file
+3. Update `EnvironmentManager.__init__()` type hint for `measurement_cache`
+4. Simplify `get_cached_measurement()` - remove verbose docstring
+5. Simplify `cache_measurement()` - remove verbose docstring, keep debug log
+6. Update any code that accesses cache dict structure to use dataclass attributes
+
+**Dependencies:** None
+**Estimated effort:** 20 minutes
+
+---
+
+## Phase 2: Cache Key Refactoring (Core Logic)
+
+### Task 2.1: Create unified CacheKeyBuilder class
+**File:** `plugins/actuators/vllm_performance/ado_actuators/vllm_performance/cache_utils.py` (new file)
+
+**Actions:**
+1. Create new file `cache_utils.py`
+2. Implement `CacheKeyBuilder` class with:
+   - `ALL_PARAMS` class variable (single list of all parameters)
+   - `build()` class method
+   - Brief docstring referencing YAML files
+3. Import `_normalize_threadpool_properties` or move it to this file
+4. Add unit tests in `tests/test_cache_utils.py`
+
+**Dependencies:** Task 1.1 (`_normalize_threadpool_properties` relies on `VLLMVersionChecker`)
+**Estimated effort:** 45 minutes
+
+---
+
+### Task 2.2: Refactor _normalize_threadpool_properties
+**File:** `plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py`
+
+**Actions:**
+1. Import `VLLMVersionChecker` from `version_utils`
+2. Replace `_should_enable_threadpool()` and `_get_vllm_version_from_image_value()` with `VLLMVersionChecker.supports_threadpool()`
+3. Simplify `_normalize_threadpool_properties()` to use new utility
+4. Remove all debug logging statements
+5. Remove verbose comments
+6. Keep only brief docstring
+
+**Dependencies:** Task 1.1
+**Estimated effort:** 30 minutes
+
+---
+
+### Task 2.3: Replace cache key functions with CacheKeyBuilder
+**File:** `plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py`
+
+**Actions:**
+1. Import `CacheKeyBuilder` from `cache_utils`
+2. Delete `_build_entity_env()` function (62 lines)
+3. Delete `_build_benchmark_params_key()` function (23 lines)
+4. Delete `_build_cache_key()` function
+5. Replace all calls to these functions with `CacheKeyBuilder.build(values)`
+6. Search for any other references to deleted functions
+
+**Dependencies:** Task 2.1, Task 2.2
+**Estimated effort:** 30 minutes
+
+---
+
+## Phase 3: Simplify group_samplers.py
+
+### Task 3.1: Refactor make_hashable with pattern matching
+**File:** `orchestrator/core/discoveryspace/group_samplers.py`
+
+**Actions:**
+1. Replace `if/elif` chain with `match/case` statement
+2. Remove inline comments
+3. Keep docstring
+4. Verify existing tests still pass
+
+**Dependencies:** None
+**Estimated effort:** 15 minutes
+
+---
+
+## Phase 4: Test Refactoring
+
+### Task 4.1: Create pytest fixtures for test_experiment_executor.py
+**File:** `plugins/actuators/vllm_performance/tests/test_experiment_executor.py`
+
+**Actions:**
+1. Create `base_vllm_values` fixture
+2. Identify test methods that can be parametrized
+3. Create parametrized test for threadpool normalization
+4. Create parametrized test for cache key generation
+5. Remove redundant test methods
+6. Simplify docstrings (one-line descriptions)
+7. Update assertions to use `CacheKeyBuilder.build()`
+
+**Dependencies:** Task 2.3
+**Estimated effort:** 60 minutes
+
+---
+
+### Task 4.2: Add tests for new utility classes
+**File:** `plugins/actuators/vllm_performance/tests/test_version_utils.py` (new)
+**File:** `plugins/actuators/vllm_performance/tests/test_cache_utils.py` (new)
+
+**Actions:**
+1. Create test file for `VLLMVersionChecker`:
+   - Test version parsing from list
+   - Test version parsing from string
+   - Test threadpool support detection
+   - Test edge cases (None, invalid versions)
+2. Create test file for `CacheKeyBuilder`:
+   - Test cache key generation
+   - Test parameter extraction
+   - Test normalization integration
+   - Test key consistency
+
+**Dependencies:** Task 1.1, Task 2.1
+**Estimated effort:** 45 minutes
+
+---
+
+## Phase 5: Comment Cleanup
+
+### Task 5.1: Remove excessive comments from experiment_executor.py
+**File:** `plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py`
+
+**Actions:**
+1. Remove debug logging statements (20+ lines)
+2. Simplify function docstrings to one-line descriptions
+3. Remove inline comments that restate code
+4. Keep only non-obvious comments (e.g., version thresholds, backward compatibility notes)
+
+**Dependencies:** Task 2.2, Task 2.3
+**Estimated effort:** 20 minutes
+
+---
+
+### Task 5.2: Remove excessive comments from test files
+**File:** `plugins/actuators/vllm_performance/tests/test_experiment_executor.py`
+
+**Actions:**
+1. Simplify test docstrings to brief descriptions
+2. Remove comments that just restate test names
+3. Keep only comments explaining non-obvious test logic
+
+**Dependencies:** Task 4.1
+**Estimated effort:** 15 minutes
+
+---
+
+## Phase 6: Integration and Validation
+
+### Task 6.1: Run full test suite
+**Actions:**
+1. Run linting: `uv run black plugins/actuators/vllm_performance/`
+2. Run linting: `uv run ruff check --fix plugins/actuators/vllm_performance/`
+3. Run linting: `uv run black orchestrator/core/discoveryspace/`
+4. Run linting: `uv run ruff check --fix orchestrator/core/discoveryspace/`
+5. Run tests: `uv run pytest -n auto plugins/actuators/vllm_performance/tests/`
+6. Run tests: `uv run pytest -n auto tests/core/test_group_samplers.py`
+7. Fix any failures
+
+**Dependencies:** All previous tasks
+**Estimated effort:** 30 minutes
+
+---
+
+### Task 6.2: Integration testing
+**Actions:**
+1. Test with actual YAML files from `plugins/actuators/vllm_performance/yamls/`
+2. Verify cache key generation produces expected results
+3. Verify threadpool normalization works correctly
+4. Test with both vLLM 0.18.0 and 0.20.1+ images
+5. Verify backward compatibility with string image values
+
+**Dependencies:** Task 6.1
+**Estimated effort:** 30 minutes
+
+---
+
+## Phase 7: Documentation
+
+### Task 7.1: Update inline documentation
+**Actions:**
+1. Add brief module docstrings to new files (`version_utils.py`, `cache_utils.py`)
+2. Update any affected documentation in `plugins/actuators/vllm_performance/README.md`
+3. Ensure YAML files are referenced correctly in code comments
+
+**Dependencies:** Task 6.2
+**Estimated effort:** 20 minutes
+
+---
+
+## Summary
+
+**Total estimated effort:** ~6.5 hours (390 minutes across 13 tasks)
+
+**Files to create:**
+- `plugins/actuators/vllm_performance/ado_actuators/vllm_performance/version_utils.py`
+- `plugins/actuators/vllm_performance/ado_actuators/vllm_performance/cache_utils.py`
+- `plugins/actuators/vllm_performance/tests/test_version_utils.py`
+- `plugins/actuators/vllm_performance/tests/test_cache_utils.py`
+
+**Files to modify:**
+- `orchestrator/core/discoveryspace/group_samplers.py`
+- `plugins/actuators/vllm_performance/ado_actuators/vllm_performance/env_manager.py`
+- `plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py`
+- `plugins/actuators/vllm_performance/tests/test_experiment_executor.py`
+
+**Expected outcomes:**
+- ~200+ lines of code/comments removed
+- 3 new classes (`VLLMVersionChecker`, `CacheKeyBuilder`, `CachedMeasurement`) with clear responsibilities
+- 40% reduction in test code through parametrization
+- Improved maintainability and readability
+- All existing functionality preserved
+- All tests passing
+
+**Risk mitigation:**
+- Each phase can be completed and tested independently
+- Existing tests validate behavior is preserved
+- New tests validate new utilities work correctly
+- Integration testing catches any edge cases
\ No newline at end of file
diff --git a/~/workspace/plans/ado-threadpool-threadpool-property-implementation.md b/~/workspace/plans/ado-threadpool-threadpool-property-implementation.md
new file mode 100644
index 000000000..1f06cd362
--- /dev/null
+++ b/~/workspace/plans/ado-threadpool-threadpool-property-implementation.md
@@ -0,0 +1,296 @@
+# Implementation Plan: Add Threadpool Experiment Property to vLLM Performance Actuator
+
+**Project:** ado-threadpool  
+**Branch:** feature/threadpool-property  
+**Issue:** [#988](https://github.com/IBM/ado/issues/988) - Option 1: Dictionary in Metadata  
+**Date:** 2026-06-02
+
+## Overview
+
+Add a `threadpool` experiment property to the vLLM performance actuator that enables/disables threadpool functionality. When enabled, it passes `--renderer-num-workers` and `--mm-processor-cache-gb 0` arguments to vLLM deployments.
+
+## Requirements
+
+### Functional Requirements
+
+1. Add `threadpool` property to all geospatial experiments
+   - Domain: Categorical with values `[0, 1]` (0=disabled, 1=enabled)
+   - Default: 1 (enabled)
+   - Metadata: Clear description of threadpool functionality
+
+2. Add `renderer_num_workers` property to all geospatial experiments
+   - Domain: Discrete integer, range `[1, 128]`
+   - Default: 32
+   - Only used when `threadpool=1`
+   - Metadata: Description of worker count purpose
+
+3. Modify vLLM deployment creation to conditionally add arguments:
+   - When `threadpool=1`: Add `--renderer-num-workers <renderer_num_workers>` and `--mm-processor-cache-gb 0`
+   - When `threadpool=0`: Do not add these arguments
+
+### Non-Functional Requirements
+
+- Backward compatibility: Existing experiments without these properties should continue to work
+- Clear documentation of the new properties
+- Proper validation and error handling
+
+## Technical Design
+
+### Files to Modify
+
+1. **Experiment YAML Files** (2 files)
+   - `plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml`
+   - `plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing.yaml` (if it has geospatial experiments)
+
+2. **Deployment Builder** (1 file)
+   - `plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8s/yaml_support/build_components.py`
+     - Modify `deployment_yaml()` method to accept threadpool parameters
+     - Update `vllm_serve_args` construction logic
+
+3. **Environment Creation** (1 file)
+   - `plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8s/create_environment.py`
+     - Modify `create_test_environment()` to accept and pass threadpool parameters
+
+4. **Experiment Executor** (1 file)
+   - `plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py`
+     - Modify `_create_environment()` to extract and pass threadpool values from entity properties
+
+### Implementation Steps
+
+#### Step 1: Update Experiment YAML Files
+
+For each geospatial experiment in `performance_testing_geospatial.yaml`:
+- `performance_testing-geospatial-endpoint`
+- `performance_testing-geospatial-full`
+- `performance_testing-geospatial-full-custom-dataset`
+- `performance_testing-geospatial-endpoint-custom-dataset`
+- `performance_testing-geospatial-endpoint-guidellm`
+- `performance_testing-geospatial-full-guidellm`
+- `performance_testing-geospatial-guidellm-deployment-custom-dataset`
+- `performance_testing-geospatial-guidellm-endpoint-custom-dataset`
+
+Add to `optionalProperties`:
+```yaml
+- identifier: 'threadpool'
+  metadata:
+    description: "Enable threadpool for vLLM renderer (0=disabled, 1=enabled). When enabled, uses multiple workers for rendering."
+  propertyDomain:
+    variableType: 'CATEGORICAL_VARIABLE_TYPE'
+    values: [0, 1]
+- identifier: 'renderer_num_workers'
+  metadata:
+    description: "Number of renderer workers when threadpool is enabled. Only used when threadpool=1."
+  propertyDomain:
+    variableType: 'DISCRETE_VARIABLE_TYPE'
+    domainRange: [1, 128]
+    interval: 1
+```
+
+Add to `defaultParameterization`:
+```yaml
+- property:
+    identifier: 'threadpool'
+  value: 1
+- property:
+    identifier: 'renderer_num_workers'
+  value: 32
+```
+
+#### Step 2: Update build_components.py
+
+Modify `ComponentsYaml.deployment_yaml()`:
+
+1. Add parameters:
+```python
+def deployment_yaml(
+    # ... existing parameters ...
+    threadpool: int = 1,
+    renderer_num_workers: int = 32,
+) -> dict[str, Any]:
+```
+
+2. Update docstring to document new parameters
+
+3. Modify `vllm_serve_args` construction (around lines 173-195):
+```python
+vllm_serve_args = [
+    model,
+    "--max-num-seqs",
+    str(max_num_seq),
+    "--max-num-batched-tokens",
+    str(max_batch_tokens),
+    "--dtype",
+    dtype.value,
+]
+
+# Add threadpool arguments if enabled
+if threadpool == 1:
+    vllm_serve_args.extend([
+        "--renderer-num-workers",
+        str(renderer_num_workers),
+        "--mm-processor-cache-gb",
+        "0",
+    ])
+
+if enforce_eager:
+    vllm_serve_args.append("--enforce-eager")
+# ... rest of the logic
+```
+
+#### Step 3: Update create_environment.py
+
+Modify `create_test_environment()`:
+
+1. Add parameters:
+```python
+def create_test_environment(
+    # ... existing parameters ...
+    threadpool: int = 1,
+    renderer_num_workers: int = 32,
+) -> None:
+```
+
+2. Update docstring
+
+3. Pass to `c_manager.create_deployment()`:
+```python
+c_manager.create_deployment(
+    # ... existing parameters ...
+    threadpool=threadpool,
+    renderer_num_workers=renderer_num_workers,
+)
+```
+
+#### Step 4: Update manage_components.py
+
+Modify `ComponentsManager.create_deployment()`:
+
+1. Add parameters to method signature
+2. Pass to `ComponentsYaml.deployment_yaml()`
+
+#### Step 5: Update experiment_executor.py
+
+Modify `_build_entity_env()` to include threadpool parameters:
+```python
+env_values = {
+    # ... existing values ...
+    "threadpool": values.get("threadpool", 1),
+    "renderer_num_workers": values.get("renderer_num_workers", 32),
+}
+```
+
+Modify `_create_environment()` to extract and pass values:
+```python
+create_test_environment(
+    # ... existing parameters ...
+    threadpool=int(values.get("threadpool", 1)),
+    renderer_num_workers=int(values.get("renderer_num_workers", 32)),
+)
+```
+
+### Testing Strategy
+
+#### Unit Tests
+
+1. **Test YAML Validation**
+   - Validate experiment YAML with new properties
+   - Test with `ado create discoveryspace -f <yaml> --dry-run`
+
+2. **Test Property Extraction**
+   - Verify `_build_entity_env()` correctly extracts threadpool values
+   - Test with both enabled and disabled states
+
+3. **Test Argument Construction**
+   - Verify `deployment_yaml()` correctly builds vllm_serve_args
+   - Test threadpool=0: arguments should NOT be present
+   - Test threadpool=1: arguments should be present with correct values
+
+#### Integration Tests
+
+1. **End-to-End Test with Threadpool Enabled (Default)**
+   - Create discoveryspace without specifying threadpool
+   - Run operation
+   - Verify deployment includes `--renderer-num-workers 32 --mm-processor-cache-gb 0`
+
+2. **End-to-End Test with Custom Threadpool Settings**
+   - Create discoveryspace with threadpool=1, renderer_num_workers=16
+   - Run operation
+   - Verify deployment includes `--renderer-num-workers 16 --mm-processor-cache-gb 0`
+
+3. **End-to-End Test with Threadpool Disabled**
+   - Create discoveryspace with threadpool=0
+   - Run operation
+   - Verify deployment does not include threadpool arguments
+
+### Example YAML Usage
+
+```yaml
+# Example discoveryspace with threadpool enabled
+apiVersion: ado.org/v1
+kind: DiscoverySpace
+metadata:
+  name: vllm-threadpool-test
+spec:
+  entitySpace:
+    - model: "ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11"
+      endpoint: "http://localhost:8000"
+      request_rate: 10
+      threadpool: 1  # Optional: defaults to 1 (enabled)
+      renderer_num_workers: 64  # Optional: defaults to 32
+  experiments:
+    - identifier: test-geospatial-endpoint-v1
+```
+
+### Validation Checklist
+
+- [ ] All 8 geospatial experiments updated with new properties
+- [ ] Default values set correctly in all experiments
+- [ ] `build_components.py` modified to handle threadpool parameters
+- [ ] `create_environment.py` modified to pass threadpool parameters
+- [ ] `manage_components.py` modified to accept and forward parameters
+- [ ] `experiment_executor.py` modified to extract and pass values
+- [ ] YAML validation passes with `--dry-run`
+- [ ] Unit tests written and passing
+- [ ] Integration tests written and passing
+- [ ] Documentation updated (if applicable)
+- [ ] Backward compatibility verified
+
+### Risks and Mitigations
+
+| Risk | Impact | Mitigation |
+|------|--------|------------|
+| Breaking existing experiments | Medium | Use optional properties; threadpool enabled by default may change behavior but should improve performance |
+| Invalid renderer_num_workers values | Medium | Use discrete domain with reasonable range [1, 128] |
+| Threadpool not supported in all vLLM versions | Medium | Document minimum vLLM version requirement |
+| Performance regression if threadpool causes issues | Medium | Users can disable by setting threadpool=0 |
+
+### Dependencies
+
+- vLLM version must support `--renderer-num-workers` and `--mm-processor-cache-gb` arguments
+- No changes to ado core required
+- No changes to other actuators required
+
+### Rollout Plan
+
+1. Implement changes in feature branch
+2. Run full test suite
+3. Test with sample discoveryspace on development cluster
+4. Create PR with detailed description
+5. Code review
+6. Merge to main
+7. Update documentation with examples
+
+### Success Criteria
+
+- [ ] All tests pass
+- [ ] Existing experiments continue to work without modification
+- [ ] New properties can be used to enable threadpool functionality
+- [ ] vLLM deployments correctly include/exclude threadpool arguments based on property values
+- [ ] Code review approved
+- [ ] Documentation updated
+
+## Notes
+
+- The commented-out lines in `build_components.py` (lines 175-178) suggest this feature was previously considered but not fully implemented
+- This implementation follows the pattern established for other optional vLLM arguments like `enforce_eager` and `skip_tokenizer_init`
+- The property uses integer values (0/1) instead of boolean to maintain consistency with other categorical properties in the actuator
\ No newline at end of file
diff --git a/~/workspace/plans/ado-threadpool-vllm-cache-implementation-summary.md b/~/workspace/plans/ado-threadpool-vllm-cache-implementation-summary.md
new file mode 100644
index 000000000..b8978419e
--- /dev/null
+++ b/~/workspace/plans/ado-threadpool-vllm-cache-implementation-summary.md
@@ -0,0 +1,141 @@
+# vLLM Performance Actuator Cache Improvement - Implementation Summary
+
+## Overview
+
+Successfully implemented a fix for the vLLM performance actuator's measurement cache to include both environment and benchmark parameters in the cache key, preventing incorrect measurement reuse.
+
+## Problem Fixed
+
+**Before**: Cache only considered environment parameters (model, GPUs, memory, etc.), causing measurements to be incorrectly reused when benchmark parameters (num_prompts, request_rate, dataset, etc.) differed.
+
+**After**: Cache now uses a composite key that includes both environment AND benchmark parameters, ensuring measurements are only reused when both match.
+
+## Implementation Details
+
+### 1. New Helper Functions Added
+
+#### `_build_benchmark_params_key(values: dict[str, str]) -> str`
+- Location: `plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py` (lines 214-233)
+- Purpose: Extracts and serializes benchmark parameters that affect measurement results
+- Parameters included:
+  - `num_prompts` - number of prompts to test
+  - `request_rate` - rate of requests
+  - `max_concurrency` - maximum concurrent requests
+  - `number_input_tokens` - input token count
+  - `max_output_tokens` - output token count
+  - `burstiness` - burstiness factor
+  - `dataset` - dataset used for testing
+- Returns: JSON string with sorted keys for consistency
+
+#### `_build_cache_key(values: dict[str, str]) -> str`
+- Location: `plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py` (lines 236-257)
+- Purpose: Creates composite cache key combining environment and benchmark parameters
+- Structure:
+  ```json
+  {
+    "environment": { /* environment params from _build_entity_env */ },
+    "benchmark": { /* benchmark params from _build_benchmark_params_key */ }
+  }
+  ```
+- Returns: JSON string with sorted keys
+
+### 2. Cache Logic Updated
+
+Modified `run_resource_and_workload_experiment()` function:
+- Line 600: Changed from `_build_entity_env(values)` to `_build_cache_key(values)`
+- Line 601: Updated log message to reflect composite key
+- Line 603: Updated cache lookup to use new key
+- Lines 605-606: Updated log message for cache hits
+- Line 761: Updated cache storage to use new key
+
+### 3. Comprehensive Test Suite
+
+Added 9 new test cases in `plugins/actuators/vllm_performance/tests/test_experiment_executor.py`:
+
+#### TestBuildBenchmarkParamsKey (3 tests)
+- `test_includes_all_benchmark_parameters`: Verifies all benchmark params are included
+- `test_handles_missing_values`: Ensures missing values are handled as None
+- `test_consistent_output_with_sorted_keys`: Confirms consistent JSON output
+
+#### TestBuildCacheKey (6 tests)
+- `test_combines_environment_and_benchmark_params`: Verifies composite structure
+- `test_different_benchmark_params_produce_different_keys`: Ensures differentiation
+- `test_same_params_produce_same_key`: Confirms consistency
+- `test_cache_key_differentiates_on_request_rate`: Tests specific parameter differentiation
+- `test_vllm_0_18_same_cache_key_for_different_renderer_num_workers`: **Critical test** - Verifies that for vLLM 0.18.0, different `renderer_num_workers` values produce the SAME cache key (because threadpool is not supported and normalized to 0)
+- `test_vllm_0_20_different_cache_key_for_different_renderer_num_workers`: Verifies that for vLLM 0.20.1+, different `renderer_num_workers` values produce DIFFERENT cache keys
+
+## Test Results
+
+All 19 tests pass successfully:
+```
+============================= test session starts ==============================
+collected 19 items
+
+tests/test_experiment_executor.py::TestGetVllmVersionFromImageValue::... PASSED
+tests/test_experiment_executor.py::TestBuildEntityEnv::... PASSED
+tests/test_experiment_executor.py::TestBuildBenchmarkParamsKey::... PASSED
+tests/test_experiment_executor.py::TestBuildCacheKey::... PASSED
+
+============================== 19 passed in 0.68s ==============================
+```
+
+Code quality checks:
+- ✅ Black formatting: Passed
+- ✅ Ruff linting: All checks passed
+
+## Key Behavior Verified
+
+### For vLLM 0.18.0 (threadpool not supported)
+When using the test discoveryspace `../ops/geo/discoveryspace_geospatial_threadpool_test.yaml`:
+- Image: `["icr.io/drl-nextgen/mgazz/vllm:v0.18.0-tt.v1.2.5", "0.18.0"]`
+- `renderer_num_workers` values: 32, 64
+- **Result**: Both produce the SAME cache key because:
+  1. vLLM 0.18.0 < 0.20.0 (minimum version for threadpool)
+  2. Both `threadpool` and `renderer_num_workers` are normalized to 0 in environment
+  3. Same environment + same benchmark params = cache hit ✅
+
+### For vLLM 0.20.1+ (threadpool supported)
+- Image: `["icr.io/drl-nextgen/mgazz/vllm:v0.20.1-tt.main", "0.20.1"]`
+- `renderer_num_workers` values: 32, 64
+- **Result**: Produce DIFFERENT cache keys because:
+  1. vLLM 0.20.1 >= 0.20.0 (threadpool supported)
+  2. `renderer_num_workers` values are preserved (32 vs 64)
+  3. Different environment = no cache hit ✅
+
+## Files Modified
+
+1. **plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py**
+   - Added `_build_benchmark_params_key()` function
+   - Added `_build_cache_key()` function
+   - Updated cache logic in `run_resource_and_workload_experiment()`
+
+2. **plugins/actuators/vllm_performance/tests/test_experiment_executor.py**
+   - Added imports for new functions
+   - Added `TestBuildBenchmarkParamsKey` test class (3 tests)
+   - Added `TestBuildCacheKey` test class (6 tests)
+
+## Impact
+
+- **Bug Fix**: Prevents incorrect measurement reuse
+- **No Breaking Changes**: Cache is local to each operation execution
+- **Performance**: No performance regression
+- **Correctness**: Ensures measurements are only reused when truly identical
+
+## Validation
+
+The implementation correctly handles the user's specific test case:
+- For vLLM 0.18.0 with different `renderer_num_workers` values (32, 64)
+- Cache hits occur as expected because threadpool is not supported
+- Both entities share the same normalized environment (threadpool=0, renderer_num_workers=0)
+- Same benchmark parameters (num_prompts=200, request_rate=32/64, dataset, etc.)
+- Result: Measurements are correctly reused for entities with same effective configuration
+
+## Next Steps
+
+The implementation is complete and ready for use. The cache now correctly:
+1. ✅ Differentiates based on benchmark parameters
+2. ✅ Reuses measurements only when appropriate
+3. ✅ Handles vLLM version-specific behavior (threadpool support)
+4. ✅ Maintains backward compatibility
+5. ✅ Passes all tests with proper code quality
\ No newline at end of file
diff --git a/~/workspace/plans/ado-threadpool-vllm-cache-improvement.md b/~/workspace/plans/ado-threadpool-vllm-cache-improvement.md
new file mode 100644
index 000000000..bd8ccdc60
--- /dev/null
+++ b/~/workspace/plans/ado-threadpool-vllm-cache-improvement.md
@@ -0,0 +1,275 @@
+# Plan: Improve vLLM Performance Actuator Measurement Cache
+
+## Problem Statement
+
+The current measurement cache in the vLLM performance actuator (lines 541 and 553-571 in `experiment_executor.py`) only considers the entity environment definition when determining cache hits. This is insufficient because:
+
+1. **Current cache key**: Only includes environment parameters (model, image, GPUs, memory, etc.) via `_build_entity_env()`
+2. **Missing from cache key**: Benchmark/workload parameters that affect measurements:
+   - `num_prompts` - number of prompts to test
+   - `request_rate` - rate of requests
+   - `max_concurrency` - maximum concurrent requests
+   - `number_input_tokens` - input token count
+   - `max_output_tokens` - output token count
+   - `burstiness` - burstiness factor
+   - `dataset` - dataset used for testing
+
+3. **Impact**: Entities with identical environments but different benchmark parameters incorrectly reuse cached measurements, producing invalid results.
+
+## Current Implementation Analysis
+
+### Cache Location
+- **File**: `plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py`
+- **Function**: `run_resource_and_workload_experiment` (lines 504-738)
+- **Cache variable**: `measurement_cache: dict[str, dict]` (line 541)
+
+### Current Cache Key Generation
+```python
+definition_for_cache = _build_entity_env(values)  # Line 553
+```
+
+The `_build_entity_env()` function (lines 150-211) creates a JSON string containing only environment parameters:
+- model, image, n_gpus, gpu_type, n_cpus, memory
+- max_batch_tokens, gpu_memory_utilization, dtype, cpu_offload
+- max_num_seq, threadpool, renderer_num_workers
+
+### Cache Usage Pattern
+1. Check if `definition_for_cache` exists in cache (line 556)
+2. If hit: reuse cached measurements (lines 557-571)
+3. If miss: execute benchmark and cache result (lines 713-716)
+
+## Solution Design
+
+### New Cache Key Strategy
+
+Create a composite cache key that includes both:
+1. **Environment definition** (existing `_build_entity_env()` output)
+2. **Benchmark parameters** (new component)
+
+### Implementation Approach
+
+#### 1. Create New Function: `_build_benchmark_params_key()`
+
+```python
+def _build_benchmark_params_key(values: dict[str, str]) -> str:
+    """
+    Build a cache key from benchmark parameters that affect measurement results.
+    
+    These parameters define the workload characteristics and must be included
+    in the cache key to ensure measurements are only reused for identical tests.
+    
+    :param values: experiment values
+    :return: JSON string of benchmark parameters
+    """
+    benchmark_params = {
+        "num_prompts": values.get("num_prompts"),
+        "request_rate": values.get("request_rate"),
+        "max_concurrency": values.get("max_concurrency"),
+        "number_input_tokens": values.get("number_input_tokens"),
+        "max_output_tokens": values.get("max_output_tokens"),
+        "burstiness": values.get("burstiness"),
+        "dataset": values.get("dataset"),
+    }
+    return json.dumps(benchmark_params, sort_keys=True)
+```
+
+**Rationale**: 
+- These parameters directly affect benchmark execution and results
+- They correspond to fields in `BenchmarkParameters` model
+- Sorting keys ensures consistent JSON output
+
+#### 2. Create Composite Cache Key Function
+
+```python
+def _build_cache_key(values: dict[str, str]) -> str:
+    """
+    Build a composite cache key from both environment and benchmark parameters.
+    
+    Cache hits should only occur when both the deployment environment AND
+    the benchmark workload parameters are identical.
+    
+    :param values: experiment values
+    :return: composite cache key
+    """
+    env_key = _build_entity_env(values)
+    benchmark_key = _build_benchmark_params_key(values)
+    
+    # Combine both keys into a single cache key
+    composite = {
+        "environment": json.loads(env_key),
+        "benchmark": json.loads(benchmark_key)
+    }
+    return json.dumps(composite, sort_keys=True)
+```
+
+**Rationale**:
+- Separates concerns: environment vs. workload
+- Makes cache key structure explicit
+- Reuses `_build_entity_env()` unchanged, so the environment part of the key stays backward compatible
+
+#### 3. Update Cache Usage in `run_resource_and_workload_experiment()`
+
+**Current code (lines 553-571)**:
+```python
+definition_for_cache = _build_entity_env(values)
+logger.info("definition_for_cache: %s", definition_for_cache)
+
+if definition_for_cache in measurement_cache:
+    # ... reuse cached result
+```
+
+**Updated code**:
+```python
+cache_key = _build_cache_key(values)
+logger.info("cache_key: %s", cache_key)
+
+if cache_key in measurement_cache:
+    logger.info(
+        f"Reusing cached measurement for entity {entity.identifier} "
+        f"(identical environment and benchmark parameters)"
+    )
+    # ... reuse cached result (same logic)
+```
+
+**Changes**:
+- Replace `definition_for_cache` with `cache_key`
+- Update log message to reflect both environment and benchmark matching
+- Update cache storage (line 713) to use `cache_key`
+
+## Implementation Steps
+
+### Step 1: Add Helper Functions
+- Location: After `_build_entity_env()` function (after line 211)
+- Add `_build_benchmark_params_key()`
+- Add `_build_cache_key()`
+- Include comprehensive docstrings
+
+### Step 2: Update Cache Logic
+- In `run_resource_and_workload_experiment()`:
+  - Line 553: Replace `_build_entity_env()` with `_build_cache_key()`
+  - Line 554: Update log message
+  - Line 556: Update condition check
+  - Lines 557-559: Update log message
+  - Line 713: Update cache storage key
+
+### Step 3: Add Tests
+- Location: `plugins/actuators/vllm_performance/tests/test_experiment_executor.py`
+- Test `_build_benchmark_params_key()`:
+  - Verify all benchmark parameters included
+  - Verify consistent JSON output (sorted keys)
+  - Test with missing/None values
+- Test `_build_cache_key()`:
+  - Verify composite structure
+  - Verify different benchmark params → different keys
+  - Verify same params → same keys
+- Integration test for cache behavior:
+  - Same environment + same benchmark → cache hit
+  - Same environment + different benchmark → cache miss
+  - Different environment + same benchmark → cache miss
+
+### Step 4: Update Documentation
+- Add comments explaining cache key composition
+- Document why benchmark parameters must be in cache key
+- Update any relevant README or design docs
+
+## Testing Strategy
+
+### Unit Tests
+
+1. **Test `_build_benchmark_params_key()`**:
+   ```python
+   def test_build_benchmark_params_key():
+       values = {
+           "num_prompts": 100,
+           "request_rate": 10,
+           "max_concurrency": 5,
+           "dataset": "random"
+       }
+       key = _build_benchmark_params_key(values)
+       assert "num_prompts" in key
+       assert "100" in key
+       # Verify consistent output
+       key2 = _build_benchmark_params_key(values)
+       assert key == key2
+   ```
+
+2. **Test `_build_cache_key()` differentiation**:
+   ```python
+   def test_cache_key_differentiates_benchmark_params():
+       base_values = {
+           "model": "test-model",
+           "image": "test-image",
+           "n_gpus": 1,
+           "num_prompts": 100,
+       }
+       
+       key1 = _build_cache_key(base_values)
+       
+       # Change benchmark param
+       modified_values = base_values.copy()
+       modified_values["num_prompts"] = 200
+       key2 = _build_cache_key(modified_values)
+       
+       assert key1 != key2  # Different benchmark params → different keys
+   ```
+
+3. **Test cache hit/miss behavior**:
+   - Mock the cache and verify correct reuse
+   - Verify measurements not reused when benchmark params differ
+
+### Integration Tests
+
+1. Create test scenario with:
+   - 2 entities with identical environment
+   - Different `num_prompts` values
+   - Verify both entities execute (no cache hit)
+
+2. Create test scenario with:
+   - 2 entities with identical environment AND benchmark params
+   - Verify second entity reuses cache (cache hit)
+
+## Backward Compatibility
+
+**Impact**: None - this is a bug fix, not a breaking change
+
+- Cache is local to each operation execution (not persisted)
+- No external APIs affected
+- Existing operations will simply have more accurate caching
+
+## Edge Cases to Consider
+
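+1. **None/missing values**: Ensure consistent handling in JSON serialization (`values.get()` returns `None` for an absent key, which `json.dumps` writes as `null`, so a missing key and an explicit `None` yield the same cache key)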
+2. **Default values**: Consider if defaults should be explicit in cache key
+3. **Floating point precision**: `burstiness` is float - ensure consistent serialization
+4. **Dataset paths**: If `dataset` is a file path, normalize it before building the key so that equivalent paths do not produce different cache keys
+
+## Success Criteria
+
+1. ✅ Cache key includes all benchmark parameters
+2. ✅ Different benchmark params → different cache keys
+3. ✅ Same environment + same benchmark → cache hit
+4. ✅ Same environment + different benchmark → cache miss
+5. ✅ All tests pass
+6. ✅ No performance regression
+7. ✅ Clear logging of cache hits/misses
+
+## Files to Modify
+
+1. **Primary**:
+   - `plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py`
+
+2. **Tests**:
+   - `plugins/actuators/vllm_performance/tests/test_experiment_executor.py`
+
+## Estimated Effort
+
+- Implementation: 2-3 hours
+- Testing: 2-3 hours
+- Review and refinement: 1 hour
+- **Total**: 5-7 hours
+
+## Notes
+
+- The cache is only used in `run_resource_and_workload_experiment()`, not in `run_workload_experiment()` (which tests existing endpoints)
+- This fix prevents incorrect measurement reuse that could lead to invalid experimental results
+- The fix is localized to the caching logic and doesn't affect environment creation or benchmark execution
\ No newline at end of file