Add configurable reward functions to CloudAIGym (#566)

TaekyungHeo · web-flow · commit e9bc384ec2de · 2025-06-13T10:06:39.000-04:00
diff --git a/src/cloudai/_core/registry.py b/src/cloudai/_core/registry.py
@@ -16,7 +16,7 @@
 
 from __future__ import annotations
 
-from typing import TYPE_CHECKING, ClassVar, List, Set, Tuple, Type, Union
+from typing import TYPE_CHECKING, Callable, ClassVar, List, Set, Tuple, Type, Union
 
 if TYPE_CHECKING:
     from ..configurator.base_agent import BaseAgent
@@ -32,6 +32,8 @@
     from .system import System
     from .test_template_strategy import TestTemplateStrategy
 
+RewardFunction = Callable[[List[float]], float]  # observation values in, scalar reward out
+
 
 class Singleton(type):
     """Singleton metaclass."""
@@ -79,6 +81,7 @@ class Registry(metaclass=Singleton):
     reports_map: ClassVar[dict[Type[TestDefinition], Set[Type[ReportGenerationStrategy]]]] = {}
     scenario_reports: ClassVar[dict[str, type[Reporter]]] = {}
     report_configs: ClassVar[dict[str, ReportConfig]] = {}
+    reward_functions_map: ClassVar[dict[str, RewardFunction]] = {}
 
     def add_runner(self, name: str, value: Type[BaseRunner]) -> None:
         """
@@ -276,3 +279,18 @@ def add_scenario_report(self, name: str, report: type[Reporter], config: ReportC
     def update_scenario_report(self, name: str, report: type[Reporter], config: ReportConfig) -> None:
         self.scenario_reports[name] = report
         self.report_configs[name] = config
+
+    def add_reward_function(self, name: str, value: RewardFunction) -> None:
+        if name in self.reward_functions_map:  # no silent overwrite; 'update()' below = update_reward_function()
+            raise ValueError(f"Duplicating implementation for '{name}', use 'update()' for replacement.")
+        self.update_reward_function(name, value)  # unused name: store it via the shared write path
+
+    def update_reward_function(self, name: str, value: RewardFunction) -> None:
+        self.reward_functions_map[name] = value  # unconditional: adds the name or replaces its current function
+
+    def get_reward_function(self, name: str) -> RewardFunction:
+        """Return the reward function registered as `name`; raise KeyError listing known names if absent."""
+        if name in self.reward_functions_map:
+            return self.reward_functions_map[name]
+        known_names = list(self.reward_functions_map)
+        raise KeyError(f"Reward function '{name}' not found. Available functions: {known_names}")
diff --git a/src/cloudai/configurator/__init__.py b/src/cloudai/configurator/__init__.py
@@ -19,4 +19,9 @@
 from .cloudai_gym import CloudAIGymEnv
 from .grid_search import GridSearchAgent
 
-__all__ = ["BaseAgent", "BaseGym", "CloudAIGymEnv", "GridSearchAgent"]
+__all__ = [
+    "BaseAgent",
+    "BaseGym",
+    "CloudAIGymEnv",
+    "GridSearchAgent",
+]
diff --git a/src/cloudai/configurator/cloudai_gym.py b/src/cloudai/configurator/cloudai_gym.py
@@ -20,7 +20,7 @@
 import logging
 from typing import Any, Dict, Optional, Tuple
 
-from cloudai.core import METRIC_ERROR, Runner, TestRun
+from cloudai.core import METRIC_ERROR, Registry, Runner, TestRun
 from cloudai.util.lazy_imports import lazy
 
 from .base_gym import BaseGym
@@ -44,6 +44,7 @@ def __init__(self, test_run: TestRun, runner: Runner):
         self.test_run = test_run
         self.runner = runner
         self.max_steps = test_run.test.test_definition.agent_steps
+        self.reward_function = Registry().get_reward_function(test_run.test.test_definition.agent_reward_function)
         super().__init__()
 
     def define_action_space(self) -> Dict[str, Any]:
@@ -144,9 +145,7 @@ def compute_reward(self, observation: list) -> float:
         Returns:
             float: Reward value.
         """
-        if observation and observation[0] != 0:
-            return 1.0 / observation[0]
-        return 0.0
+        return self.reward_function(observation)
 
     def get_observation(self, action: Any) -> list:
         """
diff --git a/src/cloudai/configurator/reward_functions.py b/src/cloudai/configurator/reward_functions.py
@@ -0,0 +1,35 @@
+# SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES
+# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import List
+
+
+def inverse_reward(observation: List[float]) -> float:
+    """Return 1 / observation[0], or 0.0 when the observation is empty or its first value is zero."""
+    first = observation[0] if observation else 0
+    return 1.0 / first if first != 0 else 0.0
+
+
+def negative_reward(observation: List[float]) -> float:
+    """Return the negated first observation value, or 0.0 when the observation is empty."""
+    # Only the first element contributes; any further elements are ignored.
+    return -observation[0] if observation else 0.0
+
+
+def identity_reward(observation: List[float]) -> float:
+    """Return the first observation value unchanged, or 0.0 when the observation is empty."""
+    # Only the first element contributes; any further elements are ignored.
+    return observation[0] if observation else 0.0
diff --git a/src/cloudai/models/workload.py b/src/cloudai/models/workload.py
@@ -106,6 +106,7 @@ class TestDefinition(BaseModel, ABC):
     agent: str = "grid_search"
     agent_steps: int = 1
     agent_metrics: list[str] = Field(default=["default"])
+    agent_reward_function: str = "inverse"
 
     @property
     def cmd_args_dict(self) -> Dict[str, Union[str, List[str]]]:
diff --git a/src/cloudai/registration.py b/src/cloudai/registration.py
@@ -18,6 +18,11 @@
 def register_all():
     """Register all workloads, systems, runners, installers, and strategies."""
     from cloudai.configurator.grid_search import GridSearchAgent
+    from cloudai.configurator.reward_functions import (
+        identity_reward,
+        inverse_reward,
+        negative_reward,
+    )
     from cloudai.core import (
         CommandGenStrategy,
         GradingStrategy,
@@ -308,3 +313,7 @@ def register_all():
     Registry().add_scenario_report("per_test", PerTestReporter, ReportConfig(enable=True))
     Registry().add_scenario_report("status", StatusReporter, ReportConfig(enable=True))
     Registry().add_scenario_report("tarball", TarballReporter, ReportConfig(enable=True))
+
+    Registry().add_reward_function("inverse", inverse_reward)
+    Registry().add_reward_function("negative", negative_reward)
+    Registry().add_reward_function("identity", identity_reward)
diff --git a/tests/test_cloudaigym.py b/tests/test_cloudaigym.py
@@ -92,18 +92,56 @@ def test_observation_space(setup_env):
 
 
 @pytest.mark.parametrize(
-    "observation,expected_reward",
+    "reward_function,test_cases",
     [
-        ([0.34827126874999986], pytest.approx(2.871, 0.001)),
-        ([0.0], 0.0),
-        ([], 0.0),
-        ([2.0, 2.0], 0.5),
+        (
+            "inverse",
+            [
+                ([0.34827126874999986], pytest.approx(2.871, 0.001)),
+                ([0.0], 0.0),
+                ([], 0.0),
+                ([2.0, 2.0], 0.5),
+            ],
+        ),
+        (
+            "negative",
+            [
+                ([2.0], -2.0),
+                ([-1.5], 1.5),
+                ([0.0], 0.0),
+                ([], 0.0),
+            ],
+        ),
+        (
+            "identity",
+            [
+                ([2.0], 2.0),
+                ([-1.5], -1.5),
+                ([0.0], 0.0),
+                ([], 0.0),
+            ],
+        ),
     ],
 )
-def test_compute_reward(observation: list[float], expected_reward: float):
-    env = CloudAIGymEnv(test_run=MagicMock(), runner=MagicMock())
-    reward = env.compute_reward(observation)
-    assert reward == expected_reward
+def test_compute_reward(reward_function, test_cases):
+    """Check that the configured reward function maps each sample observation to its expected reward."""
+    mocked_run = MagicMock()
+    mocked_run.test.test_definition.agent_reward_function = reward_function
+    gym_env = CloudAIGymEnv(test_run=mocked_run, runner=MagicMock())
+
+    for observation, expected in test_cases:
+        assert gym_env.compute_reward(observation) == expected
+
+
+def test_compute_reward_invalid():
+    """Check that an unregistered reward-function name raises a KeyError naming the built-in functions."""
+    test_run = MagicMock()
+    test_run.test.test_definition.agent_reward_function = "nonexistent"
+    with pytest.raises(KeyError) as exc_info:
+        CloudAIGymEnv(test_run=test_run, runner=MagicMock())
+    message = str(exc_info.value)  # Registry is a shared singleton, so do not pin the exact list or its order
+    assert "Reward function 'nonexistent' not found. Available functions:" in message
+    assert all(repr(name) in message for name in ("inverse", "negative", "identity"))
 
 
 def test_tr_output_path(setup_env: tuple[TestRun, Runner]):