Skip to content

Commit e1d52c2

Browse files
authored
Add custom reward functions with latency & throughput metrics handling (#674)
1 parent 0b7a15c commit e1d52c2

File tree

3 files changed

+103
-1
lines changed

3 files changed

+103
-1
lines changed

src/cloudai/configurator/reward_functions.py

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
# See the License for the specific language governing permissions and
1515
# limitations under the License.
1616

17+
import math
1718
from typing import List
1819

1920

@@ -33,3 +34,95 @@ def identity_reward(observation: List[float]) -> float:
3334
if observation:
3435
return observation[0]
3536
return 0.0
37+
38+
39+
def ai_dynamo_weighted_normalized_reward(observation: List[float]) -> float:
    """Calculate reward using AI Dynamo's custom metrics.

    The observation is expected to be ``[TTFT, ITL, throughput]``. Each metric
    is normalized against a fixed baseline and the three are combined as a
    weighted sum, with equal emphasis on TTFT and throughput.

    Args:
        observation: Metrics as [time-to-first-token (seconds),
            inter-token latency (seconds), throughput (tokens/s)].

    Returns:
        The weighted normalized reward, or -1.0 when the observation is too
        short or contains non-positive latency/throughput values.
    """
    ttft_idx = 0
    itl_idx = 1
    throughput_idx = 2

    # Normalization baselines
    ttft_baseline = 0.3  # seconds
    itl_baseline = 0.02  # seconds
    throughput_baseline = 50.0  # tokens/s

    # Weighting between metrics - equal focus on TTFT and throughput
    ttft_weight = 0.45
    itl_weight = 0.1
    throughput_weight = 0.45

    if len(observation) < 3:
        return -1.0

    ttft = observation[ttft_idx]
    itl = observation[itl_idx]
    throughput = observation[throughput_idx]

    # Guard against division by zero and nonsensical (non-positive)
    # measurements; the sibling reward functions apply the same validity
    # check, and without it `ttft_baseline / ttft` raises ZeroDivisionError.
    if ttft <= 0 or itl <= 0 or throughput <= 0:
        return -1.0

    # Lower latency is better, so latency rewards are baseline / measured.
    ttft_reward = ttft_baseline / ttft
    itl_reward = itl_baseline / itl

    # Higher throughput is better, so throughput reward is measured / baseline.
    throughput_reward = throughput / throughput_baseline

    # Weighted combined reward
    reward = ttft_weight * ttft_reward + itl_weight * itl_reward + throughput_weight * throughput_reward

    return reward
71+
72+
73+
def ai_dynamo_ratio_normalized_reward(observation: List[float]) -> float:
    """Calculate reward as normalized throughput divided by latency metrics."""
    # Baselines used to bring each metric onto a comparable scale.
    ttft_baseline = 1.0  # seconds (1000ms)
    itl_baseline = 0.03  # seconds (30ms)
    throughput_baseline = 1000.0  # tokens/s

    # Too few metrics: no reward.
    if len(observation) < 3:
        return 0.0

    ttft, itl, throughput = observation[0], observation[1], observation[2]

    # Non-positive measurements are invalid; yield a zero reward.
    if min(ttft, itl, throughput) <= 0:
        return 0.0

    # Reward grows with throughput and shrinks as either latency grows.
    latency_product = (ttft / ttft_baseline) * (itl / itl_baseline)
    return (throughput / throughput_baseline) / latency_product
100+
101+
102+
def ai_dynamo_log_scale_reward(observation: List[float]) -> float:
    """
    Calculate reward using log-scale metrics focused on throughput and TTFT.

    Since ITL is already optimized, we focus on the primary metrics.
    """
    # Too few metrics: no reward.
    if len(observation) < 3:
        return 0.0

    ttft, itl, throughput = observation[0], observation[1], observation[2]

    # Invalid (non-positive) measurements earn a small negative sentinel.
    if ttft <= 0 or itl <= 0 or throughput <= 0:
        return -1e-3

    # Log damping keeps very large metric values from dominating the reward.
    gain = math.log(throughput + 1)
    ttft_cost = math.log(ttft + 1)
    itl_cost = math.log(itl + 1)

    # TTFT is penalized far more heavily than ITL.
    return gain - 0.7 * ttft_cost - 0.1 * itl_cost

src/cloudai/registration.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,9 @@ def register_all():
1919
"""Register all workloads, systems, runners, installers, and strategies."""
2020
from cloudai.configurator.grid_search import GridSearchAgent
2121
from cloudai.configurator.reward_functions import (
22+
ai_dynamo_log_scale_reward,
23+
ai_dynamo_ratio_normalized_reward,
24+
ai_dynamo_weighted_normalized_reward,
2225
identity_reward,
2326
inverse_reward,
2427
negative_reward,
@@ -219,3 +222,6 @@ def register_all():
219222
Registry().add_reward_function("inverse", inverse_reward)
220223
Registry().add_reward_function("negative", negative_reward)
221224
Registry().add_reward_function("identity", identity_reward)
225+
Registry().add_reward_function("ai_dynamo_weighted_normalized", ai_dynamo_weighted_normalized_reward)
226+
Registry().add_reward_function("ai_dynamo_ratio_normalized", ai_dynamo_ratio_normalized_reward)
227+
Registry().add_reward_function("ai_dynamo_log_scale", ai_dynamo_log_scale_reward)

tests/test_cloudaigym.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,10 @@ def test_compute_reward_invalid():
144144
CloudAIGymEnv(test_run=test_run, runner=MagicMock())
145145

146146
assert "Reward function 'nonexistent' not found" in str(exc_info.value)
147-
assert "Available functions: ['inverse', 'negative', 'identity']" in str(exc_info.value)
147+
assert (
148+
"Available functions: ['inverse', 'negative', 'identity', "
149+
"'ai_dynamo_weighted_normalized', 'ai_dynamo_ratio_normalized', 'ai_dynamo_log_scale']" in str(exc_info.value)
150+
)
148151

149152

150153
def test_tr_output_path(setup_env: tuple[TestRun, BaseRunner]):

0 commit comments

Comments
 (0)