
Commit f9f1e31

Added rampup to constant rate type (#549)
## Summary

Simply allows a linear rampup of the constant rate profile.

## Test Plan

The simplest test is to run a short constant test with 4 requests per second and a long rampup. You can see how it ramps as expected. There are also new tests.

## Related Issues

Fulfills part of the goals of #428

---

- [x] "I certify that all code in this PR is my own, except as noted below."

## Use of AI

- [ ] Includes AI-assisted code completion
- [x] Includes code generated by an AI application
- [x] Includes AI-generated tests (NOTE: AI written tests should have a docstring that includes `## WRITTEN BY AI ##`)
2 parents 5a61f70 + eb6a803 commit f9f1e31
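
To see concretely what "ramps as expected" means for the Test Plan scenario, here is a minimal standalone sketch (not part of the commit) that reproduces the schedule math added in AsyncConstantStrategy.next_request_time for 4 requests per second; the 30-second rampup value is an assumed stand-in for "a long rampup".

import math

rate = 4.0              # requests per second (Test Plan value)
rampup_duration = 30.0  # assumed "long rampup" in seconds

# Requests issued during the rampup window: n_rampup = rate * T / 2
n_rampup = rate * rampup_duration / 2.0  # 60 requests

def offset(n: int) -> float:
    """Seconds after start_time at which request n is scheduled."""
    if n == 1:
        return 0.0
    if n <= n_rampup:
        # Invert n = rate * t^2 / (2 * T)  ->  t = sqrt(2 * n * T / rate)
        return math.sqrt(2.0 * n * rampup_duration / rate)
    # After rampup: constant rate
    return rampup_duration + (n - n_rampup) / rate

for n in (1, 2, 10, 30, 60, 61, 62, 100):
    print(f"request {n:3d} at +{offset(n):7.2f}s")

The gaps between requests shrink over the first 30 seconds and then settle to a steady 0.25 s (1/rate), which is the ramp behavior the Test Plan describes.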

4 files changed: +221, -6 lines

src/guidellm/__main__.py

Lines changed: 1 addition & 1 deletion
@@ -334,7 +334,7 @@ def benchmark():
     default=BenchmarkGenerativeTextArgs.get_default("rampup"),
     help=(
         "The time, in seconds, to ramp up the request rate over. "
-        "Only applicable for Throughput/Concurrent strategies"
+        "Applicable for Throughput, Concurrent, and Constant strategies"
     ),
 )
 @click.option(

src/guidellm/benchmark/profiles.py

Lines changed: 3 additions & 1 deletion
@@ -557,7 +557,9 @@ def next_strategy(
 
         if self.strategy_type == "constant":
             return AsyncConstantStrategy(
-                rate=current_rate, max_concurrency=self.max_concurrency
+                rate=current_rate,
+                max_concurrency=self.max_concurrency,
+                rampup_duration=self.rampup_duration,
             )
         elif self.strategy_type == "poisson":
             return AsyncPoissonStrategy(

src/guidellm/scheduler/strategies.py

Lines changed: 40 additions & 4 deletions
@@ -16,6 +16,7 @@
 from __future__ import annotations
 
 import asyncio
+import math
 import random
 from abc import abstractmethod
 from multiprocessing import Event, Value, synchronize
@@ -453,6 +454,13 @@ class AsyncConstantStrategy(SchedulingStrategy):
         default=None,
         description="Maximum number of concurrent requests to schedule",
     )
+    rampup_duration: NonNegativeFloat = Field(
+        default=0.0,
+        description=(
+            "Duration in seconds to linearly ramp up from 0 to target rate "
+            "at the beginning of each strategy run"
+        ),
+    )
 
     def __str__(self) -> str:
         """
@@ -476,19 +484,47 @@ def requests_limit(self) -> PositiveInt | None:
 
     async def next_request_time(self, worker_index: PositiveInt) -> float:
         """
-        Calculate next request time at fixed intervals.
+        Calculate next request time at fixed intervals with optional linear rampup.
 
         Schedules requests at uniform intervals determined by the configured rate,
-        independent of request completion times.
+        independent of request completion times. If rampup_duration is set, the rate
+        increases linearly from 0 to the target rate during the rampup period, then
+        continues at the constant rate.
 
         :param worker_index: Unused for constant strategy
-        :return: Start time plus constant interval based on request index
+        :return: Start time plus interval based on request index and
+            rampup configuration
         """
         _ = worker_index  # unused
         current_index = self.next_request_index()
         start_time = await self.get_processes_start_time()
 
-        return start_time + current_index / self.rate
+        if self.rampup_duration > 0:
+            # Calculate number of requests that would be sent during rampup
+            # Cumulative requests by time t during rampup:
+            #   n = rate * t² / (2 * rampup_duration)
+            # At end of rampup (t = rampup_duration), n_rampup is calculated below
+            n_rampup = self.rate * self.rampup_duration / 2.0
+
+            if current_index == 1:
+                # First request at start_time
+                return start_time
+            elif current_index <= n_rampup:
+                # During rampup: solve for t where
+                #   n = rate * t² / (2 * rampup_duration)
+                time_offset = math.sqrt(
+                    2.0 * current_index * self.rampup_duration / self.rate
+                )
+                return start_time + time_offset
+            else:
+                # After rampup: continue at constant rate
+                time_offset = (
+                    self.rampup_duration + (current_index - n_rampup) / self.rate
+                )
+                return start_time + time_offset
+        else:
+            # No rampup: uniform intervals
+            return start_time + current_index / self.rate
 
     def request_completed(self, request_info: RequestInfo):
         """

tests/unit/scheduler/test_strategies.py

Lines changed: 177 additions & 0 deletions
@@ -389,6 +389,8 @@ class TestAsyncConstantStrategy:
             {"rate": 1.0},
             {"rate": 5.0},
             {"rate": 10.3, "max_concurrency": 8},
+            {"rate": 2.0, "rampup_duration": 1.0},
+            {"rate": 10.0, "rampup_duration": 2.0, "max_concurrency": 5},
         ]
     )
     def valid_instances(self, request):
@@ -412,6 +414,7 @@ def test_initialization(self, valid_instances: tuple[AsyncConstantStrategy, dict]):
         [
             ("rate", 0),
             ("rate", -1.0),
+            ("rampup_duration", -1.0),
         ],
     )
     def test_invalid_initialization(self, field, value):
@@ -473,6 +476,180 @@ def test_marshalling(self, valid_instances: tuple[AsyncConstantStrategy, dict]):
         for key, value in constructor_args.items():
             assert getattr(base_json_reconstructed, key) == value
 
+    @pytest.mark.smoke
+    def test_rampup_duration_default(self):
+        """Test that rampup_duration defaults to 0.0.
+
+        ### WRITTEN BY AI ###
+        """
+        instance = AsyncConstantStrategy(rate=1.0)
+        assert instance.rampup_duration == 0.0
+
+    @pytest.mark.smoke
+    def test_rampup_duration_initialization(self):
+        """Test that rampup_duration can be set.
+
+        ### WRITTEN BY AI ###
+        """
+        instance = AsyncConstantStrategy(rate=10.0, rampup_duration=2.0)
+        assert instance.rampup_duration == 2.0
+
+    @pytest.mark.smoke
+    @pytest.mark.asyncio
+    async def test_timing_without_rampup(self):
+        """Test timing without rampup matches existing behavior.
+
+        ### WRITTEN BY AI ###
+        """
+        strategy = AsyncConstantStrategy(rate=10.0, rampup_duration=0.0)
+        strategy.init_processes_timings(worker_count=1, max_concurrency=100)
+        start_time = 1000.0
+        strategy.init_processes_start(start_time)
+
+        # Test multiple request indices
+        # Each call to next_request_time increments the index automatically
+        for expected_index in range(1, 11):
+            time = await strategy.next_request_time(0)
+            expected_time = start_time + expected_index / 10.0
+            assert time == pytest.approx(expected_time, rel=1e-10), (
+                f"Request {expected_index}: expected {expected_time}, got {time}"
+            )
+
+    @pytest.mark.smoke
+    @pytest.mark.asyncio
+    async def test_timing_with_rampup(self):
+        """Test timing with rampup follows quadratic then linear pattern.
+
+        ### WRITTEN BY AI ###
+        """
+        rate = 10.0
+        rampup_duration = 2.0
+        strategy = AsyncConstantStrategy(rate=rate, rampup_duration=rampup_duration)
+        strategy.init_processes_timings(worker_count=1, max_concurrency=100)
+        start_time = 1000.0
+        strategy.init_processes_start(start_time)
+
+        # Calculate number of requests during rampup
+        n_rampup = rate * rampup_duration / 2.0  # Should be 10
+
+        # Test first request (index 1) - should be at start_time
+        time1 = await strategy.next_request_time(0)
+        assert time1 == pytest.approx(start_time, abs=1e-6), (
+            f"First request should be at start_time, got {time1}"
+        )
+
+        # Test requests during rampup (indices 2-10)
+        # For index n during rampup: t = sqrt(2 * n * rampup_duration / rate)
+        # Each call increments the index automatically
+        for n in range(2, int(n_rampup) + 1):
+            time_n = await strategy.next_request_time(0)
+            expected_time = start_time + math.sqrt(2.0 * n * rampup_duration / rate)
+            assert time_n == pytest.approx(expected_time, rel=1e-6), (
+                f"Request {n} during rampup: expected {expected_time}, got {time_n}"
+            )
+
+        # Test request right after rampup (index 11)
+        # Should be at: rampup_duration + (11 - n_rampup) / rate
+        time_after = await strategy.next_request_time(0)
+        expected_after = start_time + rampup_duration + (11 - n_rampup) / rate
+        assert time_after == pytest.approx(expected_after, rel=1e-6), (
+            f"Request 11 after rampup: expected {expected_after}, got {time_after}"
+        )
+
+        # Test a few more requests after rampup to verify constant rate
+        for i in range(12, 15):
+            time_i = await strategy.next_request_time(0)
+            expected_i = start_time + rampup_duration + (i - n_rampup) / rate
+            assert time_i == pytest.approx(expected_i, rel=1e-6), (
+                f"Request {i} after rampup: expected {expected_i}, got {time_i}"
+            )
+
+    @pytest.mark.sanity
+    @pytest.mark.asyncio
+    async def test_timing_with_rampup_edge_cases(self):
+        """Test edge cases for rampup timing.
+
+        ### WRITTEN BY AI ###
+        """
+
+        # Test with very short rampup_duration
+        strategy = AsyncConstantStrategy(rate=100.0, rampup_duration=0.01)
+        strategy.init_processes_timings(worker_count=1, max_concurrency=100)
+        start_time = 2000.0
+        strategy.init_processes_start(start_time)
+
+        # First request
+        time1 = await strategy.next_request_time(0)
+        assert time1 == pytest.approx(start_time, abs=1e-6)
+
+        # Test with very long rampup_duration
+        strategy2 = AsyncConstantStrategy(rate=1.0, rampup_duration=100.0)
+        strategy2.init_processes_timings(worker_count=1, max_concurrency=100)
+        start_time2 = 3000.0
+        strategy2.init_processes_start(start_time2)
+
+        # First request
+        time1_2 = await strategy2.next_request_time(0)
+        assert time1_2 == pytest.approx(start_time2, abs=1e-6)
+
+        # Request at end of rampup
+        # We need to advance to request index 50 (n_rampup = 1.0 * 100.0 / 2.0)
+        # Already at index 1, need 49 more calls to reach index 50
+        time_end_rampup = None
+        for _ in range(49):  # 49 calls to go from index 2 to index 50
+            time_end_rampup = await strategy2.next_request_time(0)
+        expected_end = start_time2 + 100.0
+        assert time_end_rampup == pytest.approx(expected_end, rel=1e-6), (
+            f"End of rampup: expected {expected_end}, got {time_end_rampup}"
+        )
+
+    @pytest.mark.sanity
+    @pytest.mark.asyncio
+    async def test_timing_rampup_transition(self):
+        """Test smooth transition from rampup to constant rate.
+
+        ### WRITTEN BY AI ###
+        """
+        rate = 10.0
+        rampup_duration = 2.0
+        strategy = AsyncConstantStrategy(rate=rate, rampup_duration=rampup_duration)
+        strategy.init_processes_timings(worker_count=1, max_concurrency=100)
+        start_time = 5000.0
+        strategy.init_processes_start(start_time)
+
+        n_rampup = rate * rampup_duration / 2.0  # 10
+
+        # Get to the last request of rampup (index 10)
+        for _ in range(9):  # Already at index 1, need 9 more to reach 10
+            await strategy.next_request_time(0)
+
+        time_last_rampup = await strategy.next_request_time(0)
+        expected_last_rampup = start_time + math.sqrt(
+            2.0 * 10 * rampup_duration / rate
+        )
+        assert time_last_rampup == pytest.approx(
+            expected_last_rampup, rel=1e-6
+        ), (
+            f"Last rampup request: expected {expected_last_rampup}, "
+            f"got {time_last_rampup}"
+        )
+
+        # First request after rampup (index 11)
+        time_first_after = await strategy.next_request_time(0)
+        expected_first_after = start_time + rampup_duration + (11 - n_rampup) / rate
+        assert time_first_after == pytest.approx(
+            expected_first_after, rel=1e-6
+        ), (
+            f"First after rampup: expected {expected_first_after}, "
+            f"got {time_first_after}"
+        )
+
+        # Verify the transition is smooth (no gap)
+        # The last rampup request should be at rampup_duration
+        assert time_last_rampup == pytest.approx(
+            start_time + rampup_duration, rel=1e-6
+        ), "Last rampup should be at end of rampup period"
+
 
 class TestAsyncPoissonStrategy:
     @pytest.fixture(
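
Outside of pytest, the same behavior can be poked at interactively. A rough sketch, mirroring the single-worker setup used in the new tests; the import path below is an assumption based on the file location (src/guidellm/scheduler/strategies.py) and is not confirmed by this commit:

import asyncio

# Assumed import path for illustration only
from guidellm.scheduler.strategies import AsyncConstantStrategy

async def main() -> None:
    strategy = AsyncConstantStrategy(rate=10.0, rampup_duration=2.0)
    # Single-worker setup, as in the unit tests
    strategy.init_processes_timings(worker_count=1, max_concurrency=100)
    start_time = 1000.0
    strategy.init_processes_start(start_time)

    # Print the first 15 scheduled offsets: square-root spacing for the
    # first ~10 requests, then a constant 0.1 s gap once the 2 s rampup ends.
    for i in range(1, 16):
        t = await strategy.next_request_time(0)
        print(f"request {i:2d} -> +{t - start_time:.3f}s")

asyncio.run(main())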
