Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
825 changes: 79 additions & 746 deletions pylock.toml

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ keywords = [
dependencies = [
"click>=8.0.0,<8.2.0",
"culsans~=0.9.0",
"aiologic~=0.14.0",
"datasets",
"eval_type_backport",
"faker",
Expand Down
10 changes: 10 additions & 0 deletions src/guidellm/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -370,6 +370,16 @@ def benchmark():
default=BenchmarkGenerativeTextArgs.get_default("max_global_error_rate"),
help="Maximum global error rate across all benchmarks.",
)
@click.option(
"--stop-over-saturated",
type=bool,
default=BenchmarkGenerativeTextArgs.get_default("stop_over_saturated"),
help=(
"Set this flag to stop the benchmark if the model is over-saturated. "
"Defaults to False."
),
is_flag=True,
)
def run(**kwargs):
request_type = kwargs.pop("request_type", None)
request_formatter_kwargs = kwargs.pop("request_formatter_kwargs", None)
Expand Down
4 changes: 4 additions & 0 deletions src/guidellm/benchmark/entrypoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,7 @@ async def resolve_profile(
max_errors: int | None,
max_error_rate: float | None,
max_global_error_rate: float | None,
stop_over_saturated: bool | None = None,
console: Console | None = None,
) -> Profile:
"""
Expand All @@ -281,6 +282,7 @@ async def resolve_profile(
:param max_errors: Maximum number of errors before stopping
:param max_error_rate: Maximum error rate threshold before stopping
:param max_global_error_rate: Maximum global error rate threshold before stopping
:param stop_over_saturated: Whether to stop if over-saturation is detected
:param console: Console instance for progress reporting, or None
:return: Configured Profile instance ready for benchmarking
:raises ValueError: If constraints are provided with a pre-configured Profile
Expand All @@ -297,6 +299,7 @@ async def resolve_profile(
"max_errors": max_errors,
"max_error_rate": max_error_rate,
"max_global_error_rate": max_global_error_rate,
"stop_over_saturated": stop_over_saturated,
}.items():
if val is not None:
constraints[key] = val
Expand Down Expand Up @@ -412,6 +415,7 @@ async def benchmark_generative_text(
max_errors=args.max_errors,
max_error_rate=args.max_error_rate,
max_global_error_rate=args.max_global_error_rate,
stop_over_saturated=args.stop_over_saturated,
console=console,
)
output_formats = await resolve_output_formats(
Expand Down
5 changes: 2 additions & 3 deletions src/guidellm/benchmark/progress.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@

from abc import ABC, abstractmethod
from dataclasses import dataclass
from datetime import datetime
from typing import Any, Generic, Literal

from rich.console import Group
Expand All @@ -42,7 +41,7 @@
GenerativeBenchmark,
)
from guidellm.scheduler import SchedulerState, SchedulingStrategy, StrategyType
from guidellm.utils import Colors, format_value_display
from guidellm.utils import Colors, format_value_display, safe_format_timestamp

__all__ = ["BenchmarkerProgress", "GenerativeConsoleBenchmarkerProgress"]

Expand Down Expand Up @@ -383,7 +382,7 @@ def formatted_start_time(self) -> str:
if self.start_time < 0.0:
return "--:--:--"

return datetime.fromtimestamp(self.start_time).strftime("%H:%M:%S")
return safe_format_timestamp(self.start_time, format_="%H:%M:%S")

@property
def formatted_progress_status(self) -> str:
Expand Down
4 changes: 4 additions & 0 deletions src/guidellm/benchmark/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -1952,6 +1952,10 @@ def get_default(cls: type[BenchmarkGenerativeTextArgs], field: str) -> Any:
max_global_error_rate: float | None = Field(
default=None, description="Maximum global error rate (0-1) before stopping"
)
stop_over_saturated: bool | None = Field(
default=None,
description="Whether to stop the benchmark if the model is over-saturated",
)

@field_validator("data", "data_args", "rate", mode="wrap")
@classmethod
Expand Down
6 changes: 6 additions & 0 deletions src/guidellm/scheduler/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@
MaxErrorsConstraint,
MaxGlobalErrorRateConstraint,
MaxNumberConstraint,
OverSaturationConstraint,
OverSaturationConstraintInitializer,
OverSaturationDetector,
PydanticConstraintInitializer,
SerializableConstraintInitializer,
UnserializableConstraintInitializer,
Expand Down Expand Up @@ -66,6 +69,9 @@
"MaxNumberConstraint",
"MultiTurnRequestT",
"NonDistributedEnvironment",
"OverSaturationConstraint",
"OverSaturationConstraintInitializer",
"OverSaturationDetector",
"PydanticConstraintInitializer",
"RequestT",
"ResponseT",
Expand Down
51 changes: 51 additions & 0 deletions src/guidellm/scheduler/constraints/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
"""
Constraint system for scheduler behavior control and request processing limits.

Provides flexible constraints for managing scheduler behavior with configurable
thresholds based on time, error rates, and request counts. Constraints evaluate
scheduler state and individual requests to determine whether processing should
continue or stop based on predefined limits. The constraint system enables
sophisticated benchmark stopping criteria through composable constraint types.
"""

from .base import (
PydanticConstraintInitializer,
UnserializableConstraintInitializer,
)
from .factory import ConstraintsInitializerFactory
from .over_saturation import (
OverSaturationConstraint,
OverSaturationConstraintInitializer,
OverSaturationDetector,
)
from .protocols import (
Constraint,
ConstraintInitializer,
SerializableConstraintInitializer,
)
from .standard import (
MaxDurationConstraint,
MaxErrorRateConstraint,
MaxErrorsConstraint,
MaxGlobalErrorRateConstraint,
MaxNumberConstraint,
RequestsExhaustedConstraint,
)

# Public API of the constraints package: every name imported from the
# submodules above is re-exported here. Entries are kept in alphabetical order.
__all__ = [
"Constraint",
"ConstraintInitializer",
"ConstraintsInitializerFactory",
"MaxDurationConstraint",
"MaxErrorRateConstraint",
"MaxErrorsConstraint",
"MaxGlobalErrorRateConstraint",
"MaxNumberConstraint",
"OverSaturationConstraint",
"OverSaturationConstraintInitializer",
"OverSaturationDetector",
"PydanticConstraintInitializer",
"RequestsExhaustedConstraint",
"SerializableConstraintInitializer",
"UnserializableConstraintInitializer",
]
139 changes: 139 additions & 0 deletions src/guidellm/scheduler/constraints/base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
"""
Base classes for constraint initializers.

Provides abstract base classes and utility classes for creating constraint
initializers with Pydantic validation and serialization support.
"""

from __future__ import annotations

from abc import ABC, abstractmethod
from typing import Any, Literal

from pydantic import Field

from guidellm.scheduler.schemas import SchedulerState, SchedulerUpdateAction
from guidellm.schemas import RequestInfo
from guidellm.utils import InfoMixin, StandardBaseModel

from .protocols import Constraint

# Public names exported by this module: the two initializer base classes
# defined below.
__all__ = [
"PydanticConstraintInitializer",
"UnserializableConstraintInitializer",
]


class PydanticConstraintInitializer(StandardBaseModel, ABC, InfoMixin):
    """
    Abstract base for Pydantic-based constraint initializers.

    Provides standardized serialization, validation, and metadata handling for
    constraint initializers using Pydantic models. Subclasses implement the two
    abstract hooks below (``validated_kwargs`` and ``create_constraint``) while
    inheriting the ``type_`` field and the ``info`` property.
    """

    type_: str = Field(description="Type identifier for the constraint initializer")

    @property
    def info(self) -> dict[str, Any]:
        """
        Extract serializable information from this constraint initializer.

        :return: Dictionary returned by ``model_dump()`` for this initializer
        """
        return self.model_dump()

    @classmethod
    @abstractmethod
    def validated_kwargs(cls, *args, **kwargs) -> dict[str, Any]:
        """
        Validate and process arguments for constraint creation.

        Abstract hook: concrete subclasses must override this method to handle
        their specific parameter patterns and validation requirements. The
        stub defined here has no behavior of its own and raises nothing.

        :param args: Positional arguments passed to the constraint
        :param kwargs: Keyword arguments passed to the constraint
        :return: Validated dictionary of parameters for constraint creation
        """
        ...

    @abstractmethod
    def create_constraint(self, **kwargs) -> Constraint:
        """
        Create a constraint instance.

        Abstract hook: concrete subclasses must override this method to return
        their specific constraint type with appropriate configuration. The
        stub defined here has no behavior of its own and raises nothing.

        :param kwargs: Additional keyword arguments (usually unused)
        :return: Configured constraint instance
        """
        ...


class UnserializableConstraintInitializer(PydanticConstraintInitializer):
    """
    Stand-in initializer for constraints that could not be serialized.

    Keeps whatever information was available about the original constraint in
    ``orig_info``. It is never executable: both ``create_constraint`` and
    ``__call__`` unconditionally raise ``RuntimeError``.
    """

    type_: Literal["unserializable"] = "unserializable"  # type: ignore[assignment]
    orig_info: dict[str, Any] = Field(
        default_factory=dict,
        description="Original constraint information before serialization failure",
    )

    @classmethod
    def validated_kwargs(
        cls,
        orig_info: dict[str, Any] | None = None,
        **kwargs,  # noqa: ARG003
    ) -> dict[str, Any]:
        """
        Build the keyword arguments used to construct this placeholder.

        :param orig_info: Information about the original constraint; a missing
            or empty value is replaced by a new empty dictionary
        :param kwargs: Extra arguments, accepted and ignored
        :return: Dictionary with the single key ``orig_info``
        """
        preserved_info = orig_info if orig_info else {}
        return {"orig_info": preserved_info}

    def create_constraint(
        self,
        **kwargs,  # noqa: ARG002
    ) -> Constraint:
        """
        Refuse to build a constraint from this placeholder.

        :param kwargs: Extra arguments, accepted and ignored
        :raises RuntimeError: Always; this method never returns
        """
        message = (
            "Cannot create constraint from unserializable constraint instance. "
            "This constraint cannot be serialized and therefore cannot be executed."
        )
        raise RuntimeError(message)

    def __call__(
        self,
        state: SchedulerState,  # noqa: ARG002
        request: RequestInfo,  # noqa: ARG002
    ) -> SchedulerUpdateAction:
        """
        Refuse to evaluate this placeholder as a constraint.

        :param state: Current scheduler state (unused)
        :param request: Individual request information (unused)
        :raises RuntimeError: Always; this method never returns
        """
        message = (
            "Cannot invoke unserializable constraint instance. "
            "This constraint was not properly serialized and cannot be executed."
        )
        raise RuntimeError(message)
Loading
Loading