File tree Expand file tree Collapse file tree 12 files changed +213
-195
lines changed Expand file tree Collapse file tree 12 files changed +213
-195
lines changed Original file line number Diff line number Diff line change 3232 GenerationRequest ,
3333 GenerativeRequestLoaderDescription ,
3434 RequestLoaderDescription ,
35+ RequestT ,
36+ ResponseT ,
3537)
3638from guidellm .scheduler import (
3739 GenerativeRequestsWorkerDescription ,
38- RequestT ,
39- ResponseT ,
4040 SchedulerRequestResult ,
4141 WorkerDescription ,
4242)
Original file line number Diff line number Diff line change 2727 GenerationRequest ,
2828 GenerativeRequestLoaderDescription ,
2929 RequestLoaderDescription ,
30+ RequestT ,
31+ ResponseT ,
3032)
3133from guidellm .scheduler import (
3234 GenerativeRequestsWorker ,
3335 RequestsWorker ,
34- RequestT ,
35- ResponseT ,
3636 Scheduler ,
3737 SchedulerRequestResult ,
3838 SchedulingStrategy ,
Original file line number Diff line number Diff line change 11import json
2+ import os
23from collections .abc import Sequence
34from enum import Enum
45from typing import Literal , Optional
@@ -131,8 +132,12 @@ class Settings(BaseSettings):
131132
132133 # Scheduler settings
133134 max_concurrency : int = 512
134- max_worker_processes : int = 10
135- max_add_requests_per_loop : int = 20
135+ max_worker_processes : int = Field (
136+ # use number of CPUs - 1, but at least 10
137+ default_factory = lambda : max ((os .cpu_count () or 1 ) - 1 , 10 )
138+ )
139+ min_queued_requests : int = 20
140+ scheduler_start_delay : float = 5
136141
137142 # Data settings
138143 dataset : DatasetSettings = DatasetSettings ()
Original file line number Diff line number Diff line change 55 RequestLoaderDescription ,
66)
77from .request import GenerationRequest
8+ from .types import RequestT , ResponseT
89
910__all__ = [
1011 "GenerationRequest" ,
1112 "GenerativeRequestLoader" ,
1213 "GenerativeRequestLoaderDescription" ,
1314 "RequestLoader" ,
1415 "RequestLoaderDescription" ,
16+ "RequestT" ,
17+ "ResponseT" ,
1518]
Original file line number Diff line number Diff line change @@ -30,10 +30,10 @@ class RequestLoaderDescription(StandardBaseModel):
3030
3131class RequestLoader (Iterable ):
3232 @abstractmethod
33- def __iter__ (self ): ...
33+ def __iter__ (self ) -> Iterator : ...
3434
3535 @abstractmethod
36- def __len__ (self ): ...
36+ def __len__ (self ) -> int : ...
3737
3838 @property
3939 @abstractmethod
Original file line number Diff line number Diff line change 11from typing import TypeVar
22
3- __all__ = ["RequestT" , "ResponseT" ]
3+ __all__ = [
4+ "RequestT" ,
5+ "ResponseT" ,
6+ ]
47
58
69RequestT = TypeVar ("RequestT" )
Original file line number Diff line number Diff line change 1515 ThroughputStrategy ,
1616 strategy_display_str ,
1717)
18- from .types import RequestT , ResponseT
1918from .worker import (
2019 GenerativeRequestsWorker ,
2120 GenerativeRequestsWorkerDescription ,
2221 RequestsWorker ,
2322 ResolveStatus ,
2423 WorkerDescription ,
25- WorkerProcessRequest ,
2624 WorkerProcessResult ,
2725)
2826
3230 "ConcurrentStrategy" ,
3331 "GenerativeRequestsWorker" ,
3432 "GenerativeRequestsWorkerDescription" ,
35- "RequestT" ,
3633 "RequestsWorker" ,
3734 "ResolveStatus" ,
38- "ResponseT" ,
3935 "Scheduler" ,
4036 "SchedulerRequestInfo" ,
4137 "SchedulerRequestResult" ,
4642 "SynchronousStrategy" ,
4743 "ThroughputStrategy" ,
4844 "WorkerDescription" ,
49- "WorkerProcessRequest" ,
5045 "WorkerProcessResult" ,
5146 "strategy_display_str" ,
5247]
Original file line number Diff line number Diff line change 1+ """
2+ Helper module for importing the correct queue types.
3+ """
4+
5+ from dataclasses import dataclass
6+ from queue import Empty as QueueEmpty
7+ from queue import Full as QueueFull
8+ from queue import Queue
9+ from typing import Generic
10+
11+ from guidellm .request .types import RequestT , ResponseT
12+ from guidellm .scheduler .result import WorkerProcessRequest , WorkerProcessResult
13+
14+ __all__ = [
15+ "MPQueues" ,
16+ "Queue" ,
17+ "QueueEmpty" ,
18+ "QueueFull" ,
19+ ]
20+
21+
22+ @dataclass
23+ class MPQueues (Generic [RequestT , ResponseT ]):
24+ requests : Queue [WorkerProcessRequest [RequestT , ResponseT ]]
25+ responses : Queue [WorkerProcessResult [RequestT , ResponseT ]]
Original file line number Diff line number Diff line change 1+ from dataclasses import dataclass
12from typing import (
23 Generic ,
34 Literal ,
45 Optional ,
56)
67
78from guidellm .objects import StandardBaseModel
9+ from guidellm .request .types import RequestT , ResponseT
810from guidellm .scheduler .strategy import SchedulingStrategy
9- from guidellm .scheduler .types import RequestT , ResponseT
1011
1112__all__ = [
1213 "SchedulerRequestInfo" ,
1314 "SchedulerRequestResult" ,
1415 "SchedulerResult" ,
1516 "SchedulerRunInfo" ,
17+ "WorkerProcessRequest" ,
18+ "WorkerProcessResult" ,
1619]
1720
1821
@@ -135,3 +138,18 @@ class SchedulerRequestResult(
135138 request : RequestT
136139 request_info : SchedulerRequestInfo
137140 response : Optional [ResponseT ] = None
141+
142+
143+ @dataclass
144+ class WorkerProcessRequest (Generic [RequestT , ResponseT ]):
145+ request : RequestT
146+ timeout_time : float
147+ queued_time : float
148+
149+
150+ @dataclass
151+ class WorkerProcessResult (Generic [RequestT , ResponseT ]):
152+ type_ : Literal ["request_scheduled" , "request_start" , "request_complete" ]
153+ request : RequestT
154+ response : Optional [ResponseT ]
155+ info : SchedulerRequestInfo
You can’t perform that action at this time.
0 commit comments