2323from cloudai .models .workload import CmdArgs , TestDefinition
2424
2525
26- class FrontendArgs (BaseModel ):
27- """Arguments for the frontend node ."""
26+ class CommonConfig (BaseModel ):
27+ """Common configuration shared across components ."""
2828
2929 model_config = ConfigDict (extra = "forbid" , populate_by_name = True )
3030
31- endpoint : str = "dynamo.Processor.chat/completions"
32- port : int = 8000
33- port_etcd : int = 2379
34- port_nats : int = 4222
31+ model : str
32+ kv_transfer_config : str = Field ('{"kv_connector":"NixlConnector","kv_role":"kv_both"}' , alias = "kv-transfer-config" )
33+ served_model_name : str
3534
3635
37- class ProcessorArgs (BaseModel ):
38- """Arguments for the processor node."""
36+ class FrontendArgs (BaseModel ):
37+ """Arguments for the frontend node."""
3938
4039 model_config = ConfigDict (extra = "forbid" , populate_by_name = True )
4140
42- block_size : int = Field (64 , alias = "block-size" )
43- max_model_len : int = Field (8192 , alias = "max-model-len" )
44- router : str = "kv"
41+ endpoint : str = "dynamo.SimpleLoadBalancer.generate_disagg"
42+ port : int = 8000
43+ port_etcd : int = 2379
44+ port_nats : int = 4222
4545
4646
47- class RouterArgs (BaseModel ):
48- """Arguments for the router ."""
47+ class SimpleLoadBalancerArgs (BaseModel ):
48+ """Arguments for the load balancer ."""
4949
5050 model_config = ConfigDict (extra = "forbid" , populate_by_name = True )
5151
52- min_workers : int = Field ( 1 , alias = "min-workers" )
52+ enable_disagg : bool = True
5353
5454
55- class PrefillWorkerArgs (BaseModel ):
56- """Arguments for the prefill worker node ."""
55+ class WorkerBaseArgs (BaseModel ):
56+ """Base arguments for VLLM workers ."""
5757
5858 model_config = ConfigDict (extra = "forbid" , populate_by_name = True )
5959
6060 num_nodes : Union [int , list [int ]]
61- kv_transfer_config : str = Field ('{"kv_connector":"DynamoNixlConnector"}' , alias = "kv-transfer-config" )
62- block_size : int = Field (64 , alias = "block-size" )
63- max_model_len : int = Field (8192 , alias = "max-model-len" )
64- max_num_seqs : int = Field (16 , alias = "max-num-seqs" )
65- gpu_memory_utilization : float = Field (0.95 , alias = "gpu-memory-utilization" )
66- tensor_parallel_size : Union [int , list [int ]] = Field (8 , alias = "tensor-parallel-size" )
67- pipeline_parallel_size : Union [int , list [int ]] = Field (1 , alias = "pipeline-parallel-size" )
68- quantization : Optional [str ] = None
6961 service_args : dict = Field ({"workers" : 1 , "resources" : {"gpu" : "8" }}, alias = "ServiceArgs" )
62+ gpu_memory_utilization : float = Field (0.7 , alias = "gpu-memory-utilization" )
63+ tensor_parallel_size : int = Field (8 , alias = "tensor-parallel-size" )
64+ pipeline_parallel_size : int = Field (1 , alias = "pipeline-parallel-size" )
65+ enforce_eager : bool = Field (True , alias = "enforce-eager" )
7066
7167
72- class VllmWorkerArgs ( BaseModel ):
73- """Arguments for the VllmWorker node ."""
68+ class PrefillWorkerArgs ( WorkerBaseArgs ):
69+ """Arguments for the VLLM prefill worker ."""
7470
75- model_config = ConfigDict ( extra = "forbid" , populate_by_name = True )
71+ pass
7672
77- num_nodes : Union [int , list [int ]]
78- kv_transfer_config : str = Field ('{"kv_connector":"DynamoNixlConnector"}' , alias = "kv-transfer-config" )
79- block_size : int = Field (64 , alias = "block-size" )
80- max_model_len : int = Field (8192 , alias = "max-model-len" )
81- max_num_seqs : int = Field (16 , alias = "max-num-seqs" )
82- remote_prefill : bool = Field (True , alias = "remote-prefill" )
83- conditional_disagg : bool = Field (True , alias = "conditional-disagg" )
84- max_local_prefill_length : int = Field (10 , alias = "max-local-prefill-length" )
85- max_prefill_queue_size : int = Field (2 , alias = "max-prefill-queue-size" )
86- gpu_memory_utilization : float = Field (0.95 , alias = "gpu-memory-utilization" )
87- tensor_parallel_size : Union [int , list [int ]] = Field (8 , alias = "tensor-parallel-size" )
88- pipeline_parallel_size : Union [int , list [int ]] = Field (1 , alias = "pipeline-parallel-size" )
89- router : str = "kv"
90- quantization : Optional [str ] = None
91- enable_prefix_caching : bool = Field (True , alias = "enable-prefix-caching" )
92- service_args : dict = Field ({"workers" : 1 , "resources" : {"gpu" : "8" }}, alias = "ServiceArgs" )
73+
74+ class DecodeWorkerArgs (WorkerBaseArgs ):
75+ """Arguments for the VLLM decode worker."""
76+
77+ pass
9378
9479
9580class AIDynamoArgs (BaseModel ):
9681 """Arguments for AI Dynamo setup."""
9782
9883 model_config = ConfigDict (extra = "forbid" )
9984
100- frontend : FrontendArgs = FrontendArgs ( port_etcd = 2379 , port_nats = 4222 )
101- processor : ProcessorArgs = ProcessorArgs ( ** { "block-size" : 64 , "max-model-len" : 8192 , "router" : "kv" } )
102- router : RouterArgs = RouterArgs ( ** { "min-workers" : 1 } )
85+ common : CommonConfig
86+ frontend : FrontendArgs = FrontendArgs ( )
87+ simple_load_balancer : SimpleLoadBalancerArgs = SimpleLoadBalancerArgs ( )
10388 prefill_worker : PrefillWorkerArgs
104- vllm_worker : VllmWorkerArgs
89+ decode_worker : DecodeWorkerArgs
10590
10691
10792class GenAIPerfArgs (BaseModel ):
@@ -132,10 +117,13 @@ class AIDynamoCmdArgs(CmdArgs):
132117 """Arguments for AI Dynamo."""
133118
134119 docker_image_url : str
135- served_model_name : str
120+ huggingface_home_host_path : Path = Path .home () / ".cache/huggingface"
121+ huggingface_home_container_path : Path = Path ("/root/.cache/huggingface" )
136122 dynamo : AIDynamoArgs
137123 sleep_seconds : int = 660
138124 genai_perf : GenAIPerfArgs
125+ node_setup_cmd : str = ""
126+ extra_args : str = ""
139127
140128
141129class AIDynamoTestDefinition (TestDefinition ):
@@ -155,11 +143,8 @@ def installables(self) -> List[Installable]:
155143 return [self .docker_image ]
156144
157145 @property
158- def hugging_face_home_path (self ) -> Path :
159- raw = self .extra_env_vars .get ("HF_HOME" )
160- if not isinstance (raw , str ) or not raw .strip ():
161- raise ValueError ("HF_HOME must be set and non-empty" )
162- path = Path (raw )
146+ def huggingface_home_host_path (self ) -> Path :
147+ path = Path (self .cmd_args .huggingface_home_host_path )
163148 if not path .is_dir ():
164- raise FileNotFoundError (f"HF_HOME path not found at { path } " )
149+ raise FileNotFoundError (f"HuggingFace home path not found at { path } " )
165150 return path
0 commit comments