@@ -41,15 +41,14 @@ class GpuConfig(BaseModel):
4141 model_config = ConfigDict (frozen = True , extra = "forbid" )
4242
4343 name : str
44- zone : str # default zone; flex-start capacity varies across regions, so this
45- # gets overridden via the --zone flag when the default is dry.
44+ zone : str = "us-central1-a"
4645 machine_type : str
47- accelerator : str | None # None for *-ddp variants — accelerators are built
48- # into the machine type, so passing --accelerator is redundant/erroneous.
46+ accelerator : str | None # None for *-ddp variants because accelerators are built into the machine type
4947 max_run : str
5048 install_nvidia_driver : bool
5149 reservation_affinity : Literal ["none" , "any" ]
52- wait : bool # whether to block locally on instance creation. False adds --async.
50+ wait_for_instance_creation : bool
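51+ is_for_training : bool # NOTE(review): required (no default), but none of the GpuConfig(...) entries visible in this diff pass it; confirm every entry sets it, otherwise model validation should fail when gpu_type_to_config is built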
5352
5453
5554gpu_type_to_config : dict [GpuType , GpuConfig ] = {
@@ -61,7 +60,7 @@ class GpuConfig(BaseModel):
6160 max_run = "86400s" ,
6261 install_nvidia_driver = False ,
6362 reservation_affinity = "any" ,
64- wait = True , # L4s come up fast; block so errors surface promptly
63+ wait_for_instance_creation = True , # L4s come up fast. Block so errors surface promptly
6564 ),
6665 "h100" : GpuConfig (
6766 name = "grouping-trainer-h100" ,
@@ -71,7 +70,7 @@ class GpuConfig(BaseModel):
7170 max_run = "86400s" ,
7271 install_nvidia_driver = True ,
7372 reservation_affinity = "none" ,
74- wait = False , # flex-start can queue for up to 1h; don't block the shell
73+ wait_for_instance_creation = False , # flex-start can queue for up to 1h
7574 ),
7675 "h100-ddp" : GpuConfig (
7776 name = "grouping-trainer-h100-ddp" ,
@@ -81,7 +80,7 @@ class GpuConfig(BaseModel):
8180 max_run = "172800s" ,
8281 install_nvidia_driver = True ,
8382 reservation_affinity = "none" ,
84- wait = False ,
83+ wait_for_instance_creation = False ,
8584 ),
8685 "a100" : GpuConfig (
8786 name = "grouping-trainer-a100" ,
@@ -91,7 +90,7 @@ class GpuConfig(BaseModel):
9190 max_run = "86400s" ,
9291 install_nvidia_driver = True ,
9392 reservation_affinity = "none" ,
94- wait = False ,
93+ wait_for_instance_creation = False ,
9594 ),
9695 "a100-ddp" : GpuConfig (
9796 name = "grouping-trainer-a100-ddp" ,
@@ -101,7 +100,7 @@ class GpuConfig(BaseModel):
101100 max_run = "172800s" ,
102101 install_nvidia_driver = True ,
103102 reservation_affinity = "none" ,
104- wait = False ,
103+ wait_for_instance_creation = False ,
105104 ),
106105}
107106
0 commit comments