diff --git a/infra/marin-eu-west4-a.yaml b/infra/marin-eu-west4-a.yaml index 80c8153322..746b4034a6 100644 --- a/infra/marin-eu-west4-a.yaml +++ b/infra/marin-eu-west4-a.yaml @@ -194,7 +194,7 @@ available_node_types: tpu_slice_v6e_128: max_workers: 1024 - min_workers: 2 + min_workers: 1 node_config: acceleratorType: v6e-128 runtimeVersion: v2-alpha-tpuv6e diff --git a/infra/marin-eu-west4-vllm.yaml b/infra/marin-eu-west4-vllm.yaml index 631bfd34d3..f5899ea5cc 100644 --- a/infra/marin-eu-west4-vllm.yaml +++ b/infra/marin-eu-west4-vllm.yaml @@ -129,7 +129,7 @@ available_node_types: sourceImage: projects/ubuntu-os-cloud/global/images/family/ubuntu-2204-lts tpu_worker: max_workers: 1024 - min_workers: 2 + min_workers: 1 node_config: acceleratorType: v5litepod-4 runtimeVersion: v2-alpha-tpuv5-lite diff --git a/infra/marin-eu-west4.yaml b/infra/marin-eu-west4.yaml index 2db49b62c8..f98fa541eb 100644 --- a/infra/marin-eu-west4.yaml +++ b/infra/marin-eu-west4.yaml @@ -134,7 +134,7 @@ available_node_types: sourceImage: projects/ubuntu-os-cloud/global/images/family/ubuntu-2204-lts tpu_worker: max_workers: 1024 - min_workers: 4 + min_workers: 2 node_config: acceleratorType: v5litepod-4 runtimeVersion: v2-alpha-tpuv5-lite @@ -194,7 +194,7 @@ available_node_types: tpu_slice_v5e_128: max_workers: 1024 - min_workers: 1 + min_workers: 0 node_config: acceleratorType: v5litepod-128 runtimeVersion: v2-alpha-tpuv5-lite diff --git a/infra/marin-us-central1-vllm.yaml b/infra/marin-us-central1-vllm.yaml index 4af3805be7..6400344e58 100644 --- a/infra/marin-us-central1-vllm.yaml +++ b/infra/marin-us-central1-vllm.yaml @@ -129,7 +129,7 @@ available_node_types: sourceImage: projects/ubuntu-os-cloud/global/images/family/ubuntu-2204-lts tpu_worker: max_workers: 1024 - min_workers: 1 + min_workers: 0 node_config: acceleratorType: v5p-8 runtimeVersion: v2-alpha-tpuv5 @@ -141,7 +141,7 @@ available_node_types: tpu_slice_v5p_8: max_workers: 1024 - min_workers: 2 + min_workers: 1 node_config: acceleratorType: v5p-8 runtimeVersion: v2-alpha-tpuv5 diff --git a/infra/marin-us-central1.yaml b/infra/marin-us-central1.yaml index 0675cfcc3d..637f2cecb6 100644 --- a/infra/marin-us-central1.yaml +++ b/infra/marin-us-central1.yaml @@ -134,7 +134,7 @@ available_node_types: sourceImage: projects/ubuntu-os-cloud/global/images/family/ubuntu-2204-lts tpu_worker: max_workers: 1024 - min_workers: 1 + min_workers: 0 node_config: acceleratorType: v5p-8 runtimeVersion: v2-alpha-tpuv5 @@ -146,7 +146,7 @@ available_node_types: tpu_slice_v5p_8: max_workers: 1024 - min_workers: 12 + min_workers: 6 node_config: acceleratorType: v5p-8 runtimeVersion: v2-alpha-tpuv5 @@ -158,7 +158,7 @@ available_node_types: tpu_slice_v5p_16: max_workers: 1024 - min_workers: 1 + min_workers: 0 node_config: acceleratorType: v5p-16 runtimeVersion: v2-alpha-tpuv5 @@ -170,7 +170,7 @@ available_node_types: tpu_slice_v5p_32: max_workers: 1024 - min_workers: 1 + min_workers: 0 node_config: acceleratorType: v5p-32 runtimeVersion: v2-alpha-tpuv5 @@ -182,7 +182,7 @@ available_node_types: tpu_slice_v5p_64: max_workers: 1024 - min_workers: 1 + min_workers: 0 node_config: acceleratorType: v5p-64 runtimeVersion: v2-alpha-tpuv5 diff --git a/infra/marin-us-central2-staging.yaml b/infra/marin-us-central2-staging.yaml index 6c670c215e..7bae1595e9 100644 --- a/infra/marin-us-central2-staging.yaml +++ b/infra/marin-us-central2-staging.yaml @@ -134,7 +134,7 @@ available_node_types: sourceImage: projects/ubuntu-os-cloud/global/images/family/ubuntu-2204-lts tpu_worker: max_workers: 1024 - min_workers: 4 + min_workers: 2 node_config: acceleratorType: v4-8 runtimeVersion: tpu-ubuntu2204-base diff --git a/infra/marin-us-central2-vllm.yaml b/infra/marin-us-central2-vllm.yaml index f0dc439dbd..6ac5742b45 100644 --- a/infra/marin-us-central2-vllm.yaml +++ b/infra/marin-us-central2-vllm.yaml @@ -129,7 +129,7 @@ available_node_types: sourceImage: projects/ubuntu-os-cloud/global/images/family/ubuntu-2204-lts tpu_worker: max_workers: 1024 - min_workers: 2 + min_workers: 1 node_config: acceleratorType: v4-8 runtimeVersion: tpu-ubuntu2204-base diff --git a/infra/marin-us-central2.yaml b/infra/marin-us-central2.yaml index 235ab87d1e..f5210635da 100644 --- a/infra/marin-us-central2.yaml +++ b/infra/marin-us-central2.yaml @@ -134,7 +134,7 @@ available_node_types: sourceImage: projects/ubuntu-os-cloud/global/images/family/ubuntu-2204-lts tpu_worker: max_workers: 1024 - min_workers: 4 + min_workers: 2 node_config: acceleratorType: v4-8 runtimeVersion: tpu-ubuntu2204-base diff --git a/infra/marin-us-east1-d-vllm.yaml b/infra/marin-us-east1-d-vllm.yaml index 9337f39191..053afc8cec 100644 --- a/infra/marin-us-east1-d-vllm.yaml +++ b/infra/marin-us-east1-d-vllm.yaml @@ -129,7 +129,7 @@ available_node_types: sourceImage: projects/ubuntu-os-cloud/global/images/family/ubuntu-2204-lts tpu_worker: max_workers: 1024 - min_workers: 2 + min_workers: 1 node_config: acceleratorType: v6e-8 runtimeVersion: v2-alpha-tpuv6e diff --git a/infra/marin-us-east5-a-vllm.yaml b/infra/marin-us-east5-a-vllm.yaml index 694ac7a3d8..7611f45cd8 100644 --- a/infra/marin-us-east5-a-vllm.yaml +++ b/infra/marin-us-east5-a-vllm.yaml @@ -129,7 +129,7 @@ available_node_types: sourceImage: projects/ubuntu-os-cloud/global/images/family/ubuntu-2204-lts tpu_worker: max_workers: 1024 - min_workers: 1 + min_workers: 0 node_config: acceleratorType: v5p-8 runtimeVersion: v2-alpha-tpuv5 @@ -141,7 +141,7 @@ available_node_types: tpu_slice_v5p_8: max_workers: 1024 - min_workers: 2 + min_workers: 1 node_config: acceleratorType: v5p-8 runtimeVersion: v2-alpha-tpuv5 diff --git a/infra/marin-us-east5-a.yaml b/infra/marin-us-east5-a.yaml index 25de113c61..18aa896c3b 100644 --- a/infra/marin-us-east5-a.yaml +++ b/infra/marin-us-east5-a.yaml @@ -134,7 +134,7 @@ available_node_types: sourceImage: projects/ubuntu-os-cloud/global/images/family/ubuntu-2204-lts tpu_worker: max_workers: 1024 - min_workers: 8 + min_workers: 4 node_config: acceleratorType: v5p-8 runtimeVersion: v2-alpha-tpuv5 @@ -146,7 +146,7 @@ available_node_types: tpu_slice_v5p_8: max_workers: 1024 - min_workers: 8 + min_workers: 4 node_config: acceleratorType: v5p-8 runtimeVersion: v2-alpha-tpuv5 diff --git a/infra/marin-us-east5-b-vllm.yaml b/infra/marin-us-east5-b-vllm.yaml index 92de641efd..6bae558f9b 100644 --- a/infra/marin-us-east5-b-vllm.yaml +++ b/infra/marin-us-east5-b-vllm.yaml @@ -129,7 +129,7 @@ available_node_types: sourceImage: projects/ubuntu-os-cloud/global/images/family/ubuntu-2204-lts tpu_worker: max_workers: 1024 - min_workers: 2 + min_workers: 1 node_config: acceleratorType: v6e-8 runtimeVersion: v2-alpha-tpuv6e diff --git a/lib/iris/examples/marin-dev.yaml b/lib/iris/examples/marin-dev.yaml index 2bc858bccd..5aa2b73f5a 100644 --- a/lib/iris/examples/marin-dev.yaml +++ b/lib/iris/examples/marin-dev.yaml @@ -29,7 +29,7 @@ controller: image: ghcr.io/marin-community/iris-controller:latest gcp: zone: us-central1-a - machine_type: e2-standard-4 + machine_type: e2-highmem-4 boot_disk_size_gb: 100 port: 10000 diff --git a/lib/iris/examples/marin.yaml b/lib/iris/examples/marin.yaml index d4c4e7dc3d..d88108d7bd 100644 --- a/lib/iris/examples/marin.yaml +++ b/lib/iris/examples/marin.yaml @@ -28,7 +28,7 @@ controller: image: ghcr.io/marin-community/iris-controller:latest gcp: zone: us-central1-a - machine_type: e2-standard-4 + machine_type: e2-highmem-4 boot_disk_size_gb: 100 port: 10000 @@ -42,7 +42,7 @@ scale_groups: priority: 1000 resources: { cpu: 2, ram: 16GB, disk: 100GB, device_type: cpu, preemptible: false } min_slices: 0 - max_slices: 1 + max_slices: 4 slice_template: gcp: mode: GCP_SLICE_MODE_VM @@ -57,7 +57,7 @@ scale_groups: num_vms: 1 priority: 10 resources: { cpu: 112, ram: 192GB, disk: 100GB, device_type: tpu, device_variant: v5litepod-4, device_count: 4, preemptible: true } - min_slices: 1 + min_slices: 3 max_slices: 1024 slice_template: gcp: @@ -107,7 +107,7 @@ scale_groups: num_vms: 32 priority: 60 resources: { cpu: 112, ram: 192GB, disk: 100GB, device_type: tpu, device_variant: v5litepod-128, device_count: 4, preemptible: true } - min_slices: 0 + min_slices: 1 max_slices: 32 slice_template: gcp: @@ -182,7 +182,7 @@ scale_groups: num_vms: 32 priority: 60 resources: { cpu: 180, ram: 720GB, disk: 100GB, device_type: tpu, device_variant: v6e-128, device_count: 4, preemptible: true } - min_slices: 0 + min_slices: 1 max_slices: 32 slice_template: gcp: @@ -208,7 +208,7 @@ scale_groups: num_vms: 1 priority: 20 resources: { cpu: 208, ram: 448GB, disk: 100GB, device_type: tpu, device_variant: v5p-8, device_count: 4, preemptible: true } - min_slices: 0 + min_slices: 8 max_slices: 2048 slice_template: gcp: @@ -303,7 +303,7 @@ scale_groups: num_vms: 1 priority: 20 resources: { cpu: 240, ram: 400GB, disk: 100GB, device_type: tpu, device_variant: v4-8, device_count: 4, preemptible: true } - min_slices: 0 + min_slices: 2 max_slices: 2048 slice_template: gcp: