feat(prover): Add hysteresis for autoscaler targets for smoother scaling (#4142)

yorik · web-flow · commit 97ae45d6c306 · 2025-06-10T08:18:07.000Z
## What ❔
Add hysteresis for autoscaler targets for smoother scaling.

<!-- What are the changes this PR brings about? -->
<!-- Example: This PR adds a PR template to the repo. -->
<!-- (For bigger PRs adding more context is appreciated) -->

## Why ❔

To reduce the number of pods that process the queue for only a very short
time or that just keep starting up and shutting down.
<!-- Why are these changes done? What goal do they contribute to? What
are the principles behind them? -->
<!-- The `Why` has to be clear to non-Matter Labs entities running their
own ZK Chain -->
<!-- Example: PR templates ensure PR reviewers, observers, and future
iterators are in context about the evolution of repos. -->

## Is this a breaking change?
- [ ] Yes
- [x] No

## Operational changes
<!-- Any config changes? Any new flags? Any changes to any scripts? -->
<!-- Please add anything that non-Matter Labs entities running their own
ZK Chain may need to know -->

## Checklist

<!-- Check your PR fulfills the following items. -->
<!-- For draft PRs check the boxes as you complete them. -->

- [x] PR title corresponds to the body of PR (we generate changelog
entries from PRs).
- [x] Tests for the changes have been added / updated.
- [x] Documentation comments have been added / updated.
- [x] Code has been formatted via `zkstack dev fmt` and `zkstack dev
lint`.
ref ZKD-2230
diff --git a/prover/crates/bin/prover_autoscaler/README.md b/prover/crates/bin/prover_autoscaler/README.md
@@ -170,6 +170,7 @@ agent_config:
   - `max_replicas` is a map of cluster name to maximum number of replicas. Note: it can be a number of map of GPU types
     to a number.
   - `speed` is a divider for corresponding queue. Note: it can be a number of map of GPU types to a number.
+  - `hysteresis` is the percentage of over-provisioning tolerated before pods are removed, which makes scaling
+    smoother. Meaningful range: 0 to 100.
   - `priority` is an optional field to override global cluster priorities for this target. For GPU targets it's a sorted
     list of `[cluster, gpu]` pairs, for simple targets it's just list of clusters.
 
diff --git a/prover/crates/bin/prover_autoscaler/src/agent.rs b/prover/crates/bin/prover_autoscaler/src/agent.rs
@@ -66,9 +66,7 @@ fn create_agent_router(watcher: Watcher, scaler: Scaler) -> Router {
         .with_state(app)
 }
 
-// TODO: Use
-// https://github.com/matter-labs/zksync-era/blob/9821a20018c367ce246dba656daab5c2e7757973/core/node/api_server/src/healthcheck.rs#L53
-// instead.
+// TODO: Report health only after getting the initial cluster state.
 async fn health() -> &'static str {
     "Ok\n"
 }
@@ -101,7 +99,7 @@ pub struct ScaleResponse {
     pub scale_result: Vec<String>,
 }
 
-/// To test or forse scale in particular cluster use:
+/// To test or force scale in particular cluster use:
 /// $ curl -X POST -H "Content-Type: application/json" --data '{"deployments": [{"namespace": "prover-red", "name": "witness-vector-generator-spec-9-f", "size":0},{"namespace": "prover-red", "name": "witness-vector-generator-spec-9-c", "size":0}]}' <ip>:8081/scale
 async fn scale(
     State(app): State<App>,
diff --git a/prover/crates/bin/prover_autoscaler/src/config.rs b/prover/crates/bin/prover_autoscaler/src/config.rs
@@ -168,6 +168,12 @@ pub struct ScalerTarget {
     /// For Simple targets, this is a list of ClusterName.
     #[serde(default)]
     pub priority: Option<PriorityConfig>,
+    /// Optional hysteresis value as percentage of the queue to avoid
+    /// oscillations in scaling.
+    /// Valid values are from 0 to 100.
+    /// Default is 0, which means no hysteresis.
+    #[serde(default)]
+    pub hysteresis: usize,
 }
 
 impl WellKnown for ScalerTarget {
diff --git a/prover/crates/bin/prover_autoscaler/src/global/manager.rs b/prover/crates/bin/prover_autoscaler/src/global/manager.rs
@@ -72,6 +72,7 @@ impl Manager {
                         .map(|(k, v)| (k.clone(), v.into_map_gpukey()))
                         .collect(),
                     c.speed.into_map_gpukey(),
+                    c.hysteresis,
                     scaler_config.clone(),
                     c.priority.clone(),
                 ))),
@@ -84,6 +85,7 @@ impl Manager {
                         .map(|(k, v)| (k.clone(), v.into_map_nokey()))
                         .collect(),
                     c.speed.into_map_nokey(),
+                    c.hysteresis,
                     scaler_config.clone(),
                     c.priority.clone(),
                 ))),
diff --git a/prover/crates/bin/prover_autoscaler/src/global/scaler.rs b/prover/crates/bin/prover_autoscaler/src/global/scaler.rs
@@ -58,17 +58,20 @@ pub struct Scaler<K> {
     max_replicas: HashMap<ClusterName, HashMap<K, usize>>,
     // TODO Add default speed for default K
     speed: HashMap<K, usize>,
+    hysteresis: usize,
     config: Arc<ScalerConfig>,
     target_priority: Option<PriorityConfig>,
 }
 
 impl<K: Key> Scaler<K> {
+    #[allow(clippy::too_many_arguments)]
     pub fn new(
         queue_report_field: QueueReportFields,
         deployment: DeploymentName,
         min_replicas: usize,
         max_replicas: HashMap<ClusterName, HashMap<K, usize>>,
         speed: HashMap<K, usize>,
+        hysteresis: usize,
         config: Arc<ScalerConfig>,
         target_priority: Option<PriorityConfig>,
     ) -> Self {
@@ -78,6 +81,7 @@ impl<K: Key> Scaler<K> {
             min_replicas,
             max_replicas,
             speed,
+            hysteresis,
             config,
             target_priority,
         }
@@ -298,7 +302,7 @@ impl<K: Key> Scaler<K> {
         }
 
         // Remove unneeded pods.
-        if (total as usize) > queue {
+        if total as usize - total as usize * self.hysteresis / 100 > queue {
             for cluster in sorted_clusters.iter().rev() {
                 let mut excess_queue = total - self.normalize_queue(cluster.key, queue) as i64;
                 if excess_queue <= 0 {
@@ -481,6 +485,7 @@ mod tests {
             ]
             .into(),
             [(GpuKey(Gpu::L4), 500), (GpuKey(Gpu::T4), 100)].into(),
+            0,
             scaler_config("prover-other"),
             None,
         );
@@ -633,6 +638,7 @@ mod tests {
                 (GpuKey(Gpu::T4), 700),
             ]
             .into(),
+            0,
             scaler_config("prover"),
             None,
         );
@@ -799,6 +805,7 @@ mod tests {
             ]
             .into(),
             [(GpuKey(Gpu::L4), 500), (GpuKey(Gpu::T4), 100)].into(),
+            0,
             scaler_config("prover"),
             None,
         );
@@ -997,6 +1004,7 @@ mod tests {
             ]
             .into(),
             [(GpuKey(Gpu::L4), 500), (GpuKey(Gpu::T4), 100)].into(),
+            0,
             scaler_config("prover"),
             None,
         );
@@ -1117,6 +1125,7 @@ mod tests {
             ]
             .into(),
             [(NoKey(), 10)].into(),
+            0,
             scaler_config(""),
             None,
         );
@@ -1294,6 +1303,7 @@ mod tests {
             ]
             .into(),
             [(GpuKey(Gpu::L4), 1500), (GpuKey(Gpu::H100), 3000)].into(),
+            0,
             scaler_config("prover"),
             target_priority,
         );
@@ -1396,6 +1406,7 @@ mod tests {
             )]
             .into(),
             [(GpuKey(Gpu::L4), 1500), (GpuKey(Gpu::H100), 3000)].into(),
+            0,
             scaler_config("prover"),
             target_priority,
         );
@@ -1477,6 +1488,139 @@ mod tests {
         );
     }
 
+    #[tracing_test::traced_test]
+    #[test]
+    fn test_calculate_hysteresis() {
+        let target_priority = Some(PriorityConfig::Gpu(vec![
+            ("foo".into(), GpuKey(Gpu::L4)),
+            ("foo".into(), GpuKey(Gpu::H100)),
+        ]));
+
+        let scaler = Scaler::new(
+            QueueReportFields::prover_jobs,
+            "circuit-prover-gpu".into(),
+            0,
+            [(
+                "foo".into(),
+                [(GpuKey(Gpu::L4), 50), (GpuKey(Gpu::H100), 10)].into(),
+            )]
+            .into(),
+            [(GpuKey(Gpu::L4), 1500), (GpuKey(Gpu::H100), 3000)].into(),
+            50,
+            scaler_config("prover"),
+            target_priority.clone(),
+        );
+
+        let scaler2 = Scaler::new(
+            QueueReportFields::prover_jobs,
+            "circuit-prover-gpu".into(),
+            0,
+            [(
+                "foo".into(),
+                [(GpuKey(Gpu::L4), 50), (GpuKey(Gpu::H100), 10)].into(),
+            )]
+            .into(),
+            [(GpuKey(Gpu::L4), 1500), (GpuKey(Gpu::H100), 3000)].into(),
+            0,
+            scaler_config("prover"),
+            target_priority,
+        );
+
+        let clusters = Clusters {
+            clusters: [(
+                "foo".into(),
+                Cluster {
+                    name: "foo".into(),
+                    namespaces: [(
+                        "prover".into(),
+                        Namespace {
+                            deployments: [
+                                ("circuit-prover-gpu".into(), Deployment::default()),
+                                ("circuit-prover-gpu-h100".into(), Deployment::default()),
+                            ]
+                            .into(),
+                            pods: [
+                                (
+                                    "circuit-prover-gpu-7c5f8fc747-gmtcr".into(),
+                                    Pod {
+                                        status: "Running".into(),
+                                        changed: Utc::now(),
+                                        ..Default::default()
+                                    },
+                                ),
+                                (
+                                    "circuit-prover-gpu-7c5f8fc747-gmtc2".into(),
+                                    Pod {
+                                        status: "Running".into(),
+                                        changed: Utc::now(),
+                                        out_of_resources: true,
+                                        ..Default::default()
+                                    },
+                                ),
+                                (
+                                    "circuit-prover-gpu-h100-7c5f8fc747-gmtc3".into(),
+                                    Pod {
+                                        status: "Running".into(),
+                                        changed: Utc::now(),
+                                        ..Default::default()
+                                    },
+                                ),
+                            ]
+                            .into(),
+                            scale_errors: vec![],
+                        },
+                    )]
+                    .into(),
+                },
+            )]
+            .into(),
+            ..Default::default()
+        };
+
+        assert_eq!(
+            scaler.calculate(&"prover".into(), 2 * 1500 + 1 * 3000 - 1500, &clusters),
+            [
+                (
+                    PoolKey {
+                        cluster: "foo".into(),
+                        key: GpuKey(Gpu::L4),
+                    },
+                    2,
+                ),
+                (
+                    PoolKey {
+                        cluster: "foo".into(),
+                        key: GpuKey(Gpu::H100),
+                    },
+                    1,
+                ),
+            ]
+            .into(),
+            "Override priority: H100 in foo, then L4 in bar"
+        );
+        assert_eq!(
+            scaler2.calculate(&"prover".into(), 2 * 1500 + 1 * 3000 - 1500, &clusters),
+            [
+                (
+                    PoolKey {
+                        cluster: "foo".into(),
+                        key: GpuKey(Gpu::L4),
+                    },
+                    1,
+                ),
+                (
+                    PoolKey {
+                        cluster: "foo".into(),
+                        key: GpuKey(Gpu::H100),
+                    },
+                    1,
+                ),
+            ]
+            .into(),
+            "Override priority: H100 in foo, then L4 in bar"
+        );
+    }
+
     #[tracing_test::traced_test]
     #[test]
     fn test_convert_to_pool() {
@@ -1486,6 +1630,7 @@ mod tests {
             2,
             [("foo".into(), [(GpuKey(Gpu::L4), 100)].into())].into(),
             [(GpuKey(Gpu::L4), 500)].into(),
+            0,
             scaler_config("prover"),
             None,
         );

Original file line number	Diff line number	Diff line change
`@@ -66,9 +66,7 @@ fn create_agent_router(watcher: Watcher, scaler: Scaler) -> Router {`
`66`	`66`	`.with_state(app)`
`67`	`67`	`}`
`68`	`68`
`69`		`-// TODO: Use`
`70`		`-// https://github.com/matter-labs/zksync-era/blob/9821a20018c367ce246dba656daab5c2e7757973/core/node/api_server/src/healthcheck.rs#L53`
`71`		`-// instead.`
	`69`	`+// TODO: Report health only after getting the initial cluster state.`
`72`	`70`	`async fn health() -> &'static str {`
`73`	`71`	`"Ok\n"`
`74`	`72`	`}`
`@@ -101,7 +99,7 @@ pub struct ScaleResponse {`
`101`	`99`	`pub scale_result: Vec<String>,`
`102`	`100`	`}`
`103`	`101`
`104`		`-/// To test or forse scale in particular cluster use:`
	`102`	`+/// To test or force scale in particular cluster use:`
`105`	`103`	`/// $ curl -X POST -H "Content-Type: application/json" --data '{"deployments": [{"namespace": "prover-red", "name": "witness-vector-generator-spec-9-f", "size":0},{"namespace": "prover-red", "name": "witness-vector-generator-spec-9-c", "size":0}]}' <ip>:8081/scale`
`106`	`104`	`async fn scale(`
`107`	`105`	`State(app): State<App>,`