Skip to content

Commit 97ae45d

Browse files
authored
feat(prover): Add hysteresis for autoscaler targets for smoother scaling (#4142)
## What ❔ Add hysteresis for autoscaler targets for smoother scaling. <!-- What are the changes this PR brings about? --> <!-- Example: This PR adds a PR template to the repo. --> <!-- (For bigger PRs adding more context is appreciated) --> ## Why ❔ To reduce the number of pods that process the queue for only a very short time, or that just start up and shut down again. <!-- Why are these changes done? What goal do they contribute to? What are the principles behind them? --> <!-- The `Why` has to be clear to non-Matter Labs entities running their own ZK Chain --> <!-- Example: PR templates ensure PR reviewers, observers, and future iterators are in context about the evolution of repos. --> ## Is this a breaking change? - [ ] Yes - [x] No ## Operational changes <!-- Any config changes? Any new flags? Any changes to any scripts? --> <!-- Please add anything that non-Matter Labs entities running their own ZK Chain may need to know --> ## Checklist <!-- Check your PR fulfills the following items. --> <!-- For draft PRs check the boxes as you complete them. --> - [x] PR title corresponds to the body of PR (we generate changelog entries from PRs). - [x] Tests for the changes have been added / updated. - [x] Documentation comments have been added / updated. - [x] Code has been formatted via `zkstack dev fmt` and `zkstack dev lint`. ref ZKD-2230
1 parent 12271c8 commit 97ae45d

File tree

5 files changed

+157
-5
lines changed

5 files changed

+157
-5
lines changed

prover/crates/bin/prover_autoscaler/README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,7 @@ agent_config:
170170
- `max_replicas` is a map of cluster name to maximum number of replicas. Note: it can be a number or a map of GPU types
171171
to a number.
172172
- `speed` is a divider for the corresponding queue. Note: it can be a number or a map of GPU types to a number.
173+
- `hysteresis` is the percentage of over-provisioning (capacity above the current queue) that is tolerated before pods are removed, for smoother scaling. Meaningful range: 0 to 100.
173174
- `priority` is an optional field to override global cluster priorities for this target. For GPU targets it's a sorted
174175
list of `[cluster, gpu]` pairs, for simple targets it's just list of clusters.
175176

prover/crates/bin/prover_autoscaler/src/agent.rs

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -66,9 +66,7 @@ fn create_agent_router(watcher: Watcher, scaler: Scaler) -> Router {
6666
.with_state(app)
6767
}
6868

69-
// TODO: Use
70-
// https://github.com/matter-labs/zksync-era/blob/9821a20018c367ce246dba656daab5c2e7757973/core/node/api_server/src/healthcheck.rs#L53
71-
// instead.
69+
// TODO: Report health only after getting the initial cluster state.
7270
async fn health() -> &'static str {
7371
"Ok\n"
7472
}
@@ -101,7 +99,7 @@ pub struct ScaleResponse {
10199
pub scale_result: Vec<String>,
102100
}
103101

104-
/// To test or forse scale in particular cluster use:
102+
/// To test or force scale in particular cluster use:
105103
/// $ curl -X POST -H "Content-Type: application/json" --data '{"deployments": [{"namespace": "prover-red", "name": "witness-vector-generator-spec-9-f", "size":0},{"namespace": "prover-red", "name": "witness-vector-generator-spec-9-c", "size":0}]}' <ip>:8081/scale
106104
async fn scale(
107105
State(app): State<App>,

prover/crates/bin/prover_autoscaler/src/config.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,12 @@ pub struct ScalerTarget {
168168
/// For Simple targets, this is a list of ClusterName.
169169
#[serde(default)]
170170
pub priority: Option<PriorityConfig>,
171+
/// Optional hysteresis value as percentage of the queue to avoid
172+
/// oscillations in scaling.
173+
/// Valid values are from 0 to 100.
174+
/// Default is 0, which means no hysteresis.
175+
#[serde(default)]
176+
pub hysteresis: usize,
171177
}
172178

173179
impl WellKnown for ScalerTarget {

prover/crates/bin/prover_autoscaler/src/global/manager.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ impl Manager {
7272
.map(|(k, v)| (k.clone(), v.into_map_gpukey()))
7373
.collect(),
7474
c.speed.into_map_gpukey(),
75+
c.hysteresis,
7576
scaler_config.clone(),
7677
c.priority.clone(),
7778
))),
@@ -84,6 +85,7 @@ impl Manager {
8485
.map(|(k, v)| (k.clone(), v.into_map_nokey()))
8586
.collect(),
8687
c.speed.into_map_nokey(),
88+
c.hysteresis,
8789
scaler_config.clone(),
8890
c.priority.clone(),
8991
))),

prover/crates/bin/prover_autoscaler/src/global/scaler.rs

Lines changed: 146 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,17 +58,20 @@ pub struct Scaler<K> {
5858
max_replicas: HashMap<ClusterName, HashMap<K, usize>>,
5959
// TODO Add default speed for default K
6060
speed: HashMap<K, usize>,
61+
hysteresis: usize,
6162
config: Arc<ScalerConfig>,
6263
target_priority: Option<PriorityConfig>,
6364
}
6465

6566
impl<K: Key> Scaler<K> {
67+
#[allow(clippy::too_many_arguments)]
6668
pub fn new(
6769
queue_report_field: QueueReportFields,
6870
deployment: DeploymentName,
6971
min_replicas: usize,
7072
max_replicas: HashMap<ClusterName, HashMap<K, usize>>,
7173
speed: HashMap<K, usize>,
74+
hysteresis: usize,
7275
config: Arc<ScalerConfig>,
7376
target_priority: Option<PriorityConfig>,
7477
) -> Self {
@@ -78,6 +81,7 @@ impl<K: Key> Scaler<K> {
7881
min_replicas,
7982
max_replicas,
8083
speed,
84+
hysteresis,
8185
config,
8286
target_priority,
8387
}
@@ -298,7 +302,7 @@ impl<K: Key> Scaler<K> {
298302
}
299303

300304
// Remove unneeded pods.
301-
if (total as usize) > queue {
305+
if total as usize - total as usize * self.hysteresis / 100 > queue {
302306
for cluster in sorted_clusters.iter().rev() {
303307
let mut excess_queue = total - self.normalize_queue(cluster.key, queue) as i64;
304308
if excess_queue <= 0 {
@@ -481,6 +485,7 @@ mod tests {
481485
]
482486
.into(),
483487
[(GpuKey(Gpu::L4), 500), (GpuKey(Gpu::T4), 100)].into(),
488+
0,
484489
scaler_config("prover-other"),
485490
None,
486491
);
@@ -633,6 +638,7 @@ mod tests {
633638
(GpuKey(Gpu::T4), 700),
634639
]
635640
.into(),
641+
0,
636642
scaler_config("prover"),
637643
None,
638644
);
@@ -799,6 +805,7 @@ mod tests {
799805
]
800806
.into(),
801807
[(GpuKey(Gpu::L4), 500), (GpuKey(Gpu::T4), 100)].into(),
808+
0,
802809
scaler_config("prover"),
803810
None,
804811
);
@@ -997,6 +1004,7 @@ mod tests {
9971004
]
9981005
.into(),
9991006
[(GpuKey(Gpu::L4), 500), (GpuKey(Gpu::T4), 100)].into(),
1007+
0,
10001008
scaler_config("prover"),
10011009
None,
10021010
);
@@ -1117,6 +1125,7 @@ mod tests {
11171125
]
11181126
.into(),
11191127
[(NoKey(), 10)].into(),
1128+
0,
11201129
scaler_config(""),
11211130
None,
11221131
);
@@ -1294,6 +1303,7 @@ mod tests {
12941303
]
12951304
.into(),
12961305
[(GpuKey(Gpu::L4), 1500), (GpuKey(Gpu::H100), 3000)].into(),
1306+
0,
12971307
scaler_config("prover"),
12981308
target_priority,
12991309
);
@@ -1396,6 +1406,7 @@ mod tests {
13961406
)]
13971407
.into(),
13981408
[(GpuKey(Gpu::L4), 1500), (GpuKey(Gpu::H100), 3000)].into(),
1409+
0,
13991410
scaler_config("prover"),
14001411
target_priority,
14011412
);
@@ -1477,6 +1488,139 @@ mod tests {
14771488
);
14781489
}
14791490

1491+
#[tracing_test::traced_test]
1492+
#[test]
1493+
fn test_calculate_hysteresis() {
1494+
let target_priority = Some(PriorityConfig::Gpu(vec![
1495+
("foo".into(), GpuKey(Gpu::L4)),
1496+
("foo".into(), GpuKey(Gpu::H100)),
1497+
]));
1498+
1499+
let scaler = Scaler::new(
1500+
QueueReportFields::prover_jobs,
1501+
"circuit-prover-gpu".into(),
1502+
0,
1503+
[(
1504+
"foo".into(),
1505+
[(GpuKey(Gpu::L4), 50), (GpuKey(Gpu::H100), 10)].into(),
1506+
)]
1507+
.into(),
1508+
[(GpuKey(Gpu::L4), 1500), (GpuKey(Gpu::H100), 3000)].into(),
1509+
50,
1510+
scaler_config("prover"),
1511+
target_priority.clone(),
1512+
);
1513+
1514+
let scaler2 = Scaler::new(
1515+
QueueReportFields::prover_jobs,
1516+
"circuit-prover-gpu".into(),
1517+
0,
1518+
[(
1519+
"foo".into(),
1520+
[(GpuKey(Gpu::L4), 50), (GpuKey(Gpu::H100), 10)].into(),
1521+
)]
1522+
.into(),
1523+
[(GpuKey(Gpu::L4), 1500), (GpuKey(Gpu::H100), 3000)].into(),
1524+
0,
1525+
scaler_config("prover"),
1526+
target_priority,
1527+
);
1528+
1529+
let clusters = Clusters {
1530+
clusters: [(
1531+
"foo".into(),
1532+
Cluster {
1533+
name: "foo".into(),
1534+
namespaces: [(
1535+
"prover".into(),
1536+
Namespace {
1537+
deployments: [
1538+
("circuit-prover-gpu".into(), Deployment::default()),
1539+
("circuit-prover-gpu-h100".into(), Deployment::default()),
1540+
]
1541+
.into(),
1542+
pods: [
1543+
(
1544+
"circuit-prover-gpu-7c5f8fc747-gmtcr".into(),
1545+
Pod {
1546+
status: "Running".into(),
1547+
changed: Utc::now(),
1548+
..Default::default()
1549+
},
1550+
),
1551+
(
1552+
"circuit-prover-gpu-7c5f8fc747-gmtc2".into(),
1553+
Pod {
1554+
status: "Running".into(),
1555+
changed: Utc::now(),
1556+
out_of_resources: true,
1557+
..Default::default()
1558+
},
1559+
),
1560+
(
1561+
"circuit-prover-gpu-h100-7c5f8fc747-gmtc3".into(),
1562+
Pod {
1563+
status: "Running".into(),
1564+
changed: Utc::now(),
1565+
..Default::default()
1566+
},
1567+
),
1568+
]
1569+
.into(),
1570+
scale_errors: vec![],
1571+
},
1572+
)]
1573+
.into(),
1574+
},
1575+
)]
1576+
.into(),
1577+
..Default::default()
1578+
};
1579+
1580+
assert_eq!(
1581+
scaler.calculate(&"prover".into(), 2 * 1500 + 1 * 3000 - 1500, &clusters),
1582+
[
1583+
(
1584+
PoolKey {
1585+
cluster: "foo".into(),
1586+
key: GpuKey(Gpu::L4),
1587+
},
1588+
2,
1589+
),
1590+
(
1591+
PoolKey {
1592+
cluster: "foo".into(),
1593+
key: GpuKey(Gpu::H100),
1594+
},
1595+
1,
1596+
),
1597+
]
1598+
.into(),
1599+
"Override priority: H100 in foo, then L4 in bar"
1600+
);
1601+
assert_eq!(
1602+
scaler2.calculate(&"prover".into(), 2 * 1500 + 1 * 3000 - 1500, &clusters),
1603+
[
1604+
(
1605+
PoolKey {
1606+
cluster: "foo".into(),
1607+
key: GpuKey(Gpu::L4),
1608+
},
1609+
1,
1610+
),
1611+
(
1612+
PoolKey {
1613+
cluster: "foo".into(),
1614+
key: GpuKey(Gpu::H100),
1615+
},
1616+
1,
1617+
),
1618+
]
1619+
.into(),
1620+
"Override priority: H100 in foo, then L4 in bar"
1621+
);
1622+
}
1623+
14801624
#[tracing_test::traced_test]
14811625
#[test]
14821626
fn test_convert_to_pool() {
@@ -1486,6 +1630,7 @@ mod tests {
14861630
2,
14871631
[("foo".into(), [(GpuKey(Gpu::L4), 100)].into())].into(),
14881632
[(GpuKey(Gpu::L4), 500)].into(),
1633+
0,
14891634
scaler_config("prover"),
14901635
None,
14911636
);

0 commit comments

Comments
 (0)