Skip to content

Commit 46bd594

Browse files
[consensus/simplex] Timeout Regression Tests (#3432)
1 parent bec3871 commit 46bd594

2 files changed

Lines changed: 233 additions & 22 deletions

File tree

consensus/src/simplex/actors/voter/mod.rs

Lines changed: 163 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -2666,9 +2666,9 @@ mod tests {
26662666
.await;
26672667

26682668
let duplicate_window = context.current() + Duration::from_millis(300);
2669-
while context.current() < duplicate_window {
2669+
loop {
26702670
select! {
2671-
_ = context.sleep(Duration::from_millis(10)) => {},
2671+
_ = context.sleep_until(duplicate_window) => break,
26722672
message = batcher_receiver.recv() => match message.unwrap() {
26732673
batcher::Message::Update { response, .. } => response.send(None).unwrap(),
26742674
batcher::Message::Constructed(Vote::Nullify(nullify))
@@ -3417,9 +3417,9 @@ mod tests {
34173417
// Wait until we actually broadcast a vote in our leader view.
34183418
let ready_deadline = context.current() + Duration::from_secs(1);
34193419
let mut became_ready = false;
3420-
while context.current() < ready_deadline {
3420+
loop {
34213421
select! {
3422-
_ = context.sleep(Duration::from_millis(10)) => {},
3422+
_ = context.sleep_until(ready_deadline) => break,
34233423
message = batcher_receiver.recv() => match message.unwrap() {
34243424
batcher::Message::Update { response, .. } => response.send(None).unwrap(),
34253425
batcher::Message::Constructed(_) => {}
@@ -3501,9 +3501,9 @@ mod tests {
35013501
// We should still broadcast for target_view (typically a nullify after timeout).
35023502
let target_deadline = context.current() + Duration::from_secs(1);
35033503
let mut saw_target_network_vote = false;
3504-
while context.current() < target_deadline {
3504+
loop {
35053505
select! {
3506-
_ = context.sleep(Duration::from_millis(10)) => {},
3506+
_ = context.sleep_until(target_deadline) => break,
35073507
message = batcher_receiver.recv() => match message.unwrap() {
35083508
batcher::Message::Update { response, .. } => response.send(None).unwrap(),
35093509
batcher::Message::Constructed(_) => {}
@@ -5758,8 +5758,9 @@ mod tests {
57585758
// see nullify in this 2s window after proposal handling, even though
57595759
// leader timeout has elapsed.
57605760
let no_nullify_deadline = context.current() + Duration::from_secs(2);
5761-
while context.current() < no_nullify_deadline {
5761+
loop {
57625762
select! {
5763+
_ = context.sleep_until(no_nullify_deadline) => break,
57635764
msg = batcher_receiver.recv() => match msg.unwrap() {
57645765
batcher::Message::Constructed(Vote::Nullify(nullify))
57655766
if nullify.view() == target_view =>
@@ -5770,8 +5771,7 @@ mod tests {
57705771
}
57715772
batcher::Message::Update { response, .. } => response.send(None).unwrap(),
57725773
_ => {}
5773-
},
5774-
_ = context.sleep(Duration::from_millis(10)) => {}
5774+
}
57755775
}
57765776
}
57775777

@@ -5888,8 +5888,9 @@ mod tests {
58885888
// emit a notarize vote or nullify in this 2s window after certificate handling,
58895889
// even though leader timeout has elapsed.
58905890
let quiet_deadline = context.current() + Duration::from_secs(2);
5891-
while context.current() < quiet_deadline {
5891+
loop {
58925892
select! {
5893+
_ = context.sleep_until(quiet_deadline) => break,
58935894
msg = batcher_receiver.recv() => match msg.unwrap() {
58945895
batcher::Message::Constructed(Vote::Notarize(v)) if v.view() == target_view => {
58955896
panic!(
@@ -5905,8 +5906,7 @@ mod tests {
59055906
}
59065907
batcher::Message::Update { response, .. } => response.send(None).unwrap(),
59075908
_ => {}
5908-
},
5909-
_ = context.sleep(Duration::from_millis(10)) => {}
5909+
}
59105910
}
59115911
}
59125912

@@ -5954,6 +5954,153 @@ mod tests {
59545954
);
59555955
}
59565956

5957+
/// Regression: after a timed-out view is nullified and the voter advances,
5958+
/// the next view must start with a fresh leader timeout.
5959+
fn next_view_gets_fresh_timeout_after_prior_view_nullifies<S, F>(mut fixture: F)
5960+
where
5961+
S: Scheme<Sha256Digest, PublicKey = PublicKey>,
5962+
F: FnMut(&mut deterministic::Context, &[u8], u32) -> Fixture<S>,
5963+
{
5964+
let n = 5;
5965+
let quorum = quorum(n);
5966+
let namespace = b"next_view_gets_fresh_timeout_after_prior_view_nullifies".to_vec();
5967+
let executor = deterministic::Runner::timed(Duration::from_secs(15));
5968+
executor.start(|mut context| async move {
5969+
let (network, oracle) = Network::new(
5970+
context.with_label("network"),
5971+
NConfig {
5972+
max_size: 1024 * 1024,
5973+
disconnect_on_block: true,
5974+
tracked_peer_sets: None,
5975+
},
5976+
);
5977+
network.start();
5978+
5979+
let Fixture {
5980+
participants,
5981+
schemes,
5982+
..
5983+
} = fixture(&mut context, &namespace, n);
5984+
5985+
let (mut mailbox, mut batcher_receiver, _, _, _) = setup_voter(
5986+
&mut context,
5987+
&oracle,
5988+
&participants,
5989+
&schemes,
5990+
RoundRobin::<Sha256>::default(),
5991+
Duration::from_secs(1),
5992+
Duration::from_secs(5),
5993+
Duration::from_mins(60),
5994+
mocks::application::Certifier::Always,
5995+
)
5996+
.await;
5997+
5998+
// Wait for the initial view 1 batcher update.
5999+
loop {
6000+
match batcher_receiver.recv().await.unwrap() {
6001+
batcher::Message::Update {
6002+
current, response, ..
6003+
} => {
6004+
response.send(None).unwrap();
6005+
if current == View::new(1) {
6006+
break;
6007+
}
6008+
}
6009+
batcher::Message::Constructed(_) => {}
6010+
}
6011+
}
6012+
6013+
// Allow view 1 to time out and emit a nullify vote.
6014+
loop {
6015+
select! {
6016+
msg = batcher_receiver.recv() => match msg.unwrap() {
6017+
batcher::Message::Constructed(Vote::Nullify(nullify))
6018+
if nullify.view() == View::new(1) =>
6019+
{
6020+
break;
6021+
}
6022+
batcher::Message::Update { response, .. } => response.send(None).unwrap(),
6023+
_ => {}
6024+
},
6025+
_ = context.sleep(Duration::from_secs(2)) => {
6026+
panic!("expected nullify for view 1");
6027+
},
6028+
}
6029+
}
6030+
6031+
// Deliver a nullification certificate for view 1 so the voter enters view 2.
6032+
let (_, nullification) =
6033+
build_nullification(&schemes, Round::new(Epoch::new(333), View::new(1)), quorum);
6034+
mailbox
6035+
.resolved(Certificate::Nullification(nullification))
6036+
.await;
6037+
6038+
loop {
6039+
select! {
6040+
msg = batcher_receiver.recv() => match msg.unwrap() {
6041+
batcher::Message::Constructed(Vote::Nullify(nullify))
6042+
if nullify.view() == View::new(2) =>
6043+
{
6044+
panic!(
6045+
"received nullify for view 2 before its fresh leader timeout elapsed"
6046+
);
6047+
}
6048+
batcher::Message::Update {
6049+
current, response, ..
6050+
} => {
6051+
response.send(None).unwrap();
6052+
if current == View::new(2) {
6053+
break;
6054+
}
6055+
}
6056+
_ => {}
6057+
},
6058+
_ = context.sleep(Duration::from_secs(2)) => {
6059+
panic!("expected voter to advance to view 2");
6060+
},
6061+
}
6062+
}
6063+
6064+
// The old view timed out, but the new view should still get its own leader timeout
6065+
// rather than immediately nullifying on entry.
6066+
let quiet_deadline = context.current() + Duration::from_millis(500);
6067+
loop {
6068+
select! {
6069+
_ = context.sleep_until(quiet_deadline) => break,
6070+
msg = batcher_receiver.recv() => match msg.unwrap() {
6071+
batcher::Message::Constructed(Vote::Nullify(nullify))
6072+
if nullify.view() == View::new(2) =>
6073+
{
6074+
panic!(
6075+
"received nullify for view 2 before its fresh leader timeout elapsed"
6076+
);
6077+
}
6078+
batcher::Message::Update { response, .. } => response.send(None).unwrap(),
6079+
_ => {}
6080+
}
6081+
}
6082+
}
6083+
});
6084+
}
6085+
6086+
#[test_traced]
6087+
fn test_next_view_gets_fresh_timeout_after_prior_view_nullifies() {
6088+
next_view_gets_fresh_timeout_after_prior_view_nullifies::<_, _>(
6089+
bls12381_threshold_vrf::fixture::<MinPk, _>,
6090+
);
6091+
next_view_gets_fresh_timeout_after_prior_view_nullifies::<_, _>(
6092+
bls12381_threshold_vrf::fixture::<MinSig, _>,
6093+
);
6094+
next_view_gets_fresh_timeout_after_prior_view_nullifies::<_, _>(
6095+
bls12381_multisig::fixture::<MinPk, _>,
6096+
);
6097+
next_view_gets_fresh_timeout_after_prior_view_nullifies::<_, _>(
6098+
bls12381_multisig::fixture::<MinSig, _>,
6099+
);
6100+
next_view_gets_fresh_timeout_after_prior_view_nullifies::<_, _>(ed25519::fixture);
6101+
next_view_gets_fresh_timeout_after_prior_view_nullifies::<_, _>(secp256r1::fixture);
6102+
}
6103+
59576104
/// Regression: the first view should make progress without timing out when peers are online.
59586105
///
59596106
/// We require:
@@ -6069,10 +6216,8 @@ mod tests {
60696216

60706217
let deadline = context.current() + Duration::from_secs(3);
60716218
let reached_view2 = loop {
6072-
if context.current() >= deadline {
6073-
break false;
6074-
}
60756219
select! {
6220+
_ = context.sleep_until(deadline) => break false,
60766221
msg = batcher_receiver.recv() => match msg.unwrap() {
60776222
batcher::Message::Constructed(Vote::Finalize(finalize))
60786223
if finalize.view() == View::new(1) =>
@@ -6093,17 +6238,14 @@ mod tests {
60936238
}
60946239
}
60956240
_ => {}
6096-
},
6097-
_ = context.sleep(Duration::from_millis(10)) => {},
6241+
}
60986242
}
60996243
};
61006244
assert!(!reached_view2, "view advanced before finalize for view 1");
61016245

61026246
let reached_view2 = loop {
6103-
if context.current() >= deadline {
6104-
break false;
6105-
}
61066247
select! {
6248+
_ = context.sleep_until(deadline) => break false,
61076249
msg = batcher_receiver.recv() => match msg.unwrap() {
61086250
batcher::Message::Constructed(Vote::Nullify(nullify))
61096251
if nullify.view() == View::new(1) =>
@@ -6119,8 +6261,7 @@ mod tests {
61196261
}
61206262
}
61216263
_ => {}
6122-
},
6123-
_ = context.sleep(Duration::from_millis(10)) => {},
6264+
}
61246265
}
61256266
};
61266267
assert!(reached_view2, "expected progress to view 2 from view 1");

consensus/src/simplex/actors/voter/state.rs

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1132,6 +1132,76 @@ mod tests {
11321132
});
11331133
}
11341134

1135+
#[test]
1136+
fn entering_next_view_resets_expired_timeout_state() {
1137+
let runtime = deterministic::Runner::default();
1138+
runtime.start(|mut context| async move {
1139+
let namespace = b"ns".to_vec();
1140+
let Fixture {
1141+
schemes, verifier, ..
1142+
} = ed25519::fixture(&mut context, &namespace, 4);
1143+
let leader_timeout = Duration::from_secs(1);
1144+
let retry = Duration::from_secs(3);
1145+
let cfg = Config {
1146+
scheme: schemes[0].clone(),
1147+
elector: <RoundRobin>::default(),
1148+
epoch: Epoch::new(13),
1149+
activity_timeout: ViewDelta::new(3),
1150+
leader_timeout,
1151+
certification_timeout: Duration::from_secs(2),
1152+
timeout_retry: retry,
1153+
};
1154+
let mut state = State::new(context.clone(), cfg);
1155+
state.set_genesis(test_genesis());
1156+
1157+
let view_1 = state.current_view();
1158+
assert_eq!(view_1, View::new(1));
1159+
1160+
// Force the current view into timeout mode and schedule a retry.
1161+
state.trigger_timeout(view_1, TimeoutReason::LeaderTimeout);
1162+
assert!(
1163+
state.next_timeout_deadline() <= context.current(),
1164+
"current view should be expired after timeout is triggered"
1165+
);
1166+
let (was_retry, _) = state
1167+
.construct_nullify(view_1)
1168+
.expect("first timeout nullify should exist");
1169+
assert!(!was_retry);
1170+
let retry_deadline = state.next_timeout_deadline();
1171+
assert_eq!(
1172+
retry_deadline,
1173+
context.current() + retry,
1174+
"timed-out view should schedule a retry"
1175+
);
1176+
1177+
// Advancing into the next view must install fresh deadlines instead of reusing
1178+
// the expired/retrying state from the previous view.
1179+
let votes: Vec<_> = schemes
1180+
.iter()
1181+
.map(|scheme| {
1182+
Nullify::sign::<Sha256Digest>(scheme, Rnd::new(state.epoch(), view_1))
1183+
.expect("nullify")
1184+
})
1185+
.collect();
1186+
let nullification =
1187+
Nullification::from_nullifies(&verifier, &votes, &Sequential).expect("nullify");
1188+
assert!(state.add_nullification(nullification));
1189+
1190+
let view_2 = state.current_view();
1191+
assert_eq!(view_2, View::new(2));
1192+
let next_deadline = state.next_timeout_deadline();
1193+
assert_eq!(
1194+
next_deadline,
1195+
context.current() + leader_timeout,
1196+
"next view should start with a fresh leader timeout"
1197+
);
1198+
assert_ne!(
1199+
next_deadline, retry_deadline,
1200+
"next view must not inherit the previous view retry deadline"
1201+
);
1202+
});
1203+
}
1204+
11351205
#[test]
11361206
fn nullify_only_records_metric_once() {
11371207
let runtime = deterministic::Runner::default();

0 commit comments

Comments
 (0)