Skip to content

Commit 627d099

Browse files
authored
Limit maintenance time (#3951)
# Description
Recently there was an issue where indexing one particular settlement took a very long time. This caused the whole protocol to slow down, which in turn reduced the throughput and increased the time to happy moo. This PR introduces a timeout for the maintenance logic. Whenever we hit this timeout, we prefer running an auction that is not fully up to date over stalling the protocol any further (e.g. an order that has already been settled might still be included).
1 parent 327d323 commit 627d099

File tree

3 files changed

+28
-3
lines changed

3 files changed

+28
-3
lines changed

crates/autopilot/src/arguments.rs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -267,6 +267,13 @@ pub struct Arguments {
267267
/// and not cut any auctions.
268268
#[clap(long, env, default_value = "false", action = clap::ArgAction::Set)]
269269
pub enable_leader_lock: bool,
270+
271+
/// Limits the amount of time the autopilot may spend running the
272+
/// maintenance logic between 2 auctions. When this times out we prefer
273+
/// running a not fully updated auction over stalling the protocol any
274+
/// further.
275+
#[clap(long, env, default_value = "5s", value_parser = humantime::parse_duration)]
276+
pub max_maintenance_timeout: Duration,
270277
}
271278

272279
#[derive(Debug, clap::Parser)]
@@ -399,6 +406,7 @@ impl std::fmt::Display for Arguments {
399406
disable_1271_order_balance_filter,
400407
disable_1271_order_sig_filter,
401408
enable_leader_lock,
409+
max_maintenance_timeout,
402410
} = self;
403411

404412
write!(f, "{shared}")?;
@@ -481,6 +489,7 @@ impl std::fmt::Display for Arguments {
481489
"disable_1271_order_sig_filter: {disable_1271_order_sig_filter}"
482490
)?;
483491
writeln!(f, "enable_leader_lock: {enable_leader_lock}")?;
492+
writeln!(f, "max_maintenance_timeout: {max_maintenance_timeout:?}")?;
484493
Ok(())
485494
}
486495
}

crates/autopilot/src/maintenance.rs

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,11 @@ use {
2020
core::{AtomicU64, GenericGauge},
2121
},
2222
shared::{event_handling::AlloyEventRetriever, maintenance::Maintaining},
23-
std::{future::Future, sync::Arc, time::Instant},
23+
std::{
24+
future::Future,
25+
sync::Arc,
26+
time::{Duration, Instant},
27+
},
2428
tokio::sync::Mutex,
2529
};
2630

@@ -36,18 +40,25 @@ pub struct Maintenance {
3640
cow_amm_indexer: Vec<Arc<dyn Maintaining>>,
3741
/// On which block we last ran an update successfully.
3842
last_processed: Mutex<BlockInfo>,
43+
/// Limits the amount of time the autopilot may spend running the
44+
/// maintenance logic between 2 auctions. When this times out we prefer
45+
/// running a not fully updated auction over stalling the protocol any
46+
/// further.
47+
timeout: Duration,
3948
}
4049

4150
impl Maintenance {
4251
pub fn new(
4352
settlement_indexer: EventUpdater<Indexer, AlloyEventRetriever<GPv2SettlementContract>>,
4453
db_cleanup: Postgres,
54+
timeout: Duration,
4555
) -> Self {
4656
Self {
4757
settlement_indexer,
4858
db_cleanup,
4959
cow_amm_indexer: Default::default(),
5060
last_processed: Default::default(),
61+
timeout,
5162
}
5263
}
5364

@@ -62,7 +73,8 @@ impl Maintenance {
6273
}
6374

6475
let start = Instant::now();
65-
if let Err(err) = self.update_inner().await {
76+
77+
if let Err(err) = tokio::time::timeout(self.timeout, self.update_inner()).await {
6678
tracing::warn!(?err, block = new_block.number, "failed to run maintenance");
6779
metrics().updates.with_label_values(&["error"]).inc();
6880
return;

crates/autopilot/src/run.rs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -569,7 +569,11 @@ pub async fn run(args: Arguments, shutdown_controller: ShutdownController) {
569569
let trusted_tokens =
570570
AutoUpdatingTokenList::from_configuration(market_makable_token_list_configuration).await;
571571

572-
let mut maintenance = Maintenance::new(settlement_event_indexer, db_write.clone());
572+
let mut maintenance = Maintenance::new(
573+
settlement_event_indexer,
574+
db_write.clone(),
575+
args.max_maintenance_timeout,
576+
);
573577
maintenance.with_cow_amms(&cow_amm_registry);
574578

575579
if !args.ethflow_contracts.is_empty() {

0 commit comments

Comments
 (0)