Skip to content

Commit 9c63d42

Browse files
committed
feat(coprocessor): make drift auto revert off by default
Auto revert stays enabled in the E2E test suite (set explicitly in the coprocessor docker-compose file).
1 parent 74c7bc7 commit 9c63d42

7 files changed

Lines changed: 55 additions & 19 deletions

File tree

charts/coprocessor/templates/coprocessor-gw-listener-deployment.yaml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,8 @@ spec:
7070
- --get-logs-poll-interval={{ .Values.gwListener.args.getLogsPollInterval }}
7171
- --get-logs-block-batch-size={{ .Values.gwListener.args.getLogsBlockBatchSize }}
7272
- --log-last-processed-every-number-of-updates={{ .Values.gwListener.args.logLastProcessedEveryNumberOfUpdates }}
73-
- --drift-revert-grace-period={{ .Values.gwListener.args.driftRevertGracePeriod }}
73+
- --drift-auto-revert-grace-period={{ .Values.gwListener.args.driftAutoRevertGracePeriod }}
74+
- --drift-auto-revert-enabled={{ .Values.gwListener.args.driftAutoRevertEnabled }}
7475
{{- with .Values.gwListener.extraArgs }}
7576
{{- toYaml . | nindent 12 }}
7677
{{- end }}

charts/coprocessor/values.yaml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -593,7 +593,8 @@ gwListener:
593593
getLogsPollInterval: 500ms
594594
getLogsBlockBatchSize: 100
595595
logLastProcessedEveryNumberOfUpdates: 50
596-
driftRevertGracePeriod: 60s
596+
driftAutoRevertGracePeriod: 60s
597+
driftAutoRevertEnabled: false
597598

598599
# Additional args appended after all template-injected and structured args.
599600
# Use for replay parameters, e.g.:

coprocessor/fhevm-engine/gw-listener/src/bin/gw_listener.rs

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,13 @@ struct Conf {
118118
/// running the revert SQL. Gives other services time to see the signal and
119119
/// re-exec before the DB state changes.
120120
#[arg(long, default_value = "60s", value_parser = parse_duration)]
121-
drift_revert_grace_period: Duration,
121+
drift_auto_revert_grace_period: Duration,
122+
123+
/// Enable automatic drift recovery. When false (default), the drift
124+
/// detector still runs and logs drift, but no revert signal is created
125+
/// and no automatic recovery kicks in. Opt-in while the feature rolls out.
126+
#[arg(long, default_value_t = false)]
127+
drift_auto_revert_enabled: bool,
122128
}
123129

124130
fn install_signal_handlers(cancel_token: CancellationToken) -> anyhow::Result<()> {
@@ -196,7 +202,8 @@ async fn main() -> anyhow::Result<()> {
196202
gateway_config_address: conf.gateway_config_address,
197203
drift_no_consensus_timeout: conf.drift_no_consensus_timeout,
198204
drift_post_consensus_grace: conf.drift_post_consensus_grace,
199-
drift_revert_grace_period: conf.drift_revert_grace_period,
205+
drift_auto_revert_grace_period: conf.drift_auto_revert_grace_period,
206+
drift_auto_revert_enabled: conf.drift_auto_revert_enabled,
200207
};
201208

202209
let gw_listener = std::sync::Arc::new(GatewayListener::new(
@@ -229,7 +236,7 @@ async fn main() -> anyhow::Result<()> {
229236
config.database_url.as_str(),
230237
cancel_token.clone(),
231238
Some(RevertRunnerConfig {
232-
grace_period: config.drift_revert_grace_period,
239+
grace_period: config.drift_auto_revert_grace_period,
233240
}),
234241
)
235242
.await?;

coprocessor/fhevm-engine/gw-listener/src/drift_detector.rs

Lines changed: 32 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,7 @@ pub(crate) struct DriftDetector {
9191
local_node_id: String,
9292
drift_no_consensus_timeout: Duration,
9393
drift_post_consensus_grace: Duration,
94+
auto_revert_enabled: bool,
9495
deferred_drift_detected: u64,
9596
deferred_consensus_timeout: u64,
9697
deferred_missing_submission: u64,
@@ -102,13 +103,15 @@ impl DriftDetector {
102103
expected_senders: Vec<Address>,
103104
drift_no_consensus_timeout: Duration,
104105
drift_post_consensus_grace: Duration,
106+
auto_revert_enabled: bool,
105107
) -> Self {
106108
Self {
107109
current_expected_senders: expected_senders,
108110
open_handles: HashMap::new(),
109111
local_node_id: std::env::var("HOSTNAME").unwrap_or_else(|_| "unknown".to_owned()),
110112
drift_no_consensus_timeout,
111113
drift_post_consensus_grace,
114+
auto_revert_enabled,
112115
deferred_drift_detected: 0,
113116
deferred_consensus_timeout: 0,
114117
deferred_missing_submission: 0,
@@ -339,13 +342,14 @@ impl DriftDetector {
339342
);
340343
self.deferred_drift_detected += 1;
341344

342-
// Auto drift recovery: signal that a revert is needed.
343-
fhevm_engine_common::drift_revert::on_drift_detected(
344-
db_pool,
345-
handle.as_slice(),
346-
chain_id_from_handle(handle),
347-
)
348-
.await;
345+
if self.auto_revert_enabled {
346+
fhevm_engine_common::drift_revert::on_drift_detected(
347+
db_pool,
348+
handle.as_slice(),
349+
chain_id_from_handle(handle),
350+
)
351+
.await;
352+
}
349353
}
350354

351355
let Some(state) = self.open_handles.get_mut(&handle) else {
@@ -698,10 +702,12 @@ mod tests {
698702
let digest_b = FixedBytes::from([0x66; 32]);
699703
let digest_128 = FixedBytes::from([0x77; 32]);
700704
let base = Instant::now();
705+
let auto_revert_enabled = false;
701706
let mut detector = DriftDetector::new(
702707
vec![sender_a, sender_b, sender_c],
703708
Duration::from_secs(50),
704709
Duration::from_secs(10),
710+
auto_revert_enabled,
705711
);
706712

707713
detector.set_replaying(true);
@@ -829,7 +835,23 @@ mod tests {
829835
}
830836

831837
fn detector() -> DriftDetector {
832-
DriftDetector::new(senders(), Duration::from_secs(5), Duration::from_secs(2))
838+
let auto_revert_enabled = false;
839+
DriftDetector::new(
840+
senders(),
841+
Duration::from_secs(5),
842+
Duration::from_secs(2),
843+
auto_revert_enabled,
844+
)
845+
}
846+
847+
fn detector_with_auto_revert() -> DriftDetector {
848+
let auto_revert_enabled = true;
849+
DriftDetector::new(
850+
senders(),
851+
Duration::from_secs(5),
852+
Duration::from_secs(2),
853+
auto_revert_enabled,
854+
)
833855
}
834856

835857
fn make_consensus_state(
@@ -1648,7 +1670,7 @@ mod tests {
16481670
.await
16491671
.unwrap();
16501672

1651-
let mut detector = detector();
1673+
let mut detector = detector_with_auto_revert();
16521674
detector
16531675
.handle_consensus(
16541676
make_consensus_event(
@@ -1683,7 +1705,7 @@ mod tests {
16831705
.await
16841706
.unwrap();
16851707

1686-
let mut detector = detector();
1708+
let mut detector = detector_with_auto_revert();
16871709
detector
16881710
.handle_consensus(
16891711
make_consensus_event(

coprocessor/fhevm-engine/gw-listener/src/gw_listener.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,7 @@ impl<P: Provider<Ethereum> + Clone + 'static, A: AwsS3Interface + Clone + 'stati
173173
expected_senders,
174174
self.conf.drift_no_consensus_timeout,
175175
self.conf.drift_post_consensus_grace,
176+
self.conf.drift_auto_revert_enabled,
176177
);
177178
if replay_from_block.is_none() {
178179
if let Err(e) = self

coprocessor/fhevm-engine/gw-listener/src/lib.rs

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -60,11 +60,13 @@ pub struct ConfigSettings {
6060
/// How long to wait after detecting a pending drift-revert signal before
6161
/// running the revert SQL. Gives other services time to see the signal and
6262
/// drop their in-flight work.
63-
pub drift_revert_grace_period: Duration,
63+
pub drift_auto_revert_grace_period: Duration,
64+
/// If true, the drift detector creates drift-revert signals when it sees
65+
/// a consensus mismatch. If false, drift is still detected and logged,
66+
/// but no signal is created.
67+
pub drift_auto_revert_enabled: bool,
6468
}
6569

66-
/// Default is used by unit tests only. Production defaults come from
67-
/// the CLI arg definitions in `bin/gw_listener.rs` (e.g. `--drift-no-consensus-timeout 5m`).
6870
impl Default for ConfigSettings {
6971
fn default() -> Self {
7072
Self {
@@ -85,7 +87,8 @@ impl Default for ConfigSettings {
8587
gateway_config_address: None,
8688
drift_no_consensus_timeout: Duration::from_secs(5),
8789
drift_post_consensus_grace: Duration::from_secs(2),
88-
drift_revert_grace_period: Duration::from_secs(60),
90+
drift_auto_revert_grace_period: Duration::from_secs(60),
91+
drift_auto_revert_enabled: false,
8992
}
9093
}
9194
}

test-suite/fhevm/docker-compose/coprocessor-docker-compose.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,7 @@ services:
7979
- --kms-generation-address=${KMS_GENERATION_ADDRESS}
8080
- --error-sleep-initial-secs=1
8181
- --error-sleep-max-secs=10
82+
- --drift-auto-revert-enabled=true
8283
depends_on:
8384
coprocessor-db-migration:
8485
condition: service_completed_successfully

0 commit comments

Comments
 (0)