Commit ca0bed3

Merge pull request DMDcoin#314 from SurfingNerd/i243-no-reports-on-missing-block-production
Connectivity Report spam mitigations
2 parents 8ce0248 + f8607e3

File tree

4 files changed: +196 -105 lines changed
crates/ethcore/src/engines/hbbft/hbbft_early_epoch_end_manager.rs

Lines changed: 37 additions & 11 deletions
@@ -9,7 +9,7 @@ use crate::{
 };
 use std::{
     collections::BTreeMap,
-    time::{Duration, Instant},
+    time::{Duration, Instant, UNIX_EPOCH},
 };

 use super::{
@@ -261,8 +261,8 @@ impl HbbftEarlyEpochEndManager {
         }
     }

-    /// decides on the memorium data if we should update to contract data.
-    /// end executes them.
+    /// decides on the memorium data if we should update to contract data,
+    /// and sends out transactions to do so.
    pub fn decide(
        &mut self,
        memorium: &HbbftMessageMemorium,
@@ -286,25 +286,51 @@ impl HbbftEarlyEpochEndManager {
             debug!(target: "engine", "early-epoch-end: detected attempt to break because of is_major_syncing() instead of is_syncing(). no decision: syncing");
         }

-        let block_num = if let Some(block) = full_client.block(BlockId::Latest) {
-            block.number()
+        let (block_num, block_time) = if let Some(block) = full_client.block(BlockId::Latest) {
+            (block.number(), block.timestamp())
         } else {
             error!(target:"engine", "early-epoch-end: could not retrieve latest block.");
             return;
         };

-        let treshold: u64 = 2;
+        // start of implementation for:
+        // https://github.com/DMDcoin/diamond-node/issues/243
+        // connectivity reports should not trigger if there is no block production.
+        let now = UNIX_EPOCH.elapsed().expect("Time not available").as_secs();
+        // this should hold true.
+        if now >= block_time {
+            let elapsed_since_last_block = now - block_time;
+            // todo: this is max blocktime (heartbeat) x 2; better to read the maximum blocktime.
+            // on phoenix protocol triggers, this would also skip the sending of disconnectivity reports.
+            if elapsed_since_last_block > 10 * 60 {
+                info!(target:"engine", "skipping early-epoch-end: no block was produced for {elapsed_since_last_block} s (now {now}; block_time {block_time}). not sending early epoch end reports.");
+                return;
+            }
+        } else {
+            // if the newest block is from the future, something very problematic happened:
+            // the system clock could be wrong,
+            // or the blockchain really produces blocks from the future.
+            // we are just not sending reports in this case.
+            error!(target:"engine", "early-epoch-end: now {now}; block_time {block_time}: block was created in the future?! not sending early epoch end reports.");
+            return;
+        }
+        // end of implementation for:
+        // https://github.com/DMDcoin/diamond-node/issues/243
+
+        let threshold: u64 = 2;
+
         // todo: read this out from contracts: ConnectivityTrackerHbbft -> reportDisallowPeriod
         // requires us to update the Contracts ABIs:
         // https://github.com/DMDcoin/diamond-node/issues/115
-        let treshold_time = Duration::from_secs(12 * 60); // 12 Minutes = 1 times the heartbeat + 2 minutes as grace period.
+        let threshold_time = Duration::from_secs(12 * 60); // 12 minutes = 1 heartbeat + 2 minutes as grace period.

-        if self.start_time.elapsed() < treshold_time {
-            debug!(target: "engine", "early-epoch-end: no decision: Treshold time not reached.");
+        if self.start_time.elapsed() < threshold_time {
+            debug!(target: "engine", "early-epoch-end: no decision: Threshold time not reached.");
             return;
         }

-        if block_num < self.start_block + treshold {
+        if block_num < self.start_block + threshold {
             // not enough blocks have passed this epoch,
             // to judge other nodes.
             debug!(target: "engine", "early-epoch-end: no decision: not enough blocks.");
@@ -328,7 +354,7 @@ impl HbbftEarlyEpochEndManager {
             if let Some(node_history) = epoch_history.get_history_for_node(validator) {
                 let last_message_time = node_history.get_last_good_message_time();
                 let last_message_time_lateness = last_message_time.elapsed();
-                if last_message_time_lateness > treshold_time {
+                if last_message_time_lateness > threshold_time {
                     // we do not have to send a notification if we already did so.
                     if !self.is_reported(client, validator_address) {
                         // this function will also add the validator to the list of flagged validators.
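
The guard above boils down to a small, pure predicate over wall-clock time and the latest block's timestamp. The sketch below isolates it (the function name should_send_connectivity_reports and the STALE_SECS constant are illustrative, not part of the commit) and shows the three outcomes: proceed on a recent block, skip when block production has stalled for more than 10 minutes, and skip when the newest block's timestamp lies in the future.

    // Minimal sketch of the block-production guard, under the assumptions above.
    use std::time::UNIX_EPOCH;

    /// Assumed stale threshold from the diff: 10 minutes (max blocktime x 2).
    const STALE_SECS: u64 = 10 * 60;

    fn should_send_connectivity_reports(now: u64, block_time: u64) -> bool {
        if now >= block_time {
            // A stale chain means no block production; disconnect reports would
            // only spam the contracts without a working quorum to act on them.
            now - block_time <= STALE_SECS
        } else {
            // The newest block is from the future: clock skew or a deeper problem.
            // The commit chooses to not send reports in this case either.
            false
        }
    }

    fn main() {
        let now = UNIX_EPOCH.elapsed().expect("Time not available").as_secs();
        assert!(should_send_connectivity_reports(now, now - 30)); // recent block
        assert!(!should_send_connectivity_reports(now, now - 11 * 60)); // stalled chain
        assert!(!should_send_connectivity_reports(now, now + 60)); // future block
    }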

crates/ethcore/src/engines/hbbft/hbbft_engine.rs

Lines changed: 133 additions & 83 deletions
@@ -60,7 +60,7 @@ use super::{
     sealing::{self, RlpSig, Sealing},
 };
 use crate::engines::hbbft::hbbft_message_memorium::HbbftMessageDispatcher;
-use std::{ops::Deref, sync::atomic::Ordering};
+use std::sync::atomic::Ordering;

 // Internal representation for storing deferred outgoing consensus messages.
 struct StoredOutgoingMessage {
@@ -194,6 +194,8 @@ const ENGINE_VALIDATOR_CANDIDATE_ACTIONS: TimerToken = 4;
 // Check for current Phoenix Protocol phase
 const ENGINE_PHOENIX_CHECK: TimerToken = 5;

+const HBBFT_CONNECTIVITY_TOKEN: TimerToken = 6;
+
 impl TransitionHandler {
     fn handle_shutdown_on_missing_block_import(
         &self,
@@ -383,6 +385,12 @@ impl IoHandler<()> for TransitionHandler {
             .unwrap_or_else(
                 |e| warn!(target: "consensus", "ENGINE_PHOENIX_CHECK Timer failed: {}.", e),
             );
+
+        // the early epoch end connectivity timer interval should match the max blocktime.
+        io.register_timer(HBBFT_CONNECTIVITY_TOKEN, Duration::from_secs(300))
+            .unwrap_or_else(
+                |e| warn!(target: "consensus", "HBBFT_CONNECTIVITY_TOKEN Timer failed: {}.", e),
+            );
     }

     fn timeout(&self, io: &IoContext<()>, timer: TimerToken) {
@@ -472,6 +480,10 @@ impl IoHandler<()> for TransitionHandler {
             }
         } else if timer == ENGINE_PHOENIX_CHECK {
             self.engine.handle_phoenix_recovery_protocol();
+        } else if timer == HBBFT_CONNECTIVITY_TOKEN {
+            if let Err(err) = self.engine.do_validator_engine_early_epoch_end_actions() {
+                error!(target: "consensus", "do_validator_engine_early_epoch_end_actions failed: {:?}", err);
+            }
         }
     }
 }
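
The new recurring check follows the TimerToken pattern the handler already uses for ENGINE_PHOENIX_CHECK: register a token once with an interval, then branch on it in timeout(). The sketch below mirrors that dispatch shape; the Engine struct, the free-standing timeout function, and eprintln! are stand-ins for parity's IoHandler plumbing and logging macros, not the actual API.

    // Simplified sketch of the TimerToken dispatch pattern, with stand-in types.
    type TimerToken = usize;

    const ENGINE_PHOENIX_CHECK: TimerToken = 5;
    const HBBFT_CONNECTIVITY_TOKEN: TimerToken = 6;

    struct Engine;

    impl Engine {
        fn handle_phoenix_recovery_protocol(&self) { /* ... */ }
        fn do_validator_engine_early_epoch_end_actions(&self) -> Result<(), String> {
            Ok(())
        }
    }

    fn timeout(engine: &Engine, timer: TimerToken) {
        if timer == ENGINE_PHOENIX_CHECK {
            engine.handle_phoenix_recovery_protocol();
        } else if timer == HBBFT_CONNECTIVITY_TOKEN {
            // Errors are logged rather than propagated, so one failed tick
            // does not stop the recurring connectivity check.
            if let Err(err) = engine.do_validator_engine_early_epoch_end_actions() {
                eprintln!("do_validator_engine_early_epoch_end_actions failed: {:?}", err);
            }
        }
    }

    fn main() {
        let engine = Engine;
        timeout(&engine, HBBFT_CONNECTIVITY_TOKEN); // fires the connectivity branch
    }
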
@@ -884,7 +896,7 @@ impl HoneyBadgerBFT {
             self.hbbft_message_dispatcher.report_seal_bad(
                 &sender_id,
                 block_num,
-                BadSealReason::ErrorTresholdSignStep,
+                BadSealReason::ErrorThresholdSignStep,
             );
         }
     }
@@ -1224,102 +1236,135 @@ impl HoneyBadgerBFT {
                     return Ok(());
                 }

-                // If we have no signer there is nothing for us to send.
-                let mining_address = match self.signer.read().as_ref() {
-                    Some(signer) => signer.address(),
-                    None => {
-                        // we do not have a signer on Full and RPC nodes.
-                        // here is a possible performance improvement:
-                        // this won't change during the lifetime of the application ?!
-                        return Ok(());
-                    }
-                };
+                self.hbbft_peers_service
+                    .channel()
+                    .send(HbbftConnectToPeersMessage::AnnounceAvailability)?;

-                let engine_client = client_arc.as_ref();
-                if let Err(err) = self
-                    .hbbft_engine_cache
-                    .lock()
-                    .refresh_cache(mining_address, engine_client)
-                {
-                    trace!(target: "engine", "do_validator_engine_actions: data could not get updated, follow up tasks might fail: {:?}", err);
-                }
+                self.hbbft_peers_service
+                    .send_message(HbbftConnectToPeersMessage::AnnounceOwnInternetAddress)?;

-                let engine_client = client_arc.deref();
+                if self.should_connect_to_validator_set() {
+                    // we just keep those variables here, because we need them in the early_epoch_end_manager.
+                    // this is just an optimization, so we do not acquire the lock for that much time.
+                    let mut validator_set: Vec<NodeId> = Vec::new();

-                let block_chain_client = match engine_client.as_full_client() {
-                    Some(block_chain_client) => block_chain_client,
-                    None => {
-                        return Err("Unable to retrieve client.as_full_client()".into());
-                    }
-                };
+                    {
+                        let hbbft_state_option =
+                            self.hbbft_state.try_read_for(Duration::from_millis(250));
+                        match hbbft_state_option {
+                            Some(hbbft_state) => {
+                                //hbbft_state.is_validator();

-                let should_connect_to_validator_set = self.should_connect_to_validator_set();
-                let mut should_handle_early_epoch_end = false;
-
-                // we just keep those variables here, because we need them in the early_epoch_end_manager.
-                // this is just an optimization, so we do not acquire the lock for that much time.
-                let mut validator_set: Vec<NodeId> = Vec::new();
-                let mut epoch_start_block: u64 = 0;
-                let mut epoch_num: u64 = 0;
-
-                {
-                    let hbbft_state_option =
-                        self.hbbft_state.try_read_for(Duration::from_millis(250));
-                    match hbbft_state_option {
-                        Some(hbbft_state) => {
-                            should_handle_early_epoch_end = hbbft_state.is_validator();
-
-                            // if we are a pending validator, we will also do the reserved peers management.
-                            if should_handle_early_epoch_end {
-                                // we already remember here the data the early epoch manager needs,
-                                // so we do not have to acquire the lock for that long.
-                                epoch_num = hbbft_state.get_current_posdao_epoch();
-                                epoch_start_block =
-                                    hbbft_state.get_current_posdao_epoch_start_block();
                                 validator_set = hbbft_state.get_validator_set();
                             }
-                        }
-                        None => {
-                            // maybe improve here, to return with a result that triggers a retry soon.
-                            debug!(target: "engine", "Unable to do_validator_engine_actions: Could not acquire read lock for hbbft state. Unable to decide about early epoch end. retrying soon.");
-                        }
-                    };
-                } // drop lock for hbbft_state
+                            None => {
+                                // maybe improve here, to return with a result that triggers a retry soon.
+                                debug!(target: "engine", "Unable to do_validator_engine_actions: Could not acquire read lock for hbbft state. Unable to decide about early epoch end. retrying soon.");
+                            }
+                        };
+                    } // drop lock for hbbft_state

-                // if we do not have to do anything, we can return early.
-                if !(should_connect_to_validator_set || should_handle_early_epoch_end) {
-                    return Ok(());
+                    if !validator_set.is_empty() {
+                        self.hbbft_peers_service.send_message(
+                            HbbftConnectToPeersMessage::ConnectToCurrentPeers(validator_set),
+                        )?;
+                    }
                 }

-                self.hbbft_peers_service
-                    .channel()
-                    .send(HbbftConnectToPeersMessage::AnnounceAvailability)?;
+                self.do_keygen();

-                self.hbbft_peers_service
-                    .send_message(HbbftConnectToPeersMessage::AnnounceOwnInternetAddress)?;
+                return Ok(());
+            }

-                if should_connect_to_validator_set {
-                    self.hbbft_peers_service.send_message(
-                        HbbftConnectToPeersMessage::ConnectToCurrentPeers(validator_set.clone()),
-                    )?;
-                }
+            None => {
+                // client arc not ready yet,
+                // can happen during initialization and shutdown.
+                return Ok(());
+            }
+        }
+    }

-                if should_handle_early_epoch_end {
-                    self.handle_early_epoch_end(
-                        block_chain_client,
-                        engine_client,
-                        &mining_address,
-                        epoch_start_block,
-                        epoch_num,
-                        &validator_set,
-                    );
+    /// refreshes the engine cache and returns the mining address.
+    fn refresh_engine_cache(&self, engine_client: &dyn EngineClient) -> Option<Address> {
+        let mining_address = match self.signer.read().as_ref() {
+            Some(signer) => signer.address(),
+            None => {
+                // we do not have a signer on Full and RPC nodes.
+                // here is a possible performance improvement:
+                // this won't change during the lifetime of the application ?!
+                return None;
+            }
+        };
+
+        if let Err(err) = self
+            .hbbft_engine_cache
+            .lock()
+            .refresh_cache(mining_address, engine_client)
+        {
+            warn!(target: "engine", "do_validator_engine_actions: data could not get updated, follow-up tasks might fail: {:?}", err);
+        }
+
+        return Some(mining_address);
+    }
+
+    /// hbbft early epoch end actions are executed on a different timing than the regular validator engine steps.
+    fn do_validator_engine_early_epoch_end_actions(&self) -> Result<(), Error> {
+        // here we need to differentiate the engine functions
+        // that require different levels of access to the client.
+        trace!(target: "engine", "do_validator_engine_early_epoch_end_actions.");
+        match self.client_arc() {
+            Some(client_arc) => {
+                if self.is_syncing(&client_arc) {
+                    // we are syncing - do not do anything.
+                    trace!(target: "engine", "do_validator_engine_early_epoch_end_actions: skipping because we are syncing.");
+                    return Ok(());
                 }

-                self.do_keygen();
+                let engine_client = client_arc.as_ref();
+                let mining_address = match self.refresh_engine_cache(engine_client) {
+                    Some(h) => h,
+                    None => return Ok(()),
+                };
+
+                let block_chain_client = match engine_client.as_full_client() {
+                    Some(block_chain_client) => block_chain_client,
+                    None => {
+                        return Err("Unable to retrieve client.as_full_client()".into());
+                    }
+                };
+
+                let hbbft_state_option = self.hbbft_state.try_read_for(Duration::from_millis(250));
+                match hbbft_state_option {
+                    Some(hbbft_state) => {
+                        if !hbbft_state.is_validator() {
+                            // we are not known as a validator; no early epoch end actions to process.
+                            return Ok(());
+                        }
+
+                        // if we are a pending validator, we will also do the reserved peers management.
+                        // we already remember here the data the early epoch manager needs,
+                        // so we do not have to acquire the lock for that long.
+                        let epoch_num = hbbft_state.get_current_posdao_epoch();
+                        let epoch_start_block = hbbft_state.get_current_posdao_epoch_start_block();
+                        let validator_set = hbbft_state.get_validator_set();
+
+                        self.handle_early_epoch_end(
+                            block_chain_client,
+                            engine_client,
+                            &mining_address,
+                            epoch_start_block,
+                            epoch_num,
+                            &validator_set,
+                        );
+                    }
+                    None => {
+                        // maybe improve here, to return with a result that triggers a retry soon.
+                        debug!(target: "engine", "Unable to do_validator_engine_early_epoch_end_actions: Could not acquire read lock for hbbft state. Unable to decide about early epoch end. retrying soon.");
+                    }
+                };

                 return Ok(());
             }
-
             None => {
                 // client arc not ready yet,
                 // can happen during initialization and shutdown.
@@ -1417,16 +1462,21 @@ impl HoneyBadgerBFT {
         }
     }

-    /** returns if the signer of hbbft is tracked as available in the hbbft contracts..*/
+    /// returns if the signer of hbbft is tracked as available in the hbbft contracts.
     pub fn is_available(&self) -> bool {
         self.hbbft_engine_cache.lock().is_available()
     }

-    /** returns if the signer of hbbft is stacked. */
+    /// returns if the signer of hbbft is staked.
     pub fn is_staked(&self) -> bool {
         self.hbbft_engine_cache.lock().is_staked()
     }

+    /// returns if the signer of hbbft is a current validator.
+    pub fn is_validator(&self) -> bool {
+        self.hbbft_state.read().is_validator()
+    }
+
     fn start_hbbft_epoch_if_ready(&self) {
         if let Some(client) = self.client_arc() {
             if self.transaction_queue_and_time_thresholds_reached(&client) {
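
A detail worth noting in the extracted do_validator_engine_early_epoch_end_actions: the hbbft_state read lock is held only briefly. The code waits at most 250 ms via try_read_for, copies out the epoch number, the epoch start block, and the validator set, and releases the guard before the slower follow-up work. Below is a minimal sketch of that pattern, assuming parking_lot's RwLock (which the try_read_for call in the diff suggests) and stand-in field types.

    // Sketch of bounded lock acquisition with a short-lived read guard.
    use parking_lot::RwLock;
    use std::time::Duration;

    struct HbbftState {
        validator_set: Vec<u64>, // NodeId replaced by u64 for the sketch
        posdao_epoch: u64,
    }

    fn snapshot(state_lock: &RwLock<HbbftState>) -> Option<(Vec<u64>, u64)> {
        // Bounded wait: if consensus currently holds the write lock, give up
        // after 250 ms and let the recurring timer retry on its next tick.
        let state = state_lock.try_read_for(Duration::from_millis(250))?;
        Some((state.validator_set.clone(), state.posdao_epoch))
    } // read guard dropped here

    fn main() {
        let lock = RwLock::new(HbbftState {
            validator_set: vec![1, 2, 3],
            posdao_epoch: 7,
        });
        if let Some((validators, epoch)) = snapshot(&lock) {
            println!("epoch {} with {} validators", epoch, validators.len());
        }
    }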
