123 changes: 84 additions & 39 deletions polkadot/node/network/collator-protocol/src/collator_side/mod.rs
@@ -1276,61 +1276,85 @@ async fn handle_incoming_request<Context>(
 	Ok(())
 }
 
-/// Peer's view has changed. Send advertisements for new relay parents
-/// if there're any.
+/// Advertises collations for the given relay parents to the specified peer.
+///
+/// Returns a list of unknown relay parents.
 #[overseer::contextbounds(CollatorProtocol, prefix = self::overseer)]
-async fn handle_peer_view_change<Context>(
+async fn advertise_collations_for_relay_parents<Context>(
 	ctx: &mut Context,
 	state: &mut State,
-	peer_id: PeerId,
-	view: View,
-) {
-	let Some(PeerData { view: current, unknown_heads }) = state.peer_data.get_mut(&peer_id) else {
-		return;
-	};
-
-	let added: Vec<Hash> = view.difference(&*current).cloned().collect();
-
-	*current = view;
+	peer_id: &PeerId,
+	relay_parents: impl IntoIterator<Item = Hash>,
+) -> Vec<Hash> {
+	let mut unknown_relay_parents = Vec::new();
 
-	for added in added.into_iter() {
-		let block_hashes = match state.per_relay_parent.contains_key(&added) {
+	for relay_parent in relay_parents {
+		let block_hashes = match state.per_relay_parent.contains_key(&relay_parent) {
 			true => state
 				.implicit_view
 				.as_ref()
 				.and_then(|implicit_view| {
-					implicit_view.known_allowed_relay_parents_under(&added, state.collating_on)
+					implicit_view
+						.known_allowed_relay_parents_under(&relay_parent, state.collating_on)
 				})
 				.unwrap_or_default(),
 			false => {
-				gum::trace!(
-					target: LOG_TARGET,
-					?peer_id,
-					new_leaf = ?added,
-					"New leaf in peer's view is unknown",
-				);
-
-				unknown_heads.insert(added, ());
-
+				unknown_relay_parents.push(relay_parent);
 				continue;
 			},
 		};
 
 		for block_hash in block_hashes {
-			let Some(per_relay_parent) = state.per_relay_parent.get_mut(block_hash) else {
-				continue;
-			};
-
-			advertise_collation(
-				ctx,
-				*block_hash,
-				per_relay_parent,
-				&peer_id,
-				&state.peer_ids,
-				&mut state.advertisement_timeouts,
-				&state.metrics,
-			)
-			.await;
+			if let Some(per_relay_parent) = state.per_relay_parent.get_mut(block_hash) {
+				advertise_collation(
+					ctx,
+					*block_hash,
+					per_relay_parent,
+					peer_id,
+					&state.peer_ids,
+					&mut state.advertisement_timeouts,
+					&state.metrics,
+				)
+				.await;
+			}
 		}
 	}
+
+	unknown_relay_parents
+}
+
+/// Peer's view has changed. Send advertisements for new relay parents
+/// if there're any.
+#[overseer::contextbounds(CollatorProtocol, prefix = self::overseer)]
+async fn handle_peer_view_change<Context>(
+	ctx: &mut Context,
+	state: &mut State,
+	peer_id: PeerId,
+	view: View,
+) {
+	let Some(added) = state.peer_data.get_mut(&peer_id).map(|peer| {
+		let diff: Vec<Hash> = view.difference(&peer.view).cloned().collect();
+		peer.view = view;
+		diff
+	}) else {
+		return;
+	};
+
+	let unknown_relay_parents =
+		advertise_collations_for_relay_parents(ctx, state, &peer_id, added).await;
+
+	if !unknown_relay_parents.is_empty() {
+		gum::trace!(
+			target: LOG_TARGET,
+			?peer_id,
+			new_leaves = ?unknown_relay_parents,
+			"New leaves in peer's view are unknown",
+		);
+
+		if let Some(PeerData { unknown_heads, .. }) = state.peer_data.get_mut(&peer_id) {
+			for unknown in unknown_relay_parents {
+				unknown_heads.insert(unknown, ());
+			}
+		}
+	}
 }
@@ -1443,8 +1467,29 @@ async fn handle_network_msg<Context>(
 		UpdatedAuthorityIds(peer_id, authority_ids) => {
 			gum::trace!(target: LOG_TARGET, ?peer_id, ?authority_ids, "Updated authority ids");
 			if state.peer_data.contains_key(&peer_id) {
-				if state.peer_ids.insert(peer_id, authority_ids).is_none() {
+				let is_new_peer = state.peer_ids.insert(peer_id, authority_ids).is_none();
+
+				if is_new_peer {
 					declare(ctx, state, &peer_id).await;
+				} else {
+					// Authority IDs changed for an existing peer. Re-advertise collations
+					// for relay parents already in their view, as the previous authority IDs
+					// may not have matched our validator groups.
+					let relay_parents_in_view: Vec<_> = state
+						.peer_data
+						.get(&peer_id)
+						.map(|data| data.view.iter().cloned().collect())
+						.unwrap_or_default();
+
+					// Unknown relay parents are ignored because they were
+					// handled when the peer's view was first processed.
+					let _ = advertise_collations_for_relay_parents(
+						ctx,
+						state,
+						&peer_id,
+						relay_parents_in_view,
+					)
+					.await;
+				}
 			}
 		},
180 changes: 180 additions & 0 deletions polkadot/node/network/collator-protocol/src/collator_side/tests/mod.rs
@@ -2042,3 +2042,183 @@ fn connect_advertise_disconnect_three_backing_groups()
},
);
}

/// Test that collations are re-advertised when a peer's authority IDs are updated
/// to match the validator group (when they previously didn't match).
///
/// This tests the fix for a race condition where:
/// 1. A peer connects with authority IDs that don't match the validator group
/// 2. A collation is distributed but can't be advertised (no authority ID match)
/// 3. The peer's view includes the relay parent
/// 4. UpdatedAuthorityIds arrives with new authority IDs that DO match
/// 5. The collation should now be advertised
#[test]
fn readvertise_collation_on_authority_id_update() {
let test_state = TestState::default();
let local_peer_id = test_state.local_peer_id;
let collator_pair = test_state.collator_pair.clone();

test_harness(
local_peer_id,
collator_pair,
ReputationAggregator::new(|_| true),
|mut test_harness| async move {
let virtual_overseer = &mut test_harness.virtual_overseer;

let peer = test_state.current_group_validator_peer_ids()[0];
// Use an unrelated authority ID that doesn't match the validator group
let unrelated_authority_id: AuthorityDiscoveryId = Sr25519Keyring::Eve.public().into();
// The actual authority ID that matches the validator group
let matching_authority_id =
test_state.current_group_validator_authority_ids()[0].clone();

overseer_send(virtual_overseer, CollatorProtocolMessage::ConnectToBackingGroups).await;

overseer_send(virtual_overseer, CollatorProtocolMessage::CollateOn(test_state.para_id))
.await;

update_view(
Some(test_state.current_group_validator_authority_ids()),
&test_state,
virtual_overseer,
vec![(test_state.relay_parent, 10)],
1,
)
.await;

// Connect peer with an authority ID that does NOT match the validator group
connect_peer(
virtual_overseer,
peer,
CollationVersion::V2,
Some(unrelated_authority_id),
)
.await;

// We still declare to the peer (declaration doesn't depend on validator group match)
expect_declare_msg(virtual_overseer, &test_state, &peer).await;

// Distribute a collation
let DistributeCollation { candidate, .. } = distribute_collation(
virtual_overseer,
test_state.current_group_validator_authority_ids(),
&test_state,
test_state.relay_parent,
CoreIndex(0),
)
.await;

// Send peer view change - peer is interested in the relay parent
send_peer_view_change(virtual_overseer, &peer, vec![test_state.relay_parent]).await;

// No advertisement should happen because the peer's authority ID (Eve)
// doesn't match any validator in the group for this collation
assert!(
overseer_recv_with_timeout(virtual_overseer, TIMEOUT).await.is_none(),
"Should not advertise to peer with non-matching authority ID"
);

// Now send UpdatedAuthorityIds with the correct authority ID
overseer_send(
virtual_overseer,
CollatorProtocolMessage::NetworkBridgeUpdate(
NetworkBridgeEvent::UpdatedAuthorityIds(
peer,
HashSet::from([matching_authority_id]),
),
),
)
.await;

// Now the collation should be advertised because the authority ID matches
expect_advertise_collation_msg(
virtual_overseer,
&[peer],
test_state.relay_parent,
vec![candidate.hash()],
)
.await;

test_harness
},
)
}

/// Test that UpdatedAuthorityIds for existing peers with unchanged matching authority IDs
/// doesn't cause duplicate advertisements (idempotency check).
#[test]
fn no_duplicate_advertisement_on_authority_id_update() {
let test_state = TestState::default();
let local_peer_id = test_state.local_peer_id;
let collator_pair = test_state.collator_pair.clone();

test_harness(
local_peer_id,
collator_pair,
ReputationAggregator::new(|_| true),
|mut test_harness| async move {
let virtual_overseer = &mut test_harness.virtual_overseer;

let peer = test_state.current_group_validator_peer_ids()[0];
let validator_id = test_state.current_group_validator_authority_ids()[0].clone();

overseer_send(virtual_overseer, CollatorProtocolMessage::ConnectToBackingGroups).await;

overseer_send(virtual_overseer, CollatorProtocolMessage::CollateOn(test_state.para_id))
.await;

update_view(
Some(test_state.current_group_validator_authority_ids()),
&test_state,
virtual_overseer,
vec![(test_state.relay_parent, 10)],
1,
)
.await;

// Connect peer with matching authority ID
connect_peer(virtual_overseer, peer, CollationVersion::V2, Some(validator_id.clone()))
.await;
expect_declare_msg(virtual_overseer, &test_state, &peer).await;

// Distribute a collation
let DistributeCollation { candidate, .. } = distribute_collation(
virtual_overseer,
test_state.current_group_validator_authority_ids(),
&test_state,
test_state.relay_parent,
CoreIndex(0),
)
.await;

// Peer view change triggers advertisement
send_peer_view_change(virtual_overseer, &peer, vec![test_state.relay_parent]).await;

// First advertisement
expect_advertise_collation_msg(
virtual_overseer,
&[peer],
test_state.relay_parent,
vec![candidate.hash()],
)
.await;

// Send UpdatedAuthorityIds with the same authority ID
overseer_send(
virtual_overseer,
CollatorProtocolMessage::NetworkBridgeUpdate(
NetworkBridgeEvent::UpdatedAuthorityIds(peer, HashSet::from([validator_id])),
),
)
.await;

// No duplicate advertisement should happen (already advertised to this validator)
assert!(
overseer_recv_with_timeout(virtual_overseer, TIMEOUT).await.is_none(),
"Should not re-advertise to peer that was already advertised to"
);

test_harness
},
)
}
25 changes: 25 additions & 0 deletions prdoc/pr_10891.prdoc
@@ -0,0 +1,25 @@
title: 'collator-protocol: Re-advertise collations when peer authority IDs are updated'
doc:
- audience: Node Dev
description: |-
The collator protocol contained a race condition which could manifest as "Collation wasn't advertised".

A given peer ("A") can connect before its new authority key (nK) is received via `UpdatedAuthorityIds`.

- T0: peer A connects: `PeerConnected`
- T1: peer A sends its current view: `PeerViewChange`
  - Peer A wants block N
- T2: `validator_group.should_advertise_to` checks peer A for key nK (the new key)
  - We don't have this key stored and therefore return `ShouldAdvertiseTo::NotAuthority`
- T3: `UpdatedAuthorityIds` arrives with (peer A, [nK])

At this point we have the collation, peer A wants the collation, and we know peer A is an authority, yet we never advertise it. The collation then expires with "Collation wasn't advertised".

To close the gap, the `UpdatedAuthorityIds` event now triggers a re-advertisement of collations.
- Note: if an advertisement was already sent, the logic does not resend it (enforced in `should_advertise_to`).

Part of the stabilization of:
- https://github.com/paritytech/polkadot-sdk/issues/10425
crates:
- name: polkadot-collator-protocol
bump: patch
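
For reference, the deduplication the note above relies on can be sketched as follows. This is a simplified, hypothetical rendering of the advertisement gate — the real check lives in `ValidatorGroup::should_advertise_to` on the collator side, and the types, fields, and signature below are illustrative assumptions, not the actual API:

```rust
use std::collections::HashSet;

// Illustrative stand-ins for the real Polkadot types.
type AuthorityDiscoveryId = [u8; 32];
type CandidateHash = [u8; 32];

enum ShouldAdvertiseTo {
	Yes,
	NotAuthority,
	AlreadyAdvertised,
}

struct ValidatorGroup {
	/// Authority IDs in the assigned backing group.
	validators: HashSet<AuthorityDiscoveryId>,
	/// (authority, candidate) pairs we have already advertised.
	advertised: HashSet<(AuthorityDiscoveryId, CandidateHash)>,
}

impl ValidatorGroup {
	fn should_advertise_to(
		&self,
		peer_authority_id: Option<&AuthorityDiscoveryId>,
		candidate: &CandidateHash,
	) -> ShouldAdvertiseTo {
		// T2 in the timeline above: if `UpdatedAuthorityIds` has not arrived
		// yet, the peer has no known authority ID and we bail out here.
		let Some(id) = peer_authority_id else { return ShouldAdvertiseTo::NotAuthority };
		if !self.validators.contains(id) {
			return ShouldAdvertiseTo::NotAuthority;
		}
		// This is what makes the re-advertisement on `UpdatedAuthorityIds`
		// safe to fire unconditionally: repeats are filtered out here.
		if self.advertised.contains(&(*id, *candidate)) {
			return ShouldAdvertiseTo::AlreadyAdvertised;
		}
		ShouldAdvertiseTo::Yes
	}
}
```

Because the gate is keyed on (authority, candidate), re-running advertisement for a peer's whole view after an authority-ID update is idempotent, which is exactly what the second test below verifies.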
13 changes: 13 additions & 0 deletions prdoc/pr_10950.prdoc
@@ -0,0 +1,13 @@
title: 'fix(revive): handle transaction hash conflicts during re-org'
doc:
- audience: Runtime Dev
description: "## Summary\n\nFixes a UNIQUE constraint violation when processing\
\ blocks after a re-org:\n```\nUNIQUE constraint failed: transaction_hashes.transaction_hash\n\
```\n\n## Problem\n\nWhen a blockchain re-org occurs:\n1. Block A contains transaction\
\ TX1 \u2192 stored in `transaction_hashes`\n2. Server restarts (clearing the\
\ in-memory `block_number_to_hashes` map)\n3. Re-org happens, Block B (different\
\ hash) now contains the same TX1\n4. INSERT fails because TX1 already exists\
\ with old block_hash"
crates:
- name: pallet-revive-eth-rpc
bump: patch
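
The Rust side of this fix is not included in the rendered diff above. As a rough illustration only — the table and column names are inferred from the error message, and the function, schema, and use of `sqlx` here are assumptions, not the actual `pallet-revive-eth-rpc` code — one way to make the insert idempotent across re-orgs is an upsert that re-points the transaction hash at the new block:

```rust
// Hypothetical sketch, not the actual patch. Assumes a SQLite
// `transaction_hashes` table with a UNIQUE `transaction_hash` column
// (per the error message) and a `block_hash` column.
async fn store_transaction_hash(
	pool: &sqlx::SqlitePool,
	transaction_hash: &[u8],
	block_hash: &[u8],
) -> Result<(), sqlx::Error> {
	sqlx::query(
		"INSERT INTO transaction_hashes (transaction_hash, block_hash) \
		 VALUES ($1, $2) \
		 ON CONFLICT(transaction_hash) DO UPDATE SET block_hash = excluded.block_hash",
	)
	.bind(transaction_hash)
	.bind(block_hash)
	.execute(pool)
	.await?;
	Ok(())
}
```

With an upsert, the conflicting row is re-pointed at the re-orged block in place instead of aborting block processing with a constraint violation.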
