Skip to content

Do not let gossip start with unset shred version #5837

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 3 additions & 5 deletions gossip/src/cluster_info.rs
Original file line number Diff line number Diff line change
Expand Up @@ -504,9 +504,7 @@ impl ClusterInfo {
.rpc()
.filter(|addr| self.socket_addr_space.check(addr))?;
let node_version = self.get_node_version(node.pubkey());
if my_shred_version != 0
&& (node.shred_version() != 0 && node.shred_version() != my_shred_version)
{
if node.shred_version() != 0 && node.shred_version() != my_shred_version {

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can node shred_version be ever zero in the first place?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

not anymore since we switched spy to run on a valid shred version

return None;
}
let rpc_addr = node_rpc.ip();
Expand Down Expand Up @@ -561,7 +559,7 @@ impl ClusterInfo {
}

let node_version = self.get_node_version(node.pubkey());
if my_shred_version != 0 && (node.shred_version() != 0 && node.shred_version() != my_shred_version) {
if node.shred_version() != 0 && node.shred_version() != my_shred_version {
different_shred_nodes = different_shred_nodes.saturating_add(1);
None
} else {
Expand Down Expand Up @@ -1974,7 +1972,7 @@ impl ClusterInfo {
let self_pubkey = self.id();
// Filter out values if the shred-versions are different.
let self_shred_version = self.my_shred_version();
if self_shred_version != 0 {
{
let gossip_crds = self.gossip.crds.read().unwrap();
let discard_different_shred_version = |msg| {
discard_different_shred_version(msg, self_shred_version, &gossip_crds, &self.stats)
Expand Down
29 changes: 5 additions & 24 deletions validator/src/bootstrap.rs
Original file line number Diff line number Diff line change
Expand Up @@ -143,15 +143,15 @@ fn start_gossip_node(
ledger_path: &Path,
gossip_addr: &SocketAddr,
gossip_socket: UdpSocket,
expected_shred_version: Option<u16>,
expected_shred_version: u16,
gossip_validators: Option<HashSet<Pubkey>>,
should_check_duplicate_instance: bool,
socket_addr_space: SocketAddrSpace,
) -> (Arc<ClusterInfo>, Arc<AtomicBool>, GossipService) {
let contact_info = ClusterInfo::gossip_contact_info(
identity_keypair.pubkey(),
*gossip_addr,
expected_shred_version.unwrap_or(0),
expected_shred_version,
);
let mut cluster_info = ClusterInfo::new(contact_info, identity_keypair, socket_addr_space);
cluster_info.set_entrypoints(cluster_entrypoints.to_vec());
Expand All @@ -173,7 +173,6 @@ fn start_gossip_node(

fn get_rpc_peers(
cluster_info: &ClusterInfo,
cluster_entrypoints: &[ContactInfo],
validator_config: &ValidatorConfig,
blacklisted_rpc_nodes: &mut HashSet<Pubkey>,
blacklist_timeout: &Instant,
Expand All @@ -183,21 +182,6 @@ fn get_rpc_peers(
let shred_version = validator_config
.expected_shred_version
.unwrap_or_else(|| cluster_info.my_shred_version());
if shred_version == 0 {
let all_zero_shred_versions = cluster_entrypoints.iter().all(|cluster_entrypoint| {
cluster_entrypoint
.gossip()
.and_then(|addr| cluster_info.lookup_contact_info_by_gossip_addr(&addr))
.is_some_and(|entrypoint| entrypoint.shred_version() == 0)
});

if all_zero_shred_versions {
eprintln!("Entrypoint shred version is zero. Restart with --expected-shred-version");
exit(1);
}
info!("Waiting to adopt entrypoint shred version...");
return vec![];
}

info!(
"Searching for an RPC service with shred version {shred_version}{}...",
Expand Down Expand Up @@ -473,15 +457,13 @@ fn ping(addr: &SocketAddr) -> Option<Duration> {
fn get_vetted_rpc_nodes(
vetted_rpc_nodes: &mut Vec<(ContactInfo, Option<SnapshotHash>, RpcClient)>,
cluster_info: &Arc<ClusterInfo>,
cluster_entrypoints: &[ContactInfo],
validator_config: &ValidatorConfig,
blacklisted_rpc_nodes: &mut HashSet<Pubkey>,
bootstrap_config: &RpcBootstrapConfig,
) {
while vetted_rpc_nodes.is_empty() {
let rpc_node_details = match get_rpc_nodes(
cluster_info,
cluster_entrypoints,
validator_config,
blacklisted_rpc_nodes,
bootstrap_config,
Expand Down Expand Up @@ -631,7 +613,9 @@ pub fn rpc_bootstrap(
.gossip()
.expect("Operator must spin up node with valid gossip address"),
node.sockets.gossip.try_clone().unwrap(),
validator_config.expected_shred_version,
validator_config
.expected_shred_version
.expect("expected_shred_version should not be None"),
validator_config.gossip_validators.clone(),
should_check_duplicate_instance,
socket_addr_space,
Expand All @@ -642,7 +626,6 @@ pub fn rpc_bootstrap(
get_vetted_rpc_nodes(
&mut vetted_rpc_nodes,
&gossip.as_ref().unwrap().0,
cluster_entrypoints,
validator_config,
&mut blacklisted_rpc_nodes,
&bootstrap_config,
Expand Down Expand Up @@ -712,7 +695,6 @@ pub fn rpc_bootstrap(
/// This function finds the highest compatible snapshots from the cluster and returns RPC peers.
fn get_rpc_nodes(
cluster_info: &ClusterInfo,
cluster_entrypoints: &[ContactInfo],
validator_config: &ValidatorConfig,
blacklisted_rpc_nodes: &mut HashSet<Pubkey>,
bootstrap_config: &RpcBootstrapConfig,
Expand All @@ -728,7 +710,6 @@ fn get_rpc_nodes(

let rpc_peers = get_rpc_peers(
cluster_info,
cluster_entrypoints,
validator_config,
blacklisted_rpc_nodes,
&blacklist_timeout,
Expand Down
Loading