Skip to content

Commit 3ed065a

Browse files
committed
chore(rtx-xdp): move init as early as possible in process start
1 parent 1154625 commit 3ed065a

File tree

6 files changed

+56
-46
lines changed

6 files changed

+56
-46
lines changed

core/src/validator.rs

Lines changed: 11 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -141,7 +141,7 @@ use {
141141
solana_turbine::{
142142
self,
143143
broadcast_stage::BroadcastStageType,
144-
xdp::{master_ip_if_bonded, XdpConfig, XdpRetransmitBuilder, XdpRetransmitter},
144+
xdp::{XdpRetransmitBuilder, XdpRetransmitter},
145145
},
146146
solana_unified_scheduler_logic::SchedulingMode,
147147
solana_unified_scheduler_pool::{DefaultSchedulerPool, SupportedSchedulingMode},
@@ -151,7 +151,7 @@ use {
151151
std::{
152152
borrow::Cow,
153153
collections::{HashMap, HashSet},
154-
net::{IpAddr, SocketAddr},
154+
net::SocketAddr,
155155
num::{NonZeroU64, NonZeroUsize},
156156
path::{Path, PathBuf},
157157
str::FromStr,
@@ -412,7 +412,6 @@ pub struct ValidatorConfig {
412412
pub replay_transactions_threads: NonZeroUsize,
413413
pub tvu_shred_sigverify_threads: NonZeroUsize,
414414
pub delay_leader_block_for_pending_fork: bool,
415-
pub retransmit_xdp: Option<XdpConfig>,
416415
pub repair_handler_type: RepairHandlerType,
417416
}
418417

@@ -494,7 +493,6 @@ impl ValidatorConfig {
494493
tvu_shred_sigverify_threads: NonZeroUsize::new(get_thread_count())
495494
.expect("thread count is non-zero"),
496495
delay_leader_block_for_pending_fork: false,
497-
retransmit_xdp: None,
498496
repair_handler_type: RepairHandlerType::default(),
499497
}
500498
}
@@ -701,6 +699,7 @@ impl Validator {
701699
socket_addr_space: SocketAddrSpace,
702700
tpu_config: ValidatorTpuConfig,
703701
admin_rpc_service_post_init: Arc<RwLock<Option<AdminRpcRequestMetadataPostInit>>>,
702+
maybe_xdp_retransmit_builder: Option<XdpRetransmitBuilder>,
704703
) -> Result<Self> {
705704
#[cfg(debug_assertions)]
706705
const DEBUG_ASSERTION_STATUS: &str = "enabled";
@@ -1543,28 +1542,13 @@ impl Validator {
15431542
)
15441543
});
15451544

1546-
let (xdp_retransmitter, xdp_sender) = if let Some(xdp_config) =
1547-
config.retransmit_xdp.clone()
1548-
{
1549-
let src_port = node.sockets.retransmit_sockets[0]
1550-
.local_addr()
1551-
.expect("failed to get local address")
1552-
.port();
1553-
let src_ip = match node.bind_ip_addrs.active() {
1554-
IpAddr::V4(ip) if !ip.is_unspecified() => Some(ip),
1555-
IpAddr::V4(_unspecified) => xdp_config
1556-
.interface
1557-
.as_ref()
1558-
.and_then(|iface| master_ip_if_bonded(iface)),
1559-
_ => panic!("IPv6 not supported"),
1545+
let (xdp_retransmitter, xdp_sender) =
1546+
if let Some(xdp_retransmit_builder) = maybe_xdp_retransmit_builder {
1547+
let (rtx, sender) = xdp_retransmit_builder.build(exit.clone());
1548+
(Some(rtx), Some(sender))
1549+
} else {
1550+
(None, None)
15601551
};
1561-
let xdp_retransmit_builder = XdpRetransmitBuilder::new(xdp_config, src_port, src_ip)
1562-
.expect("failed to create xdp retransmitter");
1563-
let (rtx, sender) = xdp_retransmit_builder.build(exit.clone());
1564-
(Some(rtx), Some(sender))
1565-
} else {
1566-
(None, None)
1567-
};
15681552

15691553
// disable all2all tests if not allowed for a given cluster type
15701554
let alpenglow_socket = if genesis_config.cluster_type == ClusterType::Testnet
@@ -2968,6 +2952,7 @@ mod tests {
29682952
SocketAddrSpace::Unspecified,
29692953
ValidatorTpuConfig::new_for_tests(),
29702954
Arc::new(RwLock::new(None)),
2955+
None,
29712956
)
29722957
.expect("assume successful validator start");
29732958
assert_eq!(
@@ -3182,6 +3167,7 @@ mod tests {
31823167
SocketAddrSpace::Unspecified,
31833168
ValidatorTpuConfig::new_for_tests(),
31843169
Arc::new(RwLock::new(None)),
3170+
None,
31853171
)
31863172
.expect("assume successful validator start")
31873173
})

local-cluster/src/local_cluster.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -425,6 +425,7 @@ impl LocalCluster {
425425
// to use the same QUIC ports due to SO_REUSEPORT.
426426
ValidatorTpuConfig::new_for_tests(),
427427
Arc::new(RwLock::new(None)),
428+
None,
428429
)
429430
.expect("assume successful validator start");
430431

@@ -636,6 +637,7 @@ impl LocalCluster {
636637
socket_addr_space,
637638
ValidatorTpuConfig::new_for_tests(),
638639
Arc::new(RwLock::new(None)),
640+
None,
639641
)
640642
.expect("assume successful validator start");
641643

@@ -702,6 +704,7 @@ impl LocalCluster {
702704
socket_addr_space,
703705
ValidatorTpuConfig::new_for_tests(),
704706
Arc::new(RwLock::new(None)),
707+
None,
705708
)
706709
.unwrap_or_else(|e| panic!("Cluster leader failed to start: {e:?}"));
707710

@@ -768,6 +771,7 @@ impl LocalCluster {
768771
socket_addr_space,
769772
ValidatorTpuConfig::new_for_tests(),
770773
Arc::new(RwLock::new(None)),
774+
None,
771775
)
772776
.unwrap_or_else(|e| panic!("Validator {i} failed to start: {e:?}"));
773777

@@ -1321,6 +1325,7 @@ impl Cluster for LocalCluster {
13211325
socket_addr_space,
13221326
ValidatorTpuConfig::new_for_tests(),
13231327
Arc::new(RwLock::new(None)),
1328+
None,
13241329
)
13251330
.expect("assume successful validator start");
13261331
cluster_validator_info.validator = Some(restarted_node);

local-cluster/src/validator_configs.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,6 @@ pub fn safe_clone_config(config: &ValidatorConfig) -> ValidatorConfig {
7979
replay_transactions_threads: config.replay_transactions_threads,
8080
tvu_shred_sigverify_threads: config.tvu_shred_sigverify_threads,
8181
delay_leader_block_for_pending_fork: config.delay_leader_block_for_pending_fork,
82-
retransmit_xdp: config.retransmit_xdp.clone(),
8382
repair_handler_type: config.repair_handler_type.clone(),
8483
}
8584
}

test-validator/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1227,6 +1227,7 @@ impl TestValidator {
12271227
socket_addr_space,
12281228
ValidatorTpuConfig::new_for_tests(),
12291229
config.admin_rpc_service_post_init.clone(),
1230+
None,
12301231
)?);
12311232

12321233
let test_validator = TestValidator {

validator/src/admin_rpc_service.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1618,6 +1618,7 @@ mod tests {
16181618
SocketAddrSpace::Unspecified,
16191619
ValidatorTpuConfig::new_for_tests(),
16201620
post_init.clone(),
1621+
None,
16211622
)
16221623
.expect("assume successful validator start");
16231624
assert_eq!(

validator/src/commands/run/execute.rs

Lines changed: 38 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ use {
6666
},
6767
solana_turbine::{
6868
broadcast_stage::BroadcastStageType,
69-
xdp::{set_cpu_affinity, XdpConfig},
69+
xdp::{master_ip_if_bonded, set_cpu_affinity, XdpConfig, XdpRetransmitBuilder},
7070
},
7171
solana_validator_exit::Exit,
7272
std::{
@@ -269,6 +269,42 @@ pub fn execute(
269269

270270
let mut node = Node::new_with_external_ip(&identity_keypair.pubkey(), node_config);
271271

272+
// XDP _MUST_ be setup _BEFORE_ the app spawns any threads to ensure linux
273+
// capabilities do not leak, leaving the process in a state where it could
274+
// potentially be used as a privilege escalation gadget
275+
let maybe_xdp_retransmit_builder = retransmit_xdp.clone().map(|xdp_config| {
276+
let src_port = node.sockets.retransmit_sockets[0]
277+
.local_addr()
278+
.expect("failed to get local address")
279+
.port();
280+
let src_ip = match node.bind_ip_addrs.active() {
281+
IpAddr::V4(ip) if !ip.is_unspecified() => Some(ip),
282+
IpAddr::V4(_unspecified) => xdp_config
283+
.interface
284+
.as_ref()
285+
.and_then(|iface| master_ip_if_bonded(iface)),
286+
_ => panic!("IPv6 not supported"),
287+
};
288+
XdpRetransmitBuilder::new(xdp_config, src_port, src_ip)
289+
.expect("failed to create xdp retransmitter")
290+
});
291+
292+
let reserved = retransmit_xdp
293+
.map(|xdp| xdp.cpus.clone())
294+
.unwrap_or_default()
295+
.iter()
296+
.cloned()
297+
.collect::<HashSet<_>>();
298+
if !reserved.is_empty() {
299+
let available = core_affinity::get_core_ids()
300+
.unwrap_or_default()
301+
.into_iter()
302+
.map(|core_id| core_id.id)
303+
.collect::<HashSet<_>>();
304+
let available = available.difference(&reserved);
305+
set_cpu_affinity(available.into_iter().copied()).unwrap();
306+
}
307+
272308
solana_core::validator::report_target_features();
273309

274310
let authorized_voter_keypairs = keypairs_of(matches, "authorized_voter_keypairs")
@@ -714,7 +750,6 @@ pub fn execute(
714750
wen_restart_proto_path: value_t!(matches, "wen_restart", PathBuf).ok(),
715751
wen_restart_coordinator: value_t!(matches, "wen_restart_coordinator", Pubkey).ok(),
716752
turbine_disabled: Arc::<AtomicBool>::default(),
717-
retransmit_xdp,
718753
broadcast_stage_type: BroadcastStageType::Standard,
719754
block_verification_method: value_t_or_exit!(
720755
matches,
@@ -751,24 +786,6 @@ pub fn execute(
751786
.into(),
752787
};
753788

754-
let reserved = validator_config
755-
.retransmit_xdp
756-
.as_ref()
757-
.map(|xdp| xdp.cpus.clone())
758-
.unwrap_or_default()
759-
.iter()
760-
.cloned()
761-
.collect::<HashSet<_>>();
762-
if !reserved.is_empty() {
763-
let available = core_affinity::get_core_ids()
764-
.unwrap_or_default()
765-
.into_iter()
766-
.map(|core_id| core_id.id)
767-
.collect::<HashSet<_>>();
768-
let available = available.difference(&reserved);
769-
set_cpu_affinity(available.into_iter().copied()).unwrap();
770-
}
771-
772789
let vote_account = pubkey_of(matches, "vote_account").unwrap_or_else(|| {
773790
if !validator_config.voting_disabled {
774791
warn!("--vote-account not specified, validator will not vote");
@@ -1031,6 +1048,7 @@ pub fn execute(
10311048
vote_quic_server_config,
10321049
},
10331050
admin_service_post_init,
1051+
maybe_xdp_retransmit_builder,
10341052
) {
10351053
Ok(validator) => Ok(validator),
10361054
Err(err) => {

0 commit comments

Comments
 (0)