Skip to content

Commit ef495b8

Browse files
authored
mdns/fix: Failed to register opened substream (#301)
This PR ensures that when MDNS encounters an error, it does not terminate other litep2p components.

Previously, if MDNS failed to send a query or to handle incoming packets, it would exit. The exit is evidenced by the following log line, observed on a Kusama validator:

```
tokio-runtime-worker litep2p::mdns: failed to send mdns query error=IoError(NetworkUnreachable)
```

This causes the Substrate discovery mechanism to exit as well, which in turn propagates to the litep2p Kademlia handler, which also exits. This leaves the node unable to discover the network or handle incoming substreams.

### Testing Done

The issue was reproduced locally with a tokio interval patch that exits the MDNS component after connectivity to Kusama has been established:

```
2024-12-11 12:50:34.425 ERROR tokio-runtime-worker litep2p::mdns: interval tick MDNS
2024-12-11 12:50:34.425 ERROR tokio-runtime-worker litep2p::mdns: interval tick expired, closing MDNS
2024-12-11 12:50:35.111 ERROR tokio-runtime-worker litep2p::tcp::connection: failed to register opened substream to protocol protocol=Allocated("/b0a8d493285c2df73290dfb7e61f870f17b41801197a149ca93654499ea3dafe/kad") peer=PeerId("12D3KooWEwh9AwKFUJKPFqmJXWByH7JKYRcfAUfPvp9f3xzj3ibJ") endpoint=Dialer { address: "/ip4/3.96.91.180/tcp/30333", connection_id: ConnectionId(200) } error=ConnectionClosed
...
2024-12-11 12:50:38.753 ERROR tokio-runtime-worker litep2p::tcp::connection: failed to register opened substream to protocol protocol=Allocated("/b0a8d493285c2df73290dfb7e61f870f17b41801197a149ca93654499ea3dafe/kad") peer=PeerId("12D3KooWJb1W7jmqDCaU3Hsh6NRfDo12gnj8hnKfGwA77vRE4jBv") endpoint=Dialer { address: "/ip4/51.38.63.126/tcp/30333", connection_id: ConnectionId(294) } error=ConnectionClosed 2024-12-11 12:50:40.389 ERROR tokio-runtime-worker litep2p::tcp::connection: failed to register opened substream to protocol protocol=Allocated("/b0a8d493285c2df73290dfb7e61f870f17b41801197a149ca93654499ea3dafe/kad") peer=PeerId("12D3KooWGXXuap75AN24aA5XP9S1X3BKqdDbYyHwBTJakMyv1P5V") endpoint=Dialer { address: "/ip4/104.243.41.217/tcp/30330", connection_id: ConnectionId(29) } error=ConnectionClosed ... 2024-12-11 12:53:15.690 ERROR tokio-runtime-worker litep2p::tcp: connection exited with error connection_id=ConnectionId(29) error=EssentialTaskClosed 2024-12-11 12:53:40.071 ERROR tokio-runtime-worker litep2p::tcp::connection: failed to register opened substream to protocol protocol=Allocated("/b0a8d493285c2df73290dfb7e61f870f17b41801197a149ca93654499ea3dafe/kad") peer=PeerId("12D3KooWGphqiEqsfR5ZnV7R2Lgubxi7eAo6MTx3tVmso8oCkvJn") endpoint=Dialer { address: "/ip4/51.163.1.153/tcp/30003", connection_id: ConnectionId(51) } error=ConnectionClosed 2024-12-11 12:53:40.233 ERROR tokio-runtime-worker litep2p::tcp::connection: failed to register opened substream to protocol protocol=Allocated("/b0a8d493285c2df73290dfb7e61f870f17b41801197a149ca93654499ea3dafe/kad") peer=PeerId("12D3KooWM5mnupyiDGtdN6qm3riQDjBbAZfFqAJfMbcbPQbkEn8u") endpoint=Dialer { address: "/ip4/168.119.149.170/tcp/30333", connection_id: ConnectionId(28) } error=ConnectionClosed 2024-12-11 12:53:41.060 ERROR tokio-runtime-worker litep2p::tcp::connection: failed to register opened substream to protocol protocol=Allocated("/b0a8d493285c2df73290dfb7e61f870f17b41801197a149ca93654499ea3dafe/kad") 
peer=PeerId("12D3KooWGphqiEqsfR5ZnV7R2Lgubxi7eAo6MTx3tVmso8oCkvJn") endpoint=Dialer { address: "/ip4/51.163.1.153/tcp/30003", connection_id: ConnectionId(51) } error=ConnectionClosed 2024-12-11 12:53:42.766 ERROR tokio-runtime-worker litep2p::tcp::connection: failed to register opened substream to protocol protocol=Allocated("/b0a8d493285c2df73290dfb7e61f870f17b41801197a149ca93654499ea3dafe/kad") peer=PeerId("12D3KooWM5mnupyiDGtdN6qm3riQDjBbAZfFqAJfMbcbPQbkEn8u") endpoint=Dialer { address: "/ip4/168.119.149.170/tcp/30333", connection_id: ConnectionId(28) } error=ConnectionClosed ``` Closes: #300 Thanks @dmitry-markin for also confirming this 🙏 cc @paritytech/networking --------- Signed-off-by: Alexandru Vasile <[email protected]>
1 parent e9a009f commit ef495b8

File tree

1 file changed

+15
-16
lines changed

1 file changed

+15
-16
lines changed

src/protocol/mdns.rs

Lines changed: 15 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121

2222
//! [Multicast DNS](https://en.wikipedia.org/wiki/Multicast_DNS) implementation.
2323
24-
use crate::{error::Error, transport::manager::TransportManagerHandle, DEFAULT_CHANNEL_SIZE};
24+
use crate::{transport::manager::TransportManagerHandle, DEFAULT_CHANNEL_SIZE};
2525

2626
use futures::Stream;
2727
use multiaddr::Multiaddr;
@@ -95,7 +95,7 @@ pub(crate) struct Mdns {
9595
socket: UdpSocket,
9696

9797
/// Query interval.
98-
query_interval: Duration,
98+
query_interval: tokio::time::Interval,
9999

100100
/// TX channel for sending events to user.
101101
event_tx: Sender<MdnsEvent>,
@@ -138,12 +138,15 @@ impl Mdns {
138138
socket.join_multicast_v4(&IPV4_MULTICAST_ADDRESS, &Ipv4Addr::UNSPECIFIED)?;
139139
socket.set_nonblocking(true)?;
140140

141+
let mut query_interval = tokio::time::interval(config.query_interval);
142+
query_interval.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Delay);
143+
141144
Ok(Self {
142145
_transport_handle,
143146
event_tx: config.tx,
144147
next_query_id: 1337u16,
145148
discovered: HashSet::new(),
146-
query_interval: config.query_interval,
149+
query_interval,
147150
receive_buffer: vec![0u8; 4096],
148151
username: rand::thread_rng()
149152
.sample_iter(&Alphanumeric)
@@ -276,24 +279,19 @@ impl Mdns {
276279
}
277280

278281
/// Event loop for [`Mdns`].
279-
pub(crate) async fn start(mut self) -> crate::Result<()> {
282+
pub(crate) async fn start(mut self) {
280283
tracing::debug!(target: LOG_TARGET, "starting mdns event loop");
281284

282-
// before starting the loop, make an initial query to the network
283-
//
284-
// bail early if the socket is not working
285-
self.on_outbound_request().await?;
286-
287285
loop {
288286
tokio::select! {
289-
_ = tokio::time::sleep(self.query_interval) => {
290-
tracing::trace!(target: LOG_TARGET, "timeout expired");
287+
_ = self.query_interval.tick() => {
288+
tracing::trace!(target: LOG_TARGET, "query interval ticked");
291289

292290
if let Err(error) = self.on_outbound_request().await {
293291
tracing::error!(target: LOG_TARGET, ?error, "failed to send mdns query");
294-
return Err(error);
295292
}
296-
}
293+
},
294+
297295
result = self.socket.recv_from(&mut self.receive_buffer) => match result {
298296
Ok((nread, address)) => match Packet::parse(&self.receive_buffer[..nread]) {
299297
Ok(packet) => match packet.has_flags(PacketFlag::RESPONSE) {
@@ -308,9 +306,11 @@ impl Mdns {
308306
}
309307
}
310308
false => if let Some(response) = self.on_inbound_request(packet) {
311-
self.socket
309+
if let Err(error) = self.socket
312310
.send_to(&response, (IPV4_MULTICAST_ADDRESS, IPV4_MULTICAST_PORT))
313-
.await?;
311+
.await {
312+
tracing::error!(target: LOG_TARGET, ?error, "failed to send mdns response");
313+
}
314314
}
315315
}
316316
Err(error) => tracing::debug!(
@@ -323,7 +323,6 @@ impl Mdns {
323323
}
324324
Err(error) => {
325325
tracing::error!(target: LOG_TARGET, ?error, "failed to read from socket");
326-
return Err(Error::from(error));
327326
}
328327
},
329328
}

0 commit comments

Comments
 (0)