Skip to content

Commit e77a824

Browse files
fix(self-shutdown): abort frozen ios when unsharing shutdown nexus
These frozen IOs prevent the nexus from shutting down. We don't have any hooks today to do this whilst the target is stopping, so we add a simple loop which tries a number of times. Ideally we should get some sort of callback to trigger this. Signed-off-by: Tiago Castro <[email protected]>
1 parent 88d1cc3 commit e77a824

File tree

5 files changed

+142
-17
lines changed

5 files changed

+142
-17
lines changed

io-engine/src/bdev/nexus/nexus_bdev.rs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1114,6 +1114,18 @@ impl<'n> Nexus<'n> {
11141114
Ok(())
11151115
}
11161116

1117+
/// Aborts all frozen IOs of a shutdown Nexus.
1118+
/// # Warning
1119+
/// These aborts may translate into I/O errors for the initiator.
1120+
pub async fn abort_shutdown_frozen_ios(&self) {
1121+
if self.status() == NexusStatus::Shutdown {
1122+
self.traverse_io_channels_async((), |channel, _| {
1123+
channel.abort_frozen();
1124+
})
1125+
.await;
1126+
}
1127+
}
1128+
11171129
/// Suspend any incoming IO to the bdev pausing the controller allows us to
11181130
/// handle internal events and which is a protocol feature.
11191131
/// In case concurrent pause requests take place, the other callers

io-engine/src/bdev/nexus/nexus_channel.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ impl<'n> Debug for NexusChannel<'n> {
3232
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
3333
write!(
3434
f,
35-
"{io} chan '{nex}' core:{core}({cur}) [R:{r} W:{w} D:{d} L:{l} C:{c}]",
35+
"{io} chan '{nex}' core:{core}({cur}) [R:{r} W:{w} D:{d} L:{l} C:{c} F:{f}]",
3636
io = if self.is_io_chan { "I/O" } else { "Aux" },
3737
nex = self.nexus.nexus_name(),
3838
core = self.core,
@@ -42,6 +42,7 @@ impl<'n> Debug for NexusChannel<'n> {
4242
d = self.detached.len(),
4343
l = self.io_logs.len(),
4444
c = self.nexus.child_count(),
45+
f = self.frozen_ios.len()
4546
)
4647
}
4748
}

io-engine/src/bdev/nexus/nexus_share.rs

Lines changed: 33 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,13 @@
1-
use crate::bdev::PtplFileOps;
1+
use super::{nexus_err, nexus_lookup, Error, NbdDisk, Nexus, NexusTarget};
2+
use crate::{
3+
bdev::PtplFileOps,
4+
core::{NvmfShareProps, Protocol, PtplProps, Reactors, Share, UpdateProps},
5+
sleep::mayastor_sleep,
6+
};
27
use async_trait::async_trait;
8+
use futures::{channel::oneshot, future::FusedFuture};
39
use snafu::ResultExt;
4-
use std::pin::Pin;
5-
6-
use super::{nexus_err, Error, NbdDisk, Nexus, NexusTarget};
7-
8-
use crate::core::{NvmfShareProps, Protocol, PtplProps, Share, UpdateProps};
10+
use std::{pin::Pin, time::Duration};
911

1012
///
1113
/// The sharing of the nexus is different compared to regular bdevs
@@ -78,6 +80,31 @@ impl<'n> Share for Nexus<'n> {
7880
async fn unshare(mut self: Pin<&mut Self>) -> Result<(), Self::Error> {
7981
info!("{:?}: unsharing nexus bdev...", self);
8082

83+
// TODO: we should not allow new initiator connections for a shutdown
84+
// nexus!
85+
86+
// TODO: we may want to disable the freeze at this point instead,
87+
// allowing new I/Os to fail "normally"
88+
let name = self.name.clone();
89+
let (_s, r) = oneshot::channel::<()>();
90+
Reactors::master().send_future(async move {
91+
for _ in 0 ..= 10 {
92+
mayastor_sleep(Duration::from_secs(2)).await.ok();
93+
if r.is_terminated() {
94+
// This means the unshare is complete, so nothing to do here
95+
return;
96+
}
97+
// If we're not unshared, then abort any I/Os that may have
98+
// reached
99+
if let Some(nexus) = nexus_lookup(&name) {
100+
nexus.abort_shutdown_frozen_ios().await;
101+
}
102+
}
103+
});
104+
105+
// Aborts frozen I/Os a priori
106+
self.abort_shutdown_frozen_ios().await;
107+
81108
let name = self.name.clone();
82109
self.as_mut().pin_bdev_mut().unshare().await.context(
83110
nexus_err::UnshareNexus {

io-engine/tests/persistence.rs

Lines changed: 74 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
use crate::common::fio_run_verify;
1+
use crate::common::{dd_random_file, fio_run_verify};
22
use common::compose::{
33
rpc::v0::{
44
mayastor::{
@@ -347,11 +347,80 @@ async fn persistent_store_connection() {
347347
assert!(get_nexus(ms1, nexus_uuid).await.is_some());
348348
}
349349

350+
#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
351+
async fn self_shutdown_destroy() {
352+
let test = start_infrastructure_("self_shutdown_destroy", Some("1")).await;
353+
let grpc = GrpcConnect::new(&test);
354+
let ms1 = &mut grpc.grpc_handle("ms1").await.unwrap();
355+
let ms2 = &mut grpc.grpc_handle("ms2").await.unwrap();
356+
let ms3 = &mut grpc.grpc_handle("ms3").await.unwrap();
357+
358+
// Create bdevs and share over nvmf.
359+
let child1 = create_and_share_bdevs(ms2, CHILD1_UUID).await;
360+
let child2 = create_and_share_bdevs(ms3, CHILD2_UUID).await;
361+
362+
// Create and publish a nexus.
363+
let nexus_uuid = "8272e9d3-3738-4e33-b8c3-769d8eed5771";
364+
create_nexus(ms1, nexus_uuid, vec![child1.clone(), child2.clone()]).await;
365+
let nexus_uri = publish_nexus(ms1, nexus_uuid).await;
366+
367+
// Create and connect NVMF target.
368+
let target = libnvme_rs::NvmeTarget::try_from(nexus_uri.clone())
369+
.unwrap()
370+
.with_reconnect_delay(Some(1))
371+
.ctrl_loss_timeout(Some(1))
372+
.with_rand_hostnqn(true);
373+
target.connect().unwrap();
374+
375+
// simulate node with child and etcd going down
376+
test.stop("etcd").await.unwrap();
377+
test.stop("ms3").await.unwrap();
378+
379+
// allow pstor to timeout and self shutdown
380+
// todo: use wait loop
381+
tokio::time::sleep(Duration::from_secs(2)).await;
382+
383+
let nexus = get_nexus(ms1, nexus_uuid).await.unwrap();
384+
assert_eq!(nexus.state, NexusState::NexusShutdown as i32);
385+
386+
let devices = target.block_devices(2).unwrap();
387+
let fio_hdl = tokio::spawn(async move {
388+
dd_random_file(&devices[0].to_string(), 4096, 1)
389+
});
390+
391+
test.start("etcd").await.unwrap();
392+
393+
ms1.mayastor
394+
.destroy_nexus(DestroyNexusRequest {
395+
uuid: nexus_uuid.to_string(),
396+
})
397+
.await
398+
.expect("Failed to destroy nexus");
399+
400+
// Disconnect NVMF target
401+
target.disconnect().unwrap();
402+
403+
fio_hdl.await.unwrap();
404+
}
405+
350406
/// Start the containers for the tests.
351407
async fn start_infrastructure(test_name: &str) -> ComposeTest {
408+
start_infrastructure_(test_name, None).await
409+
}
410+
411+
/// Start the containers for the tests.
412+
async fn start_infrastructure_(
413+
test_name: &str,
414+
ps_retries: Option<&str>,
415+
) -> ComposeTest {
352416
common::composer_init();
353417

354418
let etcd_endpoint = format!("http://etcd.{test_name}:2379");
419+
let mut args = vec!["-p", &etcd_endpoint];
420+
if let Some(retries) = ps_retries {
421+
args.extend(["--ps-retries", retries]);
422+
}
423+
355424
Builder::new()
356425
.name(test_name)
357426
.add_container_spec(
@@ -371,20 +440,17 @@ async fn start_infrastructure(test_name: &str) -> ComposeTest {
371440
)
372441
.add_container_bin(
373442
"ms1",
374-
Binary::from_dbg("io-engine").with_args(vec!["-p", &etcd_endpoint]),
443+
Binary::from_dbg("io-engine").with_args(args.clone()),
375444
)
376445
.add_container_bin(
377446
"ms2",
378-
Binary::from_dbg("io-engine").with_args(vec!["-p", &etcd_endpoint]),
447+
Binary::from_dbg("io-engine").with_args(args.clone()),
379448
)
380449
.add_container_bin(
381450
"ms3",
382-
Binary::from_dbg("io-engine").with_args(vec!["-p", &etcd_endpoint]),
383-
)
384-
.add_container_bin(
385-
"ms4",
386-
Binary::from_dbg("io-engine").with_args(vec!["-p", &etcd_endpoint]),
451+
Binary::from_dbg("io-engine").with_args(args.clone()),
387452
)
453+
.add_container_bin("ms4", Binary::from_dbg("io-engine").with_args(args))
388454
.build()
389455
.await
390456
.unwrap()

libnvme-rs/src/nvme_uri.rs

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,10 @@ pub struct NvmeTarget {
6464
trtype: NvmeTransportType,
6565
/// Auto-Generate random HostNqn.
6666
hostnqn_autogen: bool,
67+
/// The Reconnect Delay.
68+
reconnect_delay: Option<u8>,
69+
/// The Controller Loss Timeout.
70+
ctrl_loss_timeout: Option<u32>,
6771
}
6872

6973
impl TryFrom<String> for NvmeTarget {
@@ -117,6 +121,8 @@ impl TryFrom<&str> for NvmeTarget {
117121
trsvcid: url.port().unwrap_or(4420),
118122
subsysnqn: subnqn,
119123
hostnqn_autogen: false,
124+
reconnect_delay: None,
125+
ctrl_loss_timeout: None,
120126
})
121127
}
122128
}
@@ -128,6 +134,16 @@ impl NvmeTarget {
128134
self.hostnqn_autogen = random;
129135
self
130136
}
137+
/// With the reconnect delay.
138+
pub fn with_reconnect_delay(mut self, delay: Option<u8>) -> Self {
139+
self.reconnect_delay = delay;
140+
self
141+
}
142+
/// With the ctrl loss timeout.
143+
pub fn ctrl_loss_timeout(mut self, timeout: Option<u32>) -> Self {
144+
self.ctrl_loss_timeout = timeout;
145+
self
146+
}
131147
/// Connect to NVMe target
132148
/// Returns Ok on successful connect
133149
pub fn connect(&self) -> Result<(), NvmeError> {
@@ -184,8 +200,11 @@ impl NvmeTarget {
184200
host_iface,
185201
queue_size: 0,
186202
nr_io_queues: 0,
187-
reconnect_delay: 0,
188-
ctrl_loss_tmo: crate::NVMF_DEF_CTRL_LOSS_TMO as i32,
203+
reconnect_delay: self.reconnect_delay.unwrap_or(0) as i32,
204+
ctrl_loss_tmo: self
205+
.ctrl_loss_timeout
206+
.unwrap_or(crate::NVMF_DEF_CTRL_LOSS_TMO)
207+
as i32,
189208
fast_io_fail_tmo: 0,
190209
keep_alive_tmo: 0,
191210
nr_write_queues: 0,

0 commit comments

Comments
 (0)