11use super :: { IoMode , Nexus , NexusChild } ;
2- use crate :: { persistent_store:: PersistentStore , sleep:: mayastor_sleep} ;
2+ use crate :: {
3+ persistent_store:: { to_json_byte_vec, PersistentStore } ,
4+ sleep:: mayastor_sleep,
5+ store:: store_defs:: StoreError ,
6+ } ;
7+ use etcd_client:: Error as EtcdErr ;
38use serde:: { Deserialize , Serialize } ;
49use std:: time:: Duration ;
510
@@ -8,10 +13,10 @@ use super::Error;
813/// Information associated with the persisted NexusInfo structure.
914pub struct PersistentNexusInfo {
1015 /// Structure that is written to the persistent store.
11- inner : NexusInfo ,
16+ pub inner : NexusInfo ,
1217 /// Key to use to persist the NexusInfo structure.
1318 /// If `Some` the key has been supplied by the control plane.
14- key : Option < String > ,
19+ pub key : Option < String > ,
1520}
1621
1722impl PersistentNexusInfo {
@@ -31,13 +36,20 @@ impl PersistentNexusInfo {
3136
3237/// Definition of the nexus information that gets saved in the persistent
3338/// store.
34- #[ derive( Serialize , Deserialize , Debug , Default ) ]
39+ #[ derive( Clone , Serialize , Deserialize , Debug , Default ) ]
3540pub struct NexusInfo {
3641 /// Nexus destroyed successfully.
3742 pub clean_shutdown : bool ,
43+ /// Nexus needs to be shutdown.
44+ pub do_self_shutdown : bool ,
3845 /// Information about children.
3946 pub children : Vec < ChildInfo > ,
4047}
48+ pub struct NexusInfoTxn < ' a > {
49+ key_info : & ' a mut PersistentNexusInfo ,
50+ // Expected value for the key.
51+ expected : NexusInfo ,
52+ }
4153
4254/// Definition of the child information that gets saved in the persistent
4355/// store.
@@ -96,6 +108,7 @@ impl<'n> Nexus<'n> {
96108 // expect the NexusInfo structure to contain default values.
97109 assert ! ( nexus_info. children. is_empty( ) ) ;
98110 assert ! ( !nexus_info. clean_shutdown) ;
111+ assert ! ( !nexus_info. do_self_shutdown) ;
99112 self . children_iter ( ) . for_each ( |c| {
100113 let child_info = ChildInfo {
101114 uuid : NexusChild :: uuid ( c. uri ( ) ) . expect ( "Failed to get child UUID." ) ,
@@ -164,11 +177,40 @@ impl<'n> Nexus<'n> {
164177
165178 let uuid = NexusChild :: uuid ( child_uri) . expect ( "Failed to get child UUID." ) ;
166179
180+ let expected_value = nexus_info. clone ( ) ;
167181 nexus_info. children . iter_mut ( ) . for_each ( |c| {
168182 if c. uuid == uuid {
169183 c. healthy = * healthy;
170184 }
171185 } ) ;
186+
187+ let mut txn = NexusInfoTxn {
188+ key_info : & mut persistent_nexus_info,
189+ expected : expected_value,
190+ } ;
191+
192+ // Try executing the transaction. If the nexus info key's value isn't what we
193+ // expect, shutdown this nexus. This can only happen if do_self_shutdown is
194+ // found true in etcd for this key, which can only be set by core-agent's
195+ // republish path. In this situation, the newly published nexus could've
196+ // picked up the replica that we are trying to mark unhealthy here. Shutting
197+ // down this nexus here ensures we don't let this IO succeed to other replicas
198+ // after marking this one as unhealthy.
199+ match self . save_txn ( & mut txn) . await {
200+ Ok ( _) => {
201+ self . set_nexus_io_mode ( IoMode :: Normal ) . await ;
202+ return Ok ( ( ) ) ;
203+ }
204+ Err ( e) => {
205+ error ! (
206+ "{self:?}: failed to update persistent store txn, \
207+ will shutdown the nexus: {e}"
208+ ) ;
209+ self . try_self_shutdown ( ) ;
210+
211+ return Err ( e) ;
212+ }
213+ }
172214 }
173215 PersistOp :: Shutdown => {
174216 // Only update the clean shutdown variable. Do not update the
@@ -243,4 +285,82 @@ impl<'n> Nexus<'n> {
243285 }
244286 }
245287 }
288+
289+ async fn save_txn ( & self , info : & mut NexusInfoTxn < ' _ > ) -> Result < ( ) , Error > {
290+ // If a key has been provided, use it to store the NexusInfo; use the
291+ // nexus uuid as the key otherwise.
292+ let key = match & info. key_info . key {
293+ Some ( k) => k. clone ( ) ,
294+ None => self . uuid ( ) . to_string ( ) ,
295+ } ;
296+
297+ let mut retry = PersistentStore :: retries ( ) ;
298+
299+ let new_value = to_json_byte_vec ( & info. key_info . inner ) ;
300+ let expected_value = to_json_byte_vec ( & info. expected ) ;
301+ let mut logged = false ;
302+
303+ loop {
304+ match PersistentStore :: txn_create_execute ( & key, & new_value, & expected_value) . await {
305+ Ok ( txn_resp) => {
306+ if let Some ( current_value) = txn_resp {
307+ let val = serde_json:: from_slice :: < NexusInfo > ( & current_value) . unwrap ( ) ;
308+
309+ // The server had likely received the transaction and executed it, but
310+ // client here saw a timeout. So if the current value is same as what we intended
311+ // to set, then consider success. Don't trust any other value and shutdown.
312+ if current_value == new_value {
313+ info ! ( "value for key {key} already updated: {val:?}" ) ;
314+ return Ok ( ( ) ) ;
315+ }
316+
317+ warn ! ( "current state found: key - {key}, value - {val:?}" ) ;
318+
319+ // This nexus won't be used again but let's still update this field with what's
320+ // found in etcd.
321+ info. key_info . inner_mut ( ) . do_self_shutdown = val. do_self_shutdown ;
322+
323+ return Err ( Error :: SaveStateFailed {
324+ source : StoreError :: Txn {
325+ key : key. clone ( ) ,
326+ source : EtcdErr :: IoError ( std:: io:: Error :: new (
327+ std:: io:: ErrorKind :: Other ,
328+ "Txn CompareOp failed" ,
329+ ) ) ,
330+ } ,
331+ name : self . name . clone ( ) ,
332+ } ) ;
333+ } else {
334+ // Don't need to check individual op responses.
335+ debug ! ( ?key, "{self:?}: the state was saved successfully via txn" ) ;
336+ return Ok ( ( ) ) ;
337+ }
338+ }
339+
340+ Err ( err) => {
341+ retry -= 1 ;
342+ if retry == 0 {
343+ return Err ( Error :: SaveStateFailed {
344+ source : err,
345+ name : self . name . clone ( ) ,
346+ } ) ;
347+ }
348+
349+ if !logged {
350+ error ! (
351+ "{self:?}: failed to persist nexus info transaction: {err}\
352+ will silently retry ({retry} left): {err}"
353+ ) ;
354+ logged = true ;
355+ }
356+
357+ // Allow some time for the connection to the persistent
358+ // store to be re-established before retrying the operation.
359+ if mayastor_sleep ( Duration :: from_secs ( 1 ) ) . await . is_err ( ) {
360+ error ! ( "{self:?}: failed to wait for sleep" ) ;
361+ }
362+ }
363+ } ;
364+ }
365+ }
246366}
0 commit comments