Skip to content

Commit a4f6287

Browse files
mayastor-borsabhilashshetty04
andcommitted
chore(bors): merge pull request #920
920: fix(volume_reconciler): changes in replica disown logic r=abhilashshetty04 a=abhilashshetty04 While we disown a replica from volume we used to validate if the replica is owned by any nexus. In some cases involving Outage on volume target node we observed that the new nexus do get created after the `volumeattachments` are deleted from the node. However, we werent able to disown the `Unknown` replica from volume since it was still owned by old target. Now we check if replica is owned by the `current target` of the volume rather then if it is owned by any target. Co-authored-by: Abhilash Shetty <[email protected]>
2 parents 700ccc8 + aa8814a commit a4f6287

File tree

2 files changed

+18
-9
lines changed

2 files changed

+18
-9
lines changed

control-plane/agents/src/bin/core/controller/reconciler/volume/garbage_collector.rs

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ use crate::controller::{
1010
use agents::errors::SvcError;
1111
use stor_port::types::v0::{
1212
store::{nexus_persistence::NexusInfo, replica::ReplicaSpec, volume::VolumeSpec},
13-
transport::{DestroyVolume, NexusOwners, ReplicaOwners, ResizeReplica, VolumeStatus},
13+
transport::{DestroyVolume, NexusId, NexusOwners, ReplicaOwners, ResizeReplica, VolumeStatus},
1414
};
1515
use tracing::Instrument;
1616

@@ -150,16 +150,16 @@ async fn disown_unused_nexuses(
150150
context: &PollContext,
151151
) -> PollResult {
152152
let mut results = vec![];
153-
154153
for nexus in context.specs().volume_nexuses(volume.uuid()) {
155154
if let Ok(mut nexus) = nexus.operation_guard() {
156155
if let Some(target) = volume.as_ref().target() {
157156
if target.nexus() == nexus.uid() || nexus.as_ref().is_shutdown() {
158157
continue;
159158
}
160159
}
161-
162-
nexus.warn_span(|| tracing::warn!("Attempting to disown unused nexus"));
160+
nexus.info_span(|| {
161+
tracing::info!("Disowning unused nexus from its volume");
162+
});
163163
// the nexus garbage collector will destroy the disowned nexuses
164164
let owner = NexusOwners::Volume(volume.uuid().clone());
165165
match nexus.remove_owners(context.registry(), &owner, false).await {
@@ -206,7 +206,8 @@ async fn disown_unused_replicas(
206206
// don't attempt to disown the replicas if the volume state is faulted or unknown
207207
return PollResult::Ok(PollerState::Busy);
208208
}
209-
209+
let vol_nexus = context.specs().volume_nexuses(volume.uuid());
210+
let vol_nexus_ids: Vec<&NexusId> = vol_nexus.iter().map(|n| n.uuid()).collect();
210211
let mut nexus_info = None; // defer reading from the persistent store unless we find a candidate
211212
let mut results = vec![];
212213

@@ -221,12 +222,12 @@ async fn disown_unused_replicas(
221222
if !replica_online
222223
&& !replica.as_ref().dirty()
223224
&& replica.as_ref().owners.owned_by(volume.uuid())
224-
&& !replica.as_ref().owners.owned_by_a_nexus()
225+
&& !replica.as_ref().owned_by_nexus(&vol_nexus_ids)
225226
&& !replica_in_target
226227
&& !is_replica_healthy(context, &mut nexus_info, replica.as_ref(), volume.as_ref())
227228
.await?
228229
{
229-
volume.warn_span(|| tracing::warn!(replica.uuid = %replica.as_ref().uuid, "Attempting to disown unused replica"));
230+
volume.info_span(|| tracing::info!(replica.uuid = %replica.as_ref().uuid, "Attempting to disown unused replica"));
230231

231232
// the replica garbage collector will destroy the disowned replica
232233
match replica

control-plane/stor-port/src/types/v0/store/replica.rs

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,9 @@ use crate::{
88
AsOperationSequencer, OperationSequence, SpecStatus, SpecTransaction,
99
},
1010
transport::{
11-
self, CreateReplica, HostNqn, PoolId, PoolUuid, Protocol, ReplicaId, ReplicaKind,
12-
ReplicaName, ReplicaOwners, ReplicaShareProtocol, SnapshotCloneSpecParams, VolumeId,
11+
self, CreateReplica, HostNqn, NexusId, PoolId, PoolUuid, Protocol, ReplicaId,
12+
ReplicaKind, ReplicaName, ReplicaOwners, ReplicaShareProtocol, SnapshotCloneSpecParams,
13+
VolumeId,
1314
},
1415
},
1516
IntoOption,
@@ -108,6 +109,13 @@ impl ReplicaSpec {
108109
pub fn owned_by(&self, id: &VolumeId) -> bool {
109110
self.owners.owned_by(id)
110111
}
112+
/// Check if this replica is owned by any of the nexus in volume spec.
113+
pub fn owned_by_nexus(&self, nexus_id: &[&NexusId]) -> bool {
114+
self.owners
115+
.nexuses()
116+
.iter()
117+
.any(|owner| nexus_id.contains(&owner))
118+
}
111119
}
112120

113121
/// Reference of a pool.

0 commit comments

Comments
 (0)