Commit b04c16b

mayastor-bors and Abhinandan-Purkait committed
chore(bors): merge pull request #968
968: feat(scaling volume): prefer cordoned nodes while removing replicas r=Abhinandan-Purkait a=Abhinandan-Purkait

Currently, when scaling down, a replica that resides on a cordoned node is not explicitly considered for removal. With this change, replicas on cordoned nodes are preferred for removal.

Co-authored-by: Abhinandan Purkait <[email protected]>
2 parents 3e352f6 + c4ab363 commit b04c16b
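
As a rough, self-contained illustration of the new preference (hypothetical simplified types, not the control plane's ReplicaItem or NodeSpec): when removal candidates are sorted, a replica whose node is cordoned should end up at the front of the list.

use std::cmp::Ordering;

// Hypothetical stand-ins for illustration only; the real control-plane types
// carry far more state than a cordon flag.
#[derive(Debug)]
struct NodeInfo {
    cordoned: bool,
}

#[derive(Debug)]
struct RemovalCandidate {
    replica: &'static str,
    node: Option<NodeInfo>,
}

// Order candidates so replicas on cordoned nodes sort to the front,
// mirroring the comparison added in `ChildSorters` below.
fn prefer_cordoned(a: &RemovalCandidate, b: &RemovalCandidate) -> Ordering {
    match (&a.node, &b.node) {
        // Compare b against a so `true` (cordoned) orders before `false`.
        (Some(na), Some(nb)) => nb.cordoned.cmp(&na.cordoned),
        // If a node spec is missing, fall back to whether it is known at all.
        _ => a.node.is_some().cmp(&b.node.is_some()),
    }
}

fn main() {
    let mut candidates = vec![
        RemovalCandidate { replica: "replica-a", node: Some(NodeInfo { cordoned: false }) },
        RemovalCandidate { replica: "replica-b", node: Some(NodeInfo { cordoned: true }) },
    ];
    candidates.sort_by(prefer_cordoned);
    // The front of the sorted list is the preferred removal target.
    assert_eq!(candidates[0].replica, "replica-b");
    println!("{candidates:?}");
}

The reversed comparison (b against a) is what pushes `true` (cordoned) ahead of `false`, matching the `b.cordoned().cmp(&a.cordoned())` line in the diff below.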

File tree

4 files changed: +32 -0 lines changed


control-plane/agents/src/bin/core/controller/scheduling/mod.rs

Lines changed: 10 additions & 0 deletions

@@ -168,6 +168,7 @@ impl ChildSorters {
         match Self::sort_by_health(a, b) {
             Ordering::Equal => match Self::sort_by_child(a, b) {
                 Ordering::Equal => {
+                    // Remove replicas from nodes which are cordoned with most priority.
                     // remove mismatched topology replicas first
                     if let (Some(a), Some(b)) = (a.valid_node_topology(), b.valid_node_topology()) {
                         match a.cmp(b) {
@@ -183,6 +184,15 @@ impl ChildSorters {
                         }
                     }

+                    match if let (Some(a), Some(b)) = (a.node_spec(), b.node_spec()) {
+                        b.cordoned().cmp(&a.cordoned())
+                    } else {
+                        a.node_spec().is_some().cmp(&b.node_spec().is_some())
+                    } {
+                        Ordering::Equal => {}
+                        _else => return _else,
+                    }
+
                     let childa_is_local = !a.spec().share.shared();
                     let childb_is_local = !b.spec().share.shared();
                     if childa_is_local == childb_is_local {
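
In the comparator above, the cordon check is only one tie-breaker in a longer chain (health, child state, topology, then locality). A sketch, with hypothetical simplified fields, of how such a step can be composed with `Ordering::then_with`:

use std::cmp::Ordering;

// Hypothetical, stripped-down removal candidate; only the fields used below.
struct Item {
    healthy: bool,
    cordoned: Option<bool>, // None when the replica's node spec is unknown
}

fn removal_order(a: &Item, b: &Item) -> Ordering {
    // Stand-in for the earlier sort stages (health, child state, topology).
    a.healthy
        .cmp(&b.healthy)
        // The added stage: prefer candidates on cordoned nodes.
        .then_with(|| match (a.cordoned, b.cordoned) {
            // Reversed so `true` (cordoned) orders before `false`.
            (Some(ca), Some(cb)) => cb.cmp(&ca),
            // Fall back to "is the node known at all" when either side is missing.
            _ => a.cordoned.is_some().cmp(&b.cordoned.is_some()),
        })
    // ...further tie-breakers (e.g. replica locality) would chain on here.
}

fn main() {
    let a = Item { healthy: true, cordoned: Some(true) };
    let b = Item { healthy: true, cordoned: Some(false) };
    // Health is equal, so the cordon tie-breaker decides: `a` orders first.
    assert_eq!(removal_order(&a, &b), Ordering::Less);
}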

control-plane/agents/src/bin/core/controller/scheduling/resources/mod.rs

Lines changed: 9 additions & 0 deletions

@@ -7,6 +7,7 @@ use stor_port::types::v0::{
     store::{
         nexus_child::NexusChild,
         nexus_persistence::{ChildInfo, NexusInfo},
+        node::NodeSpec,
         replica::ReplicaSpec,
         snapshots::replica::ReplicaSnapshot,
         volume::VolumeSpec,
@@ -178,10 +179,12 @@ pub(crate) struct ReplicaItem {
     ag_replicas_on_pool: Option<u64>,
     valid_pool_topology: Option<bool>,
     valid_node_topology: Option<bool>,
+    node_spec: Option<NodeSpec>,
 }

 impl ReplicaItem {
     /// Create new `Self` from the provided arguments.
+    #[allow(clippy::too_many_arguments)]
     pub(crate) fn new(
         replica: ReplicaSpec,
         replica_state: Option<&Replica>,
@@ -190,6 +193,7 @@ impl ReplicaItem {
         child_spec: Option<NexusChild>,
         child_info: Option<ChildInfo>,
         ag_replicas_on_pool: Option<u64>,
+        node_spec: Option<NodeSpec>,
     ) -> Self {
         Self {
             replica_spec: replica,
@@ -201,6 +205,7 @@ impl ReplicaItem {
             ag_replicas_on_pool,
             valid_pool_topology: None,
             valid_node_topology: None,
+            node_spec,
         }
     }
     /// Set the validity of the replica's node topology.
@@ -221,6 +226,10 @@ impl ReplicaItem {
     pub(crate) fn valid_node_topology(&self) -> &Option<bool> {
         &self.valid_node_topology
     }
+    /// Get a reference to the node topology validity flag.
+    pub(crate) fn node_spec(&self) -> &Option<NodeSpec> {
+        &self.node_spec
+    }
     /// Get a reference to the pool topology validity flag.
     pub(crate) fn valid_pool_topology(&self) -> &Option<bool> {
         &self.valid_pool_topology

control-plane/agents/src/bin/core/controller/scheduling/volume.rs

Lines changed: 2 additions & 0 deletions

@@ -297,6 +297,7 @@ impl GetChildForRemovalContext {
         let replica_states = self.registry.replicas().await;
         replicas
             .map(|replica_spec| {
+                let replica_node_spec = self.registry.specs().replica_node_spec(&replica_spec.uuid);
                 let ag_rep_count = pool_ag_rep
                     .as_ref()
                     .and_then(|map| map.get(replica_spec.pool_name()).cloned());
@@ -347,6 +348,7 @@ impl GetChildForRemovalContext {
                         .cloned()
                 }),
                 ag_rep_count,
+                replica_node_spec,
             )
             .with_node_topology({
                 Some(NodeFilters::topology_replica(

control-plane/agents/src/bin/core/pool/specs.rs

Lines changed: 11 additions & 0 deletions

@@ -13,6 +13,7 @@ use stor_port::{
     transport_api::ResourceKind,
     types::v0::{
         store::{
+            node::NodeSpec,
             pool::{PoolLabelOp, PoolOperation, PoolSpec, PoolUnLabelOp},
             replica::{ReplicaOperation, ReplicaSpec},
             SpecStatus, SpecTransaction,
@@ -344,6 +345,16 @@ impl ResourceSpecsLocked {
         specs.replica_spec_node(replica.immutable_ref())
     }

+    /// Get the node where the replica `id` resides on, if it exists.
+    pub(crate) fn replica_node_spec(&self, id: &ReplicaId) -> Option<NodeSpec> {
+        let node_id = self.replica_id_node(id);
+        if let Some(node_id) = node_id {
+            let specs = self.read();
+            return specs.nodes.get(&node_id).map(|n| n.lock().clone());
+        };
+        None
+    }
+
     /// Get or Create the resourced PoolSpec for the given request.
     pub(crate) fn get_or_create_pool(&self, request: &CreatePool) -> ResourceMutex<PoolSpec> {
         let mut specs = self.write();
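
The new replica_node_spec helper resolves a replica to its hosting node and clones the node spec out from under the per-node lock, so callers never hold registry locks. A minimal sketch of that lookup pattern under assumed simplified types (plain HashMaps and a std Mutex in place of the control plane's guarded resource maps):

use std::{
    collections::HashMap,
    sync::{Arc, Mutex},
};

// Hypothetical, simplified node spec; the real one carries labels,
// cordon/drain state, and more.
#[derive(Clone, Debug)]
struct NodeSpec {
    id: String,
    cordoned: bool,
}

struct Specs {
    // replica id -> id of the node hosting that replica
    replica_nodes: HashMap<String, String>,
    // node id -> individually locked node spec
    nodes: HashMap<String, Arc<Mutex<NodeSpec>>>,
}

impl Specs {
    // Look up the spec of the node hosting `replica_id`, if known, cloning it
    // out so no lock is held once this function returns.
    fn replica_node_spec(&self, replica_id: &str) -> Option<NodeSpec> {
        let node_id = self.replica_nodes.get(replica_id)?;
        self.nodes.get(node_id).map(|n| n.lock().unwrap().clone())
    }
}

fn main() {
    let node = Arc::new(Mutex::new(NodeSpec { id: "node-1".into(), cordoned: true }));
    let specs = Specs {
        replica_nodes: HashMap::from([("replica-1".to_string(), "node-1".to_string())]),
        nodes: HashMap::from([("node-1".to_string(), node)]),
    };
    let spec = specs.replica_node_spec("replica-1");
    println!("{spec:?}");
    assert_eq!(spec.as_ref().map(|s| s.id.as_str()), Some("node-1"));
    assert_eq!(spec.map(|s| s.cordoned), Some(true));
}

Cloning under the lock gives the scheduler a consistent snapshot of the node spec without pinning the registry while removal candidates are sorted.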
