Skip to content

Commit 319fa7d

Browse files
mayastor-borstiagolobocastro
andcommitted
chore(bors): merge pull request #1012
1012: Refactor Scheduling Sorting r=tiagolobocastro a=tiagolobocastro We're trying to find spots where total order is not implemented, leading to panics by the rust std library. This PR doesn't solve anything, just some minor refactor and a couple of fixes where sort order was incorrect. We do switch to the unstable sorter which might "help", since in our simulated testing we could not reproduce the panics with the unstable sort. Further we've identified the dual weight scoring as the main cause behind the total order issue, given that by nature, it's not a total order!! Co-authored-by: Tiago Castro <[email protected]>
2 parents ae10841 + 59a5f30 commit 319fa7d

File tree

4 files changed

+60
-80
lines changed

4 files changed

+60
-80
lines changed

control-plane/agents/src/bin/core/controller/scheduling/mod.rs

Lines changed: 30 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -82,15 +82,15 @@ pub(crate) trait ResourceFilter: Sized {
8282
}
8383
fn sort<F: FnMut(&Self::Item, &Self::Item) -> std::cmp::Ordering>(mut self, sort: F) -> Self {
8484
let data = self.data();
85-
data.list.sort_by(sort);
85+
data.list.sort_unstable_by(sort);
8686
self
8787
}
8888
fn sort_ctx<F: FnMut(&Self::Request, &Self::Item, &Self::Item) -> std::cmp::Ordering>(
8989
mut self,
9090
mut sort: F,
9191
) -> Self {
9292
let data = self.data();
93-
data.list.sort_by(|a, b| sort(&data.context, a, b));
93+
data.list.sort_unstable_by(|a, b| sort(&data.context, a, b));
9494
self
9595
}
9696
fn collect(self) -> Vec<Self::Item>;
@@ -195,16 +195,12 @@ impl ChildSorters {
195195

196196
let childa_is_local = !a.spec().share.shared();
197197
let childb_is_local = !b.spec().share.shared();
198-
if childa_is_local == childb_is_local {
199-
match a.ag_replicas_on_pool().cmp(&b.ag_replicas_on_pool()) {
200-
Ordering::Less => Ordering::Greater,
201-
Ordering::Equal => Ordering::Equal,
202-
Ordering::Greater => Ordering::Less,
198+
match (childa_is_local, childb_is_local) {
199+
(true, true) | (false, false) => {
200+
b.ag_replicas_on_pool().cmp(&a.ag_replicas_on_pool())
203201
}
204-
} else if childa_is_local {
205-
std::cmp::Ordering::Greater
206-
} else {
207-
std::cmp::Ordering::Less
202+
(true, false) => Ordering::Greater,
203+
(false, true) => Ordering::Less,
208204
}
209205
}
210206
ord => ord,
@@ -214,53 +210,30 @@ impl ChildSorters {
214210
}
215211
// sort replicas by their health: prefer healthy replicas over unhealthy
216212
fn sort_by_health(a: &ReplicaItem, b: &ReplicaItem) -> std::cmp::Ordering {
217-
match a.child_info() {
218-
None => {
219-
match b.child_info() {
220-
Some(b_info) if b_info.healthy => {
221-
// sort replicas by their health: prefer healthy replicas over unhealthy
222-
std::cmp::Ordering::Less
223-
}
224-
_ => std::cmp::Ordering::Equal,
225-
}
226-
}
227-
Some(a_info) => match b.child_info() {
228-
Some(b_info) if a_info.healthy && !b_info.healthy => std::cmp::Ordering::Greater,
229-
Some(b_info) if !a_info.healthy && b_info.healthy => std::cmp::Ordering::Less,
230-
_ => std::cmp::Ordering::Equal,
231-
},
213+
match (a.child_info(), b.child_info()) {
214+
(None, None) => Ordering::Equal,
215+
(None, Some(b)) if b.healthy => Ordering::Less,
216+
(None, Some(_)) => Ordering::Equal,
217+
(Some(a), None) if a.healthy => Ordering::Greater,
218+
(Some(_), None) => Ordering::Equal,
219+
(Some(a), Some(b)) => a.healthy.cmp(&b.healthy),
232220
}
233221
}
234222
// remove unused replicas first
235223
fn sort_by_child(a: &ReplicaItem, b: &ReplicaItem) -> std::cmp::Ordering {
236-
match a.child_spec() {
237-
None => {
238-
match b.child_spec() {
239-
None => std::cmp::Ordering::Equal,
240-
Some(_) => {
241-
// prefer the replica that is not part of a nexus
242-
std::cmp::Ordering::Greater
243-
}
244-
}
245-
}
246-
Some(_) => {
247-
match b.child_spec() {
248-
// prefer the replica that is not part of a nexus
249-
None => std::cmp::Ordering::Less,
250-
// compare the child states, and then the rebuild progress
251-
Some(_) => match (a.child_state(), b.child_state()) {
252-
(Some(a_state), Some(b_state)) => {
253-
match a_state.state.partial_cmp(&b_state.state) {
254-
None => a_state.rebuild_progress.cmp(&b_state.rebuild_progress),
255-
Some(ord) => ord,
256-
}
257-
}
258-
(Some(_), None) => std::cmp::Ordering::Less,
259-
(None, Some(_)) => std::cmp::Ordering::Greater,
260-
(None, None) => std::cmp::Ordering::Equal,
261-
},
262-
}
263-
}
224+
match (a.child_spec(), b.child_spec()) {
225+
(None, None) => Ordering::Equal,
226+
(None, Some(_)) => Ordering::Less,
227+
(Some(_), None) => Ordering::Greater,
228+
(Some(_), Some(_)) => match (a.child_state(), b.child_state()) {
229+
(Some(a_state), Some(b_state)) => match a_state.state.cmp(&b_state.state) {
230+
Ordering::Equal => a_state.rebuild_progress.cmp(&b_state.rebuild_progress),
231+
ord => ord,
232+
},
233+
(Some(_), None) => std::cmp::Ordering::Greater,
234+
(None, Some(_)) => std::cmp::Ordering::Less,
235+
(None, None) => std::cmp::Ordering::Equal,
236+
},
264237
}
265238
}
266239
}
@@ -363,6 +336,8 @@ impl AddReplicaSorters {
363336
a: &ChildItem,
364337
b: &ChildItem,
365338
) -> std::cmp::Ordering {
339+
// todo: preferring local to healthy children seems strange, though also why would we
340+
// have healthy replicas not part of the nexus?
366341
let a_is_local = a.state().node == request.nexus_spec().node;
367342
let b_is_local = b.state().node == request.nexus_spec().node;
368343
match (a_is_local, b_is_local) {
@@ -374,7 +349,7 @@ impl AddReplicaSorters {
374349
match (a_healthy, b_healthy) {
375350
(true, false) => std::cmp::Ordering::Less,
376351
(false, true) => std::cmp::Ordering::Greater,
377-
(_, _) => a.pool().free_space().cmp(&b.pool().free_space()),
352+
(_, _) => b.pool().free_space().cmp(&a.pool().free_space()),
378353
}
379354
}
380355
}

control-plane/agents/src/bin/core/controller/scheduling/volume_policy/simple.rs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -138,10 +138,10 @@ impl SimplePolicy {
138138
b: &PoolItem,
139139
) -> std::cmp::Ordering {
140140
match request.registry().encryption_preference_soft() {
141-
true => match b.pool.encrypted().partial_cmp(&a.pool.encrypted()) {
142-
Some(Ordering::Greater) => Ordering::Greater,
143-
Some(Ordering::Less) => Ordering::Less,
144-
None | Some(Ordering::Equal) => {
141+
true => match b.pool.encrypted().cmp(&a.pool.encrypted()) {
142+
Ordering::Greater => Ordering::Greater,
143+
Ordering::Less => Ordering::Less,
144+
Ordering::Equal => {
145145
// sort pools in order of total weight of certain field values.
146146
SimplePolicy::sort_by_weights(request, a, b)
147147
}
@@ -152,7 +152,7 @@ impl SimplePolicy {
152152
}
153153

154154
/// Sort pools using weights between:
155-
/// 1. number of replicas or number of replicas of a ag (N_REPL_WEIGHT %)
155+
/// 1. number of replicas or number of replicas of an ag (N_REPL_WEIGHT %)
156156
/// 2. free space (FREE_SPACE_WEIGHT %)
157157
/// 3. overcommitment (OVER_COMMIT_WEIGHT %)
158158
pub(crate) fn sort_by_weights(

control-plane/agents/src/bin/core/controller/scheduling/volume_policy/thick.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -92,10 +92,10 @@ impl ThickPolicy {
9292
b: &PoolItem,
9393
) -> std::cmp::Ordering {
9494
match request.registry().encryption_preference_soft() {
95-
true => match b.pool.encrypted().partial_cmp(&a.pool.encrypted()) {
96-
Some(Ordering::Greater) => Ordering::Greater,
97-
Some(Ordering::Less) => Ordering::Less,
98-
None | Some(Ordering::Equal) => {
95+
true => match b.pool.encrypted().cmp(&a.pool.encrypted()) {
96+
Ordering::Greater => Ordering::Greater,
97+
Ordering::Less => Ordering::Less,
98+
Ordering::Equal => {
9999
// sort pools in order of preference (from least to most number of replicas)
100100
ThickPolicy::sort_by_weights(request, a, b)
101101
}

control-plane/stor-port/src/types/v0/transport/child.rs

Lines changed: 21 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -99,30 +99,35 @@ impl ChildState {
9999
}
100100
impl PartialOrd for ChildState {
101101
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
102+
Some(self.cmp(other))
103+
}
104+
}
105+
impl Ord for ChildState {
106+
fn cmp(&self, other: &Self) -> Ordering {
102107
match &self {
103108
ChildState::Unknown => match &other {
104-
ChildState::Unknown => Some(Ordering::Equal),
105-
ChildState::Online => Some(Ordering::Less),
106-
ChildState::Degraded => Some(Ordering::Less),
107-
ChildState::Faulted => Some(Ordering::Greater),
109+
ChildState::Unknown => Ordering::Equal,
110+
ChildState::Online => Ordering::Less,
111+
ChildState::Degraded => Ordering::Less,
112+
ChildState::Faulted => Ordering::Greater,
108113
},
109114
ChildState::Online => match &other {
110-
ChildState::Unknown => Some(Ordering::Greater),
111-
ChildState::Online => Some(Ordering::Equal),
112-
ChildState::Degraded => Some(Ordering::Greater),
113-
ChildState::Faulted => Some(Ordering::Greater),
115+
ChildState::Unknown => Ordering::Greater,
116+
ChildState::Online => Ordering::Equal,
117+
ChildState::Degraded => Ordering::Greater,
118+
ChildState::Faulted => Ordering::Greater,
114119
},
115120
ChildState::Degraded => match &other {
116-
ChildState::Unknown => Some(Ordering::Greater),
117-
ChildState::Online => Some(Ordering::Less),
118-
ChildState::Degraded => Some(Ordering::Equal),
119-
ChildState::Faulted => Some(Ordering::Greater),
121+
ChildState::Unknown => Ordering::Greater,
122+
ChildState::Online => Ordering::Less,
123+
ChildState::Degraded => Ordering::Equal,
124+
ChildState::Faulted => Ordering::Greater,
120125
},
121126
ChildState::Faulted => match &other {
122-
ChildState::Unknown => Some(Ordering::Less),
123-
ChildState::Online => Some(Ordering::Less),
124-
ChildState::Degraded => Some(Ordering::Less),
125-
ChildState::Faulted => Some(Ordering::Equal),
127+
ChildState::Unknown => Ordering::Less,
128+
ChildState::Online => Ordering::Less,
129+
ChildState::Degraded => Ordering::Less,
130+
ChildState::Faulted => Ordering::Equal,
126131
},
127132
}
128133
}

0 commit comments

Comments
 (0)