Skip to content

Commit 9064f7e

Browse files
committed
fix: adjust diskNormalized strategy to exponentially penalize cost based on disk utilization
1 parent ad106a8 commit 9064f7e

2 files changed

Lines changed: 78 additions & 14 deletions

File tree

server/src/main/java/org/apache/druid/server/coordinator/balancer/DiskNormalizedCostBalancerStrategy.java

Lines changed: 21 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -26,18 +26,19 @@
2626

2727
/**
2828
* A {@link BalancerStrategy} which normalizes the cost of placing a segment on a
29-
* server as calculated by {@link CostBalancerStrategy} by multiplying it by the
30-
* server's disk usage ratio.
29+
* server as calculated by {@link CostBalancerStrategy} by dividing by the
30+
* server's available disk headroom.
3131
* <pre>
32-
* normalizedCost = cost * usageRatio
32+
* normalizedCost = cost / max(EPSILON, 1 - usageRatio)
3333
* where usageRatio = diskUsed / totalDiskSpace
3434
* </pre>
35-
* This penalizes servers that are more full, driving disk utilization to equalize
36-
* across the tier. When all servers have equal disk usage, the behavior is identical
37-
* to {@link CostBalancerStrategy}. When historicals have different disk capacities,
38-
* this naturally accounts for both fill level and total capacity.
35+
* The denominator diverges as a server approaches full, so disk fullness has
36+
* more weight over the placement decision when servers are nearly full,
37+
* regardless of asymmetries in the locality cost. {@link #EPSILON} is a small
38+
* numerical floor on the divisor to guard against division by zero (or by
39+
* negative values during in-flight loads).
3940
* <p>
40-
* To prevent oscillation when servers have similar utilization, any server that
41+
* To prevent oscillation when servers have similar headroom, any server that
4142
* is already projected to hold the segment (the source on a move, or a currently
4243
* serving node on a drop) receives a cost discount equal to
4344
* {@link #DEFAULT_MOVE_COST_SAVINGS_THRESHOLD}. A move therefore fires only when
@@ -55,6 +56,13 @@ public class DiskNormalizedCostBalancerStrategy extends CostBalancerStrategy
5556
*/
5657
static final double DEFAULT_MOVE_COST_SAVINGS_THRESHOLD = 0.05;
5758

59+
/**
60+
* Numerical floor on the headroom divisor to prevent division by zero or by
61+
* negative values when {@code usageRatio >= 1.0} (possible for over-allocated
62+
* servers or during in-flight loads).
63+
*/
64+
static final double EPSILON = 1e-6;
65+
5866
private final double sourceCostMultiplier;
5967

6068
public DiskNormalizedCostBalancerStrategy(ListeningExecutorService exec)
@@ -85,17 +93,16 @@ protected double computePlacementCost(
8593
return cost;
8694
}
8795

88-
// Guard against NaN propagation in the cost comparator if a server
89-
// somehow reports a non-positive maxSize. Such a server cannot hold
90-
// anything and will be rejected by canLoadSegment, so returning the
91-
// raw cost is safe.
96+
// A server with non-positive maxSize cannot hold anything and will be
97+
// rejected by canLoadSegment; return the raw cost to avoid NaN propagation.
9298
final long maxSize = server.getMaxSize();
9399
if (maxSize <= 0) {
94100
return cost;
95101
}
96102

97-
double usageRatio = (double) server.getSizeUsed() / maxSize;
98-
double normalizedCost = cost * usageRatio;
103+
final double usageRatio = (double) server.getSizeUsed() / maxSize;
104+
final double headroom = Math.max(EPSILON, 1.0 - usageRatio);
105+
double normalizedCost = cost / headroom;
99106

100107
if (server.isProjectedSegment(proposalSegment)) {
101108
normalizedCost *= sourceCostMultiplier;

server/src/test/java/org/apache/druid/server/coordinator/balancer/DiskNormalizedCostBalancerStrategyTest.java

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -304,6 +304,63 @@ public void testThresholdBlocksMarginalMove()
304304
Assert.assertEquals("DEST", movedTo.getServer().getName());
305305
}
306306

307+
@Test
308+
public void testNearFullServerIsNotChosenForNewSegmentLoad()
309+
{
310+
final long maxSize = 10_000_000L;
311+
// A: 95% full, 5 same-DS DAY segments -> raw cost = 10 * K (low, few co-located segs)
312+
final ServerHolder nearFull = buildServer("A", maxSize, 9_500_000L, 0, 5);
313+
// B: 70% full, 20 same-DS DAY segments -> raw cost = 40 * K (higher, more co-located)
314+
final ServerHolder partial = buildServer("B", maxSize, 7_000_000L, 100, 20);
315+
316+
final DataSegment newSegment = getSegment(1000);
317+
final List<ServerHolder> servers = new ArrayList<>();
318+
servers.add(nearFull);
319+
servers.add(partial);
320+
321+
// CostBalancerStrategy picks A because raw cost 10K < 40K.
322+
Assert.assertEquals(
323+
"Pure CostBalancerStrategy must pick the near-full server (lower raw cost)",
324+
"A",
325+
newCostStrategy().findServersToLoadSegment(newSegment, servers).next().getServer().getName()
326+
);
327+
328+
// DiskNormalized: A_norm = 10K / 0.05 = 200K, B_norm = 40K / 0.30 = 133K -> B wins.
329+
Assert.assertEquals(
330+
"DiskNormalized must prefer the emptier server despite its higher raw cost",
331+
"B",
332+
newDiskNormalizedStrategy().findServersToLoadSegment(newSegment, servers).next().getServer().getName()
333+
);
334+
}
335+
336+
@Test
337+
public void testNearFullServerIsNotChosenAsMoveDestination()
338+
{
339+
final long maxSize = 10_000_000L;
340+
// SOURCE: 70% full, 20 same-DS DAY segments; segmentToMove is one of them.
341+
final ServerHolder source = buildServer("SOURCE", maxSize, 7_000_000L, 0, 20);
342+
// DEST: 95% full, 5 same-DS DAY segments -> raw cost 10K < SOURCE's 38K.
343+
final ServerHolder nearFullDest = buildServer("DEST", maxSize, 9_500_000L, 100, 5);
344+
345+
final DataSegment segmentToMove = getSegment(0);
346+
final List<ServerHolder> servers = new ArrayList<>();
347+
servers.add(source);
348+
servers.add(nearFullDest);
349+
350+
// CostBalancerStrategy: DEST raw cost (10K) < SOURCE raw cost (38K) -> recommends the move.
351+
final ServerHolder costResult =
352+
newCostStrategy().findDestinationServerToMoveSegment(segmentToMove, source, servers);
353+
Assert.assertNotNull("CostBalancerStrategy must recommend moving to the near-full DEST", costResult);
354+
Assert.assertEquals("DEST", costResult.getServer().getName());
355+
356+
// DiskNormalized: DEST_norm = 10K / 0.05 = 200K > SOURCE_norm = 38K / 0.30 * 0.95 ≈ 120K.
357+
// Near-full DEST is too expensive after normalization -> no move.
358+
Assert.assertNull(
359+
"DiskNormalized must block the move to the near-full server",
360+
newDiskNormalizedStrategy().findDestinationServerToMoveSegment(segmentToMove, source, servers)
361+
);
362+
}
363+
307364
@Test
308365
public void testRejectsInvalidThreshold()
309366
{

0 commit comments

Comments
 (0)