|
25 | 25 | import org.apache.druid.indexing.common.stats.DropwizardRowIngestionMeters; |
26 | 26 | import org.apache.druid.indexing.overlord.supervisor.SupervisorSpec; |
27 | 27 | import org.apache.druid.indexing.overlord.supervisor.autoscaler.LagStats; |
28 | | -import org.apache.druid.indexing.overlord.supervisor.autoscaler.ScaleDirection; |
29 | 28 | import org.apache.druid.indexing.overlord.supervisor.autoscaler.SupervisorTaskAutoScaler; |
30 | 29 | import org.apache.druid.indexing.seekablestream.SeekableStreamIndexTaskRunner; |
31 | 30 | import org.apache.druid.indexing.seekablestream.supervisor.SeekableStreamSupervisor; |
32 | | -import org.apache.druid.java.util.common.DateTimes; |
33 | 31 | import org.apache.druid.java.util.common.Either; |
34 | 32 | import org.apache.druid.java.util.common.StringUtils; |
35 | 33 | import org.apache.druid.java.util.common.concurrent.Execs; |
@@ -91,8 +89,6 @@ public class CostBasedAutoScaler implements SupervisorTaskAutoScaler |
91 | 89 | private final WeightedCostFunction costFunction; |
92 | 90 | private volatile CostMetrics lastKnownMetrics; |
93 | 91 |
|
94 | | - private volatile long lastScaleActionTimeMillis = -1; |
95 | | - |
96 | 92 | public CostBasedAutoScaler( |
97 | 93 | SeekableStreamSupervisor supervisor, |
98 | 94 | CostBasedAutoScalerConfig config, |
@@ -177,48 +173,38 @@ public int computeTaskCountForScaleAction() |
177 | 173 |
|
178 | 174 | int currentTaskCount = supervisor.getIoConfig().getTaskCount(); |
179 | 175 |
|
180 | | - // 2) If already outsized, scale to the configured boundary to get back to a safe state, |
181 | | - // regardless of optimal task count. Clamp either way so subsequent logic uses a valid |
182 | | - // reference even when scaling is on cooldown. |
183 | 176 | final boolean isTaskCountOutOfBounds = currentTaskCount < config.getTaskCountMin() |
184 | 177 | || currentTaskCount > config.getTaskCountMax(); |
185 | 178 | if (isTaskCountOutOfBounds) { |
186 | 179 | final int clampedTaskCount = Math.min(config.getTaskCountMax(), Math.max(config.getTaskCountMin(), currentTaskCount)); |
187 | | - final ScaleDirection direction = currentTaskCount < config.getTaskCountMin() |
188 | | - ? ScaleDirection.SCALE_UP : ScaleDirection.SCALE_DOWN; |
189 | | - if (isScaleActionAllowed(direction)) { |
190 | | - lastScaleActionTimeMillis = DateTimes.nowUtc().getMillis(); |
191 | | - log.info( |
192 | | - "Task count for supervisor[%s] was out of bounds [%d,%d], urgently scaling from [%d] to [%d].", |
193 | | - supervisorId, config.getTaskCountMin(), config.getTaskCountMax(), currentTaskCount, clampedTaskCount |
194 | | - ); |
195 | | - return clampedTaskCount; |
196 | | - } |
197 | | - currentTaskCount = clampedTaskCount; |
| 180 | + log.info( |
| 181 | + "Task count for supervisor[%s] was out of bounds [%d,%d], recommending scale from [%d] to [%d].", |
| 182 | + supervisorId, config.getTaskCountMin(), config.getTaskCountMax(), currentTaskCount, clampedTaskCount |
| 183 | + ); |
| 184 | + return clampedTaskCount; |
198 | 185 | } |
199 | 186 |
|
200 | 187 | final int optimalTaskCount = computeOptimalTaskCount(lastKnownMetrics); |
| 188 | + if (optimalTaskCount == -1) { |
| 189 | + return -1; |
| 190 | + } |
201 | 191 |
|
202 | | - if (optimalTaskCount > currentTaskCount && isScaleActionAllowed(ScaleDirection.SCALE_UP)) { |
203 | | - lastScaleActionTimeMillis = DateTimes.nowUtc().getMillis(); |
204 | | - log.info("Updating taskCount for supervisor[%s] from [%d] to [%d] (scale up).", supervisorId, currentTaskCount, optimalTaskCount); |
| 192 | + if (optimalTaskCount > currentTaskCount) { |
| 193 | + log.info("Recommending taskCount scale-up for supervisor[%s] from [%d] to [%d].", supervisorId, currentTaskCount, optimalTaskCount); |
205 | 194 | return optimalTaskCount; |
206 | 195 | } else if (!config.isScaleDownOnTaskRolloverOnly() |
207 | | - && optimalTaskCount > 0 // guards against the -1 sentinel |
208 | | - && optimalTaskCount < currentTaskCount |
209 | | - && isScaleActionAllowed(ScaleDirection.SCALE_DOWN)) { |
210 | | - lastScaleActionTimeMillis = DateTimes.nowUtc().getMillis(); |
211 | | - log.info("Updating taskCount for supervisor[%s] from [%d] to [%d] (scale down).", supervisorId, currentTaskCount, optimalTaskCount); |
| 196 | + && optimalTaskCount < currentTaskCount) { |
| 197 | + log.info("Recommending taskCount scale-down for supervisor[%s] from [%d] to [%d].", supervisorId, currentTaskCount, optimalTaskCount); |
212 | 198 | return optimalTaskCount; |
213 | 199 | } |
214 | 200 |
|
215 | | - // No scaling (optimalTaskCount == -1, cooldown, or rollover-only mode). |
216 | | - log.debug("No scaling required for supervisor[%s]", supervisorId); |
217 | | - if (optimalTaskCount >= config.getTaskCountMax() || currentTaskCount == config.getTaskCountMax()) { |
| 201 | + // No scaling (optimalTaskCount == currentTaskCount, optimalTaskCount <= min, optimalTaskCount >= max or rollover-only mode). |
| 202 | + log.debug("No scaling allowed/required for supervisor[%s]", supervisorId); |
| 203 | + if (optimalTaskCount >= config.getTaskCountMax()) { |
218 | 204 | emitter.emit(getMetricBuilder() |
219 | 205 | .setDimension(SeekableStreamSupervisor.AUTOSCALER_SKIP_REASON_DIMENSION, "Already at max task count") |
220 | 206 | .setMetric(SeekableStreamSupervisor.AUTOSCALER_REQUIRED_TASKS_METRIC, currentTaskCount)); |
221 | | - } else if (optimalTaskCount == config.getTaskCountMin() || currentTaskCount == config.getTaskCountMin()) { |
| 207 | + } else if (optimalTaskCount <= config.getTaskCountMin()) { |
222 | 208 | emitter.emit(getMetricBuilder() |
223 | 209 | .setDimension(SeekableStreamSupervisor.AUTOSCALER_SKIP_REASON_DIMENSION, "Already at min task count") |
224 | 210 | .setMetric(SeekableStreamSupervisor.AUTOSCALER_REQUIRED_TASKS_METRIC, currentTaskCount)); |
@@ -590,24 +576,4 @@ private Either<String, Boolean> validateMetricsForScaling(CostMetrics metrics) |
590 | 576 | } |
591 | 577 | } |
592 | 578 |
|
593 | | - /** |
594 | | - * Determines if a scale action in the given direction is currently allowed based on the elapsed time |
595 | | - * since the last scale action and the configured delay for that direction. |
596 | | - */ |
597 | | - private boolean isScaleActionAllowed(ScaleDirection direction) |
598 | | - { |
599 | | - if (lastScaleActionTimeMillis < 0) { |
600 | | - return true; |
601 | | - } |
602 | | - |
603 | | - final long barrierMillis = direction == ScaleDirection.SCALE_UP |
604 | | - ? config.getMinScaleUpDelay().getMillis() |
605 | | - : config.getMinScaleDownDelay().getMillis(); |
606 | | - if (barrierMillis <= 0) { |
607 | | - return true; |
608 | | - } |
609 | | - |
610 | | - final long elapsedMillis = DateTimes.nowUtc().getMillis() - lastScaleActionTimeMillis; |
611 | | - return elapsedMillis >= barrierMillis; |
612 | | - } |
613 | 579 | } |
0 commit comments