Skip to content

Commit ad264b0

Browse files
committed
feat: support scaling direction-aware cooldown for task auto-scalers
1 parent 9a70797 commit ad264b0

14 files changed

Lines changed: 550 additions & 60 deletions

File tree

docs/ingestion/supervisor.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,9 @@ The following table outlines the configuration properties for `autoScalerConfig`
7979
|`taskCountMax`|The maximum number of ingestion tasks. Must be greater than or equal to `taskCountMin`. If `taskCountMax` is greater than the number of Kafka partitions or Kinesis shards, Druid sets the maximum number of reading tasks to the number of Kafka partitions or Kinesis shards and ignores `taskCountMax`.|Yes||
8080
|`taskCountMin`|The minimum number of ingestion tasks. When you enable the autoscaler, Druid computes the initial number of tasks to launch by checking the configs in the following order: `taskCountStart`, then `taskCount` (in `ioConfig`), then `taskCountMin`.|Yes||
8181
|`taskCountStart`|Optional config to specify the number of ingestion tasks to start with. When you enable the autoscaler, Druid computes the initial number of tasks to launch by checking the configs in the following order: `taskCountStart`, then `taskCount` (in `ioConfig`), then `taskCountMin`.|No|`taskCount` or `taskCountMin`|
82-
|`minTriggerScaleActionFrequencyMillis`|The minimum time interval between two scale actions.| No|600000|
82+
|`minTriggerScaleActionFrequencyMillis`|The minimum time interval between two scale actions in the same direction. Used as the fallback value for `minTriggerScaleUpActionFrequencyMillis` and `minTriggerScaleDownActionFrequencyMillis` when either is not set.| No|600000|
83+
|`minTriggerScaleUpActionFrequencyMillis`|The minimum time interval between two scale-up actions. Falls back to `minTriggerScaleActionFrequencyMillis` if not set.| No||
84+
|`minTriggerScaleDownActionFrequencyMillis`|The minimum time interval between two scale-down actions. Falls back to `minTriggerScaleActionFrequencyMillis` if not set.| No||
8385
|`autoScalerStrategy`|The algorithm of autoscaler. Druid only supports the `lagBased` strategy. See [Autoscaler strategy](#autoscaler-strategy) for more information.|No|`lagBased`|
8486
|`stopTaskCountRatio`|A variable version of `ioConfig.stopTaskCount` with a valid range of (0.0, 1.0]. Allows the maximum number of stoppable tasks in steady state to be proportional to the number of tasks currently running.|No||
8587

indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/supervisor/SeekableStreamSupervisor.java

Lines changed: 78 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,8 @@
6464
import org.apache.druid.indexing.overlord.supervisor.SupervisorSpec;
6565
import org.apache.druid.indexing.overlord.supervisor.SupervisorStateManager;
6666
import org.apache.druid.indexing.overlord.supervisor.autoscaler.LagStats;
67+
import org.apache.druid.indexing.overlord.supervisor.autoscaler.ScaleActionSupplier;
68+
import org.apache.druid.indexing.overlord.supervisor.autoscaler.ScaleDirection;
6769
import org.apache.druid.indexing.overlord.supervisor.autoscaler.SupervisorTaskAutoScaler;
6870
import org.apache.druid.indexing.seekablestream.SeekableStreamDataSourceMetadata;
6971
import org.apache.druid.indexing.seekablestream.SeekableStreamEndSequenceNumbers;
@@ -122,7 +124,6 @@
122124
import java.util.SortedMap;
123125
import java.util.TreeMap;
124126
import java.util.TreeSet;
125-
import java.util.concurrent.Callable;
126127
import java.util.concurrent.ConcurrentHashMap;
127128
import java.util.concurrent.CopyOnWriteArrayList;
128129
import java.util.concurrent.ExecutionException;
@@ -454,13 +455,13 @@ public boolean equals(Object obj)
454455
// change taskCount without resubmitting.
455456
private class DynamicAllocationTasksNotice implements Notice
456457
{
457-
Callable<Integer> computeDesiredTaskCount;
458+
ScaleActionSupplier computeDesiredTaskCount;
458459
ServiceEmitter emitter;
459460
Runnable onSuccessfulScale;
460461
private static final String TYPE = "dynamic_allocation_tasks_notice";
461462

462463
DynamicAllocationTasksNotice(
463-
Callable<Integer> computeDesiredTaskCount,
464+
ScaleActionSupplier computeDesiredTaskCount,
464465
Runnable onSuccessfulScale,
465466
ServiceEmitter emitter
466467
)
@@ -499,59 +500,92 @@ public void handle()
499500
supervisorId,
500501
dataSource
501502
);
502-
final Integer desiredTaskCount = computeDesiredTaskCount.call();
503-
ServiceMetricEvent.Builder event = ServiceMetricEvent.builder()
504-
.setDimension(DruidMetrics.SUPERVISOR_ID, supervisorId)
505-
.setDimension(DruidMetrics.DATASOURCE, dataSource)
506-
.setDimension(DruidMetrics.STREAM, getIoConfig().getStream());
507-
for (CopyOnWriteArrayList<TaskGroup> list : pendingCompletionTaskGroups.values()) {
508-
// There are expected to be pending tasks if this scaling is happening on task rollover
509-
if (!list.isEmpty() && !isScalingTasksOnRollover.get()) {
510-
log.info(
511-
"Skipping DynamicAllocationTasksNotice execution for supervisor[%s] for datasource[%s] because following tasks are pending [%s]",
512-
supervisorId,
513-
dataSource,
514-
list
515-
);
516-
if (desiredTaskCount > 0) {
503+
final int desiredTaskCount = computeDesiredTaskCount.computeTaskCount();
504+
final int currentTaskCount = getCurrentTaskCount();
505+
final boolean needToScale = desiredTaskCount > 0 && desiredTaskCount != currentTaskCount;
506+
507+
if (needToScale) {
508+
ServiceMetricEvent.Builder event = ServiceMetricEvent.builder()
509+
.setDimension(DruidMetrics.SUPERVISOR_ID, supervisorId)
510+
.setDimension(DruidMetrics.DATASOURCE, dataSource)
511+
.setDimension(DruidMetrics.STREAM, getIoConfig().getStream());
512+
513+
// 1) Make sure we wait for any pending completion tasks to finish.
514+
// At this point there could be up to 3 generations of tasks: pending completion tasks (old generation), running tasks (current generation), and (after our scale) pending tasks (new generation).
515+
// We want to avoid killing any old generation tasks preemptively, as that might cause the current generation tasks' offsets to become invalid.
516+
for (CopyOnWriteArrayList<TaskGroup> list : pendingCompletionTaskGroups.values()) {
517+
// There are expected to be pending tasks if this scaling is happening on task rollover
518+
if (!list.isEmpty() && !isScalingTasksOnRollover.get()) {
519+
log.info(
520+
"Skipping DynamicAllocationTasksNotice execution for supervisor[%s] for datasource[%s] because following tasks are pending [%s]",
521+
supervisorId,
522+
dataSource,
523+
list
524+
);
517525
emitter.emit(event.setDimension(
518526
AUTOSCALER_SKIP_REASON_DIMENSION,
519527
"There are tasks pending completion"
520528
)
521529
.setMetric(AUTOSCALER_REQUIRED_TASKS_METRIC, desiredTaskCount));
530+
return;
522531
}
523-
return;
524532
}
525-
}
526-
if (nowTime - dynamicTriggerLastRunTime < autoScalerConfig.getMinTriggerScaleActionFrequencyMillis()) {
527-
log.info(
528-
"DynamicAllocationTasksNotice submitted again in [%d] millis, minTriggerDynamicFrequency is [%s] for supervisor[%s] for dataSource[%s], skipping it! desired task count is [%s], active task count is [%s]",
529-
nowTime - dynamicTriggerLastRunTime,
530-
autoScalerConfig.getMinTriggerScaleActionFrequencyMillis(),
531-
supervisorId,
532-
dataSource,
533-
desiredTaskCount,
534-
getActiveTaskGroupsCount()
535-
);
536533

537-
if (desiredTaskCount > 0) {
534+
// 2) Make sure we are not breaching any scaling cooldown limits
535+
final long lastRunTime;
536+
final long effectiveFreq;
537+
final ScaleDirection scaleDirection;
538+
539+
if (desiredTaskCount > currentTaskCount) {
540+
// Scale up: use the scale-up specific frequency, falling back to base.
541+
Long specificFreq = autoScalerConfig.getMinTriggerScaleUpActionFrequencyMillis();
542+
effectiveFreq = specificFreq != null
543+
? specificFreq
544+
: autoScalerConfig.getMinTriggerScaleActionFrequencyMillis();
545+
lastRunTime = dynamicTriggerLastScaleUpRunTime;
546+
scaleDirection = ScaleDirection.SCALE_UP;
547+
} else {
548+
// Scale down: use the scale-down specific frequency, falling back to base.
549+
Long specificFreq = autoScalerConfig.getMinTriggerScaleDownActionFrequencyMillis();
550+
effectiveFreq = specificFreq != null
551+
? specificFreq
552+
: autoScalerConfig.getMinTriggerScaleActionFrequencyMillis();
553+
lastRunTime = dynamicTriggerLastScaleDownRunTime;
554+
scaleDirection = ScaleDirection.SCALE_DOWN;
555+
}
556+
557+
if (nowTime - lastRunTime < effectiveFreq) {
558+
log.info(
559+
"DynamicAllocationTasksNotice submitted again in [%d]ms, effective [%s] throttle is [%d]ms for supervisor[%s] for dataSource[%s], skipping it! desired task count is [%d], current task count is [%d]",
560+
nowTime - lastRunTime,
561+
scaleDirection,
562+
effectiveFreq,
563+
supervisorId,
564+
dataSource,
565+
desiredTaskCount,
566+
currentTaskCount
567+
);
568+
538569
emitter.emit(event.setDimension(
539570
AUTOSCALER_SKIP_REASON_DIMENSION,
540571
"minTriggerScaleActionFrequencyMillis not elapsed yet"
541572
)
542573
.setMetric(AUTOSCALER_REQUIRED_TASKS_METRIC, desiredTaskCount));
574+
return;
543575
}
544-
return;
545-
}
546576

547-
if (desiredTaskCount > 0) {
577+
// At this point, we can attempt a scaling action, so emit the required task count metric.
548578
emitter.emit(event.setMetric(AUTOSCALER_REQUIRED_TASKS_METRIC, desiredTaskCount));
549-
}
550579

551-
boolean allocationSuccess = changeTaskCount(desiredTaskCount);
552-
if (allocationSuccess) {
553-
onSuccessfulScale.run();
554-
dynamicTriggerLastRunTime = nowTime;
580+
boolean allocationSuccess = changeTaskCount(desiredTaskCount);
581+
if (allocationSuccess) {
582+
onSuccessfulScale.run();
583+
if (desiredTaskCount > currentTaskCount) {
584+
dynamicTriggerLastScaleUpRunTime = nowTime;
585+
} else {
586+
dynamicTriggerLastScaleDownRunTime = nowTime;
587+
}
588+
}
555589
}
556590
}
557591
catch (Exception ex) {
@@ -586,7 +620,8 @@ public String getType()
586620
* After the taskCount is changed in SeekableStreamSupervisorIOConfig, next RunNotice will create scaled number of ingest tasks without resubmitting the supervisor.
587621
*
588622
* @param desiredActiveTaskCount desired taskCount computed from AutoScaler
589-
* @return Boolean flag indicating if scale action was executed or not. If true, it will wait at least 'minTriggerScaleActionFrequencyMillis' before next 'changeTaskCount'.
623+
* @return Boolean flag indicating if scale action was executed or not. If true, it will wait at least the configured
624+
* minTriggerScale(Up|Down)ActionFrequencyMillis (falling back to minTriggerScaleActionFrequencyMillis) before the next same-direction 'changeTaskCount'.
590625
* If false, it will do 'changeTaskCount' again after 'scaleActionPeriodMillis' millis.
591626
* @throws InterruptedException
592627
* @throws ExecutionException
@@ -958,7 +993,8 @@ public String getType()
958993
private final boolean useExclusiveStartingSequence;
959994
private boolean listenerRegistered = false;
960995
private long lastRunTime;
961-
private long dynamicTriggerLastRunTime;
996+
private long dynamicTriggerLastScaleUpRunTime;
997+
private long dynamicTriggerLastScaleDownRunTime;
962998
private int initRetryCounter = 0;
963999
private volatile DateTime firstRunTime;
9641000
private volatile DateTime earlyStopTime = null;
@@ -1416,7 +1452,7 @@ public void tryInit()
14161452
}
14171453

14181454
public Runnable buildDynamicAllocationTask(
1419-
Callable<Integer> scaleAction,
1455+
ScaleActionSupplier scaleAction,
14201456
Runnable onSuccessfulScale,
14211457
ServiceEmitter emitter
14221458
)

indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/supervisor/autoscaler/AutoScalerConfig.java

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@
2828
import org.apache.druid.indexing.overlord.supervisor.autoscaler.SupervisorTaskAutoScaler;
2929
import org.apache.druid.java.util.emitter.service.ServiceEmitter;
3030

31+
import javax.annotation.Nullable;
32+
3133
@UnstableApi
3234
@JsonTypeInfo(use = JsonTypeInfo.Id.NAME, property = "autoScalerStrategy", defaultImpl = LagBasedAutoScalerConfig.class)
3335
@JsonSubTypes(value = {
@@ -38,6 +40,19 @@ public interface AutoScalerConfig
3840
{
3941
boolean getEnableTaskAutoScaler();
4042
long getMinTriggerScaleActionFrequencyMillis();
43+
44+
@Nullable
45+
default Long getMinTriggerScaleUpActionFrequencyMillis()
46+
{
47+
return null;
48+
}
49+
50+
@Nullable
51+
default Long getMinTriggerScaleDownActionFrequencyMillis()
52+
{
53+
return null;
54+
}
55+
4156
int getTaskCountMax();
4257
int getTaskCountMin();
4358
Integer getTaskCountStart();

indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/supervisor/autoscaler/CostBasedAutoScaler.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,7 @@ public int computeTaskCountForRollover()
169169
}
170170
}
171171

172+
@Override
172173
public int computeTaskCountForScaleAction()
173174
{
174175
lastKnownMetrics = collectMetrics();

indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/supervisor/autoscaler/CostBasedAutoScalerConfig.java

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,8 @@ public class CostBasedAutoScalerConfig implements AutoScalerConfig
5555
private final int taskCountMin;
5656
private final Integer taskCountStart;
5757
private final long minTriggerScaleActionFrequencyMillis;
58+
@Nullable private final Long minTriggerScaleUpActionFrequencyMillis;
59+
@Nullable private final Long minTriggerScaleDownActionFrequencyMillis;
5860
private final Double stopTaskCountRatio;
5961
private final long scaleActionPeriodMillis;
6062

@@ -72,6 +74,8 @@ public CostBasedAutoScalerConfig(
7274
@Nullable @JsonProperty("enableTaskAutoScaler") Boolean enableTaskAutoScaler,
7375
@Nullable @JsonProperty("taskCountStart") Integer taskCountStart,
7476
@Nullable @JsonProperty("minTriggerScaleActionFrequencyMillis") Long minTriggerScaleActionFrequencyMillis,
77+
@Nullable @JsonProperty("minTriggerScaleUpActionFrequencyMillis") Long minTriggerScaleUpActionFrequencyMillis,
78+
@Nullable @JsonProperty("minTriggerScaleDownActionFrequencyMillis") Long minTriggerScaleDownActionFrequencyMillis,
7579
@Nullable @JsonProperty("stopTaskCountRatio") Double stopTaskCountRatio,
7680
@Nullable @JsonProperty("scaleActionPeriodMillis") Long scaleActionPeriodMillis,
7781
@Nullable @JsonProperty("lagWeight") Double lagWeight,
@@ -92,6 +96,8 @@ public CostBasedAutoScalerConfig(
9296
minTriggerScaleActionFrequencyMillis,
9397
DEFAULT_MIN_TRIGGER_SCALE_ACTION_FREQUENCY_MILLIS
9498
);
99+
this.minTriggerScaleUpActionFrequencyMillis = minTriggerScaleUpActionFrequencyMillis;
100+
this.minTriggerScaleDownActionFrequencyMillis = minTriggerScaleDownActionFrequencyMillis;
95101

96102
// Cost function weights with defaults
97103
this.lagWeight = Configs.valueOrDefault(lagWeight, DEFAULT_LAG_WEIGHT);
@@ -172,6 +178,22 @@ public long getMinTriggerScaleActionFrequencyMillis()
172178
return minTriggerScaleActionFrequencyMillis;
173179
}
174180

181+
@Override
182+
@JsonProperty
183+
@Nullable
184+
public Long getMinTriggerScaleUpActionFrequencyMillis()
185+
{
186+
return minTriggerScaleUpActionFrequencyMillis;
187+
}
188+
189+
@Override
190+
@JsonProperty
191+
@Nullable
192+
public Long getMinTriggerScaleDownActionFrequencyMillis()
193+
{
194+
return minTriggerScaleDownActionFrequencyMillis;
195+
}
196+
175197
@Override
176198
@JsonProperty
177199
@Nullable
@@ -259,6 +281,8 @@ public boolean equals(Object o)
259281
&& taskCountMax == that.taskCountMax
260282
&& taskCountMin == that.taskCountMin
261283
&& minTriggerScaleActionFrequencyMillis == that.minTriggerScaleActionFrequencyMillis
284+
&& Objects.equals(minTriggerScaleUpActionFrequencyMillis, that.minTriggerScaleUpActionFrequencyMillis)
285+
&& Objects.equals(minTriggerScaleDownActionFrequencyMillis, that.minTriggerScaleDownActionFrequencyMillis)
262286
&& scaleActionPeriodMillis == that.scaleActionPeriodMillis
263287
&& Double.compare(that.lagWeight, lagWeight) == 0
264288
&& Double.compare(that.idleWeight, idleWeight) == 0
@@ -279,6 +303,8 @@ public int hashCode()
279303
taskCountMin,
280304
taskCountStart,
281305
minTriggerScaleActionFrequencyMillis,
306+
minTriggerScaleUpActionFrequencyMillis,
307+
minTriggerScaleDownActionFrequencyMillis,
282308
stopTaskCountRatio,
283309
scaleActionPeriodMillis,
284310
lagWeight,
@@ -299,6 +325,8 @@ public String toString()
299325
", taskCountMin=" + taskCountMin +
300326
", taskCountStart=" + taskCountStart +
301327
", minTriggerScaleActionFrequencyMillis=" + minTriggerScaleActionFrequencyMillis +
328+
", minTriggerScaleUpActionFrequencyMillis=" + minTriggerScaleUpActionFrequencyMillis +
329+
", minTriggerScaleDownActionFrequencyMillis=" + minTriggerScaleDownActionFrequencyMillis +
302330
", stopTaskCountRatio=" + stopTaskCountRatio +
303331
", scaleActionPeriodMillis=" + scaleActionPeriodMillis +
304332
", lagWeight=" + lagWeight +
@@ -321,6 +349,8 @@ public static class Builder
321349
private Boolean enableTaskAutoScaler = true;
322350
private Integer taskCountStart;
323351
private Long minTriggerScaleActionFrequencyMillis;
352+
private Long minTriggerScaleUpActionFrequencyMillis;
353+
private Long minTriggerScaleDownActionFrequencyMillis;
324354
private Double stopTaskCountRatio;
325355
private Long scaleActionPeriodMillis;
326356
private Double lagWeight;
@@ -364,6 +394,18 @@ public Builder minTriggerScaleActionFrequencyMillis(long minTriggerScaleActionFr
364394
return this;
365395
}
366396

397+
public Builder minTriggerScaleUpActionFrequencyMillis(Long minTriggerScaleUpActionFrequencyMillis)
398+
{
399+
this.minTriggerScaleUpActionFrequencyMillis = minTriggerScaleUpActionFrequencyMillis;
400+
return this;
401+
}
402+
403+
public Builder minTriggerScaleDownActionFrequencyMillis(Long minTriggerScaleDownActionFrequencyMillis)
404+
{
405+
this.minTriggerScaleDownActionFrequencyMillis = minTriggerScaleDownActionFrequencyMillis;
406+
return this;
407+
}
408+
367409
public Builder stopTaskCountRatio(Double stopTaskCountRatio)
368410
{
369411
this.stopTaskCountRatio = stopTaskCountRatio;
@@ -420,6 +462,8 @@ public CostBasedAutoScalerConfig build()
420462
enableTaskAutoScaler,
421463
taskCountStart,
422464
minTriggerScaleActionFrequencyMillis,
465+
minTriggerScaleUpActionFrequencyMillis,
466+
minTriggerScaleDownActionFrequencyMillis,
423467
stopTaskCountRatio,
424468
scaleActionPeriodMillis,
425469
lagWeight,

0 commit comments

Comments
 (0)