apache
diff --git a/‎distribution/docker/Dockerfile‎
Lines changed: 2 additions & 2 deletions b/‎distribution/docker/Dockerfile‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎docs/configuration/index.md‎
Lines changed: 2 additions & 0 deletions b/‎docs/configuration/index.md‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎docs/development/extensions-core/k8s-jobs.md‎
Lines changed: 10 additions & 0 deletions b/‎docs/development/extensions-core/k8s-jobs.md‎
Lines changed: 10 additions & 0 deletions
diff --git a/‎docs/ingestion/supervisor.md‎
Lines changed: 1 addition & 1 deletion b/‎docs/ingestion/supervisor.md‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎docs/querying/groupbyquery.md‎
Lines changed: 2 additions & 0 deletions b/‎docs/querying/groupbyquery.md‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎embedded-tests/src/test/java/org/apache/druid/testing/embedded/indexing/IngestionSmokeTest.java‎
Lines changed: 11 additions & 0 deletions b/‎embedded-tests/src/test/java/org/apache/druid/testing/embedded/indexing/IngestionSmokeTest.java‎
Lines changed: 11 additions & 0 deletions
diff --git a/‎embedded-tests/src/test/java/org/apache/druid/testing/embedded/query/QueryVirtualStorageTest.java‎
Lines changed: 3 additions & 2 deletions b/‎embedded-tests/src/test/java/org/apache/druid/testing/embedded/query/QueryVirtualStorageTest.java‎
Lines changed: 3 additions & 2 deletions
diff --git a/‎extensions-core/kafka-indexing-service/src/test/java/org/apache/druid/indexing/kafka/supervisor/KafkaSupervisorIOConfigTest.java‎
Lines changed: 46 additions & 1 deletion b/‎extensions-core/kafka-indexing-service/src/test/java/org/apache/druid/indexing/kafka/supervisor/KafkaSupervisorIOConfigTest.java‎
Lines changed: 46 additions & 1 deletion
diff --git a/‎extensions-core/kafka-indexing-service/src/test/java/org/apache/druid/indexing/kafka/supervisor/KafkaSupervisorSpecTest.java‎
Lines changed: 43 additions & 0 deletions b/‎extensions-core/kafka-indexing-service/src/test/java/org/apache/druid/indexing/kafka/supervisor/KafkaSupervisorSpecTest.java‎
Lines changed: 43 additions & 0 deletions
diff --git a/‎extensions-core/kubernetes-overlord-extensions/src/main/java/org/apache/druid/k8s/overlord/KubernetesTaskRunnerConfig.java‎
Lines changed: 15 additions & 1 deletion b/‎extensions-core/kubernetes-overlord-extensions/src/main/java/org/apache/druid/k8s/overlord/KubernetesTaskRunnerConfig.java‎
Lines changed: 15 additions & 1 deletion
@@ -35,15 +35,15 @@ RUN export DEBIAN_FRONTEND=noninteractive \
 
 COPY . /src
 WORKDIR /src
-RUN --mount=type=cache,target=/root/.m2 if [ "$BUILD_FROM_SOURCE" = "true" ]; then \
+RUN --mount=type=cache,target=/root/.m2,sharing=locked if [ "$BUILD_FROM_SOURCE" = "true" ]; then \
       mvn -B -ff -q \
       install \
       -Pdist,bundle-contrib-exts \
       -Pskip-static-checks,skip-tests \
       -Dmaven.javadoc.skip=true -T1C \
       ; fi
 
-RUN --mount=type=cache,target=/root/.m2 VERSION=$(mvn -B -q org.apache.maven.plugins:maven-help-plugin:3.2.0:evaluate \
+RUN --mount=type=cache,target=/root/.m2,sharing=locked VERSION=$(mvn -B -q org.apache.maven.plugins:maven-help-plugin:3.2.0:evaluate \
       -Dexpression=project.version -DforceStdout=true \
     ) \
  && tar -zxf ./distribution/target/apache-druid-${VERSION}-bin.tar.gz -C /opt \
 
@@ -2231,6 +2231,7 @@ Supported runtime properties:
 |`druid.query.groupBy.maxMergingDictionarySize`|Maximum amount of heap space (approximately) to use for per-query string dictionaries. When the dictionary exceeds this size, a spill to disk will be triggered. See [groupBy memory tuning and resource limits](../querying/groupbyquery.md#memory-tuning-and-resource-limits) for details.|100000000|
 |`druid.query.groupBy.maxOnDiskStorage`|Maximum amount of disk space to use, per-query, for spilling result sets to disk when either the merging buffer or the dictionary fills up. Queries that exceed this limit will fail. Set to zero to disable disk spilling.|0 (disabled)|
 |`druid.query.groupBy.maxSpillFileCount`|Maximum number of spill files allowed per GroupBy query. Queries that exceed this limit will fail. See [groupBy memory tuning and resource limits](../querying/groupbyquery.md#memory-tuning-and-resource-limits) for details.|Integer.MAX_VALUE (unlimited)|
+|`druid.query.groupBy.minSpillFileSize`|Minimum number of bytes that must accumulate across pending in-memory spill runs before they are flushed as a single file to disk. Smaller spills are batched in heap memory to avoid creating many tiny files. Higher values reduce file count but increase heap usage.|1048576 (1 MiB)|
 |`druid.query.groupBy.defaultOnDiskStorage`|Default amount of disk space to use, per-query, for spilling the result sets to disk when either the merging buffer or the dictionary fills up. Set to zero to disable disk spilling for queries which don't override `maxOnDiskStorage` in their context.|`druid.query.groupBy.maxOnDiskStorage`|
 
 Supported query contexts:
@@ -2241,6 +2242,7 @@ Supported query contexts:
 |`maxMergingDictionarySize`|Can be used to lower the value of `druid.query.groupBy.maxMergingDictionarySize` for this query.|
 |`maxOnDiskStorage`|Can be used to set `maxOnDiskStorage` to a value between 0 and `druid.query.groupBy.maxOnDiskStorage` for this query. If this query context override exceeds `druid.query.groupBy.maxOnDiskStorage`, the query will use `druid.query.groupBy.maxOnDiskStorage`. Omitting this from the query context will cause the query to use `druid.query.groupBy.defaultOnDiskStorage` for `maxOnDiskStorage`|
 |`maxSpillFileCount`|Can be used to override the value of `druid.query.groupBy.maxSpillFileCount` for this query.|
+|`minSpillFileSize`|Can be used to override the value of `druid.query.groupBy.minSpillFileSize` for this query.|
 
 ### Advanced configurations
 
 
@@ -763,6 +763,16 @@ All three examples below are equivalent.
 
 In all the above cases, Druid will match the selector to any value of task type. Druid applies similar logic for `dataSource`. For `context.tags` setting `null` or an empty object `{}` is equivalent. 
 
+##### Override pod template via context
+
+Set the `podTemplateSelectionKey` key in a task's context to pick a configured pod template directly, bypassing the selection strategy. The value is the same `selectionKey` used by the `selectorBased` strategy (that is, the suffix of `druid.indexer.runner.k8s.podTemplate.<selectionKey>`).
+
+```json
+"context": { "podTemplateSelectionKey": "podSpec1" }
+```
+
+This override is only available when the runtime property `druid.indexer.runner.allowTaskPodTemplateSelection` is set to `true`; it defaults to `false`. If the key doesn't match any configured template, the task fails to launch.
+
 #### Running Task Pods in Another Namespace
 
 It is possible to run task pods in a different namespace from the rest of your Druid cluster.
 
@@ -78,7 +78,7 @@ The following table outlines the configuration properties for `autoScalerConfig`
 |`enableTaskAutoScaler`|Enables the autoscaler. If not specified, Druid disables the autoscaler even when `autoScalerConfig` is not null.|No|`false`|
 |`taskCountMax`|The maximum number of ingestion tasks. Must be greater than or equal to `taskCountMin`. If `taskCountMax` is greater than the number of Kafka partitions or Kinesis shards, Druid sets the maximum number of reading tasks to the number of Kafka partitions or Kinesis shards and ignores `taskCountMax`.|Yes||
 |`taskCountMin`|The minimum number of ingestion tasks. When you enable the autoscaler, Druid computes the initial number of tasks to launch by checking the configs in the following order: `taskCountStart`, then `taskCount` (in `ioConfig`), then `taskCountMin`.|Yes||
-|`taskCountStart`|Optional config to specify the number of ingestion tasks to start with. When you enable the autoscaler, Druid computes the initial number of tasks to launch by checking the configs in the following order: `taskCountStart`, then `taskCount` (in `ioConfig`), then `taskCountMin`.|No|`taskCount` or `taskCountMin`|
+|`taskCountStart`|Optional config to specify the number of ingestion tasks to start with. If you provide `taskCountStart` when you submit (POST) a supervisor spec, it takes priority and Druid resets `taskCount` to `taskCountStart` at that time.|No|`taskCount` or `taskCountMin`|
 |`minScaleUpDelay`|Minimum cooldown duration between scale-up actions, specified as an ISO-8601 duration string. Falls back to `minTriggerScaleActionFrequencyMillis` if not set.|No||
 |`minScaleDownDelay`|Minimum cooldown duration between scale-down actions, specified as an ISO-8601 duration string. Falls back to `minTriggerScaleActionFrequencyMillis` if not set.|No||
 |`minTriggerScaleActionFrequencyMillis`|**Deprecated.** Use `minScaleUpDelay` and `minScaleDownDelay` instead. Minimum time interval in milliseconds between scale actions, used as the fallback when the Duration-based fields are not set.|No|600000|
 
@@ -358,13 +358,15 @@ Supported runtime properties:
 |`druid.query.groupBy.maxMergingDictionarySize`|Maximum amount of heap space (approximately) to use for per-query string dictionaries. When the dictionary exceeds this size, a spill to disk will be triggered. If set to `0` (automatic), each query's dictionary uses 30% of the Java heap divided by `druid.processing.numMergeBuffers`, or 1GB, whichever is smaller.<br /><br />See [Memory tuning and resource limits](#memory-tuning-and-resource-limits) for details on changing this property.|0 (automatic)|
 |`druid.query.groupBy.maxOnDiskStorage`|Maximum amount of disk space to use, per-query, for spilling result sets to disk when either the merging buffer or the dictionary fills up. Queries that exceed this limit will fail. Set to zero to disable disk spilling.|0 (disabled)|
 |`druid.query.groupBy.maxSpillFileCount`|Maximum number of spill files allowed per GroupBy query. Queries that exceed this limit will fail.<br /><br />See [Memory tuning and resource limits](#memory-tuning-and-resource-limits) for details on changing this property.|Integer.MAX_VALUE (unlimited)|
+|`druid.query.groupBy.minSpillFileSize`|Minimum number of bytes that must accumulate across pending in-memory spill runs before they are flushed as a single file to disk. Smaller spills are batched in heap memory to avoid creating many tiny files. Higher values reduce file count but increase heap usage.|1048576 (1 MiB)|
 
 Supported query contexts:
 
 |Key|Description|
 |---|-----------|
 |`maxOnDiskStorage`|Can be used to lower the value of `druid.query.groupBy.maxOnDiskStorage` for this query.|
 |`maxSpillFileCount`|Can be used to override the value of `druid.query.groupBy.maxSpillFileCount` for this query.|
+|`minSpillFileSize`|Can be used to override the value of `druid.query.groupBy.minSpillFileSize` for this query.|
 
 ### Advanced configurations
 
 
@@ -189,6 +189,9 @@ public void test_runIndexTask_andKillData()
                       .hasService("druid/broker")
     );
 
+    waitForNextCoordinatorCacheSync();
+    waitForNextBrokerCacheSync();
+
     cluster.callApi().verifySqlQuery("SELECT * FROM sys.segments WHERE datasource='%s'", dataSource, "");
 
     // Kill all unused segments
@@ -430,6 +433,14 @@ protected void waitForNextCoordinatorCacheSync()
     );
   }
 
+  protected void waitForNextBrokerCacheSync() // Waits for the next metadata-cache sync event reported by the Broker.
+  {
+    eventCollector.latchableEmitter().waitForNextEvent(
+        event -> event.hasMetricName("segment/metadataCache/sync/time") // match on the sync-time metric name ...
+                      .hasService("druid/broker") // ... and only when the reporting service is the Broker
+    );
+  }
+
   /**
    * Verifies the total number of used segments in {@link #dataSource}.
    */
 
@@ -67,7 +67,7 @@
 class QueryVirtualStorageTest extends EmbeddedClusterTestBase
 {
   // size of wiki segments, adjust this if segment size changes for some reason
-  private static final long SIZE_BYTES = 3776682L;
+  private static final long SIZE_BYTES = 3777834L;
   private static final long CACHE_SIZE = HumanReadableBytes.parse("1MiB");
   private static final long MAX_SIZE = HumanReadableBytes.parse("100MiB");
 
@@ -294,7 +294,8 @@ void testQueryTooMuchDataButWithDart()
     Assertions.assertTrue(segmentChannelCounters.getLoadFiles()[0] > 0 && segmentChannelCounters.getLoadFiles()[0] <= segmentChannelCounters.getFiles()[0]);
     // size of all segments at time of writing, possibly we have to load all of them, but possibly less depending on
     // test order
-    Assertions.assertTrue(segmentChannelCounters.getLoadBytes()[0] > 0 && segmentChannelCounters.getLoadBytes()[0] <= SIZE_BYTES);
+    Assertions.assertTrue(segmentChannelCounters.getLoadBytes()[0] > 0);
+    Assertions.assertTrue(segmentChannelCounters.getLoadBytes()[0] <= SIZE_BYTES);
     Assertions.assertTrue(segmentChannelCounters.getLoadTime()[0] > 0);
     Assertions.assertTrue(segmentChannelCounters.getLoadWait()[0] > 0);
   }
 
@@ -381,7 +381,7 @@ public void testAutoScalingConfigSerde() throws JsonProcessingException
         false,
         null
     );
-    Assert.assertEquals(5, kafkaSupervisorIOConfig.getTaskCount().intValue());
+    Assert.assertEquals(1, kafkaSupervisorIOConfig.getTaskCount());
 
     Assert.assertThrows(
         "taskCountMin <= taskCountStart <= taskCountMax",
@@ -400,6 +400,51 @@ public void testAutoScalingConfigSerde() throws JsonProcessingException
     );
   }
 
+  @Test
+  public void testTaskCountStartFallbackAndExplicitFlag() // Checks getTaskCount()/isTaskCountExplicit() with and without a taskCount.
+  {
+    final Map<String, Object> autoScalerConfig = ImmutableMap.of(
+        "enableTaskAutoScaler", true,
+        "taskCountMin", 1,
+        "taskCountMax", 10,
+        "taskCountStart", 5
+    );
+
+    Assert.assertEquals(7, makeIOConfig(7, autoScalerConfig).getTaskCount()); // a supplied taskCount is returned unchanged
+    Assert.assertTrue(makeIOConfig(7, autoScalerConfig).isTaskCountExplicit()); // ... and is flagged as explicit
+
+    Assert.assertEquals(5, makeIOConfig(null, autoScalerConfig).getTaskCount()); // null taskCount yields taskCountStart (5)
+    Assert.assertFalse(makeIOConfig(null, autoScalerConfig).isTaskCountExplicit()); // ... and is not flagged as explicit
+  }
+
+  private KafkaSupervisorIOConfig makeIOConfig(Integer taskCount, Map<String, Object> autoScalerConfig) // Test helper: only these two inputs vary.
+  {
+    return new KafkaSupervisorIOConfig(
+        "test",
+        null,
+        null,
+        1,
+        taskCount, // passed through as given; callers in this test also pass null
+        new Period("PT1H"),
+        ImmutableMap.of("bootstrap.servers", "localhost:8082"),
+        mapper.convertValue(autoScalerConfig, LagBasedAutoScalerConfig.class), // raw map converted to a LagBasedAutoScalerConfig
+        LagAggregator.DEFAULT,
+        KafkaSupervisorIOConfig.DEFAULT_POLL_TIMEOUT_MILLIS,
+        new Period("P1D"),
+        new Period("PT30S"),
+        true,
+        new Period("PT30M"),
+        null,
+        null,
+        null,
+        null,
+        null,
+        null,
+        false,
+        null
+    );
+  }
+
   @Test
   public void testIdleConfigSerde() throws JsonProcessingException
   {
 
@@ -30,7 +30,10 @@
 import org.apache.druid.indexing.overlord.IndexerMetadataStorageCoordinator;
 import org.apache.druid.indexing.overlord.TaskMaster;
 import org.apache.druid.indexing.overlord.TaskStorage;
+import org.apache.druid.indexing.overlord.supervisor.SupervisorSpec;
 import org.apache.druid.indexing.overlord.supervisor.SupervisorStateManagerConfig;
+import org.apache.druid.indexing.seekablestream.supervisor.LagAggregator;
+import org.apache.druid.indexing.seekablestream.supervisor.autoscaler.CostBasedAutoScalerConfig;
 import org.apache.druid.jackson.DefaultObjectMapper;
 import org.apache.druid.java.util.common.StringUtils;
 import org.apache.druid.java.util.common.granularity.Granularities;
@@ -422,6 +425,46 @@ public void testSuspendResume() throws IOException
     Assert.assertFalse(runningSpec.isSuspended());
   }
 
+  @Test
+  public void testTaskCountSerdeRoundTrip() throws IOException
+  {
+    // A persisted taskCount must survive a serialize/deserialize round-trip even when
+    // autoScalerConfig.taskCountStart is set.
+    final CostBasedAutoScalerConfig autoScalerConfig =
+        CostBasedAutoScalerConfig.builder()
+            .enableTaskAutoScaler(true)
+            .taskCountMin(1)
+            .taskCountMax(100)
+            .taskCountStart(25)
+            .build();
+
+    final KafkaSupervisorSpec spec = new KafkaSupervisorSpecBuilder()
+        .withDataSchema(
+            schema -> schema
+                .withTimestamp(TimestampSpec.DEFAULT)
+                .withAggregators(new CountAggregatorFactory("rows"))
+                .withGranularity(new UniformGranularitySpec(Granularities.DAY, Granularities.NONE, null))
+        )
+        .withIoConfig(
+            ioConfig -> ioConfig
+                .withJsonInputFormat()
+                .withConsumerProperties(Map.of("bootstrap.servers", "localhost:9092"))
+                .withTaskCount(25) // initial taskCount, same value as taskCountStart above
+                .withAutoScalerConfig(autoScalerConfig)
+                .withLagAggregator(LagAggregator.DEFAULT)
+        )
+        .build("testDs", "metrics");
+
+    // Mutate taskCount the same way SeekableStreamSupervisor.changeTaskCountInIOConfig does,
+    // and verify that the mutation is picked up by serialization.
+    spec.getIoConfig().setTaskCount(50);
+    final byte[] payload = mapper.writeValueAsBytes(spec);
+    final KafkaSupervisorSpec roundTripped =
+        (KafkaSupervisorSpec) mapper.readValue(payload, SupervisorSpec.class); // read back via the SupervisorSpec base type
+    Assert.assertEquals(50, roundTripped.getIoConfig().getTaskCount()); // the mutated value (50), not taskCountStart (25)
+    Assert.assertTrue(roundTripped.getIoConfig().isTaskCountExplicit()); // deserialized taskCount is flagged as explicit
+  }
+
   @Test
   public void test_validateSpecUpdateTo()
   {
 
@@ -91,6 +91,12 @@ public interface KubernetesTaskRunnerConfig
    */
   Period getK8sSharedInformerResyncPeriod();
 
+  /**
+   * Whether tasks may select a configured pod template via the {@code DruidK8sConstants.TASK_CONTEXT_POD_TEMPLATE_SELECTION_KEY}
+   * task context key, overriding the configured {@code PodTemplateSelectStrategy}.
+   */
+  boolean isAllowTaskPodTemplateSelection();
+
   static Builder builder()
   {
     return new Builder();
@@ -121,6 +127,7 @@ public static class Builder
     private Period logSaveTimeout;
     private boolean useK8sSharedInformers;
     private Period k8sSharedInformerResyncPeriod;
+    private boolean allowTaskPodTemplateSelection;
 
     public Builder()
     {
@@ -265,6 +272,12 @@ public Builder withK8sSharedInformerResyncPeriod(Period k8sSharedInformerResyncP
       return this;
     }
 
+    public Builder withAllowTaskPodTemplateSelection(boolean allowTaskPodTemplateSelection) // Sets the flag that build() hands to the static config.
+    {
+      this.allowTaskPodTemplateSelection = allowTaskPodTemplateSelection;
+      return this; // return the builder so calls can be chained
+    }
+
     public KubernetesTaskRunnerStaticConfig build()
     {
       return new KubernetesTaskRunnerStaticConfig(
@@ -290,7 +303,8 @@ public KubernetesTaskRunnerStaticConfig build()
           this.capacity,
           this.taskJoinTimeout,
           this.useK8sSharedInformers,
-          this.k8sSharedInformerResyncPeriod
+          this.k8sSharedInformerResyncPeriod,
+          this.allowTaskPodTemplateSelection
       );
     }
   }