diff --git a/.github/workflows/unit-and-integration-tests-unified.yml b/.github/workflows/unit-and-integration-tests-unified.yml
index b5860b494557..e960493dba31 100644
--- a/.github/workflows/unit-and-integration-tests-unified.yml
+++ b/.github/workflows/unit-and-integration-tests-unified.yml
@@ -64,4 +64,4 @@ jobs:
runs-on: ubuntu-latest
if: ${{ !cancelled() }}
steps:
- - uses: Kesin11/actions-timeline@54d513e0b5ff1158f1cf8321108d666a5a6c1fca
+ - uses: Kesin11/actions-timeline@44c9c178ffb2fb1d9859614a3ffa79ccfb77565e
diff --git a/benchmarks/pom.xml b/benchmarks/pom.xml
index 429b9323777a..c02c2166f115 100644
--- a/benchmarks/pom.xml
+++ b/benchmarks/pom.xml
@@ -244,6 +244,26 @@
+
+
+ org.apache.maven.plugins
+ maven-compiler-plugin
+ 3.14.1
+ true
+
+ ${maven.compiler.release}
+
+ org.openjdk.jmh.generators.BenchmarkProcessor
+
+
+
+ org.openjdk.jmh
+ jmh-generator-annprocess
+ ${jmh.version}
+
+
+
+ org.apache.maven.pluginsmaven-assembly-plugin
diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/SinkQuerySegmentWalkerBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/SinkQuerySegmentWalkerBenchmark.java
index 6b36d72c672e..777dd297ecb9 100644
--- a/benchmarks/src/test/java/org/apache/druid/benchmark/SinkQuerySegmentWalkerBenchmark.java
+++ b/benchmarks/src/test/java/org/apache/druid/benchmark/SinkQuerySegmentWalkerBenchmark.java
@@ -19,29 +19,87 @@
package org.apache.druid.benchmark;
+import com.fasterxml.jackson.databind.InjectableValues;
+import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.base.Suppliers;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
+import org.apache.druid.client.cache.CacheConfig;
+import org.apache.druid.client.cache.CachePopulatorStats;
+import org.apache.druid.client.cache.MapCache;
import org.apache.druid.data.input.MapBasedInputRow;
+import org.apache.druid.data.input.impl.DimensionsSpec;
+import org.apache.druid.data.input.impl.TimestampSpec;
+import org.apache.druid.guice.BuiltInTypesModule;
+import org.apache.druid.indexer.granularity.UniformGranularitySpec;
+import org.apache.druid.jackson.AggregatorsModule;
import org.apache.druid.jackson.DefaultObjectMapper;
import org.apache.druid.java.util.common.DateTimes;
import org.apache.druid.java.util.common.FileUtils;
import org.apache.druid.java.util.common.Intervals;
+import org.apache.druid.java.util.common.concurrent.Execs;
import org.apache.druid.java.util.common.granularity.Granularities;
import org.apache.druid.java.util.common.logger.Logger;
+import org.apache.druid.java.util.emitter.EmittingLogger;
import org.apache.druid.java.util.emitter.core.LoggingEmitter;
import org.apache.druid.java.util.emitter.service.ServiceEmitter;
+import org.apache.druid.math.expr.ExprMacroTable;
+import org.apache.druid.query.DefaultGenericQueryMetricsFactory;
+import org.apache.druid.query.DefaultQueryRunnerFactoryConglomerate;
import org.apache.druid.query.Druids;
+import org.apache.druid.query.ForwardingQueryProcessingPool;
+import org.apache.druid.query.Query;
import org.apache.druid.query.QueryPlus;
-import org.apache.druid.query.Result;
+import org.apache.druid.query.QueryRunnerFactory;
+import org.apache.druid.query.QueryRunnerFactoryConglomerate;
+import org.apache.druid.query.QueryRunnerTestHelper;
+import org.apache.druid.query.aggregation.AggregatorFactory;
+import org.apache.druid.query.aggregation.CountAggregatorFactory;
import org.apache.druid.query.aggregation.LongSumAggregatorFactory;
import org.apache.druid.query.context.ResponseContext;
+import org.apache.druid.query.expression.TestExprMacroTable;
+import org.apache.druid.query.groupby.GroupByQuery;
+import org.apache.druid.query.groupby.GroupByQueryConfig;
+import org.apache.druid.query.groupby.GroupByQueryRunnerTest;
+import org.apache.druid.query.groupby.TestGroupByBuffers;
+import org.apache.druid.query.metadata.SegmentMetadataQueryConfig;
+import org.apache.druid.query.metadata.SegmentMetadataQueryQueryToolChest;
+import org.apache.druid.query.metadata.SegmentMetadataQueryRunnerFactory;
+import org.apache.druid.query.metadata.metadata.ListColumnIncluderator;
+import org.apache.druid.query.metadata.metadata.SegmentMetadataQuery;
+import org.apache.druid.query.policy.NoopPolicyEnforcer;
+import org.apache.druid.query.scan.ScanQuery;
+import org.apache.druid.query.scan.ScanQueryConfig;
+import org.apache.druid.query.scan.ScanQueryEngine;
+import org.apache.druid.query.scan.ScanQueryQueryToolChest;
+import org.apache.druid.query.scan.ScanQueryRunnerFactory;
+import org.apache.druid.query.spec.MultipleIntervalSegmentSpec;
import org.apache.druid.query.timeseries.TimeseriesQuery;
-import org.apache.druid.query.timeseries.TimeseriesResultValue;
+import org.apache.druid.query.timeseries.TimeseriesQueryEngine;
+import org.apache.druid.query.timeseries.TimeseriesQueryQueryToolChest;
+import org.apache.druid.query.timeseries.TimeseriesQueryRunnerFactory;
+import org.apache.druid.segment.IndexIO;
+import org.apache.druid.segment.IndexMerger;
+import org.apache.druid.segment.IndexMergerV9;
+import org.apache.druid.segment.IndexSpec;
+import org.apache.druid.segment.column.ColumnConfig;
+import org.apache.druid.segment.incremental.ParseExceptionHandler;
+import org.apache.druid.segment.incremental.RowIngestionMeters;
+import org.apache.druid.segment.incremental.SimpleRowIngestionMeters;
+import org.apache.druid.segment.indexing.DataSchema;
+import org.apache.druid.segment.indexing.TuningConfig;
+import org.apache.druid.segment.loading.DataSegmentPusher;
+import org.apache.druid.segment.metadata.CentralizedDatasourceSchemaConfig;
+import org.apache.druid.segment.realtime.SegmentGenerationMetrics;
import org.apache.druid.segment.realtime.appenderator.Appenderator;
+import org.apache.druid.segment.realtime.appenderator.AppenderatorConfig;
+import org.apache.druid.segment.realtime.appenderator.Appenderators;
import org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec;
-import org.apache.druid.segment.realtime.appenderator.StreamAppenderatorTester;
+import org.apache.druid.segment.realtime.appenderator.TestAppenderatorConfig;
import org.apache.druid.segment.realtime.sink.Committers;
+import org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMediumFactory;
+import org.apache.druid.server.coordination.NoopDataSegmentAnnouncer;
+import org.apache.druid.timeline.DataSegment;
import org.apache.druid.timeline.partition.LinearShardSpec;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
@@ -59,8 +117,11 @@
import org.openjdk.jmh.infra.Blackhole;
import java.io.File;
+import java.net.URI;
import java.util.Arrays;
import java.util.List;
+import java.util.Map;
+import java.util.concurrent.ExecutorService;
import java.util.concurrent.TimeUnit;
@State(Scope.Benchmark)
@@ -71,6 +132,18 @@
@OutputTimeUnit(TimeUnit.MILLISECONDS)
public class SinkQuerySegmentWalkerBenchmark
{
+ private static final String DATASOURCE = "foo";
+ private static final List QUERY_COLUMNS = ImmutableList.of("__time", "dim", "count", "met");
+ private static final MultipleIntervalSegmentSpec QUERY_INTERVALS =
+ new MultipleIntervalSegmentSpec(ImmutableList.of(Intervals.of("2000/2001")));
+ private static final String SET_PROCESSING_THREAD_NAMES = "setProcessingThreadNames";
+
+ @Param({"timeseries", "scan", "segmentMetadata", "groupBy"})
+ private String queryType;
+
+ @Param({"false", "true"})
+ private boolean setProcessingThreadNames;
+
@Param({"10", "50", "100", "200"})
private int numFireHydrants;
@@ -78,24 +151,66 @@ public class SinkQuerySegmentWalkerBenchmark
private final ServiceEmitter serviceEmitter = new ServiceEmitter("test", "test", loggingEmitter);
private File cacheDir;
+ private ExecutorService queryExecutor;
private Appenderator appenderator;
+ private TestGroupByBuffers groupByBuffers;
@Setup(Level.Trial)
public void setup() throws Exception
{
final String userConfiguredCacheDir = System.getProperty("druid.benchmark.cacheDir", System.getenv("DRUID_BENCHMARK_CACHE_DIR"));
cacheDir = new File(userConfiguredCacheDir);
- final StreamAppenderatorTester tester =
- new StreamAppenderatorTester.Builder().maxRowsInMemory(1)
- .basePersistDirectory(cacheDir)
- .withServiceEmitter(serviceEmitter)
- .build();
+ FileUtils.deleteDirectory(cacheDir);
+ final ObjectMapper objectMapper = makeObjectMapper();
+ final IndexIO indexIO = new IndexIO(
+ objectMapper,
+ new ColumnConfig()
+ {
+ }
+ );
+ final IndexMergerV9 indexMerger = new IndexMergerV9(
+ objectMapper,
+ indexIO,
+ OffHeapMemorySegmentWriteOutMediumFactory.instance()
+ );
+ final DataSchema schema = makeDataSchema();
+ final RowIngestionMeters rowIngestionMeters = new SimpleRowIngestionMeters();
+ final AppenderatorConfig tuningConfig = makeTuningConfig();
+
+ queryExecutor = Execs.singleThreaded("queryExecutor(%d)");
+ groupByBuffers = TestGroupByBuffers.createDefault();
- appenderator = tester.getAppenderator();
+ serviceEmitter.start();
+ EmittingLogger.registerEmitter(serviceEmitter);
+
+ final QueryRunnerFactoryConglomerate conglomerate = makeQueryRunnerFactoryConglomerate();
+ appenderator = Appenderators.createRealtime(
+ null,
+ schema.getDataSource(),
+ schema,
+ tuningConfig,
+ new SegmentGenerationMetrics(),
+ makeDataSegmentPusher(),
+ objectMapper,
+ indexIO,
+ indexMerger,
+ conglomerate,
+ new NoopDataSegmentAnnouncer(),
+ serviceEmitter,
+ new ForwardingQueryProcessingPool(queryExecutor),
+ MapCache.create(2048),
+ new CacheConfig(),
+ new CachePopulatorStats(),
+ NoopPolicyEnforcer.instance(),
+ rowIngestionMeters,
+ new ParseExceptionHandler(rowIngestionMeters, false, Integer.MAX_VALUE, 0),
+ CentralizedDatasourceSchemaConfig.create(),
+ interval -> {}
+ );
appenderator.startJob();
final SegmentIdWithShardSpec segmentIdWithShardSpec = new SegmentIdWithShardSpec(
- StreamAppenderatorTester.DATASOURCE,
+ DATASOURCE,
Intervals.of("2000/2001"),
"A",
new LinearShardSpec(0)
@@ -119,33 +234,214 @@ public void setup() throws Exception
@TearDown(Level.Trial)
public void tearDown() throws Exception
{
- appenderator.close();
- FileUtils.deleteDirectory(cacheDir);
+ try { // close the appenderator first; the finally block below still runs if close() throws
+ if (appenderator != null) {
+ appenderator.close();
+ }
+ }
+ finally {
+ if (queryExecutor != null) {
+ queryExecutor.shutdownNow(); // stops the single-threaded query executor created in setup()
+ }
+ try {
+ if (groupByBuffers != null) {
+ groupByBuffers.close();
+ }
+ }
+ finally {
+ FileUtils.deleteDirectory(cacheDir); // innermost finally: still runs if closing the group-by buffers throws
+ }
+ }
}
@Benchmark
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MILLISECONDS)
- public void emitSinkMetrics(Blackhole blackhole) throws Exception
+ public void runSinkQuery(Blackhole blackhole) throws Exception // runs one query of the parameterized type against the appenderator
{
- {
- final TimeseriesQuery query1 = Druids.newTimeseriesQueryBuilder()
- .dataSource(StreamAppenderatorTester.DATASOURCE)
- .intervals(ImmutableList.of(Intervals.of("2000/2001")))
- .aggregators(
- Arrays.asList(
- new LongSumAggregatorFactory("count", "count"),
- new LongSumAggregatorFactory("met", "met")
- )
- )
- .granularity(Granularities.DAY)
- .build();
-
- final List> results =
- QueryPlus.wrap(query1).run(appenderator, ResponseContext.createEmpty()).toList();
- blackhole.consume(results);
-
- serviceEmitter.flush();
+ final Query> query = makeQuery(); // query shape is selected by the queryType @Param
+ final List> results = QueryPlus.wrap(query).run(appenderator, ResponseContext.createEmpty()).toList();
+ blackhole.consume(results); // the fully materialized result list is handed to the JMH Blackhole
+
+ serviceEmitter.flush(); // kept from the former emitSinkMetrics body: the emitter is flushed inside the measured method
+ }
+
+ private Query> makeQuery() // maps the queryType @Param value to the matching query builder below
+ {
+ switch (queryType) {
+ case "timeseries":
+ return makeTimeseriesQuery();
+ case "scan":
+ return makeScanQuery();
+ case "segmentMetadata":
+ return makeSegmentMetadataQuery();
+ case "groupBy":
+ return makeGroupByQuery();
+ default:
+ throw new IllegalStateException("Unsupported query type[" + queryType + "]"); // not hit by the four values declared on the queryType @Param
}
}
+
+ private QueryRunnerFactoryConglomerate makeQueryRunnerFactoryConglomerate() // one runner factory per query class that makeQuery() can return
+ {
+ return DefaultQueryRunnerFactoryConglomerate.buildFromQueryRunnerFactories(
+ ImmutableMap., QueryRunnerFactory>builder()
+ .put(
+ TimeseriesQuery.class,
+ new TimeseriesQueryRunnerFactory(
+ new TimeseriesQueryQueryToolChest(),
+ new TimeseriesQueryEngine(),
+ QueryRunnerTestHelper.NOOP_QUERYWATCHER
+ )
+ )
+ .put(
+ ScanQuery.class,
+ new ScanQueryRunnerFactory(
+ new ScanQueryQueryToolChest(DefaultGenericQueryMetricsFactory.instance()),
+ new ScanQueryEngine(),
+ new ScanQueryConfig()
+ )
+ )
+ .put(
+ SegmentMetadataQuery.class,
+ new SegmentMetadataQueryRunnerFactory(
+ new SegmentMetadataQueryQueryToolChest(new SegmentMetadataQueryConfig()),
+ QueryRunnerTestHelper.NOOP_QUERYWATCHER
+ )
+ )
+ .put(
+ GroupByQuery.class,
+ GroupByQueryRunnerTest.makeQueryRunnerFactory(new GroupByQueryConfig(), groupByBuffers) // reads the groupByBuffers field, which setup() assigns before calling this method
+ )
+ .build()
+ );
+ }
+
+ private TimeseriesQuery makeTimeseriesQuery() // same interval, aggregators and DAY granularity as the query the former emitSinkMetrics benchmark built inline
+ {
+ return Druids.newTimeseriesQueryBuilder()
+ .dataSource(DATASOURCE)
+ .intervals(QUERY_INTERVALS)
+ .aggregators(makeAggregators())
+ .granularity(Granularities.DAY)
+ .context(makeQueryContext()) // carries the setProcessingThreadNames flag
+ .build();
+ }
+
+ private ScanQuery makeScanQuery() // scan over QUERY_INTERVALS returning compacted-list results
+ {
+ return Druids.newScanQueryBuilder()
+ .dataSource(DATASOURCE)
+ .intervals(QUERY_INTERVALS)
+ .columns(QUERY_COLUMNS) // __time, dim, count, met
+ .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
+ .context(makeQueryContext()) // carries the setProcessingThreadNames flag
+ .build();
+ }
+
+ private SegmentMetadataQuery makeSegmentMetadataQuery() // segment metadata query over QUERY_INTERVALS with five analysis types enabled
+ {
+ return Druids.newSegmentMetadataQueryBuilder()
+ .dataSource(DATASOURCE)
+ .intervals(QUERY_INTERVALS)
+ .toInclude(new ListColumnIncluderator(QUERY_COLUMNS)) // column list is the same one the scan query uses
+ .analysisTypes(
+ SegmentMetadataQuery.AnalysisType.CARDINALITY,
+ SegmentMetadataQuery.AnalysisType.SIZE,
+ SegmentMetadataQuery.AnalysisType.INTERVAL,
+ SegmentMetadataQuery.AnalysisType.MINMAX,
+ SegmentMetadataQuery.AnalysisType.AGGREGATORS
+ )
+ .merge(true) // NOTE(review): presumably merges the per-segment analyses into one result; confirm against SegmentMetadataQuery
+ .context(makeQueryContext()) // carries the setProcessingThreadNames flag
+ .build();
+ }
+
+ private GroupByQuery makeGroupByQuery() // group-by with no dimensions set: only the two aggregators at ALL granularity
+ {
+ return GroupByQuery.builder()
+ .setDataSource(DATASOURCE)
+ .setInterval("2000/2001") // same interval string that QUERY_INTERVALS is built from
+ .setGranularity(Granularities.ALL)
+ .setAggregatorSpecs(makeAggregators())
+ .setContext(makeQueryContext()) // carries the setProcessingThreadNames flag
+ .build();
+ }
+
+ private List makeAggregators() // shared by the timeseries and group-by queries
+ {
+ return Arrays.asList(
+ new LongSumAggregatorFactory("count", "count"), // sums the ingest-time "count" metric declared in makeDataSchema()
+ new LongSumAggregatorFactory("met", "met")
+ );
+ }
+
+ private Map makeQueryContext() // single-entry context shared by all four query builders
+ {
+ return ImmutableMap.of(SET_PROCESSING_THREAD_NAMES, setProcessingThreadNames); // key "setProcessingThreadNames" -> value of the @Param of the same name
+ }
+
+ private static ObjectMapper makeObjectMapper() // DefaultObjectMapper plus the subtypes, modules and injectables registered below
+ {
+ final ObjectMapper objectMapper = new DefaultObjectMapper();
+ objectMapper.registerSubtypes(LinearShardSpec.class); // shard spec type used for the segment id created in setup()
+ objectMapper.registerModules(new AggregatorsModule());
+ objectMapper.registerModules(new BuiltInTypesModule().getJacksonModules());
+ objectMapper.setInjectableValues(
+ new InjectableValues.Std()
+ .addValue(ExprMacroTable.class.getName(), TestExprMacroTable.INSTANCE)
+ .addValue(ObjectMapper.class.getName(), objectMapper) // registers the mapper itself as an injectable value
+ );
+ return objectMapper;
+ }
+
+ private static DataSchema makeDataSchema() // ingestion schema for the benchmark datasource "foo"
+ {
+ return DataSchema.builder()
+ .withDataSource(DATASOURCE)
+ .withTimestamp(new TimestampSpec("ts", "auto", null))
+ .withDimensions(DimensionsSpec.EMPTY) // NOTE(review): no explicit dimensions although QUERY_COLUMNS names "dim"; presumably discovered from the rows, confirm
+ .withAggregators(
+ new CountAggregatorFactory("count"), // queried through LongSumAggregatorFactory("count", "count") in makeAggregators()
+ new LongSumAggregatorFactory("met", "met")
+ )
+ .withGranularity(new UniformGranularitySpec(Granularities.MINUTE, Granularities.NONE, null))
+ .build();
+ }
+
+ private AppenderatorConfig makeTuningConfig() // instance method because it reads the cacheDir field, which setup() assigns first
+ {
+ return new TestAppenderatorConfig(
+ TuningConfig.DEFAULT_APPENDABLE_INDEX,
+ 1, // NOTE(review): presumably maxRowsInMemory, since the replaced tester was built with maxRowsInMemory(1); confirm against TestAppenderatorConfig
+ Runtime.getRuntime().totalMemory() / 3,
+ false,
+ IndexSpec.getDefault(),
+ 0,
+ false,
+ 0L,
+ OffHeapMemorySegmentWriteOutMediumFactory.instance(),
+ IndexMerger.UNLIMITED_MAX_COLUMNS_TO_MERGE,
+ cacheDir, // NOTE(review): presumably the base persist directory, as with basePersistDirectory(cacheDir) on the replaced tester; confirm
+ false
+ );
+ }
+
+ private static DataSegmentPusher makeDataSegmentPusher() // stub pusher passed to Appenderators.createRealtime in setup()
+ {
+ return new DataSegmentPusher()
+ {
+ @Override
+ public DataSegment push(File file, DataSegment segment, boolean useUniquePath)
+ {
+ return segment; // no-op push: returns the segment unchanged and ignores the file
+ }
+
+ @Override
+ public Map makeLoadSpec(URI uri)
+ {
+ throw new UnsupportedOperationException(); // NOTE(review): presumably never reached by this benchmark; confirm
+ }
+ };
+ }
}
diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlExpressionBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlExpressionBenchmark.java
index 8988973f9825..0ef6395a1fce 100644
--- a/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlExpressionBenchmark.java
+++ b/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlExpressionBenchmark.java
@@ -21,12 +21,15 @@
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
+import org.apache.druid.math.expr.ExpressionProcessing;
import org.apache.druid.query.QueryContexts;
import org.apache.druid.query.groupby.GroupByQueryConfig;
import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Level;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Param;
import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.Warmup;
@@ -171,6 +174,9 @@ public class SqlExpressionBenchmark extends SqlBaseQueryBenchmark
})
private String deferExpressionDimensions;
+ @Param({"false", "true"})
+ private boolean useVectorApi;
+
@Param({
// non-expression reference
"0",
@@ -238,6 +244,16 @@ public class SqlExpressionBenchmark extends SqlBaseQueryBenchmark
})
private String query;
+ @Setup(Level.Trial) // trial-level setup: ExpressionProcessing is initialized according to the useVectorApi @Param
+ public void setupExpressionProcessing()
+ {
+ if (useVectorApi) {
+ ExpressionProcessing.initializeForVectorApiTests();
+ } else {
+ ExpressionProcessing.initializeForTests();
+ }
+ }
+
@Override
public String getQuery()
{
diff --git a/cloud/aws-common/src/main/java/org/apache/druid/common/aws/AWSClientConfig.java b/cloud/aws-common/src/main/java/org/apache/druid/common/aws/AWSClientConfig.java
index e8d299cbd851..9fdf9ba592f7 100644
--- a/cloud/aws-common/src/main/java/org/apache/druid/common/aws/AWSClientConfig.java
+++ b/cloud/aws-common/src/main/java/org/apache/druid/common/aws/AWSClientConfig.java
@@ -19,7 +19,9 @@
package org.apache.druid.common.aws;
+import com.fasterxml.jackson.annotation.JacksonInject;
import com.fasterxml.jackson.annotation.JsonProperty;
+import org.apache.druid.utils.RuntimeInfo;
import javax.annotation.Nullable;
@@ -31,7 +33,17 @@ public class AWSClientConfig
private static final int DEFAULT_CONNECTION_TIMEOUT_MILLIS = 10_000;
private static final int DEFAULT_SOCKET_TIMEOUT_MILLIS = 50_000;
- private static final int DEFAULT_MAX_CONNECTIONS = 50;
+ /** AWS SDK v2's own default. */
+ private static final int DEFAULT_MAX_CONNECTIONS_FLOOR = 50;
+
+ /**
+ * Used by {@link #getMaxConnections} to scale the default connection pool with host size so hosts large enough to
+ * do a lot of concurrent deep-storage I/O (e.g. virtual-storage historicals fanning out on-demand loads to S3)
+ * aren't bottlenecked at the SDK's connection pool. The field initializer covers direct construction (no Jackson);
+ * Jackson overwrites with the injected {@link RuntimeInfo} during deserialization.
+ */
+ @JacksonInject
+ private final RuntimeInfo runtimeInfo = new RuntimeInfo();
@JsonProperty
private String protocol = "https"; // The default of aws-java-sdk
@@ -60,8 +72,13 @@ public class AWSClientConfig
@JsonProperty
private int socketTimeout = DEFAULT_SOCKET_TIMEOUT_MILLIS;
+ /**
+ * Null means use the dynamic default in {@link #getMaxConnections} ({@code max(50, 4 × availableProcessors)});
+ * any explicit value set in JSON wins.
+ */
@JsonProperty
- private int maxConnections = DEFAULT_MAX_CONNECTIONS;
+ @Nullable
+ private Integer maxConnections = null;
public String getProtocol()
{
@@ -123,7 +140,10 @@ public int getSocketTimeoutMillis()
public int getMaxConnections()
{
- return maxConnections;
+ if (maxConnections != null) { // an explicit value from JSON takes precedence over the computed default
+ return maxConnections;
+ }
+ return Math.max(DEFAULT_MAX_CONNECTIONS_FLOOR, 4 * runtimeInfo.getAvailableProcessors()); // default: 4 per available processor, never below the floor of 50
}
@Override
@@ -136,7 +156,7 @@ public String toString()
", crossRegionAccessEnabled=" + isCrossRegionAccessEnabled() +
", connectionTimeout=" + connectionTimeout +
", socketTimeout=" + socketTimeout +
- ", maxConnections=" + maxConnections +
+ ", maxConnections=" + getMaxConnections() +
'}';
}
}
diff --git a/cloud/aws-common/src/test/java/org/apache/druid/common/aws/AWSClientConfigTest.java b/cloud/aws-common/src/test/java/org/apache/druid/common/aws/AWSClientConfigTest.java
index 4e8837566ca7..99b927efb2ba 100644
--- a/cloud/aws-common/src/test/java/org/apache/druid/common/aws/AWSClientConfigTest.java
+++ b/cloud/aws-common/src/test/java/org/apache/druid/common/aws/AWSClientConfigTest.java
@@ -19,13 +19,24 @@
package org.apache.druid.common.aws;
+import com.fasterxml.jackson.databind.InjectableValues;
import com.fasterxml.jackson.databind.ObjectMapper;
+import org.apache.druid.utils.RuntimeInfo;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
public class AWSClientConfigTest
{
- private static final ObjectMapper MAPPER = new ObjectMapper();
+ private static final ObjectMapper MAPPER = new ObjectMapper().setInjectableValues(
+ new InjectableValues.Std().addValue(RuntimeInfo.class, new RuntimeInfo())
+ );
+
+ private static ObjectMapper mapperWithRuntimeInfo(RuntimeInfo runtimeInfo) // fresh mapper that supplies the given RuntimeInfo as an injectable value
+ {
+ return new ObjectMapper().setInjectableValues(
+ new InjectableValues.Std().addValue(RuntimeInfo.class, runtimeInfo) // keyed by class, for AWSClientConfig's @JacksonInject RuntimeInfo field
+ );
+ }
@Test
public void testDefaultCrossRegionAccessEnabled() throws Exception
@@ -83,4 +94,44 @@ public void testDeprecatedNotSetFallsThroughToCrossRegion() throws Exception
Assertions.assertNull(config.isForceGlobalBucketAccessEnabled());
Assertions.assertTrue(config.isCrossRegionAccessEnabled());
}
+
+ @Test
+ public void testDefaultMaxConnectionsKeepsAwsSdkFloorOnSmallHost() throws Exception
+ {
+ AWSClientConfig config = mapperWithRuntimeInfo(new FixedProcessorsRuntimeInfo(8))
+ .readValue("{}", AWSClientConfig.class); // empty JSON: maxConnections is left unset
+ Assertions.assertEquals(50, config.getMaxConnections()); // 4 * 8 = 32 is below the floor of 50
+ }
+
+ @Test
+ public void testDefaultMaxConnectionsScalesWithCoresOnLargeHost() throws Exception
+ {
+ AWSClientConfig config = mapperWithRuntimeInfo(new FixedProcessorsRuntimeInfo(32))
+ .readValue("{}", AWSClientConfig.class); // empty JSON: maxConnections is left unset
+ Assertions.assertEquals(128, config.getMaxConnections()); // 4 * 32 = 128 exceeds the floor of 50
+ }
+
+ @Test
+ public void testExplicitMaxConnectionsOverridesDefault() throws Exception
+ {
+ AWSClientConfig config = mapperWithRuntimeInfo(new FixedProcessorsRuntimeInfo(64))
+ .readValue("{\"maxConnections\": 200}", AWSClientConfig.class);
+ Assertions.assertEquals(200, config.getMaxConnections()); // explicit 200 is returned, not the computed 4 * 64 = 256
+ }
+
+ private static final class FixedProcessorsRuntimeInfo extends RuntimeInfo // test double that reports a fixed processor count
+ {
+ private final int availableProcessors;
+
+ private FixedProcessorsRuntimeInfo(int availableProcessors)
+ {
+ this.availableProcessors = availableProcessors;
+ }
+
+ @Override
+ public int getAvailableProcessors() // the only RuntimeInfo method overridden here
+ {
+ return availableProcessors;
+ }
+ }
}
diff --git a/docs/api-reference/service-status-api.md b/docs/api-reference/service-status-api.md
index 1ba8b55c4d78..1c192162d5d0 100644
--- a/docs/api-reference/service-status-api.md
+++ b/docs/api-reference/service-status-api.md
@@ -154,36 +154,6 @@ Host: http://ROUTER_IP:ROUTER_PORT
"name": "org.apache.druid.query.aggregation.datasketches.kll.KllSketchModule",
"artifact": "druid-datasketches",
"version": "26.0.0"
- },
- {
- "name": "org.apache.druid.msq.guice.MSQExternalDataSourceModule",
- "artifact": "druid-multi-stage-query",
- "version": "26.0.0"
- },
- {
- "name": "org.apache.druid.msq.guice.MSQIndexingModule",
- "artifact": "druid-multi-stage-query",
- "version": "26.0.0"
- },
- {
- "name": "org.apache.druid.msq.guice.MSQDurableStorageModule",
- "artifact": "druid-multi-stage-query",
- "version": "26.0.0"
- },
- {
- "name": "org.apache.druid.msq.guice.MSQServiceClientModule",
- "artifact": "druid-multi-stage-query",
- "version": "26.0.0"
- },
- {
- "name": "org.apache.druid.msq.guice.MSQSqlModule",
- "artifact": "druid-multi-stage-query",
- "version": "26.0.0"
- },
- {
- "name": "org.apache.druid.msq.guice.SqlTaskModule",
- "artifact": "druid-multi-stage-query",
- "version": "26.0.0"
}
],
"memory": {
@@ -326,7 +296,7 @@ Host: http://ROUTER_IP:ROUTER_PORT
"log4j.shutdownHookEnabled": "true",
"java.vm.vendor": "Homebrew",
"sun.arch.data.model": "64",
- "druid.extensions.loadList": "[\"druid-hdfs-storage\", \"druid-kafka-indexing-service\", \"druid-datasketches\", \"druid-multi-stage-query\"]",
+ "druid.extensions.loadList": "[\"druid-hdfs-storage\", \"druid-kafka-indexing-service\", \"druid-datasketches\"]",
"java.vendor.url": "https://github.com/Homebrew/homebrew-core/issues",
"druid.router.coordinatorServiceName": "druid/coordinator",
"user.timezone": "UTC",
diff --git a/docs/api-reference/sql-ingestion-api.md b/docs/api-reference/sql-ingestion-api.md
index 59942aff8e0c..9348291581e8 100644
--- a/docs/api-reference/sql-ingestion-api.md
+++ b/docs/api-reference/sql-ingestion-api.md
@@ -26,9 +26,8 @@ import TabItem from '@theme/TabItem';
-->
:::info
- This page describes SQL-based batch ingestion using the [`druid-multi-stage-query`](../multi-stage-query/index.md)
- extension, new in Druid 24.0. Refer to the [ingestion methods](../ingestion/index.md#batch) table to determine which
- ingestion method is right for you.
+ This page describes SQL-based batch ingestion using the [multi-stage query (MSQ) task engine](../multi-stage-query/index.md).
+ Refer to the [ingestion methods](../ingestion/index.md#batch) table to determine which ingestion method is right for you.
:::
The **Query** view in the web console provides a friendly experience for the multi-stage query task engine (MSQ task engine) and multi-stage query architecture. We recommend using the web console if you don't need a programmatic interface.
@@ -847,4 +846,4 @@ The response shows the ID of the task that was canceled.
{
"task": "query-655efe33-781a-4c50-ae84-c2911b42d63c"
}
-```
\ No newline at end of file
+```
diff --git a/docs/api-reference/supervisor-api.md b/docs/api-reference/supervisor-api.md
index d321af143020..8f9c5c36dc5c 100644
--- a/docs/api-reference/supervisor-api.md
+++ b/docs/api-reference/supervisor-api.md
@@ -3539,6 +3539,109 @@ when the supervisor's tasks restart, they resume reading from `{"0": 100, "1": 1
```
+### Reset offsets to latest and start a backfill supervisor
+
+This endpoint is supported for Apache Kafka and RabbitMQ Stream supervisors. Amazon Kinesis is not supported yet.
+
+Resets the supervisor to the latest available stream offsets and starts a new bounded backfill supervisor to ingest the data in the skipped range.
+
+This endpoint is useful when a supervisor has fallen behind and you want to catch it up to the latest offsets without losing the skipped data. The main supervisor resumes ingesting from the latest offsets, while the backfill supervisor processes the range from the previously checkpointed offsets up to the latest offsets at the time of the reset.
+
+**Duplicate ingestion notice:** The main supervisor is not quiesced before the reset. This means duplicate data can occur in two ways:
+- **Backfill overlap:** Any tasks that were in-flight at the time of the reset may publish segments covering part of the backfill range before being shut down.
+- **Reset race:** If a task checkpoint is written to the metadata store between when this endpoint captures the current offsets and when it applies the reset, that checkpoint can be overwritten, causing the main supervisor to re-ingest already-processed data.
+
+Both windows are narrow in practice, but cannot be fully eliminated without manually suspending the main supervisor before calling this endpoint and waiting for all pending tasks to complete.
+
+The following requirements must be met before calling this endpoint:
+
+- The supervisor must be a [streaming supervisor](../ingestion/supervisor.md).
+- The supervisor's `useEarliestSequenceNumber` property must be `false`.
+- The supervisor context must have `useConcurrentLocks` set to `true` to allow the backfill supervisor's tasks to write concurrently with the main supervisor's tasks.
+- The supervisor must be in a `RUNNING` state.
+
+The backfill supervisor has the same configuration as the source supervisor except for its ID, which takes the form `{supervisorId}_backfill_{randomSuffix}`, and its `boundedStreamConfig`, which is set to the skipped offset range. If `backfillTaskCount` is specified, it overrides the `taskCount` for the backfill supervisor only.
+
+#### URL
+
+`POST` `/druid/indexer/v1/supervisor/{supervisorId}/resetToLatestAndBackfill`
+
+#### Query parameters
+
+| Parameter | Type | Description | Default |
+|---------|---------|---------|---------|
+| `backfillTaskCount` | Integer | Number of parallel tasks for the backfill supervisor. | Defaults to `taskCount` from the source supervisor if not specified |
+
+#### Responses
+
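+<Tabs>
+
+<TabItem value="1" label="200 SUCCESS">
+
+
+*Successfully reset and started backfill supervisor*
+
+</TabItem>
+<TabItem value="2" label="400 BAD REQUEST">
+
+
+*Supervisor does not meet requirements (wrong type, `useEarliestSequenceNumber` is true, `useConcurrentLocks` not enabled, or supervisor not RUNNING)*
+
+</TabItem>
+<TabItem value="3" label="404 NOT FOUND">
+
+
+*Invalid supervisor ID*
+
+</TabItem>
+<TabItem value="4" label="500 SERVER ERROR">
+
+
+*Failed to retrieve stream offsets or serialize the backfill spec*
+
+</TabItem>
+</Tabs>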
+
+---
+
+#### Sample request
+
+The following example resets a supervisor named `social_media` and starts a backfill supervisor with 2 tasks.
+
+<Tabs>
+
+<TabItem value="5" label="cURL">
+
+
+```shell
+curl --request POST "http://ROUTER_IP:ROUTER_PORT/druid/indexer/v1/supervisor/social_media/resetToLatestAndBackfill?backfillTaskCount=2"
+```
+
+</TabItem>
+<TabItem value="6" label="HTTP">
+
+
+```HTTP
+POST /druid/indexer/v1/supervisor/social_media/resetToLatestAndBackfill?backfillTaskCount=2 HTTP/1.1
+Host: http://ROUTER_IP:ROUTER_PORT
+```
+
+</TabItem>
+</Tabs>
+
+#### Sample response
+
+<details>
+ <summary>View the response</summary>
+
+ ```json
+{
+ "id": "social_media",
+ "backfillSupervisorId": "social_media_backfill_abcdefgh"
+}
+ ```
+</details>
+
### Terminate a supervisor
Terminates a supervisor and its associated indexing tasks, triggering the publishing of their segments. When you terminate a supervisor, Druid places a tombstone marker in the metadata store to prevent reloading on restart.
diff --git a/docs/configuration/extensions.md b/docs/configuration/extensions.md
index 6c802739fc4b..31f1a5b62b29 100644
--- a/docs/configuration/extensions.md
+++ b/docs/configuration/extensions.md
@@ -50,7 +50,6 @@ Core extensions are maintained by Druid committers.
|druid-kerberos|Kerberos authentication for druid processes.|[link](../development/extensions-core/druid-kerberos.md)|
|druid-lookups-cached-global|A module for [lookups](../querying/lookups.md) providing a jvm-global eager caching for lookups. It provides JDBC and URI implementations for fetching lookup data.|[link](../querying/lookups-cached-global.md)|
|druid-lookups-cached-single| Per lookup caching module to support the use cases where a lookup need to be isolated from the global pool of lookups |[link](../development/extensions-core/druid-lookups.md)|
-|druid-multi-stage-query| Support for the multi-stage query architecture for Apache Druid and the multi-stage query task engine.|[link](../multi-stage-query/index.md)|
|druid-orc-extensions|Support for data in Apache ORC data format.|[link](../development/extensions-core/orc.md)|
|druid-parquet-extensions|Support for data in Apache Parquet data format. Requires druid-avro-extensions to be loaded.|[link](../development/extensions-core/parquet.md)|
|druid-protobuf-extensions| Support for data in Protobuf data format.|[link](../development/extensions-core/protobuf.md)|
diff --git a/docs/configuration/index.md b/docs/configuration/index.md
index f0b80523c401..aad78964f199 100644
--- a/docs/configuration/index.md
+++ b/docs/configuration/index.md
@@ -156,24 +156,6 @@ Druid interacts with ZooKeeper through a set of standard path configurations. We
|`druid.zk.paths.base`|Base ZooKeeper path.|`/druid`|
|`druid.zk.paths.coordinatorPath`|Used by the Coordinator for leader election.|`${druid.zk.paths.base}/coordinator`|
-The indexing service also uses its own set of paths. These configs can be included in the common configuration.
-
-|Property|Description|Default|
-|--------|-----------|-------|
-|`druid.zk.paths.indexer.base`|Base ZooKeeper path for |`${druid.zk.paths.base}/indexer`|
-|`druid.zk.paths.indexer.announcementsPath`|Middle Managers announce themselves here.|`${druid.zk.paths.indexer.base}/announcements`|
-|`druid.zk.paths.indexer.tasksPath`|Used to assign tasks to Middle Managers.|`${druid.zk.paths.indexer.base}/tasks`|
-|`druid.zk.paths.indexer.statusPath`|Parent path for announcement of task statuses.|`${druid.zk.paths.indexer.base}/status`|
-
-If `druid.zk.paths.base` and `druid.zk.paths.indexer.base` are both set, and none of the other `druid.zk.paths.*` or `druid.zk.paths.indexer.*` values are set, then the other properties will be evaluated relative to their respective `base`.
-For example, if `druid.zk.paths.base` is set to `/druid1` and `druid.zk.paths.indexer.base` is set to `/druid2` then `druid.zk.paths.coordinatorPath` will default to `/druid1/coordinator` while `druid.zk.paths.indexer.announcementsPath` will default to `/druid2/announcements`.
-
-The following path is used for service discovery. It is **not** affected by `druid.zk.paths.base` and **must** be specified separately.
-
-|Property|Description|Default|
-|--------|-----------|-------|
-|`druid.discovery.curator.path`|Services announce themselves under this ZooKeeper path.|`/druid/discovery`|
-
### TLS
#### General configuration
@@ -724,8 +706,8 @@ These Coordinator static configurations can be defined in the `coordinator/runti
|`druid.coordinator.period`|The run period for the Coordinator. The Coordinator operates by maintaining the current state of the world in memory and periodically looking at the set of "used" segments and segments being served to make decisions about whether any changes need to be made to the data topology. This property sets the delay between each of these runs.|`PT60S`|
|`druid.coordinator.startDelay`|The operation of the Coordinator works on the assumption that it has an up-to-date view of the state of the world when it runs, the current ZooKeeper interaction code, however, is written in a way that doesn’t allow the Coordinator to know for a fact that it’s done loading the current state of the world. This delay is a hack to give it enough time to believe that it has all the data.|`PT300S`|
|`druid.coordinator.load.timeout`|The timeout duration for when the Coordinator assigns a segment to a Historical service.|`PT15M`|
-|`druid.coordinator.balancer.strategy`|The [balancing strategy](../design/coordinator.md#balancing-segments-in-a-tier) used by the Coordinator to distribute segments among the Historical servers in a tier. The `cost` strategy distributes segments by minimizing a cost function, `diskNormalized` weights these costs with the disk usage ratios of the servers and `random` distributes segments randomly.|`cost`|
-|`druid.coordinator.balancer.diskNormalized.moveCostSavingsThreshold`|Only used when `druid.coordinator.balancer.strategy` is `diskNormalized`. Minimum fractional cost reduction required before a segment is moved off a server that already holds it. A value of `0.05` requires the destination to be at least 5% cheaper than the source, which prevents oscillation between servers with similar disk utilization. Must be in `[0.0, 1.0)`; `0.0` disables the anti-oscillation discount.|`0.05`|
+|`druid.coordinator.balancer.strategy`|The [balancing strategy](../design/coordinator.md#balancing-segments-in-a-tier) used by the Coordinator to distribute segments among the Historical servers in a tier. The `cost` strategy distributes segments by minimizing a cost function, `diskNormalized` divides these costs by the projected available disk headroom of each server and `random` distributes segments randomly.|`cost`|
+|`druid.coordinator.balancer.diskNormalized.moveCostSavingsThreshold`|Only used when `druid.coordinator.balancer.strategy` is `diskNormalized`. Minimum fractional cost reduction required before a segment is moved off a server that already holds it. A value of `0.05` requires the destination to be at least 5% cheaper than the source, which prevents oscillation between servers with similar projected headroom. Must be in `[0.0, 1.0)`; `0.0` disables the anti-oscillation discount.|`0.05`|
|`druid.coordinator.loadqueuepeon.http.repeatDelay`|The start and repeat delay (in milliseconds) for the load queue peon, which manages the load/drop queue of segments for any server.|1 minute|
|`druid.coordinator.loadqueuepeon.http.batchSize`|Number of segment load/drop requests to batch in one HTTP request. Note that it must be smaller than or equal to the `druid.segmentCache.numLoadingThreads` config on Historical service. If this value is not configured, the coordinator uses the value of the `numLoadingThreads` for the respective server. | `druid.segmentCache.numLoadingThreads` |
|`druid.coordinator.asOverlord.enabled`|Boolean value for whether this Coordinator service should act like an Overlord as well. This configuration allows users to simplify a Druid cluster by not having to deploy any standalone Overlord services. If set to true, then Overlord console is available at `http://coordinator-host:port/console.html` and be sure to set `druid.coordinator.asOverlord.overlordService` also.|false|
@@ -966,7 +948,7 @@ These Overlord static configurations can be defined in the `overlord/runtime.pro
|Property|Description|Default|
|--------|-----------|-------|
-|`druid.indexer.runner.type`|Indicates whether tasks should be run locally using `local` or in a distributed environment using `remote`. The recommended option is `httpRemote`, which is similar to `remote` but uses HTTP to interact with Middle Managers instead of ZooKeeper.|`httpRemote`|
+|`druid.indexer.runner.type`|Indicates whether tasks should be run locally using `local` or in a distributed environment using `httpRemote`. `httpRemote` is recommended for distributed deployments and uses HTTP to interact with Middle Managers.|`httpRemote`|
|`druid.indexer.server.maxConcurrentActions`|Maximum number of concurrent action requests (such as getting locks, creating segments, fetching segments etc) that the Overlord will process simultaneously. This prevents thread exhaustion while preserving access to health check endpoints. Set to `0` to disable quality of service filtering entirely. If not specified, defaults to `max(1, max(serverHttpNumThreads - 4, serverHttpNumThreads * 0.8))`.|`max(1, max(serverHttpNumThreads - 4, serverHttpNumThreads * 0.8))`|
|`druid.indexer.storage.type`|Indicates whether incoming tasks should be stored locally (in heap) or in metadata storage. One of `local` or `metadata`. `local` is mainly for internal testing while `metadata` is recommended in production because storing incoming tasks in metadata storage allows for tasks to be resumed if the Overlord should fail.|`local`|
|`druid.indexer.storage.recentlyFinishedThreshold`|Duration of time to store task results. Default is 24 hours. If you have hundreds of tasks running in a day, consider increasing this threshold.|`PT24H`|
@@ -981,17 +963,14 @@ These Overlord static configurations can be defined in the `overlord/runtime.pro
|`druid.indexer.queue.storageSyncRate`|Sync Overlord state this often with an underlying task persistence mechanism.|`PT1M`|
|`druid.indexer.queue.maxTaskPayloadSize`|Maximum allowed size in bytes of a single task payload accepted by the Overlord.|none (allow all task payload sizes)|
-The following configs only apply if the Overlord is running in remote mode. For a description of local vs. remote mode, see [Overlord service](../design/overlord.md).
+The following configs apply when the Overlord is running with the `httpRemote` runner. For a description of local vs. distributed mode, see [Overlord service](../design/overlord.md).
|Property|Description|Default|
|--------|-----------|-------|
|`druid.indexer.runner.taskAssignmentTimeout`|How long to wait after a task has been assigned to a Middle Manager before throwing an error.|`PT5M`|
|`druid.indexer.runner.minWorkerVersion`|The minimum Middle Manager version to send tasks to. The version number is a string. This affects the expected behavior during certain operations like comparison against `druid.worker.version`. Specifically, the version comparison follows dictionary order. Use ISO8601 date format for the version to accommodate date comparisons. |"0"|
|`druid.indexer.runner.parallelIndexTaskSlotRatio`| The ratio of task slots available for parallel indexing supervisor tasks per worker. The specified value must be in the range `[0, 1]`. |1|
-|`druid.indexer.runner.compressZnodes`|Indicates whether or not the Overlord should expect Middle Managers to compress Znodes.|true|
-|`druid.indexer.runner.maxZnodeBytes`|The maximum size Znode in bytes that can be created in ZooKeeper, should be in the range of `[10KiB, 2GiB)`. [Human-readable format](human-readable-byte.md) is supported.| 512 KiB |
-|`druid.indexer.runner.taskCleanupTimeout`|How long to wait before failing a task after a Middle Manager is disconnected from ZooKeeper.|`PT15M`|
-|`druid.indexer.runner.taskShutdownLinkTimeout`|How long to wait on a shutdown request to a Middle Manager before timing out|`PT1M`|
+|`druid.indexer.runner.taskCleanupTimeout`|How long to wait before failing a task after a Middle Manager is disconnected.|`PT15M`|
|`druid.indexer.runner.pendingTasksRunnerNumThreads`|Number of threads to allocate pending-tasks to workers, must be at least 1.|1|
|`druid.indexer.runner.maxRetriesBeforeBlacklist`|Number of consecutive times the Middle Manager can fail tasks, before the worker is blacklisted, must be at least 1|5|
|`druid.indexer.runner.workerBlackListBackoffTime`|How long to wait before a task is whitelisted again. This value should be greater that the value set for taskBlackListCleanupPeriod.|`PT15M`|
@@ -1322,12 +1301,10 @@ Middle Managers pass their configurations down to their child peons. The Middle
|Property|Description|Default|
|--------|-----------|-------|
|`druid.indexer.runner.allowedPrefixes`|Whitelist of prefixes for configs that can be passed down to child peons.|`com.metamx`, `druid`, `org.apache.druid`, `user.timezone`, `file.encoding`, `java.io.tmpdir`, `hadoop`|
-|`druid.indexer.runner.compressZnodes`|Indicates whether or not the Middle Managers should compress Znodes.|true|
|`druid.indexer.runner.classpath`|Java classpath for the peon.|`System.getProperty("java.class.path")`|
|`druid.indexer.runner.javaCommand`|Command required to execute java.|java|
|`druid.indexer.runner.javaOpts`|_DEPRECATED_ A string of -X Java options to pass to the peon's JVM. Quotable parameters or parameters with spaces are encouraged to use javaOptsArray|`''`|
|`druid.indexer.runner.javaOptsArray`|A JSON array of strings to be passed in as options to the peon's JVM. This is additive to `druid.indexer.runner.javaOpts` and is recommended for properly handling arguments which contain quotes or spaces like `["-XX:OnOutOfMemoryError=kill -9 %p"]`|`[]`|
-|`druid.indexer.runner.maxZnodeBytes`|The maximum size Znode in bytes that can be created in ZooKeeper, should be in the range of [10KiB, 2GiB). [Human-readable format](human-readable-byte.md) is supported.|512KiB|
|`druid.indexer.runner.startPort`|Starting port used for Peon services, should be greater than 1023 and less than 65536.|8100|
|`druid.indexer.runner.endPort`|Ending port used for Peon services, should be greater than or equal to `druid.indexer.runner.startPort` and less than 65536.|65535|
|`druid.indexer.runner.ports`|A JSON array of integers to specify ports that used for Peon services. If provided and non-empty, ports for Peon services will be chosen from these ports. And `druid.indexer.runner.startPort/druid.indexer.runner.endPort` will be completely ignored.|`[]`|
diff --git a/docs/design/coordinator.md b/docs/design/coordinator.md
index e63a5b4c3d54..f2d735000cf9 100644
--- a/docs/design/coordinator.md
+++ b/docs/design/coordinator.md
@@ -88,7 +88,7 @@ But in a tier with several Historicals (or a low replication factor), segment re
Thus, the Coordinator constantly monitors the set of segments present on each Historical in a tier and employs one of the following strategies to identify segments that may be moved from one Historical to another to retain balance.
- `cost` (default): For a given segment in a tier, this strategy picks the server with the minimum "cost" of placing that segment. The cost is a function of the data interval of the segment and the data intervals of all the segments already present on the candidate server. In essence, this strategy tries to avoid placing segments with adjacent or overlapping data intervals on the same server. This is based on the premise that adjacent-interval segments are more likely to be used together in a query and placing them on the same server may lead to skewed CPU usages of Historicals.
-- `diskNormalized`: A derivative of the `cost` strategy that multiplies the cost of placing a segment on a server by the server's disk usage ratio (`diskUsed / maxSize`). This penalizes fuller servers and drives disk utilization to equalize across the tier, which is useful when historicals within a tier hold segments of widely varying sizes. To prevent oscillation when servers have similar utilization, a segment that is already placed on a server receives a cost discount; a move only fires when the destination saves at least `druid.coordinator.balancer.diskNormalized.moveCostSavingsThreshold` (default `0.05`, i.e. 5%) of the source's cost.
+- `diskNormalized`: A derivative of the `cost` strategy that divides the cost of placing a segment on a server by the server's projected available disk headroom. The projected usage ratio is `(diskUsed + segmentSizeIfNotAlreadyProjected) / maxSize`, so the disk-adjusted cost is `cost / max(EPSILON, 1 - projectedUsageRatio)`. This strongly penalizes servers that would be nearly full after placement and drives disk utilization to equalize across the tier, which is useful when historicals within a tier hold segments of widely varying sizes. To prevent oscillation when servers have similar headroom, a segment that is already placed on a server receives a cost discount; a move only fires when the destination saves at least `druid.coordinator.balancer.diskNormalized.moveCostSavingsThreshold` (default `0.05`, i.e. 5%) of the source's cost.
- `random`: Distributes segments randomly across servers. This is an experimental strategy and is not recommended for a production cluster.
All of the above strategies prioritize moving segments from the Historical with the least available disk space.
diff --git a/docs/design/zookeeper.md b/docs/design/zookeeper.md
index ca64e1a0d5bc..d69ba92f0a1c 100644
--- a/docs/design/zookeeper.md
+++ b/docs/design/zookeeper.md
@@ -36,9 +36,8 @@ The operations that happen over ZK are:
1. [Coordinator](../design/coordinator.md) leader election
2. [Overlord](../design/overlord.md) leader election
3. Service (node) announcement and discovery — services announce their presence so other services can find them
-4. [Overlord](../design/overlord.md) and [Middle Manager](../design/middlemanager.md) task management
-Segment loading, dropping, and discovery no longer use ZooKeeper — they are served over HTTP.
+Segment loading, segment discovery, and Overlord ↔ Middle Manager task management no longer use ZooKeeper — they are served over HTTP.
## Coordinator leader election
diff --git a/docs/development/extensions-core/k8s-jobs.md b/docs/development/extensions-core/k8s-jobs.md
index 67be33522ef1..b65a7bb496bd 100644
--- a/docs/development/extensions-core/k8s-jobs.md
+++ b/docs/development/extensions-core/k8s-jobs.md
@@ -1019,7 +1019,7 @@ To do this, set the following property.
|Property| Possible Values |Description|Default|required|
|--------|-----------------|-----------|-------|--------|
|`druid.indexer.runner.k8sAndWorker.runnerStrategy.type`| `String` (e.g., `k8s`, `worker`, `taskType`)| Defines the strategy for task runner selection. |`k8s`|No|
-|`druid.indexer.runner.k8sAndWorker.runnerStrategy.workerType`| `String` (e.g., `httpRemote`, `remote`)| Specifies the variant of the worker task runner to be utilized.|`httpRemote`|No|
+|`druid.indexer.runner.k8sAndWorker.runnerStrategy.workerType`| `String` (e.g., `httpRemote`)| Specifies the variant of the worker task runner to be utilized.|`httpRemote`|No|
| **For `taskType` runner strategy:**|||||
|`druid.indexer.runner.k8sAndWorker.runnerStrategy.taskType.default`| `String` (e.g., `k8s`, `worker`) | Specifies the default runner to use if no overrides apply. This setting ensures there is always a fallback runner available.|None|No|
|`druid.indexer.runner.k8sAndWorker.runnerStrategy.taskType.overrides`| `JsonObject`(e.g., `{"index_kafka": "worker"}`)| Defines task-specific overrides for runner types. Each entry sets a task type to a specific runner, allowing fine control. |`{}`|No|
diff --git a/docs/ingestion/native-batch.md b/docs/ingestion/native-batch.md
index 50eaf43366dc..986d7e977975 100644
--- a/docs/ingestion/native-batch.md
+++ b/docs/ingestion/native-batch.md
@@ -24,7 +24,7 @@ sidebar_label: JSON-based batch
-->
:::info
- This page describes JSON-based batch ingestion using [ingestion specs](ingestion-spec.md). For SQL-based batch ingestion using the [`druid-multi-stage-query`](../multi-stage-query/index.md) engine, see [SQL-based ingestion](../multi-stage-query/index.md). Refer to the [ingestion methods](../ingestion/index.md#batch) table to determine which ingestion method is right for you.
+ This page describes JSON-based batch ingestion using [ingestion specs](ingestion-spec.md). For SQL-based batch ingestion using the [multi-stage query (MSQ) task engine](../multi-stage-query/index.md), see [SQL-based ingestion](../multi-stage-query/index.md). Refer to the [ingestion methods](../ingestion/index.md#batch) table to determine which ingestion method is right for you.
:::
Apache Druid supports the following types of JSON-based batch indexing tasks:
diff --git a/docs/multi-stage-query/security.md b/docs/multi-stage-query/security.md
index 77acafc29f51..0a50b68d4d6f 100644
--- a/docs/multi-stage-query/security.md
+++ b/docs/multi-stage-query/security.md
@@ -23,9 +23,9 @@ sidebar_label: Security
~ under the License.
-->
-All authenticated users can use the multi-stage query task engine (MSQ task engine) through the UI and API if the
-extension is loaded. However, without additional permissions, users are not able to issue queries that read or write
-Druid datasources or external data. The permission needed depends on what the user is trying to do.
+All authenticated users can use the multi-stage query task engine (MSQ task engine) through the UI and API. However,
+without additional permissions, users are not able to issue queries that read or write Druid datasources or external
+data. The permission needed depends on what the user is trying to do.
To submit a query:
@@ -77,4 +77,3 @@ The MSQ task engine needs the following permissions for pushing, fetching, and r
- `Microsoft.Storage/storageAccounts/blobServices/containers/blobs/delete` to delete files when they're no longer needed.
-
diff --git a/docs/operations/java.md b/docs/operations/java.md
index f4a8c029db24..c6117e1f4263 100644
--- a/docs/operations/java.md
+++ b/docs/operations/java.md
@@ -85,5 +85,9 @@ added. There are many ways of doing this. Choose the one that works best for you
--add-opens=java.base/jdk.internal.ref=ALL-UNNAMED \
--add-opens=java.base/java.io=ALL-UNNAMED \
--add-opens=java.base/java.lang=ALL-UNNAMED \
---add-opens=jdk.management/com.sun.management.internal=ALL-UNNAMED
+--add-opens=jdk.management/com.sun.management.internal=ALL-UNNAMED \
+--add-modules=jdk.incubator.vector
```
+
+The `--add-modules=jdk.incubator.vector` flag is optional, but adding it makes the JDK's incubator Vector API available
+to Druid to support `druid.expressions.useVectorApi=true`.
diff --git a/docs/operations/web-console.md b/docs/operations/web-console.md
index ef1118ebc4ce..5d935106c3d5 100644
--- a/docs/operations/web-console.md
+++ b/docs/operations/web-console.md
@@ -65,7 +65,7 @@ You can access the [data loader](#data-loader) and [lookups view](#lookups) from
## Query
-SQL-based ingestion and the multi-stage query task engine use the **Query** view, which provides you with a UI to edit and use SQL queries. You should see this UI automatically in Druid 24.0 and later since the multi-stage query extension is loaded by default.
+SQL-based ingestion and the multi-stage query task engine use the **Query** view, which provides you with a UI to edit and use SQL queries.
The following screenshot shows a populated enhanced **Query** view along with a description of its parts:
diff --git a/docs/querying/aggregations.md b/docs/querying/aggregations.md
index c7b7d4e4efc2..3add7863c46a 100644
--- a/docs/querying/aggregations.md
+++ b/docs/querying/aggregations.md
@@ -471,7 +471,11 @@ For these reasons, we have deprecated this aggregator and recommend using the Da
### Expression aggregator
-Aggregator applicable only at query time. Aggregates results using [Druid expressions](./math-expr.md) functions to facilitate building custom functions.
+Aggregates results using [Druid expression](./math-expr.md) functions to facilitate building custom aggregation functions.
+
+The expression aggregator can be used at query time with any intermediate type. It can also be used at ingest time, but
+only when the type of `initialValue` is a primitive numeric type (`LONG` or `DOUBLE`) and matches the type of
+`initialCombineValue`. Other intermediate types, such as strings, arrays, and complex types, are query-time only.
| Property | Description | Required |
| --- | --- | --- |
diff --git a/docs/querying/query-context-reference.md b/docs/querying/query-context-reference.md
index c485c0231c06..41bd206199e7 100644
--- a/docs/querying/query-context-reference.md
+++ b/docs/querying/query-context-reference.md
@@ -68,7 +68,7 @@ Unless otherwise noted, the following parameters apply to all query types, and t
|`useFilterCNF`|`false`| If true, Druid will attempt to convert the query filter to Conjunctive Normal Form (CNF). During query processing, columns can be pre-filtered by intersecting the bitmap indexes of all values that match the eligible filters, often greatly reducing the raw number of rows which need to be scanned. But this effect only happens for the top level filter, or individual clauses of a top level 'and' filter. As such, filters in CNF potentially have a higher chance to utilize a large amount of bitmap indexes on string columns during pre-filtering. However, this setting should be used with great caution, as it can sometimes have a negative effect on performance, and in some cases, the act of computing CNF of a filter can be expensive. We recommend hand tuning your filters to produce an optimal form if possible, or at least verifying through experimentation that using this parameter actually improves your query performance with no ill-effects.|
|`secondaryPartitionPruning`|`true`|Enable secondary partition pruning on the Broker. The Broker will always prune unnecessary segments from the input scan based on a filter on time intervals, but if the data is further partitioned with hash or range partitioning, this option will enable additional pruning based on a filter on secondary partition dimensions.|
|`debug`| `false` | Flag indicating whether to enable debugging outputs for the query. When set to false, no additional logs will be produced (logs produced will be entirely dependent on your logging level). When set to true, the following addition logs will be produced: - Log the stack trace of the exception (if any) produced by the query |
-|`setProcessingThreadNames`|`true`| Whether processing thread names will be set to `queryType_dataSource_intervals` while processing a query. This aids in interpreting thread dumps, and is on by default. Query overhead can be reduced slightly by setting this to `false`. This has a tiny effect in most scenarios, but can be meaningful in high-QPS, low-per-segment-processing-time scenarios. |
+|`setProcessingThreadNames`|`false`| Flag indicating whether processing thread names will be set to `processing_` while processing a query. Thread renaming aids in interpreting thread dumps, but adds measurable overhead when segment scans are very quick. |
|`sqlPlannerBloat`|`1000`|Calcite parameter which controls whether to merge two Project operators when inlining expressions causes complexity to increase. Implemented as a workaround to exception `There are not enough rules to produce a node with desired properties: convention=DRUID, sort=[]` thrown after rejecting the merge of two projects.|
|`cloneQueryMode`|`excludeClones`| Indicates whether clone Historicals should be queried by brokers. Clone servers are created by the `cloneServers` Coordinator dynamic configuration. Possible values are `excludeClones`, `includeClones` and `preferClones`. `excludeClones` means that clone Historicals are not queried by the broker. `preferClones` indicates that when given a choice between the clone Historical and the original Historical which is being cloned, the broker chooses the clones. Historicals which are not involved in the cloning process will still be queried. `includeClones` means that broker queries any Historical without regarding clone status. This parameter only affects native queries. MSQ does not query Historicals directly.|
|`realtimeSegmentsMode` |`include`| Controls whether realtime segments are queried. `include` queries all segments, including realtime. `exclude` skips realtime segments. `exclusive` queries only realtime segments. |
@@ -140,4 +140,3 @@ For more information, see the following topics:
- [Set query context](./query-context.md) to learn how to configure query context parameters.
- [SQL query context](sql-query-context.md) for query context parameters specific to Druid SQL.
- [SQL-based ingestion reference](../multi-stage-query/reference/#context-parameters) for context parameters used in SQL-based ingestion (MSQ).
-
diff --git a/docs/tutorials/index.md b/docs/tutorials/index.md
index f270c1b74353..730fef78d074 100644
--- a/docs/tutorials/index.md
+++ b/docs/tutorials/index.md
@@ -67,7 +67,7 @@ The distribution directory contains `LICENSE` and `NOTICE` files and subdirector
## Start up Druid services
Start up Druid services using the automatic single-machine configuration.
-This configuration includes default settings that are appropriate for this tutorial, such as loading the `druid-multi-stage-query` extension by default so that you can use the MSQ task engine.
+This configuration includes default settings that are appropriate for this tutorial.
You can view the default settings in the configuration files located in `conf/druid/auto`.
diff --git a/docs/tutorials/tutorial-msq-convert-spec.md b/docs/tutorials/tutorial-msq-convert-spec.md
index 0d386bc06293..a8501284ca9d 100644
--- a/docs/tutorials/tutorial-msq-convert-spec.md
+++ b/docs/tutorials/tutorial-msq-convert-spec.md
@@ -25,9 +25,8 @@ description: How to convert an ingestion spec to a query for SQL-based ingestion
-->
:::info
- This page describes SQL-based batch ingestion using the [`druid-multi-stage-query`](../multi-stage-query/index.md)
- extension, new in Druid 24.0. Refer to the [ingestion methods](../ingestion/index.md#batch) table to determine which
- ingestion method is right for you.
+ This page describes SQL-based batch ingestion using the [multi-stage query (MSQ) task engine](../multi-stage-query/index.md).
+ Refer to the [ingestion methods](../ingestion/index.md#batch) table to determine which ingestion method is right for you.
:::
If you're already ingesting data with [native batch ingestion](../ingestion/native-batch.md), you can use the [web console](../operations/web-console.md) to convert the ingestion spec to a SQL query that the multi-stage query task engine can use to ingest data.
diff --git a/docs/tutorials/tutorial-msq-extern.md b/docs/tutorials/tutorial-msq-extern.md
index dcd0d5095980..1cb7aac89092 100644
--- a/docs/tutorials/tutorial-msq-extern.md
+++ b/docs/tutorials/tutorial-msq-extern.md
@@ -25,9 +25,8 @@ description: How to generate a query that references externally hosted data
-->
:::info
- This page describes SQL-based batch ingestion using the [`druid-multi-stage-query`](../multi-stage-query/index.md)
- extension, new in Druid 24.0. Refer to the [ingestion methods](../ingestion/index.md#batch) table to determine which
- ingestion method is right for you.
+ This page describes SQL-based batch ingestion using the [multi-stage query (MSQ) task engine](../multi-stage-query/index.md).
+ Refer to the [ingestion methods](../ingestion/index.md#batch) table to determine which ingestion method is right for you.
:::
This tutorial demonstrates how to generate a query that references externally hosted data using the **Connect external data** wizard.
diff --git a/embedded-tests/src/test/java/org/apache/druid/testing/embedded/compact/CompactionTaskTest.java b/embedded-tests/src/test/java/org/apache/druid/testing/embedded/compact/CompactionTaskTest.java
index 84ee947c8467..4692ec0715f5 100644
--- a/embedded-tests/src/test/java/org/apache/druid/testing/embedded/compact/CompactionTaskTest.java
+++ b/embedded-tests/src/test/java/org/apache/druid/testing/embedded/compact/CompactionTaskTest.java
@@ -33,9 +33,12 @@
import org.apache.druid.java.util.common.granularity.Granularity;
import org.apache.druid.java.util.common.jackson.JacksonUtils;
import org.apache.druid.query.Druids;
+import org.apache.druid.query.aggregation.CountAggregatorFactory;
+import org.apache.druid.query.aggregation.ExpressionLambdaAggregatorFactory;
import org.apache.druid.query.aggregation.datasketches.hll.HllSketchModule;
import org.apache.druid.query.aggregation.datasketches.quantiles.DoublesSketchModule;
import org.apache.druid.query.aggregation.datasketches.theta.SketchModule;
+import org.apache.druid.query.expression.TestExprMacroTable;
import org.apache.druid.query.metadata.metadata.SegmentMetadataQuery;
import org.apache.druid.segment.TestHelper;
import org.apache.druid.testing.embedded.EmbeddedClusterApis;
@@ -55,6 +58,7 @@
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
+import java.util.Set;
import java.util.function.Supplier;
import java.util.stream.Collectors;
@@ -107,6 +111,65 @@ public class CompactionTaskTest extends CompactionTestBase
"namespace", "continent", "country", "region", "city", "timestamp"
);
+ /**
+ * Index task identical in shape to {@link MoreResources.Task#INDEX_TASK_WITH_AGGREGATORS} but with a pair of
+ * {@link ExpressionLambdaAggregatorFactory} metrics (a sum and a bitwise OR) over the {@code added} long field. Used
+ * by {@link #testCompactionWithExpressionLambdaAggregator} to verify that both metrics are unchanged by compaction.
+ */
+ private static final Supplier<TaskBuilder.Index> INDEX_TASK_WITH_EXPR_AGG = () ->
+ TaskBuilder
+ .ofTypeIndex()
+ .jsonInputFormat()
+ .localInputSourceWithFiles(
+ Resources.DataFile.tinyWiki1Json(),
+ Resources.DataFile.tinyWiki2Json(),
+ Resources.DataFile.tinyWiki3Json()
+ )
+ .timestampColumn("timestamp")
+ .dimensions(
+ "page",
+ "language", "tags", "user", "unpatrolled", "newPage", "robot",
+ "anonymous", "namespace", "continent", "country", "region", "city"
+ )
+ .metricAggregates(
+ new CountAggregatorFactory("ingested_events"),
+ new ExpressionLambdaAggregatorFactory(
+ "added_sum_expr",
+ Set.of("added"),
+ null,
+ "0",
+ null,
+ null,
+ false,
+ false,
+ "__acc + added",
+ null,
+ null,
+ null,
+ null,
+ TestExprMacroTable.INSTANCE
+ ),
+ new ExpressionLambdaAggregatorFactory(
+ "added_or_expr",
+ Set.of("added"),
+ null,
+ "0",
+ null,
+ null,
+ false,
+ false,
+ "bitwiseOr(\"__acc\", \"added\")",
+ null,
+ null,
+ null,
+ null,
+ TestExprMacroTable.INSTANCE
+ )
+ )
+ .dynamicPartitionWithMaxRows(3)
+ .granularitySpec("DAY", "SECOND", true)
+ .appendToExisting(false);
+
private String fullDatasourceName;
@BeforeEach
@@ -259,6 +322,33 @@ public void testCompactionWithTimestampDimension() throws Exception
loadDataAndCompact(INDEX_TASK_WITH_TIMESTAMP.get(), COMPACTION_TASK.get(), null);
}
+ @Test
+ public void testCompactionWithExpressionLambdaAggregator() throws Exception
+ {
+ try (final Closeable ignored = unloader(fullDatasourceName)) {
+ runTask(INDEX_TASK_WITH_EXPR_AGG.get());
+ verifySegmentsCount(4);
+
+ // Snapshot metric values prior to compaction.
+ final String preCompact = cluster.runSql(
+ "SELECT SUM(added_sum_expr), SUM(added_or_expr) FROM %s",
+ fullDatasourceName
+ );
+
+ // Compact 4 segments -> 2; this performs cross-segment rollup which drives RowCombiningTimeAndDimsIterator
+ // into ExpressionLambdaAggregatorFactory.makeAggregateCombiner().
+ compactData(COMPACTION_TASK.get(), null, null);
+ verifySegmentsCount(2);
+
+ // Metric values must round-trip through compaction unchanged.
+ final String postCompact = cluster.runSql(
+ "SELECT SUM(added_sum_expr), SUM(added_or_expr) FROM %s",
+ fullDatasourceName
+ );
+ Assertions.assertEquals(preCompact, postCompact);
+ }
+ }
+
private void loadDataAndCompact(
TaskBuilder.Index indexTask,
TaskBuilder.Compact compactionResource,
diff --git a/embedded-tests/src/test/java/org/apache/druid/testing/embedded/indexing/KafkaBoundedSupervisorTest.java b/embedded-tests/src/test/java/org/apache/druid/testing/embedded/indexing/KafkaBoundedSupervisorTest.java
index 7e22d85d9cab..fa184418df52 100644
--- a/embedded-tests/src/test/java/org/apache/druid/testing/embedded/indexing/KafkaBoundedSupervisorTest.java
+++ b/embedded-tests/src/test/java/org/apache/druid/testing/embedded/indexing/KafkaBoundedSupervisorTest.java
@@ -292,6 +292,48 @@ public void test_boundedSupervisor_doesNotSilentlyCompleteWhenStaleOffsetExceeds
Assertions.assertEquals("UNHEALTHY_SUPERVISOR", status2.getState(), "Supervisor state should be UNHEALTHY_SUPERVISOR");
}
+ @Test
+ public void test_resetToLatestAndBackfill()
+ {
+ final String topic = IdUtils.getRandomId();
+ kafkaServer.createTopicWithPartitions(topic, 2);
+
+ // Create a streaming supervisor with concurrent locks and useEarliestSequenceNumber=false
+ final KafkaSupervisorSpec supervisor = createKafkaSupervisor(kafkaServer)
+ .withContext(Map.of("useConcurrentLocks", true))
+ .withIoConfig(io -> io
+ .withKafkaInputFormat(new JsonInputFormat(null, null, null, null, null))
+ .withUseEarliestSequenceNumber(false)
+ )
+ .build(dataSource, topic);
+
+ cluster.callApi().postSupervisor(supervisor);
+
+ waitForSupervisorDetailedState(supervisor.getId(), "RUNNING");
+
+ final int totalRecords = publish1kRecords(topic, false);
+ waitUntilPublishedRecordsAreIngested(totalRecords);
+
+ // Reset the main supervisor and spin up a backfill supervisor.
+ // Since all records are already ingested before the call, the backfill
+ // supervisor will complete immediately without ingesting anything.
+ final Map result = cluster.callApi().resetToLatestAndBackfill(supervisor.getId());
+ Assertions.assertEquals(supervisor.getId(), result.get("id"));
+ final String backfillSupervisorId = (String) result.get("backfillSupervisorId");
+
+ // Wait for the backfill to finish
+ waitForSupervisorToComplete(backfillSupervisorId);
+
+ // Main supervisor should still be running
+ final SupervisorStatus mainStatus = cluster.callApi().getSupervisorStatus(supervisor.getId());
+ Assertions.assertEquals("RUNNING", mainStatus.getState());
+ Assertions.assertTrue(mainStatus.isHealthy());
+
+ final SupervisorStatus backfillStatus = cluster.callApi().getSupervisorStatus(backfillSupervisorId);
+ Assertions.assertEquals("COMPLETED", backfillStatus.getState());
+ Assertions.assertTrue(backfillStatus.isHealthy());
+ }
+
private void waitForSupervisorToComplete(String supervisorId)
{
overlord.latchableEmitter().waitForEvent(
@@ -301,6 +343,15 @@ private void waitForSupervisorToComplete(String supervisorId)
);
}
+ private void waitForSupervisorDetailedState(String supervisorId, String detailedState)
+ {
+ overlord.latchableEmitter().waitForEvent(
+ event -> event.hasMetricName("supervisor/count")
+ .hasDimension(DruidMetrics.SUPERVISOR_ID, supervisorId)
+ .hasDimension("detailedState", detailedState)
+ );
+ }
+
private void waitForSupervisorToBeUnhealthy(String supervisorId)
{
overlord.latchableEmitter().waitForEvent(
diff --git a/examples/bin/run-java b/examples/bin/run-java
index 80190d0a793c..5a30cd54fbdf 100755
--- a/examples/bin/run-java
+++ b/examples/bin/run-java
@@ -43,6 +43,7 @@ then
--add-opens=java.base/java.io=ALL-UNNAMED \
--add-opens=java.base/java.lang=ALL-UNNAMED \
--add-opens=jdk.management/com.sun.management.internal=ALL-UNNAMED \
+ --add-modules=jdk.incubator.vector \
"$@"
else
exec "$JAVA_BIN" "$@"
diff --git a/examples/bin/start-druid b/examples/bin/start-druid
index 81d8938adbb2..f39053ce16a5 100755
--- a/examples/bin/start-druid
+++ b/examples/bin/start-druid
@@ -31,5 +31,5 @@ elif [ -x "$(command -v python)" ]
then
exec python "$WHEREAMI/start-druid-main.py" "$@"
else
- echo "python interepreter not found"
+ echo "python interpreter not found"
fi
diff --git a/extensions-contrib/rabbit-stream-indexing-service/src/main/java/org/apache/druid/indexing/rabbitstream/supervisor/RabbitStreamSupervisor.java b/extensions-contrib/rabbit-stream-indexing-service/src/main/java/org/apache/druid/indexing/rabbitstream/supervisor/RabbitStreamSupervisor.java
index 6099105b3374..04973a5272fd 100644
--- a/extensions-contrib/rabbit-stream-indexing-service/src/main/java/org/apache/druid/indexing/rabbitstream/supervisor/RabbitStreamSupervisor.java
+++ b/extensions-contrib/rabbit-stream-indexing-service/src/main/java/org/apache/druid/indexing/rabbitstream/supervisor/RabbitStreamSupervisor.java
@@ -322,7 +322,7 @@ protected Map getTimeLagPerPartition(Map currentOffs
}
@Override
- protected RabbitStreamDataSourceMetadata createDataSourceMetaDataForReset(String topic, Map map)
+ public RabbitStreamDataSourceMetadata createDataSourceMetaDataForReset(String topic, Map map)
{
return new RabbitStreamDataSourceMetadata(new SeekableStreamEndSequenceNumbers<>(topic, map));
}
@@ -408,7 +408,7 @@ public LagStats computeLagStats()
}
@Override
- protected void updatePartitionLagFromStream()
+ public void updatePartitionLagFromStream()
{
getRecordSupplierLock().lock();
@@ -435,7 +435,7 @@ protected void updatePartitionLagFromStream()
}
@Override
- protected Map getLatestSequencesFromStream()
+ public Map getLatestSequencesFromStream()
{
return latestSequenceFromStream != null ? latestSequenceFromStream : new HashMap<>();
}
diff --git a/extensions-contrib/rabbit-stream-indexing-service/src/main/java/org/apache/druid/indexing/rabbitstream/supervisor/RabbitStreamSupervisorSpec.java b/extensions-contrib/rabbit-stream-indexing-service/src/main/java/org/apache/druid/indexing/rabbitstream/supervisor/RabbitStreamSupervisorSpec.java
index 4a445f6f1c11..4763a949a615 100644
--- a/extensions-contrib/rabbit-stream-indexing-service/src/main/java/org/apache/druid/indexing/rabbitstream/supervisor/RabbitStreamSupervisorSpec.java
+++ b/extensions-contrib/rabbit-stream-indexing-service/src/main/java/org/apache/druid/indexing/rabbitstream/supervisor/RabbitStreamSupervisorSpec.java
@@ -30,6 +30,7 @@
import org.apache.druid.indexing.overlord.supervisor.Supervisor;
import org.apache.druid.indexing.overlord.supervisor.SupervisorStateManagerConfig;
import org.apache.druid.indexing.rabbitstream.RabbitStreamIndexTaskClientFactory;
+import org.apache.druid.indexing.seekablestream.supervisor.BoundedStreamConfig;
import org.apache.druid.indexing.seekablestream.supervisor.SeekableStreamSupervisorSpec;
import org.apache.druid.java.util.emitter.service.ServiceEmitter;
import org.apache.druid.java.util.metrics.DruidMonitorSchedulerConfig;
@@ -155,6 +156,55 @@ protected RabbitStreamSupervisorSpec toggleSuspend(boolean suspend)
supervisorStateManagerConfig);
}
+ @Override
+ public RabbitStreamSupervisorSpec createBackfillSpec(
+ String backfillId,
+ BoundedStreamConfig boundedStreamConfig,
+ @Nullable Integer taskCount
+ )
+ {
+ RabbitStreamSupervisorIOConfig ioConfig = getSpec().getIOConfig();
+ RabbitStreamSupervisorIOConfig backfillIoConfig = new RabbitStreamSupervisorIOConfig(
+ ioConfig.getStream(),
+ ioConfig.getUri(),
+ ioConfig.getInputFormat(),
+ ioConfig.getReplicas(),
+ taskCount != null ? taskCount : ioConfig.getTaskCount(),
+ ioConfig.getTaskDuration().toPeriod(),
+ ioConfig.getConsumerProperties(),
+ ioConfig.getAutoScalerConfig(),
+ ioConfig.getPollTimeout(),
+ ioConfig.getStartDelay().toPeriod(),
+ ioConfig.getPeriod().toPeriod(),
+ ioConfig.getCompletionTimeout().toPeriod(),
+ ioConfig.isUseEarliestSequenceNumber(),
+ ioConfig.getLateMessageRejectionPeriod().isPresent() ? ioConfig.getLateMessageRejectionPeriod().get().toPeriod() : null,
+ ioConfig.getEarlyMessageRejectionPeriod().isPresent() ? ioConfig.getEarlyMessageRejectionPeriod().get().toPeriod() : null,
+ ioConfig.getLateMessageRejectionStartDateTime().isPresent() ? ioConfig.getLateMessageRejectionStartDateTime().get() : null,
+ ioConfig.getStopTaskCount(),
+ ioConfig.getServerPriorityToReplicas(),
+ boundedStreamConfig
+ );
+ return new RabbitStreamSupervisorSpec(
+ backfillId,
+ null,
+ getSpec().getDataSchema(),
+ getSpec().getTuningConfig(),
+ backfillIoConfig,
+ getContext(),
+ isSuspended(),
+ taskStorage,
+ taskMaster,
+ indexerMetadataStorageCoordinator,
+ (RabbitStreamIndexTaskClientFactory) indexTaskClientFactory,
+ mapper,
+ emitter,
+ monitorSchedulerConfig,
+ rowIngestionMetersFactory,
+ supervisorStateManagerConfig
+ );
+ }
+
@Override
public String toString()
{
diff --git a/extensions-core/kafka-indexing-service/src/main/java/org/apache/druid/indexing/kafka/supervisor/KafkaSupervisor.java b/extensions-core/kafka-indexing-service/src/main/java/org/apache/druid/indexing/kafka/supervisor/KafkaSupervisor.java
index 727eb52db272..5863284cc2d9 100644
--- a/extensions-core/kafka-indexing-service/src/main/java/org/apache/druid/indexing/kafka/supervisor/KafkaSupervisor.java
+++ b/extensions-core/kafka-indexing-service/src/main/java/org/apache/druid/indexing/kafka/supervisor/KafkaSupervisor.java
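@@ -356,7 +356,7 @@ protected Map<KafkaTopicPartition, Long> getTimeLagPerPartition(Map<KafkaTopicPa
}
@Override
- protected KafkaDataSourceMetadata createDataSourceMetaDataForReset(String topic, Map<KafkaTopicPartition, Long> map)
+ public KafkaDataSourceMetadata createDataSourceMetaDataForReset(String topic, Map<KafkaTopicPartition, Long> map)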
{
return new KafkaDataSourceMetadata(new SeekableStreamEndSequenceNumbers<>(topic, map));
}
@@ -548,7 +548,7 @@ private Map getTimestampPerPartitionAtCurrentOffset(S
*
*/
@Override
- protected void updatePartitionLagFromStream()
+ public void updatePartitionLagFromStream()
{
if (getIoConfig().isEmitTimeLagMetrics()) {
updatePartitionTimeAndRecordLagFromStream();
@@ -597,7 +597,7 @@ private void updateOffsetSnapshot(
}
@Override
- protected Map getLatestSequencesFromStream()
+ public Map getLatestSequencesFromStream()
{
return offsetSnapshotRef.get().getLatestOffsetsFromStream();
}
@@ -630,7 +630,7 @@ protected boolean isMultiTopic()
* Gets the offsets as stored in the metadata store. The map returned will only contain
* offsets from topic partitions that match the current supervisor config stream. This
* override is needed because in the case of multi-topic, a user could have updated the supervisor
- * config from single topic to mult-topic, where the new multi-topic pattern regex matches the
+ * config from single topic to multi-topic, where the new multi-topic pattern regex matches the
* old config single topic. Without this override, the previously stored metadata for the single
* topic would be deemed as different from the currently configure stream, and not be included in
* the offset map returned. This implementation handles these cases appropriately.
@@ -640,7 +640,7 @@ protected boolean isMultiTopic()
* updated to single topic or multi-topic depending on the supervisor config, as needed.
*/
@Override
- protected Map getOffsetsFromMetadataStorage()
+ public Map getOffsetsFromMetadataStorage()
{
final DataSourceMetadata dataSourceMetadata = retrieveDataSourceMetadata();
if (checkSourceMetadataMatch(dataSourceMetadata)) {
diff --git a/extensions-core/kafka-indexing-service/src/main/java/org/apache/druid/indexing/kafka/supervisor/KafkaSupervisorSpec.java b/extensions-core/kafka-indexing-service/src/main/java/org/apache/druid/indexing/kafka/supervisor/KafkaSupervisorSpec.java
index b607ade1acfe..31d3e8fad691 100644
--- a/extensions-core/kafka-indexing-service/src/main/java/org/apache/druid/indexing/kafka/supervisor/KafkaSupervisorSpec.java
+++ b/extensions-core/kafka-indexing-service/src/main/java/org/apache/druid/indexing/kafka/supervisor/KafkaSupervisorSpec.java
@@ -36,6 +36,7 @@
import org.apache.druid.indexing.overlord.supervisor.Supervisor;
import org.apache.druid.indexing.overlord.supervisor.SupervisorSpec;
import org.apache.druid.indexing.overlord.supervisor.SupervisorStateManagerConfig;
+import org.apache.druid.indexing.seekablestream.supervisor.BoundedStreamConfig;
import org.apache.druid.indexing.seekablestream.supervisor.SeekableStreamSupervisorSpec;
import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.java.util.emitter.service.ServiceEmitter;
@@ -173,6 +174,59 @@ protected KafkaSupervisorSpec toggleSuspend(boolean suspend)
);
}
+ @Override
+ public KafkaSupervisorSpec createBackfillSpec(
+ String backfillId,
+ BoundedStreamConfig boundedStreamConfig,
+ @Nullable Integer taskCount
+ )
+ {
+ KafkaSupervisorIOConfig ioConfig = getSpec().getIOConfig();
+ KafkaSupervisorIOConfig backfillIoConfig = new KafkaSupervisorIOConfig(
+ ioConfig.getTopic(),
+ ioConfig.getTopicPattern(),
+ ioConfig.getInputFormat(),
+ ioConfig.getReplicas(),
+ taskCount != null ? taskCount : ioConfig.getTaskCount(),
+ ioConfig.getTaskDuration().toPeriod(),
+ ioConfig.getConsumerProperties(),
+ ioConfig.getAutoScalerConfig(),
+ ioConfig.getLagAggregator(),
+ ioConfig.getPollTimeout(),
+ ioConfig.getStartDelay().toPeriod(),
+ ioConfig.getPeriod().toPeriod(),
+ ioConfig.isUseEarliestSequenceNumber(),
+ ioConfig.getCompletionTimeout().toPeriod(),
+ ioConfig.getLateMessageRejectionPeriod().isPresent() ? ioConfig.getLateMessageRejectionPeriod().get().toPeriod() : null,
+ ioConfig.getEarlyMessageRejectionPeriod().isPresent() ? ioConfig.getEarlyMessageRejectionPeriod().get().toPeriod() : null,
+ ioConfig.getLateMessageRejectionStartDateTime().isPresent() ? ioConfig.getLateMessageRejectionStartDateTime().get() : null,
+ ioConfig.getConfigOverrides(),
+ ioConfig.getIdleConfig(),
+ ioConfig.getStopTaskCount(),
+ ioConfig.isEmitTimeLagMetrics(),
+ ioConfig.getServerPriorityToReplicas(),
+ boundedStreamConfig
+ );
+ return new KafkaSupervisorSpec(
+ backfillId,
+ null,
+ getSpec().getDataSchema(),
+ getSpec().getTuningConfig(),
+ backfillIoConfig,
+ getContext(),
+ isSuspended(),
+ taskStorage,
+ taskMaster,
+ indexerMetadataStorageCoordinator,
+ (KafkaIndexTaskClientFactory) indexTaskClientFactory,
+ mapper,
+ emitter,
+ monitorSchedulerConfig,
+ rowIngestionMetersFactory,
+ supervisorStateManagerConfig
+ );
+ }
+
/**
* Extends {@link SeekableStreamSupervisorSpec#validateSpecUpdateTo} to ensure that the proposed spec and current spec are either both multi-topic or both single-topic.
*
diff --git a/extensions-core/kafka-indexing-service/src/test/java/org/apache/druid/indexing/kafka/supervisor/KafkaSupervisorSpecTest.java b/extensions-core/kafka-indexing-service/src/test/java/org/apache/druid/indexing/kafka/supervisor/KafkaSupervisorSpecTest.java
index 8879ff6d9753..06ca9b64ced5 100644
--- a/extensions-core/kafka-indexing-service/src/test/java/org/apache/druid/indexing/kafka/supervisor/KafkaSupervisorSpecTest.java
+++ b/extensions-core/kafka-indexing-service/src/test/java/org/apache/druid/indexing/kafka/supervisor/KafkaSupervisorSpecTest.java
@@ -32,6 +32,7 @@
import org.apache.druid.indexing.overlord.TaskStorage;
import org.apache.druid.indexing.overlord.supervisor.SupervisorSpec;
import org.apache.druid.indexing.overlord.supervisor.SupervisorStateManagerConfig;
+import org.apache.druid.indexing.seekablestream.supervisor.BoundedStreamConfig;
import org.apache.druid.indexing.seekablestream.supervisor.LagAggregator;
import org.apache.druid.indexing.seekablestream.supervisor.autoscaler.CostBasedAutoScalerConfig;
import org.apache.druid.jackson.DefaultObjectMapper;
@@ -564,6 +565,38 @@ public void test_validateSpecUpdateTo()
sourceSpec.validateSpecUpdateTo(validDestSpec);
}
+ @Test
+ public void testCreateBackfillSpec()
+ {
+ KafkaSupervisorSpec spec = new KafkaSupervisorSpecBuilder()
+ .withDataSchema(
+ schema -> schema
+ .withTimestamp(TimestampSpec.DEFAULT)
+ .withAggregators(new CountAggregatorFactory("rows"))
+ .withGranularity(new UniformGranularitySpec(Granularities.HOUR, Granularities.NONE, null))
+ )
+ .withIoConfig(
+ ioConfig -> ioConfig
+ .withJsonInputFormat()
+ .withConsumerProperties(Map.of("bootstrap.servers", "localhost:9092"))
+ .withTaskCount(3)
+ )
+ .build("testDs", "metrics");
+
+ BoundedStreamConfig boundedStreamConfig = new BoundedStreamConfig(
+ Map.of("0", 100L, "1", 200L),
+ Map.of("0", 500L, "1", 600L)
+ );
+
+ KafkaSupervisorSpec backfill = (KafkaSupervisorSpec) spec.createBackfillSpec("backfill-id", boundedStreamConfig, 2);
+
+ Assert.assertEquals("backfill-id", backfill.getId());
+ Assert.assertEquals("testDs", backfill.getSpec().getDataSchema().getDataSource());
+ Assert.assertEquals("metrics", backfill.getSpec().getIOConfig().getTopic());
+ Assert.assertEquals(2, backfill.getSpec().getIOConfig().getTaskCount());
+ Assert.assertEquals(boundedStreamConfig, backfill.getSpec().getIOConfig().getBoundedStreamConfig());
+ }
+
private KafkaSupervisorSpec getSpec(String topic, String topicPattern)
{
KafkaSupervisorSpecBuilder builder = new KafkaSupervisorSpecBuilder()
diff --git a/extensions-core/kinesis-indexing-service/src/main/java/org/apache/druid/indexing/kinesis/supervisor/KinesisSupervisor.java b/extensions-core/kinesis-indexing-service/src/main/java/org/apache/druid/indexing/kinesis/supervisor/KinesisSupervisor.java
index 0f91fc0965db..3f1f4034f3ce 100644
--- a/extensions-core/kinesis-indexing-service/src/main/java/org/apache/druid/indexing/kinesis/supervisor/KinesisSupervisor.java
+++ b/extensions-core/kinesis-indexing-service/src/main/java/org/apache/druid/indexing/kinesis/supervisor/KinesisSupervisor.java
@@ -321,7 +321,7 @@ protected Map getTimeLagPerPartition(Map currentOf
}
@Override
- protected SeekableStreamDataSourceMetadata createDataSourceMetaDataForReset(
+ public SeekableStreamDataSourceMetadata createDataSourceMetaDataForReset(
String stream,
Map map
)
@@ -336,7 +336,7 @@ protected OrderedSequenceNumber makeSequenceNumber(String seq, boolean i
}
@Override
- protected void updatePartitionLagFromStream()
+ public void updatePartitionLagFromStream()
{
KinesisRecordSupplier supplier = (KinesisRecordSupplier) recordSupplier;
// this recordSupplier method is thread safe, so does not need to acquire the recordSupplierLock
diff --git a/extensions-core/kinesis-indexing-service/src/main/java/org/apache/druid/indexing/kinesis/supervisor/KinesisSupervisorSpec.java b/extensions-core/kinesis-indexing-service/src/main/java/org/apache/druid/indexing/kinesis/supervisor/KinesisSupervisorSpec.java
index 8e6615716809..4899337797bf 100644
--- a/extensions-core/kinesis-indexing-service/src/main/java/org/apache/druid/indexing/kinesis/supervisor/KinesisSupervisorSpec.java
+++ b/extensions-core/kinesis-indexing-service/src/main/java/org/apache/druid/indexing/kinesis/supervisor/KinesisSupervisorSpec.java
@@ -35,6 +35,7 @@
import org.apache.druid.indexing.overlord.TaskStorage;
import org.apache.druid.indexing.overlord.supervisor.Supervisor;
import org.apache.druid.indexing.overlord.supervisor.SupervisorStateManagerConfig;
+import org.apache.druid.indexing.seekablestream.supervisor.BoundedStreamConfig;
import org.apache.druid.indexing.seekablestream.supervisor.SeekableStreamSupervisorSpec;
import org.apache.druid.java.util.emitter.service.ServiceEmitter;
import org.apache.druid.java.util.metrics.DruidMonitorSchedulerConfig;
@@ -193,4 +194,57 @@ protected KinesisSupervisorSpec toggleSuspend(boolean suspend)
supervisorStateManagerConfig
);
}
+
+ @Override
+ public KinesisSupervisorSpec createBackfillSpec(
+ String backfillId,
+ BoundedStreamConfig boundedStreamConfig,
+ @Nullable Integer taskCount
+ )
+ {
+ KinesisSupervisorIOConfig ioConfig = getSpec().getIOConfig();
+ KinesisSupervisorIOConfig backfillIoConfig = new KinesisSupervisorIOConfig(
+ ioConfig.getStream(),
+ ioConfig.getInputFormat(),
+ ioConfig.getEndpoint(),
+ null,
+ ioConfig.getReplicas(),
+ taskCount != null ? taskCount : ioConfig.getTaskCount(),
+ ioConfig.getTaskDuration().toPeriod(),
+ ioConfig.getStartDelay().toPeriod(),
+ ioConfig.getPeriod().toPeriod(),
+ ioConfig.isUseEarliestSequenceNumber(),
+ ioConfig.getCompletionTimeout().toPeriod(),
+ ioConfig.getLateMessageRejectionPeriod().isPresent() ? ioConfig.getLateMessageRejectionPeriod().get().toPeriod() : null,
+ ioConfig.getEarlyMessageRejectionPeriod().isPresent() ? ioConfig.getEarlyMessageRejectionPeriod().get().toPeriod() : null,
+ ioConfig.getLateMessageRejectionStartDateTime().isPresent() ? ioConfig.getLateMessageRejectionStartDateTime().get() : null,
+ ioConfig.getRecordsPerFetch(),
+ ioConfig.getFetchDelayMillis(),
+ ioConfig.getAwsAssumedRoleArn(),
+ ioConfig.getAwsExternalId(),
+ ioConfig.getAutoScalerConfig(),
+ ioConfig.isDeaggregate(),
+ ioConfig.getServerPriorityToReplicas(),
+ boundedStreamConfig
+ );
+ return new KinesisSupervisorSpec(
+ backfillId,
+ null,
+ getSpec().getDataSchema(),
+ getSpec().getTuningConfig(),
+ backfillIoConfig,
+ getContext(),
+ isSuspended(),
+ taskStorage,
+ taskMaster,
+ indexerMetadataStorageCoordinator,
+ (KinesisIndexTaskClientFactory) indexTaskClientFactory,
+ mapper,
+ emitter,
+ monitorSchedulerConfig,
+ rowIngestionMetersFactory,
+ awsCredentialsConfig,
+ supervisorStateManagerConfig
+ );
+ }
}
diff --git a/extensions-core/kinesis-indexing-service/src/test/java/org/apache/druid/indexing/kinesis/KinesisIndexTaskSerdeTest.java b/extensions-core/kinesis-indexing-service/src/test/java/org/apache/druid/indexing/kinesis/KinesisIndexTaskSerdeTest.java
index 3089a39537cd..5173dfae9ffe 100644
--- a/extensions-core/kinesis-indexing-service/src/test/java/org/apache/druid/indexing/kinesis/KinesisIndexTaskSerdeTest.java
+++ b/extensions-core/kinesis-indexing-service/src/test/java/org/apache/druid/indexing/kinesis/KinesisIndexTaskSerdeTest.java
@@ -35,7 +35,6 @@
import org.apache.druid.segment.incremental.RowIngestionMetersFactory;
import org.apache.druid.segment.indexing.DataSchema;
import org.apache.druid.segment.realtime.ChatHandlerProvider;
-import org.apache.druid.segment.realtime.NoopChatHandlerProvider;
import org.apache.druid.segment.realtime.appenderator.AppenderatorsManager;
import org.apache.druid.server.security.Action;
import org.apache.druid.server.security.Resource;
@@ -160,7 +159,7 @@ private static ObjectMapper createObjectMapper()
binder.bindConstant().annotatedWith(Names.named("serviceName")).to("test");
binder.bindConstant().annotatedWith(Names.named("servicePort")).to(8000);
binder.bindConstant().annotatedWith(Names.named("tlsServicePort")).to(9000);
- binder.bind(ChatHandlerProvider.class).toInstance(new NoopChatHandlerProvider());
+ binder.bind(ChatHandlerProvider.class).toInstance(new ChatHandlerProvider());
binder.bind(RowIngestionMetersFactory.class).toInstance(new DropwizardRowIngestionMetersFactory());
binder.bind(AppenderatorsManager.class).toInstance(new TestAppenderatorsManager());
}
diff --git a/extensions-core/kubernetes-overlord-extensions/src/main/java/org/apache/druid/k8s/overlord/KubernetesAndWorkerTaskRunnerConfig.java b/extensions-core/kubernetes-overlord-extensions/src/main/java/org/apache/druid/k8s/overlord/KubernetesAndWorkerTaskRunnerConfig.java
index 89311981b0e2..9ec20045361c 100644
--- a/extensions-core/kubernetes-overlord-extensions/src/main/java/org/apache/druid/k8s/overlord/KubernetesAndWorkerTaskRunnerConfig.java
+++ b/extensions-core/kubernetes-overlord-extensions/src/main/java/org/apache/druid/k8s/overlord/KubernetesAndWorkerTaskRunnerConfig.java
@@ -23,7 +23,6 @@
import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.base.Preconditions;
import org.apache.commons.lang3.ObjectUtils;
-import org.apache.druid.indexing.overlord.RemoteTaskRunnerFactory;
import org.apache.druid.indexing.overlord.hrtr.HttpRemoteTaskRunnerFactory;
import javax.annotation.Nullable;
@@ -51,11 +50,9 @@ public KubernetesAndWorkerTaskRunnerConfig(
this.runnerStrategy = ObjectUtils.getIfNull(runnerStrategy, KubernetesTaskRunnerFactory.TYPE_NAME);
this.workerType = ObjectUtils.getIfNull(workerType, HttpRemoteTaskRunnerFactory.TYPE_NAME);
Preconditions.checkArgument(
- this.workerType.equals(HttpRemoteTaskRunnerFactory.TYPE_NAME) ||
- this.workerType.equals(RemoteTaskRunnerFactory.TYPE_NAME),
- "workerType must be set to one of (%s, %s)",
- HttpRemoteTaskRunnerFactory.TYPE_NAME,
- RemoteTaskRunnerFactory.TYPE_NAME
+ this.workerType.equals(HttpRemoteTaskRunnerFactory.TYPE_NAME),
+ "workerType must be set to [%s]; the ZooKeeper-based 'remote' worker type has been removed.",
+ HttpRemoteTaskRunnerFactory.TYPE_NAME
);
}
diff --git a/extensions-core/kubernetes-overlord-extensions/src/main/java/org/apache/druid/k8s/overlord/KubernetesOverlordModule.java b/extensions-core/kubernetes-overlord-extensions/src/main/java/org/apache/druid/k8s/overlord/KubernetesOverlordModule.java
index 6e82bb8766ff..b45aa6fb846d 100644
--- a/extensions-core/kubernetes-overlord-extensions/src/main/java/org/apache/druid/k8s/overlord/KubernetesOverlordModule.java
+++ b/extensions-core/kubernetes-overlord-extensions/src/main/java/org/apache/druid/k8s/overlord/KubernetesOverlordModule.java
@@ -44,7 +44,6 @@
import org.apache.druid.guice.annotations.Self;
import org.apache.druid.guice.annotations.Smile;
import org.apache.druid.indexing.common.config.TaskConfig;
-import org.apache.druid.indexing.overlord.RemoteTaskRunnerFactory;
import org.apache.druid.indexing.overlord.TaskRunnerFactory;
import org.apache.druid.indexing.overlord.WorkerTaskRunner;
import org.apache.druid.indexing.overlord.config.TaskQueueConfig;
@@ -264,10 +263,10 @@ public void stop()
}
/**
- * Provides a TaskRunnerFactory instance suitable for environments without Zookeeper.
- * In such environments, the standard RemoteTaskRunnerFactory may not be operational.
- * Depending on the workerType defined in KubernetesAndWorkerTaskRunnerConfig,
- * this method selects and returns an appropriate TaskRunnerFactory implementation.
+ * Provides the worker-side {@link TaskRunnerFactory} that the {@code k8sAndWorker} runner pairs
+ * with {@link KubernetesTaskRunnerFactory}. Only {@link HttpRemoteTaskRunnerFactory} is
+ * supported; the ZooKeeper-based 'remote' worker type was removed, and
+ * {@link KubernetesAndWorkerTaskRunnerConfig} enforces this at config-validation time.
*/
@Provides
@LazySingleton
@@ -277,10 +276,8 @@ TaskRunnerFactory<? extends WorkerTaskRunner> provideWorkerTaskRunner(
Injector injector
)
{
- String workerType = runnerConfig.getWorkerType();
- return HttpRemoteTaskRunnerFactory.TYPE_NAME.equals(workerType)
- ? injector.getInstance(HttpRemoteTaskRunnerFactory.class)
- : injector.getInstance(RemoteTaskRunnerFactory.class);
+ // workerType is validated to be HttpRemoteTaskRunnerFactory.TYPE_NAME by the config.
+ return injector.getInstance(HttpRemoteTaskRunnerFactory.class);
}
/**
diff --git a/extensions-core/kubernetes-overlord-extensions/src/test/java/org/apache/druid/k8s/overlord/KubernetesAndWorkerTaskRunnerConfigTest.java b/extensions-core/kubernetes-overlord-extensions/src/test/java/org/apache/druid/k8s/overlord/KubernetesAndWorkerTaskRunnerConfigTest.java
index 329a1ea52bce..5338ad2ebb9b 100644
--- a/extensions-core/kubernetes-overlord-extensions/src/test/java/org/apache/druid/k8s/overlord/KubernetesAndWorkerTaskRunnerConfigTest.java
+++ b/extensions-core/kubernetes-overlord-extensions/src/test/java/org/apache/druid/k8s/overlord/KubernetesAndWorkerTaskRunnerConfigTest.java
@@ -38,7 +38,7 @@ public void test_deserializable() throws IOException
);
Assertions.assertEquals("worker", config.getRunnerStrategy());
- Assertions.assertEquals("remote", config.getWorkerType());
+ Assertions.assertEquals("httpRemote", config.getWorkerType());
}
@Test
diff --git a/extensions-core/kubernetes-overlord-extensions/src/test/java/org/apache/druid/k8s/overlord/KubernetesOverlordModuleTest.java b/extensions-core/kubernetes-overlord-extensions/src/test/java/org/apache/druid/k8s/overlord/KubernetesOverlordModuleTest.java
index e37313ebb0fb..55e5103567b6 100644
--- a/extensions-core/kubernetes-overlord-extensions/src/test/java/org/apache/druid/k8s/overlord/KubernetesOverlordModuleTest.java
+++ b/extensions-core/kubernetes-overlord-extensions/src/test/java/org/apache/druid/k8s/overlord/KubernetesOverlordModuleTest.java
@@ -35,7 +35,6 @@
import org.apache.druid.guice.annotations.EscalatedGlobal;
import org.apache.druid.guice.annotations.Self;
import org.apache.druid.indexing.common.config.TaskConfig;
-import org.apache.druid.indexing.overlord.RemoteTaskRunnerFactory;
import org.apache.druid.indexing.overlord.TaskRunnerFactory;
import org.apache.druid.indexing.overlord.hrtr.HttpRemoteTaskRunnerFactory;
import org.apache.druid.jackson.JacksonModule;
@@ -77,8 +76,6 @@ public class KubernetesOverlordModuleTest
@Mock
private HttpClient httpClient;
@Mock
- private RemoteTaskRunnerFactory remoteTaskRunnerFactory;
- @Mock
private HttpRemoteTaskRunnerFactory httpRemoteTaskRunnerFactory;
@Mock
private ConfigManagerConfig configManagerConfig;
@@ -111,7 +108,7 @@ public void setUpConfigManagerMock()
@Test
public void testDefaultHttpRemoteTaskRunnerFactoryBindSuccessfully()
{
- injector = makeInjectorWithProperties(initializePropertes(false), false, true);
+ injector = makeInjectorWithProperties(initializePropertes(), true);
KubernetesAndWorkerTaskRunnerFactory taskRunnerFactory = injector.getInstance(
KubernetesAndWorkerTaskRunnerFactory.class);
Assertions.assertNotNull(taskRunnerFactory);
@@ -122,32 +119,21 @@ public void testDefaultHttpRemoteTaskRunnerFactoryBindSuccessfully()
@Test
public void testMultipleKubernetesTaskRunnerFactoryBindSuccessfully()
{
- final Properties props = initializePropertes(false);
+ final Properties props = initializePropertes();
props.setProperty("druid.indexer.runner.type", MultipleKubernetesTaskRunnerFactory.TYPE_NAME);
props.setProperty("druid.indexer.runner.clusters[0].taskNamespace", "NAMESPACE");
- injector = makeInjectorWithProperties(props, false, true);
+ injector = makeInjectorWithProperties(props, true);
final TaskRunnerFactory<?> taskRunnerFactory = injector.getInstance(TaskRunnerFactory.class);
Assertions.assertInstanceOf(MultipleKubernetesTaskRunnerFactory.class, taskRunnerFactory);
}
- @Test
- public void testRemoteTaskRunnerFactoryBindSuccessfully()
- {
- injector = makeInjectorWithProperties(initializePropertes(true), true, false);
- KubernetesAndWorkerTaskRunnerFactory taskRunnerFactory = injector.getInstance(
- KubernetesAndWorkerTaskRunnerFactory.class);
- Assertions.assertNotNull(taskRunnerFactory);
-
- Assertions.assertNotNull(taskRunnerFactory.build());
- }
-
@Test
public void testExceptionThrownIfNoTaskRunnerFactoryBind()
{
Assertions.assertThrows(ProvisionException.class, () -> {
- injector = makeInjectorWithProperties(initializePropertes(false), false, false);
+ injector = makeInjectorWithProperties(initializePropertes(), false);
injector.getInstance(KubernetesAndWorkerTaskRunnerFactory.class);
});
}
@@ -159,7 +145,7 @@ public void test_build_withMultiContainerAdapterType_returnsWithMultiContainerTa
props.setProperty("druid.indexer.runner.k8s.adapter.type", "overlordMultiContainer");
props.setProperty("druid.indexer.runner.namespace", "NAMESPACE");
- injector = makeInjectorWithProperties(props, false, true);
+ injector = makeInjectorWithProperties(props, true);
TaskAdapter taskAdapter = injector.getInstance(
TaskAdapter.class);
@@ -173,7 +159,7 @@ public void test_build_withSingleContainerAdapterType_returnsKubernetesTaskRunne
Properties props = new Properties();
props.setProperty("druid.indexer.runner.k8s.adapter.type", "overlordSingleContainer");
props.setProperty("druid.indexer.runner.namespace", "NAMESPACE");
- injector = makeInjectorWithProperties(props, false, true);
+ injector = makeInjectorWithProperties(props, true);
TaskAdapter taskAdapter = injector.getInstance(
TaskAdapter.class);
@@ -188,7 +174,7 @@ public void test_build_withSingleContainerAdapterTypeAndSidecarSupport_throwsPro
props.setProperty("druid.indexer.runner.k8s.adapter.type", "overlordSingleContainer");
props.setProperty("druid.indexer.runner.sidecarSupport", "true");
props.setProperty("druid.indexer.runner.namespace", "NAMESPACE");
- injector = makeInjectorWithProperties(props, false, true);
+ injector = makeInjectorWithProperties(props, true);
Assertions.assertThrows(
ProvisionException.class,
@@ -203,7 +189,7 @@ public void test_build_withSidecarSupport_returnsKubernetesTaskRunnerWithMultiCo
Properties props = new Properties();
props.setProperty("druid.indexer.runner.sidecarSupport", "true");
props.setProperty("druid.indexer.runner.namespace", "NAMESPACE");
- injector = makeInjectorWithProperties(props, false, true);
+ injector = makeInjectorWithProperties(props, true);
TaskAdapter adapter = injector.getInstance(TaskAdapter.class);
@@ -218,7 +204,7 @@ public void test_build_withoutSidecarSupport_returnsKubernetesTaskRunnerWithSing
Properties props = new Properties();
props.setProperty("druid.indexer.runner.sidecarSupport", "false");
props.setProperty("druid.indexer.runner.namespace", "NAMESPACE");
- injector = makeInjectorWithProperties(props, false, true);
+ injector = makeInjectorWithProperties(props, true);
TaskAdapter adapter = injector.getInstance(TaskAdapter.class);
@@ -235,7 +221,7 @@ public void test_build_withPodTemplateAdapterType_returnsKubernetesTaskRunnerWit
props.setProperty("druid.indexer.runner.k8s.adapter.type", "customTemplateAdapter");
props.setProperty("druid.indexer.runner.k8s.podTemplate.base", url.getPath());
props.setProperty("druid.indexer.runner.namespace", "NAMESPACE");
- injector = makeInjectorWithProperties(props, false, true);
+ injector = makeInjectorWithProperties(props, true);
TaskAdapter adapter = injector.getInstance(TaskAdapter.class);
@@ -251,7 +237,7 @@ public void test_httpClientFactory_defaultsToVertx()
props.setProperty("druid.indexer.runner.namespace", "NAMESPACE");
// Don't set httpClientType - should default to vertx
- injector = makeInjectorWithProperties(props, false, true);
+ injector = makeInjectorWithProperties(props, true);
DruidKubernetesHttpClientFactory factory = injector.getInstance(DruidKubernetesHttpClientFactory.class);
Assertions.assertNotNull(factory);
@@ -266,7 +252,7 @@ public void test_httpClientFactory_okhttpSelection()
props.setProperty("druid.indexer.runner.namespace", "NAMESPACE");
props.setProperty("druid.indexer.runner.k8sAndWorker.http.httpClientType", "okhttp");
- injector = makeInjectorWithProperties(props, false, true);
+ injector = makeInjectorWithProperties(props, true);
DruidKubernetesHttpClientFactory factory = injector.getInstance(DruidKubernetesHttpClientFactory.class);
Assertions.assertNotNull(factory);
@@ -281,7 +267,7 @@ public void test_httpClientFactory_vertxExplicitSelection()
props.setProperty("druid.indexer.runner.namespace", "NAMESPACE");
props.setProperty("druid.indexer.runner.k8sAndWorker.http.httpClientType", "vertx");
- injector = makeInjectorWithProperties(props, false, true);
+ injector = makeInjectorWithProperties(props, true);
DruidKubernetesHttpClientFactory factory = injector.getInstance(DruidKubernetesHttpClientFactory.class);
Assertions.assertNotNull(factory);
@@ -296,7 +282,7 @@ public void test_httpClientFactory_jdkSelection()
props.setProperty("druid.indexer.runner.namespace", "NAMESPACE");
props.setProperty("druid.indexer.runner.k8sAndWorker.http.httpClientType", "javaStandardHttp");
- injector = makeInjectorWithProperties(props, false, true);
+ injector = makeInjectorWithProperties(props, true);
DruidKubernetesHttpClientFactory factory = injector.getInstance(DruidKubernetesHttpClientFactory.class);
Assertions.assertNotNull(factory);
@@ -312,7 +298,7 @@ public void test_httpClientFactory_invalidTypeThrowsException()
props.setProperty("druid.indexer.runner.namespace", "NAMESPACE");
props.setProperty("druid.indexer.runner.k8sAndWorker.http.httpClientType", "invalid");
- injector = makeInjectorWithProperties(props, false, true);
+ injector = makeInjectorWithProperties(props, true);
injector.getInstance(DruidKubernetesHttpClientFactory.class);
});
}
@@ -324,7 +310,7 @@ public void test_druidKubernetesClient_createdWithVertxClient()
props.setProperty("druid.indexer.runner.namespace", "NAMESPACE");
// Don't set httpClientType - should default to vertx
- injector = makeInjectorWithProperties(props, false, true);
+ injector = makeInjectorWithProperties(props, true);
DruidKubernetesClient client = injector.getInstance(DruidKubernetesClient.class);
Assertions.assertNotNull(client, "DruidKubernetesClient should be created successfully");
@@ -333,8 +319,7 @@ public void test_druidKubernetesClient_createdWithVertxClient()
private Injector makeInjectorWithProperties(
final Properties props,
- boolean isWorkerTypeRemote,
- boolean isWorkerTypeHttpRemote
+ boolean bindHttpRemoteTaskRunnerFactory
)
{
return Guice.createInjector(
@@ -350,10 +335,7 @@ private Injector makeInjectorWithProperties(
binder.bind(DruidNode.class)
.annotatedWith(Self.class)
.toInstance(new DruidNode("test-inject", null, false, null, null, true, false));
- if (isWorkerTypeRemote) {
- binder.bind(RemoteTaskRunnerFactory.class).toInstance(remoteTaskRunnerFactory);
- }
- if (isWorkerTypeHttpRemote) {
+ if (bindHttpRemoteTaskRunnerFactory) {
binder.bind(HttpRemoteTaskRunnerFactory.class).toInstance(httpRemoteTaskRunnerFactory);
}
binder.bind(
@@ -374,14 +356,11 @@ private Injector makeInjectorWithProperties(
));
}
- private static Properties initializePropertes(boolean isWorkerTypeRemote)
+ private static Properties initializePropertes()
{
final Properties props = new Properties();
props.put("druid.indexer.runner.namespace", "NAMESPACE");
props.put("druid.indexer.runner.k8sAndWorker.runnerStrategy.type", "k8s");
- if (isWorkerTypeRemote) {
- props.put("druid.indexer.runner.k8sAndWorker.runnerStrategy.workerType", "remote");
- }
return props;
}
}
diff --git a/extensions-core/kubernetes-overlord-extensions/src/test/resources/kubernetesAndWorkerTaskRunnerConfig.json b/extensions-core/kubernetes-overlord-extensions/src/test/resources/kubernetesAndWorkerTaskRunnerConfig.json
index 43e7414f11f8..de09ff0ee209 100644
--- a/extensions-core/kubernetes-overlord-extensions/src/test/resources/kubernetesAndWorkerTaskRunnerConfig.json
+++ b/extensions-core/kubernetes-overlord-extensions/src/test/resources/kubernetesAndWorkerTaskRunnerConfig.json
@@ -1,4 +1,4 @@
{
"runnerStrategy.type": "worker",
- "runnerStrategy.workerType": "remote"
-}
\ No newline at end of file
+ "runnerStrategy.workerType": "httpRemote"
+}
diff --git a/extensions-core/orc-extensions/src/main/java/org/apache/druid/data/input/orc/OrcInputFormat.java b/extensions-core/orc-extensions/src/main/java/org/apache/druid/data/input/orc/OrcInputFormat.java
index 7474a79a15eb..96bfc03214dc 100644
--- a/extensions-core/orc-extensions/src/main/java/org/apache/druid/data/input/orc/OrcInputFormat.java
+++ b/extensions-core/orc-extensions/src/main/java/org/apache/druid/data/input/orc/OrcInputFormat.java
@@ -36,12 +36,14 @@
import java.io.File;
import java.io.IOException;
import java.util.Objects;
+import java.util.concurrent.atomic.AtomicBoolean;
public class OrcInputFormat extends NestedInputFormat
{
static final long SCALE_FACTOR = 8L;
private final boolean binaryAsString;
private final Configuration conf;
+ private final AtomicBoolean fileSystemInitialized = new AtomicBoolean(false);
@JsonCreator
public OrcInputFormat(
@@ -55,19 +57,20 @@ public OrcInputFormat(
this.conf = conf;
}
- private void initialize(Configuration conf)
+ // Init FileSystem once under this class's classloader to avoid concurrent setContextClassLoader races.
+ private void ensureFileSystemInitialized()
{
- //Initializing seperately since during eager initialization, resolving
- //namenode hostname throws an error if nodes are ephemeral
-
- // Ensure that FileSystem class level initialization happens with correct CL
- // See https://github.com/apache/druid/issues/1714
- ClassLoader currCtxCl = Thread.currentThread().getContextClassLoader();
+ if (!fileSystemInitialized.compareAndSet(false, true)) {
+ return;
+ }
+ final ClassLoader currCtxCl = Thread.currentThread().getContextClassLoader();
try {
Thread.currentThread().setContextClassLoader(getClass().getClassLoader());
FileSystem.get(conf);
}
catch (IOException ex) {
+ // Reset so a subsequent createReader can retry init instead of skipping it.
+ fileSystemInitialized.set(false);
throw new RuntimeException(ex);
}
finally {
@@ -91,7 +94,7 @@ public boolean getBinaryAsString()
@Override
public InputEntityReader createReader(InputRowSchema inputRowSchema, InputEntity source, File temporaryDirectory)
{
- initialize(conf);
+ ensureFileSystemInitialized();
return new OrcReader(conf, inputRowSchema, source, temporaryDirectory, getFlattenSpec(), binaryAsString);
}
diff --git a/extensions-core/orc-extensions/src/test/java/org/apache/druid/data/input/orc/OrcInputFormatTest.java b/extensions-core/orc-extensions/src/test/java/org/apache/druid/data/input/orc/OrcInputFormatTest.java
index a7f6e5131c3a..555d1de2c998 100644
--- a/extensions-core/orc-extensions/src/test/java/org/apache/druid/data/input/orc/OrcInputFormatTest.java
+++ b/extensions-core/orc-extensions/src/test/java/org/apache/druid/data/input/orc/OrcInputFormatTest.java
@@ -73,6 +73,7 @@ public void testEquals()
{
EqualsVerifier.forClass(OrcInputFormat.class)
.withPrefabValues(Configuration.class, new Configuration(), new Configuration())
+ .withIgnoredFields("fileSystemInitialized")
.usingGetClass()
.verify();
}
diff --git a/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/S3Utils.java b/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/S3Utils.java
index 2cac95a200b1..82412fe412c9 100644
--- a/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/S3Utils.java
+++ b/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/S3Utils.java
@@ -417,9 +417,9 @@ static void uploadFileIfPossible(
/**
* Determines whether to use HTTP or HTTPS protocol based on configuration.
*/
- public static boolean useHttps(AWSClientConfig clientConfig, AWSEndpointConfig endpointConfig)
+ public static boolean useHttps(@Nullable AWSClientConfig clientConfig, AWSEndpointConfig endpointConfig)
{
- String protocol = clientConfig.getProtocol();
+ final String protocol = clientConfig == null ? null : clientConfig.getProtocol();
final String endpointUrl = endpointConfig.getUrl();
if (org.apache.commons.lang3.StringUtils.isNotEmpty(endpointUrl)) {
diff --git a/extensions-core/s3-extensions/src/test/java/org/apache/druid/data/input/s3/S3InputSourceTest.java b/extensions-core/s3-extensions/src/test/java/org/apache/druid/data/input/s3/S3InputSourceTest.java
index 168216affdca..e9291c739249 100644
--- a/extensions-core/s3-extensions/src/test/java/org/apache/druid/data/input/s3/S3InputSourceTest.java
+++ b/extensions-core/s3-extensions/src/test/java/org/apache/druid/data/input/s3/S3InputSourceTest.java
@@ -404,6 +404,39 @@ public void testSerdeWithCloudConfigPropertiesWithSessionToken() throws Exceptio
EasyMock.verify(SERVER_SIDE_ENCRYPTING_AMAZON_S3_BUILDER);
}
+ @Test
+ public void testSchemelessEndpointConfigUrlWithNullClientConfigResolvesSupplier() throws Exception
+ {
+ EasyMock.reset(SERVER_SIDE_ENCRYPTING_AMAZON_S3_BUILDER);
+ EasyMock.expect(SERVER_SIDE_ENCRYPTING_AMAZON_S3_BUILDER.getS3StorageConfig())
+ .andStubReturn(S3_STORAGE_CONFIG);
+ EasyMock.replay(SERVER_SIDE_ENCRYPTING_AMAZON_S3_BUILDER);
+
+ final AWSEndpointConfig schemelessEndpoint = MAPPER.readValue(
+ "{\"url\":\"s3.example.com\",\"signingRegion\":\"us-east-1\"}",
+ AWSEndpointConfig.class
+ );
+
+ final S3InputSource inputSource = new S3InputSource(
+ SERVICE,
+ SERVER_SIDE_ENCRYPTING_AMAZON_S3_BUILDER,
+ INPUT_DATA_CONFIG,
+ null,
+ null,
+ EXPECTED_LOCATION,
+ null,
+ CLOUD_CONFIG_PROPERTIES,
+ null,
+ schemelessEndpoint,
+ null
+ );
+
+ // Forces s3ClientSupplier evaluation, which hits S3Utils.useHttps and confirms a null client config does not blow up.
+ inputSource.createEntity(new CloudObjectLocation("bucket", "path"));
+
+ EasyMock.verify(SERVER_SIDE_ENCRYPTING_AMAZON_S3_BUILDER);
+ }
+
@Test
public void testGetSetSessionToken()
{
diff --git a/extensions-core/s3-extensions/src/test/java/org/apache/druid/storage/s3/S3StorageDruidModuleTest.java b/extensions-core/s3-extensions/src/test/java/org/apache/druid/storage/s3/S3StorageDruidModuleTest.java
index 3932c147695b..5d4fc4f188f2 100644
--- a/extensions-core/s3-extensions/src/test/java/org/apache/druid/storage/s3/S3StorageDruidModuleTest.java
+++ b/extensions-core/s3-extensions/src/test/java/org/apache/druid/storage/s3/S3StorageDruidModuleTest.java
@@ -19,11 +19,12 @@
package org.apache.druid.storage.s3;
-import com.google.common.collect.ImmutableList;
+import com.google.inject.Guice;
import com.google.inject.Injector;
import org.apache.druid.common.aws.AWSModule;
-import org.apache.druid.guice.GuiceInjectors;
+import org.apache.druid.guice.DruidSecondaryModule;
import org.apache.druid.guice.ServerModule;
+import org.apache.druid.guice.StartupInjectorBuilder;
import org.apache.druid.segment.loading.OmniDataSegmentArchiver;
import org.apache.druid.segment.loading.OmniDataSegmentKiller;
import org.apache.druid.segment.loading.OmniDataSegmentMover;
@@ -70,12 +71,12 @@ public void testSegmentMoverBoundSingleton()
private static Injector createInjector()
{
- return GuiceInjectors.makeStartupInjectorWithModules(
- ImmutableList.of(
- new AWSModule(),
- new S3StorageDruidModule(),
- new ServerModule()
- )
+ final Injector startupInjector = new StartupInjectorBuilder().forServer().build();
+ return Guice.createInjector(
+ startupInjector.getInstance(DruidSecondaryModule.class),
+ new AWSModule(),
+ new S3StorageDruidModule(),
+ new ServerModule()
);
}
}
diff --git a/extensions-core/s3-extensions/src/test/java/org/apache/druid/storage/s3/S3UtilsTest.java b/extensions-core/s3-extensions/src/test/java/org/apache/druid/storage/s3/S3UtilsTest.java
index 6c46df7d993a..16b8c20d0f2e 100644
--- a/extensions-core/s3-extensions/src/test/java/org/apache/druid/storage/s3/S3UtilsTest.java
+++ b/extensions-core/s3-extensions/src/test/java/org/apache/druid/storage/s3/S3UtilsTest.java
@@ -19,6 +19,9 @@
package org.apache.druid.storage.s3;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import org.apache.druid.common.aws.AWSClientConfig;
+import org.apache.druid.common.aws.AWSEndpointConfig;
import org.easymock.Capture;
import org.easymock.CaptureType;
import org.easymock.EasyMock;
@@ -382,4 +385,42 @@ public void testRetryWithS3MultiObjectDeleteException() throws Exception
);
Assert.assertEquals(maxRetries, count.get());
}
+
+ private static final ObjectMapper JSON = new ObjectMapper();
+
+ private static AWSEndpointConfig endpointWith(String json) throws IOException
+ {
+ return JSON.readValue(json, AWSEndpointConfig.class);
+ }
+
+ @Test
+ public void testUseHttpsNullClientConfigSchemelessEndpointReturnsTrue() throws IOException
+ {
+ Assert.assertTrue(S3Utils.useHttps(null, endpointWith("{\"url\":\"s3.example.com\"}")));
+ }
+
+ @Test
+ public void testUseHttpsNullClientConfigHttpEndpointReturnsFalse() throws IOException
+ {
+ Assert.assertFalse(S3Utils.useHttps(null, endpointWith("{\"url\":\"http://s3.example.com\"}")));
+ }
+
+ @Test
+ public void testUseHttpsNullClientConfigHttpsEndpointReturnsTrue() throws IOException
+ {
+ Assert.assertTrue(S3Utils.useHttps(null, endpointWith("{\"url\":\"https://s3.example.com\"}")));
+ }
+
+ @Test
+ public void testUseHttpsNullClientConfigNullEndpointUrlReturnsTrue() throws IOException
+ {
+ Assert.assertTrue(S3Utils.useHttps(null, new AWSEndpointConfig()));
+ }
+
+ @Test
+ public void testUseHttpsDefaultClientConfigSchemelessEndpointReturnsTrue() throws IOException
+ {
+ // Sanity check: default AWSClientConfig protocol is "https"; schemeless URL inherits "https".
+ Assert.assertTrue(S3Utils.useHttps(new AWSClientConfig(), endpointWith("{\"url\":\"s3.example.com\"}")));
+ }
}
diff --git a/indexing-service/pom.xml b/indexing-service/pom.xml
index 42e117203649..e6cc7d787c10 100644
--- a/indexing-service/pom.xml
+++ b/indexing-service/pom.xml
@@ -92,14 +92,6 @@
io.nettynetty
-
- org.apache.zookeeper
- zookeeper
-
-
- org.apache.zookeeper
- zookeeper-jute
- com.fasterxml.jackson.corejackson-core
@@ -112,10 +104,6 @@
com.google.guavaguava
-
- org.apache.curator
- curator-recipes
- jakarta.validationjakarta.validation-api
diff --git a/indexing-service/src/main/java/org/apache/druid/guice/IndexingServiceModuleHelper.java b/indexing-service/src/main/java/org/apache/druid/guice/IndexingServiceModuleHelper.java
index cc3732439d8a..da60043c6780 100644
--- a/indexing-service/src/main/java/org/apache/druid/guice/IndexingServiceModuleHelper.java
+++ b/indexing-service/src/main/java/org/apache/druid/guice/IndexingServiceModuleHelper.java
@@ -22,8 +22,6 @@
import com.google.inject.Binder;
import org.apache.druid.indexing.overlord.config.ForkingTaskRunnerConfig;
import org.apache.druid.indexing.overlord.config.HttpRemoteTaskRunnerConfig;
-import org.apache.druid.indexing.overlord.config.RemoteTaskRunnerConfig;
-import org.apache.druid.server.initialization.IndexerZkConfig;
/**
*/
@@ -34,8 +32,6 @@ public class IndexingServiceModuleHelper
public static void configureTaskRunnerConfigs(Binder binder)
{
JsonConfigProvider.bind(binder, INDEXER_RUNNER_PROPERTY_PREFIX, ForkingTaskRunnerConfig.class);
- JsonConfigProvider.bind(binder, INDEXER_RUNNER_PROPERTY_PREFIX, RemoteTaskRunnerConfig.class);
JsonConfigProvider.bind(binder, INDEXER_RUNNER_PROPERTY_PREFIX, HttpRemoteTaskRunnerConfig.class);
- JsonConfigProvider.bind(binder, "druid.zk.paths.indexer", IndexerZkConfig.class);
}
}
diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/IndexTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/IndexTask.java
index eb3b5c0e84f8..b29899a77cf5 100644
--- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/IndexTask.java
+++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/IndexTask.java
@@ -435,7 +435,7 @@ public TaskStatus runTask(final TaskToolbox toolbox)
// ParallelIndexSupervisorTask because it doesn't support APIs for live ingestion reports.
log.warn("Chat handler is already registered. Skipping chat handler registration.");
} else {
- toolbox.getChatHandlerProvider().register(getId(), this, false);
+ toolbox.getChatHandlerProvider().register(getId(), this);
}
this.authorizerMapper = toolbox.getAuthorizerMapper();
diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTask.java
index 44147e242955..9025c2ce8a6a 100644
--- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTask.java
+++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTask.java
@@ -522,7 +522,7 @@ public TaskStatus runTask(TaskToolbox toolbox) throws Exception
Preconditions.checkNotNull(toolbox.getChatHandlerProvider(), "chatHandlerProvider").getClass().getName()
);
authorizerMapper = toolbox.getAuthorizerMapper();
- toolbox.getChatHandlerProvider().register(getId(), this, false);
+ toolbox.getChatHandlerProvider().register(getId(), this);
// the lineage-based segment allocation protocol must be used as the legacy protocol has a critical bug
// (see SinglePhaseParallelIndexTaskRunner.allocateNewSegment()). However, we tell subtasks to use
diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/SinglePhaseSubTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/SinglePhaseSubTask.java
index f706aafe39dd..53a480b66cc2 100644
--- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/SinglePhaseSubTask.java
+++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/SinglePhaseSubTask.java
@@ -242,7 +242,7 @@ public TaskStatus runTask(final TaskToolbox toolbox) throws Exception
}
this.authorizerMapper = toolbox.getAuthorizerMapper();
- toolbox.getChatHandlerProvider().register(getId(), this, false);
+ toolbox.getChatHandlerProvider().register(getId(), this);
rowIngestionMeters = toolbox.getRowIngestionMetersFactory().createRowIngestionMeters();
parseExceptionHandler = new ParseExceptionHandler(
diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/DruidOverlord.java b/indexing-service/src/main/java/org/apache/druid/indexing/overlord/DruidOverlord.java
index 350f9b08cf91..4dac9ea81c1a 100644
--- a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/DruidOverlord.java
+++ b/indexing-service/src/main/java/org/apache/druid/indexing/overlord/DruidOverlord.java
@@ -23,9 +23,7 @@
import com.google.common.base.Optional;
import com.google.inject.Inject;
import org.apache.druid.client.indexing.IndexingService;
-import org.apache.druid.curator.discovery.ServiceAnnouncer;
import org.apache.druid.discovery.DruidLeaderSelector;
-import org.apache.druid.guice.annotations.Self;
import org.apache.druid.indexing.common.actions.SegmentAllocationQueue;
import org.apache.druid.indexing.common.actions.TaskActionClientFactory;
import org.apache.druid.indexing.common.task.TaskContextEnricher;
@@ -42,7 +40,6 @@
import org.apache.druid.java.util.emitter.EmittingLogger;
import org.apache.druid.java.util.emitter.service.ServiceEmitter;
import org.apache.druid.metadata.segment.cache.SegmentMetadataCache;
-import org.apache.druid.server.DruidNode;
import org.apache.druid.server.coordinator.CoordinatorOverlordServiceConfig;
import java.util.concurrent.atomic.AtomicReference;
@@ -67,10 +64,9 @@ public class DruidOverlord
private final AtomicReference leaderLifecycleRef = new AtomicReference<>(null);
/**
- * Indicates that all services have been started and the node can now announce
- * itself with {@link ServiceAnnouncer#announce}. This must be set to false
- * as soon as {@link DruidLeaderSelector.Listener#stopBeingLeader()} is
- * called.
+ * Indicates that all services have been started and the node is ready to serve
+ * leader-only HTTP routes. This must be set to false as soon as
+ * {@link DruidLeaderSelector.Listener#stopBeingLeader()} is called.
*/
private volatile boolean initialized;
@@ -83,9 +79,7 @@ public DruidOverlord(
final GlobalTaskLockbox taskLockbox,
final TaskStorage taskStorage,
final TaskActionClientFactory taskActionClientFactory,
- @Self final DruidNode selfNode,
final TaskRunnerFactory runnerFactory,
- final ServiceAnnouncer serviceAnnouncer,
final CoordinatorOverlordServiceConfig coordinatorOverlordServiceConfig,
final ServiceEmitter emitter,
final SupervisorManager supervisorManager,
@@ -103,9 +97,6 @@ public DruidOverlord(
this.segmentMetadataCache = segmentMetadataCache;
this.coordinatorOverlordServiceConfig = coordinatorOverlordServiceConfig;
- final DruidNode node = coordinatorOverlordServiceConfig.getOverlordService() == null ? selfNode :
- selfNode.withService(coordinatorOverlordServiceConfig.getOverlordService());
-
this.leadershipListener = new DruidLeaderSelector.Listener()
{
@Override
@@ -173,15 +164,13 @@ public void start()
compactionScheduler.becomeLeader();
scheduledBatchTaskManager.start();
- // Announce the node only after all the services have been initialized
+ // Mark ready only after all the services have been initialized
initialized = true;
- serviceAnnouncer.announce(node);
}
@Override
public void stop()
{
- serviceAnnouncer.unannounce(node);
scheduledBatchTaskManager.stop();
compactionScheduler.stopBeingLeader();
taskMaster.downgradeToHalfLeader();
diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/ForkingTaskRunner.java b/indexing-service/src/main/java/org/apache/druid/indexing/overlord/ForkingTaskRunner.java
index fb09cb5f1547..52c2dcc7ba38 100644
--- a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/ForkingTaskRunner.java
+++ b/indexing-service/src/main/java/org/apache/druid/indexing/overlord/ForkingTaskRunner.java
@@ -115,7 +115,8 @@ public class ForkingTaskRunner
"--add-opens=java.base/jdk.internal.ref=ALL-UNNAMED",
"--add-opens=java.base/java.io=ALL-UNNAMED",
"--add-opens=java.base/java.lang=ALL-UNNAMED",
- "--add-opens=jdk.management/com.sun.management.internal=ALL-UNNAMED"
+ "--add-opens=jdk.management/com.sun.management.internal=ALL-UNNAMED",
+ "--add-modules=jdk.incubator.vector"
);
private final ForkingTaskRunnerConfig config;
diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/ImmutableWorkerInfo.java b/indexing-service/src/main/java/org/apache/druid/indexing/overlord/ImmutableWorkerInfo.java
index cd911ed99811..76a8385d7095 100644
--- a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/ImmutableWorkerInfo.java
+++ b/indexing-service/src/main/java/org/apache/druid/indexing/overlord/ImmutableWorkerInfo.java
@@ -105,7 +105,8 @@ public ImmutableWorkerInfo(
}
/**
- * Helper used by {@link ZkWorker} and {@link org.apache.druid.indexing.overlord.hrtr.WorkerHolder}.
+ * Helper used by {@link org.apache.druid.indexing.overlord.hrtr.WorkerHolder} to build a worker view from a set of
+ * task announcements.
*/
public static ImmutableWorkerInfo fromWorkerAnnouncements(
final Worker worker,
diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/RemoteTaskRunner.java b/indexing-service/src/main/java/org/apache/druid/indexing/overlord/RemoteTaskRunner.java
deleted file mode 100644
index 4018701d447f..000000000000
--- a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/RemoteTaskRunner.java
+++ /dev/null
@@ -1,1673 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.druid.indexing.overlord;
-
-import com.fasterxml.jackson.databind.ObjectMapper;
-import com.google.common.annotations.VisibleForTesting;
-import com.google.common.base.Joiner;
-import com.google.common.base.Optional;
-import com.google.common.base.Preconditions;
-import com.google.common.base.Predicate;
-import com.google.common.base.Stopwatch;
-import com.google.common.base.Supplier;
-import com.google.common.base.Throwables;
-import com.google.common.collect.Collections2;
-import com.google.common.collect.ImmutableList;
-import com.google.common.collect.ImmutableMap;
-import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
-import com.google.common.util.concurrent.FutureCallback;
-import com.google.common.util.concurrent.Futures;
-import com.google.common.util.concurrent.ListenableFuture;
-import com.google.common.util.concurrent.ListenableScheduledFuture;
-import com.google.common.util.concurrent.ListeningScheduledExecutorService;
-import com.google.common.util.concurrent.MoreExecutors;
-import com.google.common.util.concurrent.SettableFuture;
-import org.apache.commons.lang3.mutable.MutableInt;
-import org.apache.curator.framework.CuratorFramework;
-import org.apache.curator.framework.recipes.cache.PathChildrenCache;
-import org.apache.curator.framework.recipes.cache.PathChildrenCacheListener;
-import org.apache.curator.utils.ZKPaths;
-import org.apache.druid.concurrent.LifecycleLock;
-import org.apache.druid.curator.CuratorUtils;
-import org.apache.druid.curator.cache.PathChildrenCacheFactory;
-import org.apache.druid.indexer.RunnerTaskState;
-import org.apache.druid.indexer.TaskLocation;
-import org.apache.druid.indexer.TaskState;
-import org.apache.druid.indexer.TaskStatus;
-import org.apache.druid.indexing.common.task.IndexTaskUtils;
-import org.apache.druid.indexing.common.task.Task;
-import org.apache.druid.indexing.overlord.autoscaling.ProvisioningService;
-import org.apache.druid.indexing.overlord.autoscaling.ProvisioningStrategy;
-import org.apache.druid.indexing.overlord.autoscaling.ScalingStats;
-import org.apache.druid.indexing.overlord.config.RemoteTaskRunnerConfig;
-import org.apache.druid.indexing.overlord.setup.DefaultWorkerBehaviorConfig;
-import org.apache.druid.indexing.overlord.setup.WorkerBehaviorConfig;
-import org.apache.druid.indexing.overlord.setup.WorkerSelectStrategy;
-import org.apache.druid.indexing.worker.TaskAnnouncement;
-import org.apache.druid.indexing.worker.Worker;
-import org.apache.druid.java.util.common.DateTimes;
-import org.apache.druid.java.util.common.ISE;
-import org.apache.druid.java.util.common.Pair;
-import org.apache.druid.java.util.common.RE;
-import org.apache.druid.java.util.common.StringUtils;
-import org.apache.druid.java.util.common.concurrent.Execs;
-import org.apache.druid.java.util.common.concurrent.ScheduledExecutors;
-import org.apache.druid.java.util.common.io.Closer;
-import org.apache.druid.java.util.common.lifecycle.LifecycleStart;
-import org.apache.druid.java.util.common.lifecycle.LifecycleStop;
-import org.apache.druid.java.util.emitter.EmittingLogger;
-import org.apache.druid.java.util.emitter.service.ServiceEmitter;
-import org.apache.druid.java.util.emitter.service.ServiceMetricEvent;
-import org.apache.druid.java.util.http.client.HttpClient;
-import org.apache.druid.java.util.http.client.Request;
-import org.apache.druid.java.util.http.client.response.InputStreamResponseHandler;
-import org.apache.druid.java.util.http.client.response.StatusResponseHandler;
-import org.apache.druid.java.util.http.client.response.StatusResponseHolder;
-import org.apache.druid.server.initialization.IndexerZkConfig;
-import org.apache.druid.tasklogs.TaskLogStreamer;
-import org.apache.zookeeper.CreateMode;
-import org.apache.zookeeper.KeeperException;
-import org.jboss.netty.handler.codec.http.HttpMethod;
-import org.jboss.netty.handler.codec.http.HttpResponseStatus;
-import org.joda.time.Duration;
-import org.joda.time.Period;
-
-import javax.annotation.Nullable;
-import java.io.IOException;
-import java.io.InputStream;
-import java.net.URL;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.Comparator;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.LinkedHashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-import java.util.concurrent.Callable;
-import java.util.concurrent.ConcurrentHashMap;
-import java.util.concurrent.ConcurrentMap;
-import java.util.concurrent.CopyOnWriteArrayList;
-import java.util.concurrent.ExecutionException;
-import java.util.concurrent.Executor;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.ScheduledFuture;
-import java.util.concurrent.TimeUnit;
-import java.util.concurrent.TimeoutException;
-
-/**
- * The RemoteTaskRunner's primary responsibility is to assign tasks to worker nodes.
- * The RemoteTaskRunner uses Zookeeper to keep track of which workers are running which tasks. Tasks are assigned by
- * creating ephemeral nodes in ZK that workers must remove. Workers announce the statuses of the tasks they are running.
- * Once a task completes, it is up to the RTR to remove the task status and run any necessary cleanup.
- * The RemoteTaskRunner is event driven and updates state according to ephemeral node changes in ZK.
- *
- * The RemoteTaskRunner will assign tasks to a node until the node hits capacity. At that point, task assignment will
- * fail. The RemoteTaskRunner depends on another component to create additional worker resources.
- *
- * If a worker node becomes inexplicably disconnected from Zk, the RemoteTaskRunner will fail any tasks associated with the
- * worker after waiting for RemoteTaskRunnerConfig.taskCleanupTimeout for the worker to show up.
- *
- * The RemoteTaskRunner uses ZK for job management and assignment and http for IPC messages.
- */
-public class RemoteTaskRunner implements WorkerTaskRunner, TaskLogStreamer
-{
- private static final EmittingLogger log = new EmittingLogger(RemoteTaskRunner.class);
- private static final Joiner JOINER = Joiner.on("/");
-
- private final ObjectMapper jsonMapper;
- private final RemoteTaskRunnerConfig config;
- private final Duration shutdownTimeout;
- private final IndexerZkConfig indexerZkConfig;
- private final CuratorFramework cf;
- private final PathChildrenCacheFactory workerStatusPathChildrenCacheFactory;
- private final ExecutorService workerStatusPathChildrenCacheExecutor;
- private final PathChildrenCache workerPathCache;
- private final HttpClient httpClient;
- private final Supplier workerConfigRef;
-
- // all workers that exist in ZK
- private final ConcurrentMap zkWorkers = new ConcurrentHashMap<>();
- // payloads of pending tasks, which we remember just long enough to assign to workers
- private final ConcurrentMap pendingTaskPayloads = new ConcurrentHashMap<>();
- // tasks that have not yet been assigned to a worker
- private final RemoteTaskRunnerWorkQueue pendingTasks = new RemoteTaskRunnerWorkQueue();
- // all tasks that have been assigned to a worker
- private final RemoteTaskRunnerWorkQueue runningTasks = new RemoteTaskRunnerWorkQueue();
- // tasks that are complete but not cleaned up yet
- private final RemoteTaskRunnerWorkQueue completeTasks = new RemoteTaskRunnerWorkQueue();
-
- private final ExecutorService runPendingTasksExec;
-
- // Workers that have been marked as lazy. these workers are not running any tasks and can be terminated safely by the scaling policy.
- private final ConcurrentMap lazyWorkers = new ConcurrentHashMap<>();
-
- // Workers that have been blacklisted.
- private final Set blackListedWorkers = Collections.synchronizedSet(new HashSet<>());
-
- // task runner listeners
- private final CopyOnWriteArrayList> listeners = new CopyOnWriteArrayList<>();
-
- // workers which were assigned a task and are yet to acknowledge same.
- // Map: workerId -> taskId
- private final ConcurrentMap workersWithUnacknowledgedTask = new ConcurrentHashMap<>();
- // Map: taskId -> taskId .tasks which are being tried to be assigned to a worker
- private final ConcurrentMap tryAssignTasks = new ConcurrentHashMap<>();
-
- private final Object statusLock = new Object();
-
- private final LifecycleLock lifecycleLock = new LifecycleLock();
-
- private final ListeningScheduledExecutorService cleanupExec;
-
- private final ConcurrentMap removedWorkerCleanups = new ConcurrentHashMap<>();
- private final ProvisioningStrategy provisioningStrategy;
- private final ServiceEmitter emitter;
- private ProvisioningService provisioningService;
-
- public RemoteTaskRunner(
- ObjectMapper jsonMapper,
- RemoteTaskRunnerConfig config,
- IndexerZkConfig indexerZkConfig,
- CuratorFramework cf,
- PathChildrenCacheFactory.Builder pathChildrenCacheFactory,
- HttpClient httpClient,
- Supplier workerConfigRef,
- ProvisioningStrategy provisioningStrategy,
- ServiceEmitter emitter
- )
- {
- this.jsonMapper = jsonMapper;
- this.config = config;
- this.shutdownTimeout = config.getTaskShutdownLinkTimeout().toStandardDuration(); // Fail fast
- this.indexerZkConfig = indexerZkConfig;
- this.cf = cf;
- this.workerPathCache = pathChildrenCacheFactory.build().make(cf, indexerZkConfig.getAnnouncementsPath());
- this.workerStatusPathChildrenCacheExecutor = PathChildrenCacheFactory.Builder.createDefaultExecutor();
- this.workerStatusPathChildrenCacheFactory = pathChildrenCacheFactory
- .withExecutorService(workerStatusPathChildrenCacheExecutor)
- .withShutdownExecutorOnClose(false)
- .build();
- this.httpClient = httpClient;
- this.workerConfigRef = workerConfigRef;
- this.cleanupExec = MoreExecutors.listeningDecorator(
- ScheduledExecutors.fixed(1, "RemoteTaskRunner-Scheduled-Cleanup--%d")
- );
- this.provisioningStrategy = provisioningStrategy;
- this.runPendingTasksExec = Execs.multiThreaded(
- config.getPendingTasksRunnerNumThreads(),
- "rtr-pending-tasks-runner-%d"
- );
- this.emitter = emitter;
- }
-
- @Override
- @LifecycleStart
- public void start()
- {
- if (!lifecycleLock.canStart()) {
- return;
- }
- try {
- log.info("Starting RemoteTaskRunner...");
- final MutableInt waitingFor = new MutableInt(1);
- final Object waitingForMonitor = new Object();
-
- // Add listener for creation/deletion of workers
- workerPathCache.getListenable().addListener(
- (client, event) -> {
- final Worker worker;
- switch (event.getType()) {
- case CHILD_ADDED:
- worker = jsonMapper.readValue(
- event.getData().getData(),
- Worker.class
- );
- synchronized (waitingForMonitor) {
- waitingFor.increment();
- }
- Futures.addCallback(
- addWorker(worker),
- new FutureCallback<>()
- {
- @Override
- public void onSuccess(ZkWorker zkWorker)
- {
- synchronized (waitingForMonitor) {
- waitingFor.decrement();
- waitingForMonitor.notifyAll();
- }
- }
-
- @Override
- public void onFailure(Throwable throwable)
- {
- synchronized (waitingForMonitor) {
- waitingFor.decrement();
- waitingForMonitor.notifyAll();
- }
- }
- },
- MoreExecutors.directExecutor()
- );
- break;
- case CHILD_UPDATED:
- worker = jsonMapper.readValue(
- event.getData().getData(),
- Worker.class
- );
- updateWorker(worker);
- break;
-
- case CHILD_REMOVED:
- worker = jsonMapper.readValue(
- event.getData().getData(),
- Worker.class
- );
- removeWorker(worker);
- break;
- case INITIALIZED:
- // Schedule cleanup for task status of the workers that might have disconnected while overlord was not running
- List workers;
- try {
- workers = cf.getChildren().forPath(indexerZkConfig.getStatusPath());
- }
- catch (KeeperException.NoNodeException e) {
- // statusPath doesn't exist yet; can occur if no middleManagers have started.
- workers = ImmutableList.of();
- }
- for (String workerId : workers) {
- final String workerAnnouncePath = JOINER.join(indexerZkConfig.getAnnouncementsPath(), workerId);
- final String workerStatusPath = JOINER.join(indexerZkConfig.getStatusPath(), workerId);
- if (!zkWorkers.containsKey(workerId) && cf.checkExists().forPath(workerAnnouncePath) == null) {
- try {
- scheduleTasksCleanupForWorker(workerId, cf.getChildren().forPath(workerStatusPath));
- }
- catch (Exception e) {
- log.warn(
- e,
- "Could not schedule cleanup for worker[%s] during startup (maybe someone removed the status znode[%s]?). Skipping.",
- workerId,
- workerStatusPath
- );
- }
- }
- }
- synchronized (waitingForMonitor) {
- waitingFor.decrement();
- waitingForMonitor.notifyAll();
- }
- break;
- case CONNECTION_SUSPENDED:
- case CONNECTION_RECONNECTED:
- case CONNECTION_LOST:
- // do nothing
- }
- }
- );
- workerPathCache.start(PathChildrenCache.StartMode.POST_INITIALIZED_EVENT);
- synchronized (waitingForMonitor) {
- while (waitingFor.intValue() > 0) {
- waitingForMonitor.wait();
- }
- }
-
- ScheduledExecutors.scheduleAtFixedRate(
- cleanupExec,
- Period.ZERO.toStandardDuration(),
- config.getWorkerBlackListCleanupPeriod().toStandardDuration(),
- this::checkBlackListedNodes
- );
-
- provisioningService = provisioningStrategy.makeProvisioningService(this);
- lifecycleLock.started();
- }
- catch (Exception e) {
- throw new RuntimeException(e);
- }
- finally {
- lifecycleLock.exitStart();
- }
- }
-
- @Override
- @LifecycleStop
- public void stop()
- {
- if (!lifecycleLock.canStop()) {
- return;
- }
- try {
- log.info("Stopping RemoteTaskRunner...");
- provisioningService.close();
-
- Closer closer = Closer.create();
- for (ZkWorker zkWorker : zkWorkers.values()) {
- closer.register(zkWorker);
- }
- closer.register(workerPathCache);
- try {
- closer.close();
- }
- finally {
- workerStatusPathChildrenCacheExecutor.shutdown();
- }
-
- if (runPendingTasksExec != null) {
- runPendingTasksExec.shutdown();
- }
-
- if (cleanupExec != null) {
- cleanupExec.shutdown();
- }
- }
- catch (Exception e) {
- throw new RuntimeException(e);
- }
- finally {
- lifecycleLock.exitStop();
- }
- }
-
- @Override
- public List>> restore()
- {
- return ImmutableList.of();
- }
-
- @Override
- public void registerListener(TaskRunnerListener listener, Executor executor)
- {
- for (Pair pair : listeners) {
- if (pair.lhs.getListenerId().equals(listener.getListenerId())) {
- throw new ISE("Listener [%s] already registered", listener.getListenerId());
- }
- }
-
- final Pair listenerPair = Pair.of(listener, executor);
-
- synchronized (statusLock) {
- for (Map.Entry entry : runningTasks.entrySet()) {
- TaskRunnerUtils.notifyLocationChanged(
- ImmutableList.of(listenerPair),
- entry.getKey(),
- entry.getValue().getLocation()
- );
- }
-
- log.info("Registered listener [%s]", listener.getListenerId());
- listeners.add(listenerPair);
- }
- }
-
- @Override
- public void unregisterListener(String listenerId)
- {
- for (Pair pair : listeners) {
- if (pair.lhs.getListenerId().equals(listenerId)) {
- listeners.remove(pair);
- log.info("Unregistered listener [%s]", listenerId);
- return;
- }
- }
- }
-
- @Override
- public Collection getWorkers()
- {
- return getImmutableWorkerFromZK(zkWorkers.values());
- }
-
- @Override
- public Collection getRunningTasks()
- {
- return ImmutableList.copyOf(runningTasks.values());
- }
-
- @Override
- public Collection getPendingTasks()
- {
- return ImmutableList.copyOf(pendingTasks.values());
- }
-
- @Override
- public Collection getPendingTaskPayloads()
- {
- // return a snapshot of current pending task payloads.
- return ImmutableList.copyOf(pendingTaskPayloads.values());
- }
-
- @Override
- public RemoteTaskRunnerConfig getConfig()
- {
- return config;
- }
-
- @Override
- public Collection getKnownTasks()
- {
- // Use a map to dedupe tasks, since they may transition from one state to another while this method is iterating
- // through the various collections.
- final Map items = new LinkedHashMap<>();
-
- // Racey, since there is a period of time during assignment when a task is neither pending nor running.
- for (RemoteTaskRunnerWorkItem item : pendingTasks.values()) {
- items.put(item.getTaskId(), item);
- }
-
- for (RemoteTaskRunnerWorkItem item : runningTasks.values()) {
- items.put(item.getTaskId(), item);
- }
-
- for (RemoteTaskRunnerWorkItem item : completeTasks.values()) {
- items.put(item.getTaskId(), item);
- }
-
- return ImmutableList.copyOf(items.values());
- }
-
- @Nullable
- @Override
- public RunnerTaskState getRunnerTaskState(String taskId)
- {
- if (pendingTasks.containsKey(taskId)) {
- return RunnerTaskState.PENDING;
- }
- if (runningTasks.containsKey(taskId)) {
- return RunnerTaskState.RUNNING;
- }
- if (completeTasks.containsKey(taskId)) {
- return RunnerTaskState.NONE;
- }
-
- return null;
- }
-
- @Override
- public TaskLocation getTaskLocation(String taskId)
- {
- if (pendingTasks.containsKey(taskId)) {
- return pendingTasks.get(taskId).getLocation();
- }
- if (runningTasks.containsKey(taskId)) {
- return runningTasks.get(taskId).getLocation();
- }
- if (completeTasks.containsKey(taskId)) {
- return completeTasks.get(taskId).getLocation();
- }
-
- return TaskLocation.unknown();
- }
-
- @Override
- public Optional getScalingStats()
- {
- return Optional.fromNullable(provisioningService.getStats());
- }
-
- @Nullable
- public ZkWorker findWorkerRunningTask(String taskId)
- {
- for (ZkWorker zkWorker : zkWorkers.values()) {
- if (zkWorker.isRunningTask(taskId)) {
- return zkWorker;
- }
- }
- return null;
- }
-
- /**
- * Retrieve {@link ZkWorker} based on an ID (host), or null if the ID doesn't exist.
- */
- @Nullable
- ZkWorker findWorkerId(String workerId)
- {
- return zkWorkers.get(workerId);
- }
-
- public boolean isWorkerRunningTask(ZkWorker worker, String taskId)
- {
- return Preconditions.checkNotNull(worker, "worker").isRunningTask(taskId);
- }
-
- /**
- * A task will be run only if there is no current knowledge in the RemoteTaskRunner of the task.
- *
- * @param task task to run
- */
- @Override
- public ListenableFuture run(final Task task)
- {
- final RemoteTaskRunnerWorkItem completeTask, runningTask, pendingTask;
- if ((pendingTask = pendingTasks.get(task.getId())) != null) {
- log.info("Assigned a task[%s] that is already pending!", task.getId());
- runPendingTasks();
- return pendingTask.getResult();
- } else if ((runningTask = runningTasks.get(task.getId())) != null) {
- ZkWorker zkWorker = findWorkerRunningTask(task.getId());
- if (zkWorker == null) {
- log.warn("Told to run task[%s], but no worker has started running it yet.", task.getId());
- } else {
- log.info("Task[%s] already running on %s.", task.getId(), zkWorker.getWorker().getHost());
- TaskAnnouncement announcement = zkWorker.getRunningTasks().get(task.getId());
- if (announcement.getTaskStatus().isComplete()) {
- taskComplete(runningTask, zkWorker, announcement.getTaskStatus());
- }
- }
- return runningTask.getResult();
- } else if ((completeTask = completeTasks.get(task.getId())) != null) {
- return completeTask.getResult();
- } else {
- RemoteTaskRunnerWorkItem workItem = addPendingTask(task);
- runPendingTasks();
- return workItem.getResult();
- }
- }
-
- /**
- * Finds the worker running the task and forwards the shutdown signal to the worker.
- *
- * @param taskId - task id to shutdown
- */
- @Override
- public void shutdown(final String taskId, String reason)
- {
- log.info("Shutdown [%s] because: [%s]", taskId, reason);
- if (!lifecycleLock.awaitStarted(1, TimeUnit.SECONDS)) {
- log.info("This TaskRunner is stopped or not yet started. Ignoring shutdown command for task: %s", taskId);
- } else if (pendingTasks.remove(taskId) != null) {
- pendingTaskPayloads.remove(taskId);
- log.info("Removed task from pending queue: %s", taskId);
- } else if (completeTasks.containsKey(taskId)) {
- cleanup(taskId);
- } else {
- final ZkWorker zkWorker = findWorkerRunningTask(taskId);
-
- if (zkWorker == null) {
- log.info("Can't shutdown! No worker running task %s", taskId);
- return;
- }
- URL url = null;
- try {
- url = TaskRunnerUtils.makeWorkerURL(zkWorker.getWorker(), "/druid/worker/v1/task/%s/shutdown", taskId);
- final StatusResponseHolder response = httpClient.go(
- new Request(HttpMethod.POST, url),
- StatusResponseHandler.getInstance(),
- shutdownTimeout
- ).get();
-
- log.info(
- "Sent shutdown message to worker: %s, status %s, response: %s",
- zkWorker.getWorker().getHost(),
- response.getStatus(),
- response.getContent()
- );
-
- if (!HttpResponseStatus.OK.equals(response.getStatus())) {
- log.error("Shutdown failed for %s! Are you sure the task was running?", taskId);
- }
- }
- catch (InterruptedException e) {
- Thread.currentThread().interrupt();
- throw new RE(e, "Interrupted posting shutdown to [%s] for task [%s]", url, taskId);
- }
- catch (Exception e) {
- throw new RE(e, "Error in handling post to [%s] for task [%s]", zkWorker.getWorker().getHost(), taskId);
- }
- }
- }
-
- @Override
- public Optional streamTaskLog(final String taskId, final long offset) throws IOException
- {
- final ZkWorker zkWorker = findWorkerRunningTask(taskId);
-
- if (zkWorker == null) {
- // Worker is not running this task, it might be available in deep storage
- return Optional.absent();
- } else {
- // Worker is still running this task
- final URL url = TaskRunnerUtils.makeWorkerURL(
- zkWorker.getWorker(),
- "/druid/worker/v1/task/%s/log?offset=%s",
- taskId,
- Long.toString(offset)
- );
- try {
- return Optional.of(httpClient.go(
- new Request(HttpMethod.GET, url),
- new InputStreamResponseHandler()
- ).get());
- }
- catch (InterruptedException e) {
- throw new RuntimeException(e);
- }
- catch (ExecutionException e) {
- // Unwrap if possible
- Throwables.propagateIfPossible(e.getCause(), IOException.class);
- throw new RuntimeException(e);
- }
- }
- }
-
-
- @Override
- public Optional streamTaskReports(final String taskId) throws IOException
- {
- final ZkWorker zkWorker = findWorkerRunningTask(taskId);
-
- if (zkWorker == null) {
- // Worker is not running this task, it might be available in deep storage
- return Optional.absent();
- }
-
- final RemoteTaskRunnerWorkItem runningWorkItem = runningTasks.get(taskId);
-
- if (runningWorkItem == null) {
- // Worker very recently exited.
- return Optional.absent();
- }
-
- final TaskLocation taskLocation = runningWorkItem.getLocation();
-
- if (TaskLocation.unknown().equals(taskLocation)) {
- // No location known for this task. It may have not been assigned one yet.
- return Optional.absent();
- }
-
- final URL url = TaskRunnerUtils.makeTaskLocationURL(
- taskLocation,
- "/druid/worker/v1/chat/%s/liveReports",
- taskId
- );
-
- return TaskRunnerUtils.streamTaskReportsFromTaskLocation(httpClient, url);
- }
-
-
- /**
- * Adds a task to the pending queue.
- * {@link #runPendingTasks()} should be called to run the pending task.
- */
- @VisibleForTesting
- RemoteTaskRunnerWorkItem addPendingTask(final Task task)
- {
- log.info("Added pending task %s", task.getId());
- final RemoteTaskRunnerWorkItem taskRunnerWorkItem = new RemoteTaskRunnerWorkItem(
- task.getId(),
- task.getType(),
- null,
- null,
- task.getDataSource()
- );
- pendingTaskPayloads.put(task.getId(), task);
- pendingTasks.put(task.getId(), taskRunnerWorkItem);
- return taskRunnerWorkItem;
- }
-
- /**
- * This method uses a multi-threaded executor to extract all pending tasks and attempt to run them. Any tasks that
- * are successfully assigned to a worker will be moved from pendingTasks to runningTasks. This method is thread-safe.
- * This method should be run each time there is new worker capacity or if new tasks are assigned.
- */
- @VisibleForTesting
- void runPendingTasks()
- {
- runPendingTasksExec.submit(
- (Callable) () -> {
- try {
- // make a copy of the pending tasks because tryAssignTask may delete tasks from pending and move them
- // into running status
- List copy = Lists.newArrayList(pendingTasks.values());
- sortByInsertionTime(copy);
-
- for (RemoteTaskRunnerWorkItem taskRunnerWorkItem : copy) {
- runPendingTask(taskRunnerWorkItem);
- }
- }
- catch (Exception e) {
- log.makeAlert(e, "Exception in running pending tasks").emit();
- }
-
- return null;
- }
- );
- }
-
- /**
- * Run one pending task. This method must be called in the same class except for unit tests.
- */
- @VisibleForTesting
- void runPendingTask(RemoteTaskRunnerWorkItem taskRunnerWorkItem)
- {
- String taskId = taskRunnerWorkItem.getTaskId();
- if (tryAssignTasks.putIfAbsent(taskId, taskId) == null) {
- try {
- //this can still be null due to race from explicit task shutdown request
- //or if another thread steals and completes this task right after this thread makes copy
- //of pending tasks. See https://github.com/apache/druid/issues/2842 .
- Task task = pendingTaskPayloads.get(taskId);
- if (task != null && tryAssignTask(task, taskRunnerWorkItem)) {
- pendingTaskPayloads.remove(taskId);
- }
- }
- catch (Exception e) {
- log.makeAlert(e, "Exception while trying to assign task")
- .addData("taskId", taskRunnerWorkItem.getTaskId())
- .emit();
- RemoteTaskRunnerWorkItem workItem = pendingTasks.remove(taskId);
- if (workItem != null) {
- taskComplete(
- workItem,
- null,
- TaskStatus.failure(
- taskId,
- StringUtils.format("Failed to assign this task. See overlord logs for more details.")
- )
- );
- }
- }
- finally {
- tryAssignTasks.remove(taskId);
- }
- }
- }
-
- @VisibleForTesting
- static void sortByInsertionTime(List tasks)
- {
- Collections.sort(tasks, Comparator.comparing(RemoteTaskRunnerWorkItem::getQueueInsertionTime));
- }
-
- /**
- * Removes a task from the complete queue and clears out the ZK status path of the task.
- *
- * @param taskId - the task to cleanup
- */
- private void cleanup(final String taskId)
- {
- if (!lifecycleLock.awaitStarted(1, TimeUnit.SECONDS)) {
- return;
- }
- final RemoteTaskRunnerWorkItem removed = completeTasks.remove(taskId);
- final Worker worker;
- if (removed == null || (worker = removed.getWorker()) == null) {
- log.makeAlert("Asked to cleanup nonexistent task")
- .addData("taskId", taskId)
- .emit();
- } else {
- final String workerId = worker.getHost();
- log.info("Cleaning up task[%s] on worker[%s]", taskId, workerId);
- final String statusPath = JOINER.join(indexerZkConfig.getStatusPath(), workerId, taskId);
- try {
- cf.delete().guaranteed().forPath(statusPath);
- }
- catch (KeeperException.NoNodeException e) {
- log.info("Tried to delete status path[%s] that didn't exist! Must've gone away already?", statusPath);
- }
- catch (Exception e) {
- throw new RuntimeException(e);
- }
- }
- }
-
- /**
- * Ensures no workers are already running a task before assigning the task to a worker.
- * It is possible that a worker is running a task that the RTR has no knowledge of. This occurs when the RTR
- * needs to bootstrap after a restart.
- *
- * @param taskRunnerWorkItem - the task to assign
- * @return true iff the task is now assigned
- */
- private boolean tryAssignTask(final Task task, final RemoteTaskRunnerWorkItem taskRunnerWorkItem) throws Exception
- {
- Preconditions.checkNotNull(task, "task");
- Preconditions.checkNotNull(taskRunnerWorkItem, "taskRunnerWorkItem");
- Preconditions.checkArgument(task.getId().equals(taskRunnerWorkItem.getTaskId()), "task id != workItem id");
-
- if (runningTasks.containsKey(task.getId()) || findWorkerRunningTask(task.getId()) != null) {
- log.info("Task[%s] already running.", task.getId());
- return true;
- } else {
- // Nothing running this task, announce it in ZK for a worker to run it
- WorkerBehaviorConfig workerConfig = workerConfigRef.get();
- WorkerSelectStrategy strategy;
- if (workerConfig == null || workerConfig.getSelectStrategy() == null) {
- strategy = WorkerBehaviorConfig.DEFAULT_STRATEGY;
- log.debug("No worker selection strategy set. Using default of [%s]", strategy.getClass().getSimpleName());
- } else {
- strategy = workerConfig.getSelectStrategy();
- }
-
- ZkWorker assignedWorker = null;
- final ImmutableWorkerInfo immutableZkWorker;
- try {
- synchronized (workersWithUnacknowledgedTask) {
- immutableZkWorker = strategy.findWorkerForTask(
- config,
- ImmutableMap.copyOf(getWorkersEligibleToRunTasks()),
- task
- );
-
- if (immutableZkWorker != null &&
- workersWithUnacknowledgedTask.putIfAbsent(immutableZkWorker.getWorker().getHost(), task.getId())
- == null) {
- assignedWorker = zkWorkers.get(immutableZkWorker.getWorker().getHost());
- }
- }
-
- if (assignedWorker != null) {
- return announceTask(task, assignedWorker, taskRunnerWorkItem);
- } else {
- log.debug(
- "Unsuccessful task-assign attempt for task [%s] on workers [%s]. Workers to ack tasks are [%s].",
- task.getId(),
- zkWorkers.values(),
- workersWithUnacknowledgedTask
- );
- }
-
- return false;
- }
- finally {
- if (assignedWorker != null) {
- workersWithUnacknowledgedTask.remove(assignedWorker.getWorker().getHost());
- //if this attempt won the race to run the task then other task might be able to use this worker now after task ack.
- runPendingTasks();
- }
- }
- }
- }
-
- Map getWorkersEligibleToRunTasks()
- {
- return Maps.transformEntries(
- Maps.filterEntries(
- zkWorkers,
- input -> !lazyWorkers.containsKey(input.getKey()) &&
- !workersWithUnacknowledgedTask.containsKey(input.getKey()) &&
- !blackListedWorkers.contains(input.getValue())
- ),
- (String key, ZkWorker value) -> value.toImmutable()
- );
- }
-
- /**
- * Creates a ZK entry under a specific path associated with a worker. The worker is responsible for
- * removing the task ZK entry and creating a task status ZK entry.
- *
- * @param theZkWorker The worker the task is assigned to
- * @param taskRunnerWorkItem The task to be assigned
- * @return boolean indicating whether the task was successfully assigned or not
- */
- private boolean announceTask(
- final Task task,
- final ZkWorker theZkWorker,
- final RemoteTaskRunnerWorkItem taskRunnerWorkItem
- ) throws Exception
- {
- Preconditions.checkArgument(task.getId().equals(taskRunnerWorkItem.getTaskId()), "task id != workItem id");
- final String worker = theZkWorker.getWorker().getHost();
- synchronized (statusLock) {
- if (!zkWorkers.containsKey(worker) || lazyWorkers.containsKey(worker)) {
- // the worker might have been killed or marked as lazy
- log.debug("Not assigning task to already removed worker[%s]", worker);
- return false;
- }
- log.info("Assigning task [%s] to worker [%s]", task.getId(), worker);
-
- CuratorUtils.createIfNotExists(
- cf,
- JOINER.join(indexerZkConfig.getTasksPath(), worker, task.getId()),
- CreateMode.EPHEMERAL,
- jsonMapper.writeValueAsBytes(task),
- config.getMaxZnodeBytes()
- );
-
- RemoteTaskRunnerWorkItem workItem = pendingTasks.remove(task.getId());
- if (workItem == null) {
- log.makeAlert("Ignoring null work item from pending task queue")
- .addData("taskId", task.getId())
- .emit();
- return false;
- }
-
- final ServiceMetricEvent.Builder metricBuilder = new ServiceMetricEvent.Builder();
- IndexTaskUtils.setTaskDimensions(metricBuilder, task);
- emitter.emit(metricBuilder.setMetric(
- "task/pending/time",
- new Duration(workItem.getQueueInsertionTime(), DateTimes.nowUtc()).getMillis())
- );
-
- RemoteTaskRunnerWorkItem newWorkItem = workItem.withWorker(theZkWorker.getWorker(), null);
- runningTasks.put(task.getId(), newWorkItem);
- log.info("Task [%s] started running on worker [%s]", task.getId(), newWorkItem.getWorker().getHost());
- TaskRunnerUtils.notifyStatusChanged(listeners, task.getId(), TaskStatus.running(task.getId()));
-
- // Syncing state with Zookeeper - don't assign new tasks until the task we just assigned is actually running
- // on a worker - this avoids overflowing a worker with tasks
- Stopwatch timeoutStopwatch = Stopwatch.createStarted();
- while (!isWorkerRunningTask(theZkWorker, task.getId())) {
- final long waitMs = config.getTaskAssignmentTimeout().toStandardDuration().getMillis();
- statusLock.wait(waitMs);
- long elapsed = timeoutStopwatch.elapsed(TimeUnit.MILLISECONDS);
- if (elapsed >= waitMs) {
- log.makeAlert(
- "Task assignment timed out on worker [%s], never ran task [%s]! Timeout: (%s >= %s)!",
- worker,
- task.getId(),
- elapsed,
- config.getTaskAssignmentTimeout()
- ).emit();
- taskComplete(
- taskRunnerWorkItem,
- theZkWorker,
- TaskStatus.failure(
- task.getId(),
- StringUtils.format(
- "The worker that this task is assigned did not start it in timeout[%s]. "
- + "See overlord logs for more details.",
- config.getTaskAssignmentTimeout()
- )
- )
- );
- break;
- }
- }
- return true;
- }
- }
-
- private boolean cancelWorkerCleanup(String workerHost)
- {
- ScheduledFuture previousCleanup = removedWorkerCleanups.remove(workerHost);
- if (previousCleanup != null) {
- log.info("Cancelling Worker[%s] scheduled task cleanup", workerHost);
- previousCleanup.cancel(false);
- }
- return previousCleanup != null;
- }
-
- /**
- * When a new worker appears, listeners are registered for status changes associated with tasks assigned to
- * the worker. Status changes indicate the creation or completion of a task.
- * The RemoteTaskRunner updates state according to these changes.
- *
- * @param worker contains metadata for a worker that has appeared in ZK
- * @return future that will contain a fully initialized worker
- */
- private ListenableFuture addWorker(final Worker worker)
- {
- log.info("Worker[%s] reportin' for duty!", worker.getHost());
-
- try {
- cancelWorkerCleanup(worker.getHost());
-
- final String workerStatusPath = JOINER.join(indexerZkConfig.getStatusPath(), worker.getHost());
- final PathChildrenCache statusCache = workerStatusPathChildrenCacheFactory.make(cf, workerStatusPath);
- final SettableFuture retVal = SettableFuture.create();
- final ZkWorker zkWorker = new ZkWorker(
- worker,
- statusCache,
- jsonMapper
- );
-
- // Add status listener to the watcher for status changes
- zkWorker.addListener(getStatusListener(worker, zkWorker, retVal));
- zkWorker.start();
- return retVal;
- }
- catch (Exception e) {
- throw new RuntimeException(e);
- }
- }
-
- @VisibleForTesting
- PathChildrenCacheListener getStatusListener(final Worker worker, final ZkWorker zkWorker, final SettableFuture retVal)
- {
- return (client, event) -> {
- final String taskId;
- final RemoteTaskRunnerWorkItem taskRunnerWorkItem;
- synchronized (statusLock) {
- try {
- switch (event.getType()) {
- case CHILD_ADDED:
- case CHILD_UPDATED:
- if (event.getData() == null) {
- log.error("Unexpected null for event.getData() in handle new worker status for [%s]", event.getType().toString());
- log.makeAlert("Unexpected null for event.getData() in handle new worker status")
- .addData("worker", zkWorker.getWorker().getHost())
- .addData("eventType", event.getType().toString())
- .emit();
- return;
- }
- taskId = ZKPaths.getNodeFromPath(event.getData().getPath());
- final TaskAnnouncement announcement = jsonMapper.readValue(
- event.getData().getData(), TaskAnnouncement.class
- );
-
- log.info(
- "Worker[%s] wrote %s status for task [%s] on [%s]",
- zkWorker.getWorker().getHost(),
- announcement.getTaskStatus().getStatusCode(),
- taskId,
- announcement.getTaskLocation()
- );
-
- // Synchronizing state with ZK
- statusLock.notifyAll();
-
- final RemoteTaskRunnerWorkItem tmp;
- if ((tmp = runningTasks.get(taskId)) != null) {
- taskRunnerWorkItem = tmp;
- } else {
- final RemoteTaskRunnerWorkItem newTaskRunnerWorkItem = new RemoteTaskRunnerWorkItem(
- taskId,
- announcement.getTaskType(),
- zkWorker.getWorker(),
- TaskLocation.unknown(),
- announcement.getTaskDataSource()
- );
- final RemoteTaskRunnerWorkItem existingItem = runningTasks.putIfAbsent(
- taskId,
- newTaskRunnerWorkItem
- );
- if (existingItem == null) {
- log.warn(
- "Worker[%s] announced a status for a task I didn't know about, adding to runningTasks: %s",
- zkWorker.getWorker().getHost(),
- taskId
- );
- taskRunnerWorkItem = newTaskRunnerWorkItem;
- } else {
- taskRunnerWorkItem = existingItem;
- }
- }
-
- if (!announcement.getTaskLocation().equals(taskRunnerWorkItem.getLocation())) {
- taskRunnerWorkItem.setLocation(announcement.getTaskLocation());
- TaskRunnerUtils.notifyLocationChanged(listeners, taskId, announcement.getTaskLocation());
- }
-
- if (announcement.getTaskStatus().isComplete()) {
- taskComplete(taskRunnerWorkItem, zkWorker, announcement.getTaskStatus());
- runPendingTasks();
- }
- break;
- case CHILD_REMOVED:
- if (event.getData() == null) {
- log.error("Unexpected null for event.getData() in handle new worker status for [%s]", event.getType().toString());
- log.makeAlert("Unexpected null for event.getData() in handle new worker status")
- .addData("worker", zkWorker.getWorker().getHost())
- .addData("eventType", event.getType().toString())
- .emit();
- return;
- }
- taskId = ZKPaths.getNodeFromPath(event.getData().getPath());
- taskRunnerWorkItem = runningTasks.remove(taskId);
- if (taskRunnerWorkItem != null) {
- log.warn("Task[%s] just disappeared!", taskId);
- final TaskStatus taskStatus = TaskStatus.failure(
- taskId,
- "The worker that this task was assigned disappeared. See overlord logs for more details."
- );
- taskRunnerWorkItem.setResult(taskStatus);
- TaskRunnerUtils.notifyStatusChanged(listeners, taskId, taskStatus);
- } else {
- log.info("Task[%s] went bye bye.", taskId);
- }
- break;
- case INITIALIZED:
- if (zkWorkers.putIfAbsent(worker.getHost(), zkWorker) == null) {
- retVal.set(zkWorker);
- } else {
- final String message = StringUtils.format(
- "This should not happen...tried to add already-existing worker[%s]",
- worker.getHost()
- );
- log.makeAlert(message)
- .addData("workerHost", worker.getHost())
- .addData("workerIp", worker.getIp())
- .emit();
- retVal.setException(new IllegalStateException(message));
- }
- runPendingTasks();
- break;
- case CONNECTION_SUSPENDED:
- case CONNECTION_RECONNECTED:
- case CONNECTION_LOST:
- // do nothing
- }
- }
- catch (Exception e) {
- String znode = null;
- if (event.getData() != null) {
- znode = event.getData().getPath();
- }
- log.makeAlert(e, "Failed to handle new worker status")
- .addData("worker", zkWorker.getWorker().getHost())
- .addData("znode", znode)
- .addData("eventType", event.getType().toString())
- .emit();
- }
- }
- };
- }
-
- /**
- * We allow workers to change their own capacities and versions. They cannot change their own hosts or ips without
- * dropping themselves and re-announcing.
- */
- private void updateWorker(final Worker worker)
- {
- final ZkWorker zkWorker = zkWorkers.get(worker.getHost());
- if (zkWorker != null) {
- log.info("Worker[%s] updated its announcement from[%s] to[%s].", worker.getHost(), zkWorker.getWorker(), worker);
- zkWorker.setWorker(worker);
- } else {
- log.warn(
- "Worker[%s] updated its announcement but we didn't have a ZkWorker for it. Ignoring.",
- worker.getHost()
- );
- }
- }
-
- /**
- * When a ephemeral worker node disappears from ZK, incomplete running tasks will be retried by
- * the logic in the status listener. We still have to make sure there are no tasks assigned
- * to the worker but not yet running.
- *
- * @param worker - the removed worker
- */
- private void removeWorker(final Worker worker)
- {
- log.info("Kaboom! Worker[%s] removed!", worker.getHost());
-
- final ZkWorker zkWorker = zkWorkers.get(worker.getHost());
- if (zkWorker != null) {
- try {
- scheduleTasksCleanupForWorker(worker.getHost(), getAssignedTasks(worker));
- }
- catch (Exception e) {
- throw new RuntimeException(e);
- }
- finally {
- try {
- zkWorker.close();
- }
- catch (Exception e) {
- log.error(e, "Exception closing worker[%s]!", worker.getHost());
- }
- zkWorkers.remove(worker.getHost());
- checkBlackListedNodes();
- }
- }
- lazyWorkers.remove(worker.getHost());
- }
-
- /**
- * Schedule a task that will, at some point in the future, clean up znodes and issue failures for "tasksToFail"
- * if they are being run by "worker".
- */
- private void scheduleTasksCleanupForWorker(final String worker, final List tasksToFail)
- {
- // This method is only called from the PathChildrenCache event handler, so this may look like a race,
- // but is actually not.
- cancelWorkerCleanup(worker);
-
- final ListenableScheduledFuture> cleanupTask = cleanupExec.schedule(
- () -> {
- log.info("Running scheduled cleanup for Worker[%s]", worker);
- try {
- for (String assignedTask : tasksToFail) {
- String taskPath = JOINER.join(indexerZkConfig.getTasksPath(), worker, assignedTask);
- String statusPath = JOINER.join(indexerZkConfig.getStatusPath(), worker, assignedTask);
- if (cf.checkExists().forPath(taskPath) != null) {
- cf.delete().guaranteed().forPath(taskPath);
- }
-
- if (cf.checkExists().forPath(statusPath) != null) {
- cf.delete().guaranteed().forPath(statusPath);
- }
-
- log.info("Failing task[%s]", assignedTask);
- RemoteTaskRunnerWorkItem taskRunnerWorkItem = runningTasks.remove(assignedTask);
- if (taskRunnerWorkItem != null) {
- final TaskStatus taskStatus = TaskStatus.failure(
- assignedTask,
- StringUtils.format("Canceled for worker cleanup. See overlord logs for more details.")
- );
- taskRunnerWorkItem.setResult(taskStatus);
- TaskRunnerUtils.notifyStatusChanged(listeners, assignedTask, taskStatus);
- } else {
- log.warn("RemoteTaskRunner has no knowledge of task[%s]", assignedTask);
- }
- }
-
- // worker is gone, remove worker task status announcements path.
- String workerStatusPath = JOINER.join(indexerZkConfig.getStatusPath(), worker);
- if (cf.checkExists().forPath(workerStatusPath) != null) {
- cf.delete().guaranteed().forPath(JOINER.join(indexerZkConfig.getStatusPath(), worker));
- }
- }
- catch (Exception e) {
- log.makeAlert("Exception while cleaning up worker[%s]", worker).emit();
- throw new RuntimeException(e);
- }
- },
- config.getTaskCleanupTimeout().toStandardDuration().getMillis(),
- TimeUnit.MILLISECONDS
- );
-
- removedWorkerCleanups.put(worker, cleanupTask);
-
- // Remove this entry from removedWorkerCleanups when done, if it's actually the one in there.
- Futures.addCallback(
- cleanupTask,
- new FutureCallback
-
- org.apache.curator
- curator-x-discovery
- ${apache.curator.version}
-
-
- com.fasterxml.jackson.core
- jackson-databind
-
-
- org.apache.calcitecalcite-core
@@ -1794,6 +1786,8 @@
**/*_jmhType_*.class**/*_jmhTest_*.class**/*_generated*.class
+
+ **/math/expr/vector/simd/Simd*.class**.SuppressForbidden
@@ -2143,6 +2137,11 @@
org.apache.hadoop.fs
+
+
+
+ --add-modules=jdk.incubator.vector
+
@@ -2152,6 +2151,9 @@
true${maven.compiler.release}
+
+ --add-modules=jdk.incubator.vector
+
@@ -2212,6 +2214,8 @@
-J--add-exports=java.base/sun.nio.ch=ALL-UNNAMED-J--add-opens=jdk.compiler/com.sun.tools.javac.code=ALL-UNNAMED-J--add-opens=jdk.compiler/com.sun.tools.javac.comp=ALL-UNNAMED
+ -J--add-modules=jdk.incubator.vector
+ --add-modules=jdk.incubator.vector
diff --git a/processing/src/main/java/org/apache/druid/guice/GuiceAnnotationIntrospector.java b/processing/src/main/java/org/apache/druid/guice/GuiceAnnotationIntrospector.java
index fd8ee5e9e02a..890e3c233a04 100644
--- a/processing/src/main/java/org/apache/druid/guice/GuiceAnnotationIntrospector.java
+++ b/processing/src/main/java/org/apache/druid/guice/GuiceAnnotationIntrospector.java
@@ -44,19 +44,25 @@ public class GuiceAnnotationIntrospector extends NopAnnotationIntrospector
@Override
public JacksonInject.Value findInjectableValue(AnnotatedMember m)
{
- Object id = findGuiceInjectId(m);
+ // Preserve useInput / optional from the annotation. The simpler Value.forId(id) drops
+ // them and relies on AnnotationIntrospectorPair's fallback. See FasterXML/jackson-databind#1381.
+ final JacksonInject annotation = m.getAnnotation(JacksonInject.class);
+ if (annotation == null) {
+ return null;
+ }
+ final Object id = findGuiceInjectId(m);
if (id == null) {
return null;
}
- return JacksonInject.Value.forId(id);
+ return JacksonInject.Value.from(annotation).withId(id);
}
+ /**
+ * Resolves the Guice {@link Key} for an annotated member. Callers must verify that {@code m}
+ * carries a {@link JacksonInject} annotation before invoking; this method does not re-check.
+ */
private Object findGuiceInjectId(AnnotatedMember m)
{
- if (m.getAnnotation(JacksonInject.class) == null) {
- return null;
- }
-
Type genericType = null;
Annotation guiceAnnotation = null;
diff --git a/processing/src/main/java/org/apache/druid/guice/StartupInjectorBuilder.java b/processing/src/main/java/org/apache/druid/guice/StartupInjectorBuilder.java
index 954610c4e287..70dd1cc51519 100644
--- a/processing/src/main/java/org/apache/druid/guice/StartupInjectorBuilder.java
+++ b/processing/src/main/java/org/apache/druid/guice/StartupInjectorBuilder.java
@@ -55,6 +55,8 @@ public class StartupInjectorBuilder extends BaseInjectorBuilder double
* double, long -> double
* double, double -> double
+ *
+ * If a non-null {@link SimdSupportedBinaryOp} is supplied to the constructor and
+ * {@link ExpressionProcessing#useVectorApi()} is true, this factory will return SIMD-specialized processors backed
+ * by the JDK incubator {@code jdk.incubator.vector} API instead of the standard scalar implementations.
*/
public class SimpleVectorMathBivariateProcessorFactory extends VectorMathBivariateProcessorFactory
{
@@ -38,6 +47,8 @@ public class SimpleVectorMathBivariateProcessorFactory extends VectorMathBivaria
private final DoubleBivariateLongDoubleFunction longDoubleFunction;
private final DoubleBivariateDoubleLongFunction doubleLongFunction;
private final DoubleBivariateDoublesFunction doublesFunction;
+ @Nullable
+ private final SimdSupportedBinaryOp simdOp;
protected SimpleVectorMathBivariateProcessorFactory(
LongBivariateLongsFunction longsFunction,
@@ -45,16 +56,36 @@ protected SimpleVectorMathBivariateProcessorFactory(
DoubleBivariateDoubleLongFunction doubleLongFunction,
DoubleBivariateDoublesFunction doublesFunction
)
+ {
+ this(longsFunction, longDoubleFunction, doubleLongFunction, doublesFunction, null);
+ }
+
+ protected SimpleVectorMathBivariateProcessorFactory(
+ LongBivariateLongsFunction longsFunction,
+ DoubleBivariateLongDoubleFunction longDoubleFunction,
+ DoubleBivariateDoubleLongFunction doubleLongFunction,
+ DoubleBivariateDoublesFunction doublesFunction,
+ @Nullable SimdSupportedBinaryOp simdOp
+ )
{
this.longsFunction = longsFunction;
this.longDoubleFunction = longDoubleFunction;
this.doubleLongFunction = doubleLongFunction;
this.doublesFunction = doublesFunction;
+ this.simdOp = simdOp;
}
@Override
public final ExprVectorProcessor longsProcessor(Expr.VectorInputBindingInspector inspector, Expr left, Expr right)
{
+ if (simdOp != null && ExpressionProcessing.useVectorApi()) {
+ return SimdProcessors.makeLongLong(
+ left.asVectorProcessor(inspector),
+ right.asVectorProcessor(inspector),
+ simdOp,
+ longsFunction
+ );
+ }
return new LongBivariateLongsFunctionVectorProcessor(
left.asVectorProcessor(inspector),
right.asVectorProcessor(inspector),
@@ -69,6 +100,14 @@ public final ExprVectorProcessor longDoubleProcessor(
Expr right
)
{
+ if (simdOp != null && ExpressionProcessing.useVectorApi()) {
+ return SimdProcessors.makeLongDouble(
+ left.asVectorProcessor(inspector),
+ right.asVectorProcessor(inspector),
+ simdOp,
+ longDoubleFunction
+ );
+ }
return new DoubleBivariateLongDoubleFunctionVectorProcessor(
left.asVectorProcessor(inspector),
right.asVectorProcessor(inspector),
@@ -83,6 +122,14 @@ public final ExprVectorProcessor doubleLongProcessor(
Expr right
)
{
+ if (simdOp != null && ExpressionProcessing.useVectorApi()) {
+ return SimdProcessors.makeDoubleLong(
+ left.asVectorProcessor(inspector),
+ right.asVectorProcessor(inspector),
+ simdOp,
+ doubleLongFunction
+ );
+ }
return new DoubleBivariateDoubleLongFunctionVectorProcessor(
left.asVectorProcessor(inspector),
right.asVectorProcessor(inspector),
@@ -97,6 +144,14 @@ public final ExprVectorProcessor doublesProcessor(
Expr right
)
{
+ if (simdOp != null && ExpressionProcessing.useVectorApi()) {
+ return SimdProcessors.makeDoubleDouble(
+ left.asVectorProcessor(inspector),
+ right.asVectorProcessor(inspector),
+ simdOp,
+ doublesFunction
+ );
+ }
return new DoubleBivariateDoublesFunctionVectorProcessor(
left.asVectorProcessor(inspector),
right.asVectorProcessor(inspector),
diff --git a/processing/src/main/java/org/apache/druid/math/expr/vector/VectorMathProcessors.java b/processing/src/main/java/org/apache/druid/math/expr/vector/VectorMathProcessors.java
index c12ebc55eaa9..4a26f8141531 100644
--- a/processing/src/main/java/org/apache/druid/math/expr/vector/VectorMathProcessors.java
+++ b/processing/src/main/java/org/apache/druid/math/expr/vector/VectorMathProcessors.java
@@ -23,6 +23,7 @@
import com.google.common.primitives.Ints;
import org.apache.druid.math.expr.ExpressionValidationException;
import org.apache.druid.math.expr.Function;
+import org.apache.druid.math.expr.vector.simd.SimdSupportedBinaryOp;
public class VectorMathProcessors
{
@@ -300,7 +301,7 @@ public static final class Add extends SimpleVectorMathBivariateProcessorFactory
public Add()
{
- super(Long::sum, Double::sum, Double::sum, Double::sum);
+ super(Long::sum, Double::sum, Double::sum, Double::sum, SimdSupportedBinaryOp.ADD);
}
}
@@ -314,7 +315,8 @@ public Subtract()
(left, right) -> left - right,
(left, right) -> (double) left - right,
(left, right) -> left - (double) right,
- (left, right) -> left - right
+ (left, right) -> left - right,
+ SimdSupportedBinaryOp.SUB
);
}
}
@@ -325,7 +327,13 @@ public static final class Multiply extends SimpleVectorMathBivariateProcessorFac
public Multiply()
{
- super(Multiply::multiply, Multiply::multiply, Multiply::multiply, Multiply::multiply);
+ super(
+ Multiply::multiply,
+ Multiply::multiply,
+ Multiply::multiply,
+ Multiply::multiply,
+ SimdSupportedBinaryOp.MUL
+ );
}
private static long multiply(long x, long y)
diff --git a/processing/src/main/java/org/apache/druid/math/expr/vector/simd/SimdDoubleDoubleAddProcessor.java b/processing/src/main/java/org/apache/druid/math/expr/vector/simd/SimdDoubleDoubleAddProcessor.java
new file mode 100644
index 000000000000..d476468cb076
--- /dev/null
+++ b/processing/src/main/java/org/apache/druid/math/expr/vector/simd/SimdDoubleDoubleAddProcessor.java
@@ -0,0 +1,93 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.math.expr.vector.simd;
+
+import jdk.incubator.vector.DoubleVector;
+import jdk.incubator.vector.VectorMask;
+import org.apache.druid.math.expr.vector.ExprVectorProcessor;
+import org.apache.druid.math.expr.vector.functional.DoubleBivariateDoublesFunction;
+
+import java.util.Arrays;
+
+/**
+ * SIMD specialization of {@code (double[], double[]) -> double[]} addition. The op is hardcoded to
+ * {@link DoubleVector#add} so the JIT statically resolves it to the platform's double-add intrinsic.
+ */
+public final class SimdDoubleDoubleAddProcessor extends SimdDoubleDoubleProcessor
+{
+  public SimdDoubleDoubleAddProcessor(
+      ExprVectorProcessor<?> left,
+      ExprVectorProcessor<?> right,
+      DoubleBivariateDoublesFunction scalarFallback
+  )
+  {
+    super(left, right, scalarFallback);
+  }
+
+  @Override
+  protected void processVector(
+      double[] leftInput,
+      double[] rightInput,
+      boolean[] leftNulls,
+      boolean[] rightNulls,
+      int currentSize
+  )
+  {
+    final boolean hasLeftNulls = leftNulls != null;
+    final boolean hasRightNulls = rightNulls != null;
+    final int laneCount = SPECIES.length();
+    final int upperBound = SPECIES.loopBound(currentSize); // end of the full-lane region; the rest is the scalar tail
+    int i = 0;
+    if (!hasLeftNulls && !hasRightNulls) {
+      for (; i < upperBound; i += laneCount) {
+        final DoubleVector va = DoubleVector.fromArray(SPECIES, leftInput, i);
+        final DoubleVector vb = DoubleVector.fromArray(SPECIES, rightInput, i);
+        va.add(vb).intoArray(outValues, i);
+      }
+      for (; i < currentSize; i++) {
+        outValues[i] = scalarFallback.process(leftInput[i], rightInput[i]);
+      }
+      Arrays.fill(outNulls, 0, currentSize, false); // neither input has a null vector, so no output row is null
+    } else {
+      for (; i < upperBound; i += laneCount) {
+        final VectorMask<Double> nm;
+        if (hasLeftNulls && hasRightNulls) {
+          nm = VectorMask.fromArray(SPECIES, leftNulls, i)
+                         .or(VectorMask.fromArray(SPECIES, rightNulls, i));
+        } else if (hasLeftNulls) {
+          nm = VectorMask.fromArray(SPECIES, leftNulls, i);
+        } else {
+          nm = VectorMask.fromArray(SPECIES, rightNulls, i);
+        }
+        final DoubleVector va = DoubleVector.fromArray(SPECIES, leftInput, i);
+        final DoubleVector vb = DoubleVector.fromArray(SPECIES, rightInput, i);
+        va.add(vb).intoArray(outValues, i); // unmasked: null lanes are computed too and flagged via outNulls below
+        nm.intoArray(outNulls, i);
+      }
+      for (; i < currentSize; i++) {
+        final boolean isNull = (hasLeftNulls && leftNulls[i]) || (hasRightNulls && rightNulls[i]);
+        outNulls[i] = isNull;
+        if (!isNull) {
+          outValues[i] = scalarFallback.process(leftInput[i], rightInput[i]);
+        }
+      }
+    }
+  }
+}
diff --git a/processing/src/main/java/org/apache/druid/math/expr/vector/simd/SimdDoubleDoubleMulProcessor.java b/processing/src/main/java/org/apache/druid/math/expr/vector/simd/SimdDoubleDoubleMulProcessor.java
new file mode 100644
index 000000000000..56cf53e3309e
--- /dev/null
+++ b/processing/src/main/java/org/apache/druid/math/expr/vector/simd/SimdDoubleDoubleMulProcessor.java
@@ -0,0 +1,93 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.math.expr.vector.simd;
+
+import jdk.incubator.vector.DoubleVector;
+import jdk.incubator.vector.VectorMask;
+import org.apache.druid.math.expr.vector.ExprVectorProcessor;
+import org.apache.druid.math.expr.vector.functional.DoubleBivariateDoublesFunction;
+
+import java.util.Arrays;
+
+/**
+ * SIMD specialization of {@code (double[], double[]) -> double[]} multiplication. The op is hardcoded to
+ * {@link DoubleVector#mul} so the JIT statically resolves it to the platform's double-multiply intrinsic.
+ */
+public final class SimdDoubleDoubleMulProcessor extends SimdDoubleDoubleProcessor
+{
+  public SimdDoubleDoubleMulProcessor(
+      ExprVectorProcessor<?> left,
+      ExprVectorProcessor<?> right,
+      DoubleBivariateDoublesFunction scalarFallback
+  )
+  {
+    super(left, right, scalarFallback);
+  }
+
+  @Override
+  protected void processVector(
+      double[] leftInput,
+      double[] rightInput,
+      boolean[] leftNulls,
+      boolean[] rightNulls,
+      int currentSize
+  )
+  {
+    final boolean hasLeftNulls = leftNulls != null;
+    final boolean hasRightNulls = rightNulls != null;
+    final int laneCount = SPECIES.length();
+    final int upperBound = SPECIES.loopBound(currentSize); // end of the full-lane region; the rest is the scalar tail
+    int i = 0;
+    if (!hasLeftNulls && !hasRightNulls) {
+      for (; i < upperBound; i += laneCount) {
+        final DoubleVector va = DoubleVector.fromArray(SPECIES, leftInput, i);
+        final DoubleVector vb = DoubleVector.fromArray(SPECIES, rightInput, i);
+        va.mul(vb).intoArray(outValues, i);
+      }
+      for (; i < currentSize; i++) {
+        outValues[i] = scalarFallback.process(leftInput[i], rightInput[i]);
+      }
+      Arrays.fill(outNulls, 0, currentSize, false); // neither input has a null vector, so no output row is null
+    } else {
+      for (; i < upperBound; i += laneCount) {
+        final VectorMask<Double> nm;
+        if (hasLeftNulls && hasRightNulls) {
+          nm = VectorMask.fromArray(SPECIES, leftNulls, i)
+                         .or(VectorMask.fromArray(SPECIES, rightNulls, i));
+        } else if (hasLeftNulls) {
+          nm = VectorMask.fromArray(SPECIES, leftNulls, i);
+        } else {
+          nm = VectorMask.fromArray(SPECIES, rightNulls, i);
+        }
+        final DoubleVector va = DoubleVector.fromArray(SPECIES, leftInput, i);
+        final DoubleVector vb = DoubleVector.fromArray(SPECIES, rightInput, i);
+        va.mul(vb).intoArray(outValues, i); // unmasked: null lanes are computed too and flagged via outNulls below
+        nm.intoArray(outNulls, i);
+      }
+      for (; i < currentSize; i++) {
+        final boolean isNull = (hasLeftNulls && leftNulls[i]) || (hasRightNulls && rightNulls[i]);
+        outNulls[i] = isNull;
+        if (!isNull) {
+          outValues[i] = scalarFallback.process(leftInput[i], rightInput[i]);
+        }
+      }
+    }
+  }
+}
diff --git a/processing/src/main/java/org/apache/druid/math/expr/vector/simd/SimdDoubleDoubleProcessor.java b/processing/src/main/java/org/apache/druid/math/expr/vector/simd/SimdDoubleDoubleProcessor.java
new file mode 100644
index 000000000000..8f3eeebac2c8
--- /dev/null
+++ b/processing/src/main/java/org/apache/druid/math/expr/vector/simd/SimdDoubleDoubleProcessor.java
@@ -0,0 +1,95 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.math.expr.vector.simd;
+
+import jdk.incubator.vector.DoubleVector;
+import jdk.incubator.vector.VectorSpecies;
+import org.apache.druid.math.expr.Expr;
+import org.apache.druid.math.expr.ExpressionType;
+import org.apache.druid.math.expr.vector.CastToTypeVectorProcessor;
+import org.apache.druid.math.expr.vector.ExprEvalDoubleVector;
+import org.apache.druid.math.expr.vector.ExprEvalVector;
+import org.apache.druid.math.expr.vector.ExprVectorProcessor;
+import org.apache.druid.math.expr.vector.functional.DoubleBivariateDoublesFunction;
+
+import javax.annotation.Nullable;
+
+/**
+ * Abstract base for SIMD processors that compute {@code (double[], double[]) -> double[]} ops. See
+ * {@link SimdLongLongProcessor} for the design rationale.
+ */
+abstract class SimdDoubleDoubleProcessor implements ExprVectorProcessor<double[]>
+{
+  static final VectorSpecies<Double> SPECIES = DoubleVector.SPECIES_PREFERRED;
+
+  private final ExprVectorProcessor<double[]> left;
+  private final ExprVectorProcessor<double[]> right;
+  final DoubleBivariateDoublesFunction scalarFallback;
+  final double[] outValues; // allocated once and handed back by every evalVector call
+  final boolean[] outNulls; // allocated once and handed back by every evalVector call
+
+  protected SimdDoubleDoubleProcessor(
+      ExprVectorProcessor<?> left,
+      ExprVectorProcessor<?> right,
+      DoubleBivariateDoublesFunction scalarFallback
+  )
+  {
+    this.left = CastToTypeVectorProcessor.cast(left, ExpressionType.DOUBLE);
+    this.right = CastToTypeVectorProcessor.cast(right, ExpressionType.DOUBLE);
+    this.scalarFallback = scalarFallback;
+    this.outValues = new double[this.left.maxVectorSize()];
+    this.outNulls = new boolean[this.left.maxVectorSize()];
+  }
+
+  @Override
+  public final ExprEvalVector<double[]> evalVector(Expr.VectorInputBinding bindings)
+  {
+    final ExprEvalVector<double[]> lhs = left.evalVector(bindings);
+    final ExprEvalVector<double[]> rhs = right.evalVector(bindings);
+    processVector(
+        lhs.values(),
+        rhs.values(),
+        lhs.getNullVector(),
+        rhs.getNullVector(),
+        bindings.getCurrentVectorSize()
+    );
+    return new ExprEvalDoubleVector(outValues, outNulls);
+  }
+
+  protected abstract void processVector(
+      double[] leftInput,
+      double[] rightInput,
+      @Nullable boolean[] leftNulls,
+      @Nullable boolean[] rightNulls,
+      int currentSize
+  );
+
+  @Override
+  public final ExpressionType getOutputType()
+  {
+    return ExpressionType.DOUBLE;
+  }
+
+  @Override
+  public final int maxVectorSize()
+  {
+    return outValues.length;
+  }
+}
diff --git a/processing/src/main/java/org/apache/druid/math/expr/vector/simd/SimdDoubleDoubleSubProcessor.java b/processing/src/main/java/org/apache/druid/math/expr/vector/simd/SimdDoubleDoubleSubProcessor.java
new file mode 100644
index 000000000000..9f290240bce8
--- /dev/null
+++ b/processing/src/main/java/org/apache/druid/math/expr/vector/simd/SimdDoubleDoubleSubProcessor.java
@@ -0,0 +1,93 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.math.expr.vector.simd;
+
+import jdk.incubator.vector.DoubleVector;
+import jdk.incubator.vector.VectorMask;
+import org.apache.druid.math.expr.vector.ExprVectorProcessor;
+import org.apache.druid.math.expr.vector.functional.DoubleBivariateDoublesFunction;
+
+import java.util.Arrays;
+
+/**
+ * SIMD specialization of {@code (double[], double[]) -> double[]} subtraction. The op is hardcoded to
+ * {@link DoubleVector#sub} so the JIT statically resolves it to the platform's double-subtract intrinsic.
+ */
+public final class SimdDoubleDoubleSubProcessor extends SimdDoubleDoubleProcessor
+{
+  public SimdDoubleDoubleSubProcessor(
+      ExprVectorProcessor<?> left,
+      ExprVectorProcessor<?> right,
+      DoubleBivariateDoublesFunction scalarFallback
+  )
+  {
+    super(left, right, scalarFallback);
+  }
+
+  @Override
+  protected void processVector(
+      double[] leftInput,
+      double[] rightInput,
+      boolean[] leftNulls,
+      boolean[] rightNulls,
+      int currentSize
+  )
+  {
+    final boolean hasLeftNulls = leftNulls != null;
+    final boolean hasRightNulls = rightNulls != null;
+    final int laneCount = SPECIES.length();
+    final int upperBound = SPECIES.loopBound(currentSize); // end of the full-lane region; the rest is the scalar tail
+    int i = 0;
+    if (!hasLeftNulls && !hasRightNulls) {
+      for (; i < upperBound; i += laneCount) {
+        final DoubleVector va = DoubleVector.fromArray(SPECIES, leftInput, i);
+        final DoubleVector vb = DoubleVector.fromArray(SPECIES, rightInput, i);
+        va.sub(vb).intoArray(outValues, i);
+      }
+      for (; i < currentSize; i++) {
+        outValues[i] = scalarFallback.process(leftInput[i], rightInput[i]);
+      }
+      Arrays.fill(outNulls, 0, currentSize, false); // neither input has a null vector, so no output row is null
+    } else {
+      for (; i < upperBound; i += laneCount) {
+        final VectorMask<Double> nm;
+        if (hasLeftNulls && hasRightNulls) {
+          nm = VectorMask.fromArray(SPECIES, leftNulls, i)
+                         .or(VectorMask.fromArray(SPECIES, rightNulls, i));
+        } else if (hasLeftNulls) {
+          nm = VectorMask.fromArray(SPECIES, leftNulls, i);
+        } else {
+          nm = VectorMask.fromArray(SPECIES, rightNulls, i);
+        }
+        final DoubleVector va = DoubleVector.fromArray(SPECIES, leftInput, i);
+        final DoubleVector vb = DoubleVector.fromArray(SPECIES, rightInput, i);
+        va.sub(vb).intoArray(outValues, i); // unmasked: null lanes are computed too and flagged via outNulls below
+        nm.intoArray(outNulls, i);
+      }
+      for (; i < currentSize; i++) {
+        final boolean isNull = (hasLeftNulls && leftNulls[i]) || (hasRightNulls && rightNulls[i]);
+        outNulls[i] = isNull;
+        if (!isNull) {
+          outValues[i] = scalarFallback.process(leftInput[i], rightInput[i]);
+        }
+      }
+    }
+  }
+}
diff --git a/processing/src/main/java/org/apache/druid/math/expr/vector/simd/SimdDoubleLongAddProcessor.java b/processing/src/main/java/org/apache/druid/math/expr/vector/simd/SimdDoubleLongAddProcessor.java
new file mode 100644
index 000000000000..5d1eb74f0d96
--- /dev/null
+++ b/processing/src/main/java/org/apache/druid/math/expr/vector/simd/SimdDoubleLongAddProcessor.java
@@ -0,0 +1,96 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.math.expr.vector.simd;
+
+import jdk.incubator.vector.DoubleVector;
+import jdk.incubator.vector.LongVector;
+import jdk.incubator.vector.VectorMask;
+import org.apache.druid.math.expr.vector.ExprVectorProcessor;
+import org.apache.druid.math.expr.vector.functional.DoubleBivariateDoubleLongFunction;
+
+import java.util.Arrays;
+
+/**
+ * SIMD specialization of {@code (double[], long[]) -> double[]} addition. The op is hardcoded to
+ * {@link DoubleVector#add} so the JIT statically resolves it to the platform's double-add intrinsic.
+ */
+public final class SimdDoubleLongAddProcessor extends SimdDoubleLongProcessor
+{
+ public SimdDoubleLongAddProcessor(
+ ExprVectorProcessor> left,
+ ExprVectorProcessor> right,
+ DoubleBivariateDoubleLongFunction scalarFallback // per-row op used for rows the vector loop does not cover
+ )
+ {
+ super(left, right, scalarFallback);
+ }
+
+ @Override
+ protected void processVector( // writes left + right for rows [0, currentSize) into outValues/outNulls
+ double[] leftInput, // left operand values
+ long[] rightInput, // right operand values, converted to double before the add
+ boolean[] leftNulls, // per-row null flags for left, or null when none are supplied
+ boolean[] rightNulls, // per-row null flags for right, or null when none are supplied
+ int currentSize // number of leading rows to process
+ )
+ {
+ final boolean hasLeftNulls = leftNulls != null;
+ final boolean hasRightNulls = rightNulls != null;
+ final int laneCount = DOUBLE_SPECIES.length(); // elements handled per vector operation
+ final int upperBound = DOUBLE_SPECIES.loopBound(currentSize); // largest multiple of laneCount <= currentSize
+ int i = 0;
+ if (!hasLeftNulls && !hasRightNulls) { // fast path: neither input carries a null array
+ for (; i < upperBound; i += laneCount) {
+ final DoubleVector va = DoubleVector.fromArray(DOUBLE_SPECIES, leftInput, i);
+ final DoubleVector vb = // long lanes converted to double lanes
+ (DoubleVector) LongVector.fromArray(LONG_SPECIES, rightInput, i).castShape(DOUBLE_SPECIES, 0);
+ va.add(vb).intoArray(outValues, i); // one full vector of results per iteration
+ }
+ for (; i < currentSize; i++) { // scalar tail; scalarFallback is presumably the same add - TODO confirm
+ outValues[i] = scalarFallback.process(leftInput[i], rightInput[i]);
+ }
+ Arrays.fill(outNulls, 0, currentSize, false); // mark every processed row as non-null
+ } else {
+ for (; i < upperBound; i += laneCount) {
+ final VectorMask nm; // output null mask: a lane is null when either supplied input flag is set
+ if (hasLeftNulls && hasRightNulls) {
+ nm = VectorMask.fromArray(DOUBLE_SPECIES, leftNulls, i)
+ .or(VectorMask.fromArray(DOUBLE_SPECIES, rightNulls, i));
+ } else if (hasLeftNulls) {
+ nm = VectorMask.fromArray(DOUBLE_SPECIES, leftNulls, i);
+ } else {
+ nm = VectorMask.fromArray(DOUBLE_SPECIES, rightNulls, i);
+ }
+ final DoubleVector va = DoubleVector.fromArray(DOUBLE_SPECIES, leftInput, i);
+ final DoubleVector vb = // long lanes converted to double lanes
+ (DoubleVector) LongVector.fromArray(LONG_SPECIES, rightInput, i).castShape(DOUBLE_SPECIES, 0);
+ va.add(vb).intoArray(outValues, i); // computed for every lane, including lanes flagged null
+ nm.intoArray(outNulls, i);
+ }
+ for (; i < currentSize; i++) { // scalar tail for rows at or past upperBound
+ final boolean isNull = (hasLeftNulls && leftNulls[i]) || (hasRightNulls && rightNulls[i]);
+ outNulls[i] = isNull;
+ if (!isNull) { // null rows leave outValues[i] untouched
+ outValues[i] = scalarFallback.process(leftInput[i], rightInput[i]);
+ }
+ }
+ }
+ }
+}
diff --git a/processing/src/main/java/org/apache/druid/math/expr/vector/simd/SimdDoubleLongMulProcessor.java b/processing/src/main/java/org/apache/druid/math/expr/vector/simd/SimdDoubleLongMulProcessor.java
new file mode 100644
index 000000000000..b593799ee262
--- /dev/null
+++ b/processing/src/main/java/org/apache/druid/math/expr/vector/simd/SimdDoubleLongMulProcessor.java
@@ -0,0 +1,96 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.math.expr.vector.simd;
+
+import jdk.incubator.vector.DoubleVector;
+import jdk.incubator.vector.LongVector;
+import jdk.incubator.vector.VectorMask;
+import org.apache.druid.math.expr.vector.ExprVectorProcessor;
+import org.apache.druid.math.expr.vector.functional.DoubleBivariateDoubleLongFunction;
+
+import java.util.Arrays;
+
+/**
+ * SIMD specialization of {@code (double[], long[]) -> double[]} multiplication. The op is hardcoded to
+ * {@link DoubleVector#mul} so the JIT statically resolves it to the platform's double-multiply intrinsic.
+ */
+public final class SimdDoubleLongMulProcessor extends SimdDoubleLongProcessor
+{
+ public SimdDoubleLongMulProcessor(
+ ExprVectorProcessor> left,
+ ExprVectorProcessor> right,
+ DoubleBivariateDoubleLongFunction scalarFallback // per-row op used for rows the vector loop does not cover
+ )
+ {
+ super(left, right, scalarFallback);
+ }
+
+ @Override
+ protected void processVector( // writes left * right for rows [0, currentSize) into outValues/outNulls
+ double[] leftInput, // left operand values
+ long[] rightInput, // right operand values, converted to double before the multiply
+ boolean[] leftNulls, // per-row null flags for left, or null when none are supplied
+ boolean[] rightNulls, // per-row null flags for right, or null when none are supplied
+ int currentSize // number of leading rows to process
+ )
+ {
+ final boolean hasLeftNulls = leftNulls != null;
+ final boolean hasRightNulls = rightNulls != null;
+ final int laneCount = DOUBLE_SPECIES.length(); // elements handled per vector operation
+ final int upperBound = DOUBLE_SPECIES.loopBound(currentSize); // largest multiple of laneCount <= currentSize
+ int i = 0;
+ if (!hasLeftNulls && !hasRightNulls) { // fast path: neither input carries a null array
+ for (; i < upperBound; i += laneCount) {
+ final DoubleVector va = DoubleVector.fromArray(DOUBLE_SPECIES, leftInput, i);
+ final DoubleVector vb = // long lanes converted to double lanes
+ (DoubleVector) LongVector.fromArray(LONG_SPECIES, rightInput, i).castShape(DOUBLE_SPECIES, 0);
+ va.mul(vb).intoArray(outValues, i); // one full vector of results per iteration
+ }
+ for (; i < currentSize; i++) { // scalar tail; scalarFallback is presumably the same multiply - TODO confirm
+ outValues[i] = scalarFallback.process(leftInput[i], rightInput[i]);
+ }
+ Arrays.fill(outNulls, 0, currentSize, false); // mark every processed row as non-null
+ } else {
+ for (; i < upperBound; i += laneCount) {
+ final VectorMask nm; // output null mask: a lane is null when either supplied input flag is set
+ if (hasLeftNulls && hasRightNulls) {
+ nm = VectorMask.fromArray(DOUBLE_SPECIES, leftNulls, i)
+ .or(VectorMask.fromArray(DOUBLE_SPECIES, rightNulls, i));
+ } else if (hasLeftNulls) {
+ nm = VectorMask.fromArray(DOUBLE_SPECIES, leftNulls, i);
+ } else {
+ nm = VectorMask.fromArray(DOUBLE_SPECIES, rightNulls, i);
+ }
+ final DoubleVector va = DoubleVector.fromArray(DOUBLE_SPECIES, leftInput, i);
+ final DoubleVector vb = // long lanes converted to double lanes
+ (DoubleVector) LongVector.fromArray(LONG_SPECIES, rightInput, i).castShape(DOUBLE_SPECIES, 0);
+ va.mul(vb).intoArray(outValues, i); // computed for every lane, including lanes flagged null
+ nm.intoArray(outNulls, i);
+ }
+ for (; i < currentSize; i++) { // scalar tail for rows at or past upperBound
+ final boolean isNull = (hasLeftNulls && leftNulls[i]) || (hasRightNulls && rightNulls[i]);
+ outNulls[i] = isNull;
+ if (!isNull) { // null rows leave outValues[i] untouched
+ outValues[i] = scalarFallback.process(leftInput[i], rightInput[i]);
+ }
+ }
+ }
+ }
+}
diff --git a/processing/src/main/java/org/apache/druid/math/expr/vector/simd/SimdDoubleLongProcessor.java b/processing/src/main/java/org/apache/druid/math/expr/vector/simd/SimdDoubleLongProcessor.java
new file mode 100644
index 000000000000..e3e705d656a1
--- /dev/null
+++ b/processing/src/main/java/org/apache/druid/math/expr/vector/simd/SimdDoubleLongProcessor.java
@@ -0,0 +1,98 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.math.expr.vector.simd;
+
+import jdk.incubator.vector.DoubleVector;
+import jdk.incubator.vector.LongVector;
+import jdk.incubator.vector.VectorSpecies;
+import org.apache.druid.math.expr.Expr;
+import org.apache.druid.math.expr.ExpressionType;
+import org.apache.druid.math.expr.vector.CastToTypeVectorProcessor;
+import org.apache.druid.math.expr.vector.ExprEvalDoubleVector;
+import org.apache.druid.math.expr.vector.ExprEvalVector;
+import org.apache.druid.math.expr.vector.ExprVectorProcessor;
+import org.apache.druid.math.expr.vector.functional.DoubleBivariateDoubleLongFunction;
+
+import javax.annotation.Nullable;
+
+/**
+ * Abstract base for SIMD processors that compute {@code (double[], long[]) -> double[]} ops. The long lane is
+ * widened to {@link DoubleVector} via {@code castShape(DoubleVector.SPECIES_PREFERRED, 0)} in each subclass's hot
+ * loop. See {@link SimdLongLongProcessor} for the design rationale.
+ */
+abstract class SimdDoubleLongProcessor implements ExprVectorProcessor
+{
+ static final VectorSpecies LONG_SPECIES = LongVector.SPECIES_PREFERRED; // species used to load the long operand
+ static final VectorSpecies DOUBLE_SPECIES = DoubleVector.SPECIES_PREFERRED; // species used for the arithmetic
+
+ private final ExprVectorProcessor left; // left operand, cast to DOUBLE in the constructor
+ private final ExprVectorProcessor right; // right operand, cast to LONG in the constructor
+ final DoubleBivariateDoubleLongFunction scalarFallback; // per-row op used by subclasses for the loop tail
+ final double[] outValues; // result values; allocated once and overwritten on each evalVector call
+ final boolean[] outNulls; // result null flags; allocated once and overwritten on each evalVector call
+
+ protected SimdDoubleLongProcessor(
+ ExprVectorProcessor> left,
+ ExprVectorProcessor> right,
+ DoubleBivariateDoubleLongFunction scalarFallback
+ )
+ {
+ this.left = CastToTypeVectorProcessor.cast(left, ExpressionType.DOUBLE);
+ this.right = CastToTypeVectorProcessor.cast(right, ExpressionType.LONG);
+ this.scalarFallback = scalarFallback;
+ this.outValues = new double[this.left.maxVectorSize()]; // NOTE(review): sized from left only - confirm right matches
+ this.outNulls = new boolean[this.left.maxVectorSize()];
+ }
+
+ @Override
+ public final ExprEvalVector evalVector(Expr.VectorInputBinding bindings)
+ {
+ final ExprEvalVector lhs = left.evalVector(bindings);
+ final ExprEvalVector rhs = right.evalVector(bindings);
+ processVector( // subclass fills outValues/outNulls for the current rows
+ lhs.values(),
+ rhs.values(),
+ lhs.getNullVector(), // passed on as-is; subclasses treat null as "no null flags"
+ rhs.getNullVector(),
+ bindings.getCurrentVectorSize()
+ );
+ return new ExprEvalDoubleVector(outValues, outNulls); // hands this processor's own buffers to the result
+ }
+
+ protected abstract void processVector( // implementations write rows [0, currentSize) of outValues/outNulls
+ double[] leftInput, // left operand values
+ long[] rightInput, // right operand values
+ @Nullable boolean[] leftNulls, // per-row null flags for left, or null
+ @Nullable boolean[] rightNulls, // per-row null flags for right, or null
+ int currentSize // number of leading rows to process
+ );
+
+ @Override
+ public final ExpressionType getOutputType()
+ {
+ return ExpressionType.DOUBLE;
+ }
+
+ @Override
+ public final int maxVectorSize()
+ {
+ return outValues.length; // equals left.maxVectorSize() as captured in the constructor
+ }
+}
diff --git a/processing/src/main/java/org/apache/druid/math/expr/vector/simd/SimdDoubleLongSubProcessor.java b/processing/src/main/java/org/apache/druid/math/expr/vector/simd/SimdDoubleLongSubProcessor.java
new file mode 100644
index 000000000000..97da5a718f60
--- /dev/null
+++ b/processing/src/main/java/org/apache/druid/math/expr/vector/simd/SimdDoubleLongSubProcessor.java
@@ -0,0 +1,96 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.math.expr.vector.simd;
+
+import jdk.incubator.vector.DoubleVector;
+import jdk.incubator.vector.LongVector;
+import jdk.incubator.vector.VectorMask;
+import org.apache.druid.math.expr.vector.ExprVectorProcessor;
+import org.apache.druid.math.expr.vector.functional.DoubleBivariateDoubleLongFunction;
+
+import java.util.Arrays;
+
+/**
+ * SIMD specialization of {@code (double[], long[]) -> double[]} subtraction. The op is hardcoded to
+ * {@link DoubleVector#sub} so the JIT statically resolves it to the platform's double-subtract intrinsic.
+ */
+public final class SimdDoubleLongSubProcessor extends SimdDoubleLongProcessor
+{
+ public SimdDoubleLongSubProcessor(
+ ExprVectorProcessor> left,
+ ExprVectorProcessor> right,
+ DoubleBivariateDoubleLongFunction scalarFallback // per-row op used for rows the vector loop does not cover
+ )
+ {
+ super(left, right, scalarFallback);
+ }
+
+ @Override
+ protected void processVector( // writes left - right for rows [0, currentSize) into outValues/outNulls
+ double[] leftInput, // left operand values (minuend)
+ long[] rightInput, // right operand values (subtrahend), converted to double before the subtract
+ boolean[] leftNulls, // per-row null flags for left, or null when none are supplied
+ boolean[] rightNulls, // per-row null flags for right, or null when none are supplied
+ int currentSize // number of leading rows to process
+ )
+ {
+ final boolean hasLeftNulls = leftNulls != null;
+ final boolean hasRightNulls = rightNulls != null;
+ final int laneCount = DOUBLE_SPECIES.length(); // elements handled per vector operation
+ final int upperBound = DOUBLE_SPECIES.loopBound(currentSize); // largest multiple of laneCount <= currentSize
+ int i = 0;
+ if (!hasLeftNulls && !hasRightNulls) { // fast path: neither input carries a null array
+ for (; i < upperBound; i += laneCount) {
+ final DoubleVector va = DoubleVector.fromArray(DOUBLE_SPECIES, leftInput, i);
+ final DoubleVector vb = // long lanes converted to double lanes
+ (DoubleVector) LongVector.fromArray(LONG_SPECIES, rightInput, i).castShape(DOUBLE_SPECIES, 0);
+ va.sub(vb).intoArray(outValues, i); // one full vector of results per iteration
+ }
+ for (; i < currentSize; i++) { // scalar tail; scalarFallback is presumably the same subtract - TODO confirm
+ outValues[i] = scalarFallback.process(leftInput[i], rightInput[i]);
+ }
+ Arrays.fill(outNulls, 0, currentSize, false); // mark every processed row as non-null
+ } else {
+ for (; i < upperBound; i += laneCount) {
+ final VectorMask nm; // output null mask: a lane is null when either supplied input flag is set
+ if (hasLeftNulls && hasRightNulls) {
+ nm = VectorMask.fromArray(DOUBLE_SPECIES, leftNulls, i)
+ .or(VectorMask.fromArray(DOUBLE_SPECIES, rightNulls, i));
+ } else if (hasLeftNulls) {
+ nm = VectorMask.fromArray(DOUBLE_SPECIES, leftNulls, i);
+ } else {
+ nm = VectorMask.fromArray(DOUBLE_SPECIES, rightNulls, i);
+ }
+ final DoubleVector va = DoubleVector.fromArray(DOUBLE_SPECIES, leftInput, i);
+ final DoubleVector vb = // long lanes converted to double lanes
+ (DoubleVector) LongVector.fromArray(LONG_SPECIES, rightInput, i).castShape(DOUBLE_SPECIES, 0);
+ va.sub(vb).intoArray(outValues, i); // computed for every lane, including lanes flagged null
+ nm.intoArray(outNulls, i);
+ }
+ for (; i < currentSize; i++) { // scalar tail for rows at or past upperBound
+ final boolean isNull = (hasLeftNulls && leftNulls[i]) || (hasRightNulls && rightNulls[i]);
+ outNulls[i] = isNull;
+ if (!isNull) { // null rows leave outValues[i] untouched
+ outValues[i] = scalarFallback.process(leftInput[i], rightInput[i]);
+ }
+ }
+ }
+ }
+}
diff --git a/processing/src/main/java/org/apache/druid/math/expr/vector/simd/SimdLongDoubleAddProcessor.java b/processing/src/main/java/org/apache/druid/math/expr/vector/simd/SimdLongDoubleAddProcessor.java
new file mode 100644
index 000000000000..bd077a05a023
--- /dev/null
+++ b/processing/src/main/java/org/apache/druid/math/expr/vector/simd/SimdLongDoubleAddProcessor.java
@@ -0,0 +1,96 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.math.expr.vector.simd;
+
+import jdk.incubator.vector.DoubleVector;
+import jdk.incubator.vector.LongVector;
+import jdk.incubator.vector.VectorMask;
+import org.apache.druid.math.expr.vector.ExprVectorProcessor;
+import org.apache.druid.math.expr.vector.functional.DoubleBivariateLongDoubleFunction;
+
+import java.util.Arrays;
+
+/**
+ * SIMD specialization of {@code (long[], double[]) -> double[]} addition. The op is hardcoded to
+ * {@link DoubleVector#add} so the JIT statically resolves it to the platform's double-add intrinsic.
+ */
+public final class SimdLongDoubleAddProcessor extends SimdLongDoubleProcessor
+{
+ public SimdLongDoubleAddProcessor(
+ ExprVectorProcessor> left,
+ ExprVectorProcessor> right,
+ DoubleBivariateLongDoubleFunction scalarFallback // per-row op used for rows the vector loop does not cover
+ )
+ {
+ super(left, right, scalarFallback);
+ }
+
+ @Override
+ protected void processVector( // writes left + right for rows [0, currentSize) into outValues/outNulls
+ long[] leftInput, // left operand values, converted to double before the add
+ double[] rightInput, // right operand values
+ boolean[] leftNulls, // per-row null flags for left, or null when none are supplied
+ boolean[] rightNulls, // per-row null flags for right, or null when none are supplied
+ int currentSize // number of leading rows to process
+ )
+ {
+ final boolean hasLeftNulls = leftNulls != null;
+ final boolean hasRightNulls = rightNulls != null;
+ final int laneCount = DOUBLE_SPECIES.length(); // elements handled per vector operation
+ final int upperBound = DOUBLE_SPECIES.loopBound(currentSize); // largest multiple of laneCount <= currentSize
+ int i = 0;
+ if (!hasLeftNulls && !hasRightNulls) { // fast path: neither input carries a null array
+ for (; i < upperBound; i += laneCount) {
+ final DoubleVector va = // long lanes converted to double lanes
+ (DoubleVector) LongVector.fromArray(LONG_SPECIES, leftInput, i).castShape(DOUBLE_SPECIES, 0);
+ final DoubleVector vb = DoubleVector.fromArray(DOUBLE_SPECIES, rightInput, i);
+ va.add(vb).intoArray(outValues, i); // one full vector of results per iteration
+ }
+ for (; i < currentSize; i++) { // scalar tail; scalarFallback is presumably the same add - TODO confirm
+ outValues[i] = scalarFallback.process(leftInput[i], rightInput[i]);
+ }
+ Arrays.fill(outNulls, 0, currentSize, false); // mark every processed row as non-null
+ } else {
+ for (; i < upperBound; i += laneCount) {
+ final VectorMask nm; // output null mask: a lane is null when either supplied input flag is set
+ if (hasLeftNulls && hasRightNulls) {
+ nm = VectorMask.fromArray(DOUBLE_SPECIES, leftNulls, i)
+ .or(VectorMask.fromArray(DOUBLE_SPECIES, rightNulls, i));
+ } else if (hasLeftNulls) {
+ nm = VectorMask.fromArray(DOUBLE_SPECIES, leftNulls, i);
+ } else {
+ nm = VectorMask.fromArray(DOUBLE_SPECIES, rightNulls, i);
+ }
+ final DoubleVector va = // long lanes converted to double lanes
+ (DoubleVector) LongVector.fromArray(LONG_SPECIES, leftInput, i).castShape(DOUBLE_SPECIES, 0);
+ final DoubleVector vb = DoubleVector.fromArray(DOUBLE_SPECIES, rightInput, i);
+ va.add(vb).intoArray(outValues, i); // computed for every lane, including lanes flagged null
+ nm.intoArray(outNulls, i);
+ }
+ for (; i < currentSize; i++) { // scalar tail for rows at or past upperBound
+ final boolean isNull = (hasLeftNulls && leftNulls[i]) || (hasRightNulls && rightNulls[i]);
+ outNulls[i] = isNull;
+ if (!isNull) { // null rows leave outValues[i] untouched
+ outValues[i] = scalarFallback.process(leftInput[i], rightInput[i]);
+ }
+ }
+ }
+ }
+}
diff --git a/processing/src/main/java/org/apache/druid/math/expr/vector/simd/SimdLongDoubleMulProcessor.java b/processing/src/main/java/org/apache/druid/math/expr/vector/simd/SimdLongDoubleMulProcessor.java
new file mode 100644
index 000000000000..2d211e26b7e1
--- /dev/null
+++ b/processing/src/main/java/org/apache/druid/math/expr/vector/simd/SimdLongDoubleMulProcessor.java
@@ -0,0 +1,96 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.math.expr.vector.simd;
+
+import jdk.incubator.vector.DoubleVector;
+import jdk.incubator.vector.LongVector;
+import jdk.incubator.vector.VectorMask;
+import org.apache.druid.math.expr.vector.ExprVectorProcessor;
+import org.apache.druid.math.expr.vector.functional.DoubleBivariateLongDoubleFunction;
+
+import java.util.Arrays;
+
+/**
+ * SIMD specialization of {@code (long[], double[]) -> double[]} multiplication. The op is hardcoded to
+ * {@link DoubleVector#mul} so the JIT statically resolves it to the platform's double-multiply intrinsic.
+ */
+public final class SimdLongDoubleMulProcessor extends SimdLongDoubleProcessor
+{
+ public SimdLongDoubleMulProcessor(
+ ExprVectorProcessor> left,
+ ExprVectorProcessor> right,
+ DoubleBivariateLongDoubleFunction scalarFallback // per-row op used for rows the vector loop does not cover
+ )
+ {
+ super(left, right, scalarFallback);
+ }
+
+ @Override
+ protected void processVector( // writes left * right for rows [0, currentSize) into outValues/outNulls
+ long[] leftInput, // left operand values, converted to double before the multiply
+ double[] rightInput, // right operand values
+ boolean[] leftNulls, // per-row null flags for left, or null when none are supplied
+ boolean[] rightNulls, // per-row null flags for right, or null when none are supplied
+ int currentSize // number of leading rows to process
+ )
+ {
+ final boolean hasLeftNulls = leftNulls != null;
+ final boolean hasRightNulls = rightNulls != null;
+ final int laneCount = DOUBLE_SPECIES.length(); // elements handled per vector operation
+ final int upperBound = DOUBLE_SPECIES.loopBound(currentSize); // largest multiple of laneCount <= currentSize
+ int i = 0;
+ if (!hasLeftNulls && !hasRightNulls) { // fast path: neither input carries a null array
+ for (; i < upperBound; i += laneCount) {
+ final DoubleVector va = // long lanes converted to double lanes
+ (DoubleVector) LongVector.fromArray(LONG_SPECIES, leftInput, i).castShape(DOUBLE_SPECIES, 0);
+ final DoubleVector vb = DoubleVector.fromArray(DOUBLE_SPECIES, rightInput, i);
+ va.mul(vb).intoArray(outValues, i); // one full vector of results per iteration
+ }
+ for (; i < currentSize; i++) { // scalar tail; scalarFallback is presumably the same multiply - TODO confirm
+ outValues[i] = scalarFallback.process(leftInput[i], rightInput[i]);
+ }
+ Arrays.fill(outNulls, 0, currentSize, false); // mark every processed row as non-null
+ } else {
+ for (; i < upperBound; i += laneCount) {
+ final VectorMask nm; // output null mask: a lane is null when either supplied input flag is set
+ if (hasLeftNulls && hasRightNulls) {
+ nm = VectorMask.fromArray(DOUBLE_SPECIES, leftNulls, i)
+ .or(VectorMask.fromArray(DOUBLE_SPECIES, rightNulls, i));
+ } else if (hasLeftNulls) {
+ nm = VectorMask.fromArray(DOUBLE_SPECIES, leftNulls, i);
+ } else {
+ nm = VectorMask.fromArray(DOUBLE_SPECIES, rightNulls, i);
+ }
+ final DoubleVector va = // long lanes converted to double lanes
+ (DoubleVector) LongVector.fromArray(LONG_SPECIES, leftInput, i).castShape(DOUBLE_SPECIES, 0);
+ final DoubleVector vb = DoubleVector.fromArray(DOUBLE_SPECIES, rightInput, i);
+ va.mul(vb).intoArray(outValues, i); // computed for every lane, including lanes flagged null
+ nm.intoArray(outNulls, i);
+ }
+ for (; i < currentSize; i++) { // scalar tail for rows at or past upperBound
+ final boolean isNull = (hasLeftNulls && leftNulls[i]) || (hasRightNulls && rightNulls[i]);
+ outNulls[i] = isNull;
+ if (!isNull) { // null rows leave outValues[i] untouched
+ outValues[i] = scalarFallback.process(leftInput[i], rightInput[i]);
+ }
+ }
+ }
+ }
+}
diff --git a/processing/src/main/java/org/apache/druid/math/expr/vector/simd/SimdLongDoubleProcessor.java b/processing/src/main/java/org/apache/druid/math/expr/vector/simd/SimdLongDoubleProcessor.java
new file mode 100644
index 000000000000..366354f82b2e
--- /dev/null
+++ b/processing/src/main/java/org/apache/druid/math/expr/vector/simd/SimdLongDoubleProcessor.java
@@ -0,0 +1,98 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.math.expr.vector.simd;
+
+import jdk.incubator.vector.DoubleVector;
+import jdk.incubator.vector.LongVector;
+import jdk.incubator.vector.VectorSpecies;
+import org.apache.druid.math.expr.Expr;
+import org.apache.druid.math.expr.ExpressionType;
+import org.apache.druid.math.expr.vector.CastToTypeVectorProcessor;
+import org.apache.druid.math.expr.vector.ExprEvalDoubleVector;
+import org.apache.druid.math.expr.vector.ExprEvalVector;
+import org.apache.druid.math.expr.vector.ExprVectorProcessor;
+import org.apache.druid.math.expr.vector.functional.DoubleBivariateLongDoubleFunction;
+
+import javax.annotation.Nullable;
+
+/**
+ * Abstract base for SIMD processors that compute {@code (long[], double[]) -> double[]} ops. The long lane is
+ * widened to {@link DoubleVector} via {@code castShape(DoubleVector.SPECIES_PREFERRED, 0)} in each subclass's hot
+ * loop. See {@link SimdLongLongProcessor} for the design rationale.
+ */
+abstract class SimdLongDoubleProcessor implements ExprVectorProcessor
+{
+ static final VectorSpecies LONG_SPECIES = LongVector.SPECIES_PREFERRED; // species used to load the long operand
+ static final VectorSpecies DOUBLE_SPECIES = DoubleVector.SPECIES_PREFERRED; // species used for the arithmetic
+
+ private final ExprVectorProcessor left; // left operand, cast to LONG in the constructor
+ private final ExprVectorProcessor right; // right operand, cast to DOUBLE in the constructor
+ final DoubleBivariateLongDoubleFunction scalarFallback; // per-row op used by subclasses for the loop tail
+ final double[] outValues; // result values; allocated once and overwritten on each evalVector call
+ final boolean[] outNulls; // result null flags; allocated once and overwritten on each evalVector call
+
+ protected SimdLongDoubleProcessor(
+ ExprVectorProcessor> left,
+ ExprVectorProcessor> right,
+ DoubleBivariateLongDoubleFunction scalarFallback
+ )
+ {
+ this.left = CastToTypeVectorProcessor.cast(left, ExpressionType.LONG);
+ this.right = CastToTypeVectorProcessor.cast(right, ExpressionType.DOUBLE);
+ this.scalarFallback = scalarFallback;
+ this.outValues = new double[this.left.maxVectorSize()]; // NOTE(review): sized from left only - confirm right matches
+ this.outNulls = new boolean[this.left.maxVectorSize()];
+ }
+
+ @Override
+ public final ExprEvalVector evalVector(Expr.VectorInputBinding bindings)
+ {
+ final ExprEvalVector lhs = left.evalVector(bindings);
+ final ExprEvalVector rhs = right.evalVector(bindings);
+ processVector( // subclass fills outValues/outNulls for the current rows
+ lhs.values(),
+ rhs.values(),
+ lhs.getNullVector(), // passed on as-is; subclasses treat null as "no null flags"
+ rhs.getNullVector(),
+ bindings.getCurrentVectorSize()
+ );
+ return new ExprEvalDoubleVector(outValues, outNulls); // hands this processor's own buffers to the result
+ }
+
+ protected abstract void processVector( // implementations write rows [0, currentSize) of outValues/outNulls
+ long[] leftInput, // left operand values
+ double[] rightInput, // right operand values
+ @Nullable boolean[] leftNulls, // per-row null flags for left, or null
+ @Nullable boolean[] rightNulls, // per-row null flags for right, or null
+ int currentSize // number of leading rows to process
+ );
+
+ @Override
+ public final ExpressionType getOutputType()
+ {
+ return ExpressionType.DOUBLE;
+ }
+
+ @Override
+ public final int maxVectorSize()
+ {
+ return outValues.length; // equals left.maxVectorSize() as captured in the constructor
+ }
+}
diff --git a/processing/src/main/java/org/apache/druid/math/expr/vector/simd/SimdLongDoubleSubProcessor.java b/processing/src/main/java/org/apache/druid/math/expr/vector/simd/SimdLongDoubleSubProcessor.java
new file mode 100644
index 000000000000..33c8602cf884
--- /dev/null
+++ b/processing/src/main/java/org/apache/druid/math/expr/vector/simd/SimdLongDoubleSubProcessor.java
@@ -0,0 +1,96 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.math.expr.vector.simd;
+
+import jdk.incubator.vector.DoubleVector;
+import jdk.incubator.vector.LongVector;
+import jdk.incubator.vector.VectorMask;
+import org.apache.druid.math.expr.vector.ExprVectorProcessor;
+import org.apache.druid.math.expr.vector.functional.DoubleBivariateLongDoubleFunction;
+
+import java.util.Arrays;
+
+/**
+ * SIMD specialization of {@code (long[], double[]) -> double[]} subtraction. The op is hardcoded to
+ * {@link DoubleVector#sub} so the JIT statically resolves it to the platform's double-subtract intrinsic.
+ */
+public final class SimdLongDoubleSubProcessor extends SimdLongDoubleProcessor
+{
+ public SimdLongDoubleSubProcessor(
+ ExprVectorProcessor> left,
+ ExprVectorProcessor> right,
+ DoubleBivariateLongDoubleFunction scalarFallback // per-row op used for rows the vector loop does not cover
+ )
+ {
+ super(left, right, scalarFallback);
+ }
+
+ @Override
+ protected void processVector( // writes left - right for rows [0, currentSize) into outValues/outNulls
+ long[] leftInput, // left operand values (minuend), converted to double before the subtract
+ double[] rightInput, // right operand values (subtrahend)
+ boolean[] leftNulls, // per-row null flags for left, or null when none are supplied
+ boolean[] rightNulls, // per-row null flags for right, or null when none are supplied
+ int currentSize // number of leading rows to process
+ )
+ {
+ final boolean hasLeftNulls = leftNulls != null;
+ final boolean hasRightNulls = rightNulls != null;
+ final int laneCount = DOUBLE_SPECIES.length(); // elements handled per vector operation
+ final int upperBound = DOUBLE_SPECIES.loopBound(currentSize); // largest multiple of laneCount <= currentSize
+ int i = 0;
+ if (!hasLeftNulls && !hasRightNulls) { // fast path: neither input carries a null array
+ for (; i < upperBound; i += laneCount) {
+ final DoubleVector va = // long lanes converted to double lanes
+ (DoubleVector) LongVector.fromArray(LONG_SPECIES, leftInput, i).castShape(DOUBLE_SPECIES, 0);
+ final DoubleVector vb = DoubleVector.fromArray(DOUBLE_SPECIES, rightInput, i);
+ va.sub(vb).intoArray(outValues, i); // one full vector of results per iteration
+ }
+ for (; i < currentSize; i++) { // scalar tail; scalarFallback is presumably the same subtract - TODO confirm
+ outValues[i] = scalarFallback.process(leftInput[i], rightInput[i]);
+ }
+ Arrays.fill(outNulls, 0, currentSize, false); // mark every processed row as non-null
+ } else {
+ for (; i < upperBound; i += laneCount) {
+ final VectorMask nm; // output null mask: a lane is null when either supplied input flag is set
+ if (hasLeftNulls && hasRightNulls) {
+ nm = VectorMask.fromArray(DOUBLE_SPECIES, leftNulls, i)
+ .or(VectorMask.fromArray(DOUBLE_SPECIES, rightNulls, i));
+ } else if (hasLeftNulls) {
+ nm = VectorMask.fromArray(DOUBLE_SPECIES, leftNulls, i);
+ } else {
+ nm = VectorMask.fromArray(DOUBLE_SPECIES, rightNulls, i);
+ }
+ final DoubleVector va = // long lanes converted to double lanes
+ (DoubleVector) LongVector.fromArray(LONG_SPECIES, leftInput, i).castShape(DOUBLE_SPECIES, 0);
+ final DoubleVector vb = DoubleVector.fromArray(DOUBLE_SPECIES, rightInput, i);
+ va.sub(vb).intoArray(outValues, i); // computed for every lane, including lanes flagged null
+ nm.intoArray(outNulls, i);
+ }
+ for (; i < currentSize; i++) { // scalar tail for rows at or past upperBound
+ final boolean isNull = (hasLeftNulls && leftNulls[i]) || (hasRightNulls && rightNulls[i]);
+ outNulls[i] = isNull;
+ if (!isNull) { // null rows leave outValues[i] untouched
+ outValues[i] = scalarFallback.process(leftInput[i], rightInput[i]);
+ }
+ }
+ }
+ }
+}
diff --git a/processing/src/main/java/org/apache/druid/math/expr/vector/simd/SimdLongLongAddProcessor.java b/processing/src/main/java/org/apache/druid/math/expr/vector/simd/SimdLongLongAddProcessor.java
new file mode 100644
index 000000000000..f5e0298af09e
--- /dev/null
+++ b/processing/src/main/java/org/apache/druid/math/expr/vector/simd/SimdLongLongAddProcessor.java
@@ -0,0 +1,93 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.math.expr.vector.simd;
+
+import jdk.incubator.vector.LongVector;
+import jdk.incubator.vector.VectorMask;
+import org.apache.druid.math.expr.vector.ExprVectorProcessor;
+import org.apache.druid.math.expr.vector.functional.LongBivariateLongsFunction;
+
+import java.util.Arrays;
+
+/**
+ * SIMD specialization of {@code (long[], long[]) -> long[]} addition. The op is hardcoded to {@link LongVector#add}
+ * so the JIT statically resolves it to the platform's long-add intrinsic.
+ * <p>
+ * Rows are consumed in chunks of {@code SPECIES.length()} lanes up to {@code SPECIES.loopBound(currentSize)}; the
+ * rows left over after the last full chunk are handed to the scalar fallback function one at a time.
+ */
+public final class SimdLongLongAddProcessor extends SimdLongLongProcessor
+{
+  /**
+   * @param left           processor supplying the left operand
+   * @param right          processor supplying the right operand
+   * @param scalarFallback scalar function applied to the tail rows that do not fill a whole vector
+   */
+  public SimdLongLongAddProcessor(
+      ExprVectorProcessor<?> left,
+      ExprVectorProcessor<?> right,
+      LongBivariateLongsFunction scalarFallback
+  )
+  {
+    super(left, right, scalarFallback);
+  }
+
+  /**
+   * Fills {@code outValues} and {@code outNulls} for the first {@code currentSize} rows.
+   *
+   * @param leftInput   left operand values
+   * @param rightInput  right operand values
+   * @param leftNulls   null flags of the left operand, or {@code null} when it carries no null vector
+   * @param rightNulls  null flags of the right operand, or {@code null} when it carries no null vector
+   * @param currentSize number of rows to process
+   */
+  @Override
+  protected void processVector(
+      long[] leftInput,
+      long[] rightInput,
+      boolean[] leftNulls,
+      boolean[] rightNulls,
+      int currentSize
+  )
+  {
+    final boolean hasLeftNulls = leftNulls != null;
+    final boolean hasRightNulls = rightNulls != null;
+    final int laneCount = SPECIES.length();
+    // Largest multiple of laneCount that is <= currentSize; rows at or beyond it form the scalar tail.
+    final int upperBound = SPECIES.loopBound(currentSize);
+    int i = 0;
+    if (!hasLeftNulls && !hasRightNulls) {
+      // Neither side has a null vector: pure arithmetic, then mark every processed row as non-null.
+      for (; i < upperBound; i += laneCount) {
+        final LongVector va = LongVector.fromArray(SPECIES, leftInput, i);
+        final LongVector vb = LongVector.fromArray(SPECIES, rightInput, i);
+        va.add(vb).intoArray(outValues, i);
+      }
+      for (; i < currentSize; i++) {
+        outValues[i] = scalarFallback.process(leftInput[i], rightInput[i]);
+      }
+      Arrays.fill(outNulls, 0, currentSize, false);
+    } else {
+      for (; i < upperBound; i += laneCount) {
+        // A row is null when either operand is null; only the null vectors that exist are consulted.
+        final VectorMask<Long> nm;
+        if (hasLeftNulls && hasRightNulls) {
+          nm = VectorMask.fromArray(SPECIES, leftNulls, i)
+                         .or(VectorMask.fromArray(SPECIES, rightNulls, i));
+        } else if (hasLeftNulls) {
+          nm = VectorMask.fromArray(SPECIES, leftNulls, i);
+        } else {
+          nm = VectorMask.fromArray(SPECIES, rightNulls, i);
+        }
+        final LongVector va = LongVector.fromArray(SPECIES, leftInput, i);
+        final LongVector vb = LongVector.fromArray(SPECIES, rightInput, i);
+        // The sum is stored for null lanes too; the flag written to outNulls is what marks them as null.
+        va.add(vb).intoArray(outValues, i);
+        nm.intoArray(outNulls, i);
+      }
+      for (; i < currentSize; i++) {
+        final boolean isNull = (hasLeftNulls && leftNulls[i]) || (hasRightNulls && rightNulls[i]);
+        outNulls[i] = isNull;
+        if (!isNull) {
+          outValues[i] = scalarFallback.process(leftInput[i], rightInput[i]);
+        }
+      }
+    }
+  }
+}
diff --git a/processing/src/main/java/org/apache/druid/math/expr/vector/simd/SimdLongLongMulProcessor.java b/processing/src/main/java/org/apache/druid/math/expr/vector/simd/SimdLongLongMulProcessor.java
new file mode 100644
index 000000000000..32e8e8aa751b
--- /dev/null
+++ b/processing/src/main/java/org/apache/druid/math/expr/vector/simd/SimdLongLongMulProcessor.java
@@ -0,0 +1,93 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.math.expr.vector.simd;
+
+import jdk.incubator.vector.LongVector;
+import jdk.incubator.vector.VectorMask;
+import org.apache.druid.math.expr.vector.ExprVectorProcessor;
+import org.apache.druid.math.expr.vector.functional.LongBivariateLongsFunction;
+
+import java.util.Arrays;
+
+/**
+ * SIMD specialization of {@code (long[], long[]) -> long[]} multiplication. The op is hardcoded to
+ * {@link LongVector#mul} so the JIT statically resolves it to the platform's long-multiply intrinsic.
+ * <p>
+ * Rows are consumed in chunks of {@code SPECIES.length()} lanes up to {@code SPECIES.loopBound(currentSize)}; the
+ * rows left over after the last full chunk are handed to the scalar fallback function one at a time.
+ */
+public final class SimdLongLongMulProcessor extends SimdLongLongProcessor
+{
+  /**
+   * @param left           processor supplying the left operand
+   * @param right          processor supplying the right operand
+   * @param scalarFallback scalar function applied to the tail rows that do not fill a whole vector
+   */
+  public SimdLongLongMulProcessor(
+      ExprVectorProcessor<?> left,
+      ExprVectorProcessor<?> right,
+      LongBivariateLongsFunction scalarFallback
+  )
+  {
+    super(left, right, scalarFallback);
+  }
+
+  /**
+   * Fills {@code outValues} and {@code outNulls} for the first {@code currentSize} rows.
+   *
+   * @param leftInput   left operand values
+   * @param rightInput  right operand values
+   * @param leftNulls   null flags of the left operand, or {@code null} when it carries no null vector
+   * @param rightNulls  null flags of the right operand, or {@code null} when it carries no null vector
+   * @param currentSize number of rows to process
+   */
+  @Override
+  protected void processVector(
+      long[] leftInput,
+      long[] rightInput,
+      boolean[] leftNulls,
+      boolean[] rightNulls,
+      int currentSize
+  )
+  {
+    final boolean hasLeftNulls = leftNulls != null;
+    final boolean hasRightNulls = rightNulls != null;
+    final int laneCount = SPECIES.length();
+    // Largest multiple of laneCount that is <= currentSize; rows at or beyond it form the scalar tail.
+    final int upperBound = SPECIES.loopBound(currentSize);
+    int i = 0;
+    if (!hasLeftNulls && !hasRightNulls) {
+      // Neither side has a null vector: pure arithmetic, then mark every processed row as non-null.
+      for (; i < upperBound; i += laneCount) {
+        final LongVector va = LongVector.fromArray(SPECIES, leftInput, i);
+        final LongVector vb = LongVector.fromArray(SPECIES, rightInput, i);
+        va.mul(vb).intoArray(outValues, i);
+      }
+      for (; i < currentSize; i++) {
+        outValues[i] = scalarFallback.process(leftInput[i], rightInput[i]);
+      }
+      Arrays.fill(outNulls, 0, currentSize, false);
+    } else {
+      for (; i < upperBound; i += laneCount) {
+        // A row is null when either operand is null; only the null vectors that exist are consulted.
+        final VectorMask<Long> nm;
+        if (hasLeftNulls && hasRightNulls) {
+          nm = VectorMask.fromArray(SPECIES, leftNulls, i)
+                         .or(VectorMask.fromArray(SPECIES, rightNulls, i));
+        } else if (hasLeftNulls) {
+          nm = VectorMask.fromArray(SPECIES, leftNulls, i);
+        } else {
+          nm = VectorMask.fromArray(SPECIES, rightNulls, i);
+        }
+        final LongVector va = LongVector.fromArray(SPECIES, leftInput, i);
+        final LongVector vb = LongVector.fromArray(SPECIES, rightInput, i);
+        // The product is stored for null lanes too; the flag written to outNulls is what marks them as null.
+        va.mul(vb).intoArray(outValues, i);
+        nm.intoArray(outNulls, i);
+      }
+      for (; i < currentSize; i++) {
+        final boolean isNull = (hasLeftNulls && leftNulls[i]) || (hasRightNulls && rightNulls[i]);
+        outNulls[i] = isNull;
+        if (!isNull) {
+          outValues[i] = scalarFallback.process(leftInput[i], rightInput[i]);
+        }
+      }
+    }
+  }
+}
diff --git a/processing/src/main/java/org/apache/druid/math/expr/vector/simd/SimdLongLongProcessor.java b/processing/src/main/java/org/apache/druid/math/expr/vector/simd/SimdLongLongProcessor.java
new file mode 100644
index 000000000000..999f4149fac2
--- /dev/null
+++ b/processing/src/main/java/org/apache/druid/math/expr/vector/simd/SimdLongLongProcessor.java
@@ -0,0 +1,96 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.math.expr.vector.simd;
+
+import jdk.incubator.vector.LongVector;
+import jdk.incubator.vector.VectorSpecies;
+import org.apache.druid.math.expr.Expr;
+import org.apache.druid.math.expr.ExpressionType;
+import org.apache.druid.math.expr.vector.CastToTypeVectorProcessor;
+import org.apache.druid.math.expr.vector.ExprEvalLongVector;
+import org.apache.druid.math.expr.vector.ExprEvalVector;
+import org.apache.druid.math.expr.vector.ExprVectorProcessor;
+import org.apache.druid.math.expr.vector.functional.LongBivariateLongsFunction;
+
+import javax.annotation.Nullable;
+
+/**
+ * Abstract base for SIMD processors that compute {@code (long[], long[]) -> long[]} ops. Each concrete subclass
+ * (one per op) overrides {@link #processVector} with a hot loop that calls a statically-resolved {@link LongVector}
+ * method (e.g. {@code va.add(vb)}) so the JIT emits the corresponding SIMD intrinsic.
+ * <p>
+ * The output arrays are allocated once in the constructor and handed out by every {@link #evalVector} call, so a
+ * returned vector's contents are overwritten by the next evaluation on the same processor.
+ */
+abstract class SimdLongLongProcessor implements ExprVectorProcessor<long[]>
+{
+  /** Vector species shared by the subclass loops: the platform-preferred shape for {@code long} lanes. */
+  static final VectorSpecies<Long> SPECIES = LongVector.SPECIES_PREFERRED;
+
+  private final ExprVectorProcessor<long[]> left;
+  private final ExprVectorProcessor<long[]> right;
+  /** Scalar implementation of the op, used by subclasses for rows that are not covered by a full vector. */
+  final LongBivariateLongsFunction scalarFallback;
+  /** Result values; written by {@link #processVector}. */
+  final long[] outValues;
+  /** Result null flags; written by {@link #processVector}. */
+  final boolean[] outNulls;
+
+  /**
+   * @param left           left operand processor; cast to {@code LONG} before use
+   * @param right          right operand processor; cast to {@code LONG} before use
+   * @param scalarFallback scalar implementation of the op for tail rows
+   */
+  protected SimdLongLongProcessor(
+      ExprVectorProcessor<?> left,
+      ExprVectorProcessor<?> right,
+      LongBivariateLongsFunction scalarFallback
+  )
+  {
+    this.left = CastToTypeVectorProcessor.cast(left, ExpressionType.LONG);
+    this.right = CastToTypeVectorProcessor.cast(right, ExpressionType.LONG);
+    this.scalarFallback = scalarFallback;
+    // Both output buffers are sized from the left input's maximum vector size.
+    this.outValues = new long[this.left.maxVectorSize()];
+    this.outNulls = new boolean[this.left.maxVectorSize()];
+  }
+
+  /**
+   * Evaluates both operands against {@code bindings}, lets the subclass fill the output buffers, and wraps those
+   * buffers in an {@link ExprEvalLongVector}.
+   */
+  @Override
+  public final ExprEvalVector<long[]> evalVector(Expr.VectorInputBinding bindings)
+  {
+    final ExprEvalVector<long[]> lhs = left.evalVector(bindings);
+    final ExprEvalVector<long[]> rhs = right.evalVector(bindings);
+    processVector(
+        lhs.values(),
+        rhs.values(),
+        lhs.getNullVector(),
+        rhs.getNullVector(),
+        bindings.getCurrentVectorSize()
+    );
+    return new ExprEvalLongVector(outValues, outNulls);
+  }
+
+  /**
+   * Computes the op for rows {@code [0, currentSize)} into {@code outValues} and {@code outNulls}.
+   *
+   * @param leftInput   left operand values
+   * @param rightInput  right operand values
+   * @param leftNulls   null flags of the left operand, or {@code null} if it has no null vector
+   * @param rightNulls  null flags of the right operand, or {@code null} if it has no null vector
+   * @param currentSize number of rows to process
+   */
+  protected abstract void processVector(
+      long[] leftInput,
+      long[] rightInput,
+      @Nullable boolean[] leftNulls,
+      @Nullable boolean[] rightNulls,
+      int currentSize
+  );
+
+  @Override
+  public final ExpressionType getOutputType()
+  {
+    return ExpressionType.LONG;
+  }
+
+  @Override
+  public final int maxVectorSize()
+  {
+    return outValues.length;
+  }
+}
diff --git a/processing/src/main/java/org/apache/druid/math/expr/vector/simd/SimdLongLongSubProcessor.java b/processing/src/main/java/org/apache/druid/math/expr/vector/simd/SimdLongLongSubProcessor.java
new file mode 100644
index 000000000000..ab85396463dd
--- /dev/null
+++ b/processing/src/main/java/org/apache/druid/math/expr/vector/simd/SimdLongLongSubProcessor.java
@@ -0,0 +1,93 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.math.expr.vector.simd;
+
+import jdk.incubator.vector.LongVector;
+import jdk.incubator.vector.VectorMask;
+import org.apache.druid.math.expr.vector.ExprVectorProcessor;
+import org.apache.druid.math.expr.vector.functional.LongBivariateLongsFunction;
+
+import java.util.Arrays;
+
+/**
+ * SIMD specialization of {@code (long[], long[]) -> long[]} subtraction. The op is hardcoded to {@link LongVector#sub}
+ * so the JIT statically resolves it to the platform's long-subtract intrinsic.
+ * <p>
+ * Rows are consumed in chunks of {@code SPECIES.length()} lanes up to {@code SPECIES.loopBound(currentSize)}; the
+ * rows left over after the last full chunk are handed to the scalar fallback function one at a time.
+ */
+public final class SimdLongLongSubProcessor extends SimdLongLongProcessor
+{
+  /**
+   * @param left           processor supplying the left operand (minuend)
+   * @param right          processor supplying the right operand (subtrahend)
+   * @param scalarFallback scalar function applied to the tail rows that do not fill a whole vector
+   */
+  public SimdLongLongSubProcessor(
+      ExprVectorProcessor<?> left,
+      ExprVectorProcessor<?> right,
+      LongBivariateLongsFunction scalarFallback
+  )
+  {
+    super(left, right, scalarFallback);
+  }
+
+  /**
+   * Fills {@code outValues} and {@code outNulls} for the first {@code currentSize} rows.
+   *
+   * @param leftInput   left operand values
+   * @param rightInput  right operand values
+   * @param leftNulls   null flags of the left operand, or {@code null} when it carries no null vector
+   * @param rightNulls  null flags of the right operand, or {@code null} when it carries no null vector
+   * @param currentSize number of rows to process
+   */
+  @Override
+  protected void processVector(
+      long[] leftInput,
+      long[] rightInput,
+      boolean[] leftNulls,
+      boolean[] rightNulls,
+      int currentSize
+  )
+  {
+    final boolean hasLeftNulls = leftNulls != null;
+    final boolean hasRightNulls = rightNulls != null;
+    final int laneCount = SPECIES.length();
+    // Largest multiple of laneCount that is <= currentSize; rows at or beyond it form the scalar tail.
+    final int upperBound = SPECIES.loopBound(currentSize);
+    int i = 0;
+    if (!hasLeftNulls && !hasRightNulls) {
+      // Neither side has a null vector: pure arithmetic, then mark every processed row as non-null.
+      for (; i < upperBound; i += laneCount) {
+        final LongVector va = LongVector.fromArray(SPECIES, leftInput, i);
+        final LongVector vb = LongVector.fromArray(SPECIES, rightInput, i);
+        va.sub(vb).intoArray(outValues, i);
+      }
+      for (; i < currentSize; i++) {
+        outValues[i] = scalarFallback.process(leftInput[i], rightInput[i]);
+      }
+      Arrays.fill(outNulls, 0, currentSize, false);
+    } else {
+      for (; i < upperBound; i += laneCount) {
+        // A row is null when either operand is null; only the null vectors that exist are consulted.
+        final VectorMask<Long> nm;
+        if (hasLeftNulls && hasRightNulls) {
+          nm = VectorMask.fromArray(SPECIES, leftNulls, i)
+                         .or(VectorMask.fromArray(SPECIES, rightNulls, i));
+        } else if (hasLeftNulls) {
+          nm = VectorMask.fromArray(SPECIES, leftNulls, i);
+        } else {
+          nm = VectorMask.fromArray(SPECIES, rightNulls, i);
+        }
+        final LongVector va = LongVector.fromArray(SPECIES, leftInput, i);
+        final LongVector vb = LongVector.fromArray(SPECIES, rightInput, i);
+        // The difference is stored for null lanes too; the flag written to outNulls is what marks them as null.
+        va.sub(vb).intoArray(outValues, i);
+        nm.intoArray(outNulls, i);
+      }
+      for (; i < currentSize; i++) {
+        final boolean isNull = (hasLeftNulls && leftNulls[i]) || (hasRightNulls && rightNulls[i]);
+        outNulls[i] = isNull;
+        if (!isNull) {
+          outValues[i] = scalarFallback.process(leftInput[i], rightInput[i]);
+        }
+      }
+    }
+  }
+}
diff --git a/processing/src/main/java/org/apache/druid/math/expr/vector/simd/SimdProcessors.java b/processing/src/main/java/org/apache/druid/math/expr/vector/simd/SimdProcessors.java
new file mode 100644
index 000000000000..d8d74021c7a0
--- /dev/null
+++ b/processing/src/main/java/org/apache/druid/math/expr/vector/simd/SimdProcessors.java
@@ -0,0 +1,98 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.math.expr.vector.simd;
+
+import org.apache.druid.error.DruidException;
+import org.apache.druid.math.expr.vector.ExprVectorProcessor;
+import org.apache.druid.math.expr.vector.functional.DoubleBivariateDoubleLongFunction;
+import org.apache.druid.math.expr.vector.functional.DoubleBivariateDoublesFunction;
+import org.apache.druid.math.expr.vector.functional.DoubleBivariateLongDoubleFunction;
+import org.apache.druid.math.expr.vector.functional.LongBivariateLongsFunction;
+
+/**
+ * Dispatch table from a {@link SimdSupportedBinaryOp} identifier to a concrete, op-specialized SIMD processor.
+ * One class per op and type-combo so the JIT sees a monomorphic call site for the SIMD operation in each hot loop.
+ */
+public final class SimdProcessors
+{
+ private SimdProcessors()
+ {
+ }
+
+ public static ExprVectorProcessor makeLongLong(
+ ExprVectorProcessor> left,
+ ExprVectorProcessor> right,
+ SimdSupportedBinaryOp op,
+ LongBivariateLongsFunction scalarFallback
+ )
+ {
+ return switch (op) {
+ case ADD -> new SimdLongLongAddProcessor(left, right, scalarFallback);
+ case SUB -> new SimdLongLongSubProcessor(left, right, scalarFallback);
+ case MUL -> new SimdLongLongMulProcessor(left, right, scalarFallback);
+ default -> throw DruidException.defensive("Unsupported SIMD binary op[%s]", op);
+ };
+ }
+
+ public static ExprVectorProcessor makeDoubleDouble(
+ ExprVectorProcessor> left,
+ ExprVectorProcessor> right,
+ SimdSupportedBinaryOp op,
+ DoubleBivariateDoublesFunction scalarFallback
+ )
+ {
+ return switch (op) {
+ case ADD -> new SimdDoubleDoubleAddProcessor(left, right, scalarFallback);
+ case SUB -> new SimdDoubleDoubleSubProcessor(left, right, scalarFallback);
+ case MUL -> new SimdDoubleDoubleMulProcessor(left, right, scalarFallback);
+ default -> throw DruidException.defensive("Unsupported SIMD binary op[%s]", op);
+ };
+ }
+
+ public static ExprVectorProcessor makeLongDouble(
+ ExprVectorProcessor> left,
+ ExprVectorProcessor> right,
+ SimdSupportedBinaryOp op,
+ DoubleBivariateLongDoubleFunction scalarFallback
+ )
+ {
+ return switch (op) {
+ case ADD -> new SimdLongDoubleAddProcessor(left, right, scalarFallback);
+ case SUB -> new SimdLongDoubleSubProcessor(left, right, scalarFallback);
+ case MUL -> new SimdLongDoubleMulProcessor(left, right, scalarFallback);
+ default -> throw DruidException.defensive("Unsupported SIMD binary op[%s]", op);
+ };
+ }
+
+ public static ExprVectorProcessor makeDoubleLong(
+ ExprVectorProcessor> left,
+ ExprVectorProcessor> right,
+ SimdSupportedBinaryOp op,
+ DoubleBivariateDoubleLongFunction scalarFallback
+ )
+ {
+ return switch (op) {
+ case ADD -> new SimdDoubleLongAddProcessor(left, right, scalarFallback);
+ case SUB -> new SimdDoubleLongSubProcessor(left, right, scalarFallback);
+ case MUL -> new SimdDoubleLongMulProcessor(left, right, scalarFallback);
+ default -> throw DruidException.defensive("Unsupported SIMD binary op[%s]", op);
+ };
+ }
+}
diff --git a/server/src/main/java/org/apache/druid/server/initialization/CuratorDiscoveryConfig.java b/processing/src/main/java/org/apache/druid/math/expr/vector/simd/SimdSupportedBinaryOp.java
similarity index 55%
rename from server/src/main/java/org/apache/druid/server/initialization/CuratorDiscoveryConfig.java
rename to processing/src/main/java/org/apache/druid/math/expr/vector/simd/SimdSupportedBinaryOp.java
index c8c9ef95127c..953571ba4d36 100644
--- a/server/src/main/java/org/apache/druid/server/initialization/CuratorDiscoveryConfig.java
+++ b/processing/src/main/java/org/apache/druid/math/expr/vector/simd/SimdSupportedBinaryOp.java
@@ -17,24 +17,20 @@
* under the License.
*/
-package org.apache.druid.server.initialization;
-
-import com.fasterxml.jackson.annotation.JsonProperty;
+package org.apache.druid.math.expr.vector.simd;
/**
+ * Identifies which binary math operations have a {@code jdk.incubator.vector} (SIMD) specialization. Used by
+ * {@link org.apache.druid.math.expr.vector.SimpleVectorMathBivariateProcessorFactory} subclasses to declare that
+ * their operation can be dispatched to a SIMD variant when the user enables
+ * {@link org.apache.druid.math.expr.ExpressionProcessingConfig#USE_VECTOR_API}.
+ *
+ * Deliberately does not reference any {@code jdk.incubator.vector} types so that callers wiring the enum into
+ * factories do not need the incubator module visible.
*/
-public class CuratorDiscoveryConfig
+public enum SimdSupportedBinaryOp
{
- @JsonProperty
- private String path = "/druid/discovery";
-
- public String getPath()
- {
- return path;
- }
-
- public boolean useDiscovery()
- {
- return path != null;
- }
+ // Addition; SimdProcessors maps this to the Simd*AddProcessor classes.
+ ADD,
+ // Subtraction; SimdProcessors maps this to the Simd*SubProcessor classes.
+ SUB,
+ // Multiplication; SimdProcessors maps this to the Simd*MulProcessor classes.
+ MUL
}
diff --git a/processing/src/main/java/org/apache/druid/query/ChainedExecutionQueryRunner.java b/processing/src/main/java/org/apache/druid/query/ChainedExecutionQueryRunner.java
index 74f2ffc634a3..fc56504b90e9 100644
--- a/processing/src/main/java/org/apache/druid/query/ChainedExecutionQueryRunner.java
+++ b/processing/src/main/java/org/apache/druid/query/ChainedExecutionQueryRunner.java
@@ -79,7 +79,7 @@ public Sequence run(final QueryPlus queryPlus, final ResponseContext respo
{
Query query = queryPlus.getQuery();
final int priority = query.context().getPriority();
- final Ordering ordering = query.getResultOrdering();
+ final Ordering ordering = query.getResultOrdering();
final QueryPlus threadSafeQueryPlus = queryPlus.withoutThreadUnsafeState();
final QueryContext context = query.context();
@@ -91,7 +91,7 @@ public Sequence run(final QueryPlus queryPlus, final ResponseContext respo
@Override
public Iterator make()
{
- // Make it a List<> to materialize all of the values (so that it will submit everything to the executor)
+ // Make it a List<> to materialize all the values (so that it will submit everything to the executor)
List>> futures =
Lists.newArrayList(
Iterables.transform(
diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/ExpressionLambdaAggregatorFactory.java b/processing/src/main/java/org/apache/druid/query/aggregation/ExpressionLambdaAggregatorFactory.java
index 3235d709eee6..c901b52962fc 100644
--- a/processing/src/main/java/org/apache/druid/query/aggregation/ExpressionLambdaAggregatorFactory.java
+++ b/processing/src/main/java/org/apache/druid/query/aggregation/ExpressionLambdaAggregatorFactory.java
@@ -40,9 +40,11 @@
import org.apache.druid.query.cache.CacheKeyBuilder;
import org.apache.druid.segment.ColumnInspector;
import org.apache.druid.segment.ColumnSelectorFactory;
+import org.apache.druid.segment.ColumnValueSelector;
import org.apache.druid.segment.column.ColumnCapabilities;
import org.apache.druid.segment.column.ColumnCapabilitiesImpl;
import org.apache.druid.segment.column.ColumnType;
+import org.apache.druid.segment.column.ValueType;
import org.apache.druid.segment.virtual.ExpressionPlan;
import org.apache.druid.segment.virtual.ExpressionPlanner;
import org.apache.druid.segment.virtual.ExpressionSelectors;
@@ -347,6 +349,86 @@ public Object combine(@Nullable Object lhs, @Nullable Object rhs)
).value();
}
+  /**
+   * Builds a primitive combiner for LONG and DOUBLE intermediate types that folds values through
+   * {@link #combine(Object, Object)}. Any other intermediate type, or an intermediate type that differs from the
+   * type of {@code initialCombineValue}, defers to the superclass implementation.
+   */
+  @Override
+  public AggregateCombiner makeAggregateCombiner()
+  {
+    final ColumnType intermediateType = getIntermediateType();
+    // The combiner delegates to combine(), which feeds inputs into combineExpression typed against initialCombineValue.
+    // If the fold-side intermediate type (what's stored in the segment column) differs from the combine-side type,
+    // the primitive selector would silently feed wrong-typed values into the expression. Fall through to UOE.
+    if (!intermediateType.equals(ExpressionType.toColumnType(initialCombineValue.get().type()))) {
+      return super.makeAggregateCombiner();
+    }
+    if (intermediateType.is(ValueType.LONG)) {
+      return new LongAggregateCombiner()
+      {
+        private long state;
+        private boolean isNull;
+
+        @Override
+        public void reset(ColumnValueSelector selector)
+        {
+          // Consult isNull() before touching the primitive getter: the primitive value of a null row is not
+          // something to rely on. A null row is stored as 0, the same normalization fold() applies.
+          isNull = selector.isNull();
+          state = isNull ? 0L : selector.getLong();
+        }
+
+        @Override
+        public void fold(ColumnValueSelector selector)
+        {
+          // The running state is passed as null (not 0) when it is null so combine() sees the real left-hand value.
+          final Object combined = combine(isNull ? null : state, selector.getObject());
+          isNull = combined == null;
+          state = combined == null ? 0L : ((Number) combined).longValue();
+        }
+
+        @Override
+        public long getLong()
+        {
+          return state;
+        }
+
+        @Override
+        public boolean isNull()
+        {
+          return isNull;
+        }
+      };
+    } else if (intermediateType.is(ValueType.DOUBLE)) {
+      return new DoubleAggregateCombiner()
+      {
+        private double state;
+        private boolean isNull;
+
+        @Override
+        public void reset(ColumnValueSelector selector)
+        {
+          // Same ordering as the LONG variant: null check first, primitive read only for non-null rows.
+          isNull = selector.isNull();
+          state = isNull ? 0.0 : selector.getDouble();
+        }
+
+        @Override
+        public void fold(ColumnValueSelector selector)
+        {
+          final Object combined = combine(isNull ? null : state, selector.getObject());
+          isNull = combined == null;
+          state = combined == null ? 0.0 : ((Number) combined).doubleValue();
+        }
+
+        @Override
+        public double getDouble()
+        {
+          return state;
+        }
+
+        @Override
+        public boolean isNull()
+        {
+          return isNull;
+        }
+      };
+    }
+    // Neither LONG nor DOUBLE: no primitive combiner is provided here.
+    return super.makeAggregateCombiner();
+  }
+
@Override
public Object deserialize(Object object)
{
diff --git a/processing/src/main/java/org/apache/druid/query/filter/LikeDimFilter.java b/processing/src/main/java/org/apache/druid/query/filter/LikeDimFilter.java
index b5f67595ffa4..96668b306886 100644
--- a/processing/src/main/java/org/apache/druid/query/filter/LikeDimFilter.java
+++ b/processing/src/main/java/org/apache/druid/query/filter/LikeDimFilter.java
@@ -24,10 +24,13 @@
import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
+import com.google.common.collect.ImmutableRangeSet;
import com.google.common.collect.ImmutableSet;
+import com.google.common.collect.Range;
import com.google.common.collect.RangeSet;
import com.google.common.io.BaseEncoding;
import com.google.common.primitives.Chars;
+import org.apache.druid.error.DruidException;
import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.query.extraction.ExtractionFn;
import org.apache.druid.segment.filter.LikeFilter;
@@ -154,6 +157,20 @@ public Filter toFilter()
+ /**
+ * Describes the values of {@code dimension} this LIKE filter can match as a range set, when the pattern allows it.
+ * Returns {@code null} when no range is produced: the requested dimension is not the filtered one, an extraction
+ * function is configured, or the pattern is neither a pure literal nor a literal prefix followed by a wildcard.
+ */
@Override
public RangeSet getDimensionRangeSet(String dimension)
{
+ // Ranges are only produced for this filter's own dimension and only when no extractionFn is set.
+ if (!this.dimension.equals(dimension) || extractionFn != null) {
+ return null;
+ }
+ final LikeDimFilter.LikeMatcher.SuffixMatch suffixMatch = likeMatcher.getSuffixMatch();
+ final String prefix = likeMatcher.getPrefix();
+ if (suffixMatch == LikeMatcher.SuffixMatch.MATCH_EMPTY) {
+ // The full pattern was a literal (no wildcards); LIKE acts as equality on `prefix`.
+ return ImmutableRangeSet.of(Range.singleton(prefix));
+ }
+ if (suffixMatch == LikeMatcher.SuffixMatch.MATCH_ANY) {
+ // LIKE 'prefix%' matches every string starting with `prefix`; bare LIKE '%' matches everything
+ return ImmutableRangeSet.of(prefix.isEmpty() ? Range.all() : prefixRange(prefix));
+ }
+ // mid-string wildcards aren't expressible as a single Range.
return null;
}
@@ -197,6 +214,42 @@ public String toString()
return builder.appendFilterTuning(filterTuning).build();
}
+ /**
+ * Range covering every string that starts with {@code prefix}: {@code [prefix, successor)} where
+ * {@code successor} comes from {@link #lexicographicSuccessor(String)}, or {@code [prefix, +inf)} when that
+ * method returns {@code null}.
+ *
+ * @param prefix non-empty literal prefix
+ * @return the range described above
+ * @throws DruidException (defensive) if {@code prefix} is empty
+ */
+ public static Range prefixRange(String prefix)
+ {
+ if (prefix.isEmpty()) {
+ throw DruidException.defensive("prefix is empty; use Range.all() explicitly for the match-everything case");
+ }
+ final String successor = lexicographicSuccessor(prefix);
+ // A null successor means there is no finite upper bound, so the range is left open at the top.
+ return successor == null ? Range.atLeast(prefix) : Range.closedOpen(prefix, successor);
+ }
+
+ /**
+ * Exclusive upper bound, in lexicographic (UTF-16) order, for the strings that start with a non-empty {@code s}:
+ * increment the last non-{@link Character#MAX_VALUE} char and truncate everything after it. For example "ab"
+ * yields "ac", and "a" followed by U+FFFF yields "b". Note that for non-empty input this is not the immediate
+ * successor of {@code s} itself (that would be {@code s} followed by U+0000).
+ *
+ * An empty {@code s} is special-cased to the single-char string U+0000. Returns {@code null} when {@code s}
+ * is a non-empty run of {@code MAX_VALUE} chars and the carry would overflow.
+ */
+ @Nullable
+ @VisibleForTesting
+ static String lexicographicSuccessor(String s)
+ {
+ if (s.isEmpty()) {
+ return "\u0000";
+ }
+ final char[] chars = s.toCharArray();
+ // Walk back over trailing MAX_VALUE chars; they cannot be incremented and are dropped from the result.
+ int i = chars.length - 1;
+ while (i >= 0 && chars[i] == Character.MAX_VALUE) {
+ i--;
+ }
+ if (i < 0) {
+ // Every char is MAX_VALUE: nothing left to increment.
+ return null;
+ }
+ chars[i]++;
+ // Keep only chars[0..i]; everything after the incremented position is truncated.
+ return new String(chars, 0, i + 1);
+ }
+
public static class LikeMatcher
{
public enum SuffixMatch
diff --git a/processing/src/main/java/org/apache/druid/query/spec/SpecificSegmentQueryRunner.java b/processing/src/main/java/org/apache/druid/query/spec/SpecificSegmentQueryRunner.java
index 7f1a37f61e63..f888bf87e473 100644
--- a/processing/src/main/java/org/apache/druid/query/spec/SpecificSegmentQueryRunner.java
+++ b/processing/src/main/java/org/apache/druid/query/spec/SpecificSegmentQueryRunner.java
@@ -48,6 +48,7 @@ public class SpecificSegmentQueryRunner implements QueryRunner
@VisibleForTesting
static final String CTX_SET_THREAD_NAME = "setProcessingThreadNames";
+ static final boolean DEFAULT_SET_THREAD_NAME_ENABLED = false;
public SpecificSegmentQueryRunner(
QueryRunner base,
@@ -68,7 +69,7 @@ public Sequence run(final QueryPlus input, final ResponseContext responseC
)
);
- final boolean setName = input.getQuery().context().getBoolean(CTX_SET_THREAD_NAME, true);
+ final boolean setName = input.getQuery().context().getBoolean(CTX_SET_THREAD_NAME, DEFAULT_SET_THREAD_NAME_ENABLED);
final Query query = queryPlus.getQuery();
diff --git a/processing/src/main/java/org/apache/druid/segment/IndexMergerBase.java b/processing/src/main/java/org/apache/druid/segment/IndexMergerBase.java
index 1283b4475b44..50c68de30c35 100644
--- a/processing/src/main/java/org/apache/druid/segment/IndexMergerBase.java
+++ b/processing/src/main/java/org/apache/druid/segment/IndexMergerBase.java
@@ -630,7 +630,7 @@ protected Metadata makeProjections(
final String section2 = "build projection[" + projectionSchema.getName() + "] inverted index and columns";
progress.startSection(section2);
- segmentFileBuilder.startFileGroup(projectionSchema.getName());
+ segmentFileBuilder.startFileBundle(projectionSchema.getName());
if (projectionSchema.getTimeColumnName() != null) {
makeTimeColumn(
segmentFileBuilder,
diff --git a/processing/src/main/java/org/apache/druid/segment/IndexMergerV10.java b/processing/src/main/java/org/apache/druid/segment/IndexMergerV10.java
index 91d2661841ec..28d9ee56345c 100644
--- a/processing/src/main/java/org/apache/druid/segment/IndexMergerV10.java
+++ b/processing/src/main/java/org/apache/druid/segment/IndexMergerV10.java
@@ -218,7 +218,7 @@ protected File makeIndexFiles(
/************ Create Inverted Indexes and Finalize Build Columns *************/
final String section = "build inverted index and columns";
progress.startSection(section);
- v10Smoosher.startFileGroup(Projections.BASE_TABLE_PROJECTION_NAME);
+ v10Smoosher.startFileBundle(Projections.BASE_TABLE_PROJECTION_NAME);
makeTimeColumn(v10Smoosher, progress, timeWriter, indexSpec, basePrefix + ColumnHolder.TIME_COLUMN_NAME);
makeMetricsColumns(
v10Smoosher,
diff --git a/processing/src/main/java/org/apache/druid/segment/file/PartialSegmentFileMapperV10.java b/processing/src/main/java/org/apache/druid/segment/file/PartialSegmentFileMapperV10.java
index c622ec756418..5db4914a464a 100644
--- a/processing/src/main/java/org/apache/druid/segment/file/PartialSegmentFileMapperV10.java
+++ b/processing/src/main/java/org/apache/druid/segment/file/PartialSegmentFileMapperV10.java
@@ -28,6 +28,7 @@
import org.apache.druid.java.util.common.FileUtils;
import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.java.util.common.io.Closer;
+import org.apache.druid.java.util.common.logger.Logger;
import org.apache.druid.segment.data.CompressionStrategy;
import org.apache.druid.segment.loading.SegmentRangeReader;
import org.apache.druid.utils.CloseableUtils;
@@ -83,7 +84,13 @@
*/
public class PartialSegmentFileMapperV10 implements SegmentFileMapper
{
- static final String METADATA_HEADER_SUFFIX = ".header";
+ private static final Logger LOG = new Logger(PartialSegmentFileMapperV10.class);
+
+ /**
+ * Suffix appended to the target filename to form the local header file. Public so cache-manager components can
+ * recognize the partial-download on-disk layout during bootstrap restore and reservation cleanup.
+ */
+ public static final String METADATA_HEADER_SUFFIX = ".header";
/**
* Create (or restore) a lazy mapper for the main segment file with attached external file mappers. If persisted state
@@ -146,9 +153,16 @@ static PartialSegmentFileMapperV10 createForFile(
bitmapBuffer = mmapBitmap(headerFile, result);
}
catch (Exception e) {
- // corrupted file (partial write, truncated bitmap, bad JSON, etc.) — delete and re-fetch
+ // corrupted file (partial write, truncated bitmap, bad JSON, etc.), delete and re-fetch
result = null;
- headerFile.delete();
+ if (!headerFile.delete()) {
+ LOG.warn(
+ e,
+ "Failed to delete corrupted header file[%s] for [%s]; will be overwritten by re-fetch",
+ headerFile,
+ targetFilename
+ );
+ }
}
}
@@ -167,7 +181,32 @@ static PartialSegmentFileMapperV10 createForFile(
bitmapBuffer
);
- // restore downloaded files from the bitmap
+ // bitmap-vs-container repair pre-pass: if the bitmap claims a file is downloaded but its container file is
+ // missing on disk, the bitmap is lying (e.g. partial-cache eviction that cleared containers but couldn't atomically
+ // clear bits, or external file-system damage). Clear those bits before the restore loop so we don't spuriously
+ // sparse-allocate empty containers in the restore loop's ensureContainerInitialized call and treat their files as
+ // downloaded.
+ for (int i = 0; i < mapper.sortedFileNames.size(); i++) {
+ final int byteIndex = i / 8;
+ final int bitMask = 1 << (i % 8);
+ if ((bitmapBuffer.get(byteIndex) & bitMask) == 0) {
+ continue;
+ }
+ final String name = mapper.sortedFileNames.get(i);
+ final SegmentInternalFileMetadata fileMetadata = result.getMetadata().getFiles().get(name);
+ if (fileMetadata == null) {
+ continue;
+ }
+ final File containerFile = new File(
+ localCacheDir,
+ StringUtils.format("%s.container.%05d", targetFilename, fileMetadata.getContainer())
+ );
+ if (!containerFile.exists()) {
+ bitmapBuffer.put(byteIndex, (byte) (bitmapBuffer.get(byteIndex) & ~bitMask));
+ }
+ }
+
+ // restore downloaded files from the (now-repaired) bitmap
for (int i = 0; i < mapper.sortedFileNames.size(); i++) {
final int byteIndex = i / 8;
final int bitIndex = i % 8;
@@ -249,6 +288,57 @@ public SegmentFileMetadata getSegmentFileMetadata()
return metadata;
}
+  /**
+   * Names of the external segment files attached to this mapper (each one is its own {@link PartialSegmentFileMapperV10}
+   * accessible via {@link #getExternalMapper}). Empty for mappers with no externals. The returned set is read-only.
+   */
+  public Set<String> getExternalFilenames()
+  {
+    return java.util.Collections.unmodifiableSet(externalMappers.keySet());
+  }
+
+ /**
+ * Look up the child mapper for an external segment file. Cache-layer callers use this to walk external files'
+ * {@link SegmentFileMetadata} and route {@link #initializeContainer} / {@link #evictContainer} calls to the right
+ * physical file. Returns {@code null} when the name is not attached; {@link #mapperForContainer} throws instead.
+ */
+ @Nullable
+ public PartialSegmentFileMapperV10 getExternalMapper(String externalFilename)
+ {
+ return externalMappers.get(externalFilename);
+ }
+
+  /**
+   * Pick the mapper that owns a {@code (externalFilename, containerIndex)} reference held by cache-layer code: a
+   * {@code null} filename means the main file and yields {@code this}; any other name yields the attached external
+   * child mapper. Fails with a defensive {@link DruidException} when no external of that name is attached.
+   */
+  public PartialSegmentFileMapperV10 mapperForContainer(@Nullable String externalFilename)
+  {
+    if (externalFilename == null) {
+      return this;
+    }
+    final PartialSegmentFileMapperV10 child = externalMappers.get(externalFilename);
+    if (child != null) {
+      return child;
+    }
+    throw DruidException.defensive(
+        "External mapper[%s] is not attached to this mapper for [%s]",
+        externalFilename,
+        targetFilename
+    );
+  }
+
+ /**
+ * The {@code targetFilename} this mapper reads from and writes to inside the cache directory. For the entry-point
+ * mapper this is e.g. {@link org.apache.druid.segment.IndexIO#V10_FILE_NAME}; for an external child mapper it's
+ * the external file's name.
+ */
+ public String getTargetFilename()
+ {
+ return targetFilename;
+ }
+
@Override
public Set getInternalFilenames()
{
@@ -290,8 +380,8 @@ public ByteBuffer mapExternalFile(String filename, String name) throws IOExcepti
/**
* Pre-download a set of internal files so that subsequent {@link #mapFile(String)} calls for these files will not
- * trigger individual downloads. Files that are already downloaded are skipped. This is useful for batch-downloading
- * all files for a projection at once.
+ * trigger individual downloads. Files that are already downloaded are skipped. Useful for batch-downloading all
+ * files in a bundle at once (see {@link SegmentFileBuilder#startFileBundle}).
*/
public void ensureFilesAvailable(Set fileNames) throws IOException
{
@@ -303,6 +393,27 @@ public void ensureFilesAvailable(Set fileNames) throws IOException
}
}
+  /**
+   * Sum of the on-disk sizes of every header file behind this mapper: the main file's header plus the header of each
+   * attached external file mapper. Once metadata has been fetched and persisted this is the real amount to charge
+   * against the local cache, so callers can compare it with their pessimistic up-front estimate and shrink the
+   * reservation when the estimate was too large.
+   */
+  public long getOnDiskHeaderSize()
+  {
+    long externalBytes = 0;
+    for (PartialSegmentFileMapperV10 child : externalMappers.values()) {
+      externalBytes += headerFileSize(child.localCacheDir, child.targetFilename);
+    }
+    return headerFileSize(localCacheDir, targetFilename) + externalBytes;
+  }
+
+  private static long headerFileSize(File dir, String filename)
+  {
+    // File#length() is specified to return 0 for a missing file, so a separate exists() probe is redundant
+    return new File(dir, filename + METADATA_HEADER_SUFFIX).length();
+  }
+
/**
* Total bytes downloaded so far across all internal files, including external mappers.
*/
@@ -384,6 +495,104 @@ private void ensureFileDownloaded(String name, SegmentInternalFileMetadata fileM
}
}
+ /**
+ * Public entry point for cache-layer code that wants to ensure a container is materialized before any data is
+ * downloaded into it (e.g. when a per-bundle cache entry is mounted, the entry pre-allocates its container files
+ * so that subsequent {@link #mapFile} calls have somewhere to write into and the cache layer can charge the
+ * reservation up front). Runs {@code checkClosed()} and then delegates to {@code ensureContainerInitialized}.
+ */
+ public void initializeContainer(int containerIndex) throws IOException
+ {
+ checkClosed();
+ ensureContainerInitialized(containerIndex);
+ }
+
+ /**
+ * Reverse of {@link #initializeContainer(int)}: unmap the in-memory view of the container, delete the local
+ * container file, and clear the bitmap bits + {@link #downloadedFiles} entries for every internal file that lived
+ * in this container.
+ *
+ * Used by per-bundle cache entries on unmount/eviction to release the disk and memory footprint of one bundle
+ * without affecting other bundles sharing the same {@link PartialSegmentFileMapperV10}. After eviction, subsequent
+ * {@link #mapFile} calls for files in this container will re-trigger downloads via {@link #initializeContainer}
+ * and the bitmap will be repopulated incrementally.
+ *
+ * Concurrency contract. The caller is responsible for ensuring no concurrent {@link #mapFile} (or
+ * {@link #ensureFilesAvailable}) call is in flight for any file in this container. This is enforced one layer up
+ * by the cache-entry refcount: {@code PartialSegmentBundleCacheEntry} only invokes {@code evictContainer} from its
+ * {@code doActualUnmount} callback, which fires only after every reference acquired via {@code acquireReference()}
+ * has been closed. Bypassing that gate is dangerous: {@link ByteBufferUtils#unmap} frees the off-heap mapping, so a
+ * {@link ByteBuffer#slice} from a concurrent reader is a JVM SIGSEGV, not a recoverable error.
+ *
+ * A container never initialized by this instance has nothing to unmap, but its leftover file and bits are still cleared.
+ */
+ public void evictContainer(int containerIndex)
+ {
+ checkClosed();
+ containerLocks[containerIndex].lock();
+ try {
+ final MappedByteBuffer existing = containers[containerIndex];
+ if (existing != null) {
+ ByteBufferUtils.unmap(existing);
+ containers[containerIndex] = null;
+ }
+ // Try the cached containerFiles[i] first. If it's null, the container was never initialized in this mapper
+ // instance (typical right after create() with an empty bitmap), but the on-disk file may still exist from a
+ // previous run. Fall back to the deterministic path so eviction is always effective.
+ File containerFile = containerFiles[containerIndex];
+ if (containerFile == null) {
+ containerFile = new File(
+ localCacheDir,
+ StringUtils.format("%s.container.%05d", targetFilename, containerIndex)
+ );
+ }
+ if (containerFile.exists() && !containerFile.delete()) {
+ LOG.warn(
+ "Failed to delete container file[%s] during eviction of container[%d] for [%s]; leaking on disk",
+ containerFile,
+ containerIndex,
+ targetFilename
+ );
+ }
+ containerFiles[containerIndex] = null;
+ }
+ finally {
+ containerLocks[containerIndex].unlock();
+ }
+
+ // clear bitmap bits + downloadedFiles entries for files that lived in this container. Iterates
+ // metadata.getFiles() without external synchronization: SegmentFileMetadata is constructed once at mapper
+ // creation and its file map is effectively immutable for the mapper's lifetime, so concurrent iteration is safe.
+ for (Map.Entry entry : metadata.getFiles().entrySet()) {
+ if (entry.getValue().getContainer() != containerIndex) {
+ continue;
+ }
+ final String fileName = entry.getKey();
+ if (downloadedFiles.remove(fileName)) {
+ downloadedBytes.addAndGet(-entry.getValue().getSize());
+ }
+ clearBitmapBit(fileName);
+ }
+ }
+
+  // Clears the "downloaded" bit for the named internal file; names with no bitmap slot are ignored.
+  // The read-modify-write of the shared byte happens under bitmapLock.
+  private void clearBitmapBit(String name)
+  {
+    final Integer position = fileNameToIndex.get(name);
+    if (position != null) {
+      final int slot = position / 8;
+      final int keepMask = ~(1 << (position % 8));
+      bitmapLock.lock();
+      try {
+        bitmapBuffer.put(slot, (byte) (bitmapBuffer.get(slot) & keepMask));
+      }
+      finally {
+        bitmapLock.unlock();
+      }
+    }
+  }
+
/**
* Initialize a local container file if not already done. Creates a sparse file at the original container size
* and memory-maps it. The channel is closed immediately after mapping, the mmap persists independently, backed by
diff --git a/processing/src/main/java/org/apache/druid/segment/file/SegmentFileBuilder.java b/processing/src/main/java/org/apache/druid/segment/file/SegmentFileBuilder.java
index 6d5aea47374c..d589213f6054 100644
--- a/processing/src/main/java/org/apache/druid/segment/file/SegmentFileBuilder.java
+++ b/processing/src/main/java/org/apache/druid/segment/file/SegmentFileBuilder.java
@@ -41,25 +41,33 @@
*/
public interface SegmentFileBuilder extends Closeable
{
+ /**
+ * Default bundle name for containers written without an explicit {@link #startFileBundle} call. Thinking of file
+ * bundles as directories, this is the root directory that sits above any named subdirectories the writer declares.
+ * Containers always carry a non-null bundle name; if the writer never calls {@code startFileBundle}, they are
+ * tagged with this default. Cache-layer readers treat all containers sharing this name as one mount/evict unit.
+ */
+ String ROOT_BUNDLE_NAME = "__root__";
+
/**
* Add a column to the metadata of this segment file
*/
void addColumn(String name, ColumnDescriptor columnDescriptor);
/**
- * Declare that subsequent writes belong to a named group of files that should be stored together. This is a hint
+ * Declare that subsequent writes belong to a named bundle of files that should be stored together. This is a hint
* about physical layout, it does not constrain the names of files subsequently added, and implementations are free
* to ignore it entirely (the default is a no-op for formats that don't organize data into coarse-grained
* groupings). Projections are the primary caller today, but the mechanism is generic, it's equally applicable to
* grouping internal metadata, data shared across columns, etc.
*
- * Callers should invoke this before writing each group's files; passing {@code null} clears the current group.
- * Callers should not invoke this while a writer returned by {@link #addWithChannel} is still open (implementations
- * may reject such calls).
+ * Callers should invoke this before writing each bundle's files; passing {@code null} resets the current bundle to
+ * the {@link #ROOT_BUNDLE_NAME} default. Callers should not invoke this while a writer returned by
+ * {@link #addWithChannel} is still open (implementations may reject such calls).
*
- * @see SegmentFileBuilderV10#startFileGroup(String) for the V10 semantics
+ * @see SegmentFileBuilderV10#startFileBundle(String) for the V10 semantics
*/
- default void startFileGroup(@Nullable String groupName)
+ default void startFileBundle(@Nullable String bundleName)
{
}
diff --git a/processing/src/main/java/org/apache/druid/segment/file/SegmentFileBuilderV10.java b/processing/src/main/java/org/apache/druid/segment/file/SegmentFileBuilderV10.java
index 0b17960aa573..ba4a79cb0c8c 100644
--- a/processing/src/main/java/org/apache/druid/segment/file/SegmentFileBuilderV10.java
+++ b/processing/src/main/java/org/apache/druid/segment/file/SegmentFileBuilderV10.java
@@ -50,7 +50,6 @@
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
-import java.util.Objects;
import java.util.TreeMap;
/**
@@ -61,20 +60,21 @@
* V10 file format:
* | version (byte) | meta compression (byte) | meta length (int) | meta json | container 0 | ... | container n |
*
- * Containers are scoped to at most one declared file group. Callers declare which group they are writing via
- * {@link #startFileGroup(String)} before writing its files; a new container is started when the declared group
- * changes or the current container would exceed {@link #maxContainerSize}. A group whose total size exceeds the max
- * container size spans multiple containers, all tagged with the same group. This gives readers a clean 1:1 (or 1:N)
- * mapping between groups and containers, which supports per-group partial loading without any read-side reorganization.
- * Projections are the primary caller today, but the mechanism is equally usable for other organizational needs
- * (shared data across columns, internal metadata, etc.).
+ * Containers are scoped to exactly one declared bundle. Callers declare which bundle they are writing via
+ * {@link #startFileBundle(String)} before writing its files; a new container is started when the declared bundle
+ * changes or the current container would exceed {@link #maxContainerSize}. A bundle whose total size exceeds the max
+ * container size spans multiple containers, all tagged with the same bundle. This gives readers a clean 1:1 (or 1:N)
+ * mapping between bundles and containers, which supports per-bundle partial loading without any read-side
+ * reorganization. Projections are the primary caller today, but the mechanism is equally usable for other
+ * organizational needs (shared data across columns, internal metadata, etc.).
*
- * Callers that never invoke {@link #startFileGroup(String)} are mapped to a null-group container.
+ * Callers that never invoke {@link #startFileBundle(String)} have all writes tagged with the
+ * {@link SegmentFileBuilder#ROOT_BUNDLE_NAME} default bundle.
*
* Much of the logic here was ported from {@link org.apache.druid.java.util.common.io.smoosh.FileSmoosher} of the V9
* format and there is a fair bit of overlap. In fact, the initial implementation of this class wrapped a V9 smoosher
* to build the files before combining them into the V10 format. The main difference is that V9 fills each container to
- * the max while here we organize with file groups.
+ * the max while here we organize with bundles.
*/
public class SegmentFileBuilderV10 implements SegmentFileBuilder
{
@@ -115,8 +115,8 @@ public static SegmentFileBuilderV10 create(ObjectMapper jsonMapper, File baseDir
// Nested addWithChannel calls (for example a serializer that, while being written, emits sub-files for its own
// columnar parts) can't write into the current container concurrently with the outer writer. These nested writes are
// redirected to temporary files and merged back into container(s) once the outer writer completes. Each entry
- // carries the file group that was active when the delegate was created so that the merge routes it into the
- // correct container even if the active group has since changed.
+ // carries the bundle that was active when the delegate was created so that the merge routes it into the correct
+ // container even if the active bundle has since changed.
private final List completedDelegates = new ArrayList<>();
private final List inProgressDelegates = new ArrayList<>();
private long delegateFileCounter = 0;
@@ -124,11 +124,11 @@ public static SegmentFileBuilderV10 create(ObjectMapper jsonMapper, File baseDir
@Nullable
private ContainerWriter currentContainer = null;
private boolean writerCurrentlyInUse = false;
- // The file group declared by the most recent {@link #startFileGroup} call. Writes are routed into containers
- // tagged with this group. Remains {@code null} if the caller never declares one, in which case all writes share
- // a single null-group container.
- @Nullable
- private String currentFileGroup = null;
+
+ /**
+ * The bundle declared by the most recent {@link #startFileBundle} call. Never null: starts as, and resets to, the root bundle.
+ */
+ private String currentBundle = SegmentFileBuilder.ROOT_BUNDLE_NAME;
@Nullable
private String interval = null;
@@ -189,7 +189,7 @@ public SegmentFileChannel addWithChannel(final String name, final long size) thr
if (internalFiles.containsKey(name)) {
throw new IAE("Cannot add files of the same name, already have [%s]", name);
}
- ensureNameMatchesActiveGroup(name);
+ ensureNameMatchesActiveBundle(name);
if (size > maxContainerSize) {
throw DruidException.forPersona(DruidException.Persona.ADMIN)
.ofCategory(DruidException.Category.RUNTIME_FAILURE)
@@ -207,7 +207,7 @@ public SegmentFileChannel addWithChannel(final String name, final long size) thr
return delegateChannel(name, size);
}
- ensureContainer(currentFileGroup, size);
+ ensureContainer(currentBundle, size);
final ContainerWriter target = currentContainer;
final long startOffset = target.currOffset;
writerCurrentlyInUse = true;
@@ -284,59 +284,69 @@ public SegmentFileBuilder getExternalBuilder(String externalFile)
{
return externalSegmentFileBuilders.computeIfAbsent(
externalFile,
- (k) -> new SegmentFileBuilderV10(jsonMapper, externalFile, baseDir, maxContainerSize, metadataCompression)
+ (k) -> {
+ final SegmentFileBuilderV10 fresh =
+ new SegmentFileBuilderV10(jsonMapper, externalFile, baseDir, maxContainerSize, metadataCompression);
+ // A late-attached external inherits the parent's currently-active bundle on creation only; subsequent
+ // bundle changes flow through the parent's startFileBundle broadcast. Re-applying on every fetch would
+ // close the external's in-progress container, since V10 bundles cannot currently be re-entered.
+ if (!SegmentFileBuilder.ROOT_BUNDLE_NAME.equals(currentBundle)) {
+ fresh.startFileBundle(currentBundle);
+ }
+ return fresh;
+ }
);
}
@Override
public void addColumn(String name, ColumnDescriptor columnDescriptor)
{
- ensureNameMatchesActiveGroup(name);
+ ensureNameMatchesActiveBundle(name);
this.columns.put(name, columnDescriptor);
}
/**
- * If a file group is currently active (set by the most recent {@link #startFileGroup} call), enforce that names of
- * files and columns added under it are prefixed by {@code groupName + "/"}. Prevents silent collisions where two
- * groups write a file/column of the same bare name and the second silently overwrites the first in the metadata
- * maps. Existing production callers (e.g. {@code IndexMergerV10} via
- * {@code Projections.getProjectionSegmentInternalFileName}) already construct prefixed names, so this is a no-op
- * for them; it catches new writers that forget the convention.
+ * If a named bundle is currently active (set by the most recent {@link #startFileBundle} call to a non-root value),
+ * enforce that names of files and columns added under it are prefixed by {@code bundleName + "/"}. The root bundle
+ * is unconstrained.
*/
- private void ensureNameMatchesActiveGroup(String name)
+ private void ensureNameMatchesActiveBundle(String name)
{
- if (currentFileGroup != null && !name.startsWith(currentFileGroup + "/")) {
+ if (!SegmentFileBuilder.ROOT_BUNDLE_NAME.equals(currentBundle) && !name.startsWith(currentBundle + "/")) {
throw DruidException.defensive(
- "Name[%s] must start with the active file group prefix[%s/]",
+ "Name[%s] must start with the active bundle prefix[%s/]",
name,
- currentFileGroup
+ currentBundle
);
}
}
/**
- * Declare the file group that subsequent writes belong to. Writes are routed into a container tagged with the
- * declared group; a new container is rolled when the group changes or the incoming file won't fit. A group whose
- * total size exceeds {@link #maxContainerSize} is split across multiple consecutive containers, all tagged with
- * the same group. Passing {@code null} clears the current group; subsequent writes are then routed into a
- * null-group container until the next call.
+ * Declare the bundle that subsequent writes belong to. Writes are routed into a container tagged with the declared
+ * bundle; a new container is rolled when the bundle changes or the incoming file won't fit. A bundle whose total
+ * size exceeds {@link #maxContainerSize} is split across multiple consecutive containers, all tagged with the same
+ * bundle. Passing {@code null} resets to {@link SegmentFileBuilder#ROOT_BUNDLE_NAME}; subsequent writes are then
+ * routed into a root-bundle container until the next call.
*
* Current V10-specific limitations worth knowing:
*
- *
Groups cannot be re-entered. Once a different group (or {@code null}) has been declared, the previous
- * group's container is closed, and you cannot go back and append more files to it, any such writes would
- * open a fresh container for the re-declared group, so the group's files would end up in non-contiguous
- * containers. If all of a group's files must land in the same container(s), write them contiguously.
+ *
Bundles cannot be re-entered. Once a different bundle has been declared the previous bundle's container is
+ * closed, and you cannot go back and append more files to it; any such writes would open a fresh container
+ * for the re-declared bundle, so the bundle's files would end up in non-contiguous containers. If all of a
+ * bundle's files must land in the same container(s), write them contiguously.
*
Throws if called while a writer returned by {@link #addWithChannel} is still open.
*
*/
@Override
- public void startFileGroup(@Nullable String groupName)
+ public void startFileBundle(@Nullable String bundleName)
{
if (writerCurrentlyInUse) {
- throw DruidException.defensive("Cannot start file group[%s] while a writer is in progress", groupName);
+ throw DruidException.defensive("Cannot start file bundle[%s] while a writer is in progress", bundleName);
+ }
+ this.currentBundle = bundleName == null ? SegmentFileBuilder.ROOT_BUNDLE_NAME : bundleName;
+ for (SegmentFileBuilderV10 externalFile : externalSegmentFileBuilders.values()) {
+ externalFile.startFileBundle(bundleName);
}
- this.currentFileGroup = groupName;
}
public void addInterval(String interval)
@@ -464,35 +474,35 @@ private List buildContainerMetadata()
long offset = 0;
for (ContainerWriter container : containers) {
final long length = container.file.length();
- result.add(new SegmentFileContainerMetadata(offset, length, container.group));
+ result.add(new SegmentFileContainerMetadata(offset, length, container.bundle));
offset += length;
}
return result;
}
/**
- * Ensure that {@link #currentContainer} is ready to accept {@code size} bytes of a file belonging to {@code group}.
+ * Ensure that {@link #currentContainer} is ready to accept {@code size} bytes of a file belonging to {@code bundle}.
* Rolls the current container and starts a new one when:
*
*
there is no current container, or
- *
the current container is for a different group, or
+ *
the current container is for a different bundle, or
*
the current container cannot fit the incoming bytes within {@link #maxContainerSize}.
*
*/
- private void ensureContainer(@Nullable String group, long size) throws IOException
+ private void ensureContainer(String bundle, long size) throws IOException
{
if (currentContainer == null
- || !Objects.equals(currentContainer.group, group)
+ || !currentContainer.bundle.equals(bundle)
|| !currentContainer.canFit(size)) {
if (currentContainer != null) {
currentContainer.close();
}
- currentContainer = openNewContainer(group);
+ currentContainer = openNewContainer(bundle);
containers.add(currentContainer);
}
}
- private ContainerWriter openNewContainer(@Nullable String group) throws IOException
+ private ContainerWriter openNewContainer(String bundle) throws IOException
{
FileUtils.mkdirp(baseDir);
final int fileNum = containers.size();
@@ -500,7 +510,7 @@ private ContainerWriter openNewContainer(@Nullable String group) throws IOExcept
baseDir,
StringUtils.format("%s-%05d.container", outputFileName, fileNum)
);
- return new ContainerWriter(fileNum, containerFile, group, maxContainerSize);
+ return new ContainerWriter(fileNum, containerFile, bundle, maxContainerSize);
}
private SegmentFileChannel delegateChannel(final String name, final long size) throws IOException
@@ -509,9 +519,9 @@ private SegmentFileChannel delegateChannel(final String name, final long size) t
// cannot collide, since main and external always have distinct output file names.
final String delegateName = StringUtils.format("%s-delegate-%d", outputFileName, delegateFileCounter++);
final File tmpFile = new File(baseDir, delegateName);
- // Snapshot the active group now so that if this delegate is merged after the outer writer has advanced past
- // the group it was created under, it still routes into the correct container.
- final DelegateEntry entry = new DelegateEntry(tmpFile, name, currentFileGroup);
+ // Snapshot the active bundle now so that if this delegate is merged after the outer writer has advanced past
+ // the bundle it was created under, it still routes into the correct container.
+ final DelegateEntry entry = new DelegateEntry(tmpFile, name, currentBundle);
inProgressDelegates.add(entry);
return new SegmentFileChannel()
@@ -576,9 +586,9 @@ public void close() throws IOException
/**
* Move completed delegate temp files into containers by replaying them as regular {@link #add} calls. Only called
- * when no outer writer is currently holding the builder. Each entry's snapshotted group is restored as
- * {@link #currentFileGroup} during its replay so the file lands in the container that was active when the
- * nested write was originally requested, not whichever group happens to be active at merge time.
+ * when no outer writer is currently holding the builder. Each entry's snapshotted bundle is restored as
+ * {@link #currentBundle} during its replay so the file lands in the container that was active when the nested
+ * write was originally requested, not whichever bundle happens to be active at merge time.
*/
private void mergeDelegatedFiles() throws IOException
{
@@ -587,10 +597,10 @@ private void mergeDelegatedFiles() throws IOException
}
final List toProcess = new ArrayList<>(completedDelegates);
completedDelegates.clear();
- final String savedGroup = currentFileGroup;
+ final String savedBundle = currentBundle;
try {
for (DelegateEntry entry : toProcess) {
- currentFileGroup = entry.group;
+ currentBundle = entry.bundle;
add(entry.name, entry.file);
if (!entry.file.delete()) {
LOG.warn("Unable to delete delegate file[%s]", entry.file);
@@ -598,33 +608,32 @@ private void mergeDelegatedFiles() throws IOException
}
}
finally {
- currentFileGroup = savedGroup;
+ currentBundle = savedBundle;
}
}
- private record DelegateEntry(File file, String name, @Nullable String group)
+ private record DelegateEntry(File file, String name, String bundle)
{
}
/**
- * Low-level writer for a single container chunk file. One container holds internal files from at most one group.
+ * Low-level writer for a single container chunk file. One container holds internal files from exactly one bundle.
*/
private static class ContainerWriter implements GatheringByteChannel
{
private final int fileNum;
private final File file;
- @Nullable
- private final String group;
+ private final String bundle;
private final long maxSize;
private final Closer closer = Closer.create();
private final GatheringByteChannel channel;
private long currOffset = 0;
- ContainerWriter(int fileNum, File file, @Nullable String group, long maxSize) throws IOException
+ ContainerWriter(int fileNum, File file, String bundle, long maxSize) throws IOException
{
this.fileNum = fileNum;
this.file = file;
- this.group = group;
+ this.bundle = bundle;
this.maxSize = maxSize;
final FileOutputStream outStream = closer.register(new FileOutputStream(file));
this.channel = closer.register(outStream.getChannel());
@@ -675,9 +684,9 @@ public void close() throws IOException
closer.close();
if (LOG.isDebugEnabled()) {
LOG.debug(
- "Created container file[%s] for group[%s] of size[%,d] bytes.",
+ "Created container file[%s] for bundle[%s] of size[%,d] bytes.",
file.getAbsolutePath(),
- group,
+ bundle,
file.length()
);
}
diff --git a/processing/src/main/java/org/apache/druid/segment/file/SegmentFileContainerMetadata.java b/processing/src/main/java/org/apache/druid/segment/file/SegmentFileContainerMetadata.java
index 3739eb7718b8..45005a5a9f4d 100644
--- a/processing/src/main/java/org/apache/druid/segment/file/SegmentFileContainerMetadata.java
+++ b/processing/src/main/java/org/apache/druid/segment/file/SegmentFileContainerMetadata.java
@@ -22,6 +22,7 @@
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonInclude;
import com.fasterxml.jackson.annotation.JsonProperty;
+import org.apache.druid.annotations.SuppressFBWarnings;
import javax.annotation.Nullable;
import java.util.Objects;
@@ -30,30 +31,29 @@
* Starting offset and size of a 'container' stored in a V10 segment file; think the V10 equivalent of V9's external
* 'smoosh' files, e.g. 00000.smoosh.
*
- * Each container holds internal files belonging to at most one named file group, as declared at write time via
- * {@link SegmentFileBuilder#startFileGroup}. The {@link #fileGroup} field records that name so readers can attribute
- * a container to its group without parsing internal-file names. The field is {@code null} for containers written
- * without a {@code startFileGroup} call (or with {@code startFileGroup(null)}), and for containers from segments
- * produced by writers that pre-date this field; null serializes as a Jackson-omitted property so old segments
- * round-trip unchanged.
+ * Each container holds internal files belonging to exactly one named bundle, as declared at write time via
+ * {@link SegmentFileBuilder#startFileBundle}. The {@link #bundle} field records that name so readers can attribute a
+ * container to its bundle without parsing internal-file names. Containers written without an explicit
+ * {@code startFileBundle} call are tagged with {@link SegmentFileBuilder#ROOT_BUNDLE_NAME}; that default value is
+ * omitted from JSON output, so segments produced by writers pre-dating this field deserialize cleanly (missing
+ * property normalizes to the default in the constructor).
*/
public class SegmentFileContainerMetadata
{
private final long startOffset;
private final long size;
- @Nullable
- private final String fileGroup;
+ private final String bundle;
@JsonCreator
public SegmentFileContainerMetadata(
@JsonProperty("startOffset") long startOffset,
@JsonProperty("size") long size,
- @JsonProperty("fileGroup") @Nullable String fileGroup
+ @JsonProperty("bundle") @Nullable String bundle
)
{
this.startOffset = startOffset;
this.size = size;
- this.fileGroup = fileGroup;
+ this.bundle = bundle == null ? SegmentFileBuilder.ROOT_BUNDLE_NAME : bundle;
}
@JsonProperty
@@ -69,11 +69,10 @@ public long getSize()
}
@JsonProperty
- @JsonInclude(JsonInclude.Include.NON_NULL)
- @Nullable
- public String getFileGroup()
+ @JsonInclude(value = JsonInclude.Include.CUSTOM, valueFilter = DefaultBundleFilter.class)
+ public String getBundle()
{
- return fileGroup;
+ return bundle;
}
@Override
@@ -88,13 +87,13 @@ public boolean equals(Object o)
SegmentFileContainerMetadata that = (SegmentFileContainerMetadata) o;
return startOffset == that.startOffset
&& size == that.size
- && Objects.equals(fileGroup, that.fileGroup);
+ && Objects.equals(bundle, that.bundle);
}
@Override
public int hashCode()
{
- return Objects.hash(startOffset, size, fileGroup);
+ return Objects.hash(startOffset, size, bundle);
}
@Override
@@ -103,7 +102,32 @@ public String toString()
return "SegmentFileContainerMetadata{"
+ "startOffset=" + startOffset
+ ", size=" + size
- + ", fileGroup=" + fileGroup
+ + ", bundle=" + bundle
+ '}';
}
+
+ /**
+ * Jackson {@code valueFilter} that omits the {@code bundle} field from JSON when it carries the
+ * {@link SegmentFileBuilder#ROOT_BUNDLE_NAME} default. Jackson invokes {@code equals(value)} against the filter
+ * instance with the property value (a {@link String} here, not another filter): returning {@code true} means
+ * "value equals default, omit it." The asymmetric equals contract is intentional and required by Jackson's filter
+ * API, so the standard same-class check would defeat the mechanism.
+ */
+ static final class DefaultBundleFilter
+ {
+
+ @Override
+ @SuppressWarnings("EqualsDoesntCheckParameterClass")
+ @SuppressFBWarnings("EQ_CHECK_FOR_OPERAND_NOT_COMPATIBLE_WITH_THIS")
+ public boolean equals(Object value)
+ {
+ return SegmentFileBuilder.ROOT_BUNDLE_NAME.equals(value); // value is the property being serialized, not a filter
+ }
+
+ @Override
+ public int hashCode()
+ {
+ return 0; // no fields to hash; overridden only because equals is overridden
+ }
+ }
}
diff --git a/processing/src/test/java/org/apache/druid/guice/StartupInjectorBuilderTest.java b/processing/src/test/java/org/apache/druid/guice/StartupInjectorBuilderTest.java
index ece5e78cbf72..c8264ec85231 100644
--- a/processing/src/test/java/org/apache/druid/guice/StartupInjectorBuilderTest.java
+++ b/processing/src/test/java/org/apache/druid/guice/StartupInjectorBuilderTest.java
@@ -215,6 +215,34 @@ public void testValidator_acceptsHttpServerViewType()
new StartupInjectorBuilder().withExtensions().withProperties(props).build();
}
+  @Test
+  public void testValidator_rejectsRemoteIndexerRunnerType()
+  {
+    // Building the injector with the 'remote' runner type must fail with this exact, actionable message.
+    final String expectedMessage =
+        "Invalid value[remote] for property[druid.indexer.runner.type]. The ZooKeeper-based"
+        + " 'remote' task runner has been removed. Remove this property to use the default"
+        + " 'httpRemote' runner (or set it to 'local' for single-process testing). See the"
+        + " Druid upgrade notes for details.";
+
+    final Properties runtimeProperties = new Properties();
+    runtimeProperties.setProperty(StartupInjectorBuilder.INDEXER_RUNNER_TYPE_CONFIG_STRING, "remote");
+    final StartupInjectorBuilder injectorBuilder =
+        new StartupInjectorBuilder().withExtensions().withProperties(runtimeProperties);
+
+    final Throwable thrown = Assertions.assertThrows(ISE.class, injectorBuilder::build);
+    Assertions.assertEquals(expectedMessage, thrown.getMessage());
+  }
+
+  @Test
+  public void testValidator_acceptsHttpRemoteIndexerRunnerType()
+  {
+    final Properties runtimeProperties = new Properties();
+    runtimeProperties.setProperty(StartupInjectorBuilder.INDEXER_RUNNER_TYPE_CONFIG_STRING, "httpRemote");
+    final StartupInjectorBuilder injectorBuilder =
+        new StartupInjectorBuilder().withExtensions().withProperties(runtimeProperties);
+
+    // The test passes as long as build() completes without throwing.
+    injectorBuilder.build();
+  }
+
@Test
public void verifyInjectorBuild_withDeletedConfig_throwsException()
{
diff --git a/processing/src/test/java/org/apache/druid/math/expr/VectorExprResultConsistencyTest.java b/processing/src/test/java/org/apache/druid/math/expr/VectorExprResultConsistencyTest.java
index 7f21e489f3d8..9ffe5da3ace8 100644
--- a/processing/src/test/java/org/apache/druid/math/expr/VectorExprResultConsistencyTest.java
+++ b/processing/src/test/java/org/apache/druid/math/expr/VectorExprResultConsistencyTest.java
@@ -67,7 +67,7 @@ public class VectorExprResultConsistencyTest extends InitializedNullHandlingTest
{
private static final Logger log = new Logger(VectorExprResultConsistencyTest.class);
private static final int NUM_ITERATIONS = 10;
- private static final int VECTOR_SIZE = 4;
+  // Vector sizes that every sequential and randomized binding test is repeated for.
+  private static final List<Integer> VECTOR_SIZES = List.of(3, 8, 17, 67);
private static final Map LOOKUP = Map.of(
@@ -764,16 +764,18 @@ public static void testExpressionSequentialBindings(
final int numIterations
)
{
- for (int iter = 0; iter < numIterations; iter++) {
- assertEvalsMatch(
- expr,
- parsed,
- makeSequentialBinding(
- VECTOR_SIZE,
- types,
- -2 + (iter * VECTOR_SIZE) // include negative numbers and zero
- )
- );
+ for (int vectorSize : VECTOR_SIZES) {
+ for (int iter = 0; iter < numIterations; iter++) {
+ assertEvalsMatch(
+ expr,
+ parsed,
+ makeSequentialBinding(
+ vectorSize,
+ types,
+ -2 + (iter * vectorSize) // include negative numbers and zero
+ )
+ );
+ }
}
}
@@ -784,8 +786,10 @@ public static void testExpressionRandomizedBindings(
final int numIterations
)
{
- for (int iterations = 0; iterations < numIterations; iterations++) {
- assertEvalsMatch(expr, parsed, makeRandomizedBindings(VECTOR_SIZE, types));
+ for (int vectorSize : VECTOR_SIZES) {
+ for (int iterations = 0; iterations < numIterations; iterations++) {
+ assertEvalsMatch(expr, parsed, makeRandomizedBindings(vectorSize, types));
+ }
}
}
@@ -808,7 +812,8 @@ public static void assertEvalsMatch(
);
if (vectorEval.isValue() && nonVectorEval.isValue()) {
- for (int i = 0; i < VECTOR_SIZE; i++) {
+ final int vectorSize = bindings.lhs.length;
+ for (int i = 0; i < vectorSize; i++) {
final String message = StringUtils.format(
"Values do not match for row[%s] for expression[%s], bindings[%s]",
i,
@@ -1000,9 +1005,9 @@ private static Either evalNonVector(
@Nullable ExpressionType outputType
)
{
- final Object[] exprValues = new Object[VECTOR_SIZE];
+ final Object[] exprValues = new Object[bindings.length];
- for (int i = 0; i < VECTOR_SIZE; i++) {
+ for (int i = 0; i < bindings.length; i++) {
ExprEval> eval;
try {
eval = expr.eval(bindings[i]);
diff --git a/server/src/main/java/org/apache/druid/segment/realtime/NoopChatHandlerProvider.java b/processing/src/test/java/org/apache/druid/math/expr/VectorExprResultConsistencyVectorApiTest.java
similarity index 58%
rename from server/src/main/java/org/apache/druid/segment/realtime/NoopChatHandlerProvider.java
rename to processing/src/test/java/org/apache/druid/math/expr/VectorExprResultConsistencyVectorApiTest.java
index 9480bdf2bc7e..a14190796a27 100644
--- a/server/src/main/java/org/apache/druid/segment/realtime/NoopChatHandlerProvider.java
+++ b/processing/src/test/java/org/apache/druid/math/expr/VectorExprResultConsistencyVectorApiTest.java
@@ -17,35 +17,26 @@
* under the License.
*/
-package org.apache.druid.segment.realtime;
+package org.apache.druid.math.expr;
-import com.google.common.base.Optional;
+import org.junit.After;
+import org.junit.Before;
/**
+ * Re-runs every {@link VectorExprResultConsistencyTest} case with the SIMD ({@code jdk.incubator.vector}) expression
+ * vector processors enabled, ensuring the SIMD specializations agree with the non-vectorized reference.
*/
-public class NoopChatHandlerProvider implements ChatHandlerProvider
+public class VectorExprResultConsistencyVectorApiTest extends VectorExprResultConsistencyTest
{
- @Override
- public void register(String key, ChatHandler handler)
+ @Before
+ public void enableVectorApi()
{
- // do nothing
+ ExpressionProcessing.initializeForVectorApiTests();
}
- @Override
- public void register(String key, ChatHandler handler, boolean announce)
+ @After
+ public void resetExpressionProcessing()
{
- // do nothing
- }
-
- @Override
- public void unregister(String key)
- {
- // do nothing
- }
-
- @Override
- public Optional get(String key)
- {
- return Optional.absent();
+ ExpressionProcessing.initializeForTests();
}
}
diff --git a/processing/src/test/java/org/apache/druid/query/aggregation/ExpressionLambdaAggregationTest.java b/processing/src/test/java/org/apache/druid/query/aggregation/ExpressionLambdaAggregationTest.java
new file mode 100644
index 000000000000..baef2de3a0a2
--- /dev/null
+++ b/processing/src/test/java/org/apache/druid/query/aggregation/ExpressionLambdaAggregationTest.java
@@ -0,0 +1,215 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.query.aggregation;
+
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.ImmutableSet;
+import com.google.common.collect.Iterables;
+import org.apache.druid.data.input.InputRow;
+import org.apache.druid.data.input.MapBasedInputRow;
+import org.apache.druid.data.input.impl.DimensionsSpec;
+import org.apache.druid.data.input.impl.StringDimensionSchema;
+import org.apache.druid.java.util.common.DateTimes;
+import org.apache.druid.java.util.common.granularity.Granularities;
+import org.apache.druid.java.util.common.guava.Sequence;
+import org.apache.druid.query.Druids;
+import org.apache.druid.query.Result;
+import org.apache.druid.query.expression.TestExprMacroTable;
+import org.apache.druid.query.timeseries.TimeseriesQuery;
+import org.apache.druid.query.timeseries.TimeseriesResultValue;
+import org.apache.druid.segment.IndexBuilder;
+import org.apache.druid.segment.QueryableIndex;
+import org.apache.druid.segment.QueryableIndexSegment;
+import org.apache.druid.segment.Segment;
+import org.apache.druid.segment.incremental.IncrementalIndexSchema;
+import org.apache.druid.testing.InitializedNullHandlingTest;
+import org.apache.druid.timeline.SegmentId;
+import org.apache.druid.utils.CloseableUtils;
+import org.joda.time.DateTime;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TemporaryFolder;
+
+import java.util.Collections;
+import java.util.List;
+import java.util.Set;
+
+/**
+ * Verifies that {@link ExpressionLambdaAggregatorFactory} can be used as an ingest-time metric for primitive numeric
+ * types.
+ */
+public class ExpressionLambdaAggregationTest extends InitializedNullHandlingTest
+{
+ private static final String DIM = "groupKey";
+ private static final String LONG_FIELD = "longField";
+ private static final String DOUBLE_FIELD = "doubleField";
+ private static final DateTime TIMESTAMP = DateTimes.of("2020-01-01");
+
+ @Rule
+ public final TemporaryFolder tempFolder = new TemporaryFolder();
+
+ private QueryableIndex mergedIndex;
+ private Segment segment;
+
+ @After
+ public void tearDown()
+ {
+ if (segment != null) {
+ CloseableUtils.closeAndWrapExceptions(segment);
+ }
+ if (mergedIndex != null) {
+ CloseableUtils.closeAndWrapExceptions(mergedIndex);
+ }
+ }
+
+ @Test
+ public void testNumericExpressionLambdaIngestRollupViaMerge() throws Exception
+ {
+ // Three rows sharing the same (timestamp, dim) so they roll up into a single output row during merge.
+ // longField values: 1 (0b001), 2 (0b010), 4 (0b100) -> sum=7, bitwiseOr=7
+ // doubleField values: 1.5, 2.0, 0.25 -> sum=3.75
+ final List rows = List.of(
+ row(1L, 1.5),
+ row(2L, 2.0),
+ row(4L, 0.25)
+ );
+
+ final ExpressionLambdaAggregatorFactory longSum = new ExpressionLambdaAggregatorFactory(
+ "long_sum",
+ Set.of(LONG_FIELD),
+ null,
+ "0",
+ null,
+ null,
+ false,
+ false,
+ "__acc + " + LONG_FIELD,
+ null,
+ null,
+ null,
+ null,
+ TestExprMacroTable.INSTANCE
+ );
+
+ // BitwiseSqlAggregator-style: same single-field, op("__acc", field) fold
+ final ExpressionLambdaAggregatorFactory bitwiseOr = new ExpressionLambdaAggregatorFactory(
+ "bitwise_or",
+ ImmutableSet.of(LONG_FIELD),
+ null,
+ "0",
+ null,
+ null,
+ false,
+ false,
+ "bitwiseOr(\"__acc\", \"" + LONG_FIELD + "\")",
+ null,
+ null,
+ null,
+ null,
+ TestExprMacroTable.INSTANCE
+ );
+
+ final ExpressionLambdaAggregatorFactory doubleSum = new ExpressionLambdaAggregatorFactory(
+ "double_sum",
+ ImmutableSet.of(DOUBLE_FIELD),
+ null,
+ "0.0",
+ null,
+ null,
+ false,
+ false,
+ "__acc + " + DOUBLE_FIELD,
+ null,
+ null,
+ null,
+ null,
+ TestExprMacroTable.INSTANCE
+ );
+
+ final IncrementalIndexSchema schema = IncrementalIndexSchema.builder()
+ .withQueryGranularity(Granularities.NONE)
+ .withRollup(true)
+ .withDimensionsSpec(
+ DimensionsSpec.builder()
+ .setDimensions(ImmutableList.of(new StringDimensionSchema(DIM)))
+ .build()
+ )
+ .withMetrics(
+ new CountAggregatorFactory("count"),
+ longSum,
+ bitwiseOr,
+ doubleSum
+ )
+ .build();
+
+ mergedIndex = IndexBuilder.create()
+ .tmpDir(tempFolder.newFolder())
+ .schema(schema)
+ .intermediaryPersistSize(1)
+ .rows(rows)
+ .buildMMappedMergedIndex();
+
+ segment = new QueryableIndexSegment(mergedIndex, SegmentId.dummy("test"));
+
+ final TimeseriesQuery query = Druids.newTimeseriesQueryBuilder()
+ .dataSource("test")
+ .granularity(Granularities.ALL)
+ .intervals("1970/2050")
+ .aggregators(
+ new LongSumAggregatorFactory("count", "count"),
+ longSum.getCombiningFactory(),
+ bitwiseOr.getCombiningFactory(),
+ doubleSum.getCombiningFactory()
+ )
+ .build();
+
+ try (final AggregationTestHelper helper =
+ AggregationTestHelper.createTimeseriesQueryAggregationTestHelper(Collections.emptyList(), tempFolder)) {
+
+ final Sequence> seq = helper.runQueryOnSegmentsObjs(
+ ImmutableList.of(segment),
+ query
+ );
+ final TimeseriesResultValue result = Iterables.getOnlyElement(seq.toList()).getValue();
+
+ // Three input rows rolled up into one, count reflects rollup happened
+ Assert.assertEquals(3L, result.getLongMetric("count").longValue());
+ Assert.assertEquals(7L, result.getLongMetric("long_sum").longValue());
+ Assert.assertEquals(7L, result.getLongMetric("bitwise_or").longValue());
+ Assert.assertEquals(3.75, result.getDoubleMetric("double_sum").doubleValue(), 0.0);
+ }
+ }
+
+ private static InputRow row(long longVal, double doubleVal)
+ {
+ return new MapBasedInputRow(
+ TIMESTAMP,
+ ImmutableList.of(DIM),
+ ImmutableMap.of(
+ DIM, "a",
+ LONG_FIELD, longVal,
+ DOUBLE_FIELD, doubleVal
+ )
+ );
+ }
+}
diff --git a/processing/src/test/java/org/apache/druid/query/aggregation/ExpressionLambdaAggregatorFactoryTest.java b/processing/src/test/java/org/apache/druid/query/aggregation/ExpressionLambdaAggregatorFactoryTest.java
index 499bcef08fe2..29bf850d3d44 100644
--- a/processing/src/test/java/org/apache/druid/query/aggregation/ExpressionLambdaAggregatorFactoryTest.java
+++ b/processing/src/test/java/org/apache/druid/query/aggregation/ExpressionLambdaAggregatorFactoryTest.java
@@ -24,24 +24,31 @@
import com.google.common.collect.ImmutableSet;
import nl.jqno.equalsverifier.EqualsVerifier;
import org.apache.druid.java.util.common.HumanReadableBytes;
+import org.apache.druid.java.util.common.UOE;
import org.apache.druid.java.util.common.granularity.Granularities;
import org.apache.druid.query.Druids;
import org.apache.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory;
import org.apache.druid.query.aggregation.post.FieldAccessPostAggregator;
import org.apache.druid.query.aggregation.post.FinalizingFieldAccessPostAggregator;
import org.apache.druid.query.expression.TestExprMacroTable;
+import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector;
import org.apache.druid.query.timeseries.TimeseriesQuery;
import org.apache.druid.query.timeseries.TimeseriesQueryQueryToolChest;
+import org.apache.druid.segment.ColumnValueSelector;
import org.apache.druid.segment.TestHelper;
import org.apache.druid.segment.column.ColumnType;
import org.apache.druid.segment.column.RowSignature;
+import org.apache.druid.segment.selector.TestColumnValueSelector;
import org.apache.druid.testing.InitializedNullHandlingTest;
import org.junit.Assert;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.ExpectedException;
+import javax.annotation.Nullable;
import java.io.IOException;
+import java.util.Arrays;
+import java.util.List;
public class ExpressionLambdaAggregatorFactoryTest extends InitializedNullHandlingTest
{
@@ -545,6 +552,303 @@ public void testComplexTypeFinalized()
Assert.assertEquals(ColumnType.DOUBLE, agg.getResultType());
}
+  @Test
+  public void testLongAggregateCombiner()
+  {
+    final ExpressionLambdaAggregatorFactory factory = new ExpressionLambdaAggregatorFactory(
+        "expr_agg_name",
+        ImmutableSet.of("x"),
+        null,
+        "0",
+        null,
+        true,
+        false,
+        false,
+        "__acc + x",
+        null,
+        null,
+        null,
+        null,
+        TestExprMacroTable.INSTANCE
+    );
+    final AggregateCombiner longCombiner = factory.makeAggregateCombiner();
+    final TestColumnValueSelector inputs = TestColumnValueSelector.of(Long.class, Arrays.asList(1L, 2L, 3L));
+
+    // The first value seeds the combiner via reset; each later value is folded in: 1, then 1+2, then 1+2+3.
+    final long[] expectedRunningSums = {1L, 3L, 6L};
+    inputs.advance();
+    longCombiner.reset(inputs);
+    Assert.assertEquals(expectedRunningSums[0], longCombiner.getLong());
+    for (int i = 1; i < expectedRunningSums.length; i++) {
+      inputs.advance();
+      longCombiner.fold(inputs);
+      Assert.assertEquals(expectedRunningSums[i], longCombiner.getLong());
+    }
+  }
+
+  @Test
+  public void testDoubleAggregateCombiner()
+  {
+    final ExpressionLambdaAggregatorFactory factory = new ExpressionLambdaAggregatorFactory(
+        "expr_agg_name",
+        ImmutableSet.of("x"),
+        null,
+        "0.0",
+        null,
+        true,
+        false,
+        false,
+        "__acc + x",
+        null,
+        null,
+        null,
+        null,
+        TestExprMacroTable.INSTANCE
+    );
+    final AggregateCombiner doubleCombiner = factory.makeAggregateCombiner();
+    final TestColumnValueSelector inputs = TestColumnValueSelector.of(Double.class, Arrays.asList(1.5, 2.25, 0.25));
+
+    // The first value seeds the combiner via reset; each later value is folded in: 1.5, 3.75, 4.0.
+    final double[] expectedRunningSums = {1.5, 3.75, 4.0};
+    inputs.advance();
+    doubleCombiner.reset(inputs);
+    Assert.assertEquals(expectedRunningSums[0], doubleCombiner.getDouble(), 0.0);
+    for (int i = 1; i < expectedRunningSums.length; i++) {
+      inputs.advance();
+      doubleCombiner.fold(inputs);
+      Assert.assertEquals(expectedRunningSums[i], doubleCombiner.getDouble(), 0.0);
+    }
+  }
+
+  @Test
+  public void testNullableAggregateCombinerSkipsNulls()
+  {
+    final ExpressionLambdaAggregatorFactory factory = new ExpressionLambdaAggregatorFactory(
+        "expr_agg_name",
+        ImmutableSet.of("x"),
+        null,
+        "0",
+        null,
+        true,
+        false,
+        false,
+        "__acc + x",
+        null,
+        null,
+        null,
+        null,
+        TestExprMacroTable.INSTANCE
+    );
+    final AggregateCombiner nullableCombiner = factory.makeNullableAggregateCombiner();
+    final NullableLongSelector inputs = new NullableLongSelector(Arrays.asList(null, 5L, null, 7L));
+
+    // Input 1 (null): resetting onto a null leaves the combiner null.
+    inputs.advance();
+    nullableCombiner.reset(inputs);
+    Assert.assertTrue(nullableCombiner.isNull());
+
+    // Input 2 (5): the first non-null value makes the combiner non-null.
+    inputs.advance();
+    nullableCombiner.fold(inputs);
+    Assert.assertFalse(nullableCombiner.isNull());
+    Assert.assertEquals(5L, nullableCombiner.getLong());
+
+    // Input 3 (null): the accumulated value is unchanged.
+    inputs.advance();
+    nullableCombiner.fold(inputs);
+    Assert.assertEquals(5L, nullableCombiner.getLong());
+
+    // Input 4 (7): folded into the running value, 5 + 7.
+    inputs.advance();
+    nullableCombiner.fold(inputs);
+    Assert.assertEquals(12L, nullableCombiner.getLong());
+  }
+
+  @Test
+  public void testNullableAggregateCombinerWhenCombineAggregatesNullsExpressionSeesNulls()
+  {
+    // With shouldCombineAggregateNullInputs=true the combine expression receives null inputs as-is and has to deal
+    // with them itself; the `nvl` calls below turn nulls into 0 so the running value still advances.
+    final ExpressionLambdaAggregatorFactory factory = new ExpressionLambdaAggregatorFactory(
+        "expr_agg_name",
+        ImmutableSet.of("x"),
+        null,
+        "0",
+        null,
+        true,
+        true,
+        true,
+        "nvl(__acc, 0) + nvl(x, 0)",
+        null,
+        null,
+        null,
+        null,
+        TestExprMacroTable.INSTANCE
+    );
+    final AggregateCombiner nullableCombiner = factory.makeNullableAggregateCombiner();
+    final NullableLongSelector inputs = new NullableLongSelector(Arrays.asList(1L, null, 3L));
+
+    // Inputs 1, null, 3: the null is handed to the expression and coalesced to 0, so the value stays 1 before
+    // reaching 4.
+    final long[] expectedValues = {1L, 1L, 4L};
+    inputs.advance();
+    nullableCombiner.reset(inputs);
+    Assert.assertEquals(expectedValues[0], nullableCombiner.getLong());
+    for (int i = 1; i < expectedValues.length; i++) {
+      inputs.advance();
+      nullableCombiner.fold(inputs);
+      Assert.assertEquals(expectedValues[i], nullableCombiner.getLong());
+    }
+  }
+
+  @Test
+  public void testNullableAggregateCombinerNullExpressionResultPropagates()
+  {
+    // Same null pass-through setting as above, but the expression has no null handling: `__acc + null` is null under
+    // Druid expression semantics, so the combiner is expected to report isNull afterwards.
+    final ExpressionLambdaAggregatorFactory factory = new ExpressionLambdaAggregatorFactory(
+        "expr_agg_name",
+        ImmutableSet.of("x"),
+        null,
+        "0",
+        null,
+        true,
+        true,
+        true,
+        "__acc + x",
+        null,
+        null,
+        null,
+        null,
+        TestExprMacroTable.INSTANCE
+    );
+    final AggregateCombiner nullableCombiner = factory.makeNullableAggregateCombiner();
+    final NullableLongSelector inputs = new NullableLongSelector(Arrays.asList(1L, null));
+
+    // A non-null seed is reported as-is.
+    inputs.advance();
+    nullableCombiner.reset(inputs);
+    Assert.assertFalse(nullableCombiner.isNull());
+    Assert.assertEquals(1L, nullableCombiner.getLong());
+
+    // Folding the null input turns the result null.
+    inputs.advance();
+    nullableCombiner.fold(inputs);
+    Assert.assertTrue(nullableCombiner.isNull());
+  }
+
+
+  /**
+   * Test selector over a fixed list of nullable longs. The cursor starts before the first element, so
+   * {@link #advance()} must be called once before any value is read.
+   */
+  private static final class NullableLongSelector implements ColumnValueSelector<Long>
+  {
+    private final List<Long> values;
+    private int index = -1;
+
+    NullableLongSelector(List<Long> values)
+    {
+      this.values = values;
+    }
+
+    /** Moves the cursor to the next element of the backing list. */
+    void advance()
+    {
+      index++;
+    }
+
+    @Override
+    public long getLong()
+    {
+      // A null element reads as 0 through the primitive getters; isNull() is what distinguishes it.
+      Long v = values.get(index);
+      return v == null ? 0L : v;
+    }
+
+    @Override
+    public double getDouble()
+    {
+      return getLong();
+    }
+
+    @Override
+    public float getFloat()
+    {
+      return getLong();
+    }
+
+    @Override
+    public boolean isNull()
+    {
+      return values.get(index) == null;
+    }
+
+    @Nullable
+    @Override
+    public Long getObject()
+    {
+      return values.get(index);
+    }
+
+    @Override
+    public Class<Long> classOfObject()
+    {
+      return Long.class;
+    }
+
+    @Override
+    public void inspectRuntimeShape(RuntimeShapeInspector inspector)
+    {
+      // nothing to inspect for this test-only selector
+    }
+  }
+
+  @Test(expected = UOE.class)
+  public void testAggregateCombinerNotSupportedForNonNumericTypes()
+  {
+    // String-seeded fold and combine expressions: requesting a combiner is expected to throw UOE.
+    final ExpressionLambdaAggregatorFactory stringFactory = new ExpressionLambdaAggregatorFactory(
+        "expr_agg_name",
+        ImmutableSet.of("x"),
+        null,
+        "''",
+        "''",
+        true,
+        true,
+        true,
+        "concat(__acc, x)",
+        "concat(__acc, expr_agg_name)",
+        null,
+        null,
+        new HumanReadableBytes(2048),
+        TestExprMacroTable.INSTANCE
+    );
+
+    stringFactory.makeAggregateCombiner();
+  }
+
+  @Test(expected = UOE.class)
+  public void testAggregateCombinerNotSupportedWhenFoldAndCombineTypesDiffer()
+  {
+    // The fold seed is LONG (the intermediate column type) while the combine seed is LONG_ARRAY. Feeding a long
+    // segment column to an expression that expects arrays would silently yield wrong values, so the combiner must
+    // refuse instead.
+    final ExpressionLambdaAggregatorFactory mismatchedFactory = new ExpressionLambdaAggregatorFactory(
+        "expr_agg_name",
+        ImmutableSet.of("x"),
+        null,
+        "0",
+        "ARRAY[]",
+        null,
+        false,
+        false,
+        "__acc + x",
+        "array_set_add(__acc, expr_agg_name)",
+        null,
+        null,
+        new HumanReadableBytes(2048),
+        TestExprMacroTable.INSTANCE
+    );
+
+    Assert.assertEquals(ColumnType.LONG, mismatchedFactory.getIntermediateType());
+    mismatchedFactory.makeAggregateCombiner();
+  }
+
@Test
public void testResultArraySignature()
{
diff --git a/processing/src/test/java/org/apache/druid/query/filter/LikeDimFilterTest.java b/processing/src/test/java/org/apache/druid/query/filter/LikeDimFilterTest.java
index afa450bc471c..d122963f2efc 100644
--- a/processing/src/test/java/org/apache/druid/query/filter/LikeDimFilterTest.java
+++ b/processing/src/test/java/org/apache/druid/query/filter/LikeDimFilterTest.java
@@ -20,8 +20,11 @@
package org.apache.druid.query.filter;
import com.fasterxml.jackson.databind.ObjectMapper;
+import com.google.common.collect.ImmutableRangeSet;
+import com.google.common.collect.Range;
import com.google.common.collect.Sets;
import nl.jqno.equalsverifier.EqualsVerifier;
+import org.apache.druid.error.DruidException;
import org.apache.druid.jackson.DefaultObjectMapper;
import org.apache.druid.query.extraction.SubstringDimExtractionFn;
import org.apache.druid.segment.column.ColumnIndexSupplier;
@@ -322,6 +325,134 @@ public void testPatternFindsCorrectMiddleMatch()
assertMatch("1 _ 5%6", "1 2 3 1 4 5 6", DruidPredicateMatch.FALSE);
}
+  @Test
+  public void testGetDimensionRangeSet_literalPattern()
+  {
+    // A pattern with no wildcards is expected to map to the singleton range of that literal.
+    final LikeDimFilter literalFilter = new LikeDimFilter("foo", "bar", null, null);
+    Assert.assertEquals(ImmutableRangeSet.of(Range.singleton("bar")), literalFilter.getDimensionRangeSet("foo"));
+  }
+
+  @Test
+  public void testGetDimensionRangeSet_prefixPattern()
+  {
+    // A trailing '%' is expected to map to the half-open range from the prefix up to its successor.
+    final LikeDimFilter prefixFilter = new LikeDimFilter("foo", "bar%", null, null);
+    Assert.assertEquals(ImmutableRangeSet.of(Range.closedOpen("bar", "bas")), prefixFilter.getDimensionRangeSet("foo"));
+  }
+
+  @Test
+  public void testGetDimensionRangeSet_midPatternWildcard_returnsNull()
+  {
+    // A wildcard in the middle of the pattern yields no range set.
+    Assert.assertNull(new LikeDimFilter("foo", "bar%baz", null, null).getDimensionRangeSet("foo"));
+  }
+
+  @Test
+  public void testGetDimensionRangeSet_suffixPattern_returnsNull()
+  {
+    // A leading wildcard yields no range set.
+    Assert.assertNull(new LikeDimFilter("foo", "%bar", null, null).getDimensionRangeSet("foo"));
+  }
+
+  @Test
+  public void testGetDimensionRangeSet_singleWildcard_returnsAll()
+  {
+    // A bare '%' is expected to map to the unbounded range.
+    final LikeDimFilter matchAllFilter = new LikeDimFilter("foo", "%", null, null);
+    Assert.assertEquals(ImmutableRangeSet.of(Range.all()), matchAllFilter.getDimensionRangeSet("foo"));
+  }
+
+  @Test
+  public void testGetDimensionRangeSet_otherDimension_returnsNull()
+  {
+    // Asking about a dimension other than the filtered one yields no range set.
+    Assert.assertNull(new LikeDimFilter("foo", "bar%", null, null).getDimensionRangeSet("other"));
+  }
+
+  @Test
+  public void testGetDimensionRangeSet_withExtractionFn_returnsNull()
+  {
+    // With an extraction function attached, no range set is reported even for a prefix pattern.
+    final SubstringDimExtractionFn extractionFn = new SubstringDimExtractionFn(0, 3);
+    Assert.assertNull(new LikeDimFilter("foo", "bar%", null, extractionFn).getDimensionRangeSet("foo"));
+  }
+
+  @Test
+  public void testPrefixRange_singleLowercaseChar()
+  {
+    // Expected upper bound is the prefix with its last character bumped by one: 'o' -> 'p'.
+    Assert.assertEquals(
+        Range.closedOpen("foo", "fop"),
+        LikeDimFilter.prefixRange("foo")
+    );
+  }
+
+  @Test
+  public void testPrefixRange_uppercaseCarryStaysWithinAscii()
+  {
+    // 'Z' + 1 is '[' in ASCII, so the expected upper bound ends in '['.
+    Assert.assertEquals(
+        Range.closedOpen("foZ", "fo["),
+        LikeDimFilter.prefixRange("foZ")
+    );
+  }
+
+  @Test
+  public void testPrefixRange_trailingMaxValue_carriesPastIt()
+  {
+    // The prefix ends in Character.MAX_VALUE, which has no successor character; the expected upper bound therefore
+    // drops it and bumps the preceding 'o' to 'p'.
+    Assert.assertEquals(
+        Range.closedOpen("foo\uFFFF", "fop"),
+        LikeDimFilter.prefixRange("foo\uFFFF")
+    );
+  }
+
+  @Test
+  public void testPrefixRange_allMaxValue_fallsBackToAtLeast()
+  {
+    // Every character is Character.MAX_VALUE, so there is no upper bound to carry into; the expected range is
+    // bounded below only.
+    Assert.assertEquals(Range.atLeast("\uFFFF\uFFFF"), LikeDimFilter.prefixRange("\uFFFF\uFFFF"));
+  }
+
+  @Test
+  public void testPrefixRange_empty_throws()
+  {
+    // An empty prefix is rejected with a DruidException.
+    Assert.assertThrows(
+        DruidException.class,
+        () -> LikeDimFilter.prefixRange("")
+    );
+  }
+
+  @Test
+  public void testPrefixRange_enclosesAllPrefixedStrings()
+  {
+    final Range<String> range = LikeDimFilter.prefixRange("foo");
+
+    // Strings that start with the prefix fall inside the range.
+    Assert.assertTrue(range.contains("foo"));
+    Assert.assertTrue(range.contains("foo0"));
+    Assert.assertTrue(range.contains("foobar"));
+    Assert.assertTrue(range.contains("foozzz"));
+
+    // A shorter string, the exclusive upper bound itself, and a string past it fall outside.
+    Assert.assertFalse(range.contains("fo"));
+    Assert.assertFalse(range.contains("fop"));
+    Assert.assertFalse(range.contains("fox"));
+  }
+
+  @Test
+  public void testLexicographicSuccessor_basic()
+  {
+    // Last character bumped by one: "foo" -> "fop".
+    final String successor = LikeDimFilter.lexicographicSuccessor("foo");
+    Assert.assertEquals("fop", successor);
+  }
+
+  @Test
+  public void testLexicographicSuccessor_empty_returnsNullChar()
+  {
+    // The expected successor of the empty string is the one-character string holding the NUL character.
+    final String successor = LikeDimFilter.lexicographicSuccessor("");
+    Assert.assertEquals("\u0000", successor);
+  }
+
+  @Test
+  public void testLexicographicSuccessor_singleMaxValue_returnsNull()
+  {
+    // A lone Character.MAX_VALUE has nothing to carry into, so no successor is expected.
+    Assert.assertNull(LikeDimFilter.lexicographicSuccessor("\uFFFF"));
+  }
+
+  @Test
+  public void testLexicographicSuccessor_trailingMaxValues_truncatedAndCarried()
+  {
+    // Trailing Character.MAX_VALUE characters are expected to be dropped, with the carry bumping 'o' to 'p'.
+    Assert.assertEquals("fop", LikeDimFilter.lexicographicSuccessor("foo\uFFFF\uFFFF"));
+  }
+
private void assertCompilation(String pattern, String expected)
{
LikeDimFilter.LikeMatcher matcher = LikeDimFilter.LikeMatcher.from(pattern, '\\');
diff --git a/processing/src/test/java/org/apache/druid/query/spec/SpecificSegmentQueryRunnerTest.java b/processing/src/test/java/org/apache/druid/query/spec/SpecificSegmentQueryRunnerTest.java
index 5ab3783869ed..0101ce990b49 100644
--- a/processing/src/test/java/org/apache/druid/query/spec/SpecificSegmentQueryRunnerTest.java
+++ b/processing/src/test/java/org/apache/druid/query/spec/SpecificSegmentQueryRunnerTest.java
@@ -51,6 +51,8 @@
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
+import java.util.Map;
+import java.util.concurrent.atomic.AtomicReference;
public class SpecificSegmentQueryRunnerTest
{
@@ -182,8 +184,6 @@ public void run()
new CountAggregatorFactory("rows")
)
)
- // Do one test with CTX_SET_THREAD_NAME = false.
- .context(ImmutableMap.of(SpecificSegmentQueryRunner.CTX_SET_THREAD_NAME, false))
.build();
Sequence results = queryRunner.run(QueryPlus.wrap(query), responseContext);
List> res = results.toList();
@@ -197,6 +197,101 @@ public void run()
validate(mapper, descriptor, responseContext);
}
+  @Test
+  public void testSetThreadName()
+  {
+    final String unchangedName = "original-test-thread";
+
+    // Context key absent, or explicitly false: the thread keeps its name while processing.
+    assertThreadNameDuringProcessing(null, unchangedName);
+    assertThreadNameDuringProcessing(false, unchangedName);
+
+    // Explicitly true: the thread is renamed while processing.
+    assertThreadNameDuringProcessing(true, "processing_thread-name-query");
+  }
+
+ private void assertThreadNameDuringProcessing(
+ final Boolean setProcessingThreadNames,
+ final String expectedThreadNameDuringProcessing
+ )
+ {
+ final String originalThreadName = Thread.currentThread().getName();
+
+ try {
+ Thread.currentThread().setName("original-test-thread");
+
+ final AtomicReference runnerThreadName = new AtomicReference<>();
+ final AtomicReference sequenceThreadName = new AtomicReference<>();
+ final Result value = makeResult();
+ final SegmentDescriptor descriptor = new SegmentDescriptor(
+ Intervals.of("2012-01-01T00:00:00Z/P1D"),
+ "version",
+ 0
+ );
+
+ final SpecificSegmentQueryRunner> queryRunner = new SpecificSegmentQueryRunner<>(
+ new QueryRunner<>()
+ {
+ @Override
+ public Sequence> run(
+ QueryPlus> queryPlus,
+ ResponseContext responseContext
+ )
+ {
+ runnerThreadName.set(Thread.currentThread().getName());
+ return Sequences.withEffect(
+ Sequences.simple(Collections.singletonList(value)),
+ () -> sequenceThreadName.set(Thread.currentThread().getName()),
+ Execs.directExecutor()
+ );
+ }
+ },
+ new SpecificSegmentSpec(descriptor)
+ );
+
+ final TimeseriesQuery query = Druids.newTimeseriesQueryBuilder()
+ .dataSource("foo")
+ .granularity(Granularities.ALL)
+ .intervals(ImmutableList.of(Intervals.of("2012-01-01T00:00:00Z/P1D")))
+ .aggregators(
+ ImmutableList.of(
+ new CountAggregatorFactory("rows")
+ )
+ )
+ .context(makeThreadNameContext(setProcessingThreadNames))
+ .queryId("thread-name-query")
+ .build();
+
+ final Sequence> results = queryRunner.run(
+ QueryPlus.wrap(query),
+ ResponseContext.createEmpty()
+ );
+ results.toList();
+
+ Assertions.assertEquals(expectedThreadNameDuringProcessing, runnerThreadName.get());
+ Assertions.assertEquals(expectedThreadNameDuringProcessing, sequenceThreadName.get());
+ Assertions.assertEquals("original-test-thread", Thread.currentThread().getName());
+ }
+ finally {
+ Thread.currentThread().setName(originalThreadName);
+ }
+ }
+
+  /**
+   * Builds the query context for the thread-name tests: empty when {@code setProcessingThreadNames} is null,
+   * otherwise a single {@link SpecificSegmentQueryRunner#CTX_SET_THREAD_NAME} entry with the given value.
+   */
+  private static Map<String, Object> makeThreadNameContext(final Boolean setProcessingThreadNames)
+  {
+    if (setProcessingThreadNames == null) {
+      return Collections.emptyMap();
+    } else {
+      return ImmutableMap.of(SpecificSegmentQueryRunner.CTX_SET_THREAD_NAME, setProcessingThreadNames);
+    }
+  }
+
+  /**
+   * Builds a timeseries result at 2012-01-01 whose only metric, "rows", comes from a {@link CountAggregator} that
+   * has aggregated once.
+   */
+  private static Result<TimeseriesResultValue> makeResult()
+  {
+    final TimeseriesResultBuilder builder = new TimeseriesResultBuilder(
+        DateTimes.of("2012-01-01T00:00:00Z")
+    );
+    final CountAggregator rows = new CountAggregator();
+    rows.aggregate();
+    builder.addMetric("rows", rows.get());
+    return builder.build();
+  }
+
private void validate(ObjectMapper mapper, SegmentDescriptor descriptor, ResponseContext responseContext)
throws IOException
{
diff --git a/processing/src/test/java/org/apache/druid/segment/PartialQueryableIndexTest.java b/processing/src/test/java/org/apache/druid/segment/PartialQueryableIndexTest.java
index bb3e865fa118..6e82fb4a76a8 100644
--- a/processing/src/test/java/org/apache/druid/segment/PartialQueryableIndexTest.java
+++ b/processing/src/test/java/org/apache/druid/segment/PartialQueryableIndexTest.java
@@ -37,9 +37,10 @@
import org.apache.druid.segment.column.RowSignature;
import org.apache.druid.segment.column.ValueType;
import org.apache.druid.segment.data.CompressionStrategy;
+import org.apache.druid.segment.file.CountingRangeReader;
+import org.apache.druid.segment.file.DirectoryBackedRangeReader;
import org.apache.druid.segment.file.PartialSegmentFileMapperV10;
import org.apache.druid.segment.incremental.IncrementalIndexSchema;
-import org.apache.druid.segment.loading.SegmentRangeReader;
import org.apache.druid.segment.projections.QueryableProjection;
import org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMediumFactory;
import org.apache.druid.testing.InitializedNullHandlingTest;
@@ -49,19 +50,14 @@
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.io.TempDir;
-import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.IOException;
-import java.io.InputStream;
-import java.io.RandomAccessFile;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Set;
-import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ThreadLocalRandom;
-import java.util.concurrent.atomic.AtomicInteger;
class PartialQueryableIndexTest extends InitializedNullHandlingTest
{
@@ -421,7 +417,7 @@ void testMatchesEagerQueryableIndex() throws IOException
// verify that the partial index produces the same schema info as the eager (full) index
final IndexIO indexIO = TestHelper.getTestIndexIO();
final File cacheDir = newCacheDir("match_eager");
- final DirectoryRangeReader rangeReader = new DirectoryRangeReader(segmentDir);
+ final DirectoryBackedRangeReader rangeReader = new DirectoryBackedRangeReader(segmentDir);
try (
QueryableIndex eagerIndex = indexIO.loadIndex(segmentDir);
@@ -470,62 +466,4 @@ private File newCacheDir(String name) throws IOException
FileUtils.mkdirp(dir);
return dir;
}
-
- static class DirectoryRangeReader implements SegmentRangeReader
- {
- private final File directory;
-
- DirectoryRangeReader(File directory)
- {
- this.directory = directory;
- }
-
- @Override
- public InputStream readRange(String filename, long offset, long length) throws IOException
- {
- File target = new File(directory, filename);
- try (RandomAccessFile raf = new RandomAccessFile(target, "r")) {
- final int available = (int) Math.min(length, Math.max(0, raf.length() - offset));
- byte[] data = new byte[available];
- raf.seek(offset);
- raf.readFully(data);
- return new ByteArrayInputStream(data);
- }
- }
- }
-
- static class CountingRangeReader extends DirectoryRangeReader
- {
- private final AtomicInteger readCount = new AtomicInteger(0);
- private final Set readFilenames = ConcurrentHashMap.newKeySet();
-
- CountingRangeReader(File directory)
- {
- super(directory);
- }
-
- int getReadCount()
- {
- return readCount.get();
- }
-
- Set getReadFilenames()
- {
- return Set.copyOf(readFilenames);
- }
-
- void resetCount()
- {
- readCount.set(0);
- readFilenames.clear();
- }
-
- @Override
- public InputStream readRange(String filename, long offset, long length) throws IOException
- {
- readCount.incrementAndGet();
- readFilenames.add(filename);
- return super.readRange(filename, offset, length);
- }
- }
}
diff --git a/processing/src/test/java/org/apache/druid/segment/file/CountingRangeReader.java b/processing/src/test/java/org/apache/druid/segment/file/CountingRangeReader.java
new file mode 100644
index 000000000000..37f50250abd0
--- /dev/null
+++ b/processing/src/test/java/org/apache/druid/segment/file/CountingRangeReader.java
@@ -0,0 +1,91 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.segment.file;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Set;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.atomic.AtomicInteger;
+
+/**
+ * A {@link DirectoryBackedRangeReader} that tracks range-read activity across the partial-segment test suite. Records
+ * total reads, header-only reads (offset == 0, which corresponds to V10 header preamble fetches), and the set of
+ * filenames that have been read. Each call site reads only the metric(s) it cares about.
+ */
+public class CountingRangeReader extends DirectoryBackedRangeReader
+{
+  /** Number of {@link #readRange} calls since construction or the last {@link #resetCount()}. */
+  private final AtomicInteger readCount = new AtomicInteger(0);
+  /** Number of those calls whose requested offset was exactly 0. */
+  private final AtomicInteger headerReadCount = new AtomicInteger(0);
+  /** Distinct {@code filename} arguments seen by {@link #readRange} since construction or the last reset. */
+  private final Set<String> readFilenames = ConcurrentHashMap.newKeySet();
+
+  /**
+   * @param directory directory passed through unchanged to the {@link DirectoryBackedRangeReader} constructor
+   */
+  public CountingRangeReader(File directory)
+  {
+    super(directory);
+  }
+
+  /** Returns the number of range reads recorded since construction or the last {@link #resetCount()}. */
+  public int getReadCount()
+  {
+    return readCount.get();
+  }
+
+  /** Returns how many of the recorded range reads requested offset 0. */
+  public int getHeaderReadCount()
+  {
+    return headerReadCount.get();
+  }
+
+  /** Returns an unmodifiable snapshot of the filenames read so far; later reads are not reflected in it. */
+  public Set<String> getReadFilenames()
+  {
+    return Set.copyOf(readFilenames);
+  }
+
+  /** Zeroes both counters and forgets the recorded filenames. The three updates are not atomic as a group. */
+  public void resetCount()
+  {
+    readCount.set(0);
+    headerReadCount.set(0);
+    readFilenames.clear();
+  }
+
+  /**
+   * Records the call, then delegates to the superclass. Bookkeeping happens before the delegate runs, so a read
+   * that throws is still counted.
+   */
+  @Override
+  public InputStream readRange(String filename, long offset, long length) throws IOException
+  {
+    readCount.incrementAndGet();
+    if (offset == 0) {
+      headerReadCount.incrementAndGet();
+    }
+    readFilenames.add(filename);
+    return super.readRange(filename, offset, length);
+  }
+}
diff --git a/processing/src/test/java/org/apache/druid/segment/file/DirectoryBackedRangeReader.java b/processing/src/test/java/org/apache/druid/segment/file/DirectoryBackedRangeReader.java
new file mode 100644
index 000000000000..bc1df52946a6
--- /dev/null
+++ b/processing/src/test/java/org/apache/druid/segment/file/DirectoryBackedRangeReader.java
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.segment.file;
+
+import org.apache.druid.segment.loading.SegmentRangeReader;
+
+import java.io.ByteArrayInputStream;
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.RandomAccessFile;
+
+/**
+ * A {@link SegmentRangeReader} backed by a directory of files. Used across the partial-segment test suite (processing
+ * + server modules) to simulate deep-storage range reads against an on-disk layout produced by
+ * {@link SegmentFileBuilderV10} or {@link org.apache.druid.segment.IndexMergerV10}.
+ */
+public class DirectoryBackedRangeReader implements SegmentRangeReader
+{
+  private final File directory;
+
+  /**
+   * @param directory directory against which every {@code filename} passed to {@link #readRange} is resolved
+   */
+  public DirectoryBackedRangeReader(File directory)
+  {
+    this.directory = directory;
+  }
+
+  /**
+   * Returns an in-memory stream over at most {@code length} bytes of {@code filename} (resolved against the
+   * directory), starting at {@code offset}. The range is clamped to the end of the file, so an offset at or past
+   * EOF yields an empty stream.
+   *
+   * @throws IllegalArgumentException if {@code length} is negative
+   * @throws ArithmeticException      if the clamped range exceeds {@link Integer#MAX_VALUE} bytes
+   * @throws IOException              if the file cannot be opened or read, or if {@code offset} is negative
+   */
+  @Override
+  public InputStream readRange(String filename, long offset, long length) throws IOException
+  {
+    if (length < 0) {
+      throw new IllegalArgumentException("length must be >= 0, got " + length);
+    }
+    final File target = new File(directory, filename);
+    try (RandomAccessFile raf = new RandomAccessFile(target, "r")) {
+      final long remaining = Math.max(0, raf.length() - offset);
+      // toIntExact instead of an (int) cast: a range over 2 GiB fails loudly rather than wrapping silently.
+      final byte[] data = new byte[Math.toIntExact(Math.min(length, remaining))];
+      raf.seek(offset);
+      raf.readFully(data);
+      return new ByteArrayInputStream(data);
+    }
+  }
+}
diff --git a/processing/src/test/java/org/apache/druid/segment/file/PartialSegmentFileMapperV10Test.java b/processing/src/test/java/org/apache/druid/segment/file/PartialSegmentFileMapperV10Test.java
index 790ba10ece73..0294f65e20bf 100644
--- a/processing/src/test/java/org/apache/druid/segment/file/PartialSegmentFileMapperV10Test.java
+++ b/processing/src/test/java/org/apache/druid/segment/file/PartialSegmentFileMapperV10Test.java
@@ -33,11 +33,9 @@
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.io.TempDir;
-import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
-import java.io.InputStream;
import java.io.RandomAccessFile;
import java.nio.ByteBuffer;
import java.util.ArrayList;
@@ -48,7 +46,6 @@
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Future;
import java.util.concurrent.ThreadLocalRandom;
-import java.util.concurrent.atomic.AtomicInteger;
class PartialSegmentFileMapperV10Test
{
@@ -554,59 +551,4 @@ private static PartialSegmentFileMapperV10 createMapper(
);
}
- /**
- * A {@link SegmentRangeReader} backed by a directory of files, supporting both main and external file reads.
- */
- static class DirectoryBackedRangeReader implements SegmentRangeReader
- {
- private final File directory;
-
- DirectoryBackedRangeReader(File directory)
- {
- this.directory = directory;
- }
-
- @Override
- public InputStream readRange(String filename, long offset, long length) throws IOException
- {
- File target = new File(directory, filename);
- try (RandomAccessFile raf = new RandomAccessFile(target, "r")) {
- final int available = (int) Math.min(length, Math.max(0, raf.length() - offset));
- byte[] data = new byte[available];
- raf.seek(offset);
- raf.readFully(data);
- return new ByteArrayInputStream(data);
- }
- }
- }
-
- /**
- * A {@link DirectoryBackedRangeReader} that counts range reads (excluding metadata fetches).
- */
- static class CountingRangeReader extends DirectoryBackedRangeReader
- {
- private final AtomicInteger readCount = new AtomicInteger(0);
-
- CountingRangeReader(File directory)
- {
- super(directory);
- }
-
- int getReadCount()
- {
- return readCount.get();
- }
-
- void resetCount()
- {
- readCount.set(0);
- }
-
- @Override
- public InputStream readRange(String filename, long offset, long length) throws IOException
- {
- readCount.incrementAndGet();
- return super.readRange(filename, offset, length);
- }
- }
}
diff --git a/processing/src/test/java/org/apache/druid/segment/file/SegmentFileBuilderV10Test.java b/processing/src/test/java/org/apache/druid/segment/file/SegmentFileBuilderV10Test.java
index 6dd01d8e5bd9..8f065990012b 100644
--- a/processing/src/test/java/org/apache/druid/segment/file/SegmentFileBuilderV10Test.java
+++ b/processing/src/test/java/org/apache/druid/segment/file/SegmentFileBuilderV10Test.java
@@ -53,13 +53,13 @@ void testOneContainerPerProjection() throws IOException
{
final File baseDir = newBaseDir();
- // matches the production usage pattern in IndexMergerV10: call startFileGroup then write that projection's
+ // matches the production usage pattern in IndexMergerV10: call startFileBundle then write that projection's
// columns, then move on to the next projection.
final String[] projections = {"__base", "projA", "projB"};
final int colCount = 3;
try (SegmentFileBuilderV10 builder = SegmentFileBuilderV10.create(JSON_MAPPER, baseDir)) {
for (String projection : projections) {
- builder.startFileGroup(projection);
+ builder.startFileBundle(projection);
for (int col = 0; col < colCount; col++) {
final String name = projection + "/col" + col;
final File tmpFile = new File(tempDir, StringUtils.format("%s-%s.bin", projection, col));
@@ -89,14 +89,14 @@ void testProjectionNameWithSlashRoutesCorrectly() throws IOException
final String slashyProjection = "nested/projection";
final int colCount = 3;
try (SegmentFileBuilderV10 builder = SegmentFileBuilderV10.create(JSON_MAPPER, baseDir)) {
- builder.startFileGroup("__base");
+ builder.startFileBundle("__base");
for (int col = 0; col < colCount; col++) {
final String name = "__base/col" + col;
final File tmpFile = new File(tempDir, StringUtils.format("base-%s.bin", col));
Files.write(Ints.toByteArray(name.hashCode()), tmpFile);
builder.add(name, tmpFile);
}
- builder.startFileGroup(slashyProjection);
+ builder.startFileBundle(slashyProjection);
for (int col = 0; col < colCount; col++) {
final String name = slashyProjection + "/col" + col;
final File tmpFile = new File(tempDir, StringUtils.format("slashy-%s.bin", col));
@@ -133,7 +133,7 @@ void testAddWithoutGroupPrefixThrowsWhenGroupActive() throws IOException
final File baseDir = newBaseDir();
try (SegmentFileBuilderV10 builder = SegmentFileBuilderV10.create(JSON_MAPPER, baseDir)) {
- builder.startFileGroup("projA");
+ builder.startFileBundle("projA");
final File tmp = new File(tempDir, "no-prefix.bin");
Files.write(Ints.toByteArray(1), tmp);
// file name doesn't start with "projA/", so add must throw
@@ -147,7 +147,7 @@ void testAddWithChannelWithoutGroupPrefixThrowsWhenGroupActive() throws IOExcept
final File baseDir = newBaseDir();
try (SegmentFileBuilderV10 builder = SegmentFileBuilderV10.create(JSON_MAPPER, baseDir)) {
- builder.startFileGroup("projA");
+ builder.startFileBundle("projA");
Assertions.assertThrows(RuntimeException.class, () -> builder.addWithChannel("wrong/col0", 4));
}
}
@@ -158,7 +158,7 @@ void testAddColumnWithoutGroupPrefixThrowsWhenGroupActive() throws IOException
final File baseDir = newBaseDir();
try (SegmentFileBuilderV10 builder = SegmentFileBuilderV10.create(JSON_MAPPER, baseDir)) {
- builder.startFileGroup("projA");
+ builder.startFileBundle("projA");
Assertions.assertThrows(
RuntimeException.class,
() -> builder.addColumn("wrong_no_prefix", new ColumnDescriptor.Builder()
@@ -169,12 +169,12 @@ void testAddColumnWithoutGroupPrefixThrowsWhenGroupActive() throws IOException
}
@Test
- void testAddWithoutPrefixIsAllowedWhenNoGroupActive() throws IOException
+ void testAddWithoutPrefixIsAllowedInRootBundle() throws IOException
{
final File baseDir = newBaseDir();
try (SegmentFileBuilderV10 builder = SegmentFileBuilderV10.create(JSON_MAPPER, baseDir)) {
- // never call startFileGroup; bare names are fine
+ // never call startFileBundle; bare names are fine under the default root bundle
final File tmp = new File(tempDir, "bare.bin");
Files.write(Ints.toByteArray(1), tmp);
builder.add("col0", tmp);
@@ -183,7 +183,7 @@ void testAddWithoutPrefixIsAllowedWhenNoGroupActive() throws IOException
}
@Test
- void testContainerMetadataCarriesFileGroup() throws IOException
+ void testContainerMetadataCarriesBundle() throws IOException
{
final File baseDir = newBaseDir();
@@ -191,7 +191,7 @@ void testContainerMetadataCarriesFileGroup() throws IOException
final int colCount = 2;
try (SegmentFileBuilderV10 builder = SegmentFileBuilderV10.create(JSON_MAPPER, baseDir)) {
for (String projection : projections) {
- builder.startFileGroup(projection);
+ builder.startFileBundle(projection);
for (int col = 0; col < colCount; col++) {
final String name = projection + "/col" + col;
final File tmpFile = new File(tempDir, StringUtils.format("%s-%s.bin", projection, col));
@@ -206,35 +206,34 @@ void testContainerMetadataCarriesFileGroup() throws IOException
final SegmentFileMetadata metadata = mapper.getSegmentFileMetadata();
Assertions.assertEquals(projections.length, metadata.getContainers().size());
- // Each container's fileGroup must equal the group active when it was written. Build the expected list by
- // walking the files: each container holds files from exactly one group, so the first file's group prefix is
- // authoritative.
+ // Each container's bundle must equal the bundle active when it was written. Each container holds files from
+ // exactly one bundle, so the first file's name prefix is authoritative.
for (int ci = 0; ci < metadata.getContainers().size(); ci++) {
final int containerIdx = ci;
- final String expectedGroup = metadata.getFiles().entrySet().stream()
+ final String expectedBundle = metadata.getFiles().entrySet().stream()
.filter(e -> e.getValue().getContainer() == containerIdx)
.map(e -> e.getKey().substring(0, e.getKey().indexOf('/')))
.findFirst()
.orElseThrow();
Assertions.assertEquals(
- expectedGroup,
- metadata.getContainers().get(ci).getFileGroup(),
- "container " + ci + " fileGroup mismatch"
+ expectedBundle,
+ metadata.getContainers().get(ci).getBundle(),
+ "container " + ci + " bundle mismatch"
);
}
}
}
@Test
- void testContainerWrittenWithoutStartFileGroupHasNullFileGroup() throws IOException
+ void testContainerWrittenWithoutStartFileBundleDefaultsToRoot() throws IOException
{
final File baseDir = newBaseDir();
try (SegmentFileBuilderV10 builder = SegmentFileBuilderV10.create(JSON_MAPPER, baseDir)) {
- // never call startFileGroup; the single container should carry fileGroup == null
+ // never call startFileBundle; the single container should be tagged with ROOT_BUNDLE_NAME
for (int col = 0; col < 3; col++) {
final String name = "col" + col;
- final File tmpFile = new File(tempDir, StringUtils.format("nogroup-%s.bin", col));
+ final File tmpFile = new File(tempDir, StringUtils.format("nobundle-%s.bin", col));
Files.write(Ints.toByteArray(name.hashCode()), tmpFile);
builder.add(name, tmpFile);
}
@@ -244,50 +243,81 @@ void testContainerWrittenWithoutStartFileGroupHasNullFileGroup() throws IOExcept
try (SegmentFileMapperV10 mapper = SegmentFileMapperV10.create(segmentFile, JSON_MAPPER)) {
final SegmentFileMetadata metadata = mapper.getSegmentFileMetadata();
Assertions.assertEquals(1, metadata.getContainers().size());
- Assertions.assertNull(metadata.getContainers().get(0).getFileGroup());
+ Assertions.assertEquals(
+ SegmentFileBuilder.ROOT_BUNDLE_NAME,
+ metadata.getContainers().get(0).getBundle()
+ );
}
}
@Test
- void testStartFileGroupNullClearsCurrentGroup() throws IOException
+ void testStartFileBundleNullResetsToRoot() throws IOException
{
final File baseDir = newBaseDir();
try (SegmentFileBuilderV10 builder = SegmentFileBuilderV10.create(JSON_MAPPER, baseDir)) {
- builder.startFileGroup("first");
+ builder.startFileBundle("first");
final File firstFile = new File(tempDir, "first.bin");
Files.write(Ints.toByteArray(1), firstFile);
builder.add("first/a", firstFile);
- builder.startFileGroup(null);
- final File noGroupFile = new File(tempDir, "ng.bin");
- Files.write(Ints.toByteArray(2), noGroupFile);
- builder.add("ng/a", noGroupFile);
+ // Passing null resets to ROOT_BUNDLE_NAME; subsequent writes go in a root-bundle container.
+ builder.startFileBundle(null);
+ final File rootFile = new File(tempDir, "root.bin");
+ Files.write(Ints.toByteArray(2), rootFile);
+ builder.add("root_a", rootFile);
}
final File segmentFile = new File(baseDir, IndexIO.V10_FILE_NAME);
try (SegmentFileMapperV10 mapper = SegmentFileMapperV10.create(segmentFile, JSON_MAPPER)) {
final SegmentFileMetadata metadata = mapper.getSegmentFileMetadata();
Assertions.assertEquals(2, metadata.getContainers().size());
- Assertions.assertEquals("first", metadata.getContainers().get(0).getFileGroup());
- Assertions.assertNull(metadata.getContainers().get(1).getFileGroup());
+ Assertions.assertEquals("first", metadata.getContainers().get(0).getBundle());
+ Assertions.assertEquals(
+ SegmentFileBuilder.ROOT_BUNDLE_NAME,
+ metadata.getContainers().get(1).getBundle()
+ );
}
}
@Test
- void testStartFileGroupWhileWriterInUseThrows() throws IOException
+ void testStartFileBundleWhileWriterInUseThrows() throws IOException
{
final File baseDir = newBaseDir();
try (SegmentFileBuilderV10 builder = SegmentFileBuilderV10.create(JSON_MAPPER, baseDir)) {
- builder.startFileGroup("__base");
+ builder.startFileBundle("__base");
try (SegmentFileChannel outer = builder.addWithChannel("__base/col0", 4)) {
- Assertions.assertThrows(RuntimeException.class, () -> builder.startFileGroup("projA"));
+ Assertions.assertThrows(RuntimeException.class, () -> builder.startFileBundle("projA"));
outer.write(ByteBuffer.wrap(new byte[]{1, 2, 3, 4}));
}
}
}
+  @Test
+  void testStartFileBundleWithRootNameIsSameAsNull() throws IOException
+  {
+    final File baseDir = newBaseDir();
+
+    try (SegmentFileBuilderV10 builder = SegmentFileBuilderV10.create(JSON_MAPPER, baseDir)) {
+      // Explicit ROOT_BUNDLE_NAME and null are equivalent; both resolve to the default root bundle.
+      builder.startFileBundle(SegmentFileBuilder.ROOT_BUNDLE_NAME);
+      // Stage the input under tempDir like the sibling tests, so baseDir holds only what the builder writes.
+      final File tmp = new File(tempDir, "tmp.bin");
+      Files.write(new byte[]{1, 2, 3, 4}, tmp);
+      // A bare name (no "<bundle>/" prefix) is used here, as in the other root-bundle tests.
+      builder.add("col0", tmp);
+    }
+
+    final File segmentFile = new File(baseDir, IndexIO.V10_FILE_NAME);
+    try (SegmentFileMapperV10 mapper = SegmentFileMapperV10.create(segmentFile, JSON_MAPPER)) {
+      final SegmentFileMetadata metadata = mapper.getSegmentFileMetadata();
+      // Exactly one container was written, and it must be tagged with the root bundle name.
+      Assertions.assertEquals(1, metadata.getContainers().size());
+      Assertions.assertEquals(SegmentFileBuilder.ROOT_BUNDLE_NAME, metadata.getContainers().get(0).getBundle());
+    }
+  }
+
@Test
void testExternalBuilderAlsoSplitsContainersByProjection() throws IOException
{
@@ -300,7 +330,7 @@ void testExternalBuilderAlsoSplitsContainersByProjection() throws IOException
try (SegmentFileBuilderV10 builder = SegmentFileBuilderV10.create(JSON_MAPPER, baseDir)) {
for (String projection : mainProjections) {
- builder.startFileGroup(projection);
+ builder.startFileBundle(projection);
for (int col = 0; col < colCount; col++) {
final String name = projection + "/col" + col;
final File tmpFile = new File(tempDir, StringUtils.format("main-%s-%s.bin", projection, col));
@@ -313,7 +343,7 @@ void testExternalBuilderAlsoSplitsContainersByProjection() throws IOException
// sub-file with its own header + containers. Projection-per-container splitting must apply there too.
final SegmentFileBuilder external = builder.getExternalBuilder(externalName);
for (String projection : externalProjections) {
- external.startFileGroup(projection);
+ external.startFileBundle(projection);
for (int col = 0; col < colCount; col++) {
final String name = projection + "/col" + (col + 1000);
final File tmpFile = new File(tempDir, StringUtils.format("ext-%s-%s.bin", projection, col));
@@ -370,7 +400,7 @@ void testNestedAddWithChannelDelegatesPerBuilder() throws IOException
final byte[] nestedBytes = new byte[]{5, 6, 7, 8};
try (SegmentFileBuilderV10 builder = SegmentFileBuilderV10.create(JSON_MAPPER, baseDir)) {
- builder.startFileGroup("__base");
+ builder.startFileBundle("__base");
try (SegmentFileChannel outer = builder.addWithChannel("__base/outer", outerBytes.length)) {
// nested write while outer is in use → forced into delegate temp file
try (SegmentFileChannel nested = builder.addWithChannel("__base/nested", nestedBytes.length)) {
@@ -380,7 +410,7 @@ void testNestedAddWithChannelDelegatesPerBuilder() throws IOException
}
final SegmentFileBuilder external = builder.getExternalBuilder(externalName);
- external.startFileGroup("extProj");
+ external.startFileBundle("extProj");
try (SegmentFileChannel extOuter = external.addWithChannel("extProj/outer", outerBytes.length)) {
try (SegmentFileChannel extNested = external.addWithChannel("extProj/nested", nestedBytes.length)) {
extNested.write(ByteBuffer.wrap(nestedBytes));
@@ -399,12 +429,12 @@ void testNestedAddWithChannelDelegatesPerBuilder() throws IOException
}
@Test
- void testNestedDelegateClosedAfterOuterRoutesToOriginalGroup() throws IOException
+ void testNestedDelegateClosedAfterOuterRoutesToOriginalBundle() throws IOException
{
// doing something like this is weird and probably should happen in practice, but if a nested write was requested
- // while file group "groupA" was active; even if the caller switches to "groupB" before finally closing the nested
- // channel, the delegated bytes must still land in groupA's container, not groupB's. Otherwise the grouping breaks,
- // and files from other groups end up in the same container.
+ // while bundle "groupA" was active; even if the caller switches to "groupB" before finally closing the nested
+ // channel, the delegated bytes must still land in groupA's container, not groupB's. Otherwise bundles break and
+ // files from other bundles end up in the same container.
final File baseDir = newBaseDir();
final byte[] outerBytes = new byte[]{1, 2, 3, 4};
@@ -412,7 +442,7 @@ void testNestedDelegateClosedAfterOuterRoutesToOriginalGroup() throws IOExceptio
final byte[] groupBBytes = new byte[]{9, 10, 11, 12};
try (SegmentFileBuilderV10 builder = SegmentFileBuilderV10.create(JSON_MAPPER, baseDir)) {
- builder.startFileGroup("groupA");
+ builder.startFileBundle("groupA");
final SegmentFileChannel outer = builder.addWithChannel("groupA/outer", outerBytes.length);
final SegmentFileChannel nested = builder.addWithChannel("groupA/nested", nestedBytes.length);
@@ -423,7 +453,7 @@ void testNestedDelegateClosedAfterOuterRoutesToOriginalGroup() throws IOExceptio
outer.close();
// switch group before closing the still-open nested delegate; merge must use the snapshotted "groupA"
- builder.startFileGroup("groupB");
+ builder.startFileBundle("groupB");
nested.close();
// and a real groupB file so we can verify groupB's container is independent of the nested file
diff --git a/processing/src/test/java/org/apache/druid/segment/file/SegmentFileContainerMetadataTest.java b/processing/src/test/java/org/apache/druid/segment/file/SegmentFileContainerMetadataTest.java
index 5a56dcd7faf2..24374071d148 100644
--- a/processing/src/test/java/org/apache/druid/segment/file/SegmentFileContainerMetadataTest.java
+++ b/processing/src/test/java/org/apache/druid/segment/file/SegmentFileContainerMetadataTest.java
@@ -36,33 +36,48 @@ void testEqualsAndHashCode()
}
@Test
- void testSerdeWithFileGroup() throws Exception
+ void testSerdeWithNamedBundle() throws Exception
{
final SegmentFileContainerMetadata metadata = new SegmentFileContainerMetadata(100, 4096, "projA");
final String json = JSON_MAPPER.writeValueAsString(metadata);
- Assertions.assertTrue(json.contains("\"fileGroup\":\"projA\""), "fileGroup must be present in serialized JSON: " + json);
+ Assertions.assertTrue(json.contains("\"bundle\":\"projA\""), "bundle must be present in serialized JSON: " + json);
Assertions.assertEquals(metadata, JSON_MAPPER.readValue(json, SegmentFileContainerMetadata.class));
}
@Test
- void testSerdeWithNullFileGroupOmitsField() throws Exception
+ void testNullBundleNormalizesToRootAndOmitsFromJson() throws Exception
{
- // Old-format segments don't have fileGroup; serializing null must omit the property so older readers (and
- // future versions reading old segments) round-trip unchanged.
+ // Null in the constructor is the writer-side equivalent of "no explicit startFileBundle call"; the field
+ // normalizes to ROOT_BUNDLE_NAME, and the default value is omitted from JSON so segments without explicit
+ // bundles stay compact on disk.
final SegmentFileContainerMetadata metadata = new SegmentFileContainerMetadata(0, 1024, null);
+ Assertions.assertEquals(SegmentFileBuilder.ROOT_BUNDLE_NAME, metadata.getBundle());
final String json = JSON_MAPPER.writeValueAsString(metadata);
- Assertions.assertFalse(json.contains("fileGroup"), "null fileGroup must be omitted from JSON, got: " + json);
+ Assertions.assertFalse(json.contains("bundle"), "default bundle must be omitted from JSON, got: " + json);
Assertions.assertEquals(metadata, JSON_MAPPER.readValue(json, SegmentFileContainerMetadata.class));
}
@Test
- void testDeserializeLegacyJsonWithoutFileGroup() throws Exception
+ void testExplicitRootBundleAlsoOmitsFromJson() throws Exception
{
- // Bytes produced by a writer pre-dating the fileGroup field must deserialize cleanly with fileGroup == null.
- final String legacyJson = "{\"startOffset\":42,\"size\":8192}";
- final SegmentFileContainerMetadata metadata = JSON_MAPPER.readValue(legacyJson, SegmentFileContainerMetadata.class);
+ // Passing ROOT_BUNDLE_NAME explicitly is equivalent to passing null; both normalize to the default and both
+ // omit the field from JSON.
+ final SegmentFileContainerMetadata metadata =
+ new SegmentFileContainerMetadata(0, 1024, SegmentFileBuilder.ROOT_BUNDLE_NAME);
+ final String json = JSON_MAPPER.writeValueAsString(metadata);
+ Assertions.assertFalse(json.contains("bundle"), "explicit root bundle must be omitted from JSON, got: " + json);
+ Assertions.assertEquals(metadata, JSON_MAPPER.readValue(json, SegmentFileContainerMetadata.class));
+ }
+
+ @Test
+ void testDeserializeJsonWithoutBundleFieldDefaultsToRoot() throws Exception
+ {
+ // Bytes produced by a writer that didn't include a bundle field (old segments, or new segments without
+ // explicit startFileBundle) must deserialize to the ROOT_BUNDLE_NAME default.
+ final String json = "{\"startOffset\":42,\"size\":8192}";
+ final SegmentFileContainerMetadata metadata = JSON_MAPPER.readValue(json, SegmentFileContainerMetadata.class);
Assertions.assertEquals(42, metadata.getStartOffset());
Assertions.assertEquals(8192, metadata.getSize());
- Assertions.assertNull(metadata.getFileGroup());
+ Assertions.assertEquals(SegmentFileBuilder.ROOT_BUNDLE_NAME, metadata.getBundle());
}
}
diff --git a/server/pom.xml b/server/pom.xml
index 9fffcbf86882..c0f4ee55f6d1 100644
--- a/server/pom.xml
+++ b/server/pom.xml
@@ -78,10 +78,6 @@
-
- org.apache.curator
- curator-x-discovery
- com.fasterxml.jackson.jaxrsjackson-jaxrs-json-provider
diff --git a/server/src/main/java/org/apache/druid/curator/discovery/CuratorServiceAnnouncer.java b/server/src/main/java/org/apache/druid/curator/discovery/CuratorServiceAnnouncer.java
deleted file mode 100644
index 26afee8d0e75..000000000000
--- a/server/src/main/java/org/apache/druid/curator/discovery/CuratorServiceAnnouncer.java
+++ /dev/null
@@ -1,121 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.druid.curator.discovery;
-
-import com.google.inject.Inject;
-import org.apache.curator.x.discovery.ServiceDiscovery;
-import org.apache.curator.x.discovery.ServiceInstance;
-import org.apache.druid.java.util.emitter.EmittingLogger;
-import org.apache.druid.server.DruidNode;
-
-import java.util.HashMap;
-import java.util.Map;
-
-/**
- * This class is deprecated, Add service to {@link org.apache.druid.discovery.DruidNodeAnnouncer} node announcement instead.
- *
- * Uses the Curator Service Discovery recipe to announce services.
- */
-@Deprecated
-public class CuratorServiceAnnouncer implements ServiceAnnouncer
-{
- private static final EmittingLogger log = new EmittingLogger(CuratorServiceAnnouncer.class);
-
- private final ServiceDiscovery discovery;
- private final Map> instanceMap = new HashMap<>();
- private final Object monitor = new Object();
-
- @Inject
- public CuratorServiceAnnouncer(
- ServiceDiscovery discovery
- )
- {
- this.discovery = discovery;
- }
-
- @Override
- public void announce(DruidNode service)
- {
- final String serviceName = CuratorServiceUtils.makeCanonicalServiceName(service.getServiceName());
-
- final ServiceInstance instance;
- synchronized (monitor) {
- if (instanceMap.containsKey(serviceName)) {
- log.warn("Ignoring request to announce service[%s]", service);
- return;
- } else {
- try {
- instance = ServiceInstance.builder()
- .name(serviceName)
- .address(service.getHost())
- .port(service.getPlaintextPort())
- .sslPort(service.getTlsPort())
- .build();
- }
- catch (Exception e) {
- throw new RuntimeException(e);
- }
-
- instanceMap.put(serviceName, instance);
- }
- }
-
- try {
- log.info("Announcing service[%s]", service);
- discovery.registerService(instance);
- }
- catch (Exception e) {
- log.warn("Failed to announce service[%s]", service);
- synchronized (monitor) {
- instanceMap.remove(serviceName);
- }
- }
- }
-
- @Override
- public void unannounce(DruidNode service)
- {
- final String serviceName = CuratorServiceUtils.makeCanonicalServiceName(service.getServiceName());
- final ServiceInstance instance;
-
- synchronized (monitor) {
- instance = instanceMap.get(serviceName);
- if (instance == null) {
- log.warn("Ignoring request to unannounce service[%s]", service);
- return;
- }
- }
-
- log.info("Unannouncing service[%s]", service);
- try {
- discovery.unregisterService(instance);
- }
- catch (Exception e) {
- log.makeAlert(e, "Failed to unannounce service[%s], zombie znode perhaps in existence.", serviceName)
- .addData("service", service)
- .emit();
- }
- finally {
- synchronized (monitor) {
- instanceMap.remove(serviceName);
- }
- }
- }
-}
diff --git a/server/src/main/java/org/apache/druid/curator/discovery/CuratorServiceUtils.java b/server/src/main/java/org/apache/druid/curator/discovery/CuratorServiceUtils.java
deleted file mode 100644
index 1513d9fc05d8..000000000000
--- a/server/src/main/java/org/apache/druid/curator/discovery/CuratorServiceUtils.java
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.druid.curator.discovery;
-
-/**
- * This class is only used by Deprecated classes.
- */
-@Deprecated
-public class CuratorServiceUtils
-{
- /**
- * Replacing '/' with ':' in service names makes it easier to provide an HTTP interface using
- * curator-x-discovery-server
- *
- * This method is marked protected because it should never be used outside of the org.apache.druid.curator.discovery
- * package. If you are tempted to use this method anywhere else you are most likely doing something wrong.
- * Mapping the actual service name to the name used within curator should be left to {@link CuratorServiceAnnouncer}
- * and {@link ServerDiscoveryFactory}
- *
- * @see CuratorServiceAnnouncer
- * @see ServerDiscoveryFactory
- *
- * @param serviceName
- * @return
- */
- protected static String makeCanonicalServiceName(String serviceName)
- {
- return serviceName.replace('/', ':');
- }
-}
diff --git a/server/src/main/java/org/apache/druid/curator/discovery/DiscoveryModule.java b/server/src/main/java/org/apache/druid/curator/discovery/DiscoveryModule.java
index bd1ad64aacaa..c859e88ead94 100644
--- a/server/src/main/java/org/apache/druid/curator/discovery/DiscoveryModule.java
+++ b/server/src/main/java/org/apache/druid/curator/discovery/DiscoveryModule.java
@@ -19,168 +19,41 @@
package org.apache.druid.curator.discovery;
-import com.google.common.collect.ImmutableList;
import com.google.inject.Binder;
import com.google.inject.Inject;
-import com.google.inject.Injector;
import com.google.inject.Key;
import com.google.inject.Module;
import com.google.inject.Provider;
-import com.google.inject.Provides;
-import com.google.inject.TypeLiteral;
-import com.google.inject.name.Named;
-import com.google.inject.name.Names;
-import io.netty.util.SuppressForbidden;
import org.apache.curator.framework.CuratorFramework;
import org.apache.curator.utils.ZKPaths;
-import org.apache.curator.x.discovery.DownInstancePolicy;
-import org.apache.curator.x.discovery.InstanceFilter;
-import org.apache.curator.x.discovery.ProviderStrategy;
-import org.apache.curator.x.discovery.ServiceCache;
-import org.apache.curator.x.discovery.ServiceCacheBuilder;
-import org.apache.curator.x.discovery.ServiceDiscovery;
-import org.apache.curator.x.discovery.ServiceDiscoveryBuilder;
-import org.apache.curator.x.discovery.ServiceInstance;
-import org.apache.curator.x.discovery.ServiceProvider;
-import org.apache.curator.x.discovery.ServiceProviderBuilder;
-import org.apache.curator.x.discovery.details.ServiceCacheListener;
import org.apache.druid.client.coordinator.Coordinator;
import org.apache.druid.client.indexing.IndexingService;
-import org.apache.druid.curator.ZkEnablementConfig;
import org.apache.druid.discovery.DruidLeaderSelector;
import org.apache.druid.discovery.DruidNodeAnnouncer;
import org.apache.druid.discovery.DruidNodeDiscoveryProvider;
-import org.apache.druid.guice.DruidBinders;
-import org.apache.druid.guice.JsonConfigProvider;
-import org.apache.druid.guice.KeyHolder;
import org.apache.druid.guice.LazySingleton;
-import org.apache.druid.guice.LifecycleModule;
import org.apache.druid.guice.PolyBind;
import org.apache.druid.guice.annotations.Self;
-import org.apache.druid.java.util.common.lifecycle.Lifecycle;
import org.apache.druid.server.DruidNode;
import org.apache.druid.server.ServiceAnnouncementState;
-import org.apache.druid.server.initialization.CuratorDiscoveryConfig;
import org.apache.druid.server.initialization.ZkPathsConfig;
-import java.lang.annotation.Annotation;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.List;
-import java.util.Properties;
-import java.util.Set;
-import java.util.concurrent.CountDownLatch;
-import java.util.concurrent.Executor;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.ThreadFactory;
import java.util.function.Function;
/**
- * The DiscoveryModule allows for the registration of Keys of DruidNode objects, which it intends to be
- * automatically announced at the end of the lifecycle start.
- *
- * In order for this to work a ServiceAnnouncer instance *must* be injected and instantiated first.
- * This can often be achieved by registering ServiceAnnouncer.class with the LifecycleModule.
+ * Binds {@link DruidNodeAnnouncer}, {@link DruidNodeDiscoveryProvider}, and the coordinator/overlord
+ * {@link DruidLeaderSelector}s to their curator-backed implementations.
*/
public class DiscoveryModule implements Module
{
- private static final String NAME = "DiscoveryModule:internal";
-
private static final String INTERNAL_DISCOVERY_PROP = "druid.discovery.type";
private static final String CURATOR_KEY = "curator";
- private boolean isZkEnabled = true;
-
- @Inject
- public void configure(Properties properties)
- {
- isZkEnabled = ZkEnablementConfig.isEnabled(properties);
- }
-
- /**
- * Requests that the un-annotated DruidNode instance be injected and published as part of the lifecycle.
- *
- * That is, this module will announce the DruidNode instance returned by
- * injector.getInstance(Key.get(DruidNode.class)) automatically.
- * Announcement will happen in the ANNOUNCEMENTS stage of the Lifecycle
- *
- * @param binder the Binder to register with
- */
- public static void registerDefault(Binder binder)
- {
- registerKey(binder, Key.get(new TypeLiteral<>() {}));
- }
-
- /**
- * Requests that the annotated DruidNode instance be injected and published as part of the lifecycle.
- *
- * That is, this module will announce the DruidNode instance returned by
- * injector.getInstance(Key.get(DruidNode.class, annotation)) automatically.
- * Announcement will happen in the ANNOUNCEMENTS stage of the Lifecycle
- *
- * @param annotation The annotation instance to use in finding the DruidNode instance, usually a Named annotation
- */
- public static void register(Binder binder, Annotation annotation)
- {
- registerKey(binder, Key.get(new TypeLiteral<>() {}, annotation));
- }
-
- /**
- * Requests that the annotated DruidNode instance be injected and published as part of the lifecycle.
- *
- * That is, this module will announce the DruidNode instance returned by
- * injector.getInstance(Key.get(DruidNode.class, annotation)) automatically.
- * Announcement will happen in the ANNOUNCEMENTS stage of the Lifecycle
- *
- * @param binder the Binder to register with
- * @param annotation The annotation class to use in finding the DruidNode instance
- */
- public static void register(Binder binder, Class extends Annotation> annotation)
- {
- registerKey(binder, Key.get(new TypeLiteral<>() {}, annotation));
- }
-
- /**
- * Requests that the keyed DruidNode instance be injected and published as part of the lifecycle.
- *
- * That is, this module will announce the DruidNode instance returned by
- * injector.getInstance(Key.get(DruidNode.class, annotation)) automatically.
- * Announcement will happen in the ANNOUNCEMENTS stage of the Lifecycle
- *
- * @param binder the Binder to register with
- * @param key The key to use in finding the DruidNode instance
- */
- public static void registerKey(Binder binder, Key key)
- {
- DruidBinders.discoveryAnnouncementBinder(binder).addBinding().toInstance(new KeyHolder<>(key));
- LifecycleModule.register(binder, ServiceAnnouncer.class);
- }
-
@Override
public void configure(Binder binder)
{
- JsonConfigProvider.bind(binder, "druid.discovery.curator", CuratorDiscoveryConfig.class);
-
- binder.bind(CuratorServiceAnnouncer.class).in(LazySingleton.class);
-
binder.bind(ServiceAnnouncementState.class).in(LazySingleton.class);
- // Build the binder so that it will at a minimum inject an empty set.
- DruidBinders.discoveryAnnouncementBinder(binder);
-
- if (isZkEnabled) {
- binder.bind(ServiceAnnouncer.class)
- .to(Key.get(CuratorServiceAnnouncer.class, Names.named(NAME)))
- .in(LazySingleton.class);
- } else {
- binder.bind(Key.get(ServiceAnnouncer.Noop.class, Names.named(NAME))).toInstance(new ServiceAnnouncer.Noop());
- binder.bind(ServiceAnnouncer.class)
- .to(Key.get(ServiceAnnouncer.Noop.class, Names.named(NAME)))
- .in(LazySingleton.class);
- }
-
- // internal discovery bindings.
PolyBind.createChoiceWithDefault(binder, INTERNAL_DISCOVERY_PROP, Key.get(DruidNodeAnnouncer.class), CURATOR_KEY);
PolyBind.createChoiceWithDefault(
@@ -233,318 +106,6 @@ public void configure(Binder binder)
.in(LazySingleton.class);
}
- @Provides
- @LazySingleton
- @Named(NAME)
- public CuratorServiceAnnouncer getServiceAnnouncer(
- final CuratorServiceAnnouncer announcer,
- final Injector injector,
- final Set> nodesToAnnounce,
- final Lifecycle lifecycle
- ) throws Exception
- {
- lifecycle.addMaybeStartHandler(
- new Lifecycle.Handler()
- {
- private volatile List nodes = null;
-
- @Override
- public void start()
- {
- if (nodes == null) {
- nodes = new ArrayList<>();
- for (KeyHolder holder : nodesToAnnounce) {
- nodes.add(injector.getInstance(holder.getKey()));
- }
- }
-
- for (DruidNode node : nodes) {
- announcer.announce(node);
- }
- }
-
- @Override
- public void stop()
- {
- if (nodes != null) {
- for (DruidNode node : nodes) {
- announcer.unannounce(node);
- }
- }
- }
- },
- Lifecycle.Stage.ANNOUNCEMENTS
- );
-
- return announcer;
- }
-
- @Provides
- @LazySingleton
- public ServiceDiscovery getServiceDiscovery(
- CuratorFramework curator,
- CuratorDiscoveryConfig config,
- Lifecycle lifecycle
- ) throws Exception
- {
- if (!config.useDiscovery()) {
- return new NoopServiceDiscovery<>();
- }
-
- final ServiceDiscovery serviceDiscovery =
- ServiceDiscoveryBuilder.builder(Void.class)
- .basePath(config.getPath())
- .client(curator)
- .build();
-
- lifecycle.addMaybeStartHandler(
- new Lifecycle.Handler()
- {
- @Override
- public void start() throws Exception
- {
- serviceDiscovery.start();
- }
-
- @Override
- public void stop()
- {
- try {
- serviceDiscovery.close();
- }
- catch (Exception e) {
- throw new RuntimeException(e);
- }
- }
- }
- );
-
- return serviceDiscovery;
- }
-
- @Provides
- @LazySingleton
- public ServerDiscoveryFactory getServerDiscoveryFactory(
- ServiceDiscovery serviceDiscovery
- )
- {
- return new ServerDiscoveryFactory(serviceDiscovery);
- }
-
- private static class NoopServiceDiscovery implements ServiceDiscovery
- {
- @Override
- public void start()
- {
-
- }
-
- @Override
- public void registerService(ServiceInstance service)
- {
-
- }
-
- @Override
- public void updateService(ServiceInstance service)
- {
-
- }
-
- @Override
- public void unregisterService(ServiceInstance service)
- {
-
- }
-
- @Override
- public ServiceCacheBuilder serviceCacheBuilder()
- {
- return new NoopServiceCacheBuilder<>();
- }
-
- @Override
- public Collection queryForNames()
- {
- return ImmutableList.of();
- }
-
- @Override
- public Collection> queryForInstances(String name)
- {
- return ImmutableList.of();
- }
-
- @Override
- public ServiceInstance queryForInstance(String name, String id)
- {
- return null;
- }
-
- @Override
- public ServiceProviderBuilder serviceProviderBuilder()
- {
- return new NoopServiceProviderBuilder<>();
- }
-
- @Override
- public void close()
- {
-
- }
- }
-
- private static class NoopServiceCacheBuilder implements ServiceCacheBuilder
- {
- @Override
- public ServiceCache build()
- {
- return new NoopServiceCache<>();
- }
-
- @Override
- public ServiceCacheBuilder name(String name)
- {
- return this;
- }
-
- @Override
- public ServiceCacheBuilder threadFactory(ThreadFactory threadFactory)
- {
- return this;
- }
-
- @Override
- public ServiceCacheBuilder executorService(ExecutorService executorService)
- {
- return this;
- }
-
- private static class NoopServiceCache implements ServiceCache
- {
- @Override
- public List> getInstances()
- {
- return ImmutableList.of();
- }
-
- @Override
- public void start()
- {
- // nothing
- }
-
- @Override
- public CountDownLatch startImmediate()
- {
- return null;
- }
-
- @Override
- public void close()
- {
- // nothing
- }
-
- @Override
- public void addListener(ServiceCacheListener listener)
- {
- // nothing
- }
-
- @Override
- public void addListener(ServiceCacheListener listener, Executor executor)
- {
- // nothing
- }
-
- @Override
- public void removeListener(ServiceCacheListener listener)
- {
- // nothing
- }
- }
- }
-
- private static class NoopServiceProviderBuilder implements ServiceProviderBuilder
- {
- @Override
- public ServiceProvider build()
- {
- return new NoopServiceProvider<>();
- }
-
- @Override
- public ServiceProviderBuilder serviceName(String serviceName)
- {
- return this;
- }
-
- @Override
- public ServiceProviderBuilder providerStrategy(ProviderStrategy providerStrategy)
- {
- return this;
- }
-
- @Override
- public ServiceProviderBuilder threadFactory(ThreadFactory threadFactory)
- {
- return this;
- }
-
- @Override
- public ServiceProviderBuilder downInstancePolicy(DownInstancePolicy downInstancePolicy)
- {
- return this;
- }
-
- @Override
- @SuppressForbidden(reason = "org.apache.curator.shaded.com.google.common.base.Predicate")
- public ServiceProviderBuilder additionalFilter(InstanceFilter tInstanceFilter)
- {
- return this;
- }
-
- @Override
- public ServiceProviderBuilder executorService(ExecutorService executorService)
- {
- return this;
- }
- }
-
- private static class NoopServiceProvider implements ServiceProvider
- {
- @Override
- public void start()
- {
- // nothing
- }
-
- @Override
- public ServiceInstance getInstance()
- {
- return null;
- }
-
- @Override
- public Collection> getAllInstances()
- {
- return Collections.emptyList();
- }
-
- @Override
- public void noteError(ServiceInstance tServiceInstance)
- {
- // nothing
- }
-
- @Override
- public void close()
- {
- // nothing
- }
- }
-
private static class DruidLeaderSelectorProvider implements Provider
{
@Inject
diff --git a/server/src/main/java/org/apache/druid/curator/discovery/ServerDiscoveryFactory.java b/server/src/main/java/org/apache/druid/curator/discovery/ServerDiscoveryFactory.java
deleted file mode 100644
index ca3cba0132ca..000000000000
--- a/server/src/main/java/org/apache/druid/curator/discovery/ServerDiscoveryFactory.java
+++ /dev/null
@@ -1,91 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.druid.curator.discovery;
-
-import com.google.inject.Inject;
-import org.apache.curator.x.discovery.ServiceDiscovery;
-import org.apache.curator.x.discovery.ServiceInstance;
-import org.apache.curator.x.discovery.ServiceProvider;
-
-import java.util.Collection;
-
-/**
- * Use {@link org.apache.druid.discovery.DruidNodeDiscovery} for discovery.
- */
-@Deprecated
-public class ServerDiscoveryFactory
-{
- private final ServiceDiscovery serviceDiscovery;
-
- @Inject
- public ServerDiscoveryFactory(
- ServiceDiscovery serviceDiscovery
- )
- {
- this.serviceDiscovery = serviceDiscovery;
- }
-
- public ServerDiscoverySelector createSelector(String serviceName)
- {
- if (serviceName == null) {
- return new ServerDiscoverySelector(new NoopServiceProvider(), serviceName);
- }
-
- final ServiceProvider serviceProvider = serviceDiscovery
- .serviceProviderBuilder()
- .serviceName(CuratorServiceUtils.makeCanonicalServiceName(serviceName))
- .build();
- return new ServerDiscoverySelector(serviceProvider, serviceName);
- }
-
- private static class NoopServiceProvider implements ServiceProvider
- {
- @Override
- public void start()
- {
- // do nothing
- }
-
- @Override
- public ServiceInstance getInstance()
- {
- return null;
- }
-
- @Override
- public Collection> getAllInstances()
- {
- return null;
- }
-
- @Override
- public void noteError(ServiceInstance tServiceInstance)
- {
- // do nothing
- }
-
- @Override
- public void close()
- {
- // do nothing
- }
- }
-
-}
diff --git a/server/src/main/java/org/apache/druid/curator/discovery/ServerDiscoverySelector.java b/server/src/main/java/org/apache/druid/curator/discovery/ServerDiscoverySelector.java
deleted file mode 100644
index 84f4fe81ce0f..000000000000
--- a/server/src/main/java/org/apache/druid/curator/discovery/ServerDiscoverySelector.java
+++ /dev/null
@@ -1,146 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.druid.curator.discovery;
-
-import com.google.common.base.Function;
-import com.google.common.base.Preconditions;
-import com.google.common.collect.Collections2;
-import com.google.common.net.HostAndPort;
-import org.apache.curator.x.discovery.ServiceInstance;
-import org.apache.curator.x.discovery.ServiceProvider;
-import org.apache.druid.client.selector.DiscoverySelector;
-import org.apache.druid.client.selector.Server;
-import org.apache.druid.java.util.common.lifecycle.LifecycleStart;
-import org.apache.druid.java.util.common.lifecycle.LifecycleStop;
-import org.apache.druid.java.util.common.logger.Logger;
-
-import javax.annotation.Nullable;
-import java.io.IOException;
-import java.util.Collection;
-import java.util.Collections;
-
-/**
- * Use {@link org.apache.druid.discovery.DruidNodeDiscovery} for discovery.
- */
-@Deprecated
-public class ServerDiscoverySelector implements DiscoverySelector
-{
- private static final Logger log = new Logger(ServerDiscoverySelector.class);
-
- private final ServiceProvider serviceProvider;
- private final String name;
-
- public ServerDiscoverySelector(ServiceProvider serviceProvider, String name)
- {
- this.serviceProvider = serviceProvider;
- this.name = name;
- }
-
- private static final Function TO_SERVER = new Function<>()
- {
- @Override
- public Server apply(final ServiceInstance instance)
- {
- Preconditions.checkState(
- instance.getPort() >= 0 || (instance.getSslPort() != null && instance.getSslPort() >= 0),
- "Both port and sslPort not set"
- );
- final int port;
- final String scheme;
- if (instance.getSslPort() == null) {
- port = instance.getPort();
- scheme = "http";
- } else {
- port = instance.getSslPort() >= 0 ? instance.getSslPort() : instance.getPort();
- scheme = instance.getSslPort() >= 0 ? "https" : "http";
- }
- return new Server()
- {
- @Override
- public String getHost()
- {
- return HostAndPort.fromParts(getAddress(), getPort()).toString();
- }
-
- @Override
- public String getAddress()
- {
- return instance.getAddress();
- }
-
- @Override
- public int getPort()
- {
- return port;
- }
-
- @Override
- public String getScheme()
- {
- return scheme;
- }
- };
- }
- };
-
- @Nullable
- @Override
- public Server pick()
- {
- final ServiceInstance instance;
- try {
- instance = serviceProvider.getInstance();
- }
- catch (Exception e) {
- log.info(e, "Exception getting instance for [%s]", name);
- return null;
- }
-
- if (instance == null) {
- log.error("No server instance found for [%s]", name);
- return null;
- }
-
- return TO_SERVER.apply(instance);
- }
-
- public Collection getAll()
- {
- try {
- return Collections2.transform(serviceProvider.getAllInstances(), TO_SERVER);
- }
- catch (Exception e) {
- log.info(e, "Unable to get all instances");
- return Collections.emptyList();
- }
- }
-
- @LifecycleStart
- public void start() throws Exception
- {
- serviceProvider.start();
- }
-
- @LifecycleStop
- public void stop() throws IOException
- {
- serviceProvider.close();
- }
-}
diff --git a/server/src/main/java/org/apache/druid/curator/discovery/ServiceAnnouncer.java b/server/src/main/java/org/apache/druid/curator/discovery/ServiceAnnouncer.java
deleted file mode 100644
index 1533422824eb..000000000000
--- a/server/src/main/java/org/apache/druid/curator/discovery/ServiceAnnouncer.java
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.druid.curator.discovery;
-
-import org.apache.druid.server.DruidNode;
-
-/**
- * This class is deprecated, Add service to {@link org.apache.druid.discovery.DruidNodeAnnouncer} node announcement instead.
- *
- * Announces our ability to serve a particular function. Multiple users may announce the same service, in which
- * case they are treated as interchangeable instances of that service.
- */
-@Deprecated
-public interface ServiceAnnouncer
-{
- void announce(DruidNode node);
-
- void unannounce(DruidNode node);
-
- class Noop implements ServiceAnnouncer
- {
-
- @Override
- public void announce(DruidNode node)
- {
-
- }
-
- @Override
- public void unannounce(DruidNode node)
- {
-
- }
- }
-}
diff --git a/server/src/main/java/org/apache/druid/guice/AnnouncerModule.java b/server/src/main/java/org/apache/druid/guice/AnnouncerModule.java
index e48822afab6c..2476c7d9af92 100644
--- a/server/src/main/java/org/apache/druid/guice/AnnouncerModule.java
+++ b/server/src/main/java/org/apache/druid/guice/AnnouncerModule.java
@@ -27,7 +27,6 @@
import org.apache.druid.curator.announcement.NodeAnnouncer;
import org.apache.druid.curator.announcement.PathChildrenAnnouncer;
import org.apache.druid.curator.announcement.ServiceAnnouncer;
-import org.apache.druid.guice.annotations.DirectExecutorAnnouncer;
import org.apache.druid.guice.annotations.SingleThreadedAnnouncer;
import org.apache.druid.java.util.common.concurrent.Execs;
import org.apache.druid.server.coordination.BatchDataSegmentAnnouncer;
@@ -57,16 +56,4 @@ public ServiceAnnouncer getAnnouncerWithSingleThreadedExecutorService(CuratorFra
}
}
- @Provides
- @DirectExecutorAnnouncer
- @ManageLifecycleAnnouncements
- public ServiceAnnouncer getAnnouncerWithDirectExecutorService(CuratorFramework curator, CuratorConfig config)
- {
- boolean usingPathChildrenCacheAnnouncer = config.getPathChildrenCacheStrategy();
- if (usingPathChildrenCacheAnnouncer) {
- return new PathChildrenAnnouncer(curator, Execs.directExecutor());
- } else {
- return new NodeAnnouncer(curator, Execs.directExecutor());
- }
- }
}
diff --git a/server/src/main/java/org/apache/druid/guice/DruidBinders.java b/server/src/main/java/org/apache/druid/guice/DruidBinders.java
index 86424af74581..5c969d4ddf6b 100644
--- a/server/src/main/java/org/apache/druid/guice/DruidBinders.java
+++ b/server/src/main/java/org/apache/druid/guice/DruidBinders.java
@@ -32,7 +32,6 @@
import org.apache.druid.query.QueryToolChest;
import org.apache.druid.segment.SegmentWrangler;
import org.apache.druid.segment.join.JoinableFactory;
-import org.apache.druid.server.DruidNode;
import java.lang.annotation.Annotation;
import java.util.Set;
@@ -134,11 +133,6 @@ public QueryBinder naiveBinding(
}
}
- public static Multibinder> discoveryAnnouncementBinder(Binder binder)
- {
- return Multibinder.newSetBinder(binder, new TypeLiteral<>() {});
- }
-
public static Multibinder> metricMonitorBinder(Binder binder)
{
return Multibinder.newSetBinder(binder, new TypeLiteral<>() {});
diff --git a/server/src/main/java/org/apache/druid/guice/annotations/DirectExecutorAnnouncer.java b/server/src/main/java/org/apache/druid/guice/annotations/DirectExecutorAnnouncer.java
deleted file mode 100644
index 0d675469222b..000000000000
--- a/server/src/main/java/org/apache/druid/guice/annotations/DirectExecutorAnnouncer.java
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.druid.guice.annotations;
-
-import com.google.inject.BindingAnnotation;
-
-import java.lang.annotation.ElementType;
-import java.lang.annotation.Retention;
-import java.lang.annotation.RetentionPolicy;
-import java.lang.annotation.Target;
-
-@BindingAnnotation
-@Target({ElementType.FIELD, ElementType.PARAMETER, ElementType.METHOD})
-@Retention(RetentionPolicy.RUNTIME)
-public @interface DirectExecutorAnnouncer
-{
-}
diff --git a/server/src/main/java/org/apache/druid/rpc/indexing/NoopOverlordClient.java b/server/src/main/java/org/apache/druid/rpc/indexing/NoopOverlordClient.java
index 81fccf19f131..2b1ad6a555a7 100644
--- a/server/src/main/java/org/apache/druid/rpc/indexing/NoopOverlordClient.java
+++ b/server/src/main/java/org/apache/druid/rpc/indexing/NoopOverlordClient.java
@@ -114,6 +114,12 @@ public ListenableFuture