diff --git a/.github/workflows/unit-and-integration-tests-unified.yml b/.github/workflows/unit-and-integration-tests-unified.yml index b5860b494557..e960493dba31 100644 --- a/.github/workflows/unit-and-integration-tests-unified.yml +++ b/.github/workflows/unit-and-integration-tests-unified.yml @@ -64,4 +64,4 @@ jobs: runs-on: ubuntu-latest if: ${{ !cancelled() }} steps: - - uses: Kesin11/actions-timeline@54d513e0b5ff1158f1cf8321108d666a5a6c1fca + - uses: Kesin11/actions-timeline@44c9c178ffb2fb1d9859614a3ffa79ccfb77565e diff --git a/benchmarks/pom.xml b/benchmarks/pom.xml index 429b9323777a..c02c2166f115 100644 --- a/benchmarks/pom.xml +++ b/benchmarks/pom.xml @@ -244,6 +244,26 @@ + + + org.apache.maven.plugins + maven-compiler-plugin + 3.14.1 + true + + ${maven.compiler.release} + + org.openjdk.jmh.generators.BenchmarkProcessor + + + + org.openjdk.jmh + jmh-generator-annprocess + ${jmh.version} + + + + org.apache.maven.plugins maven-assembly-plugin diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/SinkQuerySegmentWalkerBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/SinkQuerySegmentWalkerBenchmark.java index 6b36d72c672e..777dd297ecb9 100644 --- a/benchmarks/src/test/java/org/apache/druid/benchmark/SinkQuerySegmentWalkerBenchmark.java +++ b/benchmarks/src/test/java/org/apache/druid/benchmark/SinkQuerySegmentWalkerBenchmark.java @@ -19,29 +19,87 @@ package org.apache.druid.benchmark; +import com.fasterxml.jackson.databind.InjectableValues; +import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.base.Suppliers; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; +import org.apache.druid.client.cache.CacheConfig; +import org.apache.druid.client.cache.CachePopulatorStats; +import org.apache.druid.client.cache.MapCache; import org.apache.druid.data.input.MapBasedInputRow; +import org.apache.druid.data.input.impl.DimensionsSpec; +import 
org.apache.druid.data.input.impl.TimestampSpec; +import org.apache.druid.guice.BuiltInTypesModule; +import org.apache.druid.indexer.granularity.UniformGranularitySpec; +import org.apache.druid.jackson.AggregatorsModule; import org.apache.druid.jackson.DefaultObjectMapper; import org.apache.druid.java.util.common.DateTimes; import org.apache.druid.java.util.common.FileUtils; import org.apache.druid.java.util.common.Intervals; +import org.apache.druid.java.util.common.concurrent.Execs; import org.apache.druid.java.util.common.granularity.Granularities; import org.apache.druid.java.util.common.logger.Logger; +import org.apache.druid.java.util.emitter.EmittingLogger; import org.apache.druid.java.util.emitter.core.LoggingEmitter; import org.apache.druid.java.util.emitter.service.ServiceEmitter; +import org.apache.druid.math.expr.ExprMacroTable; +import org.apache.druid.query.DefaultGenericQueryMetricsFactory; +import org.apache.druid.query.DefaultQueryRunnerFactoryConglomerate; import org.apache.druid.query.Druids; +import org.apache.druid.query.ForwardingQueryProcessingPool; +import org.apache.druid.query.Query; import org.apache.druid.query.QueryPlus; -import org.apache.druid.query.Result; +import org.apache.druid.query.QueryRunnerFactory; +import org.apache.druid.query.QueryRunnerFactoryConglomerate; +import org.apache.druid.query.QueryRunnerTestHelper; +import org.apache.druid.query.aggregation.AggregatorFactory; +import org.apache.druid.query.aggregation.CountAggregatorFactory; import org.apache.druid.query.aggregation.LongSumAggregatorFactory; import org.apache.druid.query.context.ResponseContext; +import org.apache.druid.query.expression.TestExprMacroTable; +import org.apache.druid.query.groupby.GroupByQuery; +import org.apache.druid.query.groupby.GroupByQueryConfig; +import org.apache.druid.query.groupby.GroupByQueryRunnerTest; +import org.apache.druid.query.groupby.TestGroupByBuffers; +import org.apache.druid.query.metadata.SegmentMetadataQueryConfig; +import 
org.apache.druid.query.metadata.SegmentMetadataQueryQueryToolChest; +import org.apache.druid.query.metadata.SegmentMetadataQueryRunnerFactory; +import org.apache.druid.query.metadata.metadata.ListColumnIncluderator; +import org.apache.druid.query.metadata.metadata.SegmentMetadataQuery; +import org.apache.druid.query.policy.NoopPolicyEnforcer; +import org.apache.druid.query.scan.ScanQuery; +import org.apache.druid.query.scan.ScanQueryConfig; +import org.apache.druid.query.scan.ScanQueryEngine; +import org.apache.druid.query.scan.ScanQueryQueryToolChest; +import org.apache.druid.query.scan.ScanQueryRunnerFactory; +import org.apache.druid.query.spec.MultipleIntervalSegmentSpec; import org.apache.druid.query.timeseries.TimeseriesQuery; -import org.apache.druid.query.timeseries.TimeseriesResultValue; +import org.apache.druid.query.timeseries.TimeseriesQueryEngine; +import org.apache.druid.query.timeseries.TimeseriesQueryQueryToolChest; +import org.apache.druid.query.timeseries.TimeseriesQueryRunnerFactory; +import org.apache.druid.segment.IndexIO; +import org.apache.druid.segment.IndexMerger; +import org.apache.druid.segment.IndexMergerV9; +import org.apache.druid.segment.IndexSpec; +import org.apache.druid.segment.column.ColumnConfig; +import org.apache.druid.segment.incremental.ParseExceptionHandler; +import org.apache.druid.segment.incremental.RowIngestionMeters; +import org.apache.druid.segment.incremental.SimpleRowIngestionMeters; +import org.apache.druid.segment.indexing.DataSchema; +import org.apache.druid.segment.indexing.TuningConfig; +import org.apache.druid.segment.loading.DataSegmentPusher; +import org.apache.druid.segment.metadata.CentralizedDatasourceSchemaConfig; +import org.apache.druid.segment.realtime.SegmentGenerationMetrics; import org.apache.druid.segment.realtime.appenderator.Appenderator; +import org.apache.druid.segment.realtime.appenderator.AppenderatorConfig; +import org.apache.druid.segment.realtime.appenderator.Appenderators; import 
org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec; -import org.apache.druid.segment.realtime.appenderator.StreamAppenderatorTester; +import org.apache.druid.segment.realtime.appenderator.TestAppenderatorConfig; import org.apache.druid.segment.realtime.sink.Committers; +import org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMediumFactory; +import org.apache.druid.server.coordination.NoopDataSegmentAnnouncer; +import org.apache.druid.timeline.DataSegment; import org.apache.druid.timeline.partition.LinearShardSpec; import org.openjdk.jmh.annotations.Benchmark; import org.openjdk.jmh.annotations.BenchmarkMode; @@ -59,8 +117,11 @@ import org.openjdk.jmh.infra.Blackhole; import java.io.File; +import java.net.URI; import java.util.Arrays; import java.util.List; +import java.util.Map; +import java.util.concurrent.ExecutorService; import java.util.concurrent.TimeUnit; @State(Scope.Benchmark) @@ -71,6 +132,18 @@ @OutputTimeUnit(TimeUnit.MILLISECONDS) public class SinkQuerySegmentWalkerBenchmark { + private static final String DATASOURCE = "foo"; + private static final List QUERY_COLUMNS = ImmutableList.of("__time", "dim", "count", "met"); + private static final MultipleIntervalSegmentSpec QUERY_INTERVALS = + new MultipleIntervalSegmentSpec(ImmutableList.of(Intervals.of("2000/2001"))); + private static final String SET_PROCESSING_THREAD_NAMES = "setProcessingThreadNames"; + + @Param({"timeseries", "scan", "segmentMetadata", "groupBy"}) + private String queryType; + + @Param({"false", "true"}) + private boolean setProcessingThreadNames; + @Param({"10", "50", "100", "200"}) private int numFireHydrants; @@ -78,24 +151,66 @@ public class SinkQuerySegmentWalkerBenchmark private final ServiceEmitter serviceEmitter = new ServiceEmitter("test", "test", loggingEmitter); private File cacheDir; + private ExecutorService queryExecutor; private Appenderator appenderator; + private TestGroupByBuffers groupByBuffers; @Setup(Level.Trial) public void setup() 
throws Exception { final String userConfiguredCacheDir = System.getProperty("druid.benchmark.cacheDir", System.getenv("DRUID_BENCHMARK_CACHE_DIR")); cacheDir = new File(userConfiguredCacheDir); - final StreamAppenderatorTester tester = - new StreamAppenderatorTester.Builder().maxRowsInMemory(1) - .basePersistDirectory(cacheDir) - .withServiceEmitter(serviceEmitter) - .build(); + FileUtils.deleteDirectory(cacheDir); + final ObjectMapper objectMapper = makeObjectMapper(); + final IndexIO indexIO = new IndexIO( + objectMapper, + new ColumnConfig() + { + } + ); + final IndexMergerV9 indexMerger = new IndexMergerV9( + objectMapper, + indexIO, + OffHeapMemorySegmentWriteOutMediumFactory.instance() + ); + final DataSchema schema = makeDataSchema(); + final RowIngestionMeters rowIngestionMeters = new SimpleRowIngestionMeters(); + final AppenderatorConfig tuningConfig = makeTuningConfig(); + + queryExecutor = Execs.singleThreaded("queryExecutor(%d)"); + groupByBuffers = TestGroupByBuffers.createDefault(); - appenderator = tester.getAppenderator(); + serviceEmitter.start(); + EmittingLogger.registerEmitter(serviceEmitter); + + final QueryRunnerFactoryConglomerate conglomerate = makeQueryRunnerFactoryConglomerate(); + appenderator = Appenderators.createRealtime( + null, + schema.getDataSource(), + schema, + tuningConfig, + new SegmentGenerationMetrics(), + makeDataSegmentPusher(), + objectMapper, + indexIO, + indexMerger, + conglomerate, + new NoopDataSegmentAnnouncer(), + serviceEmitter, + new ForwardingQueryProcessingPool(queryExecutor), + MapCache.create(2048), + new CacheConfig(), + new CachePopulatorStats(), + NoopPolicyEnforcer.instance(), + rowIngestionMeters, + new ParseExceptionHandler(rowIngestionMeters, false, Integer.MAX_VALUE, 0), + CentralizedDatasourceSchemaConfig.create(), + interval -> {} + ); appenderator.startJob(); final SegmentIdWithShardSpec segmentIdWithShardSpec = new SegmentIdWithShardSpec( - StreamAppenderatorTester.DATASOURCE, + DATASOURCE, 
Intervals.of("2000/2001"), "A", new LinearShardSpec(0) @@ -119,33 +234,214 @@ public void setup() throws Exception @TearDown(Level.Trial) public void tearDown() throws Exception { - appenderator.close(); - FileUtils.deleteDirectory(cacheDir); + try { + if (appenderator != null) { + appenderator.close(); + } + } + finally { + if (queryExecutor != null) { + queryExecutor.shutdownNow(); + } + try { + if (groupByBuffers != null) { + groupByBuffers.close(); + } + } + finally { + FileUtils.deleteDirectory(cacheDir); + } + } } @Benchmark @BenchmarkMode(Mode.AverageTime) @OutputTimeUnit(TimeUnit.MILLISECONDS) - public void emitSinkMetrics(Blackhole blackhole) throws Exception + public void runSinkQuery(Blackhole blackhole) throws Exception { - { - final TimeseriesQuery query1 = Druids.newTimeseriesQueryBuilder() - .dataSource(StreamAppenderatorTester.DATASOURCE) - .intervals(ImmutableList.of(Intervals.of("2000/2001"))) - .aggregators( - Arrays.asList( - new LongSumAggregatorFactory("count", "count"), - new LongSumAggregatorFactory("met", "met") - ) - ) - .granularity(Granularities.DAY) - .build(); - - final List> results = - QueryPlus.wrap(query1).run(appenderator, ResponseContext.createEmpty()).toList(); - blackhole.consume(results); - - serviceEmitter.flush(); + final Query query = makeQuery(); + final List results = QueryPlus.wrap(query).run(appenderator, ResponseContext.createEmpty()).toList(); + blackhole.consume(results); + + serviceEmitter.flush(); + } + + private Query makeQuery() + { + switch (queryType) { + case "timeseries": + return makeTimeseriesQuery(); + case "scan": + return makeScanQuery(); + case "segmentMetadata": + return makeSegmentMetadataQuery(); + case "groupBy": + return makeGroupByQuery(); + default: + throw new IllegalStateException("Unsupported query type[" + queryType + "]"); } } + + private QueryRunnerFactoryConglomerate makeQueryRunnerFactoryConglomerate() + { + return DefaultQueryRunnerFactoryConglomerate.buildFromQueryRunnerFactories( + 
ImmutableMap.<Class<? extends Query>, QueryRunnerFactory>builder() + .put( + TimeseriesQuery.class, + new TimeseriesQueryRunnerFactory( + new TimeseriesQueryQueryToolChest(), + new TimeseriesQueryEngine(), + QueryRunnerTestHelper.NOOP_QUERYWATCHER + ) + ) + .put( + ScanQuery.class, + new ScanQueryRunnerFactory( + new ScanQueryQueryToolChest(DefaultGenericQueryMetricsFactory.instance()), + new ScanQueryEngine(), + new ScanQueryConfig() + ) + ) + .put( + SegmentMetadataQuery.class, + new SegmentMetadataQueryRunnerFactory( + new SegmentMetadataQueryQueryToolChest(new SegmentMetadataQueryConfig()), + QueryRunnerTestHelper.NOOP_QUERYWATCHER + ) + ) + .put( + GroupByQuery.class, + GroupByQueryRunnerTest.makeQueryRunnerFactory(new GroupByQueryConfig(), groupByBuffers) + ) + .build() + ); + } + + private TimeseriesQuery makeTimeseriesQuery() + { + return Druids.newTimeseriesQueryBuilder() + .dataSource(DATASOURCE) + .intervals(QUERY_INTERVALS) + .aggregators(makeAggregators()) + .granularity(Granularities.DAY) + .context(makeQueryContext()) + .build(); + } + + private ScanQuery makeScanQuery() + { + return Druids.newScanQueryBuilder() + .dataSource(DATASOURCE) + .intervals(QUERY_INTERVALS) + .columns(QUERY_COLUMNS) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .context(makeQueryContext()) + .build(); + } + + private SegmentMetadataQuery makeSegmentMetadataQuery() + { + return Druids.newSegmentMetadataQueryBuilder() + .dataSource(DATASOURCE) + .intervals(QUERY_INTERVALS) + .toInclude(new ListColumnIncluderator(QUERY_COLUMNS)) + .analysisTypes( + SegmentMetadataQuery.AnalysisType.CARDINALITY, + SegmentMetadataQuery.AnalysisType.SIZE, + SegmentMetadataQuery.AnalysisType.INTERVAL, + SegmentMetadataQuery.AnalysisType.MINMAX, + SegmentMetadataQuery.AnalysisType.AGGREGATORS + ) + .merge(true) + .context(makeQueryContext()) + .build(); + } + + private GroupByQuery makeGroupByQuery() + { + return GroupByQuery.builder() + .setDataSource(DATASOURCE) + .setInterval("2000/2001") +
.setGranularity(Granularities.ALL) + .setAggregatorSpecs(makeAggregators()) + .setContext(makeQueryContext()) + .build(); + } + + private List<AggregatorFactory> makeAggregators() + { + return Arrays.asList( + new LongSumAggregatorFactory("count", "count"), + new LongSumAggregatorFactory("met", "met") + ); + } + + private Map<String, Object> makeQueryContext() + { + return ImmutableMap.of(SET_PROCESSING_THREAD_NAMES, setProcessingThreadNames); + } + + private static ObjectMapper makeObjectMapper() + { + final ObjectMapper objectMapper = new DefaultObjectMapper(); + objectMapper.registerSubtypes(LinearShardSpec.class); + objectMapper.registerModules(new AggregatorsModule()); + objectMapper.registerModules(new BuiltInTypesModule().getJacksonModules()); + objectMapper.setInjectableValues( + new InjectableValues.Std() + .addValue(ExprMacroTable.class.getName(), TestExprMacroTable.INSTANCE) + .addValue(ObjectMapper.class.getName(), objectMapper) + ); + return objectMapper; + } + + private static DataSchema makeDataSchema() + { + return DataSchema.builder() + .withDataSource(DATASOURCE) + .withTimestamp(new TimestampSpec("ts", "auto", null)) + .withDimensions(DimensionsSpec.EMPTY) + .withAggregators( + new CountAggregatorFactory("count"), + new LongSumAggregatorFactory("met", "met") + ) + .withGranularity(new UniformGranularitySpec(Granularities.MINUTE, Granularities.NONE, null)) + .build(); + } + + private AppenderatorConfig makeTuningConfig() + { + return new TestAppenderatorConfig( + TuningConfig.DEFAULT_APPENDABLE_INDEX, + 1, + Runtime.getRuntime().totalMemory() / 3, + false, + IndexSpec.getDefault(), + 0, + false, + 0L, + OffHeapMemorySegmentWriteOutMediumFactory.instance(), + IndexMerger.UNLIMITED_MAX_COLUMNS_TO_MERGE, + cacheDir, + false + ); + } + + private static DataSegmentPusher makeDataSegmentPusher() + { + return new DataSegmentPusher() + { + @Override + public DataSegment push(File file, DataSegment segment, boolean useUniquePath) + { + return segment; + } + + @Override + public Map<String, Object>
makeLoadSpec(URI uri) + { + throw new UnsupportedOperationException(); + } + }; + } } diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlExpressionBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlExpressionBenchmark.java index 8988973f9825..0ef6395a1fce 100644 --- a/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlExpressionBenchmark.java +++ b/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlExpressionBenchmark.java @@ -21,12 +21,15 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; +import org.apache.druid.math.expr.ExpressionProcessing; import org.apache.druid.query.QueryContexts; import org.apache.druid.query.groupby.GroupByQueryConfig; import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Level; import org.openjdk.jmh.annotations.Measurement; import org.openjdk.jmh.annotations.Param; import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; import org.openjdk.jmh.annotations.State; import org.openjdk.jmh.annotations.Warmup; @@ -171,6 +174,9 @@ public class SqlExpressionBenchmark extends SqlBaseQueryBenchmark }) private String deferExpressionDimensions; + @Param({"false", "true"}) + private boolean useVectorApi; + @Param({ // non-expression reference "0", @@ -238,6 +244,16 @@ public class SqlExpressionBenchmark extends SqlBaseQueryBenchmark }) private String query; + @Setup(Level.Trial) + public void setupExpressionProcessing() + { + if (useVectorApi) { + ExpressionProcessing.initializeForVectorApiTests(); + } else { + ExpressionProcessing.initializeForTests(); + } + } + @Override public String getQuery() { diff --git a/cloud/aws-common/src/main/java/org/apache/druid/common/aws/AWSClientConfig.java b/cloud/aws-common/src/main/java/org/apache/druid/common/aws/AWSClientConfig.java index e8d299cbd851..9fdf9ba592f7 100644 --- 
a/cloud/aws-common/src/main/java/org/apache/druid/common/aws/AWSClientConfig.java +++ b/cloud/aws-common/src/main/java/org/apache/druid/common/aws/AWSClientConfig.java @@ -19,7 +19,9 @@ package org.apache.druid.common.aws; +import com.fasterxml.jackson.annotation.JacksonInject; import com.fasterxml.jackson.annotation.JsonProperty; +import org.apache.druid.utils.RuntimeInfo; import javax.annotation.Nullable; @@ -31,7 +33,17 @@ public class AWSClientConfig private static final int DEFAULT_CONNECTION_TIMEOUT_MILLIS = 10_000; private static final int DEFAULT_SOCKET_TIMEOUT_MILLIS = 50_000; - private static final int DEFAULT_MAX_CONNECTIONS = 50; + /** AWS SDK v2's own default. */ + private static final int DEFAULT_MAX_CONNECTIONS_FLOOR = 50; + + /** + * Used by {@link #getMaxConnections} to scale the default connection pool with host size so hosts large enough to + * do a lot of concurrent deep-storage I/O (e.g. virtual-storage historicals fanning out on-demand loads to S3) + * aren't bottlenecked at the SDK's connection pool. The field initializer covers direct construction (no Jackson); + * Jackson overwrites with the injected {@link RuntimeInfo} during deserialization. + */ + @JacksonInject + private final RuntimeInfo runtimeInfo = new RuntimeInfo(); @JsonProperty private String protocol = "https"; // The default of aws-java-sdk @@ -60,8 +72,13 @@ public class AWSClientConfig @JsonProperty private int socketTimeout = DEFAULT_SOCKET_TIMEOUT_MILLIS; + /** + * Null means use the dynamic default in {@link #getMaxConnections} ({@code max(50, 4 × availableProcessors)}); + * any explicit value set in JSON wins. 
+ */ @JsonProperty - private int maxConnections = DEFAULT_MAX_CONNECTIONS; + @Nullable + private Integer maxConnections = null; public String getProtocol() { @@ -123,7 +140,10 @@ public int getSocketTimeoutMillis() public int getMaxConnections() { - return maxConnections; + if (maxConnections != null) { + return maxConnections; + } + return Math.max(DEFAULT_MAX_CONNECTIONS_FLOOR, 4 * runtimeInfo.getAvailableProcessors()); } @Override @@ -136,7 +156,7 @@ public String toString() ", crossRegionAccessEnabled=" + isCrossRegionAccessEnabled() + ", connectionTimeout=" + connectionTimeout + ", socketTimeout=" + socketTimeout + - ", maxConnections=" + maxConnections + + ", maxConnections=" + getMaxConnections() + '}'; } } diff --git a/cloud/aws-common/src/main/java/org/apache/druid/common/aws/AWSClientUtil.java b/cloud/aws-common/src/main/java/org/apache/druid/common/aws/AWSClientUtil.java index 68fa9e25ca40..4f138e3a750f 100644 --- a/cloud/aws-common/src/main/java/org/apache/druid/common/aws/AWSClientUtil.java +++ b/cloud/aws-common/src/main/java/org/apache/druid/common/aws/AWSClientUtil.java @@ -62,6 +62,12 @@ public class AWSClientUtil "Throttling" ); + private static final String UNABLE_TO_LOAD_CREDENTIALS_FROM_PROVIDER_CHAIN = + "Unable to load credentials from any of the providers in the chain"; + private static final String FAILED_TO_LOAD_CREDENTIALS_FROM_IMDS = "Failed to load credentials from IMDS"; + private static final String CANNOT_REFRESH_AWS_CREDENTIALS = "cannot refresh AWS credentials"; + private static final String CANNOT_FETCH_CREDENTIALS_FROM_CONTAINER = "Cannot fetch credentials from container"; + /** * Checks whether an exception can be retried or not for AWS SDK v2. 
*/ @@ -101,11 +107,15 @@ public static boolean isClientExceptionRecoverable(SdkException exception) // Check for SdkClientException specific messages if (exception instanceof SdkClientException) { - String message = exception.getMessage(); + final String message = exception.getMessage(); if (message != null) { if (message.contains("Unable to execute HTTP request") || message.contains("Data read has a different length than the expected") || - message.contains("Unable to find a region")) { + message.contains("Unable to find a region") || + message.contains(UNABLE_TO_LOAD_CREDENTIALS_FROM_PROVIDER_CHAIN) || + message.contains(FAILED_TO_LOAD_CREDENTIALS_FROM_IMDS) || + message.contains(CANNOT_REFRESH_AWS_CREDENTIALS) || + message.contains(CANNOT_FETCH_CREDENTIALS_FROM_CONTAINER)) { return true; } } diff --git a/cloud/aws-common/src/test/java/org/apache/druid/common/aws/AWSClientConfigTest.java b/cloud/aws-common/src/test/java/org/apache/druid/common/aws/AWSClientConfigTest.java index 4e8837566ca7..99b927efb2ba 100644 --- a/cloud/aws-common/src/test/java/org/apache/druid/common/aws/AWSClientConfigTest.java +++ b/cloud/aws-common/src/test/java/org/apache/druid/common/aws/AWSClientConfigTest.java @@ -19,13 +19,24 @@ package org.apache.druid.common.aws; +import com.fasterxml.jackson.databind.InjectableValues; import com.fasterxml.jackson.databind.ObjectMapper; +import org.apache.druid.utils.RuntimeInfo; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; public class AWSClientConfigTest { - private static final ObjectMapper MAPPER = new ObjectMapper(); + private static final ObjectMapper MAPPER = new ObjectMapper().setInjectableValues( + new InjectableValues.Std().addValue(RuntimeInfo.class, new RuntimeInfo()) + ); + + private static ObjectMapper mapperWithRuntimeInfo(RuntimeInfo runtimeInfo) + { + return new ObjectMapper().setInjectableValues( + new InjectableValues.Std().addValue(RuntimeInfo.class, runtimeInfo) + ); + } @Test public void 
testDefaultCrossRegionAccessEnabled() throws Exception @@ -83,4 +94,44 @@ public void testDeprecatedNotSetFallsThroughToCrossRegion() throws Exception Assertions.assertNull(config.isForceGlobalBucketAccessEnabled()); Assertions.assertTrue(config.isCrossRegionAccessEnabled()); } + + @Test + public void testDefaultMaxConnectionsKeepsAwsSdkFloorOnSmallHost() throws Exception + { + AWSClientConfig config = mapperWithRuntimeInfo(new FixedProcessorsRuntimeInfo(8)) + .readValue("{}", AWSClientConfig.class); + Assertions.assertEquals(50, config.getMaxConnections()); + } + + @Test + public void testDefaultMaxConnectionsScalesWithCoresOnLargeHost() throws Exception + { + AWSClientConfig config = mapperWithRuntimeInfo(new FixedProcessorsRuntimeInfo(32)) + .readValue("{}", AWSClientConfig.class); + Assertions.assertEquals(128, config.getMaxConnections()); + } + + @Test + public void testExplicitMaxConnectionsOverridesDefault() throws Exception + { + AWSClientConfig config = mapperWithRuntimeInfo(new FixedProcessorsRuntimeInfo(64)) + .readValue("{\"maxConnections\": 200}", AWSClientConfig.class); + Assertions.assertEquals(200, config.getMaxConnections()); + } + + private static final class FixedProcessorsRuntimeInfo extends RuntimeInfo + { + private final int availableProcessors; + + private FixedProcessorsRuntimeInfo(int availableProcessors) + { + this.availableProcessors = availableProcessors; + } + + @Override + public int getAvailableProcessors() + { + return availableProcessors; + } + } } diff --git a/cloud/aws-common/src/test/java/org/apache/druid/common/aws/AWSClientUtilTest.java b/cloud/aws-common/src/test/java/org/apache/druid/common/aws/AWSClientUtilTest.java index 6f47e78471c9..535bfcc8b39e 100644 --- a/cloud/aws-common/src/test/java/org/apache/druid/common/aws/AWSClientUtilTest.java +++ b/cloud/aws-common/src/test/java/org/apache/druid/common/aws/AWSClientUtilTest.java @@ -89,6 +89,33 @@ public void testRecoverableException_ProvisionedThroughputExceededException() 
Assert.assertTrue(AWSClientUtil.isClientExceptionRecoverable(ex)); } + @Test + public void testRecoverableException_CredentialsProviderChain() + { + final SdkClientException ex = SdkClientException.builder() + .message("Unable to load credentials from any of the providers in the chain AwsCredentialsProviderChain") + .build(); + Assert.assertTrue(AWSClientUtil.isClientExceptionRecoverable(ex)); + } + + @Test + public void testRecoverableException_FileSessionCredentialsRefresh() + { + final SdkClientException ex = SdkClientException.builder() + .message("LazyFileSessionCredentialsProvider(): cannot refresh AWS credentials") + .build(); + Assert.assertTrue(AWSClientUtil.isClientExceptionRecoverable(ex)); + } + + @Test + public void testRecoverableException_InstanceProfileCredentials() + { + final SdkClientException ex = SdkClientException.builder() + .message("InstanceProfileCredentialsProvider(): Failed to load credentials from IMDS.") + .build(); + Assert.assertTrue(AWSClientUtil.isClientExceptionRecoverable(ex)); + } + @Test public void testRecoverableException_ClockSkewedError() { diff --git a/docs/api-reference/service-status-api.md b/docs/api-reference/service-status-api.md index 1ba8b55c4d78..1c192162d5d0 100644 --- a/docs/api-reference/service-status-api.md +++ b/docs/api-reference/service-status-api.md @@ -154,36 +154,6 @@ Host: http://ROUTER_IP:ROUTER_PORT "name": "org.apache.druid.query.aggregation.datasketches.kll.KllSketchModule", "artifact": "druid-datasketches", "version": "26.0.0" - }, - { - "name": "org.apache.druid.msq.guice.MSQExternalDataSourceModule", - "artifact": "druid-multi-stage-query", - "version": "26.0.0" - }, - { - "name": "org.apache.druid.msq.guice.MSQIndexingModule", - "artifact": "druid-multi-stage-query", - "version": "26.0.0" - }, - { - "name": "org.apache.druid.msq.guice.MSQDurableStorageModule", - "artifact": "druid-multi-stage-query", - "version": "26.0.0" - }, - { - "name": "org.apache.druid.msq.guice.MSQServiceClientModule", - 
"artifact": "druid-multi-stage-query", - "version": "26.0.0" - }, - { - "name": "org.apache.druid.msq.guice.MSQSqlModule", - "artifact": "druid-multi-stage-query", - "version": "26.0.0" - }, - { - "name": "org.apache.druid.msq.guice.SqlTaskModule", - "artifact": "druid-multi-stage-query", - "version": "26.0.0" } ], "memory": { @@ -326,7 +296,7 @@ Host: http://ROUTER_IP:ROUTER_PORT "log4j.shutdownHookEnabled": "true", "java.vm.vendor": "Homebrew", "sun.arch.data.model": "64", - "druid.extensions.loadList": "[\"druid-hdfs-storage\", \"druid-kafka-indexing-service\", \"druid-datasketches\", \"druid-multi-stage-query\"]", + "druid.extensions.loadList": "[\"druid-hdfs-storage\", \"druid-kafka-indexing-service\", \"druid-datasketches\"]", "java.vendor.url": "https://github.com/Homebrew/homebrew-core/issues", "druid.router.coordinatorServiceName": "druid/coordinator", "user.timezone": "UTC", diff --git a/docs/api-reference/sql-ingestion-api.md b/docs/api-reference/sql-ingestion-api.md index 59942aff8e0c..9348291581e8 100644 --- a/docs/api-reference/sql-ingestion-api.md +++ b/docs/api-reference/sql-ingestion-api.md @@ -26,9 +26,8 @@ import TabItem from '@theme/TabItem'; --> :::info - This page describes SQL-based batch ingestion using the [`druid-multi-stage-query`](../multi-stage-query/index.md) - extension, new in Druid 24.0. Refer to the [ingestion methods](../ingestion/index.md#batch) table to determine which - ingestion method is right for you. + This page describes SQL-based batch ingestion using the [multi-stage query (MSQ) task engine](../multi-stage-query/index.md). + Refer to the [ingestion methods](../ingestion/index.md#batch) table to determine which ingestion method is right for you. ::: The **Query** view in the web console provides a friendly experience for the multi-stage query task engine (MSQ task engine) and multi-stage query architecture. We recommend using the web console if you don't need a programmatic interface. 
@@ -847,4 +846,4 @@ The response shows the ID of the task that was canceled. { "task": "query-655efe33-781a-4c50-ae84-c2911b42d63c" } -``` \ No newline at end of file +``` diff --git a/docs/api-reference/supervisor-api.md b/docs/api-reference/supervisor-api.md index d321af143020..8f9c5c36dc5c 100644 --- a/docs/api-reference/supervisor-api.md +++ b/docs/api-reference/supervisor-api.md @@ -3539,6 +3539,109 @@ when the supervisor's tasks restart, they resume reading from `{"0": 100, "1": 1 ``` +### Reset offsets to latest and start a backfill supervisor + +This endpoint is supported for Apache Kafka and RabbitMQ Stream supervisors. Amazon Kinesis is not supported yet. + +Resets the supervisor to the latest available stream offsets and starts a new bounded backfill supervisor to ingest the data in the skipped range. + +This endpoint is useful when a supervisor has fallen behind and you want to catch it up to the latest offsets without losing the skipped data. The main supervisor resumes ingesting from the latest offsets, while the backfill supervisor processes the range from the previously checkpointed offsets up to the latest offsets at the time of the reset. + +**Duplicate ingestion notice:** The main supervisor is not quiesced before the reset. This means duplicate data can occur in two ways: +- **Backfill overlap:** Any tasks that were in-flight at the time of the reset may publish segments covering part of the backfill range before being shut down. +- **Reset race:** If a task checkpoint is written to the metadata store between when this endpoint captures the current offsets and when it applies the reset, that checkpoint can be overwritten, causing the main supervisor to re-ingest already-processed data. + +Both windows are narrow in practice, but cannot be fully eliminated without manually suspending the main supervisor before calling this endpoint and waiting for all pending tasks to complete. 
+ +The following requirements must be met before calling this endpoint: + +- The supervisor must be a [streaming supervisor](../ingestion/supervisor.md). +- The supervisor's `useEarliestSequenceNumber` property must be `false`. +- The supervisor context must have `useConcurrentLocks` set to `true` to allow the backfill supervisor's tasks to write concurrently with the main supervisor's tasks. +- The supervisor must be in a `RUNNING` state. + +The backfill supervisor has the same configuration as the source supervisor except for its ID, which takes the form `{supervisorId}_backfill_{randomSuffix}`, and its `boundedStreamConfig`, which is set to the skipped offset range. If `backfillTaskCount` is specified, it overrides the `taskCount` for the backfill supervisor only. + +#### URL + +`POST` `/druid/indexer/v1/supervisor/{supervisorId}/resetToLatestAndBackfill` + +#### Query parameters + +| Parameter | Type | Description | Default | +|---------|---------|---------|---------| +| `backfillTaskCount` | Integer | Number of parallel tasks for the backfill supervisor. | Defaults to `taskCount` from the source supervisor if not specified | + +#### Responses + + + + + + +*Successfully reset and started backfill supervisor* + + + + + +*Supervisor does not meet requirements (wrong type, `useEarliestSequenceNumber` is true, `useConcurrentLocks` not enabled, or supervisor not RUNNING)* + + + + + +*Invalid supervisor ID* + + + + + +*Failed to retrieve stream offsets or serialize the backfill spec* + + + + +--- + +#### Sample request + +The following example resets a supervisor named `social_media` and starts a backfill supervisor with 2 tasks. 
+ + + + + + +```shell +curl --request POST "http://ROUTER_IP:ROUTER_PORT/druid/indexer/v1/supervisor/social_media/resetToLatestAndBackfill?backfillTaskCount=2" +``` + + + + + +```HTTP +POST /druid/indexer/v1/supervisor/social_media/resetToLatestAndBackfill?backfillTaskCount=2 HTTP/1.1 +Host: http://ROUTER_IP:ROUTER_PORT +``` + + + + +#### Sample response + +
+<details> + <summary>View the response</summary> + + ```json +{ + "id": "social_media", + "backfillSupervisorId": "social_media_backfill_abcdefgh" +} + ``` +</details>
+ ### Terminate a supervisor Terminates a supervisor and its associated indexing tasks, triggering the publishing of their segments. When you terminate a supervisor, Druid places a tombstone marker in the metadata store to prevent reloading on restart. diff --git a/docs/configuration/extensions.md b/docs/configuration/extensions.md index 6c802739fc4b..31f1a5b62b29 100644 --- a/docs/configuration/extensions.md +++ b/docs/configuration/extensions.md @@ -50,7 +50,6 @@ Core extensions are maintained by Druid committers. |druid-kerberos|Kerberos authentication for druid processes.|[link](../development/extensions-core/druid-kerberos.md)| |druid-lookups-cached-global|A module for [lookups](../querying/lookups.md) providing a jvm-global eager caching for lookups. It provides JDBC and URI implementations for fetching lookup data.|[link](../querying/lookups-cached-global.md)| |druid-lookups-cached-single| Per lookup caching module to support the use cases where a lookup need to be isolated from the global pool of lookups |[link](../development/extensions-core/druid-lookups.md)| -|druid-multi-stage-query| Support for the multi-stage query architecture for Apache Druid and the multi-stage query task engine.|[link](../multi-stage-query/index.md)| |druid-orc-extensions|Support for data in Apache ORC data format.|[link](../development/extensions-core/orc.md)| |druid-parquet-extensions|Support for data in Apache Parquet data format. Requires druid-avro-extensions to be loaded.|[link](../development/extensions-core/parquet.md)| |druid-protobuf-extensions| Support for data in Protobuf data format.|[link](../development/extensions-core/protobuf.md)| diff --git a/docs/configuration/index.md b/docs/configuration/index.md index f0b80523c401..aad78964f199 100644 --- a/docs/configuration/index.md +++ b/docs/configuration/index.md @@ -156,24 +156,6 @@ Druid interacts with ZooKeeper through a set of standard path configurations. 
We |`druid.zk.paths.base`|Base ZooKeeper path.|`/druid`| |`druid.zk.paths.coordinatorPath`|Used by the Coordinator for leader election.|`${druid.zk.paths.base}/coordinator`| -The indexing service also uses its own set of paths. These configs can be included in the common configuration. - -|Property|Description|Default| -|--------|-----------|-------| -|`druid.zk.paths.indexer.base`|Base ZooKeeper path for |`${druid.zk.paths.base}/indexer`| -|`druid.zk.paths.indexer.announcementsPath`|Middle Managers announce themselves here.|`${druid.zk.paths.indexer.base}/announcements`| -|`druid.zk.paths.indexer.tasksPath`|Used to assign tasks to Middle Managers.|`${druid.zk.paths.indexer.base}/tasks`| -|`druid.zk.paths.indexer.statusPath`|Parent path for announcement of task statuses.|`${druid.zk.paths.indexer.base}/status`| - -If `druid.zk.paths.base` and `druid.zk.paths.indexer.base` are both set, and none of the other `druid.zk.paths.*` or `druid.zk.paths.indexer.*` values are set, then the other properties will be evaluated relative to their respective `base`. -For example, if `druid.zk.paths.base` is set to `/druid1` and `druid.zk.paths.indexer.base` is set to `/druid2` then `druid.zk.paths.coordinatorPath` will default to `/druid1/coordinator` while `druid.zk.paths.indexer.announcementsPath` will default to `/druid2/announcements`. - -The following path is used for service discovery. It is **not** affected by `druid.zk.paths.base` and **must** be specified separately. - -|Property|Description|Default| -|--------|-----------|-------| -|`druid.discovery.curator.path`|Services announce themselves under this ZooKeeper path.|`/druid/discovery`| - ### TLS #### General configuration @@ -724,8 +706,8 @@ These Coordinator static configurations can be defined in the `coordinator/runti |`druid.coordinator.period`|The run period for the Coordinator. 
The Coordinator operates by maintaining the current state of the world in memory and periodically looking at the set of "used" segments and segments being served to make decisions about whether any changes need to be made to the data topology. This property sets the delay between each of these runs.|`PT60S`| |`druid.coordinator.startDelay`|The operation of the Coordinator works on the assumption that it has an up-to-date view of the state of the world when it runs, the current ZooKeeper interaction code, however, is written in a way that doesn’t allow the Coordinator to know for a fact that it’s done loading the current state of the world. This delay is a hack to give it enough time to believe that it has all the data.|`PT300S`| |`druid.coordinator.load.timeout`|The timeout duration for when the Coordinator assigns a segment to a Historical service.|`PT15M`| -|`druid.coordinator.balancer.strategy`|The [balancing strategy](../design/coordinator.md#balancing-segments-in-a-tier) used by the Coordinator to distribute segments among the Historical servers in a tier. The `cost` strategy distributes segments by minimizing a cost function, `diskNormalized` weights these costs with the disk usage ratios of the servers and `random` distributes segments randomly.|`cost`| -|`druid.coordinator.balancer.diskNormalized.moveCostSavingsThreshold`|Only used when `druid.coordinator.balancer.strategy` is `diskNormalized`. Minimum fractional cost reduction required before a segment is moved off a server that already holds it. A value of `0.05` requires the destination to be at least 5% cheaper than the source, which prevents oscillation between servers with similar disk utilization. Must be in `[0.0, 1.0)`; `0.0` disables the anti-oscillation discount.|`0.05`| +|`druid.coordinator.balancer.strategy`|The [balancing strategy](../design/coordinator.md#balancing-segments-in-a-tier) used by the Coordinator to distribute segments among the Historical servers in a tier. 
The `cost` strategy distributes segments by minimizing a cost function, `diskNormalized` divides these costs by the projected available disk headroom of each server and `random` distributes segments randomly.|`cost`| +|`druid.coordinator.balancer.diskNormalized.moveCostSavingsThreshold`|Only used when `druid.coordinator.balancer.strategy` is `diskNormalized`. Minimum fractional cost reduction required before a segment is moved off a server that already holds it. A value of `0.05` requires the destination to be at least 5% cheaper than the source, which prevents oscillation between servers with similar projected headroom. Must be in `[0.0, 1.0)`; `0.0` disables the anti-oscillation discount.|`0.05`| |`druid.coordinator.loadqueuepeon.http.repeatDelay`|The start and repeat delay (in milliseconds) for the load queue peon, which manages the load/drop queue of segments for any server.|1 minute| |`druid.coordinator.loadqueuepeon.http.batchSize`|Number of segment load/drop requests to batch in one HTTP request. Note that it must be smaller than or equal to the `druid.segmentCache.numLoadingThreads` config on Historical service. If this value is not configured, the coordinator uses the value of the `numLoadingThreads` for the respective server. | `druid.segmentCache.numLoadingThreads` | |`druid.coordinator.asOverlord.enabled`|Boolean value for whether this Coordinator service should act like an Overlord as well. This configuration allows users to simplify a Druid cluster by not having to deploy any standalone Overlord services. 
If set to true, then Overlord console is available at `http://coordinator-host:port/console.html` and be sure to set `druid.coordinator.asOverlord.overlordService` also.|false| @@ -966,7 +948,7 @@ These Overlord static configurations can be defined in the `overlord/runtime.pro |Property|Description|Default| |--------|-----------|-------| -|`druid.indexer.runner.type`|Indicates whether tasks should be run locally using `local` or in a distributed environment using `remote`. The recommended option is `httpRemote`, which is similar to `remote` but uses HTTP to interact with Middle Managers instead of ZooKeeper.|`httpRemote`| +|`druid.indexer.runner.type`|Indicates whether tasks should be run locally using `local` or in a distributed environment using `httpRemote`. `httpRemote` is recommended for distributed deployments and uses HTTP to interact with Middle Managers.|`httpRemote`| |`druid.indexer.server.maxConcurrentActions`|Maximum number of concurrent action requests (such as getting locks, creating segments, fetching segments etc) that the Overlord will process simultaneously. This prevents thread exhaustion while preserving access to health check endpoints. Set to `0` to disable quality of service filtering entirely. If not specified, defaults to `max(1, max(serverHttpNumThreads - 4, serverHttpNumThreads * 0.8))`.|`max(1, max(serverHttpNumThreads - 4, serverHttpNumThreads * 0.8))`| |`druid.indexer.storage.type`|Indicates whether incoming tasks should be stored locally (in heap) or in metadata storage. One of `local` or `metadata`. `local` is mainly for internal testing while `metadata` is recommended in production because storing incoming tasks in metadata storage allows for tasks to be resumed if the Overlord should fail.|`local`| |`druid.indexer.storage.recentlyFinishedThreshold`|Duration of time to store task results. Default is 24 hours. 
If you have hundreds of tasks running in a day, consider increasing this threshold.|`PT24H`| @@ -981,17 +963,14 @@ These Overlord static configurations can be defined in the `overlord/runtime.pro |`druid.indexer.queue.storageSyncRate`|Sync Overlord state this often with an underlying task persistence mechanism.|`PT1M`| |`druid.indexer.queue.maxTaskPayloadSize`|Maximum allowed size in bytes of a single task payload accepted by the Overlord.|none (allow all task payload sizes)| -The following configs only apply if the Overlord is running in remote mode. For a description of local vs. remote mode, see [Overlord service](../design/overlord.md). +The following configs apply when the Overlord is running with the `httpRemote` runner. For a description of local vs. distributed mode, see [Overlord service](../design/overlord.md). |Property|Description|Default| |--------|-----------|-------| |`druid.indexer.runner.taskAssignmentTimeout`|How long to wait after a task has been assigned to a Middle Manager before throwing an error.|`PT5M`| |`druid.indexer.runner.minWorkerVersion`|The minimum Middle Manager version to send tasks to. The version number is a string. This affects the expected behavior during certain operations like comparison against `druid.worker.version`. Specifically, the version comparison follows dictionary order. Use ISO8601 date format for the version to accommodate date comparisons. |"0"| |`druid.indexer.runner.parallelIndexTaskSlotRatio`| The ratio of task slots available for parallel indexing supervisor tasks per worker. The specified value must be in the range `[0, 1]`. |1| -|`druid.indexer.runner.compressZnodes`|Indicates whether or not the Overlord should expect Middle Managers to compress Znodes.|true| -|`druid.indexer.runner.maxZnodeBytes`|The maximum size Znode in bytes that can be created in ZooKeeper, should be in the range of `[10KiB, 2GiB)`. 
[Human-readable format](human-readable-byte.md) is supported.| 512 KiB | -|`druid.indexer.runner.taskCleanupTimeout`|How long to wait before failing a task after a Middle Manager is disconnected from ZooKeeper.|`PT15M`| -|`druid.indexer.runner.taskShutdownLinkTimeout`|How long to wait on a shutdown request to a Middle Manager before timing out|`PT1M`| +|`druid.indexer.runner.taskCleanupTimeout`|How long to wait before failing a task after a Middle Manager is disconnected.|`PT15M`| |`druid.indexer.runner.pendingTasksRunnerNumThreads`|Number of threads to allocate pending-tasks to workers, must be at least 1.|1| |`druid.indexer.runner.maxRetriesBeforeBlacklist`|Number of consecutive times the Middle Manager can fail tasks, before the worker is blacklisted, must be at least 1|5| |`druid.indexer.runner.workerBlackListBackoffTime`|How long to wait before a task is whitelisted again. This value should be greater than the value set for taskBlackListCleanupPeriod.|`PT15M`| @@ -1322,12 +1301,10 @@ Middle Managers pass their configurations down to their child peons. The Middle |Property|Description|Default| |--------|-----------|-------| |`druid.indexer.runner.allowedPrefixes`|Whitelist of prefixes for configs that can be passed down to child peons.|`com.metamx`, `druid`, `org.apache.druid`, `user.timezone`, `file.encoding`, `java.io.tmpdir`, `hadoop`| -|`druid.indexer.runner.compressZnodes`|Indicates whether or not the Middle Managers should compress Znodes.|true| |`druid.indexer.runner.classpath`|Java classpath for the peon.|`System.getProperty("java.class.path")`| |`druid.indexer.runner.javaCommand`|Command required to execute java.|java| |`druid.indexer.runner.javaOpts`|_DEPRECATED_ A string of -X Java options to pass to the peon's JVM. Quotable parameters or parameters with spaces are encouraged to use javaOptsArray|`''`| |`druid.indexer.runner.javaOptsArray`|A JSON array of strings to be passed in as options to the peon's JVM.
This is additive to `druid.indexer.runner.javaOpts` and is recommended for properly handling arguments which contain quotes or spaces like `["-XX:OnOutOfMemoryError=kill -9 %p"]`|`[]`| -|`druid.indexer.runner.maxZnodeBytes`|The maximum size Znode in bytes that can be created in ZooKeeper, should be in the range of [10KiB, 2GiB). [Human-readable format](human-readable-byte.md) is supported.|512KiB| |`druid.indexer.runner.startPort`|Starting port used for Peon services, should be greater than 1023 and less than 65536.|8100| |`druid.indexer.runner.endPort`|Ending port used for Peon services, should be greater than or equal to `druid.indexer.runner.startPort` and less than 65536.|65535| |`druid.indexer.runner.ports`|A JSON array of integers to specify ports that are used for Peon services. If provided and non-empty, ports for Peon services will be chosen from these ports. And `druid.indexer.runner.startPort/druid.indexer.runner.endPort` will be completely ignored.|`[]`| diff --git a/docs/design/coordinator.md b/docs/design/coordinator.md index e63a5b4c3d54..f2d735000cf9 100644 --- a/docs/design/coordinator.md +++ b/docs/design/coordinator.md @@ -88,7 +88,7 @@ But in a tier with several Historicals (or a low replication factor), segment re Thus, the Coordinator constantly monitors the set of segments present on each Historical in a tier and employs one of the following strategies to identify segments that may be moved from one Historical to another to retain balance. - `cost` (default): For a given segment in a tier, this strategy picks the server with the minimum "cost" of placing that segment. The cost is a function of the data interval of the segment and the data intervals of all the segments already present on the candidate server. In essence, this strategy tries to avoid placing segments with adjacent or overlapping data intervals on the same server.
This is based on the premise that adjacent-interval segments are more likely to be used together in a query and placing them on the same server may lead to skewed CPU usages of Historicals. -- `diskNormalized`: A derivative of the `cost` strategy that multiplies the cost of placing a segment on a server by the server's disk usage ratio (`diskUsed / maxSize`). This penalizes fuller servers and drives disk utilization to equalize across the tier, which is useful when historicals within a tier hold segments of widely varying sizes. To prevent oscillation when servers have similar utilization, a segment that is already placed on a server receives a cost discount; a move only fires when the destination saves at least `druid.coordinator.balancer.diskNormalized.moveCostSavingsThreshold` (default `0.05`, i.e. 5%) of the source's cost. +- `diskNormalized`: A derivative of the `cost` strategy that divides the cost of placing a segment on a server by the server's projected available disk headroom. The projected usage ratio is `(diskUsed + segmentSizeIfNotAlreadyProjected) / maxSize`, so the disk-adjusted cost is `cost / max(EPSILON, 1 - projectedUsageRatio)`. This strongly penalizes servers that would be nearly full after placement and drives disk utilization to equalize across the tier, which is useful when historicals within a tier hold segments of widely varying sizes. To prevent oscillation when servers have similar headroom, a segment that is already placed on a server receives a cost discount; a move only fires when the destination saves at least `druid.coordinator.balancer.diskNormalized.moveCostSavingsThreshold` (default `0.05`, i.e. 5%) of the source's cost. - `random`: Distributes segments randomly across servers. This is an experimental strategy and is not recommended for a production cluster. All of the above strategies prioritize moving segments from the Historical with the least available disk space. 
diff --git a/docs/design/zookeeper.md b/docs/design/zookeeper.md index ca64e1a0d5bc..d69ba92f0a1c 100644 --- a/docs/design/zookeeper.md +++ b/docs/design/zookeeper.md @@ -36,9 +36,8 @@ The operations that happen over ZK are: 1. [Coordinator](../design/coordinator.md) leader election 2. [Overlord](../design/overlord.md) leader election 3. Service (node) announcement and discovery — services announce their presence so other services can find them -4. [Overlord](../design/overlord.md) and [Middle Manager](../design/middlemanager.md) task management -Segment loading, dropping, and discovery no longer use ZooKeeper — they are served over HTTP. +Segment loading, segment discovery, and Overlord ↔ Middle Manager task management no longer use ZooKeeper — they are served over HTTP. ## Coordinator leader election diff --git a/docs/development/extensions-core/k8s-jobs.md b/docs/development/extensions-core/k8s-jobs.md index 67be33522ef1..b65a7bb496bd 100644 --- a/docs/development/extensions-core/k8s-jobs.md +++ b/docs/development/extensions-core/k8s-jobs.md @@ -1019,7 +1019,7 @@ To do this, set the following property. |Property| Possible Values |Description|Default|required| |--------|-----------------|-----------|-------|--------| |`druid.indexer.runner.k8sAndWorker.runnerStrategy.type`| `String` (e.g., `k8s`, `worker`, `taskType`)| Defines the strategy for task runner selection. |`k8s`|No| -|`druid.indexer.runner.k8sAndWorker.runnerStrategy.workerType`| `String` (e.g., `httpRemote`, `remote`)| Specifies the variant of the worker task runner to be utilized.|`httpRemote`|No| +|`druid.indexer.runner.k8sAndWorker.runnerStrategy.workerType`| `String` (e.g., `httpRemote`)| Specifies the variant of the worker task runner to be utilized.|`httpRemote`|No| | **For `taskType` runner strategy:**||||| |`druid.indexer.runner.k8sAndWorker.runnerStrategy.taskType.default`| `String` (e.g., `k8s`, `worker`) | Specifies the default runner to use if no overrides apply. 
This setting ensures there is always a fallback runner available.|None|No| |`druid.indexer.runner.k8sAndWorker.runnerStrategy.taskType.overrides`| `JsonObject`(e.g., `{"index_kafka": "worker"}`)| Defines task-specific overrides for runner types. Each entry sets a task type to a specific runner, allowing fine control. |`{}`|No| diff --git a/docs/ingestion/native-batch.md b/docs/ingestion/native-batch.md index 50eaf43366dc..986d7e977975 100644 --- a/docs/ingestion/native-batch.md +++ b/docs/ingestion/native-batch.md @@ -24,7 +24,7 @@ sidebar_label: JSON-based batch --> :::info - This page describes JSON-based batch ingestion using [ingestion specs](ingestion-spec.md). For SQL-based batch ingestion using the [`druid-multi-stage-query`](../multi-stage-query/index.md) engine, see [SQL-based ingestion](../multi-stage-query/index.md). Refer to the [ingestion methods](../ingestion/index.md#batch) table to determine which ingestion method is right for you. + This page describes JSON-based batch ingestion using [ingestion specs](ingestion-spec.md). For SQL-based batch ingestion using the [multi-stage query (MSQ) task engine](../multi-stage-query/index.md), see [SQL-based ingestion](../multi-stage-query/index.md). Refer to the [ingestion methods](../ingestion/index.md#batch) table to determine which ingestion method is right for you. ::: Apache Druid supports the following types of JSON-based batch indexing tasks: diff --git a/docs/multi-stage-query/reference.md b/docs/multi-stage-query/reference.md index 6fb63924ee75..82d85b52eb73 100644 --- a/docs/multi-stage-query/reference.md +++ b/docs/multi-stage-query/reference.md @@ -417,6 +417,7 @@ The following table lists the context parameters for the MSQ task engine: | `includeAllCounters` | SELECT, INSERT or REPLACE

Whether to include counters that were added in Druid 31 or later. This is a backwards compatibility option that must be set to `false` during a rolling update from versions prior to Druid 31. | `true` | | `maxFrameSize` | SELECT, INSERT or REPLACE

Size of frames used for data transfer within the MSQ engine. You generally do not need to change this unless you have very large rows. | `1000000` (1 MB) | | `maxInputFilesPerWorker` | SELECT, INSERT, REPLACE

Maximum number of input files or segments per worker. If a single worker would need to read more than this number of files, the query fails with a `TooManyInputFiles` error. In this case, you should either increase this limit if your tasks have enough memory to handle more files, add more workers by increasing `maxNumTasks`, or split your query into smaller queries that process fewer files. | 10,000 | +| `backgroundFetchExternalFiles` | SELECT, INSERT, REPLACE

Controls how workers read cloud storage files (e.g. `s3`, `gs`, `azure`) referenced by [`EXTERN`](#extern-function). When `true` (the default), each worker fetches its input files asynchronously when possible before reading them. This overlaps downloading with processing, which generally improves throughput when reading larger numbers of files. Otherwise, workers stream each file directly from cloud storage while processing it.

This setting has no effect for input sources that use `prefix`, or input sources that request [system fields](../ingestion/input-sources.md). | `true` | | `maxPartitions` | SELECT, INSERT, REPLACE

Maximum number of output partitions for any single stage. For INSERT or REPLACE queries, this controls the maximum number of segments that can be generated. If the query would exceed this limit, it fails with a `TooManyPartitions` error. You can increase this limit if needed, break your query into smaller queries, or use a larger target segment size (via `rowsPerSegment`). | 25,000 | | `maxThreads` | SELECT, INSERT or REPLACE

Maximum number of threads to use for processing. This only has an effect if it is greater than zero and less than the default thread count based on system configuration. Otherwise, it is ignored, and workers use the default thread count. | Not set (use default thread count) | diff --git a/docs/multi-stage-query/security.md b/docs/multi-stage-query/security.md index 77acafc29f51..0a50b68d4d6f 100644 --- a/docs/multi-stage-query/security.md +++ b/docs/multi-stage-query/security.md @@ -23,9 +23,9 @@ sidebar_label: Security ~ under the License. --> -All authenticated users can use the multi-stage query task engine (MSQ task engine) through the UI and API if the -extension is loaded. However, without additional permissions, users are not able to issue queries that read or write -Druid datasources or external data. The permission needed depends on what the user is trying to do. +All authenticated users can use the multi-stage query task engine (MSQ task engine) through the UI and API. However, +without additional permissions, users are not able to issue queries that read or write Druid datasources or external +data. The permission needed depends on what the user is trying to do. To submit a query: @@ -77,4 +77,3 @@ The MSQ task engine needs the following permissions for pushing, fetching, and r - `Microsoft.Storage/storageAccounts/blobServices/containers/blobs/delete` to delete files when they're no longer needed. - diff --git a/docs/operations/java.md b/docs/operations/java.md index f4a8c029db24..c6117e1f4263 100644 --- a/docs/operations/java.md +++ b/docs/operations/java.md @@ -85,5 +85,9 @@ added. There are many ways of doing this. 
Choose the one that works best for you --add-opens=java.base/jdk.internal.ref=ALL-UNNAMED \ --add-opens=java.base/java.io=ALL-UNNAMED \ --add-opens=java.base/java.lang=ALL-UNNAMED \ ---add-opens=jdk.management/com.sun.management.internal=ALL-UNNAMED +--add-opens=jdk.management/com.sun.management.internal=ALL-UNNAMED \ +--add-modules=jdk.incubator.vector ``` + +The `--add-modules=jdk.incubator.vector` flag is optional, but adding it makes the JDK's incubator Vector API available +to Druid to support `druid.expressions.useVectorApi=true`. diff --git a/docs/operations/web-console.md b/docs/operations/web-console.md index ef1118ebc4ce..5d935106c3d5 100644 --- a/docs/operations/web-console.md +++ b/docs/operations/web-console.md @@ -65,7 +65,7 @@ You can access the [data loader](#data-loader) and [lookups view](#lookups) from ## Query -SQL-based ingestion and the multi-stage query task engine use the **Query** view, which provides you with a UI to edit and use SQL queries. You should see this UI automatically in Druid 24.0 and later since the multi-stage query extension is loaded by default. +SQL-based ingestion and the multi-stage query task engine use the **Query** view, which provides you with a UI to edit and use SQL queries. The following screenshot shows a populated enhanced **Query** view along with a description of its parts: diff --git a/docs/querying/aggregations.md b/docs/querying/aggregations.md index c7b7d4e4efc2..3add7863c46a 100644 --- a/docs/querying/aggregations.md +++ b/docs/querying/aggregations.md @@ -471,7 +471,11 @@ For these reasons, we have deprecated this aggregator and recommend using the Da ### Expression aggregator -Aggregator applicable only at query time. Aggregates results using [Druid expressions](./math-expr.md) functions to facilitate building custom functions. +Aggregates results using [Druid expressions](./math-expr.md) functions to facilitate building custom functions. 
+ +The expression aggregator can be used at query time with any intermediate type. It can also be used at ingest time, but +only when the type of `initialValue` is a primitive numeric type (`LONG` or `DOUBLE`) and matches the type of +`initialCombineValue`. Other intermediate types, such as strings, arrays, and complex types, are query-time only. | Property | Description | Required | | --- | --- | --- | diff --git a/docs/querying/query-context-reference.md b/docs/querying/query-context-reference.md index c485c0231c06..41bd206199e7 100644 --- a/docs/querying/query-context-reference.md +++ b/docs/querying/query-context-reference.md @@ -68,7 +68,7 @@ Unless otherwise noted, the following parameters apply to all query types, and t |`useFilterCNF`|`false`| If true, Druid will attempt to convert the query filter to Conjunctive Normal Form (CNF). During query processing, columns can be pre-filtered by intersecting the bitmap indexes of all values that match the eligible filters, often greatly reducing the raw number of rows which need to be scanned. But this effect only happens for the top level filter, or individual clauses of a top level 'and' filter. As such, filters in CNF potentially have a higher chance to utilize a large amount of bitmap indexes on string columns during pre-filtering. However, this setting should be used with great caution, as it can sometimes have a negative effect on performance, and in some cases, the act of computing CNF of a filter can be expensive. We recommend hand tuning your filters to produce an optimal form if possible, or at least verifying through experimentation that using this parameter actually improves your query performance with no ill-effects.| |`secondaryPartitionPruning`|`true`|Enable secondary partition pruning on the Broker. 
The Broker will always prune unnecessary segments from the input scan based on a filter on time intervals, but if the data is further partitioned with hash or range partitioning, this option will enable additional pruning based on a filter on secondary partition dimensions.| |`debug`| `false` | Flag indicating whether to enable debugging outputs for the query. When set to false, no additional logs will be produced (logs produced will be entirely dependent on your logging level). When set to true, the following additional logs will be produced:
- Log the stack trace of the exception (if any) produced by the query | -|`setProcessingThreadNames`|`true`| Whether processing thread names will be set to `queryType_dataSource_intervals` while processing a query. This aids in interpreting thread dumps, and is on by default. Query overhead can be reduced slightly by setting this to `false`. This has a tiny effect in most scenarios, but can be meaningful in high-QPS, low-per-segment-processing-time scenarios. | +|`setProcessingThreadNames`|`false`| Flag indicating whether processing thread names will be set to `processing_` while processing a query. Thread renaming aids in interpreting thread dumps, but has measurable thread renaming overhead when segment scans are very quick. | |`sqlPlannerBloat`|`1000`|Calcite parameter which controls whether to merge two Project operators when inlining expressions causes complexity to increase. Implemented as a workaround to exception `There are not enough rules to produce a node with desired properties: convention=DRUID, sort=[]` thrown after rejecting the merge of two projects.| |`cloneQueryMode`|`excludeClones`| Indicates whether clone Historicals should be queried by brokers. Clone servers are created by the `cloneServers` Coordinator dynamic configuration. Possible values are `excludeClones`, `includeClones` and `preferClones`. `excludeClones` means that clone Historicals are not queried by the broker. `preferClones` indicates that when given a choice between the clone Historical and the original Historical which is being cloned, the broker chooses the clones. Historicals which are not involved in the cloning process will still be queried. `includeClones` means that broker queries any Historical without regarding clone status. This parameter only affects native queries. MSQ does not query Historicals directly.| |`realtimeSegmentsMode` |`include`| Controls whether realtime segments are queried. `include` queries all segments, including realtime. 
`exclude` skips realtime segments. `exclusive` queries only realtime segments. | @@ -140,4 +140,3 @@ For more information, see the following topics: - [Set query context](./query-context.md) to learn how to configure query context parameters. - [SQL query context](sql-query-context.md) for query context parameters specific to Druid SQL. - [SQL-based ingestion reference](../multi-stage-query/reference/#context-parameters) for context parameters used in SQL-based ingestion (MSQ). - diff --git a/docs/tutorials/index.md b/docs/tutorials/index.md index f270c1b74353..730fef78d074 100644 --- a/docs/tutorials/index.md +++ b/docs/tutorials/index.md @@ -67,7 +67,7 @@ The distribution directory contains `LICENSE` and `NOTICE` files and subdirector ## Start up Druid services Start up Druid services using the automatic single-machine configuration. -This configuration includes default settings that are appropriate for this tutorial, such as loading the `druid-multi-stage-query` extension by default so that you can use the MSQ task engine. +This configuration includes default settings that are appropriate for this tutorial. You can view the default settings in the configuration files located in `conf/druid/auto`. diff --git a/docs/tutorials/tutorial-msq-convert-spec.md b/docs/tutorials/tutorial-msq-convert-spec.md index 0d386bc06293..a8501284ca9d 100644 --- a/docs/tutorials/tutorial-msq-convert-spec.md +++ b/docs/tutorials/tutorial-msq-convert-spec.md @@ -25,9 +25,8 @@ description: How to convert an ingestion spec to a query for SQL-based ingestion --> :::info - This page describes SQL-based batch ingestion using the [`druid-multi-stage-query`](../multi-stage-query/index.md) - extension, new in Druid 24.0. Refer to the [ingestion methods](../ingestion/index.md#batch) table to determine which - ingestion method is right for you. + This page describes SQL-based batch ingestion using the [multi-stage query (MSQ) task engine](../multi-stage-query/index.md). 
+ Refer to the [ingestion methods](../ingestion/index.md#batch) table to determine which ingestion method is right for you. ::: If you're already ingesting data with [native batch ingestion](../ingestion/native-batch.md), you can use the [web console](../operations/web-console.md) to convert the ingestion spec to a SQL query that the multi-stage query task engine can use to ingest data. diff --git a/docs/tutorials/tutorial-msq-extern.md b/docs/tutorials/tutorial-msq-extern.md index dcd0d5095980..1cb7aac89092 100644 --- a/docs/tutorials/tutorial-msq-extern.md +++ b/docs/tutorials/tutorial-msq-extern.md @@ -25,9 +25,8 @@ description: How to generate a query that references externally hosted data --> :::info - This page describes SQL-based batch ingestion using the [`druid-multi-stage-query`](../multi-stage-query/index.md) - extension, new in Druid 24.0. Refer to the [ingestion methods](../ingestion/index.md#batch) table to determine which - ingestion method is right for you. + This page describes SQL-based batch ingestion using the [multi-stage query (MSQ) task engine](../multi-stage-query/index.md). + Refer to the [ingestion methods](../ingestion/index.md#batch) table to determine which ingestion method is right for you. ::: This tutorial demonstrates how to generate a query that references externally hosted data using the **Connect external data** wizard. 
diff --git a/embedded-tests/src/test/java/org/apache/druid/testing/embedded/compact/CompactionTaskTest.java b/embedded-tests/src/test/java/org/apache/druid/testing/embedded/compact/CompactionTaskTest.java index 84ee947c8467..4692ec0715f5 100644 --- a/embedded-tests/src/test/java/org/apache/druid/testing/embedded/compact/CompactionTaskTest.java +++ b/embedded-tests/src/test/java/org/apache/druid/testing/embedded/compact/CompactionTaskTest.java @@ -33,9 +33,12 @@ import org.apache.druid.java.util.common.granularity.Granularity; import org.apache.druid.java.util.common.jackson.JacksonUtils; import org.apache.druid.query.Druids; +import org.apache.druid.query.aggregation.CountAggregatorFactory; +import org.apache.druid.query.aggregation.ExpressionLambdaAggregatorFactory; import org.apache.druid.query.aggregation.datasketches.hll.HllSketchModule; import org.apache.druid.query.aggregation.datasketches.quantiles.DoublesSketchModule; import org.apache.druid.query.aggregation.datasketches.theta.SketchModule; +import org.apache.druid.query.expression.TestExprMacroTable; import org.apache.druid.query.metadata.metadata.SegmentMetadataQuery; import org.apache.druid.segment.TestHelper; import org.apache.druid.testing.embedded.EmbeddedClusterApis; @@ -55,6 +58,7 @@ import java.util.ArrayList; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.function.Supplier; import java.util.stream.Collectors; @@ -107,6 +111,65 @@ public class CompactionTaskTest extends CompactionTestBase "namespace", "continent", "country", "region", "city", "timestamp" ); + /** + * Index task identical in shape to {@link MoreResources.Task#INDEX_TASK_WITH_AGGREGATORS} but with a pair of + * {@link ExpressionLambdaAggregatorFactory} metrics over the {@code added} long field. Used by + * {@link #testCompactionWithExpressionLambdaAggregator} to verify that an expression aggregator works correctly. 
+ */ + private static final Supplier INDEX_TASK_WITH_EXPR_AGG = () -> + TaskBuilder + .ofTypeIndex() + .jsonInputFormat() + .localInputSourceWithFiles( + Resources.DataFile.tinyWiki1Json(), + Resources.DataFile.tinyWiki2Json(), + Resources.DataFile.tinyWiki3Json() + ) + .timestampColumn("timestamp") + .dimensions( + "page", + "language", "tags", "user", "unpatrolled", "newPage", "robot", + "anonymous", "namespace", "continent", "country", "region", "city" + ) + .metricAggregates( + new CountAggregatorFactory("ingested_events"), + new ExpressionLambdaAggregatorFactory( + "added_sum_expr", + Set.of("added"), + null, + "0", + null, + null, + false, + false, + "__acc + added", + null, + null, + null, + null, + TestExprMacroTable.INSTANCE + ), + new ExpressionLambdaAggregatorFactory( + "added_or_expr", + Set.of("added"), + null, + "0", + null, + null, + false, + false, + "bitwiseOr(\"__acc\", \"added\")", + null, + null, + null, + null, + TestExprMacroTable.INSTANCE + ) + ) + .dynamicPartitionWithMaxRows(3) + .granularitySpec("DAY", "SECOND", true) + .appendToExisting(false); + private String fullDatasourceName; @BeforeEach @@ -259,6 +322,33 @@ public void testCompactionWithTimestampDimension() throws Exception loadDataAndCompact(INDEX_TASK_WITH_TIMESTAMP.get(), COMPACTION_TASK.get(), null); } + @Test + public void testCompactionWithExpressionLambdaAggregator() throws Exception + { + try (final Closeable ignored = unloader(fullDatasourceName)) { + runTask(INDEX_TASK_WITH_EXPR_AGG.get()); + verifySegmentsCount(4); + + // Snapshot metric values prior to compaction. + final String preCompact = cluster.runSql( + "SELECT SUM(added_sum_expr), SUM(added_or_expr) FROM %s", + fullDatasourceName + ); + + // Compact 4 segments -> 2; this performs cross-segment rollup which drives RowCombiningTimeAndDimsIterator + // into ExpressionLambdaAggregatorFactory.makeAggregateCombiner(). 
+ compactData(COMPACTION_TASK.get(), null, null); + verifySegmentsCount(2); + + // Metric values must round-trip through compaction unchanged. + final String postCompact = cluster.runSql( + "SELECT SUM(added_sum_expr), SUM(added_or_expr) FROM %s", + fullDatasourceName + ); + Assertions.assertEquals(preCompact, postCompact); + } + } + private void loadDataAndCompact( TaskBuilder.Index indexTask, TaskBuilder.Compact compactionResource, diff --git a/embedded-tests/src/test/java/org/apache/druid/testing/embedded/indexing/KafkaBoundedSupervisorTest.java b/embedded-tests/src/test/java/org/apache/druid/testing/embedded/indexing/KafkaBoundedSupervisorTest.java index 7e22d85d9cab..fa184418df52 100644 --- a/embedded-tests/src/test/java/org/apache/druid/testing/embedded/indexing/KafkaBoundedSupervisorTest.java +++ b/embedded-tests/src/test/java/org/apache/druid/testing/embedded/indexing/KafkaBoundedSupervisorTest.java @@ -292,6 +292,48 @@ public void test_boundedSupervisor_doesNotSilentlyCompleteWhenStaleOffsetExceeds Assertions.assertEquals("UNHEALTHY_SUPERVISOR", status2.getState(), "Supervisor state should be UNHEALTHY_SUPERVISOR"); } + @Test + public void test_resetToLatestAndBackfill() + { + final String topic = IdUtils.getRandomId(); + kafkaServer.createTopicWithPartitions(topic, 2); + + // Create a streaming supervisor with concurrent locks and withUseEarliestSequenceNumber=false + final KafkaSupervisorSpec supervisor = createKafkaSupervisor(kafkaServer) + .withContext(Map.of("useConcurrentLocks", true)) + .withIoConfig(io -> io + .withKafkaInputFormat(new JsonInputFormat(null, null, null, null, null)) + .withUseEarliestSequenceNumber(false) + ) + .build(dataSource, topic); + + cluster.callApi().postSupervisor(supervisor); + + waitForSupervisorDetailedState(supervisor.getId(), "RUNNING"); + + final int totalRecords = publish1kRecords(topic, false); + waitUntilPublishedRecordsAreIngested(totalRecords); + + // Reset the main supervisor and spin up a backfill supervisor. 
+ // Since all records are already ingested before the call, the backfill + // supervisor will complete immediately without ingesting anything. + final Map result = cluster.callApi().resetToLatestAndBackfill(supervisor.getId()); + Assertions.assertEquals(supervisor.getId(), result.get("id")); + final String backfillSupervisorId = (String) result.get("backfillSupervisorId"); + + // Wait for the backfill to finish + waitForSupervisorToComplete(backfillSupervisorId); + + // Main supervisor should still be running + final SupervisorStatus mainStatus = cluster.callApi().getSupervisorStatus(supervisor.getId()); + Assertions.assertEquals("RUNNING", mainStatus.getState()); + Assertions.assertTrue(mainStatus.isHealthy()); + + final SupervisorStatus backfillStatus = cluster.callApi().getSupervisorStatus(backfillSupervisorId); + Assertions.assertEquals("COMPLETED", backfillStatus.getState()); + Assertions.assertTrue(backfillStatus.isHealthy()); + } + private void waitForSupervisorToComplete(String supervisorId) { overlord.latchableEmitter().waitForEvent( @@ -301,6 +343,15 @@ private void waitForSupervisorToComplete(String supervisorId) ); } + private void waitForSupervisorDetailedState(String supervisorId, String detailedState) + { + overlord.latchableEmitter().waitForEvent( + event -> event.hasMetricName("supervisor/count") + .hasDimension(DruidMetrics.SUPERVISOR_ID, supervisorId) + .hasDimension("detailedState", detailedState) + ); + } + private void waitForSupervisorToBeUnhealthy(String supervisorId) { overlord.latchableEmitter().waitForEvent( diff --git a/embedded-tests/src/test/java/org/apache/druid/testing/embedded/msq/EmbeddedMSQApis.java b/embedded-tests/src/test/java/org/apache/druid/testing/embedded/msq/EmbeddedMSQApis.java index e58ad81b4659..8154ac988a3d 100644 --- a/embedded-tests/src/test/java/org/apache/druid/testing/embedded/msq/EmbeddedMSQApis.java +++ b/embedded-tests/src/test/java/org/apache/druid/testing/embedded/msq/EmbeddedMSQApis.java @@ -243,44 +243,61 
@@ public boolean cancelDartQuery(String sqlQueryId, EmbeddedBroker targetBroker) } /** - * Returns the sum of completed queries across all input channel snapshots from all stages and workers. + * Returns the sums of all input channel counters across all workers. */ - public long getQueriesSum(final MSQTaskReportPayload payload) + public ChannelSums getInputChannelSums(final MSQTaskReportPayload payload, final int stageNumber) { - return getAllInputChannelCounters(payload) - .stream() - .filter(snapshot -> snapshot.getQueries() != null) - .mapToLong(snapshot -> Arrays.stream(snapshot.getQueries()).sum()) - .sum(); + long rows = 0; + long bytes = 0; + long files = 0; + long totalFiles = 0; + long queries = 0; + long totalQueries = 0; + long loadBytes = 0; + long loadFiles = 0; + long loadTime = 0; + long loadWait = 0; + + for (final ChannelCounters.Snapshot snapshot : getAllInputChannelCounters(payload, stageNumber)) { + rows += sum(snapshot.getRows()); + bytes += sum(snapshot.getBytes()); + files += sum(snapshot.getFiles()); + totalFiles += sum(snapshot.getTotalFiles()); + queries += sum(snapshot.getQueries()); + totalQueries += sum(snapshot.getTotalQueries()); + loadBytes += sum(snapshot.getLoadBytes()); + loadFiles += sum(snapshot.getLoadFiles()); + loadTime += sum(snapshot.getLoadTime()); + loadWait += sum(snapshot.getLoadWait()); + } + + return new ChannelSums(rows, bytes, files, totalFiles, queries, totalQueries, loadBytes, loadFiles, loadTime, loadWait); } /** - * Returns the sum of files read across all input channel snapshots from all stages and workers. + * Sums the values of a nullable channel counter array, treating {@code null} as empty. 
*/ - public long getFilesSum(final MSQTaskReportPayload payload) + private static long sum(@Nullable final long[] values) { - return getAllInputChannelCounters(payload) - .stream() - .filter(snapshot -> snapshot.getFiles() != null) - .mapToLong(snapshot -> Arrays.stream(snapshot.getFiles()).sum()) - .sum(); + return values == null ? 0 : Arrays.stream(values).sum(); } /** - * Returns all {@link ChannelCounters.Snapshot} from input channels across all stages and workers. + * Returns all {@link ChannelCounters.Snapshot} from input channels across all workers for a stage. */ - private List getAllInputChannelCounters(final MSQTaskReportPayload payload) + private List getAllInputChannelCounters( + final MSQTaskReportPayload payload, + final int stageNumber + ) { - final Map> countersMap = payload.getCounters().copyMap(); final List snapshots = new ArrayList<>(); + final Map stageMap = payload.getCounters().snapshotForStage(stageNumber); - for (final Map.Entry> stageEntry : countersMap.entrySet()) { - for (final Map.Entry workerEntry : stageEntry.getValue().entrySet()) { - for (final Map.Entry counterEntry : workerEntry.getValue().getMap().entrySet()) { - if (counterEntry.getKey().startsWith("input") - && counterEntry.getValue() instanceof ChannelCounters.Snapshot) { - snapshots.add((ChannelCounters.Snapshot) counterEntry.getValue()); - } + for (final Map.Entry workerEntry : stageMap.entrySet()) { + for (final Map.Entry counterEntry : workerEntry.getValue().getMap().entrySet()) { + if (counterEntry.getKey().startsWith("input") + && counterEntry.getValue() instanceof ChannelCounters.Snapshot counterSnapshot) { + snapshots.add(counterSnapshot); } } } @@ -297,4 +314,33 @@ private static GetQueryReportResponse parseReportResponse(String responseJson, O throw DruidException.defensive(e, "Failed to parse query report response[%s]", responseJson); } } + + /** + * Sums of input channel counters computed by {@link #getInputChannelSums(MSQTaskReportPayload, int)}. 
+ * + * @param rows total rows read + * @param bytes total bytes read + * @param files total files read + * @param totalFiles total number of files to read + * @param queries total queries completed + * @param totalQueries total number of queries to run + * @param loadBytes total bytes loaded into the virtual storage file cache (VSF) + * @param loadFiles total files loaded into the VSF + * @param loadTime total time (in milliseconds) spent loading files into the VSF + * @param loadWait total time (in milliseconds) spent waiting to load files into the VSF + */ + public record ChannelSums( + long rows, + long bytes, + long files, + long totalFiles, + long queries, + long totalQueries, + long loadBytes, + long loadFiles, + long loadTime, + long loadWait + ) + { + } } diff --git a/embedded-tests/src/test/java/org/apache/druid/testing/embedded/msq/EmbeddedMSQRealtimeQueryTest.java b/embedded-tests/src/test/java/org/apache/druid/testing/embedded/msq/EmbeddedMSQRealtimeQueryTest.java index a651ccf03ad2..2d3e53b2a039 100644 --- a/embedded-tests/src/test/java/org/apache/druid/testing/embedded/msq/EmbeddedMSQRealtimeQueryTest.java +++ b/embedded-tests/src/test/java/org/apache/druid/testing/embedded/msq/EmbeddedMSQRealtimeQueryTest.java @@ -38,7 +38,6 @@ import org.apache.druid.testing.embedded.EmbeddedRouter; import org.hamcrest.CoreMatchers; import org.hamcrest.MatcherAssert; -import org.hamcrest.Matchers; import org.junit.internal.matchers.ThrowableMessageMatcher; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeAll; @@ -189,9 +188,18 @@ public void test_selectCount_task_withRealtime() payload.getResults().getResults() ); - // Verify that realtime queries were issued and no files were read (all data is realtime). 
- MatcherAssert.assertThat(msqApis.getQueriesSum(payload), Matchers.greaterThan(0L)); - Assertions.assertEquals(0, msqApis.getFilesSum(payload)); + // Verify that for the first stage, realtime queries were issued and no files were read (all data is realtime). + final EmbeddedMSQApis.ChannelSums channelSums = msqApis.getInputChannelSums(payload, 0); + + // 2 realtime tasks running on an Indexer => 1 query that gets data from both. + Assertions.assertEquals(1, channelSums.queries()); + Assertions.assertEquals(1, channelSums.totalQueries()); + Assertions.assertEquals(0, channelSums.files()); + Assertions.assertEquals(0, channelSums.totalFiles()); + + // We get 1 row back with the COUNT from both tasks. + Assertions.assertEquals(1, channelSums.rows()); + Assertions.assertEquals(0, channelSums.bytes()); // Realtime queries do not report bytes } @Test @@ -432,6 +440,19 @@ public void test_selectJoinWithConcatVirtualDimension_task_withRealtime() ), payload.getResults().getResults() ); + + // Verify that for the first stage, realtime queries were issued and no files were read (all data is realtime). + final EmbeddedMSQApis.ChannelSums channelSums = msqApis.getInputChannelSums(payload, 0); + + // 2 realtime tasks running on an Indexer => 1 query that gets data from both. + Assertions.assertEquals(1, channelSums.queries()); + Assertions.assertEquals(1, channelSums.totalQueries()); + Assertions.assertEquals(0, channelSums.files()); + Assertions.assertEquals(0, channelSums.totalFiles()); + + // We get 3 rows back. 
+ Assertions.assertEquals(3, channelSums.rows()); + Assertions.assertEquals(0, channelSums.bytes()); // Realtime queries do not report bytes } @Test @@ -488,7 +509,6 @@ public void test_scanWithFilter_task_withRealtime() ); } - @Test @Timeout(60) public void test_scanWithFilter_dart() diff --git a/embedded-tests/src/test/java/org/apache/druid/testing/embedded/msq/S3ExternQueryTest.java b/embedded-tests/src/test/java/org/apache/druid/testing/embedded/msq/S3ExternQueryTest.java new file mode 100644 index 000000000000..8eb06e76a399 --- /dev/null +++ b/embedded-tests/src/test/java/org/apache/druid/testing/embedded/msq/S3ExternQueryTest.java @@ -0,0 +1,215 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.testing.embedded.msq; + +import org.apache.druid.data.input.s3.S3InputSourceDruidModule; +import org.apache.druid.java.util.common.StringUtils; +import org.apache.druid.msq.indexing.report.MSQTaskReportPayload; +import org.apache.druid.sql.calcite.BaseCalciteQueryTest; +import org.apache.druid.testing.embedded.EmbeddedBroker; +import org.apache.druid.testing.embedded.EmbeddedCoordinator; +import org.apache.druid.testing.embedded.EmbeddedDruidCluster; +import org.apache.druid.testing.embedded.EmbeddedIndexer; +import org.apache.druid.testing.embedded.EmbeddedOverlord; +import org.apache.druid.testing.embedded.junit5.EmbeddedClusterTestBase; +import org.apache.druid.testing.embedded.minio.MinIOStorageResource; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import software.amazon.awssdk.core.sync.RequestBody; +import software.amazon.awssdk.services.s3.S3Client; +import software.amazon.awssdk.services.s3.model.PutObjectRequest; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.List; +import java.util.zip.GZIPOutputStream; + +/** + * Tests MSQ task-based SELECT queries that read external data via {@code EXTERN} from S3 (backed by MinIO). + */ +public class S3ExternQueryTest extends EmbeddedClusterTestBase +{ + /** + * Base key under the bucket where the external input files are uploaded. 
+ */ + private static final String DATA_PATH = "extern-input"; + private static final String PLAIN_FILE = "data1.json"; + private static final String GZ_FILE = "data2.json.gz"; + + private static final String PLAIN_FILE_CONTENT = """ + {"timestamp":"2020-01-01T00:00:00Z","page":"A","added":10} + {"timestamp":"2020-01-01T01:00:00Z","page":"B","added":20} + """; + + private static final String GZ_FILE_CONTENT = """ + {"timestamp":"2020-01-02T00:00:00Z","page":"C","added":30} + {"timestamp":"2020-01-02T01:00:00Z","page":"D","added":40} + {"timestamp":"2020-01-02T02:00:00Z","page":"E","added":50} + """; + + /** + * Total number of bytes uploaded across {@link #PLAIN_FILE} and {@link #GZ_FILE}. Populated by + * {@link #uploadExternalFiles()} and used to verify the VSF {@code loadBytes} counter. + */ + private long totalUploadedBytes; + + private final MinIOStorageResource storageResource = new MinIOStorageResource(); + private final EmbeddedOverlord overlord = new EmbeddedOverlord(); + private final EmbeddedCoordinator coordinator = new EmbeddedCoordinator(); + private final EmbeddedIndexer indexer = new EmbeddedIndexer() + .setServerMemory(300_000_000L) + .addProperty("druid.worker.capacity", "2"); + private final EmbeddedBroker broker = new EmbeddedBroker().setServerMemory(200_000_000); + + private EmbeddedMSQApis msqApis; + + @Override + protected EmbeddedDruidCluster createCluster() + { + return EmbeddedDruidCluster + .withEmbeddedDerbyAndZookeeper() + .useLatchableEmitter() + .addResource(storageResource) + .addExtension(S3InputSourceDruidModule.class) + .addServer(overlord) + .addServer(coordinator) + .addServer(indexer) + .addServer(broker); + } + + @BeforeAll + public void setupCluster() throws IOException + { + msqApis = new EmbeddedMSQApis(cluster, overlord); + uploadExternalFiles(); + } + + @Test + public void test_extern_backgroundFetchEnabled() + { + runQueryAndVerify(true); + } + + @Test + public void test_extern_backgroundFetchDisabled() + { + 
runQueryAndVerify(false); + } + + private void runQueryAndVerify(final boolean backgroundFetchExternalFiles) + { + final String inputSourceJson = StringUtils.format( + "{\"type\":\"s3\",\"uris\":[\"s3://%s/%s/%s\",\"s3://%s/%s/%s\"]}", + storageResource.getBucket(), DATA_PATH, PLAIN_FILE, + storageResource.getBucket(), DATA_PATH, GZ_FILE + ); + + final String sql = StringUtils.format( + """ + SET backgroundFetchExternalFiles = %s; + SELECT page, added + FROM TABLE( + EXTERN( + '%s', + '{"type":"json"}' + ) + ) EXTEND ("page" VARCHAR, "added" BIGINT) + ORDER BY page + """, + backgroundFetchExternalFiles, + inputSourceJson + ); + + final MSQTaskReportPayload report = msqApis.runTaskSqlAndGetReport(sql); + + BaseCalciteQueryTest.assertResultsEquals( + sql, + List.of( + new Object[]{"A", 10}, + new Object[]{"B", 20}, + new Object[]{"C", 30}, + new Object[]{"D", 40}, + new Object[]{"E", 50} + ), + report.getResults().getResults() + ); + + // Verify input counters. + final EmbeddedMSQApis.ChannelSums channelSums = msqApis.getInputChannelSums(report, 0); + Assertions.assertEquals(2, channelSums.files(), "files"); + Assertions.assertEquals(2, channelSums.totalFiles(), "totalFiles"); + Assertions.assertEquals(5, channelSums.rows(), "rows"); + Assertions.assertEquals(PLAIN_FILE_CONTENT.length() + GZ_FILE_CONTENT.length(), channelSums.bytes(), "bytes"); + Assertions.assertEquals(0, channelSums.queries(), "queries"); + Assertions.assertEquals(0, channelSums.totalQueries(), "totalQueries"); + + // Verify load counters.
+ if (backgroundFetchExternalFiles) { + Assertions.assertEquals(totalUploadedBytes, channelSums.loadBytes(), "VSF bytes loaded"); + Assertions.assertEquals(2, channelSums.loadFiles(), "VSF files loaded"); + Assertions.assertTrue(channelSums.loadTime() >= 0, "VSF load time"); + Assertions.assertEquals(0, channelSums.loadWait(), "VSF load wait time"); + } else { + Assertions.assertEquals(0, channelSums.loadBytes(), "VSF bytes loaded"); + Assertions.assertEquals(0, channelSums.loadFiles(), "VSF files loaded"); + Assertions.assertEquals(0, channelSums.loadTime(), "VSF load time"); + Assertions.assertEquals(0, channelSums.loadWait(), "VSF load wait time"); + } + } + + /** + * Uploads {@link #PLAIN_FILE} (plain JSON) and {@link #GZ_FILE} (gzipped JSON) to the MinIO bucket. + */ + private void uploadExternalFiles() throws IOException + { + final S3Client s3Client = storageResource.getS3Client(); + + final byte[] plainBytes = PLAIN_FILE_CONTENT.getBytes(StandardCharsets.UTF_8); + final byte[] gzBytes = gzip(GZ_FILE_CONTENT); + totalUploadedBytes += plainBytes.length + gzBytes.length; + + s3Client.putObject( + PutObjectRequest.builder() + .bucket(storageResource.getBucket()) + .key(DATA_PATH + "/" + PLAIN_FILE) + .build(), + RequestBody.fromBytes(plainBytes) + ); + + s3Client.putObject( + PutObjectRequest.builder() + .bucket(storageResource.getBucket()) + .key(DATA_PATH + "/" + GZ_FILE) + .build(), + RequestBody.fromBytes(gzBytes) + ); + } + + private static byte[] gzip(final String content) throws IOException + { + final ByteArrayOutputStream baos = new ByteArrayOutputStream(); + try (final GZIPOutputStream gzipStream = new GZIPOutputStream(baos)) { + gzipStream.write(StringUtils.toUtf8(content)); + } + return baos.toByteArray(); + } +} diff --git a/embedded-tests/src/test/java/org/apache/druid/testing/embedded/query/QueryVirtualStorageTest.java b/embedded-tests/src/test/java/org/apache/druid/testing/embedded/query/QueryVirtualStorageTest.java index 
df55055751db..0e5a2a2baaf6 100644 --- a/embedded-tests/src/test/java/org/apache/druid/testing/embedded/query/QueryVirtualStorageTest.java +++ b/embedded-tests/src/test/java/org/apache/druid/testing/embedded/query/QueryVirtualStorageTest.java @@ -25,7 +25,7 @@ import org.apache.druid.java.util.common.HumanReadableBytes; import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.java.util.emitter.service.ServiceMetricEvent; -import org.apache.druid.msq.counters.ChannelCounters; +import org.apache.druid.msq.indexing.report.MSQTaskReport; import org.apache.druid.msq.indexing.report.MSQTaskReportPayload; import org.apache.druid.query.DefaultQueryMetrics; import org.apache.druid.query.DruidProcessingConfigTest; @@ -47,6 +47,8 @@ import org.apache.druid.testing.embedded.minio.MinIOStorageResource; import org.apache.druid.testing.embedded.msq.EmbeddedDurableShuffleStorageTest; import org.apache.druid.testing.embedded.msq.EmbeddedMSQApis; +import org.hamcrest.MatcherAssert; +import org.hamcrest.Matchers; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; @@ -278,29 +280,28 @@ void testQueryTooMuchDataButWithDart() // Now fetch the report using the SQL query ID final GetQueryReportResponse reportResponse = msqApis.getDartQueryReport(sqlQueryId, broker); - - // Verify the report response Assertions.assertNotNull(reportResponse, "Report response should not be null"); - ChannelCounters.Snapshot segmentChannelCounters = - (ChannelCounters.Snapshot) reportResponse.getReportMap() - .findReport("multiStageQuery") - .map(r -> - ((MSQTaskReportPayload) r.getPayload()).getCounters() - .snapshotForStage(0) - .get(0) - .getMap() - .get("input0") - ).orElse(null); - - Assertions.assertNotNull(segmentChannelCounters); - Assertions.assertArrayEquals(new long[]{24L}, segmentChannelCounters.getFiles()); - Assertions.assertTrue(segmentChannelCounters.getLoadFiles()[0] > 0 && 
segmentChannelCounters.getLoadFiles()[0] <= segmentChannelCounters.getFiles()[0]); - // size of all segments at time of writing, possibly we have to load all of them, but possibly less depending on - // test order - Assertions.assertTrue(segmentChannelCounters.getLoadBytes()[0] > 0); - Assertions.assertTrue(segmentChannelCounters.getLoadBytes()[0] <= SIZE_BYTES); - Assertions.assertTrue(segmentChannelCounters.getLoadTime()[0] > 0); - Assertions.assertTrue(segmentChannelCounters.getLoadWait()[0] > 0); + + final MSQTaskReportPayload reportPayload = + ((MSQTaskReport) reportResponse.getReportMap().get(MSQTaskReport.REPORT_KEY)).getPayload(); + + // Verify stage 0 (segment read) input counters + final EmbeddedMSQApis.ChannelSums inputChannelSums = msqApis.getInputChannelSums(reportPayload, 0); + Assertions.assertEquals(24L, inputChannelSums.files()); + Assertions.assertEquals(24L, inputChannelSums.totalFiles()); + Assertions.assertEquals(0L, inputChannelSums.queries()); + Assertions.assertEquals(0L, inputChannelSums.totalQueries()); + Assertions.assertEquals(39244L, inputChannelSums.rows()); + MatcherAssert.assertThat(inputChannelSums.bytes(), Matchers.greaterThan(0L)); + MatcherAssert.assertThat(inputChannelSums.bytes(), Matchers.lessThanOrEqualTo(SIZE_BYTES)); + + // Verify stage 0 (segment read) VSF load counters + MatcherAssert.assertThat(inputChannelSums.loadFiles(), Matchers.greaterThan(0L)); + MatcherAssert.assertThat(inputChannelSums.loadFiles(), Matchers.lessThanOrEqualTo(24L)); + MatcherAssert.assertThat(inputChannelSums.loadTime(), Matchers.greaterThan(0L)); + MatcherAssert.assertThat(inputChannelSums.loadWait(), Matchers.greaterThan(0L)); + MatcherAssert.assertThat(inputChannelSums.loadBytes(), Matchers.greaterThan(0L)); + MatcherAssert.assertThat(inputChannelSums.loadBytes(), Matchers.lessThanOrEqualTo(SIZE_BYTES)); } @Test diff --git a/examples/bin/run-java b/examples/bin/run-java index 80190d0a793c..5a30cd54fbdf 100755 --- a/examples/bin/run-java +++ 
b/examples/bin/run-java @@ -43,6 +43,7 @@ then --add-opens=java.base/java.io=ALL-UNNAMED \ --add-opens=java.base/java.lang=ALL-UNNAMED \ --add-opens=jdk.management/com.sun.management.internal=ALL-UNNAMED \ + --add-modules=jdk.incubator.vector \ "$@" else exec "$JAVA_BIN" "$@" diff --git a/examples/bin/start-druid b/examples/bin/start-druid index 81d8938adbb2..f39053ce16a5 100755 --- a/examples/bin/start-druid +++ b/examples/bin/start-druid @@ -31,5 +31,5 @@ elif [ -x "$(command -v python)" ] then exec python "$WHEREAMI/start-druid-main.py" "$@" else - echo "python interepreter not found" + echo "python interpreter not found" fi diff --git a/extensions-contrib/rabbit-stream-indexing-service/src/main/java/org/apache/druid/indexing/rabbitstream/supervisor/RabbitStreamSupervisor.java b/extensions-contrib/rabbit-stream-indexing-service/src/main/java/org/apache/druid/indexing/rabbitstream/supervisor/RabbitStreamSupervisor.java index 6099105b3374..04973a5272fd 100644 --- a/extensions-contrib/rabbit-stream-indexing-service/src/main/java/org/apache/druid/indexing/rabbitstream/supervisor/RabbitStreamSupervisor.java +++ b/extensions-contrib/rabbit-stream-indexing-service/src/main/java/org/apache/druid/indexing/rabbitstream/supervisor/RabbitStreamSupervisor.java @@ -322,7 +322,7 @@ protected Map getTimeLagPerPartition(Map currentOffs } @Override - protected RabbitStreamDataSourceMetadata createDataSourceMetaDataForReset(String topic, Map map) + public RabbitStreamDataSourceMetadata createDataSourceMetaDataForReset(String topic, Map map) { return new RabbitStreamDataSourceMetadata(new SeekableStreamEndSequenceNumbers<>(topic, map)); } @@ -408,7 +408,7 @@ public LagStats computeLagStats() } @Override - protected void updatePartitionLagFromStream() + public void updatePartitionLagFromStream() { getRecordSupplierLock().lock(); @@ -435,7 +435,7 @@ protected void updatePartitionLagFromStream() } @Override - protected Map getLatestSequencesFromStream() + public Map 
getLatestSequencesFromStream() { return latestSequenceFromStream != null ? latestSequenceFromStream : new HashMap<>(); } diff --git a/extensions-contrib/rabbit-stream-indexing-service/src/main/java/org/apache/druid/indexing/rabbitstream/supervisor/RabbitStreamSupervisorSpec.java b/extensions-contrib/rabbit-stream-indexing-service/src/main/java/org/apache/druid/indexing/rabbitstream/supervisor/RabbitStreamSupervisorSpec.java index 4a445f6f1c11..4763a949a615 100644 --- a/extensions-contrib/rabbit-stream-indexing-service/src/main/java/org/apache/druid/indexing/rabbitstream/supervisor/RabbitStreamSupervisorSpec.java +++ b/extensions-contrib/rabbit-stream-indexing-service/src/main/java/org/apache/druid/indexing/rabbitstream/supervisor/RabbitStreamSupervisorSpec.java @@ -30,6 +30,7 @@ import org.apache.druid.indexing.overlord.supervisor.Supervisor; import org.apache.druid.indexing.overlord.supervisor.SupervisorStateManagerConfig; import org.apache.druid.indexing.rabbitstream.RabbitStreamIndexTaskClientFactory; +import org.apache.druid.indexing.seekablestream.supervisor.BoundedStreamConfig; import org.apache.druid.indexing.seekablestream.supervisor.SeekableStreamSupervisorSpec; import org.apache.druid.java.util.emitter.service.ServiceEmitter; import org.apache.druid.java.util.metrics.DruidMonitorSchedulerConfig; @@ -155,6 +156,55 @@ protected RabbitStreamSupervisorSpec toggleSuspend(boolean suspend) supervisorStateManagerConfig); } + @Override + public RabbitStreamSupervisorSpec createBackfillSpec( + String backfillId, + BoundedStreamConfig boundedStreamConfig, + @Nullable Integer taskCount + ) + { + RabbitStreamSupervisorIOConfig ioConfig = getSpec().getIOConfig(); + RabbitStreamSupervisorIOConfig backfillIoConfig = new RabbitStreamSupervisorIOConfig( + ioConfig.getStream(), + ioConfig.getUri(), + ioConfig.getInputFormat(), + ioConfig.getReplicas(), + taskCount != null ? 
taskCount : ioConfig.getTaskCount(), + ioConfig.getTaskDuration().toPeriod(), + ioConfig.getConsumerProperties(), + ioConfig.getAutoScalerConfig(), + ioConfig.getPollTimeout(), + ioConfig.getStartDelay().toPeriod(), + ioConfig.getPeriod().toPeriod(), + ioConfig.getCompletionTimeout().toPeriod(), + ioConfig.isUseEarliestSequenceNumber(), + ioConfig.getLateMessageRejectionPeriod().isPresent() ? ioConfig.getLateMessageRejectionPeriod().get().toPeriod() : null, + ioConfig.getEarlyMessageRejectionPeriod().isPresent() ? ioConfig.getEarlyMessageRejectionPeriod().get().toPeriod() : null, + ioConfig.getLateMessageRejectionStartDateTime().isPresent() ? ioConfig.getLateMessageRejectionStartDateTime().get() : null, + ioConfig.getStopTaskCount(), + ioConfig.getServerPriorityToReplicas(), + boundedStreamConfig + ); + return new RabbitStreamSupervisorSpec( + backfillId, + null, + getSpec().getDataSchema(), + getSpec().getTuningConfig(), + backfillIoConfig, + getContext(), + isSuspended(), + taskStorage, + taskMaster, + indexerMetadataStorageCoordinator, + (RabbitStreamIndexTaskClientFactory) indexTaskClientFactory, + mapper, + emitter, + monitorSchedulerConfig, + rowIngestionMetersFactory, + supervisorStateManagerConfig + ); + } + @Override public String toString() { diff --git a/extensions-core/kafka-indexing-service/src/main/java/org/apache/druid/indexing/kafka/KafkaConsumerMonitor.java b/extensions-core/kafka-indexing-service/src/main/java/org/apache/druid/indexing/kafka/KafkaConsumerMonitor.java index a24779ec61e9..08f933a31b09 100644 --- a/extensions-core/kafka-indexing-service/src/main/java/org/apache/druid/indexing/kafka/KafkaConsumerMonitor.java +++ b/extensions-core/kafka-indexing-service/src/main/java/org/apache/druid/indexing/kafka/KafkaConsumerMonitor.java @@ -21,7 +21,6 @@ import com.google.common.util.concurrent.AtomicDouble; import org.apache.druid.error.DruidException; -import org.apache.druid.java.util.common.logger.Logger; import 
org.apache.druid.java.util.emitter.service.ServiceEmitter; import org.apache.druid.java.util.emitter.service.ServiceMetricEvent; import org.apache.druid.java.util.metrics.AbstractMonitor; @@ -29,18 +28,18 @@ import org.apache.kafka.common.Metric; import org.apache.kafka.common.MetricName; +import javax.annotation.Nullable; import java.util.HashMap; import java.util.Map; import java.util.Set; import java.util.concurrent.atomic.AtomicLong; import java.util.function.Function; +import java.util.function.Supplier; import java.util.stream.Collectors; import java.util.stream.Stream; public class KafkaConsumerMonitor extends AbstractMonitor { - private static final Logger log = new Logger(KafkaConsumerMonitor.class); - private volatile boolean stopAfterNext = false; private static final String CLIENT_ID_TAG = "client-id"; @@ -137,12 +136,23 @@ public class KafkaConsumerMonitor extends AbstractMonitor ).collect(Collectors.toMap(KafkaConsumerMetric::getKafkaMetricName, Function.identity())); private final KafkaConsumer consumer; + + /** + * Supplies a new metric builder for each emitted metric. + */ + @Nullable + private final Supplier metricBuilderSupplier; + private final Map counters = new HashMap<>(); private final AtomicDouble pollIdleRatioAvg = new AtomicDouble(1.0d); - public KafkaConsumerMonitor(final KafkaConsumer consumer) + public KafkaConsumerMonitor( + final KafkaConsumer consumer, + @Nullable final Supplier metricBuilderSupplier + ) { this.consumer = consumer; + this.metricBuilderSupplier = metricBuilderSupplier; } @Override @@ -173,7 +183,8 @@ public boolean doMonitor(final ServiceEmitter emitter) } if (emitValue != null && !Double.isNaN(emitValue.doubleValue())) { - final ServiceMetricEvent.Builder builder = new ServiceMetricEvent.Builder(); + final ServiceMetricEvent.Builder builder = + metricBuilderSupplier != null ? 
metricBuilderSupplier.get() : new ServiceMetricEvent.Builder(); for (final String dimension : kafkaConsumerMetric.getDimensions()) { if (!CLIENT_ID_TAG.equals(dimension)) { builder.setDimension(dimension, metricName.tags().get(dimension)); diff --git a/extensions-core/kafka-indexing-service/src/main/java/org/apache/druid/indexing/kafka/KafkaIndexTask.java b/extensions-core/kafka-indexing-service/src/main/java/org/apache/druid/indexing/kafka/KafkaIndexTask.java index f16a9a35cf2c..2b1bc58e190e 100644 --- a/extensions-core/kafka-indexing-service/src/main/java/org/apache/druid/indexing/kafka/KafkaIndexTask.java +++ b/extensions-core/kafka-indexing-service/src/main/java/org/apache/druid/indexing/kafka/KafkaIndexTask.java @@ -116,8 +116,13 @@ protected KafkaRecordSupplier newTaskRecordSupplier(final TaskToolbox toolbox) props.put("auto.offset.reset", "none"); final KafkaRecordSupplier recordSupplier = - new KafkaRecordSupplier(props, configMapper, kafkaIndexTaskIOConfig.getConfigOverrides(), - kafkaIndexTaskIOConfig.isMultiTopic()); + new KafkaRecordSupplier( + props, + configMapper, + kafkaIndexTaskIOConfig.getConfigOverrides(), + kafkaIndexTaskIOConfig.isMultiTopic(), + this::getMetricBuilder + ); if (toolbox.getMonitorScheduler() != null) { toolbox.getMonitorScheduler().addMonitor(recordSupplier.monitor()); diff --git a/extensions-core/kafka-indexing-service/src/main/java/org/apache/druid/indexing/kafka/KafkaRecordSupplier.java b/extensions-core/kafka-indexing-service/src/main/java/org/apache/druid/indexing/kafka/KafkaRecordSupplier.java index c76e5f4965fd..26dce74b0aea 100644 --- a/extensions-core/kafka-indexing-service/src/main/java/org/apache/druid/indexing/kafka/KafkaRecordSupplier.java +++ b/extensions-core/kafka-indexing-service/src/main/java/org/apache/druid/indexing/kafka/KafkaRecordSupplier.java @@ -35,6 +35,7 @@ import org.apache.druid.indexing.seekablestream.common.StreamPartition; import 
org.apache.druid.indexing.seekablestream.extension.KafkaConfigOverrides; import org.apache.druid.java.util.common.StringUtils; +import org.apache.druid.java.util.emitter.service.ServiceMetricEvent; import org.apache.druid.java.util.metrics.Monitor; import org.apache.druid.metadata.DynamicConfigProvider; import org.apache.druid.metadata.PasswordProvider; @@ -46,6 +47,7 @@ import org.apache.kafka.common.serialization.Deserializer; import javax.annotation.Nonnull; +import javax.annotation.Nullable; import java.lang.reflect.InvocationTargetException; import java.lang.reflect.Method; import java.lang.reflect.Type; @@ -58,6 +60,7 @@ import java.util.Properties; import java.util.Set; import java.util.concurrent.Callable; +import java.util.function.Supplier; import java.util.regex.Pattern; import java.util.stream.Collectors; @@ -90,21 +93,27 @@ public KafkaRecordSupplier( Map consumerProperties, ObjectMapper sortingMapper, KafkaConfigOverrides configOverrides, - boolean multiTopic + boolean multiTopic, + @Nullable Supplier metricBuilderSupplier ) { - this(getKafkaConsumer(sortingMapper, consumerProperties, configOverrides), multiTopic); + this( + getKafkaConsumer(sortingMapper, consumerProperties, configOverrides), + multiTopic, + metricBuilderSupplier + ); } @VisibleForTesting public KafkaRecordSupplier( KafkaConsumer consumer, - boolean multiTopic + boolean multiTopic, + @Nullable Supplier metricBuilderSupplier ) { this.consumer = consumer; this.multiTopic = multiTopic; - this.monitor = new KafkaConsumerMonitor(consumer); + this.monitor = new KafkaConsumerMonitor(consumer, metricBuilderSupplier); } @Override diff --git a/extensions-core/kafka-indexing-service/src/main/java/org/apache/druid/indexing/kafka/KafkaSamplerSpec.java b/extensions-core/kafka-indexing-service/src/main/java/org/apache/druid/indexing/kafka/KafkaSamplerSpec.java index d718e35d7cd9..d04d97472b78 100644 --- 
a/extensions-core/kafka-indexing-service/src/main/java/org/apache/druid/indexing/kafka/KafkaSamplerSpec.java +++ b/extensions-core/kafka-indexing-service/src/main/java/org/apache/druid/indexing/kafka/KafkaSamplerSpec.java @@ -69,8 +69,12 @@ protected KafkaRecordSupplier createRecordSupplier() props.put("request.timeout.ms", Integer.toString(samplerConfig.getTimeoutMs())); KafkaSupervisorIOConfig kafkaSupervisorIOConfig = (KafkaSupervisorIOConfig) ioConfig; - return new KafkaRecordSupplier(props, objectMapper, kafkaSupervisorIOConfig.getConfigOverrides(), - kafkaSupervisorIOConfig.isMultiTopic() + return new KafkaRecordSupplier( + props, + objectMapper, + kafkaSupervisorIOConfig.getConfigOverrides(), + kafkaSupervisorIOConfig.isMultiTopic(), + null ); } finally { diff --git a/extensions-core/kafka-indexing-service/src/main/java/org/apache/druid/indexing/kafka/supervisor/KafkaSupervisor.java b/extensions-core/kafka-indexing-service/src/main/java/org/apache/druid/indexing/kafka/supervisor/KafkaSupervisor.java index 727eb52db272..0d8fa70f7f18 100644 --- a/extensions-core/kafka-indexing-service/src/main/java/org/apache/druid/indexing/kafka/supervisor/KafkaSupervisor.java +++ b/extensions-core/kafka-indexing-service/src/main/java/org/apache/druid/indexing/kafka/supervisor/KafkaSupervisor.java @@ -137,7 +137,8 @@ protected RecordSupplier setupReco spec.getIoConfig().getConsumerProperties(), sortingMapper, spec.getIoConfig().getConfigOverrides(), - spec.getIoConfig().isMultiTopic() + spec.getIoConfig().isMultiTopic(), + null ); } @@ -356,7 +357,7 @@ protected Map getTimeLagPerPartition(Map map) + public KafkaDataSourceMetadata createDataSourceMetaDataForReset(String topic, Map map) { return new KafkaDataSourceMetadata(new SeekableStreamEndSequenceNumbers<>(topic, map)); } @@ -548,7 +549,7 @@ private Map getTimestampPerPartitionAtCurrentOffset(S *

*/ @Override - protected void updatePartitionLagFromStream() + public void updatePartitionLagFromStream() { if (getIoConfig().isEmitTimeLagMetrics()) { updatePartitionTimeAndRecordLagFromStream(); @@ -597,7 +598,7 @@ private void updateOffsetSnapshot( } @Override - protected Map getLatestSequencesFromStream() + public Map getLatestSequencesFromStream() { return offsetSnapshotRef.get().getLatestOffsetsFromStream(); } @@ -630,7 +631,7 @@ protected boolean isMultiTopic() * Gets the offsets as stored in the metadata store. The map returned will only contain * offsets from topic partitions that match the current supervisor config stream. This * override is needed because in the case of multi-topic, a user could have updated the supervisor - * config from single topic to mult-topic, where the new multi-topic pattern regex matches the + * config from single topic to multi-topic, where the new multi-topic pattern regex matches the * old config single topic. Without this override, the previously stored metadata for the single * topic would be deemed as different from the currently configure stream, and not be included in * the offset map returned. This implementation handles these cases appropriately. @@ -640,7 +641,7 @@ protected boolean isMultiTopic() * updated to single topic or multi-topic depending on the supervisor config, as needed. 
*/ @Override - protected Map getOffsetsFromMetadataStorage() + public Map getOffsetsFromMetadataStorage() { final DataSourceMetadata dataSourceMetadata = retrieveDataSourceMetadata(); if (checkSourceMetadataMatch(dataSourceMetadata)) { diff --git a/extensions-core/kafka-indexing-service/src/main/java/org/apache/druid/indexing/kafka/supervisor/KafkaSupervisorSpec.java b/extensions-core/kafka-indexing-service/src/main/java/org/apache/druid/indexing/kafka/supervisor/KafkaSupervisorSpec.java index b607ade1acfe..31d3e8fad691 100644 --- a/extensions-core/kafka-indexing-service/src/main/java/org/apache/druid/indexing/kafka/supervisor/KafkaSupervisorSpec.java +++ b/extensions-core/kafka-indexing-service/src/main/java/org/apache/druid/indexing/kafka/supervisor/KafkaSupervisorSpec.java @@ -36,6 +36,7 @@ import org.apache.druid.indexing.overlord.supervisor.Supervisor; import org.apache.druid.indexing.overlord.supervisor.SupervisorSpec; import org.apache.druid.indexing.overlord.supervisor.SupervisorStateManagerConfig; +import org.apache.druid.indexing.seekablestream.supervisor.BoundedStreamConfig; import org.apache.druid.indexing.seekablestream.supervisor.SeekableStreamSupervisorSpec; import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.java.util.emitter.service.ServiceEmitter; @@ -173,6 +174,59 @@ protected KafkaSupervisorSpec toggleSuspend(boolean suspend) ); } + @Override + public KafkaSupervisorSpec createBackfillSpec( + String backfillId, + BoundedStreamConfig boundedStreamConfig, + @Nullable Integer taskCount + ) + { + KafkaSupervisorIOConfig ioConfig = getSpec().getIOConfig(); + KafkaSupervisorIOConfig backfillIoConfig = new KafkaSupervisorIOConfig( + ioConfig.getTopic(), + ioConfig.getTopicPattern(), + ioConfig.getInputFormat(), + ioConfig.getReplicas(), + taskCount != null ? 
taskCount : ioConfig.getTaskCount(), + ioConfig.getTaskDuration().toPeriod(), + ioConfig.getConsumerProperties(), + ioConfig.getAutoScalerConfig(), + ioConfig.getLagAggregator(), + ioConfig.getPollTimeout(), + ioConfig.getStartDelay().toPeriod(), + ioConfig.getPeriod().toPeriod(), + ioConfig.isUseEarliestSequenceNumber(), + ioConfig.getCompletionTimeout().toPeriod(), + ioConfig.getLateMessageRejectionPeriod().isPresent() ? ioConfig.getLateMessageRejectionPeriod().get().toPeriod() : null, + ioConfig.getEarlyMessageRejectionPeriod().isPresent() ? ioConfig.getEarlyMessageRejectionPeriod().get().toPeriod() : null, + ioConfig.getLateMessageRejectionStartDateTime().isPresent() ? ioConfig.getLateMessageRejectionStartDateTime().get() : null, + ioConfig.getConfigOverrides(), + ioConfig.getIdleConfig(), + ioConfig.getStopTaskCount(), + ioConfig.isEmitTimeLagMetrics(), + ioConfig.getServerPriorityToReplicas(), + boundedStreamConfig + ); + return new KafkaSupervisorSpec( + backfillId, + null, + getSpec().getDataSchema(), + getSpec().getTuningConfig(), + backfillIoConfig, + getContext(), + isSuspended(), + taskStorage, + taskMaster, + indexerMetadataStorageCoordinator, + (KafkaIndexTaskClientFactory) indexTaskClientFactory, + mapper, + emitter, + monitorSchedulerConfig, + rowIngestionMetersFactory, + supervisorStateManagerConfig + ); + } + /** * Extends {@link SeekableStreamSupervisorSpec#validateSpecUpdateTo} to ensure that the proposed spec and current spec are either both multi-topic or both single-topic. *

diff --git a/extensions-core/kafka-indexing-service/src/test/java/org/apache/druid/indexing/kafka/KafkaIndexTaskTest.java b/extensions-core/kafka-indexing-service/src/test/java/org/apache/druid/indexing/kafka/KafkaIndexTaskTest.java index bb40602be7ba..5be3f60e6ae7 100644 --- a/extensions-core/kafka-indexing-service/src/test/java/org/apache/druid/indexing/kafka/KafkaIndexTaskTest.java +++ b/extensions-core/kafka-indexing-service/src/test/java/org/apache/druid/indexing/kafka/KafkaIndexTaskTest.java @@ -1223,7 +1223,7 @@ public void testRunWithTransformSpec() throws Exception // Wait for task to exit Assert.assertEquals(TaskState.SUCCESS, future.get().getStatusCode()); - verifyTaskMetrics(task, RowMeters.with().bytes(getTotalSizeOfRecords(0, 5)).thrownAwayByReason(InputRowFilterResult.NULL_OR_EMPTY_RECORD, 4).totalProcessed(1)); + verifyTaskMetrics(task, RowMeters.with().bytes(getTotalSizeOfRecords(0, 5)).thrownAwayByReason(InputRowFilterResult.CUSTOM_FILTER, 4).totalProcessed(1)); // Check published metadata final List publishedDescriptors = publishedDescriptors(); @@ -3406,7 +3406,7 @@ public void testTaskWithTransformSpecDoesNotCauseCliPeonCyclicDependency() // Wait for task to exit Assert.assertEquals(TaskState.SUCCESS, future.get().getStatusCode()); - verifyTaskMetrics(task, RowMeters.with().bytes(getTotalSizeOfRecords(0, 5)).thrownAwayByReason(InputRowFilterResult.NULL_OR_EMPTY_RECORD, 4).totalProcessed(1)); + verifyTaskMetrics(task, RowMeters.with().bytes(getTotalSizeOfRecords(0, 5)).thrownAwayByReason(InputRowFilterResult.CUSTOM_FILTER, 4).totalProcessed(1)); // Check published metadata final List publishedDescriptors = publishedDescriptors(); diff --git a/extensions-core/kafka-indexing-service/src/test/java/org/apache/druid/indexing/kafka/KafkaRecordSupplierTest.java b/extensions-core/kafka-indexing-service/src/test/java/org/apache/druid/indexing/kafka/KafkaRecordSupplierTest.java index a01ae639c04e..f7c65df3d19a 100644 --- 
a/extensions-core/kafka-indexing-service/src/test/java/org/apache/druid/indexing/kafka/KafkaRecordSupplierTest.java +++ b/extensions-core/kafka-indexing-service/src/test/java/org/apache/druid/indexing/kafka/KafkaRecordSupplierTest.java @@ -31,10 +31,12 @@ import org.apache.druid.indexing.seekablestream.common.OrderedPartitionableRecord; import org.apache.druid.indexing.seekablestream.common.StreamPartition; import org.apache.druid.java.util.common.StringUtils; +import org.apache.druid.java.util.emitter.service.ServiceMetricEvent; import org.apache.druid.java.util.metrics.Monitor; import org.apache.druid.java.util.metrics.StubServiceEmitter; import org.apache.druid.metadata.DynamicConfigProvider; import org.apache.druid.metadata.MapStringDynamicConfigProvider; +import org.apache.druid.query.DruidMetrics; import org.apache.druid.segment.TestHelper; import org.apache.kafka.clients.consumer.ConsumerRecord; import org.apache.kafka.clients.consumer.KafkaConsumer; @@ -60,6 +62,7 @@ import java.util.Properties; import java.util.Set; import java.util.concurrent.ExecutionException; +import java.util.function.Supplier; import java.util.regex.Pattern; import java.util.stream.Collectors; @@ -230,7 +233,7 @@ public void testSupplierSetup() throws ExecutionException, InterruptedException ); KafkaRecordSupplier recordSupplier = new KafkaRecordSupplier( - KAFKA_SERVER.consumerProperties(), OBJECT_MAPPER, null, false); + KAFKA_SERVER.consumerProperties(), OBJECT_MAPPER, null, false, null); Assert.assertTrue(recordSupplier.getAssignment().isEmpty()); @@ -263,14 +266,14 @@ public void test_defaultRejectAllUrlsForSaslOauthBearerUrlConsumerProperty() thr properties.put("sasl.oauthbearer.token.endpoint.url", "http://localhost:8080/token"); MatcherAssert.assertThat( - assertThrows(KafkaException.class, () -> new KafkaRecordSupplier(properties, OBJECT_MAPPER, null, false)), + assertThrows(KafkaException.class, () -> new KafkaRecordSupplier(properties, OBJECT_MAPPER, null, false, null)), 
CoreMatchers.instanceOf(KafkaException.class) ); properties.remove("sasl.oauthbearer.token.endpoint.url"); properties.put("sasl.oauthbearer.jwks.endpoint.url", "http://localhost:8080/jwks"); MatcherAssert.assertThat( - assertThrows(KafkaException.class, () -> new KafkaRecordSupplier(properties, OBJECT_MAPPER, null, false)), + assertThrows(KafkaException.class, () -> new KafkaRecordSupplier(properties, OBJECT_MAPPER, null, false, null)), CoreMatchers.instanceOf(KafkaException.class) ); } @@ -287,7 +290,7 @@ public void testMultiTopicSupplierSetup() throws ExecutionException, Interrupted insertData(); KafkaRecordSupplier recordSupplier = new KafkaRecordSupplier( - KAFKA_SERVER.consumerProperties(), OBJECT_MAPPER, null, true); + KAFKA_SERVER.consumerProperties(), OBJECT_MAPPER, null, true, null); String stream = Pattern.quote(TOPIC) + "|" + Pattern.quote(otherTopic); Set partitions = recordSupplier.getPartitionIds(stream); @@ -323,7 +326,8 @@ public void testSupplierSetupCustomDeserializer() throws ExecutionException, Int properties, OBJECT_MAPPER, null, - false + false, + null ); Assert.assertTrue(recordSupplier.getAssignment().isEmpty()); @@ -351,7 +355,8 @@ public void testSupplierSetupCustomDeserializerRequiresParameter() properties, OBJECT_MAPPER, null, - false + false, + null ); Assert.assertTrue(recordSupplier.getAssignment().isEmpty()); //just test recordSupplier is initiated @@ -370,7 +375,8 @@ public void testSupplierSetupCustomDeserializerRequiresParameterButMissingIt() properties, OBJECT_MAPPER, null, - false + false, + null ); Assert.assertTrue(recordSupplier.getAssignment().isEmpty()); //just test recordSupplier is initiated @@ -397,7 +403,8 @@ public void testPollCustomDeserializer() throws InterruptedException, ExecutionE properties, OBJECT_MAPPER, null, - false + false, + null ); recordSupplier.assign(partitions); @@ -431,11 +438,14 @@ public void testPoll() throws InterruptedException, ExecutionException StreamPartition.of(TOPIC, PARTITION_1) ); + 
final Supplier metricBuilderSupplier = + () -> new ServiceMetricEvent.Builder().setDimension(DruidMetrics.SUPERVISOR_ID, "supervisor-1"); KafkaRecordSupplier recordSupplier = new KafkaRecordSupplier( KAFKA_SERVER.consumerProperties(), OBJECT_MAPPER, null, - false + false, + metricBuilderSupplier ); final Monitor monitor = recordSupplier.monitor(); @@ -472,6 +482,11 @@ public void testPoll() throws InterruptedException, ExecutionException emitter.verifyEmitted("kafka/consumer/outgoingBytes", 2); emitter.verifyEmitted("kafka/consumer/pollIdleRatio", 1); + // All emitted metrics carry the supervisorId dimension. + for (final ServiceMetricEvent event : emitter.getMetricEvents("kafka/consumer/bytesConsumed")) { + Assert.assertEquals("supervisor-1", event.getUserDims().get(DruidMetrics.SUPERVISOR_ID)); + } + recordSupplier.close(); Assert.assertFalse(monitor.monitor(emitter)); } @@ -497,7 +512,7 @@ public void testPollAfterMoreDataAdded() throws InterruptedException, ExecutionE KafkaRecordSupplier recordSupplier = new KafkaRecordSupplier( - KAFKA_SERVER.consumerProperties(), OBJECT_MAPPER, null, false); + KAFKA_SERVER.consumerProperties(), OBJECT_MAPPER, null, false, null); recordSupplier.assign(partitions); recordSupplier.seekToEarliest(partitions); @@ -567,7 +582,7 @@ public void testSeek() throws InterruptedException, ExecutionException ); KafkaRecordSupplier recordSupplier = new KafkaRecordSupplier( - KAFKA_SERVER.consumerProperties(), OBJECT_MAPPER, null, false); + KAFKA_SERVER.consumerProperties(), OBJECT_MAPPER, null, false, null); recordSupplier.assign(partitions); recordSupplier.seekToEarliest(partitions); @@ -610,7 +625,7 @@ public void testSeekToLatest() throws InterruptedException, ExecutionException ); KafkaRecordSupplier recordSupplier = new KafkaRecordSupplier( - KAFKA_SERVER.consumerProperties(), OBJECT_MAPPER, null, false); + KAFKA_SERVER.consumerProperties(), OBJECT_MAPPER, null, false, null); recordSupplier.assign(partitions); 
recordSupplier.seekToEarliest(partitions); @@ -643,7 +658,7 @@ public void testSeekUnassigned() throws InterruptedException, ExecutionException ); KafkaRecordSupplier recordSupplier = new KafkaRecordSupplier( - KAFKA_SERVER.consumerProperties(), OBJECT_MAPPER, null, false); + KAFKA_SERVER.consumerProperties(), OBJECT_MAPPER, null, false, null); recordSupplier.assign(partitions); @@ -669,7 +684,7 @@ public void testPosition() throws ExecutionException, InterruptedException ); KafkaRecordSupplier recordSupplier = new KafkaRecordSupplier( - KAFKA_SERVER.consumerProperties(), OBJECT_MAPPER, null, false); + KAFKA_SERVER.consumerProperties(), OBJECT_MAPPER, null, false, null); recordSupplier.assign(partitions); recordSupplier.seekToEarliest(partitions); @@ -704,7 +719,7 @@ public void testPosition() throws ExecutionException, InterruptedException public void getLatestSequenceNumberWhenPartitionIsEmptyAndUseEarliestOffsetShouldReturnsValidNonNull() { KafkaRecordSupplier recordSupplier = new KafkaRecordSupplier( - KAFKA_SERVER.consumerProperties(), OBJECT_MAPPER, null, false); + KAFKA_SERVER.consumerProperties(), OBJECT_MAPPER, null, false, null); StreamPartition streamPartition = StreamPartition.of(TOPIC, PARTITION_0); Set> partitions = ImmutableSet.of(streamPartition); recordSupplier.assign(partitions); @@ -716,7 +731,7 @@ public void getLatestSequenceNumberWhenPartitionIsEmptyAndUseEarliestOffsetShoul public void getEarliestSequenceNumberWhenPartitionIsEmptyAndUseEarliestOffsetShouldReturnsValidNonNull() { KafkaRecordSupplier recordSupplier = new KafkaRecordSupplier( - KAFKA_SERVER.consumerProperties(), OBJECT_MAPPER, null, false); + KAFKA_SERVER.consumerProperties(), OBJECT_MAPPER, null, false, null); StreamPartition streamPartition = StreamPartition.of(TOPIC, PARTITION_0); Set> partitions = ImmutableSet.of(streamPartition); recordSupplier.assign(partitions); @@ -728,7 +743,7 @@ public void getEarliestSequenceNumberWhenPartitionIsEmptyAndUseEarliestOffsetSho public 
void getLatestSequenceNumberWhenPartitionIsEmptyAndUseLatestOffsetShouldReturnsValidNonNull() { KafkaRecordSupplier recordSupplier = new KafkaRecordSupplier( - KAFKA_SERVER.consumerProperties(), OBJECT_MAPPER, null, false); + KAFKA_SERVER.consumerProperties(), OBJECT_MAPPER, null, false, null); StreamPartition streamPartition = StreamPartition.of(TOPIC, PARTITION_0); Set> partitions = ImmutableSet.of(streamPartition); recordSupplier.assign(partitions); @@ -740,7 +755,7 @@ public void getLatestSequenceNumberWhenPartitionIsEmptyAndUseLatestOffsetShouldR public void getEarliestSequenceNumberWhenPartitionIsEmptyAndUseLatestOffsetShouldReturnsValidNonNull() { KafkaRecordSupplier recordSupplier = new KafkaRecordSupplier( - KAFKA_SERVER.consumerProperties(), OBJECT_MAPPER, null, false); + KAFKA_SERVER.consumerProperties(), OBJECT_MAPPER, null, false, null); StreamPartition streamPartition = StreamPartition.of(TOPIC, PARTITION_0); Set> partitions = ImmutableSet.of(streamPartition); recordSupplier.assign(partitions); diff --git a/extensions-core/kafka-indexing-service/src/test/java/org/apache/druid/indexing/kafka/supervisor/KafkaSupervisorSpecTest.java b/extensions-core/kafka-indexing-service/src/test/java/org/apache/druid/indexing/kafka/supervisor/KafkaSupervisorSpecTest.java index 8879ff6d9753..06ca9b64ced5 100644 --- a/extensions-core/kafka-indexing-service/src/test/java/org/apache/druid/indexing/kafka/supervisor/KafkaSupervisorSpecTest.java +++ b/extensions-core/kafka-indexing-service/src/test/java/org/apache/druid/indexing/kafka/supervisor/KafkaSupervisorSpecTest.java @@ -32,6 +32,7 @@ import org.apache.druid.indexing.overlord.TaskStorage; import org.apache.druid.indexing.overlord.supervisor.SupervisorSpec; import org.apache.druid.indexing.overlord.supervisor.SupervisorStateManagerConfig; +import org.apache.druid.indexing.seekablestream.supervisor.BoundedStreamConfig; import org.apache.druid.indexing.seekablestream.supervisor.LagAggregator; import 
org.apache.druid.indexing.seekablestream.supervisor.autoscaler.CostBasedAutoScalerConfig; import org.apache.druid.jackson.DefaultObjectMapper; @@ -564,6 +565,38 @@ public void test_validateSpecUpdateTo() sourceSpec.validateSpecUpdateTo(validDestSpec); } + @Test + public void testCreateBackfillSpec() + { + KafkaSupervisorSpec spec = new KafkaSupervisorSpecBuilder() + .withDataSchema( + schema -> schema + .withTimestamp(TimestampSpec.DEFAULT) + .withAggregators(new CountAggregatorFactory("rows")) + .withGranularity(new UniformGranularitySpec(Granularities.HOUR, Granularities.NONE, null)) + ) + .withIoConfig( + ioConfig -> ioConfig + .withJsonInputFormat() + .withConsumerProperties(Map.of("bootstrap.servers", "localhost:9092")) + .withTaskCount(3) + ) + .build("testDs", "metrics"); + + BoundedStreamConfig boundedStreamConfig = new BoundedStreamConfig( + Map.of("0", 100L, "1", 200L), + Map.of("0", 500L, "1", 600L) + ); + + KafkaSupervisorSpec backfill = (KafkaSupervisorSpec) spec.createBackfillSpec("backfill-id", boundedStreamConfig, 2); + + Assert.assertEquals("backfill-id", backfill.getId()); + Assert.assertEquals("testDs", backfill.getSpec().getDataSchema().getDataSource()); + Assert.assertEquals("metrics", backfill.getSpec().getIOConfig().getTopic()); + Assert.assertEquals(2, backfill.getSpec().getIOConfig().getTaskCount()); + Assert.assertEquals(boundedStreamConfig, backfill.getSpec().getIOConfig().getBoundedStreamConfig()); + } + private KafkaSupervisorSpec getSpec(String topic, String topicPattern) { KafkaSupervisorSpecBuilder builder = new KafkaSupervisorSpecBuilder() diff --git a/extensions-core/kafka-indexing-service/src/test/java/org/apache/druid/indexing/kafka/supervisor/KafkaSupervisorTest.java b/extensions-core/kafka-indexing-service/src/test/java/org/apache/druid/indexing/kafka/supervisor/KafkaSupervisorTest.java index d1f082af24e9..65c60c384180 100644 --- 
a/extensions-core/kafka-indexing-service/src/test/java/org/apache/druid/indexing/kafka/supervisor/KafkaSupervisorTest.java +++ b/extensions-core/kafka-indexing-service/src/test/java/org/apache/druid/indexing/kafka/supervisor/KafkaSupervisorTest.java @@ -6348,7 +6348,8 @@ protected RecordSupplier setupReco Deserializer valueDeserializerObject = new ByteArrayDeserializer(); return new KafkaRecordSupplier( new KafkaConsumer<>(props, keyDeserializerObject, valueDeserializerObject), - getIoConfig().isMultiTopic() + getIoConfig().isMultiTopic(), + null ); } diff --git a/extensions-core/kinesis-indexing-service/src/main/java/org/apache/druid/indexing/kinesis/supervisor/KinesisSupervisor.java b/extensions-core/kinesis-indexing-service/src/main/java/org/apache/druid/indexing/kinesis/supervisor/KinesisSupervisor.java index 0f91fc0965db..3f1f4034f3ce 100644 --- a/extensions-core/kinesis-indexing-service/src/main/java/org/apache/druid/indexing/kinesis/supervisor/KinesisSupervisor.java +++ b/extensions-core/kinesis-indexing-service/src/main/java/org/apache/druid/indexing/kinesis/supervisor/KinesisSupervisor.java @@ -321,7 +321,7 @@ protected Map getTimeLagPerPartition(Map currentOf } @Override - protected SeekableStreamDataSourceMetadata createDataSourceMetaDataForReset( + public SeekableStreamDataSourceMetadata createDataSourceMetaDataForReset( String stream, Map map ) @@ -336,7 +336,7 @@ protected OrderedSequenceNumber makeSequenceNumber(String seq, boolean i } @Override - protected void updatePartitionLagFromStream() + public void updatePartitionLagFromStream() { KinesisRecordSupplier supplier = (KinesisRecordSupplier) recordSupplier; // this recordSupplier method is thread safe, so does not need to acquire the recordSupplierLock diff --git a/extensions-core/kinesis-indexing-service/src/main/java/org/apache/druid/indexing/kinesis/supervisor/KinesisSupervisorSpec.java 
b/extensions-core/kinesis-indexing-service/src/main/java/org/apache/druid/indexing/kinesis/supervisor/KinesisSupervisorSpec.java index 8e6615716809..4899337797bf 100644 --- a/extensions-core/kinesis-indexing-service/src/main/java/org/apache/druid/indexing/kinesis/supervisor/KinesisSupervisorSpec.java +++ b/extensions-core/kinesis-indexing-service/src/main/java/org/apache/druid/indexing/kinesis/supervisor/KinesisSupervisorSpec.java @@ -35,6 +35,7 @@ import org.apache.druid.indexing.overlord.TaskStorage; import org.apache.druid.indexing.overlord.supervisor.Supervisor; import org.apache.druid.indexing.overlord.supervisor.SupervisorStateManagerConfig; +import org.apache.druid.indexing.seekablestream.supervisor.BoundedStreamConfig; import org.apache.druid.indexing.seekablestream.supervisor.SeekableStreamSupervisorSpec; import org.apache.druid.java.util.emitter.service.ServiceEmitter; import org.apache.druid.java.util.metrics.DruidMonitorSchedulerConfig; @@ -193,4 +194,57 @@ protected KinesisSupervisorSpec toggleSuspend(boolean suspend) supervisorStateManagerConfig ); } + + @Override + public KinesisSupervisorSpec createBackfillSpec( + String backfillId, + BoundedStreamConfig boundedStreamConfig, + @Nullable Integer taskCount + ) + { + KinesisSupervisorIOConfig ioConfig = getSpec().getIOConfig(); + KinesisSupervisorIOConfig backfillIoConfig = new KinesisSupervisorIOConfig( + ioConfig.getStream(), + ioConfig.getInputFormat(), + ioConfig.getEndpoint(), + null, + ioConfig.getReplicas(), + taskCount != null ? taskCount : ioConfig.getTaskCount(), + ioConfig.getTaskDuration().toPeriod(), + ioConfig.getStartDelay().toPeriod(), + ioConfig.getPeriod().toPeriod(), + ioConfig.isUseEarliestSequenceNumber(), + ioConfig.getCompletionTimeout().toPeriod(), + ioConfig.getLateMessageRejectionPeriod().isPresent() ? ioConfig.getLateMessageRejectionPeriod().get().toPeriod() : null, + ioConfig.getEarlyMessageRejectionPeriod().isPresent() ? 
ioConfig.getEarlyMessageRejectionPeriod().get().toPeriod() : null, + ioConfig.getLateMessageRejectionStartDateTime().isPresent() ? ioConfig.getLateMessageRejectionStartDateTime().get() : null, + ioConfig.getRecordsPerFetch(), + ioConfig.getFetchDelayMillis(), + ioConfig.getAwsAssumedRoleArn(), + ioConfig.getAwsExternalId(), + ioConfig.getAutoScalerConfig(), + ioConfig.isDeaggregate(), + ioConfig.getServerPriorityToReplicas(), + boundedStreamConfig + ); + return new KinesisSupervisorSpec( + backfillId, + null, + getSpec().getDataSchema(), + getSpec().getTuningConfig(), + backfillIoConfig, + getContext(), + isSuspended(), + taskStorage, + taskMaster, + indexerMetadataStorageCoordinator, + (KinesisIndexTaskClientFactory) indexTaskClientFactory, + mapper, + emitter, + monitorSchedulerConfig, + rowIngestionMetersFactory, + awsCredentialsConfig, + supervisorStateManagerConfig + ); + } } diff --git a/extensions-core/kinesis-indexing-service/src/test/java/org/apache/druid/indexing/kinesis/KinesisIndexTaskSerdeTest.java b/extensions-core/kinesis-indexing-service/src/test/java/org/apache/druid/indexing/kinesis/KinesisIndexTaskSerdeTest.java index 3089a39537cd..5173dfae9ffe 100644 --- a/extensions-core/kinesis-indexing-service/src/test/java/org/apache/druid/indexing/kinesis/KinesisIndexTaskSerdeTest.java +++ b/extensions-core/kinesis-indexing-service/src/test/java/org/apache/druid/indexing/kinesis/KinesisIndexTaskSerdeTest.java @@ -35,7 +35,6 @@ import org.apache.druid.segment.incremental.RowIngestionMetersFactory; import org.apache.druid.segment.indexing.DataSchema; import org.apache.druid.segment.realtime.ChatHandlerProvider; -import org.apache.druid.segment.realtime.NoopChatHandlerProvider; import org.apache.druid.segment.realtime.appenderator.AppenderatorsManager; import org.apache.druid.server.security.Action; import org.apache.druid.server.security.Resource; @@ -160,7 +159,7 @@ private static ObjectMapper createObjectMapper() 
binder.bindConstant().annotatedWith(Names.named("serviceName")).to("test"); binder.bindConstant().annotatedWith(Names.named("servicePort")).to(8000); binder.bindConstant().annotatedWith(Names.named("tlsServicePort")).to(9000); - binder.bind(ChatHandlerProvider.class).toInstance(new NoopChatHandlerProvider()); + binder.bind(ChatHandlerProvider.class).toInstance(new ChatHandlerProvider()); binder.bind(RowIngestionMetersFactory.class).toInstance(new DropwizardRowIngestionMetersFactory()); binder.bind(AppenderatorsManager.class).toInstance(new TestAppenderatorsManager()); } diff --git a/extensions-core/kinesis-indexing-service/src/test/java/org/apache/druid/indexing/kinesis/KinesisIndexTaskTest.java b/extensions-core/kinesis-indexing-service/src/test/java/org/apache/druid/indexing/kinesis/KinesisIndexTaskTest.java index f4ccfd4fc702..669e7136bfaf 100644 --- a/extensions-core/kinesis-indexing-service/src/test/java/org/apache/druid/indexing/kinesis/KinesisIndexTaskTest.java +++ b/extensions-core/kinesis-indexing-service/src/test/java/org/apache/druid/indexing/kinesis/KinesisIndexTaskTest.java @@ -876,7 +876,7 @@ public void testRunWithTransformSpec() throws Exception verifyAll(); verifyTaskMetrics(task, RowMeters.with().bytes(getTotalSize(RECORDS, 0, 5)) - .thrownAwayByReason(InputRowFilterResult.NULL_OR_EMPTY_RECORD, 4).totalProcessed(1)); + .thrownAwayByReason(InputRowFilterResult.CUSTOM_FILTER, 4).totalProcessed(1)); // Check published metadata assertEqualsExceptVersion(ImmutableList.of(sdd("2009/P1D", 0)), publishedDescriptors()); diff --git a/extensions-core/kubernetes-overlord-extensions/src/main/java/org/apache/druid/k8s/overlord/KubernetesAndWorkerTaskRunnerConfig.java b/extensions-core/kubernetes-overlord-extensions/src/main/java/org/apache/druid/k8s/overlord/KubernetesAndWorkerTaskRunnerConfig.java index 89311981b0e2..9ec20045361c 100644 --- 
a/extensions-core/kubernetes-overlord-extensions/src/main/java/org/apache/druid/k8s/overlord/KubernetesAndWorkerTaskRunnerConfig.java +++ b/extensions-core/kubernetes-overlord-extensions/src/main/java/org/apache/druid/k8s/overlord/KubernetesAndWorkerTaskRunnerConfig.java @@ -23,7 +23,6 @@ import com.fasterxml.jackson.annotation.JsonProperty; import com.google.common.base.Preconditions; import org.apache.commons.lang3.ObjectUtils; -import org.apache.druid.indexing.overlord.RemoteTaskRunnerFactory; import org.apache.druid.indexing.overlord.hrtr.HttpRemoteTaskRunnerFactory; import javax.annotation.Nullable; @@ -51,11 +50,9 @@ public KubernetesAndWorkerTaskRunnerConfig( this.runnerStrategy = ObjectUtils.getIfNull(runnerStrategy, KubernetesTaskRunnerFactory.TYPE_NAME); this.workerType = ObjectUtils.getIfNull(workerType, HttpRemoteTaskRunnerFactory.TYPE_NAME); Preconditions.checkArgument( - this.workerType.equals(HttpRemoteTaskRunnerFactory.TYPE_NAME) || - this.workerType.equals(RemoteTaskRunnerFactory.TYPE_NAME), - "workerType must be set to one of (%s, %s)", - HttpRemoteTaskRunnerFactory.TYPE_NAME, - RemoteTaskRunnerFactory.TYPE_NAME + this.workerType.equals(HttpRemoteTaskRunnerFactory.TYPE_NAME), + "workerType must be set to [%s]; the ZooKeeper-based 'remote' worker type has been removed.", + HttpRemoteTaskRunnerFactory.TYPE_NAME ); } diff --git a/extensions-core/kubernetes-overlord-extensions/src/main/java/org/apache/druid/k8s/overlord/KubernetesOverlordModule.java b/extensions-core/kubernetes-overlord-extensions/src/main/java/org/apache/druid/k8s/overlord/KubernetesOverlordModule.java index 6e82bb8766ff..b45aa6fb846d 100644 --- a/extensions-core/kubernetes-overlord-extensions/src/main/java/org/apache/druid/k8s/overlord/KubernetesOverlordModule.java +++ b/extensions-core/kubernetes-overlord-extensions/src/main/java/org/apache/druid/k8s/overlord/KubernetesOverlordModule.java @@ -44,7 +44,6 @@ import org.apache.druid.guice.annotations.Self; import 
org.apache.druid.guice.annotations.Smile; import org.apache.druid.indexing.common.config.TaskConfig; -import org.apache.druid.indexing.overlord.RemoteTaskRunnerFactory; import org.apache.druid.indexing.overlord.TaskRunnerFactory; import org.apache.druid.indexing.overlord.WorkerTaskRunner; import org.apache.druid.indexing.overlord.config.TaskQueueConfig; @@ -264,10 +263,10 @@ public void stop() } /** - * Provides a TaskRunnerFactory instance suitable for environments without Zookeeper. - * In such environments, the standard RemoteTaskRunnerFactory may not be operational. - * Depending on the workerType defined in KubernetesAndWorkerTaskRunnerConfig, - * this method selects and returns an appropriate TaskRunnerFactory implementation. + * Provides the worker-side {@link TaskRunnerFactory} that the {@code k8sAndWorker} runner pairs + * with {@link KubernetesTaskRunnerFactory}. Only {@link HttpRemoteTaskRunnerFactory} is + * supported; the ZooKeeper-based 'remote' worker type was removed, and + * {@link KubernetesAndWorkerTaskRunnerConfig} enforces this at config-validation time. */ @Provides @LazySingleton @@ -277,10 +276,8 @@ TaskRunnerFactory provideWorkerTaskRunner( Injector injector ) { - String workerType = runnerConfig.getWorkerType(); - return HttpRemoteTaskRunnerFactory.TYPE_NAME.equals(workerType) - ? injector.getInstance(HttpRemoteTaskRunnerFactory.class) - : injector.getInstance(RemoteTaskRunnerFactory.class); + // workerType is validated to be HttpRemoteTaskRunnerFactory.TYPE_NAME by the config. 
+ return injector.getInstance(HttpRemoteTaskRunnerFactory.class); } /** diff --git a/extensions-core/kubernetes-overlord-extensions/src/test/java/org/apache/druid/k8s/overlord/KubernetesAndWorkerTaskRunnerConfigTest.java b/extensions-core/kubernetes-overlord-extensions/src/test/java/org/apache/druid/k8s/overlord/KubernetesAndWorkerTaskRunnerConfigTest.java index 329a1ea52bce..5338ad2ebb9b 100644 --- a/extensions-core/kubernetes-overlord-extensions/src/test/java/org/apache/druid/k8s/overlord/KubernetesAndWorkerTaskRunnerConfigTest.java +++ b/extensions-core/kubernetes-overlord-extensions/src/test/java/org/apache/druid/k8s/overlord/KubernetesAndWorkerTaskRunnerConfigTest.java @@ -38,7 +38,7 @@ public void test_deserializable() throws IOException ); Assertions.assertEquals("worker", config.getRunnerStrategy()); - Assertions.assertEquals("remote", config.getWorkerType()); + Assertions.assertEquals("httpRemote", config.getWorkerType()); } @Test diff --git a/extensions-core/kubernetes-overlord-extensions/src/test/java/org/apache/druid/k8s/overlord/KubernetesOverlordModuleTest.java b/extensions-core/kubernetes-overlord-extensions/src/test/java/org/apache/druid/k8s/overlord/KubernetesOverlordModuleTest.java index e37313ebb0fb..55e5103567b6 100644 --- a/extensions-core/kubernetes-overlord-extensions/src/test/java/org/apache/druid/k8s/overlord/KubernetesOverlordModuleTest.java +++ b/extensions-core/kubernetes-overlord-extensions/src/test/java/org/apache/druid/k8s/overlord/KubernetesOverlordModuleTest.java @@ -35,7 +35,6 @@ import org.apache.druid.guice.annotations.EscalatedGlobal; import org.apache.druid.guice.annotations.Self; import org.apache.druid.indexing.common.config.TaskConfig; -import org.apache.druid.indexing.overlord.RemoteTaskRunnerFactory; import org.apache.druid.indexing.overlord.TaskRunnerFactory; import org.apache.druid.indexing.overlord.hrtr.HttpRemoteTaskRunnerFactory; import org.apache.druid.jackson.JacksonModule; @@ -77,8 +76,6 @@ public class 
KubernetesOverlordModuleTest @Mock private HttpClient httpClient; @Mock - private RemoteTaskRunnerFactory remoteTaskRunnerFactory; - @Mock private HttpRemoteTaskRunnerFactory httpRemoteTaskRunnerFactory; @Mock private ConfigManagerConfig configManagerConfig; @@ -111,7 +108,7 @@ public void setUpConfigManagerMock() @Test public void testDefaultHttpRemoteTaskRunnerFactoryBindSuccessfully() { - injector = makeInjectorWithProperties(initializePropertes(false), false, true); + injector = makeInjectorWithProperties(initializePropertes(), true); KubernetesAndWorkerTaskRunnerFactory taskRunnerFactory = injector.getInstance( KubernetesAndWorkerTaskRunnerFactory.class); Assertions.assertNotNull(taskRunnerFactory); @@ -122,32 +119,21 @@ public void testDefaultHttpRemoteTaskRunnerFactoryBindSuccessfully() @Test public void testMultipleKubernetesTaskRunnerFactoryBindSuccessfully() { - final Properties props = initializePropertes(false); + final Properties props = initializePropertes(); props.setProperty("druid.indexer.runner.type", MultipleKubernetesTaskRunnerFactory.TYPE_NAME); props.setProperty("druid.indexer.runner.clusters[0].taskNamespace", "NAMESPACE"); - injector = makeInjectorWithProperties(props, false, true); + injector = makeInjectorWithProperties(props, true); final TaskRunnerFactory taskRunnerFactory = injector.getInstance(TaskRunnerFactory.class); Assertions.assertInstanceOf(MultipleKubernetesTaskRunnerFactory.class, taskRunnerFactory); } - @Test - public void testRemoteTaskRunnerFactoryBindSuccessfully() - { - injector = makeInjectorWithProperties(initializePropertes(true), true, false); - KubernetesAndWorkerTaskRunnerFactory taskRunnerFactory = injector.getInstance( - KubernetesAndWorkerTaskRunnerFactory.class); - Assertions.assertNotNull(taskRunnerFactory); - - Assertions.assertNotNull(taskRunnerFactory.build()); - } - @Test public void testExceptionThrownIfNoTaskRunnerFactoryBind() { Assertions.assertThrows(ProvisionException.class, () -> { - injector = 
makeInjectorWithProperties(initializePropertes(false), false, false); + injector = makeInjectorWithProperties(initializePropertes(), false); injector.getInstance(KubernetesAndWorkerTaskRunnerFactory.class); }); } @@ -159,7 +145,7 @@ public void test_build_withMultiContainerAdapterType_returnsWithMultiContainerTa props.setProperty("druid.indexer.runner.k8s.adapter.type", "overlordMultiContainer"); props.setProperty("druid.indexer.runner.namespace", "NAMESPACE"); - injector = makeInjectorWithProperties(props, false, true); + injector = makeInjectorWithProperties(props, true); TaskAdapter taskAdapter = injector.getInstance( TaskAdapter.class); @@ -173,7 +159,7 @@ public void test_build_withSingleContainerAdapterType_returnsKubernetesTaskRunne Properties props = new Properties(); props.setProperty("druid.indexer.runner.k8s.adapter.type", "overlordSingleContainer"); props.setProperty("druid.indexer.runner.namespace", "NAMESPACE"); - injector = makeInjectorWithProperties(props, false, true); + injector = makeInjectorWithProperties(props, true); TaskAdapter taskAdapter = injector.getInstance( TaskAdapter.class); @@ -188,7 +174,7 @@ public void test_build_withSingleContainerAdapterTypeAndSidecarSupport_throwsPro props.setProperty("druid.indexer.runner.k8s.adapter.type", "overlordSingleContainer"); props.setProperty("druid.indexer.runner.sidecarSupport", "true"); props.setProperty("druid.indexer.runner.namespace", "NAMESPACE"); - injector = makeInjectorWithProperties(props, false, true); + injector = makeInjectorWithProperties(props, true); Assertions.assertThrows( ProvisionException.class, @@ -203,7 +189,7 @@ public void test_build_withSidecarSupport_returnsKubernetesTaskRunnerWithMultiCo Properties props = new Properties(); props.setProperty("druid.indexer.runner.sidecarSupport", "true"); props.setProperty("druid.indexer.runner.namespace", "NAMESPACE"); - injector = makeInjectorWithProperties(props, false, true); + injector = makeInjectorWithProperties(props, true); 
TaskAdapter adapter = injector.getInstance(TaskAdapter.class); @@ -218,7 +204,7 @@ public void test_build_withoutSidecarSupport_returnsKubernetesTaskRunnerWithSing Properties props = new Properties(); props.setProperty("druid.indexer.runner.sidecarSupport", "false"); props.setProperty("druid.indexer.runner.namespace", "NAMESPACE"); - injector = makeInjectorWithProperties(props, false, true); + injector = makeInjectorWithProperties(props, true); TaskAdapter adapter = injector.getInstance(TaskAdapter.class); @@ -235,7 +221,7 @@ public void test_build_withPodTemplateAdapterType_returnsKubernetesTaskRunnerWit props.setProperty("druid.indexer.runner.k8s.adapter.type", "customTemplateAdapter"); props.setProperty("druid.indexer.runner.k8s.podTemplate.base", url.getPath()); props.setProperty("druid.indexer.runner.namespace", "NAMESPACE"); - injector = makeInjectorWithProperties(props, false, true); + injector = makeInjectorWithProperties(props, true); TaskAdapter adapter = injector.getInstance(TaskAdapter.class); @@ -251,7 +237,7 @@ public void test_httpClientFactory_defaultsToVertx() props.setProperty("druid.indexer.runner.namespace", "NAMESPACE"); // Don't set httpClientType - should default to vertx - injector = makeInjectorWithProperties(props, false, true); + injector = makeInjectorWithProperties(props, true); DruidKubernetesHttpClientFactory factory = injector.getInstance(DruidKubernetesHttpClientFactory.class); Assertions.assertNotNull(factory); @@ -266,7 +252,7 @@ public void test_httpClientFactory_okhttpSelection() props.setProperty("druid.indexer.runner.namespace", "NAMESPACE"); props.setProperty("druid.indexer.runner.k8sAndWorker.http.httpClientType", "okhttp"); - injector = makeInjectorWithProperties(props, false, true); + injector = makeInjectorWithProperties(props, true); DruidKubernetesHttpClientFactory factory = injector.getInstance(DruidKubernetesHttpClientFactory.class); Assertions.assertNotNull(factory); @@ -281,7 +267,7 @@ public void 
test_httpClientFactory_vertxExplicitSelection() props.setProperty("druid.indexer.runner.namespace", "NAMESPACE"); props.setProperty("druid.indexer.runner.k8sAndWorker.http.httpClientType", "vertx"); - injector = makeInjectorWithProperties(props, false, true); + injector = makeInjectorWithProperties(props, true); DruidKubernetesHttpClientFactory factory = injector.getInstance(DruidKubernetesHttpClientFactory.class); Assertions.assertNotNull(factory); @@ -296,7 +282,7 @@ public void test_httpClientFactory_jdkSelection() props.setProperty("druid.indexer.runner.namespace", "NAMESPACE"); props.setProperty("druid.indexer.runner.k8sAndWorker.http.httpClientType", "javaStandardHttp"); - injector = makeInjectorWithProperties(props, false, true); + injector = makeInjectorWithProperties(props, true); DruidKubernetesHttpClientFactory factory = injector.getInstance(DruidKubernetesHttpClientFactory.class); Assertions.assertNotNull(factory); @@ -312,7 +298,7 @@ public void test_httpClientFactory_invalidTypeThrowsException() props.setProperty("druid.indexer.runner.namespace", "NAMESPACE"); props.setProperty("druid.indexer.runner.k8sAndWorker.http.httpClientType", "invalid"); - injector = makeInjectorWithProperties(props, false, true); + injector = makeInjectorWithProperties(props, true); injector.getInstance(DruidKubernetesHttpClientFactory.class); }); } @@ -324,7 +310,7 @@ public void test_druidKubernetesClient_createdWithVertxClient() props.setProperty("druid.indexer.runner.namespace", "NAMESPACE"); // Don't set httpClientType - should default to vertx - injector = makeInjectorWithProperties(props, false, true); + injector = makeInjectorWithProperties(props, true); DruidKubernetesClient client = injector.getInstance(DruidKubernetesClient.class); Assertions.assertNotNull(client, "DruidKubernetesClient should be created successfully"); @@ -333,8 +319,7 @@ public void test_druidKubernetesClient_createdWithVertxClient() private Injector makeInjectorWithProperties( final Properties 
props, - boolean isWorkerTypeRemote, - boolean isWorkerTypeHttpRemote + boolean bindHttpRemoteTaskRunnerFactory ) { return Guice.createInjector( @@ -350,10 +335,7 @@ private Injector makeInjectorWithProperties( binder.bind(DruidNode.class) .annotatedWith(Self.class) .toInstance(new DruidNode("test-inject", null, false, null, null, true, false)); - if (isWorkerTypeRemote) { - binder.bind(RemoteTaskRunnerFactory.class).toInstance(remoteTaskRunnerFactory); - } - if (isWorkerTypeHttpRemote) { + if (bindHttpRemoteTaskRunnerFactory) { binder.bind(HttpRemoteTaskRunnerFactory.class).toInstance(httpRemoteTaskRunnerFactory); } binder.bind( @@ -374,14 +356,11 @@ private Injector makeInjectorWithProperties( )); } - private static Properties initializePropertes(boolean isWorkerTypeRemote) + private static Properties initializePropertes() { final Properties props = new Properties(); props.put("druid.indexer.runner.namespace", "NAMESPACE"); props.put("druid.indexer.runner.k8sAndWorker.runnerStrategy.type", "k8s"); - if (isWorkerTypeRemote) { - props.put("druid.indexer.runner.k8sAndWorker.runnerStrategy.workerType", "remote"); - } return props; } } diff --git a/extensions-core/kubernetes-overlord-extensions/src/test/resources/kubernetesAndWorkerTaskRunnerConfig.json b/extensions-core/kubernetes-overlord-extensions/src/test/resources/kubernetesAndWorkerTaskRunnerConfig.json index 43e7414f11f8..de09ff0ee209 100644 --- a/extensions-core/kubernetes-overlord-extensions/src/test/resources/kubernetesAndWorkerTaskRunnerConfig.json +++ b/extensions-core/kubernetes-overlord-extensions/src/test/resources/kubernetesAndWorkerTaskRunnerConfig.json @@ -1,4 +1,4 @@ { "runnerStrategy.type": "worker", - "runnerStrategy.workerType": "remote" -} \ No newline at end of file + "runnerStrategy.workerType": "httpRemote" +} diff --git a/extensions-core/orc-extensions/src/main/java/org/apache/druid/data/input/orc/OrcInputFormat.java 
b/extensions-core/orc-extensions/src/main/java/org/apache/druid/data/input/orc/OrcInputFormat.java index 7474a79a15eb..96bfc03214dc 100644 --- a/extensions-core/orc-extensions/src/main/java/org/apache/druid/data/input/orc/OrcInputFormat.java +++ b/extensions-core/orc-extensions/src/main/java/org/apache/druid/data/input/orc/OrcInputFormat.java @@ -36,12 +36,14 @@ import java.io.File; import java.io.IOException; import java.util.Objects; +import java.util.concurrent.atomic.AtomicBoolean; public class OrcInputFormat extends NestedInputFormat { static final long SCALE_FACTOR = 8L; private final boolean binaryAsString; private final Configuration conf; + private final AtomicBoolean fileSystemInitialized = new AtomicBoolean(false); @JsonCreator public OrcInputFormat( @@ -55,19 +57,20 @@ public OrcInputFormat( this.conf = conf; } - private void initialize(Configuration conf) + // Init FileSystem once under this class's classloader to avoid concurrent setContextClassLoader races. + private void ensureFileSystemInitialized() { - //Initializing seperately since during eager initialization, resolving - //namenode hostname throws an error if nodes are ephemeral - - // Ensure that FileSystem class level initialization happens with correct CL - // See https://github.com/apache/druid/issues/1714 - ClassLoader currCtxCl = Thread.currentThread().getContextClassLoader(); + if (!fileSystemInitialized.compareAndSet(false, true)) { + return; + } + final ClassLoader currCtxCl = Thread.currentThread().getContextClassLoader(); try { Thread.currentThread().setContextClassLoader(getClass().getClassLoader()); FileSystem.get(conf); } catch (IOException ex) { + // Reset so a subsequent createReader can retry init instead of skipping it. 
+ fileSystemInitialized.set(false); throw new RuntimeException(ex); } finally { @@ -91,7 +94,7 @@ public boolean getBinaryAsString() @Override public InputEntityReader createReader(InputRowSchema inputRowSchema, InputEntity source, File temporaryDirectory) { - initialize(conf); + ensureFileSystemInitialized(); return new OrcReader(conf, inputRowSchema, source, temporaryDirectory, getFlattenSpec(), binaryAsString); } diff --git a/extensions-core/orc-extensions/src/test/java/org/apache/druid/data/input/orc/OrcInputFormatTest.java b/extensions-core/orc-extensions/src/test/java/org/apache/druid/data/input/orc/OrcInputFormatTest.java index a7f6e5131c3a..555d1de2c998 100644 --- a/extensions-core/orc-extensions/src/test/java/org/apache/druid/data/input/orc/OrcInputFormatTest.java +++ b/extensions-core/orc-extensions/src/test/java/org/apache/druid/data/input/orc/OrcInputFormatTest.java @@ -73,6 +73,7 @@ public void testEquals() { EqualsVerifier.forClass(OrcInputFormat.class) .withPrefabValues(Configuration.class, new Configuration(), new Configuration()) + .withIgnoredFields("fileSystemInitialized") .usingGetClass() .verify(); } diff --git a/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/S3Utils.java b/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/S3Utils.java index 2cac95a200b1..82412fe412c9 100644 --- a/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/S3Utils.java +++ b/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/S3Utils.java @@ -417,9 +417,9 @@ static void uploadFileIfPossible( /** * Determines whether to use HTTP or HTTPS protocol based on configuration. */ - public static boolean useHttps(AWSClientConfig clientConfig, AWSEndpointConfig endpointConfig) + public static boolean useHttps(@Nullable AWSClientConfig clientConfig, AWSEndpointConfig endpointConfig) { - String protocol = clientConfig.getProtocol(); + final String protocol = clientConfig == null ? 
null : clientConfig.getProtocol(); final String endpointUrl = endpointConfig.getUrl(); if (org.apache.commons.lang3.StringUtils.isNotEmpty(endpointUrl)) { diff --git a/extensions-core/s3-extensions/src/test/java/org/apache/druid/data/input/s3/S3InputSourceTest.java b/extensions-core/s3-extensions/src/test/java/org/apache/druid/data/input/s3/S3InputSourceTest.java index 168216affdca..e9291c739249 100644 --- a/extensions-core/s3-extensions/src/test/java/org/apache/druid/data/input/s3/S3InputSourceTest.java +++ b/extensions-core/s3-extensions/src/test/java/org/apache/druid/data/input/s3/S3InputSourceTest.java @@ -404,6 +404,39 @@ public void testSerdeWithCloudConfigPropertiesWithSessionToken() throws Exceptio EasyMock.verify(SERVER_SIDE_ENCRYPTING_AMAZON_S3_BUILDER); } + @Test + public void testSchemelessEndpointConfigUrlWithNullClientConfigResolvesSupplier() throws Exception + { + EasyMock.reset(SERVER_SIDE_ENCRYPTING_AMAZON_S3_BUILDER); + EasyMock.expect(SERVER_SIDE_ENCRYPTING_AMAZON_S3_BUILDER.getS3StorageConfig()) + .andStubReturn(S3_STORAGE_CONFIG); + EasyMock.replay(SERVER_SIDE_ENCRYPTING_AMAZON_S3_BUILDER); + + final AWSEndpointConfig schemelessEndpoint = MAPPER.readValue( + "{\"url\":\"s3.example.com\",\"signingRegion\":\"us-east-1\"}", + AWSEndpointConfig.class + ); + + final S3InputSource inputSource = new S3InputSource( + SERVICE, + SERVER_SIDE_ENCRYPTING_AMAZON_S3_BUILDER, + INPUT_DATA_CONFIG, + null, + null, + EXPECTED_LOCATION, + null, + CLOUD_CONFIG_PROPERTIES, + null, + schemelessEndpoint, + null + ); + + // Forces s3ClientSupplier evaluation, which hits S3Utils.useHttps and confirms a null client config does not blow up. 
+ inputSource.createEntity(new CloudObjectLocation("bucket", "path")); + + EasyMock.verify(SERVER_SIDE_ENCRYPTING_AMAZON_S3_BUILDER); + } + @Test public void testGetSetSessionToken() { diff --git a/extensions-core/s3-extensions/src/test/java/org/apache/druid/storage/s3/S3StorageDruidModuleTest.java b/extensions-core/s3-extensions/src/test/java/org/apache/druid/storage/s3/S3StorageDruidModuleTest.java index 3932c147695b..5d4fc4f188f2 100644 --- a/extensions-core/s3-extensions/src/test/java/org/apache/druid/storage/s3/S3StorageDruidModuleTest.java +++ b/extensions-core/s3-extensions/src/test/java/org/apache/druid/storage/s3/S3StorageDruidModuleTest.java @@ -19,11 +19,12 @@ package org.apache.druid.storage.s3; -import com.google.common.collect.ImmutableList; +import com.google.inject.Guice; import com.google.inject.Injector; import org.apache.druid.common.aws.AWSModule; -import org.apache.druid.guice.GuiceInjectors; +import org.apache.druid.guice.DruidSecondaryModule; import org.apache.druid.guice.ServerModule; +import org.apache.druid.guice.StartupInjectorBuilder; import org.apache.druid.segment.loading.OmniDataSegmentArchiver; import org.apache.druid.segment.loading.OmniDataSegmentKiller; import org.apache.druid.segment.loading.OmniDataSegmentMover; @@ -70,12 +71,12 @@ public void testSegmentMoverBoundSingleton() private static Injector createInjector() { - return GuiceInjectors.makeStartupInjectorWithModules( - ImmutableList.of( - new AWSModule(), - new S3StorageDruidModule(), - new ServerModule() - ) + final Injector startupInjector = new StartupInjectorBuilder().forServer().build(); + return Guice.createInjector( + startupInjector.getInstance(DruidSecondaryModule.class), + new AWSModule(), + new S3StorageDruidModule(), + new ServerModule() ); } } diff --git a/extensions-core/s3-extensions/src/test/java/org/apache/druid/storage/s3/S3UtilsTest.java b/extensions-core/s3-extensions/src/test/java/org/apache/druid/storage/s3/S3UtilsTest.java index 
6c46df7d993a..dcf7b3f8d0d5 100644 --- a/extensions-core/s3-extensions/src/test/java/org/apache/druid/storage/s3/S3UtilsTest.java +++ b/extensions-core/s3-extensions/src/test/java/org/apache/druid/storage/s3/S3UtilsTest.java @@ -19,6 +19,9 @@ package org.apache.druid.storage.s3; +import com.fasterxml.jackson.databind.ObjectMapper; +import org.apache.druid.common.aws.AWSClientConfig; +import org.apache.druid.common.aws.AWSEndpointConfig; import org.easymock.Capture; import org.easymock.CaptureType; import org.easymock.EasyMock; @@ -33,6 +36,7 @@ import java.io.IOException; import java.util.List; +import java.util.concurrent.CompletionException; import java.util.concurrent.atomic.AtomicInteger; import java.util.stream.Collectors; @@ -148,6 +152,31 @@ public void testRetryWithSdkClientException() throws Exception Assert.assertEquals(maxRetries, count.get()); } + @Test + public void testRetryWithAsyncCredentialProviderChainException() throws Exception + { + final int maxRetries = 3; + final AtomicInteger count = new AtomicInteger(); + S3Utils.retryS3Operation( + () -> { + if (count.incrementAndGet() >= maxRetries) { + return "hey"; + } else { + throw new CompletionException( + SdkClientException.builder() + .message( + "Unable to load credentials from any of the providers in the chain " + + "AwsCredentialsProviderChain" + ) + .build() + ); + } + }, + maxRetries + ); + Assert.assertEquals(maxRetries, count.get()); + } + @Test public void testRetryWithS3InternalError() throws Exception { @@ -382,4 +411,42 @@ public void testRetryWithS3MultiObjectDeleteException() throws Exception ); Assert.assertEquals(maxRetries, count.get()); } + + private static final ObjectMapper JSON = new ObjectMapper(); + + private static AWSEndpointConfig endpointWith(String json) throws IOException + { + return JSON.readValue(json, AWSEndpointConfig.class); + } + + @Test + public void testUseHttpsNullClientConfigSchemelessEndpointReturnsTrue() throws IOException + { + 
Assert.assertTrue(S3Utils.useHttps(null, endpointWith("{\"url\":\"s3.example.com\"}"))); + } + + @Test + public void testUseHttpsNullClientConfigHttpEndpointReturnsFalse() throws IOException + { + Assert.assertFalse(S3Utils.useHttps(null, endpointWith("{\"url\":\"http://s3.example.com\"}"))); + } + + @Test + public void testUseHttpsNullClientConfigHttpsEndpointReturnsTrue() throws IOException + { + Assert.assertTrue(S3Utils.useHttps(null, endpointWith("{\"url\":\"https://s3.example.com\"}"))); + } + + @Test + public void testUseHttpsNullClientConfigNullEndpointUrlReturnsTrue() throws IOException + { + Assert.assertTrue(S3Utils.useHttps(null, new AWSEndpointConfig())); + } + + @Test + public void testUseHttpsDefaultClientConfigSchemelessEndpointReturnsTrue() throws IOException + { + // Sanity check: default AWSClientConfig protocol is "https"; schemeless URL inherits "https". + Assert.assertTrue(S3Utils.useHttps(new AWSClientConfig(), endpointWith("{\"url\":\"s3.example.com\"}"))); + } } diff --git a/indexing-service/pom.xml b/indexing-service/pom.xml index 42e117203649..e6cc7d787c10 100644 --- a/indexing-service/pom.xml +++ b/indexing-service/pom.xml @@ -92,14 +92,6 @@ io.netty netty - - org.apache.zookeeper - zookeeper - - - org.apache.zookeeper - zookeeper-jute - com.fasterxml.jackson.core jackson-core @@ -112,10 +104,6 @@ com.google.guava guava - - org.apache.curator - curator-recipes - jakarta.validation jakarta.validation-api diff --git a/indexing-service/src/main/java/org/apache/druid/guice/IndexingServiceModuleHelper.java b/indexing-service/src/main/java/org/apache/druid/guice/IndexingServiceModuleHelper.java index cc3732439d8a..da60043c6780 100644 --- a/indexing-service/src/main/java/org/apache/druid/guice/IndexingServiceModuleHelper.java +++ b/indexing-service/src/main/java/org/apache/druid/guice/IndexingServiceModuleHelper.java @@ -22,8 +22,6 @@ import com.google.inject.Binder; import org.apache.druid.indexing.overlord.config.ForkingTaskRunnerConfig; 
import org.apache.druid.indexing.overlord.config.HttpRemoteTaskRunnerConfig; -import org.apache.druid.indexing.overlord.config.RemoteTaskRunnerConfig; -import org.apache.druid.server.initialization.IndexerZkConfig; /** */ @@ -34,8 +32,6 @@ public class IndexingServiceModuleHelper public static void configureTaskRunnerConfigs(Binder binder) { JsonConfigProvider.bind(binder, INDEXER_RUNNER_PROPERTY_PREFIX, ForkingTaskRunnerConfig.class); - JsonConfigProvider.bind(binder, INDEXER_RUNNER_PROPERTY_PREFIX, RemoteTaskRunnerConfig.class); JsonConfigProvider.bind(binder, INDEXER_RUNNER_PROPERTY_PREFIX, HttpRemoteTaskRunnerConfig.class); - JsonConfigProvider.bind(binder, "druid.zk.paths.indexer", IndexerZkConfig.class); } } diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/SegmentCacheManagerFactory.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/SegmentCacheManagerFactory.java index 63ee7e8c4954..cf5dfc7d0e93 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/SegmentCacheManagerFactory.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/SegmentCacheManagerFactory.java @@ -27,8 +27,10 @@ import org.apache.druid.segment.loading.SegmentCacheManager; import org.apache.druid.segment.loading.SegmentLoaderConfig; import org.apache.druid.segment.loading.SegmentLocalCacheManager; +import org.apache.druid.segment.loading.StorageLoadingThreadPool; import org.apache.druid.segment.loading.StorageLocation; import org.apache.druid.segment.loading.StorageLocationConfig; +import org.apache.druid.timeline.DataSegment; import java.io.File; import java.util.Collections; @@ -52,16 +54,34 @@ public SegmentCacheManagerFactory( this.jsonMapper = mapper; } - public SegmentCacheManager manufacturate(File storageDir, boolean virtualStorage) + /** + * Creates a new {@link SegmentCacheManager} backed by a new storage location in {@code storageDir}, and a new + * loading thread pool of default size. 
+ * + * @param storageDir storage location + * @param maxSize size limit, or null for no limit + * @param virtualStorage whether to configure the cache manager in ephemeral virtual storage mode. In this mode, + * loading is triggered by {@link SegmentCacheManager#acquireSegment(DataSegment)}, and + * segment files are deleted as soon as all holds are closed. + */ + public SegmentCacheManager manufacturate(File storageDir, Long maxSize, boolean virtualStorage) { + final StorageLocationConfig locationConfig = new StorageLocationConfig( + storageDir, + maxSize != null ? maxSize : Long.MAX_VALUE, + null + ); final SegmentLoaderConfig loaderConfig = new SegmentLoaderConfig() - .setLocations(Collections.singletonList(new StorageLocationConfig(storageDir, null, null))) - .setVirtualStorage(virtualStorage, virtualStorage); + .setLocations(Collections.singletonList(locationConfig)) + .setVirtualStorage(virtualStorage) + .setVirtualStorageIsEphemeral(virtualStorage); final List storageLocations = loaderConfig.toStorageLocations(); + final StorageLoadingThreadPool loadingThreadPool = StorageLoadingThreadPool.createFromConfig(loaderConfig); return new SegmentLocalCacheManager( storageLocations, loaderConfig, + loadingThreadPool, new LeastBytesUsedStorageLocationSelectorStrategy(storageLocations), indexIO, jsonMapper diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/TaskToolboxFactory.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/TaskToolboxFactory.java index 2cd5b8dd6096..6ee32438d49c 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/TaskToolboxFactory.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/TaskToolboxFactory.java @@ -247,7 +247,7 @@ public TaskToolbox build(TaskConfig config, Task task) .queryProcessingPool(queryProcessingPool) .joinableFactory(joinableFactory) .monitorSchedulerProvider(monitorSchedulerProvider) - 
.segmentCacheManager(segmentCacheManagerFactory.manufacturate(taskWorkDir, true)) + .segmentCacheManager(segmentCacheManagerFactory.manufacturate(taskWorkDir, null, true)) .jsonMapper(jsonMapper) .taskWorkDir(taskWorkDir) .indexIO(indexIO) diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/IndexTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/IndexTask.java index eb3b5c0e84f8..b29899a77cf5 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/IndexTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/IndexTask.java @@ -435,7 +435,7 @@ public TaskStatus runTask(final TaskToolbox toolbox) // ParallelIndexSupervisorTask because it doesn't support APIs for live ingestion reports. log.warn("Chat handler is already registered. Skipping chat handler registration."); } else { - toolbox.getChatHandlerProvider().register(getId(), this, false); + toolbox.getChatHandlerProvider().register(getId(), this); } this.authorizerMapper = toolbox.getAuthorizerMapper(); diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTask.java index 44147e242955..9025c2ce8a6a 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTask.java @@ -522,7 +522,7 @@ public TaskStatus runTask(TaskToolbox toolbox) throws Exception Preconditions.checkNotNull(toolbox.getChatHandlerProvider(), "chatHandlerProvider").getClass().getName() ); authorizerMapper = toolbox.getAuthorizerMapper(); - toolbox.getChatHandlerProvider().register(getId(), this, false); + toolbox.getChatHandlerProvider().register(getId(), this); // the 
lineage-based segment allocation protocol must be used as the legacy protocol has a critical bug // (see SinglePhaseParallelIndexTaskRunner.allocateNewSegment()). However, we tell subtasks to use diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/SinglePhaseSubTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/SinglePhaseSubTask.java index f706aafe39dd..53a480b66cc2 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/SinglePhaseSubTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/SinglePhaseSubTask.java @@ -242,7 +242,7 @@ public TaskStatus runTask(final TaskToolbox toolbox) throws Exception } this.authorizerMapper = toolbox.getAuthorizerMapper(); - toolbox.getChatHandlerProvider().register(getId(), this, false); + toolbox.getChatHandlerProvider().register(getId(), this); rowIngestionMeters = toolbox.getRowIngestionMetersFactory().createRowIngestionMeters(); parseExceptionHandler = new ParseExceptionHandler( diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/input/DruidInputSource.java b/indexing-service/src/main/java/org/apache/druid/indexing/input/DruidInputSource.java index ab7ea72730ad..84fead9916bb 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/input/DruidInputSource.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/input/DruidInputSource.java @@ -313,7 +313,8 @@ public DruidInputSource withInterval(Interval interval) @Override protected InputSourceReader fixedFormatReader(InputRowSchema inputRowSchema, @Nullable File temporaryDirectory) { - final SegmentCacheManager segmentCacheManager = segmentCacheManagerFactory.manufacturate(temporaryDirectory, false); + final SegmentCacheManager segmentCacheManager = + segmentCacheManagerFactory.manufacturate(temporaryDirectory, null, false); final List> timeline = 
createTimeline(); final Iterator entityIterator = FluentIterable diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/input/DruidSegmentInputEntity.java b/indexing-service/src/main/java/org/apache/druid/indexing/input/DruidSegmentInputEntity.java index 0f7f5a6b9412..85ab75c3787e 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/input/DruidSegmentInputEntity.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/input/DruidSegmentInputEntity.java @@ -59,7 +59,7 @@ public URI getUri() } @Override - public InputStream open() + public InputStream openRaw() { throw new UnsupportedOperationException("Don't call this"); } diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/DruidOverlord.java b/indexing-service/src/main/java/org/apache/druid/indexing/overlord/DruidOverlord.java index 350f9b08cf91..4dac9ea81c1a 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/DruidOverlord.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/overlord/DruidOverlord.java @@ -23,9 +23,7 @@ import com.google.common.base.Optional; import com.google.inject.Inject; import org.apache.druid.client.indexing.IndexingService; -import org.apache.druid.curator.discovery.ServiceAnnouncer; import org.apache.druid.discovery.DruidLeaderSelector; -import org.apache.druid.guice.annotations.Self; import org.apache.druid.indexing.common.actions.SegmentAllocationQueue; import org.apache.druid.indexing.common.actions.TaskActionClientFactory; import org.apache.druid.indexing.common.task.TaskContextEnricher; @@ -42,7 +40,6 @@ import org.apache.druid.java.util.emitter.EmittingLogger; import org.apache.druid.java.util.emitter.service.ServiceEmitter; import org.apache.druid.metadata.segment.cache.SegmentMetadataCache; -import org.apache.druid.server.DruidNode; import org.apache.druid.server.coordinator.CoordinatorOverlordServiceConfig; import java.util.concurrent.atomic.AtomicReference; @@ -67,10 
+64,9 @@ public class DruidOverlord private final AtomicReference leaderLifecycleRef = new AtomicReference<>(null); /** - * Indicates that all services have been started and the node can now announce - * itself with {@link ServiceAnnouncer#announce}. This must be set to false - * as soon as {@link DruidLeaderSelector.Listener#stopBeingLeader()} is - * called. + * Indicates that all services have been started and the node is ready to serve + * leader-only HTTP routes. This must be set to false as soon as + * {@link DruidLeaderSelector.Listener#stopBeingLeader()} is called. */ private volatile boolean initialized; @@ -83,9 +79,7 @@ public DruidOverlord( final GlobalTaskLockbox taskLockbox, final TaskStorage taskStorage, final TaskActionClientFactory taskActionClientFactory, - @Self final DruidNode selfNode, final TaskRunnerFactory runnerFactory, - final ServiceAnnouncer serviceAnnouncer, final CoordinatorOverlordServiceConfig coordinatorOverlordServiceConfig, final ServiceEmitter emitter, final SupervisorManager supervisorManager, @@ -103,9 +97,6 @@ public DruidOverlord( this.segmentMetadataCache = segmentMetadataCache; this.coordinatorOverlordServiceConfig = coordinatorOverlordServiceConfig; - final DruidNode node = coordinatorOverlordServiceConfig.getOverlordService() == null ? 
selfNode : - selfNode.withService(coordinatorOverlordServiceConfig.getOverlordService()); - this.leadershipListener = new DruidLeaderSelector.Listener() { @Override @@ -173,15 +164,13 @@ public void start() compactionScheduler.becomeLeader(); scheduledBatchTaskManager.start(); - // Announce the node only after all the services have been initialized + // Mark ready only after all the services have been initialized initialized = true; - serviceAnnouncer.announce(node); } @Override public void stop() { - serviceAnnouncer.unannounce(node); scheduledBatchTaskManager.stop(); compactionScheduler.stopBeingLeader(); taskMaster.downgradeToHalfLeader(); diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/ForkingTaskRunner.java b/indexing-service/src/main/java/org/apache/druid/indexing/overlord/ForkingTaskRunner.java index fb09cb5f1547..52c2dcc7ba38 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/ForkingTaskRunner.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/overlord/ForkingTaskRunner.java @@ -115,7 +115,8 @@ public class ForkingTaskRunner "--add-opens=java.base/jdk.internal.ref=ALL-UNNAMED", "--add-opens=java.base/java.io=ALL-UNNAMED", "--add-opens=java.base/java.lang=ALL-UNNAMED", - "--add-opens=jdk.management/com.sun.management.internal=ALL-UNNAMED" + "--add-opens=jdk.management/com.sun.management.internal=ALL-UNNAMED", + "--add-modules=jdk.incubator.vector" ); private final ForkingTaskRunnerConfig config; diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/ImmutableWorkerInfo.java b/indexing-service/src/main/java/org/apache/druid/indexing/overlord/ImmutableWorkerInfo.java index cd911ed99811..76a8385d7095 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/ImmutableWorkerInfo.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/overlord/ImmutableWorkerInfo.java @@ -105,7 +105,8 @@ public ImmutableWorkerInfo( } /** - * Helper 
used by {@link ZkWorker} and {@link org.apache.druid.indexing.overlord.hrtr.WorkerHolder}. + * Helper used by {@link org.apache.druid.indexing.overlord.hrtr.WorkerHolder} to build a worker view from a set of + * task announcements. */ public static ImmutableWorkerInfo fromWorkerAnnouncements( final Worker worker, diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/RemoteTaskRunner.java b/indexing-service/src/main/java/org/apache/druid/indexing/overlord/RemoteTaskRunner.java deleted file mode 100644 index 4018701d447f..000000000000 --- a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/RemoteTaskRunner.java +++ /dev/null @@ -1,1673 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.druid.indexing.overlord; - -import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Joiner; -import com.google.common.base.Optional; -import com.google.common.base.Preconditions; -import com.google.common.base.Predicate; -import com.google.common.base.Stopwatch; -import com.google.common.base.Supplier; -import com.google.common.base.Throwables; -import com.google.common.collect.Collections2; -import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableMap; -import com.google.common.collect.Lists; -import com.google.common.collect.Maps; -import com.google.common.util.concurrent.FutureCallback; -import com.google.common.util.concurrent.Futures; -import com.google.common.util.concurrent.ListenableFuture; -import com.google.common.util.concurrent.ListenableScheduledFuture; -import com.google.common.util.concurrent.ListeningScheduledExecutorService; -import com.google.common.util.concurrent.MoreExecutors; -import com.google.common.util.concurrent.SettableFuture; -import org.apache.commons.lang3.mutable.MutableInt; -import org.apache.curator.framework.CuratorFramework; -import org.apache.curator.framework.recipes.cache.PathChildrenCache; -import org.apache.curator.framework.recipes.cache.PathChildrenCacheListener; -import org.apache.curator.utils.ZKPaths; -import org.apache.druid.concurrent.LifecycleLock; -import org.apache.druid.curator.CuratorUtils; -import org.apache.druid.curator.cache.PathChildrenCacheFactory; -import org.apache.druid.indexer.RunnerTaskState; -import org.apache.druid.indexer.TaskLocation; -import org.apache.druid.indexer.TaskState; -import org.apache.druid.indexer.TaskStatus; -import org.apache.druid.indexing.common.task.IndexTaskUtils; -import org.apache.druid.indexing.common.task.Task; -import org.apache.druid.indexing.overlord.autoscaling.ProvisioningService; -import 
org.apache.druid.indexing.overlord.autoscaling.ProvisioningStrategy; -import org.apache.druid.indexing.overlord.autoscaling.ScalingStats; -import org.apache.druid.indexing.overlord.config.RemoteTaskRunnerConfig; -import org.apache.druid.indexing.overlord.setup.DefaultWorkerBehaviorConfig; -import org.apache.druid.indexing.overlord.setup.WorkerBehaviorConfig; -import org.apache.druid.indexing.overlord.setup.WorkerSelectStrategy; -import org.apache.druid.indexing.worker.TaskAnnouncement; -import org.apache.druid.indexing.worker.Worker; -import org.apache.druid.java.util.common.DateTimes; -import org.apache.druid.java.util.common.ISE; -import org.apache.druid.java.util.common.Pair; -import org.apache.druid.java.util.common.RE; -import org.apache.druid.java.util.common.StringUtils; -import org.apache.druid.java.util.common.concurrent.Execs; -import org.apache.druid.java.util.common.concurrent.ScheduledExecutors; -import org.apache.druid.java.util.common.io.Closer; -import org.apache.druid.java.util.common.lifecycle.LifecycleStart; -import org.apache.druid.java.util.common.lifecycle.LifecycleStop; -import org.apache.druid.java.util.emitter.EmittingLogger; -import org.apache.druid.java.util.emitter.service.ServiceEmitter; -import org.apache.druid.java.util.emitter.service.ServiceMetricEvent; -import org.apache.druid.java.util.http.client.HttpClient; -import org.apache.druid.java.util.http.client.Request; -import org.apache.druid.java.util.http.client.response.InputStreamResponseHandler; -import org.apache.druid.java.util.http.client.response.StatusResponseHandler; -import org.apache.druid.java.util.http.client.response.StatusResponseHolder; -import org.apache.druid.server.initialization.IndexerZkConfig; -import org.apache.druid.tasklogs.TaskLogStreamer; -import org.apache.zookeeper.CreateMode; -import org.apache.zookeeper.KeeperException; -import org.jboss.netty.handler.codec.http.HttpMethod; -import org.jboss.netty.handler.codec.http.HttpResponseStatus; -import 
org.joda.time.Duration; -import org.joda.time.Period; - -import javax.annotation.Nullable; -import java.io.IOException; -import java.io.InputStream; -import java.net.URL; -import java.util.Collection; -import java.util.Collections; -import java.util.Comparator; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Iterator; -import java.util.LinkedHashMap; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.concurrent.Callable; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.ConcurrentMap; -import java.util.concurrent.CopyOnWriteArrayList; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.Executor; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.ScheduledFuture; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.TimeoutException; - -/** - * The RemoteTaskRunner's primary responsibility is to assign tasks to worker nodes. - * The RemoteTaskRunner uses Zookeeper to keep track of which workers are running which tasks. Tasks are assigned by - * creating ephemeral nodes in ZK that workers must remove. Workers announce the statuses of the tasks they are running. - * Once a task completes, it is up to the RTR to remove the task status and run any necessary cleanup. - * The RemoteTaskRunner is event driven and updates state according to ephemeral node changes in ZK. - *

- * The RemoteTaskRunner will assign tasks to a node until the node hits capacity. At that point, task assignment will - * fail. The RemoteTaskRunner depends on another component to create additional worker resources. - *

- * If a worker node becomes inexplicably disconnected from Zk, the RemoteTaskRunner will fail any tasks associated with the - * worker after waiting for RemoteTaskRunnerConfig.taskCleanupTimeout for the worker to show up. - *

- * The RemoteTaskRunner uses ZK for job management and assignment and http for IPC messages. - */ -public class RemoteTaskRunner implements WorkerTaskRunner, TaskLogStreamer -{ - private static final EmittingLogger log = new EmittingLogger(RemoteTaskRunner.class); - private static final Joiner JOINER = Joiner.on("/"); - - private final ObjectMapper jsonMapper; - private final RemoteTaskRunnerConfig config; - private final Duration shutdownTimeout; - private final IndexerZkConfig indexerZkConfig; - private final CuratorFramework cf; - private final PathChildrenCacheFactory workerStatusPathChildrenCacheFactory; - private final ExecutorService workerStatusPathChildrenCacheExecutor; - private final PathChildrenCache workerPathCache; - private final HttpClient httpClient; - private final Supplier workerConfigRef; - - // all workers that exist in ZK - private final ConcurrentMap zkWorkers = new ConcurrentHashMap<>(); - // payloads of pending tasks, which we remember just long enough to assign to workers - private final ConcurrentMap pendingTaskPayloads = new ConcurrentHashMap<>(); - // tasks that have not yet been assigned to a worker - private final RemoteTaskRunnerWorkQueue pendingTasks = new RemoteTaskRunnerWorkQueue(); - // all tasks that have been assigned to a worker - private final RemoteTaskRunnerWorkQueue runningTasks = new RemoteTaskRunnerWorkQueue(); - // tasks that are complete but not cleaned up yet - private final RemoteTaskRunnerWorkQueue completeTasks = new RemoteTaskRunnerWorkQueue(); - - private final ExecutorService runPendingTasksExec; - - // Workers that have been marked as lazy. these workers are not running any tasks and can be terminated safely by the scaling policy. - private final ConcurrentMap lazyWorkers = new ConcurrentHashMap<>(); - - // Workers that have been blacklisted. 
- private final Set blackListedWorkers = Collections.synchronizedSet(new HashSet<>()); - - // task runner listeners - private final CopyOnWriteArrayList> listeners = new CopyOnWriteArrayList<>(); - - // workers which were assigned a task and are yet to acknowledge same. - // Map: workerId -> taskId - private final ConcurrentMap workersWithUnacknowledgedTask = new ConcurrentHashMap<>(); - // Map: taskId -> taskId .tasks which are being tried to be assigned to a worker - private final ConcurrentMap tryAssignTasks = new ConcurrentHashMap<>(); - - private final Object statusLock = new Object(); - - private final LifecycleLock lifecycleLock = new LifecycleLock(); - - private final ListeningScheduledExecutorService cleanupExec; - - private final ConcurrentMap removedWorkerCleanups = new ConcurrentHashMap<>(); - private final ProvisioningStrategy provisioningStrategy; - private final ServiceEmitter emitter; - private ProvisioningService provisioningService; - - public RemoteTaskRunner( - ObjectMapper jsonMapper, - RemoteTaskRunnerConfig config, - IndexerZkConfig indexerZkConfig, - CuratorFramework cf, - PathChildrenCacheFactory.Builder pathChildrenCacheFactory, - HttpClient httpClient, - Supplier workerConfigRef, - ProvisioningStrategy provisioningStrategy, - ServiceEmitter emitter - ) - { - this.jsonMapper = jsonMapper; - this.config = config; - this.shutdownTimeout = config.getTaskShutdownLinkTimeout().toStandardDuration(); // Fail fast - this.indexerZkConfig = indexerZkConfig; - this.cf = cf; - this.workerPathCache = pathChildrenCacheFactory.build().make(cf, indexerZkConfig.getAnnouncementsPath()); - this.workerStatusPathChildrenCacheExecutor = PathChildrenCacheFactory.Builder.createDefaultExecutor(); - this.workerStatusPathChildrenCacheFactory = pathChildrenCacheFactory - .withExecutorService(workerStatusPathChildrenCacheExecutor) - .withShutdownExecutorOnClose(false) - .build(); - this.httpClient = httpClient; - this.workerConfigRef = workerConfigRef; - 
this.cleanupExec = MoreExecutors.listeningDecorator( - ScheduledExecutors.fixed(1, "RemoteTaskRunner-Scheduled-Cleanup--%d") - ); - this.provisioningStrategy = provisioningStrategy; - this.runPendingTasksExec = Execs.multiThreaded( - config.getPendingTasksRunnerNumThreads(), - "rtr-pending-tasks-runner-%d" - ); - this.emitter = emitter; - } - - @Override - @LifecycleStart - public void start() - { - if (!lifecycleLock.canStart()) { - return; - } - try { - log.info("Starting RemoteTaskRunner..."); - final MutableInt waitingFor = new MutableInt(1); - final Object waitingForMonitor = new Object(); - - // Add listener for creation/deletion of workers - workerPathCache.getListenable().addListener( - (client, event) -> { - final Worker worker; - switch (event.getType()) { - case CHILD_ADDED: - worker = jsonMapper.readValue( - event.getData().getData(), - Worker.class - ); - synchronized (waitingForMonitor) { - waitingFor.increment(); - } - Futures.addCallback( - addWorker(worker), - new FutureCallback<>() - { - @Override - public void onSuccess(ZkWorker zkWorker) - { - synchronized (waitingForMonitor) { - waitingFor.decrement(); - waitingForMonitor.notifyAll(); - } - } - - @Override - public void onFailure(Throwable throwable) - { - synchronized (waitingForMonitor) { - waitingFor.decrement(); - waitingForMonitor.notifyAll(); - } - } - }, - MoreExecutors.directExecutor() - ); - break; - case CHILD_UPDATED: - worker = jsonMapper.readValue( - event.getData().getData(), - Worker.class - ); - updateWorker(worker); - break; - - case CHILD_REMOVED: - worker = jsonMapper.readValue( - event.getData().getData(), - Worker.class - ); - removeWorker(worker); - break; - case INITIALIZED: - // Schedule cleanup for task status of the workers that might have disconnected while overlord was not running - List workers; - try { - workers = cf.getChildren().forPath(indexerZkConfig.getStatusPath()); - } - catch (KeeperException.NoNodeException e) { - // statusPath doesn't exist yet; can occur 
if no middleManagers have started. - workers = ImmutableList.of(); - } - for (String workerId : workers) { - final String workerAnnouncePath = JOINER.join(indexerZkConfig.getAnnouncementsPath(), workerId); - final String workerStatusPath = JOINER.join(indexerZkConfig.getStatusPath(), workerId); - if (!zkWorkers.containsKey(workerId) && cf.checkExists().forPath(workerAnnouncePath) == null) { - try { - scheduleTasksCleanupForWorker(workerId, cf.getChildren().forPath(workerStatusPath)); - } - catch (Exception e) { - log.warn( - e, - "Could not schedule cleanup for worker[%s] during startup (maybe someone removed the status znode[%s]?). Skipping.", - workerId, - workerStatusPath - ); - } - } - } - synchronized (waitingForMonitor) { - waitingFor.decrement(); - waitingForMonitor.notifyAll(); - } - break; - case CONNECTION_SUSPENDED: - case CONNECTION_RECONNECTED: - case CONNECTION_LOST: - // do nothing - } - } - ); - workerPathCache.start(PathChildrenCache.StartMode.POST_INITIALIZED_EVENT); - synchronized (waitingForMonitor) { - while (waitingFor.intValue() > 0) { - waitingForMonitor.wait(); - } - } - - ScheduledExecutors.scheduleAtFixedRate( - cleanupExec, - Period.ZERO.toStandardDuration(), - config.getWorkerBlackListCleanupPeriod().toStandardDuration(), - this::checkBlackListedNodes - ); - - provisioningService = provisioningStrategy.makeProvisioningService(this); - lifecycleLock.started(); - } - catch (Exception e) { - throw new RuntimeException(e); - } - finally { - lifecycleLock.exitStart(); - } - } - - @Override - @LifecycleStop - public void stop() - { - if (!lifecycleLock.canStop()) { - return; - } - try { - log.info("Stopping RemoteTaskRunner..."); - provisioningService.close(); - - Closer closer = Closer.create(); - for (ZkWorker zkWorker : zkWorkers.values()) { - closer.register(zkWorker); - } - closer.register(workerPathCache); - try { - closer.close(); - } - finally { - workerStatusPathChildrenCacheExecutor.shutdown(); - } - - if (runPendingTasksExec != 
null) { - runPendingTasksExec.shutdown(); - } - - if (cleanupExec != null) { - cleanupExec.shutdown(); - } - } - catch (Exception e) { - throw new RuntimeException(e); - } - finally { - lifecycleLock.exitStop(); - } - } - - @Override - public List>> restore() - { - return ImmutableList.of(); - } - - @Override - public void registerListener(TaskRunnerListener listener, Executor executor) - { - for (Pair pair : listeners) { - if (pair.lhs.getListenerId().equals(listener.getListenerId())) { - throw new ISE("Listener [%s] already registered", listener.getListenerId()); - } - } - - final Pair listenerPair = Pair.of(listener, executor); - - synchronized (statusLock) { - for (Map.Entry entry : runningTasks.entrySet()) { - TaskRunnerUtils.notifyLocationChanged( - ImmutableList.of(listenerPair), - entry.getKey(), - entry.getValue().getLocation() - ); - } - - log.info("Registered listener [%s]", listener.getListenerId()); - listeners.add(listenerPair); - } - } - - @Override - public void unregisterListener(String listenerId) - { - for (Pair pair : listeners) { - if (pair.lhs.getListenerId().equals(listenerId)) { - listeners.remove(pair); - log.info("Unregistered listener [%s]", listenerId); - return; - } - } - } - - @Override - public Collection getWorkers() - { - return getImmutableWorkerFromZK(zkWorkers.values()); - } - - @Override - public Collection getRunningTasks() - { - return ImmutableList.copyOf(runningTasks.values()); - } - - @Override - public Collection getPendingTasks() - { - return ImmutableList.copyOf(pendingTasks.values()); - } - - @Override - public Collection getPendingTaskPayloads() - { - // return a snapshot of current pending task payloads. 
- return ImmutableList.copyOf(pendingTaskPayloads.values()); - } - - @Override - public RemoteTaskRunnerConfig getConfig() - { - return config; - } - - @Override - public Collection getKnownTasks() - { - // Use a map to dedupe tasks, since they may transition from one state to another while this method is iterating - // through the various collections. - final Map items = new LinkedHashMap<>(); - - // Racey, since there is a period of time during assignment when a task is neither pending nor running. - for (RemoteTaskRunnerWorkItem item : pendingTasks.values()) { - items.put(item.getTaskId(), item); - } - - for (RemoteTaskRunnerWorkItem item : runningTasks.values()) { - items.put(item.getTaskId(), item); - } - - for (RemoteTaskRunnerWorkItem item : completeTasks.values()) { - items.put(item.getTaskId(), item); - } - - return ImmutableList.copyOf(items.values()); - } - - @Nullable - @Override - public RunnerTaskState getRunnerTaskState(String taskId) - { - if (pendingTasks.containsKey(taskId)) { - return RunnerTaskState.PENDING; - } - if (runningTasks.containsKey(taskId)) { - return RunnerTaskState.RUNNING; - } - if (completeTasks.containsKey(taskId)) { - return RunnerTaskState.NONE; - } - - return null; - } - - @Override - public TaskLocation getTaskLocation(String taskId) - { - if (pendingTasks.containsKey(taskId)) { - return pendingTasks.get(taskId).getLocation(); - } - if (runningTasks.containsKey(taskId)) { - return runningTasks.get(taskId).getLocation(); - } - if (completeTasks.containsKey(taskId)) { - return completeTasks.get(taskId).getLocation(); - } - - return TaskLocation.unknown(); - } - - @Override - public Optional getScalingStats() - { - return Optional.fromNullable(provisioningService.getStats()); - } - - @Nullable - public ZkWorker findWorkerRunningTask(String taskId) - { - for (ZkWorker zkWorker : zkWorkers.values()) { - if (zkWorker.isRunningTask(taskId)) { - return zkWorker; - } - } - return null; - } - - /** - * Retrieve {@link ZkWorker} based 
on an ID (host), or null if the ID doesn't exist. - */ - @Nullable - ZkWorker findWorkerId(String workerId) - { - return zkWorkers.get(workerId); - } - - public boolean isWorkerRunningTask(ZkWorker worker, String taskId) - { - return Preconditions.checkNotNull(worker, "worker").isRunningTask(taskId); - } - - /** - * A task will be run only if there is no current knowledge in the RemoteTaskRunner of the task. - * - * @param task task to run - */ - @Override - public ListenableFuture run(final Task task) - { - final RemoteTaskRunnerWorkItem completeTask, runningTask, pendingTask; - if ((pendingTask = pendingTasks.get(task.getId())) != null) { - log.info("Assigned a task[%s] that is already pending!", task.getId()); - runPendingTasks(); - return pendingTask.getResult(); - } else if ((runningTask = runningTasks.get(task.getId())) != null) { - ZkWorker zkWorker = findWorkerRunningTask(task.getId()); - if (zkWorker == null) { - log.warn("Told to run task[%s], but no worker has started running it yet.", task.getId()); - } else { - log.info("Task[%s] already running on %s.", task.getId(), zkWorker.getWorker().getHost()); - TaskAnnouncement announcement = zkWorker.getRunningTasks().get(task.getId()); - if (announcement.getTaskStatus().isComplete()) { - taskComplete(runningTask, zkWorker, announcement.getTaskStatus()); - } - } - return runningTask.getResult(); - } else if ((completeTask = completeTasks.get(task.getId())) != null) { - return completeTask.getResult(); - } else { - RemoteTaskRunnerWorkItem workItem = addPendingTask(task); - runPendingTasks(); - return workItem.getResult(); - } - } - - /** - * Finds the worker running the task and forwards the shutdown signal to the worker. 
- * - * @param taskId - task id to shutdown - */ - @Override - public void shutdown(final String taskId, String reason) - { - log.info("Shutdown [%s] because: [%s]", taskId, reason); - if (!lifecycleLock.awaitStarted(1, TimeUnit.SECONDS)) { - log.info("This TaskRunner is stopped or not yet started. Ignoring shutdown command for task: %s", taskId); - } else if (pendingTasks.remove(taskId) != null) { - pendingTaskPayloads.remove(taskId); - log.info("Removed task from pending queue: %s", taskId); - } else if (completeTasks.containsKey(taskId)) { - cleanup(taskId); - } else { - final ZkWorker zkWorker = findWorkerRunningTask(taskId); - - if (zkWorker == null) { - log.info("Can't shutdown! No worker running task %s", taskId); - return; - } - URL url = null; - try { - url = TaskRunnerUtils.makeWorkerURL(zkWorker.getWorker(), "/druid/worker/v1/task/%s/shutdown", taskId); - final StatusResponseHolder response = httpClient.go( - new Request(HttpMethod.POST, url), - StatusResponseHandler.getInstance(), - shutdownTimeout - ).get(); - - log.info( - "Sent shutdown message to worker: %s, status %s, response: %s", - zkWorker.getWorker().getHost(), - response.getStatus(), - response.getContent() - ); - - if (!HttpResponseStatus.OK.equals(response.getStatus())) { - log.error("Shutdown failed for %s! 
Are you sure the task was running?", taskId); - } - } - catch (InterruptedException e) { - Thread.currentThread().interrupt(); - throw new RE(e, "Interrupted posting shutdown to [%s] for task [%s]", url, taskId); - } - catch (Exception e) { - throw new RE(e, "Error in handling post to [%s] for task [%s]", zkWorker.getWorker().getHost(), taskId); - } - } - } - - @Override - public Optional streamTaskLog(final String taskId, final long offset) throws IOException - { - final ZkWorker zkWorker = findWorkerRunningTask(taskId); - - if (zkWorker == null) { - // Worker is not running this task, it might be available in deep storage - return Optional.absent(); - } else { - // Worker is still running this task - final URL url = TaskRunnerUtils.makeWorkerURL( - zkWorker.getWorker(), - "/druid/worker/v1/task/%s/log?offset=%s", - taskId, - Long.toString(offset) - ); - try { - return Optional.of(httpClient.go( - new Request(HttpMethod.GET, url), - new InputStreamResponseHandler() - ).get()); - } - catch (InterruptedException e) { - throw new RuntimeException(e); - } - catch (ExecutionException e) { - // Unwrap if possible - Throwables.propagateIfPossible(e.getCause(), IOException.class); - throw new RuntimeException(e); - } - } - } - - - @Override - public Optional streamTaskReports(final String taskId) throws IOException - { - final ZkWorker zkWorker = findWorkerRunningTask(taskId); - - if (zkWorker == null) { - // Worker is not running this task, it might be available in deep storage - return Optional.absent(); - } - - final RemoteTaskRunnerWorkItem runningWorkItem = runningTasks.get(taskId); - - if (runningWorkItem == null) { - // Worker very recently exited. - return Optional.absent(); - } - - final TaskLocation taskLocation = runningWorkItem.getLocation(); - - if (TaskLocation.unknown().equals(taskLocation)) { - // No location known for this task. It may have not been assigned one yet. 
- return Optional.absent(); - } - - final URL url = TaskRunnerUtils.makeTaskLocationURL( - taskLocation, - "/druid/worker/v1/chat/%s/liveReports", - taskId - ); - - return TaskRunnerUtils.streamTaskReportsFromTaskLocation(httpClient, url); - } - - - /** - * Adds a task to the pending queue. - * {@link #runPendingTasks()} should be called to run the pending task. - */ - @VisibleForTesting - RemoteTaskRunnerWorkItem addPendingTask(final Task task) - { - log.info("Added pending task %s", task.getId()); - final RemoteTaskRunnerWorkItem taskRunnerWorkItem = new RemoteTaskRunnerWorkItem( - task.getId(), - task.getType(), - null, - null, - task.getDataSource() - ); - pendingTaskPayloads.put(task.getId(), task); - pendingTasks.put(task.getId(), taskRunnerWorkItem); - return taskRunnerWorkItem; - } - - /** - * This method uses a multi-threaded executor to extract all pending tasks and attempt to run them. Any tasks that - * are successfully assigned to a worker will be moved from pendingTasks to runningTasks. This method is thread-safe. - * This method should be run each time there is new worker capacity or if new tasks are assigned. - */ - @VisibleForTesting - void runPendingTasks() - { - runPendingTasksExec.submit( - (Callable) () -> { - try { - // make a copy of the pending tasks because tryAssignTask may delete tasks from pending and move them - // into running status - List copy = Lists.newArrayList(pendingTasks.values()); - sortByInsertionTime(copy); - - for (RemoteTaskRunnerWorkItem taskRunnerWorkItem : copy) { - runPendingTask(taskRunnerWorkItem); - } - } - catch (Exception e) { - log.makeAlert(e, "Exception in running pending tasks").emit(); - } - - return null; - } - ); - } - - /** - * Run one pending task. This method must be called in the same class except for unit tests. 
- */ - @VisibleForTesting - void runPendingTask(RemoteTaskRunnerWorkItem taskRunnerWorkItem) - { - String taskId = taskRunnerWorkItem.getTaskId(); - if (tryAssignTasks.putIfAbsent(taskId, taskId) == null) { - try { - //this can still be null due to race from explicit task shutdown request - //or if another thread steals and completes this task right after this thread makes copy - //of pending tasks. See https://github.com/apache/druid/issues/2842 . - Task task = pendingTaskPayloads.get(taskId); - if (task != null && tryAssignTask(task, taskRunnerWorkItem)) { - pendingTaskPayloads.remove(taskId); - } - } - catch (Exception e) { - log.makeAlert(e, "Exception while trying to assign task") - .addData("taskId", taskRunnerWorkItem.getTaskId()) - .emit(); - RemoteTaskRunnerWorkItem workItem = pendingTasks.remove(taskId); - if (workItem != null) { - taskComplete( - workItem, - null, - TaskStatus.failure( - taskId, - StringUtils.format("Failed to assign this task. See overlord logs for more details.") - ) - ); - } - } - finally { - tryAssignTasks.remove(taskId); - } - } - } - - @VisibleForTesting - static void sortByInsertionTime(List tasks) - { - Collections.sort(tasks, Comparator.comparing(RemoteTaskRunnerWorkItem::getQueueInsertionTime)); - } - - /** - * Removes a task from the complete queue and clears out the ZK status path of the task. 
- * - * @param taskId - the task to cleanup - */ - private void cleanup(final String taskId) - { - if (!lifecycleLock.awaitStarted(1, TimeUnit.SECONDS)) { - return; - } - final RemoteTaskRunnerWorkItem removed = completeTasks.remove(taskId); - final Worker worker; - if (removed == null || (worker = removed.getWorker()) == null) { - log.makeAlert("Asked to cleanup nonexistent task") - .addData("taskId", taskId) - .emit(); - } else { - final String workerId = worker.getHost(); - log.info("Cleaning up task[%s] on worker[%s]", taskId, workerId); - final String statusPath = JOINER.join(indexerZkConfig.getStatusPath(), workerId, taskId); - try { - cf.delete().guaranteed().forPath(statusPath); - } - catch (KeeperException.NoNodeException e) { - log.info("Tried to delete status path[%s] that didn't exist! Must've gone away already?", statusPath); - } - catch (Exception e) { - throw new RuntimeException(e); - } - } - } - - /** - * Ensures no workers are already running a task before assigning the task to a worker. - * It is possible that a worker is running a task that the RTR has no knowledge of. This occurs when the RTR - * needs to bootstrap after a restart. 
- * - * @param taskRunnerWorkItem - the task to assign - * @return true iff the task is now assigned - */ - private boolean tryAssignTask(final Task task, final RemoteTaskRunnerWorkItem taskRunnerWorkItem) throws Exception - { - Preconditions.checkNotNull(task, "task"); - Preconditions.checkNotNull(taskRunnerWorkItem, "taskRunnerWorkItem"); - Preconditions.checkArgument(task.getId().equals(taskRunnerWorkItem.getTaskId()), "task id != workItem id"); - - if (runningTasks.containsKey(task.getId()) || findWorkerRunningTask(task.getId()) != null) { - log.info("Task[%s] already running.", task.getId()); - return true; - } else { - // Nothing running this task, announce it in ZK for a worker to run it - WorkerBehaviorConfig workerConfig = workerConfigRef.get(); - WorkerSelectStrategy strategy; - if (workerConfig == null || workerConfig.getSelectStrategy() == null) { - strategy = WorkerBehaviorConfig.DEFAULT_STRATEGY; - log.debug("No worker selection strategy set. Using default of [%s]", strategy.getClass().getSimpleName()); - } else { - strategy = workerConfig.getSelectStrategy(); - } - - ZkWorker assignedWorker = null; - final ImmutableWorkerInfo immutableZkWorker; - try { - synchronized (workersWithUnacknowledgedTask) { - immutableZkWorker = strategy.findWorkerForTask( - config, - ImmutableMap.copyOf(getWorkersEligibleToRunTasks()), - task - ); - - if (immutableZkWorker != null && - workersWithUnacknowledgedTask.putIfAbsent(immutableZkWorker.getWorker().getHost(), task.getId()) - == null) { - assignedWorker = zkWorkers.get(immutableZkWorker.getWorker().getHost()); - } - } - - if (assignedWorker != null) { - return announceTask(task, assignedWorker, taskRunnerWorkItem); - } else { - log.debug( - "Unsuccessful task-assign attempt for task [%s] on workers [%s]. 
Workers to ack tasks are [%s].", - task.getId(), - zkWorkers.values(), - workersWithUnacknowledgedTask - ); - } - - return false; - } - finally { - if (assignedWorker != null) { - workersWithUnacknowledgedTask.remove(assignedWorker.getWorker().getHost()); - //if this attempt won the race to run the task then other task might be able to use this worker now after task ack. - runPendingTasks(); - } - } - } - } - - Map getWorkersEligibleToRunTasks() - { - return Maps.transformEntries( - Maps.filterEntries( - zkWorkers, - input -> !lazyWorkers.containsKey(input.getKey()) && - !workersWithUnacknowledgedTask.containsKey(input.getKey()) && - !blackListedWorkers.contains(input.getValue()) - ), - (String key, ZkWorker value) -> value.toImmutable() - ); - } - - /** - * Creates a ZK entry under a specific path associated with a worker. The worker is responsible for - * removing the task ZK entry and creating a task status ZK entry. - * - * @param theZkWorker The worker the task is assigned to - * @param taskRunnerWorkItem The task to be assigned - * @return boolean indicating whether the task was successfully assigned or not - */ - private boolean announceTask( - final Task task, - final ZkWorker theZkWorker, - final RemoteTaskRunnerWorkItem taskRunnerWorkItem - ) throws Exception - { - Preconditions.checkArgument(task.getId().equals(taskRunnerWorkItem.getTaskId()), "task id != workItem id"); - final String worker = theZkWorker.getWorker().getHost(); - synchronized (statusLock) { - if (!zkWorkers.containsKey(worker) || lazyWorkers.containsKey(worker)) { - // the worker might have been killed or marked as lazy - log.debug("Not assigning task to already removed worker[%s]", worker); - return false; - } - log.info("Assigning task [%s] to worker [%s]", task.getId(), worker); - - CuratorUtils.createIfNotExists( - cf, - JOINER.join(indexerZkConfig.getTasksPath(), worker, task.getId()), - CreateMode.EPHEMERAL, - jsonMapper.writeValueAsBytes(task), - config.getMaxZnodeBytes() - ); - - 
RemoteTaskRunnerWorkItem workItem = pendingTasks.remove(task.getId()); - if (workItem == null) { - log.makeAlert("Ignoring null work item from pending task queue") - .addData("taskId", task.getId()) - .emit(); - return false; - } - - final ServiceMetricEvent.Builder metricBuilder = new ServiceMetricEvent.Builder(); - IndexTaskUtils.setTaskDimensions(metricBuilder, task); - emitter.emit(metricBuilder.setMetric( - "task/pending/time", - new Duration(workItem.getQueueInsertionTime(), DateTimes.nowUtc()).getMillis()) - ); - - RemoteTaskRunnerWorkItem newWorkItem = workItem.withWorker(theZkWorker.getWorker(), null); - runningTasks.put(task.getId(), newWorkItem); - log.info("Task [%s] started running on worker [%s]", task.getId(), newWorkItem.getWorker().getHost()); - TaskRunnerUtils.notifyStatusChanged(listeners, task.getId(), TaskStatus.running(task.getId())); - - // Syncing state with Zookeeper - don't assign new tasks until the task we just assigned is actually running - // on a worker - this avoids overflowing a worker with tasks - Stopwatch timeoutStopwatch = Stopwatch.createStarted(); - while (!isWorkerRunningTask(theZkWorker, task.getId())) { - final long waitMs = config.getTaskAssignmentTimeout().toStandardDuration().getMillis(); - statusLock.wait(waitMs); - long elapsed = timeoutStopwatch.elapsed(TimeUnit.MILLISECONDS); - if (elapsed >= waitMs) { - log.makeAlert( - "Task assignment timed out on worker [%s], never ran task [%s]! Timeout: (%s >= %s)!", - worker, - task.getId(), - elapsed, - config.getTaskAssignmentTimeout() - ).emit(); - taskComplete( - taskRunnerWorkItem, - theZkWorker, - TaskStatus.failure( - task.getId(), - StringUtils.format( - "The worker that this task is assigned did not start it in timeout[%s]. 
" - + "See overlord logs for more details.", - config.getTaskAssignmentTimeout() - ) - ) - ); - break; - } - } - return true; - } - } - - private boolean cancelWorkerCleanup(String workerHost) - { - ScheduledFuture previousCleanup = removedWorkerCleanups.remove(workerHost); - if (previousCleanup != null) { - log.info("Cancelling Worker[%s] scheduled task cleanup", workerHost); - previousCleanup.cancel(false); - } - return previousCleanup != null; - } - - /** - * When a new worker appears, listeners are registered for status changes associated with tasks assigned to - * the worker. Status changes indicate the creation or completion of a task. - * The RemoteTaskRunner updates state according to these changes. - * - * @param worker contains metadata for a worker that has appeared in ZK - * @return future that will contain a fully initialized worker - */ - private ListenableFuture addWorker(final Worker worker) - { - log.info("Worker[%s] reportin' for duty!", worker.getHost()); - - try { - cancelWorkerCleanup(worker.getHost()); - - final String workerStatusPath = JOINER.join(indexerZkConfig.getStatusPath(), worker.getHost()); - final PathChildrenCache statusCache = workerStatusPathChildrenCacheFactory.make(cf, workerStatusPath); - final SettableFuture retVal = SettableFuture.create(); - final ZkWorker zkWorker = new ZkWorker( - worker, - statusCache, - jsonMapper - ); - - // Add status listener to the watcher for status changes - zkWorker.addListener(getStatusListener(worker, zkWorker, retVal)); - zkWorker.start(); - return retVal; - } - catch (Exception e) { - throw new RuntimeException(e); - } - } - - @VisibleForTesting - PathChildrenCacheListener getStatusListener(final Worker worker, final ZkWorker zkWorker, final SettableFuture retVal) - { - return (client, event) -> { - final String taskId; - final RemoteTaskRunnerWorkItem taskRunnerWorkItem; - synchronized (statusLock) { - try { - switch (event.getType()) { - case CHILD_ADDED: - case CHILD_UPDATED: - if 
(event.getData() == null) { - log.error("Unexpected null for event.getData() in handle new worker status for [%s]", event.getType().toString()); - log.makeAlert("Unexpected null for event.getData() in handle new worker status") - .addData("worker", zkWorker.getWorker().getHost()) - .addData("eventType", event.getType().toString()) - .emit(); - return; - } - taskId = ZKPaths.getNodeFromPath(event.getData().getPath()); - final TaskAnnouncement announcement = jsonMapper.readValue( - event.getData().getData(), TaskAnnouncement.class - ); - - log.info( - "Worker[%s] wrote %s status for task [%s] on [%s]", - zkWorker.getWorker().getHost(), - announcement.getTaskStatus().getStatusCode(), - taskId, - announcement.getTaskLocation() - ); - - // Synchronizing state with ZK - statusLock.notifyAll(); - - final RemoteTaskRunnerWorkItem tmp; - if ((tmp = runningTasks.get(taskId)) != null) { - taskRunnerWorkItem = tmp; - } else { - final RemoteTaskRunnerWorkItem newTaskRunnerWorkItem = new RemoteTaskRunnerWorkItem( - taskId, - announcement.getTaskType(), - zkWorker.getWorker(), - TaskLocation.unknown(), - announcement.getTaskDataSource() - ); - final RemoteTaskRunnerWorkItem existingItem = runningTasks.putIfAbsent( - taskId, - newTaskRunnerWorkItem - ); - if (existingItem == null) { - log.warn( - "Worker[%s] announced a status for a task I didn't know about, adding to runningTasks: %s", - zkWorker.getWorker().getHost(), - taskId - ); - taskRunnerWorkItem = newTaskRunnerWorkItem; - } else { - taskRunnerWorkItem = existingItem; - } - } - - if (!announcement.getTaskLocation().equals(taskRunnerWorkItem.getLocation())) { - taskRunnerWorkItem.setLocation(announcement.getTaskLocation()); - TaskRunnerUtils.notifyLocationChanged(listeners, taskId, announcement.getTaskLocation()); - } - - if (announcement.getTaskStatus().isComplete()) { - taskComplete(taskRunnerWorkItem, zkWorker, announcement.getTaskStatus()); - runPendingTasks(); - } - break; - case CHILD_REMOVED: - if (event.getData() == 
null) { - log.error("Unexpected null for event.getData() in handle new worker status for [%s]", event.getType().toString()); - log.makeAlert("Unexpected null for event.getData() in handle new worker status") - .addData("worker", zkWorker.getWorker().getHost()) - .addData("eventType", event.getType().toString()) - .emit(); - return; - } - taskId = ZKPaths.getNodeFromPath(event.getData().getPath()); - taskRunnerWorkItem = runningTasks.remove(taskId); - if (taskRunnerWorkItem != null) { - log.warn("Task[%s] just disappeared!", taskId); - final TaskStatus taskStatus = TaskStatus.failure( - taskId, - "The worker that this task was assigned disappeared. See overlord logs for more details." - ); - taskRunnerWorkItem.setResult(taskStatus); - TaskRunnerUtils.notifyStatusChanged(listeners, taskId, taskStatus); - } else { - log.info("Task[%s] went bye bye.", taskId); - } - break; - case INITIALIZED: - if (zkWorkers.putIfAbsent(worker.getHost(), zkWorker) == null) { - retVal.set(zkWorker); - } else { - final String message = StringUtils.format( - "This should not happen...tried to add already-existing worker[%s]", - worker.getHost() - ); - log.makeAlert(message) - .addData("workerHost", worker.getHost()) - .addData("workerIp", worker.getIp()) - .emit(); - retVal.setException(new IllegalStateException(message)); - } - runPendingTasks(); - break; - case CONNECTION_SUSPENDED: - case CONNECTION_RECONNECTED: - case CONNECTION_LOST: - // do nothing - } - } - catch (Exception e) { - String znode = null; - if (event.getData() != null) { - znode = event.getData().getPath(); - } - log.makeAlert(e, "Failed to handle new worker status") - .addData("worker", zkWorker.getWorker().getHost()) - .addData("znode", znode) - .addData("eventType", event.getType().toString()) - .emit(); - } - } - }; - } - - /** - * We allow workers to change their own capacities and versions. They cannot change their own hosts or ips without - * dropping themselves and re-announcing. 
- */ - private void updateWorker(final Worker worker) - { - final ZkWorker zkWorker = zkWorkers.get(worker.getHost()); - if (zkWorker != null) { - log.info("Worker[%s] updated its announcement from[%s] to[%s].", worker.getHost(), zkWorker.getWorker(), worker); - zkWorker.setWorker(worker); - } else { - log.warn( - "Worker[%s] updated its announcement but we didn't have a ZkWorker for it. Ignoring.", - worker.getHost() - ); - } - } - - /** - * When a ephemeral worker node disappears from ZK, incomplete running tasks will be retried by - * the logic in the status listener. We still have to make sure there are no tasks assigned - * to the worker but not yet running. - * - * @param worker - the removed worker - */ - private void removeWorker(final Worker worker) - { - log.info("Kaboom! Worker[%s] removed!", worker.getHost()); - - final ZkWorker zkWorker = zkWorkers.get(worker.getHost()); - if (zkWorker != null) { - try { - scheduleTasksCleanupForWorker(worker.getHost(), getAssignedTasks(worker)); - } - catch (Exception e) { - throw new RuntimeException(e); - } - finally { - try { - zkWorker.close(); - } - catch (Exception e) { - log.error(e, "Exception closing worker[%s]!", worker.getHost()); - } - zkWorkers.remove(worker.getHost()); - checkBlackListedNodes(); - } - } - lazyWorkers.remove(worker.getHost()); - } - - /** - * Schedule a task that will, at some point in the future, clean up znodes and issue failures for "tasksToFail" - * if they are being run by "worker". - */ - private void scheduleTasksCleanupForWorker(final String worker, final List tasksToFail) - { - // This method is only called from the PathChildrenCache event handler, so this may look like a race, - // but is actually not. 
- cancelWorkerCleanup(worker); - - final ListenableScheduledFuture cleanupTask = cleanupExec.schedule( - () -> { - log.info("Running scheduled cleanup for Worker[%s]", worker); - try { - for (String assignedTask : tasksToFail) { - String taskPath = JOINER.join(indexerZkConfig.getTasksPath(), worker, assignedTask); - String statusPath = JOINER.join(indexerZkConfig.getStatusPath(), worker, assignedTask); - if (cf.checkExists().forPath(taskPath) != null) { - cf.delete().guaranteed().forPath(taskPath); - } - - if (cf.checkExists().forPath(statusPath) != null) { - cf.delete().guaranteed().forPath(statusPath); - } - - log.info("Failing task[%s]", assignedTask); - RemoteTaskRunnerWorkItem taskRunnerWorkItem = runningTasks.remove(assignedTask); - if (taskRunnerWorkItem != null) { - final TaskStatus taskStatus = TaskStatus.failure( - assignedTask, - StringUtils.format("Canceled for worker cleanup. See overlord logs for more details.") - ); - taskRunnerWorkItem.setResult(taskStatus); - TaskRunnerUtils.notifyStatusChanged(listeners, assignedTask, taskStatus); - } else { - log.warn("RemoteTaskRunner has no knowledge of task[%s]", assignedTask); - } - } - - // worker is gone, remove worker task status announcements path. - String workerStatusPath = JOINER.join(indexerZkConfig.getStatusPath(), worker); - if (cf.checkExists().forPath(workerStatusPath) != null) { - cf.delete().guaranteed().forPath(JOINER.join(indexerZkConfig.getStatusPath(), worker)); - } - } - catch (Exception e) { - log.makeAlert("Exception while cleaning up worker[%s]", worker).emit(); - throw new RuntimeException(e); - } - }, - config.getTaskCleanupTimeout().toStandardDuration().getMillis(), - TimeUnit.MILLISECONDS - ); - - removedWorkerCleanups.put(worker, cleanupTask); - - // Remove this entry from removedWorkerCleanups when done, if it's actually the one in there. 
- Futures.addCallback( - cleanupTask, - new FutureCallback() - { - @Override - public void onSuccess(Object result) - { - removedWorkerCleanups.remove(worker, cleanupTask); - } - - @Override - public void onFailure(Throwable t) - { - removedWorkerCleanups.remove(worker, cleanupTask); - } - }, - MoreExecutors.directExecutor() - ); - } - - private void taskComplete( - RemoteTaskRunnerWorkItem taskRunnerWorkItem, - @Nullable ZkWorker zkWorker, - TaskStatus taskStatus - ) - { - Preconditions.checkNotNull(taskRunnerWorkItem, "taskRunnerWorkItem"); - Preconditions.checkNotNull(taskStatus, "taskStatus"); - if (zkWorker != null) { - log.info( - "Worker[%s] completed task[%s] with status[%s]", - zkWorker.getWorker().getHost(), - taskStatus.getId(), - taskStatus.getStatusCode() - ); - // Worker is done with this task - zkWorker.setLastCompletedTaskTime(DateTimes.nowUtc()); - } else { - log.info("Workerless task[%s] completed with status[%s]", taskStatus.getId(), taskStatus.getStatusCode()); - } - - // Move from running -> complete - // If the task was running and this is the first complete event, - // previousComplete should be null and removedRunning should not. - final RemoteTaskRunnerWorkItem previousComplete = completeTasks.put(taskStatus.getId(), taskRunnerWorkItem); - final RemoteTaskRunnerWorkItem removedRunning = runningTasks.remove(taskStatus.getId()); - - if (previousComplete != null && removedRunning != null) { - log.warn( - "This is not the first complete event for task[%s], but it was still known as running. " - + "Ignoring the previously known running status.", - taskStatus.getId() - ); - } - - if (previousComplete != null) { - // This is not the first complete event for the same task. - try { - // getResult().get() must return immediately. 
- TaskState lastKnownState = previousComplete.getResult().get(1, TimeUnit.MILLISECONDS).getStatusCode(); - if (taskStatus.getStatusCode() != lastKnownState) { - log.warn( - "The state of the new task complete event is different from its last known state. " - + "New state[%s], last known state[%s]", - taskStatus.getStatusCode(), - lastKnownState - ); - } - } - catch (InterruptedException e) { - log.warn(e, "Interrupted while getting the last known task status."); - Thread.currentThread().interrupt(); - } - catch (ExecutionException | TimeoutException e) { - // This case should not really happen. - log.warn(e, "Failed to get the last known task status. Ignoring this failure."); - } - } else { - // This is the first complete event for this task. - // Update success/failure counters - if (zkWorker != null) { - if (taskStatus.isSuccess()) { - zkWorker.resetContinuouslyFailedTasksCount(); - if (blackListedWorkers.remove(zkWorker)) { - zkWorker.setBlacklistedUntil(null); - log.info("[%s] removed from blacklist because a task finished with SUCCESS", zkWorker.getWorker()); - } - } else if (taskStatus.isFailure()) { - zkWorker.incrementContinuouslyFailedTasksCount(); - } - - // Blacklist node if there are too many failures. 
- synchronized (blackListedWorkers) { - if (zkWorker.getContinuouslyFailedTasksCount() > config.getMaxRetriesBeforeBlacklist() && - blackListedWorkers.size() <= zkWorkers.size() * (config.getMaxPercentageBlacklistWorkers() / 100.0) - 1) { - zkWorker.setBlacklistedUntil(DateTimes.nowUtc().plus(config.getWorkerBlackListBackoffTime())); - if (blackListedWorkers.add(zkWorker)) { - log.info( - "Blacklisting [%s] until [%s] after [%,d] failed tasks in a row.", - zkWorker.getWorker(), - zkWorker.getBlacklistedUntil(), - zkWorker.getContinuouslyFailedTasksCount() - ); - } - } - } - } - - // Notify interested parties - taskRunnerWorkItem.setResult(taskStatus); - TaskRunnerUtils.notifyStatusChanged(listeners, taskStatus.getId(), taskStatus); - } - } - - @Override - public Collection markWorkersLazy(Predicate isLazyWorker, int maxLazyWorkers) - { - // skip the lock and bail early if we should not mark any workers lazy (e.g. number - // of current workers is at or below the minNumWorkers of autoscaler config) - if (lazyWorkers.size() >= maxLazyWorkers) { - return getLazyWorkers(); - } - - // Search for new workers to mark lazy. 
- // Status lock is used to prevent any tasks being assigned to workers while we mark them lazy - synchronized (statusLock) { - for (Map.Entry worker : zkWorkers.entrySet()) { - if (lazyWorkers.size() >= maxLazyWorkers) { - break; - } - final ZkWorker zkWorker = worker.getValue(); - try { - if (getAssignedTasks(zkWorker.getWorker()).isEmpty() && isLazyWorker.apply(zkWorker.toImmutable())) { - log.info("Adding Worker[%s] to lazySet!", zkWorker.getWorker().getHost()); - lazyWorkers.put(worker.getKey(), zkWorker); - } - } - catch (Exception e) { - throw new RuntimeException(e); - } - } - } - - return getLazyWorkers(); - } - - protected List getAssignedTasks(Worker worker) throws Exception - { - final List assignedTasks = Lists.newArrayList( - cf.getChildren().forPath(JOINER.join(indexerZkConfig.getTasksPath(), worker.getHost())) - ); - - for (Map.Entry entry : runningTasks.entrySet()) { - if (entry.getValue() == null) { - log.error( - "Huh? null work item for [%s]", - entry.getKey() - ); - } else if (entry.getValue().getWorker() == null) { - log.error("Huh? 
no worker for [%s]", entry.getKey()); - } else if (entry.getValue().getWorker().getHost().equalsIgnoreCase(worker.getHost())) { - log.info("[%s]: Found [%s] running", worker.getHost(), entry.getKey()); - assignedTasks.add(entry.getKey()); - } - } - log.info("[%s]: Found %d tasks assigned", worker.getHost(), assignedTasks.size()); - return assignedTasks; - } - - @Override - public Collection getLazyWorkers() - { - return getWorkerFromZK(lazyWorkers.values()); - } - - private static ImmutableList getImmutableWorkerFromZK(Collection workers) - { - return ImmutableList.copyOf(Collections2.transform(workers, ZkWorker::toImmutable)); - } - - private static ImmutableList getWorkerFromZK(Collection workers) - { - return ImmutableList.copyOf(Collections2.transform(workers, ZkWorker::getWorker)); - } - - public Collection getBlackListedWorkers() - { - synchronized (blackListedWorkers) { - return getImmutableWorkerFromZK(blackListedWorkers); - } - } - - private boolean shouldRemoveNodeFromBlackList(ZkWorker zkWorker) - { - if (blackListedWorkers.size() > zkWorkers.size() * (config.getMaxPercentageBlacklistWorkers() / 100.0)) { - log.info( - "Removing [%s] from blacklist because percentage of blacklisted workers exceeds [%d]", - zkWorker.getWorker(), - config.getMaxPercentageBlacklistWorkers() - ); - - return true; - } - - long remainingMillis = zkWorker.getBlacklistedUntil().getMillis() - getCurrentTimeMillis(); - if (remainingMillis <= 0) { - log.info("Removing [%s] from blacklist because backoff time elapsed", zkWorker.getWorker()); - return true; - } - - log.info("[%s] still blacklisted for [%,ds]", zkWorker.getWorker(), remainingMillis / 1000); - return false; - } - - @VisibleForTesting - void checkBlackListedNodes() - { - boolean shouldRunPendingTasks = false; - - // must be synchronized while iterating: - // https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/util/Collections.html#synchronizedSet(java.util.Set) - synchronized (blackListedWorkers) { - for 
(Iterator iterator = blackListedWorkers.iterator(); iterator.hasNext(); ) { - ZkWorker zkWorker = iterator.next(); - if (shouldRemoveNodeFromBlackList(zkWorker)) { - iterator.remove(); - zkWorker.resetContinuouslyFailedTasksCount(); - zkWorker.setBlacklistedUntil(null); - shouldRunPendingTasks = true; - } - } - } - - if (shouldRunPendingTasks) { - runPendingTasks(); - } - } - - @VisibleForTesting - protected long getCurrentTimeMillis() - { - return System.currentTimeMillis(); - } - - @VisibleForTesting - ConcurrentMap getRemovedWorkerCleanups() - { - return removedWorkerCleanups; - } - - @VisibleForTesting - RemoteTaskRunnerConfig getRemoteTaskRunnerConfig() - { - return config; - } - - @VisibleForTesting - Map getWorkersWithUnacknowledgedTask() - { - return workersWithUnacknowledgedTask; - } - - @VisibleForTesting - ProvisioningStrategy getProvisioningStrategy() - { - return provisioningStrategy; - } - - @Override - public Map getTotalTaskSlotCount() - { - Map totalPeons = new HashMap<>(); - for (ImmutableWorkerInfo worker : getWorkers()) { - String workerCategory = worker.getWorker().getCategory(); - int workerCapacity = worker.getWorker().getCapacity(); - totalPeons.compute( - workerCategory, - (category, totalCapacity) -> totalCapacity == null ? workerCapacity : totalCapacity + workerCapacity - ); - } - - return totalPeons; - } - - @Override - public Map getIdleTaskSlotCount() - { - Map totalIdlePeons = new HashMap<>(); - for (ImmutableWorkerInfo worker : getWorkersEligibleToRunTasks().values()) { - String workerCategory = worker.getWorker().getCategory(); - int workerAvailableCapacity = worker.getAvailableCapacity(); - totalIdlePeons.compute( - workerCategory, - (category, availableCapacity) -> availableCapacity == null ? 
workerAvailableCapacity : availableCapacity + workerAvailableCapacity - ); - } - - return totalIdlePeons; - } - - @Override - public Map getUsedTaskSlotCount() - { - Map totalUsedPeons = new HashMap<>(); - for (ImmutableWorkerInfo worker : getWorkers()) { - String workerCategory = worker.getWorker().getCategory(); - int workerUsedCapacity = worker.getCurrCapacityUsed(); - totalUsedPeons.compute( - workerCategory, - (category, usedCapacity) -> usedCapacity == null ? workerUsedCapacity : usedCapacity + workerUsedCapacity - ); - } - - return totalUsedPeons; - } - - @Override - public Map getLazyTaskSlotCount() - { - Map totalLazyPeons = new HashMap<>(); - for (Worker worker : getLazyWorkers()) { - String workerCategory = worker.getCategory(); - int workerLazyPeons = worker.getCapacity(); - totalLazyPeons.compute( - workerCategory, - (category, lazyPeons) -> lazyPeons == null ? workerLazyPeons : lazyPeons + workerLazyPeons - ); - } - - return totalLazyPeons; - } - - @Override - public Map getBlacklistedTaskSlotCount() - { - Map totalBlacklistedPeons = new HashMap<>(); - for (ImmutableWorkerInfo worker : getBlackListedWorkers()) { - String workerCategory = worker.getWorker().getCategory(); - int workerBlacklistedPeons = worker.getWorker().getCapacity(); - totalBlacklistedPeons.compute( - workerCategory, - (category, blacklistedPeons) -> blacklistedPeons == null ? workerBlacklistedPeons : blacklistedPeons + workerBlacklistedPeons - ); - } - - return totalBlacklistedPeons; - } - - @Override - public int getTotalCapacity() - { - return getWorkers().stream().mapToInt(workerInfo -> workerInfo.getWorker().getCapacity()).sum(); - } - - /** - * Retrieves the maximum capacity of the task runner when autoscaling is enabled.* - * @return The maximum capacity as an integer value. Returns -1 if the maximum - * capacity cannot be determined or if autoscaling is not enabled. 
- */ - @Override - public int getMaximumCapacityWithAutoscale() - { - int maximumCapacity = -1; - WorkerBehaviorConfig workerBehaviorConfig = workerConfigRef.get(); - if (workerBehaviorConfig == null) { - // Auto scale not setup - log.debug("Cannot calculate maximum worker capacity as worker behavior config is not configured"); - maximumCapacity = -1; - } else if (workerBehaviorConfig instanceof DefaultWorkerBehaviorConfig) { - DefaultWorkerBehaviorConfig defaultWorkerBehaviorConfig = (DefaultWorkerBehaviorConfig) workerBehaviorConfig; - if (defaultWorkerBehaviorConfig.getAutoScaler() == null) { - // Auto scale not setup - log.debug("Cannot calculate maximum worker capacity as auto scaler not configured"); - maximumCapacity = -1; - } else { - int maxWorker = defaultWorkerBehaviorConfig.getAutoScaler().getMaxNumWorkers(); - int expectedWorkerCapacity = provisioningStrategy.getExpectedWorkerCapacity(getWorkers()); - maximumCapacity = expectedWorkerCapacity == -1 ? -1 : maxWorker * expectedWorkerCapacity; - } - } - return maximumCapacity; - } - - @Override - public int getUsedCapacity() - { - return getWorkers().stream().mapToInt(ImmutableWorkerInfo::getCurrCapacityUsed).sum(); - } -} diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/RemoteTaskRunnerFactory.java b/indexing-service/src/main/java/org/apache/druid/indexing/overlord/RemoteTaskRunnerFactory.java deleted file mode 100644 index a455c40cb4ac..000000000000 --- a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/RemoteTaskRunnerFactory.java +++ /dev/null @@ -1,99 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.indexing.overlord; - -import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.common.base.Supplier; -import com.google.inject.Inject; -import org.apache.curator.framework.CuratorFramework; -import org.apache.druid.curator.cache.PathChildrenCacheFactory; -import org.apache.druid.guice.annotations.EscalatedGlobal; -import org.apache.druid.indexing.overlord.autoscaling.NoopProvisioningStrategy; -import org.apache.druid.indexing.overlord.autoscaling.ProvisioningSchedulerConfig; -import org.apache.druid.indexing.overlord.autoscaling.ProvisioningStrategy; -import org.apache.druid.indexing.overlord.config.RemoteTaskRunnerConfig; -import org.apache.druid.indexing.overlord.setup.WorkerBehaviorConfig; -import org.apache.druid.java.util.emitter.service.ServiceEmitter; -import org.apache.druid.java.util.http.client.HttpClient; -import org.apache.druid.server.initialization.IndexerZkConfig; - -/** - */ -public class RemoteTaskRunnerFactory implements TaskRunnerFactory -{ - public static final String TYPE_NAME = "remote"; - private final CuratorFramework curator; - private final RemoteTaskRunnerConfig remoteTaskRunnerConfig; - private final IndexerZkConfig zkPaths; - private final ObjectMapper jsonMapper; - private final HttpClient httpClient; - private final Supplier workerConfigRef; - private final ProvisioningSchedulerConfig provisioningSchedulerConfig; - private final ProvisioningStrategy provisioningStrategy; - private final ServiceEmitter emitter; - private RemoteTaskRunner runner; - - @Inject - public 
RemoteTaskRunnerFactory( - final CuratorFramework curator, - final RemoteTaskRunnerConfig remoteTaskRunnerConfig, - final IndexerZkConfig zkPaths, - final ObjectMapper jsonMapper, - @EscalatedGlobal final HttpClient httpClient, - final Supplier workerConfigRef, - final ProvisioningSchedulerConfig provisioningSchedulerConfig, - final ProvisioningStrategy provisioningStrategy, - final ServiceEmitter emitter - ) - { - this.curator = curator; - this.remoteTaskRunnerConfig = remoteTaskRunnerConfig; - this.zkPaths = zkPaths; - this.jsonMapper = jsonMapper; - this.httpClient = httpClient; - this.workerConfigRef = workerConfigRef; - this.provisioningSchedulerConfig = provisioningSchedulerConfig; - this.provisioningStrategy = provisioningStrategy; - this.emitter = emitter; - } - - @Override - public RemoteTaskRunner build() - { - runner = new RemoteTaskRunner( - jsonMapper, - remoteTaskRunnerConfig, - zkPaths, - curator, - new PathChildrenCacheFactory.Builder().withCompressed(true), - httpClient, - workerConfigRef, - provisioningSchedulerConfig.isDoAutoscale() ? provisioningStrategy : new NoopProvisioningStrategy<>(), - emitter - ); - return runner; - } - - @Override - public RemoteTaskRunner get() - { - return runner; - } -} diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/ZkWorker.java b/indexing-service/src/main/java/org/apache/druid/indexing/overlord/ZkWorker.java deleted file mode 100644 index 21ed55115418..000000000000 --- a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/ZkWorker.java +++ /dev/null @@ -1,271 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.indexing.overlord; - -import com.fasterxml.jackson.annotation.JsonProperty; -import com.fasterxml.jackson.core.JsonParser; -import com.fasterxml.jackson.core.JsonToken; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.common.base.Function; -import com.google.common.base.Preconditions; -import com.google.common.collect.Lists; -import org.apache.curator.framework.recipes.cache.ChildData; -import org.apache.curator.framework.recipes.cache.PathChildrenCache; -import org.apache.curator.framework.recipes.cache.PathChildrenCacheListener; -import org.apache.druid.annotations.UsedInGeneratedCode; -import org.apache.druid.indexing.common.task.batch.parallel.ParallelIndexSupervisorTask; -import org.apache.druid.indexing.worker.TaskAnnouncement; -import org.apache.druid.indexing.worker.Worker; -import org.apache.druid.java.util.common.DateTimes; -import org.apache.druid.java.util.common.jackson.JacksonUtils; -import org.joda.time.DateTime; - -import java.io.Closeable; -import java.io.IOException; -import java.util.Collection; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Map; -import java.util.Set; -import java.util.concurrent.atomic.AtomicInteger; -import java.util.concurrent.atomic.AtomicReference; -import java.util.stream.Collectors; - -/** - * Holds information about a worker and a listener for task status changes associated with the worker. 
- */ -public class ZkWorker implements Closeable -{ - private final PathChildrenCache statusCache; - private final Function cacheConverter; - private final java.util.function.Function taskIdExtractor; - - private AtomicReference worker; - private AtomicReference lastCompletedTaskTime = new AtomicReference<>(DateTimes.nowUtc()); - private AtomicReference blacklistedUntil = new AtomicReference<>(); - private AtomicInteger continuouslyFailedTasksCount = new AtomicInteger(0); - - public ZkWorker(Worker worker, PathChildrenCache statusCache, final ObjectMapper jsonMapper) - { - this.worker = new AtomicReference<>(worker); - this.statusCache = statusCache; - this.cacheConverter = (ChildData input) -> - JacksonUtils.readValue(jsonMapper, input.getData(), TaskAnnouncement.class); - this.taskIdExtractor = createTaskIdExtractor(jsonMapper); - } - - static java.util.function.Function createTaskIdExtractor(final ObjectMapper jsonMapper) - { - return (ChildData input) -> { - try (JsonParser parser = jsonMapper.getFactory().createParser(input.getData())) { - while (parser.nextToken() != JsonToken.END_OBJECT) { - String currentName = parser.getCurrentName(); - if (currentName == null) { - continue; - } - - switch (currentName) { - case TaskAnnouncement.TASK_ID_KEY: - parser.nextToken(); - return parser.getValueAsString(); - default: - parser.skipChildren(); - } - } - return null; - } - catch (IOException e) { - throw new RuntimeException(e); - } - }; - } - - public void start() throws Exception - { - statusCache.start(PathChildrenCache.StartMode.POST_INITIALIZED_EVENT); - } - - public void addListener(PathChildrenCacheListener listener) - { - statusCache.getListenable().addListener(listener); - } - - @JsonProperty("worker") - public Worker getWorker() - { - return worker.get(); - } - - @JsonProperty("runningTasks") - public Collection getRunningTaskIds() - { - return statusCache.getCurrentData() - .stream() - .map(taskIdExtractor) - .collect(Collectors.toSet()); - } - - public 
Map getRunningTasks() - { - Map retVal = new HashMap<>(); - for (TaskAnnouncement taskAnnouncement : Lists.transform( - statusCache.getCurrentData(), - cacheConverter - )) { - retVal.put(taskAnnouncement.getTaskStatus().getId(), taskAnnouncement); - } - - return retVal; - } - - @JsonProperty("currCapacityUsed") - public int getCurrCapacityUsed() - { - return getCurrCapacityUsed(getRunningTasks()); - } - - private static int getCurrCapacityUsed(Map tasks) - { - int currCapacity = 0; - for (TaskAnnouncement taskAnnouncement : tasks.values()) { - currCapacity += taskAnnouncement.getTaskResource().getRequiredCapacity(); - } - return currCapacity; - } - - @JsonProperty("currParallelIndexCapacityUsed") - public int getCurrParallelIndexCapacityUsed() - { - return getCurrParallelIndexCapacityUsed(getRunningTasks()); - } - - private int getCurrParallelIndexCapacityUsed(Map tasks) - { - int currParallelIndexCapacityUsed = 0; - for (TaskAnnouncement taskAnnouncement : tasks.values()) { - if (taskAnnouncement.getTaskType().equals(ParallelIndexSupervisorTask.TYPE)) { - currParallelIndexCapacityUsed += taskAnnouncement.getTaskResource().getRequiredCapacity(); - } - } - return currParallelIndexCapacityUsed; - } - - @JsonProperty("availabilityGroups") - public Set getAvailabilityGroups() - { - return getAvailabilityGroups(getRunningTasks()); - } - - private static Set getAvailabilityGroups(Map tasks) - { - Set retVal = new HashSet<>(); - for (TaskAnnouncement taskAnnouncement : tasks.values()) { - retVal.add(taskAnnouncement.getTaskResource().getAvailabilityGroup()); - } - return retVal; - } - - @JsonProperty - public DateTime getLastCompletedTaskTime() - { - return lastCompletedTaskTime.get(); - } - - @JsonProperty - public DateTime getBlacklistedUntil() - { - return blacklistedUntil.get(); - } - - public boolean isRunningTask(String taskId) - { - return statusCache.getCurrentData() - .stream() - .map(taskIdExtractor) - .anyMatch((String s) -> taskId.equals(s)); - } - - 
@UsedInGeneratedCode // See JavaScriptWorkerSelectStrategyTest - public boolean isValidVersion(String minVersion) - { - final Worker w = worker.get(); - return !w.isDisabled() && w.getVersion().compareTo(minVersion) >= 0; - } - - public void setWorker(Worker newWorker) - { - final Worker oldWorker = worker.get(); - Preconditions.checkArgument(newWorker.getHost().equals(oldWorker.getHost()), "Cannot change Worker host"); - Preconditions.checkArgument(newWorker.getIp().equals(oldWorker.getIp()), "Cannot change Worker ip"); - - worker.set(newWorker); - } - - public void setLastCompletedTaskTime(DateTime completedTaskTime) - { - lastCompletedTaskTime.set(completedTaskTime); - } - - public void setBlacklistedUntil(DateTime blacklistedUntil) - { - this.blacklistedUntil.set(blacklistedUntil); - } - - public ImmutableWorkerInfo toImmutable() - { - return ImmutableWorkerInfo.fromWorkerAnnouncements( - worker.get(), - getRunningTasks(), - lastCompletedTaskTime.get(), - blacklistedUntil.get() - ); - } - - @Override - public void close() throws IOException - { - statusCache.close(); - } - - public int getContinuouslyFailedTasksCount() - { - return continuouslyFailedTasksCount.get(); - } - - public void resetContinuouslyFailedTasksCount() - { - this.continuouslyFailedTasksCount.set(0); - } - - public void incrementContinuouslyFailedTasksCount() - { - this.continuouslyFailedTasksCount.incrementAndGet(); - } - - @Override - public String toString() - { - return "ZkWorker{" + - "worker=" + worker + - ", lastCompletedTaskTime=" + lastCompletedTaskTime + - ", blacklistedUntil=" + blacklistedUntil + - '}'; - } -} diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/config/HttpRemoteTaskRunnerConfig.java b/indexing-service/src/main/java/org/apache/druid/indexing/overlord/config/HttpRemoteTaskRunnerConfig.java index bc0ba7f81c70..acb288f13264 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/config/HttpRemoteTaskRunnerConfig.java 
+++ b/indexing-service/src/main/java/org/apache/druid/indexing/overlord/config/HttpRemoteTaskRunnerConfig.java @@ -23,10 +23,45 @@ import com.fasterxml.jackson.annotation.JsonProperty; import org.joda.time.Period; +import javax.validation.constraints.Max; +import javax.validation.constraints.Min; +import javax.validation.constraints.NotNull; + /** */ -public class HttpRemoteTaskRunnerConfig extends RemoteTaskRunnerConfig +public class HttpRemoteTaskRunnerConfig extends WorkerTaskRunnerConfig { + // This default value is kept to take MM restart into consideration just in case it was + // restarted right after task assignment. + @JsonProperty + @NotNull + private Period taskAssignmentTimeout = new Period("PT5M"); + + @JsonProperty + @NotNull + private Period taskCleanupTimeout = new Period("PT15M"); + + @JsonProperty + @Min(1) + private int pendingTasksRunnerNumThreads = 1; + + @JsonProperty + @Min(1) + private int maxRetriesBeforeBlacklist = 5; + + @JsonProperty + @NotNull + private Period workerBlackListBackoffTime = new Period("PT15M"); + + @JsonProperty + @NotNull + private Period workerBlackListCleanupPeriod = new Period("PT5M"); + + @JsonProperty + @Max(100) + @Min(0) + private int maxPercentageBlacklistWorkers = 20; + @JsonProperty private int workerSyncNumThreads = 5; @@ -48,6 +83,46 @@ public class HttpRemoteTaskRunnerConfig extends RemoteTaskRunnerConfig @JsonProperty private Period serverUnstabilityTimeout = new Period("PT1M"); + public Period getTaskAssignmentTimeout() + { + return taskAssignmentTimeout; + } + + public Period getTaskCleanupTimeout() + { + return taskCleanupTimeout; + } + + public int getPendingTasksRunnerNumThreads() + { + return pendingTasksRunnerNumThreads; + } + + public int getMaxRetriesBeforeBlacklist() + { + return maxRetriesBeforeBlacklist; + } + + public Period getWorkerBlackListBackoffTime() + { + return workerBlackListBackoffTime; + } + + public Period getWorkerBlackListCleanupPeriod() + { + return workerBlackListCleanupPeriod; 
+ } + + public int getMaxPercentageBlacklistWorkers() + { + return maxPercentageBlacklistWorkers; + } + + public void setMaxPercentageBlacklistWorkers(int maxPercentageBlacklistWorkers) + { + this.maxPercentageBlacklistWorkers = maxPercentageBlacklistWorkers; + } + public int getWorkerSyncNumThreads() { return workerSyncNumThreads; diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/config/RemoteTaskRunnerConfig.java b/indexing-service/src/main/java/org/apache/druid/indexing/overlord/config/RemoteTaskRunnerConfig.java deleted file mode 100644 index 9cd90167813a..000000000000 --- a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/config/RemoteTaskRunnerConfig.java +++ /dev/null @@ -1,202 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.druid.indexing.overlord.config; - -import com.fasterxml.jackson.annotation.JsonProperty; -import org.apache.druid.curator.CuratorUtils; -import org.apache.druid.java.util.common.HumanReadableBytes; -import org.apache.druid.java.util.common.HumanReadableBytesRange; -import org.joda.time.Period; - -import javax.validation.constraints.Max; -import javax.validation.constraints.Min; -import javax.validation.constraints.NotNull; - -/** - */ -public class RemoteTaskRunnerConfig extends WorkerTaskRunnerConfig -{ - // This default value is kept to take MM restart into consideration just in case it was - // restarted right after task assignment. - @JsonProperty - @NotNull - private Period taskAssignmentTimeout = new Period("PT5M"); - - @JsonProperty - @NotNull - private Period taskCleanupTimeout = new Period("PT15M"); - - @JsonProperty - @HumanReadableBytesRange(min = 10 * 1024, - max = Integer.MAX_VALUE, - message = "maxZnodeBytes must be in the range of [10KiB, 2GiB)" - ) - private HumanReadableBytes maxZnodeBytes = HumanReadableBytes.valueOf(CuratorUtils.DEFAULT_MAX_ZNODE_BYTES); - - @JsonProperty - private Period taskShutdownLinkTimeout = new Period("PT1M"); - - @JsonProperty - @Min(1) - private int pendingTasksRunnerNumThreads = 1; - - @JsonProperty - @Min(1) - private int maxRetriesBeforeBlacklist = 5; - - @JsonProperty - @NotNull - private Period workerBlackListBackoffTime = new Period("PT15M"); - - @JsonProperty - @NotNull - private Period workerBlackListCleanupPeriod = new Period("PT5M"); - - @JsonProperty - @Max(100) - @Min(0) - private int maxPercentageBlacklistWorkers = 20; - - public Period getTaskAssignmentTimeout() - { - return taskAssignmentTimeout; - } - - public Period getTaskCleanupTimeout() - { - return taskCleanupTimeout; - } - - public int getMaxZnodeBytes() - { - return maxZnodeBytes.getBytesInInt(); - } - - public Period getTaskShutdownLinkTimeout() - { - return taskShutdownLinkTimeout; - } - - public int 
getPendingTasksRunnerNumThreads() - { - return pendingTasksRunnerNumThreads; - } - - public int getMaxRetriesBeforeBlacklist() - { - return maxRetriesBeforeBlacklist; - } - - public Period getWorkerBlackListBackoffTime() - { - return workerBlackListBackoffTime; - } - - public Period getWorkerBlackListCleanupPeriod() - { - return workerBlackListCleanupPeriod; - } - - public int getMaxPercentageBlacklistWorkers() - { - return maxPercentageBlacklistWorkers; - } - - public void setMaxPercentageBlacklistWorkers(int maxPercentageBlacklistWorkers) - { - this.maxPercentageBlacklistWorkers = maxPercentageBlacklistWorkers; - } - - @Override - public boolean equals(Object o) - { - if (this == o) { - return true; - } - if (o == null || getClass() != o.getClass()) { - return false; - } - - RemoteTaskRunnerConfig that = (RemoteTaskRunnerConfig) o; - - if (!maxZnodeBytes.equals(that.maxZnodeBytes)) { - return false; - } - if (pendingTasksRunnerNumThreads != that.pendingTasksRunnerNumThreads) { - return false; - } - if (!taskAssignmentTimeout.equals(that.taskAssignmentTimeout)) { - return false; - } - if (!taskCleanupTimeout.equals(that.taskCleanupTimeout)) { - return false; - } - if (!getMinWorkerVersion().equals(that.getMinWorkerVersion())) { - return false; - } - if (!taskShutdownLinkTimeout.equals(that.taskShutdownLinkTimeout)) { - return false; - } - if (maxRetriesBeforeBlacklist != that.maxRetriesBeforeBlacklist) { - return false; - } - if (!workerBlackListBackoffTime.equals(that.getWorkerBlackListBackoffTime())) { - return false; - } - if (maxPercentageBlacklistWorkers != that.maxPercentageBlacklistWorkers) { - return false; - } - return workerBlackListCleanupPeriod.equals(that.workerBlackListCleanupPeriod); - - } - - @Override - public int hashCode() - { - int result = taskAssignmentTimeout.hashCode(); - result = 31 * result + taskCleanupTimeout.hashCode(); - result = 31 * result + getMinWorkerVersion().hashCode(); - result = 31 * result + maxZnodeBytes.hashCode(); - 
result = 31 * result + taskShutdownLinkTimeout.hashCode(); - result = 31 * result + pendingTasksRunnerNumThreads; - result = 31 * result + maxRetriesBeforeBlacklist; - result = 31 * result + workerBlackListBackoffTime.hashCode(); - result = 31 * result + workerBlackListCleanupPeriod.hashCode(); - result = 31 * result + maxPercentageBlacklistWorkers; - return result; - } - - @Override - public String toString() - { - return "RemoteTaskRunnerConfig{" + - "taskAssignmentTimeout=" + taskAssignmentTimeout + - ", taskCleanupTimeout=" + taskCleanupTimeout + - ", minWorkerVersion='" + getMinWorkerVersion() + '\'' + - ", maxZnodeBytes=" + maxZnodeBytes + - ", taskShutdownLinkTimeout=" + taskShutdownLinkTimeout + - ", pendingTasksRunnerNumThreads=" + pendingTasksRunnerNumThreads + - ", maxRetriesBeforeBlacklist=" + maxRetriesBeforeBlacklist + - ", taskBlackListBackoffTimeMillis=" + workerBlackListBackoffTime + - ", taskBlackListCleanupPeriod=" + workerBlackListCleanupPeriod + - ", maxPercentageBlacklistWorkers= " + maxPercentageBlacklistWorkers + - '}'; - } -} diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/hrtr/HttpRemoteTaskRunner.java b/indexing-service/src/main/java/org/apache/druid/indexing/overlord/hrtr/HttpRemoteTaskRunner.java index 53bd0dbbf940..dcf4bdf266ec 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/hrtr/HttpRemoteTaskRunner.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/overlord/hrtr/HttpRemoteTaskRunner.java @@ -22,7 +22,6 @@ import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Joiner; import com.google.common.base.Objects; import com.google.common.base.Optional; import com.google.common.base.Preconditions; @@ -41,7 +40,6 @@ import com.google.common.util.concurrent.ListeningScheduledExecutorService; import 
com.google.common.util.concurrent.MoreExecutors; import com.google.errorprone.annotations.concurrent.GuardedBy; -import org.apache.curator.framework.CuratorFramework; import org.apache.druid.concurrent.LifecycleLock; import org.apache.druid.discovery.DiscoveryDruidNode; import org.apache.druid.discovery.DruidNodeDiscovery; @@ -86,9 +84,7 @@ import org.apache.druid.java.util.http.client.Request; import org.apache.druid.java.util.http.client.response.InputStreamResponseHandler; import org.apache.druid.query.DruidMetrics; -import org.apache.druid.server.initialization.IndexerZkConfig; import org.apache.druid.tasklogs.TaskLogStreamer; -import org.apache.zookeeper.KeeperException; import org.jboss.netty.handler.codec.http.HttpMethod; import org.joda.time.Duration; import org.joda.time.Period; @@ -126,10 +122,6 @@ * 3. GET request for getting list of assigned, running, completed tasks on Middle Manager and its enable/disable status. * This endpoint is implemented to support long poll and holds the request till there is a change. This class * sends the next request immediately as the previous finishes to keep the state up-to-date. - *

- * ZK_CLEANUP_TODO : As of 0.11.1, it is required to cleanup task status paths from ZK which are created by the - * workers to support deprecated RemoteTaskRunner. So a method "scheduleCompletedTaskStatusCleanupFromZk()" is added' - * which should be removed in the release that removes RemoteTaskRunner legacy ZK updation WorkerTaskMonitor class. */ public class HttpRemoteTaskRunner implements WorkerTaskRunner, TaskLogStreamer, WorkerHolder.Listener { @@ -194,15 +186,6 @@ public class HttpRemoteTaskRunner implements WorkerTaskRunner, TaskLogStreamer, private final TaskStorage taskStorage; private final ServiceEmitter emitter; - // ZK_CLEANUP_TODO : Remove these when RemoteTaskRunner and WorkerTaskMonitor are removed. - private static final Joiner JOINER = Joiner.on("/"); - - @Nullable // Null, if zk is disabled - private final CuratorFramework cf; - - @Nullable // Null, if zk is disabled - private final ScheduledExecutorService zkCleanupExec; - private final IndexerZkConfig indexerZkConfig; private volatile DruidNodeDiscovery.Listener nodeDiscoveryListener; public HttpRemoteTaskRunner( @@ -213,8 +196,6 @@ public HttpRemoteTaskRunner( ProvisioningStrategy provisioningStrategy, DruidNodeDiscoveryProvider druidNodeDiscoveryProvider, TaskStorage taskStorage, - @Nullable CuratorFramework cf, - IndexerZkConfig indexerZkConfig, ServiceEmitter emitter ) { @@ -240,19 +221,6 @@ public HttpRemoteTaskRunner( ScheduledExecutors.fixed(1, "HttpRemoteTaskRunner-Worker-Cleanup-%d") ); - if (cf != null) { - this.cf = cf; - this.zkCleanupExec = ScheduledExecutors.fixed( - 1, - "HttpRemoteTaskRunner-zk-cleanup-%d" - ); - } else { - this.cf = null; - this.zkCleanupExec = null; - } - - this.indexerZkConfig = indexerZkConfig; - this.provisioningStrategy = provisioningStrategy; } @@ -267,8 +235,6 @@ public void start() try { log.info("Starting..."); - scheduleCompletedTaskStatusCleanupFromZk(); - startWorkersHandling(); ScheduledExecutors.scheduleAtFixedRate( @@ -296,68 +262,6 @@ public 
void start() } } - private void scheduleCompletedTaskStatusCleanupFromZk() - { - if (cf == null) { - return; - } - - zkCleanupExec.scheduleAtFixedRate( - () -> { - try { - List workers; - try { - workers = cf.getChildren().forPath(indexerZkConfig.getStatusPath()); - } - catch (KeeperException.NoNodeException e) { - // statusPath doesn't exist yet; can occur if no middleManagers have started. - workers = ImmutableList.of(); - } - - Set knownActiveTaskIds = new HashSet<>(); - if (!workers.isEmpty()) { - for (Task task : taskStorage.getActiveTasks()) { - knownActiveTaskIds.add(task.getId()); - } - } - - for (String workerId : workers) { - String workerStatusPath = JOINER.join(indexerZkConfig.getStatusPath(), workerId); - - List taskIds; - try { - taskIds = cf.getChildren().forPath(workerStatusPath); - } - catch (KeeperException.NoNodeException e) { - taskIds = ImmutableList.of(); - } - - for (String taskId : taskIds) { - if (!knownActiveTaskIds.contains(taskId)) { - String taskStatusPath = JOINER.join(workerStatusPath, taskId); - try { - cf.delete().guaranteed().forPath(taskStatusPath); - } - catch (KeeperException.NoNodeException e) { - log.info("Failed to delete taskStatusPath[%s].", taskStatusPath); - } - } - } - } - } - catch (InterruptedException ex) { - Thread.currentThread().interrupt(); - } - catch (Exception ex) { - log.error(ex, "Unknown error while doing task status cleanup in ZK."); - } - }, - 1, - 5, - TimeUnit.MINUTES - ); - } - /** * Must not be used outside of this class and {@link HttpRemoteTaskRunnerResource} */ diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/hrtr/HttpRemoteTaskRunnerFactory.java b/indexing-service/src/main/java/org/apache/druid/indexing/overlord/hrtr/HttpRemoteTaskRunnerFactory.java index 3e0fddc1c0e8..fd1b53ffa3c2 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/hrtr/HttpRemoteTaskRunnerFactory.java +++ 
b/indexing-service/src/main/java/org/apache/druid/indexing/overlord/hrtr/HttpRemoteTaskRunnerFactory.java @@ -22,9 +22,6 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.base.Supplier; import com.google.inject.Inject; -import com.google.inject.Provider; -import org.apache.curator.framework.CuratorFramework; -import org.apache.druid.curator.ZkEnablementConfig; import org.apache.druid.discovery.DruidNodeDiscoveryProvider; import org.apache.druid.guice.annotations.EscalatedGlobal; import org.apache.druid.guice.annotations.Smile; @@ -37,9 +34,6 @@ import org.apache.druid.indexing.overlord.setup.WorkerBehaviorConfig; import org.apache.druid.java.util.emitter.service.ServiceEmitter; import org.apache.druid.java.util.http.client.HttpClient; -import org.apache.druid.server.initialization.IndexerZkConfig; - -import javax.annotation.Nullable; /** */ @@ -58,11 +52,6 @@ public class HttpRemoteTaskRunnerFactory implements TaskRunnerFactory cfProvider, - final IndexerZkConfig indexerZkConfig, - final ZkEnablementConfig zkEnablementConfig, final ServiceEmitter emitter ) { @@ -87,14 +73,7 @@ public HttpRemoteTaskRunnerFactory( this.provisioningStrategy = provisioningStrategy; this.druidNodeDiscoveryProvider = druidNodeDiscoveryProvider; this.taskStorage = taskStorage; - this.indexerZkConfig = indexerZkConfig; this.emitter = emitter; - - if (zkEnablementConfig.isEnabled()) { - this.cf = cfProvider.get(); - } else { - this.cf = null; - } } @Override @@ -108,8 +87,6 @@ public HttpRemoteTaskRunner build() provisioningSchedulerConfig.isDoAutoscale() ? 
provisioningStrategy : new NoopProvisioningStrategy<>(), druidNodeDiscoveryProvider, taskStorage, - cf, - indexerZkConfig, emitter ); return runner; diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/setup/WorkerSelectStrategy.java b/indexing-service/src/main/java/org/apache/druid/indexing/overlord/setup/WorkerSelectStrategy.java index a3443ee73583..8fff4bb7e7be 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/setup/WorkerSelectStrategy.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/overlord/setup/WorkerSelectStrategy.java @@ -30,7 +30,8 @@ import javax.annotation.Nullable; /** - * The {@link org.apache.druid.indexing.overlord.RemoteTaskRunner} uses this class to select a worker to assign tasks to. + * The {@link org.apache.druid.indexing.overlord.hrtr.HttpRemoteTaskRunner} uses this class to select a worker to assign + * tasks to. */ @JsonTypeInfo(use = JsonTypeInfo.Id.NAME, property = "type", defaultImpl = EqualDistributionWorkerSelectStrategy.class) @JsonSubTypes(value = { diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/supervisor/SupervisorManager.java b/indexing-service/src/main/java/org/apache/druid/indexing/overlord/supervisor/SupervisorManager.java index 52f3cba7fc11..fa7d96634ae6 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/supervisor/SupervisorManager.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/overlord/supervisor/SupervisorManager.java @@ -23,9 +23,11 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.base.Optional; import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableMap; import com.google.common.util.concurrent.ListenableFuture; import com.google.inject.Inject; import org.apache.druid.common.guava.FutureUtils; +import org.apache.druid.common.utils.IdUtils; import org.apache.druid.error.DruidException; import 
org.apache.druid.error.InvalidInput; import org.apache.druid.error.NotFound; @@ -35,8 +37,11 @@ import org.apache.druid.indexing.overlord.DataSourceMetadata; import org.apache.druid.indexing.overlord.supervisor.autoscaler.SupervisorTaskAutoScaler; import org.apache.druid.indexing.seekablestream.SeekableStreamDataSourceMetadata; +import org.apache.druid.indexing.seekablestream.supervisor.BoundedStreamConfig; import org.apache.druid.indexing.seekablestream.supervisor.SeekableStreamSupervisor; import org.apache.druid.indexing.seekablestream.supervisor.SeekableStreamSupervisorSpec; +import org.apache.druid.java.util.common.IAE; +import org.apache.druid.java.util.common.ISE; import org.apache.druid.java.util.common.Pair; import org.apache.druid.java.util.common.lifecycle.LifecycleStart; import org.apache.druid.java.util.common.lifecycle.LifecycleStop; @@ -129,33 +134,8 @@ public Optional getActiveSupervisorIdForDatasourceWithAppendLock(String final Supervisor supervisor = entry.getValue().lhs; final SupervisorSpec supervisorSpec = entry.getValue().rhs; - boolean hasAppendLock = Tasks.DEFAULT_USE_CONCURRENT_LOCKS; - if (supervisorSpec instanceof SeekableStreamSupervisorSpec) { - SeekableStreamSupervisorSpec seekableStreamSupervisorSpec = (SeekableStreamSupervisorSpec) supervisorSpec; - Map context = seekableStreamSupervisorSpec.getContext(); - if (context != null) { - Boolean useConcurrentLocks = QueryContexts.getAsBoolean( - Tasks.USE_CONCURRENT_LOCKS, - context.get(Tasks.USE_CONCURRENT_LOCKS) - ); - if (useConcurrentLocks == null) { - TaskLockType taskLockType = QueryContexts.getAsEnum( - Tasks.TASK_LOCK_TYPE, - context.get(Tasks.TASK_LOCK_TYPE), - TaskLockType.class - ); - if (taskLockType == null) { - hasAppendLock = Tasks.DEFAULT_USE_CONCURRENT_LOCKS; - } else if (taskLockType == TaskLockType.APPEND) { - hasAppendLock = true; - } else { - hasAppendLock = false; - } - } else { - hasAppendLock = useConcurrentLocks; - } - } - } + boolean hasAppendLock = supervisorSpec 
instanceof SeekableStreamSupervisorSpec + && specHasConcurrentLocks((SeekableStreamSupervisorSpec) supervisorSpec); if (supervisor instanceof SeekableStreamSupervisor && !supervisorSpec.isSuspended() @@ -393,6 +373,116 @@ public boolean resetSupervisor(String id, @Nullable DataSourceMetadata resetData return true; } + /** + * Resets a supervisor to the latest stream offsets and starts a bounded backfill supervisor to + * process the skipped range from the previously checkpointed offsets up to the latest offsets. + * + * @param id supervisor ID + * @param backfillTaskCount number of tasks for the backfill supervisor, or null to inherit from the source spec + * @return map with {@code "id"} (the original supervisor ID) and {@code "backfillSupervisorId"} + * @throws IllegalArgumentException if the supervisor is not a {@link SeekableStreamSupervisor}, + * if {@code useEarliestSequenceNumber} is true, + * if {@code useConcurrentLocks} is not set to true in the supervisor context, + * or if the supervisor is not in a RUNNING state + * @throws IllegalStateException if the latest or checkpointed offsets cannot be retrieved, + * or if the backfill spec cannot be serialized + */ + public Map resetToLatestAndBackfill(String id, @Nullable Integer backfillTaskCount) + { + Preconditions.checkState(started, "SupervisorManager not started"); + Preconditions.checkNotNull(id, "id"); + + Pair supervisor = supervisors.get(id); + + if (supervisor == null) { + throw new IAE("Supervisor[%s] does not exist", id); + } + + if (!(supervisor.lhs instanceof SeekableStreamSupervisor)) { + throw new IAE("Supervisor[%s] is not a streaming supervisor", id); + } + + SeekableStreamSupervisor streamSupervisor = (SeekableStreamSupervisor) supervisor.lhs; + SeekableStreamSupervisorSpec streamSpec = (SeekableStreamSupervisorSpec) supervisor.rhs; + + validateResetAndBackfill(id, streamSupervisor, streamSpec); + + log.info("Capturing latest offsets from stream for supervisor[%s]", id); + 
streamSupervisor.updatePartitionLagFromStream(); + Map endOffsets = streamSupervisor.getLatestSequencesFromStream(); + + log.info("Capturing checkpointed offsets for supervisor[%s]", id); + Map startOffsets = streamSupervisor.getOffsetsFromMetadataStorage(); + + if (endOffsets == null || endOffsets.isEmpty()) { + throw new ISE("Skipping reset: Failed to get latest offsets from stream for supervisor[%s]", id); + } + if (startOffsets == null || startOffsets.isEmpty()) { + throw new ISE("Skipping reset: Failed to get checkpointed offsets for supervisor[%s]", id); + } + + String backfillSupervisorId = IdUtils.getRandomIdWithPrefix(id + "_backfill"); + + try { + Map normalizedStartOffsets = jsonMapper.readValue(jsonMapper.writeValueAsString(startOffsets), Map.class); + Map normalizedEndOffsets = jsonMapper.readValue(jsonMapper.writeValueAsString(endOffsets), Map.class); + BoundedStreamConfig boundedStreamConfig = new BoundedStreamConfig(normalizedStartOffsets, normalizedEndOffsets); + SupervisorSpec backfillSpec = streamSpec.createBackfillSpec(backfillSupervisorId, boundedStreamConfig, backfillTaskCount); + createOrUpdateAndStartSupervisor(backfillSpec); + } + catch (JsonProcessingException e) { + throw new ISE(e, "Failed to serialize offsets for backfill supervisor[%s]", backfillSupervisorId); + } + + log.info( + "Started backfill supervisor[%s] for supervisor[%s] with startOffsets[%s] and endOffsets[%s]", + backfillSupervisorId, + id, + startOffsets, + endOffsets + ); + + log.info("Resetting supervisor[%s] metadata to latest offsets", id); + DataSourceMetadata resetMetadata = streamSupervisor.createDataSourceMetaDataForReset( + streamSupervisor.getIoConfig().getStream(), + endOffsets + ); + + streamSupervisor.resetOffsets(resetMetadata); + + // Reset autoscaler if present + SupervisorTaskAutoScaler autoscaler = autoscalers.get(id); + if (autoscaler != null) { + autoscaler.reset(); + } + + return ImmutableMap.of( + "id", id, + "backfillSupervisorId", 
backfillSupervisorId + ); + } + + private void validateResetAndBackfill( + String id, + SeekableStreamSupervisor streamSupervisor, + SeekableStreamSupervisorSpec streamSpec + ) + { + if (streamSupervisor.getIoConfig().isUseEarliestSequenceNumber()) { + throw new IAE("Reset with skipped offsets is not supported when useEarliestOffset is true."); + } + + if (!specHasConcurrentLocks(streamSpec)) { + throw new IAE( + "Backfill tasks require 'useConcurrentLocks' to be set to true in the supervisor context to allow concurrent writes with the main supervisor tasks" + ); + } + + if (streamSupervisor.getState() != SupervisorStateManager.BasicState.RUNNING) { + throw new IAE("Supervisor[%s] must be in a RUNNING state to perform a reset and backfill", id); + } + } + public boolean checkPointDataSourceMetadata( String supervisorId, int taskGroupId, @@ -631,4 +721,29 @@ private SupervisorSpec getSpec(String id) return supervisor == null ? null : supervisor.rhs; } } + + /** + * Returns true if the spec's context enables concurrent (append) locks, accepting both + * {@code useConcurrentLocks: true} (or any truthy string) and {@code taskLockType: APPEND}. 
+ */ + private static boolean specHasConcurrentLocks(SeekableStreamSupervisorSpec spec) + { + Map context = spec.getContext(); + if (context == null) { + return Tasks.DEFAULT_USE_CONCURRENT_LOCKS; + } + Boolean useConcurrentLocks = QueryContexts.getAsBoolean( + Tasks.USE_CONCURRENT_LOCKS, + context.get(Tasks.USE_CONCURRENT_LOCKS) + ); + if (useConcurrentLocks != null) { + return useConcurrentLocks; + } + TaskLockType taskLockType = QueryContexts.getAsEnum( + Tasks.TASK_LOCK_TYPE, + context.get(Tasks.TASK_LOCK_TYPE), + TaskLockType.class + ); + return taskLockType == TaskLockType.APPEND; + } } diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/supervisor/SupervisorResource.java b/indexing-service/src/main/java/org/apache/druid/indexing/overlord/supervisor/SupervisorResource.java index aff9edf19af9..8d0e04eb7988 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/supervisor/SupervisorResource.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/overlord/supervisor/SupervisorResource.java @@ -640,6 +640,50 @@ private Response handleResetRequest( ); } + @POST + @Path("/{id}/resetToLatestAndBackfill") + @Produces(MediaType.APPLICATION_JSON) + @ResourceFilters(SupervisorResourceFilter.class) + public Response resetToLatestAndBackfill( + @PathParam("id") final String id, + @QueryParam("backfillTaskCount") @Nullable final Integer backfillTaskCount + ) + { + return handleResetToLatestAndBackfill(id, backfillTaskCount); + } + + private Response handleResetToLatestAndBackfill(final String id, @Nullable final Integer backfillTaskCount) + { + if (backfillTaskCount != null && backfillTaskCount < 1) { + return Response.status(Response.Status.BAD_REQUEST) + .entity(ImmutableMap.of("error", "backfillTaskCount must be a positive integer")) + .build(); + } + return asLeaderWithSupervisorManager( + manager -> { + if (!manager.getSupervisorIds().contains(id)) { + return Response.status(Response.Status.NOT_FOUND) + 
.entity(ImmutableMap.of("error", StringUtils.format("[%s] does not exist", id))) + .build(); + } + try { + Map result = manager.resetToLatestAndBackfill(id, backfillTaskCount); + return Response.ok(result).build(); + } + catch (IllegalArgumentException e) { + return Response.status(Response.Status.BAD_REQUEST) + .entity(ImmutableMap.of("error", e.getMessage())) + .build(); + } + catch (Exception e) { + return Response.serverError() + .entity(ImmutableMap.of("error", e.getMessage())) + .build(); + } + } + ); + } + private Response asLeaderWithSupervisorManager(Function f) { Optional supervisorManager = taskMaster.getSupervisorManager(); diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/SeekableStreamIndexTaskRunner.java b/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/SeekableStreamIndexTaskRunner.java index 4bcf539d6aea..7209146aaf54 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/SeekableStreamIndexTaskRunner.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/SeekableStreamIndexTaskRunner.java @@ -423,7 +423,7 @@ private TaskStatus runInternal(TaskToolbox toolbox) throws Exception initializeSequences(); log.debug("Found chat handler of class[%s]", toolbox.getChatHandlerProvider().getClass().getName()); - toolbox.getChatHandlerProvider().register(task.getId(), this, false); + toolbox.getChatHandlerProvider().register(task.getId(), this); runThread = Thread.currentThread(); diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/SettableByteEntity.java b/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/SettableByteEntity.java index b4eb42a6b48c..3faf436f59ca 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/SettableByteEntity.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/SettableByteEntity.java @@ -62,7 +62,7 @@ 
public T getEntity() } @Override - public InputStream open() + public InputStream openRaw() { // Duplicate the entity buffer, because the stream will update its position. final SettableByteBufferInputStream stream = new SettableByteBufferInputStream(); diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/SettableByteEntityReader.java b/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/SettableByteEntityReader.java index 2314d7408425..883a798f24d0 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/SettableByteEntityReader.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/SettableByteEntityReader.java @@ -28,7 +28,7 @@ import org.apache.druid.data.input.impl.ByteEntity; import org.apache.druid.data.input.impl.JsonInputFormat; import org.apache.druid.java.util.common.parsers.CloseableIterator; -import org.apache.druid.segment.transform.TransformSpec; +import org.apache.druid.segment.transform.Transformer; import org.apache.druid.segment.transform.TransformingInputEntityReader; import java.io.File; @@ -46,16 +46,16 @@ class SettableByteEntityReader implements InputEntityReade SettableByteEntityReader( InputFormat inputFormat, InputRowSchema inputRowSchema, - TransformSpec transformSpec, + Transformer transformer, File indexingTmpDir ) { Preconditions.checkNotNull(inputFormat, "inputFormat"); final InputFormat format = JsonInputFormat.withLineSplittable(inputFormat, false); this.entity = new SettableByteEntity<>(); - this.delegate = new TransformingInputEntityReader( + this.delegate = TransformingInputEntityReader.withoutFilter( format.createReader(inputRowSchema, entity, indexingTmpDir), - transformSpec.toTransformer() + transformer ); } diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/StreamChunkReader.java b/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/StreamChunkReader.java index 
a0ac1f01ea5a..b7bad274822c 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/StreamChunkReader.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/StreamChunkReader.java @@ -32,6 +32,7 @@ import org.apache.druid.segment.incremental.ParseExceptionHandler; import org.apache.druid.segment.incremental.RowIngestionMeters; import org.apache.druid.segment.transform.TransformSpec; +import org.apache.druid.segment.transform.Transformer; import javax.annotation.Nullable; import java.io.File; @@ -64,13 +65,14 @@ class StreamChunkReader ) { InvalidInput.notNull(inputFormat, "inputFormat"); + final Transformer transformer = transformSpec.toTransformer(); this.byteEntityReader = new SettableByteEntityReader<>( inputFormat, inputRowSchema, - transformSpec, + transformer, indexingTmpDir ); - this.rowFilter = rowFilter; + this.rowFilter = transformer.hasFilter() ? withTransformFilter(transformer, rowFilter) : rowFilter; this.rowIngestionMeters = rowIngestionMeters; this.parseExceptionHandler = parseExceptionHandler; } @@ -89,6 +91,13 @@ class StreamChunkReader this.parseExceptionHandler = parseExceptionHandler; } + private static InputRowFilter withTransformFilter(final Transformer transformer, final InputRowFilter rowFilter) + { + final InputRowFilter transformFilter = row -> + transformer.rowMatchesFilter(row) ? 
InputRowFilterResult.ACCEPTED : InputRowFilterResult.CUSTOM_FILTER; + return transformFilter.and(rowFilter); + } + List parse(@Nullable List streamChunk, boolean isEndOfShard) throws IOException { if (streamChunk == null || streamChunk.isEmpty()) { diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/supervisor/SeekableStreamSupervisor.java b/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/supervisor/SeekableStreamSupervisor.java index 9bef543496b6..74329c68e1d2 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/supervisor/SeekableStreamSupervisor.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/supervisor/SeekableStreamSupervisor.java @@ -2195,7 +2195,7 @@ public void resetOffsetsInternal(@Nonnull final DataSourceMetadata dataSourceMet final boolean metadataUpdateSuccess; final DataSourceMetadata metadata = indexerMetadataStorageCoordinator.retrieveDataSourceMetadata(supervisorId); if (metadata == null) { - log.info("Checkpointed metadata in null for supervisor[%s] for dataSource[%s] - inserting metadata[%s]", supervisorId, dataSource, resetMetadata); + log.info("Checkpointed metadata is null for supervisor[%s] for dataSource[%s] - inserting metadata[%s]", supervisorId, dataSource, resetMetadata); metadataUpdateSuccess = indexerMetadataStorageCoordinator.insertDataSourceMetadata(supervisorId, resetMetadata); } else { if (!checkSourceMetadataMatch(metadata)) { @@ -3311,7 +3311,7 @@ private boolean updatePartitionDataFromStream() /** * gets mapping of partitions in stream to their latest offsets. 
*/ - protected Map getLatestSequencesFromStream() + public Map getLatestSequencesFromStream() { return new HashMap<>(); } @@ -4589,7 +4589,7 @@ private OrderedSequenceNumber getOffsetFromStorageForPartiti } } - protected Map getOffsetsFromMetadataStorage() + public Map getOffsetsFromMetadataStorage() { final DataSourceMetadata dataSourceMetadata = retrieveDataSourceMetadata(); if (dataSourceMetadata instanceof SeekableStreamDataSourceMetadata @@ -4976,7 +4976,7 @@ private void updateCurrentOffsets() throws InterruptedException, ExecutionExcept coalesceAndAwait(futures); } - protected abstract void updatePartitionLagFromStream(); + public abstract void updatePartitionLagFromStream(); /** * Gets 'lag' of currently processed offset behind latest offset as a measure of difference between offsets. @@ -5233,7 +5233,7 @@ protected abstract List sequence * @return specific instance of datasource metadata */ - protected abstract SeekableStreamDataSourceMetadata createDataSourceMetaDataForReset( + public abstract SeekableStreamDataSourceMetadata createDataSourceMetaDataForReset( String stream, Map map ); diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/supervisor/SeekableStreamSupervisorSpec.java b/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/supervisor/SeekableStreamSupervisorSpec.java index 842f0de4774e..ecbd51757c37 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/supervisor/SeekableStreamSupervisorSpec.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/supervisor/SeekableStreamSupervisorSpec.java @@ -297,4 +297,10 @@ public void merge(@Nullable SupervisorSpec existingSpec) protected abstract SeekableStreamSupervisorSpec toggleSuspend(boolean suspend); + public abstract SeekableStreamSupervisorSpec createBackfillSpec( + String backfillId, + BoundedStreamConfig boundedStreamConfig, + @Nullable Integer taskCount + ); + } diff --git 
a/indexing-service/src/main/java/org/apache/druid/indexing/worker/WorkerCuratorCoordinator.java b/indexing-service/src/main/java/org/apache/druid/indexing/worker/WorkerCuratorCoordinator.java deleted file mode 100644 index c018c6a1a63b..000000000000 --- a/indexing-service/src/main/java/org/apache/druid/indexing/worker/WorkerCuratorCoordinator.java +++ /dev/null @@ -1,226 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.druid.indexing.worker; - -import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.common.base.Joiner; -import com.google.common.collect.ImmutableMap; -import com.google.inject.Inject; -import org.apache.curator.framework.CuratorFramework; -import org.apache.druid.curator.CuratorUtils; -import org.apache.druid.curator.announcement.ServiceAnnouncer; -import org.apache.druid.guice.annotations.DirectExecutorAnnouncer; -import org.apache.druid.indexing.overlord.config.RemoteTaskRunnerConfig; -import org.apache.druid.java.util.common.DateTimes; -import org.apache.druid.java.util.common.ISE; -import org.apache.druid.java.util.common.lifecycle.LifecycleStart; -import org.apache.druid.java.util.common.lifecycle.LifecycleStop; -import org.apache.druid.java.util.common.logger.Logger; -import org.apache.druid.server.initialization.IndexerZkConfig; -import org.apache.zookeeper.CreateMode; -import org.apache.zookeeper.KeeperException; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; - -/** - * The CuratorCoordinator provides methods to use Curator. Persistent ZK paths are created on {@link #start()}. 
- */ -public class WorkerCuratorCoordinator -{ - private static final Logger log = new Logger(WorkerCuratorCoordinator.class); - private static final Joiner JOINER = Joiner.on("/"); - - private final Object lock = new Object(); - - private final ObjectMapper jsonMapper; - private final RemoteTaskRunnerConfig config; - private final CuratorFramework curatorFramework; - private final ServiceAnnouncer announcer; - - private final String baseAnnouncementsPath; - private final String baseTaskPath; - private final String baseStatusPath; - - private volatile Worker worker; - private volatile boolean started; - - @Inject - public WorkerCuratorCoordinator( - ObjectMapper jsonMapper, - IndexerZkConfig indexerZkConfig, - RemoteTaskRunnerConfig config, - CuratorFramework curatorFramework, - @DirectExecutorAnnouncer ServiceAnnouncer announcer, - Worker worker - ) - { - this.jsonMapper = jsonMapper; - this.config = config; - this.curatorFramework = curatorFramework; - this.worker = worker; - this.announcer = announcer; - - this.baseAnnouncementsPath = getPath(Arrays.asList(indexerZkConfig.getAnnouncementsPath(), worker.getHost())); - this.baseTaskPath = getPath(Arrays.asList(indexerZkConfig.getTasksPath(), worker.getHost())); - this.baseStatusPath = getPath(Arrays.asList(indexerZkConfig.getStatusPath(), worker.getHost())); - } - - @LifecycleStart - public void start() throws Exception - { - log.info("WorkerCuratorCoordinator good to go. 
Server[%s]", worker.getHost()); - synchronized (lock) { - if (started) { - return; - } - - CuratorUtils.createIfNotExists( - curatorFramework, - getTaskPathForWorker(), - CreateMode.PERSISTENT, - jsonMapper.writeValueAsBytes(ImmutableMap.of("created", DateTimes.nowUtc().toString())), - config.getMaxZnodeBytes() - ); - - CuratorUtils.createIfNotExists( - curatorFramework, - getStatusPathForWorker(), - CreateMode.PERSISTENT, - jsonMapper.writeValueAsBytes(ImmutableMap.of("created", DateTimes.nowUtc().toString())), - config.getMaxZnodeBytes() - ); - - announcer.start(); - announcer.announce(getAnnouncementsPathForWorker(), jsonMapper.writeValueAsBytes(worker), false); - - started = true; - } - } - - @LifecycleStop - public void stop() - { - log.info("Stopping WorkerCuratorCoordinator for worker[%s]", worker.getHost()); - synchronized (lock) { - if (!started) { - return; - } - announcer.stop(); - - started = false; - } - } - - public String getPath(Iterable parts) - { - return JOINER.join(parts); - } - - public String getAnnouncementsPathForWorker() - { - return baseAnnouncementsPath; - } - - public String getTaskPathForWorker() - { - return baseTaskPath; - } - - public String getTaskPathForId(String taskId) - { - return getPath(Arrays.asList(baseTaskPath, taskId)); - } - - public String getStatusPathForWorker() - { - return baseStatusPath; - } - - public String getStatusPathForId(String statusId) - { - return getPath(Arrays.asList(baseStatusPath, statusId)); - } - - public Worker getWorker() - { - return worker; - } - - public void removeTaskRunZnode(String taskId) throws Exception - { - try { - curatorFramework.delete().guaranteed().forPath(getTaskPathForId(taskId)); - } - catch (KeeperException e) { - log.debug( - e, - "Could not delete task path for task[%s]. 
This is not an error if httpRemote taskRunner is being used at overlord.", - taskId - ); - } - } - - public void updateTaskStatusAnnouncement(TaskAnnouncement announcement) throws Exception - { - synchronized (lock) { - if (!started) { - return; - } - - CuratorUtils.createOrSet( - curatorFramework, - getStatusPathForId(announcement.getTaskStatus().getId()), - CreateMode.PERSISTENT, - jsonMapper.writeValueAsBytes(announcement), - config.getMaxZnodeBytes() - ); - } - } - - public List getAnnouncements() throws Exception - { - final List announcements = new ArrayList<>(); - - for (String id : curatorFramework.getChildren().forPath(getStatusPathForWorker())) { - announcements.add( - jsonMapper.readValue( - curatorFramework.getData().forPath(getStatusPathForId(id)), - TaskAnnouncement.class - ) - ); - } - - return announcements; - } - - public void updateWorkerAnnouncement(Worker newWorker) throws Exception - { - synchronized (lock) { - if (!started) { - throw new ISE("Cannot update worker! Not Started!"); - } - - this.worker = newWorker; - announcer.update(getAnnouncementsPathForWorker(), jsonMapper.writeValueAsBytes(newWorker)); - } - } -} diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/worker/WorkerTaskManager.java b/indexing-service/src/main/java/org/apache/druid/indexing/worker/WorkerTaskManager.java index 7379e34293f1..34ff1757cf5d 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/worker/WorkerTaskManager.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/worker/WorkerTaskManager.java @@ -96,10 +96,8 @@ public class WorkerTaskManager implements IndexerTaskCountStatsProvider private final ConcurrentMap assignedTasks = new ConcurrentHashMap<>(); - // ZK_CLEANUP_TODO : these are marked protected to be used in subclass WorkerTaskMonitor that updates ZK. - // should be marked private alongwith WorkerTaskMonitor removal. 
- protected final ConcurrentMap runningTasks = new ConcurrentHashMap<>(); - protected final ConcurrentMap completedTasks = new ConcurrentHashMap<>(); + private final ConcurrentMap runningTasks = new ConcurrentHashMap<>(); + private final ConcurrentMap completedTasks = new ConcurrentHashMap<>(); private final ChangeRequestHistory changeHistory = new ChangeRequestHistory<>(); @@ -782,8 +780,6 @@ public void handle() "Got run notice for task [%s] that I am already running or completed...", task.getId() ); - - taskStarted(task.getId()); return; } @@ -801,9 +797,6 @@ public void handle() cleanupAssignedTask(task); log.info("Task[%s] started.", task.getId()); } - - taskAnnouncementChanged(announcement); - taskStarted(task.getId()); } } @@ -855,7 +848,6 @@ public void handle() moveFromRunningToCompleted(task.getId(), latest); changeHistory.addChangeRequest(new WorkerHistoryItem.TaskUpdate(latest)); - taskAnnouncementChanged(latest); log.info( "Task [%s] completed with status [%s].", task.getId(), @@ -903,24 +895,8 @@ public void handle() ); changeHistory.addChangeRequest(new WorkerHistoryItem.TaskUpdate(latest)); - taskAnnouncementChanged(latest); } } } } - - // ZK_CLEANUP_TODO : - //Note: Following abstract methods exist only to support WorkerTaskMonitor that - //watches task assignments and updates task statuses inside Zookeeper. When the transition to HTTP is complete - //in Overlord as well as MiddleManagers then WorkerTaskMonitor should be deleted, this class should no longer be abstract - //and the methods below should be removed. 
- protected void taskStarted(String taskId) - { - - } - - protected void taskAnnouncementChanged(TaskAnnouncement announcement) - { - - } } diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/worker/WorkerTaskMonitor.java b/indexing-service/src/main/java/org/apache/druid/indexing/worker/WorkerTaskMonitor.java deleted file mode 100644 index c8537997b588..000000000000 --- a/indexing-service/src/main/java/org/apache/druid/indexing/worker/WorkerTaskMonitor.java +++ /dev/null @@ -1,226 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.druid.indexing.worker; - -import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.common.base.Preconditions; -import com.google.inject.Inject; -import org.apache.curator.framework.CuratorFramework; -import org.apache.curator.framework.recipes.cache.PathChildrenCache; -import org.apache.curator.framework.recipes.cache.PathChildrenCacheEvent; -import org.apache.curator.framework.recipes.cache.PathChildrenCacheListener; -import org.apache.druid.curator.CuratorUtils; -import org.apache.druid.indexer.TaskLocation; -import org.apache.druid.indexer.TaskStatus; -import org.apache.druid.indexing.common.config.TaskConfig; -import org.apache.druid.indexing.common.task.Task; -import org.apache.druid.indexing.overlord.TaskRunner; -import org.apache.druid.indexing.worker.config.WorkerConfig; -import org.apache.druid.java.util.common.concurrent.Execs; -import org.apache.druid.java.util.common.lifecycle.LifecycleStart; -import org.apache.druid.java.util.common.lifecycle.LifecycleStop; -import org.apache.druid.java.util.emitter.EmittingLogger; -import org.apache.druid.rpc.indexing.OverlordClient; - -/** - * This class is deprecated and required only to support {@link org.apache.druid.indexing.overlord.RemoteTaskRunner}. - * {@link org.apache.druid.indexing.overlord.hrtr.HttpRemoteTaskRunner} should be used instead. - * - * The monitor watches ZK at a specified path for new tasks to appear. Upon starting the monitor, a listener will be - * created that waits for new tasks. Tasks are executed as soon as they are seen. 
- */ -@Deprecated -public class WorkerTaskMonitor extends WorkerTaskManager -{ - private static final EmittingLogger log = new EmittingLogger(WorkerTaskMonitor.class); - - private final ObjectMapper jsonMapper; - private final PathChildrenCache pathChildrenCache; - private final CuratorFramework cf; - private final WorkerCuratorCoordinator workerCuratorCoordinator; - - private final Object lifecycleLock = new Object(); - private volatile boolean started = false; - - @Inject - public WorkerTaskMonitor( - ObjectMapper jsonMapper, - TaskRunner taskRunner, - TaskConfig taskConfig, - WorkerConfig workerConfig, - CuratorFramework cf, - WorkerCuratorCoordinator workerCuratorCoordinator, - OverlordClient overlordClient - ) - { - super(jsonMapper, taskRunner, taskConfig, workerConfig, overlordClient); - - this.jsonMapper = jsonMapper; - this.pathChildrenCache = new PathChildrenCache( - cf, - workerCuratorCoordinator.getTaskPathForWorker(), - false, - true, - Execs.makeThreadFactory("TaskMonitorCache-%s") - ); - this.cf = cf; - this.workerCuratorCoordinator = workerCuratorCoordinator; - } - - /** - * Register a monitor for new tasks. When new tasks appear, the worker node announces a status to indicate it has - * started the task. When the task is complete, the worker node updates the status. 
- */ - @LifecycleStart - @Override - public void start() throws Exception - { - super.start(); - - synchronized (lifecycleLock) { - Preconditions.checkState(!started, "already started"); - started = true; - - try { - cleanupStaleAnnouncements(); - registerRunListener(); - pathChildrenCache.start(); - - log.debug("Started WorkerTaskMonitor."); - started = true; - } - catch (InterruptedException e) { - throw e; - } - catch (Exception e) { - log.makeAlert(e, "Exception starting WorkerTaskMonitor") - .emit(); - throw e; - } - } - } - - private void cleanupStaleAnnouncements() throws Exception - { - synchronized (lock) { - // cleanup any old running task announcements which are invalid after restart - for (TaskAnnouncement announcement : workerCuratorCoordinator.getAnnouncements()) { - if (announcement.getTaskStatus().isRunnable()) { - TaskStatus completionStatus = null; - TaskAnnouncement completedAnnouncement = completedTasks.get(announcement.getTaskId()); - if (completedAnnouncement != null) { - completionStatus = completedAnnouncement.getTaskStatus(); - } else if (!runningTasks.containsKey(announcement.getTaskStatus().getId())) { - completionStatus = TaskStatus.failure( - announcement.getTaskStatus().getId(), - "Canceled as unknown task. See middleManager or indexer logs for more details." - ); - } - - if (completionStatus != null) { - log.info( - "Cleaning up stale announcement for task [%s]. 
New status is [%s].", - announcement.getTaskStatus().getId(), - completionStatus.getStatusCode() - ); - workerCuratorCoordinator.updateTaskStatusAnnouncement( - TaskAnnouncement.create( - announcement.getTaskStatus().getId(), - announcement.getTaskType(), - announcement.getTaskResource(), - completionStatus, - TaskLocation.unknown(), - announcement.getTaskDataSource() - ) - ); - } - } - } - } - } - - private void registerRunListener() - { - pathChildrenCache.getListenable().addListener( - new PathChildrenCacheListener() - { - @Override - public void childEvent(CuratorFramework curatorFramework, PathChildrenCacheEvent event) - throws Exception - { - if (CuratorUtils.isChildAdded(event)) { - final Task task = jsonMapper.readValue( - cf.getData().forPath(event.getData().getPath()), - Task.class - ); - - assignTask(task); - } - } - } - ); - } - - @LifecycleStop - @Override - public void stop() throws Exception - { - super.stop(); - - synchronized (lifecycleLock) { - Preconditions.checkState(started, "not started"); - - try { - started = false; - pathChildrenCache.close(); - - log.debug("Stopped WorkerTaskMonitor."); - } - catch (Exception e) { - log.makeAlert(e, "Exception stopping WorkerTaskMonitor") - .emit(); - } - } - } - - @Override - protected void taskStarted(String taskId) - { - try { - workerCuratorCoordinator.removeTaskRunZnode(taskId); - } - catch (Exception ex) { - log.error(ex, "Unknown exception while deleting task[%s] znode.", taskId); - } - } - - @Override - protected void taskAnnouncementChanged(TaskAnnouncement announcement) - { - try { - workerCuratorCoordinator.updateTaskStatusAnnouncement(announcement); - } - catch (Exception ex) { - log.makeAlert(ex, "Failed to update task announcement") - .addData("task", announcement.getTaskId()) - .emit(); - } - } -} diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/worker/http/WorkerResource.java b/indexing-service/src/main/java/org/apache/druid/indexing/worker/http/WorkerResource.java 
index 528c230857e4..5d81b14f2ee1 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/worker/http/WorkerResource.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/worker/http/WorkerResource.java @@ -25,14 +25,11 @@ import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; import com.google.inject.Inject; -import com.google.inject.Provider; import com.sun.jersey.spi.container.ResourceFilters; import org.apache.druid.common.utils.IdUtils; -import org.apache.druid.curator.ZkEnablementConfig; import org.apache.druid.indexing.overlord.TaskRunner; import org.apache.druid.indexing.overlord.TaskRunnerWorkItem; import org.apache.druid.indexing.worker.Worker; -import org.apache.druid.indexing.worker.WorkerCuratorCoordinator; import org.apache.druid.indexing.worker.WorkerTaskManager; import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.java.util.common.logger.Logger; @@ -41,7 +38,6 @@ import org.apache.druid.server.http.security.StateResourceFilter; import org.apache.druid.tasklogs.TaskLogStreamer; -import javax.annotation.Nullable; import javax.ws.rs.DefaultValue; import javax.ws.rs.GET; import javax.ws.rs.POST; @@ -60,35 +56,21 @@ public class WorkerResource { private static final Logger log = new Logger(WorkerResource.class); - private static String DISABLED_VERSION = ""; private final Worker enabledWorker; - - @Nullable // Null, if zk is disabled - private final WorkerCuratorCoordinator curatorCoordinator; - private final TaskRunner taskRunner; private final WorkerTaskManager workerTaskManager; @Inject public WorkerResource( Worker worker, - Provider curatorCoordinatorProvider, TaskRunner taskRunner, - WorkerTaskManager workerTaskManager, - ZkEnablementConfig zkEnablementConfig - + WorkerTaskManager workerTaskManager ) { this.enabledWorker = worker; this.taskRunner = taskRunner; this.workerTaskManager = workerTaskManager; - - if (zkEnablementConfig.isEnabled()) { - 
this.curatorCoordinator = curatorCoordinatorProvider.get(); - } else { - this.curatorCoordinator = null; - } } @@ -99,20 +81,6 @@ public WorkerResource( public Response doDisable() { try { - if (curatorCoordinator != null) { - // Dual-write disabled signal: legacy version="" for old overlords + disabled=true for new overlords. - // TODO: Safe to drop DISABLED_VERSION once backward compatibility with overlords is no longer required. - final Worker disabledWorker = new Worker( - enabledWorker.getScheme(), - enabledWorker.getHost(), - enabledWorker.getIp(), - enabledWorker.getCapacity(), - DISABLED_VERSION, - enabledWorker.getCategory(), - true - ); - curatorCoordinator.updateWorkerAnnouncement(disabledWorker); - } workerTaskManager.workerDisabled(); return Response.ok(ImmutableMap.of(enabledWorker.getHost(), "disabled")).build(); } @@ -128,9 +96,6 @@ public Response doDisable() public Response doEnable() { try { - if (curatorCoordinator != null) { - curatorCoordinator.updateWorkerAnnouncement(enabledWorker); - } workerTaskManager.workerEnabled(); return Response.ok(ImmutableMap.of(enabledWorker.getHost(), "enabled")).build(); } diff --git a/indexing-service/src/main/java/org/apache/druid/server/initialization/IndexerZkConfig.java b/indexing-service/src/main/java/org/apache/druid/server/initialization/IndexerZkConfig.java deleted file mode 100644 index 41f245e106ff..000000000000 --- a/indexing-service/src/main/java/org/apache/druid/server/initialization/IndexerZkConfig.java +++ /dev/null @@ -1,136 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.server.initialization; - -import com.fasterxml.jackson.annotation.JacksonInject; -import com.fasterxml.jackson.annotation.JsonCreator; -import com.fasterxml.jackson.annotation.JsonProperty; -import org.apache.curator.utils.ZKPaths; - -/** - * - */ -public class IndexerZkConfig -{ - @JsonCreator - public IndexerZkConfig( - @JacksonInject ZkPathsConfig zkPathsConfig, - @JsonProperty("base") String base, - @JsonProperty("announcementsPath") String announcementsPath, - @JsonProperty("tasksPath") String tasksPath, - @JsonProperty("statusPath") String statusPath - ) - { - this.zkPathsConfig = zkPathsConfig; - this.base = base; - this.announcementsPath = announcementsPath; - this.tasksPath = tasksPath; - this.statusPath = statusPath; - } - - @JacksonInject - private final ZkPathsConfig zkPathsConfig; - - @JsonProperty - private final String base; - - @JsonProperty - private final String announcementsPath; - - @JsonProperty - private final String tasksPath; - - @JsonProperty - private final String statusPath; - - private String defaultIndexerPath(final String subPath) - { - return ZKPaths.makePath(getBase(), subPath); - } - - public String getBase() - { - return base == null ? getZkPathsConfig().defaultPath("indexer") : base; - } - - public String getAnnouncementsPath() - { - return announcementsPath == null ? defaultIndexerPath("announcements") : announcementsPath; - } - - public String getTasksPath() - { - return tasksPath == null ? 
defaultIndexerPath("tasks") : tasksPath; - } - - public String getStatusPath() - { - return statusPath == null ? defaultIndexerPath("status") : statusPath; - } - - public ZkPathsConfig getZkPathsConfig() - { - return zkPathsConfig; - } - - @Override - public boolean equals(Object o) - { - if (this == o) { - return true; - } - if (o == null || getClass() != o.getClass()) { - return false; - } - - IndexerZkConfig that = (IndexerZkConfig) o; - - if (announcementsPath != null - ? !announcementsPath.equals(that.announcementsPath) - : that.announcementsPath != null) { - return false; - } - if (base != null ? !base.equals(that.base) : that.base != null) { - return false; - } - if (statusPath != null ? !statusPath.equals(that.statusPath) : that.statusPath != null) { - return false; - } - if (tasksPath != null ? !tasksPath.equals(that.tasksPath) : that.tasksPath != null) { - return false; - } - if (zkPathsConfig != null ? !zkPathsConfig.equals(that.zkPathsConfig) : that.zkPathsConfig != null) { - return false; - } - - return true; - } - - @Override - public int hashCode() - { - int result = zkPathsConfig != null ? zkPathsConfig.hashCode() : 0; - result = 31 * result + (base != null ? base.hashCode() : 0); - result = 31 * result + (announcementsPath != null ? announcementsPath.hashCode() : 0); - result = 31 * result + (tasksPath != null ? tasksPath.hashCode() : 0); - result = 31 * result + (statusPath != null ? 
statusPath.hashCode() : 0); - return result; - } -} diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/TaskToolboxTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/TaskToolboxTest.java index 639a3410ab2f..d83ea2885868 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/TaskToolboxTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/TaskToolboxTest.java @@ -53,7 +53,7 @@ import org.apache.druid.segment.loading.SegmentLoaderConfig; import org.apache.druid.segment.loading.SegmentLocalCacheManager; import org.apache.druid.segment.metadata.CentralizedDatasourceSchemaConfig; -import org.apache.druid.segment.realtime.NoopChatHandlerProvider; +import org.apache.druid.segment.realtime.ChatHandlerProvider; import org.apache.druid.segment.realtime.appenderator.AppenderatorsManager; import org.apache.druid.segment.realtime.appenderator.UnifiedIndexerAppenderatorsManager; import org.apache.druid.server.DruidNode; @@ -150,7 +150,7 @@ public void setUp() throws IOException new NoopTestTaskReportFileWriter(), null, AuthTestUtils.TEST_AUTHORIZER_MAPPER, - new NoopChatHandlerProvider(), + new ChatHandlerProvider(), new DropwizardRowIngestionMetersFactory(), new TestAppenderatorsManager(), new NoopOverlordClient(), diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/TestUtils.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/TestUtils.java index 6481a5aecfbc..f61f1c18d055 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/TestUtils.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/TestUtils.java @@ -47,7 +47,6 @@ import org.apache.druid.segment.loading.LocalDataSegmentPuller; import org.apache.druid.segment.loading.LocalLoadSpec; import org.apache.druid.segment.realtime.ChatHandlerProvider; -import org.apache.druid.segment.realtime.NoopChatHandlerProvider; import 
org.apache.druid.segment.realtime.appenderator.AppenderatorsManager; import org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMediumFactory; import org.apache.druid.server.security.AuthConfig; @@ -94,7 +93,7 @@ public TestUtils() .addValue(ExprMacroTable.class, LookupEnabledTestExprMacroTable.INSTANCE) .addValue(IndexIO.class, indexIO) .addValue(ObjectMapper.class, jsonMapper) - .addValue(ChatHandlerProvider.class, new NoopChatHandlerProvider()) + .addValue(ChatHandlerProvider.class, new ChatHandlerProvider()) .addValue(AuthConfig.class, new AuthConfig()) .addValue(AuthorizerMapper.class, null) .addValue(RowIngestionMetersFactory.class, rowIngestionMetersFactory) diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/ClientCompactionTaskQuerySerdeTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/ClientCompactionTaskQuerySerdeTest.java index 807f67bdb092..708b511b93cd 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/ClientCompactionTaskQuerySerdeTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/ClientCompactionTaskQuerySerdeTest.java @@ -60,7 +60,6 @@ import org.apache.druid.segment.incremental.OnheapIncrementalIndex; import org.apache.druid.segment.incremental.RowIngestionMetersFactory; import org.apache.druid.segment.realtime.ChatHandlerProvider; -import org.apache.druid.segment.realtime.NoopChatHandlerProvider; import org.apache.druid.segment.realtime.appenderator.AppenderatorsManager; import org.apache.druid.segment.transform.CompactionTransformSpec; import org.apache.druid.segment.writeout.TmpFileSegmentWriteOutMediumFactory; @@ -174,7 +173,7 @@ private static ObjectMapper setupInjectablesInObjectMapper(ObjectMapper objectMa ImmutableList.of( binder -> { binder.bind(AuthorizerMapper.class).toInstance(AuthTestUtils.TEST_AUTHORIZER_MAPPER); - binder.bind(ChatHandlerProvider.class).toInstance(new NoopChatHandlerProvider()); + 
binder.bind(ChatHandlerProvider.class).toInstance(new ChatHandlerProvider()); binder.bind(RowIngestionMetersFactory.class).toInstance(ROW_INGESTION_METERS_FACTORY); binder.bind(CoordinatorClient.class).toInstance(COORDINATOR_CLIENT); binder.bind(SegmentCacheManagerFactory.class).toInstance(new SegmentCacheManagerFactory(TestIndex.INDEX_IO, objectMapper)); diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTaskRunBase.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTaskRunBase.java index a4690f8ec090..99d478229a89 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTaskRunBase.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTaskRunBase.java @@ -101,12 +101,13 @@ import org.apache.druid.segment.loading.SegmentCacheManager; import org.apache.druid.segment.loading.SegmentLoaderConfig; import org.apache.druid.segment.loading.SegmentLocalCacheManager; +import org.apache.druid.segment.loading.StorageLoadingThreadPool; import org.apache.druid.segment.loading.StorageLocation; import org.apache.druid.segment.loading.StorageLocationConfig; import org.apache.druid.segment.loading.TombstoneLoadSpec; import org.apache.druid.segment.metadata.CentralizedDatasourceSchemaConfig; import org.apache.druid.segment.nested.NestedCommonFormatColumnFormatSpec; -import org.apache.druid.segment.realtime.NoopChatHandlerProvider; +import org.apache.druid.segment.realtime.ChatHandlerProvider; import org.apache.druid.segment.realtime.WindowedCursorFactory; import org.apache.druid.segment.transform.CompactionTransformSpec; import org.apache.druid.server.metrics.NoopServiceEmitter; @@ -1263,7 +1264,7 @@ public void testRunWithSpatialDimensions() throws Exception Assert.assertEquals(new NumberedShardSpec(0, 1), segments.get(0).getShardSpec()); final File cacheDir = temporaryFolder.newFolder(); - final SegmentCacheManager segmentCacheManager 
= segmentCacheManagerFactory.manufacturate(cacheDir, false); + final SegmentCacheManager segmentCacheManager = segmentCacheManagerFactory.manufacturate(cacheDir, null, false); List rowsFromSegment = new ArrayList<>(); for (DataSegment segment : segments) { @@ -1375,7 +1376,7 @@ public void testRunWithAutoCastDimensions() throws Exception Assert.assertEquals(new NumberedShardSpec(0, 1), segments.get(0).getShardSpec()); final File cacheDir = temporaryFolder.newFolder(); - final SegmentCacheManager segmentCacheManager = segmentCacheManagerFactory.manufacturate(cacheDir, false); + final SegmentCacheManager segmentCacheManager = segmentCacheManagerFactory.manufacturate(cacheDir, null, false); List rowsFromSegment = new ArrayList<>(); for (DataSegment segment : segments) { @@ -1492,7 +1493,7 @@ public void testRunWithAutoCastDimensionsSortByDimension() throws Exception Assert.assertEquals(new NumberedShardSpec(0, 1), compactSegment.getShardSpec()); final File cacheDir = temporaryFolder.newFolder(); - final SegmentCacheManager segmentCacheManager = segmentCacheManagerFactory.manufacturate(cacheDir, false); + final SegmentCacheManager segmentCacheManager = segmentCacheManagerFactory.manufacturate(cacheDir, null, false); List rowsFromSegment = new ArrayList<>(); segmentCacheManager.load(compactSegment); @@ -1714,6 +1715,7 @@ public boolean isVirtualStorageEphemeral() final SegmentCacheManager cacheManager = new SegmentLocalCacheManager( storageLocations, loaderConfig, + StorageLoadingThreadPool.createFromConfig(loaderConfig), new LeastBytesUsedStorageLocationSelectorStrategy(storageLocations), TestIndex.INDEX_IO, objectMapper @@ -1734,7 +1736,7 @@ public boolean isVirtualStorageEphemeral() .indexMerger(testUtils.getIndexMergerV9Factory().create(true)) .taskReportFileWriter(new SingleFileTaskReportFileWriter(reportsFile)) .authorizerMapper(AuthTestUtils.TEST_AUTHORIZER_MAPPER) - .chatHandlerProvider(new NoopChatHandlerProvider()) + .chatHandlerProvider(new 
ChatHandlerProvider()) .rowIngestionMetersFactory(testUtils.getRowIngestionMetersFactory()) .appenderatorsManager(new TestAppenderatorsManager()) .overlordClient(overlordClient) @@ -1748,7 +1750,7 @@ public boolean isVirtualStorageEphemeral() protected List getCSVFormatRowsFromSegments(List segments) throws Exception { final File cacheDir = temporaryFolder.newFolder(); - final SegmentCacheManager segmentCacheManager = segmentCacheManagerFactory.manufacturate(cacheDir, false); + final SegmentCacheManager segmentCacheManager = segmentCacheManagerFactory.manufacturate(cacheDir, null, false); List rowsFromSegment = new ArrayList<>(); for (DataSegment segment : segments) { diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTaskTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTaskTest.java index 10c27c0b268c..6e3ae324b74c 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTaskTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTaskTest.java @@ -138,7 +138,6 @@ import org.apache.druid.segment.loading.SegmentCacheManager; import org.apache.druid.segment.metadata.CentralizedDatasourceSchemaConfig; import org.apache.druid.segment.realtime.ChatHandlerProvider; -import org.apache.druid.segment.realtime.NoopChatHandlerProvider; import org.apache.druid.segment.realtime.appenderator.AppenderatorsManager; import org.apache.druid.segment.selector.settable.SettableColumnValueSelector; import org.apache.druid.segment.transform.CompactionTransformSpec; @@ -305,7 +304,7 @@ private static ObjectMapper setupInjectablesInObjectMapper(ObjectMapper objectMa List.of( binder -> { binder.bind(AuthorizerMapper.class).toInstance(AuthTestUtils.TEST_AUTHORIZER_MAPPER); - binder.bind(ChatHandlerProvider.class).toInstance(new NoopChatHandlerProvider()); + binder.bind(ChatHandlerProvider.class).toInstance(new ChatHandlerProvider()); 
binder.bind(RowIngestionMetersFactory.class).toInstance(TEST_UTILS.getRowIngestionMetersFactory()); binder.bind(CoordinatorClient.class).toInstance(COORDINATOR_CLIENT); binder.bind(SegmentCacheManagerFactory.class) @@ -2025,7 +2024,7 @@ public void drop(DataSegment segment) )) .taskReportFileWriter(new NoopTestTaskReportFileWriter()) .authorizerMapper(AuthTestUtils.TEST_AUTHORIZER_MAPPER) - .chatHandlerProvider(new NoopChatHandlerProvider()) + .chatHandlerProvider(new ChatHandlerProvider()) .rowIngestionMetersFactory(TEST_UTILS.getRowIngestionMetersFactory()) .appenderatorsManager(new TestAppenderatorsManager()) .coordinatorClient(COORDINATOR_CLIENT) diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IndexTaskTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IndexTaskTest.java index a2b0b2255537..399cb9a513e0 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IndexTaskTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IndexTaskTest.java @@ -88,6 +88,7 @@ import org.apache.druid.segment.loading.SegmentCacheManager; import org.apache.druid.segment.loading.SegmentLoaderConfig; import org.apache.druid.segment.loading.SegmentLocalCacheManager; +import org.apache.druid.segment.loading.StorageLoadingThreadPool; import org.apache.druid.segment.loading.StorageLocation; import org.apache.druid.segment.loading.StorageLocationConfig; import org.apache.druid.segment.realtime.WindowedCursorFactory; @@ -219,6 +220,7 @@ public List getLocations() segmentCacheManager = new SegmentLocalCacheManager( storageLocations, loaderConfig, + StorageLoadingThreadPool.createFromConfig(loaderConfig), new LeastBytesUsedStorageLocationSelectorStrategy(storageLocations), TestIndex.INDEX_IO, jsonMapper diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IngestionTestBase.java 
b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IngestionTestBase.java index 66628d340b5f..7ca28f7f3b79 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IngestionTestBase.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IngestionTestBase.java @@ -80,7 +80,7 @@ import org.apache.druid.segment.metadata.IndexingStateCache; import org.apache.druid.segment.metadata.SegmentSchemaCache; import org.apache.druid.segment.metadata.SegmentSchemaManager; -import org.apache.druid.segment.realtime.NoopChatHandlerProvider; +import org.apache.druid.segment.realtime.ChatHandlerProvider; import org.apache.druid.server.DruidNode; import org.apache.druid.server.coordinator.simulate.TestDruidLeaderSelector; import org.apache.druid.server.metrics.NoopServiceEmitter; @@ -227,7 +227,7 @@ public void shutdownTask(Task task) public SegmentCacheManager newSegmentLoader(File storageDir) { - return segmentCacheManagerFactory.manufacturate(storageDir, true); + return segmentCacheManagerFactory.manufacturate(storageDir, null, true); } public ObjectMapper getObjectMapper() @@ -311,7 +311,7 @@ public TaskToolbox createTaskToolbox(TaskConfig config, Task task, SupervisorMan .create(task.getContextValue(Tasks.STORE_EMPTY_COLUMNS_KEY, true))) .taskReportFileWriter(new NoopTestTaskReportFileWriter()) .authorizerMapper(AuthTestUtils.TEST_AUTHORIZER_MAPPER) - .chatHandlerProvider(new NoopChatHandlerProvider()) + .chatHandlerProvider(new ChatHandlerProvider()) .rowIngestionMetersFactory(testUtils.getRowIngestionMetersFactory()) .appenderatorsManager(new TestAppenderatorsManager()) .taskLogPusher(null) @@ -489,7 +489,7 @@ public ListenableFuture run(Task task) .create(task.getContextValue(Tasks.STORE_EMPTY_COLUMNS_KEY, true))) .taskReportFileWriter(new SingleFileTaskReportFileWriter(taskReportsFile)) .authorizerMapper(AuthTestUtils.TEST_AUTHORIZER_MAPPER) - .chatHandlerProvider(new NoopChatHandlerProvider()) + 
.chatHandlerProvider(new ChatHandlerProvider()) .rowIngestionMetersFactory(testUtils.getRowIngestionMetersFactory()) .appenderatorsManager(new TestAppenderatorsManager()) .taskLogPusher(null) diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/AbstractMultiPhaseParallelIndexingTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/AbstractMultiPhaseParallelIndexingTest.java index 1790c0d36bb5..c4c2baa79997 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/AbstractMultiPhaseParallelIndexingTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/AbstractMultiPhaseParallelIndexingTest.java @@ -257,7 +257,7 @@ List querySegment(DataSegment dataSegment, List columns private Segment loadSegment(DataSegment dataSegment, File tempSegmentDir) { final SegmentCacheManager cacheManager = new SegmentCacheManagerFactory(TestIndex.INDEX_IO, getObjectMapper()) - .manufacturate(tempSegmentDir, false); + .manufacturate(tempSegmentDir, null, false); try { cacheManager.load(dataSegment); return cacheManager.acquireCachedSegment(dataSegment.getId()).orElseThrow(); diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/AbstractParallelIndexSupervisorTaskTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/AbstractParallelIndexSupervisorTaskTest.java index 236d067e53d2..e147d032ad54 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/AbstractParallelIndexSupervisorTaskTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/AbstractParallelIndexSupervisorTaskTest.java @@ -96,7 +96,6 @@ import org.apache.druid.segment.loading.StorageLocationConfig; import org.apache.druid.segment.metadata.CentralizedDatasourceSchemaConfig; import 
org.apache.druid.segment.realtime.ChatHandlerProvider; -import org.apache.druid.segment.realtime.NoopChatHandlerProvider; import org.apache.druid.segment.realtime.appenderator.AppenderatorsManager; import org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec; import org.apache.druid.server.DruidNode; @@ -628,7 +627,7 @@ public void prepareObjectMapper(ObjectMapper objectMapper, IndexIO indexIO) .addValue(ExprMacroTable.class, LookupEnabledTestExprMacroTable.INSTANCE) .addValue(IndexIO.class, indexIO) .addValue(ObjectMapper.class, objectMapper) - .addValue(ChatHandlerProvider.class, new NoopChatHandlerProvider()) + .addValue(ChatHandlerProvider.class, new ChatHandlerProvider()) .addValue(AuthConfig.class, new AuthConfig()) .addValue(AuthorizerMapper.class, null) .addValue(RowIngestionMetersFactory.class, new DropwizardRowIngestionMetersFactory()) @@ -685,7 +684,7 @@ public File getStorageDirectory() .taskReportFileWriter(new SingleFileTaskReportFileWriter(reportsFile)) .policyEnforcer(NoopPolicyEnforcer.instance()) .authorizerMapper(AuthTestUtils.TEST_AUTHORIZER_MAPPER) - .chatHandlerProvider(new NoopChatHandlerProvider()) + .chatHandlerProvider(new ChatHandlerProvider()) .rowIngestionMetersFactory(new TestUtils().getRowIngestionMetersFactory()) .appenderatorsManager(new TestAppenderatorsManager()) .overlordClient(indexingServiceClient) diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/OverlordBlinkLeadershipTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/OverlordBlinkLeadershipTest.java deleted file mode 100644 index af3b81977998..000000000000 --- a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/OverlordBlinkLeadershipTest.java +++ /dev/null @@ -1,93 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.indexing.overlord; - -import com.google.common.base.Supplier; -import org.apache.druid.indexing.overlord.autoscaling.ProvisioningSchedulerConfig; -import org.apache.druid.indexing.overlord.autoscaling.SimpleWorkerProvisioningConfig; -import org.apache.druid.indexing.overlord.autoscaling.SimpleWorkerProvisioningStrategy; -import org.apache.druid.indexing.overlord.setup.DefaultWorkerBehaviorConfig; -import org.apache.druid.indexing.overlord.setup.WorkerBehaviorConfig; -import org.joda.time.Period; -import org.junit.After; -import org.junit.Assert; -import org.junit.Before; -import org.junit.Test; - -public class OverlordBlinkLeadershipTest -{ - private RemoteTaskRunnerTestUtils rtrUtils; - private final TestRemoteTaskRunnerConfig remoteTaskRunnerConfig = new TestRemoteTaskRunnerConfig(new Period("PT5M")); - private final DefaultWorkerBehaviorConfig defaultWorkerBehaviourConfig = DefaultWorkerBehaviorConfig.defaultConfig(); - private final Supplier workerBehaviorConfigSupplier = new Supplier<>() - { - @Override - public DefaultWorkerBehaviorConfig get() - { - return defaultWorkerBehaviourConfig; - } - }; - private final SimpleWorkerProvisioningStrategy resourceManagement = new SimpleWorkerProvisioningStrategy( - new 
SimpleWorkerProvisioningConfig(), - workerBehaviorConfigSupplier, - new ProvisioningSchedulerConfig() - ); - - @Before - public void setUp() throws Exception - { - rtrUtils = new RemoteTaskRunnerTestUtils(); - rtrUtils.setUp(); - } - - @After - public void tearDown() throws Exception - { - rtrUtils.tearDown(); - } - - /** - * Test that we can start taskRunner, then stop it (emulating "losing leadership", see {@link - * TaskMaster#stopBeingLeader()}), then creating a new taskRunner from {@link - * org.apache.curator.framework.recipes.leader.LeaderSelectorListener#takeLeadership} implementation in - * {@link TaskMaster} and start it again. - */ - @Test(timeout = 60_000L) - public void testOverlordBlinkLeadership() - { - try { - RemoteTaskRunner remoteTaskRunner1 = rtrUtils.makeRemoteTaskRunner( - remoteTaskRunnerConfig, - resourceManagement, - null - ); - remoteTaskRunner1.stop(); - RemoteTaskRunner remoteTaskRunner2 = rtrUtils.makeRemoteTaskRunner( - remoteTaskRunnerConfig, - resourceManagement, - null - ); - remoteTaskRunner2.stop(); - } - catch (Exception e) { - Assert.fail("Should have not thrown any exceptions, thrown: " + e); - } - } -} diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/RemoteTaskRunnerFactoryTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/RemoteTaskRunnerFactoryTest.java deleted file mode 100644 index f62c86b14c1f..000000000000 --- a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/RemoteTaskRunnerFactoryTest.java +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.indexing.overlord; - -import org.apache.curator.framework.CuratorFramework; -import org.apache.druid.indexing.overlord.autoscaling.NoopProvisioningStrategy; -import org.apache.druid.indexing.overlord.autoscaling.ProvisioningSchedulerConfig; -import org.apache.druid.indexing.overlord.config.RemoteTaskRunnerConfig; -import org.apache.druid.server.initialization.IndexerZkConfig; -import org.apache.druid.server.initialization.ZkPathsConfig; -import org.junit.Assert; -import org.junit.Test; -import org.mockito.Mockito; - -public class RemoteTaskRunnerFactoryTest -{ - @Test - public void testBuildWithAutoScale() - { - ProvisioningSchedulerConfig provisioningSchedulerConfig = Mockito.mock(ProvisioningSchedulerConfig.class); - Mockito.when(provisioningSchedulerConfig.isDoAutoscale()).thenReturn(true); - - RemoteTaskRunnerFactory remoteTaskRunnerFactory = getTestRemoteTaskRunnerFactory(provisioningSchedulerConfig); - - Assert.assertNull(remoteTaskRunnerFactory.build().getProvisioningStrategy()); - } - - @Test - public void testBuildWithoutAutoScale() - { - ProvisioningSchedulerConfig provisioningSchedulerConfig = Mockito.mock(ProvisioningSchedulerConfig.class); - Mockito.when(provisioningSchedulerConfig.isDoAutoscale()).thenReturn(false); - - RemoteTaskRunnerFactory remoteTaskRunnerFactory = getTestRemoteTaskRunnerFactory(provisioningSchedulerConfig); - - Assert.assertTrue(remoteTaskRunnerFactory.build().getProvisioningStrategy() instanceof NoopProvisioningStrategy); - } - - private RemoteTaskRunnerFactory 
getTestRemoteTaskRunnerFactory(ProvisioningSchedulerConfig provisioningSchedulerConfig) - { - CuratorFramework curator = Mockito.mock(CuratorFramework.class); - Mockito.when(curator.newWatcherRemoveCuratorFramework()).thenReturn(null); - return new RemoteTaskRunnerFactory( - curator, - new RemoteTaskRunnerConfig(), - new IndexerZkConfig(new ZkPathsConfig(), null, null, null, null), - null, - null, - null, - provisioningSchedulerConfig, - null, - null - ); - } -} diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/RemoteTaskRunnerRunPendingTasksConcurrencyTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/RemoteTaskRunnerRunPendingTasksConcurrencyTest.java deleted file mode 100644 index 62c18d7bd232..000000000000 --- a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/RemoteTaskRunnerRunPendingTasksConcurrencyTest.java +++ /dev/null @@ -1,179 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.druid.indexing.overlord; - -import com.google.common.util.concurrent.ListenableFuture; -import org.apache.druid.indexer.TaskState; -import org.apache.druid.indexer.TaskStatus; -import org.apache.druid.indexing.common.TestTasks; -import org.apache.druid.indexing.common.task.Task; -import org.apache.druid.java.util.common.ISE; -import org.apache.zookeeper.ZooKeeper; -import org.joda.time.Period; -import org.junit.After; -import org.junit.Assert; -import org.junit.Before; -import org.junit.Test; - -/** - */ -public class RemoteTaskRunnerRunPendingTasksConcurrencyTest -{ - private RemoteTaskRunner remoteTaskRunner; - private final RemoteTaskRunnerTestUtils rtrTestUtils = new RemoteTaskRunnerTestUtils(); - - @Before - public void setUp() throws Exception - { - rtrTestUtils.setUp(); - } - - @After - public void tearDown() throws Exception - { - if (remoteTaskRunner != null) { - remoteTaskRunner.stop(); - } - rtrTestUtils.tearDown(); - } - - // This task reproduces the races described in https://github.com/apache/druid/issues/2842 - @Test(timeout = 60_000L) - public void testConcurrency() throws Exception - { - rtrTestUtils.makeWorker("worker0", 3); - rtrTestUtils.makeWorker("worker1", 3); - - remoteTaskRunner = rtrTestUtils.makeRemoteTaskRunner( - new TestRemoteTaskRunnerConfig(new Period("PT3600S")) - { - @Override - public int getPendingTasksRunnerNumThreads() - { - return 2; - } - }, - null - ); - - int numTasks = 6; - ListenableFuture[] results = new ListenableFuture[numTasks]; - Task[] tasks = new Task[numTasks]; - - //2 tasks - for (int i = 0; i < 2; i++) { - tasks[i] = TestTasks.unending("task" + i); - results[i] = (remoteTaskRunner.run(tasks[i])); - } - - waitForBothWorkersToHaveUnackedTasks(); - - //3 more tasks, all of which get queued up - for (int i = 2; i < 5; i++) { - tasks[i] = TestTasks.unending("task" + i); - results[i] = (remoteTaskRunner.run(tasks[i])); - } - - //simulate completion of task0 and task1 - 
mockWorkerRunningAndCompletionSuccessfulTasks(tasks[0], tasks[1]); - - Assert.assertEquals(TaskState.SUCCESS, results[0].get().getStatusCode()); - Assert.assertEquals(TaskState.SUCCESS, results[1].get().getStatusCode()); - - // now both threads race to run the last 3 tasks. task2 and task3 are being assigned - waitForBothWorkersToHaveUnackedTasks(); - - if (remoteTaskRunner.getWorkersWithUnacknowledgedTask().containsValue(tasks[2].getId()) - && remoteTaskRunner.getWorkersWithUnacknowledgedTask().containsValue(tasks[3].getId())) { - remoteTaskRunner.shutdown("task4", "test"); - mockWorkerRunningAndCompletionSuccessfulTasks(tasks[3], tasks[2]); - Assert.assertEquals(TaskState.SUCCESS, results[3].get().getStatusCode()); - Assert.assertEquals(TaskState.SUCCESS, results[2].get().getStatusCode()); - } else if (remoteTaskRunner.getWorkersWithUnacknowledgedTask().containsValue(tasks[3].getId()) - && remoteTaskRunner.getWorkersWithUnacknowledgedTask().containsValue(tasks[4].getId())) { - remoteTaskRunner.shutdown("task2", "test"); - mockWorkerRunningAndCompletionSuccessfulTasks(tasks[4], tasks[3]); - Assert.assertEquals(TaskState.SUCCESS, results[4].get().getStatusCode()); - Assert.assertEquals(TaskState.SUCCESS, results[3].get().getStatusCode()); - } else if (remoteTaskRunner.getWorkersWithUnacknowledgedTask().containsValue(tasks[4].getId()) - && remoteTaskRunner.getWorkersWithUnacknowledgedTask().containsValue(tasks[2].getId())) { - remoteTaskRunner.shutdown("task3", "test"); - mockWorkerRunningAndCompletionSuccessfulTasks(tasks[4], tasks[2]); - Assert.assertEquals(TaskState.SUCCESS, results[4].get().getStatusCode()); - Assert.assertEquals(TaskState.SUCCESS, results[2].get().getStatusCode()); - } else { - throw new ISE("two out of three tasks 2,3 and 4 must be waiting for ack."); - } - - //ensure that RTR is doing OK and still making progress - tasks[5] = TestTasks.unending("task5"); - results[5] = remoteTaskRunner.run(tasks[5]); - waitForOneWorkerToHaveUnackedTasks(); - 
if (rtrTestUtils.taskAssigned("worker0", tasks[5].getId())) { - rtrTestUtils.mockWorkerRunningTask("worker0", tasks[5]); - rtrTestUtils.mockWorkerCompleteSuccessfulTask("worker0", tasks[5]); - } else { - rtrTestUtils.mockWorkerRunningTask("worker1", tasks[5]); - rtrTestUtils.mockWorkerCompleteSuccessfulTask("worker1", tasks[5]); - } - Assert.assertEquals(TaskState.SUCCESS, results[5].get().getStatusCode()); - } - - private void mockWorkerRunningAndCompletionSuccessfulTasks(Task t1, Task t2) throws Exception - { - if (rtrTestUtils.taskAssigned("worker0", t1.getId())) { - rtrTestUtils.mockWorkerRunningTask("worker0", t1); - rtrTestUtils.mockWorkerCompleteSuccessfulTask("worker0", t1); - rtrTestUtils.mockWorkerRunningTask("worker1", t2); - rtrTestUtils.mockWorkerCompleteSuccessfulTask("worker1", t2); - } else { - rtrTestUtils.mockWorkerRunningTask("worker1", t1); - rtrTestUtils.mockWorkerCompleteSuccessfulTask("worker1", t1); - rtrTestUtils.mockWorkerRunningTask("worker0", t2); - rtrTestUtils.mockWorkerCompleteSuccessfulTask("worker0", t2); - } - } - - private void waitForOneWorkerToHaveUnackedTasks() throws Exception - { - while (remoteTaskRunner.getWorkersWithUnacknowledgedTask().size() < 1) { - Thread.sleep(5); - } - - ZooKeeper zk = rtrTestUtils.getCuratorFramework().getZookeeperClient().getZooKeeper(); - while (zk.getChildren(RemoteTaskRunnerTestUtils.TASKS_PATH + "/worker0", false).size() < 1 - && zk.getChildren(RemoteTaskRunnerTestUtils.TASKS_PATH + "/worker1", false).size() < 1) { - Thread.sleep(5); - } - } - - private void waitForBothWorkersToHaveUnackedTasks() throws Exception - { - while (remoteTaskRunner.getWorkersWithUnacknowledgedTask().size() < 2) { - Thread.sleep(5); - } - - ZooKeeper zk = rtrTestUtils.getCuratorFramework().getZookeeperClient().getZooKeeper(); - while (zk.getChildren(RemoteTaskRunnerTestUtils.TASKS_PATH + "/worker0", false).size() < 1 - || zk.getChildren(RemoteTaskRunnerTestUtils.TASKS_PATH + "/worker1", false).size() < 1) { - 
Thread.sleep(5); - } - } -} diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/RemoteTaskRunnerTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/RemoteTaskRunnerTest.java deleted file mode 100644 index 2a94d4f21ed1..000000000000 --- a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/RemoteTaskRunnerTest.java +++ /dev/null @@ -1,1303 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.druid.indexing.overlord; - -import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.common.base.Function; -import com.google.common.base.Joiner; -import com.google.common.base.Optional; -import com.google.common.base.Predicate; -import com.google.common.collect.ImmutableSet; -import com.google.common.collect.Iterables; -import com.google.common.collect.Lists; -import com.google.common.collect.Sets; -import com.google.common.io.ByteStreams; -import com.google.common.util.concurrent.Futures; -import com.google.common.util.concurrent.ListenableFuture; -import org.apache.curator.framework.CuratorFramework; -import org.apache.curator.framework.api.CuratorWatcher; -import org.apache.curator.framework.recipes.cache.PathChildrenCache; -import org.apache.druid.indexer.TaskLocation; -import org.apache.druid.indexer.TaskState; -import org.apache.druid.indexer.TaskStatus; -import org.apache.druid.indexing.common.IndexingServiceCondition; -import org.apache.druid.indexing.common.TaskLockType; -import org.apache.druid.indexing.common.TestIndexTask; -import org.apache.druid.indexing.common.TestTasks; -import org.apache.druid.indexing.common.TestUtils; -import org.apache.druid.indexing.common.actions.SegmentTransactionalAppendAction; -import org.apache.druid.indexing.common.actions.SegmentTransactionalInsertAction; -import org.apache.druid.indexing.common.actions.SegmentTransactionalReplaceAction; -import org.apache.druid.indexing.common.task.Task; -import org.apache.druid.indexing.common.task.TaskResource; -import org.apache.druid.indexing.overlord.config.RemoteTaskRunnerConfig; -import org.apache.druid.indexing.overlord.setup.DefaultWorkerBehaviorConfig; -import org.apache.druid.indexing.overlord.setup.EqualDistributionWorkerSelectStrategy; -import org.apache.druid.indexing.worker.Worker; -import org.apache.druid.indexing.worker.config.WorkerConfig; -import org.apache.druid.java.util.common.DateTimes; -import 
org.apache.druid.java.util.common.StringUtils; -import org.apache.druid.java.util.common.logger.Logger; -import org.apache.druid.java.util.emitter.EmittingLogger; -import org.apache.druid.java.util.emitter.service.ServiceEmitter; -import org.apache.druid.java.util.http.client.HttpClient; -import org.apache.druid.java.util.http.client.Request; -import org.apache.druid.java.util.http.client.response.InputStreamFullResponseHolder; -import org.apache.druid.server.metrics.NoopServiceEmitter; -import org.apache.druid.testing.DeadlockDetectingTimeout; -import org.apache.zookeeper.Watcher; -import org.easymock.Capture; -import org.easymock.EasyMock; -import org.jboss.netty.handler.codec.http.DefaultHttpResponse; -import org.jboss.netty.handler.codec.http.HttpResponseStatus; -import org.jboss.netty.handler.codec.http.HttpVersion; -import org.joda.time.Period; -import org.junit.After; -import org.junit.Assert; -import org.junit.Before; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TestRule; -import org.junit.rules.TestWatcher; -import org.junit.runner.Description; -import org.mockito.Mockito; - -import java.io.ByteArrayOutputStream; -import java.io.InputStream; -import java.util.ArrayList; -import java.util.Collection; -import java.util.Collections; -import java.util.Map; -import java.util.Set; -import java.util.concurrent.CountDownLatch; -import java.util.concurrent.Future; -import java.util.concurrent.TimeUnit; - -public class RemoteTaskRunnerTest -{ - private static final Logger LOG = new Logger(RemoteTaskRunnerTest.class); - private static final Joiner JOINER = RemoteTaskRunnerTestUtils.JOINER; - private static final String WORKER_HOST = "worker"; - private static final String ANNOUCEMENTS_PATH = JOINER.join( - RemoteTaskRunnerTestUtils.ANNOUNCEMENTS_PATH, - WORKER_HOST - ); - private static final String STATUS_PATH = JOINER.join(RemoteTaskRunnerTestUtils.STATUS_PATH, WORKER_HOST); - - // higher timeout to reduce flakiness on CI pipeline - 
private static final Period TIMEOUT_PERIOD = Period.millis(30000); - - private RemoteTaskRunner remoteTaskRunner; - private HttpClient httpClient; - private RemoteTaskRunnerTestUtils rtrTestUtils = new RemoteTaskRunnerTestUtils(); - private ObjectMapper jsonMapper; - private CuratorFramework cf; - - private Task task; - private Worker worker; - - @Rule - public TestRule watcher = new TestWatcher() - { - @Override - protected void starting(Description description) - { - LOG.info("Starting test: " + description.getMethodName()); - } - - @Override - protected void finished(Description description) - { - LOG.info("Finishing test: " + description.getMethodName()); - } - }; - - @Rule - public final TestRule timeout = new DeadlockDetectingTimeout(60, TimeUnit.SECONDS); - - @Before - public void setUp() throws Exception - { - rtrTestUtils.setUp(); - jsonMapper = rtrTestUtils.getObjectMapper(); - cf = rtrTestUtils.getCuratorFramework(); - - task = TestTasks.unending("task id with spaces"); - EmittingLogger.registerEmitter(new NoopServiceEmitter()); - } - - @After - public void tearDown() throws Exception - { - if (remoteTaskRunner != null) { - remoteTaskRunner.stop(); - } - rtrTestUtils.tearDown(); - } - - @Test - public void testRun() throws Exception - { - doSetup(); - - Assert.assertEquals(3, remoteTaskRunner.getTotalTaskSlotCount().get(WorkerConfig.DEFAULT_CATEGORY).longValue()); - Assert.assertEquals(3, remoteTaskRunner.getIdleTaskSlotCount().get(WorkerConfig.DEFAULT_CATEGORY).longValue()); - Assert.assertEquals(0, remoteTaskRunner.getUsedTaskSlotCount().get(WorkerConfig.DEFAULT_CATEGORY).longValue()); - Assert.assertEquals(3, remoteTaskRunner.getTotalCapacity()); - Assert.assertEquals(-1, remoteTaskRunner.getMaximumCapacityWithAutoscale()); - Assert.assertEquals(0, remoteTaskRunner.getUsedCapacity()); - - - ListenableFuture result = remoteTaskRunner.run(task); - - Assert.assertTrue(taskAnnounced(task.getId())); - mockWorkerRunningTask(task); - 
Assert.assertTrue(workerRunningTask(task.getId())); - - mockWorkerCompleteSuccessfulTask(task); - Assert.assertTrue(workerCompletedTask(result)); - Assert.assertEquals(task.getId(), result.get().getId()); - Assert.assertEquals(TaskState.SUCCESS, result.get().getStatusCode()); - - cf.delete().guaranteed().forPath(JOINER.join(STATUS_PATH, task.getId())); - - Assert.assertEquals(3, remoteTaskRunner.getTotalTaskSlotCount().get(WorkerConfig.DEFAULT_CATEGORY).longValue()); - Assert.assertEquals(3, remoteTaskRunner.getIdleTaskSlotCount().get(WorkerConfig.DEFAULT_CATEGORY).longValue()); - Assert.assertEquals(0, remoteTaskRunner.getUsedTaskSlotCount().get(WorkerConfig.DEFAULT_CATEGORY).longValue()); - Assert.assertEquals(3, remoteTaskRunner.getTotalCapacity()); - Assert.assertEquals(0, remoteTaskRunner.getUsedCapacity()); - } - - @Test - public void testRunTaskThatAlreadyPending() throws Exception - { - doSetup(); - remoteTaskRunner.addPendingTask(task); - remoteTaskRunner.runPendingTasks(); - Assert.assertFalse(workerRunningTask(task.getId())); - - ListenableFuture result = remoteTaskRunner.run(task); - - Assert.assertTrue(taskAnnounced(task.getId())); - mockWorkerRunningTask(task); - Assert.assertTrue(workerRunningTask(task.getId())); - mockWorkerCompleteSuccessfulTask(task); - Assert.assertTrue(workerCompletedTask(result)); - - Assert.assertEquals(task.getId(), result.get().getId()); - Assert.assertEquals(TaskState.SUCCESS, result.get().getStatusCode()); - } - - @Test - public void testStartWithNoWorker() - { - makeRemoteTaskRunner(new TestRemoteTaskRunnerConfig(TIMEOUT_PERIOD)); - } - - @Test - public void testRunExistingTaskThatHasntStartedRunning() throws Exception - { - doSetup(); - - remoteTaskRunner.run(task); - Assert.assertTrue(taskAnnounced(task.getId())); - - ListenableFuture result = remoteTaskRunner.run(task); - - Assert.assertFalse(result.isDone()); - mockWorkerRunningTask(task); - Assert.assertTrue(workerRunningTask(task.getId())); - 
mockWorkerCompleteSuccessfulTask(task); - Assert.assertTrue(workerCompletedTask(result)); - - Assert.assertEquals(task.getId(), result.get().getId()); - Assert.assertEquals(TaskState.SUCCESS, result.get().getStatusCode()); - } - - @Test - public void testRunExistingTaskThatHasStartedRunning() throws Exception - { - doSetup(); - - remoteTaskRunner.run(task); - Assert.assertTrue(taskAnnounced(task.getId())); - mockWorkerRunningTask(task); - Assert.assertTrue(workerRunningTask(task.getId())); - - ListenableFuture result = remoteTaskRunner.run(task); - - Assert.assertFalse(result.isDone()); - - mockWorkerCompleteSuccessfulTask(task); - Assert.assertTrue(workerCompletedTask(result)); - - Assert.assertEquals(task.getId(), result.get().getId()); - Assert.assertEquals(TaskState.SUCCESS, result.get().getStatusCode()); - } - - @Test - public void testRunTooMuchZKData() throws Exception - { - ServiceEmitter emitter = EasyMock.createMock(ServiceEmitter.class); - EmittingLogger.registerEmitter(emitter); - EasyMock.replay(emitter); - - doSetup(); - - remoteTaskRunner.run(TestTasks.unending(new String(new char[5000]))); - - EasyMock.verify(emitter); - } - - @Test - public void testRunSameAvailabilityGroup() throws Exception - { - doSetup(); - - TestIndexTask task1 = new TestIndexTask( - "rt1", - new TaskResource("rt1", 1), - "foo", - TaskStatus.running("rt1"), - jsonMapper - ); - remoteTaskRunner.run(task1); - Assert.assertTrue(taskAnnounced(task1.getId())); - mockWorkerRunningTask(task1); - - TestIndexTask task2 = new TestIndexTask( - "rt2", - new TaskResource("rt1", 1), - "foo", - TaskStatus.running("rt2"), - jsonMapper - ); - remoteTaskRunner.run(task2); - - TestIndexTask task3 = new TestIndexTask( - "rt3", - new TaskResource("rt2", 1), - "foo", - TaskStatus.running("rt3"), - jsonMapper - ); - remoteTaskRunner.run(task3); - - Assert.assertTrue( - TestUtils.conditionValid( - new IndexingServiceCondition() - { - @Override - public boolean isValid() - { - return 
remoteTaskRunner.getRunningTasks().size() == 2; - } - } - ) - ); - - Assert.assertTrue( - TestUtils.conditionValid( - new IndexingServiceCondition() - { - @Override - public boolean isValid() - { - return remoteTaskRunner.getPendingTasks().size() == 1; - } - } - ) - ); - - Assert.assertTrue(remoteTaskRunner.getPendingTasks().iterator().next().getTaskId().equals("rt2")); - } - - @Test - public void testRunWithCapacity() throws Exception - { - doSetup(); - - TestIndexTask task1 = new TestIndexTask( - "rt1", - new TaskResource("rt1", 1), - "foo", - TaskStatus.running("rt1"), - jsonMapper - ); - remoteTaskRunner.run(task1); - Assert.assertTrue(taskAnnounced(task1.getId())); - mockWorkerRunningTask(task1); - - TestIndexTask task2 = new TestIndexTask( - "rt2", - new TaskResource("rt2", 3), - "foo", - TaskStatus.running("rt2"), - jsonMapper - ); - remoteTaskRunner.run(task2); - - TestIndexTask task3 = new TestIndexTask( - "rt3", - new TaskResource("rt3", 2), - "foo", - TaskStatus.running("rt3"), - jsonMapper - ); - remoteTaskRunner.run(task3); - Assert.assertTrue(taskAnnounced(task3.getId())); - mockWorkerRunningTask(task3); - - Assert.assertTrue( - TestUtils.conditionValid( - new IndexingServiceCondition() - { - @Override - public boolean isValid() - { - return remoteTaskRunner.getRunningTasks().size() == 2; - } - } - ) - ); - - Assert.assertTrue( - TestUtils.conditionValid( - new IndexingServiceCondition() - { - @Override - public boolean isValid() - { - return remoteTaskRunner.getPendingTasks().size() == 1; - } - } - ) - ); - - Assert.assertTrue(remoteTaskRunner.getPendingTasks().iterator().next().getTaskId().equals("rt2")); - } - - @Test - public void testStatusRemoved() throws Exception - { - doSetup(); - CountDownLatch deletionLatch = new CountDownLatch(1); - ListenableFuture future = remoteTaskRunner.run(task); - Assert.assertTrue(taskAnnounced(task.getId())); - mockWorkerRunningTask(task); - - Assert.assertTrue(workerRunningTask(task.getId())); - - 
Assert.assertTrue(remoteTaskRunner.getRunningTasks().iterator().next().getTaskId().equals(task.getId())); - - String taskStatusPath = JOINER.join(STATUS_PATH, task.getId()); - cf.checkExists().usingWatcher((CuratorWatcher) event -> { - if (event.getType() == Watcher.Event.EventType.NodeDeleted) { - deletionLatch.countDown(); - } - }).forPath(taskStatusPath); - - cf.delete().forPath(taskStatusPath); - - Assert.assertTrue("Deletion event not received", deletionLatch.await(5, TimeUnit.SECONDS)); - - TaskStatus status = future.get(); - - Assert.assertEquals(status.getStatusCode(), TaskState.FAILED); - Assert.assertNotNull(status.getErrorMsg()); - Assert.assertTrue(status.getErrorMsg().contains("The worker that this task was assigned disappeared")); - } - - @Test - public void testBootstrap() throws Exception - { - makeWorker(); - - RemoteTaskRunnerConfig rtrConfig = new TestRemoteTaskRunnerConfig(TIMEOUT_PERIOD); - rtrConfig.setMaxPercentageBlacklistWorkers(100); - - makeRemoteTaskRunner(rtrConfig); - - TestIndexTask task1 = new TestIndexTask( - "first", - new TaskResource("first", 1), - "foo", - TaskStatus.running("first"), - jsonMapper - ); - remoteTaskRunner.run(task1); - Assert.assertTrue(taskAnnounced(task1.getId())); - mockWorkerRunningTask(task1); - - TestIndexTask task = new TestIndexTask( - "second", - new TaskResource("task", 2), - "foo", - TaskStatus.running("task"), - jsonMapper - ); - remoteTaskRunner.run(task); - - TestIndexTask task2 = new TestIndexTask( - "second", - new TaskResource("second", 2), - "foo", - TaskStatus.running("second"), - jsonMapper - ); - remoteTaskRunner.run(task2); - Assert.assertTrue(taskAnnounced(task2.getId())); - mockWorkerRunningTask(task2); - - final Set runningTasks = Sets.newHashSet( - Iterables.transform( - remoteTaskRunner.getRunningTasks(), - new Function<>() - { - @Override - public String apply(RemoteTaskRunnerWorkItem input) - { - return input.getTaskId(); - } - } - ) - ); - Assert.assertEquals("runningTasks", 
ImmutableSet.of("first", "second"), runningTasks); - } - - @Test - public void testRunWithTaskComplete() throws Exception - { - doSetup(); - TestIndexTask task1 = new TestIndexTask( - "testTask", - new TaskResource("testTask", 2), - "foo", - TaskStatus.success("testTask"), - jsonMapper - ); - remoteTaskRunner.run(task1); - Assert.assertTrue(taskAnnounced(task1.getId())); - mockWorkerRunningTask(task1); - mockWorkerCompleteSuccessfulTask(task1); - - Assert.assertEquals(TaskState.SUCCESS, remoteTaskRunner.run(task1).get().getStatusCode()); - } - - @Test - public void testWorkerRemoved() throws Exception - { - doSetup(); - Assert.assertEquals(3, remoteTaskRunner.getTotalTaskSlotCount().get(WorkerConfig.DEFAULT_CATEGORY).longValue()); - Assert.assertEquals(3, remoteTaskRunner.getIdleTaskSlotCount().get(WorkerConfig.DEFAULT_CATEGORY).longValue()); - - Future future = remoteTaskRunner.run(task); - - Assert.assertTrue(taskAnnounced(task.getId())); - mockWorkerRunningTask(task); - - Assert.assertTrue(workerRunningTask(task.getId())); - - cf.delete().forPath(ANNOUCEMENTS_PATH); - - TaskStatus status = future.get(); - - Assert.assertEquals(TaskState.FAILED, status.getStatusCode()); - Assert.assertNotNull(status.getErrorMsg()); - Assert.assertTrue(status.getErrorMsg().contains("Canceled for worker cleanup")); - RemoteTaskRunnerConfig config = remoteTaskRunner.getRemoteTaskRunnerConfig(); - Assert.assertTrue( - TestUtils.conditionValid( - new IndexingServiceCondition() - { - @Override - public boolean isValid() - { - return remoteTaskRunner.getRemovedWorkerCleanups().isEmpty(); - } - }, - // cleanup task is independently scheduled by event listener. we need to wait some more time. 
- config.getTaskCleanupTimeout().toStandardDuration().getMillis() * 2 - ) - ); - Assert.assertNull(cf.checkExists().forPath(STATUS_PATH)); - - Assert.assertFalse(remoteTaskRunner.getTotalTaskSlotCount().containsKey(WorkerConfig.DEFAULT_CATEGORY)); - Assert.assertFalse(remoteTaskRunner.getIdleTaskSlotCount().containsKey(WorkerConfig.DEFAULT_CATEGORY)); - } - - @Test - public void testWorkerDisabled() throws Exception - { - doSetup(); - final ListenableFuture result = remoteTaskRunner.run(task); - - Assert.assertTrue(taskAnnounced(task.getId())); - mockWorkerRunningTask(task); - Assert.assertTrue(workerRunningTask(task.getId())); - - // Disable while task running - disableWorker(); - - // Continue test - mockWorkerCompleteSuccessfulTask(task); - Assert.assertTrue(workerCompletedTask(result)); - Assert.assertEquals(task.getId(), result.get().getId()); - Assert.assertEquals(TaskState.SUCCESS, result.get().getStatusCode()); - - // Confirm RTR thinks the worker is disabled. - Assert.assertTrue(Iterables.getOnlyElement(remoteTaskRunner.getWorkers()).getWorker().isDisabled()); - } - - @Test - public void testRestartRemoteTaskRunner() throws Exception - { - doSetup(); - remoteTaskRunner.run(task); - - Assert.assertTrue(taskAnnounced(task.getId())); - mockWorkerRunningTask(task); - Assert.assertTrue(workerRunningTask(task.getId())); - - remoteTaskRunner.stop(); - makeRemoteTaskRunner(new TestRemoteTaskRunnerConfig(TIMEOUT_PERIOD)); - final RemoteTaskRunnerWorkItem newWorkItem = remoteTaskRunner - .getKnownTasks() - .stream() - .filter(workItem -> workItem.getTaskId().equals(task.getId())) - .findFirst() - .orElse(null); - final ListenableFuture result = newWorkItem.getResult(); - - mockWorkerCompleteSuccessfulTask(task); - Assert.assertTrue(workerCompletedTask(result)); - - Assert.assertEquals(task.getId(), result.get().getId()); - Assert.assertEquals(TaskState.SUCCESS, result.get().getStatusCode()); - } - - @Test - public void testRunPendingTaskFailToAssignTask() throws 
Exception - { - doSetup(); - Thread.sleep(100); - RemoteTaskRunnerWorkItem originalItem = remoteTaskRunner.addPendingTask(task); - // modify taskId to make task assignment failed - RemoteTaskRunnerWorkItem wankyItem = Mockito.mock(RemoteTaskRunnerWorkItem.class); - Mockito.when(wankyItem.getTaskId()).thenReturn(originalItem.getTaskId()).thenReturn("wrongId"); - remoteTaskRunner.runPendingTask(wankyItem); - TaskStatus taskStatus = originalItem.getResult().get(0, TimeUnit.MILLISECONDS); - Assert.assertEquals(TaskState.FAILED, taskStatus.getStatusCode()); - Assert.assertEquals( - "Failed to assign this task. See overlord logs for more details.", - taskStatus.getErrorMsg() - ); - } - - @Test - public void testRunPendingTaskTimeoutToAssign() throws Exception - { - makeWorker(); - makeRemoteTaskRunner(new TestRemoteTaskRunnerConfig(TIMEOUT_PERIOD)); - RemoteTaskRunnerWorkItem workItem = remoteTaskRunner.addPendingTask(task); - remoteTaskRunner.runPendingTask(workItem); - TaskStatus taskStatus = workItem.getResult().get(0, TimeUnit.MILLISECONDS); - Assert.assertEquals(TaskState.FAILED, taskStatus.getStatusCode()); - Assert.assertNotNull(taskStatus.getErrorMsg()); - Assert.assertTrue( - taskStatus.getErrorMsg().startsWith("The worker that this task is assigned did not start it in timeout") - ); - } - - @Test - public void testGetMaximumCapacity_noWorkerConfig() - { - httpClient = EasyMock.createMock(HttpClient.class); - remoteTaskRunner = rtrTestUtils.makeRemoteTaskRunner( - new TestRemoteTaskRunnerConfig(TIMEOUT_PERIOD), - new TestProvisioningStrategy<>(), - httpClient, - null - ); - Assert.assertEquals(-1, remoteTaskRunner.getMaximumCapacityWithAutoscale()); - } - - @Test - public void testGetMaximumCapacity_noAutoScaler() - { - httpClient = EasyMock.createMock(HttpClient.class); - remoteTaskRunner = rtrTestUtils.makeRemoteTaskRunner( - new TestRemoteTaskRunnerConfig(TIMEOUT_PERIOD), - new TestProvisioningStrategy<>(), - httpClient, - new DefaultWorkerBehaviorConfig(new 
EqualDistributionWorkerSelectStrategy(null, null), null) - ); - Assert.assertEquals(-1, remoteTaskRunner.getMaximumCapacityWithAutoscale()); - } - - @Test - public void testGetMaximumCapacity_withAutoScaler() - { - httpClient = EasyMock.createMock(HttpClient.class); - remoteTaskRunner = rtrTestUtils.makeRemoteTaskRunner( - new TestRemoteTaskRunnerConfig(TIMEOUT_PERIOD), - new TestProvisioningStrategy<>(), - httpClient, - DefaultWorkerBehaviorConfig.defaultConfig() - ); - // Default autoscaler has max workers of 0 - Assert.assertEquals(0, remoteTaskRunner.getMaximumCapacityWithAutoscale()); - } - - private void doSetup() throws Exception - { - makeWorker(); - makeRemoteTaskRunner(new TestRemoteTaskRunnerConfig(TIMEOUT_PERIOD)); - } - - private void makeRemoteTaskRunner(RemoteTaskRunnerConfig config) - { - httpClient = EasyMock.createMock(HttpClient.class); - remoteTaskRunner = rtrTestUtils.makeRemoteTaskRunner(config, httpClient); - } - - private void makeWorker() throws Exception - { - worker = rtrTestUtils.makeWorker(WORKER_HOST, 3); - } - - private void disableWorker() throws Exception - { - rtrTestUtils.disableWorker(worker); - } - - private boolean taskAnnounced(final String taskId) - { - return rtrTestUtils.taskAssigned(WORKER_HOST, taskId); - } - - private boolean workerRunningTask(final String taskId) - { - return rtrTestUtils.workerRunningTask(WORKER_HOST, taskId); - } - - private boolean workerCompletedTask(final ListenableFuture result) - { - return TestUtils.conditionValid( - new IndexingServiceCondition() - { - @Override - public boolean isValid() - { - return result.isDone(); - } - } - ); - } - - private void mockWorkerRunningTask(final Task task) throws Exception - { - rtrTestUtils.mockWorkerRunningTask("worker", task); - } - - private void mockWorkerCompleteSuccessfulTask(final Task task) throws Exception - { - rtrTestUtils.mockWorkerCompleteSuccessfulTask("worker", task); - } - - private void mockWorkerCompleteFailedTask(final Task task) throws 
Exception - { - rtrTestUtils.mockWorkerCompleteFailedTask("worker", task); - } - - @Test - public void testFindLazyWorkerTaskRunning() throws Exception - { - doSetup(); - remoteTaskRunner.start(); - remoteTaskRunner.run(task); - Assert.assertTrue(taskAnnounced(task.getId())); - mockWorkerRunningTask(task); - Collection lazyworkers = remoteTaskRunner.markWorkersLazy( - new Predicate<>() - { - @Override - public boolean apply(ImmutableWorkerInfo input) - { - return true; - } - }, 1 - ); - Assert.assertTrue(lazyworkers.isEmpty()); - Assert.assertTrue(remoteTaskRunner.getLazyWorkers().isEmpty()); - Assert.assertEquals(1, remoteTaskRunner.getWorkers().size()); - } - - @Test - public void testFindLazyWorkerForWorkerJustAssignedTask() throws Exception - { - doSetup(); - remoteTaskRunner.run(task); - Assert.assertTrue(taskAnnounced(task.getId())); - Collection lazyworkers = remoteTaskRunner.markWorkersLazy( - new Predicate<>() - { - @Override - public boolean apply(ImmutableWorkerInfo input) - { - return true; - } - }, 1 - ); - Assert.assertTrue(lazyworkers.isEmpty()); - Assert.assertTrue(remoteTaskRunner.getLazyWorkers().isEmpty()); - Assert.assertEquals(1, remoteTaskRunner.getWorkers().size()); - } - - @Test - public void testFindLazyWorkerNotRunningAnyTask() throws Exception - { - doSetup(); - Collection lazyworkers = remoteTaskRunner.markWorkersLazy( - new Predicate<>() - { - @Override - public boolean apply(ImmutableWorkerInfo input) - { - return true; - } - }, 1 - ); - Assert.assertEquals(1, lazyworkers.size()); - Assert.assertEquals(1, remoteTaskRunner.getLazyWorkers().size()); - Assert.assertEquals(3, remoteTaskRunner.getTotalTaskSlotCount().get(WorkerConfig.DEFAULT_CATEGORY).longValue()); - Assert.assertFalse(remoteTaskRunner.getIdleTaskSlotCount().containsKey(WorkerConfig.DEFAULT_CATEGORY)); - Assert.assertEquals(3, remoteTaskRunner.getLazyTaskSlotCount().get(WorkerConfig.DEFAULT_CATEGORY).longValue()); - } - - @Test - public void 
testFindLazyWorkerNotRunningAnyTaskButWithZeroMaxWorkers() throws Exception - { - doSetup(); - Collection lazyworkers = remoteTaskRunner.markWorkersLazy( - new Predicate<>() - { - @Override - public boolean apply(ImmutableWorkerInfo input) - { - return true; - } - }, 0 - ); - Assert.assertEquals(0, lazyworkers.size()); - Assert.assertEquals(0, remoteTaskRunner.getLazyWorkers().size()); - } - - @Test - public void testWorkerZKReconnect() throws Exception - { - makeWorker(); - makeRemoteTaskRunner(new TestRemoteTaskRunnerConfig(new Period("PT5M"))); - Future future = remoteTaskRunner.run(task); - - Assert.assertTrue(taskAnnounced(task.getId())); - mockWorkerRunningTask(task); - - Assert.assertTrue(workerRunningTask(task.getId())); - byte[] bytes = cf.getData().forPath(ANNOUCEMENTS_PATH); - cf.delete().forPath(ANNOUCEMENTS_PATH); - // worker task cleanup scheduled - Assert.assertTrue( - TestUtils.conditionValid( - new IndexingServiceCondition() - { - @Override - public boolean isValid() - { - return remoteTaskRunner.getRemovedWorkerCleanups().containsKey(worker.getHost()); - } - } - ) - ); - - // Worker got reconnected - cf.create().forPath(ANNOUCEMENTS_PATH, bytes); - - // worker task cleanup should get cancelled and removed - Assert.assertTrue( - TestUtils.conditionValid( - new IndexingServiceCondition() - { - @Override - public boolean isValid() - { - return !remoteTaskRunner.getRemovedWorkerCleanups().containsKey(worker.getHost()); - } - } - ) - ); - - mockWorkerCompleteSuccessfulTask(task); - TaskStatus status = future.get(); - Assert.assertEquals(status.getStatusCode(), TaskState.SUCCESS); - Assert.assertEquals(TaskState.SUCCESS, status.getStatusCode()); - } - - @Test - public void testSortByInsertionTime() - { - RemoteTaskRunnerWorkItem item1 = new RemoteTaskRunnerWorkItem("b", "t", null, null, "ds_test") - .withQueueInsertionTime(DateTimes.of("2015-01-01T00:00:03Z")); - RemoteTaskRunnerWorkItem item2 = new RemoteTaskRunnerWorkItem("a", "t", null, null, 
"ds_test") - .withQueueInsertionTime(DateTimes.of("2015-01-01T00:00:02Z")); - RemoteTaskRunnerWorkItem item3 = new RemoteTaskRunnerWorkItem("c", "t", null, null, "ds_test") - .withQueueInsertionTime(DateTimes.of("2015-01-01T00:00:01Z")); - ArrayList workItems = Lists.newArrayList(item1, item2, item3); - RemoteTaskRunner.sortByInsertionTime(workItems); - Assert.assertEquals(item3, workItems.get(0)); - Assert.assertEquals(item2, workItems.get(1)); - Assert.assertEquals(item1, workItems.get(2)); - } - - @Test - public void testBlacklistZKWorkers() throws Exception - { - makeWorker(); - - RemoteTaskRunnerConfig rtrConfig = new TestRemoteTaskRunnerConfig(TIMEOUT_PERIOD); - rtrConfig.setMaxPercentageBlacklistWorkers(100); - - makeRemoteTaskRunner(rtrConfig); - - TestIndexTask task1 = new TestIndexTask( - "test_index1", - new TaskResource("test_index1", 1), - "foo", - TaskStatus.success("test_index1"), - jsonMapper - ); - Future taskFuture1 = remoteTaskRunner.run(task1); - Assert.assertTrue(taskAnnounced(task1.getId())); - mockWorkerRunningTask(task1); - mockWorkerCompleteFailedTask(task1); - Assert.assertTrue(taskFuture1.get().isFailure()); - Assert.assertEquals(0, remoteTaskRunner.getBlackListedWorkers().size()); - Assert.assertEquals( - 1, - remoteTaskRunner.findWorkerRunningTask(task1.getId()).getContinuouslyFailedTasksCount() - ); - - TestIndexTask task2 = new TestIndexTask( - "test_index2", - new TaskResource("test_index2", 1), - "foo", - TaskStatus.running("test_index2"), - jsonMapper - ); - Future taskFuture2 = remoteTaskRunner.run(task2); - Assert.assertTrue(taskAnnounced(task2.getId())); - mockWorkerRunningTask(task2); - mockWorkerCompleteFailedTask(task2); - Assert.assertTrue(taskFuture2.get().isFailure()); - Assert.assertEquals(1, remoteTaskRunner.getBlackListedWorkers().size()); - Assert.assertEquals( - 2, - remoteTaskRunner.findWorkerRunningTask(task2.getId()).getContinuouslyFailedTasksCount() - ); - - ((RemoteTaskRunnerTestUtils.TestableRemoteTaskRunner) 
remoteTaskRunner) - .setCurrentTimeMillis(System.currentTimeMillis()); - remoteTaskRunner.checkBlackListedNodes(); - - Assert.assertEquals(1, remoteTaskRunner.getBlackListedWorkers().size()); - - ((RemoteTaskRunnerTestUtils.TestableRemoteTaskRunner) remoteTaskRunner) - .setCurrentTimeMillis(System.currentTimeMillis() + 2 * TIMEOUT_PERIOD.toStandardDuration().getMillis()); - remoteTaskRunner.checkBlackListedNodes(); - - // After backOffTime the nodes are removed from blacklist - Assert.assertEquals(0, remoteTaskRunner.getBlackListedWorkers().size()); - Assert.assertEquals( - 0, - remoteTaskRunner.findWorkerRunningTask(task2.getId()).getContinuouslyFailedTasksCount() - ); - - TestIndexTask task3 = new TestIndexTask( - "test_index3", - new TaskResource("test_index3", 1), - "foo", - TaskStatus.running("test_index3"), - jsonMapper - ); - Future taskFuture3 = remoteTaskRunner.run(task3); - Assert.assertTrue(taskAnnounced(task3.getId())); - mockWorkerRunningTask(task3); - mockWorkerCompleteSuccessfulTask(task3); - Assert.assertTrue(taskFuture3.get().isSuccess()); - Assert.assertEquals(0, remoteTaskRunner.getBlackListedWorkers().size()); - Assert.assertEquals( - 0, - remoteTaskRunner.findWorkerRunningTask(task3.getId()).getContinuouslyFailedTasksCount() - ); - } - - /** - * With 2 workers and maxPercentageBlacklistWorkers(25), no worker should be blacklisted even after exceeding - * maxRetriesBeforeBlacklist. 
- */ - @Test - public void testBlacklistZKWorkers25Percent() throws Exception - { - rtrTestUtils.makeWorker("worker", 10); - rtrTestUtils.makeWorker("worker2", 10); - - RemoteTaskRunnerConfig rtrConfig = new TestRemoteTaskRunnerConfig(TIMEOUT_PERIOD); - rtrConfig.setMaxPercentageBlacklistWorkers(25); - - makeRemoteTaskRunner(rtrConfig); - - String assignedWorker = null; - - for (int i = 1; i < 13; i++) { - String taskId = StringUtils.format("rt-%d", i); - TestIndexTask task = new TestIndexTask( - taskId, - new TaskResource(taskId, 1), - "foo", - TaskStatus.success(taskId), - jsonMapper - ); - - Future taskFuture = remoteTaskRunner.run(task); - - if (i == 1) { - if (rtrTestUtils.taskAssigned("worker2", task.getId())) { - assignedWorker = "worker2"; - } else { - assignedWorker = "worker"; - } - } - - Assert.assertTrue(rtrTestUtils.taskAssigned(assignedWorker, task.getId())); - rtrTestUtils.mockWorkerRunningTask(assignedWorker, task); - rtrTestUtils.mockWorkerCompleteFailedTask(assignedWorker, task); - - Assert.assertTrue(taskFuture.get().isFailure()); - Assert.assertEquals(0, remoteTaskRunner.getBlackListedWorkers().size()); - Assert.assertEquals( - i, - remoteTaskRunner.findWorkerId("worker").getContinuouslyFailedTasksCount() - + remoteTaskRunner.findWorkerId("worker2").getContinuouslyFailedTasksCount() - ); - } - } - - /** - * With 2 workers and maxPercentageBlacklistWorkers(50), one worker should get blacklisted after the second failure - * and the second worker should never be blacklisted even after exceeding maxRetriesBeforeBlacklist. 
- */ - @Test - public void testBlacklistZKWorkers50Percent() throws Exception - { - rtrTestUtils.makeWorker("worker", 10); - rtrTestUtils.makeWorker("worker2", 10); - - RemoteTaskRunnerConfig rtrConfig = new TestRemoteTaskRunnerConfig(TIMEOUT_PERIOD); - rtrConfig.setMaxPercentageBlacklistWorkers(50); - - makeRemoteTaskRunner(rtrConfig); - - String firstWorker = null; - String secondWorker = null; - - for (int i = 1; i < 13; i++) { - String taskId = StringUtils.format("rt-%d", i); - TestIndexTask task = new TestIndexTask( - taskId, - new TaskResource(taskId, 1), - "foo", - TaskStatus.success(taskId), - jsonMapper - ); - - Future taskFuture = remoteTaskRunner.run(task); - - if (i == 1) { - if (rtrTestUtils.taskAssigned("worker2", task.getId())) { - firstWorker = "worker2"; - secondWorker = "worker"; - } else { - firstWorker = "worker"; - secondWorker = "worker2"; - } - } - - final String expectedWorker = i > 2 ? secondWorker : firstWorker; - - Assert.assertTrue( - StringUtils.format("Task[%s] assigned to worker[%s]", i, expectedWorker), - rtrTestUtils.taskAssigned(expectedWorker, task.getId()) - ); - rtrTestUtils.mockWorkerRunningTask(expectedWorker, task); - rtrTestUtils.mockWorkerCompleteFailedTask(expectedWorker, task); - - Assert.assertTrue(taskFuture.get().isFailure()); - Assert.assertEquals( - StringUtils.format("Blacklisted workers after task[%s]", i), - i >= 2 ? 
1 : 0, - remoteTaskRunner.getBlackListedWorkers().size() - ); - Assert.assertEquals( - StringUtils.format("Continuously failed tasks after task[%s]", i), - i, - remoteTaskRunner.findWorkerId("worker").getContinuouslyFailedTasksCount() - + remoteTaskRunner.findWorkerId("worker2").getContinuouslyFailedTasksCount() - ); - } - } - - @Test - public void testSuccessfulTaskOnBlacklistedWorker() throws Exception - { - makeWorker(); - - RemoteTaskRunnerConfig rtrConfig = new TestRemoteTaskRunnerConfig(TIMEOUT_PERIOD); - rtrConfig.setMaxPercentageBlacklistWorkers(100); - - makeRemoteTaskRunner(rtrConfig); - - TestIndexTask task1 = new TestIndexTask( - "test_index1", new TaskResource("test_index1", 1), "foo", TaskStatus.success("test_index1"), jsonMapper - ); - TestIndexTask task2 = new TestIndexTask( - "test_index2", new TaskResource("test_index2", 1), "foo", TaskStatus.success("test_index2"), jsonMapper - ); - TestIndexTask task3 = new TestIndexTask( - "test_index3", new TaskResource("test_index3", 1), "foo", TaskStatus.success("test_index3"), jsonMapper - ); - - Future taskFuture1 = remoteTaskRunner.run(task1); - Assert.assertTrue(taskAnnounced(task1.getId())); - mockWorkerRunningTask(task1); - mockWorkerCompleteFailedTask(task1); - Assert.assertTrue(taskFuture1.get().isFailure()); - Assert.assertEquals(0, remoteTaskRunner.getBlackListedWorkers().size()); - Assert.assertFalse(remoteTaskRunner.getBlacklistedTaskSlotCount().containsKey(WorkerConfig.DEFAULT_CATEGORY)); - - Future taskFuture2 = remoteTaskRunner.run(task2); - Assert.assertTrue(taskAnnounced(task2.getId())); - mockWorkerRunningTask(task2); - Assert.assertFalse(remoteTaskRunner.getBlacklistedTaskSlotCount().containsKey(WorkerConfig.DEFAULT_CATEGORY)); - - Future taskFuture3 = remoteTaskRunner.run(task3); - Assert.assertTrue(taskAnnounced(task3.getId())); - mockWorkerRunningTask(task3); - mockWorkerCompleteFailedTask(task3); - Assert.assertTrue(taskFuture3.get().isFailure()); - Assert.assertEquals(1, 
remoteTaskRunner.getBlackListedWorkers().size()); - Assert.assertEquals( - 3, - remoteTaskRunner.getBlacklistedTaskSlotCount().get(WorkerConfig.DEFAULT_CATEGORY).longValue() - ); - - mockWorkerCompleteSuccessfulTask(task2); - Assert.assertTrue(taskFuture2.get().isSuccess()); - Assert.assertEquals(0, remoteTaskRunner.getBlackListedWorkers().size()); - Assert.assertFalse(remoteTaskRunner.getBlacklistedTaskSlotCount().containsKey(WorkerConfig.DEFAULT_CATEGORY)); - } - - @Test - public void testStatusListenerEventDataNullShouldNotThrowException() throws Exception - { - // Set up mock emitter to verify log alert when exception is thrown inside the status listener - Worker worker = EasyMock.createMock(Worker.class); - EasyMock.expect(worker.getHost()).andReturn("host").atLeastOnce(); - EasyMock.replay(worker); - ServiceEmitter emitter = EasyMock.createMock(ServiceEmitter.class); - Capture capturedArgument = Capture.newInstance(); - emitter.emit(EasyMock.capture(capturedArgument)); - EasyMock.expectLastCall().atLeastOnce(); - EmittingLogger.registerEmitter(emitter); - EasyMock.replay(emitter); - - PathChildrenCache cache = new PathChildrenCache(cf, "/test", true); - testStartWithNoWorker(); - cache.getListenable() - .addListener(remoteTaskRunner.getStatusListener(worker, new ZkWorker(worker, cache, jsonMapper), null)); - cache.start(PathChildrenCache.StartMode.POST_INITIALIZED_EVENT); - - // Status listener will recieve event with null data - Assert.assertTrue( - TestUtils.conditionValid(() -> cache.getCurrentData().size() == 1) - ); - - // Verify that the log emitter was called - EasyMock.verify(worker); - EasyMock.verify(emitter); - Map alertDataMap = capturedArgument.getValue().build(null).getDataMap(); - Assert.assertTrue(alertDataMap.containsKey("znode")); - Assert.assertNull(alertDataMap.get("znode")); - // Status listener should successfully completes without throwing exception - } - - @Test - public void testStreamTaskReportsUnknownTask() throws Exception - { - 
doSetup(); - Assert.assertEquals(Optional.absent(), remoteTaskRunner.streamTaskReports("foo")); - } - - @Test - public void testStreamTaskReportsKnownTask() throws Exception - { - doSetup(); - final Capture capturedRequest = Capture.newInstance(); - final String reportString = "my report!"; - final InputStreamFullResponseHolder reportResponse = taskReportResponse(HttpResponseStatus.OK, reportString); - EasyMock.expect(httpClient.go(EasyMock.capture(capturedRequest), EasyMock.anyObject())) - .andReturn(Futures.immediateFuture(reportResponse)); - EasyMock.replay(httpClient); - - ListenableFuture result = remoteTaskRunner.run(task); - Assert.assertTrue(taskAnnounced(task.getId())); - mockWorkerRunningTask(task); - - // Wait for the task to have a known location. - Assert.assertTrue( - TestUtils.conditionValid( - () -> - !remoteTaskRunner.getRunningTasks().isEmpty() - && !Iterables.getOnlyElement(remoteTaskRunner.getRunningTasks()) - .getLocation() - .equals(TaskLocation.unknown()) - ) - ); - - // Stream task reports from a running task. - final InputStream in = remoteTaskRunner.streamTaskReports(task.getId()).get(); - final ByteArrayOutputStream baos = new ByteArrayOutputStream(); - ByteStreams.copy(in, baos); - Assert.assertEquals(reportString, StringUtils.fromUtf8(baos.toByteArray())); - - // Stream task reports from a completed task. - mockWorkerCompleteSuccessfulTask(task); - Assert.assertTrue(workerCompletedTask(result)); - Assert.assertEquals(Optional.absent(), remoteTaskRunner.streamTaskReports(task.getId())); - - // Verify the HTTP request. 
- EasyMock.verify(httpClient); - Assert.assertEquals( - "http://dummy:9000/druid/worker/v1/chat/task%20id%20with%20spaces/liveReports", - capturedRequest.getValue().getUrl().toString() - ); - } - - @Test - public void testStreamTaskReportsUnavailableFromWorker() throws Exception - { - doSetup(); - final Capture capturedRequest = Capture.newInstance(); - final InputStreamFullResponseHolder reportResponse = taskReportResponse( - HttpResponseStatus.SERVICE_UNAVAILABLE, - "{\"error\":\"Can't find chatHandler for handler[task]\"}" - ); - EasyMock.expect(httpClient.go(EasyMock.capture(capturedRequest), EasyMock.anyObject())) - .andReturn(Futures.immediateFuture(reportResponse)); - EasyMock.replay(httpClient); - - remoteTaskRunner.run(task); - Assert.assertTrue(taskAnnounced(task.getId())); - mockWorkerRunningTask(task); - - // Wait for the task to have a known location. - Assert.assertTrue( - TestUtils.conditionValid( - () -> - !remoteTaskRunner.getRunningTasks().isEmpty() - && !Iterables.getOnlyElement(remoteTaskRunner.getRunningTasks()) - .getLocation() - .equals(TaskLocation.unknown()) - ) - ); - - Assert.assertEquals(Optional.absent(), remoteTaskRunner.streamTaskReports(task.getId())); - - EasyMock.verify(httpClient); - Assert.assertEquals( - "http://dummy:9000/druid/worker/v1/chat/task%20id%20with%20spaces/liveReports", - capturedRequest.getValue().getUrl().toString() - ); - } - - @Test - public void testBuildPublishAction() - { - TestIndexTask task = new TestIndexTask( - "test_index1", - new TaskResource("test_index1", 1), - "foo", - TaskStatus.success("test_index1"), - jsonMapper - ); - - Assert.assertEquals( - SegmentTransactionalAppendAction.class, - task.buildPublishActionForTest( - Collections.emptySet(), - Collections.emptySet(), - null, - TaskLockType.APPEND - ).getClass() - ); - - Assert.assertEquals( - SegmentTransactionalReplaceAction.class, - task.buildPublishActionForTest( - Collections.emptySet(), - Collections.emptySet(), - null, - 
TaskLockType.REPLACE - ).getClass() - ); - - Assert.assertEquals( - SegmentTransactionalInsertAction.class, - task.buildPublishActionForTest( - Collections.emptySet(), - Collections.emptySet(), - null, - TaskLockType.EXCLUSIVE - ).getClass() - ); - } - - private static InputStreamFullResponseHolder taskReportResponse( - final HttpResponseStatus status, - final String content - ) - { - final InputStreamFullResponseHolder response = new InputStreamFullResponseHolder( - new DefaultHttpResponse(HttpVersion.HTTP_1_1, status) - ); - response.addChunk(StringUtils.toUtf8(content)); - response.done(); - return response; - } -} diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/RemoteTaskRunnerTestUtils.java b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/RemoteTaskRunnerTestUtils.java deleted file mode 100644 index af33b6fc9196..000000000000 --- a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/RemoteTaskRunnerTestUtils.java +++ /dev/null @@ -1,307 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.druid.indexing.overlord; - -import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.common.base.Joiner; -import com.google.common.base.Preconditions; -import com.google.common.base.Supplier; -import org.apache.curator.framework.CuratorFramework; -import org.apache.curator.framework.CuratorFrameworkFactory; -import org.apache.curator.retry.ExponentialBackoffRetry; -import org.apache.curator.test.TestingCluster; -import org.apache.druid.common.guava.DSuppliers; -import org.apache.druid.curator.PotentiallyGzippedCompressionProvider; -import org.apache.druid.curator.cache.PathChildrenCacheFactory; -import org.apache.druid.indexer.TaskLocation; -import org.apache.druid.indexer.TaskStatus; -import org.apache.druid.indexing.common.IndexingServiceCondition; -import org.apache.druid.indexing.common.TestUtils; -import org.apache.druid.indexing.common.task.Task; -import org.apache.druid.indexing.overlord.autoscaling.NoopProvisioningStrategy; -import org.apache.druid.indexing.overlord.autoscaling.ProvisioningStrategy; -import org.apache.druid.indexing.overlord.config.RemoteTaskRunnerConfig; -import org.apache.druid.indexing.overlord.setup.DefaultWorkerBehaviorConfig; -import org.apache.druid.indexing.overlord.setup.WorkerBehaviorConfig; -import org.apache.druid.indexing.worker.TaskAnnouncement; -import org.apache.druid.indexing.worker.Worker; -import org.apache.druid.indexing.worker.config.WorkerConfig; -import org.apache.druid.java.util.common.StringUtils; -import org.apache.druid.java.util.http.client.HttpClient; -import org.apache.druid.server.initialization.IndexerZkConfig; -import org.apache.druid.server.initialization.ZkPathsConfig; -import org.apache.druid.server.metrics.NoopServiceEmitter; -import org.apache.zookeeper.CreateMode; - -import java.util.concurrent.atomic.AtomicReference; - -/** - */ -public class RemoteTaskRunnerTestUtils -{ - static final Joiner JOINER = Joiner.on("/"); - static final String BASE_PATH = 
"/test/druid"; - static final String ANNOUNCEMENTS_PATH = StringUtils.format("%s/indexer/announcements", BASE_PATH); - static final String TASKS_PATH = StringUtils.format("%s/indexer/tasks", BASE_PATH); - static final String STATUS_PATH = StringUtils.format("%s/indexer/status", BASE_PATH); - static final TaskLocation DUMMY_LOCATION = TaskLocation.create("dummy", 9000, -1); - - private TestingCluster testingCluster; - - private CuratorFramework cf; - private ObjectMapper jsonMapper; - - RemoteTaskRunnerTestUtils() - { - TestUtils testUtils = new TestUtils(); - jsonMapper = testUtils.getTestObjectMapper(); - } - - CuratorFramework getCuratorFramework() - { - return cf; - } - - ObjectMapper getObjectMapper() - { - return jsonMapper; - } - - void setUp() throws Exception - { - testingCluster = new TestingCluster(1); - testingCluster.start(); - - cf = CuratorFrameworkFactory.builder() - .connectString(testingCluster.getConnectString()) - .retryPolicy(new ExponentialBackoffRetry(1, 10)) - .compressionProvider(new PotentiallyGzippedCompressionProvider(false)) - .build(); - cf.start(); - cf.blockUntilConnected(); - cf.create().creatingParentsIfNeeded().forPath(BASE_PATH); - cf.create().creatingParentsIfNeeded().forPath(TASKS_PATH); - } - - void tearDown() throws Exception - { - cf.close(); - testingCluster.stop(); - } - - RemoteTaskRunner makeRemoteTaskRunner(RemoteTaskRunnerConfig config, HttpClient httpClient) - { - NoopProvisioningStrategy resourceManagement = new NoopProvisioningStrategy<>(); - return makeRemoteTaskRunner(config, resourceManagement, httpClient); - } - - public RemoteTaskRunner makeRemoteTaskRunner( - RemoteTaskRunnerConfig config, - ProvisioningStrategy provisioningStrategy, - HttpClient httpClient - ) - { - return makeRemoteTaskRunner( - config, - provisioningStrategy, - httpClient, - DefaultWorkerBehaviorConfig.defaultConfig() - ); - } - - public RemoteTaskRunner makeRemoteTaskRunner( - RemoteTaskRunnerConfig config, - ProvisioningStrategy 
provisioningStrategy, - HttpClient httpClient, - WorkerBehaviorConfig workerBehaviorConfig - ) - { - RemoteTaskRunner remoteTaskRunner = new TestableRemoteTaskRunner( - jsonMapper, - config, - new IndexerZkConfig( - new ZkPathsConfig() - { - @Override - public String getBase() - { - return BASE_PATH; - } - }, null, null, null, null - ), - cf, - new PathChildrenCacheFactory.Builder(), - httpClient, - DSuppliers.of(new AtomicReference<>(workerBehaviorConfig)), - provisioningStrategy - ); - - remoteTaskRunner.start(); - return remoteTaskRunner; - } - - Worker makeWorker(final String workerId, final int capacity) throws Exception - { - Worker worker = new Worker( - "http", - workerId, - workerId, - capacity, - "0", - WorkerConfig.DEFAULT_CATEGORY - ); - - cf.create().creatingParentsIfNeeded().withMode(CreateMode.EPHEMERAL).forPath( - JOINER.join(ANNOUNCEMENTS_PATH, workerId), - jsonMapper.writeValueAsBytes(worker) - ); - cf.create().creatingParentsIfNeeded().forPath(JOINER.join(TASKS_PATH, workerId)); - - return worker; - } - - void disableWorker(Worker worker) throws Exception - { - cf.setData().forPath( - JOINER.join(ANNOUNCEMENTS_PATH, worker.getHost()), - jsonMapper.writeValueAsBytes(new Worker( - worker.getScheme(), - worker.getHost(), - worker.getIp(), - worker.getCapacity(), - worker.getVersion(), - worker.getCategory(), - true - )) - ); - } - - void mockWorkerRunningTask(final String workerId, final Task task) throws Exception - { - cf.delete().forPath(JOINER.join(TASKS_PATH, workerId, task.getId())); - - final String taskStatusPath = JOINER.join(STATUS_PATH, workerId, task.getId()); - TaskAnnouncement taskAnnouncement = TaskAnnouncement.create(task, TaskStatus.running(task.getId()), DUMMY_LOCATION); - cf.create() - .creatingParentsIfNeeded() - .forPath(taskStatusPath, jsonMapper.writeValueAsBytes(taskAnnouncement)); - - Preconditions.checkNotNull( - cf.checkExists().forPath(taskStatusPath), - "Failed to write status on [%s]", - taskStatusPath - ); - } - - void 
mockWorkerCompleteSuccessfulTask(final String workerId, final Task task) throws Exception - { - TaskAnnouncement taskAnnouncement = TaskAnnouncement.create(task, TaskStatus.success(task.getId()), DUMMY_LOCATION); - cf.setData().forPath(JOINER.join(STATUS_PATH, workerId, task.getId()), jsonMapper.writeValueAsBytes(taskAnnouncement)); - } - - void mockWorkerCompleteFailedTask(final String workerId, final Task task) throws Exception - { - TaskAnnouncement taskAnnouncement = TaskAnnouncement.create( - task, - TaskStatus.failure( - task.getId(), - "Dummy task status failure for testing" - ), - DUMMY_LOCATION - ); - cf.setData() - .forPath(JOINER.join(STATUS_PATH, workerId, task.getId()), jsonMapper.writeValueAsBytes(taskAnnouncement)); - } - - boolean workerRunningTask(final String workerId, final String taskId) - { - return pathExists(JOINER.join(STATUS_PATH, workerId, taskId)); - } - - boolean taskAssigned(final String workerId, final String taskId) - { - return pathExists(JOINER.join(TASKS_PATH, workerId, taskId)); - } - - boolean pathExists(final String path) - { - return TestUtils.conditionValid( - new IndexingServiceCondition() - { - @Override - public boolean isValid() - { - try { - return cf.checkExists().forPath(path) != null; - } - catch (Exception e) { - throw new RuntimeException(e); - } - } - - @Override - public String toString() - { - return StringUtils.format("Path[%s] exists", path); - } - } - ); - } - - public static class TestableRemoteTaskRunner extends RemoteTaskRunner - { - private long currentTimeMillis = System.currentTimeMillis(); - - public TestableRemoteTaskRunner( - ObjectMapper jsonMapper, - RemoteTaskRunnerConfig config, - IndexerZkConfig indexerZkConfig, - CuratorFramework cf, - PathChildrenCacheFactory.Builder pathChildrenCacheFactory, - HttpClient httpClient, - Supplier workerConfigRef, - ProvisioningStrategy provisioningStrategy - ) - { - super( - jsonMapper, - config, - indexerZkConfig, - cf, - pathChildrenCacheFactory, - httpClient, - 
workerConfigRef, - provisioningStrategy, - new NoopServiceEmitter() - ); - } - - void setCurrentTimeMillis(long currentTimeMillis) - { - this.currentTimeMillis = currentTimeMillis; - } - - @Override - protected long getCurrentTimeMillis() - { - return currentTimeMillis; - } - } -} diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/SingleTaskBackgroundRunnerTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/SingleTaskBackgroundRunnerTest.java index 51e28cfeff2a..37b2c5323b3a 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/SingleTaskBackgroundRunnerTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/SingleTaskBackgroundRunnerTest.java @@ -54,7 +54,7 @@ import org.apache.druid.segment.loading.NoopDataSegmentMover; import org.apache.druid.segment.loading.NoopDataSegmentPusher; import org.apache.druid.segment.metadata.CentralizedDatasourceSchemaConfig; -import org.apache.druid.segment.realtime.NoopChatHandlerProvider; +import org.apache.druid.segment.realtime.ChatHandlerProvider; import org.apache.druid.server.DruidNode; import org.apache.druid.server.SetAndVerifyContextQueryRunner; import org.apache.druid.server.coordination.NoopDataSegmentAnnouncer; @@ -132,7 +132,7 @@ public void setup() throws IOException new SingleFileTaskReportFileWriter(new File("fake")), null, AuthTestUtils.TEST_AUTHORIZER_MAPPER, - new NoopChatHandlerProvider(), + new ChatHandlerProvider(), utils.getRowIngestionMetersFactory(), new TestAppenderatorsManager(), new NoopOverlordClient(), diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskLifecycleTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskLifecycleTest.java index e1e6936e6040..ef723b94ee5f 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskLifecycleTest.java +++ 
b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskLifecycleTest.java @@ -128,7 +128,7 @@ import org.apache.druid.segment.loading.LocalDataSegmentPusherConfig; import org.apache.druid.segment.loading.NoopDataSegmentArchiver; import org.apache.druid.segment.metadata.CentralizedDatasourceSchemaConfig; -import org.apache.druid.segment.realtime.NoopChatHandlerProvider; +import org.apache.druid.segment.realtime.ChatHandlerProvider; import org.apache.druid.segment.realtime.appenderator.AppenderatorsManager; import org.apache.druid.segment.realtime.appenderator.UnifiedIndexerAppenderatorsManager; import org.apache.druid.server.DruidNode; @@ -596,7 +596,7 @@ public void announceSegment(DataSegment segment) new NoopTestTaskReportFileWriter(), null, AuthTestUtils.TEST_AUTHORIZER_MAPPER, - new NoopChatHandlerProvider(), + new ChatHandlerProvider(), TEST_UTILS.getRowIngestionMetersFactory(), appenderatorsManager, new NoopOverlordClient(), diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskLockConfigTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskLockConfigTest.java index f257691fdd87..4936243c886f 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskLockConfigTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskLockConfigTest.java @@ -30,6 +30,7 @@ import org.apache.druid.indexing.overlord.config.DefaultTaskConfig; import org.apache.druid.indexing.overlord.config.TaskLockConfig; import org.apache.druid.indexing.overlord.config.TaskQueueConfig; +import org.apache.druid.indexing.overlord.hrtr.HttpRemoteTaskRunner; import org.apache.druid.indexing.test.TestIndexerMetadataStorageCoordinator; import org.apache.druid.jackson.DefaultObjectMapper; import org.apache.druid.java.util.emitter.service.ServiceEmitter; @@ -110,7 +111,7 @@ public boolean isForceTimeChunkLock() lockConfig = new TaskLockConfig(); } final TaskQueueConfig 
queueConfig = new TaskQueueConfig(null, null, null, null, null, null); - final TaskRunner taskRunner = EasyMock.createNiceMock(RemoteTaskRunner.class); + final TaskRunner taskRunner = EasyMock.createNiceMock(HttpRemoteTaskRunner.class); final TaskActionClientFactory actionClientFactory = EasyMock.createNiceMock(LocalTaskActionClientFactory.class); final GlobalTaskLockbox lockbox = new GlobalTaskLockbox(taskStorage, new TestIndexerMetadataStorageCoordinator()); final ServiceEmitter emitter = new NoopServiceEmitter(); diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskQueueTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskQueueTest.java index bc409064e97c..d64b40039347 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskQueueTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskQueueTest.java @@ -27,7 +27,6 @@ import com.google.common.collect.ImmutableMap; import com.google.common.util.concurrent.Futures; import com.google.common.util.concurrent.ListenableFuture; -import org.apache.curator.framework.CuratorFramework; import org.apache.druid.common.guava.DSuppliers; import org.apache.druid.data.input.impl.DimensionsSpec; import org.apache.druid.data.input.impl.HttpInputSource; @@ -88,8 +87,6 @@ import org.apache.druid.segment.indexing.DataSchema; import org.apache.druid.server.DruidNode; import org.apache.druid.server.coordinator.stats.CoordinatorRunStats; -import org.apache.druid.server.initialization.IndexerZkConfig; -import org.apache.druid.server.initialization.ZkPathsConfig; import org.apache.druid.timeline.DataSegment; import org.easymock.EasyMock; import org.joda.time.Interval; @@ -810,8 +807,6 @@ private HttpRemoteTaskRunner createHttpRemoteTaskRunner() new NoopProvisioningStrategy<>(), druidNodeDiscoveryProvider, EasyMock.createNiceMock(TaskStorage.class), - EasyMock.createNiceMock(CuratorFramework.class), - new IndexerZkConfig(new 
ZkPathsConfig(), null, null, null, null), serviceEmitter ); } diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TestRemoteTaskRunnerConfig.java b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TestRemoteTaskRunnerConfig.java deleted file mode 100644 index a634294637c7..000000000000 --- a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TestRemoteTaskRunnerConfig.java +++ /dev/null @@ -1,84 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.druid.indexing.overlord; - -import org.apache.druid.indexing.overlord.config.RemoteTaskRunnerConfig; -import org.joda.time.Period; - -/** - */ -public class TestRemoteTaskRunnerConfig extends RemoteTaskRunnerConfig -{ - private final Period timeout; - - public TestRemoteTaskRunnerConfig(Period timeout) - { - this.timeout = timeout; - } - - @Override - public Period getTaskAssignmentTimeout() - { - return timeout; - } - - @Override - public Period getTaskCleanupTimeout() - { - return timeout; - } - - @Override - public int getMaxZnodeBytes() - { - // make sure this is large enough, otherwise RemoteTaskRunnerTest might fail unexpectedly - return 10 * 1024; - } - - @Override - public Period getTaskShutdownLinkTimeout() - { - return timeout; - } - - @Override - public String getMinWorkerVersion() - { - return ""; - } - - @Override - public int getMaxRetriesBeforeBlacklist() - { - return 1; - } - - @Override - public Period getWorkerBlackListBackoffTime() - { - return timeout; - } - - @Override - public Period getWorkerBlackListCleanupPeriod() - { - return timeout; - } -} diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/ZkWorkerTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/ZkWorkerTest.java deleted file mode 100644 index fac929912088..000000000000 --- a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/ZkWorkerTest.java +++ /dev/null @@ -1,148 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.indexing.overlord; - -import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.databind.ObjectMapper; -import org.apache.curator.framework.recipes.cache.ChildData; -import org.apache.druid.indexer.TaskLocation; -import org.apache.druid.indexer.TaskStatus; -import org.apache.druid.indexing.common.task.NoopTask; -import org.apache.druid.indexing.common.task.Task; -import org.apache.druid.indexing.worker.TaskAnnouncement; -import org.apache.druid.jackson.DefaultObjectMapper; -import org.apache.druid.java.util.common.StringUtils; -import org.apache.zookeeper.data.Stat; -import org.junit.Assert; -import org.junit.Before; -import org.junit.Test; - -import java.util.function.Function; - -public class ZkWorkerTest -{ - Function extract; - - @Before - public void setup() - { - ObjectMapper mapper = new DefaultObjectMapper(); - extract = ZkWorker.createTaskIdExtractor(mapper); - } - - ChildData prepare(String input) - { - String replaced = StringUtils.format(StringUtils.replaceChar(input, '\'', "\""), TaskAnnouncement.TASK_ID_KEY); - byte[] data = StringUtils.toUtf8(replaced); - return new ChildData("/a/b/c", new Stat(), data); - } - - @Test - public void testShallowObjectWithIdFirst() - { - ChildData input = prepare("{'%s': 'abcd', 'status': 'RUNNING'}"); - String actual = extract.apply(input); - Assert.assertEquals("abcd", actual); - } - - @Test - public void testShallowObjectWithIdMiddle() - { - ChildData input = prepare("{'before': 'something', '%s': 'abcd', 'status': 'RUNNING'}"); - String 
actual = extract.apply(input); - Assert.assertEquals("abcd", actual); - } - - @Test - public void testShallowObjectWithIdLast() - { - ChildData input = prepare("{'before': 'something', 'status': 'RUNNING', '%s': 'abcd'}"); - String actual = extract.apply(input); - Assert.assertEquals("abcd", actual); - } - - @Test - public void testShallowObjectWithNoId() - { - ChildData input = prepare("{'before': 'something', 'status': 'RUNNING'}"); - String actual = extract.apply(input); - Assert.assertNull(actual); - } - - @Test - public void testDeepObjectWithIdFirst() - { - ChildData input = prepare("{'%s': 'abcd', 'subobject': { 'subkey': 'subvalue' }, 'subarray': [{'key': 'val'}, 2, 3], 'status': 'RUNNING'}"); - String actual = extract.apply(input); - Assert.assertEquals("abcd", actual); - } - - @Test - public void testDeepObjectWithIdLast() - { - ChildData input = prepare("{'subobject': { 'subkey': 'subvalue' }, 'subarray': [{'key': 'val'}, 2, 3], 'status': 'RUNNING', '%s': 'abcd'}"); - String actual = extract.apply(input); - Assert.assertEquals("abcd", actual); - } - - @Test - public void testDeepObjectWithIdInNestedOnly() - { - ChildData input = prepare("{'subobject': { '%s': 'defg' }, 'subarray': [{'key': 'val'}, 2, 3], 'status': 'RUNNING'}"); - String actual = extract.apply(input); - Assert.assertNull(actual); - } - - @Test - public void testDeepObjectWithIdInNestedAndOuter() - { - ChildData input = prepare("{'subobject': { '%s': 'defg' }, 'subarray': [{'key': 'val'}, 2, 3], 'status': 'RUNNING', '%1$s': 'abcd'}"); - String actual = extract.apply(input); - Assert.assertEquals("abcd", actual); - } - - @Test - public void testIdWithWrongTypeReturnsNull() - { - ChildData input = prepare("{'%s': {'nested': 'obj'}'"); - String actual = extract.apply(input); - Assert.assertNull(actual); - } - - @Test - public void testCanReadIdFromAJacksonSerializedTaskAnnouncement() throws JsonProcessingException - { - Task task0 = NoopTask.create(); - TaskAnnouncement taskAnnouncement = 
TaskAnnouncement.create( - task0, - TaskStatus.running(task0.getId()), - TaskLocation.unknown() - ); - - ObjectMapper objectMapper = new ObjectMapper(); - - byte[] serialized = objectMapper.writeValueAsBytes(taskAnnouncement); - - ChildData zkNode = new ChildData("/a/b/c", new Stat(), serialized); - - String actualExtractedTaskId = extract.apply(zkNode); - Assert.assertEquals(task0.getId(), actualExtractedTaskId); - } -} diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/autoscaling/PendingTaskBasedProvisioningStrategyTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/autoscaling/PendingTaskBasedProvisioningStrategyTest.java index eaba6f9e6f9a..69867b1ba38b 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/autoscaling/PendingTaskBasedProvisioningStrategyTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/autoscaling/PendingTaskBasedProvisioningStrategyTest.java @@ -29,17 +29,15 @@ import org.apache.druid.indexing.common.task.NoopTask; import org.apache.druid.indexing.common.task.Task; import org.apache.druid.indexing.overlord.ImmutableWorkerInfo; -import org.apache.druid.indexing.overlord.RemoteTaskRunner; import org.apache.druid.indexing.overlord.RemoteTaskRunnerWorkItem; -import org.apache.druid.indexing.overlord.ZkWorker; -import org.apache.druid.indexing.overlord.config.RemoteTaskRunnerConfig; +import org.apache.druid.indexing.overlord.config.HttpRemoteTaskRunnerConfig; +import org.apache.druid.indexing.overlord.hrtr.HttpRemoteTaskRunner; import org.apache.druid.indexing.overlord.setup.DefaultWorkerBehaviorConfig; import org.apache.druid.indexing.overlord.setup.FillCapacityWorkerSelectStrategy; import org.apache.druid.indexing.overlord.setup.WorkerBehaviorConfig; import org.apache.druid.indexing.worker.TaskAnnouncement; import org.apache.druid.indexing.worker.Worker; import org.apache.druid.indexing.worker.config.WorkerConfig; -import 
org.apache.druid.jackson.DefaultObjectMapper; import org.apache.druid.java.util.common.DateTimes; import org.apache.druid.java.util.common.concurrent.Execs; import org.apache.druid.java.util.emitter.EmittingLogger; @@ -240,7 +238,7 @@ public void testSuccessfulInitialMinWorkersProvision() EasyMock.expect(autoScaler.getMaxNumWorkers()).andReturn(5); EasyMock.expect(autoScaler.ipToIdLookup(EasyMock.anyObject())) .andReturn(new ArrayList()); - RemoteTaskRunner runner = EasyMock.createMock(RemoteTaskRunner.class); + HttpRemoteTaskRunner runner = EasyMock.createMock(HttpRemoteTaskRunner.class); // No pending tasks EasyMock.expect(runner.getPendingTaskPayloads()).andReturn( new ArrayList<>() @@ -248,7 +246,7 @@ public void testSuccessfulInitialMinWorkersProvision() EasyMock.expect(runner.getWorkers()).andReturn( Collections.emptyList() ); - EasyMock.expect(runner.getConfig()).andReturn(new RemoteTaskRunnerConfig()); + EasyMock.expect(runner.getConfig()).andReturn(new HttpRemoteTaskRunnerConfig()); EasyMock.expect(autoScaler.provision()).andReturn( new AutoScalingData(Collections.singletonList("aNode")) ).times(3); @@ -291,7 +289,7 @@ public ScheduledExecutorService get() EasyMock.expect(autoScaler.getMaxNumWorkers()).andReturn(5); EasyMock.expect(autoScaler.ipToIdLookup(EasyMock.anyObject())) .andReturn(new ArrayList()); - RemoteTaskRunner runner = EasyMock.createMock(RemoteTaskRunner.class); + HttpRemoteTaskRunner runner = EasyMock.createMock(HttpRemoteTaskRunner.class); // No pending tasks EasyMock.expect(runner.getPendingTaskPayloads()).andReturn( new ArrayList<>() @@ -299,7 +297,7 @@ public ScheduledExecutorService get() EasyMock.expect(runner.getWorkers()).andReturn( Collections.emptyList() ); - EasyMock.expect(runner.getConfig()).andReturn(new RemoteTaskRunnerConfig()); + EasyMock.expect(runner.getConfig()).andReturn(new HttpRemoteTaskRunnerConfig()); EasyMock.expect(autoScaler.provision()).andReturn( new AutoScalingData(Collections.singletonList("aNode")) 
).times(3); @@ -343,7 +341,7 @@ public ScheduledExecutorService get() EasyMock.expect(autoScaler.getMaxNumWorkers()).andReturn(5); EasyMock.expect(autoScaler.ipToIdLookup(EasyMock.anyObject())) .andReturn(new ArrayList()); - RemoteTaskRunner runner = EasyMock.createMock(RemoteTaskRunner.class); + HttpRemoteTaskRunner runner = EasyMock.createMock(HttpRemoteTaskRunner.class); // No pending tasks EasyMock.expect(runner.getPendingTaskPayloads()).andReturn( new ArrayList<>() @@ -351,7 +349,7 @@ public ScheduledExecutorService get() EasyMock.expect(runner.getWorkers()).andReturn( Collections.emptyList() ); - EasyMock.expect(runner.getConfig()).andReturn(new RemoteTaskRunnerConfig()); + EasyMock.expect(runner.getConfig()).andReturn(new HttpRemoteTaskRunnerConfig()); EasyMock.replay(runner, autoScaler); Provisioner provisioner = strategy.makeProvisioner(runner); boolean provisionedSomething = provisioner.doProvision(); @@ -366,7 +364,7 @@ public void testSuccessfulMinWorkersProvision() EasyMock.expect(autoScaler.getMaxNumWorkers()).andReturn(5); EasyMock.expect(autoScaler.ipToIdLookup(EasyMock.anyObject())) .andReturn(new ArrayList()); - RemoteTaskRunner runner = EasyMock.createMock(RemoteTaskRunner.class); + HttpRemoteTaskRunner runner = EasyMock.createMock(HttpRemoteTaskRunner.class); // No pending tasks EasyMock.expect(runner.getPendingTaskPayloads()).andReturn( new ArrayList<>() @@ -374,10 +372,10 @@ public void testSuccessfulMinWorkersProvision() // 1 node already running, only provision 2 more. 
EasyMock.expect(runner.getWorkers()).andReturn( Collections.singletonList( - new TestZkWorker(testTask).toImmutable() + workerWithTask(testTask) ) ); - EasyMock.expect(runner.getConfig()).andReturn(new RemoteTaskRunnerConfig()); + EasyMock.expect(runner.getConfig()).andReturn(new HttpRemoteTaskRunnerConfig()); EasyMock.expect(autoScaler.provision()).andReturn( new AutoScalingData(Collections.singletonList("aNode")) ).times(2); @@ -400,7 +398,7 @@ public void testSuccessfulMinWorkersProvisionWithOldVersionNodeRunning() EasyMock.expect(autoScaler.getMaxNumWorkers()).andReturn(5); EasyMock.expect(autoScaler.ipToIdLookup(EasyMock.anyObject())) .andReturn(new ArrayList()); - RemoteTaskRunner runner = EasyMock.createMock(RemoteTaskRunner.class); + HttpRemoteTaskRunner runner = EasyMock.createMock(HttpRemoteTaskRunner.class); // No pending tasks EasyMock.expect(runner.getPendingTaskPayloads()).andReturn( new ArrayList<>() @@ -408,11 +406,11 @@ public void testSuccessfulMinWorkersProvisionWithOldVersionNodeRunning() // 1 node already running, only provision 2 more. 
EasyMock.expect(runner.getWorkers()).andReturn( Arrays.asList( - new TestZkWorker(testTask).toImmutable(), - new TestZkWorker(testTask, "http", "h1", "n1", INVALID_VERSION).toImmutable() // Invalid version node + workerWithTask(testTask), + workerWithTask(testTask, "http", "h1", "n1", INVALID_VERSION) // Invalid version node ) ); - EasyMock.expect(runner.getConfig()).andReturn(new RemoteTaskRunnerConfig()); + EasyMock.expect(runner.getConfig()).andReturn(new HttpRemoteTaskRunnerConfig()); EasyMock.expect(autoScaler.provision()).andReturn( new AutoScalingData(Collections.singletonList("aNode")) ).times(2); @@ -438,7 +436,7 @@ public void testProvisioning() EasyMock.expect(autoScaler.provision()).andReturn( new AutoScalingData(Collections.singletonList("fake")) ); - RemoteTaskRunner runner = EasyMock.createMock(RemoteTaskRunner.class); + HttpRemoteTaskRunner runner = EasyMock.createMock(HttpRemoteTaskRunner.class); EasyMock.expect(runner.getPendingTaskPayloads()).andReturn( Collections.singletonList( NoopTask.create() @@ -446,11 +444,11 @@ public void testProvisioning() ).times(2); EasyMock.expect(runner.getWorkers()).andReturn( Arrays.asList( - new TestZkWorker(testTask).toImmutable(), - new TestZkWorker(testTask, "http", "h1", "n1", INVALID_VERSION).toImmutable() // Invalid version node + workerWithTask(testTask), + workerWithTask(testTask, "http", "h1", "n1", INVALID_VERSION) // Invalid version node ) ).times(2); - EasyMock.expect(runner.getConfig()).andReturn(new RemoteTaskRunnerConfig()).times(1); + EasyMock.expect(runner.getConfig()).andReturn(new HttpRemoteTaskRunnerConfig()).times(1); EasyMock.replay(runner); EasyMock.replay(autoScaler); @@ -509,7 +507,7 @@ public ScheduledExecutorService get() EasyMock.expect(autoScaler.provision()).andReturn( new AutoScalingData(Collections.singletonList("fake")) ).times(2); - RemoteTaskRunner runner = EasyMock.createMock(RemoteTaskRunner.class); + HttpRemoteTaskRunner runner = 
EasyMock.createMock(HttpRemoteTaskRunner.class); // two pending tasks EasyMock.expect(runner.getPendingTaskPayloads()).andReturn( ImmutableList.of( @@ -520,11 +518,11 @@ public ScheduledExecutorService get() // Capacity for current worker is 1 EasyMock.expect(runner.getWorkers()).andReturn( Arrays.asList( - new TestZkWorker(testTask).toImmutable(), - new TestZkWorker(testTask, "http", "h1", "n1", INVALID_VERSION).toImmutable() // Invalid version node + workerWithTask(testTask), + workerWithTask(testTask, "http", "h1", "n1", INVALID_VERSION) // Invalid version node ) ).times(2); - EasyMock.expect(runner.getConfig()).andReturn(new RemoteTaskRunnerConfig()).times(1); + EasyMock.expect(runner.getConfig()).andReturn(new HttpRemoteTaskRunnerConfig()).times(1); EasyMock.replay(runner); EasyMock.replay(autoScaler); @@ -584,7 +582,7 @@ public ScheduledExecutorService get() EasyMock.expect(autoScaler.provision()).andReturn( new AutoScalingData(Collections.singletonList("fake")) ).times(1); - RemoteTaskRunner runner = EasyMock.createMock(RemoteTaskRunner.class); + HttpRemoteTaskRunner runner = EasyMock.createMock(HttpRemoteTaskRunner.class); // two pending tasks EasyMock.expect(runner.getPendingTaskPayloads()).andReturn( ImmutableList.of( @@ -597,7 +595,7 @@ public ScheduledExecutorService get() Collections.emptyList() ).times(2); - EasyMock.expect(runner.getConfig()).andReturn(new RemoteTaskRunnerConfig()).times(1); + EasyMock.expect(runner.getConfig()).andReturn(new HttpRemoteTaskRunnerConfig()).times(1); EasyMock.replay(runner); EasyMock.replay(autoScaler); @@ -645,7 +643,7 @@ public void testProvisionAlert() throws Exception new AutoScalingData(Collections.singletonList("fake")) ); EasyMock.replay(autoScaler); - RemoteTaskRunner runner = EasyMock.createMock(RemoteTaskRunner.class); + HttpRemoteTaskRunner runner = EasyMock.createMock(HttpRemoteTaskRunner.class); EasyMock.expect(runner.getPendingTaskPayloads()).andReturn( Collections.singletonList( NoopTask.create() @@ 
-653,12 +651,12 @@ public void testProvisionAlert() throws Exception ).times(2); EasyMock.expect(runner.getWorkers()).andReturn( Arrays.asList( - new TestZkWorker(testTask, "http", "hi", "lo", MIN_VERSION, 1).toImmutable(), - new TestZkWorker(testTask, "http", "h1", "n1", INVALID_VERSION).toImmutable(), // Invalid version node - new TestZkWorker(testTask, "http", "h2", "n1", INVALID_VERSION).toImmutable() // Invalid version node + workerWithTask(testTask, "http", "hi", "lo", MIN_VERSION, 1), + workerWithTask(testTask, "http", "h1", "n1", INVALID_VERSION), // Invalid version node + workerWithTask(testTask, "http", "h2", "n1", INVALID_VERSION) // Invalid version node ) ).times(2); - EasyMock.expect(runner.getConfig()).andReturn(new RemoteTaskRunnerConfig()); + EasyMock.expect(runner.getConfig()).andReturn(new HttpRemoteTaskRunnerConfig()); EasyMock.replay(runner); Provisioner provisioner = strategy.makeProvisioner(runner); @@ -699,9 +697,9 @@ public void testDoSuccessfulTerminate() new AutoScalingData(new ArrayList<>()) ); EasyMock.replay(autoScaler); - RemoteTaskRunner runner = EasyMock.createMock(RemoteTaskRunner.class); + HttpRemoteTaskRunner runner = EasyMock.createMock(HttpRemoteTaskRunner.class); EasyMock.expect(runner.getPendingTasks()).andReturn( - Collections.singletonList( + (Collection) Collections.singletonList( new RemoteTaskRunnerWorkItem( testTask.getId(), testTask.getType(), @@ -713,12 +711,12 @@ public void testDoSuccessfulTerminate() ).times(2); EasyMock.expect(runner.getWorkers()).andReturn( ImmutableList.of( - new TestZkWorker(testTask).toImmutable(), - new TestZkWorker(testTask).toImmutable() + workerWithTask(testTask), + workerWithTask(testTask) ) ).times(2); EasyMock.expect(runner.markWorkersLazy(EasyMock.anyObject(), EasyMock.anyInt())) - .andReturn(Collections.singletonList(new TestZkWorker(testTask).getWorker())); + .andReturn(Collections.singletonList(workerWithTask(testTask).getWorker())); 
EasyMock.expect(runner.getLazyWorkers()).andReturn(new ArrayList<>()); EasyMock.replay(runner); @@ -745,17 +743,17 @@ public void testSomethingTerminating() ); EasyMock.replay(autoScaler); - RemoteTaskRunner runner = EasyMock.createMock(RemoteTaskRunner.class); + HttpRemoteTaskRunner runner = EasyMock.createMock(HttpRemoteTaskRunner.class); EasyMock.expect(runner.getWorkers()).andReturn( ImmutableList.of( - new TestZkWorker(testTask).toImmutable(), - new TestZkWorker(testTask).toImmutable(), - new TestZkWorker(testTask).toImmutable() + workerWithTask(testTask), + workerWithTask(testTask), + workerWithTask(testTask) ) ).times(2); EasyMock.expect(runner.getLazyWorkers()).andReturn(new ArrayList<>()).times(2); EasyMock.expect(runner.markWorkersLazy(EasyMock.anyObject(), EasyMock.anyInt())) - .andReturn(Collections.singletonList(new TestZkWorker(testTask).toImmutable().getWorker())); + .andReturn(Collections.singletonList(workerWithTask(testTask).getWorker())); EasyMock.replay(runner); Provisioner provisioner = strategy.makeProvisioner(runner); @@ -788,7 +786,7 @@ public void testNoActionNeeded() .andReturn(Collections.singletonList("ip")); EasyMock.replay(autoScaler); - RemoteTaskRunner runner = EasyMock.createMock(RemoteTaskRunner.class); + HttpRemoteTaskRunner runner = EasyMock.createMock(HttpRemoteTaskRunner.class); EasyMock.expect(runner.getPendingTaskPayloads()).andReturn( Collections.singletonList( (Task) NoopTask.create() @@ -796,11 +794,11 @@ public void testNoActionNeeded() ).times(1); EasyMock.expect(runner.getWorkers()).andReturn( Arrays.asList( - new TestZkWorker(NoopTask.create()).toImmutable(), - new TestZkWorker(NoopTask.create()).toImmutable() + workerWithTask(NoopTask.create()), + workerWithTask(NoopTask.create()) ) ).times(2); - EasyMock.expect(runner.getConfig()).andReturn(new RemoteTaskRunnerConfig()); + EasyMock.expect(runner.getConfig()).andReturn(new HttpRemoteTaskRunnerConfig()); EasyMock.expect(runner.getLazyWorkers()).andReturn(new 
ArrayList<>()); EasyMock.expect(runner.markWorkersLazy(EasyMock.anyObject(), EasyMock.anyInt())) @@ -836,16 +834,16 @@ public void testMinCountIncrease() EasyMock.expect(autoScaler.ipToIdLookup(EasyMock.anyObject())) .andReturn(Collections.singletonList("ip")); EasyMock.replay(autoScaler); - RemoteTaskRunner runner = EasyMock.createMock(RemoteTaskRunner.class); + HttpRemoteTaskRunner runner = EasyMock.createMock(HttpRemoteTaskRunner.class); EasyMock.expect(runner.getPendingTaskPayloads()).andReturn( Collections.emptyList() ).times(2); EasyMock.expect(runner.getWorkers()).andReturn( Collections.singletonList( - new TestZkWorker(NoopTask.create(), "http", "h1", "i1", MIN_VERSION).toImmutable() + workerWithTask(NoopTask.create(), "http", "h1", "i1", MIN_VERSION) ) ).times(3); - EasyMock.expect(runner.getConfig()).andReturn(new RemoteTaskRunnerConfig()).times(2); + EasyMock.expect(runner.getConfig()).andReturn(new HttpRemoteTaskRunnerConfig()).times(2); EasyMock.expect(runner.getLazyWorkers()).andReturn(new ArrayList<>()); EasyMock.expect(runner.markWorkersLazy(EasyMock.anyObject(), EasyMock.anyInt())) @@ -894,7 +892,7 @@ public void testNullWorkerConfig() workerConfig.set(null); EasyMock.replay(autoScaler); - RemoteTaskRunner runner = EasyMock.createMock(RemoteTaskRunner.class); + HttpRemoteTaskRunner runner = EasyMock.createMock(HttpRemoteTaskRunner.class); EasyMock.expect(runner.getPendingTaskPayloads()).andReturn( Collections.singletonList( NoopTask.create() @@ -902,7 +900,7 @@ public void testNullWorkerConfig() ).times(1); EasyMock.expect(runner.getWorkers()).andReturn( Collections.singletonList( - new TestZkWorker(null).toImmutable() + workerWithTask(null) ) ).times(2); EasyMock.replay(runner); @@ -919,56 +917,38 @@ public void testNullWorkerConfig() EasyMock.verify(runner); } - private static class TestZkWorker extends ZkWorker + private static ImmutableWorkerInfo workerWithTask(Task task) { - private final Task testTask; - - public TestZkWorker( - Task testTask 
- ) - { - this(testTask, "http", "host", "ip", MIN_VERSION); - } - - public TestZkWorker( - Task testTask, - String scheme, - String host, - String ip, - String version - ) - { - this(testTask, scheme, host, ip, version, 1); - } + return workerWithTask(task, "http", "host", "ip", MIN_VERSION, 1); + } - public TestZkWorker( - Task testTask, - String scheme, - String host, - String ip, - String version, - int capacity - ) - { - super(new Worker(scheme, host, ip, capacity, version, WorkerConfig.DEFAULT_CATEGORY), null, new DefaultObjectMapper()); - - this.testTask = testTask; - } + private static ImmutableWorkerInfo workerWithTask( + Task task, + String scheme, + String host, + String ip, + String version + ) + { + return workerWithTask(task, scheme, host, ip, version, 1); + } - @Override - public Map getRunningTasks() - { - if (testTask == null) { - return new HashMap<>(); - } - return ImmutableMap.of( - testTask.getId(), - TaskAnnouncement.create( - testTask, - TaskStatus.running(testTask.getId()), - TaskLocation.unknown() - ) - ); - } + private static ImmutableWorkerInfo workerWithTask( + Task task, + String scheme, + String host, + String ip, + String version, + int capacity + ) + { + Worker worker = new Worker(scheme, host, ip, capacity, version, WorkerConfig.DEFAULT_CATEGORY); + Map running = task == null + ? 
new HashMap<>() + : ImmutableMap.of( + task.getId(), + TaskAnnouncement.create(task, TaskStatus.running(task.getId()), TaskLocation.unknown()) + ); + return ImmutableWorkerInfo.fromWorkerAnnouncements(worker, running, DateTimes.EPOCH, null); } } diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/autoscaling/SimpleProvisioningStrategyTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/autoscaling/SimpleProvisioningStrategyTest.java index 0799b3ba6e43..05b5f7ff1657 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/autoscaling/SimpleProvisioningStrategyTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/autoscaling/SimpleProvisioningStrategyTest.java @@ -27,15 +27,14 @@ import org.apache.druid.indexing.common.TestTasks; import org.apache.druid.indexing.common.task.NoopTask; import org.apache.druid.indexing.common.task.Task; -import org.apache.druid.indexing.overlord.RemoteTaskRunner; +import org.apache.druid.indexing.overlord.ImmutableWorkerInfo; import org.apache.druid.indexing.overlord.RemoteTaskRunnerWorkItem; -import org.apache.druid.indexing.overlord.ZkWorker; +import org.apache.druid.indexing.overlord.hrtr.HttpRemoteTaskRunner; import org.apache.druid.indexing.overlord.setup.DefaultWorkerBehaviorConfig; import org.apache.druid.indexing.overlord.setup.WorkerBehaviorConfig; import org.apache.druid.indexing.worker.TaskAnnouncement; import org.apache.druid.indexing.worker.Worker; import org.apache.druid.indexing.worker.config.WorkerConfig; -import org.apache.druid.jackson.DefaultObjectMapper; import org.apache.druid.java.util.common.DateTimes; import org.apache.druid.java.util.common.concurrent.Execs; import org.apache.druid.java.util.emitter.EmittingLogger; @@ -51,6 +50,7 @@ import java.util.ArrayList; import java.util.Arrays; +import java.util.Collection; import java.util.Collections; import java.util.HashMap; import java.util.Map; @@ -120,16 +120,16 @@ 
public void testSuccessfulProvision() EasyMock.expect(autoScaler.provision()).andReturn( new AutoScalingData(Collections.singletonList("aNode")) ); - RemoteTaskRunner runner = EasyMock.createMock(RemoteTaskRunner.class); + HttpRemoteTaskRunner runner = EasyMock.createMock(HttpRemoteTaskRunner.class); EasyMock.expect(runner.getPendingTasks()).andReturn( - Collections.singletonList( + (Collection) Collections.singletonList( new RemoteTaskRunnerWorkItem(testTask.getId(), testTask.getType(), null, null, testTask.getDataSource()) .withQueueInsertionTime(DateTimes.nowUtc()) ) ); EasyMock.expect(runner.getWorkers()).andReturn( Collections.singletonList( - new TestZkWorker(testTask).toImmutable() + workerWithTask(testTask) ) ); EasyMock.replay(runner); @@ -158,16 +158,16 @@ public void testSomethingProvisioning() EasyMock.expect(autoScaler.provision()).andReturn( new AutoScalingData(Collections.singletonList("fake")) ); - RemoteTaskRunner runner = EasyMock.createMock(RemoteTaskRunner.class); + HttpRemoteTaskRunner runner = EasyMock.createMock(HttpRemoteTaskRunner.class); EasyMock.expect(runner.getPendingTasks()).andReturn( - Collections.singletonList( + (Collection) Collections.singletonList( new RemoteTaskRunnerWorkItem(testTask.getId(), testTask.getType(), null, null, testTask.getDataSource()) .withQueueInsertionTime(DateTimes.nowUtc()) ) ).times(2); EasyMock.expect(runner.getWorkers()).andReturn( Collections.singletonList( - new TestZkWorker(testTask).toImmutable() + workerWithTask(testTask) ) ).times(2); EasyMock.replay(runner); @@ -217,16 +217,16 @@ public void testProvisionAlert() throws Exception new AutoScalingData(Collections.singletonList("fake")) ); EasyMock.replay(autoScaler); - RemoteTaskRunner runner = EasyMock.createMock(RemoteTaskRunner.class); + HttpRemoteTaskRunner runner = EasyMock.createMock(HttpRemoteTaskRunner.class); EasyMock.expect(runner.getPendingTasks()).andReturn( - Collections.singletonList( + (Collection) Collections.singletonList( new 
RemoteTaskRunnerWorkItem(testTask.getId(), testTask.getType(), null, null, testTask.getDataSource()) .withQueueInsertionTime(DateTimes.nowUtc()) ) ).times(2); EasyMock.expect(runner.getWorkers()).andReturn( Collections.singletonList( - new TestZkWorker(testTask).toImmutable() + workerWithTask(testTask) ) ).times(2); EasyMock.replay(runner); @@ -270,20 +270,20 @@ public void testDoSuccessfulTerminate() new AutoScalingData(new ArrayList<>()) ); EasyMock.replay(autoScaler); - RemoteTaskRunner runner = EasyMock.createMock(RemoteTaskRunner.class); + HttpRemoteTaskRunner runner = EasyMock.createMock(HttpRemoteTaskRunner.class); EasyMock.expect(runner.getPendingTasks()).andReturn( - Collections.singletonList( + (Collection) Collections.singletonList( new RemoteTaskRunnerWorkItem(testTask.getId(), testTask.getType(), null, null, testTask.getDataSource()) .withQueueInsertionTime(DateTimes.nowUtc()) ) ).times(2); EasyMock.expect(runner.getWorkers()).andReturn( Collections.singletonList( - new TestZkWorker(testTask).toImmutable() + workerWithTask(testTask) ) ).times(2); EasyMock.expect(runner.markWorkersLazy(EasyMock.anyObject(), EasyMock.anyInt())) - .andReturn(Collections.singletonList(new TestZkWorker(testTask).getWorker())); + .andReturn(Collections.singletonList(workerWithTask(testTask).getWorker())); EasyMock.expect(runner.getLazyWorkers()).andReturn(new ArrayList<>()); EasyMock.replay(runner); @@ -311,21 +311,21 @@ public void testSomethingTerminating() ); EasyMock.replay(autoScaler); - RemoteTaskRunner runner = EasyMock.createMock(RemoteTaskRunner.class); + HttpRemoteTaskRunner runner = EasyMock.createMock(HttpRemoteTaskRunner.class); EasyMock.expect(runner.getPendingTasks()).andReturn( - Collections.singletonList( + (Collection) Collections.singletonList( new RemoteTaskRunnerWorkItem(testTask.getId(), testTask.getType(), null, null, testTask.getDataSource()) .withQueueInsertionTime(DateTimes.nowUtc()) ) ).times(2); EasyMock.expect(runner.getWorkers()).andReturn( 
Collections.singletonList( - new TestZkWorker(testTask).toImmutable() + workerWithTask(testTask) ) ).times(2); EasyMock.expect(runner.getLazyWorkers()).andReturn(new ArrayList<>()).times(2); EasyMock.expect(runner.markWorkersLazy(EasyMock.anyObject(), EasyMock.anyInt())) - .andReturn(Collections.singletonList(new TestZkWorker(testTask).getWorker())); + .andReturn(Collections.singletonList(workerWithTask(testTask).getWorker())); EasyMock.replay(runner); Provisioner provisioner = strategy.makeProvisioner(runner); @@ -359,17 +359,17 @@ public void testNoActionNeeded() .andReturn(Collections.singletonList("ip")); EasyMock.replay(autoScaler); - RemoteTaskRunner runner = EasyMock.createMock(RemoteTaskRunner.class); + HttpRemoteTaskRunner runner = EasyMock.createMock(HttpRemoteTaskRunner.class); EasyMock.expect(runner.getPendingTasks()).andReturn( - Collections.singletonList( + (Collection) Collections.singletonList( new RemoteTaskRunnerWorkItem(testTask.getId(), testTask.getType(), null, null, testTask.getDataSource()) .withQueueInsertionTime(DateTimes.nowUtc()) ) ).times(2); EasyMock.expect(runner.getWorkers()).andReturn( Arrays.asList( - new TestZkWorker(NoopTask.create()).toImmutable(), - new TestZkWorker(NoopTask.create()).toImmutable() + workerWithTask(NoopTask.create()), + workerWithTask(NoopTask.create()) ) ).times(2); EasyMock.expect(runner.getLazyWorkers()).andReturn(new ArrayList<>()); @@ -407,13 +407,13 @@ public void testMinCountIncrease() EasyMock.expect(autoScaler.ipToIdLookup(EasyMock.anyObject())) .andReturn(Collections.singletonList("ip")); EasyMock.replay(autoScaler); - RemoteTaskRunner runner = EasyMock.createMock(RemoteTaskRunner.class); + HttpRemoteTaskRunner runner = EasyMock.createMock(HttpRemoteTaskRunner.class); EasyMock.expect(runner.getPendingTasks()).andReturn( Collections.emptyList() ).times(3); EasyMock.expect(runner.getWorkers()).andReturn( Collections.singletonList( - new TestZkWorker(NoopTask.create(), "http", "h1", "i1", 
"0").toImmutable() + workerWithTask(NoopTask.create(), "http", "h1", "i1", "0") ) ).times(3); EasyMock.expect(runner.getLazyWorkers()).andReturn(new ArrayList<>()); @@ -463,16 +463,16 @@ public void testNullWorkerConfig() workerConfig.set(null); EasyMock.replay(autoScaler); - RemoteTaskRunner runner = EasyMock.createMock(RemoteTaskRunner.class); + HttpRemoteTaskRunner runner = EasyMock.createMock(HttpRemoteTaskRunner.class); EasyMock.expect(runner.getPendingTasks()).andReturn( - Collections.singletonList( + (Collection) Collections.singletonList( new RemoteTaskRunnerWorkItem(testTask.getId(), testTask.getType(), null, null, testTask.getDataSource()) .withQueueInsertionTime(DateTimes.nowUtc()) ) ).times(2); EasyMock.expect(runner.getWorkers()).andReturn( Collections.singletonList( - new TestZkWorker(null).toImmutable() + workerWithTask(null) ) ).times(1); EasyMock.replay(runner); @@ -489,44 +489,26 @@ public void testNullWorkerConfig() EasyMock.verify(runner); } - private static class TestZkWorker extends ZkWorker + private static ImmutableWorkerInfo workerWithTask(Task task) + { + return workerWithTask(task, "http", "host", "ip", "0"); + } + + private static ImmutableWorkerInfo workerWithTask( + Task task, + String scheme, + String host, + String ip, + String version + ) { - private final Task testTask; - - public TestZkWorker( - Task testTask - ) - { - this(testTask, "http", "host", "ip", "0"); - } - - public TestZkWorker( - Task testTask, - String scheme, - String host, - String ip, - String version - ) - { - super(new Worker(scheme, host, ip, 3, version, WorkerConfig.DEFAULT_CATEGORY), null, new DefaultObjectMapper()); - - this.testTask = testTask; - } - - @Override - public Map getRunningTasks() - { - if (testTask == null) { - return new HashMap<>(); - } - return ImmutableMap.of( - testTask.getId(), - TaskAnnouncement.create( - testTask, - TaskStatus.running(testTask.getId()), - TaskLocation.unknown() - ) - ); - } + Worker worker = new Worker(scheme, host, ip, 
3, version, WorkerConfig.DEFAULT_CATEGORY); + Map running = task == null + ? new HashMap<>() + : ImmutableMap.of( + task.getId(), + TaskAnnouncement.create(task, TaskStatus.running(task.getId()), TaskLocation.unknown()) + ); + return ImmutableWorkerInfo.fromWorkerAnnouncements(worker, running, DateTimes.EPOCH, null); } } diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/config/RemoteTaskRunnerConfigTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/config/RemoteTaskRunnerConfigTest.java deleted file mode 100644 index 1cc72346a0a2..000000000000 --- a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/config/RemoteTaskRunnerConfigTest.java +++ /dev/null @@ -1,880 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.druid.indexing.overlord.config; - -import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.common.collect.ImmutableList; -import com.google.inject.Injector; -import com.google.inject.ProvisionException; -import org.apache.druid.guice.GuiceInjectors; -import org.apache.druid.guice.IndexingServiceModuleHelper; -import org.apache.druid.guice.JsonConfigProvider; -import org.apache.druid.guice.JsonConfigurator; -import org.apache.druid.jackson.DefaultObjectMapper; -import org.joda.time.Period; -import org.junit.Assert; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.ExpectedException; - -import java.io.IOException; -import java.util.HashMap; -import java.util.Map; -import java.util.Properties; - -public class RemoteTaskRunnerConfigTest -{ - @Rule - public ExpectedException expectedException = ExpectedException.none(); - - private static final ObjectMapper MAPPER = new DefaultObjectMapper(); - private static final Period DEFAULT_TIMEOUT = Period.ZERO; - private static final String DEFAULT_VERSION = ""; - private static final long DEFAULT_MAX_ZNODE = 10 * 1024; - private static final int DEFAULT_PENDING_TASKS_RUNNER_NUM_THREADS = 5; - private static final int DEFAULT_MAX_RETRIES_BEFORE_BLACKLIST = 5; - private static final Period DEFAULT_TASK_BACKOFF = new Period("PT10M"); - private static final Period DEFAULT_BLACKLIST_CLEANUP_PERIOD = new Period("PT5M"); - - @Test - public void testIsJsonConfiguratable() - { - JsonConfigurator.verifyClazzIsConfigurable(MAPPER, RemoteTaskRunnerConfig.class, null); - } - - @Test - public void testGetTaskAssignmentTimeout() throws Exception - { - final Period timeout = Period.hours(1); - Assert.assertEquals( - timeout, - reflect(generateRemoteTaskRunnerConfig( - timeout, - DEFAULT_TIMEOUT, - DEFAULT_VERSION, - DEFAULT_MAX_ZNODE, - DEFAULT_TIMEOUT, - DEFAULT_PENDING_TASKS_RUNNER_NUM_THREADS, - DEFAULT_MAX_RETRIES_BEFORE_BLACKLIST, - DEFAULT_TASK_BACKOFF, - 
DEFAULT_BLACKLIST_CLEANUP_PERIOD - )).getTaskAssignmentTimeout() - ); - } - - @Test - public void testGetPendingTasksRunnerNumThreads() throws Exception - { - final int pendingTasksRunnerNumThreads = 20; - Assert.assertEquals( - pendingTasksRunnerNumThreads, - reflect(generateRemoteTaskRunnerConfig( - DEFAULT_TIMEOUT, - DEFAULT_TIMEOUT, - DEFAULT_VERSION, - DEFAULT_MAX_ZNODE, - DEFAULT_TIMEOUT, - pendingTasksRunnerNumThreads, - DEFAULT_MAX_RETRIES_BEFORE_BLACKLIST, - DEFAULT_TASK_BACKOFF, - DEFAULT_BLACKLIST_CLEANUP_PERIOD - )).getPendingTasksRunnerNumThreads() - ); - } - - @Test - public void testGetMinWorkerVersion() throws Exception - { - final String version = "some version"; - Assert.assertEquals( - version, - reflect(generateRemoteTaskRunnerConfig( - DEFAULT_TIMEOUT, - DEFAULT_TIMEOUT, - version, - DEFAULT_MAX_ZNODE, - DEFAULT_TIMEOUT, - DEFAULT_PENDING_TASKS_RUNNER_NUM_THREADS, - DEFAULT_MAX_RETRIES_BEFORE_BLACKLIST, - DEFAULT_TASK_BACKOFF, - DEFAULT_BLACKLIST_CLEANUP_PERIOD - )).getMinWorkerVersion() - ); - } - - @Test - public void testGetMaxZnodeBytes() throws Exception - { - final long max = 20 * 1024; - Assert.assertEquals( - max, - reflect(generateRemoteTaskRunnerConfig( - DEFAULT_TIMEOUT, - DEFAULT_TIMEOUT, - DEFAULT_VERSION, - max, - DEFAULT_TIMEOUT, - DEFAULT_PENDING_TASKS_RUNNER_NUM_THREADS, - DEFAULT_MAX_RETRIES_BEFORE_BLACKLIST, - DEFAULT_TASK_BACKOFF, - DEFAULT_BLACKLIST_CLEANUP_PERIOD - )).getMaxZnodeBytes() - ); - } - - @Test - public void testGetTaskShutdownLinkTimeout() throws Exception - { - final Period timeout = Period.hours(1); - Assert.assertEquals( - timeout, - reflect(generateRemoteTaskRunnerConfig( - DEFAULT_TIMEOUT, - DEFAULT_TIMEOUT, - DEFAULT_VERSION, - DEFAULT_MAX_ZNODE, - timeout, - DEFAULT_PENDING_TASKS_RUNNER_NUM_THREADS, - DEFAULT_MAX_RETRIES_BEFORE_BLACKLIST, - DEFAULT_TASK_BACKOFF, - DEFAULT_BLACKLIST_CLEANUP_PERIOD - )).getTaskShutdownLinkTimeout() - ); - } - - @Test - public void testGetTaskCleanupTimeout() throws 
Exception - { - final Period timeout = Period.hours(1); - Assert.assertEquals( - timeout, - reflect(generateRemoteTaskRunnerConfig( - DEFAULT_TIMEOUT, - timeout, - DEFAULT_VERSION, - DEFAULT_MAX_ZNODE, - DEFAULT_TIMEOUT, - DEFAULT_PENDING_TASKS_RUNNER_NUM_THREADS, - DEFAULT_MAX_RETRIES_BEFORE_BLACKLIST, - DEFAULT_TASK_BACKOFF, - DEFAULT_BLACKLIST_CLEANUP_PERIOD - )).getTaskCleanupTimeout() - ); - } - - @Test - public void testGetMaxRetriesBeforeBlacklist() throws Exception - { - final int maxRetriesBeforeBlacklist = 2; - Assert.assertEquals( - maxRetriesBeforeBlacklist, - reflect(generateRemoteTaskRunnerConfig( - DEFAULT_TIMEOUT, - DEFAULT_TIMEOUT, - DEFAULT_VERSION, - DEFAULT_MAX_ZNODE, - DEFAULT_TIMEOUT, - DEFAULT_PENDING_TASKS_RUNNER_NUM_THREADS, - maxRetriesBeforeBlacklist, - DEFAULT_TASK_BACKOFF, - DEFAULT_BLACKLIST_CLEANUP_PERIOD - )).getMaxRetriesBeforeBlacklist() - ); - } - - @Test - public void testGetWorkerBlackListBackoffTime() throws Exception - { - final Period taskBlackListBackoffTime = new Period("PT1M"); - Assert.assertEquals( - taskBlackListBackoffTime, - reflect(generateRemoteTaskRunnerConfig( - DEFAULT_TIMEOUT, - DEFAULT_TIMEOUT, - DEFAULT_VERSION, - DEFAULT_MAX_ZNODE, - DEFAULT_TIMEOUT, - DEFAULT_PENDING_TASKS_RUNNER_NUM_THREADS, - DEFAULT_MAX_RETRIES_BEFORE_BLACKLIST, - taskBlackListBackoffTime, - DEFAULT_BLACKLIST_CLEANUP_PERIOD - )).getWorkerBlackListBackoffTime() - ); - } - - @Test - public void testGetTaskBlackListCleanupPeriod() throws Exception - { - final Period taskBlackListCleanupPeriod = Period.years(100); - Assert.assertEquals( - taskBlackListCleanupPeriod, - reflect(generateRemoteTaskRunnerConfig( - DEFAULT_TIMEOUT, - DEFAULT_TIMEOUT, - DEFAULT_VERSION, - DEFAULT_MAX_ZNODE, - DEFAULT_TIMEOUT, - DEFAULT_PENDING_TASKS_RUNNER_NUM_THREADS, - DEFAULT_MAX_RETRIES_BEFORE_BLACKLIST, - DEFAULT_TASK_BACKOFF, - taskBlackListCleanupPeriod - )).getWorkerBlackListCleanupPeriod() - ); - } - - @Test - public void testEquals() throws Exception - { - 
Assert.assertEquals( - reflect(generateRemoteTaskRunnerConfig( - DEFAULT_TIMEOUT, - DEFAULT_TIMEOUT, - DEFAULT_VERSION, - DEFAULT_MAX_ZNODE, - DEFAULT_TIMEOUT, - DEFAULT_PENDING_TASKS_RUNNER_NUM_THREADS, - DEFAULT_MAX_RETRIES_BEFORE_BLACKLIST, - DEFAULT_TASK_BACKOFF, - DEFAULT_BLACKLIST_CLEANUP_PERIOD - )), - reflect(generateRemoteTaskRunnerConfig( - DEFAULT_TIMEOUT, - DEFAULT_TIMEOUT, - DEFAULT_VERSION, - DEFAULT_MAX_ZNODE, - DEFAULT_TIMEOUT, - DEFAULT_PENDING_TASKS_RUNNER_NUM_THREADS, - DEFAULT_MAX_RETRIES_BEFORE_BLACKLIST, - DEFAULT_TASK_BACKOFF, - DEFAULT_BLACKLIST_CLEANUP_PERIOD - )) - ); - final Period timeout = Period.years(999); - final String version = "someVersion"; - final long max = 20 * 1024; - final int pendingTasksRunnerNumThreads = 20; - final int maxRetriesBeforeBlacklist = 1; - final Period taskBlackListBackoffTime = new Period("PT1M"); - final Period taskBlackListCleanupPeriod = Period.years(10); - Assert.assertEquals( - reflect(generateRemoteTaskRunnerConfig( - timeout, - timeout, - version, - max, - timeout, - pendingTasksRunnerNumThreads, - maxRetriesBeforeBlacklist, - taskBlackListBackoffTime, - taskBlackListCleanupPeriod - )), - reflect(generateRemoteTaskRunnerConfig( - timeout, - timeout, - version, - max, - timeout, - pendingTasksRunnerNumThreads, - maxRetriesBeforeBlacklist, - taskBlackListBackoffTime, - taskBlackListCleanupPeriod - )) - ); - Assert.assertNotEquals( - reflect(generateRemoteTaskRunnerConfig( - timeout, - timeout, - version, - max, - timeout, - pendingTasksRunnerNumThreads, - maxRetriesBeforeBlacklist, - taskBlackListBackoffTime, - taskBlackListCleanupPeriod - )), - reflect(generateRemoteTaskRunnerConfig( - DEFAULT_TIMEOUT, - timeout, - version, - max, - timeout, - pendingTasksRunnerNumThreads, - maxRetriesBeforeBlacklist, - taskBlackListBackoffTime, - taskBlackListCleanupPeriod - )) - ); - Assert.assertNotEquals( - reflect(generateRemoteTaskRunnerConfig( - timeout, - timeout, - version, - max, - timeout, - 
pendingTasksRunnerNumThreads, - maxRetriesBeforeBlacklist, - taskBlackListBackoffTime, - taskBlackListCleanupPeriod - )), - reflect(generateRemoteTaskRunnerConfig( - timeout, - DEFAULT_TIMEOUT, - version, - max, - timeout, - pendingTasksRunnerNumThreads, - maxRetriesBeforeBlacklist, - taskBlackListBackoffTime, - taskBlackListCleanupPeriod - )) - ); - Assert.assertNotEquals( - reflect(generateRemoteTaskRunnerConfig( - timeout, - timeout, - version, - max, - timeout, - pendingTasksRunnerNumThreads, - maxRetriesBeforeBlacklist, - taskBlackListBackoffTime, - taskBlackListCleanupPeriod - )), - reflect(generateRemoteTaskRunnerConfig( - timeout, - timeout, - DEFAULT_VERSION, - max, - timeout, - pendingTasksRunnerNumThreads, - maxRetriesBeforeBlacklist, - taskBlackListBackoffTime, - taskBlackListCleanupPeriod - )) - ); - - Assert.assertNotEquals( - reflect(generateRemoteTaskRunnerConfig( - timeout, - timeout, - version, - max, - timeout, - pendingTasksRunnerNumThreads, - maxRetriesBeforeBlacklist, - taskBlackListBackoffTime, - taskBlackListCleanupPeriod - )), - reflect(generateRemoteTaskRunnerConfig( - timeout, - timeout, - version, - DEFAULT_MAX_ZNODE, - timeout, - pendingTasksRunnerNumThreads, - maxRetriesBeforeBlacklist, - taskBlackListBackoffTime, - taskBlackListCleanupPeriod - )) - ); - - - Assert.assertNotEquals( - reflect(generateRemoteTaskRunnerConfig( - timeout, - timeout, - version, - max, - timeout, - pendingTasksRunnerNumThreads, - maxRetriesBeforeBlacklist, - taskBlackListBackoffTime, - taskBlackListCleanupPeriod - )), - reflect(generateRemoteTaskRunnerConfig( - timeout, - timeout, - version, - max, - DEFAULT_TIMEOUT, - pendingTasksRunnerNumThreads, - maxRetriesBeforeBlacklist, - taskBlackListBackoffTime, - taskBlackListCleanupPeriod - )) - ); - - Assert.assertNotEquals( - reflect(generateRemoteTaskRunnerConfig( - timeout, - timeout, - version, - max, - timeout, - pendingTasksRunnerNumThreads, - maxRetriesBeforeBlacklist, - taskBlackListBackoffTime, - 
taskBlackListCleanupPeriod - )), - reflect(generateRemoteTaskRunnerConfig( - timeout, - timeout, - version, - max, - timeout, - DEFAULT_PENDING_TASKS_RUNNER_NUM_THREADS, - maxRetriesBeforeBlacklist, - taskBlackListBackoffTime, - taskBlackListCleanupPeriod - )) - ); - - Assert.assertNotEquals( - reflect(generateRemoteTaskRunnerConfig( - timeout, - timeout, - version, - max, - timeout, - pendingTasksRunnerNumThreads, - maxRetriesBeforeBlacklist, - taskBlackListBackoffTime, - taskBlackListCleanupPeriod - )), - reflect(generateRemoteTaskRunnerConfig( - timeout, - timeout, - version, - max, - timeout, - pendingTasksRunnerNumThreads, - DEFAULT_MAX_RETRIES_BEFORE_BLACKLIST, - taskBlackListBackoffTime, - taskBlackListCleanupPeriod - )) - ); - - Assert.assertNotEquals( - reflect(generateRemoteTaskRunnerConfig( - timeout, - timeout, - version, - max, - timeout, - pendingTasksRunnerNumThreads, - maxRetriesBeforeBlacklist, - taskBlackListBackoffTime, - taskBlackListCleanupPeriod - )), - reflect(generateRemoteTaskRunnerConfig( - timeout, - timeout, - version, - max, - timeout, - pendingTasksRunnerNumThreads, - maxRetriesBeforeBlacklist, - DEFAULT_TASK_BACKOFF, - taskBlackListCleanupPeriod - )) - ); - - Assert.assertNotEquals( - reflect(generateRemoteTaskRunnerConfig( - timeout, - timeout, - version, - max, - timeout, - pendingTasksRunnerNumThreads, - maxRetriesBeforeBlacklist, - taskBlackListBackoffTime, - taskBlackListCleanupPeriod - )), - reflect(generateRemoteTaskRunnerConfig( - timeout, - timeout, - version, - max, - timeout, - pendingTasksRunnerNumThreads, - maxRetriesBeforeBlacklist, - taskBlackListBackoffTime, - DEFAULT_BLACKLIST_CLEANUP_PERIOD - )) - ); - } - - @Test - public void testHashCode() throws Exception - { - Assert.assertEquals( - reflect(generateRemoteTaskRunnerConfig( - DEFAULT_TIMEOUT, - DEFAULT_TIMEOUT, - DEFAULT_VERSION, - DEFAULT_MAX_ZNODE, - DEFAULT_TIMEOUT, - DEFAULT_PENDING_TASKS_RUNNER_NUM_THREADS, - DEFAULT_MAX_RETRIES_BEFORE_BLACKLIST, - 
DEFAULT_TASK_BACKOFF, - DEFAULT_BLACKLIST_CLEANUP_PERIOD - )).hashCode(), - reflect(generateRemoteTaskRunnerConfig( - DEFAULT_TIMEOUT, - DEFAULT_TIMEOUT, - DEFAULT_VERSION, - DEFAULT_MAX_ZNODE, - DEFAULT_TIMEOUT, - DEFAULT_PENDING_TASKS_RUNNER_NUM_THREADS, - DEFAULT_MAX_RETRIES_BEFORE_BLACKLIST, - DEFAULT_TASK_BACKOFF, - DEFAULT_BLACKLIST_CLEANUP_PERIOD - )).hashCode() - ); - final Period timeout = Period.years(999); - final String version = "someVersion"; - final long max = 20 * 1024; - final int pendingTasksRunnerNumThreads = 20; - final int maxRetriesBeforeBlacklist = 80; - final Period taskBlackListBackoffTime = new Period("PT1M"); - final Period taskBlackListCleanupPeriod = Period.years(10); - Assert.assertEquals( - reflect(generateRemoteTaskRunnerConfig( - timeout, - timeout, - version, - max, - timeout, - pendingTasksRunnerNumThreads, - maxRetriesBeforeBlacklist, - taskBlackListBackoffTime, - taskBlackListCleanupPeriod - )).hashCode(), - reflect(generateRemoteTaskRunnerConfig( - timeout, - timeout, - version, - max, - timeout, - pendingTasksRunnerNumThreads, - maxRetriesBeforeBlacklist, - taskBlackListBackoffTime, - taskBlackListCleanupPeriod - )).hashCode() - ); - Assert.assertNotEquals( - reflect(generateRemoteTaskRunnerConfig( - timeout, - timeout, - version, - max, - timeout, - pendingTasksRunnerNumThreads, - maxRetriesBeforeBlacklist, - taskBlackListBackoffTime, - taskBlackListCleanupPeriod - )).hashCode(), - reflect(generateRemoteTaskRunnerConfig( - DEFAULT_TIMEOUT, - timeout, - version, - max, - timeout, - pendingTasksRunnerNumThreads, - maxRetriesBeforeBlacklist, - taskBlackListBackoffTime, - taskBlackListCleanupPeriod - )).hashCode() - ); - Assert.assertNotEquals( - reflect(generateRemoteTaskRunnerConfig( - timeout, - timeout, - version, - max, - timeout, - pendingTasksRunnerNumThreads, - maxRetriesBeforeBlacklist, - taskBlackListBackoffTime, - taskBlackListCleanupPeriod - )).hashCode(), - reflect(generateRemoteTaskRunnerConfig( - timeout, - 
DEFAULT_TIMEOUT, - version, - max, - timeout, - pendingTasksRunnerNumThreads, - maxRetriesBeforeBlacklist, - taskBlackListBackoffTime, - taskBlackListCleanupPeriod - )).hashCode() - ); - Assert.assertNotEquals( - reflect(generateRemoteTaskRunnerConfig( - timeout, - timeout, - version, - max, - timeout, - pendingTasksRunnerNumThreads, - maxRetriesBeforeBlacklist, - taskBlackListBackoffTime, - taskBlackListCleanupPeriod - )).hashCode(), - reflect(generateRemoteTaskRunnerConfig( - timeout, - timeout, - DEFAULT_VERSION, - max, - timeout, - pendingTasksRunnerNumThreads, - maxRetriesBeforeBlacklist, - taskBlackListBackoffTime, - taskBlackListCleanupPeriod - )).hashCode() - ); - - Assert.assertNotEquals( - reflect(generateRemoteTaskRunnerConfig( - timeout, - timeout, - version, - max, - timeout, - pendingTasksRunnerNumThreads, - maxRetriesBeforeBlacklist, - taskBlackListBackoffTime, - taskBlackListCleanupPeriod - )).hashCode(), - reflect(generateRemoteTaskRunnerConfig( - timeout, - timeout, - version, - DEFAULT_MAX_ZNODE, - timeout, - pendingTasksRunnerNumThreads, - maxRetriesBeforeBlacklist, - taskBlackListBackoffTime, - taskBlackListCleanupPeriod - )).hashCode() - ); - - - Assert.assertNotEquals( - reflect(generateRemoteTaskRunnerConfig( - timeout, - timeout, - version, - max, - timeout, - pendingTasksRunnerNumThreads, - maxRetriesBeforeBlacklist, - taskBlackListBackoffTime, - taskBlackListCleanupPeriod - )).hashCode(), - reflect(generateRemoteTaskRunnerConfig( - timeout, - timeout, - version, - max, - DEFAULT_TIMEOUT, - pendingTasksRunnerNumThreads, - maxRetriesBeforeBlacklist, - taskBlackListBackoffTime, - taskBlackListCleanupPeriod - )).hashCode() - ); - - Assert.assertNotEquals( - reflect(generateRemoteTaskRunnerConfig( - timeout, - timeout, - version, - max, - timeout, - pendingTasksRunnerNumThreads, - maxRetriesBeforeBlacklist, - taskBlackListBackoffTime, - taskBlackListCleanupPeriod - )).hashCode(), - reflect(generateRemoteTaskRunnerConfig( - timeout, - timeout, 
- version, - max, - timeout, - DEFAULT_PENDING_TASKS_RUNNER_NUM_THREADS, - maxRetriesBeforeBlacklist, - taskBlackListBackoffTime, - taskBlackListCleanupPeriod - )).hashCode() - ); - - Assert.assertNotEquals( - reflect(generateRemoteTaskRunnerConfig( - timeout, - timeout, - version, - max, - timeout, - pendingTasksRunnerNumThreads, - maxRetriesBeforeBlacklist, - taskBlackListBackoffTime, - taskBlackListCleanupPeriod - )).hashCode(), - reflect(generateRemoteTaskRunnerConfig( - timeout, - timeout, - version, - max, - timeout, - pendingTasksRunnerNumThreads, - DEFAULT_MAX_RETRIES_BEFORE_BLACKLIST, - taskBlackListBackoffTime, - taskBlackListCleanupPeriod - )).hashCode() - ); - - Assert.assertNotEquals( - reflect(generateRemoteTaskRunnerConfig( - timeout, - timeout, - version, - max, - timeout, - pendingTasksRunnerNumThreads, - maxRetriesBeforeBlacklist, - taskBlackListBackoffTime, - taskBlackListCleanupPeriod - )).hashCode(), - reflect(generateRemoteTaskRunnerConfig( - timeout, - timeout, - version, - max, - timeout, - pendingTasksRunnerNumThreads, - maxRetriesBeforeBlacklist, - DEFAULT_TASK_BACKOFF, - taskBlackListCleanupPeriod - )).hashCode() - ); - - Assert.assertNotEquals( - reflect(generateRemoteTaskRunnerConfig( - timeout, - timeout, - version, - max, - timeout, - pendingTasksRunnerNumThreads, - maxRetriesBeforeBlacklist, - taskBlackListBackoffTime, - taskBlackListCleanupPeriod - )).hashCode(), - reflect(generateRemoteTaskRunnerConfig( - timeout, - timeout, - version, - max, - timeout, - pendingTasksRunnerNumThreads, - maxRetriesBeforeBlacklist, - taskBlackListBackoffTime, - DEFAULT_BLACKLIST_CLEANUP_PERIOD - )).hashCode() - ); - } - - @Test - public void testMaxZnodeBytesLowerThanExpected() - { - final Injector injector = GuiceInjectors.makeStartupInjectorWithModules(ImmutableList.of( - binder -> IndexingServiceModuleHelper.configureTaskRunnerConfigs(binder)) - ); - - this.expectedException.expect(ProvisionException.class); - 
this.expectedException.expectMessage("maxZnodeBytes must be in the range of [10KiB, 2GiB)"); - - Properties props = new Properties(); - props.put(IndexingServiceModuleHelper.INDEXER_RUNNER_PROPERTY_PREFIX + ".maxZnodeBytes", "9KiB"); - - JsonConfigProvider configProvider = JsonConfigProvider.of( - IndexingServiceModuleHelper.INDEXER_RUNNER_PROPERTY_PREFIX, - RemoteTaskRunnerConfig.class - ); - configProvider.inject(props, injector.getBinding(JsonConfigurator.class).getProvider().get()); - configProvider.get(); - } - - @Test - public void testMaxZnodeBytesGreaterThanExpected() - { - final Injector injector = GuiceInjectors.makeStartupInjectorWithModules(ImmutableList.of( - binder -> IndexingServiceModuleHelper.configureTaskRunnerConfigs(binder)) - ); - - this.expectedException.expect(ProvisionException.class); - this.expectedException.expectMessage("maxZnodeBytes must be in the range of [10KiB, 2GiB)"); - - Properties props = new Properties(); - props.put(IndexingServiceModuleHelper.INDEXER_RUNNER_PROPERTY_PREFIX + ".maxZnodeBytes", "2GiB"); - - JsonConfigProvider configProvider = JsonConfigProvider.of( - IndexingServiceModuleHelper.INDEXER_RUNNER_PROPERTY_PREFIX, - RemoteTaskRunnerConfig.class - ); - configProvider.inject(props, injector.getBinding(JsonConfigurator.class).getProvider().get()); - configProvider.get(); - } - - - private RemoteTaskRunnerConfig reflect(RemoteTaskRunnerConfig config) throws IOException - { - return MAPPER.readValue(MAPPER.writeValueAsString(config), RemoteTaskRunnerConfig.class); - } - - private RemoteTaskRunnerConfig generateRemoteTaskRunnerConfig( - Period taskAssignmentTimeout, - Period taskCleanupTimeout, - String minWorkerVersion, - long maxZnodeBytes, - Period taskShutdownLinkTimeout, - int pendingTasksRunnerNumThreads, - int maxRetriesBeforeBlacklist, - Period taskBlackListBackoffTime, - Period taskBlackListCleanupPeriod - ) - { - final Map objectMap = new HashMap<>(); - objectMap.put("taskAssignmentTimeout", 
taskAssignmentTimeout); - objectMap.put("taskCleanupTimeout", taskCleanupTimeout); - objectMap.put("minWorkerVersion", minWorkerVersion); - objectMap.put("maxZnodeBytes", maxZnodeBytes); - objectMap.put("taskShutdownLinkTimeout", taskShutdownLinkTimeout); - objectMap.put("pendingTasksRunnerNumThreads", pendingTasksRunnerNumThreads); - objectMap.put("maxRetriesBeforeBlacklist", maxRetriesBeforeBlacklist); - objectMap.put("workerBlackListBackoffTime", taskBlackListBackoffTime); - objectMap.put("workerBlackListCleanupPeriod", taskBlackListCleanupPeriod); - return MAPPER.convertValue(objectMap, RemoteTaskRunnerConfig.class); - } -} diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/hrtr/HttpRemoteTaskRunnerTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/hrtr/HttpRemoteTaskRunnerTest.java index 8e4d1d3aacd7..578bb7d9491d 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/hrtr/HttpRemoteTaskRunnerTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/hrtr/HttpRemoteTaskRunnerTest.java @@ -28,7 +28,6 @@ import com.google.common.collect.Iterables; import com.google.common.io.ByteStreams; import com.google.common.util.concurrent.Futures; -import org.apache.curator.framework.CuratorFramework; import org.apache.druid.common.guava.DSuppliers; import org.apache.druid.concurrent.LifecycleLock; import org.apache.druid.discovery.DiscoveryDruidNode; @@ -67,8 +66,6 @@ import org.apache.druid.segment.TestHelper; import org.apache.druid.server.DruidNode; import org.apache.druid.server.coordination.ChangeRequestHttpSyncer; -import org.apache.druid.server.initialization.IndexerZkConfig; -import org.apache.druid.server.initialization.ZkPathsConfig; import org.apache.druid.server.metrics.NoopServiceEmitter; import org.easymock.Capture; import org.easymock.EasyMock; @@ -296,8 +293,6 @@ public int getPendingTasksRunnerNumThreads() provisioningStrategy, druidNodeDiscoveryProvider, 
EasyMock.createNiceMock(TaskStorage.class), - EasyMock.createNiceMock(CuratorFramework.class), - new IndexerZkConfig(new ZkPathsConfig(), null, null, null, null), new NoopServiceEmitter() ) { @@ -365,8 +360,6 @@ public int getPendingTasksRunnerNumThreads() new NoopProvisioningStrategy<>(), druidNodeDiscoveryProvider, EasyMock.createNiceMock(TaskStorage.class), - EasyMock.createNiceMock(CuratorFramework.class), - new IndexerZkConfig(new ZkPathsConfig(), null, null, null, null), new NoopServiceEmitter() ) { @@ -470,8 +463,6 @@ public int getPendingTasksRunnerNumThreads() new NoopProvisioningStrategy<>(), druidNodeDiscoveryProvider, taskStorageMock, - EasyMock.createNiceMock(CuratorFramework.class), - new IndexerZkConfig(new ZkPathsConfig(), null, null, null, null), new NoopServiceEmitter() ) { @@ -613,8 +604,6 @@ public int getPendingTasksRunnerNumThreads() new NoopProvisioningStrategy<>(), druidNodeDiscoveryProvider, EasyMock.createNiceMock(TaskStorage.class), - EasyMock.createNiceMock(CuratorFramework.class), - new IndexerZkConfig(new ZkPathsConfig(), null, null, null, null), new NoopServiceEmitter() ) { @@ -789,8 +778,6 @@ public Period getTaskCleanupTimeout() new NoopProvisioningStrategy<>(), druidNodeDiscoveryProvider, EasyMock.createNiceMock(TaskStorage.class), - EasyMock.createNiceMock(CuratorFramework.class), - new IndexerZkConfig(new ZkPathsConfig(), null, null, null, null), new NoopServiceEmitter() ) { @@ -987,8 +974,6 @@ public int getPendingTasksRunnerNumThreads() new NoopProvisioningStrategy<>(), druidNodeDiscoveryProvider, EasyMock.createNiceMock(TaskStorage.class), - EasyMock.createNiceMock(CuratorFramework.class), - new IndexerZkConfig(new ZkPathsConfig(), null, null, null, null), new NoopServiceEmitter() ) { @@ -1500,8 +1485,6 @@ public Period getTaskAssignmentTimeout() new NoopProvisioningStrategy<>(), druidNodeDiscoveryProvider, EasyMock.createNiceMock(TaskStorage.class), - EasyMock.createNiceMock(CuratorFramework.class), - new IndexerZkConfig(new 
ZkPathsConfig(), null, null, null, null), new NoopServiceEmitter() ) { @@ -1613,8 +1596,6 @@ public Period getTaskAssignmentTimeout() new NoopProvisioningStrategy<>(), druidNodeDiscoveryProvider, EasyMock.createNiceMock(TaskStorage.class), - EasyMock.createNiceMock(CuratorFramework.class), - new IndexerZkConfig(new ZkPathsConfig(), null, null, null, null), new NoopServiceEmitter() ) { @@ -1774,8 +1755,6 @@ public void testSyncMonitoring_finiteIteration() new NoopProvisioningStrategy<>(), druidNodeDiscoveryProvider, EasyMock.createMock(TaskStorage.class), - EasyMock.createNiceMock(CuratorFramework.class), - new IndexerZkConfig(new ZkPathsConfig(), null, null, null, null), new NoopServiceEmitter() ) { @@ -1820,8 +1799,6 @@ public void testGetMaximumCapacity_noWorkerConfig() new TestProvisioningStrategy<>(), druidNodeDiscoveryProvider, EasyMock.createMock(TaskStorage.class), - EasyMock.createNiceMock(CuratorFramework.class), - new IndexerZkConfig(new ZkPathsConfig(), null, null, null, null), new NoopServiceEmitter() ); Assert.assertEquals(-1, taskRunner.getMaximumCapacityWithAutoscale()); @@ -1844,8 +1821,6 @@ public void testGetMaximumCapacity_noAutoScaler() new TestProvisioningStrategy<>(), druidNodeDiscoveryProvider, EasyMock.createMock(TaskStorage.class), - EasyMock.createNiceMock(CuratorFramework.class), - new IndexerZkConfig(new ZkPathsConfig(), null, null, null, null), new NoopServiceEmitter() ); Assert.assertEquals(-1, taskRunner.getMaximumCapacityWithAutoscale()); @@ -1868,8 +1843,6 @@ public void testGetMaximumCapacity_withAutoScaler() new TestProvisioningStrategy<>(), druidNodeDiscoveryProvider, EasyMock.createMock(TaskStorage.class), - EasyMock.createNiceMock(CuratorFramework.class), - new IndexerZkConfig(new ZkPathsConfig(), null, null, null, null), new NoopServiceEmitter() ); // Default autoscaler has max workers of 0 @@ -1902,8 +1875,6 @@ public int getPendingTasksRunnerNumThreads() new NoopProvisioningStrategy<>(), druidNodeDiscoveryProvider, 
taskStorage, - EasyMock.createNiceMock(CuratorFramework.class), - new IndexerZkConfig(new ZkPathsConfig(), null, null, null, null), new NoopServiceEmitter() ); @@ -2325,8 +2296,6 @@ public int getPendingTasksRunnerNumThreads() provisioningStrategy, druidNodeDiscoveryProvider, EasyMock.createNiceMock(TaskStorage.class), - EasyMock.createNiceMock(CuratorFramework.class), - new IndexerZkConfig(new ZkPathsConfig(), null, null, null, null), new NoopServiceEmitter() ) { diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/http/OverlordTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/http/OverlordTest.java index 9852f897056d..1c3893517093 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/http/OverlordTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/http/OverlordTest.java @@ -33,7 +33,6 @@ import org.apache.curator.test.Timing; import org.apache.druid.audit.AuditManager; import org.apache.druid.curator.PotentiallyGzippedCompressionProvider; -import org.apache.druid.curator.discovery.LatchableServiceAnnouncer; import org.apache.druid.discovery.DruidLeaderSelector; import org.apache.druid.indexer.TaskLocation; import org.apache.druid.indexer.TaskState; @@ -115,7 +114,6 @@ public class OverlordTest private GlobalTaskLockbox taskLockbox; private TaskStorage taskStorage; private TaskActionClientFactory taskActionClientFactory; - private CountDownLatch announcementLatch; private DruidNode druidNode; private OverlordResource overlordResource; private Map taskCompletionCountDownLatches; @@ -195,7 +193,6 @@ public void setUp() throws Exception taskCompletionCountDownLatches = new HashMap<>(); taskCompletionCountDownLatches.put(taskId0, new CountDownLatch(1)); taskCompletionCountDownLatches.put(taskId1, new CountDownLatch(1)); - announcementLatch = new CountDownLatch(1); setupServerAndCurator(); curator.start(); curator.blockUntilConnected(); @@ -251,9 +248,7 @@ 
public MockTaskRunner get() taskLockbox, taskStorage, taskActionClientFactory, - druidNode, taskRunnerFactory, - new LatchableServiceAnnouncer(announcementLatch, null), new CoordinatorOverlordServiceConfig(null, null), serviceEmitter, supervisorManager, @@ -274,9 +269,7 @@ public void testOverlordRun() throws Exception { // basic task master lifecycle test overlord.start(); - announcementLatch.await(); while (!overlord.isLeader()) { - // I believe the control will never reach here and thread will never sleep but just to be on safe side Thread.sleep(10); } Assert.assertEquals(overlord.getCurrentLeader(), druidNode.getHostAndPort()); diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/setup/EqualDistributionWithAffinityWorkerSelectStrategyTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/setup/EqualDistributionWithAffinityWorkerSelectStrategyTest.java index 1e81be9cc978..3450aa8d4807 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/setup/EqualDistributionWithAffinityWorkerSelectStrategyTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/setup/EqualDistributionWithAffinityWorkerSelectStrategyTest.java @@ -24,7 +24,7 @@ import com.google.common.collect.ImmutableSet; import org.apache.druid.indexing.common.task.NoopTask; import org.apache.druid.indexing.overlord.ImmutableWorkerInfo; -import org.apache.druid.indexing.overlord.config.RemoteTaskRunnerConfig; +import org.apache.druid.indexing.overlord.config.HttpRemoteTaskRunnerConfig; import org.apache.druid.indexing.worker.Worker; import org.apache.druid.indexing.worker.config.WorkerConfig; import org.apache.druid.java.util.common.DateTimes; @@ -49,7 +49,7 @@ public void testFindWorkerForTask() NoopTask noopTask = NoopTask.forDatasource("foo"); ImmutableWorkerInfo worker = strategy.findWorkerForTask( - new RemoteTaskRunnerConfig(), + new HttpRemoteTaskRunnerConfig(), ImmutableMap.of( "localhost0", new 
ImmutableWorkerInfo( @@ -94,7 +94,7 @@ public void testFindWorkerForTaskWithNulls() ); ImmutableWorkerInfo worker = strategy.findWorkerForTask( - new RemoteTaskRunnerConfig(), + new HttpRemoteTaskRunnerConfig(), ImmutableMap.of( "lhost", new ImmutableWorkerInfo( @@ -125,7 +125,7 @@ public void testIsolation() ); ImmutableWorkerInfo worker = strategy.findWorkerForTask( - new RemoteTaskRunnerConfig(), + new HttpRemoteTaskRunnerConfig(), ImmutableMap.of( "localhost", new ImmutableWorkerInfo( @@ -170,7 +170,7 @@ public void testFindWorkerForTaskWithGlobalLimits() NoopTask noopTask = NoopTask.forDatasource("foo"); ImmutableWorkerInfo worker = strategy.findWorkerForTask( - new RemoteTaskRunnerConfig(), + new HttpRemoteTaskRunnerConfig(), ImmutableMap.of( "localhost0", new ImmutableWorkerInfo( @@ -194,7 +194,7 @@ public void testFindWorkerForTaskWithGlobalLimits() Assert.assertNotNull(worker); ImmutableWorkerInfo worker1 = strategy.findWorkerForTask( - new RemoteTaskRunnerConfig(), + new HttpRemoteTaskRunnerConfig(), ImmutableMap.of( "localhost0", new ImmutableWorkerInfo( @@ -237,7 +237,7 @@ public void testFindWorkerForTaskWithGlobalRatios() NoopTask noopTask = NoopTask.forDatasource("foo"); ImmutableWorkerInfo worker = strategy.findWorkerForTask( - new RemoteTaskRunnerConfig(), + new HttpRemoteTaskRunnerConfig(), ImmutableMap.of( "localhost0", new ImmutableWorkerInfo( @@ -261,7 +261,7 @@ public void testFindWorkerForTaskWithGlobalRatios() Assert.assertNotNull(worker); ImmutableWorkerInfo worker1 = strategy.findWorkerForTask( - new RemoteTaskRunnerConfig(), + new HttpRemoteTaskRunnerConfig(), ImmutableMap.of( "localhost0", new ImmutableWorkerInfo( diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/setup/EqualDistributionWithCategorySpecWorkerSelectStrategyTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/setup/EqualDistributionWithCategorySpecWorkerSelectStrategyTest.java index 3c22a6c4c6d1..442a64ae068d 100644 --- 
a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/setup/EqualDistributionWithCategorySpecWorkerSelectStrategyTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/setup/EqualDistributionWithCategorySpecWorkerSelectStrategyTest.java @@ -26,7 +26,7 @@ import org.apache.druid.indexing.common.task.NoopTask; import org.apache.druid.indexing.common.task.Task; import org.apache.druid.indexing.overlord.ImmutableWorkerInfo; -import org.apache.druid.indexing.overlord.config.RemoteTaskRunnerConfig; +import org.apache.druid.indexing.overlord.config.HttpRemoteTaskRunnerConfig; import org.apache.druid.indexing.seekablestream.SeekableStreamIndexTaskIOConfig; import org.apache.druid.indexing.seekablestream.SeekableStreamIndexTaskTuningConfig; import org.apache.druid.indexing.seekablestream.TestSeekableStreamIndexTask; @@ -212,7 +212,7 @@ public void testSupervisorIdCategoryAffinity() new EqualDistributionWithCategorySpecWorkerSelectStrategy(workerCategorySpec, null); ImmutableWorkerInfo worker = strategy.findWorkerForTask( - new RemoteTaskRunnerConfig(), + new HttpRemoteTaskRunnerConfig(), WORKERS_FOR_TIER_TESTS, taskWithSupervisor ); @@ -241,7 +241,7 @@ public void testSupervisorIdCategoryAffinityFallbackToDatasource() new EqualDistributionWithCategorySpecWorkerSelectStrategy(workerCategorySpec, null); ImmutableWorkerInfo worker = strategy.findWorkerForTask( - new RemoteTaskRunnerConfig(), + new HttpRemoteTaskRunnerConfig(), WORKERS_FOR_TIER_TESTS, taskWithSupervisor ); @@ -271,7 +271,7 @@ public void testSupervisorIdCategoryAffinityFallbackToDefault() new EqualDistributionWithCategorySpecWorkerSelectStrategy(workerCategorySpec, null); ImmutableWorkerInfo worker = strategy.findWorkerForTask( - new RemoteTaskRunnerConfig(), + new HttpRemoteTaskRunnerConfig(), WORKERS_FOR_TIER_TESTS, taskWithSupervisor ); @@ -286,7 +286,7 @@ private ImmutableWorkerInfo selectWorker(WorkerCategorySpec workerCategorySpec) new 
EqualDistributionWithCategorySpecWorkerSelectStrategy(workerCategorySpec, null); ImmutableWorkerInfo worker = strategy.findWorkerForTask( - new RemoteTaskRunnerConfig(), + new HttpRemoteTaskRunnerConfig(), WORKERS_FOR_TIER_TESTS, NoopTask.forDatasource("ds1") ); diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/setup/EqualDistributionWorkerSelectStrategyTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/setup/EqualDistributionWorkerSelectStrategyTest.java index 05b1ab36f497..2568217042b2 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/setup/EqualDistributionWorkerSelectStrategyTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/setup/EqualDistributionWorkerSelectStrategyTest.java @@ -23,7 +23,7 @@ import com.google.common.collect.ImmutableSet; import org.apache.druid.indexing.common.task.NoopTask; import org.apache.druid.indexing.overlord.ImmutableWorkerInfo; -import org.apache.druid.indexing.overlord.config.RemoteTaskRunnerConfig; +import org.apache.druid.indexing.overlord.config.HttpRemoteTaskRunnerConfig; import org.apache.druid.indexing.worker.Worker; import org.apache.druid.indexing.worker.config.WorkerConfig; import org.apache.druid.java.util.common.DateTimes; @@ -72,7 +72,7 @@ public void testFindWorkerForTask() final EqualDistributionWorkerSelectStrategy strategy = new EqualDistributionWorkerSelectStrategy(null, null); ImmutableWorkerInfo worker = strategy.findWorkerForTask( - new RemoteTaskRunnerConfig(), + new HttpRemoteTaskRunnerConfig(), ImmutableMap.of( "lhost", new ImmutableWorkerInfo( @@ -100,7 +100,7 @@ public void testFindWorkerForTaskWhenSameCurrCapacityUsed() final EqualDistributionWorkerSelectStrategy strategy = new EqualDistributionWorkerSelectStrategy(null, null); ImmutableWorkerInfo worker = strategy.findWorkerForTask( - new RemoteTaskRunnerConfig(), + new HttpRemoteTaskRunnerConfig(), ImmutableMap.of( "lhost", new 
ImmutableWorkerInfo( @@ -128,7 +128,7 @@ public void testOneDisableWorkerDifferentUsedCapacity() final EqualDistributionWorkerSelectStrategy strategy = new EqualDistributionWorkerSelectStrategy(null, null); ImmutableWorkerInfo worker = strategy.findWorkerForTask( - new RemoteTaskRunnerConfig(), + new HttpRemoteTaskRunnerConfig(), ImmutableMap.of( "lhost", new ImmutableWorkerInfo( @@ -156,7 +156,7 @@ public void testOneDisableWorkerSameUsedCapacity() final EqualDistributionWorkerSelectStrategy strategy = new EqualDistributionWorkerSelectStrategy(null, null); ImmutableWorkerInfo worker = strategy.findWorkerForTask( - new RemoteTaskRunnerConfig(), + new HttpRemoteTaskRunnerConfig(), ImmutableMap.of( "lhost", new ImmutableWorkerInfo( @@ -193,7 +193,7 @@ public void testWeakAffinity() ); ImmutableWorkerInfo workerFoo = strategy.findWorkerForTask( - new RemoteTaskRunnerConfig(), + new HttpRemoteTaskRunnerConfig(), WORKERS_FOR_AFFINITY_TESTS, createDummyTask("foo") ); @@ -201,14 +201,14 @@ public void testWeakAffinity() // With weak affinity, bar (which has no affinity workers available) can use a non-affinity worker. ImmutableWorkerInfo workerBar = strategy.findWorkerForTask( - new RemoteTaskRunnerConfig(), + new HttpRemoteTaskRunnerConfig(), WORKERS_FOR_AFFINITY_TESTS, createDummyTask("bar") ); Assert.assertEquals("localhost0", workerBar.getWorker().getHost()); ImmutableWorkerInfo workerBaz = strategy.findWorkerForTask( - new RemoteTaskRunnerConfig(), + new HttpRemoteTaskRunnerConfig(), WORKERS_FOR_AFFINITY_TESTS, createDummyTask("baz") ); @@ -230,7 +230,7 @@ public void testStrongAffinity() ); ImmutableWorkerInfo workerFoo = strategy.findWorkerForTask( - new RemoteTaskRunnerConfig(), + new HttpRemoteTaskRunnerConfig(), WORKERS_FOR_AFFINITY_TESTS, createDummyTask("foo") ); @@ -238,14 +238,14 @@ public void testStrongAffinity() // With strong affinity, no workers can be found for bar. 
ImmutableWorkerInfo workerBar = strategy.findWorkerForTask( - new RemoteTaskRunnerConfig(), + new HttpRemoteTaskRunnerConfig(), WORKERS_FOR_AFFINITY_TESTS, createDummyTask("bar") ); Assert.assertNull(workerBar); ImmutableWorkerInfo workerBaz = strategy.findWorkerForTask( - new RemoteTaskRunnerConfig(), + new HttpRemoteTaskRunnerConfig(), WORKERS_FOR_AFFINITY_TESTS, createDummyTask("baz") ); diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/setup/FillCapacityWithAffinityWorkerSelectStrategyTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/setup/FillCapacityWithAffinityWorkerSelectStrategyTest.java index 207c6e43dda3..0455e394937b 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/setup/FillCapacityWithAffinityWorkerSelectStrategyTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/setup/FillCapacityWithAffinityWorkerSelectStrategyTest.java @@ -23,7 +23,7 @@ import com.google.common.collect.ImmutableSet; import org.apache.druid.indexing.common.task.NoopTask; import org.apache.druid.indexing.overlord.ImmutableWorkerInfo; -import org.apache.druid.indexing.overlord.config.RemoteTaskRunnerConfig; +import org.apache.druid.indexing.overlord.config.HttpRemoteTaskRunnerConfig; import org.apache.druid.indexing.worker.Worker; import org.apache.druid.indexing.worker.config.WorkerConfig; import org.apache.druid.java.util.common.DateTimes; @@ -45,7 +45,7 @@ public void testFindWorkerForTask() ); ImmutableWorkerInfo worker = strategy.findWorkerForTask( - new RemoteTaskRunnerConfig(), + new HttpRemoteTaskRunnerConfig(), ImmutableMap.of( "lhost", new ImmutableWorkerInfo( @@ -76,7 +76,7 @@ public void testFindWorkerForTaskWithNulls() ); ImmutableWorkerInfo worker = strategy.findWorkerForTask( - new RemoteTaskRunnerConfig(), + new HttpRemoteTaskRunnerConfig(), ImmutableMap.of( "lhost", new ImmutableWorkerInfo( @@ -107,7 +107,7 @@ public void testIsolation() ); 
ImmutableWorkerInfo worker = strategy.findWorkerForTask( - new RemoteTaskRunnerConfig(), + new HttpRemoteTaskRunnerConfig(), ImmutableMap.of( "localhost", new ImmutableWorkerInfo( @@ -137,7 +137,7 @@ public void testFindWorkerForTaskWithGlobalLimits() NoopTask noopTask = NoopTask.forDatasource("foo"); ImmutableWorkerInfo worker = strategy.findWorkerForTask( - new RemoteTaskRunnerConfig(), + new HttpRemoteTaskRunnerConfig(), ImmutableMap.of( "localhost0", new ImmutableWorkerInfo( @@ -161,7 +161,7 @@ public void testFindWorkerForTaskWithGlobalLimits() Assert.assertNotNull(worker); ImmutableWorkerInfo worker1 = strategy.findWorkerForTask( - new RemoteTaskRunnerConfig(), + new HttpRemoteTaskRunnerConfig(), ImmutableMap.of( "localhost0", new ImmutableWorkerInfo( @@ -204,7 +204,7 @@ public void testFindWorkerForTaskWithGlobalRatios() NoopTask noopTask = NoopTask.forDatasource("foo"); ImmutableWorkerInfo worker = strategy.findWorkerForTask( - new RemoteTaskRunnerConfig(), + new HttpRemoteTaskRunnerConfig(), ImmutableMap.of( "localhost0", new ImmutableWorkerInfo( @@ -228,7 +228,7 @@ public void testFindWorkerForTaskWithGlobalRatios() Assert.assertNotNull(worker); ImmutableWorkerInfo worker1 = strategy.findWorkerForTask( - new RemoteTaskRunnerConfig(), + new HttpRemoteTaskRunnerConfig(), ImmutableMap.of( "localhost0", new ImmutableWorkerInfo( diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/setup/FillCapacityWithCategorySpecWorkerSelectStrategyTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/setup/FillCapacityWithCategorySpecWorkerSelectStrategyTest.java index 27fde352caba..a63dde25b25c 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/setup/FillCapacityWithCategorySpecWorkerSelectStrategyTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/setup/FillCapacityWithCategorySpecWorkerSelectStrategyTest.java @@ -26,7 +26,7 @@ import 
org.apache.druid.indexing.common.task.NoopTask; import org.apache.druid.indexing.common.task.Task; import org.apache.druid.indexing.overlord.ImmutableWorkerInfo; -import org.apache.druid.indexing.overlord.config.RemoteTaskRunnerConfig; +import org.apache.druid.indexing.overlord.config.HttpRemoteTaskRunnerConfig; import org.apache.druid.indexing.seekablestream.SeekableStreamIndexTaskIOConfig; import org.apache.druid.indexing.seekablestream.SeekableStreamIndexTaskTuningConfig; import org.apache.druid.indexing.seekablestream.TestSeekableStreamIndexTask; @@ -214,7 +214,7 @@ public void testSupervisorIdCategoryAffinity() new FillCapacityWithCategorySpecWorkerSelectStrategy(workerCategorySpec, null); ImmutableWorkerInfo worker = strategy.findWorkerForTask( - new RemoteTaskRunnerConfig(), + new HttpRemoteTaskRunnerConfig(), WORKERS_FOR_TIER_TESTS, taskWithSupervisor ); @@ -245,7 +245,7 @@ public void testSupervisorIdCategoryAffinityFallbackToDatasource() new FillCapacityWithCategorySpecWorkerSelectStrategy(workerCategorySpec, null); ImmutableWorkerInfo worker = strategy.findWorkerForTask( - new RemoteTaskRunnerConfig(), + new HttpRemoteTaskRunnerConfig(), WORKERS_FOR_TIER_TESTS, taskWithSupervisor ); @@ -276,7 +276,7 @@ public void testSupervisorIdCategoryAffinityFallbackToDefault() new FillCapacityWithCategorySpecWorkerSelectStrategy(workerCategorySpec, null); ImmutableWorkerInfo worker = strategy.findWorkerForTask( - new RemoteTaskRunnerConfig(), + new HttpRemoteTaskRunnerConfig(), WORKERS_FOR_TIER_TESTS, taskWithSupervisor ); @@ -291,7 +291,7 @@ private ImmutableWorkerInfo selectWorker(WorkerCategorySpec workerCategorySpec) new FillCapacityWithCategorySpecWorkerSelectStrategy(workerCategorySpec, null); ImmutableWorkerInfo worker = strategy.findWorkerForTask( - new RemoteTaskRunnerConfig(), + new HttpRemoteTaskRunnerConfig(), WORKERS_FOR_TIER_TESTS, NoopTask.forDatasource("ds1") ); diff --git 
a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/setup/JavaScriptWorkerSelectStrategyTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/setup/JavaScriptWorkerSelectStrategyTest.java index 222c26ae5bd3..fb01549951a5 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/setup/JavaScriptWorkerSelectStrategyTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/setup/JavaScriptWorkerSelectStrategyTest.java @@ -25,12 +25,11 @@ import com.google.common.collect.ImmutableMap; import org.apache.druid.indexing.common.task.Task; import org.apache.druid.indexing.overlord.ImmutableWorkerInfo; -import org.apache.druid.indexing.overlord.TestRemoteTaskRunnerConfig; +import org.apache.druid.indexing.overlord.config.HttpRemoteTaskRunnerConfig; import org.apache.druid.jackson.DefaultObjectMapper; import org.apache.druid.js.JavaScriptConfig; import org.easymock.EasyMock; import org.hamcrest.CoreMatchers; -import org.joda.time.Period; import org.junit.Assert; import org.junit.Rule; import org.junit.Test; @@ -123,7 +122,7 @@ public void testFindWorkerForTask() ); ImmutableWorkerInfo workerForBatchTask = STRATEGY.findWorkerForTask( - new TestRemoteTaskRunnerConfig(new Period("PT1S")), + new HttpRemoteTaskRunnerConfig(), workerMap, createMockTask("index_parallel") ); @@ -131,7 +130,7 @@ public void testFindWorkerForTask() Assert.assertEquals(worker1, workerForBatchTask); ImmutableWorkerInfo workerForOtherTask = STRATEGY.findWorkerForTask( - new TestRemoteTaskRunnerConfig(new Period("PT1S")), + new HttpRemoteTaskRunnerConfig(), workerMap, createMockTask("other_type") ); @@ -147,7 +146,7 @@ public void testIsolationOfBatchWorker() "10.0.0.2", createMockWorker(1, true, true) ); ImmutableWorkerInfo workerForOtherTask = STRATEGY.findWorkerForTask( - new TestRemoteTaskRunnerConfig(new Period("PT1S")), + new HttpRemoteTaskRunnerConfig(), workerMap, createMockTask("other_type") ); @@ -162,14 +161,14 
@@ public void testNoValidWorker() "10.0.0.4", createMockWorker(1, true, false) ); ImmutableWorkerInfo workerForBatchTask = STRATEGY.findWorkerForTask( - new TestRemoteTaskRunnerConfig(new Period("PT1S")), + new HttpRemoteTaskRunnerConfig(), workerMap, createMockTask("index_parallel") ); Assert.assertNull(workerForBatchTask); ImmutableWorkerInfo workerForOtherTask = STRATEGY.findWorkerForTask( - new TestRemoteTaskRunnerConfig(new Period("PT1S")), + new HttpRemoteTaskRunnerConfig(), workerMap, createMockTask("otherTask") ); @@ -185,14 +184,14 @@ public void testNoWorkerCanRunTask() "10.0.0.4", createMockWorker(1, false, true) ); ImmutableWorkerInfo workerForBatchTask = STRATEGY.findWorkerForTask( - new TestRemoteTaskRunnerConfig(new Period("PT1S")), + new HttpRemoteTaskRunnerConfig(), workerMap, createMockTask("index_parallel") ); Assert.assertNull(workerForBatchTask); ImmutableWorkerInfo workerForOtherTask = STRATEGY.findWorkerForTask( - new TestRemoteTaskRunnerConfig(new Period("PT1S")), + new HttpRemoteTaskRunnerConfig(), workerMap, createMockTask("otherTask") ); @@ -209,7 +208,7 @@ public void testFillWorkerCapacity() "10.0.0.2", createMockWorker(5, true, true) ); ImmutableWorkerInfo workerForBatchTask = STRATEGY.findWorkerForTask( - new TestRemoteTaskRunnerConfig(new Period("PT1S")), + new HttpRemoteTaskRunnerConfig(), workerMap, createMockTask("index_parallel") ); diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/supervisor/SupervisorManagerTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/supervisor/SupervisorManagerTest.java index 525444e23dea..199e004b4243 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/supervisor/SupervisorManagerTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/supervisor/SupervisorManagerTest.java @@ -19,6 +19,9 @@ package org.apache.druid.indexing.overlord.supervisor; +import 
com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.annotation.JsonTypeName; import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.base.Optional; import com.google.common.collect.ImmutableList; @@ -26,6 +29,8 @@ import com.google.common.collect.ImmutableSet; import com.google.common.util.concurrent.SettableFuture; import org.apache.druid.data.input.impl.ByteEntity; +import org.apache.druid.data.input.impl.DimensionsSpec; +import org.apache.druid.data.input.impl.TimestampSpec; import org.apache.druid.error.DruidException; import org.apache.druid.error.DruidExceptionMatcher; import org.apache.druid.error.InvalidInput; @@ -35,7 +40,11 @@ import org.apache.druid.indexing.overlord.ObjectMetadata; import org.apache.druid.indexing.seekablestream.SeekableStreamStartSequenceNumbers; import org.apache.druid.indexing.seekablestream.TestSeekableStreamDataSourceMetadata; +import org.apache.druid.indexing.seekablestream.supervisor.BoundedStreamConfig; +import org.apache.druid.indexing.seekablestream.supervisor.LagAggregator; import org.apache.druid.indexing.seekablestream.supervisor.SeekableStreamSupervisor; +import org.apache.druid.indexing.seekablestream.supervisor.SeekableStreamSupervisorIOConfig; +import org.apache.druid.indexing.seekablestream.supervisor.SeekableStreamSupervisorIngestionSpec; import org.apache.druid.indexing.seekablestream.supervisor.SeekableStreamSupervisorSpec; import org.apache.druid.jackson.DefaultObjectMapper; import org.apache.druid.java.util.common.DateTimes; @@ -43,6 +52,7 @@ import org.apache.druid.java.util.common.Pair; import org.apache.druid.metadata.MetadataSupervisorManager; import org.apache.druid.metadata.PendingSegmentRecord; +import org.apache.druid.segment.indexing.DataSchema; import org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec; import org.apache.druid.server.metrics.SupervisorStatsProvider; import 
org.apache.druid.timeline.partition.NumberedShardSpec; @@ -59,6 +69,7 @@ import org.junit.rules.ExpectedException; import org.junit.runner.RunWith; +import javax.annotation.Nullable; import java.lang.reflect.Field; import java.util.Collection; import java.util.Collections; @@ -1068,6 +1079,186 @@ public void test_isAnotherTaskGroupPublishingToPartitions() ); } + @Test + public void testResetToLatestAndBackfill() throws Exception + { + EasyMock.expect(metadataSupervisorManager.getLatest()).andReturn(ImmutableMap.of()); + replayAll(); + manager.start(); + + final ConcurrentHashMap> supervisorsMap = getSupervisorsMap(); + final SeekableStreamSupervisorSpec streamSpec = EasyMock.createNiceMock(SeekableStreamSupervisorSpec.class); + final SeekableStreamSupervisor streamSupervisor = EasyMock.createNiceMock(SeekableStreamSupervisor.class); + final SeekableStreamSupervisorIOConfig ioConfig = EasyMock.createNiceMock(SeekableStreamSupervisorIOConfig.class); + + // non-SeekableStream supervisor → IAE + // Use a concrete anonymous Supervisor (not a mock) to reliably fail instanceof SeekableStreamSupervisor + final Supervisor nonStreamSupervisor = new Supervisor() + { + @Override + public void start() + { + } + + @Override + public void stop(boolean stopGracefully) + { + } + + @Override + public SupervisorReport getStatus() + { + return null; + } + + @Override + public SupervisorStateManager.State getState() + { + return null; + } + + @Override + public void reset(DataSourceMetadata dataSourceMetadata) + { + } + }; + supervisorsMap.put("id1", Pair.of(nonStreamSupervisor, streamSpec)); + Assert.assertThrows( + IllegalArgumentException.class, + () -> manager.resetToLatestAndBackfill("id1", null) + ); + + // useEarliestSequenceNumber=true → IAE + supervisorsMap.put("id1", Pair.of(streamSupervisor, streamSpec)); + EasyMock.expect(streamSupervisor.getIoConfig()).andReturn(ioConfig).anyTimes(); + EasyMock.expect(ioConfig.isUseEarliestSequenceNumber()).andReturn(true).once(); + 
EasyMock.replay(streamSupervisor, streamSpec, ioConfig); + Assert.assertThrows( + IllegalArgumentException.class, + () -> manager.resetToLatestAndBackfill("id1", null) + ); + EasyMock.reset(streamSupervisor, streamSpec, ioConfig); + + // useConcurrentLocks not set (null context) → IAE + EasyMock.expect(streamSupervisor.getIoConfig()).andReturn(ioConfig).anyTimes(); + EasyMock.expect(ioConfig.isUseEarliestSequenceNumber()).andReturn(false).once(); + EasyMock.expect(streamSpec.getContext()).andReturn(null).once(); + EasyMock.replay(streamSupervisor, streamSpec, ioConfig); + Assert.assertThrows( + IllegalArgumentException.class, + () -> manager.resetToLatestAndBackfill("id1", null) + ); + EasyMock.reset(streamSupervisor, streamSpec, ioConfig); + + // useConcurrentLocks=false → IAE + EasyMock.expect(streamSupervisor.getIoConfig()).andReturn(ioConfig).anyTimes(); + EasyMock.expect(ioConfig.isUseEarliestSequenceNumber()).andReturn(false).once(); + EasyMock.expect(streamSpec.getContext()).andReturn(ImmutableMap.of("useConcurrentLocks", false)).once(); + EasyMock.replay(streamSupervisor, streamSpec, ioConfig); + Assert.assertThrows( + IllegalArgumentException.class, + () -> manager.resetToLatestAndBackfill("id1", null) + ); + EasyMock.reset(streamSupervisor, streamSpec, ioConfig); + + // useConcurrentLocks="true" (string) → accepted, fails at next guard (not RUNNING) + EasyMock.expect(streamSupervisor.getIoConfig()).andReturn(ioConfig).anyTimes(); + EasyMock.expect(ioConfig.isUseEarliestSequenceNumber()).andReturn(false).once(); + EasyMock.expect(streamSpec.getContext()).andReturn(ImmutableMap.of("useConcurrentLocks", "true")).once(); + EasyMock.expect(streamSupervisor.getState()).andReturn(SupervisorStateManager.BasicState.SUSPENDED).once(); + EasyMock.replay(streamSupervisor, streamSpec, ioConfig); + Assert.assertThrows( + IllegalArgumentException.class, + () -> manager.resetToLatestAndBackfill("id1", null) + ); + EasyMock.reset(streamSupervisor, streamSpec, ioConfig); + 
+ // taskLockType=APPEND → accepted, fails at next guard (not RUNNING) + EasyMock.expect(streamSupervisor.getIoConfig()).andReturn(ioConfig).anyTimes(); + EasyMock.expect(ioConfig.isUseEarliestSequenceNumber()).andReturn(false).once(); + EasyMock.expect(streamSpec.getContext()).andReturn(ImmutableMap.of("taskLockType", "APPEND")).once(); + EasyMock.expect(streamSupervisor.getState()).andReturn(SupervisorStateManager.BasicState.SUSPENDED).once(); + EasyMock.replay(streamSupervisor, streamSpec, ioConfig); + Assert.assertThrows( + IllegalArgumentException.class, + () -> manager.resetToLatestAndBackfill("id1", null) + ); + EasyMock.reset(streamSupervisor, streamSpec, ioConfig); + + // supervisor not RUNNING → IAE + EasyMock.expect(streamSupervisor.getIoConfig()).andReturn(ioConfig).anyTimes(); + EasyMock.expect(ioConfig.isUseEarliestSequenceNumber()).andReturn(false).once(); + EasyMock.expect(streamSpec.getContext()).andReturn(ImmutableMap.of("useConcurrentLocks", true)).once(); + EasyMock.expect(streamSupervisor.getState()).andReturn(SupervisorStateManager.BasicState.SUSPENDED).once(); + EasyMock.replay(streamSupervisor, streamSpec, ioConfig); + Assert.assertThrows( + IllegalArgumentException.class, + () -> manager.resetToLatestAndBackfill("id1", null) + ); + EasyMock.reset(streamSupervisor, streamSpec, ioConfig); + + // empty latest offsets → ISE + EasyMock.expect(streamSupervisor.getIoConfig()).andReturn(ioConfig).anyTimes(); + EasyMock.expect(ioConfig.isUseEarliestSequenceNumber()).andReturn(false).once(); + EasyMock.expect(streamSpec.getContext()).andReturn(ImmutableMap.of("useConcurrentLocks", true)).once(); + EasyMock.expect(streamSupervisor.getState()).andReturn(SupervisorStateManager.BasicState.RUNNING).once(); + streamSupervisor.updatePartitionLagFromStream(); + EasyMock.expectLastCall().once(); + EasyMock.expect(streamSupervisor.getLatestSequencesFromStream()).andReturn(ImmutableMap.of()).once(); + EasyMock.replay(streamSupervisor, streamSpec, ioConfig); + 
Assert.assertThrows( + IllegalStateException.class, + () -> manager.resetToLatestAndBackfill("id1", null) + ); + EasyMock.reset(streamSupervisor, streamSpec, ioConfig); + + // empty start offsets from metadata → ISE + EasyMock.expect(streamSupervisor.getIoConfig()).andReturn(ioConfig).anyTimes(); + EasyMock.expect(ioConfig.isUseEarliestSequenceNumber()).andReturn(false).once(); + EasyMock.expect(streamSpec.getContext()).andReturn(ImmutableMap.of("useConcurrentLocks", true)).once(); + EasyMock.expect(streamSupervisor.getState()).andReturn(SupervisorStateManager.BasicState.RUNNING).once(); + streamSupervisor.updatePartitionLagFromStream(); + EasyMock.expectLastCall().once(); + EasyMock.expect(streamSupervisor.getLatestSequencesFromStream()).andReturn(ImmutableMap.of("0", 100L)).once(); + EasyMock.expect(streamSupervisor.getOffsetsFromMetadataStorage()).andReturn(ImmutableMap.of()).once(); + EasyMock.replay(streamSupervisor, streamSpec, ioConfig); + Assert.assertThrows( + IllegalStateException.class, + () -> manager.resetToLatestAndBackfill("id1", null) + ); + + verifyAll(); + } + + @Test + public void testCreateBackfillSpec() + { + final TestBackfillSupervisorSpec.IOConfig ioConfig = new TestBackfillSupervisorSpec.IOConfig("test-stream", null, null); + final TestBackfillSupervisorSpec.IngestionSpec ingestionSpec = new TestBackfillSupervisorSpec.IngestionSpec(ioConfig); + final SeekableStreamSupervisorSpec sourceSpec = new TestBackfillSupervisorSpec("original-id", ingestionSpec); + + final BoundedStreamConfig boundedStreamConfig = new BoundedStreamConfig( + ImmutableMap.of("0", 100L), + ImmutableMap.of("0", 200L) + ); + + // Without overriding taskCount + final SupervisorSpec backfillSpec = sourceSpec.createBackfillSpec("backfill-id", boundedStreamConfig, null); + Assert.assertEquals("backfill-id", backfillSpec.getId()); + final TestBackfillSupervisorSpec backfillCast = (TestBackfillSupervisorSpec) backfillSpec; + final BoundedStreamConfig actualConfig = 
backfillCast.getIoConfig().getBoundedStreamConfig(); + Assert.assertNotNull(actualConfig); + Assert.assertEquals(ImmutableMap.of("0", 100L), actualConfig.getStartSequenceNumbers()); + Assert.assertEquals(ImmutableMap.of("0", 200L), actualConfig.getEndSequenceNumbers()); + Assert.assertEquals(1, backfillCast.getIoConfig().getTaskCount()); + + // With overriding taskCount + final SupervisorSpec backfillSpecWithCount = sourceSpec.createBackfillSpec("backfill-id-2", boundedStreamConfig, 5); + Assert.assertEquals("backfill-id-2", backfillSpecWithCount.getId()); + final TestBackfillSupervisorSpec backfillWithCount = (TestBackfillSupervisorSpec) backfillSpecWithCount; + Assert.assertEquals(5, backfillWithCount.getIoConfig().getTaskCount()); + } + private static class TestSupervisorSpec implements SupervisorSpec { private final String id; @@ -1137,4 +1328,103 @@ public List getDataSources() return Collections.singletonList(id); } } + + @JsonTypeName("testBackfill") + private static class TestBackfillSupervisorSpec extends SeekableStreamSupervisorSpec + { + @JsonCreator + TestBackfillSupervisorSpec( + @JsonProperty("id") String id, + @JsonProperty("spec") IngestionSpec ingestionSpec + ) + { + super( + id, + ingestionSpec, + ImmutableMap.of("useConcurrentLocks", true), + false, + null, null, null, null, + MAPPER, + null, null, null, null + ); + } + + @Override + public Supervisor createSupervisor() + { + return null; + } + + @Override + public String getType() + { + return "testBackfill"; + } + + @Override + public String getSource() + { + return "test-stream"; + } + + @Override + protected SeekableStreamSupervisorSpec toggleSuspend(boolean suspend) + { + return this; + } + + @Override + public SeekableStreamSupervisorSpec createBackfillSpec( + String backfillId, + BoundedStreamConfig boundedStreamConfig, + @Nullable Integer taskCount + ) + { + return new TestBackfillSupervisorSpec( + backfillId, + new IngestionSpec(new IOConfig(getIoConfig().getStream(), taskCount, 
boundedStreamConfig)) + ); + } + + @Override + public SeekableStreamSupervisorIOConfig getIoConfig() + { + return getSpec().getIOConfig(); + } + + @JsonTypeName("testBackfillIngestionSpec") + static class IngestionSpec extends SeekableStreamSupervisorIngestionSpec + { + @JsonCreator + IngestionSpec( + @JsonProperty("ioConfig") IOConfig ioConfig + ) + { + super( + new DataSchema( + "testDS", + new TimestampSpec("time", "auto", null), + new DimensionsSpec(Collections.emptyList()), + null, null, null, null, null + ), + ioConfig, + null + ); + } + } + + @JsonTypeName("testBackfillIOConfig") + static class IOConfig extends SeekableStreamSupervisorIOConfig + { + @JsonCreator + IOConfig( + @JsonProperty("stream") String stream, + @JsonProperty("taskCount") Integer taskCount, + @JsonProperty("boundedStreamConfig") BoundedStreamConfig boundedStreamConfig + ) + { + super(stream, null, 1, taskCount, null, null, null, false, null, null, null, null, LagAggregator.DEFAULT, null, null, null, null, boundedStreamConfig); + } + } + } } diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/supervisor/SupervisorResourceTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/supervisor/SupervisorResourceTest.java index 4ccf4659994f..31e0d604a222 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/supervisor/SupervisorResourceTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/supervisor/SupervisorResourceTest.java @@ -34,6 +34,7 @@ import org.apache.druid.indexing.seekablestream.SeekableStreamIndexTaskClientFactory; import org.apache.druid.indexing.seekablestream.SeekableStreamStartSequenceNumbers; import org.apache.druid.indexing.seekablestream.TestSeekableStreamDataSourceMetadata; +import org.apache.druid.indexing.seekablestream.supervisor.BoundedStreamConfig; import org.apache.druid.indexing.seekablestream.supervisor.SeekableStreamSupervisorIOConfig; import 
org.apache.druid.indexing.seekablestream.supervisor.SeekableStreamSupervisorIngestionSpec; import org.apache.druid.indexing.seekablestream.supervisor.SeekableStreamSupervisorSpec; @@ -1379,6 +1380,100 @@ public void testResetOffsets() verifyAll(); } + @Test + public void testResetToLatestAndBackfill() + { + // 200 - success + EasyMock.expect(taskMaster.getSupervisorManager()).andReturn(Optional.of(supervisorManager)); + EasyMock.expect(supervisorManager.getSupervisorIds()).andReturn(ImmutableSet.of("my-id")); + EasyMock.expect(supervisorManager.resetToLatestAndBackfill("my-id", null)) + .andReturn(ImmutableMap.of("id", "my-id", "backfillSupervisorId", "my-id_backfill_abcdefgh")); + replayAll(); + + Response response = supervisorResource.resetToLatestAndBackfill("my-id", null); + Assert.assertEquals(200, response.getStatus()); + Assert.assertEquals( + ImmutableMap.of("id", "my-id", "backfillSupervisorId", "my-id_backfill_abcdefgh"), + response.getEntity() + ); + verifyAll(); + resetAll(); + + // 404 - supervisor does not exist + EasyMock.expect(taskMaster.getSupervisorManager()).andReturn(Optional.of(supervisorManager)); + EasyMock.expect(supervisorManager.getSupervisorIds()).andReturn(ImmutableSet.of()); + replayAll(); + + response = supervisorResource.resetToLatestAndBackfill("my-id", null); + Assert.assertEquals(404, response.getStatus()); + verifyAll(); + resetAll(); + + // 400 - IAE (e.g. 
supervisor not running) + EasyMock.expect(taskMaster.getSupervisorManager()).andReturn(Optional.of(supervisorManager)); + EasyMock.expect(supervisorManager.getSupervisorIds()).andReturn(ImmutableSet.of("my-id")); + EasyMock.expect(supervisorManager.resetToLatestAndBackfill("my-id", null)) + .andThrow(new IllegalArgumentException("Supervisor[my-id] must be in a RUNNING state")); + replayAll(); + + response = supervisorResource.resetToLatestAndBackfill("my-id", null); + Assert.assertEquals(400, response.getStatus()); + Assert.assertEquals( + ImmutableMap.of("error", "Supervisor[my-id] must be in a RUNNING state"), + response.getEntity() + ); + verifyAll(); + resetAll(); + + // 500 - ISE (e.g. failed to retrieve offsets) + EasyMock.expect(taskMaster.getSupervisorManager()).andReturn(Optional.of(supervisorManager)); + EasyMock.expect(supervisorManager.getSupervisorIds()).andReturn(ImmutableSet.of("my-id")); + EasyMock.expect(supervisorManager.resetToLatestAndBackfill("my-id", null)) + .andThrow(new IllegalStateException("Failed to get latest offsets from stream")); + replayAll(); + + response = supervisorResource.resetToLatestAndBackfill("my-id", null); + Assert.assertEquals(500, response.getStatus()); + Assert.assertEquals( + ImmutableMap.of("error", "Failed to get latest offsets from stream"), + response.getEntity() + ); + verifyAll(); + resetAll(); + + // 400 - invalid backfillTaskCount (zero) + replayAll(); + + response = supervisorResource.resetToLatestAndBackfill("my-id", 0); + Assert.assertEquals(400, response.getStatus()); + Assert.assertEquals( + ImmutableMap.of("error", "backfillTaskCount must be a positive integer"), + response.getEntity() + ); + verifyAll(); + resetAll(); + + // 400 - invalid backfillTaskCount (negative) + replayAll(); + + response = supervisorResource.resetToLatestAndBackfill("my-id", -1); + Assert.assertEquals(400, response.getStatus()); + Assert.assertEquals( + ImmutableMap.of("error", "backfillTaskCount must be a positive integer"), + 
response.getEntity() + ); + verifyAll(); + resetAll(); + + // 503 - no supervisor manager (not leader) + EasyMock.expect(taskMaster.getSupervisorManager()).andReturn(Optional.absent()); + replayAll(); + + response = supervisorResource.resetToLatestAndBackfill("my-id", null); + Assert.assertEquals(503, response.getStatus()); + verifyAll(); + } + @Test public void testNoopSupervisorSpecSerde() throws Exception { @@ -1668,6 +1763,16 @@ protected SeekableStreamSupervisorSpec toggleSuspend(boolean suspend) return null; } + @Override + public SeekableStreamSupervisorSpec createBackfillSpec( + String backfillId, + BoundedStreamConfig boundedStreamConfig, + @Nullable Integer taskCount + ) + { + return null; + } + @JsonIgnore @Nonnull @Override diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/seekablestream/SeekableStreamIndexTaskRunnerTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/seekablestream/SeekableStreamIndexTaskRunnerTest.java index 7f0e731d1d3c..01c22ed36d07 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/seekablestream/SeekableStreamIndexTaskRunnerTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/seekablestream/SeekableStreamIndexTaskRunnerTest.java @@ -52,7 +52,7 @@ import org.apache.druid.segment.incremental.InputRowFilterResult; import org.apache.druid.segment.incremental.NoopRowIngestionMeters; import org.apache.druid.segment.indexing.DataSchema; -import org.apache.druid.segment.realtime.NoopChatHandlerProvider; +import org.apache.druid.segment.realtime.ChatHandlerProvider; import org.apache.druid.segment.realtime.appenderator.SegmentsAndCommitMetadata; import org.apache.druid.segment.realtime.appenderator.StreamAppenderator; import org.apache.druid.segment.realtime.appenderator.StreamAppenderatorDriver; @@ -364,7 +364,7 @@ private TaskToolbox createTaskToolbox() .authorizerMapper(AuthTestUtils.TEST_AUTHORIZER_MAPPER) .rowIngestionMetersFactory(NoopRowIngestionMeters::new) 
.indexMerger(testUtils.getIndexMergerV9Factory().create(true)) - .chatHandlerProvider(new NoopChatHandlerProvider()) + .chatHandlerProvider(new ChatHandlerProvider()) .dataNodeService(new DataNodeService(DruidServer.DEFAULT_TIER, 100L, null, ServerType.HISTORICAL, 1)) .lookupNodeService(new LookupNodeService(DruidServer.DEFAULT_TIER)) .appenderatorsManager(new TestAppenderatorsManager()) diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/seekablestream/SeekableStreamIndexTaskTestBase.java b/indexing-service/src/test/java/org/apache/druid/indexing/seekablestream/SeekableStreamIndexTaskTestBase.java index 06b1a1c7cab1..ce48c0efda20 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/seekablestream/SeekableStreamIndexTaskTestBase.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/seekablestream/SeekableStreamIndexTaskTestBase.java @@ -118,7 +118,7 @@ import org.apache.druid.segment.metadata.CentralizedDatasourceSchemaConfig; import org.apache.druid.segment.metadata.HeapMemoryIndexingStateStorage; import org.apache.druid.segment.metadata.SegmentSchemaManager; -import org.apache.druid.segment.realtime.NoopChatHandlerProvider; +import org.apache.druid.segment.realtime.ChatHandlerProvider; import org.apache.druid.segment.realtime.SegmentGenerationMetrics; import org.apache.druid.segment.realtime.appenderator.StreamAppenderator; import org.apache.druid.server.DruidNode; @@ -696,7 +696,7 @@ public void close() new SingleFileTaskReportFileWriter(reportsFile), null, AuthTestUtils.TEST_AUTHORIZER_MAPPER, - new NoopChatHandlerProvider(), + new ChatHandlerProvider(), testUtils.getRowIngestionMetersFactory(), new TestAppenderatorsManager(), new NoopOverlordClient(), diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/seekablestream/StreamChunkReaderTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/seekablestream/StreamChunkReaderTest.java index 7e414ad506b9..2b5bc49d1b8d 100644 --- 
a/indexing-service/src/test/java/org/apache/druid/indexing/seekablestream/StreamChunkReaderTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/seekablestream/StreamChunkReaderTest.java @@ -37,6 +37,10 @@ import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.java.util.common.parsers.JSONPathSpec; import org.apache.druid.java.util.common.parsers.ParseException; +import org.apache.druid.query.filter.AndDimFilter; +import org.apache.druid.query.filter.NotDimFilter; +import org.apache.druid.query.filter.SelectorDimFilter; +import org.apache.druid.segment.incremental.InputRowFilterResult; import org.apache.druid.segment.incremental.ParseExceptionHandler; import org.apache.druid.segment.incremental.RowIngestionMeters; import org.apache.druid.segment.incremental.SimpleRowIngestionMeters; @@ -158,6 +162,127 @@ public void parseEmptyEndOfShard() throws IOException Assert.assertEquals(0, rowIngestionMeters.getThrownAway()); } + @Test + public void testTransformSpecFilterIncrementsCustomFilterReason() throws IOException + { + final JsonInputFormat inputFormat = new JsonInputFormat( + JSONPathSpec.DEFAULT, + Collections.emptyMap(), + null, + null, + null + ); + final TransformSpec transformSpec = new TransformSpec( + new AndDimFilter( + new SelectorDimFilter("column_a", "y", null), + new NotDimFilter(new SelectorDimFilter("column_b", "other", null)) + ), + null + ); + final StreamChunkReader chunkParser = new StreamChunkReader<>( + inputFormat, + new InputRowSchema(TimestampSpec.DEFAULT, DimensionsSpec.EMPTY, ColumnsFilter.all()), + transformSpec, + temporaryFolder.newFolder(), + InputRowFilter.allowAll(), + rowIngestionMeters, + parseExceptionHandler + ); + final List parsedRows = chunkParser.parse( + Arrays.asList( + new ByteEntity( + "{\"timestamp\": \"2020-01-01\", \"column_a\": \"y\", \"column_b\": \"other\"}" + .getBytes(StringUtils.UTF8_STRING) + ), + new ByteEntity( + "{\"timestamp\": \"2020-01-01\", \"column_a\": \"y\", 
\"column_b\": \"title1\"}" + .getBytes(StringUtils.UTF8_STRING) + ) + ), + false + ); + + Assert.assertEquals(1, parsedRows.size()); + Assert.assertEquals("title1", Iterables.getOnlyElement(parsedRows.get(0).getDimension("column_b"))); + Assert.assertEquals(1, rowIngestionMeters.getThrownAway()); + + final Map thrownAwayByReason = rowIngestionMeters.getThrownAwayByReason(); + Assert.assertEquals(Long.valueOf(1), thrownAwayByReason.get(InputRowFilterResult.CUSTOM_FILTER.getReason())); + Assert.assertFalse(thrownAwayByReason.containsKey(InputRowFilterResult.NULL_OR_EMPTY_RECORD.getReason())); + } + + @Test + public void testTransformSpecFilterPreservesOtherRejectionReasons() throws IOException + { + final JsonInputFormat inputFormat = new JsonInputFormat( + JSONPathSpec.DEFAULT, + Collections.emptyMap(), + null, + null, + null + ); + final TransformSpec transformSpec = new TransformSpec( + new AndDimFilter( + new SelectorDimFilter("column_a", "y", null), + new NotDimFilter(new SelectorDimFilter("column_b", "other", null)) + ), + null + ); + final InputRowFilter rowFilter = row -> { + if (row == null) { + return InputRowFilterResult.NULL_OR_EMPTY_RECORD; + } else if ("late".equals(row.getRaw("column_b"))) { + return InputRowFilterResult.BEFORE_MIN_MESSAGE_TIME; + } else if ("early".equals(row.getRaw("column_b"))) { + return InputRowFilterResult.AFTER_MAX_MESSAGE_TIME; + } + return InputRowFilterResult.ACCEPTED; + }; + final StreamChunkReader chunkParser = new StreamChunkReader<>( + inputFormat, + new InputRowSchema(TimestampSpec.DEFAULT, DimensionsSpec.EMPTY, ColumnsFilter.all()), + transformSpec, + temporaryFolder.newFolder(), + rowFilter, + rowIngestionMeters, + parseExceptionHandler + ); + + chunkParser.parse(ImmutableList.of(), false); + final List parsedRows = chunkParser.parse( + Arrays.asList( + new ByteEntity( + "{\"timestamp\": \"2020-01-01\", \"column_a\": \"y\", \"column_b\": \"other\"}" + .getBytes(StringUtils.UTF8_STRING) + ), + new ByteEntity( + 
"{\"timestamp\": \"2020-01-01\", \"column_a\": \"y\", \"column_b\": \"late\"}" + .getBytes(StringUtils.UTF8_STRING) + ), + new ByteEntity( + "{\"timestamp\": \"2020-01-01\", \"column_a\": \"y\", \"column_b\": \"early\"}" + .getBytes(StringUtils.UTF8_STRING) + ), + new ByteEntity( + "{\"timestamp\": \"2020-01-01\", \"column_a\": \"y\", \"column_b\": \"title1\"}" + .getBytes(StringUtils.UTF8_STRING) + ) + ), + false + ); + + Assert.assertEquals(1, parsedRows.size()); + Assert.assertEquals("title1", Iterables.getOnlyElement(parsedRows.get(0).getDimension("column_b"))); + Assert.assertEquals(4, rowIngestionMeters.getThrownAway()); + + final Map thrownAwayByReason = rowIngestionMeters.getThrownAwayByReason(); + Assert.assertEquals(Long.valueOf(1), thrownAwayByReason.get(InputRowFilterResult.NULL_OR_EMPTY_RECORD.getReason())); + Assert.assertEquals(Long.valueOf(1), thrownAwayByReason.get(InputRowFilterResult.BEFORE_MIN_MESSAGE_TIME.getReason())); + Assert.assertEquals(Long.valueOf(1), thrownAwayByReason.get(InputRowFilterResult.AFTER_MAX_MESSAGE_TIME.getReason())); + Assert.assertEquals(Long.valueOf(1), thrownAwayByReason.get(InputRowFilterResult.CUSTOM_FILTER.getReason())); + Assert.assertFalse(thrownAwayByReason.containsKey(InputRowFilterResult.UNKNOWN.getReason())); + } + @Test public void testParseMalformedDataWithAllowedParseExceptions_thenNoException() throws IOException { diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/seekablestream/supervisor/SeekableStreamSupervisorSpecTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/seekablestream/supervisor/SeekableStreamSupervisorSpecTest.java index 8d1b5350e8ce..80120d07fdf5 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/seekablestream/supervisor/SeekableStreamSupervisorSpecTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/seekablestream/supervisor/SeekableStreamSupervisorSpecTest.java @@ -942,6 +942,16 @@ protected 
SeekableStreamSupervisorSpec toggleSuspend(boolean suspend) return null; } + @Override + public SeekableStreamSupervisorSpec createBackfillSpec( + String backfillId, + BoundedStreamConfig boundedStreamConfig, + @Nullable Integer taskCount + ) + { + return null; + } + @Override public String getType() { diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/seekablestream/supervisor/SeekableStreamSupervisorStateTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/seekablestream/supervisor/SeekableStreamSupervisorStateTest.java index d61049777c8f..9e45920ad719 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/seekablestream/supervisor/SeekableStreamSupervisorStateTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/seekablestream/supervisor/SeekableStreamSupervisorStateTest.java @@ -3059,7 +3059,7 @@ public String toString() final TestSeekableStreamSupervisor supervisor = new TestSeekableStreamSupervisor() { @Override - protected SeekableStreamDataSourceMetadata createDataSourceMetaDataForReset( + public SeekableStreamDataSourceMetadata createDataSourceMetaDataForReset( String stream, Map map ) @@ -3284,7 +3284,7 @@ protected String baseTaskName() } @Override - protected void updatePartitionLagFromStream() + public void updatePartitionLagFromStream() { // do nothing } @@ -3381,7 +3381,7 @@ protected boolean doesTaskMatchSupervisor(Task task) } @Override - protected SeekableStreamDataSourceMetadata createDataSourceMetaDataForReset( + public SeekableStreamDataSourceMetadata createDataSourceMetaDataForReset( String stream, Map map ) @@ -3521,7 +3521,7 @@ public LagStats computeLagStats() } @Override - protected Map getLatestSequencesFromStream() + public Map getLatestSequencesFromStream() { return streamOffsets; } diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/seekablestream/supervisor/SeekableStreamSupervisorTestBase.java 
b/indexing-service/src/test/java/org/apache/druid/indexing/seekablestream/supervisor/SeekableStreamSupervisorTestBase.java index 4eefaed9bd99..c96a64211b97 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/seekablestream/supervisor/SeekableStreamSupervisorTestBase.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/seekablestream/supervisor/SeekableStreamSupervisorTestBase.java @@ -124,7 +124,7 @@ protected String baseTaskName() } @Override - protected void updatePartitionLagFromStream() + public void updatePartitionLagFromStream() { // do nothing } @@ -205,7 +205,7 @@ protected boolean doesTaskMatchSupervisor(Task task) } @Override - protected SeekableStreamDataSourceMetadata createDataSourceMetaDataForReset( + public SeekableStreamDataSourceMetadata createDataSourceMetaDataForReset( String stream, Map map ) @@ -436,6 +436,16 @@ protected SeekableStreamSupervisorSpec toggleSuspend(boolean suspend) { return null; } + + @Override + public SeekableStreamSupervisorSpec createBackfillSpec( + String backfillId, + BoundedStreamConfig boundedStreamConfig, + @Nullable Integer taskCount + ) + { + return null; + } } protected static SeekableStreamSupervisorTuningConfig getTuningConfig() diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/worker/WorkerTaskManagerTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/worker/WorkerTaskManagerTest.java index 75cfaf68dc22..d77b83a75c22 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/worker/WorkerTaskManagerTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/worker/WorkerTaskManagerTest.java @@ -54,7 +54,7 @@ import org.apache.druid.segment.handoff.SegmentHandoffNotifierFactory; import org.apache.druid.segment.join.NoopJoinableFactory; import org.apache.druid.segment.metadata.CentralizedDatasourceSchemaConfig; -import org.apache.druid.segment.realtime.NoopChatHandlerProvider; +import 
org.apache.druid.segment.realtime.ChatHandlerProvider; import org.apache.druid.server.coordination.ChangeRequestHistory; import org.apache.druid.server.coordination.ChangeRequestsSnapshot; import org.apache.druid.server.security.AuthTestUtils; @@ -174,7 +174,7 @@ private WorkerTaskManager createWorkerTaskManager(File baseDir, WorkerConfig wor new NoopTestTaskReportFileWriter(), null, AuthTestUtils.TEST_AUTHORIZER_MAPPER, - new NoopChatHandlerProvider(), + new ChatHandlerProvider(), testUtils.getRowIngestionMetersFactory(), new TestAppenderatorsManager(), overlordClient, @@ -192,18 +192,7 @@ private WorkerTaskManager createWorkerTaskManager(File baseDir, WorkerConfig wor taskConfig, workerConfig, overlordClient - ) - { - @Override - protected void taskStarted(String taskId) - { - } - - @Override - protected void taskAnnouncementChanged(TaskAnnouncement announcement) - { - } - }; + ); } @Before diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/worker/WorkerTaskMonitorTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/worker/WorkerTaskMonitorTest.java deleted file mode 100644 index 7663d38ef3e9..000000000000 --- a/indexing-service/src/test/java/org/apache/druid/indexing/worker/WorkerTaskMonitorTest.java +++ /dev/null @@ -1,406 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. 
See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.indexing.worker; - -import com.fasterxml.jackson.databind.ObjectMapper; -import com.fasterxml.jackson.databind.jsontype.NamedType; -import com.google.common.base.Joiner; -import org.apache.curator.framework.CuratorFramework; -import org.apache.curator.framework.CuratorFrameworkFactory; -import org.apache.curator.retry.ExponentialBackoffRetry; -import org.apache.curator.test.TestingCluster; -import org.apache.druid.client.coordinator.NoopCoordinatorClient; -import org.apache.druid.curator.PotentiallyGzippedCompressionProvider; -import org.apache.druid.curator.announcement.NodeAnnouncer; -import org.apache.druid.indexer.TaskState; -import org.apache.druid.indexing.common.IndexingServiceCondition; -import org.apache.druid.indexing.common.SegmentCacheManagerFactory; -import org.apache.druid.indexing.common.TaskToolboxFactory; -import org.apache.druid.indexing.common.TestIndexTask; -import org.apache.druid.indexing.common.TestTasks; -import org.apache.druid.indexing.common.TestUtils; -import org.apache.druid.indexing.common.actions.TaskActionClient; -import org.apache.druid.indexing.common.actions.TaskActionClientFactory; -import org.apache.druid.indexing.common.config.TaskConfig; -import org.apache.druid.indexing.common.config.TaskConfigBuilder; -import org.apache.druid.indexing.common.task.NoopTestTaskReportFileWriter; -import org.apache.druid.indexing.common.task.Task; -import org.apache.druid.indexing.common.task.TestAppenderatorsManager; -import org.apache.druid.indexing.overlord.SingleTaskBackgroundRunner; -import org.apache.druid.indexing.overlord.TestRemoteTaskRunnerConfig; -import org.apache.druid.indexing.worker.config.WorkerConfig; -import org.apache.druid.java.util.common.FileUtils; -import org.apache.druid.java.util.common.StringUtils; -import org.apache.druid.java.util.common.concurrent.Execs; -import 
org.apache.druid.query.policy.NoopPolicyEnforcer; -import org.apache.druid.rpc.indexing.NoopOverlordClient; -import org.apache.druid.rpc.indexing.OverlordClient; -import org.apache.druid.segment.IndexIO; -import org.apache.druid.segment.IndexMergerV9Factory; -import org.apache.druid.segment.TestIndex; -import org.apache.druid.segment.handoff.SegmentHandoffNotifierFactory; -import org.apache.druid.segment.join.NoopJoinableFactory; -import org.apache.druid.segment.metadata.CentralizedDatasourceSchemaConfig; -import org.apache.druid.segment.realtime.NoopChatHandlerProvider; -import org.apache.druid.server.DruidNode; -import org.apache.druid.server.initialization.IndexerZkConfig; -import org.apache.druid.server.initialization.ServerConfig; -import org.apache.druid.server.initialization.ZkPathsConfig; -import org.apache.druid.server.metrics.NoopServiceEmitter; -import org.apache.druid.server.security.AuthTestUtils; -import org.apache.druid.utils.JvmUtils; -import org.easymock.EasyMock; -import org.joda.time.Period; -import org.junit.After; -import org.junit.Assert; -import org.junit.Before; -import org.junit.Test; - -import java.util.List; - -/** - * - */ -public class WorkerTaskMonitorTest -{ - private static final Joiner JOINER = Joiner.on("/"); - private static final String BASE_PATH = "/test/druid"; - private static final String TASKS_PATH = StringUtils.format("%s/indexer/tasks/worker", BASE_PATH); - private static final String STATUS_PATH = StringUtils.format("%s/indexer/status/worker", BASE_PATH); - private static final DruidNode DUMMY_NODE = new DruidNode("dummy", "dummy", false, 9000, null, true, false); - - private TestingCluster testingCluster; - private CuratorFramework cf; - private WorkerCuratorCoordinator workerCuratorCoordinator; - private WorkerTaskMonitor workerTaskMonitor; - - private Task task; - - private Worker worker; - private final TestUtils testUtils; - private ObjectMapper jsonMapper; - private IndexMergerV9Factory indexMergerV9Factory; - 
private IndexIO indexIO; - - public WorkerTaskMonitorTest() - { - testUtils = new TestUtils(); - jsonMapper = testUtils.getTestObjectMapper(); - indexMergerV9Factory = testUtils.getIndexMergerV9Factory(); - indexIO = testUtils.getTestIndexIO(); - } - - @Before - public void setUp() throws Exception - { - testingCluster = new TestingCluster(1); - testingCluster.start(); - - cf = CuratorFrameworkFactory.builder() - .connectString(testingCluster.getConnectString()) - .retryPolicy(new ExponentialBackoffRetry(1, 10)) - .compressionProvider(new PotentiallyGzippedCompressionProvider(false)) - .build(); - cf.start(); - cf.blockUntilConnected(); - cf.create().creatingParentsIfNeeded().forPath(BASE_PATH); - - worker = new Worker( - "http", - "worker", - "localhost", - 3, - "0", - WorkerConfig.DEFAULT_CATEGORY - ); - - workerCuratorCoordinator = new WorkerCuratorCoordinator( - jsonMapper, - new IndexerZkConfig( - new ZkPathsConfig() - { - @Override - public String getBase() - { - return BASE_PATH; - } - }, null, null, null, null - ), - new TestRemoteTaskRunnerConfig(new Period("PT1S")), - cf, - new NodeAnnouncer(cf, Execs.directExecutor()), - worker - ); - workerCuratorCoordinator.start(); - - - // Start a task monitor - workerTaskMonitor = createTaskMonitor(); - TestTasks.registerSubtypes(jsonMapper); - jsonMapper.registerSubtypes(new NamedType(TestIndexTask.class, "test_index")); - workerTaskMonitor.start(); - - task = TestTasks.immediateSuccess("test"); - } - - private WorkerTaskMonitor createTaskMonitor() - { - final TaskConfig taskConfig = new TaskConfigBuilder() - .setBaseDir(FileUtils.createTempDir().toString()) - .build(); - - TaskActionClientFactory taskActionClientFactory = EasyMock.createNiceMock(TaskActionClientFactory.class); - TaskActionClient taskActionClient = EasyMock.createNiceMock(TaskActionClient.class); - EasyMock.expect(taskActionClientFactory.create(EasyMock.anyObject())).andReturn(taskActionClient).anyTimes(); - SegmentHandoffNotifierFactory 
notifierFactory = EasyMock.createNiceMock(SegmentHandoffNotifierFactory.class); - EasyMock.replay(taskActionClientFactory, taskActionClient, notifierFactory); - return new WorkerTaskMonitor( - jsonMapper, - new SingleTaskBackgroundRunner( - new TaskToolboxFactory( - null, - taskConfig, - null, - taskActionClientFactory, - null, - NoopPolicyEnforcer.instance(), - null, - null, - null, - null, - null, - notifierFactory, - null, - null, - null, - NoopJoinableFactory.INSTANCE, - null, - new SegmentCacheManagerFactory(TestIndex.INDEX_IO, jsonMapper), - jsonMapper, - indexIO, - null, - null, - null, - indexMergerV9Factory, - null, - null, - null, - null, - null, - new NoopTestTaskReportFileWriter(), - null, - AuthTestUtils.TEST_AUTHORIZER_MAPPER, - new NoopChatHandlerProvider(), - testUtils.getRowIngestionMetersFactory(), - new TestAppenderatorsManager(), - new NoopOverlordClient(), - new NoopCoordinatorClient(), - null, - null, - null, - "1", - CentralizedDatasourceSchemaConfig.create(), - JvmUtils.getRuntimeInfo() - ), - taskConfig, - new NoopServiceEmitter(), - DUMMY_NODE, - new ServerConfig() - ), - taskConfig, - new WorkerConfig(), - cf, - workerCuratorCoordinator, - EasyMock.createNiceMock(OverlordClient.class) - ); - } - - @After - public void tearDown() throws Exception - { - workerCuratorCoordinator.stop(); - workerTaskMonitor.stop(); - cf.close(); - testingCluster.stop(); - } - - @Test(timeout = 60_000L) - public void testRunTask() throws Exception - { - Assert.assertTrue( - TestUtils.conditionValid( - new IndexingServiceCondition() - { - @Override - public boolean isValid() - { - try { - return cf.checkExists().forPath(JOINER.join(TASKS_PATH, task.getId())) == null; - } - catch (Exception e) { - return false; - } - } - } - ) - ); - - cf.create() - .creatingParentsIfNeeded() - .forPath(JOINER.join(TASKS_PATH, task.getId()), jsonMapper.writeValueAsBytes(task)); - - Assert.assertTrue( - TestUtils.conditionValid( - new IndexingServiceCondition() - { - @Override - 
public boolean isValid() - { - try { - final byte[] bytes = cf.getData().forPath(JOINER.join(STATUS_PATH, task.getId())); - final TaskAnnouncement announcement = jsonMapper.readValue( - bytes, - TaskAnnouncement.class - ); - return announcement.getTaskStatus().isComplete(); - } - catch (Exception e) { - return false; - } - } - } - ) - ); - - TaskAnnouncement taskAnnouncement = jsonMapper.readValue( - cf.getData().forPath(JOINER.join(STATUS_PATH, task.getId())), TaskAnnouncement.class - ); - - Assert.assertEquals(task.getId(), taskAnnouncement.getTaskStatus().getId()); - Assert.assertEquals(TaskState.SUCCESS, taskAnnouncement.getTaskStatus().getStatusCode()); - } - - @Test(timeout = 60_000L) - public void testGetAnnouncements() throws Exception - { - cf.create() - .creatingParentsIfNeeded() - .forPath(JOINER.join(TASKS_PATH, task.getId()), jsonMapper.writeValueAsBytes(task)); - - Assert.assertTrue( - TestUtils.conditionValid( - new IndexingServiceCondition() - { - @Override - public boolean isValid() - { - try { - final byte[] bytes = cf.getData().forPath(JOINER.join(STATUS_PATH, task.getId())); - final TaskAnnouncement announcement = jsonMapper.readValue( - bytes, - TaskAnnouncement.class - ); - return announcement.getTaskStatus().isComplete(); - } - catch (Exception e) { - return false; - } - } - } - ) - ); - - List announcements = workerCuratorCoordinator.getAnnouncements(); - Assert.assertEquals(1, announcements.size()); - Assert.assertEquals(task.getId(), announcements.get(0).getTaskStatus().getId()); - Assert.assertEquals(TaskState.SUCCESS, announcements.get(0).getTaskStatus().getStatusCode()); - Assert.assertEquals(DUMMY_NODE.getHost(), announcements.get(0).getTaskLocation().getHost()); - Assert.assertEquals(DUMMY_NODE.getPlaintextPort(), announcements.get(0).getTaskLocation().getPort()); - } - - @Test(timeout = 60_000L) - public void testRestartCleansOldStatus() throws Exception - { - task = TestTasks.unending("test"); - - cf.create() - 
.creatingParentsIfNeeded() - .forPath(JOINER.join(TASKS_PATH, task.getId()), jsonMapper.writeValueAsBytes(task)); - - Assert.assertTrue( - TestUtils.conditionValid( - new IndexingServiceCondition() - { - @Override - public boolean isValid() - { - try { - return cf.checkExists().forPath(JOINER.join(STATUS_PATH, task.getId())) != null; - } - catch (Exception e) { - return false; - } - } - } - ) - ); - // simulate node restart - workerTaskMonitor.stop(); - workerTaskMonitor = createTaskMonitor(); - workerTaskMonitor.start(); - List announcements = workerCuratorCoordinator.getAnnouncements(); - Assert.assertEquals(1, announcements.size()); - Assert.assertEquals(task.getId(), announcements.get(0).getTaskStatus().getId()); - Assert.assertEquals(TaskState.FAILED, announcements.get(0).getTaskStatus().getStatusCode()); - Assert.assertEquals( - "Canceled as unknown task. See middleManager or indexer logs for more details.", - announcements.get(0).getTaskStatus().getErrorMsg() - ); - } - - @Test(timeout = 60_000L) - public void testStatusAnnouncementsArePersistent() throws Exception - { - cf.create() - .creatingParentsIfNeeded() - .forPath(JOINER.join(TASKS_PATH, task.getId()), jsonMapper.writeValueAsBytes(task)); - - Assert.assertTrue( - TestUtils.conditionValid( - new IndexingServiceCondition() - { - @Override - public boolean isValid() - { - try { - return cf.checkExists().forPath(JOINER.join(STATUS_PATH, task.getId())) != null; - } - catch (Exception e) { - return false; - } - } - } - ) - ); - // ephemeral owner is 0 is created node is PERSISTENT - Assert.assertEquals(0, cf.checkExists().forPath(JOINER.join(STATUS_PATH, task.getId())).getEphemeralOwner()); - - } -} diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/worker/http/WorkerResourceTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/worker/http/WorkerResourceTest.java index 096688e5eb35..3421f4645b9f 100644 --- 
a/indexing-service/src/test/java/org/apache/druid/indexing/worker/http/WorkerResourceTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/worker/http/WorkerResourceTest.java @@ -19,26 +19,11 @@ package org.apache.druid.indexing.worker.http; -import com.fasterxml.jackson.databind.ObjectMapper; -import org.apache.curator.framework.CuratorFramework; -import org.apache.curator.framework.CuratorFrameworkFactory; -import org.apache.curator.retry.ExponentialBackoffRetry; -import org.apache.curator.test.TestingCluster; -import org.apache.druid.curator.PotentiallyGzippedCompressionProvider; -import org.apache.druid.curator.ZkEnablementConfig; -import org.apache.druid.curator.announcement.NodeAnnouncer; -import org.apache.druid.indexing.overlord.config.RemoteTaskRunnerConfig; +import org.apache.druid.indexing.overlord.TaskRunner; import org.apache.druid.indexing.worker.Worker; -import org.apache.druid.indexing.worker.WorkerCuratorCoordinator; -import org.apache.druid.indexing.worker.WorkerTaskMonitor; +import org.apache.druid.indexing.worker.WorkerTaskManager; import org.apache.druid.indexing.worker.config.WorkerConfig; -import org.apache.druid.jackson.DefaultObjectMapper; -import org.apache.druid.java.util.common.StringUtils; -import org.apache.druid.java.util.common.concurrent.Execs; -import org.apache.druid.server.initialization.IndexerZkConfig; -import org.apache.druid.server.initialization.ZkPathsConfig; import org.easymock.EasyMock; -import org.junit.After; import org.junit.Assert; import org.junit.Before; import org.junit.Test; @@ -49,33 +34,13 @@ */ public class WorkerResourceTest { - private static final ObjectMapper JSON_MAPPER = new DefaultObjectMapper(); - private static final String BASE_PATH = "/test/druid"; - private static final String ANNOUNCEMENT_PATH = StringUtils.format("%s/indexer/announcements/host", BASE_PATH); - - private TestingCluster testingCluster; - private CuratorFramework cf; - private Worker worker; - - private 
WorkerCuratorCoordinator curatorCoordinator; + private WorkerTaskManager workerTaskManager; private WorkerResource workerResource; @Before - public void setUp() throws Exception + public void setUp() { - testingCluster = new TestingCluster(1); - testingCluster.start(); - - cf = CuratorFrameworkFactory.builder() - .connectString(testingCluster.getConnectString()) - .retryPolicy(new ExponentialBackoffRetry(1, 10)) - .compressionProvider(new PotentiallyGzippedCompressionProvider(false)) - .build(); - cf.start(); - cf.blockUntilConnected(); - cf.create().creatingParentsIfNeeded().forPath(BASE_PATH); - worker = new Worker( "http", "host", @@ -84,71 +49,49 @@ public void setUp() throws Exception "v1", WorkerConfig.DEFAULT_CATEGORY ); - - curatorCoordinator = new WorkerCuratorCoordinator( - JSON_MAPPER, - new IndexerZkConfig(new ZkPathsConfig() - { - @Override - public String getBase() - { - return BASE_PATH; - } - }, null, null, null, null), - new RemoteTaskRunnerConfig(), - cf, - new NodeAnnouncer(cf, Execs.directExecutor()), - worker - ); - curatorCoordinator.start(); - + workerTaskManager = EasyMock.createMock(WorkerTaskManager.class); workerResource = new WorkerResource( worker, - () -> curatorCoordinator, - null, - EasyMock.createNiceMock(WorkerTaskMonitor.class), - ZkEnablementConfig.ENABLED + EasyMock.createNiceMock(TaskRunner.class), + workerTaskManager ); } - @After - public void tearDown() throws Exception + @Test + public void testDoDisable() { - curatorCoordinator.stop(); - cf.close(); - testingCluster.close(); + workerTaskManager.workerDisabled(); + EasyMock.expectLastCall(); + EasyMock.replay(workerTaskManager); + + Response res = workerResource.doDisable(); + Assert.assertEquals(Response.Status.OK.getStatusCode(), res.getStatus()); + + EasyMock.verify(workerTaskManager); } @Test - public void testDoDisable() throws Exception + public void testDoEnable() { - Worker theWorker = JSON_MAPPER.readValue(cf.getData().forPath(ANNOUNCEMENT_PATH), Worker.class); - 
Assert.assertEquals("v1", theWorker.getVersion()); - Assert.assertFalse(theWorker.isDisabled()); + workerTaskManager.workerEnabled(); + EasyMock.expectLastCall(); + EasyMock.replay(workerTaskManager); - Response res = workerResource.doDisable(); + Response res = workerResource.doEnable(); Assert.assertEquals(Response.Status.OK.getStatusCode(), res.getStatus()); - theWorker = JSON_MAPPER.readValue(cf.getData().forPath(ANNOUNCEMENT_PATH), Worker.class); - Assert.assertTrue(theWorker.getVersion().isEmpty()); - Assert.assertTrue(theWorker.isDisabled()); + EasyMock.verify(workerTaskManager); } @Test - public void testDoEnable() throws Exception + public void testIsEnabled() { - // Disable the worker - Response res = workerResource.doDisable(); - Assert.assertEquals(Response.Status.OK.getStatusCode(), res.getStatus()); - Worker theWorker = JSON_MAPPER.readValue(cf.getData().forPath(ANNOUNCEMENT_PATH), Worker.class); - Assert.assertTrue(theWorker.getVersion().isEmpty()); - Assert.assertTrue(theWorker.isDisabled()); + EasyMock.expect(workerTaskManager.isWorkerEnabled()).andReturn(true); + EasyMock.replay(workerTaskManager); - // Enable the worker - res = workerResource.doEnable(); + Response res = workerResource.isEnabled(); Assert.assertEquals(Response.Status.OK.getStatusCode(), res.getStatus()); - theWorker = JSON_MAPPER.readValue(cf.getData().forPath(ANNOUNCEMENT_PATH), Worker.class); - Assert.assertEquals("v1", theWorker.getVersion()); - Assert.assertFalse(theWorker.isDisabled()); + + EasyMock.verify(workerTaskManager); } } diff --git a/indexing-service/src/test/java/org/apache/druid/server/initialization/IndexerZkConfigTest.java b/indexing-service/src/test/java/org/apache/druid/server/initialization/IndexerZkConfigTest.java deleted file mode 100644 index 92ddb1b63caa..000000000000 --- a/indexing-service/src/test/java/org/apache/druid/server/initialization/IndexerZkConfigTest.java +++ /dev/null @@ -1,278 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) 
under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.server.initialization; - -import com.fasterxml.jackson.annotation.JsonProperty; -import com.google.common.collect.ImmutableList; -import com.google.inject.Binder; -import com.google.inject.Injector; -import com.google.inject.Module; -import com.google.inject.name.Names; -import org.apache.druid.guice.GuiceInjectors; -import org.apache.druid.guice.JsonConfigProvider; -import org.apache.druid.guice.JsonConfigurator; -import org.apache.druid.initialization.Initialization; -import org.apache.druid.jackson.DefaultObjectMapper; -import org.apache.druid.java.util.common.StringUtils; -import org.apache.druid.java.util.common.jackson.JacksonUtils; -import org.junit.Assert; -import org.junit.Before; -import org.junit.BeforeClass; -import org.junit.Test; - -import java.lang.reflect.Field; -import java.lang.reflect.InvocationTargetException; -import java.lang.reflect.Method; -import java.util.Collection; -import java.util.HashSet; -import java.util.Map; -import java.util.Properties; -import java.util.UUID; - -/** - * - */ -public class IndexerZkConfigTest -{ - private static final String INDEXER_PROPERTY_STRING = "test.druid.zk.paths.indexer"; - private static final String ZK_SERVICE_CONFIG_STRING = 
"test.druid.zk.paths"; - private static final Collection CLOBBERABLE_PROPERTIES = new HashSet<>(); - - private static final Module SIMPLE_ZK_CONFIG_MODULE = new Module() - { - @Override - public void configure(Binder binder) - { - binder.bindConstant().annotatedWith(Names.named("serviceName")).to("druid/test"); - binder.bindConstant().annotatedWith(Names.named("servicePort")).to(0); - binder.bindConstant().annotatedWith(Names.named("tlsServicePort")).to(-1); - // See IndexingServiceModuleHelper - JsonConfigProvider.bind(binder, INDEXER_PROPERTY_STRING, IndexerZkConfig.class); - JsonConfigProvider.bind(binder, ZK_SERVICE_CONFIG_STRING, ZkPathsConfig.class); - } - }; - - @BeforeClass - public static void setup() - { - for (Field field : IndexerZkConfig.class.getDeclaredFields()) { - if (null != field.getAnnotation(JsonProperty.class)) { - CLOBBERABLE_PROPERTIES.add(StringUtils.format("%s.%s", INDEXER_PROPERTY_STRING, field.getName())); - } - } - for (Field field : ZkPathsConfig.class.getDeclaredFields()) { - if (null != field.getAnnotation(JsonProperty.class)) { - CLOBBERABLE_PROPERTIES.add(StringUtils.format("%s.%s", ZK_SERVICE_CONFIG_STRING, field.getName())); - } - } - } - - private Properties propertyValues = new Properties(); - private int assertions = 0; - - @Before - public void setupTest() - { - for (String property : CLOBBERABLE_PROPERTIES) { - propertyValues.put(property, UUID.randomUUID().toString()); - } - assertions = 0; - } - - - private void validateEntries(ZkPathsConfig zkPathsConfig) - throws IllegalAccessException, NoSuchMethodException, InvocationTargetException - { - for (Field field : ZkPathsConfig.class.getDeclaredFields()) { - if (null != field.getAnnotation(JsonProperty.class)) { - String property = StringUtils.format("%s.%s", ZK_SERVICE_CONFIG_STRING, field.getName()); - String getter = StringUtils.format( - "get%s%s", - StringUtils.toUpperCase(field.getName().substring(0, 1)), - field.getName().substring(1) - ); - Method method = 
ZkPathsConfig.class.getDeclaredMethod(getter); - Assert.assertEquals(propertyValues.getProperty(property), method.invoke(zkPathsConfig)); - ++assertions; - } - } - } - - private void validateEntries(IndexerZkConfig indexerZkConfig) - throws IllegalAccessException, NoSuchMethodException, InvocationTargetException - { - for (Field field : IndexerZkConfig.class.getDeclaredFields()) { - if (null != field.getAnnotation(JsonProperty.class)) { - String property = StringUtils.format("%s.%s", INDEXER_PROPERTY_STRING, field.getName()); - String getter = StringUtils.format( - "get%s%s", - StringUtils.toUpperCase(field.getName().substring(0, 1)), - field.getName().substring(1) - ); - Method method = IndexerZkConfig.class.getDeclaredMethod(getter); - Assert.assertEquals(propertyValues.getProperty(property), method.invoke(indexerZkConfig)); - ++assertions; - } - } - } - - @Test - public void testNullConfig() - { - propertyValues.clear(); - - final Injector injector = Initialization.makeInjectorWithModules( - GuiceInjectors.makeStartupInjector(), - ImmutableList.of(SIMPLE_ZK_CONFIG_MODULE) - ); - JsonConfigurator configurator = injector.getBinding(JsonConfigurator.class).getProvider().get(); - - JsonConfigProvider zkPathsConfig = JsonConfigProvider.of(ZK_SERVICE_CONFIG_STRING, ZkPathsConfig.class); - zkPathsConfig.inject(propertyValues, configurator); - - JsonConfigProvider indexerZkConfig = JsonConfigProvider.of( - INDEXER_PROPERTY_STRING, - IndexerZkConfig.class - ); - indexerZkConfig.inject(propertyValues, configurator); - - Assert.assertEquals("/druid/indexer/tasks", indexerZkConfig.get().getTasksPath()); - } - - @Test - public void testSimpleConfig() throws IllegalAccessException, NoSuchMethodException, InvocationTargetException - { - final Injector injector = Initialization.makeInjectorWithModules( - GuiceInjectors.makeStartupInjector(), - ImmutableList.of(SIMPLE_ZK_CONFIG_MODULE) - ); - JsonConfigurator configurator = 
injector.getBinding(JsonConfigurator.class).getProvider().get(); - - JsonConfigProvider zkPathsConfig = JsonConfigProvider.of(ZK_SERVICE_CONFIG_STRING, ZkPathsConfig.class); - zkPathsConfig.inject(propertyValues, configurator); - - JsonConfigProvider indexerZkConfig = JsonConfigProvider.of( - INDEXER_PROPERTY_STRING, - IndexerZkConfig.class - ); - indexerZkConfig.inject(propertyValues, configurator); - - - IndexerZkConfig zkConfig = indexerZkConfig.get(); - ZkPathsConfig zkPathsConfig1 = zkPathsConfig.get(); - - validateEntries(zkConfig); - validateEntries(zkPathsConfig1); - Assert.assertEquals(CLOBBERABLE_PROPERTIES.size(), assertions); - } - - - - @Test - public void testIndexerBaseOverride() - { - final String overrideValue = "/foo/bar/baz"; - final String indexerPropertyKey = INDEXER_PROPERTY_STRING + ".base"; - final String priorValue = System.getProperty(indexerPropertyKey); - System.setProperty(indexerPropertyKey, overrideValue); // Set it here so that the binding picks it up - final Injector injector = Initialization.makeInjectorWithModules( - GuiceInjectors.makeStartupInjector(), - ImmutableList.of(SIMPLE_ZK_CONFIG_MODULE) - ); - propertyValues.clear(); - propertyValues.setProperty(indexerPropertyKey, overrideValue); // Have to set it here as well annoyingly enough - - - JsonConfigurator configurator = injector.getBinding(JsonConfigurator.class).getProvider().get(); - - JsonConfigProvider indexerPathsConfig = JsonConfigProvider.of( - INDEXER_PROPERTY_STRING, - IndexerZkConfig.class - ); - indexerPathsConfig.inject(propertyValues, configurator); - IndexerZkConfig indexerZkConfig = indexerPathsConfig.get(); - - - // Rewind value before we potentially fail - if (priorValue == null) { - System.clearProperty(indexerPropertyKey); - } else { - System.setProperty(indexerPropertyKey, priorValue); - } - - Assert.assertEquals(overrideValue, indexerZkConfig.getBase()); - Assert.assertEquals(overrideValue + "/announcements", indexerZkConfig.getAnnouncementsPath()); - } 
- - @Test - public void testExactConfig() - { - final Injector injector = Initialization.makeInjectorWithModules( - GuiceInjectors.makeStartupInjector(), - ImmutableList.of(SIMPLE_ZK_CONFIG_MODULE) - ); - propertyValues.setProperty(ZK_SERVICE_CONFIG_STRING + ".base", "/druid/metrics"); - - - JsonConfigurator configurator = injector.getBinding(JsonConfigurator.class).getProvider().get(); - - JsonConfigProvider zkPathsConfig = JsonConfigProvider.of( - ZK_SERVICE_CONFIG_STRING, - ZkPathsConfig.class - ); - - zkPathsConfig.inject(propertyValues, configurator); - - ZkPathsConfig zkPathsConfig1 = zkPathsConfig.get(); - - IndexerZkConfig indexerZkConfig = new IndexerZkConfig(zkPathsConfig1, null, null, null, null); - - Assert.assertEquals("/druid/metrics/indexer", indexerZkConfig.getBase()); - Assert.assertEquals("/druid/metrics/indexer/announcements", indexerZkConfig.getAnnouncementsPath()); - } - - @Test - public void testFullOverride() throws Exception - { - final DefaultObjectMapper mapper = new DefaultObjectMapper(); - final ZkPathsConfig zkPathsConfig = new ZkPathsConfig(); - - IndexerZkConfig indexerZkConfig = new IndexerZkConfig( - zkPathsConfig, - "/druid/prod", - "/druid/prod/a", - "/druid/prod/t", - "/druid/prod/s" - ); - - Map value = mapper.readValue( - mapper.writeValueAsString(indexerZkConfig), JacksonUtils.TYPE_REFERENCE_MAP_STRING_STRING - ); - IndexerZkConfig newConfig = new IndexerZkConfig( - zkPathsConfig, - value.get("base"), - value.get("announcementsPath"), - value.get("tasksPath"), - value.get("statusPath") - ); - - Assert.assertEquals(indexerZkConfig, newConfig); - } -} diff --git a/licenses.yaml b/licenses.yaml index 23f58a0d3b3d..5712ec2a9247 100644 --- a/licenses.yaml +++ b/licenses.yaml @@ -321,7 +321,7 @@ name: Jackson license_category: binary module: java-core license_name: Apache License version 2.0 -version: 2.20.2 +version: 2.21.3 libraries: - com.fasterxml.jackson.core: jackson-core - com.fasterxml.jackson.core: jackson-annotations @@ 
-364,7 +364,7 @@ name: Jackson license_category: binary module: java-core license_name: Apache License version 2.0 -version: "2.20" +version: "2.21" libraries: - com.fasterxml.jackson.core: jackson-annotations @@ -374,7 +374,7 @@ name: Jackson license_category: binary module: extensions-contrib/druid-deltalake-extensions license_name: Apache License version 2.0 -version: 2.20.2 +version: 2.21.3 libraries: - com.fasterxml.jackson.core: jackson-databind notice: | @@ -424,7 +424,7 @@ name: Caffeine license_category: binary module: java-core license_name: Apache License version 2.0 -version: 2.9.3 +version: 3.2.4 libraries: - com.github.ben-manes.caffeine: caffeine @@ -434,7 +434,7 @@ name: Error Prone Annotations license_category: binary module: java-core license_name: Apache License version 2.0 -version: 2.41.0 +version: 2.49.0 libraries: - com.google.errorprone: error_prone_annotations @@ -1002,7 +1002,7 @@ name: Jackson license_category: binary module: extensions-core/kubernetes-overlord-extensions license_name: Apache License version 2.0 -version: 2.20.2 +version: 2.21.3 libraries: - com.fasterxml.jackson.dataformat: jackson-dataformat-properties notice: | @@ -1107,6 +1107,16 @@ libraries: --- +name: org.jspecify jspecify +license_category: binary +module: extensions-core/kubernetes-extensions +license_name: Apache License version 2.0 +version: 1.0.0 +libraries: + - org.jspecify: jspecify + +--- + name: io.gsonfire gson-fire license_category: binary module: extensions-core/kubernetes-extensions @@ -1890,7 +1900,6 @@ libraries: - org.apache.curator: curator-client - org.apache.curator: curator-framework - org.apache.curator: curator-recipes - - org.apache.curator: curator-x-discovery notices: - curator-client: | Curator Client @@ -1901,9 +1910,6 @@ notices: - curator-recipes: | Curator Recipes Copyright 2011-2022 The Apache Software Foundation - - curator-x-discovery: | - Curator Service Discovery - Copyright 2011-2022 The Apache Software Foundation --- @@ -3069,7 
+3075,7 @@ libraries: --- name: Jackson Dataformat Yaml -version: 2.20.2 +version: 2.21.3 license_category: binary module: extensions/druid-avro-extensions license_name: Apache License version 2.0 diff --git a/multi-stage-query/README.md b/multi-stage-query/README.md index 5b00da45f0b0..11fd12d11f22 100644 --- a/multi-stage-query/README.md +++ b/multi-stage-query/README.md @@ -17,9 +17,9 @@ ~ under the License. --> -# `druid-multi-stage-query` developer notes +# Multi-stage query developer notes -This document provides developer notes for the major packages of the `druid-multi-stage-query` extension. It does not +This document provides developer notes for the major packages of the multi-stage query module. It does not discuss future plans; these are discussed on the list or in GitHub issues. ## Model diff --git a/multi-stage-query/src/main/java/org/apache/druid/msq/counters/ChannelCounters.java b/multi-stage-query/src/main/java/org/apache/druid/msq/counters/ChannelCounters.java index 975cd50aaf0a..2df4ea1b5d61 100644 --- a/multi-stage-query/src/main/java/org/apache/druid/msq/counters/ChannelCounters.java +++ b/multi-stage-query/src/main/java/org/apache/druid/msq/counters/ChannelCounters.java @@ -40,7 +40,7 @@ */ public class ChannelCounters implements QueryCounter { - private static final int NO_PARTITION = 0; + public static final int NO_PARTITION = 0; @GuardedBy("this") private final LongList rows = new LongArrayList(); @@ -162,7 +162,16 @@ public ChannelCounters addTotalQueries(final long n) } } - private void add( + /** + * Most-generic "add" method. 
+ * + * @param partitionNumber add counters to this partition + * @param nRows add this number of rows + * @param nBytes add this number of bytes + * @param nFrames add this number of frames + * @param nFiles add this number of files + */ + public void add( final int partitionNumber, final long nRows, final long nBytes, diff --git a/multi-stage-query/src/main/java/org/apache/druid/msq/dart/controller/sql/DartQueryMaker.java b/multi-stage-query/src/main/java/org/apache/druid/msq/dart/controller/sql/DartQueryMaker.java index d6f62f859f79..628653c103df 100644 --- a/multi-stage-query/src/main/java/org/apache/druid/msq/dart/controller/sql/DartQueryMaker.java +++ b/multi-stage-query/src/main/java/org/apache/druid/msq/dart/controller/sql/DartQueryMaker.java @@ -279,7 +279,7 @@ private Sequence runWithReport( return Sequences.simple(List.of(new Object[]{reportMap})); } catch (InterruptedException e) { - controllerHolder.cancel(CancellationReason.UNKNOWN); + controllerHolder.cancel(CancellationReason.UNKNOWN, null); Thread.currentThread().interrupt(); throw new RuntimeException(e); } @@ -321,8 +321,11 @@ private Sequence runWithSequence( @Override public void after(final boolean isDone, final Throwable thrown) { - if (!isDone || thrown != null) { - controllerHolder.cancel(CancellationReason.UNKNOWN); + if (thrown != null || !isDone) { + // If an exception is set (thrown != null), retain it. Otherwise, if we're in here, !isDone indicates + // the caller went away before reading the entire stream of results. The best we can do in that case + // is a generic UNKNOWN cancellation reason. 
+ controllerHolder.cancel(CancellationReason.UNKNOWN, thrown); } } } diff --git a/multi-stage-query/src/main/java/org/apache/druid/msq/dart/controller/sql/DartSqlEngine.java b/multi-stage-query/src/main/java/org/apache/druid/msq/dart/controller/sql/DartSqlEngine.java index a66a72ec7151..c62c57b28801 100644 --- a/multi-stage-query/src/main/java/org/apache/druid/msq/dart/controller/sql/DartSqlEngine.java +++ b/multi-stage-query/src/main/java/org/apache/druid/msq/dart/controller/sql/DartSqlEngine.java @@ -379,7 +379,7 @@ public void cancelQuery(PlannerContext plannerContext, QueryScheduler querySched if (dartQueryId instanceof String) { final ControllerHolder holder = controllerRegistry.getController((String) dartQueryId); if (holder != null) { - holder.cancel(CancellationReason.USER_REQUEST); + holder.cancel(CancellationReason.USER_REQUEST, null); } } else { log.warn( diff --git a/multi-stage-query/src/main/java/org/apache/druid/msq/dart/worker/DartFrameContext.java b/multi-stage-query/src/main/java/org/apache/druid/msq/dart/worker/DartFrameContext.java index fca50d51efe0..2c679e9f348b 100644 --- a/multi-stage-query/src/main/java/org/apache/druid/msq/dart/worker/DartFrameContext.java +++ b/multi-stage-query/src/main/java/org/apache/druid/msq/dart/worker/DartFrameContext.java @@ -40,6 +40,7 @@ import org.apache.druid.segment.incremental.NoopRowIngestionMeters; import org.apache.druid.segment.incremental.RowIngestionMeters; import org.apache.druid.segment.loading.DataSegmentPusher; +import org.apache.druid.segment.loading.external.VirtualStorageManager; import org.apache.druid.server.SegmentManager; import javax.annotation.Nullable; @@ -54,6 +55,7 @@ public class DartFrameContext implements FrameContext private final FrameWriterSpec frameWriterSpec; private final SegmentWrangler segmentWrangler; private final SegmentManager segmentManager; + private final VirtualStorageManager virtualStorageManager; private final CoordinatorClient coordinatorClient; private final 
WorkerContext workerContext; @@ -78,6 +80,7 @@ public DartFrameContext( final FrameWriterSpec frameWriterSpec, final SegmentWrangler segmentWrangler, final SegmentManager segmentManager, + final VirtualStorageManager virtualStorageManager, final CoordinatorClient coordinatorClient, @Nullable final ProcessingBuffersSet processingBuffersSet, final WorkerMemoryParameters memoryParameters, @@ -89,6 +92,7 @@ public DartFrameContext( this.segmentWrangler = segmentWrangler; this.frameWriterSpec = frameWriterSpec; this.segmentManager = segmentManager; + this.virtualStorageManager = virtualStorageManager; this.coordinatorClient = coordinatorClient; this.workerContext = workerContext; this.processingBuffersSet = processingBuffersSet; @@ -121,6 +125,12 @@ public SegmentManager segmentManager() return segmentManager; } + @Override + public VirtualStorageManager virtualStorageManager() + { + return virtualStorageManager; + } + @Override public CoordinatorClient coordinatorClient() { diff --git a/multi-stage-query/src/main/java/org/apache/druid/msq/dart/worker/DartWorkerContext.java b/multi-stage-query/src/main/java/org/apache/druid/msq/dart/worker/DartWorkerContext.java index 39888b757177..e43d79d29f74 100644 --- a/multi-stage-query/src/main/java/org/apache/druid/msq/dart/worker/DartWorkerContext.java +++ b/multi-stage-query/src/main/java/org/apache/druid/msq/dart/worker/DartWorkerContext.java @@ -51,6 +51,7 @@ import org.apache.druid.query.QueryContexts; import org.apache.druid.query.policy.PolicyEnforcer; import org.apache.druid.segment.SegmentWrangler; +import org.apache.druid.segment.loading.external.VirtualStorageManager; import org.apache.druid.server.DruidNode; import org.apache.druid.server.SegmentManager; import org.apache.druid.utils.CloseableUtils; @@ -81,6 +82,7 @@ public class DartWorkerContext implements WorkerContext private final DartWorkerClient workerClient; private final SegmentWrangler segmentWrangler; private final SegmentManager segmentManager; + private 
final VirtualStorageManager virtualStorageManager; private final CoordinatorClient coordinatorClient; private final MemoryIntrospector memoryIntrospector; private final ProcessingBuffersProvider processingBuffersProvider; @@ -109,6 +111,7 @@ public class DartWorkerContext implements WorkerContext final DruidProcessingConfig processingConfig, final SegmentWrangler segmentWrangler, final SegmentManager segmentManager, + final VirtualStorageManager virtualStorageManager, final CoordinatorClient coordinatorClient, final MemoryIntrospector memoryIntrospector, final ProcessingBuffersProvider processingBuffersProvider, @@ -131,6 +134,7 @@ public class DartWorkerContext implements WorkerContext this.workerClient = workerClient; this.segmentWrangler = segmentWrangler; this.segmentManager = segmentManager; + this.virtualStorageManager = virtualStorageManager; this.coordinatorClient = coordinatorClient; this.memoryIntrospector = memoryIntrospector; this.processingBuffersProvider = processingBuffersProvider; @@ -258,6 +262,7 @@ public FrameContext frameContext(WorkOrder workOrder) FrameWriterSpec.fromContext(workOrder.getWorkerContext()), segmentWrangler, segmentManager, + virtualStorageManager, coordinatorClient, workOrder.getStageDefinition().getProcessor().usesProcessingBuffers() ? 
processingBuffersSet.get() : null, memoryParameters, diff --git a/multi-stage-query/src/main/java/org/apache/druid/msq/dart/worker/DartWorkerContextFactoryImpl.java b/multi-stage-query/src/main/java/org/apache/druid/msq/dart/worker/DartWorkerContextFactoryImpl.java index dc74cea3513f..16e8e187b983 100644 --- a/multi-stage-query/src/main/java/org/apache/druid/msq/dart/worker/DartWorkerContextFactoryImpl.java +++ b/multi-stage-query/src/main/java/org/apache/druid/msq/dart/worker/DartWorkerContextFactoryImpl.java @@ -37,10 +37,10 @@ import org.apache.druid.msq.input.InputSliceReaderProvider; import org.apache.druid.query.DruidProcessingConfig; import org.apache.druid.query.QueryContext; -import org.apache.druid.query.groupby.GroupingEngine; import org.apache.druid.query.policy.PolicyEnforcer; import org.apache.druid.rpc.ServiceClientFactory; import org.apache.druid.segment.SegmentWrangler; +import org.apache.druid.segment.loading.external.VirtualStorageManager; import org.apache.druid.server.DruidNode; import org.apache.druid.server.SegmentManager; @@ -61,8 +61,8 @@ public class DartWorkerContextFactoryImpl implements DartWorkerContextFactory private final ServiceClientFactory serviceClientFactory; private final DruidProcessingConfig processingConfig; private final SegmentWrangler segmentWrangler; - private final GroupingEngine groupingEngine; private final SegmentManager segmentManager; + private final VirtualStorageManager virtualStorageManager; private final CoordinatorClient coordinatorClient; private final MemoryIntrospector memoryIntrospector; private final ProcessingBuffersProvider processingBuffersProvider; @@ -81,8 +81,8 @@ public DartWorkerContextFactoryImpl( @EscalatedGlobal ServiceClientFactory serviceClientFactory, DruidProcessingConfig processingConfig, SegmentWrangler segmentWrangler, - GroupingEngine groupingEngine, SegmentManager segmentManager, + VirtualStorageManager virtualStorageManager, CoordinatorClient coordinatorClient, MemoryIntrospector 
memoryIntrospector, @Dart ProcessingBuffersProvider processingBuffersProvider, @@ -101,8 +101,8 @@ public DartWorkerContextFactoryImpl( this.processingConfig = processingConfig; this.segmentWrangler = segmentWrangler; this.coordinatorClient = coordinatorClient; - this.groupingEngine = groupingEngine; this.segmentManager = segmentManager; + this.virtualStorageManager = virtualStorageManager; this.memoryIntrospector = memoryIntrospector; this.processingBuffersProvider = processingBuffersProvider; this.outbox = outbox; @@ -130,6 +130,7 @@ public WorkerContext build( processingConfig, segmentWrangler, segmentManager, + virtualStorageManager, coordinatorClient, memoryIntrospector, processingBuffersProvider, diff --git a/multi-stage-query/src/main/java/org/apache/druid/msq/exec/Controller.java b/multi-stage-query/src/main/java/org/apache/druid/msq/exec/Controller.java index 1370a89da23c..75b47e7e262b 100644 --- a/multi-stage-query/src/main/java/org/apache/druid/msq/exec/Controller.java +++ b/multi-stage-query/src/main/java/org/apache/druid/msq/exec/Controller.java @@ -60,7 +60,7 @@ public interface Controller * Terminate the controller upon a cancellation request. Causes a concurrently-running {@link #run} method in * a separate thread to cancel all outstanding work and exit. */ - void stop(CancellationReason reason); + void stop(CancellationReason reason, @Nullable Throwable cause); // Worker-to-controller messages diff --git a/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerHolder.java b/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerHolder.java index 4caf79d2866a..5e0af589a86a 100644 --- a/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerHolder.java +++ b/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerHolder.java @@ -238,7 +238,7 @@ public ListenableFuture runAsync( /** * Places this holder into {@link State#CANCELED} and stops the controller. 
*/ - public void cancel(final CancellationReason reason) + public void cancel(final CancellationReason reason, @Nullable final Throwable cause) { final State prevState; synchronized (this) { @@ -251,7 +251,7 @@ public void cancel(final CancellationReason reason) } if (prevState == State.RUNNING) { - controller.stop(reason); + controller.stop(reason, cause); // Interrupt the controller thread as a failsafe, in case the controller is blocked on something. synchronized (this) { @@ -299,12 +299,12 @@ private ScheduledFuture scheduleTimeout(final ScheduledExecutorService schedu if (delayMs <= 0) { // Deadline has already passed. Cancel immediately rather than scheduling, so the cancellation // takes effect even when using a direct executor for the controller thread. - cancel(CancellationReason.QUERY_TIMEOUT); + cancel(CancellationReason.QUERY_TIMEOUT, null); return null; } return scheduledExec.schedule( - () -> cancel(CancellationReason.QUERY_TIMEOUT), + () -> cancel(CancellationReason.QUERY_TIMEOUT, null), delayMs, TimeUnit.MILLISECONDS ); diff --git a/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java b/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java index db9b3cbc9fbf..868e3ad82fa8 100644 --- a/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java +++ b/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java @@ -252,12 +252,20 @@ public class ControllerImpl implements Controller private final AtomicReference workerErrorRef = new AtomicReference<>(); /** - * Set by {@link #stop(CancellationReason)}. If non-null, this reason takes priority over any exception - * encountered during execution when building the error report. If we didn't do this, interrupts arising - * from cancellation could produce errors that are less informative than the actual cancellation reason. + * Set by {@link #stop(CancellationReason, Throwable)}. 
If non-null, this reason takes priority over any + * exception encountered during execution when building the error report. If we didn't do this, interrupts + * arising from cancellation could produce errors that are less informative than the actual cancellation reason. */ private volatile CancellationReason cancelReason; + /** + * Set by {@link #stop(CancellationReason, Throwable)} when cancellation was triggered by an external error + * (such as a failure writing results back to the client). If non-null, this exception is logged and included + * in the error report instead of a bare {@link CanceledFault}, so the original error is not lost. + */ + @Nullable + private volatile Throwable cancelException; + // For system warning reporting private final ConcurrentLinkedQueue workerWarnings = new ConcurrentLinkedQueue<>(); @@ -375,14 +383,20 @@ private MSQTaskReportPayload makeStatusReportForException(Throwable e) } @Override - public void stop(CancellationReason reason) + public void stop(CancellationReason reason, @Nullable Throwable cause) { final QueryDefinition queryDef = queryDefRef.get(); // stopGracefully() is called when the containing process is terminated, or when the task is canceled. - log.info("Query [%s] canceled.", queryDef != null ? queryDef.getQueryId() : ""); + final String queryIdForLog = queryDef != null ? queryDef.getQueryId() : ""; + if (cause != null) { + log.warn(cause, "Query[%s] canceled, reason[%s].", queryIdForLog, reason); + } else { + log.info("Query[%s] canceled, reason[%s].", queryIdForLog, reason); + } cancelReason = reason; + cancelException = cause; stopExternalFetchers(); kernelManipulationQueue.clear(); // No point processing any possibly-queued commands. 
addToKernelManipulationQueue( @@ -483,7 +497,14 @@ private MSQTaskReportPayload runInternal(final QueryListener queryListener, fina taskStateForReport = TaskState.FAILED; - if (cancelReason != null) { + if (cancelReason == CancellationReason.UNKNOWN && cancelException != null) { + // Cancellation triggered by an external error. Report the original error. + if (exceptionEncountered != null) { + cancelException.addSuppressed(exceptionEncountered); + } + errorForReport = + MSQErrorReport.fromException(queryId(), selfHost, null, cancelException, querySpec.getColumnMappings()); + } else if (cancelReason != null) { errorForReport = MSQErrorReport.fromFault(queryId(), selfHost, null, new CanceledFault(cancelReason)); } else { errorForReport = MSQTasks.makeErrorReport(queryId(), selfHost, controllerError, workerError); diff --git a/multi-stage-query/src/main/java/org/apache/druid/msq/exec/DataServerQueryHandlerUtils.java b/multi-stage-query/src/main/java/org/apache/druid/msq/exec/DataServerQueryHandlerUtils.java index 392e6ee378ea..9a034a152e8d 100644 --- a/multi-stage-query/src/main/java/org/apache/druid/msq/exec/DataServerQueryHandlerUtils.java +++ b/multi-stage-query/src/main/java/org/apache/druid/msq/exec/DataServerQueryHandlerUtils.java @@ -35,6 +35,8 @@ import org.apache.druid.query.SegmentDescriptor; import org.apache.druid.query.TableDataSource; import org.apache.druid.query.context.ResponseContext; +import org.apache.druid.query.rowsandcols.RowsAndColumns; +import org.apache.druid.query.scan.ScanResultValue; import org.apache.druid.sql.calcite.planner.PlannerContext; import java.util.Collections; @@ -130,7 +132,15 @@ public static Yielder createYielder( return Yielders.each( mappingFunction.apply(sequence) .map(row -> { - channelCounters.incrementRowCount(); + if (row instanceof RowsAndColumns rac) { + channelCounters.addRAC(rac, ChannelCounters.NO_PARTITION); + } else if (row instanceof ScanResultValue scanResult + && scanResult.getEvents() instanceof List 
eventsList) { + // Special handling for ScanQuery + channelCounters.add(ChannelCounters.NO_PARTITION, eventsList.size(), 0, 0, 0); + } else { + channelCounters.incrementRowCount(); + } return row; }) ); diff --git a/multi-stage-query/src/main/java/org/apache/druid/msq/exec/FrameContext.java b/multi-stage-query/src/main/java/org/apache/druid/msq/exec/FrameContext.java index 1a3b99538d1d..8cc2e9a66f3d 100644 --- a/multi-stage-query/src/main/java/org/apache/druid/msq/exec/FrameContext.java +++ b/multi-stage-query/src/main/java/org/apache/druid/msq/exec/FrameContext.java @@ -30,6 +30,7 @@ import org.apache.druid.segment.SegmentWrangler; import org.apache.druid.segment.incremental.RowIngestionMeters; import org.apache.druid.segment.loading.DataSegmentPusher; +import org.apache.druid.segment.loading.external.VirtualStorageManager; import org.apache.druid.server.SegmentManager; import javax.annotation.Nullable; @@ -55,6 +56,11 @@ public interface FrameContext extends Closeable */ SegmentManager segmentManager(); + /** + * Returns the virtual storage manager for caching files. + */ + VirtualStorageManager virtualStorageManager(); + /** * Returns the coordinator client for fetching DataSegment metadata when not available locally. * May be null if no coordinator client is available (e.g., in Dart workers). 
diff --git a/multi-stage-query/src/main/java/org/apache/druid/msq/exec/RunWorkOrder.java b/multi-stage-query/src/main/java/org/apache/druid/msq/exec/RunWorkOrder.java index 20593f266219..e064d78640e9 100644 --- a/multi-stage-query/src/main/java/org/apache/druid/msq/exec/RunWorkOrder.java +++ b/multi-stage-query/src/main/java/org/apache/druid/msq/exec/RunWorkOrder.java @@ -403,7 +403,14 @@ private InputSliceReader makeInputSliceReader() LinkedHashMap, InputSliceReader> readers = new LinkedHashMap<>(); readers.put(NilInputSlice.class, NilInputSliceReader.INSTANCE); readers.put(StageInputSlice.class, StageInputSliceReader.INSTANCE); - readers.put(ExternalInputSlice.class, new ExternalInputSliceReader(frameContext.tempDir("external"))); + readers.put( + ExternalInputSlice.class, + new ExternalInputSliceReader( + frameContext.virtualStorageManager(), + frameContext.tempDir("external"), + MultiStageQueryContext.isBackgroundFetchExternalFiles(workOrder.getWorkerContext()) + ) + ); readers.put(InlineInputSlice.class, new InlineInputSliceReader(frameContext.segmentWrangler())); readers.put(LookupInputSlice.class, new LookupInputSliceReader(frameContext.segmentWrangler())); readers.put(SegmentsInputSlice.class, new SegmentsInputSliceReader(frameContext, reindex)); diff --git a/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/IndexerControllerContext.java b/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/IndexerControllerContext.java index 5ea69d8c6a0a..013b1af617b8 100644 --- a/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/IndexerControllerContext.java +++ b/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/IndexerControllerContext.java @@ -213,7 +213,7 @@ public void registerController(Controller controller, final Closer closer) taskDataSource, toolbox.getAuthorizerMapper() ); - toolbox.getChatHandlerProvider().register(controller.queryId(), chatHandler, false); + 
toolbox.getChatHandlerProvider().register(controller.queryId(), chatHandler); closer.register(() -> toolbox.getChatHandlerProvider().unregister(controller.queryId())); } diff --git a/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/IndexerFrameContext.java b/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/IndexerFrameContext.java index 13da1393c710..2684c25a6d64 100644 --- a/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/IndexerFrameContext.java +++ b/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/IndexerFrameContext.java @@ -39,6 +39,7 @@ import org.apache.druid.segment.SegmentWrangler; import org.apache.druid.segment.incremental.RowIngestionMeters; import org.apache.druid.segment.loading.DataSegmentPusher; +import org.apache.druid.segment.loading.external.VirtualStorageManager; import org.apache.druid.server.SegmentManager; import javax.annotation.Nullable; @@ -51,6 +52,7 @@ public class IndexerFrameContext implements FrameContext private final FrameWriterSpec frameWriterSpec; private final IndexIO indexIO; private final SegmentManager segmentManager; + private final VirtualStorageManager virtualStorageManager; @Nullable private final CoordinatorClient coordinatorClient; @@ -75,6 +77,7 @@ public IndexerFrameContext( FrameWriterSpec frameWriterSpec, IndexIO indexIO, SegmentManager segmentManager, + VirtualStorageManager virtualStorageManager, @Nullable CoordinatorClient coordinatorClient, @Nullable ProcessingBuffersSet processingBuffersSet, IndexerDataServerQueryHandlerFactory dataServerQueryHandlerFactory, @@ -87,6 +90,7 @@ public IndexerFrameContext( this.frameWriterSpec = frameWriterSpec; this.indexIO = indexIO; this.segmentManager = segmentManager; + this.virtualStorageManager = virtualStorageManager; this.coordinatorClient = coordinatorClient; this.processingBuffersSet = processingBuffersSet; this.memoryParameters = memoryParameters; @@ -118,6 +122,12 @@ public SegmentManager segmentManager() return 
segmentManager; } + @Override + public VirtualStorageManager virtualStorageManager() + { + return virtualStorageManager; + } + @Override @Nullable public CoordinatorClient coordinatorClient() diff --git a/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/IndexerWorkerContext.java b/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/IndexerWorkerContext.java index b5058e053711..3e1c151e5f21 100644 --- a/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/IndexerWorkerContext.java +++ b/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/IndexerWorkerContext.java @@ -30,6 +30,7 @@ import org.apache.druid.guice.annotations.Smile; import org.apache.druid.indexing.common.SegmentCacheManagerFactory; import org.apache.druid.indexing.common.TaskToolbox; +import org.apache.druid.indexing.common.config.TaskConfig; import org.apache.druid.java.util.common.io.Closer; import org.apache.druid.java.util.common.logger.Logger; import org.apache.druid.msq.exec.ControllerClient; @@ -60,7 +61,10 @@ import org.apache.druid.rpc.indexing.SpecificTaskRetryPolicy; import org.apache.druid.rpc.indexing.SpecificTaskServiceLocator; import org.apache.druid.segment.IndexIO; +import org.apache.druid.segment.loading.LeastBytesUsedStorageLocationSelectorStrategy; import org.apache.druid.segment.loading.SegmentCacheManager; +import org.apache.druid.segment.loading.external.StorageLocationVirtualStorageManager; +import org.apache.druid.segment.loading.external.VirtualStorageManager; import org.apache.druid.server.DruidNode; import org.apache.druid.server.SegmentManager; import org.apache.druid.server.metrics.StorageMonitor; @@ -88,6 +92,7 @@ public class IndexerWorkerContext implements WorkerContext private final ServiceLocator controllerLocator; private final IndexIO indexIO; private final SegmentManager segmentManager; + private final VirtualStorageManager virtualStorageManager; private final StorageMonitor storageMonitor; @Nullable private final 
CoordinatorClient coordinatorClient; @@ -113,6 +118,7 @@ public IndexerWorkerContext( final ServiceLocator controllerLocator, final IndexIO indexIO, final SegmentManager segmentManager, + final VirtualStorageManager virtualStorageManager, final StorageMonitor storageMonitor, @Nullable final CoordinatorClient coordinatorClient, final ServiceClientFactory clientFactory, @@ -128,6 +134,7 @@ public IndexerWorkerContext( this.controllerLocator = controllerLocator; this.indexIO = indexIO; this.segmentManager = segmentManager; + this.virtualStorageManager = virtualStorageManager; this.storageMonitor = storageMonitor; this.coordinatorClient = coordinatorClient; this.clientFactory = clientFactory; @@ -166,10 +173,23 @@ public static IndexerWorkerContext createProductionInstance( ) { final IndexIO indexIO = injector.getInstance(IndexIO.class); + final TaskConfig taskConfig = injector.getInstance(TaskConfig.class); final SegmentCacheManager cacheManager = injector.getInstance(SegmentCacheManagerFactory.class) - .manufacturate(new File(toolbox.getIndexingTmpDir(), "segment-fetch"), true); + .manufacturate( + new File(toolbox.getIndexingTmpDir(), "segment-fetch"), + // Divide tmpStorageBytesPerTask by 3 so the local cache never takes up the majority of space. + // In a typical leaf stage run, we may need some disk space for inputs and some for outputs. + taskConfig.getTmpStorageBytesPerTask() > 0 ? 
taskConfig.getTmpStorageBytesPerTask() / 3 : null, + true + ); final SegmentManager segmentManager = new SegmentManager(cacheManager); + final VirtualStorageManager virtualStorageManager = + new StorageLocationVirtualStorageManager( + cacheManager.getLocations(), + new LeastBytesUsedStorageLocationSelectorStrategy(cacheManager.getLocations()), + cacheManager.getLoadingThreadPool() + ); final StorageMonitor storageMonitor = new StorageMonitor(cacheManager.getLocations(), task::getMetricBuilder); toolbox.addMonitor(storageMonitor); final ServiceClientFactory serviceClientFactory = @@ -190,6 +210,7 @@ public static IndexerWorkerContext createProductionInstance( new SpecificTaskServiceLocator(task.getControllerTaskId(), overlordClient), indexIO, segmentManager, + virtualStorageManager, storageMonitor, toolbox.getCoordinatorClient(), serviceClientFactory, @@ -258,7 +279,7 @@ public void registerWorker(Worker worker, Closer closer) { final WorkerChatHandler chatHandler = new WorkerChatHandler(worker, toolbox.getAuthorizerMapper(), task.getDataSource()); - toolbox.getChatHandlerProvider().register(worker.id(), chatHandler, false); + toolbox.getChatHandlerProvider().register(worker.id(), chatHandler); closer.register(() -> toolbox.getChatHandlerProvider().unregister(worker.id())); } @@ -320,6 +341,7 @@ public FrameContext frameContext(WorkOrder workOrder) FrameWriterSpec.fromContext(workOrder.getWorkerContext()), indexIO, segmentManager, + virtualStorageManager, coordinatorClient, workOrder.getStageDefinition().getProcessor().usesProcessingBuffers() ? 
processingBuffersSet.get() : null, dataServerQueryHandlerFactory, diff --git a/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/MSQControllerTask.java b/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/MSQControllerTask.java index 1950aeace5f4..b452ad1bb535 100644 --- a/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/MSQControllerTask.java +++ b/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/MSQControllerTask.java @@ -317,7 +317,7 @@ public void stopGracefully(final TaskConfig taskConfig) { final ControllerHolder holder = controllerHolder; if (holder != null) { - holder.cancel(CancellationReason.TASK_SHUTDOWN); + holder.cancel(CancellationReason.TASK_SHUTDOWN, null); } } diff --git a/multi-stage-query/src/main/java/org/apache/druid/msq/input/AdaptedLoadableSegment.java b/multi-stage-query/src/main/java/org/apache/druid/msq/input/AdaptedLoadableSegment.java index 8897170c5e63..7000064be9b3 100644 --- a/multi-stage-query/src/main/java/org/apache/druid/msq/input/AdaptedLoadableSegment.java +++ b/multi-stage-query/src/main/java/org/apache/druid/msq/input/AdaptedLoadableSegment.java @@ -21,7 +21,12 @@ import com.google.common.util.concurrent.Futures; import com.google.common.util.concurrent.ListenableFuture; +import com.google.common.util.concurrent.SettableFuture; +import org.apache.druid.common.asyncresource.AsyncResource; +import org.apache.druid.common.asyncresource.AsyncResources; import org.apache.druid.error.DruidException; +import org.apache.druid.java.util.common.ISE; +import org.apache.druid.java.util.common.io.Closer; import org.apache.druid.msq.counters.ChannelCounters; import org.apache.druid.query.SegmentDescriptor; import org.apache.druid.segment.ReferenceCountedSegmentProvider; @@ -29,11 +34,11 @@ import org.apache.druid.segment.loading.AcquireSegmentAction; import org.apache.druid.segment.loading.AcquireSegmentResult; import org.apache.druid.timeline.DataSegment; -import org.joda.time.Interval; 
import javax.annotation.Nullable; import java.util.Optional; import java.util.concurrent.atomic.AtomicBoolean; +import java.util.function.Supplier; /** * Implementation of {@link LoadableSegment} for segments adapted from non-regular sources such as inline data, @@ -44,64 +49,70 @@ public class AdaptedLoadableSegment implements LoadableSegment { private final AtomicBoolean acquired = new AtomicBoolean(false); + private final Supplier> asyncSegmentSupplier; private final SegmentDescriptor descriptor; @Nullable - private final ChannelCounters inputCounters; - @Nullable private final String description; - private final ListenableFuture dataSegmentFuture; - private final AcquireSegmentAction acquireSegmentAction; + @Nullable + private final ChannelCounters inputCounters; - private AdaptedLoadableSegment( - final Segment segment, + /** + * Creates a wrapper around a supplier of an {@link AcquireSegmentResult}. The lifecycle of the supplied + * {@link AsyncResource} is tied to the {@link AcquireSegmentAction} returned from {@link #acquire()}. + * + * @param asyncSegmentSupplier the supplier to wrap + * @param descriptor descriptor containing the interval to use for filtering + * @param description user-oriented description for error messages + * @param inputCounters counters for tracking input via {@link LoadableSegmentUtils#countedLoad}. 
+ */ + public AdaptedLoadableSegment( + final Supplier> asyncSegmentSupplier, final SegmentDescriptor descriptor, @Nullable final String description, @Nullable final ChannelCounters inputCounters ) { + this.asyncSegmentSupplier = asyncSegmentSupplier; this.descriptor = descriptor; this.description = description; this.inputCounters = inputCounters; - - // These segments don't have an associated DataSegment - this.dataSegmentFuture = Futures.immediateFailedFuture( - DruidException.defensive("DataSegment not available for adapted segments") - ); - - // Pre-create the acquire action since the segment is already available - final ListenableFuture segmentFuture = - Futures.immediateFuture(AcquireSegmentResult.cached(ReferenceCountedSegmentProvider.of(segment))); - this.acquireSegmentAction = new AcquireSegmentAction(() -> segmentFuture, null); } /** * Creates an AdaptedLoadableSegment wrapper around a Segment object which is not a regular Druid segment, * has no associated {@link DataSegment}, and whose lifecycle is not managed by the LoadableSegment instance. 
* - * @param segment the segment to wrap - * @param queryInterval the query interval to use for filtering - * @param description user-oriented description for error messages - * @param channelCounters counters for tracking input + * @param segment the segment to wrap + * @param descriptor descriptor containing the interval to use for filtering + * @param description user-oriented description for error messages + * @param inputCounters counters for tracking input */ - public static AdaptedLoadableSegment create( + public static AdaptedLoadableSegment fromUnmanagedSegment( final Segment segment, - final Interval queryInterval, + final SegmentDescriptor descriptor, @Nullable final String description, - @Nullable final ChannelCounters channelCounters + @Nullable final ChannelCounters inputCounters ) { + // Pre-create the acquire result since the segment is already available + final AcquireSegmentResult acquireSegmentResult = + AcquireSegmentResult.cached(ReferenceCountedSegmentProvider.of(segment)); + + final AsyncResource resource = AsyncResources.unmanaged(acquireSegmentResult); return new AdaptedLoadableSegment( - segment, - new SegmentDescriptor(queryInterval, "0", 0), + () -> resource, + descriptor, description, - channelCounters + inputCounters ); } @Override public ListenableFuture dataSegmentFuture() { - return dataSegmentFuture; + return Futures.immediateFailedFuture( + DruidException.defensive("DataSegment not available for adapted segments") + ); } @Override @@ -110,13 +121,6 @@ public SegmentDescriptor descriptor() return descriptor; } - @Override - @Nullable - public ChannelCounters inputCounters() - { - return inputCounters; - } - @Override @Nullable public String description() @@ -139,6 +143,45 @@ public AcquireSegmentAction acquire() if (!acquired.compareAndSet(false, true)) { throw DruidException.defensive("Segment with descriptor[%s] is already acquired", descriptor); } - return acquireSegmentAction; + + // Synchronized by itself; Closer is not 
thread-safe. + final Closer closer = Closer.create(); + final AtomicBoolean closed = new AtomicBoolean(false); + + return new AcquireSegmentAction( + () -> { + final AsyncResource asyncSegment = asyncSegmentSupplier.get(); + synchronized (closer) { + if (closed.get()) { + asyncSegment.close(); + return Futures.immediateFailedFuture(new ISE("Already closed")); + } else { + closer.register(asyncSegment); + } + } + + final SettableFuture retVal = SettableFuture.create(); + + asyncSegment.addReadyCallback(() -> { + try { + retVal.set(asyncSegment.get()); + } + catch (Throwable e) { + retVal.setException(e); + } + }); + + // Use byteCount = 0 for adapted segments; we can't really tell what it is from the AcquireSegmentResult + // (the "load size" may not be the entire size if the segment was fully or partially cached). Implementations + // call ChannelCounters#incrementBytes if they have something useful to put there. + return LoadableSegmentUtils.countedLoad(retVal, 0, inputCounters); + }, + () -> { + synchronized (closer) { + closed.set(true); + closer.close(); + } + } + ); } } diff --git a/multi-stage-query/src/main/java/org/apache/druid/msq/input/LoadableSegment.java b/multi-stage-query/src/main/java/org/apache/druid/msq/input/LoadableSegment.java index 5c04d78383c1..227c1ad2d492 100644 --- a/multi-stage-query/src/main/java/org/apache/druid/msq/input/LoadableSegment.java +++ b/multi-stage-query/src/main/java/org/apache/druid/msq/input/LoadableSegment.java @@ -22,7 +22,6 @@ import com.google.common.util.concurrent.ListenableFuture; import org.apache.druid.common.guava.FutureUtils; import org.apache.druid.error.DruidException; -import org.apache.druid.msq.counters.ChannelCounters; import org.apache.druid.query.SegmentDescriptor; import org.apache.druid.segment.Segment; import org.apache.druid.segment.loading.AcquireSegmentAction; @@ -51,13 +50,6 @@ public interface LoadableSegment */ SegmentDescriptor descriptor(); - /** - * Returns input counters that should be 
updated as this segment is queried, or null if it is not necessary - * to update input counters. - */ - @Nullable - ChannelCounters inputCounters(); - /** * User-oriented description, suitable for inclusion in log or error messages. */ diff --git a/multi-stage-query/src/main/java/org/apache/druid/msq/input/LoadableSegmentUtils.java b/multi-stage-query/src/main/java/org/apache/druid/msq/input/LoadableSegmentUtils.java new file mode 100644 index 000000000000..a092777eeef3 --- /dev/null +++ b/multi-stage-query/src/main/java/org/apache/druid/msq/input/LoadableSegmentUtils.java @@ -0,0 +1,83 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.msq.input; + +import com.google.common.util.concurrent.ListenableFuture; +import org.apache.druid.common.guava.FutureUtils; +import org.apache.druid.msq.counters.ChannelCounters; +import org.apache.druid.segment.PhysicalSegmentInspector; +import org.apache.druid.segment.Segment; +import org.apache.druid.segment.loading.AcquireSegmentAction; +import org.apache.druid.segment.loading.AcquireSegmentResult; + +import javax.annotation.Nullable; +import java.util.Optional; + +public class LoadableSegmentUtils +{ + /** + * Given a future from {@link AcquireSegmentAction#getSegmentFuture()}, wraps it with logic to increment + * counters as follows: + * + *

<ul>
+ * <li>{@link ChannelCounters#addLoad(AcquireSegmentResult)} when the load completes</li>
+ * <li>{@link ChannelCounters#addFile(long, long)} each time a reference is acquired from + * {@link AcquireSegmentResult#getReferenceProvider()}. The row count is taken from + * {@link #getSegmentRowCount(Segment)}, and byte count is taken from {@code byteCount}.</li>
+ * </ul>
+ */ + public static ListenableFuture countedLoad( + final ListenableFuture segmentFuture, + final long byteCount, + @Nullable final ChannelCounters channelCounters + ) + { + if (channelCounters == null) { + return segmentFuture; + } else { + return FutureUtils.transform( + segmentFuture, + result -> { + channelCounters.addLoad(result); + return new AcquireSegmentResult( + () -> { + final Optional segment = result.getReferenceProvider().acquireReference(); + final int rowCount = segment.map(LoadableSegmentUtils::getSegmentRowCount).orElse(0); + channelCounters.addFile(rowCount, byteCount); + return segment; + }, + result.getLoadSizeBytes(), + result.getWaitTimeNanos(), + result.getLoadTimeNanos() + ); + } + ); + } + } + + /** + * Gets the number of rows for a segment, using a {@link PhysicalSegmentInspector}. Returns 0 when unknown. + */ + public static int getSegmentRowCount(final Segment segment) + { + final PhysicalSegmentInspector inspector = segment.as(PhysicalSegmentInspector.class); + return inspector != null ? 
inspector.getNumRows() : 0; + } +} diff --git a/multi-stage-query/src/main/java/org/apache/druid/msq/input/RegularLoadableSegment.java b/multi-stage-query/src/main/java/org/apache/druid/msq/input/RegularLoadableSegment.java index 7582cd2514c5..b43eefdcf36a 100644 --- a/multi-stage-query/src/main/java/org/apache/druid/msq/input/RegularLoadableSegment.java +++ b/multi-stage-query/src/main/java/org/apache/druid/msq/input/RegularLoadableSegment.java @@ -77,7 +77,7 @@ public class RegularLoadableSegment implements LoadableSegment * @param segmentManager segment manager for loading and caching segments * @param segmentId the segment ID to load * @param descriptor segment descriptor for querying - * @param inputCounters optional counters for tracking input + * @param inputCounters optional counters for tracking input via {@link LoadableSegmentUtils#countedLoad} * @param coordinatorClient optional client for fetching DataSegment from Coordinator when not available locally * @param isReindex true if this is a DML command writing to the same table it's reading from */ @@ -119,13 +119,6 @@ public SegmentDescriptor descriptor() return descriptor; } - @Override - @Nullable - public ChannelCounters inputCounters() - { - return inputCounters; - } - @Override @Nullable public String description() @@ -143,6 +136,13 @@ public synchronized Optional acquireIfCached() final Optional cachedSegment = segmentManager.acquireCachedSegment(segmentId); if (cachedSegment.isPresent()) { acquired = true; + + // Update counters in the manner of LoadableSegmentUtils#countedLoad (which we aren't using here). + if (inputCounters != null) { + final int rowCount = LoadableSegmentUtils.getSegmentRowCount(cachedSegment.get()); + final long byteCount = cachedDataSegment != null ? 
cachedDataSegment.getSize() : 0; + inputCounters.addFile(rowCount, byteCount); + } } return cachedSegment; } @@ -157,7 +157,15 @@ public synchronized AcquireSegmentAction acquire() acquired = true; if (cachedDataSegment != null) { - return segmentManager.acquireSegment(cachedDataSegment); + final AcquireSegmentAction action = segmentManager.acquireSegment(cachedDataSegment); + return new AcquireSegmentAction( + () -> LoadableSegmentUtils.countedLoad( + action.getSegmentFuture(), + cachedDataSegment.getSize(), + inputCounters + ), + action + ); } else { // Create a shim AcquireSegmentAction that doesn't acquire a hold (yet). We can't make a real // AcquireSegmentAction yet because we don't have the DataSegment object. It needs to be fetched @@ -168,7 +176,11 @@ public synchronized AcquireSegmentAction acquire() return new AcquireSegmentAction( Suppliers.memoize(() -> FutureUtils.transformAsync( dataSegmentFutureSupplier.get(), - dataSegment -> closer.register(segmentManager.acquireSegment(dataSegment)).getSegmentFuture() + dataSegment -> LoadableSegmentUtils.countedLoad( + closer.register(segmentManager.acquireSegment(dataSegment)).getSegmentFuture(), + dataSegment.getSize(), + inputCounters + ) )), closer ); diff --git a/multi-stage-query/src/main/java/org/apache/druid/msq/input/external/ExternalInputSliceReader.java b/multi-stage-query/src/main/java/org/apache/druid/msq/input/external/ExternalInputSliceReader.java index 310175b0ef84..5a95c53f4c4e 100644 --- a/multi-stage-query/src/main/java/org/apache/druid/msq/input/external/ExternalInputSliceReader.java +++ b/multi-stage-query/src/main/java/org/apache/druid/msq/input/external/ExternalInputSliceReader.java @@ -19,18 +19,22 @@ package org.apache.druid.msq.input.external; +import org.apache.druid.common.asyncresource.AsyncResource; +import org.apache.druid.common.asyncresource.AsyncResources; import org.apache.druid.data.input.ColumnsFilter; +import org.apache.druid.data.input.InputFilePointer; import 
org.apache.druid.data.input.InputFormat; import org.apache.druid.data.input.InputRowSchema; import org.apache.druid.data.input.InputSource; import org.apache.druid.data.input.InputSourceReader; -import org.apache.druid.data.input.InputStats; import org.apache.druid.data.input.impl.DimensionsSpec; import org.apache.druid.data.input.impl.InlineInputSource; +import org.apache.druid.data.input.impl.LocalInputSource; import org.apache.druid.data.input.impl.TimestampSpec; import org.apache.druid.java.util.common.DateTimes; import org.apache.druid.java.util.common.Intervals; import org.apache.druid.java.util.common.StringUtils; +import org.apache.druid.java.util.common.logger.Logger; import org.apache.druid.msq.counters.ChannelCounters; import org.apache.druid.msq.counters.CounterNames; import org.apache.druid.msq.counters.CounterTracker; @@ -45,12 +49,14 @@ import org.apache.druid.msq.input.stage.ReadablePartitions; import org.apache.druid.msq.util.DimensionSchemaUtils; import org.apache.druid.query.SegmentDescriptor; +import org.apache.druid.segment.ReferenceCountedSegmentProvider; import org.apache.druid.segment.RowBasedSegment; -import org.apache.druid.segment.Segment; import org.apache.druid.segment.column.ColumnHolder; import org.apache.druid.segment.column.RowSignature; -import org.apache.druid.segment.incremental.SimpleRowIngestionMeters; -import org.apache.druid.timeline.SegmentId; +import org.apache.druid.segment.loading.AcquireSegmentResult; +import org.apache.druid.segment.loading.external.CachedFile; +import org.apache.druid.segment.loading.external.VirtualStorageManager; +import org.apache.druid.utils.CloseableUtils; import java.io.File; import java.util.ArrayList; @@ -64,13 +70,21 @@ */ public class ExternalInputSliceReader implements InputSliceReader { - public static final String SEGMENT_ID = "__external"; - public static final SegmentDescriptor SEGMENT_DESCRIPTOR = SegmentId.dummy(SEGMENT_ID).toDescriptor(); + private static final Logger log = new 
Logger(ExternalInputSliceReader.class); + + private final VirtualStorageManager virtualStorageManager; private final File temporaryDirectory; + private final boolean backgroundFetchExternalFiles; - public ExternalInputSliceReader(final File temporaryDirectory) + public ExternalInputSliceReader( + final VirtualStorageManager virtualStorageManager, + final File temporaryDirectory, + final boolean backgroundFetchExternalFiles + ) { + this.virtualStorageManager = virtualStorageManager; this.temporaryDirectory = temporaryDirectory; + this.backgroundFetchExternalFiles = backgroundFetchExternalFiles; } public static boolean isFileBasedInputSource(final InputSource inputSource) @@ -92,26 +106,15 @@ public PhysicalInputSlice attach( final List loadableSegments = new ArrayList<>(); for (final InputSource inputSource : externalInputSlice.getInputSources()) { - // The LoadableSegment generated here does not acquire a real hold, and ends up loading the external data in a - // processing thread (when the cursor is created). Ideally, this would be better integrated with the virtual - // storage system, giving us storage holds and the ability to load data outside of a processing thread. - final Segment segment = makeExternalSegment( + final LoadableSegment segment = makeExternalSegment( inputSource, externalInputSlice.getInputFormat(), externalInputSlice.getSignature(), - new File(temporaryDirectory, String.valueOf(inputNumber)), inputCounters, counters.warnings(), warningPublisher ); - loadableSegments.add( - AdaptedLoadableSegment.create( - segment, - Intervals.ETERNITY, - StringUtils.format("external[%s]", inputSource.toString()), - null - ) - ); + loadableSegments.add(segment); } return new PhysicalInputSlice(ReadablePartitions.empty(), loadableSegments, Collections.emptyList()); @@ -120,11 +123,10 @@ public PhysicalInputSlice attach( /** * Creates a lazy segment that fetches external data when a cursor is created. 
*/ - private Segment makeExternalSegment( + private LoadableSegment makeExternalSegment( final InputSource inputSource, final InputFormat inputFormat, final RowSignature signature, - final File temporaryDirectory, final ChannelCounters channelCounters, final WarningCounters warningCounters, final Consumer warningPublisher @@ -144,27 +146,128 @@ private Segment makeExternalSegment( ColumnsFilter.all() ); - final InputSourceReader reader; + final String description = StringUtils.format("external[%s]", inputSource.toString()); + final SegmentDescriptor descriptor = new SegmentDescriptor(Intervals.ETERNITY, "0", 0); + final List filePointers = inputSource.asFilePointers(); + + if (filePointers != null && backgroundFetchExternalFiles) { + // The LoadableSegment generated here will cause files to be fetched in VSF loading threads + // when acquire() is called. + return new AdaptedLoadableSegment( + () -> { + final long startTime = System.nanoTime(); + final List> cachedFileResources = new ArrayList<>(filePointers.size()); + try { + for (final InputFilePointer filePointer : filePointers) { + cachedFileResources.add( + virtualStorageManager.reserveAndPopulateAsync( + filePointer.uri().toString(), + filePointer.sizeSupplier(), + filePointer.populator() + ) + ); + } + + final AsyncResource fetched = AsyncResources.transform( + AsyncResources.collect(cachedFileResources), + cachedFiles -> { + long totalSize = 0; + final List files = new ArrayList<>(cachedFiles.size()); + + for (final CachedFile cachedFile : cachedFiles) { + files.add(cachedFile.getFile()); + totalSize += cachedFile.getFile().length(); + } + + final InputSource localInputSource = new LocalInputSource(null, null, files, null); + final ExternalSegment segment = new ExternalSegment( + localInputSource, + makeReader(schema, localInputSource, inputFormat, channelCounters), + warningCounters, + warningPublisher, + channelCounters, + signature + ); + + return new AcquireSegmentResult( + 
ReferenceCountedSegmentProvider.of(segment), + totalSize, + 0L, + System.nanoTime() - startTime + ); + } + ); + + // If the fetch fails because of insufficient storage space, release the fetched files and fall + // back to creating an ExternalSegment that streams data directly from the original input source. + return AsyncResources.recover( + fetched, + e -> { + if (VirtualStorageManager.isInsufficientStorage(e)) { + log.noStackTrace() + .info(e, "Insufficient storage space to prefetch[%s]; streaming instead.", description); + return AcquireSegmentResult.cached( + ReferenceCountedSegmentProvider.of( + new ExternalSegment( + inputSource, + makeReader(schema, inputSource, inputFormat, channelCounters), + warningCounters, + warningPublisher, + channelCounters, + signature + ) + ) + ); + } else { + return null; + } + } + ); + } + catch (Throwable e) { + // Close any cachedFileResources that have been created prior to this exception being thrown. + throw CloseableUtils.closeAndWrapInCatch(e, CloseableUtils.forIterable(cachedFileResources)); + } + }, + descriptor, + description, + channelCounters + ); + } else { + // The LoadableSegment generated here does not acquire a real hold, and ends up loading the external data in a + // processing thread (when the cursor is created). 
+ return AdaptedLoadableSegment.fromUnmanagedSegment( + new ExternalSegment( + inputSource, + makeReader(schema, inputSource, inputFormat, channelCounters), + warningCounters, + warningPublisher, + channelCounters, + signature + ), + descriptor, + description, + channelCounters + ); + } + } + + private InputSourceReader makeReader( + final InputRowSchema schema, + final InputSource inputSource, + final InputFormat inputFormat, + final ChannelCounters channelCounters + ) + { final boolean incrementCounters = isFileBasedInputSource(inputSource); - final InputStats inputStats = new SimpleRowIngestionMeters(); if (incrementCounters) { - reader = new CountableInputSourceReader( + return new CountableInputSourceReader( inputSource.reader(schema, inputFormat, temporaryDirectory), channelCounters ); } else { - reader = inputSource.reader(schema, inputFormat, temporaryDirectory); + return inputSource.reader(schema, inputFormat, temporaryDirectory); } - - return new ExternalSegment( - inputSource, - reader, - inputStats, - warningCounters, - warningPublisher, - channelCounters, - signature - ); } } diff --git a/multi-stage-query/src/main/java/org/apache/druid/msq/input/external/ExternalSegment.java b/multi-stage-query/src/main/java/org/apache/druid/msq/input/external/ExternalSegment.java index 4c21616eb3ff..48e22f1f298b 100644 --- a/multi-stage-query/src/main/java/org/apache/druid/msq/input/external/ExternalSegment.java +++ b/multi-stage-query/src/main/java/org/apache/druid/msq/input/external/ExternalSegment.java @@ -31,9 +31,12 @@ import org.apache.druid.msq.indexing.error.CannotParseExternalDataFault; import org.apache.druid.segment.RowBasedSegment; import org.apache.druid.segment.column.RowSignature; +import org.apache.druid.segment.incremental.NoopRowIngestionMeters; +import org.apache.druid.utils.CloseableUtils; import java.io.IOException; import java.util.NoSuchElementException; +import java.util.concurrent.atomic.AtomicLong; import java.util.function.Consumer; /** @@ 
-46,16 +49,14 @@ public class ExternalSegment extends RowBasedSegment /** * @param inputSource {@link InputSource} that the segment is a representation of * @param reader reader to read the external input source - * @param inputStats input stats * @param warningCounters warning counters tracking the warnings generated while reading the external source * @param warningPublisher publisher to report the warnings generated - * @param channelCounters channel counters to increment as we read through the files/units of the external source + * @param channelCounters channel counters, will be used for {@link ChannelCounters#incrementBytes(long)} * @param signature signature of the external source */ public ExternalSegment( final InputSource inputSource, final InputSourceReader reader, - final InputStats inputStats, final WarningCounters warningCounters, final Consumer warningPublisher, final ChannelCounters channelCounters, @@ -70,7 +71,7 @@ public ExternalSegment( public CloseableIterator make() { try { - CloseableIterator baseIterator = reader.read(inputStats); + CloseableIterator baseIterator = reader.read(makeInputStats(inputSource, channelCounters)); return new CloseableIterator<>() { private InputRow next = null; @@ -119,20 +120,7 @@ public InputRow next() @Override public void cleanup(CloseableIterator iterFromMake) { - try { - iterFromMake.close(); - // We increment the file count whenever the caller calls clean up. So we can double count here - // if the callers are not careful. - // This logic only works because we are using FilePerSplitHintSpec. Each input source only - // has one file. 
- if (ExternalInputSliceReader.isFileBasedInputSource(inputSource)) { - channelCounters.incrementFileCount(); - channelCounters.incrementBytes(inputStats.getProcessedBytes()); - } - } - catch (IOException e) { - throw new RuntimeException(e); - } + CloseableUtils.closeAndWrapExceptions(iterFromMake); } } ), @@ -149,4 +137,32 @@ public RowSignature signature() { return signature; } + + /** + * Create {@link InputStats} that calls {@link ChannelCounters#incrementBytes(long)} as data is read from files. + */ + private static InputStats makeInputStats(final InputSource inputSource, final ChannelCounters channelCounters) + { + if (ExternalInputSliceReader.isFileBasedInputSource(inputSource)) { + return new InputStats() + { + private final AtomicLong processedBytes = new AtomicLong(); + + @Override + public void incrementProcessedBytes(final long n) + { + processedBytes.addAndGet(n); + channelCounters.incrementBytes(n); + } + + @Override + public long getProcessedBytes() + { + return processedBytes.get(); + } + }; + } else { + return new NoopRowIngestionMeters(); + } + } } diff --git a/multi-stage-query/src/main/java/org/apache/druid/msq/input/inline/InlineInputSliceReader.java b/multi-stage-query/src/main/java/org/apache/druid/msq/input/inline/InlineInputSliceReader.java index 7e6c8daa7eba..0a1dc9305b22 100644 --- a/multi-stage-query/src/main/java/org/apache/druid/msq/input/inline/InlineInputSliceReader.java +++ b/multi-stage-query/src/main/java/org/apache/druid/msq/input/inline/InlineInputSliceReader.java @@ -30,6 +30,7 @@ import org.apache.druid.msq.input.stage.ReadablePartitions; import org.apache.druid.msq.input.table.RichSegmentDescriptor; import org.apache.druid.query.InlineDataSource; +import org.apache.druid.query.SegmentDescriptor; import org.apache.druid.segment.InlineSegmentWrangler; import org.apache.druid.segment.Segment; import org.apache.druid.segment.SegmentWrangler; @@ -70,9 +71,9 @@ public PhysicalInputSlice attach( for (final Segment segment : 
segmentWrangler.getSegmentsForIntervals(dataSource, Intervals.ONLY_ETERNITY)) { segments.add( - AdaptedLoadableSegment.create( + AdaptedLoadableSegment.fromUnmanagedSegment( segment, - Intervals.ETERNITY, + new SegmentDescriptor(Intervals.ETERNITY, "0", 0), "inline data", counters.channel(CounterNames.inputChannel(inputNumber)) ) diff --git a/multi-stage-query/src/main/java/org/apache/druid/msq/input/lookup/LookupInputSliceReader.java b/multi-stage-query/src/main/java/org/apache/druid/msq/input/lookup/LookupInputSliceReader.java index a9ee5db9e708..0eeea6f6ff0a 100644 --- a/multi-stage-query/src/main/java/org/apache/druid/msq/input/lookup/LookupInputSliceReader.java +++ b/multi-stage-query/src/main/java/org/apache/druid/msq/input/lookup/LookupInputSliceReader.java @@ -32,6 +32,7 @@ import org.apache.druid.msq.input.PhysicalInputSlice; import org.apache.druid.msq.input.stage.ReadablePartitions; import org.apache.druid.query.LookupDataSource; +import org.apache.druid.query.SegmentDescriptor; import org.apache.druid.segment.Segment; import org.apache.druid.segment.SegmentWrangler; import org.apache.druid.utils.CloseableUtils; @@ -87,9 +88,9 @@ public PhysicalInputSlice attach( throw new ISE("Lookup[%s] has multiple segments; cannot read", lookupName); } - final LoadableSegment loadableSegment = AdaptedLoadableSegment.create( + final LoadableSegment loadableSegment = AdaptedLoadableSegment.fromUnmanagedSegment( segment, - Intervals.ETERNITY, + new SegmentDescriptor(Intervals.ETERNITY, "0", 0), StringUtils.format("lookup[%s]", lookupName), counters.channel(CounterNames.inputChannel(inputNumber)) ); diff --git a/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/BaseLeafFrameProcessor.java b/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/BaseLeafFrameProcessor.java index 788da66072d1..742ef6a974c9 100644 --- a/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/BaseLeafFrameProcessor.java +++ 
b/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/BaseLeafFrameProcessor.java @@ -32,15 +32,12 @@ import org.apache.druid.java.util.common.Unit; import org.apache.druid.java.util.common.io.Closer; import org.apache.druid.java.util.common.logger.Logger; -import org.apache.druid.msq.counters.ChannelCounters; import org.apache.druid.msq.exec.DataServerQueryHandler; import org.apache.druid.msq.input.table.SegmentsInputSlice; -import org.apache.druid.segment.PhysicalSegmentInspector; import org.apache.druid.segment.ReferenceCountedSegmentProvider; import org.apache.druid.segment.Segment; import org.apache.druid.segment.SegmentMapFunction; import org.apache.druid.segment.SegmentReference; -import org.apache.druid.utils.CloseableUtils; import java.io.IOException; import java.util.Collections; @@ -154,19 +151,6 @@ protected Segment mapSegment( throw DruidException.defensive("Missing segmentReference[%s]", segmentHolder.getDescriptor()); } - try { - final ChannelCounters counters = segmentHolder.getInputCounters(); - if (counters != null) { - // Attach a counters.addFile call to the closer, to ensure input metrics are updated. - // Get row count prior to mapping, because mapped segments often do not provide PhysicalSegmentInspector. - final int rowCount = getSegmentRowCount(segmentReference); - closer.register(() -> counters.addFile(rowCount, 0)); - } - } - catch (Throwable e) { - throw CloseableUtils.closeAndWrapInCatch(e, segmentReference); - } - final Segment segment = closer.register(segmentReference.map(segmentMapFn)).getSegmentReference().orElse(null); if (segment == null) { throw DruidException.defensive("Missing segment[%s]", segmentHolder.getDescriptor()); @@ -187,17 +171,4 @@ protected Segment mapUnmanagedSegment(final Segment segment) segment.getId() )); } - - /** - * Helper to get the number of rows for a segment, using a {@link PhysicalSegmentInspector}. Returns 0 when the - * number is unknown. 
- */ - private int getSegmentRowCount(final SegmentReference segmentReference) - { - return segmentReference - .getSegmentReference() - .flatMap(segment -> Optional.ofNullable(segment.as(PhysicalSegmentInspector.class))) - .map(PhysicalSegmentInspector::getNumRows) - .orElse(0); - } } diff --git a/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/ReadableInputQueue.java b/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/ReadableInputQueue.java index b965e8babc2b..c2fe66ed0f48 100644 --- a/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/ReadableInputQueue.java +++ b/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/ReadableInputQueue.java @@ -27,7 +27,6 @@ import org.apache.druid.error.DruidException; import org.apache.druid.frame.channel.ReadableFrameChannel; import org.apache.druid.java.util.common.logger.Logger; -import org.apache.druid.msq.counters.ChannelCounters; import org.apache.druid.msq.exec.DataServerQueryHandler; import org.apache.druid.msq.exec.std.StandardPartitionReader; import org.apache.druid.msq.input.LoadableSegment; @@ -136,7 +135,6 @@ public void start() if (cachedSegment.isPresent()) { final SegmentReferenceHolder holder = new SegmentReferenceHolder( new SegmentReference(loadableSegment.descriptor(), cachedSegment, null), - loadableSegment.inputCounters(), loadableSegment.description() ); loadedSegments.add(holder); @@ -299,17 +297,12 @@ private ListenableFuture loadNextSegment() // Transfer segment from "loadingSegments" to "loadedSegments" and return a reference to it. 
if (loadingSegments.remove(acquireSegmentAction)) { try { - final ChannelCounters inputCounters = nextLoadableSegment.inputCounters(); - if (inputCounters != null) { - inputCounters.addLoad(segment); - } final SegmentReferenceHolder referenceHolder = new SegmentReferenceHolder( new SegmentReference( nextLoadableSegment.descriptor(), segment.getReferenceProvider().acquireReference(), acquireSegmentAction // Release the hold when the SegmentReference is closed. ), - nextLoadableSegment.inputCounters(), nextLoadableSegment.description() ); loadedSegments.add(referenceHolder); diff --git a/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/SegmentReferenceHolder.java b/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/SegmentReferenceHolder.java index 71470eb620dc..b6f387042407 100644 --- a/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/SegmentReferenceHolder.java +++ b/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/SegmentReferenceHolder.java @@ -20,7 +20,6 @@ package org.apache.druid.msq.querykit; import com.google.common.base.Preconditions; -import org.apache.druid.msq.counters.ChannelCounters; import org.apache.druid.query.SegmentDescriptor; import org.apache.druid.segment.SegmentReference; @@ -35,20 +34,16 @@ public class SegmentReferenceHolder { private final AtomicReference segmentReference = new AtomicReference<>(); @Nullable - private final ChannelCounters inputCounters; - @Nullable private final String description; private final SegmentDescriptor descriptor; public SegmentReferenceHolder( SegmentReference segmentReference, - @Nullable ChannelCounters inputCounters, @Nullable String description ) { this.segmentReference.set(Preconditions.checkNotNull(segmentReference, "segmentReference")); this.descriptor = Preconditions.checkNotNull(segmentReference, "segmentReference").getSegmentDescriptor(); - this.inputCounters = inputCounters; this.description = description; } @@ -71,15 +66,6 @@ public SegmentDescriptor 
getDescriptor() return descriptor; } - /** - * Input counters that should be incremented as we read, or null if none should be incremented. - */ - @Nullable - public ChannelCounters getInputCounters() - { - return inputCounters; - } - /** * User-oriented description, suitable for inclusion in error messages. */ diff --git a/multi-stage-query/src/main/java/org/apache/druid/msq/rpc/ControllerResource.java b/multi-stage-query/src/main/java/org/apache/druid/msq/rpc/ControllerResource.java index cc570ec992ad..90b259ad39dd 100644 --- a/multi-stage-query/src/main/java/org/apache/druid/msq/rpc/ControllerResource.java +++ b/multi-stage-query/src/main/java/org/apache/druid/msq/rpc/ControllerResource.java @@ -199,8 +199,8 @@ public Response httpGetTaskList(@Context final HttpServletRequest req) } /** - * See {@link org.apache.druid.indexing.overlord.RemoteTaskRunner#streamTaskReports} for the client-side code that - * calls this API. + * See {@link org.apache.druid.indexing.overlord.hrtr.HttpRemoteTaskRunner#streamTaskReports} for the client-side code + * that calls this API. 
*/ @GET @Path("/liveReports") diff --git a/multi-stage-query/src/main/java/org/apache/druid/msq/util/MultiStageQueryContext.java b/multi-stage-query/src/main/java/org/apache/druid/msq/util/MultiStageQueryContext.java index 79b1c5ca3417..d731ca074ae6 100644 --- a/multi-stage-query/src/main/java/org/apache/druid/msq/util/MultiStageQueryContext.java +++ b/multi-stage-query/src/main/java/org/apache/druid/msq/util/MultiStageQueryContext.java @@ -171,6 +171,9 @@ public class MultiStageQueryContext public static final String CTX_REMOVE_NULL_BYTES = "removeNullBytes"; public static final boolean DEFAULT_REMOVE_NULL_BYTES = false; + public static final String CTX_BACKGROUND_FETCH_EXTERNAL_FILES = "backgroundFetchExternalFiles"; + public static final boolean DEFAULT_BACKGROUND_FETCH_EXTERNAL_FILES = true; + /** * Hint to {@link StageProcessor} implementations about whether they should attempt to use * {@link FrameCombiner} when doing sort-based aggregations. @@ -473,6 +476,11 @@ public static boolean removeNullBytes(final QueryContext queryContext) return queryContext.getBoolean(CTX_REMOVE_NULL_BYTES, DEFAULT_REMOVE_NULL_BYTES); } + public static boolean isBackgroundFetchExternalFiles(final QueryContext queryContext) + { + return queryContext.getBoolean(CTX_BACKGROUND_FETCH_EXTERNAL_FILES, DEFAULT_BACKGROUND_FETCH_EXTERNAL_FILES); + } + public static boolean isUseCombiner(final QueryContext queryContext) { return queryContext.getBoolean(CTX_USE_COMBINER, DEFAULT_USE_COMBINER); diff --git a/multi-stage-query/src/test/java/org/apache/druid/msq/dart/worker/DartFrameContextTest.java b/multi-stage-query/src/test/java/org/apache/druid/msq/dart/worker/DartFrameContextTest.java index 056115d87a2d..d687be8e92bf 100644 --- a/multi-stage-query/src/test/java/org/apache/druid/msq/dart/worker/DartFrameContextTest.java +++ b/multi-stage-query/src/test/java/org/apache/druid/msq/dart/worker/DartFrameContextTest.java @@ -135,6 +135,7 @@ private static DartFrameContext makeContext(final 
ProcessingBuffersSet processin null, null, null, + null, processingBuffersSet, null, null, diff --git a/multi-stage-query/src/test/java/org/apache/druid/msq/exec/ControllerHolderTest.java b/multi-stage-query/src/test/java/org/apache/druid/msq/exec/ControllerHolderTest.java index cea10aa88278..67b6995c6552 100644 --- a/multi-stage-query/src/test/java/org/apache/druid/msq/exec/ControllerHolderTest.java +++ b/multi-stage-query/src/test/java/org/apache/druid/msq/exec/ControllerHolderTest.java @@ -47,6 +47,7 @@ import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicReference; public class ControllerHolderTest { @@ -107,7 +108,7 @@ public void run(final QueryListener listener) final ListenableFuture future = holder.runAsync(new NoopQueryListener(), null, controllerThreadPool); controllerStarted.await(); - holder.cancel(CancellationReason.USER_REQUEST); + holder.cancel(CancellationReason.USER_REQUEST, null); controllerFinished.await(); try { @@ -146,7 +147,7 @@ public void run(final QueryListener listener) } @Override - public void stop(final CancellationReason reason) + public void stop(final CancellationReason reason, @Nullable Throwable cause) { stopCalled.set(true); } @@ -156,12 +157,54 @@ public void stop(final CancellationReason reason) holder.runAsync(new NoopQueryListener(), null, controllerThreadPool); controllerStarted.await(); - holder.cancel(CancellationReason.USER_REQUEST); + holder.cancel(CancellationReason.USER_REQUEST, null); controllerFinished.await(); Assert.assertTrue("stop() should have been called as failsafe", stopCalled.get()); } + @Test + public void testCancelPassesCauseToStop() throws Exception + { + final CountDownLatch controllerStarted = new CountDownLatch(1); + final CountDownLatch controllerFinished = new CountDownLatch(1); + final AtomicReference stopCause = new AtomicReference<>(); + final RuntimeException cause 
= new RuntimeException("error writing to client"); + final Controller controller = new TestController("test-query") + { + @Override + public void run(final QueryListener listener) + { + try { + controllerStarted.countDown(); + Thread.sleep(300_000); + } + catch (InterruptedException e) { + // expected + } + finally { + listener.onQueryComplete(makeSuccessReport()); + controllerFinished.countDown(); + } + } + + @Override + public void stop(final CancellationReason reason, @Nullable final Throwable cause) + { + stopCause.set(cause); + } + }; + + final ControllerHolder holder = new ControllerHolder(controller, "sql-1", null, null, DateTimes.nowUtc()); + holder.runAsync(new NoopQueryListener(), null, controllerThreadPool); + + controllerStarted.await(); + holder.cancel(CancellationReason.UNKNOWN, cause); + controllerFinished.await(); + + Assert.assertSame("stop() should have received the cause", cause, stopCause.get()); + } + @Test public void testSuccessfulCompletion() throws Exception { @@ -198,7 +241,7 @@ public void run(final QueryListener listener) final ControllerHolder holder = new ControllerHolder(controller, "sql-1", null, null, DateTimes.nowUtc()); // Cancel before run - holder.cancel(CancellationReason.USER_REQUEST); + holder.cancel(CancellationReason.USER_REQUEST, null); Assert.assertEquals(ControllerHolder.State.CANCELED, holder.getState()); // Run should complete quickly without running the controller @@ -238,8 +281,8 @@ public void run(final QueryListener listener) controllerStarted.await(); // Cancel twice — should not throw - holder.cancel(CancellationReason.USER_REQUEST); - holder.cancel(CancellationReason.USER_REQUEST); + holder.cancel(CancellationReason.USER_REQUEST, null); + holder.cancel(CancellationReason.USER_REQUEST, null); controllerFinished.await(); Assert.assertEquals(ControllerHolder.State.CANCELED, holder.getState()); @@ -264,7 +307,7 @@ public void run(final QueryListener listener) Assert.assertEquals(ControllerHolder.State.SUCCESS, 
holder.getState()); // Cancel after completion — should be a no-op - holder.cancel(CancellationReason.USER_REQUEST); + holder.cancel(CancellationReason.USER_REQUEST, null); Assert.assertEquals(ControllerHolder.State.SUCCESS, holder.getState()); } @@ -391,7 +434,7 @@ public String queryId() } @Override - public void stop(final CancellationReason reason) + public void stop(final CancellationReason reason, @Nullable Throwable cause) { } diff --git a/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQCompactionTaskRunTest.java b/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQCompactionTaskRunTest.java index 895de0319daa..e8c49f918d26 100644 --- a/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQCompactionTaskRunTest.java +++ b/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQCompactionTaskRunTest.java @@ -93,6 +93,7 @@ import org.apache.druid.segment.loading.LocalDataSegmentPusher; import org.apache.druid.segment.loading.LocalDataSegmentPusherConfig; import org.apache.druid.segment.loading.SegmentCacheManager; +import org.apache.druid.segment.loading.external.VirtualStorageManager; import org.apache.druid.segment.nested.NestedDataComplexTypeSerde; import org.apache.druid.segment.serde.ComplexMetrics; import org.apache.druid.server.security.Escalator; @@ -249,6 +250,7 @@ public void setUpMSQ() .toInstance(new ForwardingQueryProcessingPool(Execs.singleThreaded("Test-runner-processing-pool"))), binder -> binder.bind(ObjectMapper.class).annotatedWith(Json.class).toInstance(objectMapper), binder -> binder.bind(SegmentCacheManager.class).toInstance(segmentCacheManager), + binder -> binder.bind(VirtualStorageManager.class).toInstance(MSQTestBase.makeNilVirtualStorageManager()), binder -> binder.bind(GroupingEngine.class).toInstance(groupingEngine) ); injector = Guice.createInjector(modules); diff --git a/multi-stage-query/src/test/java/org/apache/druid/msq/indexing/IndexerFrameContextTest.java 
b/multi-stage-query/src/test/java/org/apache/druid/msq/indexing/IndexerFrameContextTest.java index 6832198173bf..560d10b11f6f 100644 --- a/multi-stage-query/src/test/java/org/apache/druid/msq/indexing/IndexerFrameContextTest.java +++ b/multi-stage-query/src/test/java/org/apache/druid/msq/indexing/IndexerFrameContextTest.java @@ -135,6 +135,7 @@ private static IndexerFrameContext makeContext(final ProcessingBuffersSet proces null, null, null, + null, processingBuffersSet, null, null, diff --git a/multi-stage-query/src/test/java/org/apache/druid/msq/indexing/MSQWorkerTaskLauncherRetryTest.java b/multi-stage-query/src/test/java/org/apache/druid/msq/indexing/MSQWorkerTaskLauncherRetryTest.java index 0ca643f109f7..35488be081b1 100644 --- a/multi-stage-query/src/test/java/org/apache/druid/msq/indexing/MSQWorkerTaskLauncherRetryTest.java +++ b/multi-stage-query/src/test/java/org/apache/druid/msq/indexing/MSQWorkerTaskLauncherRetryTest.java @@ -322,6 +322,12 @@ public ListenableFuture> terminateSupervisor(String supervis throw new UOE("Not implemented"); } + @Override + public ListenableFuture> resetToLatestAndBackfill(String supervisorId) + { + throw new UOE("Not implemented"); + } + @Override public ListenableFuture> supervisorStatuses() { diff --git a/multi-stage-query/src/test/java/org/apache/druid/msq/input/RegularLoadableSegmentTest.java b/multi-stage-query/src/test/java/org/apache/druid/msq/input/RegularLoadableSegmentTest.java index 4bf712811de9..c24a1506ec2e 100644 --- a/multi-stage-query/src/test/java/org/apache/druid/msq/input/RegularLoadableSegmentTest.java +++ b/multi-stage-query/src/test/java/org/apache/druid/msq/input/RegularLoadableSegmentTest.java @@ -55,6 +55,7 @@ import org.apache.druid.segment.loading.SegmentLoaderConfig; import org.apache.druid.segment.loading.SegmentLoadingException; import org.apache.druid.segment.loading.SegmentLocalCacheManager; +import org.apache.druid.segment.loading.StorageLoadingThreadPool; import 
org.apache.druid.segment.loading.StorageLocation; import org.apache.druid.segment.loading.StorageLocationConfig; import org.apache.druid.server.SegmentManager; @@ -164,12 +165,13 @@ public void setUp() throws Exception cacheDir = tempDir.resolve("cache").toFile(); final SegmentLoaderConfig virtualLoaderConfig = new SegmentLoaderConfig() .setLocations(ImmutableList.of(new StorageLocationConfig(cacheDir, 10_000_000_000L, null))) - .setVirtualStorage(true, true); + .setVirtualStorage(true).setVirtualStorageIsEphemeral(true); final List virtualLocations = virtualLoaderConfig.toStorageLocations(); segmentManagerDynamic = new SegmentManager( new SegmentLocalCacheManager( virtualLocations, virtualLoaderConfig, + StorageLoadingThreadPool.createFromConfig(virtualLoaderConfig), new LeastBytesUsedStorageLocationSelectorStrategy(virtualLocations), TestIndex.INDEX_IO, jsonMapper @@ -185,6 +187,7 @@ public void setUp() throws Exception new SegmentLocalCacheManager( localLocations, localLoaderConfig, + StorageLoadingThreadPool.createFromConfig(localLoaderConfig), new LeastBytesUsedStorageLocationSelectorStrategy(localLocations), TestIndex.INDEX_IO, jsonMapper diff --git a/multi-stage-query/src/test/java/org/apache/druid/msq/querykit/scan/ScanQueryFrameProcessorTest.java b/multi-stage-query/src/test/java/org/apache/druid/msq/querykit/scan/ScanQueryFrameProcessorTest.java index 9922be0a6c24..8b617059082d 100644 --- a/multi-stage-query/src/test/java/org/apache/druid/msq/querykit/scan/ScanQueryFrameProcessorTest.java +++ b/multi-stage-query/src/test/java/org/apache/druid/msq/querykit/scan/ScanQueryFrameProcessorTest.java @@ -133,7 +133,6 @@ public void test_runWithSegments() throws Exception segmentReferenceProvider.acquireReference(), null ), - null, null ) ), @@ -396,7 +395,6 @@ public void close() segmentReferenceProvider.acquireReference(), null ), - null, null ) ), diff --git a/multi-stage-query/src/test/java/org/apache/druid/msq/sql/MSQTaskQueryMakerTest.java 
b/multi-stage-query/src/test/java/org/apache/druid/msq/sql/MSQTaskQueryMakerTest.java index f7947676ed7d..c72efa72aa1d 100644 --- a/multi-stage-query/src/test/java/org/apache/druid/msq/sql/MSQTaskQueryMakerTest.java +++ b/multi-stage-query/src/test/java/org/apache/druid/msq/sql/MSQTaskQueryMakerTest.java @@ -93,6 +93,7 @@ import org.apache.druid.segment.join.JoinConditionAnalysis; import org.apache.druid.segment.join.JoinType; import org.apache.druid.segment.join.JoinableFactoryWrapper; +import org.apache.druid.segment.loading.external.VirtualStorageManager; import org.apache.druid.server.QueryResponse; import org.apache.druid.server.QueryStackTests; import org.apache.druid.server.SegmentManager; @@ -222,6 +223,7 @@ public void setUp() throws Exception binder -> { binder.bind(WireTransferableContext.class).toInstance(FrameTestUtil.WT_CONTEXT_LEGACY); binder.bind(CoordinatorClient.class).to(NoopCoordinatorClient.class); + binder.bind(VirtualStorageManager.class).toInstance(MSQTestBase.makeNilVirtualStorageManager()); } ); Injector injector = Guice.createInjector(defaultModule, BoundFieldModule.of(this)); diff --git a/multi-stage-query/src/test/java/org/apache/druid/msq/test/AbstractMSQComponentSupplierDelegate.java b/multi-stage-query/src/test/java/org/apache/druid/msq/test/AbstractMSQComponentSupplierDelegate.java index 4d1fa384e2e7..ede5f61662c4 100644 --- a/multi-stage-query/src/test/java/org/apache/druid/msq/test/AbstractMSQComponentSupplierDelegate.java +++ b/multi-stage-query/src/test/java/org/apache/druid/msq/test/AbstractMSQComponentSupplierDelegate.java @@ -49,7 +49,6 @@ public DruidModule getCoreModule() { return DruidModuleCollection.of( super.getCoreModule(), - new MSQTestModule(), new IndexingServiceTuningConfigModule(), new JoinableFactoryModule(), new MSQExternalDataSourceModule(), @@ -59,6 +58,12 @@ public DruidModule getCoreModule() ); } + @Override + public DruidModule getOverrideModule() + { + return 
DruidModuleCollection.of(super.getOverrideModule(), new MSQTestModule()); + } + @Override public Class getSqlEngineClass() { diff --git a/multi-stage-query/src/test/java/org/apache/druid/msq/test/CalciteMSQTestsHelper.java b/multi-stage-query/src/test/java/org/apache/druid/msq/test/CalciteMSQTestsHelper.java index bef94919dcf2..837a545db1d0 100644 --- a/multi-stage-query/src/test/java/org/apache/druid/msq/test/CalciteMSQTestsHelper.java +++ b/multi-stage-query/src/test/java/org/apache/druid/msq/test/CalciteMSQTestsHelper.java @@ -95,7 +95,7 @@ public void configure(Binder binder) public SegmentCacheManager provideSegmentCacheManager(ObjectMapper testMapper, TempDirProducer tempDirProducer) { return new SegmentCacheManagerFactory(TestIndex.INDEX_IO, testMapper) - .manufacturate(tempDirProducer.newTempFolder("test"), true); + .manufacturate(tempDirProducer.newTempFolder("test"), null, true); } @Provides diff --git a/multi-stage-query/src/test/java/org/apache/druid/msq/test/MSQTestBase.java b/multi-stage-query/src/test/java/org/apache/druid/msq/test/MSQTestBase.java index 10c3e8cbec31..65a9fb5b0e8f 100644 --- a/multi-stage-query/src/test/java/org/apache/druid/msq/test/MSQTestBase.java +++ b/multi-stage-query/src/test/java/org/apache/druid/msq/test/MSQTestBase.java @@ -155,10 +155,14 @@ import org.apache.druid.segment.column.RowSignature; import org.apache.druid.segment.incremental.IncrementalIndexSchema; import org.apache.druid.segment.loading.DataSegmentPusher; +import org.apache.druid.segment.loading.LeastBytesUsedStorageLocationSelectorStrategy; import org.apache.druid.segment.loading.LocalDataSegmentPusher; import org.apache.druid.segment.loading.LocalDataSegmentPusherConfig; import org.apache.druid.segment.loading.LocalLoadSpec; import org.apache.druid.segment.loading.SegmentCacheManager; +import org.apache.druid.segment.loading.StorageLoadingThreadPool; +import org.apache.druid.segment.loading.external.StorageLocationVirtualStorageManager; +import 
org.apache.druid.segment.loading.external.VirtualStorageManager; import org.apache.druid.segment.realtime.appenderator.AppenderatorsManager; import org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMediumFactory; import org.apache.druid.segment.writeout.SegmentWriteOutMediumFactory; @@ -466,7 +470,7 @@ public void setUp2() indexIO = new IndexIO(objectMapper, ColumnConfig.DEFAULT); segmentCacheManager = - new SegmentCacheManagerFactory(indexIO, objectMapper).manufacturate(newTempFolder("cacheManager"), true); + new SegmentCacheManagerFactory(indexIO, objectMapper).manufacturate(newTempFolder("cacheManager"), null, true); testSegmentManager = new TestSegmentManager(); @@ -556,6 +560,13 @@ public String getFormatString() // Requirement of WorkerMemoryParameters.createProductionInstanceForWorker(injector) binder -> binder.bind(AppenderatorsManager.class).toProvider(() -> null), binder -> binder.bind(SegmentManager.class).toInstance(testSegmentManager.getSegmentManager()), + binder -> binder.bind(VirtualStorageManager.class).toInstance( + new StorageLocationVirtualStorageManager( + segmentCacheManager.getLocations(), + new LeastBytesUsedStorageLocationSelectorStrategy(segmentCacheManager.getLocations()), + segmentCacheManager.getLoadingThreadPool() + ) + ), new JoinableFactoryModule(), new IndexingServiceTuningConfigModule(), Modules.override(new MSQSqlModule()).with( @@ -797,7 +808,7 @@ public void close() testSegmentManager.addSegment(dataSegment, segment); acquiredSegment = testSegmentManager.getSegment(segmentId); } - return AdaptedLoadableSegment.create(acquiredSegment, descriptor.getInterval(), null, counters); + return AdaptedLoadableSegment.fromUnmanagedSegment(acquiredSegment, descriptor, null, counters); } public SelectTester testSelectQuery() @@ -847,6 +858,18 @@ public static WorkerMemoryParameters makeTestWorkerMemoryParameters() ); } + /** + * Creates an non-functional {@link VirtualStorageManager} suitable for tests. 
+ */ + public static VirtualStorageManager makeNilVirtualStorageManager() + { + return new StorageLocationVirtualStorageManager( + ImmutableList.of(), + new LeastBytesUsedStorageLocationSelectorStrategy(ImmutableList.of()), + StorageLoadingThreadPool.none() + ); + } + private String runMultiStageQuery( String query, Map context, diff --git a/multi-stage-query/src/test/java/org/apache/druid/msq/test/MSQTestOverlordServiceClient.java b/multi-stage-query/src/test/java/org/apache/druid/msq/test/MSQTestOverlordServiceClient.java index 27381ca6904b..f7b8d31def41 100644 --- a/multi-stage-query/src/test/java/org/apache/druid/msq/test/MSQTestOverlordServiceClient.java +++ b/multi-stage-query/src/test/java/org/apache/druid/msq/test/MSQTestOverlordServiceClient.java @@ -258,7 +258,7 @@ private void registerController(String queryId, MSQTestTaskDetails msqTestTaskDe @Override public ListenableFuture cancelTask(String taskId) { - getControllerForQueryId(taskId).stop(CancellationReason.TASK_SHUTDOWN); + getControllerForQueryId(taskId).stop(CancellationReason.TASK_SHUTDOWN, null); return Futures.immediateFuture(null); } diff --git a/multi-stage-query/src/test/java/org/apache/druid/msq/test/MSQTestWorkerContext.java b/multi-stage-query/src/test/java/org/apache/druid/msq/test/MSQTestWorkerContext.java index 02ae7c56e1a8..ef5191088347 100644 --- a/multi-stage-query/src/test/java/org/apache/druid/msq/test/MSQTestWorkerContext.java +++ b/multi-stage-query/src/test/java/org/apache/druid/msq/test/MSQTestWorkerContext.java @@ -54,6 +54,7 @@ import org.apache.druid.segment.incremental.NoopRowIngestionMeters; import org.apache.druid.segment.incremental.RowIngestionMeters; import org.apache.druid.segment.loading.DataSegmentPusher; +import org.apache.druid.segment.loading.external.VirtualStorageManager; import org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMediumFactory; import org.apache.druid.server.DruidNode; import org.apache.druid.server.SegmentManager; @@ -258,6 +259,12 
@@ public SegmentManager segmentManager() return injector.getInstance(SegmentManager.class); } + @Override + public VirtualStorageManager virtualStorageManager() + { + return injector.getInstance(VirtualStorageManager.class); + } + @Override public CoordinatorClient coordinatorClient() { diff --git a/owasp-dependency-check-suppressions.xml b/owasp-dependency-check-suppressions.xml index 5677c037353e..0be1d8f33d43 100644 --- a/owasp-dependency-check-suppressions.xml +++ b/owasp-dependency-check-suppressions.xml @@ -242,12 +242,12 @@ - + ^pkg:maven/org\.codehaus\.jackson/jackson\-mapper\-asl@1.9.13$ - 10 + 10 diff --git a/pom.xml b/pom.xml index aabb17d07f08..bab799d96ad1 100644 --- a/pom.xml +++ b/pom.xml @@ -96,7 +96,7 @@ 2.2.0 10.17.1.0 4.2.22 - 2.41.0 + 2.49.0 7.6.0 8.5.4 32.1.3-jre @@ -105,7 +105,7 @@ 1.10.0 12.1.8 1.19.4 - 2.20.2 + 2.21.3 1.9.13 2.25.4 8.2.0 @@ -125,7 +125,7 @@ It should be removed once those extensions are upgraded or dropped (see #19109). --> 1.12.793 2.40.0 - 2.9.3 + 3.2.4 0.8.14 2.0.3 6.2.5.Final @@ -164,6 +164,9 @@ --add-opens=java.base/java.util=ALL-UNNAMED + + + --add-modules=jdk.incubator.vector maven.org @@ -589,17 +592,6 @@ curator-recipes ${apache.curator.version} - - org.apache.curator - curator-x-discovery - ${apache.curator.version} - - - com.fasterxml.jackson.core - jackson-databind - - - org.apache.calcite calcite-core @@ -1794,6 +1786,8 @@ **/*_jmhType_*.class **/*_jmhTest_*.class **/*_generated*.class + + **/math/expr/vector/simd/Simd*.class **.SuppressForbidden @@ -2143,6 +2137,11 @@ org.apache.hadoop.fs + + + + --add-modules=jdk.incubator.vector + @@ -2152,6 +2151,9 @@ true ${maven.compiler.release} + + --add-modules=jdk.incubator.vector + @@ -2212,6 +2214,8 @@ -J--add-exports=java.base/sun.nio.ch=ALL-UNNAMED -J--add-opens=jdk.compiler/com.sun.tools.javac.code=ALL-UNNAMED -J--add-opens=jdk.compiler/com.sun.tools.javac.comp=ALL-UNNAMED + -J--add-modules=jdk.incubator.vector + --add-modules=jdk.incubator.vector diff --git 
a/processing/src/main/java/org/apache/druid/common/asyncresource/AsyncResource.java b/processing/src/main/java/org/apache/druid/common/asyncresource/AsyncResource.java new file mode 100644 index 000000000000..a92b9047784d --- /dev/null +++ b/processing/src/main/java/org/apache/druid/common/asyncresource/AsyncResource.java @@ -0,0 +1,139 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.common.asyncresource; + +import com.google.common.util.concurrent.ListenableFuture; +import org.apache.druid.collections.ResourceHolder; +import org.apache.druid.error.DruidException; + +import java.io.Closeable; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.Future; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; + +/** + * Represents a resource that requires some cleanup and that is acquired asynchronously. This class can in principle + * be used for resources that do not need cleanup or are not acquired asynchronously, but it is most useful when + * both are true. The wrapper generally owns the resource lifecycle; see "to consume a resource" below for details. + * + *

To produce a resource, generally you should create and populate {@link SettableAsyncResource}. + * + *

To consume a resource, use {@link #addReadyCallback(Runnable)}, {@link #await()}, or {@link #await(long)} + * to wait for the resource to become ready. Then use {@link #get()} to retrieve the resource. When you are done + * with the resource, call {@link #close()} on the {@code AsyncResource} object to close it. Do *not* close the + * resource {@code T} itself, even if it is {@link Closeable}, as this may lead to a double-close. The one + * exception to this rule is if you call {@link SettableAsyncResource#release()}: in this case you actually + * *must* call close on the resource {@code T} itself. + * + *

Why not use Futures?

+ * Often {@link Future} or {@link ListenableFuture} are used for objects that are created asynchronously. + * These are, however, problematic when the object is a resource that requires cleanup. The biggest issue is + * handling cancellation. As soon as a caller gets a {@code Future}, it becomes responsible for + * closing the resource once it is created. If the caller is a query that could itself be canceled, it must + * still arrange for the resource to be closed. + * + *

The caller can do something like this to deal with it: + * + *

+ * // Upon query cancellation, attach a callback to the future that closes the resource once it becomes available.
+ * Futures.addCallback(
+ *   resourceFuture,
+ *   new FutureCallback<>() {
+ *     void onSuccess(Closeable resource) { resource.close(); }
+ *     void onFailure(Throwable t) { }
+ *   }
+ * );
+ * 
+ * + * But this is awkward, and doesn't allow resource acquisition to actually be canceled. Canceling the future isn't + * reliable, because it can lead to an orphaned resource: the asynchronous acquisition can complete in a race with + * cancellation, and in this case, the resource becomes eligible for GC without completing the future and therefore + * without being closed. + * + *

AsyncResource handles this problem by automatically closing the resource in + * {@link SettableAsyncResource#set(ResourceHolder)} when the {@link SettableAsyncResource} has been canceled. + */ +public interface AsyncResource extends Closeable +{ + /** + * Whether resource acquisition has completed (successfully or with failure). To wait for this to become true + * asynchronously, use {@link #addReadyCallback(Runnable)}. To block until readiness, use {@link #await()} + * or {@link #await(long)}. + */ + boolean isReady(); + + /** + * Register a callback to fire when {@link #isReady()} becomes true (whether the load succeeded or failed). If the + * holder is already ready, the callback fires immediately in the calling thread. Callbacks are not fired if + * {@link #close()} is called prior to the resource becoming available. + * + *

Throws {@link DruidException} if {@link #close()} has been called prior to this method. + */ + void addReadyCallback(Runnable callback); + + /** + * Retrieve the underlying object. May be called any number of times, and the same object will be returned. + * + *

Throws {@link DruidException} if the underlying object is not ready or if {@link #close()} has been called. + * Also throws an exception if the resource acquisition failed. + */ + T get(); + + /** + * Block until {@link #isReady()} returns true. Does not close the resource if interrupted; callers must still + * call {@link #close()}. + * + *

Throws {@link DruidException} if {@link #close()} has been called prior to this method. + */ + default T await() throws InterruptedException + { + final CountDownLatch latch = new CountDownLatch(1); + addReadyCallback(latch::countDown); + latch.await(); + return get(); + } + + /** + * Block until {@link #isReady()} returns true, up to some timeout. Does not close the resource if interrupted + * or if waiting times out; callers must still call {@link #close()}. + * + *

Throws {@link DruidException} if {@link #close()} has been called prior to this method. + */ + default T await(long timeoutMillis) throws InterruptedException, TimeoutException + { + final CountDownLatch latch = new CountDownLatch(1); + addReadyCallback(latch::countDown); + if (!latch.await(timeoutMillis, TimeUnit.MILLISECONDS)) { + throw new TimeoutException(); + } + return get(); + } + + /** + * Closes the resource if it is ready, and has not been released by {@link SettableAsyncResource#release()}. + * If acquisition is still in progress, it is canceled if possible. + * + *

Despite {@link Closeable} requiring this method to be idempotent, it is not necessarily + * going to be idempotent. Do not close more than once. + */ + @Override + void close(); +} diff --git a/processing/src/main/java/org/apache/druid/common/asyncresource/AsyncResources.java b/processing/src/main/java/org/apache/druid/common/asyncresource/AsyncResources.java new file mode 100644 index 000000000000..e22edd06750f --- /dev/null +++ b/processing/src/main/java/org/apache/druid/common/asyncresource/AsyncResources.java @@ -0,0 +1,114 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.common.asyncresource; + +import org.apache.druid.collections.ResourceHolder; + +import java.io.Closeable; +import java.util.List; +import java.util.function.Function; + +/** + * Utility functions for {@link AsyncResource}. + */ +public class AsyncResources +{ + private AsyncResources() + { + // No instantiation. + } + + /** + * Returns an {@link AsyncResource} that wraps the underlying object, which does not have a lifecycle. 
+ */ + public static AsyncResource unmanaged(final T object) + { + final SettableAsyncResource retVal = new SettableAsyncResource<>(); + retVal.set(object, null); + return retVal; + } + + /** + * Returns an {@link AsyncResource} that wraps the underlying closeable object. + */ + public static AsyncResource ofCloseable(final T object) + { + final SettableAsyncResource retVal = new SettableAsyncResource<>(); + retVal.set(ResourceHolder.fromCloseable(object)); + return retVal; + } + + /** + * Returns an {@link AsyncResource} that applies {@code function} to the object provided by + * {@code sourceResource}. Calling {@link AsyncResource#close()} on the returned async resource causes the + * underlying async resource to be closed. + * + *

The transformation generally happens eagerly in the thread that provides the source resource, so it is + * important that it run quickly. + * + *

The target of {@code function} need not be {@link Closeable}, and even if it is {@link Closeable}, it + * is not closed (only the source is closed). This transform utility is meant for transformations that do + * not introduce new resource lifecycles. + */ + public static AsyncResource transform( + final AsyncResource sourceResource, + final Function function + ) + { + return new TransformAsyncResource<>(sourceResource, function); + } + + /** + * Returns an {@link AsyncResource} that collects a list of underlying resources into a single lifecycle. + * Calling {@link AsyncResource#close()} on the returned async resource causes the underlying async resources + * to also be closed. + */ + public static AsyncResource> collect(final List> asyncResources) + { + return new CollectAsyncResource<>(asyncResources); + } + + /** + * Returns an {@link AsyncResource} that recovers from an exception in {@code sourceResource}. + * + *

If the source resource succeeds, the recoverFn is not called and the underlying source resource is returned. + * On the other hand, if the source resource fails, the {@code recoverFn} is called with the exception and is + * given a chance to substitute a fallback value. Recovery generally happens eagerly in the thread that provides + * the source resource, so it is important that it run quickly. + * + *

When recovery happens, the {@code sourceResource} is closed immediately. Otherwise, the {@code sourceResource} + * is closed when the resource returned by this function is closed. + * + *

The target of {@code function} need not be {@link Closeable}, and even if it is {@link Closeable}, it + * is not closed (only the source is closed). This transform utility is meant for transformations that do + * not introduce new resource lifecycles. + * + * @param sourceResource the source resource + * @param recoverFn called when the source resource fails. Returns nonnull to recover, or null to keep the + * error condition as-is. + */ + public static AsyncResource recover( + final AsyncResource sourceResource, + final Function recoverFn + ) + { + return new RecoverAsyncResource<>(sourceResource, recoverFn); + } +} diff --git a/processing/src/main/java/org/apache/druid/common/asyncresource/CollectAsyncResource.java b/processing/src/main/java/org/apache/druid/common/asyncresource/CollectAsyncResource.java new file mode 100644 index 000000000000..3955e0b25a48 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/common/asyncresource/CollectAsyncResource.java @@ -0,0 +1,98 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.common.asyncresource; + +import org.apache.druid.java.util.common.io.Closer; +import org.apache.druid.utils.CloseableUtils; + +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.atomic.AtomicInteger; + +/** + * Resource that collects a list of underlying resources into a single lifecycle. + * + * @see AsyncResources#collect(List) for more details + */ +public class CollectAsyncResource implements AsyncResource> +{ + private final List> sourceResources; + private final AtomicInteger readyCount = new AtomicInteger(0); + private final SettableAsyncResource> targetResource = new SettableAsyncResource<>(); + + /** + * Constructor. Can also be created with {@link AsyncResources#collect(List)}. + */ + CollectAsyncResource(final List> sourceResources) + { + this.sourceResources = sourceResources; + + if (sourceResources.isEmpty()) { + targetResource.set(List.of(), null); + } else { + for (final AsyncResource asyncResource : sourceResources) { + asyncResource.addReadyCallback(this::onOneSourceReady); + } + } + } + + @Override + public boolean isReady() + { + return targetResource.isReady(); + } + + @Override + public void addReadyCallback(Runnable callback) + { + targetResource.addReadyCallback(callback); + } + + @Override + public List get() + { + return targetResource.get(); + } + + @Override + public void close() + { + final Closer closer = Closer.create(); + closer.registerAll(sourceResources); + closer.register(targetResource); + CloseableUtils.closeAndWrapExceptions(closer); + } + + private void onOneSourceReady() + { + if (readyCount.incrementAndGet() == sourceResources.size()) { + try { + final List resources = new ArrayList<>(sourceResources.size()); + for (final AsyncResource asyncResource : sourceResources) { + resources.add(asyncResource.get()); + } + targetResource.set(resources, null); // no Closeable here, since we own asyncResources ourselves + } + catch (Throwable e) { + 
targetResource.setException(e); + } + } + } +} diff --git a/processing/src/main/java/org/apache/druid/common/asyncresource/RecoverAsyncResource.java b/processing/src/main/java/org/apache/druid/common/asyncresource/RecoverAsyncResource.java new file mode 100644 index 000000000000..a83a70fd74f4 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/common/asyncresource/RecoverAsyncResource.java @@ -0,0 +1,124 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.common.asyncresource; + +import org.apache.druid.java.util.common.io.Closer; +import org.apache.druid.java.util.common.logger.Logger; +import org.apache.druid.utils.CloseableUtils; + +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.function.Function; + +/** + * Resource that passes through an underlying resource, but can substitute a fallback on failure. 
+ * + * @see AsyncResources#recover(AsyncResource, Function) for more details + */ +public class RecoverAsyncResource implements AsyncResource +{ + private static final Logger log = new Logger(RecoverAsyncResource.class); + + private final AsyncResource sourceResource; + private final Function recovery; + private final SettableAsyncResource targetResource = new SettableAsyncResource<>(); + + /** + * Whether {@link #sourceResource} has been closed. The source is closed either after recovery (so its resources + * are released before handing the recovered object to the caller), or otherwise in {@link #close()}. + */ + private final AtomicBoolean sourceClosed = new AtomicBoolean(false); + + /** + * Constructor. Can also be created with {@link AsyncResources#recover(AsyncResource, Function)}. + */ + RecoverAsyncResource( + final AsyncResource sourceResource, + final Function recovery + ) + { + this.sourceResource = sourceResource; + this.recovery = recovery; + sourceResource.addReadyCallback(this::onSourceReady); + } + + @Override + public boolean isReady() + { + return targetResource.isReady(); + } + + @Override + public T get() + { + return targetResource.get(); + } + + @Override + public void addReadyCallback(Runnable callback) + { + targetResource.addReadyCallback(callback); + } + + @Override + public void close() + { + final Closer closer = Closer.create(); + if (sourceClosed.compareAndSet(false, true)) { + closer.register(sourceResource); + } + closer.register(targetResource); + CloseableUtils.closeAndWrapExceptions(closer); + } + + private void onSourceReady() + { + final T value; + try { + value = sourceResource.get(); + } + catch (Throwable e) { + final T recovered; + try { + recovered = recovery.apply(e); + } + catch (Throwable e2) { + e.addSuppressed(e2); + targetResource.setException(e); + return; + } + + if (recovered != null) { + // Release the source's resources before handing back the recovered object. 
+ if (sourceClosed.compareAndSet(false, true)) { + CloseableUtils.closeAndSuppressExceptions( + sourceResource, + ex -> log.warn(ex, "Failed to close source resource during recovery") + ); + } + targetResource.set(recovered, null); + } else { + targetResource.setException(e); + } + return; + } + + targetResource.set(value, null); + } +} diff --git a/processing/src/main/java/org/apache/druid/common/asyncresource/SettableAsyncResource.java b/processing/src/main/java/org/apache/druid/common/asyncresource/SettableAsyncResource.java new file mode 100644 index 000000000000..9894d16a3116 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/common/asyncresource/SettableAsyncResource.java @@ -0,0 +1,335 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.common.asyncresource; + +import com.google.common.base.Throwables; +import com.google.common.util.concurrent.SettableFuture; +import com.google.errorprone.annotations.concurrent.GuardedBy; +import org.apache.druid.collections.ResourceHolder; +import org.apache.druid.error.DruidException; +import org.apache.druid.java.util.common.Either; +import org.apache.druid.java.util.common.logger.Logger; +import org.apache.druid.segment.AsyncCursorHolder; +import org.apache.druid.utils.CloseableUtils; + +import javax.annotation.Nullable; +import java.io.Closeable; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.Future; + +/** + * Basic utility that allows creating {@link AsyncResource} wrappers. Analogous to JDK {@link CompletableFuture} + * or Guava {@link SettableFuture}. See {@link AsyncResource} for details about why you would want to use this + * instead of {@link Future}. + * + *

In addition to {@link #get()}, there is also {@link #release()}. Releasing is not allowed by instances + * of this class, but may be allowed by subclasses. When releasing is allowed, it enables an ownership-transfer + * model. See {@link #release()} for more information on this model. + */ +public class SettableAsyncResource implements AsyncResource +{ + private static final Logger LOG = new Logger(SettableAsyncResource.class); + + /** + * Whether {@link #release()} is allowed. + */ + private final boolean releasable; + + /** + * Callbacks provided by {@link #addReadyCallback(Runnable)}. + */ + @GuardedBy("this") + private final List readyCallbacks = new ArrayList<>(); + + /** + * Canceler provided by {@link #setCanceler(Runnable)}. Ideally would be {@code GuardedBy("this")}, but isn't + * annotated because errorprone doesn't like the reference to canceler::run escaping. + */ + @Nullable + private Runnable canceler; + + /** + * Result set by {@link #setInternal(Either)}. + */ + @Nullable + @GuardedBy("this") + private ResourceHolder result = null; + + /** + * Error set by {@link #setInternal(Either)}. + */ + @Nullable + @GuardedBy("this") + private Throwable error = null; + + @GuardedBy("this") + private State state = State.NEW; + + /** + * Constructor. + */ + public SettableAsyncResource() + { + this(false); + } + + /** + * Constructor for subclasses that allow {@link #release()}. They should provide "true" here and then + * also override {@link #release()} to change its visibility from protected to public. + */ + protected SettableAsyncResource(boolean releasable) + { + this.releasable = releasable; + } + + /** + * Set a canceler that will be called from {@link #close()} if this {@link AsyncResource} is closed prior + * to the resource being available. Calling this method has no effect if {@link #close()} has already been called + * or if the resource is already available.
+ */ + public synchronized void setCanceler(Runnable newCanceler) + { + if (canceler != null) { + throw DruidException.defensive("canceler already set, cannot call setCanceler()"); + } + + if (state == State.NEW) { + this.canceler = newCanceler; + } + } + + /** + * Provides a resource and closer for the resource. Transitions the {@link AsyncResource} into a "ready" state, + * where {@link #isReady()} returns true, if it has not yet been closed. Returns true if this transition happened + * successfully, false otherwise. + * + *

If this method returns true, it also fires all the callbacks that were registered via + * {@link #addReadyCallback(Runnable)}. Once this method returns true, {@link #close()} will no longer call + * the canceler from {@link #setCanceler(Runnable)}. + * + *

If this method returns false, the producer is responsible for closing the resource itself. + * + *

Throws {@link DruidException} if this resource was already completed from a prior call to this method or + * {@link #setException}. + */ + public boolean set(T object, @Nullable Closeable closer) + { + if (object == null) { + throw DruidException.defensive("object cannot be null"); + } + final ResourceHolder resourceHolder = new ResourceHolder<>() + { + @Override + public T get() + { + return object; + } + + @Override + public void close() + { + CloseableUtils.closeAndWrapExceptions(closer); + } + }; + return setInternal(Either.value(resourceHolder)); + } + + /** + * Variant of {@link #set(Object, Closeable)} for callers that have a {@link ResourceHolder}. + */ + public boolean set(ResourceHolder holder) + { + return set(holder.get(), holder); + } + + /** + * Provides an exception for a resource that failed to load. Transitions the {@link AsyncResource} into a "ready" + * state, where {@link #isReady()} returns true, if it has not yet been closed. + * + *

If this method successfully transitions to "ready", it also fires all the callbacks that were registered via + * {@link #addReadyCallback(Runnable)}. Afterwards, {@link #close()} will no longer call the canceler from + * {@link #setCanceler(Runnable)}. + * + *

Throws {@link DruidException} if this resource was already completed from a prior call to this method or + * {@link #set}. + */ + public void setException(Throwable t) + { + setInternal(Either.error(t)); + } + + @Override + public synchronized boolean isReady() + { + return state == State.READY; + } + + @Override + public synchronized T get() + { + return switch (state) { + case NEW -> throw DruidException.defensive("Not ready yet"); + case READY -> { + if (error != null) { + Throwables.throwIfUnchecked(error); + throw DruidException.forPersona(DruidException.Persona.DEVELOPER) + .ofCategory(DruidException.Category.UNCATEGORIZED) + .build(error, error.getMessage()); + } else { + yield result.get(); + } + } + case RELEASED -> throw DruidException.defensive("Resource has been released"); + case CLOSED -> throw DruidException.defensive("Closed"); + }; + } + + /** + * Take ownership of the underlying object. After this returns, {@link #close()} on this + * {@link AsyncResource} is a no-op; the caller is responsible for closing the returned {@code T}. Useful when + * passing the resource to something else that prefers to take full ownership of it. + * + *

This method enables a resource-transfer model. It is exposed by certain subclasses, such as + * {@link AsyncCursorHolder}, where the resource is itself {@link Closeable} and where that close method + * encapsulates all necessary resource releasing logic. It is provided for convenience of certain callers, + * although note that you must avoid releasing if you intend to use combinators such as + * {@link AsyncResources#collect}, {@link AsyncResources#transform}, etc. These combinators will fail to properly + * encapsulate resource lifecycle if resources have been released. + * + *

Throws {@link DruidException} if the holder is not yet ready, has already been released, or if + * {@link #close()} has been called. + */ + protected synchronized T release() + { + if (!releasable) { + throw DruidException.defensive("Not releasable"); + } + + final T object = get(); + // Clear result to allow GC. + result = null; + state = State.RELEASED; + return object; + } + + @Override + public void addReadyCallback(Runnable callback) + { + final boolean fireImmediately; + synchronized (this) { + switch (state) { + case NEW -> { + readyCallbacks.add(callback); + fireImmediately = false; + } + case READY -> fireImmediately = true; + default -> throw DruidException.defensive("Cannot addReadyCallback in state[%s]", state); + } + } + if (fireImmediately) { + callback.run(); + } + } + + @Override + public void close() + { + final Closeable deferredCloseable; + + synchronized (this) { + deferredCloseable = switch (state) { + case NEW -> canceler != null ? canceler::run : null; + case READY -> result; + case RELEASED -> null; + default -> throw DruidException.defensive("Already closed"); + }; + + // Clear result and canceler to allow GC. 
+ result = null; + canceler = null; + state = State.CLOSED; + } + + CloseableUtils.closeAndSuppressExceptions( + deferredCloseable, + e -> LOG.warn(e, "Failed to call cleaner of class[%s]", deferredCloseable.getClass()) + ); + } + + @GuardedBy("this") + private List drainCallbacks() + { + final List snapshot = List.copyOf(readyCallbacks); + readyCallbacks.clear(); + return snapshot; + } + + private boolean setInternal(Either> value) + { + final boolean didSet; + final List callbacksToFire; + + synchronized (this) { + didSet = switch (state) { + case NEW -> { + if (value.isError()) { + error = value.error(); + } else { + result = value.valueOrThrow(); + } + + state = State.READY; + yield true; + } + case READY, RELEASED -> throw DruidException.defensive("Already complete, cannot call set/setException again"); + case CLOSED -> false; + }; + + // Clear canceler to allow GC. + canceler = null; + callbacksToFire = drainCallbacks(); + } + fireCallbacks(callbacksToFire); + return didSet; + } + + private static void fireCallbacks(List callbacks) + { + for (final Runnable callback : callbacks) { + try { + callback.run(); + } + catch (Throwable t) { + // Best-effort; one bad callback shouldn't break others. + LOG.warn(t, "callback exception"); + } + } + } + + enum State + { + NEW, + READY, + RELEASED, + CLOSED + } +} diff --git a/processing/src/main/java/org/apache/druid/common/asyncresource/TransformAsyncResource.java b/processing/src/main/java/org/apache/druid/common/asyncresource/TransformAsyncResource.java new file mode 100644 index 000000000000..8f6bcf6bceea --- /dev/null +++ b/processing/src/main/java/org/apache/druid/common/asyncresource/TransformAsyncResource.java @@ -0,0 +1,86 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.common.asyncresource; + +import org.apache.druid.java.util.common.io.Closer; +import org.apache.druid.utils.CloseableUtils; + +import java.util.function.Function; + +/** + * Resource that is the result of calling a function on an underlying resource. + * + * @see AsyncResources#transform(AsyncResource, Function) for more details + */ +public class TransformAsyncResource implements AsyncResource +{ + private final AsyncResource sourceResource; + private final Function function; + private final SettableAsyncResource targetResource; + + /** + * Constructor. Can also be created with {@link AsyncResources#transform(AsyncResource, Function)}. 
+ */ + TransformAsyncResource(final AsyncResource sourceResource, final Function function) + { + this.sourceResource = sourceResource; + this.function = function; + this.targetResource = new SettableAsyncResource<>(); + sourceResource.addReadyCallback(this::onSourceReady); + } + + @Override + public boolean isReady() + { + return targetResource.isReady(); + } + + @Override + public R get() + { + return targetResource.get(); + } + + @Override + public void addReadyCallback(Runnable callback) + { + targetResource.addReadyCallback(callback); + } + + @Override + public void close() + { + final Closer closer = Closer.create(); + closer.register(sourceResource); + closer.register(targetResource); + CloseableUtils.closeAndWrapExceptions(closer); + } + + private void onSourceReady() + { + try { + final R target = function.apply(sourceResource.get()); + targetResource.set(target, null); // no Closeable here, since we own sourceResource ourselves + } + catch (Throwable e) { + targetResource.setException(e); + } + } +} diff --git a/processing/src/main/java/org/apache/druid/data/input/BytesCountingInputEntity.java b/processing/src/main/java/org/apache/druid/data/input/BytesCountingInputEntity.java index 21485bfa02f3..c1b4739bb81a 100644 --- a/processing/src/main/java/org/apache/druid/data/input/BytesCountingInputEntity.java +++ b/processing/src/main/java/org/apache/druid/data/input/BytesCountingInputEntity.java @@ -57,6 +57,12 @@ public InputStream open() throws IOException return new BytesCountingInputStream(baseInputEntity.open(), inputStats); } + @Override + public InputStream openRaw() throws IOException + { + return new BytesCountingInputStream(baseInputEntity.openRaw(), inputStats); + } + public InputEntity getBaseInputEntity() { return baseInputEntity; diff --git a/processing/src/main/java/org/apache/druid/data/input/InputEntity.java b/processing/src/main/java/org/apache/druid/data/input/InputEntity.java index d6ea72115732..f990a94de8fc 100644 --- 
a/processing/src/main/java/org/apache/druid/data/input/InputEntity.java +++ b/processing/src/main/java/org/apache/druid/data/input/InputEntity.java @@ -63,13 +63,22 @@ interface CleanableFile extends Closeable URI getUri(); /** - * Opens an {@link InputStream} on the input entity directly. + * Opens an {@link InputStream} on the input entity, decompressing the raw bytes if the entity is compressed. * This is the basic way to read the given entity. * This method may be called multiple times to re-read the data from the entity. * * @see #fetch */ - InputStream open() throws IOException; + default InputStream open() throws IOException + { + // Implementations that decompress must override this method. + return openRaw(); + } + + /** + * Opens an {@link InputStream} on the raw bytes of the input entity, without any decompression. + */ + InputStream openRaw() throws IOException; /** * Fetches the input entity into the local storage. diff --git a/services/src/main/java/org/apache/druid/cli/convert/ChatHandlerConverter.java b/processing/src/main/java/org/apache/druid/data/input/InputFilePointer.java similarity index 55% rename from services/src/main/java/org/apache/druid/cli/convert/ChatHandlerConverter.java rename to processing/src/main/java/org/apache/druid/data/input/InputFilePointer.java index aa8b72affd7c..651eee96849b 100644 --- a/services/src/main/java/org/apache/druid/cli/convert/ChatHandlerConverter.java +++ b/processing/src/main/java/org/apache/druid/data/input/InputFilePointer.java @@ -17,32 +17,21 @@ * under the License. */ -package org.apache.druid.cli.convert; +package org.apache.druid.data.input; -import com.google.common.collect.ImmutableMap; +import org.apache.druid.io.FilePopulator; -import java.util.Map; -import java.util.Properties; +import java.net.URI; +import java.util.function.LongSupplier; /** + * Pointer to a file that may be on remote storage. + * + * @param uri URI of the file + * @param sizeSupplier supplier for the size of the file. 
If the file is compressed, this is the compressed size + * @param populator populator that knows how to fetch the file. If the file is compressed, this fetches the + * compressed file */ -public class ChatHandlerConverter implements PropertyConverter +public record InputFilePointer(URI uri, LongSupplier sizeSupplier, FilePopulator populator) { - - private static final String PROPERTY = "druid.indexer.chathandler.publishDiscovery"; - - @Override - public boolean canHandle(String property) - { - return PROPERTY.equals(property); - } - - @Override - public Map convert(Properties properties) - { - if (Boolean.parseBoolean(properties.getProperty(PROPERTY))) { - return ImmutableMap.of("druid.indexer.task.chathandler.type", "curator"); - } - return ImmutableMap.of(); - } } diff --git a/processing/src/main/java/org/apache/druid/data/input/InputSource.java b/processing/src/main/java/org/apache/druid/data/input/InputSource.java index be815742be16..23a164094821 100644 --- a/processing/src/main/java/org/apache/druid/data/input/InputSource.java +++ b/processing/src/main/java/org/apache/druid/data/input/InputSource.java @@ -33,6 +33,7 @@ import javax.annotation.Nonnull; import javax.annotation.Nullable; import java.io.File; +import java.util.List; import java.util.Set; /** @@ -78,6 +79,15 @@ public interface InputSource */ boolean needsFormat(); + /** + * If this input source is representable as a set of files, return pointers to those files. + */ + @Nullable + default List asFilePointers() + { + return null; + } + /** * Creates an {@link InputSourceReader}. 
* diff --git a/processing/src/main/java/org/apache/druid/data/input/RetryingInputEntity.java b/processing/src/main/java/org/apache/druid/data/input/RetryingInputEntity.java index ac2481e989ab..c91abb073243 100644 --- a/processing/src/main/java/org/apache/druid/data/input/RetryingInputEntity.java +++ b/processing/src/main/java/org/apache/druid/data/input/RetryingInputEntity.java @@ -36,13 +36,21 @@ public abstract class RetryingInputEntity implements InputEntity @Override public InputStream open() throws IOException { - RetryingInputStream retryingInputStream = new RetryingInputStream<>( + return CompressionUtils.decompress(openRaw(), getPath()); + } + + /** + * Opens a raw {@link InputStream} on the entity, without decompression. + */ + @Override + public InputStream openRaw() throws IOException + { + return new RetryingInputStream<>( this, new RetryingInputEntityOpenFunction(), getRetryCondition(), getMaxRetries() ); - return CompressionUtils.decompress(retryingInputStream, getPath()); } // override this in sub-classes to customize retries diff --git a/processing/src/main/java/org/apache/druid/data/input/impl/ByteEntity.java b/processing/src/main/java/org/apache/druid/data/input/impl/ByteEntity.java index db4f3396ae4d..3e7307d0f144 100644 --- a/processing/src/main/java/org/apache/druid/data/input/impl/ByteEntity.java +++ b/processing/src/main/java/org/apache/druid/data/input/impl/ByteEntity.java @@ -63,7 +63,7 @@ public URI getUri() } @Override - public InputStream open() + public InputStream openRaw() { return new ByteBufferInputStream(buffer.duplicate()); } diff --git a/processing/src/main/java/org/apache/druid/data/input/impl/CloudObjectInputSource.java b/processing/src/main/java/org/apache/druid/data/input/impl/CloudObjectInputSource.java index 6e9587ca0d7e..b047f812810b 100644 --- a/processing/src/main/java/org/apache/druid/data/input/impl/CloudObjectInputSource.java +++ b/processing/src/main/java/org/apache/druid/data/input/impl/CloudObjectInputSource.java 
@@ -29,6 +29,7 @@ import org.apache.druid.data.input.FilePerSplitHintSpec; import org.apache.druid.data.input.InputEntity; import org.apache.druid.data.input.InputFileAttribute; +import org.apache.druid.data.input.InputFilePointer; import org.apache.druid.data.input.InputFormat; import org.apache.druid.data.input.InputRowSchema; import org.apache.druid.data.input.InputSourceReader; @@ -39,6 +40,7 @@ import org.apache.druid.data.input.impl.systemfield.SystemFieldInputSource; import org.apache.druid.data.input.impl.systemfield.SystemFields; import org.apache.druid.java.util.common.CloseableIterators; +import org.apache.druid.java.util.common.FileUtils; import org.apache.druid.utils.CollectionUtils; import org.apache.druid.utils.Streams; @@ -49,6 +51,7 @@ import java.nio.file.FileSystems; import java.nio.file.PathMatcher; import java.nio.file.Paths; +import java.util.ArrayList; import java.util.Iterator; import java.util.List; import java.util.Objects; @@ -211,6 +214,66 @@ Iterator getInputEntities(final InputFormat inputFormat) .iterator(); } + @Nullable + @Override + public List asFilePointers() + { + if (!systemFields.getFields().isEmpty()) { + // System fields cannot be added to file pointers. + return null; + } + + final List locations; + if (!CollectionUtils.isNullOrEmpty(objects)) { + locations = objects; + } else if (!CollectionUtils.isNullOrEmpty(uris)) { + locations = Lists.transform(uris, CloudObjectLocation::new); + } else { + // Only uris + objects can be expressed as file pointers. Prefixes could be if we listed them here, but + // we'd rather avoid doing that, since this method is likely to be called in a thread that should not + // be doing blocking I/O. + return null; + } + + final PathMatcher globMatcher = + StringUtils.isNotBlank(objectGlob) + ? 
FileSystems.getDefault().getPathMatcher("glob:" + objectGlob) + : null; + + final CloudObjectSplitWidget splitWidget = getSplitWidget(); + final List pointers = new ArrayList<>(); + for (final CloudObjectLocation location : locations) { + if (globMatcher != null && !globMatcher.matches(Paths.get(location.getPath()))) { + continue; + } + + final InputEntity entity = createEntity(location); + final URI uri = location.toUri(scheme); + pointers.add( + new InputFilePointer( + uri, + () -> { + try { + return splitWidget.getObjectSize(location); + } + catch (IOException e) { + throw new RuntimeException(e); + } + }, + dstFile -> FileUtils.copyLarge( + entity::openRaw, + dstFile, + new byte[InputEntity.DEFAULT_FETCH_BUFFER_SIZE], + entity.getRetryCondition(), + InputEntity.DEFAULT_MAX_NUM_FETCH_TRIES, + "Failed to fetch [" + uri + "]" + ) + ) + ); + } + return pointers; + } + @Override public boolean equals(Object o) { diff --git a/processing/src/main/java/org/apache/druid/data/input/impl/FileEntity.java b/processing/src/main/java/org/apache/druid/data/input/impl/FileEntity.java index 01762a9e8798..617fd895563b 100644 --- a/processing/src/main/java/org/apache/druid/data/input/impl/FileEntity.java +++ b/processing/src/main/java/org/apache/druid/data/input/impl/FileEntity.java @@ -38,8 +38,14 @@ public FileEntity(File file) } @Override - public CleanableFile fetch(File temporaryDirectory, byte[] fetchBuffer) + public CleanableFile fetch(File temporaryDirectory, byte[] fetchBuffer) throws IOException { + if (CompressionUtils.Format.fromFileName(file.getName()) != null) { + // The file appears to be compressed. Decompress it into temporaryDirectory. + return InputEntity.super.fetch(temporaryDirectory, fetchBuffer); + } + + // The file is not compressed, so there is no need to copy it. 
return new CleanableFile() { @Override @@ -72,4 +78,10 @@ public InputStream open() throws IOException { return CompressionUtils.decompress(new FileInputStream(file), file.getName()); } + + @Override + public InputStream openRaw() throws IOException + { + return new FileInputStream(file); + } } diff --git a/processing/src/main/java/org/apache/druid/data/input/impl/LocalInputSource.java b/processing/src/main/java/org/apache/druid/data/input/impl/LocalInputSource.java index 365a59afecfd..981e23766f13 100644 --- a/processing/src/main/java/org/apache/druid/data/input/impl/LocalInputSource.java +++ b/processing/src/main/java/org/apache/druid/data/input/impl/LocalInputSource.java @@ -37,6 +37,7 @@ import org.apache.druid.data.input.AbstractInputSource; import org.apache.druid.data.input.InputEntity; import org.apache.druid.data.input.InputFileAttribute; +import org.apache.druid.data.input.InputFilePointer; import org.apache.druid.data.input.InputFormat; import org.apache.druid.data.input.InputRowSchema; import org.apache.druid.data.input.InputSourceReader; @@ -290,6 +291,15 @@ protected InputSourceReader formattableReader( ); } + @Nullable + @Override + public List asFilePointers() + { + // Intentionally not implemented, because this method is used for fetching external files to a local + // cache, and there is no point in copying local files from one place to another. 
+ return null; + } + @Override public boolean equals(Object o) { diff --git a/processing/src/main/java/org/apache/druid/guice/GuiceAnnotationIntrospector.java b/processing/src/main/java/org/apache/druid/guice/GuiceAnnotationIntrospector.java index fd8ee5e9e02a..890e3c233a04 100644 --- a/processing/src/main/java/org/apache/druid/guice/GuiceAnnotationIntrospector.java +++ b/processing/src/main/java/org/apache/druid/guice/GuiceAnnotationIntrospector.java @@ -44,19 +44,25 @@ public class GuiceAnnotationIntrospector extends NopAnnotationIntrospector @Override public JacksonInject.Value findInjectableValue(AnnotatedMember m) { - Object id = findGuiceInjectId(m); + // Preserve useInput / optional from the annotation. The simpler Value.forId(id) drops + // them and relies on AnnotationIntrospectorPair's fallback. See FasterXML/jackson-databind#1381. + final JacksonInject annotation = m.getAnnotation(JacksonInject.class); + if (annotation == null) { + return null; + } + final Object id = findGuiceInjectId(m); if (id == null) { return null; } - return JacksonInject.Value.forId(id); + return JacksonInject.Value.from(annotation).withId(id); } + /** + * Resolves the Guice {@link Key} for an annotated member. Callers must verify that {@code m} + * carries a {@link JacksonInject} annotation before invoking; this method does not re-check. 
+ */ private Object findGuiceInjectId(AnnotatedMember m) { - if (m.getAnnotation(JacksonInject.class) == null) { - return null; - } - Type genericType = null; Annotation guiceAnnotation = null; diff --git a/processing/src/main/java/org/apache/druid/guice/StartupInjectorBuilder.java b/processing/src/main/java/org/apache/druid/guice/StartupInjectorBuilder.java index 954610c4e287..70dd1cc51519 100644 --- a/processing/src/main/java/org/apache/druid/guice/StartupInjectorBuilder.java +++ b/processing/src/main/java/org/apache/druid/guice/StartupInjectorBuilder.java @@ -55,6 +55,8 @@ public class StartupInjectorBuilder extends BaseInjectorBuilder double * double, long -> double * double, double -> double + * + * If a non-null {@link SimdSupportedBinaryOp} is supplied to the constructor and + * {@link ExpressionProcessing#useVectorApi()} is true, this factory will return SIMD-specialized processors backed + * by the JDK incubator {@code jdk.incubator.vector} API instead of the standard scalar implementations. 
*/ public class SimpleVectorMathBivariateProcessorFactory extends VectorMathBivariateProcessorFactory { @@ -38,6 +47,8 @@ public class SimpleVectorMathBivariateProcessorFactory extends VectorMathBivaria private final DoubleBivariateLongDoubleFunction longDoubleFunction; private final DoubleBivariateDoubleLongFunction doubleLongFunction; private final DoubleBivariateDoublesFunction doublesFunction; + @Nullable + private final SimdSupportedBinaryOp simdOp; protected SimpleVectorMathBivariateProcessorFactory( LongBivariateLongsFunction longsFunction, @@ -45,16 +56,36 @@ protected SimpleVectorMathBivariateProcessorFactory( DoubleBivariateDoubleLongFunction doubleLongFunction, DoubleBivariateDoublesFunction doublesFunction ) + { + this(longsFunction, longDoubleFunction, doubleLongFunction, doublesFunction, null); + } + + protected SimpleVectorMathBivariateProcessorFactory( + LongBivariateLongsFunction longsFunction, + DoubleBivariateLongDoubleFunction longDoubleFunction, + DoubleBivariateDoubleLongFunction doubleLongFunction, + DoubleBivariateDoublesFunction doublesFunction, + @Nullable SimdSupportedBinaryOp simdOp + ) { this.longsFunction = longsFunction; this.longDoubleFunction = longDoubleFunction; this.doubleLongFunction = doubleLongFunction; this.doublesFunction = doublesFunction; + this.simdOp = simdOp; } @Override public final ExprVectorProcessor longsProcessor(Expr.VectorInputBindingInspector inspector, Expr left, Expr right) { + if (simdOp != null && ExpressionProcessing.useVectorApi()) { + return SimdProcessors.makeLongLong( + left.asVectorProcessor(inspector), + right.asVectorProcessor(inspector), + simdOp, + longsFunction + ); + } return new LongBivariateLongsFunctionVectorProcessor( left.asVectorProcessor(inspector), right.asVectorProcessor(inspector), @@ -69,6 +100,14 @@ public final ExprVectorProcessor longDoubleProcessor( Expr right ) { + if (simdOp != null && ExpressionProcessing.useVectorApi()) { + return SimdProcessors.makeLongDouble( + 
left.asVectorProcessor(inspector), + right.asVectorProcessor(inspector), + simdOp, + longDoubleFunction + ); + } return new DoubleBivariateLongDoubleFunctionVectorProcessor( left.asVectorProcessor(inspector), right.asVectorProcessor(inspector), @@ -83,6 +122,14 @@ public final ExprVectorProcessor doubleLongProcessor( Expr right ) { + if (simdOp != null && ExpressionProcessing.useVectorApi()) { + return SimdProcessors.makeDoubleLong( + left.asVectorProcessor(inspector), + right.asVectorProcessor(inspector), + simdOp, + doubleLongFunction + ); + } return new DoubleBivariateDoubleLongFunctionVectorProcessor( left.asVectorProcessor(inspector), right.asVectorProcessor(inspector), @@ -97,6 +144,14 @@ public final ExprVectorProcessor doublesProcessor( Expr right ) { + if (simdOp != null && ExpressionProcessing.useVectorApi()) { + return SimdProcessors.makeDoubleDouble( + left.asVectorProcessor(inspector), + right.asVectorProcessor(inspector), + simdOp, + doublesFunction + ); + } return new DoubleBivariateDoublesFunctionVectorProcessor( left.asVectorProcessor(inspector), right.asVectorProcessor(inspector), diff --git a/processing/src/main/java/org/apache/druid/math/expr/vector/VectorMathProcessors.java b/processing/src/main/java/org/apache/druid/math/expr/vector/VectorMathProcessors.java index c12ebc55eaa9..4a26f8141531 100644 --- a/processing/src/main/java/org/apache/druid/math/expr/vector/VectorMathProcessors.java +++ b/processing/src/main/java/org/apache/druid/math/expr/vector/VectorMathProcessors.java @@ -23,6 +23,7 @@ import com.google.common.primitives.Ints; import org.apache.druid.math.expr.ExpressionValidationException; import org.apache.druid.math.expr.Function; +import org.apache.druid.math.expr.vector.simd.SimdSupportedBinaryOp; public class VectorMathProcessors { @@ -300,7 +301,7 @@ public static final class Add extends SimpleVectorMathBivariateProcessorFactory public Add() { - super(Long::sum, Double::sum, Double::sum, Double::sum); + super(Long::sum, 
Double::sum, Double::sum, Double::sum, SimdSupportedBinaryOp.ADD); } } @@ -314,7 +315,8 @@ public Subtract() (left, right) -> left - right, (left, right) -> (double) left - right, (left, right) -> left - (double) right, - (left, right) -> left - right + (left, right) -> left - right, + SimdSupportedBinaryOp.SUB ); } } @@ -325,7 +327,13 @@ public static final class Multiply extends SimpleVectorMathBivariateProcessorFac public Multiply() { - super(Multiply::multiply, Multiply::multiply, Multiply::multiply, Multiply::multiply); + super( + Multiply::multiply, + Multiply::multiply, + Multiply::multiply, + Multiply::multiply, + SimdSupportedBinaryOp.MUL + ); } private static long multiply(long x, long y) diff --git a/processing/src/main/java/org/apache/druid/math/expr/vector/simd/SimdDoubleDoubleAddProcessor.java b/processing/src/main/java/org/apache/druid/math/expr/vector/simd/SimdDoubleDoubleAddProcessor.java new file mode 100644 index 000000000000..d476468cb076 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/math/expr/vector/simd/SimdDoubleDoubleAddProcessor.java @@ -0,0 +1,93 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.math.expr.vector.simd; + +import jdk.incubator.vector.DoubleVector; +import jdk.incubator.vector.VectorMask; +import org.apache.druid.math.expr.vector.ExprVectorProcessor; +import org.apache.druid.math.expr.vector.functional.DoubleBivariateDoublesFunction; + +import java.util.Arrays; + +/** + * SIMD specialization of {@code (double[], double[]) -> double[]} addition. The op is hardcoded to + * {@link DoubleVector#add} so the JIT statically resolves it to the platform's double-add intrinsic. + */ +public final class SimdDoubleDoubleAddProcessor extends SimdDoubleDoubleProcessor +{ + public SimdDoubleDoubleAddProcessor( + ExprVectorProcessor left, + ExprVectorProcessor right, + DoubleBivariateDoublesFunction scalarFallback + ) + { + super(left, right, scalarFallback); + } + + @Override + protected void processVector( + double[] leftInput, + double[] rightInput, + boolean[] leftNulls, + boolean[] rightNulls, + int currentSize + ) + { + final boolean hasLeftNulls = leftNulls != null; + final boolean hasRightNulls = rightNulls != null; + final int laneCount = SPECIES.length(); + final int upperBound = SPECIES.loopBound(currentSize); + int i = 0; + if (!hasLeftNulls && !hasRightNulls) { + for (; i < upperBound; i += laneCount) { + final DoubleVector va = DoubleVector.fromArray(SPECIES, leftInput, i); + final DoubleVector vb = DoubleVector.fromArray(SPECIES, rightInput, i); + va.add(vb).intoArray(outValues, i); + } + for (; i < currentSize; i++) { + outValues[i] = scalarFallback.process(leftInput[i], rightInput[i]); + } + Arrays.fill(outNulls, 0, currentSize, false); + } else { + for (; i < upperBound; i += laneCount) { + final VectorMask nm; + if (hasLeftNulls && hasRightNulls) { + nm = VectorMask.fromArray(SPECIES, leftNulls, i) + .or(VectorMask.fromArray(SPECIES, rightNulls, i)); + } else if (hasLeftNulls) { + nm = VectorMask.fromArray(SPECIES, leftNulls, i); + } else { + nm = VectorMask.fromArray(SPECIES, rightNulls, i); + } + final 
DoubleVector va = DoubleVector.fromArray(SPECIES, leftInput, i); + final DoubleVector vb = DoubleVector.fromArray(SPECIES, rightInput, i); + va.add(vb).intoArray(outValues, i); + nm.intoArray(outNulls, i); + } + for (; i < currentSize; i++) { + final boolean isNull = (hasLeftNulls && leftNulls[i]) || (hasRightNulls && rightNulls[i]); + outNulls[i] = isNull; + if (!isNull) { + outValues[i] = scalarFallback.process(leftInput[i], rightInput[i]); + } + } + } + } +} diff --git a/processing/src/main/java/org/apache/druid/math/expr/vector/simd/SimdDoubleDoubleMulProcessor.java b/processing/src/main/java/org/apache/druid/math/expr/vector/simd/SimdDoubleDoubleMulProcessor.java new file mode 100644 index 000000000000..56cf53e3309e --- /dev/null +++ b/processing/src/main/java/org/apache/druid/math/expr/vector/simd/SimdDoubleDoubleMulProcessor.java @@ -0,0 +1,93 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.math.expr.vector.simd; + +import jdk.incubator.vector.DoubleVector; +import jdk.incubator.vector.VectorMask; +import org.apache.druid.math.expr.vector.ExprVectorProcessor; +import org.apache.druid.math.expr.vector.functional.DoubleBivariateDoublesFunction; + +import java.util.Arrays; + +/** + * SIMD specialization of {@code (double[], double[]) -> double[]} multiplication. The op is hardcoded to + * {@link DoubleVector#mul} so the JIT statically resolves it to the platform's double-multiply intrinsic. + */ +public final class SimdDoubleDoubleMulProcessor extends SimdDoubleDoubleProcessor +{ + public SimdDoubleDoubleMulProcessor( + ExprVectorProcessor left, + ExprVectorProcessor right, + DoubleBivariateDoublesFunction scalarFallback + ) + { + super(left, right, scalarFallback); + } + + @Override + protected void processVector( + double[] leftInput, + double[] rightInput, + boolean[] leftNulls, + boolean[] rightNulls, + int currentSize + ) + { + final boolean hasLeftNulls = leftNulls != null; + final boolean hasRightNulls = rightNulls != null; + final int laneCount = SPECIES.length(); + final int upperBound = SPECIES.loopBound(currentSize); + int i = 0; + if (!hasLeftNulls && !hasRightNulls) { + for (; i < upperBound; i += laneCount) { + final DoubleVector va = DoubleVector.fromArray(SPECIES, leftInput, i); + final DoubleVector vb = DoubleVector.fromArray(SPECIES, rightInput, i); + va.mul(vb).intoArray(outValues, i); + } + for (; i < currentSize; i++) { + outValues[i] = scalarFallback.process(leftInput[i], rightInput[i]); + } + Arrays.fill(outNulls, 0, currentSize, false); + } else { + for (; i < upperBound; i += laneCount) { + final VectorMask nm; + if (hasLeftNulls && hasRightNulls) { + nm = VectorMask.fromArray(SPECIES, leftNulls, i) + .or(VectorMask.fromArray(SPECIES, rightNulls, i)); + } else if (hasLeftNulls) { + nm = VectorMask.fromArray(SPECIES, leftNulls, i); + } else { + nm = VectorMask.fromArray(SPECIES, rightNulls, i); + } 
+ final DoubleVector va = DoubleVector.fromArray(SPECIES, leftInput, i); + final DoubleVector vb = DoubleVector.fromArray(SPECIES, rightInput, i); + va.mul(vb).intoArray(outValues, i); + nm.intoArray(outNulls, i); + } + for (; i < currentSize; i++) { + final boolean isNull = (hasLeftNulls && leftNulls[i]) || (hasRightNulls && rightNulls[i]); + outNulls[i] = isNull; + if (!isNull) { + outValues[i] = scalarFallback.process(leftInput[i], rightInput[i]); + } + } + } + } +} diff --git a/processing/src/main/java/org/apache/druid/math/expr/vector/simd/SimdDoubleDoubleProcessor.java b/processing/src/main/java/org/apache/druid/math/expr/vector/simd/SimdDoubleDoubleProcessor.java new file mode 100644 index 000000000000..8f3eeebac2c8 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/math/expr/vector/simd/SimdDoubleDoubleProcessor.java @@ -0,0 +1,95 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.math.expr.vector.simd; + +import jdk.incubator.vector.DoubleVector; +import jdk.incubator.vector.VectorSpecies; +import org.apache.druid.math.expr.Expr; +import org.apache.druid.math.expr.ExpressionType; +import org.apache.druid.math.expr.vector.CastToTypeVectorProcessor; +import org.apache.druid.math.expr.vector.ExprEvalDoubleVector; +import org.apache.druid.math.expr.vector.ExprEvalVector; +import org.apache.druid.math.expr.vector.ExprVectorProcessor; +import org.apache.druid.math.expr.vector.functional.DoubleBivariateDoublesFunction; + +import javax.annotation.Nullable; + +/** + * Abstract base for SIMD processors that compute {@code (double[], double[]) -> double[]} ops. See + * {@link SimdLongLongProcessor} for the design rationale. + */ +abstract class SimdDoubleDoubleProcessor implements ExprVectorProcessor +{ + static final VectorSpecies SPECIES = DoubleVector.SPECIES_PREFERRED; + + private final ExprVectorProcessor left; + private final ExprVectorProcessor right; + final DoubleBivariateDoublesFunction scalarFallback; + final double[] outValues; + final boolean[] outNulls; + + protected SimdDoubleDoubleProcessor( + ExprVectorProcessor left, + ExprVectorProcessor right, + DoubleBivariateDoublesFunction scalarFallback + ) + { + this.left = CastToTypeVectorProcessor.cast(left, ExpressionType.DOUBLE); + this.right = CastToTypeVectorProcessor.cast(right, ExpressionType.DOUBLE); + this.scalarFallback = scalarFallback; + this.outValues = new double[this.left.maxVectorSize()]; + this.outNulls = new boolean[this.left.maxVectorSize()]; + } + + @Override + public final ExprEvalVector evalVector(Expr.VectorInputBinding bindings) + { + final ExprEvalVector lhs = left.evalVector(bindings); + final ExprEvalVector rhs = right.evalVector(bindings); + processVector( + lhs.values(), + rhs.values(), + lhs.getNullVector(), + rhs.getNullVector(), + bindings.getCurrentVectorSize() + ); + return new ExprEvalDoubleVector(outValues, outNulls); + } + 
+ protected abstract void processVector( + double[] leftInput, + double[] rightInput, + @Nullable boolean[] leftNulls, + @Nullable boolean[] rightNulls, + int currentSize + ); + + @Override + public final ExpressionType getOutputType() + { + return ExpressionType.DOUBLE; + } + + @Override + public final int maxVectorSize() + { + return outValues.length; + } +} diff --git a/processing/src/main/java/org/apache/druid/math/expr/vector/simd/SimdDoubleDoubleSubProcessor.java b/processing/src/main/java/org/apache/druid/math/expr/vector/simd/SimdDoubleDoubleSubProcessor.java new file mode 100644 index 000000000000..9f290240bce8 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/math/expr/vector/simd/SimdDoubleDoubleSubProcessor.java @@ -0,0 +1,93 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.math.expr.vector.simd; + +import jdk.incubator.vector.DoubleVector; +import jdk.incubator.vector.VectorMask; +import org.apache.druid.math.expr.vector.ExprVectorProcessor; +import org.apache.druid.math.expr.vector.functional.DoubleBivariateDoublesFunction; + +import java.util.Arrays; + +/** + * SIMD specialization of {@code (double[], double[]) -> double[]} subtraction. 
The op is hardcoded to + * {@link DoubleVector#sub} so the JIT statically resolves it to the platform's double-subtract intrinsic. + */ +public final class SimdDoubleDoubleSubProcessor extends SimdDoubleDoubleProcessor +{ + public SimdDoubleDoubleSubProcessor( + ExprVectorProcessor left, + ExprVectorProcessor right, + DoubleBivariateDoublesFunction scalarFallback + ) + { + super(left, right, scalarFallback); + } + + @Override + protected void processVector( + double[] leftInput, + double[] rightInput, + boolean[] leftNulls, + boolean[] rightNulls, + int currentSize + ) + { + final boolean hasLeftNulls = leftNulls != null; + final boolean hasRightNulls = rightNulls != null; + final int laneCount = SPECIES.length(); + final int upperBound = SPECIES.loopBound(currentSize); + int i = 0; + if (!hasLeftNulls && !hasRightNulls) { + for (; i < upperBound; i += laneCount) { + final DoubleVector va = DoubleVector.fromArray(SPECIES, leftInput, i); + final DoubleVector vb = DoubleVector.fromArray(SPECIES, rightInput, i); + va.sub(vb).intoArray(outValues, i); + } + for (; i < currentSize; i++) { + outValues[i] = scalarFallback.process(leftInput[i], rightInput[i]); + } + Arrays.fill(outNulls, 0, currentSize, false); + } else { + for (; i < upperBound; i += laneCount) { + final VectorMask nm; + if (hasLeftNulls && hasRightNulls) { + nm = VectorMask.fromArray(SPECIES, leftNulls, i) + .or(VectorMask.fromArray(SPECIES, rightNulls, i)); + } else if (hasLeftNulls) { + nm = VectorMask.fromArray(SPECIES, leftNulls, i); + } else { + nm = VectorMask.fromArray(SPECIES, rightNulls, i); + } + final DoubleVector va = DoubleVector.fromArray(SPECIES, leftInput, i); + final DoubleVector vb = DoubleVector.fromArray(SPECIES, rightInput, i); + va.sub(vb).intoArray(outValues, i); + nm.intoArray(outNulls, i); + } + for (; i < currentSize; i++) { + final boolean isNull = (hasLeftNulls && leftNulls[i]) || (hasRightNulls && rightNulls[i]); + outNulls[i] = isNull; + if (!isNull) { + outValues[i] = 
scalarFallback.process(leftInput[i], rightInput[i]); + } + } + } + } +} diff --git a/processing/src/main/java/org/apache/druid/math/expr/vector/simd/SimdDoubleLongAddProcessor.java b/processing/src/main/java/org/apache/druid/math/expr/vector/simd/SimdDoubleLongAddProcessor.java new file mode 100644 index 000000000000..5d1eb74f0d96 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/math/expr/vector/simd/SimdDoubleLongAddProcessor.java @@ -0,0 +1,96 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.math.expr.vector.simd; + +import jdk.incubator.vector.DoubleVector; +import jdk.incubator.vector.LongVector; +import jdk.incubator.vector.VectorMask; +import org.apache.druid.math.expr.vector.ExprVectorProcessor; +import org.apache.druid.math.expr.vector.functional.DoubleBivariateDoubleLongFunction; + +import java.util.Arrays; + +/** + * SIMD specialization of {@code (double[], long[]) -> double[]} addition. The op is hardcoded to + * {@link DoubleVector#add} so the JIT statically resolves it to the platform's double-add intrinsic. 
+ */ +public final class SimdDoubleLongAddProcessor extends SimdDoubleLongProcessor +{ + public SimdDoubleLongAddProcessor( + ExprVectorProcessor left, + ExprVectorProcessor right, + DoubleBivariateDoubleLongFunction scalarFallback + ) + { + super(left, right, scalarFallback); + } + + @Override + protected void processVector( + double[] leftInput, + long[] rightInput, + boolean[] leftNulls, + boolean[] rightNulls, + int currentSize + ) + { + final boolean hasLeftNulls = leftNulls != null; + final boolean hasRightNulls = rightNulls != null; + final int laneCount = DOUBLE_SPECIES.length(); + final int upperBound = DOUBLE_SPECIES.loopBound(currentSize); + int i = 0; + if (!hasLeftNulls && !hasRightNulls) { + for (; i < upperBound; i += laneCount) { + final DoubleVector va = DoubleVector.fromArray(DOUBLE_SPECIES, leftInput, i); + final DoubleVector vb = + (DoubleVector) LongVector.fromArray(LONG_SPECIES, rightInput, i).castShape(DOUBLE_SPECIES, 0); + va.add(vb).intoArray(outValues, i); + } + for (; i < currentSize; i++) { + outValues[i] = scalarFallback.process(leftInput[i], rightInput[i]); + } + Arrays.fill(outNulls, 0, currentSize, false); + } else { + for (; i < upperBound; i += laneCount) { + final VectorMask nm; + if (hasLeftNulls && hasRightNulls) { + nm = VectorMask.fromArray(DOUBLE_SPECIES, leftNulls, i) + .or(VectorMask.fromArray(DOUBLE_SPECIES, rightNulls, i)); + } else if (hasLeftNulls) { + nm = VectorMask.fromArray(DOUBLE_SPECIES, leftNulls, i); + } else { + nm = VectorMask.fromArray(DOUBLE_SPECIES, rightNulls, i); + } + final DoubleVector va = DoubleVector.fromArray(DOUBLE_SPECIES, leftInput, i); + final DoubleVector vb = + (DoubleVector) LongVector.fromArray(LONG_SPECIES, rightInput, i).castShape(DOUBLE_SPECIES, 0); + va.add(vb).intoArray(outValues, i); + nm.intoArray(outNulls, i); + } + for (; i < currentSize; i++) { + final boolean isNull = (hasLeftNulls && leftNulls[i]) || (hasRightNulls && rightNulls[i]); + outNulls[i] = isNull; + if (!isNull) { + 
outValues[i] = scalarFallback.process(leftInput[i], rightInput[i]); + } + } + } + } +} diff --git a/processing/src/main/java/org/apache/druid/math/expr/vector/simd/SimdDoubleLongMulProcessor.java b/processing/src/main/java/org/apache/druid/math/expr/vector/simd/SimdDoubleLongMulProcessor.java new file mode 100644 index 000000000000..b593799ee262 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/math/expr/vector/simd/SimdDoubleLongMulProcessor.java @@ -0,0 +1,96 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.math.expr.vector.simd; + +import jdk.incubator.vector.DoubleVector; +import jdk.incubator.vector.LongVector; +import jdk.incubator.vector.VectorMask; +import org.apache.druid.math.expr.vector.ExprVectorProcessor; +import org.apache.druid.math.expr.vector.functional.DoubleBivariateDoubleLongFunction; + +import java.util.Arrays; + +/** + * SIMD specialization of {@code (double[], long[]) -> double[]} multiplication. The op is hardcoded to + * {@link DoubleVector#mul} so the JIT statically resolves it to the platform's double-multiply intrinsic. 
+ */ +public final class SimdDoubleLongMulProcessor extends SimdDoubleLongProcessor +{ + public SimdDoubleLongMulProcessor( + ExprVectorProcessor left, + ExprVectorProcessor right, + DoubleBivariateDoubleLongFunction scalarFallback + ) + { + super(left, right, scalarFallback); + } + + @Override + protected void processVector( + double[] leftInput, + long[] rightInput, + boolean[] leftNulls, + boolean[] rightNulls, + int currentSize + ) + { + final boolean hasLeftNulls = leftNulls != null; + final boolean hasRightNulls = rightNulls != null; + final int laneCount = DOUBLE_SPECIES.length(); + final int upperBound = DOUBLE_SPECIES.loopBound(currentSize); + int i = 0; + if (!hasLeftNulls && !hasRightNulls) { + for (; i < upperBound; i += laneCount) { + final DoubleVector va = DoubleVector.fromArray(DOUBLE_SPECIES, leftInput, i); + final DoubleVector vb = + (DoubleVector) LongVector.fromArray(LONG_SPECIES, rightInput, i).castShape(DOUBLE_SPECIES, 0); + va.mul(vb).intoArray(outValues, i); + } + for (; i < currentSize; i++) { + outValues[i] = scalarFallback.process(leftInput[i], rightInput[i]); + } + Arrays.fill(outNulls, 0, currentSize, false); + } else { + for (; i < upperBound; i += laneCount) { + final VectorMask nm; + if (hasLeftNulls && hasRightNulls) { + nm = VectorMask.fromArray(DOUBLE_SPECIES, leftNulls, i) + .or(VectorMask.fromArray(DOUBLE_SPECIES, rightNulls, i)); + } else if (hasLeftNulls) { + nm = VectorMask.fromArray(DOUBLE_SPECIES, leftNulls, i); + } else { + nm = VectorMask.fromArray(DOUBLE_SPECIES, rightNulls, i); + } + final DoubleVector va = DoubleVector.fromArray(DOUBLE_SPECIES, leftInput, i); + final DoubleVector vb = + (DoubleVector) LongVector.fromArray(LONG_SPECIES, rightInput, i).castShape(DOUBLE_SPECIES, 0); + va.mul(vb).intoArray(outValues, i); + nm.intoArray(outNulls, i); + } + for (; i < currentSize; i++) { + final boolean isNull = (hasLeftNulls && leftNulls[i]) || (hasRightNulls && rightNulls[i]); + outNulls[i] = isNull; + if (!isNull) { + 
outValues[i] = scalarFallback.process(leftInput[i], rightInput[i]); + } + } + } + } +} diff --git a/processing/src/main/java/org/apache/druid/math/expr/vector/simd/SimdDoubleLongProcessor.java b/processing/src/main/java/org/apache/druid/math/expr/vector/simd/SimdDoubleLongProcessor.java new file mode 100644 index 000000000000..e3e705d656a1 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/math/expr/vector/simd/SimdDoubleLongProcessor.java @@ -0,0 +1,98 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.math.expr.vector.simd; + +import jdk.incubator.vector.DoubleVector; +import jdk.incubator.vector.LongVector; +import jdk.incubator.vector.VectorSpecies; +import org.apache.druid.math.expr.Expr; +import org.apache.druid.math.expr.ExpressionType; +import org.apache.druid.math.expr.vector.CastToTypeVectorProcessor; +import org.apache.druid.math.expr.vector.ExprEvalDoubleVector; +import org.apache.druid.math.expr.vector.ExprEvalVector; +import org.apache.druid.math.expr.vector.ExprVectorProcessor; +import org.apache.druid.math.expr.vector.functional.DoubleBivariateDoubleLongFunction; + +import javax.annotation.Nullable; + +/** + * Abstract base for SIMD processors that compute {@code (double[], long[]) -> double[]} ops. The long lane is + * widened to {@link DoubleVector} via {@code castShape(DoubleVector.SPECIES_PREFERRED, 0)} in each subclass's hot + * loop. See {@link SimdLongLongProcessor} for the design rationale. + */ +abstract class SimdDoubleLongProcessor implements ExprVectorProcessor +{ + static final VectorSpecies LONG_SPECIES = LongVector.SPECIES_PREFERRED; + static final VectorSpecies DOUBLE_SPECIES = DoubleVector.SPECIES_PREFERRED; + + private final ExprVectorProcessor left; + private final ExprVectorProcessor right; + final DoubleBivariateDoubleLongFunction scalarFallback; + final double[] outValues; + final boolean[] outNulls; + + protected SimdDoubleLongProcessor( + ExprVectorProcessor left, + ExprVectorProcessor right, + DoubleBivariateDoubleLongFunction scalarFallback + ) + { + this.left = CastToTypeVectorProcessor.cast(left, ExpressionType.DOUBLE); + this.right = CastToTypeVectorProcessor.cast(right, ExpressionType.LONG); + this.scalarFallback = scalarFallback; + this.outValues = new double[this.left.maxVectorSize()]; + this.outNulls = new boolean[this.left.maxVectorSize()]; + } + + @Override + public final ExprEvalVector evalVector(Expr.VectorInputBinding bindings) + { + final ExprEvalVector lhs = 
left.evalVector(bindings); + final ExprEvalVector rhs = right.evalVector(bindings); + processVector( + lhs.values(), + rhs.values(), + lhs.getNullVector(), + rhs.getNullVector(), + bindings.getCurrentVectorSize() + ); + return new ExprEvalDoubleVector(outValues, outNulls); + } + + protected abstract void processVector( + double[] leftInput, + long[] rightInput, + @Nullable boolean[] leftNulls, + @Nullable boolean[] rightNulls, + int currentSize + ); + + @Override + public final ExpressionType getOutputType() + { + return ExpressionType.DOUBLE; + } + + @Override + public final int maxVectorSize() + { + return outValues.length; + } +} diff --git a/processing/src/main/java/org/apache/druid/math/expr/vector/simd/SimdDoubleLongSubProcessor.java b/processing/src/main/java/org/apache/druid/math/expr/vector/simd/SimdDoubleLongSubProcessor.java new file mode 100644 index 000000000000..97da5a718f60 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/math/expr/vector/simd/SimdDoubleLongSubProcessor.java @@ -0,0 +1,96 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.math.expr.vector.simd; + +import jdk.incubator.vector.DoubleVector; +import jdk.incubator.vector.LongVector; +import jdk.incubator.vector.VectorMask; +import org.apache.druid.math.expr.vector.ExprVectorProcessor; +import org.apache.druid.math.expr.vector.functional.DoubleBivariateDoubleLongFunction; + +import java.util.Arrays; + +/** + * SIMD specialization of {@code (double[], long[]) -> double[]} subtraction. The op is hardcoded to + * {@link DoubleVector#sub} so the JIT statically resolves it to the platform's double-subtract intrinsic. + */ +public final class SimdDoubleLongSubProcessor extends SimdDoubleLongProcessor +{ + public SimdDoubleLongSubProcessor( + ExprVectorProcessor left, + ExprVectorProcessor right, + DoubleBivariateDoubleLongFunction scalarFallback + ) + { + super(left, right, scalarFallback); + } + + @Override + protected void processVector( + double[] leftInput, + long[] rightInput, + boolean[] leftNulls, + boolean[] rightNulls, + int currentSize + ) + { + final boolean hasLeftNulls = leftNulls != null; + final boolean hasRightNulls = rightNulls != null; + final int laneCount = DOUBLE_SPECIES.length(); + final int upperBound = DOUBLE_SPECIES.loopBound(currentSize); + int i = 0; + if (!hasLeftNulls && !hasRightNulls) { + for (; i < upperBound; i += laneCount) { + final DoubleVector va = DoubleVector.fromArray(DOUBLE_SPECIES, leftInput, i); + final DoubleVector vb = + (DoubleVector) LongVector.fromArray(LONG_SPECIES, rightInput, i).castShape(DOUBLE_SPECIES, 0); + va.sub(vb).intoArray(outValues, i); + } + for (; i < currentSize; i++) { + outValues[i] = scalarFallback.process(leftInput[i], rightInput[i]); + } + Arrays.fill(outNulls, 0, currentSize, false); + } else { + for (; i < upperBound; i += laneCount) { + final VectorMask nm; + if (hasLeftNulls && hasRightNulls) { + nm = VectorMask.fromArray(DOUBLE_SPECIES, leftNulls, i) + .or(VectorMask.fromArray(DOUBLE_SPECIES, rightNulls, i)); + } else if (hasLeftNulls) { + 
nm = VectorMask.fromArray(DOUBLE_SPECIES, leftNulls, i); + } else { + nm = VectorMask.fromArray(DOUBLE_SPECIES, rightNulls, i); + } + final DoubleVector va = DoubleVector.fromArray(DOUBLE_SPECIES, leftInput, i); + final DoubleVector vb = + (DoubleVector) LongVector.fromArray(LONG_SPECIES, rightInput, i).castShape(DOUBLE_SPECIES, 0); + va.sub(vb).intoArray(outValues, i); + nm.intoArray(outNulls, i); + } + for (; i < currentSize; i++) { + final boolean isNull = (hasLeftNulls && leftNulls[i]) || (hasRightNulls && rightNulls[i]); + outNulls[i] = isNull; + if (!isNull) { + outValues[i] = scalarFallback.process(leftInput[i], rightInput[i]); + } + } + } + } +} diff --git a/processing/src/main/java/org/apache/druid/math/expr/vector/simd/SimdLongDoubleAddProcessor.java b/processing/src/main/java/org/apache/druid/math/expr/vector/simd/SimdLongDoubleAddProcessor.java new file mode 100644 index 000000000000..bd077a05a023 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/math/expr/vector/simd/SimdLongDoubleAddProcessor.java @@ -0,0 +1,96 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.math.expr.vector.simd; + +import jdk.incubator.vector.DoubleVector; +import jdk.incubator.vector.LongVector; +import jdk.incubator.vector.VectorMask; +import org.apache.druid.math.expr.vector.ExprVectorProcessor; +import org.apache.druid.math.expr.vector.functional.DoubleBivariateLongDoubleFunction; + +import java.util.Arrays; + +/** + * SIMD specialization of {@code (long[], double[]) -> double[]} addition. The op is hardcoded to + * {@link DoubleVector#add} so the JIT statically resolves it to the platform's double-add intrinsic. + */ +public final class SimdLongDoubleAddProcessor extends SimdLongDoubleProcessor +{ + public SimdLongDoubleAddProcessor( + ExprVectorProcessor left, + ExprVectorProcessor right, + DoubleBivariateLongDoubleFunction scalarFallback + ) + { + super(left, right, scalarFallback); + } + + @Override + protected void processVector( + long[] leftInput, + double[] rightInput, + boolean[] leftNulls, + boolean[] rightNulls, + int currentSize + ) + { + final boolean hasLeftNulls = leftNulls != null; + final boolean hasRightNulls = rightNulls != null; + final int laneCount = DOUBLE_SPECIES.length(); + final int upperBound = DOUBLE_SPECIES.loopBound(currentSize); + int i = 0; + if (!hasLeftNulls && !hasRightNulls) { + for (; i < upperBound; i += laneCount) { + final DoubleVector va = + (DoubleVector) LongVector.fromArray(LONG_SPECIES, leftInput, i).castShape(DOUBLE_SPECIES, 0); + final DoubleVector vb = DoubleVector.fromArray(DOUBLE_SPECIES, rightInput, i); + va.add(vb).intoArray(outValues, i); + } + for (; i < currentSize; i++) { + outValues[i] = scalarFallback.process(leftInput[i], rightInput[i]); + } + Arrays.fill(outNulls, 0, currentSize, false); + } else { + for (; i < upperBound; i += laneCount) { + final VectorMask nm; + if (hasLeftNulls && hasRightNulls) { + nm = VectorMask.fromArray(DOUBLE_SPECIES, leftNulls, i) + .or(VectorMask.fromArray(DOUBLE_SPECIES, rightNulls, i)); + } else if (hasLeftNulls) { + nm = 
VectorMask.fromArray(DOUBLE_SPECIES, leftNulls, i); + } else { + nm = VectorMask.fromArray(DOUBLE_SPECIES, rightNulls, i); + } + final DoubleVector va = + (DoubleVector) LongVector.fromArray(LONG_SPECIES, leftInput, i).castShape(DOUBLE_SPECIES, 0); + final DoubleVector vb = DoubleVector.fromArray(DOUBLE_SPECIES, rightInput, i); + va.add(vb).intoArray(outValues, i); + nm.intoArray(outNulls, i); + } + for (; i < currentSize; i++) { + final boolean isNull = (hasLeftNulls && leftNulls[i]) || (hasRightNulls && rightNulls[i]); + outNulls[i] = isNull; + if (!isNull) { + outValues[i] = scalarFallback.process(leftInput[i], rightInput[i]); + } + } + } + } +} diff --git a/processing/src/main/java/org/apache/druid/math/expr/vector/simd/SimdLongDoubleMulProcessor.java b/processing/src/main/java/org/apache/druid/math/expr/vector/simd/SimdLongDoubleMulProcessor.java new file mode 100644 index 000000000000..2d211e26b7e1 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/math/expr/vector/simd/SimdLongDoubleMulProcessor.java @@ -0,0 +1,96 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.math.expr.vector.simd; + +import jdk.incubator.vector.DoubleVector; +import jdk.incubator.vector.LongVector; +import jdk.incubator.vector.VectorMask; +import org.apache.druid.math.expr.vector.ExprVectorProcessor; +import org.apache.druid.math.expr.vector.functional.DoubleBivariateLongDoubleFunction; + +import java.util.Arrays; + +/** + * SIMD specialization of {@code (long[], double[]) -> double[]} multiplication. The op is hardcoded to + * {@link DoubleVector#mul} so the JIT statically resolves it to the platform's double-multiply intrinsic. + */ +public final class SimdLongDoubleMulProcessor extends SimdLongDoubleProcessor +{ + public SimdLongDoubleMulProcessor( + ExprVectorProcessor left, + ExprVectorProcessor right, + DoubleBivariateLongDoubleFunction scalarFallback + ) + { + super(left, right, scalarFallback); + } + + @Override + protected void processVector( + long[] leftInput, + double[] rightInput, + boolean[] leftNulls, + boolean[] rightNulls, + int currentSize + ) + { + final boolean hasLeftNulls = leftNulls != null; + final boolean hasRightNulls = rightNulls != null; + final int laneCount = DOUBLE_SPECIES.length(); + final int upperBound = DOUBLE_SPECIES.loopBound(currentSize); + int i = 0; + if (!hasLeftNulls && !hasRightNulls) { + for (; i < upperBound; i += laneCount) { + final DoubleVector va = + (DoubleVector) LongVector.fromArray(LONG_SPECIES, leftInput, i).castShape(DOUBLE_SPECIES, 0); + final DoubleVector vb = DoubleVector.fromArray(DOUBLE_SPECIES, rightInput, i); + va.mul(vb).intoArray(outValues, i); + } + for (; i < currentSize; i++) { + outValues[i] = scalarFallback.process(leftInput[i], rightInput[i]); + } + Arrays.fill(outNulls, 0, currentSize, false); + } else { + for (; i < upperBound; i += laneCount) { + final VectorMask nm; + if (hasLeftNulls && hasRightNulls) { + nm = VectorMask.fromArray(DOUBLE_SPECIES, leftNulls, i) + .or(VectorMask.fromArray(DOUBLE_SPECIES, rightNulls, i)); + } else if (hasLeftNulls) 
{ + nm = VectorMask.fromArray(DOUBLE_SPECIES, leftNulls, i); + } else { + nm = VectorMask.fromArray(DOUBLE_SPECIES, rightNulls, i); + } + final DoubleVector va = + (DoubleVector) LongVector.fromArray(LONG_SPECIES, leftInput, i).castShape(DOUBLE_SPECIES, 0); + final DoubleVector vb = DoubleVector.fromArray(DOUBLE_SPECIES, rightInput, i); + va.mul(vb).intoArray(outValues, i); + nm.intoArray(outNulls, i); + } + for (; i < currentSize; i++) { + final boolean isNull = (hasLeftNulls && leftNulls[i]) || (hasRightNulls && rightNulls[i]); + outNulls[i] = isNull; + if (!isNull) { + outValues[i] = scalarFallback.process(leftInput[i], rightInput[i]); + } + } + } + } +} diff --git a/processing/src/main/java/org/apache/druid/math/expr/vector/simd/SimdLongDoubleProcessor.java b/processing/src/main/java/org/apache/druid/math/expr/vector/simd/SimdLongDoubleProcessor.java new file mode 100644 index 000000000000..366354f82b2e --- /dev/null +++ b/processing/src/main/java/org/apache/druid/math/expr/vector/simd/SimdLongDoubleProcessor.java @@ -0,0 +1,98 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.math.expr.vector.simd; + +import jdk.incubator.vector.DoubleVector; +import jdk.incubator.vector.LongVector; +import jdk.incubator.vector.VectorSpecies; +import org.apache.druid.math.expr.Expr; +import org.apache.druid.math.expr.ExpressionType; +import org.apache.druid.math.expr.vector.CastToTypeVectorProcessor; +import org.apache.druid.math.expr.vector.ExprEvalDoubleVector; +import org.apache.druid.math.expr.vector.ExprEvalVector; +import org.apache.druid.math.expr.vector.ExprVectorProcessor; +import org.apache.druid.math.expr.vector.functional.DoubleBivariateLongDoubleFunction; + +import javax.annotation.Nullable; + +/** + * Abstract base for SIMD processors that compute {@code (long[], double[]) -> double[]} ops. The long lane is + * widened to {@link DoubleVector} via {@code castShape(DoubleVector.SPECIES_PREFERRED, 0)} in each subclass's hot + * loop. See {@link SimdLongLongProcessor} for the design rationale. + */ +abstract class SimdLongDoubleProcessor implements ExprVectorProcessor +{ + static final VectorSpecies LONG_SPECIES = LongVector.SPECIES_PREFERRED; + static final VectorSpecies DOUBLE_SPECIES = DoubleVector.SPECIES_PREFERRED; + + private final ExprVectorProcessor left; + private final ExprVectorProcessor right; + final DoubleBivariateLongDoubleFunction scalarFallback; + final double[] outValues; + final boolean[] outNulls; + + protected SimdLongDoubleProcessor( + ExprVectorProcessor left, + ExprVectorProcessor right, + DoubleBivariateLongDoubleFunction scalarFallback + ) + { + this.left = CastToTypeVectorProcessor.cast(left, ExpressionType.LONG); + this.right = CastToTypeVectorProcessor.cast(right, ExpressionType.DOUBLE); + this.scalarFallback = scalarFallback; + this.outValues = new double[this.left.maxVectorSize()]; + this.outNulls = new boolean[this.left.maxVectorSize()]; + } + + @Override + public final ExprEvalVector evalVector(Expr.VectorInputBinding bindings) + { + final ExprEvalVector lhs = 
left.evalVector(bindings); + final ExprEvalVector rhs = right.evalVector(bindings); + processVector( + lhs.values(), + rhs.values(), + lhs.getNullVector(), + rhs.getNullVector(), + bindings.getCurrentVectorSize() + ); + return new ExprEvalDoubleVector(outValues, outNulls); + } + + protected abstract void processVector( + long[] leftInput, + double[] rightInput, + @Nullable boolean[] leftNulls, + @Nullable boolean[] rightNulls, + int currentSize + ); + + @Override + public final ExpressionType getOutputType() + { + return ExpressionType.DOUBLE; + } + + @Override + public final int maxVectorSize() + { + return outValues.length; + } +} diff --git a/processing/src/main/java/org/apache/druid/math/expr/vector/simd/SimdLongDoubleSubProcessor.java b/processing/src/main/java/org/apache/druid/math/expr/vector/simd/SimdLongDoubleSubProcessor.java new file mode 100644 index 000000000000..33c8602cf884 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/math/expr/vector/simd/SimdLongDoubleSubProcessor.java @@ -0,0 +1,96 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.math.expr.vector.simd; + +import jdk.incubator.vector.DoubleVector; +import jdk.incubator.vector.LongVector; +import jdk.incubator.vector.VectorMask; +import org.apache.druid.math.expr.vector.ExprVectorProcessor; +import org.apache.druid.math.expr.vector.functional.DoubleBivariateLongDoubleFunction; + +import java.util.Arrays; + +/** + * SIMD specialization of {@code (long[], double[]) -> double[]} subtraction. The op is hardcoded to + * {@link DoubleVector#sub} so the JIT statically resolves it to the platform's double-subtract intrinsic. + */ +public final class SimdLongDoubleSubProcessor extends SimdLongDoubleProcessor +{ + public SimdLongDoubleSubProcessor( + ExprVectorProcessor left, + ExprVectorProcessor right, + DoubleBivariateLongDoubleFunction scalarFallback + ) + { + super(left, right, scalarFallback); + } + + @Override + protected void processVector( + long[] leftInput, + double[] rightInput, + boolean[] leftNulls, + boolean[] rightNulls, + int currentSize + ) + { + final boolean hasLeftNulls = leftNulls != null; + final boolean hasRightNulls = rightNulls != null; + final int laneCount = DOUBLE_SPECIES.length(); + final int upperBound = DOUBLE_SPECIES.loopBound(currentSize); + int i = 0; + if (!hasLeftNulls && !hasRightNulls) { + for (; i < upperBound; i += laneCount) { + final DoubleVector va = + (DoubleVector) LongVector.fromArray(LONG_SPECIES, leftInput, i).castShape(DOUBLE_SPECIES, 0); + final DoubleVector vb = DoubleVector.fromArray(DOUBLE_SPECIES, rightInput, i); + va.sub(vb).intoArray(outValues, i); + } + for (; i < currentSize; i++) { + outValues[i] = scalarFallback.process(leftInput[i], rightInput[i]); + } + Arrays.fill(outNulls, 0, currentSize, false); + } else { + for (; i < upperBound; i += laneCount) { + final VectorMask nm; + if (hasLeftNulls && hasRightNulls) { + nm = VectorMask.fromArray(DOUBLE_SPECIES, leftNulls, i) + .or(VectorMask.fromArray(DOUBLE_SPECIES, rightNulls, i)); + } else if (hasLeftNulls) { + 
nm = VectorMask.fromArray(DOUBLE_SPECIES, leftNulls, i); + } else { + nm = VectorMask.fromArray(DOUBLE_SPECIES, rightNulls, i); + } + final DoubleVector va = + (DoubleVector) LongVector.fromArray(LONG_SPECIES, leftInput, i).castShape(DOUBLE_SPECIES, 0); + final DoubleVector vb = DoubleVector.fromArray(DOUBLE_SPECIES, rightInput, i); + va.sub(vb).intoArray(outValues, i); + nm.intoArray(outNulls, i); + } + for (; i < currentSize; i++) { + final boolean isNull = (hasLeftNulls && leftNulls[i]) || (hasRightNulls && rightNulls[i]); + outNulls[i] = isNull; + if (!isNull) { + outValues[i] = scalarFallback.process(leftInput[i], rightInput[i]); + } + } + } + } +} diff --git a/processing/src/main/java/org/apache/druid/math/expr/vector/simd/SimdLongLongAddProcessor.java b/processing/src/main/java/org/apache/druid/math/expr/vector/simd/SimdLongLongAddProcessor.java new file mode 100644 index 000000000000..f5e0298af09e --- /dev/null +++ b/processing/src/main/java/org/apache/druid/math/expr/vector/simd/SimdLongLongAddProcessor.java @@ -0,0 +1,93 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.math.expr.vector.simd; + +import jdk.incubator.vector.LongVector; +import jdk.incubator.vector.VectorMask; +import org.apache.druid.math.expr.vector.ExprVectorProcessor; +import org.apache.druid.math.expr.vector.functional.LongBivariateLongsFunction; + +import java.util.Arrays; + +/** + * SIMD specialization of {@code (long[], long[]) -> long[]} addition. The op is hardcoded to {@link LongVector#add} + * so the JIT statically resolves it to the platform's long-add intrinsic. + */ +public final class SimdLongLongAddProcessor extends SimdLongLongProcessor +{ + public SimdLongLongAddProcessor( + ExprVectorProcessor left, + ExprVectorProcessor right, + LongBivariateLongsFunction scalarFallback + ) + { + super(left, right, scalarFallback); + } + + @Override + protected void processVector( + long[] leftInput, + long[] rightInput, + boolean[] leftNulls, + boolean[] rightNulls, + int currentSize + ) + { + final boolean hasLeftNulls = leftNulls != null; + final boolean hasRightNulls = rightNulls != null; + final int laneCount = SPECIES.length(); + final int upperBound = SPECIES.loopBound(currentSize); + int i = 0; + if (!hasLeftNulls && !hasRightNulls) { + for (; i < upperBound; i += laneCount) { + final LongVector va = LongVector.fromArray(SPECIES, leftInput, i); + final LongVector vb = LongVector.fromArray(SPECIES, rightInput, i); + va.add(vb).intoArray(outValues, i); + } + for (; i < currentSize; i++) { + outValues[i] = scalarFallback.process(leftInput[i], rightInput[i]); + } + Arrays.fill(outNulls, 0, currentSize, false); + } else { + for (; i < upperBound; i += laneCount) { + final VectorMask nm; + if (hasLeftNulls && hasRightNulls) { + nm = VectorMask.fromArray(SPECIES, leftNulls, i) + .or(VectorMask.fromArray(SPECIES, rightNulls, i)); + } else if (hasLeftNulls) { + nm = VectorMask.fromArray(SPECIES, leftNulls, i); + } else { + nm = VectorMask.fromArray(SPECIES, rightNulls, i); + } + final LongVector va = LongVector.fromArray(SPECIES, 
leftInput, i); + final LongVector vb = LongVector.fromArray(SPECIES, rightInput, i); + va.add(vb).intoArray(outValues, i); + nm.intoArray(outNulls, i); + } + for (; i < currentSize; i++) { + final boolean isNull = (hasLeftNulls && leftNulls[i]) || (hasRightNulls && rightNulls[i]); + outNulls[i] = isNull; + if (!isNull) { + outValues[i] = scalarFallback.process(leftInput[i], rightInput[i]); + } + } + } + } +} diff --git a/processing/src/main/java/org/apache/druid/math/expr/vector/simd/SimdLongLongMulProcessor.java b/processing/src/main/java/org/apache/druid/math/expr/vector/simd/SimdLongLongMulProcessor.java new file mode 100644 index 000000000000..32e8e8aa751b --- /dev/null +++ b/processing/src/main/java/org/apache/druid/math/expr/vector/simd/SimdLongLongMulProcessor.java @@ -0,0 +1,93 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.math.expr.vector.simd; + +import jdk.incubator.vector.LongVector; +import jdk.incubator.vector.VectorMask; +import org.apache.druid.math.expr.vector.ExprVectorProcessor; +import org.apache.druid.math.expr.vector.functional.LongBivariateLongsFunction; + +import java.util.Arrays; + +/** + * SIMD specialization of {@code (long[], long[]) -> long[]} multiplication. The op is hardcoded to + * {@link LongVector#mul} so the JIT statically resolves it to the platform's long-multiply intrinsic. + */ +public final class SimdLongLongMulProcessor extends SimdLongLongProcessor +{ + public SimdLongLongMulProcessor( + ExprVectorProcessor left, + ExprVectorProcessor right, + LongBivariateLongsFunction scalarFallback + ) + { + super(left, right, scalarFallback); + } + + @Override + protected void processVector( + long[] leftInput, + long[] rightInput, + boolean[] leftNulls, + boolean[] rightNulls, + int currentSize + ) + { + final boolean hasLeftNulls = leftNulls != null; + final boolean hasRightNulls = rightNulls != null; + final int laneCount = SPECIES.length(); + final int upperBound = SPECIES.loopBound(currentSize); + int i = 0; + if (!hasLeftNulls && !hasRightNulls) { + for (; i < upperBound; i += laneCount) { + final LongVector va = LongVector.fromArray(SPECIES, leftInput, i); + final LongVector vb = LongVector.fromArray(SPECIES, rightInput, i); + va.mul(vb).intoArray(outValues, i); + } + for (; i < currentSize; i++) { + outValues[i] = scalarFallback.process(leftInput[i], rightInput[i]); + } + Arrays.fill(outNulls, 0, currentSize, false); + } else { + for (; i < upperBound; i += laneCount) { + final VectorMask nm; + if (hasLeftNulls && hasRightNulls) { + nm = VectorMask.fromArray(SPECIES, leftNulls, i) + .or(VectorMask.fromArray(SPECIES, rightNulls, i)); + } else if (hasLeftNulls) { + nm = VectorMask.fromArray(SPECIES, leftNulls, i); + } else { + nm = VectorMask.fromArray(SPECIES, rightNulls, i); + } + final LongVector va = 
LongVector.fromArray(SPECIES, leftInput, i); + final LongVector vb = LongVector.fromArray(SPECIES, rightInput, i); + va.mul(vb).intoArray(outValues, i); + nm.intoArray(outNulls, i); + } + for (; i < currentSize; i++) { + final boolean isNull = (hasLeftNulls && leftNulls[i]) || (hasRightNulls && rightNulls[i]); + outNulls[i] = isNull; + if (!isNull) { + outValues[i] = scalarFallback.process(leftInput[i], rightInput[i]); + } + } + } + } +} diff --git a/processing/src/main/java/org/apache/druid/math/expr/vector/simd/SimdLongLongProcessor.java b/processing/src/main/java/org/apache/druid/math/expr/vector/simd/SimdLongLongProcessor.java new file mode 100644 index 000000000000..999f4149fac2 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/math/expr/vector/simd/SimdLongLongProcessor.java @@ -0,0 +1,96 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.math.expr.vector.simd; + +import jdk.incubator.vector.LongVector; +import jdk.incubator.vector.VectorSpecies; +import org.apache.druid.math.expr.Expr; +import org.apache.druid.math.expr.ExpressionType; +import org.apache.druid.math.expr.vector.CastToTypeVectorProcessor; +import org.apache.druid.math.expr.vector.ExprEvalLongVector; +import org.apache.druid.math.expr.vector.ExprEvalVector; +import org.apache.druid.math.expr.vector.ExprVectorProcessor; +import org.apache.druid.math.expr.vector.functional.LongBivariateLongsFunction; + +import javax.annotation.Nullable; + +/** + * Abstract base for SIMD processors that compute {@code (long[], long[]) -> long[]} ops. Each concrete subclass + * (one per op) overrides {@link #processVector} with a hot loop that calls a statically-resolved {@link LongVector} + * method (e.g. {@code va.add(vb)}) so the JIT emits the corresponding SIMD intrinsic. + */ +abstract class SimdLongLongProcessor implements ExprVectorProcessor +{ + static final VectorSpecies SPECIES = LongVector.SPECIES_PREFERRED; + + private final ExprVectorProcessor left; + private final ExprVectorProcessor right; + final LongBivariateLongsFunction scalarFallback; + final long[] outValues; + final boolean[] outNulls; + + protected SimdLongLongProcessor( + ExprVectorProcessor left, + ExprVectorProcessor right, + LongBivariateLongsFunction scalarFallback + ) + { + this.left = CastToTypeVectorProcessor.cast(left, ExpressionType.LONG); + this.right = CastToTypeVectorProcessor.cast(right, ExpressionType.LONG); + this.scalarFallback = scalarFallback; + this.outValues = new long[this.left.maxVectorSize()]; + this.outNulls = new boolean[this.left.maxVectorSize()]; + } + + @Override + public final ExprEvalVector evalVector(Expr.VectorInputBinding bindings) + { + final ExprEvalVector lhs = left.evalVector(bindings); + final ExprEvalVector rhs = right.evalVector(bindings); + processVector( + lhs.values(), + rhs.values(), + lhs.getNullVector(), 
+ rhs.getNullVector(), + bindings.getCurrentVectorSize() + ); + return new ExprEvalLongVector(outValues, outNulls); + } + + protected abstract void processVector( + long[] leftInput, + long[] rightInput, + @Nullable boolean[] leftNulls, + @Nullable boolean[] rightNulls, + int currentSize + ); + + @Override + public final ExpressionType getOutputType() + { + return ExpressionType.LONG; + } + + @Override + public final int maxVectorSize() + { + return outValues.length; + } +} diff --git a/processing/src/main/java/org/apache/druid/math/expr/vector/simd/SimdLongLongSubProcessor.java b/processing/src/main/java/org/apache/druid/math/expr/vector/simd/SimdLongLongSubProcessor.java new file mode 100644 index 000000000000..ab85396463dd --- /dev/null +++ b/processing/src/main/java/org/apache/druid/math/expr/vector/simd/SimdLongLongSubProcessor.java @@ -0,0 +1,93 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.math.expr.vector.simd; + +import jdk.incubator.vector.LongVector; +import jdk.incubator.vector.VectorMask; +import org.apache.druid.math.expr.vector.ExprVectorProcessor; +import org.apache.druid.math.expr.vector.functional.LongBivariateLongsFunction; + +import java.util.Arrays; + +/** + * SIMD specialization of {@code (long[], long[]) -> long[]} subtraction. The op is hardcoded to {@link LongVector#sub} + * so the JIT statically resolves it to the platform's long-subtract intrinsic. + */ +public final class SimdLongLongSubProcessor extends SimdLongLongProcessor +{ + public SimdLongLongSubProcessor( + ExprVectorProcessor left, + ExprVectorProcessor right, + LongBivariateLongsFunction scalarFallback + ) + { + super(left, right, scalarFallback); + } + + @Override + protected void processVector( + long[] leftInput, + long[] rightInput, + boolean[] leftNulls, + boolean[] rightNulls, + int currentSize + ) + { + final boolean hasLeftNulls = leftNulls != null; + final boolean hasRightNulls = rightNulls != null; + final int laneCount = SPECIES.length(); + final int upperBound = SPECIES.loopBound(currentSize); + int i = 0; + if (!hasLeftNulls && !hasRightNulls) { + for (; i < upperBound; i += laneCount) { + final LongVector va = LongVector.fromArray(SPECIES, leftInput, i); + final LongVector vb = LongVector.fromArray(SPECIES, rightInput, i); + va.sub(vb).intoArray(outValues, i); + } + for (; i < currentSize; i++) { + outValues[i] = scalarFallback.process(leftInput[i], rightInput[i]); + } + Arrays.fill(outNulls, 0, currentSize, false); + } else { + for (; i < upperBound; i += laneCount) { + final VectorMask nm; + if (hasLeftNulls && hasRightNulls) { + nm = VectorMask.fromArray(SPECIES, leftNulls, i) + .or(VectorMask.fromArray(SPECIES, rightNulls, i)); + } else if (hasLeftNulls) { + nm = VectorMask.fromArray(SPECIES, leftNulls, i); + } else { + nm = VectorMask.fromArray(SPECIES, rightNulls, i); + } + final LongVector va = 
LongVector.fromArray(SPECIES, leftInput, i); + final LongVector vb = LongVector.fromArray(SPECIES, rightInput, i); + va.sub(vb).intoArray(outValues, i); + nm.intoArray(outNulls, i); + } + for (; i < currentSize; i++) { + final boolean isNull = (hasLeftNulls && leftNulls[i]) || (hasRightNulls && rightNulls[i]); + outNulls[i] = isNull; + if (!isNull) { + outValues[i] = scalarFallback.process(leftInput[i], rightInput[i]); + } + } + } + } +} diff --git a/processing/src/main/java/org/apache/druid/math/expr/vector/simd/SimdProcessors.java b/processing/src/main/java/org/apache/druid/math/expr/vector/simd/SimdProcessors.java new file mode 100644 index 000000000000..d8d74021c7a0 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/math/expr/vector/simd/SimdProcessors.java @@ -0,0 +1,98 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.math.expr.vector.simd; + +import org.apache.druid.error.DruidException; +import org.apache.druid.math.expr.vector.ExprVectorProcessor; +import org.apache.druid.math.expr.vector.functional.DoubleBivariateDoubleLongFunction; +import org.apache.druid.math.expr.vector.functional.DoubleBivariateDoublesFunction; +import org.apache.druid.math.expr.vector.functional.DoubleBivariateLongDoubleFunction; +import org.apache.druid.math.expr.vector.functional.LongBivariateLongsFunction; + +/** + * Dispatch table from a {@link SimdSupportedBinaryOp} identifier to a concrete, op-specialized SIMD processor. + * One class per op and type-combo so the JIT sees a monomorphic call site for the SIMD operation in each hot loop. + */ +public final class SimdProcessors +{ + private SimdProcessors() + { + } + + public static ExprVectorProcessor makeLongLong( + ExprVectorProcessor left, + ExprVectorProcessor right, + SimdSupportedBinaryOp op, + LongBivariateLongsFunction scalarFallback + ) + { + return switch (op) { + case ADD -> new SimdLongLongAddProcessor(left, right, scalarFallback); + case SUB -> new SimdLongLongSubProcessor(left, right, scalarFallback); + case MUL -> new SimdLongLongMulProcessor(left, right, scalarFallback); + default -> throw DruidException.defensive("Unsupported SIMD binary op[%s]", op); + }; + } + + public static ExprVectorProcessor makeDoubleDouble( + ExprVectorProcessor left, + ExprVectorProcessor right, + SimdSupportedBinaryOp op, + DoubleBivariateDoublesFunction scalarFallback + ) + { + return switch (op) { + case ADD -> new SimdDoubleDoubleAddProcessor(left, right, scalarFallback); + case SUB -> new SimdDoubleDoubleSubProcessor(left, right, scalarFallback); + case MUL -> new SimdDoubleDoubleMulProcessor(left, right, scalarFallback); + default -> throw DruidException.defensive("Unsupported SIMD binary op[%s]", op); + }; + } + + public static ExprVectorProcessor makeLongDouble( + ExprVectorProcessor left, + ExprVectorProcessor 
right, + SimdSupportedBinaryOp op, + DoubleBivariateLongDoubleFunction scalarFallback + ) + { + return switch (op) { + case ADD -> new SimdLongDoubleAddProcessor(left, right, scalarFallback); + case SUB -> new SimdLongDoubleSubProcessor(left, right, scalarFallback); + case MUL -> new SimdLongDoubleMulProcessor(left, right, scalarFallback); + default -> throw DruidException.defensive("Unsupported SIMD binary op[%s]", op); + }; + } + + public static ExprVectorProcessor makeDoubleLong( + ExprVectorProcessor left, + ExprVectorProcessor right, + SimdSupportedBinaryOp op, + DoubleBivariateDoubleLongFunction scalarFallback + ) + { + return switch (op) { + case ADD -> new SimdDoubleLongAddProcessor(left, right, scalarFallback); + case SUB -> new SimdDoubleLongSubProcessor(left, right, scalarFallback); + case MUL -> new SimdDoubleLongMulProcessor(left, right, scalarFallback); + default -> throw DruidException.defensive("Unsupported SIMD binary op[%s]", op); + }; + } +} diff --git a/processing/src/main/java/org/apache/druid/math/expr/vector/simd/SimdSupportedBinaryOp.java b/processing/src/main/java/org/apache/druid/math/expr/vector/simd/SimdSupportedBinaryOp.java new file mode 100644 index 000000000000..953571ba4d36 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/math/expr/vector/simd/SimdSupportedBinaryOp.java @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
/**
 * Identifies which binary math operations have a {@code jdk.incubator.vector} (SIMD) specialization. Used by
 * {@link org.apache.druid.math.expr.vector.SimpleVectorMathBivariateProcessorFactory} subclasses to declare that
 * their operation can be dispatched to a SIMD variant when the user enables
 * {@link org.apache.druid.math.expr.ExpressionProcessingConfig#USE_VECTOR_API}.
 *
 * <p>Deliberately does not reference any {@code jdk.incubator.vector} types so that callers wiring the enum into
 * factories do not need the incubator module visible.
 */
public enum SimdSupportedBinaryOp
{
  /** Addition; {@link SimdProcessors} maps this to the {@code Simd*AddProcessor} classes. */
  ADD,
  /** Subtraction; {@link SimdProcessors} maps this to the {@code Simd*SubProcessor} classes. */
  SUB,
  /** Multiplication; {@link SimdProcessors} maps this to the {@code Simd*MulProcessor} classes. */
  MUL
}
org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.column.ColumnCapabilitiesImpl; import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.column.ValueType; import org.apache.druid.segment.virtual.ExpressionPlan; import org.apache.druid.segment.virtual.ExpressionPlanner; import org.apache.druid.segment.virtual.ExpressionSelectors; @@ -347,6 +349,86 @@ public Object combine(@Nullable Object lhs, @Nullable Object rhs) ).value(); } + @Override + public AggregateCombiner makeAggregateCombiner() + { + final ColumnType intermediateType = getIntermediateType(); + // The combiner delegates to combine(), which feeds inputs into combineExpression typed against initialCombineValue. + // If the fold-side intermediate type (what's stored in the segment column) differs from the combine-side type, + // the primitive selector would silently feed wrong-typed values into the expression. Fall through to UOE. + if (!intermediateType.equals(ExpressionType.toColumnType(initialCombineValue.get().type()))) { + return super.makeAggregateCombiner(); + } + if (intermediateType.is(ValueType.LONG)) { + return new LongAggregateCombiner() + { + private long state; + private boolean isNull; + + @Override + public void reset(ColumnValueSelector selector) + { + state = selector.getLong(); + isNull = selector.isNull(); + } + + @Override + public void fold(ColumnValueSelector selector) + { + final Object combined = combine(isNull ? null : state, selector.getObject()); + isNull = combined == null; + state = combined == null ? 
0L : ((Number) combined).longValue(); + } + + @Override + public long getLong() + { + return state; + } + + @Override + public boolean isNull() + { + return isNull; + } + }; + } else if (intermediateType.is(ValueType.DOUBLE)) { + return new DoubleAggregateCombiner() + { + private double state; + private boolean isNull; + + @Override + public void reset(ColumnValueSelector selector) + { + state = selector.getDouble(); + isNull = selector.isNull(); + } + + @Override + public void fold(ColumnValueSelector selector) + { + final Object combined = combine(isNull ? null : state, selector.getObject()); + isNull = combined == null; + state = combined == null ? 0.0 : ((Number) combined).doubleValue(); + } + + @Override + public double getDouble() + { + return state; + } + + @Override + public boolean isNull() + { + return isNull; + } + }; + } + return super.makeAggregateCombiner(); + } + @Override public Object deserialize(Object object) { diff --git a/processing/src/main/java/org/apache/druid/query/filter/LikeDimFilter.java b/processing/src/main/java/org/apache/druid/query/filter/LikeDimFilter.java index b5f67595ffa4..96668b306886 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/LikeDimFilter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/LikeDimFilter.java @@ -24,10 +24,13 @@ import com.fasterxml.jackson.annotation.JsonProperty; import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableRangeSet; import com.google.common.collect.ImmutableSet; +import com.google.common.collect.Range; import com.google.common.collect.RangeSet; import com.google.common.io.BaseEncoding; import com.google.common.primitives.Chars; +import org.apache.druid.error.DruidException; import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.query.extraction.ExtractionFn; import org.apache.druid.segment.filter.LikeFilter; @@ -154,6 +157,20 @@ public Filter 
toFilter() @Override public RangeSet getDimensionRangeSet(String dimension) { + if (!this.dimension.equals(dimension) || extractionFn != null) { + return null; + } + final LikeDimFilter.LikeMatcher.SuffixMatch suffixMatch = likeMatcher.getSuffixMatch(); + final String prefix = likeMatcher.getPrefix(); + if (suffixMatch == LikeMatcher.SuffixMatch.MATCH_EMPTY) { + // The full pattern was a literal (no wildcards); LIKE acts as equality on `prefix`. + return ImmutableRangeSet.of(Range.singleton(prefix)); + } + if (suffixMatch == LikeMatcher.SuffixMatch.MATCH_ANY) { + // LIKE 'prefix%' matches every string starting with `prefix`; bare LIKE '%' matches everything + return ImmutableRangeSet.of(prefix.isEmpty() ? Range.all() : prefixRange(prefix)); + } + // mid-string wildcards aren't expressible as a single Range. return null; } @@ -197,6 +214,42 @@ public String toString() return builder.appendFilterTuning(filterTuning).build(); } + /** + * Range covering every string that starts with {@code prefix} + */ + public static Range prefixRange(String prefix) + { + if (prefix.isEmpty()) { + throw DruidException.defensive("prefix is empty; use Range.all() explicitly for the match-everything case"); + } + final String successor = lexicographicSuccessor(prefix); + return successor == null ? Range.atLeast(prefix) : Range.closedOpen(prefix, successor); + } + + /** + * Smallest string strictly greater than {@code s} in lexicographic (UTF-16) order: increment the last + * non-{@link Character#MAX_VALUE} char and truncate everything after it. Returns {@code null} when {@code s} + * is a non-empty run of {@code MAX_VALUE} chars and the carry would overflow. 
+ */ + @Nullable + @VisibleForTesting + static String lexicographicSuccessor(String s) + { + if (s.isEmpty()) { + return "\u0000"; + } + final char[] chars = s.toCharArray(); + int i = chars.length - 1; + while (i >= 0 && chars[i] == Character.MAX_VALUE) { + i--; + } + if (i < 0) { + return null; + } + chars[i]++; + return new String(chars, 0, i + 1); + } + public static class LikeMatcher { public enum SuffixMatch diff --git a/processing/src/main/java/org/apache/druid/query/spec/SpecificSegmentQueryRunner.java b/processing/src/main/java/org/apache/druid/query/spec/SpecificSegmentQueryRunner.java index 7f1a37f61e63..f888bf87e473 100644 --- a/processing/src/main/java/org/apache/druid/query/spec/SpecificSegmentQueryRunner.java +++ b/processing/src/main/java/org/apache/druid/query/spec/SpecificSegmentQueryRunner.java @@ -48,6 +48,7 @@ public class SpecificSegmentQueryRunner implements QueryRunner @VisibleForTesting static final String CTX_SET_THREAD_NAME = "setProcessingThreadNames"; + static final boolean DEFAULT_SET_THREAD_NAME_ENABLED = false; public SpecificSegmentQueryRunner( QueryRunner base, @@ -68,7 +69,7 @@ public Sequence run(final QueryPlus input, final ResponseContext responseC ) ); - final boolean setName = input.getQuery().context().getBoolean(CTX_SET_THREAD_NAME, true); + final boolean setName = input.getQuery().context().getBoolean(CTX_SET_THREAD_NAME, DEFAULT_SET_THREAD_NAME_ENABLED); final Query query = queryPlus.getQuery(); diff --git a/processing/src/main/java/org/apache/druid/segment/AsyncCursorHolder.java b/processing/src/main/java/org/apache/druid/segment/AsyncCursorHolder.java index e682f111619e..d980022d3255 100644 --- a/processing/src/main/java/org/apache/druid/segment/AsyncCursorHolder.java +++ b/processing/src/main/java/org/apache/druid/segment/AsyncCursorHolder.java @@ -19,37 +19,19 @@ package org.apache.druid.segment; -import com.google.errorprone.annotations.concurrent.GuardedBy; +import org.apache.druid.collections.ResourceHolder; 
+import org.apache.druid.common.asyncresource.SettableAsyncResource; import org.apache.druid.error.DruidException; import org.apache.druid.frame.processor.ReturnOrAwait; -import org.apache.druid.java.util.common.Either; -import org.apache.druid.java.util.common.logger.Logger; import javax.annotation.Nullable; -import java.io.Closeable; -import java.util.ArrayList; -import java.util.List; /** * Closeable wrapper around an asynchronously-loaded {@link CursorHolder}, returned by * {@link CursorFactory#makeCursorHolderAsync}. Designed to make lifecycle management safe even when the holder is * still loading: callers receive a single Closeable handle and can register it once with their cleanup machinery, * regardless of where the underlying load is in its lifecycle. - *

- * The hazard this exists to avoid: returning a {@code ListenableFuture} (or similar future-of-Closeable) - * makes correct cleanup error-prone, where canceling the future or letting a caller fail before consuming the future - * can orphan the produced holder, leaking the underlying resources. By exposing a Closeable that internally tracks the - * load and disposes whatever has materialized, callers don't have to write that bookkeeping themselves. - *

- *

Producer protocol

- * Producers feed results in via {@link #set(CursorHolder)} or {@link #setException(Throwable)}, both of which return - * a boolean. If they return {@code false}, this wrapper has already been closed and the producer is responsible for - * closing whatever holder it just produced. - * Producers may pass a {@link Runnable} canceler at construction time which runs on {@link #close()} when the wrapper - * is closed before the {@link #set} has been called, giving the producer an opportunity to abort its work. The canceler - * is best-effort: a producer may have already produced the holder by the time it observes cancellation, in which case - * its {@link #set} call will return false and it must close the holder it tried to set. - *

+ * *

Consumer protocol

* Consumers wait for {@link #isReady()} via {@link #addReadyCallback}, and {@link #release()} to transfer ownership of * the {@link CursorHolder} (or throw the producer exception). Calling {@link #release()} before {@link #isReady()} @@ -70,36 +52,8 @@ * // ... use holder; close it when done (or hand it to a component that owns its lifecycle) ... * } */ -public class AsyncCursorHolder implements Closeable +public class AsyncCursorHolder extends SettableAsyncResource { - private static final Logger LOG = new Logger(AsyncCursorHolder.class); - - /** - * Completed {@link AsyncCursorHolder} backed by an already available {@link CursorHolder} - */ - public static AsyncCursorHolder completed(CursorHolder holder) - { - final AsyncCursorHolder result = new AsyncCursorHolder(null); - result.set(holder); - return result; - } - - @Nullable - private final Runnable canceler; - - @GuardedBy("this") - @Nullable - private CursorHolder result = null; - @GuardedBy("this") - @Nullable - private Throwable error = null; - @GuardedBy("this") - private boolean closed = false; - @GuardedBy("this") - private boolean disposed = false; - @GuardedBy("this") - private final List readyCallbacks = new ArrayList<>(); - /** * @param canceler optional callback invoked from {@link #close()} when the wrapper is closed before the load has * completed ({@link #set} or {@link #setException}). Producers that support cancellation should @@ -108,207 +62,31 @@ public static AsyncCursorHolder completed(CursorHolder holder) */ public AsyncCursorHolder(@Nullable Runnable canceler) { - this.canceler = canceler; - } - - /** - * Allows producer to mark the load successful with the given holder. Returns {@code true} if accepted, {@code false} - * if this wrapper has already been closed, in which case the producer is responsible for closing {@link CursorHolder} - * itself. Throws {@link DruidException} if the load was already completed (from prior calls to this method or - * {@link #setException}). - *

- * Callbacks registered via {@link #addReadyCallback} fire outside the lock to avoid lock-ordering deadlocks and - * unbounded lock holds. - */ - public boolean set(CursorHolder holder) - { - if (holder == null) { - throw DruidException.defensive("CursorHolder cannot be null"); - } - return setInternal(Either.value(holder)); - } - - /** - * Allows producer to mark the load as failed. Returns {@code true} if accepted, {@code false} if this wrapper has - * already been closed (no holder was produced, so there's nothing for the producer to clean up). Throws - * {@link DruidException} if the load was already completed (from prior calls to this method or {@link #set}). - *

- * Callbacks registered via {@link #addReadyCallback} fire outside the lock to avoid lock-ordering deadlocks and - * unbounded lock holds. - */ - public boolean setException(Throwable t) - { - return setInternal(Either.error(t)); - } - - private boolean setInternal(Either value) - { - final List callbacksToFire; - synchronized (this) { - if (closed) { - return false; - } - if (result != null || error != null) { - throw DruidException.defensive("AsyncCursorHolder is already completed"); - } - if (value.isError()) { - error = value.error(); - } else { - result = value.valueOrThrow(); - } - callbacksToFire = drainCallbacks(); - } - fireCallbacks(callbacksToFire); - return true; + super(true); + setCanceler(canceler); } /** - * Whether the load has completed (successfully or with failure). Once true, stays true. Callers that need to wait - * for readiness without blocking the current thread should register a {@link #addReadyCallback} and yield. + * Convenience setter. */ - public synchronized boolean isReady() + public boolean set(CursorHolder cursorHolder) { - return result != null || error != null; + return super.set(cursorHolder, cursorHolder); } - /** - * Take ownership of the underlying {@link CursorHolder}. After this returns, {@link #close()} on this - * {@code AsyncCursorHolder} is a no-op; the caller is responsible for closing the returned holder. Useful when - * passing the holder to another component (e.g. a cursor-lifecycle manager) that takes ownership of it. - *

- * Throws {@link DruidException} if the holder is not yet ready, has already been released, or this wrapper - * has been closed. Wraps and rethrows the failure if the underlying load failed. Does not block; callers must - * check {@link #isReady()} first (typically by yielding via a {@link #addReadyCallback}-driven wait pattern). - */ + @Override // Overridden to change access from protected to public public synchronized CursorHolder release() { - if (closed) { - throw DruidException.defensive("AsyncCursorHolder is already closed"); - } - if (disposed) { - throw DruidException.defensive("AsyncCursorHolder has already been released"); - } - if (error != null) { - // pass through as is - if (error instanceof RuntimeException runtime) { - throw runtime; - } else if (error instanceof Error e) { - throw e; - } - throw DruidException.forPersona(DruidException.Persona.DEVELOPER) - .ofCategory(DruidException.Category.UNCATEGORIZED) - .build(error, error.getMessage()); - } - if (result == null) { - throw DruidException.defensive("AsyncCursorHolder is not ready yet"); - } - final CursorHolder resultToReturn = result; - // clear result so it can be eligible for gc - result = null; - disposed = true; - return resultToReturn; + return super.release(); } /** - * Register a callback to fire when {@link #isReady()} becomes true (whether the load succeeded or failed). If the - * holder is already ready, the callback fires synchronously on the calling thread. Otherwise it fires on whatever - * thread invokes {@link #set} or {@link #setException}, outside the wrapper's lock so the callback may safely - * re-enter the wrapper. Multiple callbacks may be registered; each fires once. 
- */ - public void addReadyCallback(Runnable callback) - { - final boolean fireImmediately; - synchronized (this) { - if (result != null || error != null) { - fireImmediately = true; - } else { - readyCallbacks.add(callback); - fireImmediately = false; - } - } - if (fireImmediately) { - callback.run(); - } - } - - /** - * Close the wrapper. Safe at any lifecycle point and idempotent: - *

    - *
  • Already-loaded: closes the underlying {@link CursorHolder} immediately.
  • - *
  • Loading in progress: invokes the canceler (if one was supplied at construction). The producer may still - * call {@link #set} / {@link #setException} after this; if the producer wins the race and calls {@code set} - * with a holder, {@code set} returns false and the producer is responsible for closing it.
  • - *
  • Load failed: no-op (no holder was produced).
  • - *
  • Already released: no-op.
  • - *
  • Already closed: throws {@link DruidException}.
  • - *
+ * Completed {@link AsyncCursorHolder} backed by an already available {@link CursorHolder} */ - @Override - public void close() - { - final CursorHolder holderToClose; - final Runnable cancelerToRun; - synchronized (this) { - if (closed) { - throw DruidException.defensive("Already closed"); - } - closed = true; - if (disposed) { - // Ownership was already transferred via release(); the caller manages the holder lifecycle. - return; - } - if (result != null) { - // Result is here and no one has released it; we close it. - disposed = true; - holderToClose = result; - cancelerToRun = null; - } else if (error != null) { - // Load already failed; nothing to dispose. - holderToClose = null; - cancelerToRun = null; - } else { - // Load not yet completed; signal cancellation to the producer (if any). - holderToClose = null; - cancelerToRun = canceler; - } - } - if (holderToClose != null) { - try { - holderToClose.close(); - } - catch (Throwable ignored) { - // Best-effort cleanup - } - } - if (cancelerToRun != null) { - try { - cancelerToRun.run(); - } - catch (Throwable t) { - // Best-effort cancel - LOG.warn(t, "AsyncCursorHolder canceler exception"); - } - } - } - - @GuardedBy("this") - private List drainCallbacks() - { - final List snapshot = List.copyOf(readyCallbacks); - readyCallbacks.clear(); - return snapshot; - } - - private static void fireCallbacks(List callbacks) + public static AsyncCursorHolder completed(CursorHolder holder) { - for (Runnable cb : callbacks) { - try { - cb.run(); - } - catch (Throwable t) { - // Best-effort; one bad callback shouldn't break others. 
- LOG.warn(t, "AsyncCursorHolder callback exception"); - } - } + final AsyncCursorHolder result = new AsyncCursorHolder(null); + result.set(ResourceHolder.fromCloseable(holder)); + return result; } } diff --git a/processing/src/main/java/org/apache/druid/segment/IndexMergerBase.java b/processing/src/main/java/org/apache/druid/segment/IndexMergerBase.java index 1283b4475b44..50c68de30c35 100644 --- a/processing/src/main/java/org/apache/druid/segment/IndexMergerBase.java +++ b/processing/src/main/java/org/apache/druid/segment/IndexMergerBase.java @@ -630,7 +630,7 @@ protected Metadata makeProjections( final String section2 = "build projection[" + projectionSchema.getName() + "] inverted index and columns"; progress.startSection(section2); - segmentFileBuilder.startFileGroup(projectionSchema.getName()); + segmentFileBuilder.startFileBundle(projectionSchema.getName()); if (projectionSchema.getTimeColumnName() != null) { makeTimeColumn( segmentFileBuilder, diff --git a/processing/src/main/java/org/apache/druid/segment/IndexMergerV10.java b/processing/src/main/java/org/apache/druid/segment/IndexMergerV10.java index 91d2661841ec..28d9ee56345c 100644 --- a/processing/src/main/java/org/apache/druid/segment/IndexMergerV10.java +++ b/processing/src/main/java/org/apache/druid/segment/IndexMergerV10.java @@ -218,7 +218,7 @@ protected File makeIndexFiles( /************ Create Inverted Indexes and Finalize Build Columns *************/ final String section = "build inverted index and columns"; progress.startSection(section); - v10Smoosher.startFileGroup(Projections.BASE_TABLE_PROJECTION_NAME); + v10Smoosher.startFileBundle(Projections.BASE_TABLE_PROJECTION_NAME); makeTimeColumn(v10Smoosher, progress, timeWriter, indexSpec, basePrefix + ColumnHolder.TIME_COLUMN_NAME); makeMetricsColumns( v10Smoosher, diff --git a/processing/src/main/java/org/apache/druid/segment/file/PartialSegmentFileMapperV10.java 
b/processing/src/main/java/org/apache/druid/segment/file/PartialSegmentFileMapperV10.java index c622ec756418..5db4914a464a 100644 --- a/processing/src/main/java/org/apache/druid/segment/file/PartialSegmentFileMapperV10.java +++ b/processing/src/main/java/org/apache/druid/segment/file/PartialSegmentFileMapperV10.java @@ -28,6 +28,7 @@ import org.apache.druid.java.util.common.FileUtils; import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.java.util.common.io.Closer; +import org.apache.druid.java.util.common.logger.Logger; import org.apache.druid.segment.data.CompressionStrategy; import org.apache.druid.segment.loading.SegmentRangeReader; import org.apache.druid.utils.CloseableUtils; @@ -83,7 +84,13 @@ */ public class PartialSegmentFileMapperV10 implements SegmentFileMapper { - static final String METADATA_HEADER_SUFFIX = ".header"; + private static final Logger LOG = new Logger(PartialSegmentFileMapperV10.class); + + /** + * Suffix appended to the target filename to form the local header file. Public so cache-manager components can + * recognize the partial-download on-disk layout during bootstrap restore and reservation cleanup. + */ + public static final String METADATA_HEADER_SUFFIX = ".header"; /** * Create (or restore) a lazy mapper for the main segment file with attached external file mappers. If persisted state @@ -146,9 +153,16 @@ static PartialSegmentFileMapperV10 createForFile( bitmapBuffer = mmapBitmap(headerFile, result); } catch (Exception e) { - // corrupted file (partial write, truncated bitmap, bad JSON, etc.) 
— delete and re-fetch + // corrupted file (partial write, truncated bitmap, bad JSON, etc.), delete and re-fetch result = null; - headerFile.delete(); + if (!headerFile.delete()) { + LOG.warn( + e, + "Failed to delete corrupted header file[%s] for [%s]; will be overwritten by re-fetch", + headerFile, + targetFilename + ); + } } } @@ -167,7 +181,32 @@ static PartialSegmentFileMapperV10 createForFile( bitmapBuffer ); - // restore downloaded files from the bitmap + // bitmap-vs-container repair pre-pass: if the bitmap claims a file is downloaded but its container file is + // missing on disk, the bitmap is lying (e.g. partial-cache eviction that cleared containers but couldn't atomically + // clear bits, or external file-system damage). Clear those bits before the restore loop so we don't spuriously + // sparse-allocate empty containers in the restore loop's ensureContainerInitialized call and treat their files as + // downloaded. + for (int i = 0; i < mapper.sortedFileNames.size(); i++) { + final int byteIndex = i / 8; + final int bitMask = 1 << (i % 8); + if ((bitmapBuffer.get(byteIndex) & bitMask) == 0) { + continue; + } + final String name = mapper.sortedFileNames.get(i); + final SegmentInternalFileMetadata fileMetadata = result.getMetadata().getFiles().get(name); + if (fileMetadata == null) { + continue; + } + final File containerFile = new File( + localCacheDir, + StringUtils.format("%s.container.%05d", targetFilename, fileMetadata.getContainer()) + ); + if (!containerFile.exists()) { + bitmapBuffer.put(byteIndex, (byte) (bitmapBuffer.get(byteIndex) & ~bitMask)); + } + } + + // restore downloaded files from the (now-repaired) bitmap for (int i = 0; i < mapper.sortedFileNames.size(); i++) { final int byteIndex = i / 8; final int bitIndex = i % 8; @@ -249,6 +288,57 @@ public SegmentFileMetadata getSegmentFileMetadata() return metadata; } + /** + * Names of the external segment files attached to this mapper (each one is its own {@link PartialSegmentFileMapperV10} + 
* accessible via {@link #getExternalMapper}). Empty for mappers with no externals. + */ + public Set getExternalFilenames() + { + return externalMappers.keySet(); + } + + /** + * Look up the child mapper for an external segment file. Returns {@code null} if no external with that name is + * attached. Cache-layer callers use this to walk external files' {@link SegmentFileMetadata} and route + * {@link #initializeContainer} / {@link #evictContainer} calls to the right physical file. + */ + @Nullable + public PartialSegmentFileMapperV10 getExternalMapper(String externalFilename) + { + return externalMappers.get(externalFilename); + } + + /** + * Resolve {@code this} when {@code externalFilename} is null (main file), otherwise the named external child + * mapper. Throws if the external is not attached. Useful for routing container operations from cache-layer code + * that holds {@code (externalFilename, containerIndex)} refs. + */ + public PartialSegmentFileMapperV10 mapperForContainer(@Nullable String externalFilename) + { + if (externalFilename == null) { + return this; + } + final PartialSegmentFileMapperV10 external = externalMappers.get(externalFilename); + if (external == null) { + throw DruidException.defensive( + "External mapper[%s] is not attached to this mapper for [%s]", + externalFilename, + targetFilename + ); + } + return external; + } + + /** + * The {@code targetFilename} this mapper writes/reads to/from inside the cache directory. For the entry-point + * mapper this is e.g. {@link org.apache.druid.segment.IndexIO#V10_FILE_NAME}; for an external child mapper it's + * the external file's name. 
+ */ + public String getTargetFilename() + { + return targetFilename; + } + @Override public Set getInternalFilenames() { @@ -290,8 +380,8 @@ public ByteBuffer mapExternalFile(String filename, String name) throws IOExcepti /** * Pre-download a set of internal files so that subsequent {@link #mapFile(String)} calls for these files will not - * trigger individual downloads. Files that are already downloaded are skipped. This is useful for batch-downloading - * all files for a projection at once. + * trigger individual downloads. Files that are already downloaded are skipped. Useful for batch-downloading all + * files in a bundle at once (see {@link SegmentFileBuilder#startFileBundle}). */ public void ensureFilesAvailable(Set fileNames) throws IOException { @@ -303,6 +393,27 @@ public void ensureFilesAvailable(Set fileNames) throws IOException } } + /** + * Total on-disk size of the header file(s) backing this mapper, summed across the main file and any external file + * mappers. This is the actual reservation size that should be charged against the local cache once the metadata has + * been fetched and persisted; callers can compare it against an up-front pessimistic estimate to decide whether to + * shrink the reservation. + */ + public long getOnDiskHeaderSize() + { + long total = headerFileSize(localCacheDir, targetFilename); + for (PartialSegmentFileMapperV10 ext : externalMappers.values()) { + total += headerFileSize(ext.localCacheDir, ext.targetFilename); + } + return total; + } + + private static long headerFileSize(File dir, String filename) + { + final File header = new File(dir, filename + METADATA_HEADER_SUFFIX); + return header.exists() ? header.length() : 0; + } + /** * Total bytes downloaded so far across all internal files, including external mappers. 
*/ @@ -384,6 +495,104 @@ private void ensureFileDownloaded(String name, SegmentInternalFileMetadata fileM } } + /** + * Public entry point for cache-layer code that wants to ensure a container is materialized before any data is + * downloaded into it (e.g. when a per-bundle cache entry is mounted, the entry pre-allocates its container files + * so that subsequent {@link #mapFile} calls have somewhere to write into and the cache layer can charge the + * reservation up front). + */ + public void initializeContainer(int containerIndex) throws IOException + { + checkClosed(); + ensureContainerInitialized(containerIndex); + } + + /** + * Reverse of {@link #initializeContainer(int)}: unmap the in-memory view of the container, delete the local + * container file, and clear the bitmap bits + {@link #downloadedFiles} entries for every internal file that lived + * in this container. + *

+ * Used by per-bundle cache entries on unmount/eviction to release the disk and memory footprint of one bundle + * without affecting other bundles sharing the same {@link PartialSegmentFileMapperV10}. After eviction, subsequent + * {@link #mapFile} calls for files in this container will re-trigger downloads via {@link #initializeContainer} + * and the bitmap will be repopulated incrementally. + *

+ * Concurrency contract. The caller is responsible for ensuring no concurrent {@link #mapFile} (or + * {@link #ensureFilesAvailable}) call is in flight for any file in this container. This is enforced one layer up + * by the cache-entry refcount: {@code PartialSegmentBundleCacheEntry} only invokes {@code evictContainer} from its + * {@code doActualUnmount} callback, which fires only after every reference acquired via {@code acquireReference()} + * has been closed. Bypassing that gate is dangerous, {@link ByteBufferUtils#unmap} frees the off-heap mapping, so a + * {@link ByteBuffer#slice} from a concurrent reader is a JVM SIGSEGV, not a recoverable error. + *

+ * No-op if the container has not been initialized. + */ + public void evictContainer(int containerIndex) + { + checkClosed(); + containerLocks[containerIndex].lock(); + try { + final MappedByteBuffer existing = containers[containerIndex]; + if (existing != null) { + ByteBufferUtils.unmap(existing); + containers[containerIndex] = null; + } + // Try the cached containerFiles[i] first. If it's null, the container was never initialized in this mapper + // instance (typical right after create() with an empty bitmap), but the on-disk file may still exist from a + // previous run. Fall back to the deterministic path so eviction is always effective. + File containerFile = containerFiles[containerIndex]; + if (containerFile == null) { + containerFile = new File( + localCacheDir, + StringUtils.format("%s.container.%05d", targetFilename, containerIndex) + ); + } + if (containerFile.exists() && !containerFile.delete()) { + LOG.warn( + "Failed to delete container file[%s] during eviction of container[%d] for [%s]; leaking on disk", + containerFile, + containerIndex, + targetFilename + ); + } + containerFiles[containerIndex] = null; + } + finally { + containerLocks[containerIndex].unlock(); + } + + // clear bitmap bits + downloadedFiles entries for files that lived in this container. Iterates + // metadata.getFiles() without external synchronization: SegmentFileMetadata is constructed once at mapper + // creation and its file map is effectively immutable for the mapper's lifetime, so concurrent iteration is safe. 
+ for (Map.Entry entry : metadata.getFiles().entrySet()) { + if (entry.getValue().getContainer() != containerIndex) { + continue; + } + final String fileName = entry.getKey(); + if (downloadedFiles.remove(fileName)) { + downloadedBytes.addAndGet(-entry.getValue().getSize()); + } + clearBitmapBit(fileName); + } + } + + private void clearBitmapBit(String name) + { + final Integer index = fileNameToIndex.get(name); + if (index == null) { + return; + } + final int byteIndex = index / 8; + final int bitMask = 1 << (index % 8); + bitmapLock.lock(); + try { + final byte existing = bitmapBuffer.get(byteIndex); + bitmapBuffer.put(byteIndex, (byte) (existing & ~bitMask)); + } + finally { + bitmapLock.unlock(); + } + } + /** * Initialize a local container file if not already done. Creates a sparse file at the original container size * and memory-maps it. The channel is closed immediately after mapping, the mmap persists independently, backed by diff --git a/processing/src/main/java/org/apache/druid/segment/file/SegmentFileBuilder.java b/processing/src/main/java/org/apache/druid/segment/file/SegmentFileBuilder.java index 6d5aea47374c..d589213f6054 100644 --- a/processing/src/main/java/org/apache/druid/segment/file/SegmentFileBuilder.java +++ b/processing/src/main/java/org/apache/druid/segment/file/SegmentFileBuilder.java @@ -41,25 +41,33 @@ */ public interface SegmentFileBuilder extends Closeable { + /** + * Default bundle name for containers written without an explicit {@link #startFileBundle} call. Thinking of file + * bundles as directories, this is the root directory that sits above any named subdirectories the writer declares. + * Containers always carry a non-null bundle name; if the writer never calls {@code startFileBundle}, they are + * tagged with this default. Cache-layer readers treat all containers sharing this name as one mount/evict unit. 
+ */ + String ROOT_BUNDLE_NAME = "__root__"; + /** * Add a column to the metadata of this segment file */ void addColumn(String name, ColumnDescriptor columnDescriptor); /** - * Declare that subsequent writes belong to a named group of files that should be stored together. This is a hint + * Declare that subsequent writes belong to a named bundle of files that should be stored together. This is a hint * about physical layout, it does not constrain the names of files subsequently added, and implementations are free * to ignore it entirely (the default is a no-op for formats that don't organize data into coarse-grained * groupings). Projections are the primary caller today, but the mechanism is generic, it's equally applicable to * grouping internal metadata, data shared across columns, etc. *

- * Callers should invoke this before writing each group's files; passing {@code null} clears the current group. - * Callers should not invoke this while a writer returned by {@link #addWithChannel} is still open (implementations - * may reject such calls). + * Callers should invoke this before writing each bundle's files; passing {@code null} resets the current bundle to + * the {@link #ROOT_BUNDLE_NAME} default. Callers should not invoke this while a writer returned by + * {@link #addWithChannel} is still open (implementations may reject such calls). * - * @see SegmentFileBuilderV10#startFileGroup(String) for the V10 semantics + * @see SegmentFileBuilderV10#startFileBundle(String) for the V10 semantics */ - default void startFileGroup(@Nullable String groupName) + default void startFileBundle(@Nullable String bundleName) { } diff --git a/processing/src/main/java/org/apache/druid/segment/file/SegmentFileBuilderV10.java b/processing/src/main/java/org/apache/druid/segment/file/SegmentFileBuilderV10.java index 0b17960aa573..ba4a79cb0c8c 100644 --- a/processing/src/main/java/org/apache/druid/segment/file/SegmentFileBuilderV10.java +++ b/processing/src/main/java/org/apache/druid/segment/file/SegmentFileBuilderV10.java @@ -50,7 +50,6 @@ import java.util.ArrayList; import java.util.List; import java.util.Map; -import java.util.Objects; import java.util.TreeMap; /** @@ -61,20 +60,21 @@ * V10 file format: * | version (byte) | meta compression (byte) | meta length (int) | meta json | container 0 | ... | container n | *

- * Containers are scoped to at most one declared file group. Callers declare which group they are writing via - * {@link #startFileGroup(String)} before writing its files; a new container is started when the declared group - * changes or the current container would exceed {@link #maxContainerSize}. A group whose total size exceeds the max - * container size spans multiple containers, all tagged with the same group. This gives readers a clean 1:1 (or 1:N) - * mapping between groups and containers, which supports per-group partial loading without any read-side reorganization. - * Projections are the primary caller today, but the mechanism is equally usable for other organizational needs - * (shared data across columns, internal metadata, etc.). + * Containers are scoped to exactly one declared bundle. Callers declare which bundle they are writing via + * {@link #startFileBundle(String)} before writing its files; a new container is started when the declared bundle + * changes or the current container would exceed {@link #maxContainerSize}. A bundle whose total size exceeds the max + * container size spans multiple containers, all tagged with the same bundle. This gives readers a clean 1:1 (or 1:N) + * mapping between bundles and containers, which supports per-bundle partial loading without any read-side + * reorganization. Projections are the primary caller today, but the mechanism is equally usable for other + * organizational needs (shared data across columns, internal metadata, etc.). *

- * Callers that never invoke {@link #startFileGroup(String)} are mapped to a null-group container. + * Callers that never invoke {@link #startFileBundle(String)} have all writes tagged with the + * {@link SegmentFileBuilder#ROOT_BUNDLE_NAME} default bundle. *

* Much of the logic here was ported from {@link org.apache.druid.java.util.common.io.smoosh.FileSmoosher} of the V9 * format and there is a fair bit of overlap. In fact, the initial implementation of this class wrapped a V9 smoosher * to build the files before combining them into the V10 format. The main difference is that V9 fills each container to - * the max while here we organize with file groups. + * the max while here we organize with bundles. */ public class SegmentFileBuilderV10 implements SegmentFileBuilder { @@ -115,8 +115,8 @@ public static SegmentFileBuilderV10 create(ObjectMapper jsonMapper, File baseDir // Nested addWithChannel calls (for example a serializer that, while being written, emits sub-files for its own // columnar parts) can't write into the current container concurrently with the outer writer. These nested writes are // redirected to temporary files and merged back into container(s) once the outer writer completes. Each entry - // carries the file group that was active when the delegate was created so that the merge routes it into the - // correct container even if the active group has since changed. + // carries the bundle that was active when the delegate was created so that the merge routes it into the correct + // container even if the active bundle has since changed. private final List completedDelegates = new ArrayList<>(); private final List inProgressDelegates = new ArrayList<>(); private long delegateFileCounter = 0; @@ -124,11 +124,11 @@ public static SegmentFileBuilderV10 create(ObjectMapper jsonMapper, File baseDir @Nullable private ContainerWriter currentContainer = null; private boolean writerCurrentlyInUse = false; - // The file group declared by the most recent {@link #startFileGroup} call. Writes are routed into containers - // tagged with this group. Remains {@code null} if the caller never declares one, in which case all writes share - // a single null-group container. 
- @Nullable - private String currentFileGroup = null; + + /** + * The bundle declared by the most recent {@link #startFileBundle} call + */ + private String currentBundle = SegmentFileBuilder.ROOT_BUNDLE_NAME; @Nullable private String interval = null; @@ -189,7 +189,7 @@ public SegmentFileChannel addWithChannel(final String name, final long size) thr if (internalFiles.containsKey(name)) { throw new IAE("Cannot add files of the same name, already have [%s]", name); } - ensureNameMatchesActiveGroup(name); + ensureNameMatchesActiveBundle(name); if (size > maxContainerSize) { throw DruidException.forPersona(DruidException.Persona.ADMIN) .ofCategory(DruidException.Category.RUNTIME_FAILURE) @@ -207,7 +207,7 @@ public SegmentFileChannel addWithChannel(final String name, final long size) thr return delegateChannel(name, size); } - ensureContainer(currentFileGroup, size); + ensureContainer(currentBundle, size); final ContainerWriter target = currentContainer; final long startOffset = target.currOffset; writerCurrentlyInUse = true; @@ -284,59 +284,69 @@ public SegmentFileBuilder getExternalBuilder(String externalFile) { return externalSegmentFileBuilders.computeIfAbsent( externalFile, - (k) -> new SegmentFileBuilderV10(jsonMapper, externalFile, baseDir, maxContainerSize, metadataCompression) + (k) -> { + final SegmentFileBuilderV10 fresh = + new SegmentFileBuilderV10(jsonMapper, externalFile, baseDir, maxContainerSize, metadataCompression); + // A late-attached external inherits the parent's currently-active bundle on creation only; subsequent + // bundle changes flow through the parent's startFileBundle broadcast. Re-applying on every fetch would + // close the external's in-progress container, since V10 bundles cannot currently be re-entered. 
+ if (!SegmentFileBuilder.ROOT_BUNDLE_NAME.equals(currentBundle)) { + fresh.startFileBundle(currentBundle); + } + return fresh; + } ); } @Override public void addColumn(String name, ColumnDescriptor columnDescriptor) { - ensureNameMatchesActiveGroup(name); + ensureNameMatchesActiveBundle(name); this.columns.put(name, columnDescriptor); } /** - * If a file group is currently active (set by the most recent {@link #startFileGroup} call), enforce that names of - * files and columns added under it are prefixed by {@code groupName + "/"}. Prevents silent collisions where two - * groups write a file/column of the same bare name and the second silently overwrites the first in the metadata - * maps. Existing production callers (e.g. {@code IndexMergerV10} via - * {@code Projections.getProjectionSegmentInternalFileName}) already construct prefixed names, so this is a no-op - * for them; it catches new writers that forget the convention. + * If a named bundle is currently active (set by the most recent {@link #startFileBundle} call to a non-root value), + * enforce that names of files and columns added under it are prefixed by {@code bundleName + "/"}. The root bundle + * is unconstrained. */ - private void ensureNameMatchesActiveGroup(String name) + private void ensureNameMatchesActiveBundle(String name) { - if (currentFileGroup != null && !name.startsWith(currentFileGroup + "/")) { + if (!SegmentFileBuilder.ROOT_BUNDLE_NAME.equals(currentBundle) && !name.startsWith(currentBundle + "/")) { throw DruidException.defensive( - "Name[%s] must start with the active file group prefix[%s/]", + "Name[%s] must start with the active bundle prefix[%s/]", name, - currentFileGroup + currentBundle ); } } /** - * Declare the file group that subsequent writes belong to. Writes are routed into a container tagged with the - * declared group; a new container is rolled when the group changes or the incoming file won't fit. 
A group whose - * total size exceeds {@link #maxContainerSize} is split across multiple consecutive containers, all tagged with - * the same group. Passing {@code null} clears the current group; subsequent writes are then routed into a - * null-group container until the next call. + * Declare the bundle that subsequent writes belong to. Writes are routed into a container tagged with the declared + * bundle; a new container is rolled when the bundle changes or the incoming file won't fit. A bundle whose total + * size exceeds {@link #maxContainerSize} is split across multiple consecutive containers, all tagged with the same + * bundle. Passing {@code null} resets to {@link SegmentFileBuilder#ROOT_BUNDLE_NAME}; subsequent writes are then + * routed into a root-bundle container until the next call. *

* Current V10-specific limitations worth knowing: *

    - *
  • Groups cannot be re-entered. Once a different group (or {@code null}) has been declared, the previous - * group's container is closed, and you cannot go back and append more files to it, any such writes would - * open a fresh container for the re-declared group, so the group's files would end up in non-contiguous - * containers. If all of a group's files must land in the same container(s), write them contiguously.
  • + *
  • Bundles cannot be re-entered. Once a different bundle has been declared the previous bundle's container is + * closed, and you cannot go back and append more files to it; any such writes would open a fresh container + * for the re-declared bundle, so the bundle's files would end up in non-contiguous containers. If all of a + * bundle's files must land in the same container(s), write them contiguously.
  • *
  • Throws if called while a writer returned by {@link #addWithChannel} is still open.
  • *
*/ @Override - public void startFileGroup(@Nullable String groupName) + public void startFileBundle(@Nullable String bundleName) { if (writerCurrentlyInUse) { - throw DruidException.defensive("Cannot start file group[%s] while a writer is in progress", groupName); + throw DruidException.defensive("Cannot start file bundle[%s] while a writer is in progress", bundleName); + } + this.currentBundle = bundleName == null ? SegmentFileBuilder.ROOT_BUNDLE_NAME : bundleName; + for (SegmentFileBuilderV10 externalFile : externalSegmentFileBuilders.values()) { + externalFile.startFileBundle(bundleName); } - this.currentFileGroup = groupName; } public void addInterval(String interval) @@ -464,35 +474,35 @@ private List buildContainerMetadata() long offset = 0; for (ContainerWriter container : containers) { final long length = container.file.length(); - result.add(new SegmentFileContainerMetadata(offset, length, container.group)); + result.add(new SegmentFileContainerMetadata(offset, length, container.bundle)); offset += length; } return result; } /** - * Ensure that {@link #currentContainer} is ready to accept {@code size} bytes of a file belonging to {@code group}. + * Ensure that {@link #currentContainer} is ready to accept {@code size} bytes of a file belonging to {@code bundle}. * Rolls the current container and starts a new one when: *
    *
  • there is no current container, or
  • - *
  • the current container is for a different group, or
  • + *
  • the current container is for a different bundle, or
  • *
  • the current container cannot fit the incoming bytes within {@link #maxContainerSize}.
  • *
*/ - private void ensureContainer(@Nullable String group, long size) throws IOException + private void ensureContainer(String bundle, long size) throws IOException { if (currentContainer == null - || !Objects.equals(currentContainer.group, group) + || !currentContainer.bundle.equals(bundle) || !currentContainer.canFit(size)) { if (currentContainer != null) { currentContainer.close(); } - currentContainer = openNewContainer(group); + currentContainer = openNewContainer(bundle); containers.add(currentContainer); } } - private ContainerWriter openNewContainer(@Nullable String group) throws IOException + private ContainerWriter openNewContainer(String bundle) throws IOException { FileUtils.mkdirp(baseDir); final int fileNum = containers.size(); @@ -500,7 +510,7 @@ private ContainerWriter openNewContainer(@Nullable String group) throws IOExcept baseDir, StringUtils.format("%s-%05d.container", outputFileName, fileNum) ); - return new ContainerWriter(fileNum, containerFile, group, maxContainerSize); + return new ContainerWriter(fileNum, containerFile, bundle, maxContainerSize); } private SegmentFileChannel delegateChannel(final String name, final long size) throws IOException @@ -509,9 +519,9 @@ private SegmentFileChannel delegateChannel(final String name, final long size) t // cannot collide, since main and external always have distinct output file names. final String delegateName = StringUtils.format("%s-delegate-%d", outputFileName, delegateFileCounter++); final File tmpFile = new File(baseDir, delegateName); - // Snapshot the active group now so that if this delegate is merged after the outer writer has advanced past - // the group it was created under, it still routes into the correct container. - final DelegateEntry entry = new DelegateEntry(tmpFile, name, currentFileGroup); + // Snapshot the active bundle now so that if this delegate is merged after the outer writer has advanced past + // the bundle it was created under, it still routes into the correct container. 
+ final DelegateEntry entry = new DelegateEntry(tmpFile, name, currentBundle); inProgressDelegates.add(entry); return new SegmentFileChannel() @@ -576,9 +586,9 @@ public void close() throws IOException /** * Move completed delegate temp files into containers by replaying them as regular {@link #add} calls. Only called - * when no outer writer is currently holding the builder. Each entry's snapshotted group is restored as - * {@link #currentFileGroup} during its replay so the file lands in the container that was active when the - * nested write was originally requested, not whichever group happens to be active at merge time. + * when no outer writer is currently holding the builder. Each entry's snapshotted bundle is restored as + * {@link #currentBundle} during its replay so the file lands in the container that was active when the nested + * write was originally requested, not whichever bundle happens to be active at merge time. */ private void mergeDelegatedFiles() throws IOException { @@ -587,10 +597,10 @@ private void mergeDelegatedFiles() throws IOException } final List toProcess = new ArrayList<>(completedDelegates); completedDelegates.clear(); - final String savedGroup = currentFileGroup; + final String savedBundle = currentBundle; try { for (DelegateEntry entry : toProcess) { - currentFileGroup = entry.group; + currentBundle = entry.bundle; add(entry.name, entry.file); if (!entry.file.delete()) { LOG.warn("Unable to delete delegate file[%s]", entry.file); @@ -598,33 +608,32 @@ private void mergeDelegatedFiles() throws IOException } } finally { - currentFileGroup = savedGroup; + currentBundle = savedBundle; } } - private record DelegateEntry(File file, String name, @Nullable String group) + private record DelegateEntry(File file, String name, String bundle) { } /** - * Low-level writer for a single container chunk file. One container holds internal files from at most one group. + * Low-level writer for a single container chunk file. 
One container holds internal files from exactly one bundle. */ private static class ContainerWriter implements GatheringByteChannel { private final int fileNum; private final File file; - @Nullable - private final String group; + private final String bundle; private final long maxSize; private final Closer closer = Closer.create(); private final GatheringByteChannel channel; private long currOffset = 0; - ContainerWriter(int fileNum, File file, @Nullable String group, long maxSize) throws IOException + ContainerWriter(int fileNum, File file, String bundle, long maxSize) throws IOException { this.fileNum = fileNum; this.file = file; - this.group = group; + this.bundle = bundle; this.maxSize = maxSize; final FileOutputStream outStream = closer.register(new FileOutputStream(file)); this.channel = closer.register(outStream.getChannel()); @@ -675,9 +684,9 @@ public void close() throws IOException closer.close(); if (LOG.isDebugEnabled()) { LOG.debug( - "Created container file[%s] for group[%s] of size[%,d] bytes.", + "Created container file[%s] for bundle[%s] of size[%,d] bytes.", file.getAbsolutePath(), - group, + bundle, file.length() ); } diff --git a/processing/src/main/java/org/apache/druid/segment/file/SegmentFileContainerMetadata.java b/processing/src/main/java/org/apache/druid/segment/file/SegmentFileContainerMetadata.java index 3739eb7718b8..45005a5a9f4d 100644 --- a/processing/src/main/java/org/apache/druid/segment/file/SegmentFileContainerMetadata.java +++ b/processing/src/main/java/org/apache/druid/segment/file/SegmentFileContainerMetadata.java @@ -22,6 +22,7 @@ import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonInclude; import com.fasterxml.jackson.annotation.JsonProperty; +import org.apache.druid.annotations.SuppressFBWarnings; import javax.annotation.Nullable; import java.util.Objects; @@ -30,30 +31,29 @@ * Starting offset and size of a 'container' stored in a V10 segment file; think the V10 equivalent of V9's 
external * 'smoosh' files, e.g. 00000.smoosh. *

- * Each container holds internal files belonging to at most one named file group, as declared at write time via - * {@link SegmentFileBuilder#startFileGroup}. The {@link #fileGroup} field records that name so readers can attribute - * a container to its group without parsing internal-file names. The field is {@code null} for containers written - * without a {@code startFileGroup} call (or with {@code startFileGroup(null)}), and for containers from segments - * produced by writers that pre-date this field; null serializes as a Jackson-omitted property so old segments - * round-trip unchanged. + * Each container holds internal files belonging to exactly one named bundle, as declared at write time via + * {@link SegmentFileBuilder#startFileBundle}. The {@link #bundle} field records that name so readers can attribute a + * container to its bundle without parsing internal-file names. Containers written without an explicit + * {@code startFileBundle} call are tagged with {@link SegmentFileBuilder#ROOT_BUNDLE_NAME}; that default value is + * omitted from JSON output, so segments produced by writers pre-dating this field deserialize cleanly (missing + * property normalizes to the default in the constructor). */ public class SegmentFileContainerMetadata { private final long startOffset; private final long size; - @Nullable - private final String fileGroup; + private final String bundle; @JsonCreator public SegmentFileContainerMetadata( @JsonProperty("startOffset") long startOffset, @JsonProperty("size") long size, - @JsonProperty("fileGroup") @Nullable String fileGroup + @JsonProperty("bundle") @Nullable String bundle ) { this.startOffset = startOffset; this.size = size; - this.fileGroup = fileGroup; + this.bundle = bundle == null ? 
SegmentFileBuilder.ROOT_BUNDLE_NAME : bundle; } @JsonProperty @@ -69,11 +69,10 @@ public long getSize() } @JsonProperty - @JsonInclude(JsonInclude.Include.NON_NULL) - @Nullable - public String getFileGroup() + @JsonInclude(value = JsonInclude.Include.CUSTOM, valueFilter = DefaultBundleFilter.class) + public String getBundle() { - return fileGroup; + return bundle; } @Override @@ -88,13 +87,13 @@ public boolean equals(Object o) SegmentFileContainerMetadata that = (SegmentFileContainerMetadata) o; return startOffset == that.startOffset && size == that.size - && Objects.equals(fileGroup, that.fileGroup); + && Objects.equals(bundle, that.bundle); } @Override public int hashCode() { - return Objects.hash(startOffset, size, fileGroup); + return Objects.hash(startOffset, size, bundle); } @Override @@ -103,7 +102,32 @@ public String toString() return "SegmentFileContainerMetadata{" + "startOffset=" + startOffset + ", size=" + size - + ", fileGroup=" + fileGroup + + ", bundle=" + bundle + '}'; } + + /** + * Jackson {@code valueFilter} that omits the {@code bundle} field from JSON when it carries the + * {@link SegmentFileBuilder#ROOT_BUNDLE_NAME} default. Jackson invokes {@code equals(value)} against the filter + * instance with the property value (a {@link String} here, not another filter): returning {@code true} means + * "value equals default, omit it." The asymmetric equals contract is intentional and required by Jackson's filter + * API, so the standard same-class check would defeat the mechanism. 
+ */ + static final class DefaultBundleFilter + { + + @Override + @SuppressWarnings("EqualsDoesntCheckParameterClass") + @SuppressFBWarnings("EQ_CHECK_FOR_OPERAND_NOT_COMPATIBLE_WITH_THIS") + public boolean equals(Object value) + { + return SegmentFileBuilder.ROOT_BUNDLE_NAME.equals(value); + } + + @Override + public int hashCode() + { + return 0; + } + } } diff --git a/processing/src/main/java/org/apache/druid/segment/incremental/NoopRowIngestionMeters.java b/processing/src/main/java/org/apache/druid/segment/incremental/NoopRowIngestionMeters.java index 67eb80a2dc8b..6b7786d5605f 100644 --- a/processing/src/main/java/org/apache/druid/segment/incremental/NoopRowIngestionMeters.java +++ b/processing/src/main/java/org/apache/druid/segment/incremental/NoopRowIngestionMeters.java @@ -23,9 +23,8 @@ import java.util.Map; /** - * This class is used only in {@code DartFrameContext}. - * - * Consider using {@link RowIngestionMetersFactory} instead. + * This class is used when stats are not needed. Consider using {@link RowIngestionMetersFactory} instead + * when stats are needed. 
*/ public class NoopRowIngestionMeters implements RowIngestionMeters { diff --git a/processing/src/main/java/org/apache/druid/segment/transform/Transformer.java b/processing/src/main/java/org/apache/druid/segment/transform/Transformer.java index 2ff263a64738..04186bd70145 100644 --- a/processing/src/main/java/org/apache/druid/segment/transform/Transformer.java +++ b/processing/src/main/java/org/apache/druid/segment/transform/Transformer.java @@ -72,26 +72,46 @@ public class Transformer @Nullable public InputRow transform(@Nullable final InputRow row) { - if (row == null) { + final InputRow transformedRow = transformWithoutFilter(row); + if (!rowMatchesFilter(transformedRow)) { return null; } + return transformedRow; + } - final InputRow transformedRow; + @Nullable + public InputRow transformWithoutFilter(@Nullable final InputRow row) + { + if (row == null) { + return null; + } if (transforms.isEmpty()) { - transformedRow = row; + return row; } else { - transformedRow = new TransformedInputRow(row, transforms); + return new TransformedInputRow(row, transforms); } + } - if (valueMatcher != null) { - rowSupplierForValueMatcher.set(transformedRow); - if (!valueMatcher.matches(false)) { - return null; - } + /** + * Returns true if this transformer has a {@link TransformSpec} filter to apply via {@link #rowMatchesFilter}. + */ + public boolean hasFilter() + { + return valueMatcher != null; + } + + /** + * Applies the {@link TransformSpec} filter to a row that has already had transforms applied. 
+ */ + public boolean rowMatchesFilter(@Nullable final InputRow transformedRow) + { + if (transformedRow == null || valueMatcher == null) { + return true; } - return transformedRow; + rowSupplierForValueMatcher.set(transformedRow); + return valueMatcher.matches(false); } @Nullable diff --git a/processing/src/main/java/org/apache/druid/segment/transform/TransformingInputEntityReader.java b/processing/src/main/java/org/apache/druid/segment/transform/TransformingInputEntityReader.java index 33bed4658691..945168cd5dd3 100644 --- a/processing/src/main/java/org/apache/druid/segment/transform/TransformingInputEntityReader.java +++ b/processing/src/main/java/org/apache/druid/segment/transform/TransformingInputEntityReader.java @@ -30,17 +30,40 @@ public class TransformingInputEntityReader implements InputEntityReader { private final InputEntityReader delegate; private final Transformer transformer; + private final boolean applyFilter; - public TransformingInputEntityReader(InputEntityReader delegate, Transformer transformer) + public TransformingInputEntityReader(final InputEntityReader delegate, final Transformer transformer) + { + this(delegate, transformer, true); + } + + public static TransformingInputEntityReader withoutFilter( + final InputEntityReader delegate, + final Transformer transformer + ) + { + return new TransformingInputEntityReader(delegate, transformer, false); + } + + private TransformingInputEntityReader( + final InputEntityReader delegate, + final Transformer transformer, + final boolean applyFilter + ) { this.delegate = delegate; this.transformer = transformer; + this.applyFilter = applyFilter; } @Override public CloseableIterator read() throws IOException { - return delegate.read().map(transformer::transform); + if (applyFilter) { + return delegate.read().map(transformer::transform); + } else { + return delegate.read().map(transformer::transformWithoutFilter); + } } @Override diff --git 
a/processing/src/test/java/org/apache/druid/common/asyncresource/CollectAsyncResourceTest.java b/processing/src/test/java/org/apache/druid/common/asyncresource/CollectAsyncResourceTest.java new file mode 100644 index 000000000000..40ce3e9b4bc8 --- /dev/null +++ b/processing/src/test/java/org/apache/druid/common/asyncresource/CollectAsyncResourceTest.java @@ -0,0 +1,131 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.common.asyncresource; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +import java.util.List; +import java.util.concurrent.atomic.AtomicInteger; + +public class CollectAsyncResourceTest +{ + @Test + public void testEmptyListCompletesImmediatelyWithEmptyResult() + { + final List> sources = List.of(); + final AsyncResource> collected = AsyncResources.collect(sources); + + Assertions.assertTrue(collected.isReady(), "collect of an empty list must be immediately ready"); + Assertions.assertEquals(List.of(), collected.get()); + collected.close(); + } + + @Test + public void testReadyOnlyAfterAllSourcesReadyAndPreservesOrder() + { + final SettableAsyncResource a = new SettableAsyncResource<>(); + final SettableAsyncResource b = new SettableAsyncResource<>(); + final SettableAsyncResource c = new SettableAsyncResource<>(); + final List> sources = List.of(a, b, c); + final AsyncResource> collected = AsyncResources.collect(sources); + + Assertions.assertFalse(collected.isReady()); + + // Complete out of order; the result must still be in source order. 
+ c.set("c", null); + Assertions.assertFalse(collected.isReady()); + a.set("a", null); + Assertions.assertFalse(collected.isReady()); + b.set("b", null); + + Assertions.assertTrue(collected.isReady()); + Assertions.assertEquals(List.of("a", "b", "c"), collected.get()); + collected.close(); + } + + @Test + public void testAlreadyReadySourcesCompleteOnConstruction() + { + final SettableAsyncResource a = new SettableAsyncResource<>(); + final SettableAsyncResource b = new SettableAsyncResource<>(); + a.set("a", null); + b.set("b", null); + + final List> sources = List.of(a, b); + final AsyncResource> collected = AsyncResources.collect(sources); + + Assertions.assertTrue(collected.isReady()); + Assertions.assertEquals(List.of("a", "b"), collected.get()); + collected.close(); + } + + @Test + public void testSourceFailurePropagates() + { + final SettableAsyncResource a = new SettableAsyncResource<>(); + final SettableAsyncResource b = new SettableAsyncResource<>(); + final List> sources = List.of(a, b); + final AsyncResource> collected = AsyncResources.collect(sources); + + final RuntimeException failure = new RuntimeException("boom"); + a.set("a", null); + b.setException(failure); + + Assertions.assertTrue(collected.isReady()); + final RuntimeException thrown = Assertions.assertThrows(RuntimeException.class, collected::get); + Assertions.assertSame(failure, thrown); + collected.close(); + } + + @Test + public void testCloseClosesAllReadySources() + { + final AtomicInteger aClose = new AtomicInteger(); + final AtomicInteger bClose = new AtomicInteger(); + final SettableAsyncResource a = new SettableAsyncResource<>(); + final SettableAsyncResource b = new SettableAsyncResource<>(); + a.set("a", aClose::incrementAndGet); + b.set("b", bClose::incrementAndGet); + + final List> sources = List.of(a, b); + final AsyncResource> collected = AsyncResources.collect(sources); + Assertions.assertTrue(collected.isReady()); + + collected.close(); + Assertions.assertEquals(1, 
aClose.get()); + Assertions.assertEquals(1, bClose.get()); + } + + @Test + public void testCloseCancelsPendingSources() + { + final AtomicInteger aCancel = new AtomicInteger(); + final SettableAsyncResource a = new SettableAsyncResource<>(); + a.setCanceler(aCancel::incrementAndGet); + + final List> sources = List.of(a); + final AsyncResource> collected = AsyncResources.collect(sources); + Assertions.assertFalse(collected.isReady()); + + collected.close(); + Assertions.assertEquals(1, aCancel.get(), "closing the collect must cancel pending sources"); + } +} diff --git a/processing/src/test/java/org/apache/druid/common/asyncresource/RecoverAsyncResourceTest.java b/processing/src/test/java/org/apache/druid/common/asyncresource/RecoverAsyncResourceTest.java new file mode 100644 index 000000000000..9a3c695de61b --- /dev/null +++ b/processing/src/test/java/org/apache/druid/common/asyncresource/RecoverAsyncResourceTest.java @@ -0,0 +1,184 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.common.asyncresource; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicReference; + +public class RecoverAsyncResourceTest +{ + @Test + public void testSourceSuccessPassesThroughAndDoesNotRecover() + { + final TrackingAsyncResource source = new TrackingAsyncResource<>(); + final AtomicInteger recoveryCalls = new AtomicInteger(); + + final AsyncResource recovering = AsyncResources.recover( + source, + e -> { + recoveryCalls.incrementAndGet(); + return "fallback"; + } + ); + + source.delegate.set("value", null); + + Assertions.assertTrue(recovering.isReady()); + Assertions.assertEquals("value", recovering.get()); + Assertions.assertEquals(0, recoveryCalls.get(), "recovery must not be called on success"); + Assertions.assertEquals(0, source.closeCount.get(), "source must not be closed before close()"); + + recovering.close(); + Assertions.assertEquals(1, source.closeCount.get(), "close() must close the source"); + } + + @Test + public void testSourceFailureRecoversWithFallbackValue() + { + final TrackingAsyncResource source = new TrackingAsyncResource<>(); + final AtomicReference recoveryArg = new AtomicReference<>(); + + final AsyncResource recovering = AsyncResources.recover( + source, + e -> { + recoveryArg.set(e); + return "fallback"; + } + ); + + final RuntimeException failure = new RuntimeException("boom"); + source.delegate.setException(failure); + + Assertions.assertTrue(recovering.isReady()); + Assertions.assertEquals("fallback", recovering.get()); + Assertions.assertSame(failure, recoveryArg.get(), "recovery receives the source's error"); + Assertions.assertEquals( + 1, + source.closeCount.get(), + "source must be closed eagerly during recovery, before the wrapper is closed" + ); + + // Closing the wrapper does not double-close the source. 
+ recovering.close(); + Assertions.assertEquals(1, source.closeCount.get()); + } + + @Test + public void testSourceFailureWithNullRecoveryPropagatesError() + { + final TrackingAsyncResource source = new TrackingAsyncResource<>(); + + final AsyncResource recovering = AsyncResources.recover( + source, + e -> null + ); + + final RuntimeException failure = new RuntimeException("boom"); + source.delegate.setException(failure); + + Assertions.assertTrue(recovering.isReady()); + final RuntimeException thrown = Assertions.assertThrows(RuntimeException.class, recovering::get); + Assertions.assertSame(failure, thrown); + Assertions.assertEquals(0, source.closeCount.get(), "source must not be closed until close() when not recovering"); + + recovering.close(); + Assertions.assertEquals(1, source.closeCount.get()); + } + + @Test + public void testRecoveryThrowsPropagatesWithOriginalSuppressed() + { + final TrackingAsyncResource source = new TrackingAsyncResource<>(); + final RuntimeException recoveryFailure = new RuntimeException("recovery failed"); + + final AsyncResource recovering = AsyncResources.recover( + source, + e -> { + throw recoveryFailure; + } + ); + + final RuntimeException original = new RuntimeException("boom"); + source.delegate.setException(original); + + Assertions.assertTrue(recovering.isReady()); + final RuntimeException thrown = Assertions.assertThrows(RuntimeException.class, recovering::get); + Assertions.assertSame(original, thrown); + Assertions.assertEquals(1, thrown.getSuppressed().length); + Assertions.assertSame(recoveryFailure, thrown.getSuppressed()[0]); + } + + @Test + public void testCloseBeforeReadyClosesSource() + { + final TrackingAsyncResource source = new TrackingAsyncResource<>(); + + final AsyncResource recovering = AsyncResources.recover( + source, + e -> "fallback" + ); + + // Close the wrapper while the source is still pending. 
+ recovering.close(); + Assertions.assertEquals(1, source.closeCount.get()); + Assertions.assertFalse(recovering.isReady()); + + // Completing the source after close is a no-op and must not throw. + Assertions.assertFalse(source.delegate.set("late", null)); + } + + /** + * An {@link AsyncResource} that delegates to a {@link SettableAsyncResource} and counts {@link #close()} calls, + * so tests can verify how the source resource's lifecycle is managed. + */ + private static class TrackingAsyncResource implements AsyncResource + { + private final SettableAsyncResource delegate = new SettableAsyncResource<>(); + private final AtomicInteger closeCount = new AtomicInteger(); + + @Override + public boolean isReady() + { + return delegate.isReady(); + } + + @Override + public void addReadyCallback(Runnable callback) + { + delegate.addReadyCallback(callback); + } + + @Override + public T get() + { + return delegate.get(); + } + + @Override + public void close() + { + closeCount.incrementAndGet(); + delegate.close(); + } + } +} diff --git a/processing/src/test/java/org/apache/druid/common/asyncresource/SettableAsyncResourceTest.java b/processing/src/test/java/org/apache/druid/common/asyncresource/SettableAsyncResourceTest.java new file mode 100644 index 000000000000..43def411a362 --- /dev/null +++ b/processing/src/test/java/org/apache/druid/common/asyncresource/SettableAsyncResourceTest.java @@ -0,0 +1,276 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.common.asyncresource; + +import org.apache.druid.collections.ResourceHolder; +import org.apache.druid.error.DruidException; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +import java.io.IOException; +import java.util.concurrent.TimeoutException; +import java.util.concurrent.atomic.AtomicInteger; + +public class SettableAsyncResourceTest +{ + @Test + public void testSetThenGetAndReady() + { + final SettableAsyncResource resource = new SettableAsyncResource<>(); + Assertions.assertFalse(resource.isReady()); + + Assertions.assertTrue(resource.set("value", null)); + Assertions.assertTrue(resource.isReady()); + Assertions.assertEquals("value", resource.get()); + } + + @Test + public void testGetBeforeReadyThrows() + { + final SettableAsyncResource resource = new SettableAsyncResource<>(); + Assertions.assertThrows(DruidException.class, resource::get); + } + + @Test + public void testSetNullObjectThrows() + { + final SettableAsyncResource resource = new SettableAsyncResource<>(); + Assertions.assertThrows(DruidException.class, () -> resource.set(null, null)); + } + + @Test + public void testCloseClosesTheResourceExactlyOnce() + { + final AtomicInteger closeCount = new AtomicInteger(); + final SettableAsyncResource resource = new SettableAsyncResource<>(); + resource.set("value", closeCount::incrementAndGet); + + resource.close(); + Assertions.assertEquals(1, closeCount.get()); + } + + @Test + public void testSetWithResourceHolder() + { + final AtomicInteger closeCount = new 
AtomicInteger(); + final TrackedCloseable closeable = new TrackedCloseable("value", closeCount); + final SettableAsyncResource resource = new SettableAsyncResource<>(); + Assertions.assertTrue(resource.set(ResourceHolder.fromCloseable(closeable))); + + Assertions.assertSame(closeable, resource.get()); + resource.close(); + Assertions.assertEquals(1, closeCount.get()); + } + + @Test + public void testDoubleSetThrows() + { + final SettableAsyncResource resource = new SettableAsyncResource<>(); + resource.set("value", null); + Assertions.assertThrows(DruidException.class, () -> resource.set("again", null)); + } + + @Test + public void testSetAfterExceptionThrows() + { + final SettableAsyncResource resource = new SettableAsyncResource<>(); + resource.setException(new RuntimeException("boom")); + Assertions.assertThrows(DruidException.class, () -> resource.set("value", null)); + } + + @Test + public void testSetExceptionUncheckedRethrownAsIs() + { + final SettableAsyncResource resource = new SettableAsyncResource<>(); + final RuntimeException failure = new RuntimeException("boom"); + resource.setException(failure); + + Assertions.assertTrue(resource.isReady()); + final RuntimeException thrown = Assertions.assertThrows(RuntimeException.class, resource::get); + Assertions.assertSame(failure, thrown); + } + + @Test + public void testSetExceptionCheckedWrappedInDruidException() + { + final SettableAsyncResource resource = new SettableAsyncResource<>(); + resource.setException(new IOException("boom")); + + Assertions.assertTrue(resource.isReady()); + Assertions.assertThrows(DruidException.class, resource::get); + } + + @Test + public void testReadyCallbackFiresImmediatelyWhenAlreadyReady() + { + final SettableAsyncResource resource = new SettableAsyncResource<>(); + resource.set("value", null); + + final AtomicInteger fired = new AtomicInteger(); + resource.addReadyCallback(fired::incrementAndGet); + Assertions.assertEquals(1, fired.get()); + } + + @Test + public void 
testReadyCallbackFiresOnSet() + { + final SettableAsyncResource resource = new SettableAsyncResource<>(); + + final AtomicInteger fired = new AtomicInteger(); + resource.addReadyCallback(fired::incrementAndGet); + Assertions.assertEquals(0, fired.get()); + + resource.set("value", null); + Assertions.assertEquals(1, fired.get()); + } + + @Test + public void testReadyCallbackFiresOnSetException() + { + final SettableAsyncResource resource = new SettableAsyncResource<>(); + + final AtomicInteger fired = new AtomicInteger(); + resource.addReadyCallback(fired::incrementAndGet); + resource.setException(new RuntimeException("boom")); + Assertions.assertEquals(1, fired.get()); + } + + @Test + public void testAddReadyCallbackAfterCloseThrows() + { + final SettableAsyncResource resource = new SettableAsyncResource<>(); + resource.close(); + Assertions.assertThrows(DruidException.class, () -> resource.addReadyCallback(() -> {})); + } + + @Test + public void testCloseInNewStateRunsCanceler() + { + final AtomicInteger canceled = new AtomicInteger(); + final SettableAsyncResource resource = new SettableAsyncResource<>(); + resource.setCanceler(canceled::incrementAndGet); + + resource.close(); + Assertions.assertEquals(1, canceled.get()); + } + + @Test + public void testCancelerNotRunOnceReady() + { + final AtomicInteger canceled = new AtomicInteger(); + final AtomicInteger closeCount = new AtomicInteger(); + final SettableAsyncResource resource = new SettableAsyncResource<>(); + resource.setCanceler(canceled::incrementAndGet); + + // Becoming ready clears the canceler; close() must then close the resource, not cancel. 
+ resource.set("value", closeCount::incrementAndGet); + resource.close(); + + Assertions.assertEquals(0, canceled.get(), "canceler must not run once the resource is ready"); + Assertions.assertEquals(1, closeCount.get()); + } + + @Test + public void testSetCancelerTwiceThrows() + { + final SettableAsyncResource resource = new SettableAsyncResource<>(); + resource.setCanceler(() -> {}); + Assertions.assertThrows(DruidException.class, () -> resource.setCanceler(() -> {})); + } + + @Test + public void testSetAfterCloseReturnsFalseAndDoesNotCloseTheResource() + { + final AtomicInteger closeCount = new AtomicInteger(); + final SettableAsyncResource resource = new SettableAsyncResource<>(); + resource.close(); + + // Producer racing with close: set() returns false, and the resource is NOT closed by the wrapper, so the + // producer remains responsible for closing it. + Assertions.assertFalse(resource.set("value", closeCount::incrementAndGet)); + Assertions.assertEquals(0, closeCount.get()); + } + + @Test + public void testDoubleCloseThrows() + { + final SettableAsyncResource resource = new SettableAsyncResource<>(); + resource.set("value", null); + resource.close(); + Assertions.assertThrows(DruidException.class, resource::close); + } + + @Test + public void testAwaitReturnsValueWhenReady() throws InterruptedException + { + final SettableAsyncResource resource = new SettableAsyncResource<>(); + resource.set("value", null); + Assertions.assertEquals("value", resource.await()); + } + + @Test + public void testAwaitWithTimeoutReturnsValueWhenReady() throws Exception + { + final SettableAsyncResource resource = new SettableAsyncResource<>(); + resource.set("value", null); + Assertions.assertEquals("value", resource.await(1000)); + } + + @Test + public void testAwaitWithTimeoutThrowsWhenNotReady() + { + final SettableAsyncResource resource = new SettableAsyncResource<>(); + Assertions.assertThrows(TimeoutException.class, () -> resource.await(10)); + } + + @Test + public void 
testAwaitWakesUpWhenSetFromAnotherThread() throws Exception + { + final SettableAsyncResource resource = new SettableAsyncResource<>(); + final Thread setter = new Thread(() -> resource.set("value", null)); + setter.start(); + Assertions.assertEquals("value", resource.await()); + setter.join(); + } + + private static class TrackedCloseable implements java.io.Closeable + { + private final String value; + private final AtomicInteger closeCount; + + TrackedCloseable(final String value, final AtomicInteger closeCount) + { + this.value = value; + this.closeCount = closeCount; + } + + @Override + public String toString() + { + return value; + } + + @Override + public void close() + { + closeCount.incrementAndGet(); + } + } +} diff --git a/processing/src/test/java/org/apache/druid/common/asyncresource/TransformAsyncResourceTest.java b/processing/src/test/java/org/apache/druid/common/asyncresource/TransformAsyncResourceTest.java new file mode 100644 index 000000000000..2a066eef7fcd --- /dev/null +++ b/processing/src/test/java/org/apache/druid/common/asyncresource/TransformAsyncResourceTest.java @@ -0,0 +1,156 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.common.asyncresource; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +import java.io.Closeable; +import java.util.concurrent.atomic.AtomicInteger; + +public class TransformAsyncResourceTest +{ + @Test + public void testTransformAppliesFunction() + { + final SettableAsyncResource source = new SettableAsyncResource<>(); + final AsyncResource transformed = AsyncResources.transform(source, i -> "v" + i); + + Assertions.assertFalse(transformed.isReady()); + source.set(42, null); + + Assertions.assertTrue(transformed.isReady()); + Assertions.assertEquals("v42", transformed.get()); + transformed.close(); + } + + @Test + public void testTransformOfAlreadyReadySourceFiresImmediately() + { + final SettableAsyncResource source = new SettableAsyncResource<>(); + source.set(7, null); + + final AsyncResource transformed = AsyncResources.transform(source, i -> "v" + i); + Assertions.assertTrue(transformed.isReady()); + Assertions.assertEquals("v7", transformed.get()); + transformed.close(); + } + + @Test + public void testFunctionCalledOnceAndLazily() + { + final AtomicInteger calls = new AtomicInteger(); + final SettableAsyncResource source = new SettableAsyncResource<>(); + final AsyncResource transformed = AsyncResources.transform( + source, + i -> { + calls.incrementAndGet(); + return "v" + i; + } + ); + + Assertions.assertEquals(0, calls.get(), "function must not run before the source is ready"); + source.set(1, null); + Assertions.assertEquals(1, calls.get()); + + // Repeated get() must not re-run the function. 
+ transformed.get(); + transformed.get(); + Assertions.assertEquals(1, calls.get()); + transformed.close(); + } + + @Test + public void testSourceFailurePropagatesAndFunctionNotCalled() + { + final AtomicInteger calls = new AtomicInteger(); + final SettableAsyncResource source = new SettableAsyncResource<>(); + final AsyncResource transformed = AsyncResources.transform( + source, + i -> { + calls.incrementAndGet(); + return "v" + i; + } + ); + + final RuntimeException failure = new RuntimeException("boom"); + source.setException(failure); + + Assertions.assertTrue(transformed.isReady()); + final RuntimeException thrown = Assertions.assertThrows(RuntimeException.class, transformed::get); + Assertions.assertSame(failure, thrown); + Assertions.assertEquals(0, calls.get(), "function must not run when the source failed"); + transformed.close(); + } + + @Test + public void testFunctionThrowsPropagates() + { + final RuntimeException failure = new RuntimeException("function boom"); + final SettableAsyncResource source = new SettableAsyncResource<>(); + final AsyncResource transformed = AsyncResources.transform( + source, + i -> { + throw failure; + } + ); + + source.set(1, null); + + Assertions.assertTrue(transformed.isReady()); + final RuntimeException thrown = Assertions.assertThrows(RuntimeException.class, transformed::get); + Assertions.assertSame(failure, thrown); + transformed.close(); + } + + @Test + public void testCloseClosesSourceButNotTransformedValue() + { + final AtomicInteger sourceClose = new AtomicInteger(); + final AtomicInteger targetClose = new AtomicInteger(); + final SettableAsyncResource source = new SettableAsyncResource<>(); + + // The function's result is Closeable, but transform must NOT manage its lifecycle (it only closes the source). 
+ final Closeable target = targetClose::incrementAndGet; + final AsyncResource transformed = AsyncResources.transform(source, i -> target); + + source.set(1, sourceClose::incrementAndGet); + Assertions.assertTrue(transformed.isReady()); + Assertions.assertSame(target, transformed.get()); + + transformed.close(); + Assertions.assertEquals(1, sourceClose.get(), "the source resource must be closed"); + Assertions.assertEquals(0, targetClose.get(), "the transformed value must not be closed by the transform"); + } + + @Test + public void testCloseBeforeReadyCancelsSource() + { + final AtomicInteger sourceCancel = new AtomicInteger(); + final SettableAsyncResource source = new SettableAsyncResource<>(); + source.setCanceler(sourceCancel::incrementAndGet); + + final AsyncResource transformed = AsyncResources.transform(source, i -> "v" + i); + Assertions.assertFalse(transformed.isReady()); + + transformed.close(); + Assertions.assertEquals(1, sourceCancel.get()); + } +} diff --git a/processing/src/test/java/org/apache/druid/data/input/ResourceInputSource.java b/processing/src/test/java/org/apache/druid/data/input/ResourceInputSource.java index b052d5058480..1ecdaf0500b4 100644 --- a/processing/src/test/java/org/apache/druid/data/input/ResourceInputSource.java +++ b/processing/src/test/java/org/apache/druid/data/input/ResourceInputSource.java @@ -119,8 +119,13 @@ public URI getUri() @Override public InputStream open() throws IOException { - final InputStream resourceStream = classLoader.getResourceAsStream(resourceFile); - return CompressionUtils.decompress(resourceStream, resourceFile); + return CompressionUtils.decompress(openRaw(), resourceFile); + } + + @Override + public InputStream openRaw() + { + return classLoader.getResourceAsStream(resourceFile); } } } diff --git a/processing/src/test/java/org/apache/druid/data/input/impl/CloudObjectInputSourceTest.java b/processing/src/test/java/org/apache/druid/data/input/impl/CloudObjectInputSourceTest.java index 
e0daac8c7e99..a0ac99f14461 100644 --- a/processing/src/test/java/org/apache/druid/data/input/impl/CloudObjectInputSourceTest.java +++ b/processing/src/test/java/org/apache/druid/data/input/impl/CloudObjectInputSourceTest.java @@ -19,22 +19,33 @@ package org.apache.druid.data.input.impl; +import com.google.common.base.Predicates; import com.google.common.collect.Lists; import org.apache.druid.data.input.InputEntity; +import org.apache.druid.data.input.InputFilePointer; import org.apache.druid.data.input.InputSplit; import org.apache.druid.data.input.MaxSizeSplitHintSpec; +import org.apache.druid.data.input.impl.systemfield.SystemField; import org.apache.druid.data.input.impl.systemfield.SystemFields; import org.apache.druid.java.util.common.parsers.JSONPathSpec; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; +import org.mockito.ArgumentMatchers; import org.mockito.Mockito; +import java.io.ByteArrayInputStream; +import java.io.File; +import java.io.IOException; import java.net.URI; +import java.nio.charset.StandardCharsets; import java.nio.file.FileSystems; +import java.nio.file.Files; import java.nio.file.PathMatcher; import java.nio.file.Paths; import java.util.Arrays; import java.util.Collections; +import java.util.EnumSet; import java.util.Iterator; import java.util.List; import java.util.stream.Collectors; @@ -226,6 +237,110 @@ public void testWithObjects() Assertions.assertEquals(OBJECTS.size(), entities.size()); } + @Test + public void test_asFilePointers_withUris() + { + final CloudObjectInputSource inputSource = Mockito.mock(CloudObjectInputSource.class, Mockito.withSettings() + .useConstructor(SCHEME, URIS2, null, null, null, SystemFields.none()) + .defaultAnswer(Mockito.CALLS_REAL_METHODS) + ); + Mockito.when(inputSource.getSplitWidget()).thenReturn(new MockSplitWidget(123L)); + + final List pointers = inputSource.asFilePointers(); + + Assertions.assertEquals( + URIS2, + 
pointers.stream().map(InputFilePointer::uri).collect(Collectors.toList()) + ); + + // sizeSupplier defers to the split widget. + Assertions.assertEquals(123L, pointers.get(0).sizeSupplier().getAsLong()); + } + + @Test + public void test_asFilePointers_withObjects() + { + final CloudObjectInputSource inputSource = Mockito.mock(CloudObjectInputSource.class, Mockito.withSettings() + .useConstructor(SCHEME, null, null, OBJECTS_BEFORE_GLOB, null, SystemFields.none()) + .defaultAnswer(Mockito.CALLS_REAL_METHODS) + ); + Mockito.when(inputSource.getSplitWidget()).thenReturn(new MockSplitWidget()); + + final List pointers = inputSource.asFilePointers(); + + Assertions.assertEquals( + OBJECTS_BEFORE_GLOB.stream().map(object -> object.toUri(SCHEME)).collect(Collectors.toList()), + pointers.stream().map(InputFilePointer::uri).collect(Collectors.toList()) + ); + } + + @Test + public void test_asFilePointers_withObjectGlob() + { + final CloudObjectInputSource inputSource = Mockito.mock(CloudObjectInputSource.class, Mockito.withSettings() + .useConstructor(SCHEME, null, null, OBJECTS_BEFORE_GLOB, "**.csv", SystemFields.none()) + .defaultAnswer(Mockito.CALLS_REAL_METHODS) + ); + Mockito.when(inputSource.getSplitWidget()).thenReturn(new MockSplitWidget()); + + final List pointers = inputSource.asFilePointers(); + + // Only the .csv object survives the glob; the .parquet object is filtered out. + Assertions.assertEquals( + URIS, + pointers.stream().map(InputFilePointer::uri).collect(Collectors.toList()) + ); + } + + @Test + public void test_asFilePointers_withSystemFieldsReturnsNull() + { + final CloudObjectInputSource inputSource = Mockito.mock(CloudObjectInputSource.class, Mockito.withSettings() + .useConstructor(SCHEME, URIS, null, null, null, new SystemFields(EnumSet.of(SystemField.URI))) + .defaultAnswer(Mockito.CALLS_REAL_METHODS) + ); + + // System fields cannot be expressed as file pointers. 
+ Assertions.assertNull(inputSource.asFilePointers()); + } + + @Test + public void test_asFilePointers_withPrefixesReturnsNull() + { + final CloudObjectInputSource inputSource = Mockito.mock(CloudObjectInputSource.class, Mockito.withSettings() + .useConstructor(SCHEME, null, PREFIXES, null, null, SystemFields.none()) + .defaultAnswer(Mockito.CALLS_REAL_METHODS) + ); + + // Prefixes are not expressed as file pointers. + Assertions.assertNull(inputSource.asFilePointers()); + } + + @Test + public void test_asFilePointers_populatorFetchesContent(@TempDir File tempDir) throws IOException + { + final byte[] content = "hello,world\n1,2\n".getBytes(StandardCharsets.UTF_8); + + final InputEntity entity = Mockito.mock(InputEntity.class); + Mockito.when(entity.openRaw()).thenReturn(new ByteArrayInputStream(content)); + Mockito.when(entity.getRetryCondition()).thenReturn(Predicates.alwaysFalse()); + + final CloudObjectInputSource inputSource = Mockito.mock(CloudObjectInputSource.class, Mockito.withSettings() + .useConstructor(SCHEME, URIS, null, null, null, SystemFields.none()) + .defaultAnswer(Mockito.CALLS_REAL_METHODS) + ); + Mockito.when(inputSource.getSplitWidget()).thenReturn(new MockSplitWidget()); + Mockito.doReturn(entity).when(inputSource).createEntity(ArgumentMatchers.any()); + + final List pointers = inputSource.asFilePointers(); + Assertions.assertEquals(1, pointers.size()); + + final File dstFile = new File(tempDir, "fetched.csv"); + pointers.get(0).populator().populate(dstFile); + + Assertions.assertArrayEquals(content, Files.readAllBytes(dstFile.toPath())); + } + @Test public void testGlobSubdirectories() { @@ -240,6 +355,18 @@ public void testGlobSubdirectories() private static class MockSplitWidget implements CloudObjectSplitWidget { + private final long objectSize; + + MockSplitWidget() + { + this(0); + } + + MockSplitWidget(final long objectSize) + { + this.objectSize = objectSize; + } + @Override public Iterator getDescriptorIteratorForPrefixes(List 
prefixes) { @@ -249,7 +376,7 @@ public Iterator getDescriptorIteratorForPrefixes(List pre @Override public long getObjectSize(CloudObjectLocation descriptor) { - return 0; + return objectSize; } } } diff --git a/processing/src/test/java/org/apache/druid/data/input/impl/FileEntityTest.java b/processing/src/test/java/org/apache/druid/data/input/impl/FileEntityTest.java new file mode 100644 index 000000000000..315c942bc45e --- /dev/null +++ b/processing/src/test/java/org/apache/druid/data/input/impl/FileEntityTest.java @@ -0,0 +1,125 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.data.input.impl; + +import org.apache.commons.io.IOUtils; +import org.apache.druid.data.input.InputEntity; +import org.apache.druid.data.input.InputEntity.CleanableFile; +import org.apache.druid.java.util.common.StringUtils; +import org.apache.druid.utils.CompressionUtils; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; + +public class FileEntityTest +{ + private static final String CONTENT = "the quick brown fox\n"; + + @TempDir + File tempDir; + + @Test + public void test_openRaw_returnsRawBytesWithoutDecompressing() throws IOException + { + final File gzFile = gzipFile("data.txt.gz", CONTENT); + try (InputStream in = new FileEntity(gzFile).openRaw()) { + Assertions.assertArrayEquals(Files.readAllBytes(gzFile.toPath()), IOUtils.toByteArray(in)); + } + } + + @Test + public void test_open_decompressesBasedOnFileName() throws IOException + { + final File gzFile = gzipFile("data.txt.gz", CONTENT); + try (InputStream in = new FileEntity(gzFile).open()) { + Assertions.assertEquals(CONTENT, IOUtils.toString(in, StandardCharsets.UTF_8)); + } + } + + @Test + public void test_open_returnsContentForUncompressedFile() throws IOException + { + final File file = writeFile("data.txt", CONTENT); + try (InputStream in = new FileEntity(file).open()) { + Assertions.assertEquals(CONTENT, IOUtils.toString(in, StandardCharsets.UTF_8)); + } + } + + @Test + public void test_fetch_uncompressedReturnsSameFileWithoutCopying() throws IOException + { + final File file = writeFile("data.txt", CONTENT); + final File fetchDir = makeDir("fetch-uncompressed"); + try (CleanableFile fetched = new FileEntity(file).fetch(fetchDir, fetchBuffer())) { + Assertions.assertEquals(file, fetched.file()); + } + // close() is a no-op for uncompressed 
files; the original file remains. + Assertions.assertTrue(file.exists()); + } + + @Test + public void test_fetch_compressedDecompressesIntoTempDirAndCleansUp() throws IOException + { + final File gzFile = gzipFile("data.txt.gz", CONTENT); + final File fetchDir = makeDir("fetch-compressed"); + final File fetchedFile; + try (CleanableFile fetched = new FileEntity(gzFile).fetch(fetchDir, fetchBuffer())) { + fetchedFile = fetched.file(); + Assertions.assertEquals(fetchDir, fetchedFile.getParentFile()); + Assertions.assertEquals(CONTENT, StringUtils.fromUtf8(Files.readAllBytes(fetchedFile.toPath()))); + } + // close() removes the decompressed temp file; the original compressed file remains. + Assertions.assertFalse(fetchedFile.exists()); + Assertions.assertTrue(gzFile.exists()); + } + + private File writeFile(final String name, final String content) throws IOException + { + final File file = new File(tempDir, name); + Files.write(file.toPath(), StringUtils.toUtf8(content)); + return file; + } + + private File gzipFile(final String name, final String content) throws IOException + { + final File source = writeFile("source-for-" + name, content); + final File gzFile = new File(tempDir, name); + CompressionUtils.gzip(source, gzFile); + return gzFile; + } + + private File makeDir(final String name) throws IOException + { + final File dir = new File(tempDir, name); + Files.createDirectories(dir.toPath()); + return dir; + } + + private static byte[] fetchBuffer() + { + return new byte[InputEntity.DEFAULT_FETCH_BUFFER_SIZE]; + } +} diff --git a/processing/src/test/java/org/apache/druid/guice/StartupInjectorBuilderTest.java b/processing/src/test/java/org/apache/druid/guice/StartupInjectorBuilderTest.java index ece5e78cbf72..c8264ec85231 100644 --- a/processing/src/test/java/org/apache/druid/guice/StartupInjectorBuilderTest.java +++ b/processing/src/test/java/org/apache/druid/guice/StartupInjectorBuilderTest.java @@ -215,6 +215,34 @@ public void 
testValidator_acceptsHttpServerViewType() new StartupInjectorBuilder().withExtensions().withProperties(props).build(); } + @Test + public void testValidator_rejectsRemoteIndexerRunnerType() + { + final Properties props = new Properties(); + props.setProperty(StartupInjectorBuilder.INDEXER_RUNNER_TYPE_CONFIG_STRING, "remote"); + + final StartupInjectorBuilder builder = new StartupInjectorBuilder().withExtensions().withProperties(props); + + Throwable t = Assertions.assertThrows(ISE.class, builder::build); + Assertions.assertEquals( + "Invalid value[remote] for property[druid.indexer.runner.type]. The ZooKeeper-based" + + " 'remote' task runner has been removed. Remove this property to use the default" + + " 'httpRemote' runner (or set it to 'local' for single-process testing). See the" + + " Druid upgrade notes for details.", + t.getMessage() + ); + } + + @Test + public void testValidator_acceptsHttpRemoteIndexerRunnerType() + { + final Properties props = new Properties(); + props.setProperty(StartupInjectorBuilder.INDEXER_RUNNER_TYPE_CONFIG_STRING, "httpRemote"); + + // Should not throw + new StartupInjectorBuilder().withExtensions().withProperties(props).build(); + } + @Test public void verifyInjectorBuild_withDeletedConfig_throwsException() { diff --git a/processing/src/test/java/org/apache/druid/math/expr/VectorExprResultConsistencyTest.java b/processing/src/test/java/org/apache/druid/math/expr/VectorExprResultConsistencyTest.java index 7f21e489f3d8..9ffe5da3ace8 100644 --- a/processing/src/test/java/org/apache/druid/math/expr/VectorExprResultConsistencyTest.java +++ b/processing/src/test/java/org/apache/druid/math/expr/VectorExprResultConsistencyTest.java @@ -67,7 +67,7 @@ public class VectorExprResultConsistencyTest extends InitializedNullHandlingTest { private static final Logger log = new Logger(VectorExprResultConsistencyTest.class); private static final int NUM_ITERATIONS = 10; - private static final int VECTOR_SIZE = 4; + private static final List 
VECTOR_SIZES = List.of(3, 8, 17, 67); private static final Map LOOKUP = Map.of( @@ -764,16 +764,18 @@ public static void testExpressionSequentialBindings( final int numIterations ) { - for (int iter = 0; iter < numIterations; iter++) { - assertEvalsMatch( - expr, - parsed, - makeSequentialBinding( - VECTOR_SIZE, - types, - -2 + (iter * VECTOR_SIZE) // include negative numbers and zero - ) - ); + for (int vectorSize : VECTOR_SIZES) { + for (int iter = 0; iter < numIterations; iter++) { + assertEvalsMatch( + expr, + parsed, + makeSequentialBinding( + vectorSize, + types, + -2 + (iter * vectorSize) // include negative numbers and zero + ) + ); + } } } @@ -784,8 +786,10 @@ public static void testExpressionRandomizedBindings( final int numIterations ) { - for (int iterations = 0; iterations < numIterations; iterations++) { - assertEvalsMatch(expr, parsed, makeRandomizedBindings(VECTOR_SIZE, types)); + for (int vectorSize : VECTOR_SIZES) { + for (int iterations = 0; iterations < numIterations; iterations++) { + assertEvalsMatch(expr, parsed, makeRandomizedBindings(vectorSize, types)); + } } } @@ -808,7 +812,8 @@ public static void assertEvalsMatch( ); if (vectorEval.isValue() && nonVectorEval.isValue()) { - for (int i = 0; i < VECTOR_SIZE; i++) { + final int vectorSize = bindings.lhs.length; + for (int i = 0; i < vectorSize; i++) { final String message = StringUtils.format( "Values do not match for row[%s] for expression[%s], bindings[%s]", i, @@ -1000,9 +1005,9 @@ private static Either evalNonVector( @Nullable ExpressionType outputType ) { - final Object[] exprValues = new Object[VECTOR_SIZE]; + final Object[] exprValues = new Object[bindings.length]; - for (int i = 0; i < VECTOR_SIZE; i++) { + for (int i = 0; i < bindings.length; i++) { ExprEval eval; try { eval = expr.eval(bindings[i]); diff --git a/server/src/main/java/org/apache/druid/segment/realtime/NoopChatHandlerProvider.java 
b/processing/src/test/java/org/apache/druid/math/expr/VectorExprResultConsistencyVectorApiTest.java similarity index 58% rename from server/src/main/java/org/apache/druid/segment/realtime/NoopChatHandlerProvider.java rename to processing/src/test/java/org/apache/druid/math/expr/VectorExprResultConsistencyVectorApiTest.java index 9480bdf2bc7e..a14190796a27 100644 --- a/server/src/main/java/org/apache/druid/segment/realtime/NoopChatHandlerProvider.java +++ b/processing/src/test/java/org/apache/druid/math/expr/VectorExprResultConsistencyVectorApiTest.java @@ -17,35 +17,26 @@ * under the License. */ -package org.apache.druid.segment.realtime; +package org.apache.druid.math.expr; -import com.google.common.base.Optional; +import org.junit.After; +import org.junit.Before; /** + * Re-runs every {@link VectorExprResultConsistencyTest} case with the SIMD ({@code jdk.incubator.vector}) expression + * vector processors enabled, ensuring the SIMD specializations agree with the non-vectorized reference. 
*/ -public class NoopChatHandlerProvider implements ChatHandlerProvider +public class VectorExprResultConsistencyVectorApiTest extends VectorExprResultConsistencyTest { - @Override - public void register(String key, ChatHandler handler) + @Before + public void enableVectorApi() { - // do nothing + ExpressionProcessing.initializeForVectorApiTests(); } - @Override - public void register(String key, ChatHandler handler, boolean announce) + @After + public void resetExpressionProcessing() { - // do nothing - } - - @Override - public void unregister(String key) - { - // do nothing - } - - @Override - public Optional get(String key) - { - return Optional.absent(); + ExpressionProcessing.initializeForTests(); } } diff --git a/processing/src/test/java/org/apache/druid/query/aggregation/AggregationTestHelper.java b/processing/src/test/java/org/apache/druid/query/aggregation/AggregationTestHelper.java index 0c3c846f4872..b8a31fe584b8 100644 --- a/processing/src/test/java/org/apache/druid/query/aggregation/AggregationTestHelper.java +++ b/processing/src/test/java/org/apache/druid/query/aggregation/AggregationTestHelper.java @@ -430,7 +430,7 @@ public URI getUri() } @Override - public InputStream open() + public InputStream openRaw() { return inputDataStream; } diff --git a/processing/src/test/java/org/apache/druid/query/aggregation/ExpressionLambdaAggregationTest.java b/processing/src/test/java/org/apache/druid/query/aggregation/ExpressionLambdaAggregationTest.java new file mode 100644 index 000000000000..baef2de3a0a2 --- /dev/null +++ b/processing/src/test/java/org/apache/druid/query/aggregation/ExpressionLambdaAggregationTest.java @@ -0,0 +1,215 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.aggregation; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableSet; +import com.google.common.collect.Iterables; +import org.apache.druid.data.input.InputRow; +import org.apache.druid.data.input.MapBasedInputRow; +import org.apache.druid.data.input.impl.DimensionsSpec; +import org.apache.druid.data.input.impl.StringDimensionSchema; +import org.apache.druid.java.util.common.DateTimes; +import org.apache.druid.java.util.common.granularity.Granularities; +import org.apache.druid.java.util.common.guava.Sequence; +import org.apache.druid.query.Druids; +import org.apache.druid.query.Result; +import org.apache.druid.query.expression.TestExprMacroTable; +import org.apache.druid.query.timeseries.TimeseriesQuery; +import org.apache.druid.query.timeseries.TimeseriesResultValue; +import org.apache.druid.segment.IndexBuilder; +import org.apache.druid.segment.QueryableIndex; +import org.apache.druid.segment.QueryableIndexSegment; +import org.apache.druid.segment.Segment; +import org.apache.druid.segment.incremental.IncrementalIndexSchema; +import org.apache.druid.testing.InitializedNullHandlingTest; +import org.apache.druid.timeline.SegmentId; +import org.apache.druid.utils.CloseableUtils; +import org.joda.time.DateTime; +import org.junit.After; +import org.junit.Assert; 
+import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; + +import java.util.Collections; +import java.util.List; +import java.util.Set; + +/** + * Verifies that {@link ExpressionLambdaAggregatorFactory} can be used as an ingest-time metric for primitive numeric + * types. + */ +public class ExpressionLambdaAggregationTest extends InitializedNullHandlingTest +{ + private static final String DIM = "groupKey"; + private static final String LONG_FIELD = "longField"; + private static final String DOUBLE_FIELD = "doubleField"; + private static final DateTime TIMESTAMP = DateTimes.of("2020-01-01"); + + @Rule + public final TemporaryFolder tempFolder = new TemporaryFolder(); + + private QueryableIndex mergedIndex; + private Segment segment; + + @After + public void tearDown() + { + if (segment != null) { + CloseableUtils.closeAndWrapExceptions(segment); + } + if (mergedIndex != null) { + CloseableUtils.closeAndWrapExceptions(mergedIndex); + } + } + + @Test + public void testNumericExpressionLambdaIngestRollupViaMerge() throws Exception + { + // Three rows sharing the same (timestamp, dim) so they roll up into a single output row during merge. 
+ // longField values: 1 (0b001), 2 (0b010), 4 (0b100) -> sum=7, bitwiseOr=7 + // doubleField values: 1.5, 2.0, 0.25 -> sum=3.75 + final List rows = List.of( + row(1L, 1.5), + row(2L, 2.0), + row(4L, 0.25) + ); + + final ExpressionLambdaAggregatorFactory longSum = new ExpressionLambdaAggregatorFactory( + "long_sum", + Set.of(LONG_FIELD), + null, + "0", + null, + null, + false, + false, + "__acc + " + LONG_FIELD, + null, + null, + null, + null, + TestExprMacroTable.INSTANCE + ); + + // BitwiseSqlAggregator-style: same single-field, op("__acc", field) fold + final ExpressionLambdaAggregatorFactory bitwiseOr = new ExpressionLambdaAggregatorFactory( + "bitwise_or", + ImmutableSet.of(LONG_FIELD), + null, + "0", + null, + null, + false, + false, + "bitwiseOr(\"__acc\", \"" + LONG_FIELD + "\")", + null, + null, + null, + null, + TestExprMacroTable.INSTANCE + ); + + final ExpressionLambdaAggregatorFactory doubleSum = new ExpressionLambdaAggregatorFactory( + "double_sum", + ImmutableSet.of(DOUBLE_FIELD), + null, + "0.0", + null, + null, + false, + false, + "__acc + " + DOUBLE_FIELD, + null, + null, + null, + null, + TestExprMacroTable.INSTANCE + ); + + final IncrementalIndexSchema schema = IncrementalIndexSchema.builder() + .withQueryGranularity(Granularities.NONE) + .withRollup(true) + .withDimensionsSpec( + DimensionsSpec.builder() + .setDimensions(ImmutableList.of(new StringDimensionSchema(DIM))) + .build() + ) + .withMetrics( + new CountAggregatorFactory("count"), + longSum, + bitwiseOr, + doubleSum + ) + .build(); + + mergedIndex = IndexBuilder.create() + .tmpDir(tempFolder.newFolder()) + .schema(schema) + .intermediaryPersistSize(1) + .rows(rows) + .buildMMappedMergedIndex(); + + segment = new QueryableIndexSegment(mergedIndex, SegmentId.dummy("test")); + + final TimeseriesQuery query = Druids.newTimeseriesQueryBuilder() + .dataSource("test") + .granularity(Granularities.ALL) + .intervals("1970/2050") + .aggregators( + new LongSumAggregatorFactory("count", "count"), + 
longSum.getCombiningFactory(), + bitwiseOr.getCombiningFactory(), + doubleSum.getCombiningFactory() + ) + .build(); + + try (final AggregationTestHelper helper = + AggregationTestHelper.createTimeseriesQueryAggregationTestHelper(Collections.emptyList(), tempFolder)) { + + final Sequence> seq = helper.runQueryOnSegmentsObjs( + ImmutableList.of(segment), + query + ); + final TimeseriesResultValue result = Iterables.getOnlyElement(seq.toList()).getValue(); + + // Three input rows rolled up into one, count reflects rollup happened + Assert.assertEquals(3L, result.getLongMetric("count").longValue()); + Assert.assertEquals(7L, result.getLongMetric("long_sum").longValue()); + Assert.assertEquals(7L, result.getLongMetric("bitwise_or").longValue()); + Assert.assertEquals(3.75, result.getDoubleMetric("double_sum").doubleValue(), 0.0); + } + } + + private static InputRow row(long longVal, double doubleVal) + { + return new MapBasedInputRow( + TIMESTAMP, + ImmutableList.of(DIM), + ImmutableMap.of( + DIM, "a", + LONG_FIELD, longVal, + DOUBLE_FIELD, doubleVal + ) + ); + } +} diff --git a/processing/src/test/java/org/apache/druid/query/aggregation/ExpressionLambdaAggregatorFactoryTest.java b/processing/src/test/java/org/apache/druid/query/aggregation/ExpressionLambdaAggregatorFactoryTest.java index 499bcef08fe2..29bf850d3d44 100644 --- a/processing/src/test/java/org/apache/druid/query/aggregation/ExpressionLambdaAggregatorFactoryTest.java +++ b/processing/src/test/java/org/apache/druid/query/aggregation/ExpressionLambdaAggregatorFactoryTest.java @@ -24,24 +24,31 @@ import com.google.common.collect.ImmutableSet; import nl.jqno.equalsverifier.EqualsVerifier; import org.apache.druid.java.util.common.HumanReadableBytes; +import org.apache.druid.java.util.common.UOE; import org.apache.druid.java.util.common.granularity.Granularities; import org.apache.druid.query.Druids; import org.apache.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory; import 
org.apache.druid.query.aggregation.post.FieldAccessPostAggregator; import org.apache.druid.query.aggregation.post.FinalizingFieldAccessPostAggregator; import org.apache.druid.query.expression.TestExprMacroTable; +import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; import org.apache.druid.query.timeseries.TimeseriesQuery; import org.apache.druid.query.timeseries.TimeseriesQueryQueryToolChest; +import org.apache.druid.segment.ColumnValueSelector; import org.apache.druid.segment.TestHelper; import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.column.RowSignature; +import org.apache.druid.segment.selector.TestColumnValueSelector; import org.apache.druid.testing.InitializedNullHandlingTest; import org.junit.Assert; import org.junit.Rule; import org.junit.Test; import org.junit.rules.ExpectedException; +import javax.annotation.Nullable; import java.io.IOException; +import java.util.Arrays; +import java.util.List; public class ExpressionLambdaAggregatorFactoryTest extends InitializedNullHandlingTest { @@ -545,6 +552,303 @@ public void testComplexTypeFinalized() Assert.assertEquals(ColumnType.DOUBLE, agg.getResultType()); } + @Test + public void testLongAggregateCombiner() + { + ExpressionLambdaAggregatorFactory agg = new ExpressionLambdaAggregatorFactory( + "expr_agg_name", + ImmutableSet.of("x"), + null, + "0", + null, + true, + false, + false, + "__acc + x", + null, + null, + null, + null, + TestExprMacroTable.INSTANCE + ); + + AggregateCombiner combiner = agg.makeAggregateCombiner(); + TestColumnValueSelector selector = TestColumnValueSelector.of( + Long.class, + Arrays.asList(1L, 2L, 3L) + ); + selector.advance(); + combiner.reset(selector); + Assert.assertEquals(1L, combiner.getLong()); + + selector.advance(); + combiner.fold(selector); + Assert.assertEquals(3L, combiner.getLong()); + + selector.advance(); + combiner.fold(selector); + Assert.assertEquals(6L, combiner.getLong()); + } + + @Test + public void 
testDoubleAggregateCombiner() + { + ExpressionLambdaAggregatorFactory agg = new ExpressionLambdaAggregatorFactory( + "expr_agg_name", + ImmutableSet.of("x"), + null, + "0.0", + null, + true, + false, + false, + "__acc + x", + null, + null, + null, + null, + TestExprMacroTable.INSTANCE + ); + + AggregateCombiner combiner = agg.makeAggregateCombiner(); + TestColumnValueSelector selector = TestColumnValueSelector.of( + Double.class, + Arrays.asList(1.5, 2.25, 0.25) + ); + selector.advance(); + combiner.reset(selector); + Assert.assertEquals(1.5, combiner.getDouble(), 0.0); + + selector.advance(); + combiner.fold(selector); + Assert.assertEquals(3.75, combiner.getDouble(), 0.0); + + selector.advance(); + combiner.fold(selector); + Assert.assertEquals(4.0, combiner.getDouble(), 0.0); + } + + @Test + public void testNullableAggregateCombinerSkipsNulls() + { + ExpressionLambdaAggregatorFactory agg = new ExpressionLambdaAggregatorFactory( + "expr_agg_name", + ImmutableSet.of("x"), + null, + "0", + null, + true, + false, + false, + "__acc + x", + null, + null, + null, + null, + TestExprMacroTable.INSTANCE + ); + + AggregateCombiner combiner = agg.makeNullableAggregateCombiner(); + NullableLongSelector selector = new NullableLongSelector(Arrays.asList(null, 5L, null, 7L)); + selector.advance(); + combiner.reset(selector); + Assert.assertTrue(combiner.isNull()); + + selector.advance(); + combiner.fold(selector); + Assert.assertFalse(combiner.isNull()); + Assert.assertEquals(5L, combiner.getLong()); + + selector.advance(); + combiner.fold(selector); + Assert.assertEquals(5L, combiner.getLong()); + + selector.advance(); + combiner.fold(selector); + Assert.assertEquals(12L, combiner.getLong()); + } + + @Test + public void testNullableAggregateCombinerWhenCombineAggregatesNullsExpressionSeesNulls() + { + // shouldCombineAggregateNullInputs=true means the combine expression sees null inputs directly. 
The expression + // itself is responsible for handling them; here `nvl` coalesces nulls to 0 so the accumulator keeps advancing. + ExpressionLambdaAggregatorFactory agg = new ExpressionLambdaAggregatorFactory( + "expr_agg_name", + ImmutableSet.of("x"), + null, + "0", + null, + true, + true, + true, + "nvl(__acc, 0) + nvl(x, 0)", + null, + null, + null, + null, + TestExprMacroTable.INSTANCE + ); + + AggregateCombiner combiner = agg.makeNullableAggregateCombiner(); + NullableLongSelector selector = new NullableLongSelector(Arrays.asList(1L, null, 3L)); + selector.advance(); + combiner.reset(selector); + Assert.assertEquals(1L, combiner.getLong()); + + // null is passed through to the expression, which coalesces to 0 + selector.advance(); + combiner.fold(selector); + Assert.assertEquals(1L, combiner.getLong()); + + selector.advance(); + combiner.fold(selector); + Assert.assertEquals(4L, combiner.getLong()); + } + + @Test + public void testNullableAggregateCombinerNullExpressionResultPropagates() + { + // shouldCombineAggregateNullInputs=true with an expression that doesn't handle nulls: `__acc + null` evaluates + // to null in Druid expression semantics, and the combiner reports isNull accordingly. 
+ ExpressionLambdaAggregatorFactory agg = new ExpressionLambdaAggregatorFactory( + "expr_agg_name", + ImmutableSet.of("x"), + null, + "0", + null, + true, + true, + true, + "__acc + x", + null, + null, + null, + null, + TestExprMacroTable.INSTANCE + ); + + AggregateCombiner combiner = agg.makeNullableAggregateCombiner(); + NullableLongSelector selector = new NullableLongSelector(Arrays.asList(1L, null)); + selector.advance(); + combiner.reset(selector); + Assert.assertFalse(combiner.isNull()); + Assert.assertEquals(1L, combiner.getLong()); + + selector.advance(); + combiner.fold(selector); + Assert.assertTrue(combiner.isNull()); + } + + + private static final class NullableLongSelector implements ColumnValueSelector + { + private final List values; + private int index = -1; + + NullableLongSelector(List values) + { + this.values = values; + } + + void advance() + { + index++; + } + + @Override + public long getLong() + { + Long v = values.get(index); + return v == null ? 0L : v; + } + + @Override + public double getDouble() + { + return getLong(); + } + + @Override + public float getFloat() + { + return getLong(); + } + + @Override + public boolean isNull() + { + return values.get(index) == null; + } + + @Nullable + @Override + public Long getObject() + { + return values.get(index); + } + + @Override + public Class classOfObject() + { + return Long.class; + } + + @Override + public void inspectRuntimeShape(RuntimeShapeInspector inspector) + { + } + } + + @Test(expected = UOE.class) + public void testAggregateCombinerNotSupportedForNonNumericTypes() + { + ExpressionLambdaAggregatorFactory agg = new ExpressionLambdaAggregatorFactory( + "expr_agg_name", + ImmutableSet.of("x"), + null, + "''", + "''", + true, + true, + true, + "concat(__acc, x)", + "concat(__acc, expr_agg_name)", + null, + null, + new HumanReadableBytes(2048), + TestExprMacroTable.INSTANCE + ); + + agg.makeAggregateCombiner(); + } + + @Test(expected = UOE.class) + public void 
testAggregateCombinerNotSupportedWhenFoldAndCombineTypesDiffer() + { + // fold seed is LONG (intermediate column type), but combine seed is LONG_ARRAY — combining a long segment column + // with an expression that expects arrays would silently produce wrong values, so the combiner refuses to handle it. + ExpressionLambdaAggregatorFactory agg = new ExpressionLambdaAggregatorFactory( + "expr_agg_name", + ImmutableSet.of("x"), + null, + "0", + "ARRAY[]", + null, + false, + false, + "__acc + x", + "array_set_add(__acc, expr_agg_name)", + null, + null, + new HumanReadableBytes(2048), + TestExprMacroTable.INSTANCE + ); + + Assert.assertEquals(ColumnType.LONG, agg.getIntermediateType()); + agg.makeAggregateCombiner(); + } + @Test public void testResultArraySignature() { diff --git a/processing/src/test/java/org/apache/druid/query/filter/LikeDimFilterTest.java b/processing/src/test/java/org/apache/druid/query/filter/LikeDimFilterTest.java index afa450bc471c..d122963f2efc 100644 --- a/processing/src/test/java/org/apache/druid/query/filter/LikeDimFilterTest.java +++ b/processing/src/test/java/org/apache/druid/query/filter/LikeDimFilterTest.java @@ -20,8 +20,11 @@ package org.apache.druid.query.filter; import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.collect.ImmutableRangeSet; +import com.google.common.collect.Range; import com.google.common.collect.Sets; import nl.jqno.equalsverifier.EqualsVerifier; +import org.apache.druid.error.DruidException; import org.apache.druid.jackson.DefaultObjectMapper; import org.apache.druid.query.extraction.SubstringDimExtractionFn; import org.apache.druid.segment.column.ColumnIndexSupplier; @@ -322,6 +325,134 @@ public void testPatternFindsCorrectMiddleMatch() assertMatch("1 _ 5%6", "1 2 3 1 4 5 6", DruidPredicateMatch.FALSE); } + @Test + public void testGetDimensionRangeSet_literalPattern() + { + final LikeDimFilter filter = new LikeDimFilter("foo", "bar", null, null); + Assert.assertEquals( + 
ImmutableRangeSet.of(Range.singleton("bar")), + filter.getDimensionRangeSet("foo") + ); + } + + @Test + public void testGetDimensionRangeSet_prefixPattern() + { + final LikeDimFilter filter = new LikeDimFilter("foo", "bar%", null, null); + Assert.assertEquals( + ImmutableRangeSet.of(Range.closedOpen("bar", "bas")), + filter.getDimensionRangeSet("foo") + ); + } + + @Test + public void testGetDimensionRangeSet_midPatternWildcard_returnsNull() + { + final LikeDimFilter filter = new LikeDimFilter("foo", "bar%baz", null, null); + Assert.assertNull(filter.getDimensionRangeSet("foo")); + } + + @Test + public void testGetDimensionRangeSet_suffixPattern_returnsNull() + { + final LikeDimFilter filter = new LikeDimFilter("foo", "%bar", null, null); + Assert.assertNull(filter.getDimensionRangeSet("foo")); + } + + @Test + public void testGetDimensionRangeSet_singleWildcard_returnsAll() + { + final LikeDimFilter filter = new LikeDimFilter("foo", "%", null, null); + Assert.assertEquals( + ImmutableRangeSet.of(Range.all()), + filter.getDimensionRangeSet("foo") + ); + } + + @Test + public void testGetDimensionRangeSet_otherDimension_returnsNull() + { + final LikeDimFilter filter = new LikeDimFilter("foo", "bar%", null, null); + Assert.assertNull(filter.getDimensionRangeSet("other")); + } + + @Test + public void testGetDimensionRangeSet_withExtractionFn_returnsNull() + { + final LikeDimFilter filter = new LikeDimFilter("foo", "bar%", null, new SubstringDimExtractionFn(0, 3)); + Assert.assertNull(filter.getDimensionRangeSet("foo")); + } + + @Test + public void testPrefixRange_singleLowercaseChar() + { + Assert.assertEquals(Range.closedOpen("foo", "fop"), LikeDimFilter.prefixRange("foo")); + } + + @Test + public void testPrefixRange_uppercaseCarryStaysWithinAscii() + { + Assert.assertEquals(Range.closedOpen("foZ", "fo["), LikeDimFilter.prefixRange("foZ")); + } + + @Test + public void testPrefixRange_trailingMaxValue_carriesPastIt() + { + Assert.assertEquals( + Range.closedOpen("foo￿", 
"fop"), + LikeDimFilter.prefixRange("foo￿") + ); + } + + @Test + public void testPrefixRange_allMaxValue_fallsBackToAtLeast() + { + Assert.assertEquals(Range.atLeast("￿￿"), LikeDimFilter.prefixRange("￿￿")); + } + + @Test + public void testPrefixRange_empty_throws() + { + Assert.assertThrows(DruidException.class, () -> LikeDimFilter.prefixRange("")); + } + + @Test + public void testPrefixRange_enclosesAllPrefixedStrings() + { + final Range range = LikeDimFilter.prefixRange("foo"); + Assert.assertTrue(range.contains("foo")); + Assert.assertTrue(range.contains("foo0")); + Assert.assertTrue(range.contains("foobar")); + Assert.assertTrue(range.contains("foozzz")); + Assert.assertFalse(range.contains("fo")); + Assert.assertFalse(range.contains("fop")); + Assert.assertFalse(range.contains("fox")); + } + + @Test + public void testLexicographicSuccessor_basic() + { + Assert.assertEquals("fop", LikeDimFilter.lexicographicSuccessor("foo")); + } + + @Test + public void testLexicographicSuccessor_empty_returnsNullChar() + { + Assert.assertEquals("\u0000", LikeDimFilter.lexicographicSuccessor("")); + } + + @Test + public void testLexicographicSuccessor_singleMaxValue_returnsNull() + { + Assert.assertNull(LikeDimFilter.lexicographicSuccessor("￿")); + } + + @Test + public void testLexicographicSuccessor_trailingMaxValues_truncatedAndCarried() + { + Assert.assertEquals("fop", LikeDimFilter.lexicographicSuccessor("foo￿￿")); + } + private void assertCompilation(String pattern, String expected) { LikeDimFilter.LikeMatcher matcher = LikeDimFilter.LikeMatcher.from(pattern, '\\'); diff --git a/processing/src/test/java/org/apache/druid/query/spec/SpecificSegmentQueryRunnerTest.java b/processing/src/test/java/org/apache/druid/query/spec/SpecificSegmentQueryRunnerTest.java index 5ab3783869ed..0101ce990b49 100644 --- a/processing/src/test/java/org/apache/druid/query/spec/SpecificSegmentQueryRunnerTest.java +++ 
b/processing/src/test/java/org/apache/druid/query/spec/SpecificSegmentQueryRunnerTest.java @@ -51,6 +51,8 @@ import java.util.ArrayList; import java.util.Collections; import java.util.List; +import java.util.Map; +import java.util.concurrent.atomic.AtomicReference; public class SpecificSegmentQueryRunnerTest { @@ -182,8 +184,6 @@ public void run() new CountAggregatorFactory("rows") ) ) - // Do one test with CTX_SET_THREAD_NAME = false. - .context(ImmutableMap.of(SpecificSegmentQueryRunner.CTX_SET_THREAD_NAME, false)) .build(); Sequence results = queryRunner.run(QueryPlus.wrap(query), responseContext); List> res = results.toList(); @@ -197,6 +197,101 @@ public void run() validate(mapper, descriptor, responseContext); } + @Test + public void testSetThreadName() + { + assertThreadNameDuringProcessing(null, "original-test-thread"); + assertThreadNameDuringProcessing(false, "original-test-thread"); + assertThreadNameDuringProcessing(true, "processing_thread-name-query"); + } + + private void assertThreadNameDuringProcessing( + final Boolean setProcessingThreadNames, + final String expectedThreadNameDuringProcessing + ) + { + final String originalThreadName = Thread.currentThread().getName(); + + try { + Thread.currentThread().setName("original-test-thread"); + + final AtomicReference runnerThreadName = new AtomicReference<>(); + final AtomicReference sequenceThreadName = new AtomicReference<>(); + final Result value = makeResult(); + final SegmentDescriptor descriptor = new SegmentDescriptor( + Intervals.of("2012-01-01T00:00:00Z/P1D"), + "version", + 0 + ); + + final SpecificSegmentQueryRunner> queryRunner = new SpecificSegmentQueryRunner<>( + new QueryRunner<>() + { + @Override + public Sequence> run( + QueryPlus> queryPlus, + ResponseContext responseContext + ) + { + runnerThreadName.set(Thread.currentThread().getName()); + return Sequences.withEffect( + Sequences.simple(Collections.singletonList(value)), + () -> 
sequenceThreadName.set(Thread.currentThread().getName()), + Execs.directExecutor() + ); + } + }, + new SpecificSegmentSpec(descriptor) + ); + + final TimeseriesQuery query = Druids.newTimeseriesQueryBuilder() + .dataSource("foo") + .granularity(Granularities.ALL) + .intervals(ImmutableList.of(Intervals.of("2012-01-01T00:00:00Z/P1D"))) + .aggregators( + ImmutableList.of( + new CountAggregatorFactory("rows") + ) + ) + .context(makeThreadNameContext(setProcessingThreadNames)) + .queryId("thread-name-query") + .build(); + + final Sequence> results = queryRunner.run( + QueryPlus.wrap(query), + ResponseContext.createEmpty() + ); + results.toList(); + + Assertions.assertEquals(expectedThreadNameDuringProcessing, runnerThreadName.get()); + Assertions.assertEquals(expectedThreadNameDuringProcessing, sequenceThreadName.get()); + Assertions.assertEquals("original-test-thread", Thread.currentThread().getName()); + } + finally { + Thread.currentThread().setName(originalThreadName); + } + } + + private static Map makeThreadNameContext(final Boolean setProcessingThreadNames) + { + if (setProcessingThreadNames == null) { + return Collections.emptyMap(); + } else { + return ImmutableMap.of(SpecificSegmentQueryRunner.CTX_SET_THREAD_NAME, setProcessingThreadNames); + } + } + + private static Result makeResult() + { + final TimeseriesResultBuilder builder = new TimeseriesResultBuilder( + DateTimes.of("2012-01-01T00:00:00Z") + ); + final CountAggregator rows = new CountAggregator(); + rows.aggregate(); + builder.addMetric("rows", rows.get()); + return builder.build(); + } + private void validate(ObjectMapper mapper, SegmentDescriptor descriptor, ResponseContext responseContext) throws IOException { diff --git a/processing/src/test/java/org/apache/druid/segment/PartialQueryableIndexTest.java b/processing/src/test/java/org/apache/druid/segment/PartialQueryableIndexTest.java index bb3e865fa118..6e82fb4a76a8 100644 --- 
a/processing/src/test/java/org/apache/druid/segment/PartialQueryableIndexTest.java +++ b/processing/src/test/java/org/apache/druid/segment/PartialQueryableIndexTest.java @@ -37,9 +37,10 @@ import org.apache.druid.segment.column.RowSignature; import org.apache.druid.segment.column.ValueType; import org.apache.druid.segment.data.CompressionStrategy; +import org.apache.druid.segment.file.CountingRangeReader; +import org.apache.druid.segment.file.DirectoryBackedRangeReader; import org.apache.druid.segment.file.PartialSegmentFileMapperV10; import org.apache.druid.segment.incremental.IncrementalIndexSchema; -import org.apache.druid.segment.loading.SegmentRangeReader; import org.apache.druid.segment.projections.QueryableProjection; import org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMediumFactory; import org.apache.druid.testing.InitializedNullHandlingTest; @@ -49,19 +50,14 @@ import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; -import java.io.ByteArrayInputStream; import java.io.File; import java.io.IOException; -import java.io.InputStream; -import java.io.RandomAccessFile; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.List; import java.util.Set; -import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ThreadLocalRandom; -import java.util.concurrent.atomic.AtomicInteger; class PartialQueryableIndexTest extends InitializedNullHandlingTest { @@ -421,7 +417,7 @@ void testMatchesEagerQueryableIndex() throws IOException // verify that the partial index produces the same schema info as the eager (full) index final IndexIO indexIO = TestHelper.getTestIndexIO(); final File cacheDir = newCacheDir("match_eager"); - final DirectoryRangeReader rangeReader = new DirectoryRangeReader(segmentDir); + final DirectoryBackedRangeReader rangeReader = new DirectoryBackedRangeReader(segmentDir); try ( QueryableIndex eagerIndex = indexIO.loadIndex(segmentDir); @@ -470,62 +466,4 @@ 
private File newCacheDir(String name) throws IOException FileUtils.mkdirp(dir); return dir; } - - static class DirectoryRangeReader implements SegmentRangeReader - { - private final File directory; - - DirectoryRangeReader(File directory) - { - this.directory = directory; - } - - @Override - public InputStream readRange(String filename, long offset, long length) throws IOException - { - File target = new File(directory, filename); - try (RandomAccessFile raf = new RandomAccessFile(target, "r")) { - final int available = (int) Math.min(length, Math.max(0, raf.length() - offset)); - byte[] data = new byte[available]; - raf.seek(offset); - raf.readFully(data); - return new ByteArrayInputStream(data); - } - } - } - - static class CountingRangeReader extends DirectoryRangeReader - { - private final AtomicInteger readCount = new AtomicInteger(0); - private final Set readFilenames = ConcurrentHashMap.newKeySet(); - - CountingRangeReader(File directory) - { - super(directory); - } - - int getReadCount() - { - return readCount.get(); - } - - Set getReadFilenames() - { - return Set.copyOf(readFilenames); - } - - void resetCount() - { - readCount.set(0); - readFilenames.clear(); - } - - @Override - public InputStream readRange(String filename, long offset, long length) throws IOException - { - readCount.incrementAndGet(); - readFilenames.add(filename); - return super.readRange(filename, offset, length); - } - } } diff --git a/processing/src/test/java/org/apache/druid/segment/file/CountingRangeReader.java b/processing/src/test/java/org/apache/druid/segment/file/CountingRangeReader.java new file mode 100644 index 000000000000..37f50250abd0 --- /dev/null +++ b/processing/src/test/java/org/apache/druid/segment/file/CountingRangeReader.java @@ -0,0 +1,77 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.file; + +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.atomic.AtomicInteger; + +/** + * A {@link DirectoryBackedRangeReader} that tracks range-read activity across the partial-segment test suite. Records + * total reads, header-only reads (offset == 0, which corresponds to V10 header preamble fetches), and the set of + * filenames that have been read. Each call site reads only the metric(s) it cares about. 
+ */ +public class CountingRangeReader extends DirectoryBackedRangeReader +{ + private final AtomicInteger readCount = new AtomicInteger(0); + private final AtomicInteger headerReadCount = new AtomicInteger(0); + private final Set readFilenames = ConcurrentHashMap.newKeySet(); + + public CountingRangeReader(File directory) + { + super(directory); + } + + public int getReadCount() + { + return readCount.get(); + } + + public int getHeaderReadCount() + { + return headerReadCount.get(); + } + + public Set getReadFilenames() + { + return Set.copyOf(readFilenames); + } + + public void resetCount() + { + readCount.set(0); + headerReadCount.set(0); + readFilenames.clear(); + } + + @Override + public InputStream readRange(String filename, long offset, long length) throws IOException + { + readCount.incrementAndGet(); + if (offset == 0) { + headerReadCount.incrementAndGet(); + } + readFilenames.add(filename); + return super.readRange(filename, offset, length); + } +} diff --git a/processing/src/test/java/org/apache/druid/segment/file/DirectoryBackedRangeReader.java b/processing/src/test/java/org/apache/druid/segment/file/DirectoryBackedRangeReader.java new file mode 100644 index 000000000000..bc1df52946a6 --- /dev/null +++ b/processing/src/test/java/org/apache/druid/segment/file/DirectoryBackedRangeReader.java @@ -0,0 +1,56 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.file; + +import org.apache.druid.segment.loading.SegmentRangeReader; + +import java.io.ByteArrayInputStream; +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.io.RandomAccessFile; + +/** + * A {@link SegmentRangeReader} backed by a directory of files. Used across the partial-segment test suite (processing + * + server modules) to simulate deep-storage range reads against an on-disk layout produced by + * {@link SegmentFileBuilderV10} or {@link org.apache.druid.segment.IndexMergerV10}. + */ +public class DirectoryBackedRangeReader implements SegmentRangeReader +{ + private final File directory; + + public DirectoryBackedRangeReader(File directory) + { + this.directory = directory; + } + + @Override + public InputStream readRange(String filename, long offset, long length) throws IOException + { + final File target = new File(directory, filename); + try (RandomAccessFile raf = new RandomAccessFile(target, "r")) { + final int available = (int) Math.min(length, Math.max(0, raf.length() - offset)); + final byte[] data = new byte[available]; + raf.seek(offset); + raf.readFully(data); + return new ByteArrayInputStream(data); + } + } +} diff --git a/processing/src/test/java/org/apache/druid/segment/file/PartialSegmentFileMapperV10Test.java b/processing/src/test/java/org/apache/druid/segment/file/PartialSegmentFileMapperV10Test.java index 790ba10ece73..0294f65e20bf 100644 --- a/processing/src/test/java/org/apache/druid/segment/file/PartialSegmentFileMapperV10Test.java +++ 
b/processing/src/test/java/org/apache/druid/segment/file/PartialSegmentFileMapperV10Test.java @@ -33,11 +33,9 @@ import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; -import java.io.ByteArrayInputStream; import java.io.File; import java.io.FileInputStream; import java.io.IOException; -import java.io.InputStream; import java.io.RandomAccessFile; import java.nio.ByteBuffer; import java.util.ArrayList; @@ -48,7 +46,6 @@ import java.util.concurrent.ExecutorService; import java.util.concurrent.Future; import java.util.concurrent.ThreadLocalRandom; -import java.util.concurrent.atomic.AtomicInteger; class PartialSegmentFileMapperV10Test { @@ -554,59 +551,4 @@ private static PartialSegmentFileMapperV10 createMapper( ); } - /** - * A {@link SegmentRangeReader} backed by a directory of files, supporting both main and external file reads. - */ - static class DirectoryBackedRangeReader implements SegmentRangeReader - { - private final File directory; - - DirectoryBackedRangeReader(File directory) - { - this.directory = directory; - } - - @Override - public InputStream readRange(String filename, long offset, long length) throws IOException - { - File target = new File(directory, filename); - try (RandomAccessFile raf = new RandomAccessFile(target, "r")) { - final int available = (int) Math.min(length, Math.max(0, raf.length() - offset)); - byte[] data = new byte[available]; - raf.seek(offset); - raf.readFully(data); - return new ByteArrayInputStream(data); - } - } - } - - /** - * A {@link DirectoryBackedRangeReader} that counts range reads (excluding metadata fetches). 
- */ - static class CountingRangeReader extends DirectoryBackedRangeReader - { - private final AtomicInteger readCount = new AtomicInteger(0); - - CountingRangeReader(File directory) - { - super(directory); - } - - int getReadCount() - { - return readCount.get(); - } - - void resetCount() - { - readCount.set(0); - } - - @Override - public InputStream readRange(String filename, long offset, long length) throws IOException - { - readCount.incrementAndGet(); - return super.readRange(filename, offset, length); - } - } } diff --git a/processing/src/test/java/org/apache/druid/segment/file/SegmentFileBuilderV10Test.java b/processing/src/test/java/org/apache/druid/segment/file/SegmentFileBuilderV10Test.java index 6dd01d8e5bd9..8f065990012b 100644 --- a/processing/src/test/java/org/apache/druid/segment/file/SegmentFileBuilderV10Test.java +++ b/processing/src/test/java/org/apache/druid/segment/file/SegmentFileBuilderV10Test.java @@ -53,13 +53,13 @@ void testOneContainerPerProjection() throws IOException { final File baseDir = newBaseDir(); - // matches the production usage pattern in IndexMergerV10: call startFileGroup then write that projection's + // matches the production usage pattern in IndexMergerV10: call startFileBundle then write that projection's // columns, then move on to the next projection. 
final String[] projections = {"__base", "projA", "projB"}; final int colCount = 3; try (SegmentFileBuilderV10 builder = SegmentFileBuilderV10.create(JSON_MAPPER, baseDir)) { for (String projection : projections) { - builder.startFileGroup(projection); + builder.startFileBundle(projection); for (int col = 0; col < colCount; col++) { final String name = projection + "/col" + col; final File tmpFile = new File(tempDir, StringUtils.format("%s-%s.bin", projection, col)); @@ -89,14 +89,14 @@ void testProjectionNameWithSlashRoutesCorrectly() throws IOException final String slashyProjection = "nested/projection"; final int colCount = 3; try (SegmentFileBuilderV10 builder = SegmentFileBuilderV10.create(JSON_MAPPER, baseDir)) { - builder.startFileGroup("__base"); + builder.startFileBundle("__base"); for (int col = 0; col < colCount; col++) { final String name = "__base/col" + col; final File tmpFile = new File(tempDir, StringUtils.format("base-%s.bin", col)); Files.write(Ints.toByteArray(name.hashCode()), tmpFile); builder.add(name, tmpFile); } - builder.startFileGroup(slashyProjection); + builder.startFileBundle(slashyProjection); for (int col = 0; col < colCount; col++) { final String name = slashyProjection + "/col" + col; final File tmpFile = new File(tempDir, StringUtils.format("slashy-%s.bin", col)); @@ -133,7 +133,7 @@ void testAddWithoutGroupPrefixThrowsWhenGroupActive() throws IOException final File baseDir = newBaseDir(); try (SegmentFileBuilderV10 builder = SegmentFileBuilderV10.create(JSON_MAPPER, baseDir)) { - builder.startFileGroup("projA"); + builder.startFileBundle("projA"); final File tmp = new File(tempDir, "no-prefix.bin"); Files.write(Ints.toByteArray(1), tmp); // file name doesn't start with "projA/", so add must throw @@ -147,7 +147,7 @@ void testAddWithChannelWithoutGroupPrefixThrowsWhenGroupActive() throws IOExcept final File baseDir = newBaseDir(); try (SegmentFileBuilderV10 builder = SegmentFileBuilderV10.create(JSON_MAPPER, baseDir)) { - 
builder.startFileGroup("projA"); + builder.startFileBundle("projA"); Assertions.assertThrows(RuntimeException.class, () -> builder.addWithChannel("wrong/col0", 4)); } } @@ -158,7 +158,7 @@ void testAddColumnWithoutGroupPrefixThrowsWhenGroupActive() throws IOException final File baseDir = newBaseDir(); try (SegmentFileBuilderV10 builder = SegmentFileBuilderV10.create(JSON_MAPPER, baseDir)) { - builder.startFileGroup("projA"); + builder.startFileBundle("projA"); Assertions.assertThrows( RuntimeException.class, () -> builder.addColumn("wrong_no_prefix", new ColumnDescriptor.Builder() @@ -169,12 +169,12 @@ void testAddColumnWithoutGroupPrefixThrowsWhenGroupActive() throws IOException } @Test - void testAddWithoutPrefixIsAllowedWhenNoGroupActive() throws IOException + void testAddWithoutPrefixIsAllowedInRootBundle() throws IOException { final File baseDir = newBaseDir(); try (SegmentFileBuilderV10 builder = SegmentFileBuilderV10.create(JSON_MAPPER, baseDir)) { - // never call startFileGroup; bare names are fine + // never call startFileBundle; bare names are fine under the default root bundle final File tmp = new File(tempDir, "bare.bin"); Files.write(Ints.toByteArray(1), tmp); builder.add("col0", tmp); @@ -183,7 +183,7 @@ void testAddWithoutPrefixIsAllowedWhenNoGroupActive() throws IOException } @Test - void testContainerMetadataCarriesFileGroup() throws IOException + void testContainerMetadataCarriesBundle() throws IOException { final File baseDir = newBaseDir(); @@ -191,7 +191,7 @@ void testContainerMetadataCarriesFileGroup() throws IOException final int colCount = 2; try (SegmentFileBuilderV10 builder = SegmentFileBuilderV10.create(JSON_MAPPER, baseDir)) { for (String projection : projections) { - builder.startFileGroup(projection); + builder.startFileBundle(projection); for (int col = 0; col < colCount; col++) { final String name = projection + "/col" + col; final File tmpFile = new File(tempDir, StringUtils.format("%s-%s.bin", projection, col)); @@ -206,35 +206,34 
@@ void testContainerMetadataCarriesFileGroup() throws IOException final SegmentFileMetadata metadata = mapper.getSegmentFileMetadata(); Assertions.assertEquals(projections.length, metadata.getContainers().size()); - // Each container's fileGroup must equal the group active when it was written. Build the expected list by - // walking the files: each container holds files from exactly one group, so the first file's group prefix is - // authoritative. + // Each container's bundle must equal the bundle active when it was written. Each container holds files from + // exactly one bundle, so the first file's name prefix is authoritative. for (int ci = 0; ci < metadata.getContainers().size(); ci++) { final int containerIdx = ci; - final String expectedGroup = metadata.getFiles().entrySet().stream() + final String expectedBundle = metadata.getFiles().entrySet().stream() .filter(e -> e.getValue().getContainer() == containerIdx) .map(e -> e.getKey().substring(0, e.getKey().indexOf('/'))) .findFirst() .orElseThrow(); Assertions.assertEquals( - expectedGroup, - metadata.getContainers().get(ci).getFileGroup(), - "container " + ci + " fileGroup mismatch" + expectedBundle, + metadata.getContainers().get(ci).getBundle(), + "container " + ci + " bundle mismatch" ); } } } @Test - void testContainerWrittenWithoutStartFileGroupHasNullFileGroup() throws IOException + void testContainerWrittenWithoutStartFileBundleDefaultsToRoot() throws IOException { final File baseDir = newBaseDir(); try (SegmentFileBuilderV10 builder = SegmentFileBuilderV10.create(JSON_MAPPER, baseDir)) { - // never call startFileGroup; the single container should carry fileGroup == null + // never call startFileBundle; the single container should be tagged with ROOT_BUNDLE_NAME for (int col = 0; col < 3; col++) { final String name = "col" + col; - final File tmpFile = new File(tempDir, StringUtils.format("nogroup-%s.bin", col)); + final File tmpFile = new File(tempDir, StringUtils.format("nobundle-%s.bin", col)); 
Files.write(Ints.toByteArray(name.hashCode()), tmpFile); builder.add(name, tmpFile); } @@ -244,50 +243,81 @@ void testContainerWrittenWithoutStartFileGroupHasNullFileGroup() throws IOExcept try (SegmentFileMapperV10 mapper = SegmentFileMapperV10.create(segmentFile, JSON_MAPPER)) { final SegmentFileMetadata metadata = mapper.getSegmentFileMetadata(); Assertions.assertEquals(1, metadata.getContainers().size()); - Assertions.assertNull(metadata.getContainers().get(0).getFileGroup()); + Assertions.assertEquals( + SegmentFileBuilder.ROOT_BUNDLE_NAME, + metadata.getContainers().get(0).getBundle() + ); } } @Test - void testStartFileGroupNullClearsCurrentGroup() throws IOException + void testStartFileBundleNullResetsToRoot() throws IOException { final File baseDir = newBaseDir(); try (SegmentFileBuilderV10 builder = SegmentFileBuilderV10.create(JSON_MAPPER, baseDir)) { - builder.startFileGroup("first"); + builder.startFileBundle("first"); final File firstFile = new File(tempDir, "first.bin"); Files.write(Ints.toByteArray(1), firstFile); builder.add("first/a", firstFile); - builder.startFileGroup(null); - final File noGroupFile = new File(tempDir, "ng.bin"); - Files.write(Ints.toByteArray(2), noGroupFile); - builder.add("ng/a", noGroupFile); + // Passing null resets to ROOT_BUNDLE_NAME; subsequent writes go in a root-bundle container. 
+ builder.startFileBundle(null); + final File rootFile = new File(tempDir, "root.bin"); + Files.write(Ints.toByteArray(2), rootFile); + builder.add("root_a", rootFile); } final File segmentFile = new File(baseDir, IndexIO.V10_FILE_NAME); try (SegmentFileMapperV10 mapper = SegmentFileMapperV10.create(segmentFile, JSON_MAPPER)) { final SegmentFileMetadata metadata = mapper.getSegmentFileMetadata(); Assertions.assertEquals(2, metadata.getContainers().size()); - Assertions.assertEquals("first", metadata.getContainers().get(0).getFileGroup()); - Assertions.assertNull(metadata.getContainers().get(1).getFileGroup()); + Assertions.assertEquals("first", metadata.getContainers().get(0).getBundle()); + Assertions.assertEquals( + SegmentFileBuilder.ROOT_BUNDLE_NAME, + metadata.getContainers().get(1).getBundle() + ); } } @Test - void testStartFileGroupWhileWriterInUseThrows() throws IOException + void testStartFileBundleWhileWriterInUseThrows() throws IOException { final File baseDir = newBaseDir(); try (SegmentFileBuilderV10 builder = SegmentFileBuilderV10.create(JSON_MAPPER, baseDir)) { - builder.startFileGroup("__base"); + builder.startFileBundle("__base"); try (SegmentFileChannel outer = builder.addWithChannel("__base/col0", 4)) { - Assertions.assertThrows(RuntimeException.class, () -> builder.startFileGroup("projA")); + Assertions.assertThrows(RuntimeException.class, () -> builder.startFileBundle("projA")); outer.write(ByteBuffer.wrap(new byte[]{1, 2, 3, 4})); } } } + @Test + void testStartFileBundleWithRootNameIsSameAsNull() throws IOException + { + final File baseDir = newBaseDir(); + + try (SegmentFileBuilderV10 builder = SegmentFileBuilderV10.create(JSON_MAPPER, baseDir)) { + // Explicit ROOT_BUNDLE_NAME and null are equivalent; both resolve to the default root bundle. 
+ builder.startFileBundle(SegmentFileBuilder.ROOT_BUNDLE_NAME); + final File tmp = new File(baseDir, "tmp.bin"); + Files.write(new byte[]{1, 2, 3, 4}, tmp); + builder.add("col0", tmp); + } + + final File segmentFile = new File(baseDir, IndexIO.V10_FILE_NAME); + try (SegmentFileMapperV10 mapper = SegmentFileMapperV10.create(segmentFile, JSON_MAPPER)) { + final SegmentFileMetadata metadata = mapper.getSegmentFileMetadata(); + Assertions.assertEquals(1, metadata.getContainers().size()); + Assertions.assertEquals( + SegmentFileBuilder.ROOT_BUNDLE_NAME, + metadata.getContainers().get(0).getBundle() + ); + } + } + @Test void testExternalBuilderAlsoSplitsContainersByProjection() throws IOException { @@ -300,7 +330,7 @@ void testExternalBuilderAlsoSplitsContainersByProjection() throws IOException try (SegmentFileBuilderV10 builder = SegmentFileBuilderV10.create(JSON_MAPPER, baseDir)) { for (String projection : mainProjections) { - builder.startFileGroup(projection); + builder.startFileBundle(projection); for (int col = 0; col < colCount; col++) { final String name = projection + "/col" + col; final File tmpFile = new File(tempDir, StringUtils.format("main-%s-%s.bin", projection, col)); @@ -313,7 +343,7 @@ void testExternalBuilderAlsoSplitsContainersByProjection() throws IOException // sub-file with its own header + containers. Projection-per-container splitting must apply there too. 
final SegmentFileBuilder external = builder.getExternalBuilder(externalName); for (String projection : externalProjections) { - external.startFileGroup(projection); + external.startFileBundle(projection); for (int col = 0; col < colCount; col++) { final String name = projection + "/col" + (col + 1000); final File tmpFile = new File(tempDir, StringUtils.format("ext-%s-%s.bin", projection, col)); @@ -370,7 +400,7 @@ void testNestedAddWithChannelDelegatesPerBuilder() throws IOException final byte[] nestedBytes = new byte[]{5, 6, 7, 8}; try (SegmentFileBuilderV10 builder = SegmentFileBuilderV10.create(JSON_MAPPER, baseDir)) { - builder.startFileGroup("__base"); + builder.startFileBundle("__base"); try (SegmentFileChannel outer = builder.addWithChannel("__base/outer", outerBytes.length)) { // nested write while outer is in use → forced into delegate temp file try (SegmentFileChannel nested = builder.addWithChannel("__base/nested", nestedBytes.length)) { @@ -380,7 +410,7 @@ void testNestedAddWithChannelDelegatesPerBuilder() throws IOException } final SegmentFileBuilder external = builder.getExternalBuilder(externalName); - external.startFileGroup("extProj"); + external.startFileBundle("extProj"); try (SegmentFileChannel extOuter = external.addWithChannel("extProj/outer", outerBytes.length)) { try (SegmentFileChannel extNested = external.addWithChannel("extProj/nested", nestedBytes.length)) { extNested.write(ByteBuffer.wrap(nestedBytes)); @@ -399,12 +429,12 @@ void testNestedAddWithChannelDelegatesPerBuilder() throws IOException } @Test - void testNestedDelegateClosedAfterOuterRoutesToOriginalGroup() throws IOException + void testNestedDelegateClosedAfterOuterRoutesToOriginalBundle() throws IOException { // doing something like this is weird and probably should happen in practice, but if a nested write was requested - // while file group "groupA" was active; even if the caller switches to "groupB" before finally closing the nested - // channel, the delegated bytes must 
still land in groupA's container, not groupB's. Otherwise the grouping breaks, - // and files from other groups end up in the same container. + // while bundle "groupA" was active; even if the caller switches to "groupB" before finally closing the nested + // channel, the delegated bytes must still land in groupA's container, not groupB's. Otherwise bundles break and + // files from other bundles end up in the same container. final File baseDir = newBaseDir(); final byte[] outerBytes = new byte[]{1, 2, 3, 4}; @@ -412,7 +442,7 @@ void testNestedDelegateClosedAfterOuterRoutesToOriginalGroup() throws IOExceptio final byte[] groupBBytes = new byte[]{9, 10, 11, 12}; try (SegmentFileBuilderV10 builder = SegmentFileBuilderV10.create(JSON_MAPPER, baseDir)) { - builder.startFileGroup("groupA"); + builder.startFileBundle("groupA"); final SegmentFileChannel outer = builder.addWithChannel("groupA/outer", outerBytes.length); final SegmentFileChannel nested = builder.addWithChannel("groupA/nested", nestedBytes.length); @@ -423,7 +453,7 @@ void testNestedDelegateClosedAfterOuterRoutesToOriginalGroup() throws IOExceptio outer.close(); // switch group before closing the still-open nested delegate; merge must use the snapshotted "groupA" - builder.startFileGroup("groupB"); + builder.startFileBundle("groupB"); nested.close(); // and a real groupB file so we can verify groupB's container is independent of the nested file diff --git a/processing/src/test/java/org/apache/druid/segment/file/SegmentFileContainerMetadataTest.java b/processing/src/test/java/org/apache/druid/segment/file/SegmentFileContainerMetadataTest.java index 5a56dcd7faf2..24374071d148 100644 --- a/processing/src/test/java/org/apache/druid/segment/file/SegmentFileContainerMetadataTest.java +++ b/processing/src/test/java/org/apache/druid/segment/file/SegmentFileContainerMetadataTest.java @@ -36,33 +36,48 @@ void testEqualsAndHashCode() } @Test - void testSerdeWithFileGroup() throws Exception + void 
testSerdeWithNamedBundle() throws Exception { final SegmentFileContainerMetadata metadata = new SegmentFileContainerMetadata(100, 4096, "projA"); final String json = JSON_MAPPER.writeValueAsString(metadata); - Assertions.assertTrue(json.contains("\"fileGroup\":\"projA\""), "fileGroup must be present in serialized JSON: " + json); + Assertions.assertTrue(json.contains("\"bundle\":\"projA\""), "bundle must be present in serialized JSON: " + json); Assertions.assertEquals(metadata, JSON_MAPPER.readValue(json, SegmentFileContainerMetadata.class)); } @Test - void testSerdeWithNullFileGroupOmitsField() throws Exception + void testNullBundleNormalizesToRootAndOmitsFromJson() throws Exception { - // Old-format segments don't have fileGroup; serializing null must omit the property so older readers (and - // future versions reading old segments) round-trip unchanged. + // Null in the constructor is the writer-side equivalent of "no explicit startFileBundle call"; the field + // normalizes to ROOT_BUNDLE_NAME, and the default value is omitted from JSON so segments without explicit + // bundles stay compact on disk. final SegmentFileContainerMetadata metadata = new SegmentFileContainerMetadata(0, 1024, null); + Assertions.assertEquals(SegmentFileBuilder.ROOT_BUNDLE_NAME, metadata.getBundle()); final String json = JSON_MAPPER.writeValueAsString(metadata); - Assertions.assertFalse(json.contains("fileGroup"), "null fileGroup must be omitted from JSON, got: " + json); + Assertions.assertFalse(json.contains("bundle"), "default bundle must be omitted from JSON, got: " + json); Assertions.assertEquals(metadata, JSON_MAPPER.readValue(json, SegmentFileContainerMetadata.class)); } @Test - void testDeserializeLegacyJsonWithoutFileGroup() throws Exception + void testExplicitRootBundleAlsoOmitsFromJson() throws Exception { - // Bytes produced by a writer pre-dating the fileGroup field must deserialize cleanly with fileGroup == null. 
- final String legacyJson = "{\"startOffset\":42,\"size\":8192}"; - final SegmentFileContainerMetadata metadata = JSON_MAPPER.readValue(legacyJson, SegmentFileContainerMetadata.class); + // Passing ROOT_BUNDLE_NAME explicitly is equivalent to passing null; both normalize to the default and both + // omit the field from JSON. + final SegmentFileContainerMetadata metadata = + new SegmentFileContainerMetadata(0, 1024, SegmentFileBuilder.ROOT_BUNDLE_NAME); + final String json = JSON_MAPPER.writeValueAsString(metadata); + Assertions.assertFalse(json.contains("bundle"), "explicit root bundle must be omitted from JSON, got: " + json); + Assertions.assertEquals(metadata, JSON_MAPPER.readValue(json, SegmentFileContainerMetadata.class)); + } + + @Test + void testDeserializeJsonWithoutBundleFieldDefaultsToRoot() throws Exception + { + // Bytes produced by a writer that didn't include a bundle field (old segments, or new segments without + // explicit startFileBundle) must deserialize to the ROOT_BUNDLE_NAME default. 
+ final String json = "{\"startOffset\":42,\"size\":8192}"; + final SegmentFileContainerMetadata metadata = JSON_MAPPER.readValue(json, SegmentFileContainerMetadata.class); Assertions.assertEquals(42, metadata.getStartOffset()); Assertions.assertEquals(8192, metadata.getSize()); - Assertions.assertNull(metadata.getFileGroup()); + Assertions.assertEquals(SegmentFileBuilder.ROOT_BUNDLE_NAME, metadata.getBundle()); } } diff --git a/processing/src/test/java/org/apache/druid/segment/transform/TransformerTest.java b/processing/src/test/java/org/apache/druid/segment/transform/TransformerTest.java index 099c91845144..1f8ec37ddf6f 100644 --- a/processing/src/test/java/org/apache/druid/segment/transform/TransformerTest.java +++ b/processing/src/test/java/org/apache/druid/segment/transform/TransformerTest.java @@ -21,13 +21,16 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; +import org.apache.druid.data.input.InputEntityReader; import org.apache.druid.data.input.InputRow; import org.apache.druid.data.input.InputRowListPlusRawValues; import org.apache.druid.data.input.MapBasedInputRow; import org.apache.druid.data.input.MapBasedRow; import org.apache.druid.data.input.Row; import org.apache.druid.error.DruidExceptionMatcher; +import org.apache.druid.java.util.common.CloseableIterators; import org.apache.druid.java.util.common.DateTimes; +import org.apache.druid.java.util.common.parsers.CloseableIterator; import org.apache.druid.java.util.common.parsers.ParseException; import org.apache.druid.query.expression.TestExprMacroTable; import org.apache.druid.query.filter.SelectorDimFilter; @@ -40,6 +43,7 @@ import org.junit.rules.ExpectedException; import javax.annotation.Nullable; +import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -58,6 +62,62 @@ public void testTransformNullRowReturnNull() Assert.assertNull(transformer.transform((InputRowListPlusRawValues) null)); } + @Test + 
public void testTransformWithoutFilter() + { + final Transformer transformer = new Transformer( + new TransformSpec(new SelectorDimFilter("dim", "keep", null), null) + ); + final InputRow keepRow = makeRow("keep"); + final InputRow dropRow = makeRow("drop"); + + Assert.assertSame(keepRow, transformer.transformWithoutFilter(keepRow)); + Assert.assertSame(dropRow, transformer.transformWithoutFilter(dropRow)); + Assert.assertNull(transformer.transformWithoutFilter(null)); + + Assert.assertTrue(transformer.hasFilter()); + Assert.assertTrue(transformer.rowMatchesFilter(keepRow)); + Assert.assertFalse(transformer.rowMatchesFilter(dropRow)); + Assert.assertTrue(transformer.rowMatchesFilter(null)); + } + + @Test + public void testTransformingInputEntityReaderCanSkipFilter() throws IOException + { + final Transformer transformer = new Transformer( + new TransformSpec(new SelectorDimFilter("dim", "keep", null), null) + ); + final InputRow dropRow = makeRow("drop"); + final InputRow keepRow = makeRow("keep"); + final TransformingInputEntityReader reader = TransformingInputEntityReader.withoutFilter( + new TestInputEntityReader(dropRow, keepRow), + transformer + ); + + try (final CloseableIterator iterator = reader.read()) { + Assert.assertSame(dropRow, iterator.next()); + Assert.assertSame(keepRow, iterator.next()); + Assert.assertFalse(iterator.hasNext()); + } + } + + @Test + public void testTransformingInputEntityReaderReturnsNullForFilteredRowsByDefault() throws IOException + { + final Transformer transformer = new Transformer( + new TransformSpec(new SelectorDimFilter("dim", "keep", null), null) + ); + final TransformingInputEntityReader reader = new TransformingInputEntityReader( + new TestInputEntityReader(makeRow("drop")), + transformer + ); + + try (final CloseableIterator iterator = reader.read()) { + Assert.assertNull(iterator.next()); + Assert.assertFalse(iterator.hasNext()); + } + } + @Test public void testTransformTimeColumn() { @@ -579,4 +639,35 @@ public void 
testNowRejectedWhenWrappedInArithmeticForTimeColumn() () -> new ExpressionTransform("__time", "now() + 1000", TestExprMacroTable.INSTANCE) ); } + + private static InputRow makeRow(final String dim) + { + return new MapBasedInputRow( + DateTimes.nowUtc(), + ImmutableList.of("dim"), + ImmutableMap.of("dim", dim) + ); + } + + private static class TestInputEntityReader implements InputEntityReader + { + private final List rows; + + private TestInputEntityReader(final InputRow... rows) + { + this.rows = Arrays.asList(rows); + } + + @Override + public CloseableIterator read() + { + return CloseableIterators.withEmptyBaggage(rows.iterator()); + } + + @Override + public CloseableIterator sample() + { + return CloseableIterators.withEmptyBaggage(ImmutableList.of().iterator()); + } + } } diff --git a/server/pom.xml b/server/pom.xml index 9fffcbf86882..c0f4ee55f6d1 100644 --- a/server/pom.xml +++ b/server/pom.xml @@ -78,10 +78,6 @@ - - org.apache.curator - curator-x-discovery - com.fasterxml.jackson.jaxrs jackson-jaxrs-json-provider diff --git a/server/src/main/java/org/apache/druid/curator/discovery/CuratorServiceAnnouncer.java b/server/src/main/java/org/apache/druid/curator/discovery/CuratorServiceAnnouncer.java deleted file mode 100644 index 26afee8d0e75..000000000000 --- a/server/src/main/java/org/apache/druid/curator/discovery/CuratorServiceAnnouncer.java +++ /dev/null @@ -1,121 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.curator.discovery; - -import com.google.inject.Inject; -import org.apache.curator.x.discovery.ServiceDiscovery; -import org.apache.curator.x.discovery.ServiceInstance; -import org.apache.druid.java.util.emitter.EmittingLogger; -import org.apache.druid.server.DruidNode; - -import java.util.HashMap; -import java.util.Map; - -/** - * This class is deprecated, Add service to {@link org.apache.druid.discovery.DruidNodeAnnouncer} node announcement instead. - * - * Uses the Curator Service Discovery recipe to announce services. - */ -@Deprecated -public class CuratorServiceAnnouncer implements ServiceAnnouncer -{ - private static final EmittingLogger log = new EmittingLogger(CuratorServiceAnnouncer.class); - - private final ServiceDiscovery discovery; - private final Map> instanceMap = new HashMap<>(); - private final Object monitor = new Object(); - - @Inject - public CuratorServiceAnnouncer( - ServiceDiscovery discovery - ) - { - this.discovery = discovery; - } - - @Override - public void announce(DruidNode service) - { - final String serviceName = CuratorServiceUtils.makeCanonicalServiceName(service.getServiceName()); - - final ServiceInstance instance; - synchronized (monitor) { - if (instanceMap.containsKey(serviceName)) { - log.warn("Ignoring request to announce service[%s]", service); - return; - } else { - try { - instance = ServiceInstance.builder() - .name(serviceName) - .address(service.getHost()) - .port(service.getPlaintextPort()) - .sslPort(service.getTlsPort()) - .build(); - } - catch (Exception e) { - throw 
new RuntimeException(e); - } - - instanceMap.put(serviceName, instance); - } - } - - try { - log.info("Announcing service[%s]", service); - discovery.registerService(instance); - } - catch (Exception e) { - log.warn("Failed to announce service[%s]", service); - synchronized (monitor) { - instanceMap.remove(serviceName); - } - } - } - - @Override - public void unannounce(DruidNode service) - { - final String serviceName = CuratorServiceUtils.makeCanonicalServiceName(service.getServiceName()); - final ServiceInstance instance; - - synchronized (monitor) { - instance = instanceMap.get(serviceName); - if (instance == null) { - log.warn("Ignoring request to unannounce service[%s]", service); - return; - } - } - - log.info("Unannouncing service[%s]", service); - try { - discovery.unregisterService(instance); - } - catch (Exception e) { - log.makeAlert(e, "Failed to unannounce service[%s], zombie znode perhaps in existence.", serviceName) - .addData("service", service) - .emit(); - } - finally { - synchronized (monitor) { - instanceMap.remove(serviceName); - } - } - } -} diff --git a/server/src/main/java/org/apache/druid/curator/discovery/CuratorServiceUtils.java b/server/src/main/java/org/apache/druid/curator/discovery/CuratorServiceUtils.java deleted file mode 100644 index 1513d9fc05d8..000000000000 --- a/server/src/main/java/org/apache/druid/curator/discovery/CuratorServiceUtils.java +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.curator.discovery; - -/** - * This class is only used by Deprecated classes. - */ -@Deprecated -public class CuratorServiceUtils -{ - /** - * Replacing '/' with ':' in service names makes it easier to provide an HTTP interface using - * curator-x-discovery-server - * - * This method is marked protected because it should never be used outside of the org.apache.druid.curator.discovery - * package. If you are tempted to use this method anywhere else you are most likely doing something wrong. - * Mapping the actual service name to the name used within curator should be left to {@link CuratorServiceAnnouncer} - * and {@link ServerDiscoveryFactory} - * - * @see CuratorServiceAnnouncer - * @see ServerDiscoveryFactory - * - * @param serviceName - * @return - */ - protected static String makeCanonicalServiceName(String serviceName) - { - return serviceName.replace('/', ':'); - } -} diff --git a/server/src/main/java/org/apache/druid/curator/discovery/DiscoveryModule.java b/server/src/main/java/org/apache/druid/curator/discovery/DiscoveryModule.java index bd1ad64aacaa..c859e88ead94 100644 --- a/server/src/main/java/org/apache/druid/curator/discovery/DiscoveryModule.java +++ b/server/src/main/java/org/apache/druid/curator/discovery/DiscoveryModule.java @@ -19,168 +19,41 @@ package org.apache.druid.curator.discovery; -import com.google.common.collect.ImmutableList; import com.google.inject.Binder; import com.google.inject.Inject; -import com.google.inject.Injector; import com.google.inject.Key; import com.google.inject.Module; 
import com.google.inject.Provider; -import com.google.inject.Provides; -import com.google.inject.TypeLiteral; -import com.google.inject.name.Named; -import com.google.inject.name.Names; -import io.netty.util.SuppressForbidden; import org.apache.curator.framework.CuratorFramework; import org.apache.curator.utils.ZKPaths; -import org.apache.curator.x.discovery.DownInstancePolicy; -import org.apache.curator.x.discovery.InstanceFilter; -import org.apache.curator.x.discovery.ProviderStrategy; -import org.apache.curator.x.discovery.ServiceCache; -import org.apache.curator.x.discovery.ServiceCacheBuilder; -import org.apache.curator.x.discovery.ServiceDiscovery; -import org.apache.curator.x.discovery.ServiceDiscoveryBuilder; -import org.apache.curator.x.discovery.ServiceInstance; -import org.apache.curator.x.discovery.ServiceProvider; -import org.apache.curator.x.discovery.ServiceProviderBuilder; -import org.apache.curator.x.discovery.details.ServiceCacheListener; import org.apache.druid.client.coordinator.Coordinator; import org.apache.druid.client.indexing.IndexingService; -import org.apache.druid.curator.ZkEnablementConfig; import org.apache.druid.discovery.DruidLeaderSelector; import org.apache.druid.discovery.DruidNodeAnnouncer; import org.apache.druid.discovery.DruidNodeDiscoveryProvider; -import org.apache.druid.guice.DruidBinders; -import org.apache.druid.guice.JsonConfigProvider; -import org.apache.druid.guice.KeyHolder; import org.apache.druid.guice.LazySingleton; -import org.apache.druid.guice.LifecycleModule; import org.apache.druid.guice.PolyBind; import org.apache.druid.guice.annotations.Self; -import org.apache.druid.java.util.common.lifecycle.Lifecycle; import org.apache.druid.server.DruidNode; import org.apache.druid.server.ServiceAnnouncementState; -import org.apache.druid.server.initialization.CuratorDiscoveryConfig; import org.apache.druid.server.initialization.ZkPathsConfig; -import java.lang.annotation.Annotation; -import java.util.ArrayList; -import 
java.util.Collection; -import java.util.Collections; -import java.util.List; -import java.util.Properties; -import java.util.Set; -import java.util.concurrent.CountDownLatch; -import java.util.concurrent.Executor; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.ThreadFactory; import java.util.function.Function; /** - * The DiscoveryModule allows for the registration of Keys of DruidNode objects, which it intends to be - * automatically announced at the end of the lifecycle start. - * - * In order for this to work a ServiceAnnouncer instance *must* be injected and instantiated first. - * This can often be achieved by registering ServiceAnnouncer.class with the LifecycleModule. + * Binds {@link DruidNodeAnnouncer}, {@link DruidNodeDiscoveryProvider}, and the coordinator/overlord + * {@link DruidLeaderSelector}s to their curator-backed implementations. */ public class DiscoveryModule implements Module { - private static final String NAME = "DiscoveryModule:internal"; - private static final String INTERNAL_DISCOVERY_PROP = "druid.discovery.type"; private static final String CURATOR_KEY = "curator"; - private boolean isZkEnabled = true; - - @Inject - public void configure(Properties properties) - { - isZkEnabled = ZkEnablementConfig.isEnabled(properties); - } - - /** - * Requests that the un-annotated DruidNode instance be injected and published as part of the lifecycle. - * - * That is, this module will announce the DruidNode instance returned by - * injector.getInstance(Key.get(DruidNode.class)) automatically. - * Announcement will happen in the ANNOUNCEMENTS stage of the Lifecycle - * - * @param binder the Binder to register with - */ - public static void registerDefault(Binder binder) - { - registerKey(binder, Key.get(new TypeLiteral<>() {})); - } - - /** - * Requests that the annotated DruidNode instance be injected and published as part of the lifecycle. 
- * - * That is, this module will announce the DruidNode instance returned by - * injector.getInstance(Key.get(DruidNode.class, annotation)) automatically. - * Announcement will happen in the ANNOUNCEMENTS stage of the Lifecycle - * - * @param annotation The annotation instance to use in finding the DruidNode instance, usually a Named annotation - */ - public static void register(Binder binder, Annotation annotation) - { - registerKey(binder, Key.get(new TypeLiteral<>() {}, annotation)); - } - - /** - * Requests that the annotated DruidNode instance be injected and published as part of the lifecycle. - * - * That is, this module will announce the DruidNode instance returned by - * injector.getInstance(Key.get(DruidNode.class, annotation)) automatically. - * Announcement will happen in the ANNOUNCEMENTS stage of the Lifecycle - * - * @param binder the Binder to register with - * @param annotation The annotation class to use in finding the DruidNode instance - */ - public static void register(Binder binder, Class annotation) - { - registerKey(binder, Key.get(new TypeLiteral<>() {}, annotation)); - } - - /** - * Requests that the keyed DruidNode instance be injected and published as part of the lifecycle. - * - * That is, this module will announce the DruidNode instance returned by - * injector.getInstance(Key.get(DruidNode.class, annotation)) automatically. 
- * Announcement will happen in the ANNOUNCEMENTS stage of the Lifecycle - * - * @param binder the Binder to register with - * @param key The key to use in finding the DruidNode instance - */ - public static void registerKey(Binder binder, Key key) - { - DruidBinders.discoveryAnnouncementBinder(binder).addBinding().toInstance(new KeyHolder<>(key)); - LifecycleModule.register(binder, ServiceAnnouncer.class); - } - @Override public void configure(Binder binder) { - JsonConfigProvider.bind(binder, "druid.discovery.curator", CuratorDiscoveryConfig.class); - - binder.bind(CuratorServiceAnnouncer.class).in(LazySingleton.class); - binder.bind(ServiceAnnouncementState.class).in(LazySingleton.class); - // Build the binder so that it will at a minimum inject an empty set. - DruidBinders.discoveryAnnouncementBinder(binder); - - if (isZkEnabled) { - binder.bind(ServiceAnnouncer.class) - .to(Key.get(CuratorServiceAnnouncer.class, Names.named(NAME))) - .in(LazySingleton.class); - } else { - binder.bind(Key.get(ServiceAnnouncer.Noop.class, Names.named(NAME))).toInstance(new ServiceAnnouncer.Noop()); - binder.bind(ServiceAnnouncer.class) - .to(Key.get(ServiceAnnouncer.Noop.class, Names.named(NAME))) - .in(LazySingleton.class); - } - - // internal discovery bindings. 
PolyBind.createChoiceWithDefault(binder, INTERNAL_DISCOVERY_PROP, Key.get(DruidNodeAnnouncer.class), CURATOR_KEY); PolyBind.createChoiceWithDefault( @@ -233,318 +106,6 @@ public void configure(Binder binder) .in(LazySingleton.class); } - @Provides - @LazySingleton - @Named(NAME) - public CuratorServiceAnnouncer getServiceAnnouncer( - final CuratorServiceAnnouncer announcer, - final Injector injector, - final Set> nodesToAnnounce, - final Lifecycle lifecycle - ) throws Exception - { - lifecycle.addMaybeStartHandler( - new Lifecycle.Handler() - { - private volatile List nodes = null; - - @Override - public void start() - { - if (nodes == null) { - nodes = new ArrayList<>(); - for (KeyHolder holder : nodesToAnnounce) { - nodes.add(injector.getInstance(holder.getKey())); - } - } - - for (DruidNode node : nodes) { - announcer.announce(node); - } - } - - @Override - public void stop() - { - if (nodes != null) { - for (DruidNode node : nodes) { - announcer.unannounce(node); - } - } - } - }, - Lifecycle.Stage.ANNOUNCEMENTS - ); - - return announcer; - } - - @Provides - @LazySingleton - public ServiceDiscovery getServiceDiscovery( - CuratorFramework curator, - CuratorDiscoveryConfig config, - Lifecycle lifecycle - ) throws Exception - { - if (!config.useDiscovery()) { - return new NoopServiceDiscovery<>(); - } - - final ServiceDiscovery serviceDiscovery = - ServiceDiscoveryBuilder.builder(Void.class) - .basePath(config.getPath()) - .client(curator) - .build(); - - lifecycle.addMaybeStartHandler( - new Lifecycle.Handler() - { - @Override - public void start() throws Exception - { - serviceDiscovery.start(); - } - - @Override - public void stop() - { - try { - serviceDiscovery.close(); - } - catch (Exception e) { - throw new RuntimeException(e); - } - } - } - ); - - return serviceDiscovery; - } - - @Provides - @LazySingleton - public ServerDiscoveryFactory getServerDiscoveryFactory( - ServiceDiscovery serviceDiscovery - ) - { - return new 
ServerDiscoveryFactory(serviceDiscovery); - } - - private static class NoopServiceDiscovery implements ServiceDiscovery - { - @Override - public void start() - { - - } - - @Override - public void registerService(ServiceInstance service) - { - - } - - @Override - public void updateService(ServiceInstance service) - { - - } - - @Override - public void unregisterService(ServiceInstance service) - { - - } - - @Override - public ServiceCacheBuilder serviceCacheBuilder() - { - return new NoopServiceCacheBuilder<>(); - } - - @Override - public Collection queryForNames() - { - return ImmutableList.of(); - } - - @Override - public Collection> queryForInstances(String name) - { - return ImmutableList.of(); - } - - @Override - public ServiceInstance queryForInstance(String name, String id) - { - return null; - } - - @Override - public ServiceProviderBuilder serviceProviderBuilder() - { - return new NoopServiceProviderBuilder<>(); - } - - @Override - public void close() - { - - } - } - - private static class NoopServiceCacheBuilder implements ServiceCacheBuilder - { - @Override - public ServiceCache build() - { - return new NoopServiceCache<>(); - } - - @Override - public ServiceCacheBuilder name(String name) - { - return this; - } - - @Override - public ServiceCacheBuilder threadFactory(ThreadFactory threadFactory) - { - return this; - } - - @Override - public ServiceCacheBuilder executorService(ExecutorService executorService) - { - return this; - } - - private static class NoopServiceCache implements ServiceCache - { - @Override - public List> getInstances() - { - return ImmutableList.of(); - } - - @Override - public void start() - { - // nothing - } - - @Override - public CountDownLatch startImmediate() - { - return null; - } - - @Override - public void close() - { - // nothing - } - - @Override - public void addListener(ServiceCacheListener listener) - { - // nothing - } - - @Override - public void addListener(ServiceCacheListener listener, Executor executor) - { - // 
nothing - } - - @Override - public void removeListener(ServiceCacheListener listener) - { - // nothing - } - } - } - - private static class NoopServiceProviderBuilder implements ServiceProviderBuilder - { - @Override - public ServiceProvider build() - { - return new NoopServiceProvider<>(); - } - - @Override - public ServiceProviderBuilder serviceName(String serviceName) - { - return this; - } - - @Override - public ServiceProviderBuilder providerStrategy(ProviderStrategy providerStrategy) - { - return this; - } - - @Override - public ServiceProviderBuilder threadFactory(ThreadFactory threadFactory) - { - return this; - } - - @Override - public ServiceProviderBuilder downInstancePolicy(DownInstancePolicy downInstancePolicy) - { - return this; - } - - @Override - @SuppressForbidden(reason = "org.apache.curator.shaded.com.google.common.base.Predicate") - public ServiceProviderBuilder additionalFilter(InstanceFilter tInstanceFilter) - { - return this; - } - - @Override - public ServiceProviderBuilder executorService(ExecutorService executorService) - { - return this; - } - } - - private static class NoopServiceProvider implements ServiceProvider - { - @Override - public void start() - { - // nothing - } - - @Override - public ServiceInstance getInstance() - { - return null; - } - - @Override - public Collection> getAllInstances() - { - return Collections.emptyList(); - } - - @Override - public void noteError(ServiceInstance tServiceInstance) - { - // nothing - } - - @Override - public void close() - { - // nothing - } - } - private static class DruidLeaderSelectorProvider implements Provider { @Inject diff --git a/server/src/main/java/org/apache/druid/curator/discovery/ServerDiscoveryFactory.java b/server/src/main/java/org/apache/druid/curator/discovery/ServerDiscoveryFactory.java deleted file mode 100644 index ca3cba0132ca..000000000000 --- a/server/src/main/java/org/apache/druid/curator/discovery/ServerDiscoveryFactory.java +++ /dev/null @@ -1,91 +0,0 @@ -/* - * 
Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.curator.discovery; - -import com.google.inject.Inject; -import org.apache.curator.x.discovery.ServiceDiscovery; -import org.apache.curator.x.discovery.ServiceInstance; -import org.apache.curator.x.discovery.ServiceProvider; - -import java.util.Collection; - -/** - * Use {@link org.apache.druid.discovery.DruidNodeDiscovery} for discovery. 
- */ -@Deprecated -public class ServerDiscoveryFactory -{ - private final ServiceDiscovery serviceDiscovery; - - @Inject - public ServerDiscoveryFactory( - ServiceDiscovery serviceDiscovery - ) - { - this.serviceDiscovery = serviceDiscovery; - } - - public ServerDiscoverySelector createSelector(String serviceName) - { - if (serviceName == null) { - return new ServerDiscoverySelector(new NoopServiceProvider(), serviceName); - } - - final ServiceProvider serviceProvider = serviceDiscovery - .serviceProviderBuilder() - .serviceName(CuratorServiceUtils.makeCanonicalServiceName(serviceName)) - .build(); - return new ServerDiscoverySelector(serviceProvider, serviceName); - } - - private static class NoopServiceProvider implements ServiceProvider - { - @Override - public void start() - { - // do nothing - } - - @Override - public ServiceInstance getInstance() - { - return null; - } - - @Override - public Collection> getAllInstances() - { - return null; - } - - @Override - public void noteError(ServiceInstance tServiceInstance) - { - // do nothing - } - - @Override - public void close() - { - // do nothing - } - } - -} diff --git a/server/src/main/java/org/apache/druid/curator/discovery/ServerDiscoverySelector.java b/server/src/main/java/org/apache/druid/curator/discovery/ServerDiscoverySelector.java deleted file mode 100644 index 84f4fe81ce0f..000000000000 --- a/server/src/main/java/org/apache/druid/curator/discovery/ServerDiscoverySelector.java +++ /dev/null @@ -1,146 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.curator.discovery; - -import com.google.common.base.Function; -import com.google.common.base.Preconditions; -import com.google.common.collect.Collections2; -import com.google.common.net.HostAndPort; -import org.apache.curator.x.discovery.ServiceInstance; -import org.apache.curator.x.discovery.ServiceProvider; -import org.apache.druid.client.selector.DiscoverySelector; -import org.apache.druid.client.selector.Server; -import org.apache.druid.java.util.common.lifecycle.LifecycleStart; -import org.apache.druid.java.util.common.lifecycle.LifecycleStop; -import org.apache.druid.java.util.common.logger.Logger; - -import javax.annotation.Nullable; -import java.io.IOException; -import java.util.Collection; -import java.util.Collections; - -/** - * Use {@link org.apache.druid.discovery.DruidNodeDiscovery} for discovery. 
- */ -@Deprecated -public class ServerDiscoverySelector implements DiscoverySelector -{ - private static final Logger log = new Logger(ServerDiscoverySelector.class); - - private final ServiceProvider serviceProvider; - private final String name; - - public ServerDiscoverySelector(ServiceProvider serviceProvider, String name) - { - this.serviceProvider = serviceProvider; - this.name = name; - } - - private static final Function TO_SERVER = new Function<>() - { - @Override - public Server apply(final ServiceInstance instance) - { - Preconditions.checkState( - instance.getPort() >= 0 || (instance.getSslPort() != null && instance.getSslPort() >= 0), - "Both port and sslPort not set" - ); - final int port; - final String scheme; - if (instance.getSslPort() == null) { - port = instance.getPort(); - scheme = "http"; - } else { - port = instance.getSslPort() >= 0 ? instance.getSslPort() : instance.getPort(); - scheme = instance.getSslPort() >= 0 ? "https" : "http"; - } - return new Server() - { - @Override - public String getHost() - { - return HostAndPort.fromParts(getAddress(), getPort()).toString(); - } - - @Override - public String getAddress() - { - return instance.getAddress(); - } - - @Override - public int getPort() - { - return port; - } - - @Override - public String getScheme() - { - return scheme; - } - }; - } - }; - - @Nullable - @Override - public Server pick() - { - final ServiceInstance instance; - try { - instance = serviceProvider.getInstance(); - } - catch (Exception e) { - log.info(e, "Exception getting instance for [%s]", name); - return null; - } - - if (instance == null) { - log.error("No server instance found for [%s]", name); - return null; - } - - return TO_SERVER.apply(instance); - } - - public Collection getAll() - { - try { - return Collections2.transform(serviceProvider.getAllInstances(), TO_SERVER); - } - catch (Exception e) { - log.info(e, "Unable to get all instances"); - return Collections.emptyList(); - } - } - - @LifecycleStart - public 
void start() throws Exception - { - serviceProvider.start(); - } - - @LifecycleStop - public void stop() throws IOException - { - serviceProvider.close(); - } -} diff --git a/server/src/main/java/org/apache/druid/guice/AnnouncerModule.java b/server/src/main/java/org/apache/druid/guice/AnnouncerModule.java index e48822afab6c..2476c7d9af92 100644 --- a/server/src/main/java/org/apache/druid/guice/AnnouncerModule.java +++ b/server/src/main/java/org/apache/druid/guice/AnnouncerModule.java @@ -27,7 +27,6 @@ import org.apache.druid.curator.announcement.NodeAnnouncer; import org.apache.druid.curator.announcement.PathChildrenAnnouncer; import org.apache.druid.curator.announcement.ServiceAnnouncer; -import org.apache.druid.guice.annotations.DirectExecutorAnnouncer; import org.apache.druid.guice.annotations.SingleThreadedAnnouncer; import org.apache.druid.java.util.common.concurrent.Execs; import org.apache.druid.server.coordination.BatchDataSegmentAnnouncer; @@ -57,16 +56,4 @@ public ServiceAnnouncer getAnnouncerWithSingleThreadedExecutorService(CuratorFra } } - @Provides - @DirectExecutorAnnouncer - @ManageLifecycleAnnouncements - public ServiceAnnouncer getAnnouncerWithDirectExecutorService(CuratorFramework curator, CuratorConfig config) - { - boolean usingPathChildrenCacheAnnouncer = config.getPathChildrenCacheStrategy(); - if (usingPathChildrenCacheAnnouncer) { - return new PathChildrenAnnouncer(curator, Execs.directExecutor()); - } else { - return new NodeAnnouncer(curator, Execs.directExecutor()); - } - } } diff --git a/server/src/main/java/org/apache/druid/guice/DruidBinders.java b/server/src/main/java/org/apache/druid/guice/DruidBinders.java index 86424af74581..5c969d4ddf6b 100644 --- a/server/src/main/java/org/apache/druid/guice/DruidBinders.java +++ b/server/src/main/java/org/apache/druid/guice/DruidBinders.java @@ -32,7 +32,6 @@ import org.apache.druid.query.QueryToolChest; import org.apache.druid.segment.SegmentWrangler; import 
org.apache.druid.segment.join.JoinableFactory; -import org.apache.druid.server.DruidNode; import java.lang.annotation.Annotation; import java.util.Set; @@ -134,11 +133,6 @@ public QueryBinder naiveBinding( } } - public static Multibinder> discoveryAnnouncementBinder(Binder binder) - { - return Multibinder.newSetBinder(binder, new TypeLiteral<>() {}); - } - public static Multibinder> metricMonitorBinder(Binder binder) { return Multibinder.newSetBinder(binder, new TypeLiteral<>() {}); diff --git a/server/src/main/java/org/apache/druid/guice/LocalDataStorageDruidModule.java b/server/src/main/java/org/apache/druid/guice/LocalDataStorageDruidModule.java index 4cdd28017acd..8f1d44c3fe38 100644 --- a/server/src/main/java/org/apache/druid/guice/LocalDataStorageDruidModule.java +++ b/server/src/main/java/org/apache/druid/guice/LocalDataStorageDruidModule.java @@ -34,8 +34,6 @@ import org.apache.druid.segment.loading.LocalDataSegmentPusherConfig; import org.apache.druid.segment.loading.LocalFileTimestampVersionFinder; import org.apache.druid.segment.loading.LocalLoadSpec; -import org.apache.druid.segment.loading.SegmentCacheManager; -import org.apache.druid.segment.loading.SegmentLocalCacheManager; import java.util.List; @@ -48,8 +46,6 @@ public class LocalDataStorageDruidModule implements DruidModule @Override public void configure(Binder binder) { - binder.bind(SegmentCacheManager.class).to(SegmentLocalCacheManager.class).in(LazySingleton.class); - bindDeepStorageLocal(binder); PolyBind.createChoice( diff --git a/server/src/main/java/org/apache/druid/guice/StorageNodeModule.java b/server/src/main/java/org/apache/druid/guice/StorageNodeModule.java index 979dce5c3a6e..09b953204a43 100644 --- a/server/src/main/java/org/apache/druid/guice/StorageNodeModule.java +++ b/server/src/main/java/org/apache/druid/guice/StorageNodeModule.java @@ -33,9 +33,14 @@ import org.apache.druid.query.DruidProcessingConfig; import org.apache.druid.segment.DefaultColumnFormatConfig; import 
org.apache.druid.segment.column.ColumnConfig; +import org.apache.druid.segment.loading.SegmentCacheManager; import org.apache.druid.segment.loading.SegmentLoaderConfig; +import org.apache.druid.segment.loading.SegmentLocalCacheManager; +import org.apache.druid.segment.loading.StorageLoadingThreadPool; import org.apache.druid.segment.loading.StorageLocation; import org.apache.druid.segment.loading.StorageLocationSelectorStrategy; +import org.apache.druid.segment.loading.external.StorageLocationVirtualStorageManager; +import org.apache.druid.segment.loading.external.VirtualStorageManager; import org.apache.druid.server.DruidNode; import org.apache.druid.server.coordination.DruidServerMetadata; import org.apache.druid.server.coordination.ServerType; @@ -62,6 +67,8 @@ public void configure(Binder binder) bindLocationSelectorStrategy(binder); binder.bind(ServerTypeConfig.class).toProvider(Providers.of(null)); binder.bind(ColumnConfig.class).to(DruidProcessingConfig.class).in(LazySingleton.class); + binder.bind(SegmentCacheManager.class).to(SegmentLocalCacheManager.class).in(LazySingleton.class); + binder.bind(VirtualStorageManager.class).to(StorageLocationVirtualStorageManager.class).in(LazySingleton.class); MetricsModule.register(binder, StorageMonitor.class); } @@ -120,6 +127,13 @@ public DataNodeService getDataNodeService( ); } + @Provides + @ManageLifecycle + public StorageLoadingThreadPool getStorageLoadingThreadPool(SegmentLoaderConfig config) + { + return StorageLoadingThreadPool.createFromConfig(config); + } + @Provides @LazySingleton @Named(IS_SEGMENT_CACHE_CONFIGURED) diff --git a/server/src/main/java/org/apache/druid/metadata/input/SqlEntity.java b/server/src/main/java/org/apache/druid/metadata/input/SqlEntity.java index dcf315b81f87..e77e5e2fbcf4 100644 --- a/server/src/main/java/org/apache/druid/metadata/input/SqlEntity.java +++ b/server/src/main/java/org/apache/druid/metadata/input/SqlEntity.java @@ -84,7 +84,7 @@ public URI getUri() } @Override - public 
InputStream open() + public InputStream openRaw() { throw new UnsupportedOperationException("Please use fetch() instead"); } diff --git a/server/src/main/java/org/apache/druid/rpc/indexing/NoopOverlordClient.java b/server/src/main/java/org/apache/druid/rpc/indexing/NoopOverlordClient.java index 81fccf19f131..2b1ad6a555a7 100644 --- a/server/src/main/java/org/apache/druid/rpc/indexing/NoopOverlordClient.java +++ b/server/src/main/java/org/apache/druid/rpc/indexing/NoopOverlordClient.java @@ -114,6 +114,12 @@ public ListenableFuture> terminateSupervisor(String supervis throw new UnsupportedOperationException(); } + @Override + public ListenableFuture> resetToLatestAndBackfill(String supervisorId) + { + throw new UnsupportedOperationException(); + } + @Override public ListenableFuture> supervisorStatuses() { diff --git a/server/src/main/java/org/apache/druid/rpc/indexing/OverlordClient.java b/server/src/main/java/org/apache/druid/rpc/indexing/OverlordClient.java index c4d348997779..baf7e4297c9d 100644 --- a/server/src/main/java/org/apache/druid/rpc/indexing/OverlordClient.java +++ b/server/src/main/java/org/apache/druid/rpc/indexing/OverlordClient.java @@ -197,6 +197,15 @@ ListenableFuture> taskStatuses( */ ListenableFuture> terminateSupervisor(String supervisorId); + /** + * Resets a supervisor to the latest stream offsets and starts a bounded backfill supervisor. + *

+ * API: {@code POST /druid/indexer/v1/supervisor/<supervisorId>/resetToLatestAndBackfill} + * + * @return Map containing "id" and "backfillSupervisorId" + */ + ListenableFuture> resetToLatestAndBackfill(String supervisorId); + /** * Returns all current supervisor statuses. */ diff --git a/server/src/main/java/org/apache/druid/rpc/indexing/OverlordClientImpl.java b/server/src/main/java/org/apache/druid/rpc/indexing/OverlordClientImpl.java index 0499a62f090a..3657d8b83a6f 100644 --- a/server/src/main/java/org/apache/druid/rpc/indexing/OverlordClientImpl.java +++ b/server/src/main/java/org/apache/druid/rpc/indexing/OverlordClientImpl.java @@ -265,6 +265,23 @@ public ListenableFuture> terminateSupervisor(String supervis ); } + @Override + public ListenableFuture> resetToLatestAndBackfill(String supervisorId) + { + final String path = StringUtils.format( + "/druid/indexer/v1/supervisor/%s/resetToLatestAndBackfill", + StringUtils.urlEncode(supervisorId) + ); + + return FutureUtils.transform( + client.asyncRequest( + new RequestBuilder(HttpMethod.POST, path), + new BytesFullResponseHandler() + ), + holder -> JacksonUtils.readValue(jsonMapper, holder.getContent(), new TypeReference<>() {}) + ); + } + @Override public ListenableFuture> supervisorStatuses() { diff --git a/server/src/main/java/org/apache/druid/segment/loading/PartialSegmentBundleCacheEntry.java b/server/src/main/java/org/apache/druid/segment/loading/PartialSegmentBundleCacheEntry.java new file mode 100644 index 000000000000..ea7312cd26f6 --- /dev/null +++ b/server/src/main/java/org/apache/druid/segment/loading/PartialSegmentBundleCacheEntry.java @@ -0,0 +1,630 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership.
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.loading; + +import com.google.common.util.concurrent.SettableFuture; +import com.google.errorprone.annotations.concurrent.GuardedBy; +import org.apache.druid.error.DruidException; +import org.apache.druid.java.util.emitter.EmittingLogger; +import org.apache.druid.segment.ReferenceCountingCloseableObject; +import org.apache.druid.segment.file.PartialSegmentFileMapperV10; +import org.apache.druid.segment.file.SegmentFileBuilder; +import org.apache.druid.segment.file.SegmentFileContainerMetadata; +import org.apache.druid.segment.file.SegmentFileMetadata; +import org.apache.druid.timeline.SegmentId; + +import javax.annotation.Nullable; +import java.io.Closeable; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.atomic.AtomicReference; +import java.util.concurrent.locks.ReentrantLock; + +/** + * Cache entry for a single named bundle within a partial-loaded V10 segment. A bundle is a group of containers + * declared at write time via {@link SegmentFileBuilder#startFileBundle}; the cache layer reads each container's + * {@link SegmentFileContainerMetadata#getBundle} field and treats every container in the named bundle as one + * mount/evict unit. 
Containers written without an explicit {@code startFileBundle} call (and containers from older + * segments that did not carry a bundle name) default to {@link SegmentFileBuilder#ROOT_BUNDLE_NAME}, the implicit + * root bundle that sits above any named ones. A bundle may also span multiple physical files: when the writer + * propagates {@code startFileBundle} to attached external segment files, the cache layer transparently includes their + * matching containers in the same bundle. + *

+ * Mounting a bundle entry sparse-allocates its containers locally via + * {@link PartialSegmentFileMapperV10#initializeContainer}; unmounting evicts them (unmap + delete + clear bitmap) via + * {@link PartialSegmentFileMapperV10#evictContainer}. + *

+ * Dependency holds + references. A bundle entry holds two layers of protection on its metadata cache entry + * plus every transitive parent bundle entry passed in at construction time via {@code parentEntryIds}. The first + * layer is a {@link StorageLocation.ReservationHold} acquired via + * {@link StorageLocation#addWeakReservationHoldIfExists}, which prevents evicting weak dependencies while this bundle + * is mounted (no-op for statically reserved dependencies). The second layer is a reference acquired via + * {@link PartialSegmentMetadataCacheEntry#acquireReference} / {@link #acquireReference} on each dependency, which + * defers each dependency's actual unmap-and-delete work until this bundle's own {@link #unmount} runs; this is the + * protection that matters for statically reserved dependencies where the cache hold is no-op. Both are acquired + * during {@link #mount} and released during {@link #unmount}; if a parent is missing or cannot be acquired, mount + * fails and any holds/references already taken are released. + *

+ * Reference-counted deferred cleanup of this bundle. {@link #unmount()} doesn't necessarily release resources + * synchronously. While any references acquired via {@link #acquireReference()} are outstanding (e.g. an in-flight + * cursor reading this bundle's columns), the actual evict-containers-and-release-dependencies work is deferred until + * the last reference releases. The same instance can be re-mounted after a previous cleanup completes; a fresh + * internal Phaser is installed on the next successful mount. + *

+ * Mount-time dedup. Concurrent {@link #mount} calls are deduplicated via a {@link AtomicReference} of + * {@link SettableFuture}; one thread does the work, the rest wait on the same future. On failure the gate is + * cleared so a subsequent caller gets a fresh attempt; on success the gate stays set until {@link #unmount}. + */ +public class PartialSegmentBundleCacheEntry implements CacheEntry +{ + private static final EmittingLogger LOG = new EmittingLogger(PartialSegmentBundleCacheEntry.class); + + /** + * Build a bundle entry given an already-mounted metadata entry and the bundle's name (as declared at write time via + * {@link SegmentFileBuilder#startFileBundle}, or implicitly {@link SegmentFileBuilder#ROOT_BUNDLE_NAME} for + * containers written without an explicit call). Walks the main file's containers plus each attached external + * file's containers, picking every container whose {@link SegmentFileContainerMetadata#getBundle bundle} equals + * {@code bundleName}. + */ + public static PartialSegmentBundleCacheEntry forBundle( + PartialSegmentMetadataCacheEntry metadataEntry, + String bundleName, + List parentEntryIds + ) + { + final PartialSegmentFileMapperV10 fileMapper = metadataEntry.getFileMapper(); + if (fileMapper == null) { + throw DruidException.defensive( + "Cannot create bundle entry for [%s/%s]: metadata entry is not mounted", + metadataEntry.getSegmentId(), + bundleName + ); + } + + final List refs = findContainersForBundle(fileMapper, bundleName); + if (refs.isEmpty()) { + throw DruidException.defensive( + "Bundle[%s] has no containers in segment[%s]", + bundleName, + metadataEntry.getSegmentId() + ); + } + + long size = 0; + for (BundleContainerRef ref : refs) { + size += fileMapper.mapperForContainer(ref.externalFilename()) + .getSegmentFileMetadata() + .getContainers() + .get(ref.containerIndex()) + .getSize(); + } + + return new PartialSegmentBundleCacheEntry( + metadataEntry.getSegmentId(), + bundleName, + refs, + size, + metadataEntry, + 
List.copyOf(parentEntryIds) + ); + } + + /** + * Find every {@link BundleContainerRef} that the named bundle owns across the main file and each external file: + * any container whose {@link SegmentFileContainerMetadata#getBundle bundle} equals {@code bundleName}. Shared by + * {@link #forBundle} and the bootstrap path so both observe the same definition of bundle membership. + */ + public static List findContainersForBundle( + PartialSegmentFileMapperV10 fileMapper, + String bundleName + ) + { + final List refs = new ArrayList<>(); + collectMatchingContainers(fileMapper.getSegmentFileMetadata(), bundleName, null, refs); + for (String externalFilename : fileMapper.getExternalFilenames()) { + collectMatchingContainers( + fileMapper.getExternalMapper(externalFilename).getSegmentFileMetadata(), + bundleName, + externalFilename, + refs + ); + } + return List.copyOf(refs); + } + + private final PartialSegmentBundleCacheEntryIdentifier id; + private final SegmentId segmentId; + private final String bundleName; + private final List containerRefs; + private final long size; + private final PartialSegmentMetadataCacheEntry metadataEntry; + private final List parentEntryIds; + + private final ReentrantLock entryLock = new ReentrantLock(); + private final AtomicReference> mountFuture = new AtomicReference<>(); + + @GuardedBy("entryLock") + @Nullable + private StorageLocation location; + @GuardedBy("entryLock") + private final List> holds = new ArrayList<>(); + // references this bundle holds on its metadata entry and each transitive parent bundle for the duration of its + // mounted lifetime. Released in doActualUnmount. Distinct from `holds` (cache-eviction protection): these references + // gate deferred cleanup on the dependencies, so an in-flight query that holds a reference on this bundle keeps + // metadata + parents safe from drop-time unmap even if the dependency is statically reserved. 
+ @GuardedBy("entryLock") + private final List dependencyReferences = new ArrayList<>(); + @GuardedBy("entryLock") + private boolean mounted; + + // Reference-counted gate over the actual cleanup work (evict containers, release parent holds, unregister from + // metadata). Set on successful mount; unmount() closes the wrapper which defers running cleanup until all outstanding + // references (acquired via acquireReference()) are released. Re-created on mount-after-cleanup-completion. Null when + // the entry has never been mounted. + private final AtomicReference> references = new AtomicReference<>(); + + PartialSegmentBundleCacheEntry( + SegmentId segmentId, + String bundleName, + List containerRefs, + long size, + PartialSegmentMetadataCacheEntry metadataEntry, + List parentEntryIds + ) + { + this.segmentId = segmentId; + this.bundleName = bundleName; + this.id = new PartialSegmentBundleCacheEntryIdentifier(segmentId, bundleName); + this.containerRefs = containerRefs; + this.size = size; + this.metadataEntry = metadataEntry; + this.parentEntryIds = parentEntryIds; + } + + @Override + public PartialSegmentBundleCacheEntryIdentifier getId() + { + return id; + } + + @Override + public long getSize() + { + return size; + } + + @Override + public boolean isMounted() + { + entryLock.lock(); + try { + return mounted; + } + finally { + entryLock.unlock(); + } + } + + public SegmentId getSegmentId() + { + return segmentId; + } + + public String getBundleName() + { + return bundleName; + } + + /** + * The list of {@link BundleContainerRef} this bundle owns, across the main file and any external files. + */ + public List getContainerRefs() + { + return containerRefs; + } + + public List getParentEntryIds() + { + return parentEntryIds; + } + + /** + * Mount this bundle entry: acquire holds on the metadata entry and all transitive parent bundle entries, then + * sparse-allocate every container this bundle owns. 
Concurrent calls are deduplicated via the {@link #mountFuture} + * CAS gate, only one thread runs the work; the rest wait on the same future. + *

+ * On failure, any holds taken are released and the gate is cleared so a subsequent retry gets a fresh attempt. + */ + @Override + public void mount(StorageLocation mountLocation) throws IOException + { + while (true) { + final SettableFuture existing = mountFuture.get(); + if (existing != null) { + awaitMount(existing); + // The completed mount may have been for a different location. Verify the requested location matches. + entryLock.lock(); + try { + if (location != null && !location.equals(mountLocation)) { + throw DruidException.defensive( + "Already mounted[%s] in location[%s] which differs from requested[%s]", + id, + location.getPath(), + mountLocation.getPath() + ); + } + } + finally { + entryLock.unlock(); + } + verifyStillReservedOrRollback(mountLocation); + return; + } + final SettableFuture ours = SettableFuture.create(); + if (!mountFuture.compareAndSet(null, ours)) { + continue; + } + try { + doMount(mountLocation); + ours.set(null); + } + catch (Throwable t) { + // clear the gate so the next caller gets a fresh attempt + mountFuture.set(null); + ours.setException(t); + switch (t) { + case IOException ioException -> throw ioException; + case RuntimeException runtimeException -> throw runtimeException; + case Error error -> throw error; + default -> throw DruidException.defensive(t, "Failed to mount bundle entry[%s]", id); + } + } + verifyStillReservedOrRollback(mountLocation); + return; + } + } + + /** + * Post-mount safety check: confirm the entry is still registered with the location, otherwise roll back. Handles + * the race where a concurrent canceler releases the hold that was keeping this weak entry in {@code + * weakCacheEntries} and the cache evicts it while mount() is still working. Without this check, mount would commit + * local state (sparse-allocated containers on disk, parent holds + references) for an entry the cache manager no + * longer knows about, leaking those resources. 
Mirrors the same defensive check in {@code SegmentCacheEntry.mount}. + * Returns normally if rollback fires; callers detect via {@link #isMounted}. + */ + private void verifyStillReservedOrRollback(StorageLocation mountLocation) + { + if (!mountLocation.isReserved(id) && !mountLocation.isWeakReserved(id)) { + LOG.debug( + "Aborting mount of bundle[%s] in location[%s]; entry was evicted while mounting", + id, + mountLocation.getPath() + ); + unmount(); + } + } + + private void doMount(StorageLocation mountLocation) throws IOException + { + // Pre-check inside entryLock; after this we release entryLock so the hold-acquisition + container-init work below + // doesn't nest location.readLock under entryLock; same lock-order rule as the metadata entry's mount, which is + // the inverse of StorageLocation.release's writeLock -> entryLock. The CAS+SettableFuture gate in mount() + // guarantees only one thread runs this method at a time per entry, so we don't need entryLock to keep two + // concurrent mounters out. + entryLock.lock(); + try { + if (mounted) { + if (location != null && !location.equals(mountLocation)) { + throw DruidException.defensive( + "Already mounted[%s] in location[%s] which differs from requested[%s]", + id, + location.getPath(), + mountLocation.getPath() + ); + } + return; + } + } + finally { + entryLock.unlock(); + } + + final PartialSegmentFileMapperV10 fileMapper = metadataEntry.getFileMapper(); + if (fileMapper == null) { + throw DruidException.defensive( + "Cannot mount bundle[%s]: metadata entry[%s] is not mounted", + id, + metadataEntry.getId() + ); + } + + final List> acquired = new ArrayList<>(); + final List acquiredRefs = new ArrayList<>(); + boolean registered = false; + boolean committed = false; + try { + // 1. 
Cache holds on metadata + parents (prevents cache eviction of weak dependencies) + final StorageLocation.ReservationHold metadataHold = + mountLocation.addWeakReservationHoldIfExists(metadataEntry.getId()); + if (metadataHold == null) { + throw DruidException.defensive( + "Cannot acquire metadata hold for [%s]; metadata entry not registered with location[%s]", + metadataEntry.getId(), + mountLocation.getPath() + ); + } + acquired.add(metadataHold); + + for (PartialSegmentBundleCacheEntryIdentifier parentId : parentEntryIds) { + final StorageLocation.ReservationHold parentHold = + mountLocation.addWeakReservationHoldIfExists(parentId); + if (parentHold == null) { + throw DruidException.defensive( + "Cannot acquire parent hold for [%s]; parent entry not registered with location[%s]", + parentId, + mountLocation.getPath() + ); + } + acquired.add(parentHold); + } + + // 2. References on metadata + parents (gates their deferred cleanup on this bundle's lifetime; matters for + // statically-reserved dependencies where a drop fires `release()` directly without going through cache) + acquiredRefs.add(metadataEntry.acquireReference()); + for (PartialSegmentBundleCacheEntryIdentifier parentId : parentEntryIds) { + final CacheEntry parentEntry = mountLocation.getCacheEntry(parentId); + if (!(parentEntry instanceof PartialSegmentBundleCacheEntry)) { + throw DruidException.defensive( + "Parent entry[%s] of bundle[%s] is missing or not a bundle entry; cannot acquire reference", + parentId, + id + ); + } + acquiredRefs.add(((PartialSegmentBundleCacheEntry) parentEntry).acquireReference()); + } + + // 3. Sparse-allocate this bundle's containers, routing to the main mapper or the appropriate external mapper + // depending on each ref's externalFilename. + for (BundleContainerRef ref : containerRefs) { + fileMapper.mapperForContainer(ref.externalFilename()).initializeContainer(ref.containerIndex()); + } + + // Register with metadata BEFORE the state commit. 
If this throws (it shouldn't, but just in case), no state has + // been committed yet and the catch path releases the holds without leaving an orphaned-but-mounted bundle + metadataEntry.registerBundle(this); + registered = true; + + // Commit state under entryLock. Hold and reference ownership transfers from local lists to fields here. Also + // install (or re-install, after a prior mount/unmount cycle terminated the previous Phaser) the reference- + // counted gate over cleanup; future acquireReference() and unmount() calls operate on this instance. + entryLock.lock(); + try { + location = mountLocation; + holds.addAll(acquired); + dependencyReferences.addAll(acquiredRefs); + mounted = true; + references.set(new ReferenceCountingCloseableObject(this::doActualUnmount) {}); + } + finally { + entryLock.unlock(); + } + committed = true; + } + finally { + if (!committed) { + // Evict any containers that were successfully initialized before the failure. Mirrors the eager + // SegmentCacheEntry behavior: retry from a clean slate is simpler than reasoning about partial on-disk state. + // evictContainer is a no-op for containers that were never initialized, so we can iterate the full set + // without tracking how far the initialization loop got. 
+ for (BundleContainerRef ref : containerRefs) { + try { + fileMapper.mapperForContainer(ref.externalFilename()).evictContainer(ref.containerIndex()); + } + catch (Throwable t) { + LOG.warn(t, "Failed to evict container[%s/%d] for bundle[%s] during mount rollback", + ref.externalFilename(), ref.containerIndex(), id); + } + } + if (registered) { + try { + metadataEntry.unregisterBundle(this); + } + catch (Throwable t) { + LOG.warn(t, "Failed to unregister bundle[%s] during mount rollback", id); + } + } + for (Closeable ref : acquiredRefs) { + try { + ref.close(); + } + catch (Throwable t) { + LOG.warn(t, "Failed to release dependency reference during mount rollback for bundle[%s]", id); + } + } + for (StorageLocation.ReservationHold hold : acquired) { + try { + hold.close(); + } + catch (Throwable t) { + LOG.warn(t, "Failed to release hold[%s] during mount rollback", hold); + } + } + } + } + } + + /** + * Triggers cleanup of this bundle. If any references acquired via {@link #acquireReference()} are still outstanding, + * the actual evict/release work is deferred until the last reference releases; in that case this method returns + * immediately and {@link #doActualUnmount} will fire later on the thread that closes the last reference. With no + * outstanding references, cleanup runs synchronously on the caller's thread. + */ + @Override + public void unmount() + { + final ReferenceCountingCloseableObject current = references.get(); + if (current != null && !current.isClosed()) { + current.close(); + } + } + + /** + * Acquire a reference that keeps this bundle's resources (container files, parent bundle holds) alive across an + * intervening {@link #unmount} call. The returned {@link Closeable} must be closed when the caller is done; at + * that point if {@code unmount()} has already been called and no other references remain, the deferred cleanup + * fires on the closing thread. 
+ * + * @throws DruidException if the bundle has never been mounted, or has already been cleaned up + */ + public Closeable acquireReference() + { + final ReferenceCountingCloseableObject current = references.get(); + if (current == null) { + throw DruidException.defensive( + "Cannot acquire reference on bundle[%s] before it has been mounted", + id + ); + } + return current.incrementReferenceAndDecrementOnceCloseable() + .orElseThrow(() -> DruidException.defensive( + "Cannot acquire reference on bundle[%s]; already being unmounted", + id + )); + } + + /** + * The actual unmount work, invoked by the reference-counted gate's {@code onAdvance} once every outstanding + * reference (plus the wrapper's own initial party) has been released. Evicts owned containers, releases parent + * holds + dependency references, unregisters from the metadata entry, and clears the mount-dedup gate so a fresh + * mount can run. + *

+ * Dependency reference + cache hold releases happen OUTSIDE entryLock so that any cascading parent cleanup (a + * parent whose last reference is this bundle's, draining the parent's Phaser) doesn't run under our lock and keeps + * the entry-lock-then-location-lock convention intact even when the cascade re-enters StorageLocation. + */ + private void doActualUnmount() + { + final List refsToRelease; + final List> holdsToRelease; + entryLock.lock(); + try { + if (!mounted) { + return; + } + final PartialSegmentFileMapperV10 fileMapper = metadataEntry.getFileMapper(); + // file mapper may be null if metadata was already unmounted (out-of-order shutdown); evictContainer would NPE + if (fileMapper != null) { + for (BundleContainerRef ref : containerRefs) { + try { + fileMapper.mapperForContainer(ref.externalFilename()).evictContainer(ref.containerIndex()); + } + catch (Throwable t) { + LOG.warn(t, "Failed to evict container[%s/%d] for bundle[%s]", ref.externalFilename(), ref.containerIndex(), id); + } + } + } + refsToRelease = new ArrayList<>(dependencyReferences); + dependencyReferences.clear(); + holdsToRelease = new ArrayList<>(holds); + holds.clear(); + location = null; + mounted = false; + mountFuture.set(null); + } + finally { + entryLock.unlock(); + } + + // Release dependency references first so any cascading parent cleanup runs before we drop cache holds. The order + // is mostly informational since the two layers are independent, but matches the acquisition order in doMount. 
+ for (Closeable ref : refsToRelease) { + try { + ref.close(); + } + catch (Throwable t) { + LOG.warn(t, "Failed to release dependency reference for bundle[%s]", id); + } + } + releaseHolds(holdsToRelease); + metadataEntry.unregisterBundle(this); + } + + private static void collectMatchingContainers( + SegmentFileMetadata fileMeta, + String bundleName, + @Nullable String externalFilename, + List out + ) + { + final List containers = fileMeta.getContainers(); + for (int ci = 0; ci < containers.size(); ci++) { + if (bundleName.equals(containers.get(ci).getBundle())) { + out.add(new BundleContainerRef(externalFilename, ci)); + } + } + } + + private static void awaitMount(SettableFuture future) throws IOException + { + try { + future.get(); + } + catch (InterruptedException e) { + Thread.currentThread().interrupt(); + throw new IOException("Interrupted while waiting for mount", e); + } + catch (ExecutionException e) { + final Throwable cause = e.getCause() == null ? e : e.getCause(); + switch (cause) { + case IOException ioException -> throw ioException; + case RuntimeException runtimeException -> throw runtimeException; + case Error error -> throw error; + default -> throw DruidException.defensive(e, "mount failed"); + } + } + } + + private static void releaseHolds(Collection> holds) + { + for (StorageLocation.ReservationHold hold : holds) { + try { + hold.close(); + } + catch (Throwable t) { + LOG.warn(t, "Failed to release hold[%s]", hold); + } + } + } + + /** + * Reference to a single container that this bundle owns. {@code externalFilename} is {@code null} when the + * container lives in the main V10 file, or the external file's name when the container lives in an attached + * external file. {@code containerIndex} is the position within that file's + * {@link SegmentFileMetadata#getContainers()} list. 
A single logical bundle (one named group) can span containers + * across the main file and one or more external files when the writer propagates {@code startFileBundle} to both, + * the cache layer treats them as one mount/evict unit regardless. + */ + public record BundleContainerRef(@Nullable String externalFilename, int containerIndex) + { + } +} diff --git a/server/src/main/java/org/apache/druid/curator/discovery/ServiceAnnouncer.java b/server/src/main/java/org/apache/druid/segment/loading/PartialSegmentBundleCacheEntryIdentifier.java similarity index 50% rename from server/src/main/java/org/apache/druid/curator/discovery/ServiceAnnouncer.java rename to server/src/main/java/org/apache/druid/segment/loading/PartialSegmentBundleCacheEntryIdentifier.java index 1533422824eb..2d7a5b823c9a 100644 --- a/server/src/main/java/org/apache/druid/curator/discovery/ServiceAnnouncer.java +++ b/server/src/main/java/org/apache/druid/segment/loading/PartialSegmentBundleCacheEntryIdentifier.java @@ -17,36 +17,25 @@ * under the License. */ -package org.apache.druid.curator.discovery; +package org.apache.druid.segment.loading; -import org.apache.druid.server.DruidNode; +import org.apache.druid.segment.file.SegmentFileBuilder; +import org.apache.druid.timeline.SegmentId; /** - * This class is deprecated, Add service to {@link org.apache.druid.discovery.DruidNodeAnnouncer} node announcement instead. - * - * Announces our ability to serve a particular function. Multiple users may announce the same service, in which - * case they are treated as interchangeable instances of that service. + * Identifier for a {@link PartialSegmentBundleCacheEntry}; a named group of containers within a partial-loaded V10 + * segment that gets mounted and evicted as a unit. The {@code bundleName} is the group name declared at write time + * via {@link SegmentFileBuilder#startFileBundle}. + *

+ * Each partial segment is split across multiple {@link CacheEntry}, with one {@link SegmentCacheEntryIdentifier}-keyed + * metadata entry plus one of these per bundle. */ -@Deprecated -public interface ServiceAnnouncer +public record PartialSegmentBundleCacheEntryIdentifier(SegmentId segmentId, String bundleName) + implements CacheEntryIdentifier { - void announce(DruidNode node); - - void unannounce(DruidNode node); - - class Noop implements ServiceAnnouncer + @Override + public String toString() { - - @Override - public void announce(DruidNode node) - { - - } - - @Override - public void unannounce(DruidNode node) - { - - } + return segmentId + ":" + bundleName; } } diff --git a/server/src/main/java/org/apache/druid/segment/loading/PartialSegmentCacheBootstrap.java b/server/src/main/java/org/apache/druid/segment/loading/PartialSegmentCacheBootstrap.java new file mode 100644 index 000000000000..7793d2437206 --- /dev/null +++ b/server/src/main/java/org/apache/druid/segment/loading/PartialSegmentCacheBootstrap.java @@ -0,0 +1,395 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.segment.loading; + +import com.fasterxml.jackson.databind.ObjectMapper; +import org.apache.druid.error.DruidException; +import org.apache.druid.java.util.common.StringUtils; +import org.apache.druid.java.util.emitter.EmittingLogger; +import org.apache.druid.segment.file.PartialSegmentFileMapperV10; +import org.apache.druid.segment.file.SegmentFileBuilder; +import org.apache.druid.segment.file.SegmentFileContainerMetadata; +import org.apache.druid.segment.file.SegmentFileMetadata; +import org.apache.druid.segment.projections.Projections; +import org.apache.druid.timeline.SegmentId; + +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.util.ArrayList; +import java.util.Comparator; +import java.util.HashSet; +import java.util.List; +import java.util.Objects; +import java.util.Set; + +/** + * Bootstraps partial-segment cache entries from existing on-disk state. Called by the cache manager on historical + * startup for each segment directory that contains the partial-download layout (`{targetFilename}.header` plus one + * or more `{targetFilename}.container.NNNNN` files). + *

+ * The bootstrap is read-only with respect to deep storage; it never issues a range read. The on-disk header file is + * parsed in-place by {@link PartialSegmentFileMapperV10#create} (which detects header corruption and, for that one + * case, may delete the local copy; bootstrap callers should treat that as "no restorable state" and fall back to a + * cold start). Bundle entries are registered as weak entries on the storage location, mounted (which sparse-allocates + * any container files that weren't already present and re-establishes parent holds), and returned to the caller. + * <p>
+ * Parent-set inference is delegated to {@link PartialSegmentMetadataCacheEntry#inferParentBundles}. A bundle whose + * inferred parent isn't itself present on disk is treated as orphaned: its on-disk container files are deleted + * (via {@link PartialSegmentFileMapperV10#evictContainer}, which also clears the relevant bitmap bits) and the bundle + * is not restored. The next access through the cache manager acquire path then triggers a clean cold re-fetch, the + * same fall-back as when the cache manager finds a segment listed in the info directory but missing on disk. + */ +public final class PartialSegmentCacheBootstrap +{ + private static final EmittingLogger LOG = new EmittingLogger(PartialSegmentCacheBootstrap.class); + + /** + * Restore a single partial segment's cache entries from its local on-disk layout. + * + * @param segmentId the segment whose entries are being restored + * @param localCacheDir the per-segment directory containing the header + container files + * @param targetFilename the V10 entry-point filename + * @param externalFilenames any external segment file names that were registered as children of the entry-point + * file + * @param jsonMapper used to parse the header + * @param location the storage location these entries belong to; the metadata entry is registered as + * static and bundle entries are registered as weak + * @throws DruidException if the expected header file is missing, if a metadata or bundle reservation cannot be + * established on the location, or if the metadata entry mount produces a null file mapper + * @throws IOException propagated from {@link CacheEntry#mount} (metadata or bundle) or from on-disk header/bitmap + * I/O performed during restore + */ + public static RestoreResult restoreFromDisk( + SegmentId segmentId, + File localCacheDir, + String targetFilename, + List externalFilenames, + ObjectMapper jsonMapper, + StorageLocation location + ) throws IOException + { + final File headerFile = new File(localCacheDir, 
targetFilename + PartialSegmentFileMapperV10.METADATA_HEADER_SUFFIX); + if (!headerFile.exists()) { + throw DruidException.defensive( + "No on-disk header for partial segment[%s] at [%s]; nothing to restore", + segmentId, + headerFile + ); + } + + // size the metadata reservation to the actual on-disk size so the location accounting is correct from the start + final long actualMetadataSize = computeOnDiskHeaderSize(localCacheDir, targetFilename, externalFilenames); + final PartialSegmentMetadataCacheEntry metadata = new PartialSegmentMetadataCacheEntry( + segmentId, + localCacheDir, + targetFilename, + externalFilenames, + BootstrapRangeReader.INSTANCE, + jsonMapper, + actualMetadataSize + ); + + if (!location.reserve(metadata)) { + throw DruidException.defensive( + "Failed to reserve metadata entry for partial segment[%s] at location[%s]", + segmentId, + location.getPath() + ); + } + + // From here, any throw must roll back: unmount any successfully-mounted bundles (releases their references on + // metadata) and then release the metadata entry from the location. Without this, a mid-bootstrap failure would + // leave the location partially reserved + mounted, which would confuse later restores and leak disk/memory. + final List mountedBundles = new ArrayList<>(); + boolean success = false; + try { + metadata.mount(location); + + // Discover bundle names across the main file and every external file, then keep only those whose owned + // container files actually exist on disk. Walks via the file mapper so the external mappers' SegmentFileMetadata + // are visited too — bundles can legitimately span the main file and one or more externals when the writer + // propagates startFileBundle across them. 
+ final PartialSegmentFileMapperV10 fileMapper = metadata.getFileMapper(); + if (fileMapper == null) { + throw DruidException.defensive( + "Metadata entry mount produced null file mapper for segment[%s]", + segmentId + ); + } + final Set candidateBundleNames = discoverBundleNames(fileMapper); + final List presentBundleNames = filterByContainerPresence( + candidateBundleNames, + fileMapper, + localCacheDir + ); + + // Classify each present bundle as either mountable or orphaned. A bundle is orphaned when its inferred parent + // set includes a bundle that isn't itself present on disk; restoring it would only produce a degenerate state + // where column reads that resolve into the missing parent would fail at query time. Instead, delete the + // orphan's on-disk containers so the next access triggers a clean cold re-fetch from deep storage + final List mountableBundleNames = new ArrayList<>(); + final Set orphanedBundleNames = new HashSet<>(); + for (String name : presentBundleNames) { + boolean orphaned = false; + for (PartialSegmentBundleCacheEntryIdentifier parent : metadata.inferParentBundles(name)) { + if (!presentBundleNames.contains(parent.bundleName())) { + orphaned = true; + break; + } + } + if (orphaned) { + orphanedBundleNames.add(name); + } else { + mountableBundleNames.add(name); + } + } + + for (String orphanName : orphanedBundleNames) { + for (PartialSegmentBundleCacheEntry.BundleContainerRef ref : + PartialSegmentBundleCacheEntry.findContainersForBundle(fileMapper, orphanName)) { + fileMapper.mapperForContainer(ref.externalFilename()).evictContainer(ref.containerIndex()); + } + LOG.debug( + "Deleted on-disk state of orphaned bundle[%s] for segment[%s] (parent unrestorable); next access " + + "will trigger cold re-fetch", + orphanName, + segmentId + ); + } + + // mount base bundle before any dependent bundle so its hold is available when dependents acquire parent holds + mountableBundleNames.sort(Comparator.comparing(name -> 
!Projections.BASE_TABLE_PROJECTION_NAME.equals(name))); + + for (String bundleName : mountableBundleNames) { + // Mountable bundles have all parents present by construction (orphans were filtered out above), so the + // inferred parent set is exactly what we want, no further filtering needed. + final List parentIds = metadata.inferParentBundles(bundleName); + final PartialSegmentBundleCacheEntry bundle = PartialSegmentBundleCacheEntry.forBundle( + metadata, + bundleName, + parentIds + ); + // weak-reserve with a temporary hold so the mount call's own parent-hold acquisition can succeed; release the + // bootstrap hold immediately after, if the entry should remain alive for query-side access, the runtime hold + // chain (transitive parents from aggregates, segment-level holds from acquire APIs) keeps it pinned. + try (StorageLocation.ReservationHold bootstrapHold = + location.addWeakReservationHold(bundle.getId(), () -> bundle)) { + if (bootstrapHold == null) { + throw DruidException.defensive( + "Failed to reserve bundle entry[%s] in location[%s] during bootstrap", + bundle.getId(), + location.getPath() + ); + } + bundle.mount(location); + } + mountedBundles.add(bundle); + } + + LOG.debug( + "Restored partial segment[%s] from [%s]: metadata size[%d], bundles[%s], orphans[%s]", + segmentId, + localCacheDir, + actualMetadataSize, + mountableBundleNames, + orphanedBundleNames + ); + success = true; + return new RestoreResult(metadata, mountedBundles); + } + finally { + if (!success) { + // Roll back in reverse-dependency order: bundles first (so they release references on metadata + parents) + // then the metadata entry itself. The bundle/metadata cleanup is best-effort, log and continue rather than + // shadow the original failure. 
+ for (PartialSegmentBundleCacheEntry bundle : mountedBundles) { + try { + bundle.unmount(); + } + catch (Throwable t) { + LOG.warn(t, "Failed to roll back bundle[%s] during bootstrap failure for [%s]", bundle.getId(), segmentId); + } + } + try { + location.release(metadata); + } + catch (Throwable t) { + LOG.warn(t, "Failed to roll back metadata entry for partial segment[%s]", segmentId); + } + } + } + } + + /** + * Check whether a directory looks like a partial-segment cache layout for the given target filename. + */ + public static boolean isPartialSegmentLayout(File localCacheDir, String targetFilename) + { + if (localCacheDir == null || !localCacheDir.isDirectory()) { + return false; + } + final File header = new File( + localCacheDir, + targetFilename + PartialSegmentFileMapperV10.METADATA_HEADER_SUFFIX + ); + return header.exists(); + } + + private static long computeOnDiskHeaderSize(File localCacheDir, String targetFilename, List externalFilenames) + { + long total = sizeOf(new File( + localCacheDir, + targetFilename + PartialSegmentFileMapperV10.METADATA_HEADER_SUFFIX + )); + for (String external : externalFilenames) { + total += sizeOf(new File( + localCacheDir, + external + PartialSegmentFileMapperV10.METADATA_HEADER_SUFFIX + )); + } + if (total <= 0) { + // PartialSegmentMetadataCacheEntry requires a positive reservation; if all headers are zero-length the local + // layout is degenerate and should not be restored + throw DruidException.defensive( + "Zero-sized header files in [%s]; refusing to restore", + localCacheDir + ); + } + return total; + } + + private static long sizeOf(File f) + { + return f.exists() ? f.length() : 0; + } + + /** + * Discover the bundle names present in a segment by walking each container's + * {@link SegmentFileContainerMetadata#getBundle bundle} across the main file and every attached external file. 
+ * The bundle field is always non-null — containers written without an explicit {@code startFileBundle} call + * (including those from older segments) default to {@link SegmentFileBuilder#ROOT_BUNDLE_NAME}. + */ + private static Set discoverBundleNames(PartialSegmentFileMapperV10 fileMapper) + { + final Set names = new HashSet<>(); + collectBundleNames(fileMapper.getSegmentFileMetadata(), names); + for (String externalFilename : fileMapper.getExternalFilenames()) { + collectBundleNames(fileMapper.getExternalMapper(externalFilename).getSegmentFileMetadata(), names); + } + return names; + } + + private static void collectBundleNames(SegmentFileMetadata fileMeta, Set out) + { + for (SegmentFileContainerMetadata container : fileMeta.getContainers()) { + out.add(container.getBundle()); + } + } + + /** + * Keep only bundles whose every owned container file exists on disk. The on-disk path for a container is + * {@code {mapperTargetFilename}.container.{containerIndex:05d}} where {@code mapperTargetFilename} is the main + * V10 filename for refs in the main mapper, or the external filename for refs in an external mapper. 
+ */ + private static List filterByContainerPresence( + Set candidateBundleNames, + PartialSegmentFileMapperV10 fileMapper, + File localCacheDir + ) + { + final List restorable = new ArrayList<>(); + for (String bundleName : candidateBundleNames) { + final List refs = + PartialSegmentBundleCacheEntry.findContainersForBundle(fileMapper, bundleName); + if (refs.isEmpty()) { + continue; + } + boolean allPresent = true; + for (PartialSegmentBundleCacheEntry.BundleContainerRef ref : refs) { + final String mapperFilename = fileMapper.mapperForContainer(ref.externalFilename()).getTargetFilename(); + final File cf = new File( + localCacheDir, + StringUtils.format("%s.container.%05d", mapperFilename, ref.containerIndex()) + ); + if (!cf.exists()) { + allPresent = false; + break; + } + } + if (allPresent) { + restorable.add(bundleName); + } + } + return restorable; + } + + private PartialSegmentCacheBootstrap() + { + // utility class + } + + /** + * Stub range reader used during bootstrap: the on-disk header is expected to exist and parse, so no fetch is needed. + * If for any reason {@link PartialSegmentFileMapperV10#create} decides to re-fetch (e.g. header corruption), this + * reader throws so we fail loudly rather than silently re-downloading without the operator's knowledge. + */ + private static final class BootstrapRangeReader implements SegmentRangeReader + { + static final BootstrapRangeReader INSTANCE = new BootstrapRangeReader(); + + @Override + public InputStream readRange(String filename, long offset, long length) + { + throw DruidException.defensive( + "BootstrapRangeReader was asked to fetch [%s] @[%d:%d]; bootstrap should only read from local disk", + Objects.toString(filename), + offset, + length + ); + } + } + + /** + * Hold-acquire result of a partial-segment restore: the always-static metadata entry plus the list of bundle + * entries (already mounted, registered as weak entries) discovered on disk for this segment. 
+ */ + public static final class RestoreResult + { + private final PartialSegmentMetadataCacheEntry metadata; + private final List bundles; + + RestoreResult(PartialSegmentMetadataCacheEntry metadata, List bundles) + { + this.metadata = metadata; + this.bundles = List.copyOf(bundles); + } + + public PartialSegmentMetadataCacheEntry getMetadata() + { + return metadata; + } + + public List getBundles() + { + return bundles; + } + } +} diff --git a/server/src/main/java/org/apache/druid/segment/loading/PartialSegmentMetadataCacheEntry.java b/server/src/main/java/org/apache/druid/segment/loading/PartialSegmentMetadataCacheEntry.java new file mode 100644 index 000000000000..c301123dd38a --- /dev/null +++ b/server/src/main/java/org/apache/druid/segment/loading/PartialSegmentMetadataCacheEntry.java @@ -0,0 +1,576 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.segment.loading; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.util.concurrent.SettableFuture; +import com.google.errorprone.annotations.concurrent.GuardedBy; +import org.apache.druid.error.DruidException; +import org.apache.druid.java.util.emitter.EmittingLogger; +import org.apache.druid.segment.ReferenceCountingCloseableObject; +import org.apache.druid.segment.file.PartialSegmentFileMapperV10; +import org.apache.druid.segment.file.SegmentFileBuilder; +import org.apache.druid.segment.file.SegmentFileMetadata; +import org.apache.druid.segment.projections.Projections; +import org.apache.druid.timeline.SegmentId; + +import javax.annotation.Nullable; +import java.io.Closeable; +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.atomic.AtomicReference; +import java.util.concurrent.locks.ReentrantLock; + +/** + * Cache entry for the metadata header of a V10 segment loaded via partial download. Mounting this entry range-reads + * the V10 header from deep storage, parses {@link SegmentFileMetadata}, and constructs a + * {@link PartialSegmentFileMapperV10} that can later download individual internal files on demand. + *

+ * Reservation is sized via a configurable up-front estimate at construction time, then shrunk to the actual on-disk + * header size after mount via {@link StorageLocation#adjustReservation}. Mount fails fast if the actual size exceeds + * the estimate; the operator must increase + * {@code druid.segmentCache.virtualStorageMetadataReservationEstimate} to recover. + * <p>
+ * Per-bundle cache entries created downstream of this one share the same {@link PartialSegmentFileMapperV10} + * instance via {@link #getFileMapper()}; closing the metadata entry closes the file mapper, which unmaps all + * containers and external file mappers. + * <p>
+ * <b>Reference-counted deferred cleanup.</b> {@link #unmount()} does not necessarily release resources synchronously. + * Callers that need the file mapper to stay alive across an intervening drop (e.g. a query reading column data + * through {@link PartialSegmentBundleCacheEntry}, or another component that needs the parsed + * {@link SegmentFileMetadata}) acquire a reference via {@link #acquireReference()}; while any references are + * outstanding, the actual close-file-mapper work is deferred. When the last reference is released, the cleanup fires + * on the releasing thread. Bundle entries hold one such reference per active mount, so the typical pattern is: mount metadata, + * mount bundle (which acquires a reference on metadata), use the bundle, unmount bundle (releases its reference and + * triggers metadata cleanup if it was the last reference and metadata's own unmount has been called). The same + * instance can be re-mounted after a previous cleanup completes; a fresh internal Phaser is installed on the next + * successful mount. + * <p>
+ * Deferred cleanup hook. Callers can attach a {@link Runnable} via {@link #setOnUnmount} that fires once after + * the mapper is closed in {@link #doActualUnmount}. This is the right place to schedule work that should run only when + * the entry is truly purged. + */ +public class PartialSegmentMetadataCacheEntry implements ResizableCacheEntry +{ + private static final EmittingLogger LOG = new EmittingLogger(PartialSegmentMetadataCacheEntry.class); + + private final SegmentCacheEntryIdentifier id; + private final SegmentId segmentId; + private final File localCacheDir; + private final String targetFilename; + private final List externalFilenames; + private final SegmentRangeReader rangeReader; + private final ObjectMapper jsonMapper; + private final long reservationEstimate; + + // ReentrantLock instead of synchronized to avoid pinning virtual threads pre-JEP 491 + private final ReentrantLock entryLock = new ReentrantLock(); + + // current size for accounting; starts at the estimate, shrunk to actual on-disk size after mount + @GuardedBy("entryLock") + private long currentSize; + + // null until mounted + @GuardedBy("entryLock") + @Nullable + private StorageLocation location; + @GuardedBy("entryLock") + @Nullable + private PartialSegmentFileMapperV10 fileMapper; + + // Optional deferred-cleanup hook invoked by doActualUnmount after the mapper is closed. + private final AtomicReference onUnmount = new AtomicReference<>(); + + // bundle entries that are currently mounted against this segment, registered by PartialSegmentBundleCacheEntry on + // successful mount and removed on unmount. Lets the drop path enumerate bundles for cascade-close without scanning + // the StorageLocation's entry maps. + private final Set linkedBundles = ConcurrentHashMap.newKeySet(); + + // Reference-counted gate over the actual cleanup work (close file mapper, delete header files). 
Set on + // successful mount; unmount() closes the wrapper which defers running cleanup until all outstanding references + // (acquired via acquireReference()) are released. Re-created on mount-after-cleanup-completion. Null when the entry + // has never been mounted. + private final AtomicReference> references = new AtomicReference<>(); + + // CAS+SettableFuture mount-dedup gate, mirroring the bundle entry's pattern. Without this, mount()'s slow range-read + // would have to hold entryLock for its full duration, blocking concurrent status reads (isMounted, getSize, ...). + // With it: one thread wins the CAS and runs doMount; the rest wait on the same future. On failure the gate is + // cleared so retries get a fresh attempt; on success the gate stays set until doActualUnmount clears it. + private final AtomicReference> mountFuture = new AtomicReference<>(); + + public PartialSegmentMetadataCacheEntry( + SegmentId segmentId, + File localCacheDir, + String targetFilename, + List externalFilenames, + SegmentRangeReader rangeReader, + ObjectMapper jsonMapper, + long reservationEstimate + ) + { + if (reservationEstimate <= 0) { + throw DruidException.defensive( + "Reservation estimate for partial metadata entry[%s] must be positive, got [%d]", + segmentId, + reservationEstimate + ); + } + this.segmentId = segmentId; + this.id = new SegmentCacheEntryIdentifier(segmentId); + this.localCacheDir = localCacheDir; + this.targetFilename = targetFilename; + this.externalFilenames = List.copyOf(externalFilenames); + this.rangeReader = rangeReader; + this.jsonMapper = jsonMapper; + this.reservationEstimate = reservationEstimate; + this.currentSize = reservationEstimate; + } + + @Override + public SegmentCacheEntryIdentifier getId() + { + return id; + } + + public SegmentId getSegmentId() + { + return segmentId; + } + + @Override + public long getSize() + { + entryLock.lock(); + try { + return currentSize; + } + finally { + entryLock.unlock(); + } + } + + @Override + public 
boolean isMounted() + { + entryLock.lock(); + try { + return fileMapper != null; + } + finally { + entryLock.unlock(); + } + } + + @Override + public void resizeReservation(long newSize) + { + // Called from StorageLocation.adjustReservation under the location's writeLock. Acquires entryLock here as a + // real (non-reentrant) acquisition: mount() releases entryLock BEFORE calling adjustReservation precisely so the + // overall path runs writeLock -> entryLock (matching StorageLocation.release -> unmount), avoiding the + // entryLock -> writeLock inversion that would deadlock. + entryLock.lock(); + try { + this.currentSize = newSize; + } + finally { + entryLock.unlock(); + } + } + + @Override + public void mount(StorageLocation mountLocation) throws IOException + { + while (true) { + final SettableFuture existing = mountFuture.get(); + if (existing != null) { + awaitMount(existing); + // The completed mount may have been for a different location. Verify the requested location matches. + entryLock.lock(); + try { + if (location != null && !location.equals(mountLocation)) { + throw DruidException.defensive( + "Already mounted[%s] in location[%s] which differs from requested[%s]", + id, + location.getPath(), + mountLocation.getPath() + ); + } + } + finally { + entryLock.unlock(); + } + verifyStillReservedOrRollback(mountLocation); + return; + } + final SettableFuture ours = SettableFuture.create(); + if (!mountFuture.compareAndSet(null, ours)) { + continue; + } + try { + doMount(mountLocation); + ours.set(null); + } + catch (Throwable t) { + // clear the future so the next caller gets a fresh attempt + mountFuture.set(null); + ours.setException(t); + if (t instanceof IOException) { + throw (IOException) t; + } + if (t instanceof RuntimeException) { + throw (RuntimeException) t; + } + if (t instanceof Error) { + throw (Error) t; + } + throw DruidException.defensive(t, "Failed to mount metadata entry[%s]", id); + } + verifyStillReservedOrRollback(mountLocation); + 
return; + } + } + + /** + * Post-mount safety check: confirm the entry is still registered with the location, otherwise roll back. Handles + * the race where the entry's reservation gets evicted (e.g. cache picks a weak entry whose lone hold was released + * by a concurrent canceler, or {@link StorageLocation#release} fires on the static entry from a coordinator drop) + * while mount() is still in progress. Without this check, mount would commit local state for an entry the cache + * manager no longer knows about, leaking files on disk and memory mappings. Mirrors the same defensive check in + * {@code SegmentCacheEntry.mount}. Returns normally if rollback fires; callers detect via {@link #isMounted}. + */ + private void verifyStillReservedOrRollback(StorageLocation mountLocation) + { + if (!mountLocation.isReserved(id) && !mountLocation.isWeakReserved(id)) { + LOG.debug( + "Aborting mount of metadata entry[%s] in location[%s]; entry was evicted while mounting", + id, + mountLocation.getPath() + ); + unmount(); + } + } + + private void doMount(StorageLocation mountLocation) throws IOException + { + // The CAS+SettableFuture gate in mount() guarantees only one thread runs this method at a time per entry, so + // entryLock is only held briefly for state mutations. The slow PartialSegmentFileMapperV10.create() call (which + // may issue a deep-storage range read on first mount) runs outside entryLock so concurrent status reads are not + // blocked on it. adjustReservation also runs outside entryLock: StorageLocation.release goes + // writeLock -> entryLock (via release -> unmount), so entryLock -> writeLock here would be a deadlock-prone + // lock-order inversion. 
+ entryLock.lock(); + try { + if (location != null && fileMapper != null) { + if (!location.equals(mountLocation)) { + throw DruidException.defensive( + "Already mounted[%s] in location[%s] which differs from requested[%s]", + id, + location.getPath(), + mountLocation.getPath() + ); + } + return; + } + } + finally { + entryLock.unlock(); + } + + final PartialSegmentFileMapperV10 mapper = PartialSegmentFileMapperV10.create( + rangeReader, + jsonMapper, + localCacheDir, + targetFilename, + externalFilenames + ); + + final long sizeToAdjust; + try { + final long actualSize = mapper.getOnDiskHeaderSize(); + if (actualSize > reservationEstimate) { + throw DruidException.forPersona(DruidException.Persona.OPERATOR) + .ofCategory(DruidException.Category.RUNTIME_FAILURE) + .build( + "Partial segment metadata for [%s] is [%d] bytes on disk, exceeding the " + + "configured reservation estimate of [%d] bytes. Increase " + + "druid.segmentCache.virtualStorageMetadataReservationEstimate.", + segmentId, + actualSize, + reservationEstimate + ); + } + sizeToAdjust = actualSize < reservationEstimate ? actualSize : -1; + + entryLock.lock(); + try { + location = mountLocation; + fileMapper = mapper; + // Install (or re-install, after a previous mount/unmount cycle terminated the prior Phaser) the + // reference-counted gate over cleanup. Future acquireReference() / unmount() calls operate on this instance. + references.set(new ReferenceCountingCloseableObject(this::doActualUnmount) {}); + } + finally { + entryLock.unlock(); + } + } + catch (Throwable t) { + // mount failed; close mmaps and delete the on-disk header files so a retry starts clean. Mirrors the eager + // SegmentCacheEntry behavior: simpler to redo a small header range-read than to reason about whatever partial + // on-disk state the failure left. 
Crash-mid-mount across JVM restarts is still handled by the mapper's own + // corruption recovery when bootstrap runs at next startup; this path covers the in-process retry case. + try { + mapper.close(); + } + catch (Throwable closeError) { + t.addSuppressed(closeError); + } + try { + deleteHeaderFiles(); + } + catch (Throwable deleteError) { + t.addSuppressed(deleteError); + } + throw t; + } + + // Only shrink the reservation if the entry is still registered with the location. If we lost the reservation + // mid-mount (concurrent canceler / drop), adjustReservation would throw; defer to the post-mount check in + // mount() to roll back cleanly instead. + if (sizeToAdjust >= 0 && (mountLocation.isReserved(id) || mountLocation.isWeakReserved(id))) { + mountLocation.adjustReservation(id, sizeToAdjust); + } + } + + private static void awaitMount(SettableFuture future) throws IOException + { + try { + future.get(); + } + catch (InterruptedException e) { + Thread.currentThread().interrupt(); + throw new IOException("Interrupted while waiting for mount", e); + } + catch (ExecutionException e) { + final Throwable cause = e.getCause() == null ? e : e.getCause(); + switch (cause) { + case IOException ioException -> throw ioException; + case RuntimeException runtimeException -> throw runtimeException; + case Error error -> throw error; + default -> throw DruidException.defensive(e, "mount failed"); + } + } + } + + /** + * Triggers cleanup of this entry. If any references acquired via {@link #acquireReference()} are still outstanding, + * the actual unmap-and-delete work is deferred until the last reference releases; in that case this method returns + * immediately and {@link #doActualUnmount} will fire later on the thread that closes the last reference. With no + * outstanding references, cleanup runs synchronously on the caller's thread. 
+ */ + @Override + public void unmount() + { + final ReferenceCountingCloseableObject current = references.get(); + if (current != null && !current.isClosed()) { + current.close(); + } + } + + /** + * Acquire a reference that keeps this entry's resources (the file mapper, on-disk header files) alive across an + * intervening {@link #unmount} call. The returned {@link Closeable} must be closed when the caller is done; at + * that point if {@code unmount()} has already been called and no other references remain, the deferred cleanup + * fires on the closing thread. + * + * @throws DruidException if the entry has never been mounted, or has already been cleaned up + */ + public Closeable acquireReference() + { + final ReferenceCountingCloseableObject current = references.get(); + if (current == null) { + throw DruidException.defensive( + "Cannot acquire reference on partial segment metadata entry[%s] before it has been mounted", + id + ); + } + return current.incrementReferenceAndDecrementOnceCloseable() + .orElseThrow(() -> DruidException.defensive( + "Cannot acquire reference on partial segment metadata entry[%s]; already being unmounted", + id + )); + } + + /** + * The actual unmount work, invoked by the reference-counted gate's {@code onAdvance} once every outstanding + * reference (plus the wrapper's own initial party) has been released. Closes the file mapper, deletes the on-disk + * header files (the entry owns its storage-location footprint), and runs the optional {@link #setOnUnmount + * onUnmount} hook. + */ + private void doActualUnmount() + { + final Runnable hook; + entryLock.lock(); + try { + if (fileMapper == null) { + return; + } + try { + fileMapper.close(); + } + catch (Throwable t) { + LOG.warn(t, "Failed to close partial segment file mapper for [%s]", segmentId); + } + fileMapper = null; + location = null; + // Clear the mount-dedup gate so a subsequent mount() on this same instance starts a fresh attempt. 
+ mountFuture.set(null); + deleteHeaderFiles(); + hook = onUnmount.getAndSet(null); + } + finally { + entryLock.unlock(); + } + // Run the hook outside entryLock so it can touch the file system / cache manager without contending with + // concurrent status reads, and so a slow or buggy hook can't deadlock against acquireReference paths. + if (hook != null) { + try { + hook.run(); + } + catch (Throwable t) { + LOG.warn(t, "onUnmount hook failed for partial segment metadata entry[%s]", segmentId); + } + } + } + + /** + * Returns the file mapper held by this entry while mounted, or null if the entry has not been mounted. + */ + @Nullable + public PartialSegmentFileMapperV10 getFileMapper() + { + entryLock.lock(); + try { + return fileMapper; + } + finally { + entryLock.unlock(); + } + } + + /** + * Returns the parsed segment file metadata while mounted, or null if not yet mounted. + */ + @Nullable + public SegmentFileMetadata getSegmentFileMetadata() + { + final PartialSegmentFileMapperV10 mapper = getFileMapper(); + return mapper == null ? null : mapper.getSegmentFileMetadata(); + } + + /** + * Structural inference of the parent bundles that the given {@code bundleName} depends on within this segment. + * Single source of truth for both bootstrap (which post-filters by what's actually restorable on disk) and the + * query-time acquire path (which uses the result directly to seed + * {@link PartialSegmentBundleCacheEntry#forBundle}'s {@code parentEntryIds}). + *

+ * Today's rule is structural and trivial: any non-base bundle depends on the base bundle. The base bundle and the + * {@link SegmentFileBuilder#ROOT_BUNDLE_NAME root bundle} have no parents, the root bundle owns everything written + * without an explicit {@code startFileBundle} call (older fileGroup-less segments, or shared internal metadata) and + * is structurally a peer of the base. If future writers introduce richer dependency graphs, the rule will need to + * grow, likely by reading dependency metadata that the writer records explicitly rather than by inference here. + */ + public List inferParentBundles(String bundleName) + { + if (Projections.BASE_TABLE_PROJECTION_NAME.equals(bundleName) + || SegmentFileBuilder.ROOT_BUNDLE_NAME.equals(bundleName)) { + return List.of(); + } + return List.of( + new PartialSegmentBundleCacheEntryIdentifier( + segmentId, + Projections.BASE_TABLE_PROJECTION_NAME + ) + ); + } + + /** + * Register a bundle entry as a current dependent of this metadata entry. Called by + * {@link PartialSegmentBundleCacheEntry} after a successful mount; the drop path uses {@link #snapshotLinkedBundles} + * to enumerate dependents for cascade-close. + */ + void registerBundle(PartialSegmentBundleCacheEntry bundle) + { + linkedBundles.add(bundle); + } + + /** + * Reverse of {@link #registerBundle}. Called by {@link PartialSegmentBundleCacheEntry#unmount} so the metadata's + * view stays consistent with which bundles are actually mounted. + */ + void unregisterBundle(PartialSegmentBundleCacheEntry bundle) + { + linkedBundles.remove(bundle); + } + + /** + * Snapshot of bundle entries currently mounted against this segment. Returned as a defensive copy; callers can + * iterate freely without risk of concurrent-modification surprises while bundles concurrently mount/unmount. Used + * by the drop path to cascade-close bundles before releasing the metadata entry. 
+ */ + public Collection snapshotLinkedBundles() + { + return new ArrayList<>(linkedBundles); + } + + /** + * Attach a deferred-cleanup hook to run when this entry is finally purged. {@link #doActualUnmount} invokes the + * hook after closing the file mapper and deleting the entry's storage-location files, outside the entry lock. + * Replaces any previously-set hook. Pass {@code null} to clear. + */ + public void setOnUnmount(@Nullable Runnable hook) + { + onUnmount.set(hook); + } + + /** + * Delete the on-disk header files this entry owns (main + any externals). Called from both + * {@link #doActualUnmount} on successful purge and the mount-failure cleanup path; safe to invoke independently of + * mount state. + */ + private void deleteHeaderFiles() + { + deleteIfExists(new File(localCacheDir, targetFilename + PartialSegmentFileMapperV10.METADATA_HEADER_SUFFIX)); + for (String filename : externalFilenames) { + deleteIfExists(new File(localCacheDir, filename + PartialSegmentFileMapperV10.METADATA_HEADER_SUFFIX)); + } + } + + private void deleteIfExists(File file) + { + if (file.exists() && !file.delete()) { + LOG.warn("Failed to delete header file[%s] during unmount of partial segment[%s]", file, segmentId); + } + } +} diff --git a/server/src/main/java/org/apache/druid/segment/loading/ResizableCacheEntry.java b/server/src/main/java/org/apache/druid/segment/loading/ResizableCacheEntry.java new file mode 100644 index 000000000000..e5ce93e3e319 --- /dev/null +++ b/server/src/main/java/org/apache/druid/segment/loading/ResizableCacheEntry.java @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.loading; + +/** + * Opt-in extension of {@link CacheEntry} that supports in-place adjustment of the reservation size after the entry is + * already registered with a {@link StorageLocation}. Used by entry types whose final size is not known at registration + * time and is determined later (e.g., a partial-segment metadata entry that reserves a pessimistic estimate up front + * and shrinks to the actual on-disk header size after the header has been downloaded). + * <p>
+ * Implementations must mutate the field backing {@link #getSize()} so subsequent calls see the new size. Only + * {@link StorageLocation#adjustReservation(CacheEntryIdentifier, long)} should call {@link #resizeReservation(long)}; + * direct calls bypass the location's bookkeeping atomics and will leave reservation accounting incorrect. + */ +public interface ResizableCacheEntry extends CacheEntry +{ + /** + * Mutate this entry's size to {@code newSize}. If you are not calling this method from within + * {@link StorageLocation}, you should be calling {@link StorageLocation#adjustReservation} instead. + */ + void resizeReservation(long newSize); +} diff --git a/server/src/main/java/org/apache/druid/segment/loading/SegmentCacheEntryIdentifier.java b/server/src/main/java/org/apache/druid/segment/loading/SegmentCacheEntryIdentifier.java index c58e04a70302..6f982fc80beb 100644 --- a/server/src/main/java/org/apache/druid/segment/loading/SegmentCacheEntryIdentifier.java +++ b/server/src/main/java/org/apache/druid/segment/loading/SegmentCacheEntryIdentifier.java @@ -21,36 +21,11 @@ import org.apache.druid.timeline.SegmentId; -import java.util.Objects; - /** - * Use a {@link SegmentId} as a {@link CacheEntryIdentifier} + * Use a {@link SegmentId} as a {@link CacheEntryIdentifier}.
*/ -public final class SegmentCacheEntryIdentifier implements CacheEntryIdentifier +public record SegmentCacheEntryIdentifier(SegmentId segmentId) implements CacheEntryIdentifier { - private final SegmentId segmentId; - - public SegmentCacheEntryIdentifier(SegmentId segmentId) - { - this.segmentId = segmentId; - } - - @Override - public boolean equals(Object o) - { - if (o == null || getClass() != o.getClass()) { - return false; - } - SegmentCacheEntryIdentifier that = (SegmentCacheEntryIdentifier) o; - return Objects.equals(segmentId, that.segmentId); - } - - @Override - public int hashCode() - { - return segmentId.hashCode(); - } - @Override public String toString() { diff --git a/server/src/main/java/org/apache/druid/segment/loading/SegmentCacheManager.java b/server/src/main/java/org/apache/druid/segment/loading/SegmentCacheManager.java index 393b0ac8831a..d3fa1b84f2d4 100644 --- a/server/src/main/java/org/apache/druid/segment/loading/SegmentCacheManager.java +++ b/server/src/main/java/org/apache/druid/segment/loading/SegmentCacheManager.java @@ -144,4 +144,9 @@ public interface SegmentCacheManager * Returns the storage locations backing this cache manager. */ List getLocations(); + + /** + * Returns the loading thread pool backing this cache manager. + */ + StorageLoadingThreadPool getLoadingThreadPool(); } diff --git a/server/src/main/java/org/apache/druid/segment/loading/SegmentLoaderConfig.java b/server/src/main/java/org/apache/druid/segment/loading/SegmentLoaderConfig.java index 3f259eb0e155..1fbe3679ad03 100644 --- a/server/src/main/java/org/apache/druid/segment/loading/SegmentLoaderConfig.java +++ b/server/src/main/java/org/apache/druid/segment/loading/SegmentLoaderConfig.java @@ -94,6 +94,17 @@ public class SegmentLoaderConfig @JsonProperty("virtualStorageIsEphemeral") private boolean virtualStorageIsEphemeral = false; + /** + * Up-front size reservation (in bytes) used when mounting a partial-segment metadata cache entry. 
The entry + * range-reads the V10 header from deep storage at mount time, then calls + * {@link StorageLocation#adjustReservation} to shrink to the actual on-disk size. If the actual header exceeds this + * estimate, the mount fails with an operator-facing error directing them to raise this value. Defaults to 16 MiB, + * which comfortably covers the metadata of typical V10 segments; outliers with many columns and/or projections may + * need a higher value. + */ + @JsonProperty("virtualStorageMetadataReservationEstimate") + private long virtualStorageMetadataReservationEstimate = 16L * 1024L * 1024L; + private long combinedMaxSize = 0; public List getLocations() @@ -181,6 +192,11 @@ public boolean isVirtualStorageEphemeral() return virtualStorageIsEphemeral; } + public long getVirtualStorageMetadataReservationEstimate() + { + return virtualStorageMetadataReservationEstimate; + } + public SegmentLoaderConfig setLocations(List locations) { this.locations = Lists.newArrayList(locations); @@ -188,14 +204,19 @@ public SegmentLoaderConfig setLocations(List locations) } /** - * Sets {@link #virtualStorage} and {@link #virtualStorageIsEphemeral}. + * Sets {@link #virtualStorage}. */ - public SegmentLoaderConfig setVirtualStorage( - boolean virtualStorage, - boolean virtualStorageFabricEphemeral - ) + public SegmentLoaderConfig setVirtualStorage(boolean virtualStorage) { this.virtualStorage = virtualStorage; + return this; + } + + /** + * Sets {@link #virtualStorageIsEphemeral}. 
+ */ + public SegmentLoaderConfig setVirtualStorageIsEphemeral(boolean virtualStorageFabricEphemeral) + { this.virtualStorageIsEphemeral = virtualStorageFabricEphemeral; return this; } @@ -209,9 +230,19 @@ public List toStorageLocations() { return this.getLocations() .stream() - .map(locationConfig -> new StorageLocation(locationConfig.getPath(), - locationConfig.getMaxSize(), - locationConfig.getFreeSpacePercent())) + .map(locationConfig -> { + final StorageLocation location = new StorageLocation( + locationConfig.getPath(), + locationConfig.getMaxSize(), + locationConfig.getFreeSpacePercent() + ); + + if (isVirtualStorageEphemeral()) { + location.setAreWeakEntriesEphemeral(true); + } + + return location; + }) .collect(Collectors.toList()); } @@ -234,6 +265,7 @@ public String toString() ", virtualStorageLoadThreads=" + virtualStorageLoadThreads + ", virtualStorageUseVirtualThreads=" + virtualStorageUseVirtualThreads + ", virtualStorageIsEphemeral=" + virtualStorageIsEphemeral + + ", virtualStorageMetadataReservationEstimate=" + virtualStorageMetadataReservationEstimate + ", combinedMaxSize=" + combinedMaxSize + '}'; } diff --git a/server/src/main/java/org/apache/druid/segment/loading/SegmentLocalCacheManager.java b/server/src/main/java/org/apache/druid/segment/loading/SegmentLocalCacheManager.java index 5372baf4c240..a43400efcb1a 100644 --- a/server/src/main/java/org/apache/druid/segment/loading/SegmentLocalCacheManager.java +++ b/server/src/main/java/org/apache/druid/segment/loading/SegmentLocalCacheManager.java @@ -24,7 +24,6 @@ import com.google.common.base.Suppliers; import com.google.common.util.concurrent.Futures; import com.google.common.util.concurrent.ListenableFuture; -import com.google.common.util.concurrent.ListeningExecutorService; import com.google.common.util.concurrent.MoreExecutors; import com.google.errorprone.annotations.concurrent.GuardedBy; import com.google.inject.Inject; @@ -115,13 +114,7 @@ public class SegmentLocalCacheManager implements 
SegmentCacheManager private final IndexIO indexIO; - private final ListeningExecutorService virtualStorageLoadOnDemandExec; - /** - * Bounds the number of in-flight on-demand loads when running with virtual threads. Null when virtual storage is - * disabled or when the legacy fixed-pool path is selected (the pool size is the implicit limit there). - */ - @Nullable - private final Semaphore virtualStorageLoadOnDemandPermits; + private final StorageLoadingThreadPool virtualStorageLoadingThreadPool; private ExecutorService loadOnBootstrapExec = null; private ExecutorService loadOnDownloadExec = null; @@ -129,64 +122,29 @@ public class SegmentLocalCacheManager implements SegmentCacheManager public SegmentLocalCacheManager( List locations, SegmentLoaderConfig config, + StorageLoadingThreadPool virtualStorageLoadingThreadPool, @Nonnull StorageLocationSelectorStrategy strategy, IndexIO indexIO, @Json ObjectMapper mapper ) { - this.config = config; - this.jsonMapper = mapper; this.locations = locations; + this.config = config; + this.virtualStorageLoadingThreadPool = virtualStorageLoadingThreadPool; this.strategy = strategy; this.indexIO = indexIO; + this.jsonMapper = mapper; log.info("Using storage location strategy[%s].", this.strategy.getClass().getSimpleName()); if (config.isVirtualStorage()) { if (config.getNumThreadsToLoadSegmentsIntoPageCacheOnDownload() > 0) { - throw DruidException.defensive("Invalid configuration: virtualStorage is incompatible with numThreadsToLoadSegmentsIntoPageCacheOnDownload"); + throw DruidException.defensive( + "Invalid configuration: virtualStorage is incompatible with numThreadsToLoadSegmentsIntoPageCacheOnDownload"); } if (config.getNumThreadsToLoadSegmentsIntoPageCacheOnBootstrap() > 0) { - throw DruidException.defensive("Invalid configuration: virtualStorage is incompatible with numThreadsToLoadSegmentsIntoPageCacheOnBootstrap"); - } - if (config.getVirtualStorageLoadThreads() <= 0) { - throw 
DruidException.forPersona(DruidException.Persona.OPERATOR) - .ofCategory(DruidException.Category.INVALID_INPUT) - .build( - "virtualStorageLoadThreads must be greater than 0, got [%d]", - config.getVirtualStorageLoadThreads() - ); - } - if (config.isVirtualStorageEphemeral()) { - for (StorageLocation location : locations) { - location.setAreWeakEntriesEphemeral(true); - } - } - if (config.isVirtualStorageUseVirtualThreads()) { - log.info( - "Using virtual storage mode with virtual threads - max concurrent on demand loads: [%d].", - config.getVirtualStorageLoadThreads() - ); - virtualStorageLoadOnDemandPermits = new Semaphore(config.getVirtualStorageLoadThreads()); - virtualStorageLoadOnDemandExec = MoreExecutors.listeningDecorator( - Executors.newThreadPerTaskExecutor( - Thread.ofVirtual() - .name("VirtualStorageOnDemandLoadingThread-", 0) - .factory() - ) - ); - } else { - log.info( - "Using virtual storage mode with fixed platform thread pool - on demand load threads: [%d].", - config.getVirtualStorageLoadThreads() - ); - virtualStorageLoadOnDemandPermits = null; - virtualStorageLoadOnDemandExec = MoreExecutors.listeningDecorator( - Executors.newFixedThreadPool( - config.getVirtualStorageLoadThreads(), - Execs.makeThreadFactory("VirtualStorageOnDemandLoadingThread-%s") - ) - ); + throw DruidException.defensive( + "Invalid configuration: virtualStorage is incompatible with numThreadsToLoadSegmentsIntoPageCacheOnBootstrap"); } } else { log.info( @@ -208,8 +166,6 @@ public SegmentLocalCacheManager( Execs.makeThreadFactory("LoadSegmentsIntoPageCacheOnDownload-%s") ); } - virtualStorageLoadOnDemandExec = null; - virtualStorageLoadOnDemandPermits = null; } } @@ -697,9 +653,6 @@ public void shutdown() if (loadOnDownloadExec != null) { loadOnDownloadExec.shutdown(); } - if (virtualStorageLoadOnDemandExec != null) { - virtualStorageLoadOnDemandExec.shutdown(); - } } @VisibleForTesting @@ -714,6 +667,12 @@ public List getLocations() return locations; } + @Override + public 
StorageLoadingThreadPool getLoadingThreadPool() + { + return virtualStorageLoadingThreadPool; + } + /** * Checks whether a segment is already cached. This method does not confirm if the segment is actually mounted in * the location, or even that the segment files in some location are valid, just that some files exist in the @@ -788,12 +747,13 @@ private Supplier> makeOnDemandLoadSupplie return Suppliers.memoize( () -> { final long startTime = System.nanoTime(); - return virtualStorageLoadOnDemandExec.submit( + return virtualStorageLoadingThreadPool.getExecutorService().submit( () -> { // Acquire a permit inside the task so that waiting parks the (virtual) thread instead of blocking the // submitter. In fixed-pool mode permits is null and the pool size itself bounds concurrency. - if (virtualStorageLoadOnDemandPermits != null) { - virtualStorageLoadOnDemandPermits.acquireUninterruptibly(); + final Semaphore loadingPermits = virtualStorageLoadingThreadPool.getPermits(); + if (loadingPermits != null) { + loadingPermits.acquireUninterruptibly(); } try { final long execStartTime = System.nanoTime(); @@ -807,8 +767,8 @@ private Supplier> makeOnDemandLoadSupplie ); } finally { - if (virtualStorageLoadOnDemandPermits != null) { - virtualStorageLoadOnDemandPermits.release(); + if (loadingPermits != null) { + loadingPermits.release(); } } } diff --git a/server/src/main/java/org/apache/druid/segment/loading/StorageLoadingThreadPool.java b/server/src/main/java/org/apache/druid/segment/loading/StorageLoadingThreadPool.java new file mode 100644 index 000000000000..9c8de953b9d3 --- /dev/null +++ b/server/src/main/java/org/apache/druid/segment/loading/StorageLoadingThreadPool.java @@ -0,0 +1,149 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership.
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.loading; + +import com.google.common.util.concurrent.ListeningExecutorService; +import com.google.common.util.concurrent.MoreExecutors; +import org.apache.druid.error.DruidException; +import org.apache.druid.java.util.common.concurrent.Execs; +import org.apache.druid.java.util.common.lifecycle.LifecycleStop; +import org.apache.druid.java.util.common.logger.Logger; +import org.apache.druid.segment.loading.external.VirtualStorageManager; + +import javax.annotation.Nullable; +import java.util.concurrent.Executors; +import java.util.concurrent.Semaphore; + +/** + * Holds the thread pool used for background loading by {@link SegmentLocalCacheManager} and + * {@link VirtualStorageManager}.
+ */ +public class StorageLoadingThreadPool +{ + private static final Logger log = new Logger(StorageLoadingThreadPool.class); + + private final ListeningExecutorService exec; + private final Semaphore permits; + + public StorageLoadingThreadPool( + @Nullable final ListeningExecutorService exec, + @Nullable final Semaphore permits + ) + { + this.exec = exec; + this.permits = permits; + } + + /** + * Creates the pool described by {@code config}. With virtual storage disabled the result has no executor, like + * {@link #none()}. With virtual threads, tasks run on a thread-per-task executor and {@link #getPermits()} bounds + * concurrency; otherwise a fixed platform-thread pool bounds concurrency itself and no permits are used. + * + * @throws DruidException if virtual storage is enabled and {@code virtualStorageLoadThreads} is not positive + */ + public static StorageLoadingThreadPool createFromConfig(final SegmentLoaderConfig config) + { + final ListeningExecutorService exec; + final Semaphore permits; + + if (config.isVirtualStorage()) { + if (config.getVirtualStorageLoadThreads() <= 0) { + throw DruidException.forPersona(DruidException.Persona.OPERATOR) + .ofCategory(DruidException.Category.INVALID_INPUT) + .build( + "virtualStorageLoadThreads must be greater than 0, got [%d]", + config.getVirtualStorageLoadThreads() + ); + } + if (config.isVirtualStorageUseVirtualThreads()) { + log.info( + "Using virtual storage mode with virtual threads - max concurrent on demand loads: [%d].", + config.getVirtualStorageLoadThreads() + ); + permits = new Semaphore(config.getVirtualStorageLoadThreads()); + exec = MoreExecutors.listeningDecorator( + Executors.newThreadPerTaskExecutor( + Thread.ofVirtual() + .name("VirtualStorageOnDemandLoadingThread-", 0) + .factory() + ) + ); + } else { + log.info( + "Using virtual storage mode with fixed platform thread pool - on demand load threads: [%d].", + config.getVirtualStorageLoadThreads() + ); + permits = null; + exec = MoreExecutors.listeningDecorator( + Executors.newFixedThreadPool( + config.getVirtualStorageLoadThreads(), + Execs.makeThreadFactory("VirtualStorageOnDemandLoadingThread-%s") + ) + ); + } + } else { + exec = null; + permits = null; + } + + return new StorageLoadingThreadPool(exec, permits); + } + + /** + * Returns an instance representing "no thread pool". Calling {@link #getExecutorService()} will throw.
+ */ + public static StorageLoadingThreadPool none() + { + return new StorageLoadingThreadPool(null, null); + } + + public boolean isAvailable() + { + return exec != null; + } + + public ListeningExecutorService getExecutorService() + { + if (exec == null) { + throw DruidException.defensive("No exec set, we thought we wouldn't need this"); + } + return exec; + } + + /** + * Bounds the number of in-flight on-demand loads when running with virtual threads. If null, no permits are used; + * the natural limit of the {@link #getExecutorService()} is sufficient. If nonnull, callers must acquire a permit + * in the tasks submitted to {@link #getExecutorService()}, before doing real work. Permits must be released before + * the task completes. + */ + @Nullable + public Semaphore getPermits() + { + return permits; + } + + @LifecycleStop + public void stop() + { + if (exec != null) { + exec.shutdownNow(); + } + } +} diff --git a/server/src/main/java/org/apache/druid/segment/loading/StorageLocation.java b/server/src/main/java/org/apache/druid/segment/loading/StorageLocation.java index 711d1a5d1021..67f23c2545b3 100644 --- a/server/src/main/java/org/apache/druid/segment/loading/StorageLocation.java +++ b/server/src/main/java/org/apache/druid/segment/loading/StorageLocation.java @@ -423,6 +423,78 @@ public ReservationHold addWeakReservationHold( } } + /** + * Adjusts the reservation size of an already-registered {@link ResizableCacheEntry} downward. Used when an entry's + * final size is not known at registration time (e.g. a partial-segment metadata entry that reserves a pessimistic + * estimate and shrinks to the actual on-disk header size once the header has been downloaded). Returns reclaimed + * capacity to the location's available budget; never triggers eviction. + * <p>
+ * Throws if {@code newSize} is greater than the entry's current size: grow semantics require checking the location's + * available budget and possibly evicting other entries, and aren't needed by the current callers. + */ + public void adjustReservation(CacheEntryIdentifier id, long newSize) + { + lock.writeLock().lock(); + try { + final CacheEntry entry; + final WeakCacheEntry weak; + if (staticCacheEntries.containsKey(id)) { + entry = staticCacheEntries.get(id); + weak = null; + } else { + weak = weakCacheEntries.get(id); + if (weak == null) { + throw DruidException.defensive( + "Cannot adjust reservation for unknown cache entry[%s]", + id + ); + } + entry = weak.cacheEntry; + } + + if (!(entry instanceof ResizableCacheEntry)) { + throw DruidException.defensive( + "Cache entry[%s] of type[%s] does not support reservation adjustment", + id, + entry.getClass().getSimpleName() + ); + } + + final long oldSize = entry.getSize(); + final long delta = oldSize - newSize; + if (delta < 0) { + throw DruidException.defensive( + "Cannot grow reservation for cache entry[%s] from [%d] to [%d] bytes; only shrink is supported", + id, + oldSize, + newSize + ); + } + if (delta == 0) { + return; + } + + ((ResizableCacheEntry) entry).resizeReservation(newSize); + currSizeBytes.getAndAdd(-delta); + if (weak == null) { + currStaticSizeBytes.getAndAdd(-delta); + } else { + currWeakSizeBytes.getAndAdd(-delta); + // Each active hold contributed entry.getSize() to currHoldBytes via trackWeakHold; shrink each hold's + // contribution by the same delta so a future trackWeakRelease (which subtracts the new smaller size) lands + // on the correct total. Clamp at 0 defensively against any pre-existing drift.
+ final long activeHolds = weak.holdReferents.getRegisteredParties() - 1L; + if (activeHolds > 0) { + final long holdDelta = delta * activeHolds; + currHoldBytes.updateAndGet(v -> Math.max(0L, v - holdDelta)); + } + } + } + finally { + lock.writeLock().unlock(); + } + } + /** * Removes an item from {@link #staticCacheEntries}, reducing {@link #currSizeBytes} by {@link CacheEntry#getSize()}. * If the cache entry exists in {@link #weakCacheEntries}, it is left in place to be removed by diff --git a/server/src/main/java/org/apache/druid/segment/loading/external/CachedFile.java b/server/src/main/java/org/apache/druid/segment/loading/external/CachedFile.java new file mode 100644 index 000000000000..875e801d2142 --- /dev/null +++ b/server/src/main/java/org/apache/druid/segment/loading/external/CachedFile.java @@ -0,0 +1,87 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.loading.external; + +import org.apache.druid.segment.loading.CacheEntry; +import org.apache.druid.segment.loading.StorageLocation; + +import java.io.Closeable; +import java.io.File; +import java.util.function.Function; + +/** + * Handle to a cached file that prevents eviction while held.
+ * Must be closed when no longer needed to make the file eligible for eviction. + */ +public class CachedFile implements Closeable +{ + private final StorageLocation.ReservationHold hold; + private final DownloadableCacheEntry entry; + private final String identifier; + + CachedFile(StorageLocation.ReservationHold hold, String identifier) + { + this.hold = hold; + this.entry = (DownloadableCacheEntry) hold.getEntry(); + this.identifier = identifier; + } + + /** + * Get the identifier for this cached file. + */ + public String getIdentifier() + { + return identifier; + } + + /** + * Get the File object for this cached file. + * The file is guaranteed to exist and be populated while this handle is open. + */ + public File getFile() + { + return entry.getFile(); + } + + /** + * Extend this cached file with additional functionality. The lifecycle of the extended functionality is + * tracked along with the lifecycle of the underlying cache entry. The provided supplier is called immediately unless + * there is already a mapping for the class. Either way, the class is returned. + * + *

<p>Do not call "close" on the returned object. Its lifecycle is associated with the underlying cache entry, + * and it may be reused beyond the lifecycle of this particular CachedFile object. It will be closed automatically + * when the underlying cache entry is unmounted. + */ + public T extend(Class clazz, Function supplier) + { + return entry.extend(clazz, () -> supplier.apply(this)); + } + + /** + * Release the hold on this cached file, making it eligible for eviction. + * + *

Usage of the File returned from this object after the hold is closed is undefined and expected to end poorly. + */ + @Override + public void close() + { + hold.close(); + } +} diff --git a/server/src/main/java/org/apache/druid/segment/loading/external/DownloadableCacheEntry.java b/server/src/main/java/org/apache/druid/segment/loading/external/DownloadableCacheEntry.java new file mode 100644 index 000000000000..9a736b193ad9 --- /dev/null +++ b/server/src/main/java/org/apache/druid/segment/loading/external/DownloadableCacheEntry.java @@ -0,0 +1,314 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.segment.loading.external; + +import com.google.common.hash.Hashing; +import org.apache.druid.error.DruidException; +import org.apache.druid.io.FilePopulator; +import org.apache.druid.java.util.common.FileUtils; +import org.apache.druid.java.util.common.io.Closer; +import org.apache.druid.java.util.common.logger.Logger; +import org.apache.druid.segment.loading.CacheEntry; +import org.apache.druid.segment.loading.CacheEntryIdentifier; +import org.apache.druid.segment.loading.StorageLocation; +import org.apache.druid.utils.CloseableUtils; + +import javax.annotation.Nullable; +import java.io.Closeable; +import java.io.File; +import java.io.IOException; +import java.util.concurrent.ConcurrentHashMap; +import java.util.function.Supplier; + +/** + * CacheEntry implementation that calls a FilePopulator lambda when mounted. + */ +class DownloadableCacheEntry implements CacheEntry +{ + private static final Logger log = new Logger(DownloadableCacheEntry.class); + + private static final int MAX_PREFIX_SUFFIX_LENGTH = 50; + + private final StringCacheEntryIdentifier identifier; + private final long sizeBytes; + private final FilePopulator populator; + private final File locationPath; + + private volatile boolean mounted = false; + private volatile File mountedFile = null; + + /** + * Map for keeping track of functionality added by {@link #extend}. 
+ */ + private final ConcurrentHashMap, Closeable> extendMap = new ConcurrentHashMap<>(); + + public DownloadableCacheEntry( + StringCacheEntryIdentifier identifier, + long sizeBytes, + FilePopulator populator, + File locationPath + ) + { + this.identifier = identifier; + this.sizeBytes = sizeBytes; + this.populator = populator; + this.locationPath = locationPath; + } + + @Override + public CacheEntryIdentifier getId() + { + return identifier; + } + + @Override + public long getSize() + { + return sizeBytes; + } + + @Override + public boolean isMounted() + { + return mounted; + } + + @Override + public void mount(StorageLocation location) + { + if (mounted) { + return; // Already mounted + } + + // Determine file path - delegate to the same sanitization logic + File file = getFileForIdentifier(locationPath, identifier.value()); + + // Ensure parent directory exists + File parentDir = file.getParentFile(); + if (!parentDir.exists()) { + try { + FileUtils.mkdirp(parentDir); + } + catch (IOException e) { + throw DruidException.forPersona(DruidException.Persona.OPERATOR) + .ofCategory(DruidException.Category.RUNTIME_FAILURE) + .build(e, "Failed to create parent directory [%s]", parentDir); + } + } + + if (!(file.exists() && file.length() == sizeBytes)) { + // file doesn't exist (or is the wrong length), so we need to populate it + + if (file.exists()) { + // It wasn't the right length, let's delete it to avoid confusion for the populator.
+ if (!file.delete()) { + log.info("Problem deleting file [%s] before populating for VSM", file); + } + } + + try { + populator.populate(file); + } + catch (Throwable e) { + // Clean up partial file on failure + if (file.exists()) { + file.delete(); + } + throw DruidException.forPersona(DruidException.Persona.OPERATOR) + .ofCategory(DruidException.Category.RUNTIME_FAILURE) + .build(e, "Failed to populate file [%s]", file); + } + + // Verify the file was created + if (!file.exists()) { + throw DruidException.forPersona(DruidException.Persona.OPERATOR) + .ofCategory(DruidException.Category.RUNTIME_FAILURE) + .build("Populator did not create file [%s]", file); + } + } + + this.mountedFile = file; + this.mounted = true; + } + + @Override + public void unmount() + { + final Closer extendCloser = CloseableUtils.forIterable(extendMap.values()); + extendMap.clear(); + CloseableUtils.closeAndSuppressExceptions(extendCloser, e -> log.warn(e, "Failed to close extended functionality")); + + // StorageLocation will call this when it needs to reclaim space + if (mountedFile != null && mountedFile.exists()) { + if (!mountedFile.delete()) { + log.warn("Failed to delete file[%s] on unmount(). Leaving it.", mountedFile); + } + } + mountedFile = null; + mounted = false; + } + + /** + * Get the mounted file. + * + * @return The file, or null if not mounted + */ + @Nullable + File getFile() + { + return mountedFile; + } + + /** + * Extend this cache entry with additional functionality. The lifecycle of the extended functionality is + * tracked along with the lifecycle of the cache entry. The provided supplier is called immediately unless + * there is already a mapping for the class. Either way, the class is returned. 
+ */ + @SuppressWarnings("unchecked") + public T extend(Class clazz, Supplier supplier) + { + return (T) extendMap.computeIfAbsent(clazz, k -> supplier.get()); + } + + private File getFileForIdentifier(File locationPath, String identifier) + { + String sanitized = sanitizePath(identifier); + return new File(locationPath, sanitized); + } + + /** + * Converts an arbitrary cache entry identifier (typically a URI or file path) into a filename. The result has + * the form {@code <prefix>-<hash>-<suffix>}. The purpose of this sanitization is to provide identifiers for + * external paths that are filesystem-safe (hence the removal of special characters and shortening of the overall + * name), non-colliding (hence the inclusion of a hash), and human-recognizable (hence the inclusion of prefix + * and suffix). + * + *

Typically the prefix and suffix are of equal length. However, care is taken to preserve extensions, which + * means that the suffix may end up longer than the prefix if this is required to avoid mangling an extension. + * This allows downstream code to e.g. detect format or compression based on file extension. + * + * @throws DruidException if the identifier sanitizes to an empty string + */ + static String sanitizePath(final String originalPath) + { + String path = originalPath; + path = path.replace('\\', '/'); + + int startIndex = 0; + if (path.startsWith("/")) { + startIndex = 1; + } + int endIndex = path.endsWith("/") ? path.length() - 1 : path.length(); + + // Split the kept portion into a prefix and a suffix so that file extensions (e.g. ".gz") are + // preserved at the end of the sanitized name. The hashcode disambiguates collisions when + // multiple paths share the same prefix+suffix. + final int prefixEnd; + final int suffixStart; + if (endIndex - startIndex > MAX_PREFIX_SUFFIX_LENGTH * 2) { + prefixEnd = startIndex + MAX_PREFIX_SUFFIX_LENGTH; + suffixStart = endIndex - MAX_PREFIX_SUFFIX_LENGTH; + } else { + final int mid = startIndex + (endIndex - startIndex) / 2; + final int extensionPos = indexOfExtension(path, startIndex, endIndex); + final int split; + if (extensionPos >= 0 && extensionPos < mid && endIndex - extensionPos < MAX_PREFIX_SUFFIX_LENGTH) { + // Adjust the split point so the suffix contains the entire extension. 
+ split = extensionPos; + } else { + split = mid; + } + prefixEnd = split; + suffixStart = split; + } + + final StringBuilder sanitized = new StringBuilder(); + appendSanitized(sanitized, path, startIndex, prefixEnd); + sanitized.append('-') + .append(Hashing.sha512().hashUnencodedChars(originalPath).toString(), 0, 16) + .append('-'); + appendSanitized(sanitized, path, suffixStart, endIndex); + + final String result = sanitized.toString(); + if (result.isEmpty()) { + throw DruidException.defensive("Identifier resulted in empty path after sanitization"); + } + + return result; + } + + private static void appendSanitized(StringBuilder dest, String path, int start, int end) + { + for (int i = start; i < end; i++) { + char c = path.charAt(i); + if (Character.isLetterOrDigit(c) || c == '-' || c == '_' || c == '.') { + dest.append(c); + } else { + dest.append('_'); + } + } + } + + /** + * Returns the position in {@code path} where the extension begins, or -1 if there is no extension. + * Searches from startIndex (inclusive) to endIndex (exclusive). Only alphanumeric strings are + * considered possible extensions. + */ + private static int indexOfExtension(String path, int startIndex, int endIndex) + { + // Find the filename. + int basenameStart = startIndex; + for (int i = endIndex - 1; i >= startIndex; i--) { + if (path.charAt(i) == '/') { + basenameStart = i + 1; + break; + } + } + + // Find the extension of the filename. + int pos = endIndex; + int extensionPos = -1; + while (pos > basenameStart) { + final int initialPos = pos; + while (pos > basenameStart && isAlphaNumeric(path.charAt(pos - 1))) { + pos--; + } + if (pos == initialPos) { + // Saw some non-alphanumeric characters. + break; + } + // Check if there's a dot. Use basenameStart + 1 because we don't want to consider ".dotfile" as an extension. + if (pos > basenameStart + 1 && path.charAt(pos - 1) == '.') { + // Found an extension. 
Remember the position and keep searching (for "xyz.log.gz" we want to capture ".log.gz"). + pos--; + extensionPos = pos; + } else { + break; + } + } + + return extensionPos; + } + + private static boolean isAlphaNumeric(char c) + { + return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'); + } +} diff --git a/server/src/main/java/org/apache/druid/segment/loading/external/StorageLocationVirtualStorageManager.java b/server/src/main/java/org/apache/druid/segment/loading/external/StorageLocationVirtualStorageManager.java new file mode 100644 index 000000000000..d8a351cbc826 --- /dev/null +++ b/server/src/main/java/org/apache/druid/segment/loading/external/StorageLocationVirtualStorageManager.java @@ -0,0 +1,301 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.segment.loading.external; + +import com.google.common.util.concurrent.ListenableFuture; +import com.google.inject.Inject; +import org.apache.druid.collections.ResourceHolder; +import org.apache.druid.common.asyncresource.AsyncResource; +import org.apache.druid.common.asyncresource.AsyncResources; +import org.apache.druid.common.asyncresource.SettableAsyncResource; +import org.apache.druid.error.DruidException; +import org.apache.druid.io.FilePopulator; +import org.apache.druid.java.util.emitter.EmittingLogger; +import org.apache.druid.segment.loading.CacheEntry; +import org.apache.druid.segment.loading.StorageLoadingThreadPool; +import org.apache.druid.segment.loading.StorageLocation; +import org.apache.druid.segment.loading.StorageLocationSelectorStrategy; + +import javax.annotation.Nullable; +import java.io.File; +import java.util.Iterator; +import java.util.List; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.Semaphore; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicReference; +import java.util.function.LongSupplier; + +/** + * Default implementation of VirtualStorageManager that delegates to StorageLocation. + * Uses weak reservations for all cached files, making them eligible for eviction. + */ +public class StorageLocationVirtualStorageManager implements VirtualStorageManager +{ + private static final EmittingLogger log = new EmittingLogger(StorageLocationVirtualStorageManager.class); + + private final StorageLocationSelectorStrategy strategy; + private final StorageLoadingThreadPool loadingThreadPool; + + /** + * Per-identifier locks to ensure only one thread populates a given identifier. + * Once a location is resolved, it's stored in the lock so subsequent threads can check it. 
+ */ + private final ConcurrentHashMap populationLocks = new ConcurrentHashMap<>(); + + @Inject + public StorageLocationVirtualStorageManager( + List locations, + StorageLocationSelectorStrategy strategy, + StorageLoadingThreadPool loadingThreadPool + ) + { + this.strategy = strategy; + this.loadingThreadPool = loadingThreadPool; + log.info("Initialized VirtualStorageManager with [%d] storage locations", locations.size()); + } + + @Nullable + @Override + public CachedFile get(String identifier) + { + // We only consult the lock to discover which location the entry was resolved to. No need for synchronization, + // because we don't intend to populate it. + final PopulationLock lock = populationLocks.get(identifier); + if (lock == null) { + return null; + } + + final StorageLocation resolvedLocation = lock.getResolvedLocation(); + if (resolvedLocation == null) { + // Population is still in flight: the lock exists but doesn't have a location yet. + // Return that the file doesn't exist instead of blocking. + return null; + } + + final StorageLocation.ReservationHold hold = + resolvedLocation.addWeakReservationHoldIfExists(new StringCacheEntryIdentifier(identifier)); + return hold == null ? null : new CachedFile(hold, identifier); + } + + @Override + public CachedFile reserveAndPopulate( + String identifier, + LongSupplier sizeSupplier, + FilePopulator populator + ) + { + // Get or create lock for this identifier + final PopulationLock lock = populationLocks.computeIfAbsent(identifier, ignored -> new PopulationLock()); + + synchronized (lock) { + final StringCacheEntryIdentifier cacheId = new StringCacheEntryIdentifier(identifier); + + // If multiple threads are trying to reserve the same location, the first one will update the state on the lock + // so check that first. 
+ final StorageLocation resolvedLocation = lock.getResolvedLocation(); + if (resolvedLocation == null) { + // Determining the size often requires an external system call, so we use this supplier to defer it until + // we absolutely need it + final long sizeBytes = sizeSupplier.getAsLong(); + + // Try to reserve in each location according to strategy + final Iterator locationIter = strategy.getLocations(); + Throwable lastException = null; + + while (locationIter.hasNext()) { + final StorageLocation location = locationIter.next(); + final File locationFile = location.getPath(); + try { + // Reserve space and acquire a hold, using a cache entry that will call the populator on mount. + final StorageLocation.ReservationHold hold = location.addWeakReservationHold( + cacheId, + () -> new DownloadableCacheEntry(cacheId, sizeBytes, populator, locationFile) + { + final AtomicBoolean mounted = new AtomicBoolean(false); + + @Override + public void mount(StorageLocation location) + { + super.mount(location); + if (mounted.compareAndSet(false, true)) { + location.trackWeakLoad(getSize()); + } + } + + @Override + public void unmount() + { + if (mounted.get()) { + populationLocks.remove(identifier, lock); + } + super.unmount(); + } + } + ); + + if (hold == null) { + log.debug( + "Failed to reserve [%d] bytes for [%s] in location [%s], trying next", + sizeBytes, + identifier, + locationFile + ); + continue; + } + + // Mount the entry (calls populator) + try { + hold.getEntry().mount(location); + } + catch (Throwable e) { + try { + hold.close(); + } + catch (Throwable e2) { + e.addSuppressed(e2); + } + throw e; + } + + // Store the resolved location for anything that is waiting on this same lock + lock.setResolvedLocation(location); + + return new CachedFile(hold, identifier); + } + catch (Throwable e) { + lastException = e; + log.debug(e, "Failed to reserve and populate in location [%s], trying next", locationFile); + } + } + // We have exited the loop, which should have returned, 
nothing must've worked... + + if (lastException != null) { + throw DruidException.forPersona(DruidException.Persona.OPERATOR) + .ofCategory(DruidException.Category.RUNTIME_FAILURE) + .build( + lastException, + "Failed to reserve and populate entry [%s] in any location", + identifier + ); + } else { + throw DruidException.forPersona(DruidException.Persona.OPERATOR) + .ofCategory(DruidException.Category.CAPACITY_EXCEEDED) + .build( + "No space available to reserve [%,d] bytes for entry [%s]", + sizeBytes, + identifier + ); + } + } else { + // Try to get from the resolved location first + StorageLocation.ReservationHold hold = resolvedLocation.addWeakReservationHoldIfExists(cacheId); + + if (hold != null) { + return new CachedFile(hold, identifier); + } else { + // hold == null means the entry was evicted before we had a chance to add our hold. + // Drop the stale lock and re-resolve. + populationLocks.remove(identifier, lock); + return reserveAndPopulate(identifier, sizeSupplier, populator); + } + } + } + } + + @Override + public AsyncResource reserveAndPopulateAsync( + String identifier, + LongSupplier sizeSupplier, + FilePopulator populator + ) + { + final CachedFile cachedFile = get(identifier); + if (cachedFile != null) { + return AsyncResources.ofCloseable(cachedFile); + } + + if (loadingThreadPool.isAvailable()) { + final SettableAsyncResource resource = new SettableAsyncResource<>(); + final ListenableFuture future = loadingThreadPool.getExecutorService().submit( + () -> { + try { + final Semaphore loadingPermits = loadingThreadPool.getPermits(); + if (loadingPermits != null) { + loadingPermits.acquire(); + } + try { + final CachedFile theCachedFile = reserveAndPopulate(identifier, sizeSupplier, populator); + if (!resource.set(ResourceHolder.fromCloseable(theCachedFile))) { + theCachedFile.close(); + } + } + finally { + if (loadingPermits != null) { + loadingPermits.release(); + } + } + } + catch (Throwable e) { + resource.setException(e); + } + } + ); + + 
resource.setCanceler(() -> future.cancel(true)); + return resource; + } else { + final SettableAsyncResource resource = new SettableAsyncResource<>(); + try { + resource.set(ResourceHolder.fromCloseable(reserveAndPopulate(identifier, sizeSupplier, populator))); + } + catch (Throwable e) { + resource.setException(e); + } + return resource; + } + } + + /** + * Lock object that tracks which StorageLocation was used for a given identifier. + * Once resolved, subsequent threads can check the resolved location first. + */ + private static class PopulationLock + { + private final AtomicReference resolvedLocation = new AtomicReference<>(); + + @Nullable + StorageLocation getResolvedLocation() + { + return resolvedLocation.get(); + } + + void setResolvedLocation(StorageLocation location) + { + if (!resolvedLocation.compareAndSet(null, location)) { + throw DruidException.defensive( + "Resolved location already set to [%s], cannot change to [%s]", + resolvedLocation.get().getPath(), + location.getPath() + ); + } + } + } +} diff --git a/server/src/main/java/org/apache/druid/server/initialization/CuratorDiscoveryConfig.java b/server/src/main/java/org/apache/druid/segment/loading/external/StringCacheEntryIdentifier.java similarity index 71% rename from server/src/main/java/org/apache/druid/server/initialization/CuratorDiscoveryConfig.java rename to server/src/main/java/org/apache/druid/segment/loading/external/StringCacheEntryIdentifier.java index c8c9ef95127c..906f6b96b9db 100644 --- a/server/src/main/java/org/apache/druid/server/initialization/CuratorDiscoveryConfig.java +++ b/server/src/main/java/org/apache/druid/segment/loading/external/StringCacheEntryIdentifier.java @@ -17,24 +17,18 @@ * under the License. 
*/ -package org.apache.druid.server.initialization; +package org.apache.druid.segment.loading.external; -import com.fasterxml.jackson.annotation.JsonProperty; +import org.apache.druid.segment.loading.CacheEntryIdentifier; /** + * Simple CacheEntryIdentifier implementation that wraps a string. */ -public class CuratorDiscoveryConfig +public record StringCacheEntryIdentifier(String value) implements CacheEntryIdentifier { - @JsonProperty - private String path = "/druid/discovery"; - - public String getPath() - { - return path; - } - - public boolean useDiscovery() + @Override + public String toString() { - return path != null; + return value; } } diff --git a/server/src/main/java/org/apache/druid/segment/loading/external/VirtualStorageManager.java b/server/src/main/java/org/apache/druid/segment/loading/external/VirtualStorageManager.java new file mode 100644 index 000000000000..074b132d127e --- /dev/null +++ b/server/src/main/java/org/apache/druid/segment/loading/external/VirtualStorageManager.java @@ -0,0 +1,97 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.segment.loading.external; + +import org.apache.druid.common.asyncresource.AsyncResource; +import org.apache.druid.error.DruidException; +import org.apache.druid.io.FilePopulator; +import org.apache.druid.segment.loading.StorageLocation; +import org.apache.druid.utils.Throwables; + +import javax.annotation.Nullable; +import java.util.function.LongSupplier; + +/** + * Manages a cache of downloaded files across multiple {@link StorageLocation}. + * Provides a simple API for checking if files exist and reserving space for new downloads. + */ +public interface VirtualStorageManager +{ + /** + * Check if a cached file exists and acquire a hold on it to prevent eviction. + * + * @param identifier Unique identifier for the cached file + * @return CachedFile handle if the file exists, or null if not found + */ + @Nullable + CachedFile get(String identifier); + + /** + * Reserve space for a new file, populate it via the provided lambda, and return + * a handle to access it. Population is done in the calling thread. + * + *

<p>If an entry with the same identifier already exists, returns the existing entry + * without calling the populator. + * + * @param identifier Unique identifier for the cached file + * @param sizeSupplier Supplier that provides the number of bytes to reserve + * @param populator Lambda that will be called with a File to populate the content + * @return CachedFile handle to access the populated file + */ + CachedFile reserveAndPopulate( + String identifier, + LongSupplier sizeSupplier, + FilePopulator populator + ); + + /** + * Reserve space for a new file, populate it via the provided lambda, and return + * a handle to access it. Population is done asynchronously. + * + *

<p>Close the {@link AsyncResource} that is returned from + * this method when done. Do not close the inner {@link CachedFile}, as this will lead + * to a double-close. + * + *

If an entry with the same identifier already exists, returns the existing entry + * without calling the populator. + * + * @param identifier Unique identifier for the cached file + * @param sizeSupplier Supplier that provides the number of bytes to reserve + * @param populator Lambda that will be called with a File to populate the content + * + * @return handle to access the populated file + */ + AsyncResource reserveAndPopulateAsync( + String identifier, + LongSupplier sizeSupplier, + FilePopulator populator + ); + + /** + * Whether the given throwable indicates that a reservation failed because there was not enough space in the + * storage locations to hold a file. + */ + static boolean isInsufficientStorage(final Throwable e) + { + final DruidException druidException = Throwables.getCauseOfType(e, DruidException.class); + return druidException != null + && druidException.getCategory() == DruidException.Category.CAPACITY_EXCEEDED; + } +} diff --git a/server/src/main/java/org/apache/druid/segment/realtime/ChatHandler.java b/server/src/main/java/org/apache/druid/segment/realtime/ChatHandler.java index 56f8330fdcbb..dba814d9aa2c 100644 --- a/server/src/main/java/org/apache/druid/segment/realtime/ChatHandler.java +++ b/server/src/main/java/org/apache/druid/segment/realtime/ChatHandler.java @@ -20,7 +20,7 @@ package org.apache.druid.segment.realtime; /** - * Objects that can be registered with a {@link ServiceAnnouncingChatHandlerProvider} and provide http endpoints for indexing-related + * Objects that can be registered with a {@link ChatHandlerProvider} and provide http endpoints for indexing-related * objects. This interface is empty because it only exists to signal intent. The actual http endpoints are provided * through JAX-RS annotations on the {@link ChatHandler} objects. 
*/ diff --git a/server/src/main/java/org/apache/druid/segment/realtime/ChatHandlerProvider.java b/server/src/main/java/org/apache/druid/segment/realtime/ChatHandlerProvider.java index f19e25f37561..7d60a5a870d3 100644 --- a/server/src/main/java/org/apache/druid/segment/realtime/ChatHandlerProvider.java +++ b/server/src/main/java/org/apache/druid/segment/realtime/ChatHandlerProvider.java @@ -20,42 +20,70 @@ package org.apache.druid.segment.realtime; import com.google.common.base.Optional; +import com.google.inject.Inject; +import org.apache.druid.java.util.common.ISE; +import org.apache.druid.java.util.common.logger.Logger; + +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentMap; /** + * Provides a way for the outside world to talk to objects in the indexing service. Handlers are held in an + * in-memory registry on this host and reached through {@link ChatHandlerResource} via the task's known + * {@link org.apache.druid.indexer.TaskLocation}. */ -public interface ChatHandlerProvider +public class ChatHandlerProvider { + private static final Logger log = new Logger(ChatHandlerProvider.class); + + private final ConcurrentMap handlers; + + @Inject + public ChatHandlerProvider() + { + this.handlers = new ConcurrentHashMap<>(); + } + /** - * Registers a chat handler which provides an API for others to talk to objects in the indexing service. Depending - * on the implementation, this method may also announce this node so that it can be discovered by other services. + * Registers a chat handler which provides an API for others to talk to objects in the indexing service. 
* - * @param key a unique name identifying this service + * @param service a unique name identifying this service * @param handler instance which implements the API to be exposed */ - void register(String key, ChatHandler handler); + public void register(final String service, ChatHandler handler) + { + log.debug("Registering Eventhandler[%s]", service); - /** - * Registers a chat handler which provides an API for others to talk to objects in the indexing service. Setting - * announce to false instructs the implementation to only register the handler to expose the API and skip any - * discovery announcements that might have been broadcast. - * - * @param key a unique name identifying this service - * @param handler instance which implements the API to be exposed - * @param announce for implementations that have a service discovery mechanism, whether this node should be announced - */ - void register(String key, ChatHandler handler, boolean announce); + if (handlers.putIfAbsent(service, handler) != null) { + throw new ISE("handler already registered for service[%s]", service); + } + } /** * Unregisters a chat handler. * - * @param key the name of the service + * @param service the name of the service */ - void unregister(String key); + public void unregister(final String service) + { + log.debug("Unregistering chat handler[%s]", service); + + final ChatHandler handler = handlers.get(service); + if (handler == null) { + log.warn("handler[%s] not currently registered, ignoring.", service); + return; + } + + handlers.remove(service, handler); + } /** * Retrieves a chat handler. 
* * @param key the name of the service */ - Optional get(String key); + public Optional get(final String key) + { + return Optional.fromNullable(handlers.get(key)); + } } diff --git a/server/src/main/java/org/apache/druid/segment/realtime/ServiceAnnouncingChatHandlerProvider.java b/server/src/main/java/org/apache/druid/segment/realtime/ServiceAnnouncingChatHandlerProvider.java deleted file mode 100644 index da3975e4545a..000000000000 --- a/server/src/main/java/org/apache/druid/segment/realtime/ServiceAnnouncingChatHandlerProvider.java +++ /dev/null @@ -1,132 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.druid.segment.realtime; - -import com.google.common.base.Optional; -import com.google.inject.Inject; -import org.apache.druid.curator.discovery.ServiceAnnouncer; -import org.apache.druid.guice.annotations.RemoteChatHandler; -import org.apache.druid.java.util.common.ISE; -import org.apache.druid.java.util.common.logger.Logger; -import org.apache.druid.server.DruidNode; - -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.ConcurrentMap; -import java.util.concurrent.ConcurrentSkipListSet; - -/** - * Provides a way for the outside world to talk to objects in the indexing service. The {@link #get(String)} method - * allows anyone with a reference to this object to obtain a particular {@link ChatHandler}. An embedded - * {@link ServiceAnnouncer} will be used to advertise handlers on this host. - */ -public class ServiceAnnouncingChatHandlerProvider implements ChatHandlerProvider -{ - private static final Logger log = new Logger(ServiceAnnouncingChatHandlerProvider.class); - - private final DruidNode node; - private final ServiceAnnouncer serviceAnnouncer; - private final ConcurrentMap handlers; - private final ConcurrentSkipListSet announcements; - - @Inject - public ServiceAnnouncingChatHandlerProvider( - @RemoteChatHandler DruidNode node, - ServiceAnnouncer serviceAnnouncer - ) - { - this.node = node; - this.serviceAnnouncer = serviceAnnouncer; - this.handlers = new ConcurrentHashMap<>(); - this.announcements = new ConcurrentSkipListSet<>(); - } - - @Override - public void register(final String service, ChatHandler handler) - { - register(service, handler, true); - } - - @Override - public void register(final String service, ChatHandler handler, boolean announce) - { - log.debug("Registering Eventhandler[%s]", service); - - if (handlers.putIfAbsent(service, handler) != null) { - throw new ISE("handler already registered for service[%s]", service); - } - - if (announce) { - try { - 
serviceAnnouncer.announce(makeDruidNode(service)); - if (!announcements.add(service)) { - throw new ISE("announcements already has an entry for service[%s]", service); - } - } - catch (Exception e) { - log.warn(e, "Failed to register service[%s]", service); - handlers.remove(service, handler); - } - } - } - - @Override - public void unregister(final String service) - { - log.debug("Unregistering chat handler[%s]", service); - - final ChatHandler handler = handlers.get(service); - if (handler == null) { - log.warn("handler[%s] not currently registered, ignoring.", service); - return; - } - - if (announcements.contains(service)) { - try { - serviceAnnouncer.unannounce(makeDruidNode(service)); - } - catch (Exception e) { - log.warn(e, "Failed to unregister service[%s]", service); - } - - announcements.remove(service); - } - - handlers.remove(service, handler); - } - - @Override - public Optional get(final String key) - { - return Optional.fromNullable(handlers.get(key)); - } - - private DruidNode makeDruidNode(String key) - { - return new DruidNode( - key, - node.getHost(), - node.isBindOnHost(), - node.getPlaintextPort(), - node.getTlsPort(), - node.isEnablePlaintextPort(), - node.isEnableTlsPort() - ); - } -} diff --git a/server/src/main/java/org/apache/druid/server/DruidNode.java b/server/src/main/java/org/apache/druid/server/DruidNode.java index 820d8d32a08a..19252fde755c 100644 --- a/server/src/main/java/org/apache/druid/server/DruidNode.java +++ b/server/src/main/java/org/apache/druid/server/DruidNode.java @@ -22,6 +22,7 @@ import com.fasterxml.jackson.annotation.JacksonInject; import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.annotation.OptBoolean; import com.google.common.base.Preconditions; import com.google.common.net.HostAndPort; import com.google.inject.name.Named; @@ -130,12 +131,12 @@ public DruidNode( */ @JsonCreator public DruidNode( - @JacksonInject 
@Named("serviceName") @JsonProperty("service") String serviceName, + @JacksonInject(useInput = OptBoolean.TRUE) @Named("serviceName") @JsonProperty("service") String serviceName, @JsonProperty("host") String host, @JsonProperty("bindOnHost") boolean bindOnHost, @JsonProperty("plaintextPort") Integer plaintextPort, - @JacksonInject @Named("servicePort") @JsonProperty("port") Integer port, - @JacksonInject @Named("tlsServicePort") @JsonProperty("tlsPort") Integer tlsPort, + @JacksonInject(useInput = OptBoolean.TRUE) @Named("servicePort") @JsonProperty("port") Integer port, + @JacksonInject(useInput = OptBoolean.TRUE) @Named("tlsServicePort") @JsonProperty("tlsPort") Integer tlsPort, @JsonProperty("enablePlaintextPort") Boolean enablePlaintextPort, @JsonProperty("enableTlsPort") boolean enableTlsPort, @JsonProperty("labels") @Nullable Map labels diff --git a/server/src/main/java/org/apache/druid/server/coordinator/DruidCoordinator.java b/server/src/main/java/org/apache/druid/server/coordinator/DruidCoordinator.java index 72f50a87a570..41237e352997 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/DruidCoordinator.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/DruidCoordinator.java @@ -35,10 +35,8 @@ import org.apache.druid.client.ServerInventoryView; import org.apache.druid.client.coordinator.Coordinator; import org.apache.druid.common.guava.FutureUtils; -import org.apache.druid.curator.discovery.ServiceAnnouncer; import org.apache.druid.discovery.DruidLeaderSelector; import org.apache.druid.guice.ManageLifecycle; -import org.apache.druid.guice.annotations.Self; import org.apache.druid.indexer.CompactionEngine; import org.apache.druid.java.util.common.DateTimes; import org.apache.druid.java.util.common.concurrent.ScheduledExecutorFactory; @@ -54,7 +52,6 @@ import org.apache.druid.rpc.indexing.SegmentUpdateResponse; import org.apache.druid.segment.metadata.CentralizedDatasourceSchemaConfig; import 
org.apache.druid.segment.metadata.CoordinatorSegmentMetadataCache; -import org.apache.druid.server.DruidNode; import org.apache.druid.server.compaction.CompactionRunSimulator; import org.apache.druid.server.compaction.CompactionSimulateResult; import org.apache.druid.server.compaction.CompactionStatusTracker; @@ -132,8 +129,6 @@ public class DruidCoordinator private final List dutiesRunnables = new ArrayList<>(); private final LoadQueueTaskMaster taskMaster; private final SegmentLoadQueueManager loadQueueManager; - private final ServiceAnnouncer serviceAnnouncer; - private final DruidNode self; private final CoordinatorCustomDutyGroups customDutyGroups; private final BalancerStrategyFactory balancerStrategyFactory; private final LookupCoordinatorManager lookupCoordinatorManager; @@ -183,8 +178,6 @@ public DruidCoordinator( OverlordClient overlordClient, LoadQueueTaskMaster taskMaster, SegmentLoadQueueManager loadQueueManager, - ServiceAnnouncer serviceAnnouncer, - @Self DruidNode self, CoordinatorCustomDutyGroups customDutyGroups, LookupCoordinatorManager lookupCoordinatorManager, @Coordinator DruidLeaderSelector coordLeaderSelector, @@ -202,8 +195,6 @@ public DruidCoordinator( this.emitter = emitter; this.overlordClient = overlordClient; this.taskMaster = taskMaster; - this.serviceAnnouncer = serviceAnnouncer; - this.self = self; this.customDutyGroups = customDutyGroups; this.executorFactory = scheduledExecutorFactory; @@ -449,7 +440,6 @@ private void becomeLeader() metadataManager.onLeaderStart(); taskMaster.onLeaderStart(); lookupCoordinatorManager.start(); - serviceAnnouncer.announce(self); if (coordinatorSegmentMetadataCache != null) { coordinatorSegmentMetadataCache.onLeaderStart(); } @@ -538,7 +528,6 @@ private void stopBeingLeader() taskMaster.onLeaderStop(); coordinatorDynamicConfigSyncer.onLeaderStop(); brokerDynamicConfigSyncer.onLeaderStop(); - serviceAnnouncer.unannounce(self); lookupCoordinatorManager.stop(); metadataManager.onLeaderStop(); 
stopAllDutyGroups(); diff --git a/server/src/main/java/org/apache/druid/server/coordinator/InlineSchemaDataSourceCompactionConfig.java b/server/src/main/java/org/apache/druid/server/coordinator/InlineSchemaDataSourceCompactionConfig.java index 55f73758b69a..23eccb3f4326 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/InlineSchemaDataSourceCompactionConfig.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/InlineSchemaDataSourceCompactionConfig.java @@ -49,8 +49,8 @@ public static Builder builder() } /** - * The number of input segments is limited because the byte size of a serialized task spec is limited by - * org.apache.druid.indexing.overlord.config.RemoteTaskRunnerConfig.maxZnodeBytes. + * The number of input segments is limited because the byte size of a serialized task spec is bounded by the + * maximum payload size accepted by the task runner. */ @Nullable private final Integer maxRowsPerSegment; diff --git a/server/src/main/java/org/apache/druid/server/coordinator/balancer/DiskNormalizedCostBalancerStrategy.java b/server/src/main/java/org/apache/druid/server/coordinator/balancer/DiskNormalizedCostBalancerStrategy.java index e8b1b902dde6..53180fa9e3eb 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/balancer/DiskNormalizedCostBalancerStrategy.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/balancer/DiskNormalizedCostBalancerStrategy.java @@ -26,21 +26,22 @@ /** * A {@link BalancerStrategy} which normalizes the cost of placing a segment on a - * server as calculated by {@link CostBalancerStrategy} by multiplying it by the - * server's disk usage ratio. + * server as calculated by {@link CostBalancerStrategy} by dividing by the + * server's projected available disk headroom. *

- * normalizedCost = cost * usageRatio
- *     where usageRatio = diskUsed / totalDiskSpace
+ * normalizedCost = cost / max(EPSILON, 1 - projectedUsageRatio)
+ *     where projectedUsageRatio = (diskUsed + segmentSizeIfNotAlreadyProjected) / totalDiskSpace
  * 
- * This penalizes servers that are more full, driving disk utilization to equalize - * across the tier. When all servers have equal disk usage, the behavior is identical - * to {@link CostBalancerStrategy}. When historicals have different disk capacities, - * this naturally accounts for both fill level and total capacity. + * The denominator diverges as a server approaches full, so disk fullness has + * more weight over the placement decision when servers are nearly full, + * regardless of asymmetries in the locality cost. {@link #EPSILON} is a small + * numerical floor on the divisor to guard against division by zero (or by + * negative values during in-flight loads). *

- * To prevent oscillation when servers have similar utilization, any server that + * To prevent oscillation when servers have similar headroom, any server that * is already projected to hold the segment (the source on a move, or a currently * serving node on a drop) receives a cost discount equal to - * {@link #DEFAULT_MOVE_COST_SAVINGS_THRESHOLD}. A move therefore fires only when + * {@link DiskNormalizedCostBalancerStrategyConfig#DEFAULT_MOVE_COST_SAVINGS_THRESHOLD}. A move therefore fires only when * the destination saves at least this fraction of the source's cost. The default * is configurable via * {@code druid.coordinator.balancer.diskNormalized.moveCostSavingsThreshold}. @@ -48,18 +49,17 @@ public class DiskNormalizedCostBalancerStrategy extends CostBalancerStrategy { /** - * Default minimum fractional cost reduction required before a segment will - * be moved off a server that is already projected to hold it. A value of - * {@code 0.05} means the destination must be at least 5% cheaper than the - * source for the move to happen. + * Numerical floor on the headroom divisor to prevent division by zero or by + * negative values when {@code usageRatio >= 1.0} (possible for over-allocated + * servers or during in-flight loads). */ - static final double DEFAULT_MOVE_COST_SAVINGS_THRESHOLD = 0.05; + static final double EPSILON = 1e-6; private final double sourceCostMultiplier; public DiskNormalizedCostBalancerStrategy(ListeningExecutorService exec) { - this(exec, DEFAULT_MOVE_COST_SAVINGS_THRESHOLD); + this(exec, DiskNormalizedCostBalancerStrategyConfig.DEFAULT_MOVE_COST_SAVINGS_THRESHOLD); } public DiskNormalizedCostBalancerStrategy(ListeningExecutorService exec, double moveCostSavingsThreshold) @@ -85,19 +85,20 @@ protected double computePlacementCost( return cost; } - // Guard against NaN propagation in the cost comparator if a server - // somehow reports a non-positive maxSize. 
Such a server cannot hold - // anything and will be rejected by canLoadSegment, so returning the - // raw cost is safe. + // A server with non-positive maxSize cannot hold anything and will be + // rejected by canLoadSegment; return the raw cost to avoid NaN propagation. final long maxSize = server.getMaxSize(); if (maxSize <= 0) { return cost; } - double usageRatio = (double) server.getSizeUsed() / maxSize; - double normalizedCost = cost * usageRatio; + final boolean alreadyProjected = server.isProjectedSegment(proposalSegment); + final long projectedSizeUsed = server.getSizeUsed() + (alreadyProjected ? 0 : proposalSegment.getSize()); + final double usageRatio = (double) projectedSizeUsed / maxSize; + final double headroom = Math.max(EPSILON, 1.0 - usageRatio); + double normalizedCost = cost / headroom; - if (server.isProjectedSegment(proposalSegment)) { + if (alreadyProjected) { normalizedCost *= sourceCostMultiplier; } diff --git a/server/src/main/java/org/apache/druid/server/coordinator/balancer/DiskNormalizedCostBalancerStrategyConfig.java b/server/src/main/java/org/apache/druid/server/coordinator/balancer/DiskNormalizedCostBalancerStrategyConfig.java index 95680e7e7dd6..1219eddc8ab8 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/balancer/DiskNormalizedCostBalancerStrategyConfig.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/balancer/DiskNormalizedCostBalancerStrategyConfig.java @@ -34,6 +34,14 @@ */ public class DiskNormalizedCostBalancerStrategyConfig { + /** + * Default minimum fractional cost reduction required before a segment will + * be moved off a server that is already projected to hold it. A value of + * {@code 0.05} means the destination must be at least 5% cheaper than the + * source for the move to happen. + */ + static final double DEFAULT_MOVE_COST_SAVINGS_THRESHOLD = 0.05; + /** * Minimum fractional cost reduction required to move a segment off a server * that is already projected to hold it. 
For example, a value of {@code 0.05} means the @@ -53,7 +61,7 @@ public DiskNormalizedCostBalancerStrategyConfig( @JsonProperty("moveCostSavingsThreshold") @Nullable Double moveCostSavingsThreshold ) { - this.moveCostSavingsThreshold = Configs.valueOrDefault(moveCostSavingsThreshold, DiskNormalizedCostBalancerStrategy.DEFAULT_MOVE_COST_SAVINGS_THRESHOLD); + this.moveCostSavingsThreshold = Configs.valueOrDefault(moveCostSavingsThreshold, DEFAULT_MOVE_COST_SAVINGS_THRESHOLD); Preconditions.checkArgument( this.moveCostSavingsThreshold >= 0.0 && this.moveCostSavingsThreshold < 1.0, diff --git a/server/src/test/java/org/apache/druid/client/cache/CaffeineCacheTest.java b/server/src/test/java/org/apache/druid/client/cache/CaffeineCacheTest.java index b34fcbdd830c..35cfc4ddf42a 100644 --- a/server/src/test/java/org/apache/druid/client/cache/CaffeineCacheTest.java +++ b/server/src/test/java/org/apache/druid/client/cache/CaffeineCacheTest.java @@ -191,22 +191,21 @@ public long getSizeInBytes() final Cache.NamedKey key2 = new Cache.NamedKey("the", s2); final CaffeineCache cache = CaffeineCache.create(config, Runnable::run); - Assert.assertNull(cache.get(key1)); - Assert.assertNull(cache.get(key2)); - - cache.put(key1, val1); - Assert.assertArrayEquals(val1, cache.get(key1)); - Assert.assertNull(cache.get(key2)); - Assert.assertEquals(0, cache.getCache().stats().evictionWeight()); - Assert.assertArrayEquals(val1, cache.get(key1)); - Assert.assertNull(cache.get(key2)); - + // Two entries with combined weight exceeding the 40-byte maximum. Caffeine 3's W-TinyLFU + // admission policy chooses which to keep based on frequency; we don't assert on identity, + // only that eviction happened and the cache shrank back under its bound. 
+ cache.put(key1, val1); cache.put(key2, val2); - Assert.assertNull(cache.get(key1)); - Assert.assertArrayEquals(val2, cache.get(key2)); - Assert.assertEquals(34, cache.getCache().stats().evictionWeight()); + cache.getCache().cleanUp(); + + Assert.assertTrue( + "Expected eviction weight > 0 after exceeding max size, got " + + cache.getCache().stats().evictionWeight(), + cache.getCache().stats().evictionWeight() > 0 + ); + Assert.assertEquals(1, cache.getCache().asMap().size()); } @Test diff --git a/server/src/test/java/org/apache/druid/curator/discovery/LatchableServiceAnnouncer.java b/server/src/test/java/org/apache/druid/curator/discovery/LatchableServiceAnnouncer.java deleted file mode 100644 index 61e77ba2565d..000000000000 --- a/server/src/test/java/org/apache/druid/curator/discovery/LatchableServiceAnnouncer.java +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.druid.curator.discovery; - -import org.apache.druid.server.DruidNode; - -import javax.annotation.Nullable; -import java.util.concurrent.CountDownLatch; - -/** - * A test service announcer that counts down the corresponding latches upon - * invocation of {@link #announce(DruidNode)} and {@link #unannounce(DruidNode)}. - */ -public class LatchableServiceAnnouncer implements ServiceAnnouncer -{ - private final CountDownLatch announceLatch; - private final CountDownLatch unannounceLatch; - - /** - * Creates a new {@link LatchableServiceAnnouncer} with the given countdown - * latches for announce and unannounce actions. - */ - public LatchableServiceAnnouncer( - @Nullable CountDownLatch announceLatch, - @Nullable CountDownLatch unannounceLatch - ) - { - this.announceLatch = announceLatch; - this.unannounceLatch = unannounceLatch; - } - - @Override - public void announce(DruidNode node) - { - if (announceLatch != null) { - announceLatch.countDown(); - } - } - - @Override - public void unannounce(DruidNode node) - { - if (unannounceLatch != null) { - unannounceLatch.countDown(); - } - } -} diff --git a/server/src/test/java/org/apache/druid/curator/discovery/ServerDiscoverySelectorTest.java b/server/src/test/java/org/apache/druid/curator/discovery/ServerDiscoverySelectorTest.java deleted file mode 100644 index 0d0180725d1f..000000000000 --- a/server/src/test/java/org/apache/druid/curator/discovery/ServerDiscoverySelectorTest.java +++ /dev/null @@ -1,237 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.curator.discovery; - -import org.apache.curator.x.discovery.ServiceInstance; -import org.apache.curator.x.discovery.ServiceProvider; -import org.apache.druid.client.selector.Server; -import org.apache.druid.java.util.common.StringUtils; -import org.easymock.EasyMock; -import org.junit.Assert; -import org.junit.Before; -import org.junit.Test; - -import java.io.IOException; -import java.net.URI; - -public class ServerDiscoverySelectorTest -{ - - private ServiceProvider serviceProvider; - private ServerDiscoverySelector serverDiscoverySelector; - private ServiceInstance instance; - private static final int PORT = 8080; - private static final int SSL_PORT = 8280; - private static final String ADDRESS = "localhost"; - - @Before - public void setUp() - { - serviceProvider = EasyMock.createMock(ServiceProvider.class); - instance = EasyMock.createMock(ServiceInstance.class); - serverDiscoverySelector = new ServerDiscoverySelector(serviceProvider, "test"); - } - - @Test - public void testPick() throws Exception - { - EasyMock.expect(serviceProvider.getInstance()).andReturn(instance).anyTimes(); - EasyMock.expect(instance.getAddress()).andReturn(ADDRESS).anyTimes(); - EasyMock.expect(instance.getPort()).andReturn(PORT).anyTimes(); - EasyMock.expect(instance.getSslPort()).andReturn(-1).anyTimes(); - EasyMock.replay(instance, serviceProvider); - Server server = serverDiscoverySelector.pick(); - Assert.assertEquals(PORT, server.getPort()); - Assert.assertEquals(ADDRESS, server.getAddress()); - 
Assert.assertTrue(server.getHost().contains(Integer.toString(PORT))); - Assert.assertTrue(server.getHost().contains(ADDRESS)); - Assert.assertEquals("http", server.getScheme()); - EasyMock.verify(instance, serviceProvider); - final URI uri = new URI( - server.getScheme(), - null, - server.getAddress(), - server.getPort(), - "/druid/indexer/v1/action", - null, - null - ); - Assert.assertEquals(PORT, uri.getPort()); - Assert.assertEquals(ADDRESS, uri.getHost()); - Assert.assertEquals("http", uri.getScheme()); - } - - @Test - public void testPickWithNullSslPort() throws Exception - { - EasyMock.expect(serviceProvider.getInstance()).andReturn(instance).anyTimes(); - EasyMock.expect(instance.getAddress()).andReturn(ADDRESS).anyTimes(); - EasyMock.expect(instance.getPort()).andReturn(PORT).anyTimes(); - EasyMock.expect(instance.getSslPort()).andReturn(null).anyTimes(); - EasyMock.replay(instance, serviceProvider); - Server server = serverDiscoverySelector.pick(); - Assert.assertEquals(PORT, server.getPort()); - Assert.assertEquals(ADDRESS, server.getAddress()); - Assert.assertTrue(server.getHost().contains(Integer.toString(PORT))); - Assert.assertTrue(server.getHost().contains(ADDRESS)); - Assert.assertEquals("http", server.getScheme()); - EasyMock.verify(instance, serviceProvider); - final URI uri = new URI( - server.getScheme(), - null, - server.getAddress(), - server.getPort(), - "/druid/indexer/v1/action", - null, - null - ); - Assert.assertEquals(PORT, uri.getPort()); - Assert.assertEquals(ADDRESS, uri.getHost()); - Assert.assertEquals("http", uri.getScheme()); - } - - @Test - public void testPickWithSslPort() throws Exception - { - EasyMock.expect(serviceProvider.getInstance()).andReturn(instance).anyTimes(); - EasyMock.expect(instance.getAddress()).andReturn(ADDRESS).anyTimes(); - EasyMock.expect(instance.getPort()).andReturn(PORT).anyTimes(); - EasyMock.expect(instance.getSslPort()).andReturn(SSL_PORT).anyTimes(); - EasyMock.replay(instance, serviceProvider); - 
Server server = serverDiscoverySelector.pick(); - Assert.assertEquals(SSL_PORT, server.getPort()); - Assert.assertEquals(ADDRESS, server.getAddress()); - Assert.assertTrue(server.getHost().contains(Integer.toString(SSL_PORT))); - Assert.assertTrue(server.getHost().contains(ADDRESS)); - Assert.assertEquals("https", server.getScheme()); - EasyMock.verify(instance, serviceProvider); - final URI uri = new URI( - server.getScheme(), - null, - server.getAddress(), - server.getPort(), - "/druid/indexer/v1/action", - null, - null - ); - Assert.assertEquals(SSL_PORT, uri.getPort()); - Assert.assertEquals(ADDRESS, uri.getHost()); - Assert.assertEquals("https", uri.getScheme()); - } - - @Test - public void testPickIPv6() throws Exception - { - final String address = "2001:0db8:0000:0000:0000:ff00:0042:8329"; - EasyMock.expect(serviceProvider.getInstance()).andReturn(instance).anyTimes(); - EasyMock.expect(instance.getAddress()).andReturn(address).anyTimes(); - EasyMock.expect(instance.getPort()).andReturn(PORT).anyTimes(); - EasyMock.expect(instance.getSslPort()).andReturn(-1).anyTimes(); - EasyMock.replay(instance, serviceProvider); - Server server = serverDiscoverySelector.pick(); - Assert.assertEquals(PORT, server.getPort()); - Assert.assertEquals(address, server.getAddress()); - Assert.assertTrue(server.getHost().contains(Integer.toString(PORT))); - Assert.assertTrue(server.getHost().contains(address)); - Assert.assertEquals("http", server.getScheme()); - EasyMock.verify(instance, serviceProvider); - final URI uri = new URI( - server.getScheme(), - null, - server.getAddress(), - server.getPort(), - "/druid/indexer/v1/action", - null, - null - ); - Assert.assertEquals(PORT, uri.getPort()); - Assert.assertEquals(StringUtils.format("[%s]", address), uri.getHost()); - Assert.assertEquals("http", uri.getScheme()); - } - - - @Test - public void testPickIPv6Bracket() throws Exception - { - final String address = "[2001:0db8:0000:0000:0000:ff00:0042:8329]"; - 
EasyMock.expect(serviceProvider.getInstance()).andReturn(instance).anyTimes(); - EasyMock.expect(instance.getAddress()).andReturn(address).anyTimes(); - EasyMock.expect(instance.getPort()).andReturn(PORT).anyTimes(); - EasyMock.expect(instance.getSslPort()).andReturn(-1).anyTimes(); - EasyMock.replay(instance, serviceProvider); - Server server = serverDiscoverySelector.pick(); - Assert.assertEquals(PORT, server.getPort()); - Assert.assertEquals(address, server.getAddress()); - Assert.assertTrue(server.getHost().contains(Integer.toString(PORT))); - Assert.assertTrue(server.getHost().contains(address)); - Assert.assertEquals("http", server.getScheme()); - EasyMock.verify(instance, serviceProvider); - final URI uri = new URI( - server.getScheme(), - null, - server.getAddress(), - server.getPort(), - "/druid/indexer/v1/action", - null, - null - ); - Assert.assertEquals(PORT, uri.getPort()); - Assert.assertEquals(address, uri.getHost()); - Assert.assertEquals("http", uri.getScheme()); - } - - @Test - public void testPickWithNullInstance() throws Exception - { - EasyMock.expect(serviceProvider.getInstance()).andReturn(null).anyTimes(); - EasyMock.replay(serviceProvider); - Server server = serverDiscoverySelector.pick(); - Assert.assertNull(server); - EasyMock.verify(serviceProvider); - } - - @Test - public void testPickWithException() throws Exception - { - EasyMock.expect(serviceProvider.getInstance()).andThrow(new Exception()).anyTimes(); - EasyMock.replay(serviceProvider); - Server server = serverDiscoverySelector.pick(); - Assert.assertNull(server); - EasyMock.verify(serviceProvider); - } - - @Test - public void testStart() throws Exception - { - serviceProvider.start(); - EasyMock.replay(serviceProvider); - serverDiscoverySelector.start(); - EasyMock.verify(serviceProvider); - } - - @Test - public void testStop() throws IOException - { - serviceProvider.close(); - EasyMock.replay(serviceProvider); - serverDiscoverySelector.stop(); - EasyMock.verify(serviceProvider); 
- } -} diff --git a/server/src/test/java/org/apache/druid/curator/discovery/ServiceAnnouncerTest.java b/server/src/test/java/org/apache/druid/curator/discovery/ServiceAnnouncerTest.java deleted file mode 100644 index f80f35bdca83..000000000000 --- a/server/src/test/java/org/apache/druid/curator/discovery/ServiceAnnouncerTest.java +++ /dev/null @@ -1,105 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.druid.curator.discovery; - -import com.google.common.collect.ImmutableList; -import com.google.common.collect.Iterators; -import org.apache.curator.x.discovery.ServiceDiscovery; -import org.apache.curator.x.discovery.ServiceDiscoveryBuilder; -import org.apache.curator.x.discovery.ServiceInstance; -import org.apache.druid.curator.CuratorTestBase; -import org.apache.druid.java.util.common.ISE; -import org.junit.After; -import org.junit.Assert; -import org.junit.Before; -import org.junit.Test; - -import java.util.List; - -public class ServiceAnnouncerTest extends CuratorTestBase -{ - @Before - public void setUp() throws Exception - { - setupServerAndCurator(); - } - - @Test - public void testServiceAnnouncement() throws Exception - { - curator.start(); - curator.blockUntilConnected(); - List serviceNames = ImmutableList.of( - "druid/overlord", - "druid/coordinator" - ); - final ServiceDiscovery serviceDiscovery = createAndAnnounceServices(serviceNames); - Assert.assertTrue( - Iterators.all( - serviceNames.iterator(), - input -> { - try { - return serviceDiscovery.queryForInstances(input.replace('/', ':')).size() == 1; - } - catch (Exception e) { - throw new ISE( - "Something went wrong while finding instance with name [%s] in Service Discovery", - input - ); - } - } - ) - ); - } - - @Test (expected = IllegalArgumentException.class) - public void testServiceAnnouncementFail() throws Exception - { - curator.start(); - curator.blockUntilConnected(); - createAndAnnounceServices(ImmutableList.of("placeholder/\u0001")); - } - - private ServiceDiscovery createAndAnnounceServices(List serviceNames) throws Exception - { - int port = 1000; - ServiceDiscovery serviceDiscovery = - ServiceDiscoveryBuilder.builder(Void.class) - .basePath("/test") - .client(curator) - .build(); - for (String serviceName : serviceNames) { - String serviceNameToUse = CuratorServiceUtils.makeCanonicalServiceName(serviceName); - ServiceInstance instance = 
ServiceInstance.builder() - .name(serviceNameToUse) - .address("localhost") - .port(port++) - .build(); - serviceDiscovery.registerService(instance); - } - return serviceDiscovery; - } - - @After - public void tearDown() - { - tearDownServerAndCurator(); - } -} diff --git a/server/src/test/java/org/apache/druid/segment/loading/NoopSegmentCacheManager.java b/server/src/test/java/org/apache/druid/segment/loading/NoopSegmentCacheManager.java index ae118afa7bba..1fed47d38ea6 100644 --- a/server/src/test/java/org/apache/druid/segment/loading/NoopSegmentCacheManager.java +++ b/server/src/test/java/org/apache/druid/segment/loading/NoopSegmentCacheManager.java @@ -121,6 +121,12 @@ public void shutdown() @Override public List getLocations() { - return List.of(); + throw new UnsupportedOperationException(); + } + + @Override + public StorageLoadingThreadPool getLoadingThreadPool() + { + throw new UnsupportedOperationException(); } } diff --git a/server/src/test/java/org/apache/druid/segment/loading/PartialSegmentBundleCacheEntryIdentifierTest.java b/server/src/test/java/org/apache/druid/segment/loading/PartialSegmentBundleCacheEntryIdentifierTest.java new file mode 100644 index 000000000000..831b03e76fa6 --- /dev/null +++ b/server/src/test/java/org/apache/druid/segment/loading/PartialSegmentBundleCacheEntryIdentifierTest.java @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.loading; + +import nl.jqno.equalsverifier.EqualsVerifier; +import org.apache.druid.java.util.common.Intervals; +import org.apache.druid.timeline.SegmentId; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +class PartialSegmentBundleCacheEntryIdentifierTest +{ + @Test + void testEqualsAndHashCode() + { + EqualsVerifier.forClass(PartialSegmentBundleCacheEntryIdentifier.class).usingGetClass().verify(); + } + + @Test + void testNotEqualToSegmentCacheEntryIdentifierWithSameSegmentId() + { + final SegmentId segmentId = SegmentId.of("ds", Intervals.of("2025/2026"), "v1", 0); + final PartialSegmentBundleCacheEntryIdentifier bundle = new PartialSegmentBundleCacheEntryIdentifier( + segmentId, + "__base" + ); + final SegmentCacheEntryIdentifier segment = new SegmentCacheEntryIdentifier(segmentId); + Assertions.assertNotEquals(bundle, segment); + Assertions.assertNotEquals(segment, bundle); + } +} diff --git a/server/src/test/java/org/apache/druid/segment/loading/PartialSegmentBundleCacheEntryTest.java b/server/src/test/java/org/apache/druid/segment/loading/PartialSegmentBundleCacheEntryTest.java new file mode 100644 index 000000000000..e794c3abcd65 --- /dev/null +++ b/server/src/test/java/org/apache/druid/segment/loading/PartialSegmentBundleCacheEntryTest.java @@ -0,0 +1,832 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.loading; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.io.Files; +import com.google.common.primitives.Ints; +import org.apache.druid.data.input.InputRow; +import org.apache.druid.data.input.ListBasedInputRow; +import org.apache.druid.data.input.impl.AggregateProjectionSpec; +import org.apache.druid.data.input.impl.DimensionsSpec; +import org.apache.druid.data.input.impl.LongDimensionSchema; +import org.apache.druid.data.input.impl.StringDimensionSchema; +import org.apache.druid.error.DruidException; +import org.apache.druid.java.util.common.DateTimes; +import org.apache.druid.java.util.common.FileUtils; +import org.apache.druid.java.util.common.Intervals; +import org.apache.druid.java.util.common.StringUtils; +import org.apache.druid.java.util.common.concurrent.Execs; +import org.apache.druid.query.aggregation.CountAggregatorFactory; +import org.apache.druid.query.aggregation.LongSumAggregatorFactory; +import org.apache.druid.segment.IndexBuilder; +import org.apache.druid.segment.IndexIO; +import org.apache.druid.segment.IndexSpec; +import org.apache.druid.segment.TestHelper; +import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.column.RowSignature; +import 
org.apache.druid.segment.data.CompressionStrategy; +import org.apache.druid.segment.file.DirectoryBackedRangeReader; +import org.apache.druid.segment.file.PartialSegmentFileMapperV10; +import org.apache.druid.segment.file.SegmentFileBuilder; +import org.apache.druid.segment.file.SegmentFileBuilderV10; +import org.apache.druid.segment.incremental.IncrementalIndexSchema; +import org.apache.druid.segment.projections.Projections; +import org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMediumFactory; +import org.apache.druid.timeline.SegmentId; +import org.joda.time.DateTime; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +import java.io.Closeable; +import java.io.File; +import java.io.IOException; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.ThreadLocalRandom; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; + +/** + * Tests for {@link PartialSegmentBundleCacheEntry}: creating entries with {@code forBundle}, mounting and unmounting + * them against a {@link StorageLocation}, and the references held between bundles and their + * {@link PartialSegmentMetadataCacheEntry}. A V10 segment with one aggregate projection ({@link #AGG_BUNDLE}) is + * built once per class; a few tests write their own small segments instead. + */ +class PartialSegmentBundleCacheEntryTest +{ + private static final ObjectMapper JSON_MAPPER = TestHelper.makeJsonMapper(); + private static final SegmentId SEGMENT_ID = SegmentId.of("test", Intervals.of("2025/2026"), "v1", 0); + private static final String AGG_BUNDLE = "dim1_metric1_sum"; + private static final long ESTIMATE = 16 * 1024 * 1024L; + + private static final DateTime TIME = DateTimes.of("2025-01-01"); + private static final RowSignature ROW_SIGNATURE = RowSignature.builder() + .add("dim1", ColumnType.STRING) + .add("metric1", ColumnType.LONG) + .build(); + + private static final List PROJECTIONS = Collections.singletonList( + AggregateProjectionSpec.builder(AGG_BUNDLE) + .groupingColumns(new StringDimensionSchema("dim1")) + .aggregators( + new LongSumAggregatorFactory("_metric1_sum",
"metric1"), + new CountAggregatorFactory("_count") + ) + .build() + ); + + private static final List ROWS = Arrays.asList( + new ListBasedInputRow(ROW_SIGNATURE, TIME, ROW_SIGNATURE.getColumnNames(), Arrays.asList("a", 1L)), + new ListBasedInputRow(ROW_SIGNATURE, TIME.plusMinutes(1), ROW_SIGNATURE.getColumnNames(), Arrays.asList("a", 2L)), + new ListBasedInputRow(ROW_SIGNATURE, TIME.plusMinutes(2), ROW_SIGNATURE.getColumnNames(), Arrays.asList("b", 3L)), + new ListBasedInputRow(ROW_SIGNATURE, TIME.plusMinutes(3), ROW_SIGNATURE.getColumnNames(), Arrays.asList("b", 4L)) + ); + + @TempDir + static File sharedTempDir; + + private static File segmentDir; + + @TempDir + File perTestTempDir; + + private File cacheDir; + private File deepStorageDir; + + /** + * Builds the shared V10 segment once for the class: the four {@link #ROWS} over {@code dim1}/{@code metric1} with + * rollup disabled, the {@link #PROJECTIONS} aggregate projection, and metadata compression set to NONE. A randomly + * named directory under {@link #sharedTempDir} is used as the builder's tmpDir, and the result of + * {@code buildMMappedIndexFile()} is stored in {@link #segmentDir}. + */ + @BeforeAll + static void buildSegment() + { + final File tmp = new File(sharedTempDir, "build_" + ThreadLocalRandom.current().nextInt()); + segmentDir = IndexBuilder.create() + .useV10() + .tmpDir(tmp) + .segmentWriteOutMediumFactory(OffHeapMemorySegmentWriteOutMediumFactory.instance()) + .schema( + IncrementalIndexSchema.builder() + .withDimensionsSpec( + DimensionsSpec.builder() + .setDimensions( + List.of( + new StringDimensionSchema("dim1"), + new LongDimensionSchema("metric1") + ) + ) + .build() + ) + .withRollup(false) + .withMinTimestamp(TIME.getMillis()) + .withProjections(PROJECTIONS) + .build() + ) + .indexSpec(IndexSpec.builder().withMetadataCompression(CompressionStrategy.NONE).build()) + .rows(ROWS) + .buildMMappedIndexFile(); + } + + /** + * Points {@link #deepStorageDir} at the shared segment directory and creates a fresh, randomly named cache + * directory under {@link #perTestTempDir}. + */ + @BeforeEach + void setup() throws IOException + { + deepStorageDir = segmentDir; + cacheDir = new File(perTestTempDir, "cache_" + ThreadLocalRandom.current().nextInt(Integer.MAX_VALUE)); + FileUtils.mkdirp(cacheDir); + } + + /** + * Reserves and mounts a metadata entry, creates the base-table bundle with no parents, and checks that the bundle + * reports at least one container ref, a positive size, and the expected segment id and bundle name. + */ + @Test + void testForBundleDerivesContainerIndicesAndSize() throws IOException + { + final StorageLocation location = new StorageLocation(cacheDir, ESTIMATE * 8, null); + final PartialSegmentMetadataCacheEntry metadata = newMetadataEntry(); +
Assertions.assertTrue(location.reserve(metadata)); + metadata.mount(location); + + final PartialSegmentBundleCacheEntry baseEntry = PartialSegmentBundleCacheEntry.forBundle( + metadata, + Projections.BASE_TABLE_PROJECTION_NAME, + List.of() + ); + Assertions.assertFalse(baseEntry.getContainerRefs().isEmpty()); + Assertions.assertTrue(baseEntry.getSize() > 0); + Assertions.assertEquals(SEGMENT_ID, baseEntry.getSegmentId()); + Assertions.assertEquals(Projections.BASE_TABLE_PROJECTION_NAME, baseEntry.getBundleName()); + } + + /** + * {@code forBundle} must throw {@link DruidException} when the metadata entry has been created but never mounted. + */ + @Test + void testForBundleFailsIfMetadataNotMounted() + { + final PartialSegmentMetadataCacheEntry metadata = newMetadataEntry(); + Assertions.assertThrows( + DruidException.class, + () -> PartialSegmentBundleCacheEntry.forBundle(metadata, Projections.BASE_TABLE_PROJECTION_NAME, List.of()) + ); + } + + /** + * With the metadata entry reserved and mounted, {@code forBundle} must throw {@link DruidException} for the bundle + * name {@code "no_such_bundle"}. + */ + @Test + void testForBundleFailsIfBundleUnknown() throws IOException + { + final StorageLocation location = new StorageLocation(cacheDir, ESTIMATE * 8, null); + final PartialSegmentMetadataCacheEntry metadata = newMetadataEntry(); + Assertions.assertTrue(location.reserve(metadata)); + metadata.mount(location); + + Assertions.assertThrows( + DruidException.class, + () -> PartialSegmentBundleCacheEntry.forBundle(metadata, "no_such_bundle", List.of()) + ); + } + + /** + * Mounts the base-table bundle and checks that, for every container ref, a file named with the pattern + * {@code "%s.container.%05d"} exists in the cache directory. The name part is the ref's external filename, or + * {@code IndexIO.V10_FILE_NAME} when that is null; the number is the ref's container index. + */ + @Test + void testMountSparseAllocatesContainerFiles() throws IOException + { + final StorageLocation location = new StorageLocation(cacheDir, ESTIMATE * 8, null); + final PartialSegmentMetadataCacheEntry metadata = newMetadataEntry(); + Assertions.assertTrue(location.reserve(metadata)); + metadata.mount(location); + + final PartialSegmentBundleCacheEntry baseEntry = PartialSegmentBundleCacheEntry.forBundle( + metadata, + Projections.BASE_TABLE_PROJECTION_NAME, + List.of() + ); + Assertions.assertNotNull(location.addWeakReservationHold(baseEntry.getId(), () -> baseEntry)); + baseEntry.mount(location); + + Assertions.assertTrue(baseEntry.isMounted()); + for
(PartialSegmentBundleCacheEntry.BundleContainerRef ref : baseEntry.getContainerRefs()) { + final String mapperFilename = + ref.externalFilename() != null ? ref.externalFilename() : IndexIO.V10_FILE_NAME; + final File containerFile = new File( + cacheDir, + StringUtils.format("%s.container.%05d", mapperFilename, ref.containerIndex()) + ); + Assertions.assertTrue(containerFile.exists(), "container " + ref + " should be sparse-allocated"); + } + } + + /** + * Mounts the base bundle, closes its bootstrap hold, then mounts the aggregate bundle that names the base as its + * parent. The only check is that {@code location.currentSizeBytes()} is still at least the base entry's size; the + * test then closes the aggregate's hold and unmounts it. + * + * NOTE(review): nothing here attempts to reclaim the base's bytes, and no assertion follows the final unmount, so + * the "base is then evictable" statement in the inline comment is not verified by this test. + */ + @Test + void testMountAcquiresParentHoldsForAggregateBundle() throws IOException + { + final StorageLocation location = new StorageLocation(cacheDir, ESTIMATE * 8, null); + final PartialSegmentMetadataCacheEntry metadata = newMetadataEntry(); + Assertions.assertTrue(location.reserve(metadata)); + metadata.mount(location); + + final PartialSegmentBundleCacheEntry baseEntry = PartialSegmentBundleCacheEntry.forBundle( + metadata, + Projections.BASE_TABLE_PROJECTION_NAME, + List.of() + ); + final var baseHold = location.addWeakReservationHold(baseEntry.getId(), () -> baseEntry); + Assertions.assertNotNull(baseHold); + baseEntry.mount(location); + // close the bootstrap hold so cache could in principle evict, but the aggregate's transitive hold should keep it + baseHold.close(); + + final PartialSegmentBundleCacheEntry aggEntry = PartialSegmentBundleCacheEntry.forBundle( + metadata, + AGG_BUNDLE, + List.of(baseEntry.getId()) + ); + final var aggHold = location.addWeakReservationHold(aggEntry.getId(), () -> aggEntry); + Assertions.assertNotNull(aggHold); + aggEntry.mount(location); + + // base must still be held by the aggregate's transitive hold; trying to reclaim its bytes should fail + final long baseSize = baseEntry.getSize(); + Assertions.assertTrue( + location.currentSizeBytes() >= baseSize, + "base entry size should remain charged to the location while held by the aggregate" + ); + + // unmounting the aggregate releases the parent hold; base is then evictable + aggHold.close(); + aggEntry.unmount(); + } + + /** + * {@code mount} must throw {@link DruidException}, and leave the bundle unmounted, when the metadata entry was + * reserved and mounted on a different {@link StorageLocation} than the one passed to {@code mount}. + */ + @Test + void
testMountFailsIfMetadataNotRegisteredWithLocation() throws IOException + { + final StorageLocation location = new StorageLocation(cacheDir, ESTIMATE * 8, null); + final PartialSegmentMetadataCacheEntry metadata = newMetadataEntry(); + // mount metadata standalone without registering with the location, so no hold can be acquired below + final File anotherDir = new File(perTestTempDir, "adhoc"); + FileUtils.mkdirp(anotherDir); + final StorageLocation otherLocation = new StorageLocation(anotherDir, ESTIMATE * 8, null); + Assertions.assertTrue(otherLocation.reserve(metadata)); + metadata.mount(otherLocation); + + final PartialSegmentBundleCacheEntry baseEntry = PartialSegmentBundleCacheEntry.forBundle( + metadata, + Projections.BASE_TABLE_PROJECTION_NAME, + List.of() + ); + + Assertions.assertThrows(DruidException.class, () -> baseEntry.mount(location)); + Assertions.assertFalse(baseEntry.isMounted()); + } + + /** + * Calls {@code mount} twice on the same bundle and location; both calls must return normally and the bundle must + * report mounted after each. + */ + @Test + void testMountIsIdempotent() throws IOException + { + final StorageLocation location = new StorageLocation(cacheDir, ESTIMATE * 8, null); + final PartialSegmentMetadataCacheEntry metadata = newMetadataEntry(); + Assertions.assertTrue(location.reserve(metadata)); + metadata.mount(location); + + final PartialSegmentBundleCacheEntry baseEntry = PartialSegmentBundleCacheEntry.forBundle( + metadata, + Projections.BASE_TABLE_PROJECTION_NAME, + List.of() + ); + Assertions.assertNotNull(location.addWeakReservationHold(baseEntry.getId(), () -> baseEntry)); + + baseEntry.mount(location); + Assertions.assertTrue(baseEntry.isMounted()); + baseEntry.mount(location); + Assertions.assertTrue(baseEntry.isMounted()); + } + + /** + * Mounts the base bundle and maps one internal file through the metadata's file mapper (expecting exactly one + * downloaded file). Then checks that {@code unmount} deletes the first container's file and clears the mounted + * flag, and that the bundle mounts again after a new weak reservation hold is taken. + */ + @Test + void testUnmountEvictsContainersAndAllowsRemount() throws IOException + { + final StorageLocation location = new StorageLocation(cacheDir, ESTIMATE * 8, null); + final PartialSegmentMetadataCacheEntry metadata = newMetadataEntry(); + Assertions.assertTrue(location.reserve(metadata)); + metadata.mount(location); + + final
PartialSegmentBundleCacheEntry baseEntry = PartialSegmentBundleCacheEntry.forBundle( + metadata, + Projections.BASE_TABLE_PROJECTION_NAME, + List.of() + ); + Assertions.assertNotNull(location.addWeakReservationHold(baseEntry.getId(), () -> baseEntry)); + baseEntry.mount(location); + + // download a file so the bitmap has something set, then verify unmount clears the container file + final PartialSegmentFileMapperV10 mapper = metadata.getFileMapper(); + Assertions.assertNotNull(mapper); + final String anyFile = mapper.getInternalFilenames().stream().findFirst().orElseThrow(); + Assertions.assertNotNull(mapper.mapFile(anyFile)); + Assertions.assertEquals(1, mapper.getDownloadedFiles().size()); + final PartialSegmentBundleCacheEntry.BundleContainerRef evictedRef = baseEntry.getContainerRefs().getFirst(); + final String evictedMapperFilename = + evictedRef.externalFilename() != null ? evictedRef.externalFilename() : IndexIO.V10_FILE_NAME; + final File evictedFile = new File( + cacheDir, + StringUtils.format("%s.container.%05d", evictedMapperFilename, evictedRef.containerIndex()) + ); + Assertions.assertTrue(evictedFile.exists()); + + baseEntry.unmount(); + + Assertions.assertFalse(baseEntry.isMounted()); + Assertions.assertFalse(evictedFile.exists(), "container file should be deleted on unmount"); + + // remount works (e.g. 
after cache eviction + re-acquire) + Assertions.assertNotNull(location.addWeakReservationHold(baseEntry.getId(), () -> baseEntry)); + baseEntry.mount(location); + Assertions.assertTrue(baseEntry.isMounted()); + } + + /** + * Releases eight pool threads at once into {@code mount} on the same bundle and location, and requires that all of + * them finish within 30 seconds, that none throws, and that the bundle ends up mounted. + * + * NOTE(review): the caught Throwable is only counted, so a failure reports a non-zero count without its cause. The + * test also does not observe how many times the underlying mount work ran. + */ + @Test + void testConcurrentMountIsDeduplicated() throws Exception + { + final StorageLocation location = new StorageLocation(cacheDir, ESTIMATE * 8, null); + final PartialSegmentMetadataCacheEntry metadata = newMetadataEntry(); + Assertions.assertTrue(location.reserve(metadata)); + metadata.mount(location); + + final PartialSegmentBundleCacheEntry baseEntry = PartialSegmentBundleCacheEntry.forBundle( + metadata, + Projections.BASE_TABLE_PROJECTION_NAME, + List.of() + ); + Assertions.assertNotNull(location.addWeakReservationHold(baseEntry.getId(), () -> baseEntry)); + + final int threads = 8; + final CountDownLatch start = new CountDownLatch(1); + final CountDownLatch done = new CountDownLatch(threads); + final AtomicInteger errors = new AtomicInteger(); + final ExecutorService exec = Execs.multiThreaded(threads, "partial-segment-tests-%d"); + try { + for (int i = 0; i < threads; i++) { + exec.submit(() -> { + try { + start.await(); + baseEntry.mount(location); + } + catch (Throwable t) { + errors.incrementAndGet(); + } + finally { + done.countDown(); + } + }); + } + start.countDown(); + Assertions.assertTrue(done.await(30, TimeUnit.SECONDS)); + Assertions.assertEquals(0, errors.get()); + Assertions.assertTrue(baseEntry.isMounted()); + } + finally { + exec.shutdownNow(); + } + } + + /** + * Mounting an aggregate bundle whose declared parent id (bundle name {@code "ghost"}) was never reserved must throw + * {@link DruidException} and leave it unmounted. Afterwards, an aggregate entry for the same bundle name that lists + * the mounted base bundle as parent must mount successfully. + * + * NOTE(review): the retry is performed on a newly created entry ({@code retry}), not on the {@code agg} instance + * whose mount failed. + */ + @Test + void testFailedMountClearsGateForRetry() throws IOException + { + final StorageLocation location = new StorageLocation(cacheDir, ESTIMATE * 8, null); + final PartialSegmentMetadataCacheEntry metadata = newMetadataEntry(); + Assertions.assertTrue(location.reserve(metadata)); + metadata.mount(location); + + // ask for an aggregate entry but pass a parent that was never registered -> mount should fail + final PartialSegmentBundleCacheEntry agg =
PartialSegmentBundleCacheEntry.forBundle( + metadata, + AGG_BUNDLE, + List.of(new PartialSegmentBundleCacheEntryIdentifier(SEGMENT_ID, "ghost")) + ); + Assertions.assertNotNull(location.addWeakReservationHold(agg.getId(), () -> agg)); + Assertions.assertThrows(DruidException.class, () -> agg.mount(location)); + Assertions.assertFalse(agg.isMounted()); + + // a subsequent retry with a valid parent should succeed (gate must have been cleared) + final PartialSegmentBundleCacheEntry baseEntry = PartialSegmentBundleCacheEntry.forBundle( + metadata, + Projections.BASE_TABLE_PROJECTION_NAME, + List.of() + ); + Assertions.assertNotNull(location.addWeakReservationHold(baseEntry.getId(), () -> baseEntry)); + baseEntry.mount(location); + + final PartialSegmentBundleCacheEntry retry = PartialSegmentBundleCacheEntry.forBundle( + metadata, + AGG_BUNDLE, + List.of(baseEntry.getId()) + ); + Assertions.assertNotNull(location.addWeakReservationHold(retry.getId(), () -> retry)); + retry.mount(location); + Assertions.assertTrue(retry.isMounted()); + } + + /** + * While a reference obtained from {@code acquireReference()} is open, {@code unmount} must leave the first + * container's file on disk and the bundle reporting mounted. Closing that reference must delete the file and leave + * the bundle unmounted. + */ + @Test + void testUnmountDefersContainerEvictionWhileReferenceHeld() throws Exception + { + final StorageLocation location = new StorageLocation(cacheDir, ESTIMATE * 8, null); + final PartialSegmentMetadataCacheEntry metadata = newMetadataEntry(); + Assertions.assertTrue(location.reserve(metadata)); + metadata.mount(location); + + final PartialSegmentBundleCacheEntry base = PartialSegmentBundleCacheEntry.forBundle( + metadata, + Projections.BASE_TABLE_PROJECTION_NAME, + List.of() + ); + Assertions.assertNotNull(location.addWeakReservationHold(base.getId(), () -> base)); + base.mount(location); + + // download a file so its container is materialized on disk + final PartialSegmentFileMapperV10 mapper = metadata.getFileMapper(); + Assertions.assertNotNull(mapper); + final String anyFile = mapper.getInternalFilenames().stream().findFirst().orElseThrow(); + Assertions.assertNotNull(mapper.mapFile(anyFile)); + final
PartialSegmentBundleCacheEntry.BundleContainerRef containerRef = base.getContainerRefs().getFirst(); + final String mapperFilename = + containerRef.externalFilename() != null ? containerRef.externalFilename() : IndexIO.V10_FILE_NAME; + final File containerFile = new File( + cacheDir, + StringUtils.format("%s.container.%05d", mapperFilename, containerRef.containerIndex()) + ); + Assertions.assertTrue(containerFile.exists()); + + final Closeable ref = base.acquireReference(); + base.unmount(); + Assertions.assertTrue( + containerFile.exists(), + "container file should persist while a reference is held, even after unmount" + ); + Assertions.assertTrue(base.isMounted(), "bundle should not have been cleaned up while reference is held"); + + ref.close(); + Assertions.assertFalse(containerFile.exists(), "container file should be deleted after last reference releases"); + Assertions.assertFalse(base.isMounted()); + } + + /** + * {@code forBundle} must accept the bundle name {@code "nested/group"}: the segment is written by + * {@code writeSlashyGroupSegment}, and the resulting entry must report that exact name and at least one container + * ref. + */ + @Test + void testForBundleAcceptsBundleNameContainingSlash() throws IOException + { + // Bundle names are matched by exact equality against the container's explicit bundle field, so names with '/' + // are unambiguous. forBundle should accept them as long as a container exists with that exact bundle name. + // Build a V10 segment with a slashy bundle name and verify the cache layer attributes containers correctly.
+ final File deepDir = writeSlashyGroupSegment("nested/group"); + final File cache = new File(perTestTempDir, "slashy_cache_" + ThreadLocalRandom.current().nextInt(Integer.MAX_VALUE)); + FileUtils.mkdirp(cache); + final StorageLocation location = new StorageLocation(cache, ESTIMATE * 8, null); + final PartialSegmentMetadataCacheEntry metadata = new PartialSegmentMetadataCacheEntry( + SEGMENT_ID, + cache, + IndexIO.V10_FILE_NAME, + List.of(), + new DirectoryBackedRangeReader(deepDir), + JSON_MAPPER, + ESTIMATE + ); + Assertions.assertTrue(location.reserve(metadata)); + metadata.mount(location); + + final PartialSegmentBundleCacheEntry bundle = PartialSegmentBundleCacheEntry.forBundle( + metadata, + "nested/group", + List.of() + ); + Assertions.assertEquals("nested/group", bundle.getBundleName()); + Assertions.assertFalse(bundle.getContainerRefs().isEmpty()); + } + + /** + * Writes bundle {@code "proj1"} with one file added through the main builder and one through the external builder + * for {@code "ext.segment"}. Then checks that {@code forBundle} yields exactly two container refs (one with a null + * external filename, one naming the external file) and that mounting creates a container file for each ref in the + * cache directory. + */ + @Test + void testForBundleSpansMainAndExternalContainers() throws IOException + { + // Bundle "proj1" lives in BOTH the main file and an external file. forBundle should pick up containers from + // both via the explicit bundle field, producing a single logical bundle spanning multiple physical files. + final String externalName = "ext.segment"; + final File deepDir = new File(perTestTempDir, "multi_deep_" + ThreadLocalRandom.current().nextInt(Integer.MAX_VALUE)); + FileUtils.mkdirp(deepDir); + try (SegmentFileBuilderV10 builder = + SegmentFileBuilderV10.create(JSON_MAPPER, deepDir)) { + // Attach the external builder BEFORE startFileBundle so the group propagates to it.
+ final org.apache.druid.segment.file.SegmentFileBuilder external = builder.getExternalBuilder(externalName); + builder.startFileBundle("proj1"); + + final File mainTmp = new File(perTestTempDir, "main-col.bin"); + Files.write(Ints.toByteArray(1), mainTmp); + builder.add("proj1/main_col", mainTmp); + + final File extTmp = new File(perTestTempDir, "ext-col.bin"); + Files.write(Ints.toByteArray(2), extTmp); + external.add("proj1/ext_col", extTmp); + } + + final File cache = new File(perTestTempDir, "multi_cache_" + ThreadLocalRandom.current().nextInt(Integer.MAX_VALUE)); + FileUtils.mkdirp(cache); + final StorageLocation location = new StorageLocation(cache, ESTIMATE * 8, null); + final PartialSegmentMetadataCacheEntry metadata = new PartialSegmentMetadataCacheEntry( + SEGMENT_ID, + cache, + IndexIO.V10_FILE_NAME, + List.of(externalName), + new DirectoryBackedRangeReader(deepDir), + JSON_MAPPER, + ESTIMATE + ); + Assertions.assertTrue(location.reserve(metadata)); + metadata.mount(location); + + final PartialSegmentBundleCacheEntry bundle = + PartialSegmentBundleCacheEntry.forBundle(metadata, "proj1", List.of()); + // Expect exactly two refs: one in main (externalFilename == null), one in external. + Assertions.assertEquals(2, bundle.getContainerRefs().size()); + final long mainRefCount = bundle.getContainerRefs().stream() + .filter(r -> r.externalFilename() == null).count(); + final long extRefCount = bundle.getContainerRefs().stream() + .filter(r -> externalName.equals(r.externalFilename())).count(); + Assertions.assertEquals(1, mainRefCount, "expected one main-file container ref"); + Assertions.assertEquals(1, extRefCount, "expected one external-file container ref"); + + // Mount the bundle and verify both containers are sparse-allocated under their respective targetFilenames. 
+ Assertions.assertNotNull(location.addWeakReservationHold(bundle.getId(), () -> bundle)); + bundle.mount(location); + for (PartialSegmentBundleCacheEntry.BundleContainerRef ref : bundle.getContainerRefs()) { + final String mf = ref.externalFilename() != null ? ref.externalFilename() : IndexIO.V10_FILE_NAME; + final File cf = new File(cache, StringUtils.format("%s.container.%05d", mf, ref.containerIndex())); + Assertions.assertTrue(cf.exists(), "expected container file " + cf); + } + } + + /** + * For a segment written by {@code writeRootOnlySegment} (no {@code startFileBundle} calls), the entry created for + * {@code SegmentFileBuilder.ROOT_BUNDLE_NAME} must have as many container refs as the segment file metadata has + * containers. + */ + @Test + void testForBundleRootOwnsAllUngroupedContainers() throws IOException + { + // A V10 segment written without any startFileBundle calls produces containers tagged with ROOT_BUNDLE_NAME. + // forBundle(ROOT_BUNDLE_NAME) must own every such container. + final File deepDir = writeRootOnlySegment(); + final File cache = new File(perTestTempDir, "root_cache_" + ThreadLocalRandom.current().nextInt(Integer.MAX_VALUE)); + FileUtils.mkdirp(cache); + final StorageLocation location = new StorageLocation(cache, ESTIMATE * 8, null); + final PartialSegmentMetadataCacheEntry metadata = new PartialSegmentMetadataCacheEntry( + SEGMENT_ID, + cache, + IndexIO.V10_FILE_NAME, + List.of(), + new DirectoryBackedRangeReader(deepDir), + JSON_MAPPER, + ESTIMATE + ); + Assertions.assertTrue(location.reserve(metadata)); + metadata.mount(location); + + final PartialSegmentBundleCacheEntry root = PartialSegmentBundleCacheEntry.forBundle( + metadata, + SegmentFileBuilder.ROOT_BUNDLE_NAME, + List.of() + ); + Assertions.assertEquals( + metadata.getSegmentFileMetadata().getContainers().size(), + root.getContainerRefs().size(), + "root bundle should own every container in a no-startFileBundle segment" + ); + } + + /** + * With the location's weak entries set to ephemeral, takes and immediately closes a weak reservation hold for the + * base bundle and confirms the bundle is no longer weak-reserved. It then calls {@code mount} without reserving + * again and requires that the call returns normally with the bundle not left mounted. + */ + @Test + void testMountRollsBackIfEntryNoLongerWeakReservedAtLocation() throws Exception + { + final StorageLocation location = new StorageLocation(cacheDir, ESTIMATE * 8, null); + // ephemeral mode: when a hold drops and no others remain, the weak entry is evicted from weakCacheEntries + //
immediately. Lets us simulate the race where the bundle's reservation goes away before mount() finishes. + location.setAreWeakEntriesEphemeral(true); + + final PartialSegmentMetadataCacheEntry metadata = newMetadataEntry(); + Assertions.assertTrue(location.reserve(metadata)); + metadata.mount(location); + + final PartialSegmentBundleCacheEntry base = PartialSegmentBundleCacheEntry.forBundle( + metadata, + Projections.BASE_TABLE_PROJECTION_NAME, + List.of() + ); + + // Reserve + immediately release the bootstrap hold. In ephemeral mode this evicts the bundle from + // weakCacheEntries. The bundle itself was never mounted, so its `mounted` flag is still false. + try (StorageLocation.ReservationHold hold = location.addWeakReservationHold(base.getId(), () -> base)) { + Assertions.assertNotNull(hold); + } + Assertions.assertFalse(location.isWeakReserved(base.getId()), "ephemeral release should have evicted"); + + // Mount without re-reserving. doMount's work succeeds (parents are fine, containers sparse-allocate), but the + // post-mount check must detect that this entry is no longer in the location's weak map and roll back. 
+ base.mount(location); + Assertions.assertFalse( + base.isMounted(), + "post-mount check should roll back when entry was evicted from the location during mount" + ); + } + + /** + * {@code acquireReference()} on a bundle that was created but never mounted must throw {@link DruidException}. + */ + @Test + void testAcquireReferenceBeforeMountThrows() throws IOException + { + final StorageLocation location = new StorageLocation(cacheDir, ESTIMATE * 8, null); + final PartialSegmentMetadataCacheEntry metadata = newMetadataEntry(); + Assertions.assertTrue(location.reserve(metadata)); + metadata.mount(location); + final PartialSegmentBundleCacheEntry base = PartialSegmentBundleCacheEntry.forBundle( + metadata, + Projections.BASE_TABLE_PROJECTION_NAME, + List.of() + ); + Assertions.assertThrows(DruidException.class, base::acquireReference); + } + + /** + * Mounts the base and aggregate bundles, then unmounts metadata, base, and aggregate in that order. Metadata and + * base must keep reporting mounted, and the metadata header file must stay on disk, until the aggregate is + * unmounted; after that all three report unmounted and the header file is gone. + */ + @Test + void testAggregateBundleHoldsReferenceOnBaseAndMetadata() throws Exception + { + final StorageLocation location = new StorageLocation(cacheDir, ESTIMATE * 8, null); + final PartialSegmentMetadataCacheEntry metadata = newMetadataEntry(); + Assertions.assertTrue(location.reserve(metadata)); + metadata.mount(location); + + final PartialSegmentBundleCacheEntry base = PartialSegmentBundleCacheEntry.forBundle( + metadata, + Projections.BASE_TABLE_PROJECTION_NAME, + List.of() + ); + Assertions.assertNotNull(location.addWeakReservationHold(base.getId(), () -> base)); + base.mount(location); + + final PartialSegmentBundleCacheEntry agg = PartialSegmentBundleCacheEntry.forBundle( + metadata, + AGG_BUNDLE, + List.of(base.getId()) + ); + Assertions.assertNotNull(location.addWeakReservationHold(agg.getId(), () -> agg)); + agg.mount(location); + + // Unmount metadata while bundles are still mounted (and holding references on it). Metadata's actual cleanup + // must defer until the bundles' references on metadata are released.
+ final File headerFile = new File( + cacheDir, + IndexIO.V10_FILE_NAME + PartialSegmentFileMapperV10.METADATA_HEADER_SUFFIX + ); + Assertions.assertTrue(headerFile.exists()); + metadata.unmount(); + Assertions.assertTrue(headerFile.exists(), "metadata header must persist while bundles reference it"); + Assertions.assertTrue(metadata.isMounted(), "metadata must stay mounted while bundles reference it"); + + // Unmount base. Cleanup is deferred because agg still references it. + base.unmount(); + Assertions.assertTrue(base.isMounted(), "base must stay mounted while agg references it"); + Assertions.assertTrue(headerFile.exists(), "metadata still alive via agg's indirect chain"); + + // Unmount agg. Its cleanup fires (releasing references on base + metadata), which cascades base's cleanup + // (releasing its remaining reference on metadata), which finally fires metadata's cleanup. + agg.unmount(); + Assertions.assertFalse(agg.isMounted()); + Assertions.assertFalse(base.isMounted()); + Assertions.assertFalse(metadata.isMounted()); + Assertions.assertFalse(headerFile.exists(), "metadata header deleted after full cascade"); + } + + /** + * Follows {@code metadata.snapshotLinkedBundles()} through the lifecycle: empty before any bundle mounts, exactly + * the base after it mounts, two entries after the aggregate mounts, both still present after the base is unmounted + * while the aggregate is mounted, and empty once the aggregate is unmounted. + */ + @Test + void testMountedBundleIsLinkedToMetadata() throws IOException + { + final StorageLocation location = new StorageLocation(cacheDir, ESTIMATE * 8, null); + final PartialSegmentMetadataCacheEntry metadata = newMetadataEntry(); + Assertions.assertTrue(location.reserve(metadata)); + metadata.mount(location); + + Assertions.assertTrue(metadata.snapshotLinkedBundles().isEmpty()); + + final PartialSegmentBundleCacheEntry base = PartialSegmentBundleCacheEntry.forBundle( + metadata, + Projections.BASE_TABLE_PROJECTION_NAME, + List.of() + ); + Assertions.assertNotNull(location.addWeakReservationHold(base.getId(), () -> base)); + base.mount(location); + + Assertions.assertEquals(1, metadata.snapshotLinkedBundles().size()); + Assertions.assertTrue(metadata.snapshotLinkedBundles().contains(base)); + + final PartialSegmentBundleCacheEntry agg =
PartialSegmentBundleCacheEntry.forBundle( + metadata, + AGG_BUNDLE, + List.of(base.getId()) + ); + Assertions.assertNotNull(location.addWeakReservationHold(agg.getId(), () -> agg)); + agg.mount(location); + + Assertions.assertEquals(2, metadata.snapshotLinkedBundles().size()); + + // Unmounting base while agg still holds a dependency reference on it must NOT actually clean up base. agg's + // ref keeps base alive (deferred cleanup). Both remain in the linked set. + base.unmount(); + Assertions.assertTrue( + metadata.snapshotLinkedBundles().contains(base), + "base must stay linked while agg holds its reference" + ); + Assertions.assertTrue(metadata.snapshotLinkedBundles().contains(agg)); + + // Unmounting agg releases its reference on base; base's deferred cleanup then cascades on the same thread. + agg.unmount(); + Assertions.assertTrue(metadata.snapshotLinkedBundles().isEmpty()); + } + + /** + * A mount that fails with {@link DruidException} (aggregate bundle whose parent id, bundle name {@code "ghost"}, + * was never reserved) must leave {@code metadata.snapshotLinkedBundles()} empty. + */ + @Test + void testFailedMountDoesNotLeaveDanglingLink() throws IOException + { + final StorageLocation location = new StorageLocation(cacheDir, ESTIMATE * 8, null); + final PartialSegmentMetadataCacheEntry metadata = newMetadataEntry(); + Assertions.assertTrue(location.reserve(metadata)); + metadata.mount(location); + + final PartialSegmentBundleCacheEntry agg = PartialSegmentBundleCacheEntry.forBundle( + metadata, + AGG_BUNDLE, + List.of(new PartialSegmentBundleCacheEntryIdentifier(SEGMENT_ID, "ghost")) + ); + Assertions.assertNotNull(location.addWeakReservationHold(agg.getId(), () -> agg)); + Assertions.assertThrows(DruidException.class, () -> agg.mount(location)); + + Assertions.assertTrue(metadata.snapshotLinkedBundles().isEmpty(), "failed mount must not register a link"); + } + + /** + * Calling {@code unmount} twice in a row on a mounted bundle must not throw. The test makes no assertion beyond + * both calls returning normally. + */ + @Test + void testUnmountIsIdempotent() throws IOException + { + final StorageLocation location = new StorageLocation(cacheDir, ESTIMATE * 8, null); + final PartialSegmentMetadataCacheEntry metadata = newMetadataEntry(); + Assertions.assertTrue(location.reserve(metadata)); + metadata.mount(location); +
+ final PartialSegmentBundleCacheEntry baseEntry = PartialSegmentBundleCacheEntry.forBundle( + metadata, + Projections.BASE_TABLE_PROJECTION_NAME, + List.of() + ); + Assertions.assertNotNull(location.addWeakReservationHold(baseEntry.getId(), () -> baseEntry)); + baseEntry.mount(location); + baseEntry.unmount(); + baseEntry.unmount(); // no-op + } + + /** + * Creates a new {@link PartialSegmentMetadataCacheEntry} for {@link #SEGMENT_ID} from the per-test + * {@link #cacheDir}, {@code IndexIO.V10_FILE_NAME}, an empty list, a {@link DirectoryBackedRangeReader} over + * {@link #deepStorageDir}, {@link #JSON_MAPPER} and {@link #ESTIMATE}. The entry is neither reserved nor mounted + * here; callers do that themselves. + * + * NOTE(review): the empty list is presumably the set of external file names (another test in this class passes an + * external file name in that position); confirm against the constructor. + */ + private PartialSegmentMetadataCacheEntry newMetadataEntry() + { + return new PartialSegmentMetadataCacheEntry( + SEGMENT_ID, + cacheDir, + IndexIO.V10_FILE_NAME, + List.of(), + new DirectoryBackedRangeReader(deepStorageDir), + JSON_MAPPER, + ESTIMATE + ); + } + + /** + * Build a small V10 segment using {@link SegmentFileBuilderV10} directly (i.e., + * without going through IndexMergerV10) with a single bundle whose name contains a {@code /}. Used to verify the + * cache layer attributes containers via the explicit {@code bundle} field and tolerates slashy names. + * + * @param groupName bundle name passed to {@code startFileBundle}; also used as the prefix of the three added + * file names ({@code groupName + "/col" + i}) + * @return the newly created directory under {@link #perTestTempDir} that the builder was created with + */ + private File writeSlashyGroupSegment(String groupName) throws IOException + { + final File deepDir = new File(perTestTempDir, "slashy_deep_" + ThreadLocalRandom.current().nextInt(Integer.MAX_VALUE)); + FileUtils.mkdirp(deepDir); + try (SegmentFileBuilderV10 builder = + SegmentFileBuilderV10.create(JSON_MAPPER, deepDir)) { + builder.startFileBundle(groupName); + for (int i = 0; i < 3; i++) { + final File tmp = new File(perTestTempDir, "slashy-col" + i + ".bin"); + Files.write(Ints.toByteArray(i), tmp); + builder.add(groupName + "/col" + i, tmp); + } + } + return deepDir; + } + + /** + * Build a small V10 segment using {@link SegmentFileBuilderV10} directly with NO + * {@code startFileBundle} calls. Containers default to {@link SegmentFileBuilder#ROOT_BUNDLE_NAME}, simulating an + * older unnamed segment whose containers all live under the root bundle.
+ */ + private File writeRootOnlySegment() throws IOException + { + final File deepDir = new File(perTestTempDir, "root_deep_" + ThreadLocalRandom.current().nextInt(Integer.MAX_VALUE)); + FileUtils.mkdirp(deepDir); + try (SegmentFileBuilderV10 builder = + SegmentFileBuilderV10.create(JSON_MAPPER, deepDir)) { + // Never call startFileBundle; all writes default to the root bundle. + for (int i = 0; i < 3; i++) { + final File tmp = new File(perTestTempDir, "root-col" + i + ".bin"); + Files.write(Ints.toByteArray(i), tmp); + builder.add("col" + i, tmp); + } + } + return deepDir; + } + +} diff --git a/server/src/test/java/org/apache/druid/segment/loading/PartialSegmentCacheBootstrapTest.java b/server/src/test/java/org/apache/druid/segment/loading/PartialSegmentCacheBootstrapTest.java new file mode 100644 index 000000000000..93776d6a6cd5 --- /dev/null +++ b/server/src/test/java/org/apache/druid/segment/loading/PartialSegmentCacheBootstrapTest.java @@ -0,0 +1,506 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.segment.loading; + +import com.fasterxml.jackson.databind.ObjectMapper; +import org.apache.druid.data.input.InputRow; +import org.apache.druid.data.input.ListBasedInputRow; +import org.apache.druid.data.input.impl.AggregateProjectionSpec; +import org.apache.druid.data.input.impl.DimensionsSpec; +import org.apache.druid.data.input.impl.LongDimensionSchema; +import org.apache.druid.data.input.impl.StringDimensionSchema; +import org.apache.druid.error.DruidException; +import org.apache.druid.java.util.common.DateTimes; +import org.apache.druid.java.util.common.FileUtils; +import org.apache.druid.java.util.common.Intervals; +import org.apache.druid.java.util.common.StringUtils; +import org.apache.druid.query.aggregation.CountAggregatorFactory; +import org.apache.druid.query.aggregation.LongSumAggregatorFactory; +import org.apache.druid.segment.IndexBuilder; +import org.apache.druid.segment.IndexIO; +import org.apache.druid.segment.IndexSpec; +import org.apache.druid.segment.TestHelper; +import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.column.RowSignature; +import org.apache.druid.segment.data.CompressionStrategy; +import org.apache.druid.segment.file.DirectoryBackedRangeReader; +import org.apache.druid.segment.file.PartialSegmentFileMapperV10; +import org.apache.druid.segment.incremental.IncrementalIndexSchema; +import org.apache.druid.segment.projections.Projections; +import org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMediumFactory; +import org.apache.druid.timeline.SegmentId; +import org.joda.time.DateTime; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashSet; +import 
java.util.List; +import java.util.Set; +import java.util.concurrent.ThreadLocalRandom; +import java.util.stream.Collectors; + +class PartialSegmentCacheBootstrapTest +{ + private static final ObjectMapper JSON_MAPPER = TestHelper.makeJsonMapper(); + private static final SegmentId SEGMENT_ID = SegmentId.of("test", Intervals.of("2025/2026"), "v1", 0); + private static final String AGG_BUNDLE = "dim1_metric1_sum"; + private static final long ESTIMATE = 16 * 1024 * 1024L; + + private static final DateTime TIME = DateTimes.of("2025-01-01"); + private static final RowSignature ROW_SIGNATURE = RowSignature.builder() + .add("dim1", ColumnType.STRING) + .add("metric1", ColumnType.LONG) + .build(); + + private static final List PROJECTIONS = Collections.singletonList( + AggregateProjectionSpec.builder(AGG_BUNDLE) + .groupingColumns(new StringDimensionSchema("dim1")) + .aggregators( + new LongSumAggregatorFactory("_metric1_sum", "metric1"), + new CountAggregatorFactory("_count") + ) + .build() + ); + + private static final List ROWS = Arrays.asList( + new ListBasedInputRow(ROW_SIGNATURE, TIME, ROW_SIGNATURE.getColumnNames(), Arrays.asList("a", 1L)), + new ListBasedInputRow(ROW_SIGNATURE, TIME.plusMinutes(1), ROW_SIGNATURE.getColumnNames(), Arrays.asList("a", 2L)), + new ListBasedInputRow(ROW_SIGNATURE, TIME.plusMinutes(2), ROW_SIGNATURE.getColumnNames(), Arrays.asList("b", 3L)), + new ListBasedInputRow(ROW_SIGNATURE, TIME.plusMinutes(3), ROW_SIGNATURE.getColumnNames(), Arrays.asList("b", 4L)) + ); + + @TempDir + static File sharedTempDir; + + private static File deepStorageDir; + + @TempDir + File perTestTempDir; + + private File cacheDir; + + @BeforeAll + static void buildSegment() + { + final File tmp = new File(sharedTempDir, "build_" + ThreadLocalRandom.current().nextInt()); + deepStorageDir = IndexBuilder.create() + .useV10() + .tmpDir(tmp) + .segmentWriteOutMediumFactory(OffHeapMemorySegmentWriteOutMediumFactory.instance()) + .schema( + IncrementalIndexSchema.builder() 
+ .withDimensionsSpec( + DimensionsSpec.builder() + .setDimensions( + List.of( + new StringDimensionSchema("dim1"), + new LongDimensionSchema("metric1") + ) + ) + .build() + ) + .withRollup(false) + .withMinTimestamp(TIME.getMillis()) + .withProjections(PROJECTIONS) + .build() + ) + .indexSpec(IndexSpec.builder().withMetadataCompression(CompressionStrategy.NONE).build()) + .rows(ROWS) + .buildMMappedIndexFile(); + } + + @BeforeEach + void setupPerTest() throws IOException + { + cacheDir = new File(perTestTempDir, "cache_" + ThreadLocalRandom.current().nextInt(Integer.MAX_VALUE)); + FileUtils.mkdirp(cacheDir); + } + + @Test + void testRestoreRebuildsBothEntriesFromDisk() throws IOException + { + primeOnDiskState(); + + final StorageLocation location = new StorageLocation(cacheDir, ESTIMATE * 8, null); + final PartialSegmentCacheBootstrap.RestoreResult result = PartialSegmentCacheBootstrap.restoreFromDisk( + SEGMENT_ID, + cacheDir, + IndexIO.V10_FILE_NAME, + List.of(), + JSON_MAPPER, + location + ); + + Assertions.assertNotNull(result.getMetadata()); + Assertions.assertTrue(result.getMetadata().isMounted()); + // metadata entry size matches on-disk header size, NOT a pessimistic estimate + final long headerSize = new File( + cacheDir, + IndexIO.V10_FILE_NAME + PartialSegmentFileMapperV10.METADATA_HEADER_SUFFIX + ).length(); + Assertions.assertEquals(headerSize, result.getMetadata().getSize()); + + // bundles: should have at least __base; aggregate may or may not exist on disk depending on what we primed + Assertions.assertFalse(result.getBundles().isEmpty()); + final Set bundleNames = result.getBundles().stream() + .map(PartialSegmentBundleCacheEntry::getBundleName) + .collect(Collectors.toSet()); + Assertions.assertTrue(bundleNames.contains(Projections.BASE_TABLE_PROJECTION_NAME)); + Assertions.assertTrue(bundleNames.contains(AGG_BUNDLE)); + for (PartialSegmentBundleCacheEntry bundle : result.getBundles()) { + Assertions.assertTrue(bundle.isMounted(), "bundle " + 
bundle.getBundleName() + " should be mounted"); + } + } + + @Test + void testRestoreEstablishesParentHoldOnBase() throws IOException + { + primeOnDiskState(); + + final StorageLocation location = new StorageLocation(cacheDir, ESTIMATE * 8, null); + final PartialSegmentCacheBootstrap.RestoreResult result = PartialSegmentCacheBootstrap.restoreFromDisk( + SEGMENT_ID, + cacheDir, + IndexIO.V10_FILE_NAME, + List.of(), + JSON_MAPPER, + location + ); + + // aggregate bundle should declare __base as its parent + final PartialSegmentBundleCacheEntry agg = result.getBundles().stream() + .filter(b -> AGG_BUNDLE.equals(b.getBundleName())) + .findFirst().orElseThrow(); + Assertions.assertEquals(1, agg.getParentEntryIds().size()); + Assertions.assertEquals( + Projections.BASE_TABLE_PROJECTION_NAME, + agg.getParentEntryIds().get(0).bundleName() + ); + } + + @Test + void testRestoreSkipsBundlesWithMissingContainers() throws IOException + { + primeOnDiskState(); + + // remove the aggregate bundle's container file(s), base's containers stay + final PartialSegmentFileMapperV10 introspect = PartialSegmentFileMapperV10.create( + new DirectoryBackedRangeReader(deepStorageDir), + JSON_MAPPER, + cacheDir, + IndexIO.V10_FILE_NAME, + List.of() + ); + final List aggContainers = new ArrayList<>(); + final String prefix = AGG_BUNDLE + "/"; + for (var entry : introspect.getSegmentFileMetadata().getFiles().entrySet()) { + if (entry.getKey().startsWith(prefix)) { + aggContainers.add(entry.getValue().getContainer()); + } + } + introspect.close(); + for (Integer ci : aggContainers) { + final File cf = new File(cacheDir, StringUtils.format("%s.container.%05d", IndexIO.V10_FILE_NAME, ci)); + // only delete if no other bundle shares this container + if (cf.exists()) { + cf.delete(); + } + } + + final StorageLocation location = new StorageLocation(cacheDir, ESTIMATE * 8, null); + final PartialSegmentCacheBootstrap.RestoreResult result = PartialSegmentCacheBootstrap.restoreFromDisk( + SEGMENT_ID, + 
cacheDir, + IndexIO.V10_FILE_NAME, + List.of(), + JSON_MAPPER, + location + ); + + // base should still be restored; aggregate is skipped because its containers were removed + final Set bundleNames = result.getBundles().stream() + .map(PartialSegmentBundleCacheEntry::getBundleName) + .collect(Collectors.toSet()); + Assertions.assertTrue(bundleNames.contains(Projections.BASE_TABLE_PROJECTION_NAME)); + // Note: base and aggregate may legitimately share a container (small test segment with sub-cap data), in that + // case both end up restored. Don't assert absence of aggregate; assert presence of base. + } + + @Test + void testRestoreDeletesOrphanedBundleAndSkipsIt() throws IOException + { + primeOnDiskState(); + + // Remove base's container files. After this, base is unrestorable on disk, which makes the aggregate (which + // depends on base) an orphan that must be deleted rather than restored in a degenerate state. + final PartialSegmentFileMapperV10 introspect = PartialSegmentFileMapperV10.create( + new DirectoryBackedRangeReader(deepStorageDir), + JSON_MAPPER, + cacheDir, + IndexIO.V10_FILE_NAME, + List.of() + ); + final Set baseContainers = new HashSet<>(); + final Set aggContainers = new HashSet<>(); + for (var entry : introspect.getSegmentFileMetadata().getFiles().entrySet()) { + final String fileName = entry.getKey(); + final int slash = fileName.indexOf('/'); + if (slash < 0) { + continue; + } + final String group = fileName.substring(0, slash); + if (Projections.BASE_TABLE_PROJECTION_NAME.equals(group)) { + baseContainers.add(entry.getValue().getContainer()); + } else if (AGG_BUNDLE.equals(group)) { + aggContainers.add(entry.getValue().getContainer()); + } + } + introspect.close(); + + // Only delete base containers; we want to verify the bootstrap deletes the aggregate's containers itself. + for (Integer ci : baseContainers) { + // Skip containers that base shares with aggregate (test segment is small enough that they may share). 
+ if (aggContainers.contains(ci)) { + continue; + } + final File cf = new File(cacheDir, StringUtils.format("%s.container.%05d", IndexIO.V10_FILE_NAME, ci)); + Assertions.assertTrue(cf.exists()); + Assertions.assertTrue(cf.delete()); + } + + // If base shared all its containers with aggregate, this test scenario isn't reachable skip in that case. + final Set orphanContainers = new HashSet<>(aggContainers); + orphanContainers.removeAll(baseContainers); + if (orphanContainers.isEmpty()) { + // base and aggregate share all containers; aggregate isn't truly orphaned. skip. + return; + } + + final StorageLocation location = new StorageLocation(cacheDir, ESTIMATE * 8, null); + final PartialSegmentCacheBootstrap.RestoreResult result = PartialSegmentCacheBootstrap.restoreFromDisk( + SEGMENT_ID, + cacheDir, + IndexIO.V10_FILE_NAME, + List.of(), + JSON_MAPPER, + location + ); + + // Neither base (containers missing) nor aggregate (orphaned, parent missing) should be restored. + final Set restoredNames = result.getBundles().stream() + .map(PartialSegmentBundleCacheEntry::getBundleName) + .collect(Collectors.toSet()); + Assertions.assertFalse(restoredNames.contains(AGG_BUNDLE), "orphan must not be restored"); + + // Aggregate's container files (those exclusively owned by it) must have been deleted by the orphan cleanup. 
+ for (Integer ci : orphanContainers) { + final File cf = new File(cacheDir, StringUtils.format("%s.container.%05d", IndexIO.V10_FILE_NAME, ci)); + Assertions.assertFalse( + cf.exists(), + "orphan's exclusive container " + ci + " must have been deleted, but " + cf + " still exists" + ); + } + } + + @Test + void testRestoreRollsBackOnBundleReservationFailure() throws IOException + { + primeOnDiskState(); + + final File headerFile = new File( + cacheDir, + IndexIO.V10_FILE_NAME + PartialSegmentFileMapperV10.METADATA_HEADER_SUFFIX + ); + // size location exactly to the header size: metadata reservation fits, but the first bundle's weak reservation + // has 0 bytes of remaining budget and no weak entries to reclaim, so addWeakReservationHold returns null + final StorageLocation location = new StorageLocation(cacheDir, headerFile.length(), null); + + Assertions.assertThrows( + DruidException.class, + () -> PartialSegmentCacheBootstrap.restoreFromDisk( + SEGMENT_ID, + cacheDir, + IndexIO.V10_FILE_NAME, + List.of(), + JSON_MAPPER, + location + ) + ); + + // rollback must release the metadata reservation and leave no static/weak entries behind + Assertions.assertEquals(0, location.currentSizeBytes(), "metadata reservation must be released on bootstrap failure"); + Assertions.assertFalse( + location.isReserved(new SegmentCacheEntryIdentifier(SEGMENT_ID)), + "metadata entry must be removed from the static map on bootstrap failure" + ); + Assertions.assertEquals(0, location.getWeakEntryCount(), "no bundle entries should linger on bootstrap failure"); + // rollback flows through location.release -> metadata.unmount, which unconditionally clears the entry's + // storage-location footprint. A subsequent acquire will cold-fetch from deep storage. 
+ Assertions.assertFalse(headerFile.exists(), "bootstrap failure deletes the header via the unmount cleanup path"); + } + + @Test + void testRestoreFailsWhenHeaderMissing() + { + // no priming: cacheDir is empty + final StorageLocation location = new StorageLocation(cacheDir, ESTIMATE * 8, null); + Assertions.assertThrows( + DruidException.class, + () -> PartialSegmentCacheBootstrap.restoreFromDisk( + SEGMENT_ID, + cacheDir, + IndexIO.V10_FILE_NAME, + List.of(), + JSON_MAPPER, + location + ) + ); + } + + @Test + void testIsPartialSegmentLayoutDetectsHeader() throws IOException + { + Assertions.assertFalse(PartialSegmentCacheBootstrap.isPartialSegmentLayout(cacheDir, IndexIO.V10_FILE_NAME)); + primeOnDiskState(); + Assertions.assertTrue(PartialSegmentCacheBootstrap.isPartialSegmentLayout(cacheDir, IndexIO.V10_FILE_NAME)); + Assertions.assertFalse(PartialSegmentCacheBootstrap.isPartialSegmentLayout(null, IndexIO.V10_FILE_NAME)); + Assertions.assertFalse(PartialSegmentCacheBootstrap.isPartialSegmentLayout( + new File(perTestTempDir, "nonexistent"), + IndexIO.V10_FILE_NAME + )); + } + + @Test + void testBitmapRepairClearsBitsForMissingContainers() throws IOException + { + primeOnDiskState(); + // download a file in the aggregate bundle to set a bit, then close (persists the bitmap) + final PartialSegmentFileMapperV10 mapper = PartialSegmentFileMapperV10.create( + new DirectoryBackedRangeReader(deepStorageDir), + JSON_MAPPER, + cacheDir, + IndexIO.V10_FILE_NAME, + List.of() + ); + final String prefix = AGG_BUNDLE + "/"; + String fileInAgg = null; + int aggContainerIdx = -1; + for (var entry : mapper.getSegmentFileMetadata().getFiles().entrySet()) { + if (entry.getKey().startsWith(prefix)) { + fileInAgg = entry.getKey(); + aggContainerIdx = entry.getValue().getContainer(); + // ensure this container is exclusively the aggregate's, otherwise we won't be able to test + // the repair behavior; check that __base doesn't also use this container + final int idx = 
aggContainerIdx; + boolean baseSharesContainer = mapper.getSegmentFileMetadata().getFiles().entrySet().stream() + .anyMatch(e -> e.getKey().startsWith(Projections.BASE_TABLE_PROJECTION_NAME + "/") + && e.getValue().getContainer() == idx); + if (!baseSharesContainer) { + break; + } + fileInAgg = null; + } + } + if (fileInAgg == null) { + // small segment: base + aggregate share container 0. Repair behavior is then a no-op, just skip. + mapper.close(); + return; + } + Assertions.assertNotNull(mapper.mapFile(fileInAgg), "expected file " + fileInAgg + " to be downloadable"); + Assertions.assertTrue(mapper.getDownloadedFiles().contains(fileInAgg)); + mapper.close(); + + // now delete the aggregate container file out from under the bitmap + final File aggContainer = new File( + cacheDir, + StringUtils.format("%s.container.%05d", IndexIO.V10_FILE_NAME, aggContainerIdx) + ); + Assertions.assertTrue(aggContainer.delete()); + + // re-open the mapper: the bitmap-vs-container repair should clear the bit for the missing file + try (PartialSegmentFileMapperV10 restored = PartialSegmentFileMapperV10.create( + new DirectoryBackedRangeReader(deepStorageDir), + JSON_MAPPER, + cacheDir, + IndexIO.V10_FILE_NAME, + List.of() + )) { + Assertions.assertFalse( + restored.getDownloadedFiles().contains(fileInAgg), + "bitmap repair should have cleared the bit for " + fileInAgg + ); + } + } + + /** + * Populate the per-segment cache dir with the on-disk artifacts a previous historical run would have left behind: + * the V10 header file plus sparse-allocated container files for the base and aggregate bundles. 
+ */ + private void primeOnDiskState() throws IOException + { + final StorageLocation seedLocation = new StorageLocation(cacheDir, ESTIMATE * 8, null); + final PartialSegmentMetadataCacheEntry seedMeta = new PartialSegmentMetadataCacheEntry( + SEGMENT_ID, + cacheDir, + IndexIO.V10_FILE_NAME, + List.of(), + new DirectoryBackedRangeReader(deepStorageDir), + JSON_MAPPER, + ESTIMATE + ); + Assertions.assertTrue(seedLocation.reserve(seedMeta)); + seedMeta.mount(seedLocation); + + final PartialSegmentBundleCacheEntry base = PartialSegmentBundleCacheEntry.forBundle( + seedMeta, + Projections.BASE_TABLE_PROJECTION_NAME, + List.of() + ); + final var baseHold = seedLocation.addWeakReservationHold(base.getId(), () -> base); + Assertions.assertNotNull(baseHold); + base.mount(seedLocation); + + final PartialSegmentBundleCacheEntry agg = PartialSegmentBundleCacheEntry.forBundle( + seedMeta, + AGG_BUNDLE, + List.of(base.getId()) + ); + final var aggHold = seedLocation.addWeakReservationHold(agg.getId(), () -> agg); + Assertions.assertNotNull(aggHold); + agg.mount(seedLocation); + + // Leave on-disk state behind: unmount the bundles (which deletes container files!), that's the wrong final + // state. Instead, we want containers ON disk, so leave bundles mounted but close the file mapper. Since the + // restore path re-opens via PartialSegmentFileMapperV10.create which is idempotent w.r.t. on-disk files, + // un-mount on the SEED side AFTER files are sparse-allocated would also delete them. So we just leave the + // seed mounted: at test end @TempDir cleans up. 
+ aggHold.close(); + baseHold.close(); + } + +} diff --git a/server/src/test/java/org/apache/druid/segment/loading/PartialSegmentMetadataCacheEntryTest.java b/server/src/test/java/org/apache/druid/segment/loading/PartialSegmentMetadataCacheEntryTest.java new file mode 100644 index 000000000000..4beaf9ba4ffa --- /dev/null +++ b/server/src/test/java/org/apache/druid/segment/loading/PartialSegmentMetadataCacheEntryTest.java @@ -0,0 +1,423 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.segment.loading; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.io.Files; +import com.google.common.primitives.Ints; +import org.apache.druid.error.DruidException; +import org.apache.druid.java.util.common.FileUtils; +import org.apache.druid.java.util.common.Intervals; +import org.apache.druid.java.util.common.StringUtils; +import org.apache.druid.java.util.common.concurrent.Execs; +import org.apache.druid.segment.IndexIO; +import org.apache.druid.segment.TestHelper; +import org.apache.druid.segment.data.CompressionStrategy; +import org.apache.druid.segment.file.CountingRangeReader; +import org.apache.druid.segment.file.DirectoryBackedRangeReader; +import org.apache.druid.segment.file.PartialSegmentFileMapperV10; +import org.apache.druid.segment.file.SegmentFileBuilderV10; +import org.apache.druid.segment.projections.Projections; +import org.apache.druid.timeline.SegmentId; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +import java.io.Closeable; +import java.io.File; +import java.io.IOException; +import java.util.List; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicReference; + +class PartialSegmentMetadataCacheEntryTest +{ + private static final ObjectMapper JSON_MAPPER = TestHelper.makeJsonMapper(); + private static final SegmentId SEGMENT_ID = SegmentId.of("test", Intervals.of("2025/2026"), "v1", 0); + private static final long ESTIMATE = 16 * 1024 * 1024L; + + @TempDir + File tempDir; + + private File segmentFile; + private File cacheDir; + + @BeforeEach + void setup() throws IOException + { + segmentFile = buildTestSegment(20); + cacheDir = new File(tempDir, "cache"); + FileUtils.mkdirp(cacheDir); + } + + 
@Test + void testMountFetchesHeaderAndShrinksReservation() throws IOException + { + final StorageLocation location = new StorageLocation(cacheDir, ESTIMATE * 4, null); + final PartialSegmentMetadataCacheEntry entry = newEntry(ESTIMATE); + Assertions.assertTrue(location.reserve(entry)); + Assertions.assertEquals(ESTIMATE, entry.getSize()); + Assertions.assertEquals(ESTIMATE, location.currentSizeBytes()); + + entry.mount(location); + + Assertions.assertTrue(entry.isMounted()); + Assertions.assertNotNull(entry.getFileMapper()); + Assertions.assertNotNull(entry.getSegmentFileMetadata()); + final long actualSize = entry.getSize(); + Assertions.assertTrue(actualSize > 0 && actualSize < ESTIMATE, "expected shrink, got " + actualSize); + Assertions.assertEquals(actualSize, location.currentSizeBytes()); + + final File headerFile = new File(cacheDir, IndexIO.V10_FILE_NAME + PartialSegmentFileMapperV10.METADATA_HEADER_SUFFIX); + Assertions.assertTrue(headerFile.exists()); + Assertions.assertEquals(headerFile.length(), actualSize); + } + + @Test + void testMountFailsWhenActualExceedsEstimate() + { + final StorageLocation location = new StorageLocation(cacheDir, ESTIMATE * 4, null); + // estimate of 8 bytes is way too small for any real V10 header + final PartialSegmentMetadataCacheEntry entry = newEntry(8); + Assertions.assertTrue(location.reserve(entry)); + + final DruidException thrown = Assertions.assertThrows( + DruidException.class, + () -> entry.mount(location) + ); + Assertions.assertTrue( + thrown.getMessage().contains("virtualStorageMetadataReservationEstimate"), + "expected operator-facing config hint, got: " + thrown.getMessage() + ); + Assertions.assertFalse(entry.isMounted()); + Assertions.assertNull(entry.getFileMapper()); + // reservation accounting is unchanged + Assertions.assertEquals(8, entry.getSize()); + Assertions.assertEquals(8, location.currentSizeBytes()); + // mount failure must delete the on-disk header so a retry starts clean (matches eager 
SegmentCacheEntry behavior) + final File headerFile = new File(cacheDir, IndexIO.V10_FILE_NAME + PartialSegmentFileMapperV10.METADATA_HEADER_SUFFIX); + Assertions.assertFalse(headerFile.exists(), "mount failure must delete the on-disk header file"); + } + + @Test + void testMountIsIdempotentInSameLocation() throws IOException + { + final StorageLocation location = new StorageLocation(cacheDir, ESTIMATE * 4, null); + final PartialSegmentMetadataCacheEntry entry = newEntry(ESTIMATE); + Assertions.assertTrue(location.reserve(entry)); + + entry.mount(location); + final PartialSegmentFileMapperV10 firstMapper = entry.getFileMapper(); + Assertions.assertNotNull(firstMapper); + + entry.mount(location); + Assertions.assertSame(firstMapper, entry.getFileMapper()); + } + + @Test + void testMountInDifferentLocationThrows() throws IOException + { + final StorageLocation location1 = new StorageLocation(cacheDir, ESTIMATE * 4, null); + final File otherDir = new File(tempDir, "other"); + FileUtils.mkdirp(otherDir); + final StorageLocation location2 = new StorageLocation(otherDir, ESTIMATE * 4, null); + + final PartialSegmentMetadataCacheEntry entry = newEntry(ESTIMATE); + Assertions.assertTrue(location1.reserve(entry)); + entry.mount(location1); + + Assertions.assertThrows(DruidException.class, () -> entry.mount(location2)); + } + + @Test + void testUnmountClearsState() throws IOException + { + final StorageLocation location = new StorageLocation(cacheDir, ESTIMATE * 4, null); + final PartialSegmentMetadataCacheEntry entry = newEntry(ESTIMATE); + Assertions.assertTrue(location.reserve(entry)); + entry.mount(location); + Assertions.assertTrue(entry.isMounted()); + + entry.unmount(); + + Assertions.assertFalse(entry.isMounted()); + Assertions.assertNull(entry.getFileMapper()); + Assertions.assertNull(entry.getSegmentFileMetadata()); + } + + @Test + void testUnmountIsIdempotent() throws IOException + { + final StorageLocation location = new StorageLocation(cacheDir, ESTIMATE * 4, 
null); + final PartialSegmentMetadataCacheEntry entry = newEntry(ESTIMATE); + Assertions.assertTrue(location.reserve(entry)); + entry.mount(location); + entry.unmount(); + entry.unmount(); // second call is a no-op + } + + @Test + void testUnmountDeletesHeaderFile() throws IOException + { + final StorageLocation location = new StorageLocation(cacheDir, ESTIMATE * 4, null); + final PartialSegmentMetadataCacheEntry entry = newEntry(ESTIMATE); + Assertions.assertTrue(location.reserve(entry)); + entry.mount(location); + + final File headerFile = new File(cacheDir, IndexIO.V10_FILE_NAME + PartialSegmentFileMapperV10.METADATA_HEADER_SUFFIX); + Assertions.assertTrue(headerFile.exists()); + + entry.unmount(); + Assertions.assertFalse(headerFile.exists(), "unmount must delete the entry's storage-location header file"); + } + + @Test + void testOnUnmountHookRunsAfterStorageLocationCleanup() throws IOException + { + final StorageLocation location = new StorageLocation(cacheDir, ESTIMATE * 4, null); + final PartialSegmentMetadataCacheEntry entry = newEntry(ESTIMATE); + Assertions.assertTrue(location.reserve(entry)); + entry.mount(location); + + final File headerFile = new File(cacheDir, IndexIO.V10_FILE_NAME + PartialSegmentFileMapperV10.METADATA_HEADER_SUFFIX); + final AtomicReference headerExistsWhenHookFired = new AtomicReference<>(); + final AtomicReference hookFired = new AtomicReference<>(false); + entry.setOnUnmount(() -> { + hookFired.set(true); + headerExistsWhenHookFired.set(headerFile.exists()); + }); + + entry.unmount(); + Assertions.assertTrue(hookFired.get(), "onUnmount hook must run"); + Assertions.assertEquals( + Boolean.FALSE, + headerExistsWhenHookFired.get(), + "hook must observe header already deleted (storage-location cleanup runs first)" + ); + } + + @Test + void testConstructorRejectsNonPositiveEstimate() + { + Assertions.assertThrows( + DruidException.class, + () -> new PartialSegmentMetadataCacheEntry( + SEGMENT_ID, + cacheDir, + IndexIO.V10_FILE_NAME, 
+ List.of(), + new DirectoryBackedRangeReader(segmentFile.getParentFile()), + JSON_MAPPER, + 0 + ) + ); + } + + @Test + void testGettersReturnNullBeforeMount() + { + final PartialSegmentMetadataCacheEntry entry = newEntry(ESTIMATE); + Assertions.assertFalse(entry.isMounted()); + Assertions.assertNull(entry.getFileMapper()); + Assertions.assertNull(entry.getSegmentFileMetadata()); + } + + @Test + void testUnmountDefersHeaderDeleteWhileReferenceHeld() throws Exception + { + final StorageLocation location = new StorageLocation(cacheDir, ESTIMATE * 4, null); + final PartialSegmentMetadataCacheEntry entry = newEntry(ESTIMATE); + Assertions.assertTrue(location.reserve(entry)); + entry.mount(location); + + final File headerFile = new File( + cacheDir, + IndexIO.V10_FILE_NAME + PartialSegmentFileMapperV10.METADATA_HEADER_SUFFIX + ); + Assertions.assertTrue(headerFile.exists()); + + final Closeable ref = entry.acquireReference(); + Assertions.assertTrue(entry.isMounted()); + + entry.unmount(); + // Header file MUST persist while the reference is held, even though unmount has been called. + Assertions.assertTrue(headerFile.exists(), "header file should persist while reference is held"); + Assertions.assertTrue(entry.isMounted(), "fileMapper should not be closed while reference is held"); + + ref.close(); + // Last reference released, deferred cleanup fires on this thread. 
+ Assertions.assertFalse(headerFile.exists(), "header file should be deleted after last reference releases"); + Assertions.assertFalse(entry.isMounted()); + } + + @Test + void testConcurrentMountIsDeduplicated() throws Exception + { + final StorageLocation location = new StorageLocation(cacheDir, ESTIMATE * 4, null); + final CountingRangeReader rangeReader = new CountingRangeReader(segmentFile.getParentFile()); + final PartialSegmentMetadataCacheEntry entry = new PartialSegmentMetadataCacheEntry( + SEGMENT_ID, + cacheDir, + IndexIO.V10_FILE_NAME, + List.of(), + rangeReader, + JSON_MAPPER, + ESTIMATE + ); + Assertions.assertTrue(location.reserve(entry)); + + final int threads = 8; + final CountDownLatch start = new CountDownLatch(1); + final CountDownLatch done = new CountDownLatch(threads); + final AtomicInteger errors = new AtomicInteger(); + final ExecutorService exec = Execs.multiThreaded(threads, "partial-segment-tests-%d"); + try { + for (int i = 0; i < threads; i++) { + exec.submit(() -> { + try { + start.await(); + entry.mount(location); + } + catch (Throwable t) { + errors.incrementAndGet(); + } + finally { + done.countDown(); + } + }); + } + start.countDown(); + Assertions.assertTrue(done.await(30, TimeUnit.SECONDS)); + Assertions.assertEquals(0, errors.get()); + Assertions.assertTrue(entry.isMounted()); + // Dedup proof: even with 8 concurrent mount() callers, the slow PartialSegmentFileMapperV10.create() path + // (which range-reads the header) ran exactly once. Without CAS+SettableFuture dedup, every caller would + // serialize through entryLock and each would still skip the actual fetch (early-return on already-mounted), + // but the FIRST few callers racing past the pre-check would re-fetch, counting range reads is the cleanest + // way to assert the slow work was deduped end to end. 
+ Assertions.assertEquals( + 1, + rangeReader.getHeaderReadCount(), + "expected exactly one range-read of the header across 8 concurrent mounters" + ); + } + finally { + exec.shutdownNow(); + } + } + + @Test + void testMountRollsBackIfEntryNoLongerReservedAtLocation() throws Exception + { + final StorageLocation location = new StorageLocation(cacheDir, ESTIMATE * 4, null); + final PartialSegmentMetadataCacheEntry entry = newEntry(ESTIMATE); + Assertions.assertTrue(location.reserve(entry)); + + // Externally evict the entry by releasing it (without going through entry.unmount() ourselves). This simulates + // the race where a hold/reservation gets dropped (concurrent cancellation, coordinator drop) and the location + // no longer knows about the entry by the time mount() finishes its work. + location.release(entry); + Assertions.assertFalse(entry.isMounted(), "release should have triggered cleanup"); + Assertions.assertFalse(location.isReserved(entry.getId())); + + // Call mount() again without re-reserving. doMount will succeed (on-disk header is still present and the + // file mapper opens), but the post-mount check should detect the missing reservation and roll back. 
+ entry.mount(location); + Assertions.assertFalse( + entry.isMounted(), + "mount must roll back when post-mount check detects the entry is no longer reserved with the location" + ); + Assertions.assertEquals(0, location.currentSizeBytes(), "no reservation should linger after rollback"); + } + + @Test + void testAcquireReferenceBeforeMountThrows() + { + final PartialSegmentMetadataCacheEntry entry = newEntry(ESTIMATE); + Assertions.assertThrows(DruidException.class, entry::acquireReference); + } + + @Test + void testAcquireReferenceAfterCleanupCompletesThrows() throws Exception + { + final StorageLocation location = new StorageLocation(cacheDir, ESTIMATE * 4, null); + final PartialSegmentMetadataCacheEntry entry = newEntry(ESTIMATE); + Assertions.assertTrue(location.reserve(entry)); + entry.mount(location); + entry.unmount(); // no references; cleanup runs synchronously + Assertions.assertFalse(entry.isMounted()); + Assertions.assertThrows(DruidException.class, entry::acquireReference); + } + + @Test + void testInferParentBundlesForBaseReturnsEmpty() + { + final PartialSegmentMetadataCacheEntry entry = newEntry(ESTIMATE); + Assertions.assertEquals( + List.of(), + entry.inferParentBundles(Projections.BASE_TABLE_PROJECTION_NAME) + ); + } + + @Test + void testInferParentBundlesForAggregateReturnsBase() + { + final PartialSegmentMetadataCacheEntry entry = newEntry(ESTIMATE); + final List parents = entry.inferParentBundles("some_aggregate_projection"); + Assertions.assertEquals(1, parents.size()); + Assertions.assertEquals(SEGMENT_ID, parents.getFirst().segmentId()); + Assertions.assertEquals( + Projections.BASE_TABLE_PROJECTION_NAME, + parents.getFirst().bundleName() + ); + } + + private PartialSegmentMetadataCacheEntry newEntry(long estimate) + { + return new PartialSegmentMetadataCacheEntry( + SEGMENT_ID, + cacheDir, + IndexIO.V10_FILE_NAME, + List.of(), + new DirectoryBackedRangeReader(segmentFile.getParentFile()), + JSON_MAPPER, + estimate + ); + } + + private File 
buildTestSegment(int numFiles) throws IOException + { + final File baseDir = new File(tempDir, "deep_storage"); + FileUtils.mkdirp(baseDir); + try (SegmentFileBuilderV10 builder = SegmentFileBuilderV10.create(JSON_MAPPER, baseDir, CompressionStrategy.NONE)) { + for (int i = 0; i < numFiles; ++i) { + File tmpFile = new File(tempDir, StringUtils.format("smoosh-%d.bin", i)); + Files.write(Ints.toByteArray(i), tmpFile); + builder.add(StringUtils.format("%d", i), tmpFile); + } + } + return new File(baseDir, IndexIO.V10_FILE_NAME); + } + +} diff --git a/server/src/test/java/org/apache/druid/segment/loading/SegmentLoaderConfigTest.java b/server/src/test/java/org/apache/druid/segment/loading/SegmentLoaderConfigTest.java index bd9c8c65deff..cda83b1ebb7c 100644 --- a/server/src/test/java/org/apache/druid/segment/loading/SegmentLoaderConfigTest.java +++ b/server/src/test/java/org/apache/druid/segment/loading/SegmentLoaderConfigTest.java @@ -34,7 +34,7 @@ public void testSetVirtualStorage() Assert.assertFalse(config.isVirtualStorageEphemeral()); // Set both to true - config.setVirtualStorage(true, true); + config.setVirtualStorage(true).setVirtualStorageIsEphemeral(true); // Verify both fields are set Assert.assertTrue(config.isVirtualStorage()); diff --git a/server/src/test/java/org/apache/druid/segment/loading/SegmentLocalCacheManagerConcurrencyTest.java b/server/src/test/java/org/apache/druid/segment/loading/SegmentLocalCacheManagerConcurrencyTest.java index dc4fe4b384f9..ddc6094bf6c7 100644 --- a/server/src/test/java/org/apache/druid/segment/loading/SegmentLocalCacheManagerConcurrencyTest.java +++ b/server/src/test/java/org/apache/druid/segment/loading/SegmentLocalCacheManagerConcurrencyTest.java @@ -175,6 +175,7 @@ public File getInfoDir() manager = new SegmentLocalCacheManager( storageLocations, loaderConfig, + StorageLoadingThreadPool.createFromConfig(loaderConfig), new LeastBytesUsedStorageLocationSelectorStrategy(storageLocations), TestIndex.INDEX_IO, jsonMapper @@ 
-182,6 +183,7 @@ public File getInfoDir() virtualStorageManager = new SegmentLocalCacheManager( storageLocations, vsfLoaderConfig, + StorageLoadingThreadPool.createFromConfig(vsfLoaderConfig), new RoundRobinStorageLocationSelectorStrategy(storageLocations), TestIndex.INDEX_IO, jsonMapper diff --git a/server/src/test/java/org/apache/druid/segment/loading/SegmentLocalCacheManagerTest.java b/server/src/test/java/org/apache/druid/segment/loading/SegmentLocalCacheManagerTest.java index 79c84af538ba..c6b7e61f12a0 100644 --- a/server/src/test/java/org/apache/druid/segment/loading/SegmentLocalCacheManagerTest.java +++ b/server/src/test/java/org/apache/druid/segment/loading/SegmentLocalCacheManagerTest.java @@ -132,6 +132,7 @@ public List getLocations() manager = new SegmentLocalCacheManager( ImmutableList.of(), loaderConfig, + StorageLoadingThreadPool.createFromConfig(loaderConfig), new LeastBytesUsedStorageLocationSelectorStrategy(ImmutableList.of()), TestIndex.INDEX_IO, jsonMapper @@ -145,9 +146,11 @@ public void testCanHandleSegmentsWithLocations() final ImmutableList locations = ImmutableList.of( new StorageLocation(localSegmentCacheDir, 10000000000L, null) ); + final SegmentLoaderConfig loaderConfig = new SegmentLoaderConfig(); SegmentLocalCacheManager manager = new SegmentLocalCacheManager( locations, - new SegmentLoaderConfig(), + loaderConfig, + StorageLoadingThreadPool.createFromConfig(loaderConfig), new LeastBytesUsedStorageLocationSelectorStrategy(locations), TestIndex.INDEX_IO, jsonMapper @@ -158,9 +161,11 @@ public void testCanHandleSegmentsWithLocations() @Test public void testCanHandleSegmentsWithEmptyLocationsAndConfigLocations() { + final SegmentLoaderConfig loaderConfig = new SegmentLoaderConfig(); SegmentLocalCacheManager manager = new SegmentLocalCacheManager( ImmutableList.of(), - new SegmentLoaderConfig(), + loaderConfig, + StorageLoadingThreadPool.createFromConfig(loaderConfig), new LeastBytesUsedStorageLocationSelectorStrategy(ImmutableList.of()), 
TestIndex.INDEX_IO, jsonMapper @@ -171,9 +176,11 @@ public void testCanHandleSegmentsWithEmptyLocationsAndConfigLocations() @Test public void testGetCachedSegmentsWhenCanHandleSegmentsIsFalse() { + final SegmentLoaderConfig loaderConfig = new SegmentLoaderConfig(); SegmentLocalCacheManager manager = new SegmentLocalCacheManager( null, - new SegmentLoaderConfig(), + loaderConfig, + StorageLoadingThreadPool.createFromConfig(loaderConfig), new LeastBytesUsedStorageLocationSelectorStrategy(null), TestIndex.INDEX_IO, jsonMapper @@ -361,6 +368,7 @@ public List getLocations() SegmentLocalCacheManager manager = new SegmentLocalCacheManager( locations, loaderConfig, + StorageLoadingThreadPool.createFromConfig(loaderConfig), new LeastBytesUsedStorageLocationSelectorStrategy(locations), TestHelper.getTestIndexIO(jsonMapper, ColumnConfig.DEFAULT), jsonMapper @@ -412,6 +420,7 @@ public void testRetrySuccessAtFirstLocation() throws Exception SegmentLocalCacheManager manager = new SegmentLocalCacheManager( storageLocations, loaderConfig, + StorageLoadingThreadPool.createFromConfig(loaderConfig), new LeastBytesUsedStorageLocationSelectorStrategy(storageLocations), TestHelper.getTestIndexIO(jsonMapper, ColumnConfig.DEFAULT), jsonMapper @@ -449,7 +458,8 @@ public void testRetrySuccessAtSecondLocation() throws Exception final List storageLocations = loaderConfig.toStorageLocations(); SegmentLocalCacheManager manager = new SegmentLocalCacheManager( storageLocations, - new SegmentLoaderConfig().setLocations(locations), + loaderConfig, + StorageLoadingThreadPool.createFromConfig(loaderConfig), new LeastBytesUsedStorageLocationSelectorStrategy(storageLocations), TestHelper.getTestIndexIO(jsonMapper, ColumnConfig.DEFAULT), jsonMapper @@ -489,7 +499,8 @@ public void testRetryAllFail() throws Exception final List storageLocations = loaderConfig.toStorageLocations(); SegmentLocalCacheManager manager = new SegmentLocalCacheManager( storageLocations, - new 
SegmentLoaderConfig().setLocations(locations), + loaderConfig, + StorageLoadingThreadPool.createFromConfig(loaderConfig), new LeastBytesUsedStorageLocationSelectorStrategy(storageLocations), TestHelper.getTestIndexIO(jsonMapper, ColumnConfig.DEFAULT), jsonMapper @@ -528,7 +539,8 @@ public void testEmptyToFullOrder() throws Exception final List storageLocations = loaderConfig.toStorageLocations(); SegmentLocalCacheManager manager = new SegmentLocalCacheManager( storageLocations, - new SegmentLoaderConfig().setLocations(locations), + loaderConfig, + StorageLoadingThreadPool.createFromConfig(loaderConfig), new LeastBytesUsedStorageLocationSelectorStrategy(storageLocations), TestHelper.getTestIndexIO(jsonMapper, ColumnConfig.DEFAULT), jsonMapper @@ -580,9 +592,11 @@ public void testSegmentDistributionUsingRoundRobinStrategy() throws Exception ); } + final SegmentLoaderConfig loaderConfig = new SegmentLoaderConfig().setLocations(locationConfigs); SegmentLocalCacheManager manager = new SegmentLocalCacheManager( locations, - new SegmentLoaderConfig().setLocations(locationConfigs), + loaderConfig, + StorageLoadingThreadPool.createFromConfig(loaderConfig), new RoundRobinStorageLocationSelectorStrategy(locations), TestHelper.getTestIndexIO(jsonMapper, ColumnConfig.DEFAULT), jsonMapper @@ -661,6 +675,7 @@ public void testSegmentDistributionUsingLeastBytesUsedStrategy() throws Exceptio SegmentLocalCacheManager manager = new SegmentLocalCacheManager( storageLocations, - new SegmentLoaderConfig().setLocations(locations), + loaderConfig, + StorageLoadingThreadPool.createFromConfig(loaderConfig), new LeastBytesUsedStorageLocationSelectorStrategy(storageLocations), TestHelper.getTestIndexIO(jsonMapper, ColumnConfig.DEFAULT), jsonMapper @@ -743,6 +758,7 @@ public void testSegmentDistributionUsingRandomStrategy() throws Exception SegmentLocalCacheManager manager = new SegmentLocalCacheManager( locations, segmentLoaderConfig, + StorageLoadingThreadPool.createFromConfig(segmentLoaderConfig), new
RandomStorageLocationSelectorStrategy(locations), TestHelper.getTestIndexIO(jsonMapper, ColumnConfig.DEFAULT), jsonMapper @@ -824,6 +840,7 @@ public void testGetBootstrapSegment() throws SegmentLoadingException SegmentLocalCacheManager manager = new SegmentLocalCacheManager( storageLocations, loaderConfig, + StorageLoadingThreadPool.createFromConfig(loaderConfig), new LeastBytesUsedStorageLocationSelectorStrategy(storageLocations), TestHelper.getTestIndexIO(jsonMapper, ColumnConfig.DEFAULT), jsonMapper @@ -861,6 +878,7 @@ public List getLocations() SegmentLocalCacheManager manager = new SegmentLocalCacheManager( storageLocations, loaderConfig, + StorageLoadingThreadPool.createFromConfig(loaderConfig), new LeastBytesUsedStorageLocationSelectorStrategy(storageLocations), TestHelper.getTestIndexIO(jsonMapper, ColumnConfig.DEFAULT), jsonMapper @@ -908,6 +926,7 @@ public File getInfoDir() SegmentLocalCacheManager manager = new SegmentLocalCacheManager( storageLocations, loaderConfig, + StorageLoadingThreadPool.createFromConfig(loaderConfig), new LeastBytesUsedStorageLocationSelectorStrategy(storageLocations), TestHelper.getTestIndexIO(jsonMapper, ColumnConfig.DEFAULT), jsonMapper @@ -977,6 +996,7 @@ public int getVirtualStorageLoadThreads() () -> new SegmentLocalCacheManager( storageLocations, loaderConfig, + StorageLoadingThreadPool.createFromConfig(loaderConfig), new LeastBytesUsedStorageLocationSelectorStrategy(storageLocations), TestHelper.getTestIndexIO(jsonMapper, ColumnConfig.DEFAULT), jsonMapper @@ -1023,6 +1043,7 @@ public File getInfoDir() SegmentLocalCacheManager manager = new SegmentLocalCacheManager( storageLocations, loaderConfig, + StorageLoadingThreadPool.createFromConfig(loaderConfig), new LeastBytesUsedStorageLocationSelectorStrategy(storageLocations), TestHelper.getTestIndexIO(jsonMapper, ColumnConfig.DEFAULT), jsonMapper @@ -1084,6 +1105,7 @@ public boolean isVirtualStorage() SegmentLocalCacheManager manager = new SegmentLocalCacheManager( 
storageLocations, loaderConfig, + StorageLoadingThreadPool.createFromConfig(loaderConfig), new LeastBytesUsedStorageLocationSelectorStrategy(storageLocations), TestHelper.getTestIndexIO(jsonMapper, ColumnConfig.DEFAULT), jsonMapper @@ -1172,6 +1194,7 @@ public File getInfoDir() SegmentLocalCacheManager manager = new SegmentLocalCacheManager( storageLocations, loaderConfig, + StorageLoadingThreadPool.createFromConfig(loaderConfig), new LeastBytesUsedStorageLocationSelectorStrategy(storageLocations), TestHelper.getTestIndexIO(jsonMapper, ColumnConfig.DEFAULT), jsonMapper @@ -1256,6 +1279,7 @@ public File getInfoDir() final SegmentLocalCacheManager manager = new SegmentLocalCacheManager( storageLocations, loaderConfig, + StorageLoadingThreadPool.createFromConfig(loaderConfig), new LeastBytesUsedStorageLocationSelectorStrategy(storageLocations), TestHelper.getTestIndexIO(jsonMapper, ColumnConfig.DEFAULT), jsonMapper @@ -1380,6 +1404,7 @@ private SegmentLocalCacheManager makeDefaultManager(ObjectMapper jsonMapper) return new SegmentLocalCacheManager( locations, loaderConfig, + StorageLoadingThreadPool.createFromConfig(loaderConfig), new LeastBytesUsedStorageLocationSelectorStrategy(locations), TestHelper.getTestIndexIO(jsonMapper, ColumnConfig.DEFAULT), jsonMapper diff --git a/server/src/test/java/org/apache/druid/segment/loading/StorageLocationTest.java b/server/src/test/java/org/apache/druid/segment/loading/StorageLocationTest.java index 30a4b69b97d1..2a0d8d858478 100644 --- a/server/src/test/java/org/apache/druid/segment/loading/StorageLocationTest.java +++ b/server/src/test/java/org/apache/druid/segment/loading/StorageLocationTest.java @@ -20,6 +20,7 @@ package org.apache.druid.segment.loading; import com.google.common.collect.ImmutableMap; +import org.apache.druid.error.DruidException; import org.apache.druid.java.util.common.Intervals; import org.apache.druid.java.util.common.concurrent.Execs; import org.apache.druid.java.util.common.io.Closer; @@ -33,6 +34,7 @@ 
import org.junit.jupiter.api.io.TempDir; import java.io.File; +import java.io.IOException; import java.util.ArrayList; import java.util.Collections; import java.util.List; @@ -414,6 +416,142 @@ public void testReclaimRestoreDoesNotCreateZombieEntries() hold2.close(); } + @Test + public void testAdjustReservationStaticEntry() + { + final StorageLocation location = new StorageLocation(tempDir, 100L, null); + final TestResizableCacheEntry entry = new TestResizableCacheEntry("a", 50); + Assertions.assertTrue(location.reserve(entry)); + Assertions.assertEquals(50, location.currentSizeBytes()); + Assertions.assertEquals(50, location.availableSizeBytes()); + + location.adjustReservation(entry.getId(), 10); + Assertions.assertEquals(10, entry.getSize()); + Assertions.assertEquals(10, location.currentSizeBytes()); + Assertions.assertEquals(90, location.availableSizeBytes()); + + // after shrink, location can host new entries that wouldn't have fit at the original size + final TestResizableCacheEntry entry2 = new TestResizableCacheEntry("b", 80); + Assertions.assertTrue(location.reserve(entry2)); + + // release accounting still uses the (post-shrink) size + location.release(entry); + Assertions.assertEquals(80, location.currentSizeBytes()); + } + + @Test + public void testAdjustReservationWeakEntry() + { + final StorageLocation location = new StorageLocation(tempDir, 100L, null); + final TestResizableCacheEntry entry = new TestResizableCacheEntry("a", 80); + Assertions.assertTrue(location.reserveWeak(entry)); + Assertions.assertEquals(80, location.currentWeakSizeBytes()); + + location.adjustReservation(entry.getId(), 30); + Assertions.assertEquals(30, entry.getSize()); + Assertions.assertEquals(30, location.currentWeakSizeBytes()); + Assertions.assertEquals(30, location.currentSizeBytes()); + } + + @Test + public void testAdjustReservationGrowThrows() + { + final StorageLocation location = new StorageLocation(tempDir, 100L, null); + final TestResizableCacheEntry entry = new 
TestResizableCacheEntry("a", 30); + Assertions.assertTrue(location.reserve(entry)); + + Assertions.assertThrows( + DruidException.class, + () -> location.adjustReservation(entry.getId(), 60) + ); + // entry size and location accounting unchanged + Assertions.assertEquals(30, entry.getSize()); + Assertions.assertEquals(30, location.currentSizeBytes()); + } + + @Test + public void testAdjustReservationUnknownEntryThrows() + { + final StorageLocation location = new StorageLocation(tempDir, 100L, null); + Assertions.assertThrows( + DruidException.class, + () -> location.adjustReservation(new StringCacheIdentifier("nope"), 10) + ); + } + + @Test + public void testAdjustReservationNonResizableEntryThrows() + { + final StorageLocation location = new StorageLocation(tempDir, 100L, null); + final CacheEntry entry = new TestCacheEntry("a", 30); + Assertions.assertTrue(location.reserve(entry)); + + Assertions.assertThrows( + DruidException.class, + () -> location.adjustReservation(entry.getId(), 10) + ); + } + + @Test + public void testAdjustReservationToSameSizeIsNoOp() + { + final StorageLocation location = new StorageLocation(tempDir, 100L, null); + final TestResizableCacheEntry entry = new TestResizableCacheEntry("a", 50); + Assertions.assertTrue(location.reserve(entry)); + + location.adjustReservation(entry.getId(), 50); + Assertions.assertEquals(50, entry.getSize()); + Assertions.assertEquals(50, location.currentSizeBytes()); + } + + @Test + public void testAdjustReservationWeakEntryShrinksHeldBytes() throws IOException + { + final StorageLocation location = new StorageLocation(tempDir, 100L, null); + final TestResizableCacheEntry entry = new TestResizableCacheEntry("a", 80); + Assertions.assertTrue(location.reserveWeak(entry)); + + // Acquire a hold BEFORE shrinking. trackWeakHold records 80 bytes against currHoldBytes. 
+ final StorageLocation.ReservationHold hold = location.addWeakReservationHold(entry.getId(), () -> entry); + Assertions.assertNotNull(hold); + Assertions.assertEquals(1, location.getWeakStats().getHoldCount()); + Assertions.assertEquals(80, location.getWeakStats().getHoldBytes()); + + // Shrink to 30: hold-bytes contribution from the active hold must shrink in lockstep so the eventual + // trackWeakRelease (which subtracts the new smaller size) leaves currHoldBytes at 0. + location.adjustReservation(entry.getId(), 30); + Assertions.assertEquals(30, entry.getSize()); + Assertions.assertEquals(30, location.currentWeakSizeBytes()); + Assertions.assertEquals(30, location.getWeakStats().getHoldBytes()); + + hold.close(); + Assertions.assertEquals(0, location.getWeakStats().getHoldCount()); + Assertions.assertEquals(0, location.getWeakStats().getHoldBytes()); + } + + @Test + public void testAdjustReservationWeakEntryShrinksHeldBytesWithMultipleHolds() throws IOException + { + final StorageLocation location = new StorageLocation(tempDir, 100L, null); + final TestResizableCacheEntry entry = new TestResizableCacheEntry("a", 50); + Assertions.assertTrue(location.reserveWeak(entry)); + + // Two concurrent holds: trackWeakHold fires twice, so currHoldBytes = 2 * 50 = 100. + final StorageLocation.ReservationHold hold1 = location.addWeakReservationHold(entry.getId(), () -> entry); + final StorageLocation.ReservationHold hold2 = location.addWeakReservationHold(entry.getId(), () -> entry); + Assertions.assertEquals(2, location.getWeakStats().getHoldCount()); + Assertions.assertEquals(100, location.getWeakStats().getHoldBytes()); + + // Shrink by 30 (50 → 20): each of the two active holds contributes -30, so currHoldBytes drops by 60. 
+ location.adjustReservation(entry.getId(), 20); + Assertions.assertEquals(40, location.getWeakStats().getHoldBytes()); + + hold1.close(); + Assertions.assertEquals(20, location.getWeakStats().getHoldBytes()); + hold2.close(); + Assertions.assertEquals(0, location.getWeakStats().getHoldBytes()); + } + @SuppressWarnings({"GuardedBy", "FieldAccessNotGuarded"}) private void verifyLoc(long maxSize, StorageLocation loc) { @@ -543,6 +681,55 @@ public void unmount() } } + private static final class TestResizableCacheEntry implements ResizableCacheEntry + { + private final StringCacheIdentifier id; + private long size; + private boolean isMounted = false; + + private TestResizableCacheEntry(String id, long size) + { + this.id = new StringCacheIdentifier(id); + this.size = size; + } + + @Override + public StringCacheIdentifier getId() + { + return id; + } + + @Override + public long getSize() + { + return size; + } + + @Override + public boolean isMounted() + { + return isMounted; + } + + @Override + public void mount(StorageLocation location) + { + isMounted = true; + } + + @Override + public void unmount() + { + isMounted = false; + } + + @Override + public void resizeReservation(long newSize) + { + this.size = newSize; + } + } + public static final class StringCacheIdentifier implements CacheEntryIdentifier { private final String string; diff --git a/server/src/test/java/org/apache/druid/segment/loading/external/DownloadableCacheEntryTest.java b/server/src/test/java/org/apache/druid/segment/loading/external/DownloadableCacheEntryTest.java new file mode 100644 index 000000000000..7440cf91e4a2 --- /dev/null +++ b/server/src/test/java/org/apache/druid/segment/loading/external/DownloadableCacheEntryTest.java @@ -0,0 +1,225 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.loading.external; + +import com.google.common.hash.Hashing; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +public class DownloadableCacheEntryTest +{ + @Test + public void test_simpleName_splitInHalf() + { + final String path = "foo"; + final String expected = "f-" + hash(path) + "-oo"; + Assertions.assertEquals(expected, DownloadableCacheEntry.sanitizePath(path)); + } + + @Test + public void test_singleExtension_suffixIsExtension() + { + final String path = "foo.gz"; + final String expected = "foo-" + hash(path) + "-.gz"; + Assertions.assertEquals(expected, DownloadableCacheEntry.sanitizePath(path)); + } + + @Test + public void test_chainedExtensionsShortBasename_suffixIsFullExtensionChain() + { + final String path = "a.log.gz"; + final String expected = "a-" + hash(path) + "-.log.gz"; + Assertions.assertEquals(expected, DownloadableCacheEntry.sanitizePath(path)); + } + + @Test + public void test_chainedExtensionsLongBasename_suffixIncludesFullExtensionChain() + { + final String path = "container.log.gz"; + final String expected = "containe-" + hash(path) + "-r.log.gz"; + final String result = DownloadableCacheEntry.sanitizePath(path); + Assertions.assertEquals(expected, result); + Assertions.assertTrue(result.endsWith(".log.gz")); + } + + @Test + public void 
test_uriWithExtensions_slashesReplacedAndSuffixEndsWithExtensionChain() + { + final String path = "file:/var/folders/raw/container.log.gz"; + final String expected = "file__var_folders_r-" + hash(path) + "-aw_container.log.gz"; + Assertions.assertEquals(expected, DownloadableCacheEntry.sanitizePath(path)); + } + + @Test + public void test_nonAlnumChar_breaksExtensionChain_onlyTrailingAlnumExtensionsCount() + { + final String path = "foo.my_thing.gz"; + final String expected = "foo.my_-" + hash(path) + "-thing.gz"; + Assertions.assertEquals(expected, DownloadableCacheEntry.sanitizePath(path)); + } + + @Test + public void test_underscoreInBasename_noTrailingExtension_splitInHalf() + { + final String path = "foo.bar_baz"; + final String expected = "foo.b-" + hash(path) + "-ar_baz"; + Assertions.assertEquals(expected, DownloadableCacheEntry.sanitizePath(path)); + } + + @Test + public void test_dotsInDirectoryComponents_ignored_basenameHasNoExtension() + { + final String path = "foo.bar/baz"; + final String expected = "foo.b-" + hash(path) + "-ar_baz"; + Assertions.assertEquals(expected, DownloadableCacheEntry.sanitizePath(path)); + } + + @Test + public void test_dotsInDirectoryAndBasename_onlyBasenameExtensionsConsidered() + { + final String path = "foo.bar/baz.qux"; + final String expected = "foo.bar-" + hash(path) + "-_baz.qux"; + Assertions.assertEquals(expected, DownloadableCacheEntry.sanitizePath(path)); + } + + @Test + public void test_trailingDot_notTreatedAsExtension() + { + final String path = "foo.bar."; + final String expected = "foo.-" + hash(path) + "-bar."; + Assertions.assertEquals(expected, DownloadableCacheEntry.sanitizePath(path)); + } + + @Test + public void test_dotfile_leadingDotNotTreatedAsExtension() + { + final String path = "foo/.gitignore"; + final String expected = "foo_.gi-" + hash(path) + "-tignore"; + Assertions.assertEquals(expected, DownloadableCacheEntry.sanitizePath(path)); + } + + @Test + public void 
test_dotfileWithExtension_leadingDotIgnoredOnlyTrailingExtensionUsed() + { + final String path = "foo/.hidden.gz"; + final String expected = "foo_.hi-" + hash(path) + "-dden.gz"; + Assertions.assertEquals(expected, DownloadableCacheEntry.sanitizePath(path)); + } + + @Test + public void test_basenameIsBareDotfile_noExtension() + { + final String path = ".gitignore"; + final String expected = ".giti-" + hash(path) + "-gnore"; + Assertions.assertEquals(expected, DownloadableCacheEntry.sanitizePath(path)); + } + + @Test + public void test_backslashesNormalizedToSlashes() + { + final String path = "foo\\bar\\baz.gz"; + final String expected = "foo_bar-" + hash(path) + "-_baz.gz"; + Assertions.assertEquals(expected, DownloadableCacheEntry.sanitizePath(path)); + } + + @Test + public void test_leadingSlash_stripped() + { + final String path = "/foo/bar.gz"; + final String expected = "foo_b-" + hash(path) + "-ar.gz"; + Assertions.assertEquals(expected, DownloadableCacheEntry.sanitizePath(path)); + } + + @Test + public void test_trailingSlash_dropped() + { + final String path = "foo/bar/"; + final String expected = "foo-" + hash(path) + "-_bar"; + Assertions.assertEquals(expected, DownloadableCacheEntry.sanitizePath(path)); + } + + @Test + public void test_longPath_truncatedToFirst50AndLast50_suffixEndsWithExtension() + { + final StringBuilder pathBuilder = new StringBuilder("/aaaa"); + while (pathBuilder.length() < 200) { + pathBuilder.append("/bbbb"); + } + pathBuilder.append("/finalfile.gz"); + final String path = pathBuilder.toString(); + + final String result = DownloadableCacheEntry.sanitizePath(path); + + Assertions.assertTrue( + result.endsWith(".gz"), + "truncated output should still end with the original extension, got: " + result + ); + Assertions.assertTrue( + result.contains("-" + hash(path) + "-"), + "hashCode should be embedded between '-' delimiters between the prefix and suffix, got: " + result + ); + Assertions.assertEquals( + 100 + 2 + hash(path).length(), 
+ result.length(), + "kept portion should be exactly 100 chars (50 each side) plus delimiters and hashcode" + ); + } + + @Test + public void test_disallowedChars_replacedWithUnderscores() + { + final String path = "foo bar*baz?.gz"; + final String expected = "foo_bar-" + hash(path) + "-_baz_.gz"; + Assertions.assertEquals(expected, DownloadableCacheEntry.sanitizePath(path)); + } + + @Test + public void test_hyphenPreserved() + { + final String path = "foo-bar.gz"; + final String expected = "foo-b-" + hash(path) + "-ar.gz"; + Assertions.assertEquals(expected, DownloadableCacheEntry.sanitizePath(path)); + } + + @Test + public void test_emptyPath_resultIsJustHashWithDelimiters() + { + final String path = ""; + final String expected = "-" + hash(path) + "-"; + Assertions.assertEquals(expected, DownloadableCacheEntry.sanitizePath(path)); + } + + @Test + public void test_slashOnly_resultIsJustHashWithDelimiters() + { + final String path = "/"; + final String expected = "-" + hash(path) + "-"; + Assertions.assertEquals(expected, DownloadableCacheEntry.sanitizePath(path)); + } + + /** + * Computes the disambiguating hash the way {@link DownloadableCacheEntry#sanitizePath(String)} does, so the tests + * above can assert on the prefix/suffix splitting (the actual behavior under test) without hardcoding hash values.
+ */ + private static String hash(final String path) + { + return Hashing.sha512().hashUnencodedChars(path).toString().substring(0, 16); + } +} diff --git a/server/src/test/java/org/apache/druid/segment/loading/external/StorageLocationVirtualStorageManagerTest.java b/server/src/test/java/org/apache/druid/segment/loading/external/StorageLocationVirtualStorageManagerTest.java new file mode 100644 index 000000000000..1f1ded3631ed --- /dev/null +++ b/server/src/test/java/org/apache/druid/segment/loading/external/StorageLocationVirtualStorageManagerTest.java @@ -0,0 +1,1037 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.segment.loading.external; + +import org.apache.druid.common.asyncresource.AsyncResource; +import org.apache.druid.common.asyncresource.AsyncResources; +import org.apache.druid.error.DruidException; +import org.apache.druid.error.DruidExceptionMatcher; +import org.apache.druid.java.util.common.concurrent.Execs; +import org.apache.druid.segment.loading.LeastBytesUsedStorageLocationSelectorStrategy; +import org.apache.druid.segment.loading.StorageLoadingThreadPool; +import org.apache.druid.segment.loading.StorageLocation; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +import java.io.File; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.CyclicBarrier; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Future; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; + +public class StorageLocationVirtualStorageManagerTest +{ + @TempDir + public File tempFolder; + + private StorageLocation location; + private StorageLocationVirtualStorageManager manager; + + @BeforeEach + public void setup() throws IOException + { + File locationPath = new File(tempFolder, "storage"); + Files.createDirectories(locationPath.toPath()); + location = new StorageLocation(locationPath, 10_000_000L, null); + manager = new StorageLocationVirtualStorageManager( + Collections.singletonList(location), + new LeastBytesUsedStorageLocationSelectorStrategy(Collections.singletonList(location)), + StorageLoadingThreadPool.none() + ); + } + + @AfterEach + public void teardown() + { + location.reset(); + } + + /** + * Helper method to run test logic with 
an ExecutorService that is guaranteed to be shut down. + */ + private void withExecutor(ExecutorConsumer testLogic) throws Exception + { + ExecutorService executorService = Execs.multiThreaded(10, "test-thread-%d"); + try { + testLogic.accept(executorService); + } + finally { + executorService.shutdownNow(); + } + } + + @FunctionalInterface + private interface ExecutorConsumer + { + void accept(ExecutorService executor) throws Exception; + } + + @Test + public void testGetReturnsNullWhenNotExists() + { + CachedFile result = manager.get("nonexistent"); + Assertions.assertNull(result); + } + + @Test + public void testReserveAndPopulateCreatesFile() throws Exception + { + String identifier = "test-file"; + String content = "Hello, World!"; + + Assertions.assertEquals(0, location.getWeakStats().getHoldCount()); + Assertions.assertEquals(0, location.getWeakEntryCount()); + + try (CachedFile cachedFile = manager.reserveAndPopulate( + identifier, + content::length, + file -> Files.write(file.toPath(), content.getBytes(StandardCharsets.UTF_8)) + )) { + Assertions.assertNotNull(cachedFile); + Assertions.assertEquals(identifier, cachedFile.getIdentifier()); + + // Validate there's an active hold while the file is open + Assertions.assertEquals(1, location.getWeakStats().getHoldCount()); + Assertions.assertEquals(1, location.getWeakEntryCount()); + + // Validate that both load begin and load complete were tracked + Assertions.assertEquals(1, location.getWeakStats().getLoadBeginCount()); + Assertions.assertEquals(content.length(), location.getWeakStats().getLoadBeginBytes()); + Assertions.assertEquals(1, location.getWeakStats().getLoadCount()); + Assertions.assertEquals(content.length(), location.getWeakStats().getLoadBytes()); + + File file = cachedFile.getFile(); + Assertions.assertNotNull(file); + Assertions.assertTrue(file.exists()); + + String readContent = new String(Files.readAllBytes(file.toPath()), StandardCharsets.UTF_8); + Assertions.assertEquals(content, 
readContent); + } + + // After close, hold should be released + Assertions.assertEquals(0, location.getWeakStats().getHoldCount()); + // Entry remains in cache, just not held + Assertions.assertEquals(1, location.getWeakEntryCount()); + } + + @Test + public void testGetReturnsFileAfterPopulate() throws Exception + { + String identifier = "test-file"; + String content = "Test content"; + + // First populate + try (CachedFile cachedFile = manager.reserveAndPopulate( + identifier, + content::length, + file -> Files.write(file.toPath(), content.getBytes(StandardCharsets.UTF_8)) + )) { + Assertions.assertNotNull(cachedFile); + Assertions.assertEquals(1, location.getWeakStats().getHoldCount()); + } + + // After close, hold should be released but entry remains + Assertions.assertEquals(0, location.getWeakStats().getHoldCount()); + Assertions.assertEquals(1, location.getWeakEntryCount()); + + // Initial populate counts as a completed load + Assertions.assertEquals(1, location.getWeakStats().getLoadCount()); + Assertions.assertEquals(content.length(), location.getWeakStats().getLoadBytes()); + + // Then get + try (CachedFile cachedFile = manager.get(identifier)) { + Assertions.assertNotNull(cachedFile); + Assertions.assertEquals(identifier, cachedFile.getIdentifier()); + + // Hold should be re-acquired + Assertions.assertEquals(1, location.getWeakStats().getHoldCount()); + + // A hit does not count as another load + Assertions.assertEquals(1, location.getWeakStats().getLoadCount()); + Assertions.assertEquals(content.length(), location.getWeakStats().getLoadBytes()); + Assertions.assertEquals(1, location.getWeakStats().getHitCount()); + + String readContent = new String( + Files.readAllBytes(cachedFile.getFile().toPath()), + StandardCharsets.UTF_8 + ); + Assertions.assertEquals(content, readContent); + } + + // After second close, hold should be released again + Assertions.assertEquals(0, location.getWeakStats().getHoldCount()); + Assertions.assertEquals(1, 
location.getWeakEntryCount()); + } + + @Test + public void testGetReturnsNullAfterEphemeralEviction() throws Exception + { + location.setAreWeakEntriesEphemeral(true); + + String identifier = "test-file"; + String content = "Hello, World!"; + + try (CachedFile cachedFile = manager.reserveAndPopulate( + identifier, + content::length, + file -> Files.write(file.toPath(), content.getBytes(StandardCharsets.UTF_8)) + )) { + try (CachedFile fromGet = manager.get(identifier)) { + Assertions.assertNotNull(fromGet); + } + } + // Last hold released → ephemeral eviction → entry unmounted + + Assertions.assertNull(manager.get(identifier)); + Assertions.assertEquals(0, location.getWeakEntryCount()); + } + + @Test + public void testPopulationLockClearedOnEphemeralEviction() throws Exception + { + location.setAreWeakEntriesEphemeral(true); + + String identifier = "test-file"; + AtomicInteger firstSupplierCalls = new AtomicInteger(); + AtomicInteger secondSupplierCalls = new AtomicInteger(); + AtomicInteger firstPopulatorCalls = new AtomicInteger(); + AtomicInteger secondPopulatorCalls = new AtomicInteger(); + + try (CachedFile firstFile = manager.reserveAndPopulate( + identifier, + () -> { + firstSupplierCalls.incrementAndGet(); + return 4L; + }, + file -> { + firstPopulatorCalls.incrementAndGet(); + Files.write(file.toPath(), "v1".getBytes(StandardCharsets.UTF_8)); + } + )) { + Assertions.assertNotNull(firstFile); + } + // Hold released → ephemeral eviction → unmount → populationLocks.remove(id, lock) + + try (CachedFile secondFile = manager.reserveAndPopulate( + identifier, + () -> { + secondSupplierCalls.incrementAndGet(); + return 4L; + }, + file -> { + secondPopulatorCalls.incrementAndGet(); + Files.write(file.toPath(), "v2".getBytes(StandardCharsets.UTF_8)); + } + )) { + Assertions.assertNotNull(secondFile); + String readContent = new String( + Files.readAllBytes(secondFile.getFile().toPath()), + StandardCharsets.UTF_8 + ); + Assertions.assertEquals("v2", readContent); + } 
+ + Assertions.assertEquals(1, firstSupplierCalls.get(), "first reserve must be a miss"); + Assertions.assertEquals(1, firstPopulatorCalls.get(), "first populator must run"); + Assertions.assertEquals( + 1, + secondSupplierCalls.get(), + "second reserve must also be a miss — proving the lock was cleared on unmount" + ); + Assertions.assertEquals(1, secondPopulatorCalls.get(), "second populator must run"); + } + + @Test + public void testPopulationLockClearedOnCapacityEviction() throws Exception + { + long capacity = location.availableSizeBytes(); + long smallSize = capacity / 4; + long bigSize = capacity - 100; // forces reclaim of the small entry + + String smallId = "small"; + String bigId = "big"; + + AtomicInteger populatorCalls = new AtomicInteger(); + try (CachedFile smallFile = manager.reserveAndPopulate( + smallId, + () -> smallSize, + file -> { + populatorCalls.incrementAndGet(); + Files.write(file.toPath(), new byte[]{1, 2, 3}); + } + )) { + Assertions.assertNotNull(smallFile); + } + // Small entry is now weakly cached, eligible for eviction. + + try (CachedFile bigFile = manager.reserveAndPopulate( + bigId, + () -> bigSize, + file -> { + populatorCalls.incrementAndGet(); + Files.write(file.toPath(), new byte[]{4, 5, 6}); + } + )) { + Assertions.assertNotNull(bigFile); + } + // Reserving `big` should have forced eviction of `small`, invoking unmount → cleanup. 
+ + AtomicInteger supplierCalls = new AtomicInteger(); + try (CachedFile reborn = manager.reserveAndPopulate( + smallId, + () -> { + supplierCalls.incrementAndGet(); + return smallSize; + }, + file -> { + populatorCalls.incrementAndGet(); + Files.write(file.toPath(), new byte[]{7, 8, 9}); + } + )) { + Assertions.assertNotNull(reborn); + } + + Assertions.assertEquals( + 1, + supplierCalls.get(), + "re-reserving an evicted id must take the miss path" + ); + Assertions.assertEquals(3, populatorCalls.get()); + } + + @Test + public void testPopulationLockClearedOnLocationReset() throws Exception + { + // location.reset() unmounts every weakly-held entry; the cleanup hook must fire for each. + String identifier = "test-file"; + String content = "Hello, World!"; + + try (CachedFile cachedFile = manager.reserveAndPopulate( + identifier, + content::length, + file -> Files.write(file.toPath(), content.getBytes(StandardCharsets.UTF_8)) + )) { + Assertions.assertNotNull(cachedFile); + } + Assertions.assertEquals(1, location.getWeakEntryCount()); + + location.reset(); + Assertions.assertEquals(0, location.getWeakEntryCount()); + + AtomicInteger supplierCalls = new AtomicInteger(); + AtomicInteger populatorCalls = new AtomicInteger(); + try (CachedFile reborn = manager.reserveAndPopulate( + identifier, + () -> { + supplierCalls.incrementAndGet(); + return (long) content.length(); + }, + file -> { + populatorCalls.incrementAndGet(); + Files.write(file.toPath(), content.getBytes(StandardCharsets.UTF_8)); + } + )) { + Assertions.assertNotNull(reborn); + } + + Assertions.assertEquals( + 1, + supplierCalls.get(), + "re-reserving after location.reset() must take the miss path" + ); + Assertions.assertEquals(1, populatorCalls.get()); + } + + @Test + public void testMultipleCallsToReserveAndPopulateWithSameIdentifier() throws Exception + { + String identifier = "test-file"; + String content1 = "First content"; + String content2 = "Second content"; + + // First call + try (CachedFile 
cachedFile1 = manager.reserveAndPopulate( + identifier, + content1::length, + file -> Files.write(file.toPath(), content1.getBytes(StandardCharsets.UTF_8)) + )) { + Assertions.assertNotNull(cachedFile1); + Assertions.assertEquals(1, location.getWeakStats().getHoldCount()); + + // Second call with same identifier should return existing file + try (CachedFile cachedFile2 = manager.reserveAndPopulate( + identifier, + content2::length, + file -> { + // This populator should NOT be called + throw new RuntimeException("Populator should not be called for existing file"); + } + )) { + Assertions.assertNotNull(cachedFile2); + + Assertions.assertEquals(2, location.getWeakStats().getHoldCount()); + + // Only the first call counts as a load + Assertions.assertEquals(1, location.getWeakStats().getLoadCount()); + Assertions.assertEquals(content1.length(), location.getWeakStats().getLoadBytes()); + + // Should have first content, not second + String readContent = new String( + Files.readAllBytes(cachedFile2.getFile().toPath()), + StandardCharsets.UTF_8 + ); + Assertions.assertEquals(content1, readContent); + } + + // Inner hold released, outer hold still active + Assertions.assertEquals(1, location.getWeakStats().getHoldCount()); + } + + // All holds released + Assertions.assertEquals(0, location.getWeakStats().getHoldCount()); + Assertions.assertEquals(1, location.getWeakEntryCount()); + } + + @Test + public void testSizeSupplierIsNotCalledOnCacheHit() + { + String identifier = "test-file"; + String content = "Hello, World!"; + AtomicInteger missSupplierCalls = new AtomicInteger(); + AtomicInteger hitSupplierCalls = new AtomicInteger(); + + try (CachedFile first = manager.reserveAndPopulate( + identifier, + () -> { + missSupplierCalls.incrementAndGet(); + return content.length(); + }, + file -> Files.write(file.toPath(), content.getBytes(StandardCharsets.UTF_8)) + )) { + Assertions.assertNotNull(first); + Assertions.assertEquals(1, missSupplierCalls.get(), "supplier should be 
invoked on cache miss"); + + // Hit path: supplier must not be invoked. + try (CachedFile second = manager.reserveAndPopulate( + identifier, + () -> { + hitSupplierCalls.incrementAndGet(); + return content.length(); + }, + ignored -> { + throw new RuntimeException("Populator should not be called for existing file"); + } + )) { + Assertions.assertNotNull(second); + } + } + + Assertions.assertEquals( + 0, + hitSupplierCalls.get(), + "size supplier must not be invoked when the cached entry already exists" + ); + } + + @Test + public void testSizeSupplierIsCalledExactlyOnceOnCacheMiss() + { + // Companion to the cache-hit case: a successful miss path should evaluate the + // supplier exactly once, even though the implementation iterates storage locations + // looking for one with capacity. + String identifier = "test-file"; + String content = "Hello, World!"; + AtomicInteger supplierCalls = new AtomicInteger(); + + try (CachedFile cachedFile = manager.reserveAndPopulate( + identifier, + () -> { + supplierCalls.incrementAndGet(); + return content.length(); + }, + file -> Files.write(file.toPath(), content.getBytes(StandardCharsets.UTF_8)) + )) { + Assertions.assertNotNull(cachedFile); + } + + Assertions.assertEquals(1, supplierCalls.get(), "size supplier must be invoked exactly once on cache miss"); + } + + @Test + public void testPopulatorThrowsException() + { + String identifier = "test-file"; + + new DruidExceptionMatcher( + DruidException.Persona.OPERATOR, + DruidException.Category.RUNTIME_FAILURE, + "general" + ) + .expectMessageContains("populate") + .assertThrowsAndMatches(() -> manager.reserveAndPopulate( + identifier, + () -> 100, + file -> { + throw new IOException("Populator failed"); + } + )); + + // Note: The entry may still be in the cache but in an unmounted state. + // StorageLocation will eventually evict it. The important thing is that + // the exception was properly thrown and the file was not successfully created. 
+ + // A failed populate counts as a load begin, but not as a completed load. + Assertions.assertEquals(1, location.getWeakStats().getLoadBeginCount()); + Assertions.assertEquals(0, location.getWeakStats().getLoadCount()); + Assertions.assertEquals(0, location.getWeakStats().getLoadBytes()); + } + + @Test + public void testInsufficientSpace() + { + String identifier = "huge-file"; + long hugeSize = location.availableSizeBytes() + 1; + + new DruidExceptionMatcher( + DruidException.Persona.OPERATOR, + DruidException.Category.CAPACITY_EXCEEDED, + "general" + ) + .expectMessageContains("No space available") + .assertThrowsAndMatches(() -> manager.reserveAndPopulate( + identifier, + () -> hugeSize, + file -> Files.write(file.toPath(), new byte[1000]) + )); + } + + @Test + public void testReserveAndPopulateAsyncRecoversFromInsufficientStorage() + { + // Verify something similar to what ExternalInputSliceReader does: fall back to recovery logic if the + // storage location cannot hold a file. + final long tooLargeFileSize = location.availableSizeBytes() + 1; + + final AsyncResource recovered = AsyncResources.recover( + AsyncResources.transform( + manager.reserveAndPopulateAsync( + "huge-file", + () -> tooLargeFileSize, + file -> Files.write(file.toPath(), new byte[]{1, 2, 3}) + ), + cachedFile -> "fetched" + ), + e -> VirtualStorageManager.isInsufficientStorage(e) ? "recovery" : null + ); + + try (recovered) { + Assertions.assertTrue(recovered.isReady()); + Assertions.assertEquals("recovery", recovered.get()); + } + } + + @Test + public void testReserveAndPopulateAsyncDoesNotRecoverFromOtherErrors() + { + // A populate failure that is not an insufficient-storage error must be propagated, not recovered: the recovery + // function returns null, so the original error surfaces from get(). 
+ final AsyncResource recovered = AsyncResources.recover( + AsyncResources.transform( + manager.reserveAndPopulateAsync( + "boom-file", + () -> 100L, + file -> { + throw new IOException("populate failed"); + } + ), + cachedFile -> "fetched" + ), + e -> VirtualStorageManager.isInsufficientStorage(e) ? "streamed" : null + ); + + try (recovered) { + Assertions.assertTrue(recovered.isReady()); + final DruidException e = Assertions.assertThrows(DruidException.class, recovered::get); + Assertions.assertEquals(DruidException.Category.RUNTIME_FAILURE, e.getCategory()); + } + } + + @Test + public void testFilePathSanitization() + { + String identifier = "path/with/slashes/and-special_chars.txt"; + String content = "Test content"; + + try (CachedFile cachedFile = manager.reserveAndPopulate( + identifier, + content::length, + file -> Files.write(file.toPath(), content.getBytes(StandardCharsets.UTF_8)) + )) { + File file = cachedFile.getFile(); + Assertions.assertNotNull(file); + Assertions.assertTrue(file.exists()); + + // Verify file is in a subdirectory structure + Assertions.assertTrue(file.getAbsolutePath().contains("path")); + Assertions.assertTrue(file.getAbsolutePath().contains("slashes")); + } + } + + // ========== LIFECYCLE TESTS ========== + + @Test + public void testLifecycleWithMultipleFiles() + { + Assertions.assertEquals(0, location.getWeakStats().getHoldCount()); + Assertions.assertEquals(0, location.getWeakEntryCount()); + + // Create multiple files + CachedFile file1 = manager.reserveAndPopulate( + "file1", + () -> 5, + file -> Files.write(file.toPath(), "File1".getBytes(StandardCharsets.UTF_8)) + ); + Assertions.assertEquals(1, location.getWeakStats().getHoldCount()); + Assertions.assertEquals(1, location.getWeakEntryCount()); + + CachedFile file2 = manager.reserveAndPopulate( + "file2", + () -> 5, + file -> Files.write(file.toPath(), "File2".getBytes(StandardCharsets.UTF_8)) + ); + Assertions.assertEquals(2, location.getWeakStats().getHoldCount()); + 
Assertions.assertEquals(2, location.getWeakEntryCount()); + + CachedFile file3 = manager.reserveAndPopulate( + "file3", + () -> 5, + file -> Files.write(file.toPath(), "File3".getBytes(StandardCharsets.UTF_8)) + ); + Assertions.assertEquals(3, location.getWeakStats().getHoldCount()); + Assertions.assertEquals(3, location.getWeakEntryCount()); + + // Close one file + file1.close(); + Assertions.assertEquals(2, location.getWeakStats().getHoldCount()); + // Entry remains + Assertions.assertEquals(3, location.getWeakEntryCount()); + + // Close another + file2.close(); + Assertions.assertEquals(1, location.getWeakStats().getHoldCount()); + Assertions.assertEquals(3, location.getWeakEntryCount()); + + // Close last + file3.close(); + Assertions.assertEquals(0, location.getWeakStats().getHoldCount()); + Assertions.assertEquals(3, location.getWeakEntryCount()); + } + + @Test + public void testLifecycleWithMultipleHoldsOnSameFile() + { + Assertions.assertEquals(0, location.getWeakStats().getHoldCount()); + Assertions.assertEquals(0, location.getWeakEntryCount()); + + String identifier = "shared-file"; + String content = "Shared content"; + + // Create first hold + CachedFile hold1 = manager.reserveAndPopulate( + identifier, + content::length, + file -> Files.write(file.toPath(), content.getBytes(StandardCharsets.UTF_8)) + ); + Assertions.assertEquals(1, location.getWeakStats().getHoldCount()); + Assertions.assertEquals(1, location.getWeakEntryCount()); + + // Get second hold on same file + CachedFile hold2 = manager.get(identifier); + Assertions.assertNotNull(hold2); + Assertions.assertEquals(2, location.getWeakStats().getHoldCount()); + Assertions.assertEquals(1, location.getWeakEntryCount()); + + // Get third hold + CachedFile hold3 = manager.reserveAndPopulate( + identifier, + content::length, + file -> { + throw new RuntimeException("Should not populate again"); + } + ); + Assertions.assertEquals(3, location.getWeakStats().getHoldCount()); + Assertions.assertEquals(1, 
location.getWeakEntryCount()); + + // Release holds one by one - entry stays held until all are released + hold1.close(); + // Entry is still held by other handles + Assertions.assertEquals(2, location.getWeakStats().getHoldCount()); + + hold2.close(); + Assertions.assertEquals(1, location.getWeakStats().getHoldCount()); + + // After last close, no holds remain + hold3.close(); + Assertions.assertEquals(0, location.getWeakStats().getHoldCount()); + Assertions.assertEquals(1, location.getWeakEntryCount()); + } + + // ========== THREAD-SAFETY TESTS ========== + + @Test + public void testConcurrentReserveAndPopulateSameIdentifier() throws Exception + { + withExecutor(executorService -> { + String identifier = "concurrent-file"; + String content = "Test content"; + int numThreads = 10; + + Assertions.assertEquals(0, location.getWeakStats().getHoldCount()); + + AtomicInteger populatorCallCount = new AtomicInteger(0); + CountDownLatch startLatch = new CountDownLatch(1); + CountDownLatch completionLatch = new CountDownLatch(numThreads); + + List> futures = new ArrayList<>(); + + // Launch multiple threads that all try to populate the same identifier + for (int i = 0; i < numThreads; i++) { + Future future = executorService.submit(() -> { + try { + // Wait for all threads to be ready + startLatch.await(); + + CachedFile result = manager.reserveAndPopulate( + identifier, + content::length, + file -> { + populatorCallCount.incrementAndGet(); + // Add a small sleep to increase chance of race conditions if locking is broken + try { + Thread.sleep(10); + } + catch (InterruptedException e) { + Thread.currentThread().interrupt(); + throw new RuntimeException(e); + } + Files.write(file.toPath(), content.getBytes(StandardCharsets.UTF_8)); + } + ); + + return result; + } + finally { + completionLatch.countDown(); + } + }); + futures.add(future); + } + + // Start all threads at once + startLatch.countDown(); + + // Wait for all to complete + 
Assertions.assertTrue(completionLatch.await(10, TimeUnit.SECONDS)); + + // Verify populator was called exactly once + Assertions.assertEquals( + 1, + populatorCallCount.get(), + "Populator should only be called once despite concurrent access" + ); + + Assertions.assertEquals(numThreads, location.getWeakStats().getHoldCount()); + + // Verify all threads got a valid CachedFile and close them + for (Future future : futures) { + CachedFile cachedFile = future.get(); + Assertions.assertNotNull(cachedFile); + cachedFile.close(); + } + + // After all closes, no active holds + Assertions.assertEquals(0, location.getWeakStats().getHoldCount()); + Assertions.assertEquals(1, location.getWeakEntryCount()); + }); + } + + @Test + public void testConcurrentGetWhilePopulating() throws Exception + { + withExecutor(executorService -> { + String identifier = "concurrent-get-file"; + String content = "Test content"; + int numGetters = 5; + + CountDownLatch populateStarted = new CountDownLatch(1); + CountDownLatch populateCanComplete = new CountDownLatch(1); + CountDownLatch allComplete = new CountDownLatch(numGetters + 1); + + AtomicInteger nullGetCount = new AtomicInteger(0); + AtomicInteger nonNullGetCount = new AtomicInteger(0); + + // Start populator thread + Future populateFuture = executorService.submit(() -> { + try { + CachedFile result = manager.reserveAndPopulate( + identifier, + content::length, + file -> { + populateStarted.countDown(); + // Wait before completing population + try { + populateCanComplete.await(); + } + catch (InterruptedException e) { + Thread.currentThread().interrupt(); + throw new RuntimeException(e); + } + Files.write(file.toPath(), content.getBytes(StandardCharsets.UTF_8)); + } + ); + return result; + } + finally { + allComplete.countDown(); + } + }); + + // Wait for population to start + Assertions.assertTrue(populateStarted.await(5, TimeUnit.SECONDS)); + + // Launch getter threads while population is in progress + List> getterFutures = new 
ArrayList<>(); + for (int i = 0; i < numGetters; i++) { + Future future = executorService.submit(() -> { + try { + CachedFile result = manager.get(identifier); + if (result == null) { + nullGetCount.incrementAndGet(); + } else { + nonNullGetCount.incrementAndGet(); + } + return result; + } + finally { + allComplete.countDown(); + } + }); + getterFutures.add(future); + } + + // Let population complete + populateCanComplete.countDown(); + + // Wait for all threads + Assertions.assertTrue(allComplete.await(10, TimeUnit.SECONDS)); + + // Clean up + CachedFile populated = populateFuture.get(); + Assertions.assertNotNull(populated); + populated.close(); + + for (Future future : getterFutures) { + CachedFile result = future.get(); + if (result != null) { + result.close(); + } + } + + // Some gets might return null (before population), some might succeed (after) + // Both are valid outcomes, we just verify no exceptions occurred + Assertions.assertEquals(nullGetCount.get() + nonNullGetCount.get(), numGetters); + }); + } + + @Test + public void testConcurrentPopulationOfDifferentIdentifiers() throws Exception + { + withExecutor(executorService -> { + int numIdentifiers = 10; + String contentPrefix = "Content for file "; + + CyclicBarrier startBarrier = new CyclicBarrier(numIdentifiers); + CountDownLatch completionLatch = new CountDownLatch(numIdentifiers); + AtomicInteger successCount = new AtomicInteger(0); + + List> futures = new ArrayList<>(); + + for (int i = 0; i < numIdentifiers; i++) { + final int fileNum = i; + Future future = executorService.submit(() -> { + try { + // Wait for all threads to be ready + startBarrier.await(); + + String identifier = "file-" + fileNum; + String content = contentPrefix + fileNum; + + CachedFile result = manager.reserveAndPopulate( + identifier, + content::length, + file -> { + // Simulate some work + try { + Thread.sleep(5); + } + catch (InterruptedException e) { + Thread.currentThread().interrupt(); + throw new RuntimeException(e); + 
} + Files.write(file.toPath(), content.getBytes(StandardCharsets.UTF_8)); + } + ); + + successCount.incrementAndGet(); + return result; + } + finally { + completionLatch.countDown(); + } + }); + futures.add(future); + } + + // Wait for all to complete + Assertions.assertTrue(completionLatch.await(10, TimeUnit.SECONDS)); + + // Verify all succeeded + Assertions.assertEquals(numIdentifiers, successCount.get()); + + // Verify all files are correct + for (int i = 0; i < numIdentifiers; i++) { + try (CachedFile cachedFile = futures.get(i).get()) { + Assertions.assertNotNull(cachedFile); + + String expectedContent = contentPrefix + i; + String actualContent = new String( + Files.readAllBytes(cachedFile.getFile().toPath()), + StandardCharsets.UTF_8 + ); + Assertions.assertEquals(expectedContent, actualContent); + } + } + }); + } + + @Test + public void testResolvedLocationIsReused() + { + String identifier = "resolved-location-file"; + String content = "Test content"; + + // First reservation + try (CachedFile cachedFile1 = manager.reserveAndPopulate( + identifier, + content::length, + file -> Files.write(file.toPath(), content.getBytes(StandardCharsets.UTF_8)) + )) { + Assertions.assertNotNull(cachedFile1); + + // Second reservation should use the cached resolved location + // and find the existing file without trying other locations + try (CachedFile cachedFile2 = manager.reserveAndPopulate( + identifier, + content::length, + file -> { + throw new RuntimeException("Should not populate again"); + } + )) { + Assertions.assertNotNull(cachedFile2); + + // Both should point to same underlying file + Assertions.assertEquals( + cachedFile1.getFile().getAbsolutePath(), + cachedFile2.getFile().getAbsolutePath() + ); + } + } + } + + @Test + public void testConcurrentResolvedLocationLookup() throws Exception + { + withExecutor(executorService -> { + String identifier = "concurrent-resolved-file"; + String content = "Test content"; + int numThreads = 10; + + // First, populate the 
file + try (CachedFile initialFile = manager.reserveAndPopulate( + identifier, + content::length, + file -> Files.write(file.toPath(), content.getBytes(StandardCharsets.UTF_8)) + )) { + Assertions.assertNotNull(initialFile); + } + + // Now have multiple threads try to reserve the same identifier + CyclicBarrier startBarrier = new CyclicBarrier(numThreads); + CountDownLatch completionLatch = new CountDownLatch(numThreads); + AtomicInteger populatorCallCount = new AtomicInteger(0); + + List> futures = new ArrayList<>(); + + for (int i = 0; i < numThreads; i++) { + Future future = executorService.submit(() -> { + try { + startBarrier.await(); + + return manager.reserveAndPopulate( + identifier, + content::length, + file -> { + populatorCallCount.incrementAndGet(); + throw new RuntimeException("Should not be called"); + } + ); + } + finally { + completionLatch.countDown(); + } + }); + futures.add(future); + } + + Assertions.assertTrue(completionLatch.await(10, TimeUnit.SECONDS)); + + // Verify populator was never called (file already exists) + Assertions.assertEquals(0, populatorCallCount.get()); + + // Verify all got valid results + for (Future future : futures) { + CachedFile cachedFile = future.get(); + Assertions.assertNotNull(cachedFile); + cachedFile.close(); + } + }); + } +} diff --git a/server/src/test/java/org/apache/druid/segment/realtime/ChatHandlerProviderTest.java b/server/src/test/java/org/apache/druid/segment/realtime/ChatHandlerProviderTest.java new file mode 100644 index 000000000000..2163a5c1948d --- /dev/null +++ b/server/src/test/java/org/apache/druid/segment/realtime/ChatHandlerProviderTest.java @@ -0,0 +1,64 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.realtime; + +import org.apache.druid.java.util.common.ISE; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +public class ChatHandlerProviderTest +{ + private static class TestChatHandler implements ChatHandler + { + } + + private static final String TEST_SERVICE_NAME = "test-service-name"; + + private ChatHandlerProvider chatHandlerProvider; + + @Before + public void setUp() + { + chatHandlerProvider = new ChatHandlerProvider(); + } + + @Test + public void testRegisterAndGet() + { + ChatHandler testChatHandler = new TestChatHandler(); + + Assert.assertFalse("bad initial state", chatHandlerProvider.get(TEST_SERVICE_NAME).isPresent()); + + chatHandlerProvider.register(TEST_SERVICE_NAME, testChatHandler); + Assert.assertTrue("chatHandler did not register", chatHandlerProvider.get(TEST_SERVICE_NAME).isPresent()); + Assert.assertEquals(testChatHandler, chatHandlerProvider.get(TEST_SERVICE_NAME).get()); + + chatHandlerProvider.unregister(TEST_SERVICE_NAME); + Assert.assertFalse("chatHandler did not deregister", chatHandlerProvider.get(TEST_SERVICE_NAME).isPresent()); + } + + @Test + public void testDuplicateRegistrationThrows() + { + chatHandlerProvider.register(TEST_SERVICE_NAME, new TestChatHandler()); + Assert.assertThrows(ISE.class, () -> chatHandlerProvider.register(TEST_SERVICE_NAME, new TestChatHandler())); + } +} 
diff --git a/server/src/test/java/org/apache/druid/segment/realtime/ServiceAnnouncingChatHandlerProviderTest.java b/server/src/test/java/org/apache/druid/segment/realtime/ServiceAnnouncingChatHandlerProviderTest.java deleted file mode 100644 index 05fb11e4b620..000000000000 --- a/server/src/test/java/org/apache/druid/segment/realtime/ServiceAnnouncingChatHandlerProviderTest.java +++ /dev/null @@ -1,140 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.druid.segment.realtime; - -import org.apache.druid.curator.discovery.ServiceAnnouncer; -import org.apache.druid.server.DruidNode; -import org.easymock.Capture; -import org.easymock.EasyMock; -import org.easymock.EasyMockRunner; -import org.easymock.EasyMockSupport; -import org.easymock.Mock; -import org.junit.Assert; -import org.junit.Before; -import org.junit.Test; -import org.junit.runner.RunWith; - -@RunWith(EasyMockRunner.class) -public class ServiceAnnouncingChatHandlerProviderTest extends EasyMockSupport -{ - private static class TestChatHandler implements ChatHandler - { - } - - private static final String TEST_SERVICE_NAME = "test-service-name"; - private static final String TEST_HOST = "test-host"; - private static final int TEST_PORT = 1234; - - private ServiceAnnouncingChatHandlerProvider chatHandlerProvider; - - @Mock - private DruidNode node; - - @Mock - private ServiceAnnouncer serviceAnnouncer; - - @Before - public void setUp() - { - chatHandlerProvider = new ServiceAnnouncingChatHandlerProvider(node, serviceAnnouncer); - } - - @Test - public void testRegistrationDefault() - { - testRegistrationWithAnnounce(false); - } - - @Test - public void testRegistrationWithAnnounce() - { - testRegistrationWithAnnounce(true); - } - - @Test - public void testRegistrationWithoutAnnounce() - { - ChatHandler testChatHandler = new TestChatHandler(); - - Assert.assertFalse("bad initial state", chatHandlerProvider.get(TEST_SERVICE_NAME).isPresent()); - - chatHandlerProvider.register(TEST_SERVICE_NAME, testChatHandler, false); - Assert.assertTrue("chatHandler did not register", chatHandlerProvider.get(TEST_SERVICE_NAME).isPresent()); - Assert.assertEquals(testChatHandler, chatHandlerProvider.get(TEST_SERVICE_NAME).get()); - - chatHandlerProvider.unregister(TEST_SERVICE_NAME); - Assert.assertFalse("chatHandler did not deregister", chatHandlerProvider.get(TEST_SERVICE_NAME).isPresent()); - } - - private void testRegistrationWithAnnounce(boolean 
useThreeArgConstructor) - { - ChatHandler testChatHandler = new TestChatHandler(); - Capture captured = Capture.newInstance(); - - EasyMock.expect(node.getHost()).andReturn(TEST_HOST); - EasyMock.expect(node.isBindOnHost()).andReturn(false); - EasyMock.expect(node.getPlaintextPort()).andReturn(TEST_PORT); - EasyMock.expect(node.isEnablePlaintextPort()).andReturn(true); - EasyMock.expect(node.isEnableTlsPort()).andReturn(false); - EasyMock.expect(node.getTlsPort()).andReturn(-1); - serviceAnnouncer.announce(EasyMock.capture(captured)); - replayAll(); - - Assert.assertFalse("bad initial state", chatHandlerProvider.get(TEST_SERVICE_NAME).isPresent()); - - if (useThreeArgConstructor) { - chatHandlerProvider.register(TEST_SERVICE_NAME, testChatHandler, true); - } else { - chatHandlerProvider.register(TEST_SERVICE_NAME, testChatHandler); - } - verifyAll(); - - DruidNode param = captured.getValues().get(0); - Assert.assertEquals(TEST_SERVICE_NAME, param.getServiceName()); - Assert.assertEquals(TEST_HOST, param.getHost()); - Assert.assertEquals(TEST_PORT, param.getPlaintextPort()); - Assert.assertEquals(-1, param.getTlsPort()); - Assert.assertEquals(null, param.getHostAndTlsPort()); - Assert.assertTrue("chatHandler did not register", chatHandlerProvider.get(TEST_SERVICE_NAME).isPresent()); - Assert.assertEquals(testChatHandler, chatHandlerProvider.get(TEST_SERVICE_NAME).get()); - - captured.reset(); - resetAll(); - EasyMock.expect(node.getHost()).andReturn(TEST_HOST); - EasyMock.expect(node.isBindOnHost()).andReturn(false); - EasyMock.expect(node.getPlaintextPort()).andReturn(TEST_PORT); - EasyMock.expect(node.isEnablePlaintextPort()).andReturn(true); - EasyMock.expect(node.getTlsPort()).andReturn(-1); - EasyMock.expect(node.isEnableTlsPort()).andReturn(false); - serviceAnnouncer.unannounce(EasyMock.capture(captured)); - replayAll(); - - chatHandlerProvider.unregister(TEST_SERVICE_NAME); - verifyAll(); - - param = captured.getValues().get(0); - 
Assert.assertEquals(TEST_SERVICE_NAME, param.getServiceName()); - Assert.assertEquals(TEST_HOST, param.getHost()); - Assert.assertEquals(TEST_PORT, param.getPlaintextPort()); - Assert.assertEquals(-1, param.getTlsPort()); - Assert.assertEquals(null, param.getHostAndTlsPort()); - Assert.assertFalse("chatHandler did not deregister", chatHandlerProvider.get(TEST_SERVICE_NAME).isPresent()); - } -} diff --git a/server/src/test/java/org/apache/druid/server/SegmentManagerTest.java b/server/src/test/java/org/apache/druid/server/SegmentManagerTest.java index 2246237f5a0e..3b0d6c6e9db6 100644 --- a/server/src/test/java/org/apache/druid/server/SegmentManagerTest.java +++ b/server/src/test/java/org/apache/druid/server/SegmentManagerTest.java @@ -49,6 +49,7 @@ import org.apache.druid.segment.loading.SegmentLoaderConfig; import org.apache.druid.segment.loading.SegmentLoadingException; import org.apache.druid.segment.loading.SegmentLocalCacheManager; +import org.apache.druid.segment.loading.StorageLoadingThreadPool; import org.apache.druid.segment.loading.StorageLocation; import org.apache.druid.segment.loading.StorageLocationConfig; import org.apache.druid.server.SegmentManager.DataSourceState; @@ -165,6 +166,7 @@ public boolean isVirtualStorage() cacheManager = new SegmentLocalCacheManager( storageLocations, loaderConfig, + StorageLoadingThreadPool.createFromConfig(loaderConfig), new LeastBytesUsedStorageLocationSelectorStrategy(storageLocations), TestIndex.INDEX_IO, objectMapper @@ -175,6 +177,7 @@ public boolean isVirtualStorage() virtualCacheManager = new SegmentLocalCacheManager( virtualStorageLocations, virtualLoaderConfig, + StorageLoadingThreadPool.createFromConfig(virtualLoaderConfig), new LeastBytesUsedStorageLocationSelectorStrategy(virtualStorageLocations), TestIndex.INDEX_IO, objectMapper diff --git a/server/src/test/java/org/apache/druid/server/coordination/SegmentCacheBootstrapperCacheTest.java 
b/server/src/test/java/org/apache/druid/server/coordination/SegmentCacheBootstrapperCacheTest.java index bf8edc619563..7878630d2af7 100644 --- a/server/src/test/java/org/apache/druid/server/coordination/SegmentCacheBootstrapperCacheTest.java +++ b/server/src/test/java/org/apache/druid/server/coordination/SegmentCacheBootstrapperCacheTest.java @@ -31,6 +31,7 @@ import org.apache.druid.segment.loading.SegmentLoaderConfig; import org.apache.druid.segment.loading.SegmentLoadingException; import org.apache.druid.segment.loading.SegmentLocalCacheManager; +import org.apache.druid.segment.loading.StorageLoadingThreadPool; import org.apache.druid.segment.loading.StorageLocation; import org.apache.druid.segment.loading.StorageLocationConfig; import org.apache.druid.server.SegmentManager; @@ -99,6 +100,7 @@ public List getLocations() cacheManager = new SegmentLocalCacheManager( storageLocations, loaderConfig, + StorageLoadingThreadPool.createFromConfig(loaderConfig), new LeastBytesUsedStorageLocationSelectorStrategy(storageLocations), TestIndex.INDEX_IO, objectMapper @@ -119,6 +121,7 @@ public void testLoadStartStopWithEmptyLocations() throws IOException new SegmentLocalCacheManager( emptyLocations, loaderConfig, + StorageLoadingThreadPool.createFromConfig(loaderConfig), new LeastBytesUsedStorageLocationSelectorStrategy(emptyLocations), TestIndex.INDEX_IO, objectMapper diff --git a/server/src/test/java/org/apache/druid/server/coordination/SegmentManagerBroadcastJoinIndexedTableTest.java b/server/src/test/java/org/apache/druid/server/coordination/SegmentManagerBroadcastJoinIndexedTableTest.java index fdcada7f9414..5ee311c389b1 100644 --- a/server/src/test/java/org/apache/druid/server/coordination/SegmentManagerBroadcastJoinIndexedTableTest.java +++ b/server/src/test/java/org/apache/druid/server/coordination/SegmentManagerBroadcastJoinIndexedTableTest.java @@ -55,6 +55,7 @@ import org.apache.druid.segment.loading.SegmentLoadingException; import 
org.apache.druid.segment.loading.SegmentLocalCacheManager; import org.apache.druid.segment.loading.SegmentizerFactory; +import org.apache.druid.segment.loading.StorageLoadingThreadPool; import org.apache.druid.segment.loading.StorageLocation; import org.apache.druid.segment.loading.StorageLocationConfig; import org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMediumFactory; @@ -149,6 +150,7 @@ public List getLocations() segmentCacheManager = new SegmentLocalCacheManager( storageLocations, loaderConfig, + StorageLoadingThreadPool.createFromConfig(loaderConfig), new LeastBytesUsedStorageLocationSelectorStrategy(storageLocations), TestIndex.INDEX_IO, objectMapper diff --git a/server/src/test/java/org/apache/druid/server/coordination/SegmentManagerThreadSafetyTest.java b/server/src/test/java/org/apache/druid/server/coordination/SegmentManagerThreadSafetyTest.java index 43c79372c501..8695cef6fcff 100644 --- a/server/src/test/java/org/apache/druid/server/coordination/SegmentManagerThreadSafetyTest.java +++ b/server/src/test/java/org/apache/druid/server/coordination/SegmentManagerThreadSafetyTest.java @@ -43,6 +43,7 @@ import org.apache.druid.segment.loading.SegmentLoadingException; import org.apache.druid.segment.loading.SegmentLocalCacheManager; import org.apache.druid.segment.loading.SegmentizerFactory; +import org.apache.druid.segment.loading.StorageLoadingThreadPool; import org.apache.druid.segment.loading.StorageLocation; import org.apache.druid.segment.loading.StorageLocationConfig; import org.apache.druid.server.SegmentManager; @@ -119,6 +120,7 @@ public List getLocations() segmentCacheManager = new SegmentLocalCacheManager( storageLocations, loaderConfig, + StorageLoadingThreadPool.createFromConfig(loaderConfig), new LeastBytesUsedStorageLocationSelectorStrategy(storageLocations), TestIndex.INDEX_IO, objectMapper diff --git a/server/src/test/java/org/apache/druid/server/coordinator/DruidCoordinatorTest.java 
b/server/src/test/java/org/apache/druid/server/coordinator/DruidCoordinatorTest.java index 332e6ffbadac..952bce46a6da 100644 --- a/server/src/test/java/org/apache/druid/server/coordinator/DruidCoordinatorTest.java +++ b/server/src/test/java/org/apache/druid/server/coordinator/DruidCoordinatorTest.java @@ -31,7 +31,6 @@ import org.apache.druid.client.ImmutableDruidServer; import org.apache.druid.client.ServerInventoryView; import org.apache.druid.common.config.JacksonConfigManager; -import org.apache.druid.curator.discovery.LatchableServiceAnnouncer; import org.apache.druid.discovery.DruidLeaderSelector; import org.apache.druid.java.util.common.Intervals; import org.apache.druid.java.util.common.concurrent.ScheduledExecutorFactory; @@ -103,8 +102,6 @@ public class DruidCoordinatorTest private ScheduledExecutorFactory scheduledExecutorFactory; private LoadQueueTaskMaster loadQueueTaskMaster; private MetadataRuleManager metadataRuleManager; - private CountDownLatch leaderAnnouncerLatch; - private CountDownLatch leaderUnannouncerLatch; private DruidCoordinatorConfig druidCoordinatorConfig; private DruidNode druidNode; private OverlordClient overlordClient; @@ -147,8 +144,6 @@ public void setUp() throws Exception ); druidNode = new DruidNode("hey", "what", false, 1234, null, true, false); scheduledExecutorFactory = ScheduledExecutors::fixed; - leaderAnnouncerLatch = new CountDownLatch(1); - leaderUnannouncerLatch = new CountDownLatch(1); serviceEmitter = new LatchableServiceEmitter(); coordinator = new DruidCoordinator( druidCoordinatorConfig, @@ -159,8 +154,6 @@ public void setUp() throws Exception overlordClient, loadQueueTaskMaster, new SegmentLoadQueueManager(serverInventoryView, loadQueueTaskMaster), - new LatchableServiceAnnouncer(leaderAnnouncerLatch, leaderUnannouncerLatch), - druidNode, new CoordinatorCustomDutyGroups(ImmutableSet.of()), EasyMock.createNiceMock(LookupCoordinatorManager.class), new TestDruidLeaderSelector(), @@ -242,7 +235,6 @@ public void 
testCoordinatorRun() throws Exception Assert.assertNull(coordinator.getBroadcastSegments()); // Wait for this coordinator to become leader - leaderAnnouncerLatch.await(); // This coordinator should be leader by now Assert.assertTrue(coordinator.isLeader()); @@ -291,7 +283,6 @@ public void testCoordinatorRun() throws Exception Assert.assertEquals(Integer.valueOf(2), coordinator.getReplicationFactor(dataSegment.getId())); coordinator.stop(); - leaderUnannouncerLatch.await(); Assert.assertFalse(coordinator.isLeader()); Assert.assertNull(coordinator.getCurrentLeader()); @@ -338,7 +329,6 @@ public void testCoordinatorTieredRun() throws Exception EasyMock.replay(metadataRuleManager, serverInventoryView, loadQueueTaskMaster); coordinator.start(); - leaderAnnouncerLatch.await(); // Wait for this coordinator to become leader serviceEmitter.coordinatorRunLatch.await(); @@ -359,7 +349,6 @@ public void testCoordinatorTieredRun() throws Exception dataSegments.forEach(dataSegment -> Assert.assertEquals(Integer.valueOf(1), coordinator.getReplicationFactor(dataSegment.getId()))); coordinator.stop(); - leaderUnannouncerLatch.await(); EasyMock.verify(serverInventoryView); EasyMock.verify(segmentsMetadataManager); @@ -422,7 +411,6 @@ public void testComputeUnderReplicationCountsPerDataSourcePerTierForSegmentsWith EasyMock.replay(metadataRuleManager, serverInventoryView, loadQueueTaskMaster); coordinator.start(); - leaderAnnouncerLatch.await(); // Wait for this coordinator to become leader serviceEmitter.coordinatorRunLatch.await(); @@ -447,7 +435,6 @@ public void testComputeUnderReplicationCountsPerDataSourcePerTierForSegmentsWith Assert.assertEquals(0L, underReplicationCountsPerDataSourcePerTierUsingClusterView.get(tierName2).getLong(dataSource)); coordinator.stop(); - leaderUnannouncerLatch.await(); EasyMock.verify(serverInventoryView); EasyMock.verify(segmentsMetadataManager); @@ -472,8 +459,6 @@ public void testCompactSegmentsDutyWhenCustomDutyGroupEmpty() overlordClient, 
loadQueueTaskMaster, null, - new LatchableServiceAnnouncer(leaderAnnouncerLatch, leaderUnannouncerLatch), - druidNode, emptyCustomDutyGroups, EasyMock.createNiceMock(LookupCoordinatorManager.class), new TestDruidLeaderSelector(), @@ -525,8 +510,6 @@ public void testInitializeCompactSegmentsDutyWhenCustomDutyGroupDoesNotContainsC overlordClient, loadQueueTaskMaster, null, - new LatchableServiceAnnouncer(leaderAnnouncerLatch, leaderUnannouncerLatch), - druidNode, customDutyGroups, EasyMock.createNiceMock(LookupCoordinatorManager.class), new TestDruidLeaderSelector(), @@ -578,8 +561,6 @@ public void testInitializeCompactSegmentsDutyWhenCustomDutyGroupContainsCompactS overlordClient, loadQueueTaskMaster, null, - new LatchableServiceAnnouncer(leaderAnnouncerLatch, leaderUnannouncerLatch), - druidNode, customDutyGroups, EasyMock.createNiceMock(LookupCoordinatorManager.class), new TestDruidLeaderSelector(), @@ -689,8 +670,6 @@ public void testCoordinatorCustomDutyGroupsRunAsExpected() throws Exception overlordClient, loadQueueTaskMaster, new SegmentLoadQueueManager(serverInventoryView, loadQueueTaskMaster), - new LatchableServiceAnnouncer(leaderAnnouncerLatch, leaderUnannouncerLatch), - druidNode, groups, EasyMock.createNiceMock(LookupCoordinatorManager.class), new TestDruidLeaderSelector(), @@ -784,7 +763,6 @@ public void testCoordinatorRun_queryFromDeepStorage() throws Exception coordinator.start(); // Wait for this coordinator to become leader - leaderAnnouncerLatch.await(); // This coordinator should be leader by now Assert.assertTrue(coordinator.isLeader()); @@ -817,7 +795,6 @@ public void testCoordinatorRun_queryFromDeepStorage() throws Exception Assert.assertEquals(1, numsDeepStorageOnlySegmentsPerDataSource.getInt(dataSource)); coordinator.stop(); - leaderUnannouncerLatch.await(); Assert.assertFalse(coordinator.isLeader()); Assert.assertNull(coordinator.getCurrentLeader()); diff --git 
a/server/src/test/java/org/apache/druid/server/coordinator/balancer/DiskNormalizedCostBalancerStrategyTest.java b/server/src/test/java/org/apache/druid/server/coordinator/balancer/DiskNormalizedCostBalancerStrategyTest.java index f57199c1f48c..d6c3ad44e135 100644 --- a/server/src/test/java/org/apache/druid/server/coordinator/balancer/DiskNormalizedCostBalancerStrategyTest.java +++ b/server/src/test/java/org/apache/druid/server/coordinator/balancer/DiskNormalizedCostBalancerStrategyTest.java @@ -120,6 +120,11 @@ public static DataSegment getSegment(int index) } public static DataSegment getSegment(int index, String dataSource, Interval interval) + { + return getSegment(index, dataSource, interval, index * 100L); + } + + public static DataSegment getSegment(int index, String dataSource, Interval interval, long size) { // Not using EasyMock as it hampers the performance of multithreads. DataSegment segment = new DataSegment( @@ -131,7 +136,7 @@ public static DataSegment getSegment(int index, String dataSource, Interval inte new ArrayList<>(), null, 0, - index * 100L + size ); return segment; } @@ -180,6 +185,16 @@ private static ServerHolder buildServer( List segments = IntStream.range(baseIndex, baseIndex + segmentCount) .mapToObj(DiskNormalizedCostBalancerStrategyTest::getSegment) .collect(Collectors.toList()); + return buildServer(name, maxSize, sizeUsed, segments); + } + + private static ServerHolder buildServer( + String name, + long maxSize, + long sizeUsed, + List segments + ) + { ImmutableDruidDataSource ds = new ImmutableDruidDataSource("DUMMY", Collections.emptyMap(), segments); return new ServerHolder( @@ -228,8 +243,8 @@ public void testDiskWeightingBeatsRawCost() newCostStrategy().findServersToLoadSegment(proposal, servers).next().getServer().getName() ); - // DiskNormalized: A = 10 * 0.9 = 9.0, B = 60 * 0.1 = 6.0. - // The emptier server must win. + // DiskNormalized uses projected headroom: A ~= 10K / 0.09, B ~= 60K / 0.89. 
+ // The emptier server wins despite the higher raw cost. Assert.assertEquals( "DiskNormalizedCostBalancerStrategy must prefer the emptier server", "B", @@ -263,8 +278,8 @@ public void testDiskNormalizedFixesSkewThatCostCannotCorrect() ); // DiskNormalizedCostBalancerStrategy (default 5% threshold): - // A: 38K * 0.80 * 0.95 = 28.88K - // B: 40K * 0.20 = 8.00K + // A: 38K / 0.20 * 0.95 = 180.5K + // B: 40K / 0.80 = 50.0K // B wins decisively and the segment moves, reducing the skew. final ServerHolder diskNormalizedResult = newDiskNormalizedStrategy().findDestinationServerToMoveSegment(segmentToMove, heavy, servers); @@ -280,7 +295,7 @@ public void testThresholdBlocksMarginalMove() { final long maxSize = 10_000_000L; final ServerHolder source = buildServer("SOURCE", maxSize, 8_000_000L, 0, 20); - final ServerHolder dest = buildServer("DEST", maxSize, 7_400_000L, 100, 20); + final ServerHolder dest = buildServer("DEST", maxSize, 7_830_000L, 100, 20); final DataSegment segmentToMove = getSegment(0); final List servers = new ArrayList<>(); @@ -293,17 +308,142 @@ public void testThresholdBlocksMarginalMove() newDiskNormalizedStrategy().findDestinationServerToMoveSegment(segmentToMove, source, servers) ); - // threshold=0 removes the discount; the same marginal difference now - // triggers the move. This proves the threshold is what blocks it above. - final BalancerStrategy noDiscount = new DiskNormalizedCostBalancerStrategy( + // Lowering the threshold to 1% reduces the discount; the same marginal + // difference now triggers the move. This proves the threshold is what + // blocks it above. 
+ final BalancerStrategy onePercentThreshold = new DiskNormalizedCostBalancerStrategy( MoreExecutors.listeningDecorator(Execs.multiThreaded(1, "DiskNormalizedCostBalancerStrategyTest-%d")), 0.01 ); - final ServerHolder movedTo = noDiscount.findDestinationServerToMoveSegment(segmentToMove, source, servers); + final ServerHolder movedTo = onePercentThreshold.findDestinationServerToMoveSegment(segmentToMove, source, servers); Assert.assertNotNull("With threshold=0.01, the marginal move should fire", movedTo); Assert.assertEquals("DEST", movedTo.getServer().getName()); } + @Test + public void testNearFullServerIsNotChosenForNewSegmentLoad() + { + final long maxSize = 10_000_000L; + // A: 95% full, 5 same-DS DAY segments -> raw cost = 10 * K (low, few co-located segs) + final ServerHolder nearFull = buildServer("A", maxSize, 9_500_000L, 0, 5); + // B: 70% full, 20 same-DS DAY segments -> raw cost = 40 * K (higher, more co-located) + final ServerHolder partial = buildServer("B", maxSize, 7_000_000L, 100, 20); + + final DataSegment newSegment = getSegment(1000); + final List servers = new ArrayList<>(); + servers.add(nearFull); + servers.add(partial); + + // CostBalancerStrategy picks A because raw cost 10K < 40K. + Assert.assertEquals( + "Pure CostBalancerStrategy must pick the near-full server (lower raw cost)", + "A", + newCostStrategy().findServersToLoadSegment(newSegment, servers).next().getServer().getName() + ); + + // DiskNormalized uses projected headroom: A_norm = 10K / 0.04 = 250K, + // B_norm = 40K / 0.29 = 138K -> B wins. + Assert.assertEquals( + "DiskNormalized must prefer the emptier server despite its higher raw cost", + "B", + newDiskNormalizedStrategy().findServersToLoadSegment(newSegment, servers).next().getServer().getName() + ); + } + + @Test + public void testProjectedSegmentSizeIsUsedForNewSegmentLoad() + { + final long maxSize = 1_000_000L; + // A has the lower raw cost, but the 250 KB proposal would leave only 5% headroom. 
+ final ServerHolder almostFullAfterLoad = buildServer("A", maxSize, 700_000L, 0, 5); + // B has more co-located segments, but keeps 25% headroom after the proposal. + final ServerHolder moreHeadroomAfterLoad = buildServer("B", maxSize, 500_000L, 100, 20); + + final DataSegment largeSegment = getSegment(1000, "DUMMY", DAY, 250_000L); + final List servers = new ArrayList<>(); + servers.add(almostFullAfterLoad); + servers.add(moreHeadroomAfterLoad); + + // CostBalancerStrategy picks A because raw cost 10K < 40K. + Assert.assertEquals( + "Pure CostBalancerStrategy must pick the lower raw-cost server", + "A", + newCostStrategy().findServersToLoadSegment(largeSegment, servers).next().getServer().getName() + ); + + // If diskNormalized used current headroom, A would also win: + // A_current = 10K / 0.30, B_current = 40K / 0.50. + // With projected headroom, B wins: + // A_projected = 10K / 0.05, B_projected = 40K / 0.25. + Assert.assertEquals( + "DiskNormalized must account for the proposal size before choosing a server", + "B", + newDiskNormalizedStrategy().findServersToLoadSegment(largeSegment, servers).next().getServer().getName() + ); + } + + @Test + public void testNearFullServerIsNotChosenAsMoveDestination() + { + final long maxSize = 10_000_000L; + // SOURCE: 70% full, 20 same-DS DAY segments; segmentToMove is one of them. + final ServerHolder source = buildServer("SOURCE", maxSize, 7_000_000L, 0, 20); + // DEST: 95% full, 5 same-DS DAY segments -> raw cost 10K < SOURCE's 38K. + final ServerHolder nearFullDest = buildServer("DEST", maxSize, 9_500_000L, 100, 5); + + final DataSegment segmentToMove = getSegment(0); + final List servers = new ArrayList<>(); + servers.add(source); + servers.add(nearFullDest); + + // CostBalancerStrategy: DEST raw cost (10K) < SOURCE raw cost (38K) -> recommends the move. 
+ final ServerHolder costResult = + newCostStrategy().findDestinationServerToMoveSegment(segmentToMove, source, servers); + Assert.assertNotNull("CostBalancerStrategy must recommend moving to the near-full DEST", costResult); + Assert.assertEquals("DEST", costResult.getServer().getName()); + + // DiskNormalized: DEST_norm = 10K / 0.05 = 200K > SOURCE_norm = 38K / 0.30 * 0.95 ≈ 120K. + // Near-full DEST is too expensive after normalization -> no move. + Assert.assertNull( + "DiskNormalized must block the move to the near-full server", + newDiskNormalizedStrategy().findDestinationServerToMoveSegment(segmentToMove, source, servers) + ); + } + + @Test + public void testProjectedSegmentSizePreventsMoveThatWouldFillDestination() + { + final long maxSize = 10_000_000L; + final DataSegment largeSegment = getSegment(0, "DUMMY", DAY, 2_500_000L); + final List sourceSegments = new ArrayList<>(); + sourceSegments.add(largeSegment); + IntStream.range(1, 20) + .mapToObj(DiskNormalizedCostBalancerStrategyTest::getSegment) + .forEach(sourceSegments::add); + + // SOURCE is fuller before the move, but already projects the segment. + final ServerHolder source = buildServer("SOURCE", maxSize, 8_000_000L, sourceSegments); + // DEST has low raw cost, but loading the 2.5 MB segment would leave only 5% headroom. + final ServerHolder dest = buildServer("DEST", maxSize, 7_000_000L, 100, 5); + + final List servers = new ArrayList<>(); + servers.add(source); + servers.add(dest); + + // CostBalancerStrategy recommends the move because DEST raw cost (10K) < SOURCE raw cost (38K). + final ServerHolder costResult = + newCostStrategy().findDestinationServerToMoveSegment(largeSegment, source, servers); + Assert.assertNotNull("CostBalancerStrategy must recommend moving to the lower raw-cost DEST", costResult); + Assert.assertEquals("DEST", costResult.getServer().getName()); + + // If diskNormalized used current headroom, DEST would win: 10K / 0.30 < 38K / 0.20 * 0.95. 
+ // With projected headroom, DEST is too full after placement: 10K / 0.05 > 38K / 0.20 * 0.95. + Assert.assertNull( + "DiskNormalized must not move a large segment to a server that would become too full", + newDiskNormalizedStrategy().findDestinationServerToMoveSegment(largeSegment, source, servers) + ); + } + @Test public void testRejectsInvalidThreshold() { diff --git a/server/src/test/java/org/apache/druid/server/coordinator/simulate/CoordinatorSimulationBuilder.java b/server/src/test/java/org/apache/druid/server/coordinator/simulate/CoordinatorSimulationBuilder.java index b6e45ad2aea9..86ce9871aef0 100644 --- a/server/src/test/java/org/apache/druid/server/coordinator/simulate/CoordinatorSimulationBuilder.java +++ b/server/src/test/java/org/apache/druid/server/coordinator/simulate/CoordinatorSimulationBuilder.java @@ -27,7 +27,6 @@ import org.apache.druid.audit.AuditInfo; import org.apache.druid.client.DruidServer; import org.apache.druid.common.config.JacksonConfigManager; -import org.apache.druid.curator.discovery.ServiceAnnouncer; import org.apache.druid.jackson.DefaultObjectMapper; import org.apache.druid.java.util.common.IAE; import org.apache.druid.java.util.common.ISE; @@ -217,8 +216,6 @@ public CoordinatorSimulation build() new SimOverlordClient(env.segmentManager), env.loadQueueTaskMaster, env.loadQueueManager, - new ServiceAnnouncer.Noop(), - null, new CoordinatorCustomDutyGroups(Collections.emptySet()), env.lookupCoordinatorManager, env.leaderSelector, diff --git a/services/src/main/java/org/apache/druid/cli/CliBroker.java b/services/src/main/java/org/apache/druid/cli/CliBroker.java index 180ca60b6b32..4e8d79ce71d7 100644 --- a/services/src/main/java/org/apache/druid/cli/CliBroker.java +++ b/services/src/main/java/org/apache/druid/cli/CliBroker.java @@ -187,7 +187,7 @@ protected List getModules() bindAnnouncer( binder, - DiscoverySideEffectsProvider.withLegacyAnnouncer() + DiscoverySideEffectsProvider.create() ); Jerseys.addResource(binder, 
SelfDiscoveryResource.class); diff --git a/services/src/main/java/org/apache/druid/cli/CliIndexer.java b/services/src/main/java/org/apache/druid/cli/CliIndexer.java index acbeae18f6ad..7839ba7e6b46 100644 --- a/services/src/main/java/org/apache/druid/cli/CliIndexer.java +++ b/services/src/main/java/org/apache/druid/cli/CliIndexer.java @@ -30,7 +30,6 @@ import com.google.inject.name.Names; import org.apache.druid.client.DruidServer; import org.apache.druid.client.DruidServerConfig; -import org.apache.druid.curator.ZkEnablementConfig; import org.apache.druid.discovery.DataNodeService; import org.apache.druid.discovery.NodeRole; import org.apache.druid.discovery.WorkerNodeService; @@ -102,7 +101,6 @@ public class CliIndexer extends ServerRunnable private static final Logger log = new Logger(CliIndexer.class); private Properties properties; - private boolean isZkEnabled = true; public CliIndexer() { @@ -113,7 +111,6 @@ public CliIndexer() public void configure(Properties properties) { this.properties = properties; - isZkEnabled = ZkEnablementConfig.isEnabled(properties); } @Override @@ -166,7 +163,7 @@ public void configure(Binder binder) CliPeon.bindPeonDataSegmentHandlers(binder); CliPeon.bindRealtimeCache(binder); CliPeon.bindCoordinatorHandoffNotifer(binder); - binder.install(CliMiddleManager.makeWorkerManagementModule(isZkEnabled)); + binder.install(CliMiddleManager.makeWorkerManagementModule()); binder.bind(AppenderatorsManager.class) .to(UnifiedIndexerAppenderatorsManager.class) diff --git a/services/src/main/java/org/apache/druid/cli/CliMiddleManager.java b/services/src/main/java/org/apache/druid/cli/CliMiddleManager.java index 57bd4672b287..4476a64fbca4 100644 --- a/services/src/main/java/org/apache/druid/cli/CliMiddleManager.java +++ b/services/src/main/java/org/apache/druid/cli/CliMiddleManager.java @@ -25,7 +25,6 @@ import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; import com.google.inject.Binder; -import 
com.google.inject.Inject; import com.google.inject.Key; import com.google.inject.Module; import com.google.inject.Provides; @@ -33,7 +32,6 @@ import com.google.inject.name.Named; import com.google.inject.name.Names; import com.google.inject.util.Providers; -import org.apache.druid.curator.ZkEnablementConfig; import org.apache.druid.discovery.NodeRole; import org.apache.druid.discovery.WorkerNodeService; import org.apache.druid.guice.IndexingServiceInputSourceModule; @@ -57,9 +55,7 @@ import org.apache.druid.indexing.overlord.ForkingTaskRunner; import org.apache.druid.indexing.overlord.TaskRunner; import org.apache.druid.indexing.worker.Worker; -import org.apache.druid.indexing.worker.WorkerCuratorCoordinator; import org.apache.druid.indexing.worker.WorkerTaskManager; -import org.apache.druid.indexing.worker.WorkerTaskMonitor; import org.apache.druid.indexing.worker.config.WorkerConfig; import org.apache.druid.indexing.worker.http.TaskManagementResource; import org.apache.druid.indexing.worker.http.WorkerResource; @@ -76,7 +72,6 @@ import org.apache.druid.query.lookup.LookupSerdeModule; import org.apache.druid.segment.incremental.RowIngestionMetersFactory; import org.apache.druid.segment.realtime.ChatHandlerProvider; -import org.apache.druid.segment.realtime.NoopChatHandlerProvider; import org.apache.druid.segment.realtime.appenderator.AppenderatorsManager; import org.apache.druid.segment.realtime.appenderator.DummyForInjectionAppenderatorsManager; import org.apache.druid.server.DruidNode; @@ -104,19 +99,11 @@ public class CliMiddleManager extends ServerRunnable { private static final Logger log = new Logger(CliMiddleManager.class); - private boolean isZkEnabled = true; - public CliMiddleManager() { super(log); } - @Inject - public void configure(Properties properties) - { - isZkEnabled = ZkEnablementConfig.isEnabled(properties); - } - @Override protected Set getNodeRoles(Properties properties) { @@ -152,7 +139,7 @@ public void configure(Binder binder) 
binder.bind(ParallelIndexSupervisorTaskClientProvider.class).toProvider(Providers.of(null)); binder.bind(ShuffleClient.class).toProvider(Providers.of(null)); - binder.bind(ChatHandlerProvider.class).toProvider(Providers.of(new NoopChatHandlerProvider())); + binder.bind(ChatHandlerProvider.class).in(LazySingleton.class); PolyBind.createChoice( binder, "druid.indexer.task.rowIngestionMeters.type", @@ -167,7 +154,7 @@ public void configure(Binder binder) .in(LazySingleton.class); binder.bind(DropwizardRowIngestionMetersFactory.class).in(LazySingleton.class); - binder.install(makeWorkerManagementModule(isZkEnabled)); + binder.install(makeWorkerManagementModule()); binder.bind(JettyServerInitializer.class) .to(MiddleManagerJettyServerInitializer.class) @@ -260,21 +247,14 @@ public WorkerNodeService getWorkerNodeService(WorkerConfig workerConfig) ); } - public static Module makeWorkerManagementModule(boolean isZkEnabled) + public static Module makeWorkerManagementModule() { return new Module() { @Override public void configure(Binder binder) { - if (isZkEnabled) { - binder.bind(WorkerTaskManager.class).to(WorkerTaskMonitor.class); - binder.bind(WorkerTaskMonitor.class).in(ManageLifecycle.class); - binder.bind(WorkerCuratorCoordinator.class).in(ManageLifecycle.class); - LifecycleModule.register(binder, WorkerTaskMonitor.class); - } else { - binder.bind(WorkerTaskManager.class).in(ManageLifecycle.class); - } + binder.bind(WorkerTaskManager.class).in(ManageLifecycle.class); Jerseys.addResource(binder, WorkerResource.class); Jerseys.addResource(binder, TaskManagementResource.class); diff --git a/services/src/main/java/org/apache/druid/cli/CliOverlord.java b/services/src/main/java/org/apache/druid/cli/CliOverlord.java index b96f299dfcd5..67a8be584df7 100644 --- a/services/src/main/java/org/apache/druid/cli/CliOverlord.java +++ b/services/src/main/java/org/apache/druid/cli/CliOverlord.java @@ -77,7 +77,6 @@ import org.apache.druid.indexing.overlord.HeapMemoryTaskStorage; 
import org.apache.druid.indexing.overlord.IndexerMetadataStorageAdapter; import org.apache.druid.indexing.overlord.MetadataTaskStorage; -import org.apache.druid.indexing.overlord.RemoteTaskRunnerFactory; import org.apache.druid.indexing.overlord.TaskMaster; import org.apache.druid.indexing.overlord.TaskQueryTool; import org.apache.druid.indexing.overlord.TaskRunnerFactory; @@ -121,7 +120,6 @@ import org.apache.druid.query.lookup.LookupSerdeModule; import org.apache.druid.segment.incremental.RowIngestionMetersFactory; import org.apache.druid.segment.realtime.ChatHandlerProvider; -import org.apache.druid.segment.realtime.NoopChatHandlerProvider; import org.apache.druid.segment.realtime.appenderator.AppenderatorsManager; import org.apache.druid.segment.realtime.appenderator.DummyForInjectionAppenderatorsManager; import org.apache.druid.server.compaction.CompactionStatusTracker; @@ -261,7 +259,7 @@ public void configure(Binder binder) binder.bind(ParallelIndexSupervisorTaskClientProvider.class).toProvider(Providers.of(null)); binder.bind(ShuffleClient.class).toProvider(Providers.of(null)); - binder.bind(ChatHandlerProvider.class).toProvider(Providers.of(new NoopChatHandlerProvider())); + binder.bind(ChatHandlerProvider.class).in(LazySingleton.class); CliPeon.bindDataSegmentKiller(binder); @@ -388,11 +386,6 @@ public void configure(Binder binder) biddy.addBinding("local").to(ForkingTaskRunnerFactory.class); binder.bind(ForkingTaskRunnerFactory.class).in(LazySingleton.class); - biddy.addBinding(RemoteTaskRunnerFactory.TYPE_NAME) - .to(RemoteTaskRunnerFactory.class) - .in(LazySingleton.class); - binder.bind(RemoteTaskRunnerFactory.class).in(LazySingleton.class); - biddy.addBinding(HttpRemoteTaskRunnerFactory.TYPE_NAME) .to(HttpRemoteTaskRunnerFactory.class) .in(LazySingleton.class); diff --git a/services/src/main/java/org/apache/druid/cli/CliPeon.java b/services/src/main/java/org/apache/druid/cli/CliPeon.java index d234a6f37fec..95622409a275 100644 --- 
a/services/src/main/java/org/apache/druid/cli/CliPeon.java +++ b/services/src/main/java/org/apache/druid/cli/CliPeon.java @@ -113,8 +113,6 @@ import org.apache.druid.segment.loading.OmniDataSegmentMover; import org.apache.druid.segment.loading.StorageLocation; import org.apache.druid.segment.realtime.ChatHandlerProvider; -import org.apache.druid.segment.realtime.NoopChatHandlerProvider; -import org.apache.druid.segment.realtime.ServiceAnnouncingChatHandlerProvider; import org.apache.druid.segment.realtime.appenderator.AppenderatorsManager; import org.apache.druid.segment.realtime.appenderator.PeonAppenderatorsManager; import org.apache.druid.server.DruidNode; @@ -419,24 +417,7 @@ static void bindRowIngestionMeters(Binder binder) static void bindChatHandler(Binder binder) { - PolyBind.createChoice( - binder, - "druid.indexer.task.chathandler.type", - Key.get(ChatHandlerProvider.class), - Key.get(ServiceAnnouncingChatHandlerProvider.class) - ); - final MapBinder handlerProviderBinder = - PolyBind.optionBinder(binder, Key.get(ChatHandlerProvider.class)); - handlerProviderBinder - .addBinding("announce") - .to(ServiceAnnouncingChatHandlerProvider.class) - .in(LazySingleton.class); - handlerProviderBinder - .addBinding("noop") - .to(NoopChatHandlerProvider.class) - .in(LazySingleton.class); - binder.bind(ServiceAnnouncingChatHandlerProvider.class).in(LazySingleton.class); - binder.bind(NoopChatHandlerProvider.class).in(LazySingleton.class); + binder.bind(ChatHandlerProvider.class).in(LazySingleton.class); } static void bindPeonDataSegmentHandlers(Binder binder) diff --git a/services/src/main/java/org/apache/druid/cli/CliRouter.java b/services/src/main/java/org/apache/druid/cli/CliRouter.java index a3344aa4e7fe..da6e4772188f 100644 --- a/services/src/main/java/org/apache/druid/cli/CliRouter.java +++ b/services/src/main/java/org/apache/druid/cli/CliRouter.java @@ -26,7 +26,6 @@ import com.google.inject.Module; import com.google.inject.TypeLiteral; import 
com.google.inject.name.Names; -import org.apache.druid.curator.discovery.DiscoveryModule; import org.apache.druid.discovery.NodeRole; import org.apache.druid.guice.Jerseys; import org.apache.druid.guice.JsonConfigProvider; @@ -36,7 +35,6 @@ import org.apache.druid.guice.QueryRunnerFactoryModule; import org.apache.druid.guice.QueryableModule; import org.apache.druid.guice.RouterProcessingModule; -import org.apache.druid.guice.annotations.Self; import org.apache.druid.guice.http.JettyHttpClientModule; import org.apache.druid.java.util.common.logger.Logger; import org.apache.druid.query.QuerySegmentWalker; @@ -121,7 +119,6 @@ protected List getModules() LifecycleModule.register(binder, RouterResource.class); LifecycleModule.register(binder, Server.class); - DiscoveryModule.register(binder, Self.class); bindAnnouncer(binder, DiscoverySideEffectsProvider.create()); diff --git a/services/src/main/java/org/apache/druid/cli/ServerRunnable.java b/services/src/main/java/org/apache/druid/cli/ServerRunnable.java index 0c4143c48eed..662b259e156d 100644 --- a/services/src/main/java/org/apache/druid/cli/ServerRunnable.java +++ b/services/src/main/java/org/apache/druid/cli/ServerRunnable.java @@ -25,7 +25,6 @@ import com.google.inject.Injector; import com.google.inject.Key; import com.google.inject.Provider; -import org.apache.druid.curator.discovery.ServiceAnnouncer; import org.apache.druid.discovery.DiscoveryDruidNode; import org.apache.druid.discovery.DruidNodeAnnouncer; import org.apache.druid.discovery.DruidService; @@ -120,9 +119,6 @@ public static class Child @Inject private DruidNodeAnnouncer announcer; - @Inject - private ServiceAnnouncer legacyAnnouncer; - @Inject private Lifecycle lifecycle; @@ -139,21 +135,13 @@ public static class Child @Inject private ServiceAnnouncementState serviceAnnouncementState; - private final boolean useLegacyAnnouncer; - public static DiscoverySideEffectsProvider create() { - return new DiscoverySideEffectsProvider(false); - } - - public 
static DiscoverySideEffectsProvider withLegacyAnnouncer() - { - return new DiscoverySideEffectsProvider(true); + return new DiscoverySideEffectsProvider(); } - private DiscoverySideEffectsProvider(final boolean useLegacyAnnouncer) + private DiscoverySideEffectsProvider() { - this.useLegacyAnnouncer = useLegacyAnnouncer; } @Override @@ -181,11 +169,6 @@ public Child get() public void start() { announcer.announce(discoveryDruidNode); - - if (useLegacyAnnouncer) { - legacyAnnouncer.announce(discoveryDruidNode.getDruidNode()); - } - serviceAnnouncementState.markReady(); } @@ -193,13 +176,6 @@ public void start() public void stop() { serviceAnnouncementState.markNotReady(); - - // Reverse order vs. start(). - - if (useLegacyAnnouncer) { - legacyAnnouncer.unannounce(discoveryDruidNode.getDruidNode()); - } - announcer.unannounce(discoveryDruidNode); } }, diff --git a/services/src/test/java/org/apache/druid/cli/DiscoverySideEffectsProviderTest.java b/services/src/test/java/org/apache/druid/cli/DiscoverySideEffectsProviderTest.java index 09ad6af85e2c..83a10e5af652 100644 --- a/services/src/test/java/org/apache/druid/cli/DiscoverySideEffectsProviderTest.java +++ b/services/src/test/java/org/apache/druid/cli/DiscoverySideEffectsProviderTest.java @@ -26,7 +26,6 @@ import com.google.inject.multibindings.ProvidesIntoSet; import com.google.inject.name.Named; import org.apache.druid.cli.ServerRunnable.DiscoverySideEffectsProvider; -import org.apache.druid.curator.discovery.ServiceAnnouncer; import org.apache.druid.discovery.DiscoveryDruidNode; import org.apache.druid.discovery.DruidNodeAnnouncer; import org.apache.druid.discovery.DruidService; @@ -61,8 +60,6 @@ public class DiscoverySideEffectsProviderTest @Mock private DruidNodeAnnouncer discoverableOnlyAnnouncer; @Mock - private ServiceAnnouncer legacyAnnouncer; - @Mock private Lifecycle lifecycle; private List lifecycleHandlers; @@ -84,7 +81,7 @@ public void setUp() .doAnswer((invocation) -> 
lifecycleHandlers.add(invocation.getArgument(0))) .when(lifecycle) .addHandler(ArgumentMatchers.any(Lifecycle.Handler.class), ArgumentMatchers.eq(Lifecycle.Stage.ANNOUNCEMENTS)); - target = DiscoverySideEffectsProvider.withLegacyAnnouncer(); + target = DiscoverySideEffectsProvider.create(); } @Test @@ -187,7 +184,6 @@ private Injector createInjector(List modules) binder -> { binder.bind(DruidNodeAnnouncer.class).toInstance(discoverableOnlyAnnouncer); binder.bind(DruidNode.class).annotatedWith(Self.class).toInstance(druidNode); - binder.bind(ServiceAnnouncer.class).toInstance(legacyAnnouncer); binder.bind(Lifecycle.class).toInstance(lifecycle); } ) diff --git a/services/src/test/java/org/apache/druid/testing/embedded/EmbeddedClusterApis.java b/services/src/test/java/org/apache/druid/testing/embedded/EmbeddedClusterApis.java index 257533aecbd0..6ae8750b8d8e 100644 --- a/services/src/test/java/org/apache/druid/testing/embedded/EmbeddedClusterApis.java +++ b/services/src/test/java/org/apache/druid/testing/embedded/EmbeddedClusterApis.java @@ -430,6 +430,16 @@ public String postSupervisor(SupervisorSpec supervisor) return onLeaderOverlord(o -> o.postSupervisor(supervisor)).get("id"); } + /** + * Resets a supervisor to the latest stream offsets and starts a bounded backfill supervisor. + * + * @return Map containing "id" and "backfillSupervisorId" + */ + public Map resetToLatestAndBackfill(String supervisorId) + { + return onLeaderOverlord(o -> o.resetToLatestAndBackfill(supervisorId)); + } + /** * Fetches the current status of the given supervisor ID. 
*/ diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/util/SqlTestFramework.java b/sql/src/test/java/org/apache/druid/sql/calcite/util/SqlTestFramework.java index 5838f9a2eb85..487c50570f3b 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/util/SqlTestFramework.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/util/SqlTestFramework.java @@ -81,7 +81,6 @@ import org.apache.druid.quidem.TestSqlModule; import org.apache.druid.segment.join.JoinableFactoryWrapper; import org.apache.druid.segment.realtime.ChatHandlerProvider; -import org.apache.druid.segment.realtime.NoopChatHandlerProvider; import org.apache.druid.server.ClientQuerySegmentWalker; import org.apache.druid.server.DruidNode; import org.apache.druid.server.LocalQuerySegmentWalker; @@ -911,7 +910,7 @@ public QuerySegmentWalker getQuerySegmentWalker(SpecificSegmentsQuerySegmentWalk @Provides ChatHandlerProvider getChatHandlerProvider() { - return new NoopChatHandlerProvider(); + return new ChatHandlerProvider(); } @Override diff --git a/web-console/src/components/fancy-numeric-input/fancy-numeric-input.tsx b/web-console/src/components/fancy-numeric-input/fancy-numeric-input.tsx index 637c416b5f85..444d44934b86 100644 --- a/web-console/src/components/fancy-numeric-input/fancy-numeric-input.tsx +++ b/web-console/src/components/fancy-numeric-input/fancy-numeric-input.tsx @@ -220,14 +220,20 @@ export const FancyNumericInput = React.memo(function FancyNumericInput( disabled={effectiveDisabled || isIncrementDisabled} icon={IconNames.CHEVRON_UP} intent={intent} - onMouseDown={e => increment(getIncrementSize(e.shiftKey, e.altKey))} + onMouseDown={e => { + e.preventDefault(); + increment(getIncrementSize(e.shiftKey, e.altKey)); + }} />