From 47925aa0998756f0137ec35b4202d2851ca9819d Mon Sep 17 00:00:00 2001 From: Lucas Capistrant Date: Fri, 22 May 2026 12:13:21 -0500 Subject: [PATCH 01/12] fix: Close exposure to potential NPE in S3Utils.useHttps (#19504) --- .../org/apache/druid/storage/s3/S3Utils.java | 4 +- .../data/input/s3/S3InputSourceTest.java | 35 ++++++++++++++++ .../apache/druid/storage/s3/S3UtilsTest.java | 41 +++++++++++++++++++ 3 files changed, 78 insertions(+), 2 deletions(-) diff --git a/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/S3Utils.java b/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/S3Utils.java index 2cac95a200b1..82412fe412c9 100644 --- a/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/S3Utils.java +++ b/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/S3Utils.java @@ -417,9 +417,9 @@ static void uploadFileIfPossible( /** * Determines whether to use HTTP or HTTPS protocol based on configuration. */ - public static boolean useHttps(AWSClientConfig clientConfig, AWSEndpointConfig endpointConfig) + public static boolean useHttps(@Nullable AWSClientConfig clientConfig, AWSEndpointConfig endpointConfig) { - String protocol = clientConfig.getProtocol(); + final String protocol = clientConfig == null ? 
null : clientConfig.getProtocol(); final String endpointUrl = endpointConfig.getUrl(); if (org.apache.commons.lang3.StringUtils.isNotEmpty(endpointUrl)) { diff --git a/extensions-core/s3-extensions/src/test/java/org/apache/druid/data/input/s3/S3InputSourceTest.java b/extensions-core/s3-extensions/src/test/java/org/apache/druid/data/input/s3/S3InputSourceTest.java index 168216affdca..96db444aa088 100644 --- a/extensions-core/s3-extensions/src/test/java/org/apache/druid/data/input/s3/S3InputSourceTest.java +++ b/extensions-core/s3-extensions/src/test/java/org/apache/druid/data/input/s3/S3InputSourceTest.java @@ -404,6 +404,41 @@ public void testSerdeWithCloudConfigPropertiesWithSessionToken() throws Exceptio EasyMock.verify(SERVER_SIDE_ENCRYPTING_AMAZON_S3_BUILDER); } + @Test + public void testSchemelessEndpointConfigUrlWithNullClientConfigResolvesSupplier() throws Exception + { + EasyMock.reset(SERVER_SIDE_ENCRYPTING_AMAZON_S3_BUILDER); + EasyMock.expect(SERVER_SIDE_ENCRYPTING_AMAZON_S3_BUILDER.setS3ClientSupplier(EasyMock.anyObject())) + .andReturn(SERVER_SIDE_ENCRYPTING_AMAZON_S3_BUILDER); + EasyMock.expect(SERVER_SIDE_ENCRYPTING_AMAZON_S3_BUILDER.build()) + .andReturn(SERVICE); + EasyMock.replay(SERVER_SIDE_ENCRYPTING_AMAZON_S3_BUILDER); + + final AWSEndpointConfig schemelessEndpoint = MAPPER.readValue( + "{\"url\":\"s3.example.com\",\"signingRegion\":\"us-east-1\"}", + AWSEndpointConfig.class + ); + + final S3InputSource inputSource = new S3InputSource( + SERVICE, + SERVER_SIDE_ENCRYPTING_AMAZON_S3_BUILDER, + INPUT_DATA_CONFIG, + null, + null, + EXPECTED_LOCATION, + null, + CLOUD_CONFIG_PROPERTIES, + null, + schemelessEndpoint, + null + ); + + // Forces s3ClientSupplier evaluation, which hits S3Utils.useHttps and confirms a null client config does not blow up. 
+ inputSource.createEntity(new CloudObjectLocation("bucket", "path")); + + EasyMock.verify(SERVER_SIDE_ENCRYPTING_AMAZON_S3_BUILDER); + } + @Test public void testGetSetSessionToken() { diff --git a/extensions-core/s3-extensions/src/test/java/org/apache/druid/storage/s3/S3UtilsTest.java b/extensions-core/s3-extensions/src/test/java/org/apache/druid/storage/s3/S3UtilsTest.java index 6c46df7d993a..16b8c20d0f2e 100644 --- a/extensions-core/s3-extensions/src/test/java/org/apache/druid/storage/s3/S3UtilsTest.java +++ b/extensions-core/s3-extensions/src/test/java/org/apache/druid/storage/s3/S3UtilsTest.java @@ -19,6 +19,9 @@ package org.apache.druid.storage.s3; +import com.fasterxml.jackson.databind.ObjectMapper; +import org.apache.druid.common.aws.AWSClientConfig; +import org.apache.druid.common.aws.AWSEndpointConfig; import org.easymock.Capture; import org.easymock.CaptureType; import org.easymock.EasyMock; @@ -382,4 +385,42 @@ public void testRetryWithS3MultiObjectDeleteException() throws Exception ); Assert.assertEquals(maxRetries, count.get()); } + + private static final ObjectMapper JSON = new ObjectMapper(); + + private static AWSEndpointConfig endpointWith(String json) throws IOException + { + return JSON.readValue(json, AWSEndpointConfig.class); + } + + @Test + public void testUseHttpsNullClientConfigSchemelessEndpointReturnsTrue() throws IOException + { + Assert.assertTrue(S3Utils.useHttps(null, endpointWith("{\"url\":\"s3.example.com\"}"))); + } + + @Test + public void testUseHttpsNullClientConfigHttpEndpointReturnsFalse() throws IOException + { + Assert.assertFalse(S3Utils.useHttps(null, endpointWith("{\"url\":\"http://s3.example.com\"}"))); + } + + @Test + public void testUseHttpsNullClientConfigHttpsEndpointReturnsTrue() throws IOException + { + Assert.assertTrue(S3Utils.useHttps(null, endpointWith("{\"url\":\"https://s3.example.com\"}"))); + } + + @Test + public void testUseHttpsNullClientConfigNullEndpointUrlReturnsTrue() throws IOException + { + 
Assert.assertTrue(S3Utils.useHttps(null, new AWSEndpointConfig())); + } + + @Test + public void testUseHttpsDefaultClientConfigSchemelessEndpointReturnsTrue() throws IOException + { + // Sanity check: default AWSClientConfig protocol is "https"; schemeless URL inherits "https". + Assert.assertTrue(S3Utils.useHttps(new AWSClientConfig(), endpointWith("{\"url\":\"s3.example.com\"}"))); + } } From b3b1ff255ba7f6145da8e6b1f27020c1ccf675e4 Mon Sep 17 00:00:00 2001 From: Virushade Date: Sat, 23 May 2026 01:40:39 +0800 Subject: [PATCH 02/12] docs: Stop Referring to MSQ as an extension (#19507) --- docs/api-reference/service-status-api.md | 32 +-------------------- docs/api-reference/sql-ingestion-api.md | 7 ++--- docs/configuration/extensions.md | 1 - docs/ingestion/native-batch.md | 2 +- docs/multi-stage-query/security.md | 7 ++--- docs/operations/web-console.md | 2 +- docs/tutorials/index.md | 2 +- docs/tutorials/tutorial-msq-convert-spec.md | 5 ++-- docs/tutorials/tutorial-msq-extern.md | 5 ++-- multi-stage-query/README.md | 4 +-- 10 files changed, 16 insertions(+), 51 deletions(-) diff --git a/docs/api-reference/service-status-api.md b/docs/api-reference/service-status-api.md index 1ba8b55c4d78..1c192162d5d0 100644 --- a/docs/api-reference/service-status-api.md +++ b/docs/api-reference/service-status-api.md @@ -154,36 +154,6 @@ Host: http://ROUTER_IP:ROUTER_PORT "name": "org.apache.druid.query.aggregation.datasketches.kll.KllSketchModule", "artifact": "druid-datasketches", "version": "26.0.0" - }, - { - "name": "org.apache.druid.msq.guice.MSQExternalDataSourceModule", - "artifact": "druid-multi-stage-query", - "version": "26.0.0" - }, - { - "name": "org.apache.druid.msq.guice.MSQIndexingModule", - "artifact": "druid-multi-stage-query", - "version": "26.0.0" - }, - { - "name": "org.apache.druid.msq.guice.MSQDurableStorageModule", - "artifact": "druid-multi-stage-query", - "version": "26.0.0" - }, - { - "name": "org.apache.druid.msq.guice.MSQServiceClientModule", - 
"artifact": "druid-multi-stage-query", - "version": "26.0.0" - }, - { - "name": "org.apache.druid.msq.guice.MSQSqlModule", - "artifact": "druid-multi-stage-query", - "version": "26.0.0" - }, - { - "name": "org.apache.druid.msq.guice.SqlTaskModule", - "artifact": "druid-multi-stage-query", - "version": "26.0.0" } ], "memory": { @@ -326,7 +296,7 @@ Host: http://ROUTER_IP:ROUTER_PORT "log4j.shutdownHookEnabled": "true", "java.vm.vendor": "Homebrew", "sun.arch.data.model": "64", - "druid.extensions.loadList": "[\"druid-hdfs-storage\", \"druid-kafka-indexing-service\", \"druid-datasketches\", \"druid-multi-stage-query\"]", + "druid.extensions.loadList": "[\"druid-hdfs-storage\", \"druid-kafka-indexing-service\", \"druid-datasketches\"]", "java.vendor.url": "https://github.com/Homebrew/homebrew-core/issues", "druid.router.coordinatorServiceName": "druid/coordinator", "user.timezone": "UTC", diff --git a/docs/api-reference/sql-ingestion-api.md b/docs/api-reference/sql-ingestion-api.md index 59942aff8e0c..9348291581e8 100644 --- a/docs/api-reference/sql-ingestion-api.md +++ b/docs/api-reference/sql-ingestion-api.md @@ -26,9 +26,8 @@ import TabItem from '@theme/TabItem'; --> :::info - This page describes SQL-based batch ingestion using the [`druid-multi-stage-query`](../multi-stage-query/index.md) - extension, new in Druid 24.0. Refer to the [ingestion methods](../ingestion/index.md#batch) table to determine which - ingestion method is right for you. + This page describes SQL-based batch ingestion using the [multi-stage query (MSQ) task engine](../multi-stage-query/index.md). + Refer to the [ingestion methods](../ingestion/index.md#batch) table to determine which ingestion method is right for you. ::: The **Query** view in the web console provides a friendly experience for the multi-stage query task engine (MSQ task engine) and multi-stage query architecture. We recommend using the web console if you don't need a programmatic interface. 
@@ -847,4 +846,4 @@ The response shows the ID of the task that was canceled. { "task": "query-655efe33-781a-4c50-ae84-c2911b42d63c" } -``` \ No newline at end of file +``` diff --git a/docs/configuration/extensions.md b/docs/configuration/extensions.md index 6c802739fc4b..31f1a5b62b29 100644 --- a/docs/configuration/extensions.md +++ b/docs/configuration/extensions.md @@ -50,7 +50,6 @@ Core extensions are maintained by Druid committers. |druid-kerberos|Kerberos authentication for druid processes.|[link](../development/extensions-core/druid-kerberos.md)| |druid-lookups-cached-global|A module for [lookups](../querying/lookups.md) providing a jvm-global eager caching for lookups. It provides JDBC and URI implementations for fetching lookup data.|[link](../querying/lookups-cached-global.md)| |druid-lookups-cached-single| Per lookup caching module to support the use cases where a lookup need to be isolated from the global pool of lookups |[link](../development/extensions-core/druid-lookups.md)| -|druid-multi-stage-query| Support for the multi-stage query architecture for Apache Druid and the multi-stage query task engine.|[link](../multi-stage-query/index.md)| |druid-orc-extensions|Support for data in Apache ORC data format.|[link](../development/extensions-core/orc.md)| |druid-parquet-extensions|Support for data in Apache Parquet data format. Requires druid-avro-extensions to be loaded.|[link](../development/extensions-core/parquet.md)| |druid-protobuf-extensions| Support for data in Protobuf data format.|[link](../development/extensions-core/protobuf.md)| diff --git a/docs/ingestion/native-batch.md b/docs/ingestion/native-batch.md index 50eaf43366dc..986d7e977975 100644 --- a/docs/ingestion/native-batch.md +++ b/docs/ingestion/native-batch.md @@ -24,7 +24,7 @@ sidebar_label: JSON-based batch --> :::info - This page describes JSON-based batch ingestion using [ingestion specs](ingestion-spec.md). 
For SQL-based batch ingestion using the [`druid-multi-stage-query`](../multi-stage-query/index.md) engine, see [SQL-based ingestion](../multi-stage-query/index.md). Refer to the [ingestion methods](../ingestion/index.md#batch) table to determine which ingestion method is right for you. + This page describes JSON-based batch ingestion using [ingestion specs](ingestion-spec.md). For SQL-based batch ingestion using the [multi-stage query (MSQ) task engine](../multi-stage-query/index.md), see [SQL-based ingestion](../multi-stage-query/index.md). Refer to the [ingestion methods](../ingestion/index.md#batch) table to determine which ingestion method is right for you. ::: Apache Druid supports the following types of JSON-based batch indexing tasks: diff --git a/docs/multi-stage-query/security.md b/docs/multi-stage-query/security.md index 77acafc29f51..0a50b68d4d6f 100644 --- a/docs/multi-stage-query/security.md +++ b/docs/multi-stage-query/security.md @@ -23,9 +23,9 @@ sidebar_label: Security ~ under the License. --> -All authenticated users can use the multi-stage query task engine (MSQ task engine) through the UI and API if the -extension is loaded. However, without additional permissions, users are not able to issue queries that read or write -Druid datasources or external data. The permission needed depends on what the user is trying to do. +All authenticated users can use the multi-stage query task engine (MSQ task engine) through the UI and API. However, +without additional permissions, users are not able to issue queries that read or write Druid datasources or external +data. The permission needed depends on what the user is trying to do. To submit a query: @@ -77,4 +77,3 @@ The MSQ task engine needs the following permissions for pushing, fetching, and r - `Microsoft.Storage/storageAccounts/blobServices/containers/blobs/delete` to delete files when they're no longer needed. 
- diff --git a/docs/operations/web-console.md b/docs/operations/web-console.md index ef1118ebc4ce..5d935106c3d5 100644 --- a/docs/operations/web-console.md +++ b/docs/operations/web-console.md @@ -65,7 +65,7 @@ You can access the [data loader](#data-loader) and [lookups view](#lookups) from ## Query -SQL-based ingestion and the multi-stage query task engine use the **Query** view, which provides you with a UI to edit and use SQL queries. You should see this UI automatically in Druid 24.0 and later since the multi-stage query extension is loaded by default. +SQL-based ingestion and the multi-stage query task engine use the **Query** view, which provides you with a UI to edit and use SQL queries. The following screenshot shows a populated enhanced **Query** view along with a description of its parts: diff --git a/docs/tutorials/index.md b/docs/tutorials/index.md index f270c1b74353..730fef78d074 100644 --- a/docs/tutorials/index.md +++ b/docs/tutorials/index.md @@ -67,7 +67,7 @@ The distribution directory contains `LICENSE` and `NOTICE` files and subdirector ## Start up Druid services Start up Druid services using the automatic single-machine configuration. -This configuration includes default settings that are appropriate for this tutorial, such as loading the `druid-multi-stage-query` extension by default so that you can use the MSQ task engine. +This configuration includes default settings that are appropriate for this tutorial. You can view the default settings in the configuration files located in `conf/druid/auto`. 
diff --git a/docs/tutorials/tutorial-msq-convert-spec.md b/docs/tutorials/tutorial-msq-convert-spec.md index 0d386bc06293..a8501284ca9d 100644 --- a/docs/tutorials/tutorial-msq-convert-spec.md +++ b/docs/tutorials/tutorial-msq-convert-spec.md @@ -25,9 +25,8 @@ description: How to convert an ingestion spec to a query for SQL-based ingestion --> :::info - This page describes SQL-based batch ingestion using the [`druid-multi-stage-query`](../multi-stage-query/index.md) - extension, new in Druid 24.0. Refer to the [ingestion methods](../ingestion/index.md#batch) table to determine which - ingestion method is right for you. + This page describes SQL-based batch ingestion using the [multi-stage query (MSQ) task engine](../multi-stage-query/index.md). + Refer to the [ingestion methods](../ingestion/index.md#batch) table to determine which ingestion method is right for you. ::: If you're already ingesting data with [native batch ingestion](../ingestion/native-batch.md), you can use the [web console](../operations/web-console.md) to convert the ingestion spec to a SQL query that the multi-stage query task engine can use to ingest data. diff --git a/docs/tutorials/tutorial-msq-extern.md b/docs/tutorials/tutorial-msq-extern.md index dcd0d5095980..1cb7aac89092 100644 --- a/docs/tutorials/tutorial-msq-extern.md +++ b/docs/tutorials/tutorial-msq-extern.md @@ -25,9 +25,8 @@ description: How to generate a query that references externally hosted data --> :::info - This page describes SQL-based batch ingestion using the [`druid-multi-stage-query`](../multi-stage-query/index.md) - extension, new in Druid 24.0. Refer to the [ingestion methods](../ingestion/index.md#batch) table to determine which - ingestion method is right for you. + This page describes SQL-based batch ingestion using the [multi-stage query (MSQ) task engine](../multi-stage-query/index.md). + Refer to the [ingestion methods](../ingestion/index.md#batch) table to determine which ingestion method is right for you. 
::: This tutorial demonstrates how to generate a query that references externally hosted data using the **Connect external data** wizard. diff --git a/multi-stage-query/README.md b/multi-stage-query/README.md index 5b00da45f0b0..11fd12d11f22 100644 --- a/multi-stage-query/README.md +++ b/multi-stage-query/README.md @@ -17,9 +17,9 @@ ~ under the License. --> -# `druid-multi-stage-query` developer notes +# Multi-stage query developer notes -This document provides developer notes for the major packages of the `druid-multi-stage-query` extension. It does not +This document provides developer notes for the major packages of the multi-stage query module. It does not discuss future plans; these are discussed on the list or in GitHub issues. ## Model From 50ce46501f2e823598087b2999d3a900e9cc4bc1 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Fri, 22 May 2026 12:07:25 -0700 Subject: [PATCH 03/12] refactor: remove deprecated zookeeper-based task runner (#19500) --- docs/configuration/index.md | 23 +- docs/design/zookeeper.md | 3 +- docs/development/extensions-core/k8s-jobs.md | 2 +- .../KubernetesAndWorkerTaskRunnerConfig.java | 9 +- .../overlord/KubernetesOverlordModule.java | 15 +- ...bernetesAndWorkerTaskRunnerConfigTest.java | 2 +- .../KubernetesOverlordModuleTest.java | 59 +- .../kubernetesAndWorkerTaskRunnerConfig.json | 4 +- indexing-service/pom.xml | 12 - .../guice/IndexingServiceModuleHelper.java | 4 - .../overlord/ImmutableWorkerInfo.java | 3 +- .../indexing/overlord/RemoteTaskRunner.java | 1673 ----------------- .../overlord/RemoteTaskRunnerFactory.java | 99 - .../druid/indexing/overlord/ZkWorker.java | 271 --- .../config/HttpRemoteTaskRunnerConfig.java | 77 +- .../config/RemoteTaskRunnerConfig.java | 202 -- .../overlord/hrtr/HttpRemoteTaskRunner.java | 96 - .../hrtr/HttpRemoteTaskRunnerFactory.java | 23 - .../overlord/setup/WorkerSelectStrategy.java | 3 +- .../worker/WorkerCuratorCoordinator.java | 226 --- .../indexing/worker/WorkerTaskManager.java | 28 +- 
.../indexing/worker/WorkerTaskMonitor.java | 226 --- .../indexing/worker/http/WorkerResource.java | 37 +- .../initialization/IndexerZkConfig.java | 136 -- .../overlord/OverlordBlinkLeadershipTest.java | 93 - .../overlord/RemoteTaskRunnerFactoryTest.java | 72 - ...kRunnerRunPendingTasksConcurrencyTest.java | 179 -- .../overlord/RemoteTaskRunnerTest.java | 1303 ------------- .../overlord/RemoteTaskRunnerTestUtils.java | 307 --- .../indexing/overlord/TaskLockConfigTest.java | 3 +- .../indexing/overlord/TaskQueueTest.java | 5 - .../overlord/TestRemoteTaskRunnerConfig.java | 84 - .../druid/indexing/overlord/ZkWorkerTest.java | 148 -- ...dingTaskBasedProvisioningStrategyTest.java | 178 +- .../SimpleProvisioningStrategyTest.java | 116 +- .../config/RemoteTaskRunnerConfigTest.java | 880 --------- .../hrtr/HttpRemoteTaskRunnerTest.java | 31 - ...nWithAffinityWorkerSelectStrategyTest.java | 16 +- ...hCategorySpecWorkerSelectStrategyTest.java | 10 +- ...lDistributionWorkerSelectStrategyTest.java | 22 +- ...yWithAffinityWorkerSelectStrategyTest.java | 16 +- ...hCategorySpecWorkerSelectStrategyTest.java | 10 +- .../JavaScriptWorkerSelectStrategyTest.java | 19 +- .../worker/WorkerTaskManagerTest.java | 13 +- .../worker/WorkerTaskMonitorTest.java | 406 ---- .../worker/http/WorkerResourceTest.java | 115 +- .../initialization/IndexerZkConfigTest.java | 278 --- .../druid/msq/rpc/ControllerResource.java | 4 +- .../druid/guice/StartupInjectorBuilder.java | 23 + .../guice/StartupInjectorBuilderTest.java | 28 + .../discovery/CuratorServiceUtils.java | 14 +- .../curator/discovery/DiscoveryModule.java | 9 - .../discovery/ServerDiscoveryFactory.java | 91 - .../discovery/ServerDiscoverySelector.java | 146 -- .../apache/druid/guice/AnnouncerModule.java | 13 - .../annotations/DirectExecutorAnnouncer.java | 34 - ...nlineSchemaDataSourceCompactionConfig.java | 4 +- .../ServerDiscoverySelectorTest.java | 237 --- .../java/org/apache/druid/cli/CliIndexer.java | 5 +- 
.../apache/druid/cli/CliMiddleManager.java | 25 +- .../org/apache/druid/cli/CliOverlord.java | 6 - 61 files changed, 389 insertions(+), 7787 deletions(-) delete mode 100644 indexing-service/src/main/java/org/apache/druid/indexing/overlord/RemoteTaskRunner.java delete mode 100644 indexing-service/src/main/java/org/apache/druid/indexing/overlord/RemoteTaskRunnerFactory.java delete mode 100644 indexing-service/src/main/java/org/apache/druid/indexing/overlord/ZkWorker.java delete mode 100644 indexing-service/src/main/java/org/apache/druid/indexing/overlord/config/RemoteTaskRunnerConfig.java delete mode 100644 indexing-service/src/main/java/org/apache/druid/indexing/worker/WorkerCuratorCoordinator.java delete mode 100644 indexing-service/src/main/java/org/apache/druid/indexing/worker/WorkerTaskMonitor.java delete mode 100644 indexing-service/src/main/java/org/apache/druid/server/initialization/IndexerZkConfig.java delete mode 100644 indexing-service/src/test/java/org/apache/druid/indexing/overlord/OverlordBlinkLeadershipTest.java delete mode 100644 indexing-service/src/test/java/org/apache/druid/indexing/overlord/RemoteTaskRunnerFactoryTest.java delete mode 100644 indexing-service/src/test/java/org/apache/druid/indexing/overlord/RemoteTaskRunnerRunPendingTasksConcurrencyTest.java delete mode 100644 indexing-service/src/test/java/org/apache/druid/indexing/overlord/RemoteTaskRunnerTest.java delete mode 100644 indexing-service/src/test/java/org/apache/druid/indexing/overlord/RemoteTaskRunnerTestUtils.java delete mode 100644 indexing-service/src/test/java/org/apache/druid/indexing/overlord/TestRemoteTaskRunnerConfig.java delete mode 100644 indexing-service/src/test/java/org/apache/druid/indexing/overlord/ZkWorkerTest.java delete mode 100644 indexing-service/src/test/java/org/apache/druid/indexing/overlord/config/RemoteTaskRunnerConfigTest.java delete mode 100644 indexing-service/src/test/java/org/apache/druid/indexing/worker/WorkerTaskMonitorTest.java delete mode 100644 
indexing-service/src/test/java/org/apache/druid/server/initialization/IndexerZkConfigTest.java delete mode 100644 server/src/main/java/org/apache/druid/curator/discovery/ServerDiscoveryFactory.java delete mode 100644 server/src/main/java/org/apache/druid/curator/discovery/ServerDiscoverySelector.java delete mode 100644 server/src/main/java/org/apache/druid/guice/annotations/DirectExecutorAnnouncer.java delete mode 100644 server/src/test/java/org/apache/druid/curator/discovery/ServerDiscoverySelectorTest.java diff --git a/docs/configuration/index.md b/docs/configuration/index.md index f0b80523c401..12a7cd387dcd 100644 --- a/docs/configuration/index.md +++ b/docs/configuration/index.md @@ -156,18 +156,6 @@ Druid interacts with ZooKeeper through a set of standard path configurations. We |`druid.zk.paths.base`|Base ZooKeeper path.|`/druid`| |`druid.zk.paths.coordinatorPath`|Used by the Coordinator for leader election.|`${druid.zk.paths.base}/coordinator`| -The indexing service also uses its own set of paths. These configs can be included in the common configuration. - -|Property|Description|Default| -|--------|-----------|-------| -|`druid.zk.paths.indexer.base`|Base ZooKeeper path for |`${druid.zk.paths.base}/indexer`| -|`druid.zk.paths.indexer.announcementsPath`|Middle Managers announce themselves here.|`${druid.zk.paths.indexer.base}/announcements`| -|`druid.zk.paths.indexer.tasksPath`|Used to assign tasks to Middle Managers.|`${druid.zk.paths.indexer.base}/tasks`| -|`druid.zk.paths.indexer.statusPath`|Parent path for announcement of task statuses.|`${druid.zk.paths.indexer.base}/status`| - -If `druid.zk.paths.base` and `druid.zk.paths.indexer.base` are both set, and none of the other `druid.zk.paths.*` or `druid.zk.paths.indexer.*` values are set, then the other properties will be evaluated relative to their respective `base`. 
-For example, if `druid.zk.paths.base` is set to `/druid1` and `druid.zk.paths.indexer.base` is set to `/druid2` then `druid.zk.paths.coordinatorPath` will default to `/druid1/coordinator` while `druid.zk.paths.indexer.announcementsPath` will default to `/druid2/announcements`. - The following path is used for service discovery. It is **not** affected by `druid.zk.paths.base` and **must** be specified separately. |Property|Description|Default| @@ -966,7 +954,7 @@ These Overlord static configurations can be defined in the `overlord/runtime.pro |Property|Description|Default| |--------|-----------|-------| -|`druid.indexer.runner.type`|Indicates whether tasks should be run locally using `local` or in a distributed environment using `remote`. The recommended option is `httpRemote`, which is similar to `remote` but uses HTTP to interact with Middle Managers instead of ZooKeeper.|`httpRemote`| +|`druid.indexer.runner.type`|Indicates whether tasks should be run locally using `local` or in a distributed environment using `httpRemote`. `httpRemote` is recommended for distributed deployments and uses HTTP to interact with Middle Managers.|`httpRemote`| |`druid.indexer.server.maxConcurrentActions`|Maximum number of concurrent action requests (such as getting locks, creating segments, fetching segments etc) that the Overlord will process simultaneously. This prevents thread exhaustion while preserving access to health check endpoints. Set to `0` to disable quality of service filtering entirely. If not specified, defaults to `max(1, max(serverHttpNumThreads - 4, serverHttpNumThreads * 0.8))`.|`max(1, max(serverHttpNumThreads - 4, serverHttpNumThreads * 0.8))`| |`druid.indexer.storage.type`|Indicates whether incoming tasks should be stored locally (in heap) or in metadata storage. One of `local` or `metadata`. 
`local` is mainly for internal testing while `metadata` is recommended in production because storing incoming tasks in metadata storage allows for tasks to be resumed if the Overlord should fail.|`local`| |`druid.indexer.storage.recentlyFinishedThreshold`|Duration of time to store task results. Default is 24 hours. If you have hundreds of tasks running in a day, consider increasing this threshold.|`PT24H`| @@ -981,17 +969,14 @@ These Overlord static configurations can be defined in the `overlord/runtime.pro |`druid.indexer.queue.storageSyncRate`|Sync Overlord state this often with an underlying task persistence mechanism.|`PT1M`| |`druid.indexer.queue.maxTaskPayloadSize`|Maximum allowed size in bytes of a single task payload accepted by the Overlord.|none (allow all task payload sizes)| -The following configs only apply if the Overlord is running in remote mode. For a description of local vs. remote mode, see [Overlord service](../design/overlord.md). +The following configs apply when the Overlord is running with the `httpRemote` runner. For a description of local vs. distributed mode, see [Overlord service](../design/overlord.md). |Property|Description|Default| |--------|-----------|-------| |`druid.indexer.runner.taskAssignmentTimeout`|How long to wait after a task has been assigned to a Middle Manager before throwing an error.|`PT5M`| |`druid.indexer.runner.minWorkerVersion`|The minimum Middle Manager version to send tasks to. The version number is a string. This affects the expected behavior during certain operations like comparison against `druid.worker.version`. Specifically, the version comparison follows dictionary order. Use ISO8601 date format for the version to accommodate date comparisons. |"0"| |`druid.indexer.runner.parallelIndexTaskSlotRatio`| The ratio of task slots available for parallel indexing supervisor tasks per worker. The specified value must be in the range `[0, 1]`. 
|1| -|`druid.indexer.runner.compressZnodes`|Indicates whether or not the Overlord should expect Middle Managers to compress Znodes.|true| -|`druid.indexer.runner.maxZnodeBytes`|The maximum size Znode in bytes that can be created in ZooKeeper, should be in the range of `[10KiB, 2GiB)`. [Human-readable format](human-readable-byte.md) is supported.| 512 KiB | -|`druid.indexer.runner.taskCleanupTimeout`|How long to wait before failing a task after a Middle Manager is disconnected from ZooKeeper.|`PT15M`| -|`druid.indexer.runner.taskShutdownLinkTimeout`|How long to wait on a shutdown request to a Middle Manager before timing out|`PT1M`| +|`druid.indexer.runner.taskCleanupTimeout`|How long to wait before failing a task after a Middle Manager is disconnected.|`PT15M`| |`druid.indexer.runner.pendingTasksRunnerNumThreads`|Number of threads to allocate pending-tasks to workers, must be at least 1.|1| |`druid.indexer.runner.maxRetriesBeforeBlacklist`|Number of consecutive times the Middle Manager can fail tasks, before the worker is blacklisted, must be at least 1|5| |`druid.indexer.runner.workerBlackListBackoffTime`|How long to wait before a task is whitelisted again. This value should be greater that the value set for taskBlackListCleanupPeriod.|`PT15M`| @@ -1322,12 +1307,10 @@ Middle Managers pass their configurations down to their child peons. 
The Middle |Property|Description|Default| |--------|-----------|-------| |`druid.indexer.runner.allowedPrefixes`|Whitelist of prefixes for configs that can be passed down to child peons.|`com.metamx`, `druid`, `org.apache.druid`, `user.timezone`, `file.encoding`, `java.io.tmpdir`, `hadoop`| -|`druid.indexer.runner.compressZnodes`|Indicates whether or not the Middle Managers should compress Znodes.|true| |`druid.indexer.runner.classpath`|Java classpath for the peon.|`System.getProperty("java.class.path")`| |`druid.indexer.runner.javaCommand`|Command required to execute java.|java| |`druid.indexer.runner.javaOpts`|_DEPRECATED_ A string of -X Java options to pass to the peon's JVM. Quotable parameters or parameters with spaces are encouraged to use javaOptsArray|`''`| |`druid.indexer.runner.javaOptsArray`|A JSON array of strings to be passed in as options to the peon's JVM. This is additive to `druid.indexer.runner.javaOpts` and is recommended for properly handling arguments which contain quotes or spaces like `["-XX:OnOutOfMemoryError=kill -9 %p"]`|`[]`| -|`druid.indexer.runner.maxZnodeBytes`|The maximum size Znode in bytes that can be created in ZooKeeper, should be in the range of [10KiB, 2GiB). [Human-readable format](human-readable-byte.md) is supported.|512KiB| |`druid.indexer.runner.startPort`|Starting port used for Peon services, should be greater than 1023 and less than 65536.|8100| |`druid.indexer.runner.endPort`|Ending port used for Peon services, should be greater than or equal to `druid.indexer.runner.startPort` and less than 65536.|65535| |`druid.indexer.runner.ports`|A JSON array of integers to specify ports that used for Peon services. If provided and non-empty, ports for Peon services will be chosen from these ports. 
And `druid.indexer.runner.startPort/druid.indexer.runner.endPort` will be completely ignored.|`[]`| diff --git a/docs/design/zookeeper.md b/docs/design/zookeeper.md index ca64e1a0d5bc..d69ba92f0a1c 100644 --- a/docs/design/zookeeper.md +++ b/docs/design/zookeeper.md @@ -36,9 +36,8 @@ The operations that happen over ZK are: 1. [Coordinator](../design/coordinator.md) leader election 2. [Overlord](../design/overlord.md) leader election 3. Service (node) announcement and discovery — services announce their presence so other services can find them -4. [Overlord](../design/overlord.md) and [Middle Manager](../design/middlemanager.md) task management -Segment loading, dropping, and discovery no longer use ZooKeeper — they are served over HTTP. +Segment loading and dropping, segment discovery, and Overlord ↔ Middle Manager task management no longer use ZooKeeper — they are served over HTTP. ## Coordinator leader election diff --git a/docs/development/extensions-core/k8s-jobs.md b/docs/development/extensions-core/k8s-jobs.md index 67be33522ef1..b65a7bb496bd 100644 --- a/docs/development/extensions-core/k8s-jobs.md +++ b/docs/development/extensions-core/k8s-jobs.md @@ -1019,7 +1019,7 @@ To do this, set the following property. |Property| Possible Values |Description|Default|required| |--------|-----------------|-----------|-------|--------| |`druid.indexer.runner.k8sAndWorker.runnerStrategy.type`| `String` (e.g., `k8s`, `worker`, `taskType`)| Defines the strategy for task runner selection. 
|`k8s`|No| -|`druid.indexer.runner.k8sAndWorker.runnerStrategy.workerType`| `String` (e.g., `httpRemote`, `remote`)| Specifies the variant of the worker task runner to be utilized.|`httpRemote`|No| +|`druid.indexer.runner.k8sAndWorker.runnerStrategy.workerType`| `String` (e.g., `httpRemote`)| Specifies the variant of the worker task runner to be utilized.|`httpRemote`|No| | **For `taskType` runner strategy:**||||| |`druid.indexer.runner.k8sAndWorker.runnerStrategy.taskType.default`| `String` (e.g., `k8s`, `worker`) | Specifies the default runner to use if no overrides apply. This setting ensures there is always a fallback runner available.|None|No| |`druid.indexer.runner.k8sAndWorker.runnerStrategy.taskType.overrides`| `JsonObject`(e.g., `{"index_kafka": "worker"}`)| Defines task-specific overrides for runner types. Each entry sets a task type to a specific runner, allowing fine control. |`{}`|No| diff --git a/extensions-core/kubernetes-overlord-extensions/src/main/java/org/apache/druid/k8s/overlord/KubernetesAndWorkerTaskRunnerConfig.java b/extensions-core/kubernetes-overlord-extensions/src/main/java/org/apache/druid/k8s/overlord/KubernetesAndWorkerTaskRunnerConfig.java index 89311981b0e2..9ec20045361c 100644 --- a/extensions-core/kubernetes-overlord-extensions/src/main/java/org/apache/druid/k8s/overlord/KubernetesAndWorkerTaskRunnerConfig.java +++ b/extensions-core/kubernetes-overlord-extensions/src/main/java/org/apache/druid/k8s/overlord/KubernetesAndWorkerTaskRunnerConfig.java @@ -23,7 +23,6 @@ import com.fasterxml.jackson.annotation.JsonProperty; import com.google.common.base.Preconditions; import org.apache.commons.lang3.ObjectUtils; -import org.apache.druid.indexing.overlord.RemoteTaskRunnerFactory; import org.apache.druid.indexing.overlord.hrtr.HttpRemoteTaskRunnerFactory; import javax.annotation.Nullable; @@ -51,11 +50,9 @@ public KubernetesAndWorkerTaskRunnerConfig( this.runnerStrategy = ObjectUtils.getIfNull(runnerStrategy, 
KubernetesTaskRunnerFactory.TYPE_NAME); this.workerType = ObjectUtils.getIfNull(workerType, HttpRemoteTaskRunnerFactory.TYPE_NAME); Preconditions.checkArgument( - this.workerType.equals(HttpRemoteTaskRunnerFactory.TYPE_NAME) || - this.workerType.equals(RemoteTaskRunnerFactory.TYPE_NAME), - "workerType must be set to one of (%s, %s)", - HttpRemoteTaskRunnerFactory.TYPE_NAME, - RemoteTaskRunnerFactory.TYPE_NAME + this.workerType.equals(HttpRemoteTaskRunnerFactory.TYPE_NAME), + "workerType must be set to [%s]; the ZooKeeper-based 'remote' worker type has been removed.", + HttpRemoteTaskRunnerFactory.TYPE_NAME ); } diff --git a/extensions-core/kubernetes-overlord-extensions/src/main/java/org/apache/druid/k8s/overlord/KubernetesOverlordModule.java b/extensions-core/kubernetes-overlord-extensions/src/main/java/org/apache/druid/k8s/overlord/KubernetesOverlordModule.java index 6e82bb8766ff..b45aa6fb846d 100644 --- a/extensions-core/kubernetes-overlord-extensions/src/main/java/org/apache/druid/k8s/overlord/KubernetesOverlordModule.java +++ b/extensions-core/kubernetes-overlord-extensions/src/main/java/org/apache/druid/k8s/overlord/KubernetesOverlordModule.java @@ -44,7 +44,6 @@ import org.apache.druid.guice.annotations.Self; import org.apache.druid.guice.annotations.Smile; import org.apache.druid.indexing.common.config.TaskConfig; -import org.apache.druid.indexing.overlord.RemoteTaskRunnerFactory; import org.apache.druid.indexing.overlord.TaskRunnerFactory; import org.apache.druid.indexing.overlord.WorkerTaskRunner; import org.apache.druid.indexing.overlord.config.TaskQueueConfig; @@ -264,10 +263,10 @@ public void stop() } /** - * Provides a TaskRunnerFactory instance suitable for environments without Zookeeper. - * In such environments, the standard RemoteTaskRunnerFactory may not be operational. - * Depending on the workerType defined in KubernetesAndWorkerTaskRunnerConfig, - * this method selects and returns an appropriate TaskRunnerFactory implementation. 
+ * Provides the worker-side {@link TaskRunnerFactory} that the {@code k8sAndWorker} runner pairs + * with {@link KubernetesTaskRunnerFactory}. Only {@link HttpRemoteTaskRunnerFactory} is + * supported; the ZooKeeper-based 'remote' worker type was removed, and + * {@link KubernetesAndWorkerTaskRunnerConfig} enforces this at config-validation time. */ @Provides @LazySingleton @@ -277,10 +276,8 @@ TaskRunnerFactory provideWorkerTaskRunner( Injector injector ) { - String workerType = runnerConfig.getWorkerType(); - return HttpRemoteTaskRunnerFactory.TYPE_NAME.equals(workerType) - ? injector.getInstance(HttpRemoteTaskRunnerFactory.class) - : injector.getInstance(RemoteTaskRunnerFactory.class); + // workerType is validated to be HttpRemoteTaskRunnerFactory.TYPE_NAME by the config. + return injector.getInstance(HttpRemoteTaskRunnerFactory.class); } /** diff --git a/extensions-core/kubernetes-overlord-extensions/src/test/java/org/apache/druid/k8s/overlord/KubernetesAndWorkerTaskRunnerConfigTest.java b/extensions-core/kubernetes-overlord-extensions/src/test/java/org/apache/druid/k8s/overlord/KubernetesAndWorkerTaskRunnerConfigTest.java index 329a1ea52bce..5338ad2ebb9b 100644 --- a/extensions-core/kubernetes-overlord-extensions/src/test/java/org/apache/druid/k8s/overlord/KubernetesAndWorkerTaskRunnerConfigTest.java +++ b/extensions-core/kubernetes-overlord-extensions/src/test/java/org/apache/druid/k8s/overlord/KubernetesAndWorkerTaskRunnerConfigTest.java @@ -38,7 +38,7 @@ public void test_deserializable() throws IOException ); Assertions.assertEquals("worker", config.getRunnerStrategy()); - Assertions.assertEquals("remote", config.getWorkerType()); + Assertions.assertEquals("httpRemote", config.getWorkerType()); } @Test diff --git a/extensions-core/kubernetes-overlord-extensions/src/test/java/org/apache/druid/k8s/overlord/KubernetesOverlordModuleTest.java 
b/extensions-core/kubernetes-overlord-extensions/src/test/java/org/apache/druid/k8s/overlord/KubernetesOverlordModuleTest.java index e37313ebb0fb..55e5103567b6 100644 --- a/extensions-core/kubernetes-overlord-extensions/src/test/java/org/apache/druid/k8s/overlord/KubernetesOverlordModuleTest.java +++ b/extensions-core/kubernetes-overlord-extensions/src/test/java/org/apache/druid/k8s/overlord/KubernetesOverlordModuleTest.java @@ -35,7 +35,6 @@ import org.apache.druid.guice.annotations.EscalatedGlobal; import org.apache.druid.guice.annotations.Self; import org.apache.druid.indexing.common.config.TaskConfig; -import org.apache.druid.indexing.overlord.RemoteTaskRunnerFactory; import org.apache.druid.indexing.overlord.TaskRunnerFactory; import org.apache.druid.indexing.overlord.hrtr.HttpRemoteTaskRunnerFactory; import org.apache.druid.jackson.JacksonModule; @@ -77,8 +76,6 @@ public class KubernetesOverlordModuleTest @Mock private HttpClient httpClient; @Mock - private RemoteTaskRunnerFactory remoteTaskRunnerFactory; - @Mock private HttpRemoteTaskRunnerFactory httpRemoteTaskRunnerFactory; @Mock private ConfigManagerConfig configManagerConfig; @@ -111,7 +108,7 @@ public void setUpConfigManagerMock() @Test public void testDefaultHttpRemoteTaskRunnerFactoryBindSuccessfully() { - injector = makeInjectorWithProperties(initializePropertes(false), false, true); + injector = makeInjectorWithProperties(initializePropertes(), true); KubernetesAndWorkerTaskRunnerFactory taskRunnerFactory = injector.getInstance( KubernetesAndWorkerTaskRunnerFactory.class); Assertions.assertNotNull(taskRunnerFactory); @@ -122,32 +119,21 @@ public void testDefaultHttpRemoteTaskRunnerFactoryBindSuccessfully() @Test public void testMultipleKubernetesTaskRunnerFactoryBindSuccessfully() { - final Properties props = initializePropertes(false); + final Properties props = initializePropertes(); props.setProperty("druid.indexer.runner.type", MultipleKubernetesTaskRunnerFactory.TYPE_NAME); 
props.setProperty("druid.indexer.runner.clusters[0].taskNamespace", "NAMESPACE"); - injector = makeInjectorWithProperties(props, false, true); + injector = makeInjectorWithProperties(props, true); final TaskRunnerFactory taskRunnerFactory = injector.getInstance(TaskRunnerFactory.class); Assertions.assertInstanceOf(MultipleKubernetesTaskRunnerFactory.class, taskRunnerFactory); } - @Test - public void testRemoteTaskRunnerFactoryBindSuccessfully() - { - injector = makeInjectorWithProperties(initializePropertes(true), true, false); - KubernetesAndWorkerTaskRunnerFactory taskRunnerFactory = injector.getInstance( - KubernetesAndWorkerTaskRunnerFactory.class); - Assertions.assertNotNull(taskRunnerFactory); - - Assertions.assertNotNull(taskRunnerFactory.build()); - } - @Test public void testExceptionThrownIfNoTaskRunnerFactoryBind() { Assertions.assertThrows(ProvisionException.class, () -> { - injector = makeInjectorWithProperties(initializePropertes(false), false, false); + injector = makeInjectorWithProperties(initializePropertes(), false); injector.getInstance(KubernetesAndWorkerTaskRunnerFactory.class); }); } @@ -159,7 +145,7 @@ public void test_build_withMultiContainerAdapterType_returnsWithMultiContainerTa props.setProperty("druid.indexer.runner.k8s.adapter.type", "overlordMultiContainer"); props.setProperty("druid.indexer.runner.namespace", "NAMESPACE"); - injector = makeInjectorWithProperties(props, false, true); + injector = makeInjectorWithProperties(props, true); TaskAdapter taskAdapter = injector.getInstance( TaskAdapter.class); @@ -173,7 +159,7 @@ public void test_build_withSingleContainerAdapterType_returnsKubernetesTaskRunne Properties props = new Properties(); props.setProperty("druid.indexer.runner.k8s.adapter.type", "overlordSingleContainer"); props.setProperty("druid.indexer.runner.namespace", "NAMESPACE"); - injector = makeInjectorWithProperties(props, false, true); + injector = makeInjectorWithProperties(props, true); TaskAdapter taskAdapter = 
injector.getInstance( TaskAdapter.class); @@ -188,7 +174,7 @@ public void test_build_withSingleContainerAdapterTypeAndSidecarSupport_throwsPro props.setProperty("druid.indexer.runner.k8s.adapter.type", "overlordSingleContainer"); props.setProperty("druid.indexer.runner.sidecarSupport", "true"); props.setProperty("druid.indexer.runner.namespace", "NAMESPACE"); - injector = makeInjectorWithProperties(props, false, true); + injector = makeInjectorWithProperties(props, true); Assertions.assertThrows( ProvisionException.class, @@ -203,7 +189,7 @@ public void test_build_withSidecarSupport_returnsKubernetesTaskRunnerWithMultiCo Properties props = new Properties(); props.setProperty("druid.indexer.runner.sidecarSupport", "true"); props.setProperty("druid.indexer.runner.namespace", "NAMESPACE"); - injector = makeInjectorWithProperties(props, false, true); + injector = makeInjectorWithProperties(props, true); TaskAdapter adapter = injector.getInstance(TaskAdapter.class); @@ -218,7 +204,7 @@ public void test_build_withoutSidecarSupport_returnsKubernetesTaskRunnerWithSing Properties props = new Properties(); props.setProperty("druid.indexer.runner.sidecarSupport", "false"); props.setProperty("druid.indexer.runner.namespace", "NAMESPACE"); - injector = makeInjectorWithProperties(props, false, true); + injector = makeInjectorWithProperties(props, true); TaskAdapter adapter = injector.getInstance(TaskAdapter.class); @@ -235,7 +221,7 @@ public void test_build_withPodTemplateAdapterType_returnsKubernetesTaskRunnerWit props.setProperty("druid.indexer.runner.k8s.adapter.type", "customTemplateAdapter"); props.setProperty("druid.indexer.runner.k8s.podTemplate.base", url.getPath()); props.setProperty("druid.indexer.runner.namespace", "NAMESPACE"); - injector = makeInjectorWithProperties(props, false, true); + injector = makeInjectorWithProperties(props, true); TaskAdapter adapter = injector.getInstance(TaskAdapter.class); @@ -251,7 +237,7 @@ public void 
test_httpClientFactory_defaultsToVertx() props.setProperty("druid.indexer.runner.namespace", "NAMESPACE"); // Don't set httpClientType - should default to vertx - injector = makeInjectorWithProperties(props, false, true); + injector = makeInjectorWithProperties(props, true); DruidKubernetesHttpClientFactory factory = injector.getInstance(DruidKubernetesHttpClientFactory.class); Assertions.assertNotNull(factory); @@ -266,7 +252,7 @@ public void test_httpClientFactory_okhttpSelection() props.setProperty("druid.indexer.runner.namespace", "NAMESPACE"); props.setProperty("druid.indexer.runner.k8sAndWorker.http.httpClientType", "okhttp"); - injector = makeInjectorWithProperties(props, false, true); + injector = makeInjectorWithProperties(props, true); DruidKubernetesHttpClientFactory factory = injector.getInstance(DruidKubernetesHttpClientFactory.class); Assertions.assertNotNull(factory); @@ -281,7 +267,7 @@ public void test_httpClientFactory_vertxExplicitSelection() props.setProperty("druid.indexer.runner.namespace", "NAMESPACE"); props.setProperty("druid.indexer.runner.k8sAndWorker.http.httpClientType", "vertx"); - injector = makeInjectorWithProperties(props, false, true); + injector = makeInjectorWithProperties(props, true); DruidKubernetesHttpClientFactory factory = injector.getInstance(DruidKubernetesHttpClientFactory.class); Assertions.assertNotNull(factory); @@ -296,7 +282,7 @@ public void test_httpClientFactory_jdkSelection() props.setProperty("druid.indexer.runner.namespace", "NAMESPACE"); props.setProperty("druid.indexer.runner.k8sAndWorker.http.httpClientType", "javaStandardHttp"); - injector = makeInjectorWithProperties(props, false, true); + injector = makeInjectorWithProperties(props, true); DruidKubernetesHttpClientFactory factory = injector.getInstance(DruidKubernetesHttpClientFactory.class); Assertions.assertNotNull(factory); @@ -312,7 +298,7 @@ public void test_httpClientFactory_invalidTypeThrowsException() 
props.setProperty("druid.indexer.runner.namespace", "NAMESPACE"); props.setProperty("druid.indexer.runner.k8sAndWorker.http.httpClientType", "invalid"); - injector = makeInjectorWithProperties(props, false, true); + injector = makeInjectorWithProperties(props, true); injector.getInstance(DruidKubernetesHttpClientFactory.class); }); } @@ -324,7 +310,7 @@ public void test_druidKubernetesClient_createdWithVertxClient() props.setProperty("druid.indexer.runner.namespace", "NAMESPACE"); // Don't set httpClientType - should default to vertx - injector = makeInjectorWithProperties(props, false, true); + injector = makeInjectorWithProperties(props, true); DruidKubernetesClient client = injector.getInstance(DruidKubernetesClient.class); Assertions.assertNotNull(client, "DruidKubernetesClient should be created successfully"); @@ -333,8 +319,7 @@ public void test_druidKubernetesClient_createdWithVertxClient() private Injector makeInjectorWithProperties( final Properties props, - boolean isWorkerTypeRemote, - boolean isWorkerTypeHttpRemote + boolean bindHttpRemoteTaskRunnerFactory ) { return Guice.createInjector( @@ -350,10 +335,7 @@ private Injector makeInjectorWithProperties( binder.bind(DruidNode.class) .annotatedWith(Self.class) .toInstance(new DruidNode("test-inject", null, false, null, null, true, false)); - if (isWorkerTypeRemote) { - binder.bind(RemoteTaskRunnerFactory.class).toInstance(remoteTaskRunnerFactory); - } - if (isWorkerTypeHttpRemote) { + if (bindHttpRemoteTaskRunnerFactory) { binder.bind(HttpRemoteTaskRunnerFactory.class).toInstance(httpRemoteTaskRunnerFactory); } binder.bind( @@ -374,14 +356,11 @@ private Injector makeInjectorWithProperties( )); } - private static Properties initializePropertes(boolean isWorkerTypeRemote) + private static Properties initializePropertes() { final Properties props = new Properties(); props.put("druid.indexer.runner.namespace", "NAMESPACE"); props.put("druid.indexer.runner.k8sAndWorker.runnerStrategy.type", "k8s"); - if 
(isWorkerTypeRemote) { - props.put("druid.indexer.runner.k8sAndWorker.runnerStrategy.workerType", "remote"); - } return props; } } diff --git a/extensions-core/kubernetes-overlord-extensions/src/test/resources/kubernetesAndWorkerTaskRunnerConfig.json b/extensions-core/kubernetes-overlord-extensions/src/test/resources/kubernetesAndWorkerTaskRunnerConfig.json index 43e7414f11f8..de09ff0ee209 100644 --- a/extensions-core/kubernetes-overlord-extensions/src/test/resources/kubernetesAndWorkerTaskRunnerConfig.json +++ b/extensions-core/kubernetes-overlord-extensions/src/test/resources/kubernetesAndWorkerTaskRunnerConfig.json @@ -1,4 +1,4 @@ { "runnerStrategy.type": "worker", - "runnerStrategy.workerType": "remote" -} \ No newline at end of file + "runnerStrategy.workerType": "httpRemote" +} diff --git a/indexing-service/pom.xml b/indexing-service/pom.xml index 42e117203649..e6cc7d787c10 100644 --- a/indexing-service/pom.xml +++ b/indexing-service/pom.xml @@ -92,14 +92,6 @@ io.netty netty - - org.apache.zookeeper - zookeeper - - - org.apache.zookeeper - zookeeper-jute - com.fasterxml.jackson.core jackson-core @@ -112,10 +104,6 @@ com.google.guava guava - - org.apache.curator - curator-recipes - jakarta.validation jakarta.validation-api diff --git a/indexing-service/src/main/java/org/apache/druid/guice/IndexingServiceModuleHelper.java b/indexing-service/src/main/java/org/apache/druid/guice/IndexingServiceModuleHelper.java index cc3732439d8a..da60043c6780 100644 --- a/indexing-service/src/main/java/org/apache/druid/guice/IndexingServiceModuleHelper.java +++ b/indexing-service/src/main/java/org/apache/druid/guice/IndexingServiceModuleHelper.java @@ -22,8 +22,6 @@ import com.google.inject.Binder; import org.apache.druid.indexing.overlord.config.ForkingTaskRunnerConfig; import org.apache.druid.indexing.overlord.config.HttpRemoteTaskRunnerConfig; -import org.apache.druid.indexing.overlord.config.RemoteTaskRunnerConfig; -import 
org.apache.druid.server.initialization.IndexerZkConfig; /** */ @@ -34,8 +32,6 @@ public class IndexingServiceModuleHelper public static void configureTaskRunnerConfigs(Binder binder) { JsonConfigProvider.bind(binder, INDEXER_RUNNER_PROPERTY_PREFIX, ForkingTaskRunnerConfig.class); - JsonConfigProvider.bind(binder, INDEXER_RUNNER_PROPERTY_PREFIX, RemoteTaskRunnerConfig.class); JsonConfigProvider.bind(binder, INDEXER_RUNNER_PROPERTY_PREFIX, HttpRemoteTaskRunnerConfig.class); - JsonConfigProvider.bind(binder, "druid.zk.paths.indexer", IndexerZkConfig.class); } } diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/ImmutableWorkerInfo.java b/indexing-service/src/main/java/org/apache/druid/indexing/overlord/ImmutableWorkerInfo.java index cd911ed99811..76a8385d7095 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/ImmutableWorkerInfo.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/overlord/ImmutableWorkerInfo.java @@ -105,7 +105,8 @@ public ImmutableWorkerInfo( } /** - * Helper used by {@link ZkWorker} and {@link org.apache.druid.indexing.overlord.hrtr.WorkerHolder}. + * Helper used by {@link org.apache.druid.indexing.overlord.hrtr.WorkerHolder} to build a worker view from a set of + * task announcements. */ public static ImmutableWorkerInfo fromWorkerAnnouncements( final Worker worker, diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/RemoteTaskRunner.java b/indexing-service/src/main/java/org/apache/druid/indexing/overlord/RemoteTaskRunner.java deleted file mode 100644 index 4018701d447f..000000000000 --- a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/RemoteTaskRunner.java +++ /dev/null @@ -1,1673 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.indexing.overlord; - -import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Joiner; -import com.google.common.base.Optional; -import com.google.common.base.Preconditions; -import com.google.common.base.Predicate; -import com.google.common.base.Stopwatch; -import com.google.common.base.Supplier; -import com.google.common.base.Throwables; -import com.google.common.collect.Collections2; -import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableMap; -import com.google.common.collect.Lists; -import com.google.common.collect.Maps; -import com.google.common.util.concurrent.FutureCallback; -import com.google.common.util.concurrent.Futures; -import com.google.common.util.concurrent.ListenableFuture; -import com.google.common.util.concurrent.ListenableScheduledFuture; -import com.google.common.util.concurrent.ListeningScheduledExecutorService; -import com.google.common.util.concurrent.MoreExecutors; -import com.google.common.util.concurrent.SettableFuture; -import org.apache.commons.lang3.mutable.MutableInt; -import org.apache.curator.framework.CuratorFramework; -import org.apache.curator.framework.recipes.cache.PathChildrenCache; -import org.apache.curator.framework.recipes.cache.PathChildrenCacheListener; -import 
org.apache.curator.utils.ZKPaths; -import org.apache.druid.concurrent.LifecycleLock; -import org.apache.druid.curator.CuratorUtils; -import org.apache.druid.curator.cache.PathChildrenCacheFactory; -import org.apache.druid.indexer.RunnerTaskState; -import org.apache.druid.indexer.TaskLocation; -import org.apache.druid.indexer.TaskState; -import org.apache.druid.indexer.TaskStatus; -import org.apache.druid.indexing.common.task.IndexTaskUtils; -import org.apache.druid.indexing.common.task.Task; -import org.apache.druid.indexing.overlord.autoscaling.ProvisioningService; -import org.apache.druid.indexing.overlord.autoscaling.ProvisioningStrategy; -import org.apache.druid.indexing.overlord.autoscaling.ScalingStats; -import org.apache.druid.indexing.overlord.config.RemoteTaskRunnerConfig; -import org.apache.druid.indexing.overlord.setup.DefaultWorkerBehaviorConfig; -import org.apache.druid.indexing.overlord.setup.WorkerBehaviorConfig; -import org.apache.druid.indexing.overlord.setup.WorkerSelectStrategy; -import org.apache.druid.indexing.worker.TaskAnnouncement; -import org.apache.druid.indexing.worker.Worker; -import org.apache.druid.java.util.common.DateTimes; -import org.apache.druid.java.util.common.ISE; -import org.apache.druid.java.util.common.Pair; -import org.apache.druid.java.util.common.RE; -import org.apache.druid.java.util.common.StringUtils; -import org.apache.druid.java.util.common.concurrent.Execs; -import org.apache.druid.java.util.common.concurrent.ScheduledExecutors; -import org.apache.druid.java.util.common.io.Closer; -import org.apache.druid.java.util.common.lifecycle.LifecycleStart; -import org.apache.druid.java.util.common.lifecycle.LifecycleStop; -import org.apache.druid.java.util.emitter.EmittingLogger; -import org.apache.druid.java.util.emitter.service.ServiceEmitter; -import org.apache.druid.java.util.emitter.service.ServiceMetricEvent; -import org.apache.druid.java.util.http.client.HttpClient; -import 
org.apache.druid.java.util.http.client.Request; -import org.apache.druid.java.util.http.client.response.InputStreamResponseHandler; -import org.apache.druid.java.util.http.client.response.StatusResponseHandler; -import org.apache.druid.java.util.http.client.response.StatusResponseHolder; -import org.apache.druid.server.initialization.IndexerZkConfig; -import org.apache.druid.tasklogs.TaskLogStreamer; -import org.apache.zookeeper.CreateMode; -import org.apache.zookeeper.KeeperException; -import org.jboss.netty.handler.codec.http.HttpMethod; -import org.jboss.netty.handler.codec.http.HttpResponseStatus; -import org.joda.time.Duration; -import org.joda.time.Period; - -import javax.annotation.Nullable; -import java.io.IOException; -import java.io.InputStream; -import java.net.URL; -import java.util.Collection; -import java.util.Collections; -import java.util.Comparator; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Iterator; -import java.util.LinkedHashMap; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.concurrent.Callable; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.ConcurrentMap; -import java.util.concurrent.CopyOnWriteArrayList; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.Executor; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.ScheduledFuture; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.TimeoutException; - -/** - * The RemoteTaskRunner's primary responsibility is to assign tasks to worker nodes. - * The RemoteTaskRunner uses Zookeeper to keep track of which workers are running which tasks. Tasks are assigned by - * creating ephemeral nodes in ZK that workers must remove. Workers announce the statuses of the tasks they are running. - * Once a task completes, it is up to the RTR to remove the task status and run any necessary cleanup. 
- * The RemoteTaskRunner is event driven and updates state according to ephemeral node changes in ZK. - *

- * The RemoteTaskRunner will assign tasks to a node until the node hits capacity. At that point, task assignment will - * fail. The RemoteTaskRunner depends on another component to create additional worker resources. - *

- * If a worker node becomes inexplicably disconnected from Zk, the RemoteTaskRunner will fail any tasks associated with the - * worker after waiting for RemoteTaskRunnerConfig.taskCleanupTimeout for the worker to show up. - *

- * The RemoteTaskRunner uses ZK for job management and assignment and http for IPC messages. - */ -public class RemoteTaskRunner implements WorkerTaskRunner, TaskLogStreamer -{ - private static final EmittingLogger log = new EmittingLogger(RemoteTaskRunner.class); - private static final Joiner JOINER = Joiner.on("/"); - - private final ObjectMapper jsonMapper; - private final RemoteTaskRunnerConfig config; - private final Duration shutdownTimeout; - private final IndexerZkConfig indexerZkConfig; - private final CuratorFramework cf; - private final PathChildrenCacheFactory workerStatusPathChildrenCacheFactory; - private final ExecutorService workerStatusPathChildrenCacheExecutor; - private final PathChildrenCache workerPathCache; - private final HttpClient httpClient; - private final Supplier workerConfigRef; - - // all workers that exist in ZK - private final ConcurrentMap zkWorkers = new ConcurrentHashMap<>(); - // payloads of pending tasks, which we remember just long enough to assign to workers - private final ConcurrentMap pendingTaskPayloads = new ConcurrentHashMap<>(); - // tasks that have not yet been assigned to a worker - private final RemoteTaskRunnerWorkQueue pendingTasks = new RemoteTaskRunnerWorkQueue(); - // all tasks that have been assigned to a worker - private final RemoteTaskRunnerWorkQueue runningTasks = new RemoteTaskRunnerWorkQueue(); - // tasks that are complete but not cleaned up yet - private final RemoteTaskRunnerWorkQueue completeTasks = new RemoteTaskRunnerWorkQueue(); - - private final ExecutorService runPendingTasksExec; - - // Workers that have been marked as lazy. these workers are not running any tasks and can be terminated safely by the scaling policy. - private final ConcurrentMap lazyWorkers = new ConcurrentHashMap<>(); - - // Workers that have been blacklisted. 
- private final Set blackListedWorkers = Collections.synchronizedSet(new HashSet<>()); - - // task runner listeners - private final CopyOnWriteArrayList> listeners = new CopyOnWriteArrayList<>(); - - // workers which were assigned a task and are yet to acknowledge same. - // Map: workerId -> taskId - private final ConcurrentMap workersWithUnacknowledgedTask = new ConcurrentHashMap<>(); - // Map: taskId -> taskId .tasks which are being tried to be assigned to a worker - private final ConcurrentMap tryAssignTasks = new ConcurrentHashMap<>(); - - private final Object statusLock = new Object(); - - private final LifecycleLock lifecycleLock = new LifecycleLock(); - - private final ListeningScheduledExecutorService cleanupExec; - - private final ConcurrentMap removedWorkerCleanups = new ConcurrentHashMap<>(); - private final ProvisioningStrategy provisioningStrategy; - private final ServiceEmitter emitter; - private ProvisioningService provisioningService; - - public RemoteTaskRunner( - ObjectMapper jsonMapper, - RemoteTaskRunnerConfig config, - IndexerZkConfig indexerZkConfig, - CuratorFramework cf, - PathChildrenCacheFactory.Builder pathChildrenCacheFactory, - HttpClient httpClient, - Supplier workerConfigRef, - ProvisioningStrategy provisioningStrategy, - ServiceEmitter emitter - ) - { - this.jsonMapper = jsonMapper; - this.config = config; - this.shutdownTimeout = config.getTaskShutdownLinkTimeout().toStandardDuration(); // Fail fast - this.indexerZkConfig = indexerZkConfig; - this.cf = cf; - this.workerPathCache = pathChildrenCacheFactory.build().make(cf, indexerZkConfig.getAnnouncementsPath()); - this.workerStatusPathChildrenCacheExecutor = PathChildrenCacheFactory.Builder.createDefaultExecutor(); - this.workerStatusPathChildrenCacheFactory = pathChildrenCacheFactory - .withExecutorService(workerStatusPathChildrenCacheExecutor) - .withShutdownExecutorOnClose(false) - .build(); - this.httpClient = httpClient; - this.workerConfigRef = workerConfigRef; - 
this.cleanupExec = MoreExecutors.listeningDecorator( - ScheduledExecutors.fixed(1, "RemoteTaskRunner-Scheduled-Cleanup--%d") - ); - this.provisioningStrategy = provisioningStrategy; - this.runPendingTasksExec = Execs.multiThreaded( - config.getPendingTasksRunnerNumThreads(), - "rtr-pending-tasks-runner-%d" - ); - this.emitter = emitter; - } - - @Override - @LifecycleStart - public void start() - { - if (!lifecycleLock.canStart()) { - return; - } - try { - log.info("Starting RemoteTaskRunner..."); - final MutableInt waitingFor = new MutableInt(1); - final Object waitingForMonitor = new Object(); - - // Add listener for creation/deletion of workers - workerPathCache.getListenable().addListener( - (client, event) -> { - final Worker worker; - switch (event.getType()) { - case CHILD_ADDED: - worker = jsonMapper.readValue( - event.getData().getData(), - Worker.class - ); - synchronized (waitingForMonitor) { - waitingFor.increment(); - } - Futures.addCallback( - addWorker(worker), - new FutureCallback<>() - { - @Override - public void onSuccess(ZkWorker zkWorker) - { - synchronized (waitingForMonitor) { - waitingFor.decrement(); - waitingForMonitor.notifyAll(); - } - } - - @Override - public void onFailure(Throwable throwable) - { - synchronized (waitingForMonitor) { - waitingFor.decrement(); - waitingForMonitor.notifyAll(); - } - } - }, - MoreExecutors.directExecutor() - ); - break; - case CHILD_UPDATED: - worker = jsonMapper.readValue( - event.getData().getData(), - Worker.class - ); - updateWorker(worker); - break; - - case CHILD_REMOVED: - worker = jsonMapper.readValue( - event.getData().getData(), - Worker.class - ); - removeWorker(worker); - break; - case INITIALIZED: - // Schedule cleanup for task status of the workers that might have disconnected while overlord was not running - List workers; - try { - workers = cf.getChildren().forPath(indexerZkConfig.getStatusPath()); - } - catch (KeeperException.NoNodeException e) { - // statusPath doesn't exist yet; can occur 
if no middleManagers have started. - workers = ImmutableList.of(); - } - for (String workerId : workers) { - final String workerAnnouncePath = JOINER.join(indexerZkConfig.getAnnouncementsPath(), workerId); - final String workerStatusPath = JOINER.join(indexerZkConfig.getStatusPath(), workerId); - if (!zkWorkers.containsKey(workerId) && cf.checkExists().forPath(workerAnnouncePath) == null) { - try { - scheduleTasksCleanupForWorker(workerId, cf.getChildren().forPath(workerStatusPath)); - } - catch (Exception e) { - log.warn( - e, - "Could not schedule cleanup for worker[%s] during startup (maybe someone removed the status znode[%s]?). Skipping.", - workerId, - workerStatusPath - ); - } - } - } - synchronized (waitingForMonitor) { - waitingFor.decrement(); - waitingForMonitor.notifyAll(); - } - break; - case CONNECTION_SUSPENDED: - case CONNECTION_RECONNECTED: - case CONNECTION_LOST: - // do nothing - } - } - ); - workerPathCache.start(PathChildrenCache.StartMode.POST_INITIALIZED_EVENT); - synchronized (waitingForMonitor) { - while (waitingFor.intValue() > 0) { - waitingForMonitor.wait(); - } - } - - ScheduledExecutors.scheduleAtFixedRate( - cleanupExec, - Period.ZERO.toStandardDuration(), - config.getWorkerBlackListCleanupPeriod().toStandardDuration(), - this::checkBlackListedNodes - ); - - provisioningService = provisioningStrategy.makeProvisioningService(this); - lifecycleLock.started(); - } - catch (Exception e) { - throw new RuntimeException(e); - } - finally { - lifecycleLock.exitStart(); - } - } - - @Override - @LifecycleStop - public void stop() - { - if (!lifecycleLock.canStop()) { - return; - } - try { - log.info("Stopping RemoteTaskRunner..."); - provisioningService.close(); - - Closer closer = Closer.create(); - for (ZkWorker zkWorker : zkWorkers.values()) { - closer.register(zkWorker); - } - closer.register(workerPathCache); - try { - closer.close(); - } - finally { - workerStatusPathChildrenCacheExecutor.shutdown(); - } - - if (runPendingTasksExec != 
null) { - runPendingTasksExec.shutdown(); - } - - if (cleanupExec != null) { - cleanupExec.shutdown(); - } - } - catch (Exception e) { - throw new RuntimeException(e); - } - finally { - lifecycleLock.exitStop(); - } - } - - @Override - public List>> restore() - { - return ImmutableList.of(); - } - - @Override - public void registerListener(TaskRunnerListener listener, Executor executor) - { - for (Pair pair : listeners) { - if (pair.lhs.getListenerId().equals(listener.getListenerId())) { - throw new ISE("Listener [%s] already registered", listener.getListenerId()); - } - } - - final Pair listenerPair = Pair.of(listener, executor); - - synchronized (statusLock) { - for (Map.Entry entry : runningTasks.entrySet()) { - TaskRunnerUtils.notifyLocationChanged( - ImmutableList.of(listenerPair), - entry.getKey(), - entry.getValue().getLocation() - ); - } - - log.info("Registered listener [%s]", listener.getListenerId()); - listeners.add(listenerPair); - } - } - - @Override - public void unregisterListener(String listenerId) - { - for (Pair pair : listeners) { - if (pair.lhs.getListenerId().equals(listenerId)) { - listeners.remove(pair); - log.info("Unregistered listener [%s]", listenerId); - return; - } - } - } - - @Override - public Collection getWorkers() - { - return getImmutableWorkerFromZK(zkWorkers.values()); - } - - @Override - public Collection getRunningTasks() - { - return ImmutableList.copyOf(runningTasks.values()); - } - - @Override - public Collection getPendingTasks() - { - return ImmutableList.copyOf(pendingTasks.values()); - } - - @Override - public Collection getPendingTaskPayloads() - { - // return a snapshot of current pending task payloads. 
- return ImmutableList.copyOf(pendingTaskPayloads.values()); - } - - @Override - public RemoteTaskRunnerConfig getConfig() - { - return config; - } - - @Override - public Collection getKnownTasks() - { - // Use a map to dedupe tasks, since they may transition from one state to another while this method is iterating - // through the various collections. - final Map items = new LinkedHashMap<>(); - - // Racey, since there is a period of time during assignment when a task is neither pending nor running. - for (RemoteTaskRunnerWorkItem item : pendingTasks.values()) { - items.put(item.getTaskId(), item); - } - - for (RemoteTaskRunnerWorkItem item : runningTasks.values()) { - items.put(item.getTaskId(), item); - } - - for (RemoteTaskRunnerWorkItem item : completeTasks.values()) { - items.put(item.getTaskId(), item); - } - - return ImmutableList.copyOf(items.values()); - } - - @Nullable - @Override - public RunnerTaskState getRunnerTaskState(String taskId) - { - if (pendingTasks.containsKey(taskId)) { - return RunnerTaskState.PENDING; - } - if (runningTasks.containsKey(taskId)) { - return RunnerTaskState.RUNNING; - } - if (completeTasks.containsKey(taskId)) { - return RunnerTaskState.NONE; - } - - return null; - } - - @Override - public TaskLocation getTaskLocation(String taskId) - { - if (pendingTasks.containsKey(taskId)) { - return pendingTasks.get(taskId).getLocation(); - } - if (runningTasks.containsKey(taskId)) { - return runningTasks.get(taskId).getLocation(); - } - if (completeTasks.containsKey(taskId)) { - return completeTasks.get(taskId).getLocation(); - } - - return TaskLocation.unknown(); - } - - @Override - public Optional getScalingStats() - { - return Optional.fromNullable(provisioningService.getStats()); - } - - @Nullable - public ZkWorker findWorkerRunningTask(String taskId) - { - for (ZkWorker zkWorker : zkWorkers.values()) { - if (zkWorker.isRunningTask(taskId)) { - return zkWorker; - } - } - return null; - } - - /** - * Retrieve {@link ZkWorker} based 
on an ID (host), or null if the ID doesn't exist. - */ - @Nullable - ZkWorker findWorkerId(String workerId) - { - return zkWorkers.get(workerId); - } - - public boolean isWorkerRunningTask(ZkWorker worker, String taskId) - { - return Preconditions.checkNotNull(worker, "worker").isRunningTask(taskId); - } - - /** - * A task will be run only if there is no current knowledge in the RemoteTaskRunner of the task. - * - * @param task task to run - */ - @Override - public ListenableFuture run(final Task task) - { - final RemoteTaskRunnerWorkItem completeTask, runningTask, pendingTask; - if ((pendingTask = pendingTasks.get(task.getId())) != null) { - log.info("Assigned a task[%s] that is already pending!", task.getId()); - runPendingTasks(); - return pendingTask.getResult(); - } else if ((runningTask = runningTasks.get(task.getId())) != null) { - ZkWorker zkWorker = findWorkerRunningTask(task.getId()); - if (zkWorker == null) { - log.warn("Told to run task[%s], but no worker has started running it yet.", task.getId()); - } else { - log.info("Task[%s] already running on %s.", task.getId(), zkWorker.getWorker().getHost()); - TaskAnnouncement announcement = zkWorker.getRunningTasks().get(task.getId()); - if (announcement.getTaskStatus().isComplete()) { - taskComplete(runningTask, zkWorker, announcement.getTaskStatus()); - } - } - return runningTask.getResult(); - } else if ((completeTask = completeTasks.get(task.getId())) != null) { - return completeTask.getResult(); - } else { - RemoteTaskRunnerWorkItem workItem = addPendingTask(task); - runPendingTasks(); - return workItem.getResult(); - } - } - - /** - * Finds the worker running the task and forwards the shutdown signal to the worker. 
- * - * @param taskId - task id to shutdown - */ - @Override - public void shutdown(final String taskId, String reason) - { - log.info("Shutdown [%s] because: [%s]", taskId, reason); - if (!lifecycleLock.awaitStarted(1, TimeUnit.SECONDS)) { - log.info("This TaskRunner is stopped or not yet started. Ignoring shutdown command for task: %s", taskId); - } else if (pendingTasks.remove(taskId) != null) { - pendingTaskPayloads.remove(taskId); - log.info("Removed task from pending queue: %s", taskId); - } else if (completeTasks.containsKey(taskId)) { - cleanup(taskId); - } else { - final ZkWorker zkWorker = findWorkerRunningTask(taskId); - - if (zkWorker == null) { - log.info("Can't shutdown! No worker running task %s", taskId); - return; - } - URL url = null; - try { - url = TaskRunnerUtils.makeWorkerURL(zkWorker.getWorker(), "/druid/worker/v1/task/%s/shutdown", taskId); - final StatusResponseHolder response = httpClient.go( - new Request(HttpMethod.POST, url), - StatusResponseHandler.getInstance(), - shutdownTimeout - ).get(); - - log.info( - "Sent shutdown message to worker: %s, status %s, response: %s", - zkWorker.getWorker().getHost(), - response.getStatus(), - response.getContent() - ); - - if (!HttpResponseStatus.OK.equals(response.getStatus())) { - log.error("Shutdown failed for %s! 
Are you sure the task was running?", taskId); - } - } - catch (InterruptedException e) { - Thread.currentThread().interrupt(); - throw new RE(e, "Interrupted posting shutdown to [%s] for task [%s]", url, taskId); - } - catch (Exception e) { - throw new RE(e, "Error in handling post to [%s] for task [%s]", zkWorker.getWorker().getHost(), taskId); - } - } - } - - @Override - public Optional streamTaskLog(final String taskId, final long offset) throws IOException - { - final ZkWorker zkWorker = findWorkerRunningTask(taskId); - - if (zkWorker == null) { - // Worker is not running this task, it might be available in deep storage - return Optional.absent(); - } else { - // Worker is still running this task - final URL url = TaskRunnerUtils.makeWorkerURL( - zkWorker.getWorker(), - "/druid/worker/v1/task/%s/log?offset=%s", - taskId, - Long.toString(offset) - ); - try { - return Optional.of(httpClient.go( - new Request(HttpMethod.GET, url), - new InputStreamResponseHandler() - ).get()); - } - catch (InterruptedException e) { - throw new RuntimeException(e); - } - catch (ExecutionException e) { - // Unwrap if possible - Throwables.propagateIfPossible(e.getCause(), IOException.class); - throw new RuntimeException(e); - } - } - } - - - @Override - public Optional streamTaskReports(final String taskId) throws IOException - { - final ZkWorker zkWorker = findWorkerRunningTask(taskId); - - if (zkWorker == null) { - // Worker is not running this task, it might be available in deep storage - return Optional.absent(); - } - - final RemoteTaskRunnerWorkItem runningWorkItem = runningTasks.get(taskId); - - if (runningWorkItem == null) { - // Worker very recently exited. - return Optional.absent(); - } - - final TaskLocation taskLocation = runningWorkItem.getLocation(); - - if (TaskLocation.unknown().equals(taskLocation)) { - // No location known for this task. It may have not been assigned one yet. 
- return Optional.absent(); - } - - final URL url = TaskRunnerUtils.makeTaskLocationURL( - taskLocation, - "/druid/worker/v1/chat/%s/liveReports", - taskId - ); - - return TaskRunnerUtils.streamTaskReportsFromTaskLocation(httpClient, url); - } - - - /** - * Adds a task to the pending queue. - * {@link #runPendingTasks()} should be called to run the pending task. - */ - @VisibleForTesting - RemoteTaskRunnerWorkItem addPendingTask(final Task task) - { - log.info("Added pending task %s", task.getId()); - final RemoteTaskRunnerWorkItem taskRunnerWorkItem = new RemoteTaskRunnerWorkItem( - task.getId(), - task.getType(), - null, - null, - task.getDataSource() - ); - pendingTaskPayloads.put(task.getId(), task); - pendingTasks.put(task.getId(), taskRunnerWorkItem); - return taskRunnerWorkItem; - } - - /** - * This method uses a multi-threaded executor to extract all pending tasks and attempt to run them. Any tasks that - * are successfully assigned to a worker will be moved from pendingTasks to runningTasks. This method is thread-safe. - * This method should be run each time there is new worker capacity or if new tasks are assigned. - */ - @VisibleForTesting - void runPendingTasks() - { - runPendingTasksExec.submit( - (Callable) () -> { - try { - // make a copy of the pending tasks because tryAssignTask may delete tasks from pending and move them - // into running status - List copy = Lists.newArrayList(pendingTasks.values()); - sortByInsertionTime(copy); - - for (RemoteTaskRunnerWorkItem taskRunnerWorkItem : copy) { - runPendingTask(taskRunnerWorkItem); - } - } - catch (Exception e) { - log.makeAlert(e, "Exception in running pending tasks").emit(); - } - - return null; - } - ); - } - - /** - * Run one pending task. This method must be called in the same class except for unit tests. 
- */ - @VisibleForTesting - void runPendingTask(RemoteTaskRunnerWorkItem taskRunnerWorkItem) - { - String taskId = taskRunnerWorkItem.getTaskId(); - if (tryAssignTasks.putIfAbsent(taskId, taskId) == null) { - try { - //this can still be null due to race from explicit task shutdown request - //or if another thread steals and completes this task right after this thread makes copy - //of pending tasks. See https://github.com/apache/druid/issues/2842 . - Task task = pendingTaskPayloads.get(taskId); - if (task != null && tryAssignTask(task, taskRunnerWorkItem)) { - pendingTaskPayloads.remove(taskId); - } - } - catch (Exception e) { - log.makeAlert(e, "Exception while trying to assign task") - .addData("taskId", taskRunnerWorkItem.getTaskId()) - .emit(); - RemoteTaskRunnerWorkItem workItem = pendingTasks.remove(taskId); - if (workItem != null) { - taskComplete( - workItem, - null, - TaskStatus.failure( - taskId, - StringUtils.format("Failed to assign this task. See overlord logs for more details.") - ) - ); - } - } - finally { - tryAssignTasks.remove(taskId); - } - } - } - - @VisibleForTesting - static void sortByInsertionTime(List tasks) - { - Collections.sort(tasks, Comparator.comparing(RemoteTaskRunnerWorkItem::getQueueInsertionTime)); - } - - /** - * Removes a task from the complete queue and clears out the ZK status path of the task. 
- * - * @param taskId - the task to cleanup - */ - private void cleanup(final String taskId) - { - if (!lifecycleLock.awaitStarted(1, TimeUnit.SECONDS)) { - return; - } - final RemoteTaskRunnerWorkItem removed = completeTasks.remove(taskId); - final Worker worker; - if (removed == null || (worker = removed.getWorker()) == null) { - log.makeAlert("Asked to cleanup nonexistent task") - .addData("taskId", taskId) - .emit(); - } else { - final String workerId = worker.getHost(); - log.info("Cleaning up task[%s] on worker[%s]", taskId, workerId); - final String statusPath = JOINER.join(indexerZkConfig.getStatusPath(), workerId, taskId); - try { - cf.delete().guaranteed().forPath(statusPath); - } - catch (KeeperException.NoNodeException e) { - log.info("Tried to delete status path[%s] that didn't exist! Must've gone away already?", statusPath); - } - catch (Exception e) { - throw new RuntimeException(e); - } - } - } - - /** - * Ensures no workers are already running a task before assigning the task to a worker. - * It is possible that a worker is running a task that the RTR has no knowledge of. This occurs when the RTR - * needs to bootstrap after a restart. 
- * - * @param taskRunnerWorkItem - the task to assign - * @return true iff the task is now assigned - */ - private boolean tryAssignTask(final Task task, final RemoteTaskRunnerWorkItem taskRunnerWorkItem) throws Exception - { - Preconditions.checkNotNull(task, "task"); - Preconditions.checkNotNull(taskRunnerWorkItem, "taskRunnerWorkItem"); - Preconditions.checkArgument(task.getId().equals(taskRunnerWorkItem.getTaskId()), "task id != workItem id"); - - if (runningTasks.containsKey(task.getId()) || findWorkerRunningTask(task.getId()) != null) { - log.info("Task[%s] already running.", task.getId()); - return true; - } else { - // Nothing running this task, announce it in ZK for a worker to run it - WorkerBehaviorConfig workerConfig = workerConfigRef.get(); - WorkerSelectStrategy strategy; - if (workerConfig == null || workerConfig.getSelectStrategy() == null) { - strategy = WorkerBehaviorConfig.DEFAULT_STRATEGY; - log.debug("No worker selection strategy set. Using default of [%s]", strategy.getClass().getSimpleName()); - } else { - strategy = workerConfig.getSelectStrategy(); - } - - ZkWorker assignedWorker = null; - final ImmutableWorkerInfo immutableZkWorker; - try { - synchronized (workersWithUnacknowledgedTask) { - immutableZkWorker = strategy.findWorkerForTask( - config, - ImmutableMap.copyOf(getWorkersEligibleToRunTasks()), - task - ); - - if (immutableZkWorker != null && - workersWithUnacknowledgedTask.putIfAbsent(immutableZkWorker.getWorker().getHost(), task.getId()) - == null) { - assignedWorker = zkWorkers.get(immutableZkWorker.getWorker().getHost()); - } - } - - if (assignedWorker != null) { - return announceTask(task, assignedWorker, taskRunnerWorkItem); - } else { - log.debug( - "Unsuccessful task-assign attempt for task [%s] on workers [%s]. 
Workers to ack tasks are [%s].", - task.getId(), - zkWorkers.values(), - workersWithUnacknowledgedTask - ); - } - - return false; - } - finally { - if (assignedWorker != null) { - workersWithUnacknowledgedTask.remove(assignedWorker.getWorker().getHost()); - //if this attempt won the race to run the task then other task might be able to use this worker now after task ack. - runPendingTasks(); - } - } - } - } - - Map getWorkersEligibleToRunTasks() - { - return Maps.transformEntries( - Maps.filterEntries( - zkWorkers, - input -> !lazyWorkers.containsKey(input.getKey()) && - !workersWithUnacknowledgedTask.containsKey(input.getKey()) && - !blackListedWorkers.contains(input.getValue()) - ), - (String key, ZkWorker value) -> value.toImmutable() - ); - } - - /** - * Creates a ZK entry under a specific path associated with a worker. The worker is responsible for - * removing the task ZK entry and creating a task status ZK entry. - * - * @param theZkWorker The worker the task is assigned to - * @param taskRunnerWorkItem The task to be assigned - * @return boolean indicating whether the task was successfully assigned or not - */ - private boolean announceTask( - final Task task, - final ZkWorker theZkWorker, - final RemoteTaskRunnerWorkItem taskRunnerWorkItem - ) throws Exception - { - Preconditions.checkArgument(task.getId().equals(taskRunnerWorkItem.getTaskId()), "task id != workItem id"); - final String worker = theZkWorker.getWorker().getHost(); - synchronized (statusLock) { - if (!zkWorkers.containsKey(worker) || lazyWorkers.containsKey(worker)) { - // the worker might have been killed or marked as lazy - log.debug("Not assigning task to already removed worker[%s]", worker); - return false; - } - log.info("Assigning task [%s] to worker [%s]", task.getId(), worker); - - CuratorUtils.createIfNotExists( - cf, - JOINER.join(indexerZkConfig.getTasksPath(), worker, task.getId()), - CreateMode.EPHEMERAL, - jsonMapper.writeValueAsBytes(task), - config.getMaxZnodeBytes() - ); - - 
RemoteTaskRunnerWorkItem workItem = pendingTasks.remove(task.getId()); - if (workItem == null) { - log.makeAlert("Ignoring null work item from pending task queue") - .addData("taskId", task.getId()) - .emit(); - return false; - } - - final ServiceMetricEvent.Builder metricBuilder = new ServiceMetricEvent.Builder(); - IndexTaskUtils.setTaskDimensions(metricBuilder, task); - emitter.emit(metricBuilder.setMetric( - "task/pending/time", - new Duration(workItem.getQueueInsertionTime(), DateTimes.nowUtc()).getMillis()) - ); - - RemoteTaskRunnerWorkItem newWorkItem = workItem.withWorker(theZkWorker.getWorker(), null); - runningTasks.put(task.getId(), newWorkItem); - log.info("Task [%s] started running on worker [%s]", task.getId(), newWorkItem.getWorker().getHost()); - TaskRunnerUtils.notifyStatusChanged(listeners, task.getId(), TaskStatus.running(task.getId())); - - // Syncing state with Zookeeper - don't assign new tasks until the task we just assigned is actually running - // on a worker - this avoids overflowing a worker with tasks - Stopwatch timeoutStopwatch = Stopwatch.createStarted(); - while (!isWorkerRunningTask(theZkWorker, task.getId())) { - final long waitMs = config.getTaskAssignmentTimeout().toStandardDuration().getMillis(); - statusLock.wait(waitMs); - long elapsed = timeoutStopwatch.elapsed(TimeUnit.MILLISECONDS); - if (elapsed >= waitMs) { - log.makeAlert( - "Task assignment timed out on worker [%s], never ran task [%s]! Timeout: (%s >= %s)!", - worker, - task.getId(), - elapsed, - config.getTaskAssignmentTimeout() - ).emit(); - taskComplete( - taskRunnerWorkItem, - theZkWorker, - TaskStatus.failure( - task.getId(), - StringUtils.format( - "The worker that this task is assigned did not start it in timeout[%s]. 
" - + "See overlord logs for more details.", - config.getTaskAssignmentTimeout() - ) - ) - ); - break; - } - } - return true; - } - } - - private boolean cancelWorkerCleanup(String workerHost) - { - ScheduledFuture previousCleanup = removedWorkerCleanups.remove(workerHost); - if (previousCleanup != null) { - log.info("Cancelling Worker[%s] scheduled task cleanup", workerHost); - previousCleanup.cancel(false); - } - return previousCleanup != null; - } - - /** - * When a new worker appears, listeners are registered for status changes associated with tasks assigned to - * the worker. Status changes indicate the creation or completion of a task. - * The RemoteTaskRunner updates state according to these changes. - * - * @param worker contains metadata for a worker that has appeared in ZK - * @return future that will contain a fully initialized worker - */ - private ListenableFuture addWorker(final Worker worker) - { - log.info("Worker[%s] reportin' for duty!", worker.getHost()); - - try { - cancelWorkerCleanup(worker.getHost()); - - final String workerStatusPath = JOINER.join(indexerZkConfig.getStatusPath(), worker.getHost()); - final PathChildrenCache statusCache = workerStatusPathChildrenCacheFactory.make(cf, workerStatusPath); - final SettableFuture retVal = SettableFuture.create(); - final ZkWorker zkWorker = new ZkWorker( - worker, - statusCache, - jsonMapper - ); - - // Add status listener to the watcher for status changes - zkWorker.addListener(getStatusListener(worker, zkWorker, retVal)); - zkWorker.start(); - return retVal; - } - catch (Exception e) { - throw new RuntimeException(e); - } - } - - @VisibleForTesting - PathChildrenCacheListener getStatusListener(final Worker worker, final ZkWorker zkWorker, final SettableFuture retVal) - { - return (client, event) -> { - final String taskId; - final RemoteTaskRunnerWorkItem taskRunnerWorkItem; - synchronized (statusLock) { - try { - switch (event.getType()) { - case CHILD_ADDED: - case CHILD_UPDATED: - if 
(event.getData() == null) { - log.error("Unexpected null for event.getData() in handle new worker status for [%s]", event.getType().toString()); - log.makeAlert("Unexpected null for event.getData() in handle new worker status") - .addData("worker", zkWorker.getWorker().getHost()) - .addData("eventType", event.getType().toString()) - .emit(); - return; - } - taskId = ZKPaths.getNodeFromPath(event.getData().getPath()); - final TaskAnnouncement announcement = jsonMapper.readValue( - event.getData().getData(), TaskAnnouncement.class - ); - - log.info( - "Worker[%s] wrote %s status for task [%s] on [%s]", - zkWorker.getWorker().getHost(), - announcement.getTaskStatus().getStatusCode(), - taskId, - announcement.getTaskLocation() - ); - - // Synchronizing state with ZK - statusLock.notifyAll(); - - final RemoteTaskRunnerWorkItem tmp; - if ((tmp = runningTasks.get(taskId)) != null) { - taskRunnerWorkItem = tmp; - } else { - final RemoteTaskRunnerWorkItem newTaskRunnerWorkItem = new RemoteTaskRunnerWorkItem( - taskId, - announcement.getTaskType(), - zkWorker.getWorker(), - TaskLocation.unknown(), - announcement.getTaskDataSource() - ); - final RemoteTaskRunnerWorkItem existingItem = runningTasks.putIfAbsent( - taskId, - newTaskRunnerWorkItem - ); - if (existingItem == null) { - log.warn( - "Worker[%s] announced a status for a task I didn't know about, adding to runningTasks: %s", - zkWorker.getWorker().getHost(), - taskId - ); - taskRunnerWorkItem = newTaskRunnerWorkItem; - } else { - taskRunnerWorkItem = existingItem; - } - } - - if (!announcement.getTaskLocation().equals(taskRunnerWorkItem.getLocation())) { - taskRunnerWorkItem.setLocation(announcement.getTaskLocation()); - TaskRunnerUtils.notifyLocationChanged(listeners, taskId, announcement.getTaskLocation()); - } - - if (announcement.getTaskStatus().isComplete()) { - taskComplete(taskRunnerWorkItem, zkWorker, announcement.getTaskStatus()); - runPendingTasks(); - } - break; - case CHILD_REMOVED: - if (event.getData() == 
null) { - log.error("Unexpected null for event.getData() in handle new worker status for [%s]", event.getType().toString()); - log.makeAlert("Unexpected null for event.getData() in handle new worker status") - .addData("worker", zkWorker.getWorker().getHost()) - .addData("eventType", event.getType().toString()) - .emit(); - return; - } - taskId = ZKPaths.getNodeFromPath(event.getData().getPath()); - taskRunnerWorkItem = runningTasks.remove(taskId); - if (taskRunnerWorkItem != null) { - log.warn("Task[%s] just disappeared!", taskId); - final TaskStatus taskStatus = TaskStatus.failure( - taskId, - "The worker that this task was assigned disappeared. See overlord logs for more details." - ); - taskRunnerWorkItem.setResult(taskStatus); - TaskRunnerUtils.notifyStatusChanged(listeners, taskId, taskStatus); - } else { - log.info("Task[%s] went bye bye.", taskId); - } - break; - case INITIALIZED: - if (zkWorkers.putIfAbsent(worker.getHost(), zkWorker) == null) { - retVal.set(zkWorker); - } else { - final String message = StringUtils.format( - "This should not happen...tried to add already-existing worker[%s]", - worker.getHost() - ); - log.makeAlert(message) - .addData("workerHost", worker.getHost()) - .addData("workerIp", worker.getIp()) - .emit(); - retVal.setException(new IllegalStateException(message)); - } - runPendingTasks(); - break; - case CONNECTION_SUSPENDED: - case CONNECTION_RECONNECTED: - case CONNECTION_LOST: - // do nothing - } - } - catch (Exception e) { - String znode = null; - if (event.getData() != null) { - znode = event.getData().getPath(); - } - log.makeAlert(e, "Failed to handle new worker status") - .addData("worker", zkWorker.getWorker().getHost()) - .addData("znode", znode) - .addData("eventType", event.getType().toString()) - .emit(); - } - } - }; - } - - /** - * We allow workers to change their own capacities and versions. They cannot change their own hosts or ips without - * dropping themselves and re-announcing. 
- */ - private void updateWorker(final Worker worker) - { - final ZkWorker zkWorker = zkWorkers.get(worker.getHost()); - if (zkWorker != null) { - log.info("Worker[%s] updated its announcement from[%s] to[%s].", worker.getHost(), zkWorker.getWorker(), worker); - zkWorker.setWorker(worker); - } else { - log.warn( - "Worker[%s] updated its announcement but we didn't have a ZkWorker for it. Ignoring.", - worker.getHost() - ); - } - } - - /** - * When a ephemeral worker node disappears from ZK, incomplete running tasks will be retried by - * the logic in the status listener. We still have to make sure there are no tasks assigned - * to the worker but not yet running. - * - * @param worker - the removed worker - */ - private void removeWorker(final Worker worker) - { - log.info("Kaboom! Worker[%s] removed!", worker.getHost()); - - final ZkWorker zkWorker = zkWorkers.get(worker.getHost()); - if (zkWorker != null) { - try { - scheduleTasksCleanupForWorker(worker.getHost(), getAssignedTasks(worker)); - } - catch (Exception e) { - throw new RuntimeException(e); - } - finally { - try { - zkWorker.close(); - } - catch (Exception e) { - log.error(e, "Exception closing worker[%s]!", worker.getHost()); - } - zkWorkers.remove(worker.getHost()); - checkBlackListedNodes(); - } - } - lazyWorkers.remove(worker.getHost()); - } - - /** - * Schedule a task that will, at some point in the future, clean up znodes and issue failures for "tasksToFail" - * if they are being run by "worker". - */ - private void scheduleTasksCleanupForWorker(final String worker, final List tasksToFail) - { - // This method is only called from the PathChildrenCache event handler, so this may look like a race, - // but is actually not. 
- cancelWorkerCleanup(worker); - - final ListenableScheduledFuture cleanupTask = cleanupExec.schedule( - () -> { - log.info("Running scheduled cleanup for Worker[%s]", worker); - try { - for (String assignedTask : tasksToFail) { - String taskPath = JOINER.join(indexerZkConfig.getTasksPath(), worker, assignedTask); - String statusPath = JOINER.join(indexerZkConfig.getStatusPath(), worker, assignedTask); - if (cf.checkExists().forPath(taskPath) != null) { - cf.delete().guaranteed().forPath(taskPath); - } - - if (cf.checkExists().forPath(statusPath) != null) { - cf.delete().guaranteed().forPath(statusPath); - } - - log.info("Failing task[%s]", assignedTask); - RemoteTaskRunnerWorkItem taskRunnerWorkItem = runningTasks.remove(assignedTask); - if (taskRunnerWorkItem != null) { - final TaskStatus taskStatus = TaskStatus.failure( - assignedTask, - StringUtils.format("Canceled for worker cleanup. See overlord logs for more details.") - ); - taskRunnerWorkItem.setResult(taskStatus); - TaskRunnerUtils.notifyStatusChanged(listeners, assignedTask, taskStatus); - } else { - log.warn("RemoteTaskRunner has no knowledge of task[%s]", assignedTask); - } - } - - // worker is gone, remove worker task status announcements path. - String workerStatusPath = JOINER.join(indexerZkConfig.getStatusPath(), worker); - if (cf.checkExists().forPath(workerStatusPath) != null) { - cf.delete().guaranteed().forPath(JOINER.join(indexerZkConfig.getStatusPath(), worker)); - } - } - catch (Exception e) { - log.makeAlert("Exception while cleaning up worker[%s]", worker).emit(); - throw new RuntimeException(e); - } - }, - config.getTaskCleanupTimeout().toStandardDuration().getMillis(), - TimeUnit.MILLISECONDS - ); - - removedWorkerCleanups.put(worker, cleanupTask); - - // Remove this entry from removedWorkerCleanups when done, if it's actually the one in there. 
- Futures.addCallback( - cleanupTask, - new FutureCallback() - { - @Override - public void onSuccess(Object result) - { - removedWorkerCleanups.remove(worker, cleanupTask); - } - - @Override - public void onFailure(Throwable t) - { - removedWorkerCleanups.remove(worker, cleanupTask); - } - }, - MoreExecutors.directExecutor() - ); - } - - private void taskComplete( - RemoteTaskRunnerWorkItem taskRunnerWorkItem, - @Nullable ZkWorker zkWorker, - TaskStatus taskStatus - ) - { - Preconditions.checkNotNull(taskRunnerWorkItem, "taskRunnerWorkItem"); - Preconditions.checkNotNull(taskStatus, "taskStatus"); - if (zkWorker != null) { - log.info( - "Worker[%s] completed task[%s] with status[%s]", - zkWorker.getWorker().getHost(), - taskStatus.getId(), - taskStatus.getStatusCode() - ); - // Worker is done with this task - zkWorker.setLastCompletedTaskTime(DateTimes.nowUtc()); - } else { - log.info("Workerless task[%s] completed with status[%s]", taskStatus.getId(), taskStatus.getStatusCode()); - } - - // Move from running -> complete - // If the task was running and this is the first complete event, - // previousComplete should be null and removedRunning should not. - final RemoteTaskRunnerWorkItem previousComplete = completeTasks.put(taskStatus.getId(), taskRunnerWorkItem); - final RemoteTaskRunnerWorkItem removedRunning = runningTasks.remove(taskStatus.getId()); - - if (previousComplete != null && removedRunning != null) { - log.warn( - "This is not the first complete event for task[%s], but it was still known as running. " - + "Ignoring the previously known running status.", - taskStatus.getId() - ); - } - - if (previousComplete != null) { - // This is not the first complete event for the same task. - try { - // getResult().get() must return immediately. 
- TaskState lastKnownState = previousComplete.getResult().get(1, TimeUnit.MILLISECONDS).getStatusCode(); - if (taskStatus.getStatusCode() != lastKnownState) { - log.warn( - "The state of the new task complete event is different from its last known state. " - + "New state[%s], last known state[%s]", - taskStatus.getStatusCode(), - lastKnownState - ); - } - } - catch (InterruptedException e) { - log.warn(e, "Interrupted while getting the last known task status."); - Thread.currentThread().interrupt(); - } - catch (ExecutionException | TimeoutException e) { - // This case should not really happen. - log.warn(e, "Failed to get the last known task status. Ignoring this failure."); - } - } else { - // This is the first complete event for this task. - // Update success/failure counters - if (zkWorker != null) { - if (taskStatus.isSuccess()) { - zkWorker.resetContinuouslyFailedTasksCount(); - if (blackListedWorkers.remove(zkWorker)) { - zkWorker.setBlacklistedUntil(null); - log.info("[%s] removed from blacklist because a task finished with SUCCESS", zkWorker.getWorker()); - } - } else if (taskStatus.isFailure()) { - zkWorker.incrementContinuouslyFailedTasksCount(); - } - - // Blacklist node if there are too many failures. 
- synchronized (blackListedWorkers) { - if (zkWorker.getContinuouslyFailedTasksCount() > config.getMaxRetriesBeforeBlacklist() && - blackListedWorkers.size() <= zkWorkers.size() * (config.getMaxPercentageBlacklistWorkers() / 100.0) - 1) { - zkWorker.setBlacklistedUntil(DateTimes.nowUtc().plus(config.getWorkerBlackListBackoffTime())); - if (blackListedWorkers.add(zkWorker)) { - log.info( - "Blacklisting [%s] until [%s] after [%,d] failed tasks in a row.", - zkWorker.getWorker(), - zkWorker.getBlacklistedUntil(), - zkWorker.getContinuouslyFailedTasksCount() - ); - } - } - } - } - - // Notify interested parties - taskRunnerWorkItem.setResult(taskStatus); - TaskRunnerUtils.notifyStatusChanged(listeners, taskStatus.getId(), taskStatus); - } - } - - @Override - public Collection markWorkersLazy(Predicate isLazyWorker, int maxLazyWorkers) - { - // skip the lock and bail early if we should not mark any workers lazy (e.g. number - // of current workers is at or below the minNumWorkers of autoscaler config) - if (lazyWorkers.size() >= maxLazyWorkers) { - return getLazyWorkers(); - } - - // Search for new workers to mark lazy. 
- // Status lock is used to prevent any tasks being assigned to workers while we mark them lazy - synchronized (statusLock) { - for (Map.Entry worker : zkWorkers.entrySet()) { - if (lazyWorkers.size() >= maxLazyWorkers) { - break; - } - final ZkWorker zkWorker = worker.getValue(); - try { - if (getAssignedTasks(zkWorker.getWorker()).isEmpty() && isLazyWorker.apply(zkWorker.toImmutable())) { - log.info("Adding Worker[%s] to lazySet!", zkWorker.getWorker().getHost()); - lazyWorkers.put(worker.getKey(), zkWorker); - } - } - catch (Exception e) { - throw new RuntimeException(e); - } - } - } - - return getLazyWorkers(); - } - - protected List getAssignedTasks(Worker worker) throws Exception - { - final List assignedTasks = Lists.newArrayList( - cf.getChildren().forPath(JOINER.join(indexerZkConfig.getTasksPath(), worker.getHost())) - ); - - for (Map.Entry entry : runningTasks.entrySet()) { - if (entry.getValue() == null) { - log.error( - "Huh? null work item for [%s]", - entry.getKey() - ); - } else if (entry.getValue().getWorker() == null) { - log.error("Huh? 
no worker for [%s]", entry.getKey()); - } else if (entry.getValue().getWorker().getHost().equalsIgnoreCase(worker.getHost())) { - log.info("[%s]: Found [%s] running", worker.getHost(), entry.getKey()); - assignedTasks.add(entry.getKey()); - } - } - log.info("[%s]: Found %d tasks assigned", worker.getHost(), assignedTasks.size()); - return assignedTasks; - } - - @Override - public Collection getLazyWorkers() - { - return getWorkerFromZK(lazyWorkers.values()); - } - - private static ImmutableList getImmutableWorkerFromZK(Collection workers) - { - return ImmutableList.copyOf(Collections2.transform(workers, ZkWorker::toImmutable)); - } - - private static ImmutableList getWorkerFromZK(Collection workers) - { - return ImmutableList.copyOf(Collections2.transform(workers, ZkWorker::getWorker)); - } - - public Collection getBlackListedWorkers() - { - synchronized (blackListedWorkers) { - return getImmutableWorkerFromZK(blackListedWorkers); - } - } - - private boolean shouldRemoveNodeFromBlackList(ZkWorker zkWorker) - { - if (blackListedWorkers.size() > zkWorkers.size() * (config.getMaxPercentageBlacklistWorkers() / 100.0)) { - log.info( - "Removing [%s] from blacklist because percentage of blacklisted workers exceeds [%d]", - zkWorker.getWorker(), - config.getMaxPercentageBlacklistWorkers() - ); - - return true; - } - - long remainingMillis = zkWorker.getBlacklistedUntil().getMillis() - getCurrentTimeMillis(); - if (remainingMillis <= 0) { - log.info("Removing [%s] from blacklist because backoff time elapsed", zkWorker.getWorker()); - return true; - } - - log.info("[%s] still blacklisted for [%,ds]", zkWorker.getWorker(), remainingMillis / 1000); - return false; - } - - @VisibleForTesting - void checkBlackListedNodes() - { - boolean shouldRunPendingTasks = false; - - // must be synchronized while iterating: - // https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/util/Collections.html#synchronizedSet(java.util.Set) - synchronized (blackListedWorkers) { - for 
(Iterator iterator = blackListedWorkers.iterator(); iterator.hasNext(); ) { - ZkWorker zkWorker = iterator.next(); - if (shouldRemoveNodeFromBlackList(zkWorker)) { - iterator.remove(); - zkWorker.resetContinuouslyFailedTasksCount(); - zkWorker.setBlacklistedUntil(null); - shouldRunPendingTasks = true; - } - } - } - - if (shouldRunPendingTasks) { - runPendingTasks(); - } - } - - @VisibleForTesting - protected long getCurrentTimeMillis() - { - return System.currentTimeMillis(); - } - - @VisibleForTesting - ConcurrentMap getRemovedWorkerCleanups() - { - return removedWorkerCleanups; - } - - @VisibleForTesting - RemoteTaskRunnerConfig getRemoteTaskRunnerConfig() - { - return config; - } - - @VisibleForTesting - Map getWorkersWithUnacknowledgedTask() - { - return workersWithUnacknowledgedTask; - } - - @VisibleForTesting - ProvisioningStrategy getProvisioningStrategy() - { - return provisioningStrategy; - } - - @Override - public Map getTotalTaskSlotCount() - { - Map totalPeons = new HashMap<>(); - for (ImmutableWorkerInfo worker : getWorkers()) { - String workerCategory = worker.getWorker().getCategory(); - int workerCapacity = worker.getWorker().getCapacity(); - totalPeons.compute( - workerCategory, - (category, totalCapacity) -> totalCapacity == null ? workerCapacity : totalCapacity + workerCapacity - ); - } - - return totalPeons; - } - - @Override - public Map getIdleTaskSlotCount() - { - Map totalIdlePeons = new HashMap<>(); - for (ImmutableWorkerInfo worker : getWorkersEligibleToRunTasks().values()) { - String workerCategory = worker.getWorker().getCategory(); - int workerAvailableCapacity = worker.getAvailableCapacity(); - totalIdlePeons.compute( - workerCategory, - (category, availableCapacity) -> availableCapacity == null ? 
workerAvailableCapacity : availableCapacity + workerAvailableCapacity - ); - } - - return totalIdlePeons; - } - - @Override - public Map getUsedTaskSlotCount() - { - Map totalUsedPeons = new HashMap<>(); - for (ImmutableWorkerInfo worker : getWorkers()) { - String workerCategory = worker.getWorker().getCategory(); - int workerUsedCapacity = worker.getCurrCapacityUsed(); - totalUsedPeons.compute( - workerCategory, - (category, usedCapacity) -> usedCapacity == null ? workerUsedCapacity : usedCapacity + workerUsedCapacity - ); - } - - return totalUsedPeons; - } - - @Override - public Map getLazyTaskSlotCount() - { - Map totalLazyPeons = new HashMap<>(); - for (Worker worker : getLazyWorkers()) { - String workerCategory = worker.getCategory(); - int workerLazyPeons = worker.getCapacity(); - totalLazyPeons.compute( - workerCategory, - (category, lazyPeons) -> lazyPeons == null ? workerLazyPeons : lazyPeons + workerLazyPeons - ); - } - - return totalLazyPeons; - } - - @Override - public Map getBlacklistedTaskSlotCount() - { - Map totalBlacklistedPeons = new HashMap<>(); - for (ImmutableWorkerInfo worker : getBlackListedWorkers()) { - String workerCategory = worker.getWorker().getCategory(); - int workerBlacklistedPeons = worker.getWorker().getCapacity(); - totalBlacklistedPeons.compute( - workerCategory, - (category, blacklistedPeons) -> blacklistedPeons == null ? workerBlacklistedPeons : blacklistedPeons + workerBlacklistedPeons - ); - } - - return totalBlacklistedPeons; - } - - @Override - public int getTotalCapacity() - { - return getWorkers().stream().mapToInt(workerInfo -> workerInfo.getWorker().getCapacity()).sum(); - } - - /** - * Retrieves the maximum capacity of the task runner when autoscaling is enabled.* - * @return The maximum capacity as an integer value. Returns -1 if the maximum - * capacity cannot be determined or if autoscaling is not enabled. 
- */ - @Override - public int getMaximumCapacityWithAutoscale() - { - int maximumCapacity = -1; - WorkerBehaviorConfig workerBehaviorConfig = workerConfigRef.get(); - if (workerBehaviorConfig == null) { - // Auto scale not setup - log.debug("Cannot calculate maximum worker capacity as worker behavior config is not configured"); - maximumCapacity = -1; - } else if (workerBehaviorConfig instanceof DefaultWorkerBehaviorConfig) { - DefaultWorkerBehaviorConfig defaultWorkerBehaviorConfig = (DefaultWorkerBehaviorConfig) workerBehaviorConfig; - if (defaultWorkerBehaviorConfig.getAutoScaler() == null) { - // Auto scale not setup - log.debug("Cannot calculate maximum worker capacity as auto scaler not configured"); - maximumCapacity = -1; - } else { - int maxWorker = defaultWorkerBehaviorConfig.getAutoScaler().getMaxNumWorkers(); - int expectedWorkerCapacity = provisioningStrategy.getExpectedWorkerCapacity(getWorkers()); - maximumCapacity = expectedWorkerCapacity == -1 ? -1 : maxWorker * expectedWorkerCapacity; - } - } - return maximumCapacity; - } - - @Override - public int getUsedCapacity() - { - return getWorkers().stream().mapToInt(ImmutableWorkerInfo::getCurrCapacityUsed).sum(); - } -} diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/RemoteTaskRunnerFactory.java b/indexing-service/src/main/java/org/apache/druid/indexing/overlord/RemoteTaskRunnerFactory.java deleted file mode 100644 index a455c40cb4ac..000000000000 --- a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/RemoteTaskRunnerFactory.java +++ /dev/null @@ -1,99 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.indexing.overlord; - -import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.common.base.Supplier; -import com.google.inject.Inject; -import org.apache.curator.framework.CuratorFramework; -import org.apache.druid.curator.cache.PathChildrenCacheFactory; -import org.apache.druid.guice.annotations.EscalatedGlobal; -import org.apache.druid.indexing.overlord.autoscaling.NoopProvisioningStrategy; -import org.apache.druid.indexing.overlord.autoscaling.ProvisioningSchedulerConfig; -import org.apache.druid.indexing.overlord.autoscaling.ProvisioningStrategy; -import org.apache.druid.indexing.overlord.config.RemoteTaskRunnerConfig; -import org.apache.druid.indexing.overlord.setup.WorkerBehaviorConfig; -import org.apache.druid.java.util.emitter.service.ServiceEmitter; -import org.apache.druid.java.util.http.client.HttpClient; -import org.apache.druid.server.initialization.IndexerZkConfig; - -/** - */ -public class RemoteTaskRunnerFactory implements TaskRunnerFactory -{ - public static final String TYPE_NAME = "remote"; - private final CuratorFramework curator; - private final RemoteTaskRunnerConfig remoteTaskRunnerConfig; - private final IndexerZkConfig zkPaths; - private final ObjectMapper jsonMapper; - private final HttpClient httpClient; - private final Supplier workerConfigRef; - private final ProvisioningSchedulerConfig provisioningSchedulerConfig; - private final ProvisioningStrategy provisioningStrategy; - private final ServiceEmitter emitter; - private RemoteTaskRunner runner; - - @Inject - public 
RemoteTaskRunnerFactory( - final CuratorFramework curator, - final RemoteTaskRunnerConfig remoteTaskRunnerConfig, - final IndexerZkConfig zkPaths, - final ObjectMapper jsonMapper, - @EscalatedGlobal final HttpClient httpClient, - final Supplier workerConfigRef, - final ProvisioningSchedulerConfig provisioningSchedulerConfig, - final ProvisioningStrategy provisioningStrategy, - final ServiceEmitter emitter - ) - { - this.curator = curator; - this.remoteTaskRunnerConfig = remoteTaskRunnerConfig; - this.zkPaths = zkPaths; - this.jsonMapper = jsonMapper; - this.httpClient = httpClient; - this.workerConfigRef = workerConfigRef; - this.provisioningSchedulerConfig = provisioningSchedulerConfig; - this.provisioningStrategy = provisioningStrategy; - this.emitter = emitter; - } - - @Override - public RemoteTaskRunner build() - { - runner = new RemoteTaskRunner( - jsonMapper, - remoteTaskRunnerConfig, - zkPaths, - curator, - new PathChildrenCacheFactory.Builder().withCompressed(true), - httpClient, - workerConfigRef, - provisioningSchedulerConfig.isDoAutoscale() ? provisioningStrategy : new NoopProvisioningStrategy<>(), - emitter - ); - return runner; - } - - @Override - public RemoteTaskRunner get() - { - return runner; - } -} diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/ZkWorker.java b/indexing-service/src/main/java/org/apache/druid/indexing/overlord/ZkWorker.java deleted file mode 100644 index 21ed55115418..000000000000 --- a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/ZkWorker.java +++ /dev/null @@ -1,271 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.indexing.overlord; - -import com.fasterxml.jackson.annotation.JsonProperty; -import com.fasterxml.jackson.core.JsonParser; -import com.fasterxml.jackson.core.JsonToken; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.common.base.Function; -import com.google.common.base.Preconditions; -import com.google.common.collect.Lists; -import org.apache.curator.framework.recipes.cache.ChildData; -import org.apache.curator.framework.recipes.cache.PathChildrenCache; -import org.apache.curator.framework.recipes.cache.PathChildrenCacheListener; -import org.apache.druid.annotations.UsedInGeneratedCode; -import org.apache.druid.indexing.common.task.batch.parallel.ParallelIndexSupervisorTask; -import org.apache.druid.indexing.worker.TaskAnnouncement; -import org.apache.druid.indexing.worker.Worker; -import org.apache.druid.java.util.common.DateTimes; -import org.apache.druid.java.util.common.jackson.JacksonUtils; -import org.joda.time.DateTime; - -import java.io.Closeable; -import java.io.IOException; -import java.util.Collection; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Map; -import java.util.Set; -import java.util.concurrent.atomic.AtomicInteger; -import java.util.concurrent.atomic.AtomicReference; -import java.util.stream.Collectors; - -/** - * Holds information about a worker and a listener for task status changes associated with the worker. 
- */ -public class ZkWorker implements Closeable -{ - private final PathChildrenCache statusCache; - private final Function cacheConverter; - private final java.util.function.Function taskIdExtractor; - - private AtomicReference worker; - private AtomicReference lastCompletedTaskTime = new AtomicReference<>(DateTimes.nowUtc()); - private AtomicReference blacklistedUntil = new AtomicReference<>(); - private AtomicInteger continuouslyFailedTasksCount = new AtomicInteger(0); - - public ZkWorker(Worker worker, PathChildrenCache statusCache, final ObjectMapper jsonMapper) - { - this.worker = new AtomicReference<>(worker); - this.statusCache = statusCache; - this.cacheConverter = (ChildData input) -> - JacksonUtils.readValue(jsonMapper, input.getData(), TaskAnnouncement.class); - this.taskIdExtractor = createTaskIdExtractor(jsonMapper); - } - - static java.util.function.Function createTaskIdExtractor(final ObjectMapper jsonMapper) - { - return (ChildData input) -> { - try (JsonParser parser = jsonMapper.getFactory().createParser(input.getData())) { - while (parser.nextToken() != JsonToken.END_OBJECT) { - String currentName = parser.getCurrentName(); - if (currentName == null) { - continue; - } - - switch (currentName) { - case TaskAnnouncement.TASK_ID_KEY: - parser.nextToken(); - return parser.getValueAsString(); - default: - parser.skipChildren(); - } - } - return null; - } - catch (IOException e) { - throw new RuntimeException(e); - } - }; - } - - public void start() throws Exception - { - statusCache.start(PathChildrenCache.StartMode.POST_INITIALIZED_EVENT); - } - - public void addListener(PathChildrenCacheListener listener) - { - statusCache.getListenable().addListener(listener); - } - - @JsonProperty("worker") - public Worker getWorker() - { - return worker.get(); - } - - @JsonProperty("runningTasks") - public Collection getRunningTaskIds() - { - return statusCache.getCurrentData() - .stream() - .map(taskIdExtractor) - .collect(Collectors.toSet()); - } - - public 
Map getRunningTasks() - { - Map retVal = new HashMap<>(); - for (TaskAnnouncement taskAnnouncement : Lists.transform( - statusCache.getCurrentData(), - cacheConverter - )) { - retVal.put(taskAnnouncement.getTaskStatus().getId(), taskAnnouncement); - } - - return retVal; - } - - @JsonProperty("currCapacityUsed") - public int getCurrCapacityUsed() - { - return getCurrCapacityUsed(getRunningTasks()); - } - - private static int getCurrCapacityUsed(Map tasks) - { - int currCapacity = 0; - for (TaskAnnouncement taskAnnouncement : tasks.values()) { - currCapacity += taskAnnouncement.getTaskResource().getRequiredCapacity(); - } - return currCapacity; - } - - @JsonProperty("currParallelIndexCapacityUsed") - public int getCurrParallelIndexCapacityUsed() - { - return getCurrParallelIndexCapacityUsed(getRunningTasks()); - } - - private int getCurrParallelIndexCapacityUsed(Map tasks) - { - int currParallelIndexCapacityUsed = 0; - for (TaskAnnouncement taskAnnouncement : tasks.values()) { - if (taskAnnouncement.getTaskType().equals(ParallelIndexSupervisorTask.TYPE)) { - currParallelIndexCapacityUsed += taskAnnouncement.getTaskResource().getRequiredCapacity(); - } - } - return currParallelIndexCapacityUsed; - } - - @JsonProperty("availabilityGroups") - public Set getAvailabilityGroups() - { - return getAvailabilityGroups(getRunningTasks()); - } - - private static Set getAvailabilityGroups(Map tasks) - { - Set retVal = new HashSet<>(); - for (TaskAnnouncement taskAnnouncement : tasks.values()) { - retVal.add(taskAnnouncement.getTaskResource().getAvailabilityGroup()); - } - return retVal; - } - - @JsonProperty - public DateTime getLastCompletedTaskTime() - { - return lastCompletedTaskTime.get(); - } - - @JsonProperty - public DateTime getBlacklistedUntil() - { - return blacklistedUntil.get(); - } - - public boolean isRunningTask(String taskId) - { - return statusCache.getCurrentData() - .stream() - .map(taskIdExtractor) - .anyMatch((String s) -> taskId.equals(s)); - } - - 
@UsedInGeneratedCode // See JavaScriptWorkerSelectStrategyTest - public boolean isValidVersion(String minVersion) - { - final Worker w = worker.get(); - return !w.isDisabled() && w.getVersion().compareTo(minVersion) >= 0; - } - - public void setWorker(Worker newWorker) - { - final Worker oldWorker = worker.get(); - Preconditions.checkArgument(newWorker.getHost().equals(oldWorker.getHost()), "Cannot change Worker host"); - Preconditions.checkArgument(newWorker.getIp().equals(oldWorker.getIp()), "Cannot change Worker ip"); - - worker.set(newWorker); - } - - public void setLastCompletedTaskTime(DateTime completedTaskTime) - { - lastCompletedTaskTime.set(completedTaskTime); - } - - public void setBlacklistedUntil(DateTime blacklistedUntil) - { - this.blacklistedUntil.set(blacklistedUntil); - } - - public ImmutableWorkerInfo toImmutable() - { - return ImmutableWorkerInfo.fromWorkerAnnouncements( - worker.get(), - getRunningTasks(), - lastCompletedTaskTime.get(), - blacklistedUntil.get() - ); - } - - @Override - public void close() throws IOException - { - statusCache.close(); - } - - public int getContinuouslyFailedTasksCount() - { - return continuouslyFailedTasksCount.get(); - } - - public void resetContinuouslyFailedTasksCount() - { - this.continuouslyFailedTasksCount.set(0); - } - - public void incrementContinuouslyFailedTasksCount() - { - this.continuouslyFailedTasksCount.incrementAndGet(); - } - - @Override - public String toString() - { - return "ZkWorker{" + - "worker=" + worker + - ", lastCompletedTaskTime=" + lastCompletedTaskTime + - ", blacklistedUntil=" + blacklistedUntil + - '}'; - } -} diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/config/HttpRemoteTaskRunnerConfig.java b/indexing-service/src/main/java/org/apache/druid/indexing/overlord/config/HttpRemoteTaskRunnerConfig.java index bc0ba7f81c70..acb288f13264 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/config/HttpRemoteTaskRunnerConfig.java 
+++ b/indexing-service/src/main/java/org/apache/druid/indexing/overlord/config/HttpRemoteTaskRunnerConfig.java @@ -23,10 +23,45 @@ import com.fasterxml.jackson.annotation.JsonProperty; import org.joda.time.Period; +import javax.validation.constraints.Max; +import javax.validation.constraints.Min; +import javax.validation.constraints.NotNull; + /** */ -public class HttpRemoteTaskRunnerConfig extends RemoteTaskRunnerConfig +public class HttpRemoteTaskRunnerConfig extends WorkerTaskRunnerConfig { + // This default value is kept to take MM restart into consideration just in case it was + // restarted right after task assignment. + @JsonProperty + @NotNull + private Period taskAssignmentTimeout = new Period("PT5M"); + + @JsonProperty + @NotNull + private Period taskCleanupTimeout = new Period("PT15M"); + + @JsonProperty + @Min(1) + private int pendingTasksRunnerNumThreads = 1; + + @JsonProperty + @Min(1) + private int maxRetriesBeforeBlacklist = 5; + + @JsonProperty + @NotNull + private Period workerBlackListBackoffTime = new Period("PT15M"); + + @JsonProperty + @NotNull + private Period workerBlackListCleanupPeriod = new Period("PT5M"); + + @JsonProperty + @Max(100) + @Min(0) + private int maxPercentageBlacklistWorkers = 20; + @JsonProperty private int workerSyncNumThreads = 5; @@ -48,6 +83,46 @@ public class HttpRemoteTaskRunnerConfig extends RemoteTaskRunnerConfig @JsonProperty private Period serverUnstabilityTimeout = new Period("PT1M"); + public Period getTaskAssignmentTimeout() + { + return taskAssignmentTimeout; + } + + public Period getTaskCleanupTimeout() + { + return taskCleanupTimeout; + } + + public int getPendingTasksRunnerNumThreads() + { + return pendingTasksRunnerNumThreads; + } + + public int getMaxRetriesBeforeBlacklist() + { + return maxRetriesBeforeBlacklist; + } + + public Period getWorkerBlackListBackoffTime() + { + return workerBlackListBackoffTime; + } + + public Period getWorkerBlackListCleanupPeriod() + { + return workerBlackListCleanupPeriod; 
+ } + + public int getMaxPercentageBlacklistWorkers() + { + return maxPercentageBlacklistWorkers; + } + + public void setMaxPercentageBlacklistWorkers(int maxPercentageBlacklistWorkers) + { + this.maxPercentageBlacklistWorkers = maxPercentageBlacklistWorkers; + } + public int getWorkerSyncNumThreads() { return workerSyncNumThreads; diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/config/RemoteTaskRunnerConfig.java b/indexing-service/src/main/java/org/apache/druid/indexing/overlord/config/RemoteTaskRunnerConfig.java deleted file mode 100644 index 9cd90167813a..000000000000 --- a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/config/RemoteTaskRunnerConfig.java +++ /dev/null @@ -1,202 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.druid.indexing.overlord.config; - -import com.fasterxml.jackson.annotation.JsonProperty; -import org.apache.druid.curator.CuratorUtils; -import org.apache.druid.java.util.common.HumanReadableBytes; -import org.apache.druid.java.util.common.HumanReadableBytesRange; -import org.joda.time.Period; - -import javax.validation.constraints.Max; -import javax.validation.constraints.Min; -import javax.validation.constraints.NotNull; - -/** - */ -public class RemoteTaskRunnerConfig extends WorkerTaskRunnerConfig -{ - // This default value is kept to take MM restart into consideration just in case it was - // restarted right after task assignment. - @JsonProperty - @NotNull - private Period taskAssignmentTimeout = new Period("PT5M"); - - @JsonProperty - @NotNull - private Period taskCleanupTimeout = new Period("PT15M"); - - @JsonProperty - @HumanReadableBytesRange(min = 10 * 1024, - max = Integer.MAX_VALUE, - message = "maxZnodeBytes must be in the range of [10KiB, 2GiB)" - ) - private HumanReadableBytes maxZnodeBytes = HumanReadableBytes.valueOf(CuratorUtils.DEFAULT_MAX_ZNODE_BYTES); - - @JsonProperty - private Period taskShutdownLinkTimeout = new Period("PT1M"); - - @JsonProperty - @Min(1) - private int pendingTasksRunnerNumThreads = 1; - - @JsonProperty - @Min(1) - private int maxRetriesBeforeBlacklist = 5; - - @JsonProperty - @NotNull - private Period workerBlackListBackoffTime = new Period("PT15M"); - - @JsonProperty - @NotNull - private Period workerBlackListCleanupPeriod = new Period("PT5M"); - - @JsonProperty - @Max(100) - @Min(0) - private int maxPercentageBlacklistWorkers = 20; - - public Period getTaskAssignmentTimeout() - { - return taskAssignmentTimeout; - } - - public Period getTaskCleanupTimeout() - { - return taskCleanupTimeout; - } - - public int getMaxZnodeBytes() - { - return maxZnodeBytes.getBytesInInt(); - } - - public Period getTaskShutdownLinkTimeout() - { - return taskShutdownLinkTimeout; - } - - public int 
getPendingTasksRunnerNumThreads() - { - return pendingTasksRunnerNumThreads; - } - - public int getMaxRetriesBeforeBlacklist() - { - return maxRetriesBeforeBlacklist; - } - - public Period getWorkerBlackListBackoffTime() - { - return workerBlackListBackoffTime; - } - - public Period getWorkerBlackListCleanupPeriod() - { - return workerBlackListCleanupPeriod; - } - - public int getMaxPercentageBlacklistWorkers() - { - return maxPercentageBlacklistWorkers; - } - - public void setMaxPercentageBlacklistWorkers(int maxPercentageBlacklistWorkers) - { - this.maxPercentageBlacklistWorkers = maxPercentageBlacklistWorkers; - } - - @Override - public boolean equals(Object o) - { - if (this == o) { - return true; - } - if (o == null || getClass() != o.getClass()) { - return false; - } - - RemoteTaskRunnerConfig that = (RemoteTaskRunnerConfig) o; - - if (!maxZnodeBytes.equals(that.maxZnodeBytes)) { - return false; - } - if (pendingTasksRunnerNumThreads != that.pendingTasksRunnerNumThreads) { - return false; - } - if (!taskAssignmentTimeout.equals(that.taskAssignmentTimeout)) { - return false; - } - if (!taskCleanupTimeout.equals(that.taskCleanupTimeout)) { - return false; - } - if (!getMinWorkerVersion().equals(that.getMinWorkerVersion())) { - return false; - } - if (!taskShutdownLinkTimeout.equals(that.taskShutdownLinkTimeout)) { - return false; - } - if (maxRetriesBeforeBlacklist != that.maxRetriesBeforeBlacklist) { - return false; - } - if (!workerBlackListBackoffTime.equals(that.getWorkerBlackListBackoffTime())) { - return false; - } - if (maxPercentageBlacklistWorkers != that.maxPercentageBlacklistWorkers) { - return false; - } - return workerBlackListCleanupPeriod.equals(that.workerBlackListCleanupPeriod); - - } - - @Override - public int hashCode() - { - int result = taskAssignmentTimeout.hashCode(); - result = 31 * result + taskCleanupTimeout.hashCode(); - result = 31 * result + getMinWorkerVersion().hashCode(); - result = 31 * result + maxZnodeBytes.hashCode(); - 
result = 31 * result + taskShutdownLinkTimeout.hashCode(); - result = 31 * result + pendingTasksRunnerNumThreads; - result = 31 * result + maxRetriesBeforeBlacklist; - result = 31 * result + workerBlackListBackoffTime.hashCode(); - result = 31 * result + workerBlackListCleanupPeriod.hashCode(); - result = 31 * result + maxPercentageBlacklistWorkers; - return result; - } - - @Override - public String toString() - { - return "RemoteTaskRunnerConfig{" + - "taskAssignmentTimeout=" + taskAssignmentTimeout + - ", taskCleanupTimeout=" + taskCleanupTimeout + - ", minWorkerVersion='" + getMinWorkerVersion() + '\'' + - ", maxZnodeBytes=" + maxZnodeBytes + - ", taskShutdownLinkTimeout=" + taskShutdownLinkTimeout + - ", pendingTasksRunnerNumThreads=" + pendingTasksRunnerNumThreads + - ", maxRetriesBeforeBlacklist=" + maxRetriesBeforeBlacklist + - ", taskBlackListBackoffTimeMillis=" + workerBlackListBackoffTime + - ", taskBlackListCleanupPeriod=" + workerBlackListCleanupPeriod + - ", maxPercentageBlacklistWorkers= " + maxPercentageBlacklistWorkers + - '}'; - } -} diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/hrtr/HttpRemoteTaskRunner.java b/indexing-service/src/main/java/org/apache/druid/indexing/overlord/hrtr/HttpRemoteTaskRunner.java index 53bd0dbbf940..dcf4bdf266ec 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/hrtr/HttpRemoteTaskRunner.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/overlord/hrtr/HttpRemoteTaskRunner.java @@ -22,7 +22,6 @@ import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Joiner; import com.google.common.base.Objects; import com.google.common.base.Optional; import com.google.common.base.Preconditions; @@ -41,7 +40,6 @@ import com.google.common.util.concurrent.ListeningScheduledExecutorService; import 
com.google.common.util.concurrent.MoreExecutors; import com.google.errorprone.annotations.concurrent.GuardedBy; -import org.apache.curator.framework.CuratorFramework; import org.apache.druid.concurrent.LifecycleLock; import org.apache.druid.discovery.DiscoveryDruidNode; import org.apache.druid.discovery.DruidNodeDiscovery; @@ -86,9 +84,7 @@ import org.apache.druid.java.util.http.client.Request; import org.apache.druid.java.util.http.client.response.InputStreamResponseHandler; import org.apache.druid.query.DruidMetrics; -import org.apache.druid.server.initialization.IndexerZkConfig; import org.apache.druid.tasklogs.TaskLogStreamer; -import org.apache.zookeeper.KeeperException; import org.jboss.netty.handler.codec.http.HttpMethod; import org.joda.time.Duration; import org.joda.time.Period; @@ -126,10 +122,6 @@ * 3. GET request for getting list of assigned, running, completed tasks on Middle Manager and its enable/disable status. * This endpoint is implemented to support long poll and holds the request till there is a change. This class * sends the next request immediately as the previous finishes to keep the state up-to-date. - *

- * ZK_CLEANUP_TODO : As of 0.11.1, it is required to cleanup task status paths from ZK which are created by the - * workers to support deprecated RemoteTaskRunner. So a method "scheduleCompletedTaskStatusCleanupFromZk()" is added' - * which should be removed in the release that removes RemoteTaskRunner legacy ZK updation WorkerTaskMonitor class. */ public class HttpRemoteTaskRunner implements WorkerTaskRunner, TaskLogStreamer, WorkerHolder.Listener { @@ -194,15 +186,6 @@ public class HttpRemoteTaskRunner implements WorkerTaskRunner, TaskLogStreamer, private final TaskStorage taskStorage; private final ServiceEmitter emitter; - // ZK_CLEANUP_TODO : Remove these when RemoteTaskRunner and WorkerTaskMonitor are removed. - private static final Joiner JOINER = Joiner.on("/"); - - @Nullable // Null, if zk is disabled - private final CuratorFramework cf; - - @Nullable // Null, if zk is disabled - private final ScheduledExecutorService zkCleanupExec; - private final IndexerZkConfig indexerZkConfig; private volatile DruidNodeDiscovery.Listener nodeDiscoveryListener; public HttpRemoteTaskRunner( @@ -213,8 +196,6 @@ public HttpRemoteTaskRunner( ProvisioningStrategy provisioningStrategy, DruidNodeDiscoveryProvider druidNodeDiscoveryProvider, TaskStorage taskStorage, - @Nullable CuratorFramework cf, - IndexerZkConfig indexerZkConfig, ServiceEmitter emitter ) { @@ -240,19 +221,6 @@ public HttpRemoteTaskRunner( ScheduledExecutors.fixed(1, "HttpRemoteTaskRunner-Worker-Cleanup-%d") ); - if (cf != null) { - this.cf = cf; - this.zkCleanupExec = ScheduledExecutors.fixed( - 1, - "HttpRemoteTaskRunner-zk-cleanup-%d" - ); - } else { - this.cf = null; - this.zkCleanupExec = null; - } - - this.indexerZkConfig = indexerZkConfig; - this.provisioningStrategy = provisioningStrategy; } @@ -267,8 +235,6 @@ public void start() try { log.info("Starting..."); - scheduleCompletedTaskStatusCleanupFromZk(); - startWorkersHandling(); ScheduledExecutors.scheduleAtFixedRate( @@ -296,68 +262,6 @@ public 
void start() } } - private void scheduleCompletedTaskStatusCleanupFromZk() - { - if (cf == null) { - return; - } - - zkCleanupExec.scheduleAtFixedRate( - () -> { - try { - List workers; - try { - workers = cf.getChildren().forPath(indexerZkConfig.getStatusPath()); - } - catch (KeeperException.NoNodeException e) { - // statusPath doesn't exist yet; can occur if no middleManagers have started. - workers = ImmutableList.of(); - } - - Set knownActiveTaskIds = new HashSet<>(); - if (!workers.isEmpty()) { - for (Task task : taskStorage.getActiveTasks()) { - knownActiveTaskIds.add(task.getId()); - } - } - - for (String workerId : workers) { - String workerStatusPath = JOINER.join(indexerZkConfig.getStatusPath(), workerId); - - List taskIds; - try { - taskIds = cf.getChildren().forPath(workerStatusPath); - } - catch (KeeperException.NoNodeException e) { - taskIds = ImmutableList.of(); - } - - for (String taskId : taskIds) { - if (!knownActiveTaskIds.contains(taskId)) { - String taskStatusPath = JOINER.join(workerStatusPath, taskId); - try { - cf.delete().guaranteed().forPath(taskStatusPath); - } - catch (KeeperException.NoNodeException e) { - log.info("Failed to delete taskStatusPath[%s].", taskStatusPath); - } - } - } - } - } - catch (InterruptedException ex) { - Thread.currentThread().interrupt(); - } - catch (Exception ex) { - log.error(ex, "Unknown error while doing task status cleanup in ZK."); - } - }, - 1, - 5, - TimeUnit.MINUTES - ); - } - /** * Must not be used outside of this class and {@link HttpRemoteTaskRunnerResource} */ diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/hrtr/HttpRemoteTaskRunnerFactory.java b/indexing-service/src/main/java/org/apache/druid/indexing/overlord/hrtr/HttpRemoteTaskRunnerFactory.java index 3e0fddc1c0e8..fd1b53ffa3c2 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/hrtr/HttpRemoteTaskRunnerFactory.java +++ 
b/indexing-service/src/main/java/org/apache/druid/indexing/overlord/hrtr/HttpRemoteTaskRunnerFactory.java @@ -22,9 +22,6 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.base.Supplier; import com.google.inject.Inject; -import com.google.inject.Provider; -import org.apache.curator.framework.CuratorFramework; -import org.apache.druid.curator.ZkEnablementConfig; import org.apache.druid.discovery.DruidNodeDiscoveryProvider; import org.apache.druid.guice.annotations.EscalatedGlobal; import org.apache.druid.guice.annotations.Smile; @@ -37,9 +34,6 @@ import org.apache.druid.indexing.overlord.setup.WorkerBehaviorConfig; import org.apache.druid.java.util.emitter.service.ServiceEmitter; import org.apache.druid.java.util.http.client.HttpClient; -import org.apache.druid.server.initialization.IndexerZkConfig; - -import javax.annotation.Nullable; /** */ @@ -58,11 +52,6 @@ public class HttpRemoteTaskRunnerFactory implements TaskRunnerFactory cfProvider, - final IndexerZkConfig indexerZkConfig, - final ZkEnablementConfig zkEnablementConfig, final ServiceEmitter emitter ) { @@ -87,14 +73,7 @@ public HttpRemoteTaskRunnerFactory( this.provisioningStrategy = provisioningStrategy; this.druidNodeDiscoveryProvider = druidNodeDiscoveryProvider; this.taskStorage = taskStorage; - this.indexerZkConfig = indexerZkConfig; this.emitter = emitter; - - if (zkEnablementConfig.isEnabled()) { - this.cf = cfProvider.get(); - } else { - this.cf = null; - } } @Override @@ -108,8 +87,6 @@ public HttpRemoteTaskRunner build() provisioningSchedulerConfig.isDoAutoscale() ? 
provisioningStrategy : new NoopProvisioningStrategy<>(), druidNodeDiscoveryProvider, taskStorage, - cf, - indexerZkConfig, emitter ); return runner; diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/setup/WorkerSelectStrategy.java b/indexing-service/src/main/java/org/apache/druid/indexing/overlord/setup/WorkerSelectStrategy.java index a3443ee73583..8fff4bb7e7be 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/setup/WorkerSelectStrategy.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/overlord/setup/WorkerSelectStrategy.java @@ -30,7 +30,8 @@ import javax.annotation.Nullable; /** - * The {@link org.apache.druid.indexing.overlord.RemoteTaskRunner} uses this class to select a worker to assign tasks to. + * The {@link org.apache.druid.indexing.overlord.hrtr.HttpRemoteTaskRunner} uses this class to select a worker to assign + * tasks to. */ @JsonTypeInfo(use = JsonTypeInfo.Id.NAME, property = "type", defaultImpl = EqualDistributionWorkerSelectStrategy.class) @JsonSubTypes(value = { diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/worker/WorkerCuratorCoordinator.java b/indexing-service/src/main/java/org/apache/druid/indexing/worker/WorkerCuratorCoordinator.java deleted file mode 100644 index c018c6a1a63b..000000000000 --- a/indexing-service/src/main/java/org/apache/druid/indexing/worker/WorkerCuratorCoordinator.java +++ /dev/null @@ -1,226 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.indexing.worker; - -import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.common.base.Joiner; -import com.google.common.collect.ImmutableMap; -import com.google.inject.Inject; -import org.apache.curator.framework.CuratorFramework; -import org.apache.druid.curator.CuratorUtils; -import org.apache.druid.curator.announcement.ServiceAnnouncer; -import org.apache.druid.guice.annotations.DirectExecutorAnnouncer; -import org.apache.druid.indexing.overlord.config.RemoteTaskRunnerConfig; -import org.apache.druid.java.util.common.DateTimes; -import org.apache.druid.java.util.common.ISE; -import org.apache.druid.java.util.common.lifecycle.LifecycleStart; -import org.apache.druid.java.util.common.lifecycle.LifecycleStop; -import org.apache.druid.java.util.common.logger.Logger; -import org.apache.druid.server.initialization.IndexerZkConfig; -import org.apache.zookeeper.CreateMode; -import org.apache.zookeeper.KeeperException; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; - -/** - * The CuratorCoordinator provides methods to use Curator. Persistent ZK paths are created on {@link #start()}. 
- */ -public class WorkerCuratorCoordinator -{ - private static final Logger log = new Logger(WorkerCuratorCoordinator.class); - private static final Joiner JOINER = Joiner.on("/"); - - private final Object lock = new Object(); - - private final ObjectMapper jsonMapper; - private final RemoteTaskRunnerConfig config; - private final CuratorFramework curatorFramework; - private final ServiceAnnouncer announcer; - - private final String baseAnnouncementsPath; - private final String baseTaskPath; - private final String baseStatusPath; - - private volatile Worker worker; - private volatile boolean started; - - @Inject - public WorkerCuratorCoordinator( - ObjectMapper jsonMapper, - IndexerZkConfig indexerZkConfig, - RemoteTaskRunnerConfig config, - CuratorFramework curatorFramework, - @DirectExecutorAnnouncer ServiceAnnouncer announcer, - Worker worker - ) - { - this.jsonMapper = jsonMapper; - this.config = config; - this.curatorFramework = curatorFramework; - this.worker = worker; - this.announcer = announcer; - - this.baseAnnouncementsPath = getPath(Arrays.asList(indexerZkConfig.getAnnouncementsPath(), worker.getHost())); - this.baseTaskPath = getPath(Arrays.asList(indexerZkConfig.getTasksPath(), worker.getHost())); - this.baseStatusPath = getPath(Arrays.asList(indexerZkConfig.getStatusPath(), worker.getHost())); - } - - @LifecycleStart - public void start() throws Exception - { - log.info("WorkerCuratorCoordinator good to go. 
Server[%s]", worker.getHost()); - synchronized (lock) { - if (started) { - return; - } - - CuratorUtils.createIfNotExists( - curatorFramework, - getTaskPathForWorker(), - CreateMode.PERSISTENT, - jsonMapper.writeValueAsBytes(ImmutableMap.of("created", DateTimes.nowUtc().toString())), - config.getMaxZnodeBytes() - ); - - CuratorUtils.createIfNotExists( - curatorFramework, - getStatusPathForWorker(), - CreateMode.PERSISTENT, - jsonMapper.writeValueAsBytes(ImmutableMap.of("created", DateTimes.nowUtc().toString())), - config.getMaxZnodeBytes() - ); - - announcer.start(); - announcer.announce(getAnnouncementsPathForWorker(), jsonMapper.writeValueAsBytes(worker), false); - - started = true; - } - } - - @LifecycleStop - public void stop() - { - log.info("Stopping WorkerCuratorCoordinator for worker[%s]", worker.getHost()); - synchronized (lock) { - if (!started) { - return; - } - announcer.stop(); - - started = false; - } - } - - public String getPath(Iterable parts) - { - return JOINER.join(parts); - } - - public String getAnnouncementsPathForWorker() - { - return baseAnnouncementsPath; - } - - public String getTaskPathForWorker() - { - return baseTaskPath; - } - - public String getTaskPathForId(String taskId) - { - return getPath(Arrays.asList(baseTaskPath, taskId)); - } - - public String getStatusPathForWorker() - { - return baseStatusPath; - } - - public String getStatusPathForId(String statusId) - { - return getPath(Arrays.asList(baseStatusPath, statusId)); - } - - public Worker getWorker() - { - return worker; - } - - public void removeTaskRunZnode(String taskId) throws Exception - { - try { - curatorFramework.delete().guaranteed().forPath(getTaskPathForId(taskId)); - } - catch (KeeperException e) { - log.debug( - e, - "Could not delete task path for task[%s]. 
This is not an error if httpRemote taskRunner is being used at overlord.", - taskId - ); - } - } - - public void updateTaskStatusAnnouncement(TaskAnnouncement announcement) throws Exception - { - synchronized (lock) { - if (!started) { - return; - } - - CuratorUtils.createOrSet( - curatorFramework, - getStatusPathForId(announcement.getTaskStatus().getId()), - CreateMode.PERSISTENT, - jsonMapper.writeValueAsBytes(announcement), - config.getMaxZnodeBytes() - ); - } - } - - public List getAnnouncements() throws Exception - { - final List announcements = new ArrayList<>(); - - for (String id : curatorFramework.getChildren().forPath(getStatusPathForWorker())) { - announcements.add( - jsonMapper.readValue( - curatorFramework.getData().forPath(getStatusPathForId(id)), - TaskAnnouncement.class - ) - ); - } - - return announcements; - } - - public void updateWorkerAnnouncement(Worker newWorker) throws Exception - { - synchronized (lock) { - if (!started) { - throw new ISE("Cannot update worker! Not Started!"); - } - - this.worker = newWorker; - announcer.update(getAnnouncementsPathForWorker(), jsonMapper.writeValueAsBytes(newWorker)); - } - } -} diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/worker/WorkerTaskManager.java b/indexing-service/src/main/java/org/apache/druid/indexing/worker/WorkerTaskManager.java index 7379e34293f1..34ff1757cf5d 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/worker/WorkerTaskManager.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/worker/WorkerTaskManager.java @@ -96,10 +96,8 @@ public class WorkerTaskManager implements IndexerTaskCountStatsProvider private final ConcurrentMap assignedTasks = new ConcurrentHashMap<>(); - // ZK_CLEANUP_TODO : these are marked protected to be used in subclass WorkerTaskMonitor that updates ZK. - // should be marked private alongwith WorkerTaskMonitor removal. 
- protected final ConcurrentMap runningTasks = new ConcurrentHashMap<>(); - protected final ConcurrentMap completedTasks = new ConcurrentHashMap<>(); + private final ConcurrentMap runningTasks = new ConcurrentHashMap<>(); + private final ConcurrentMap completedTasks = new ConcurrentHashMap<>(); private final ChangeRequestHistory changeHistory = new ChangeRequestHistory<>(); @@ -782,8 +780,6 @@ public void handle() "Got run notice for task [%s] that I am already running or completed...", task.getId() ); - - taskStarted(task.getId()); return; } @@ -801,9 +797,6 @@ public void handle() cleanupAssignedTask(task); log.info("Task[%s] started.", task.getId()); } - - taskAnnouncementChanged(announcement); - taskStarted(task.getId()); } } @@ -855,7 +848,6 @@ public void handle() moveFromRunningToCompleted(task.getId(), latest); changeHistory.addChangeRequest(new WorkerHistoryItem.TaskUpdate(latest)); - taskAnnouncementChanged(latest); log.info( "Task [%s] completed with status [%s].", task.getId(), @@ -903,24 +895,8 @@ public void handle() ); changeHistory.addChangeRequest(new WorkerHistoryItem.TaskUpdate(latest)); - taskAnnouncementChanged(latest); } } } } - - // ZK_CLEANUP_TODO : - //Note: Following abstract methods exist only to support WorkerTaskMonitor that - //watches task assignments and updates task statuses inside Zookeeper. When the transition to HTTP is complete - //in Overlord as well as MiddleManagers then WorkerTaskMonitor should be deleted, this class should no longer be abstract - //and the methods below should be removed. 
- protected void taskStarted(String taskId) - { - - } - - protected void taskAnnouncementChanged(TaskAnnouncement announcement) - { - - } } diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/worker/WorkerTaskMonitor.java b/indexing-service/src/main/java/org/apache/druid/indexing/worker/WorkerTaskMonitor.java deleted file mode 100644 index c8537997b588..000000000000 --- a/indexing-service/src/main/java/org/apache/druid/indexing/worker/WorkerTaskMonitor.java +++ /dev/null @@ -1,226 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.druid.indexing.worker; - -import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.common.base.Preconditions; -import com.google.inject.Inject; -import org.apache.curator.framework.CuratorFramework; -import org.apache.curator.framework.recipes.cache.PathChildrenCache; -import org.apache.curator.framework.recipes.cache.PathChildrenCacheEvent; -import org.apache.curator.framework.recipes.cache.PathChildrenCacheListener; -import org.apache.druid.curator.CuratorUtils; -import org.apache.druid.indexer.TaskLocation; -import org.apache.druid.indexer.TaskStatus; -import org.apache.druid.indexing.common.config.TaskConfig; -import org.apache.druid.indexing.common.task.Task; -import org.apache.druid.indexing.overlord.TaskRunner; -import org.apache.druid.indexing.worker.config.WorkerConfig; -import org.apache.druid.java.util.common.concurrent.Execs; -import org.apache.druid.java.util.common.lifecycle.LifecycleStart; -import org.apache.druid.java.util.common.lifecycle.LifecycleStop; -import org.apache.druid.java.util.emitter.EmittingLogger; -import org.apache.druid.rpc.indexing.OverlordClient; - -/** - * This class is deprecated and required only to support {@link org.apache.druid.indexing.overlord.RemoteTaskRunner}. - * {@link org.apache.druid.indexing.overlord.hrtr.HttpRemoteTaskRunner} should be used instead. - * - * The monitor watches ZK at a specified path for new tasks to appear. Upon starting the monitor, a listener will be - * created that waits for new tasks. Tasks are executed as soon as they are seen. 
- */ -@Deprecated -public class WorkerTaskMonitor extends WorkerTaskManager -{ - private static final EmittingLogger log = new EmittingLogger(WorkerTaskMonitor.class); - - private final ObjectMapper jsonMapper; - private final PathChildrenCache pathChildrenCache; - private final CuratorFramework cf; - private final WorkerCuratorCoordinator workerCuratorCoordinator; - - private final Object lifecycleLock = new Object(); - private volatile boolean started = false; - - @Inject - public WorkerTaskMonitor( - ObjectMapper jsonMapper, - TaskRunner taskRunner, - TaskConfig taskConfig, - WorkerConfig workerConfig, - CuratorFramework cf, - WorkerCuratorCoordinator workerCuratorCoordinator, - OverlordClient overlordClient - ) - { - super(jsonMapper, taskRunner, taskConfig, workerConfig, overlordClient); - - this.jsonMapper = jsonMapper; - this.pathChildrenCache = new PathChildrenCache( - cf, - workerCuratorCoordinator.getTaskPathForWorker(), - false, - true, - Execs.makeThreadFactory("TaskMonitorCache-%s") - ); - this.cf = cf; - this.workerCuratorCoordinator = workerCuratorCoordinator; - } - - /** - * Register a monitor for new tasks. When new tasks appear, the worker node announces a status to indicate it has - * started the task. When the task is complete, the worker node updates the status. 
- */ - @LifecycleStart - @Override - public void start() throws Exception - { - super.start(); - - synchronized (lifecycleLock) { - Preconditions.checkState(!started, "already started"); - started = true; - - try { - cleanupStaleAnnouncements(); - registerRunListener(); - pathChildrenCache.start(); - - log.debug("Started WorkerTaskMonitor."); - started = true; - } - catch (InterruptedException e) { - throw e; - } - catch (Exception e) { - log.makeAlert(e, "Exception starting WorkerTaskMonitor") - .emit(); - throw e; - } - } - } - - private void cleanupStaleAnnouncements() throws Exception - { - synchronized (lock) { - // cleanup any old running task announcements which are invalid after restart - for (TaskAnnouncement announcement : workerCuratorCoordinator.getAnnouncements()) { - if (announcement.getTaskStatus().isRunnable()) { - TaskStatus completionStatus = null; - TaskAnnouncement completedAnnouncement = completedTasks.get(announcement.getTaskId()); - if (completedAnnouncement != null) { - completionStatus = completedAnnouncement.getTaskStatus(); - } else if (!runningTasks.containsKey(announcement.getTaskStatus().getId())) { - completionStatus = TaskStatus.failure( - announcement.getTaskStatus().getId(), - "Canceled as unknown task. See middleManager or indexer logs for more details." - ); - } - - if (completionStatus != null) { - log.info( - "Cleaning up stale announcement for task [%s]. 
New status is [%s].", - announcement.getTaskStatus().getId(), - completionStatus.getStatusCode() - ); - workerCuratorCoordinator.updateTaskStatusAnnouncement( - TaskAnnouncement.create( - announcement.getTaskStatus().getId(), - announcement.getTaskType(), - announcement.getTaskResource(), - completionStatus, - TaskLocation.unknown(), - announcement.getTaskDataSource() - ) - ); - } - } - } - } - } - - private void registerRunListener() - { - pathChildrenCache.getListenable().addListener( - new PathChildrenCacheListener() - { - @Override - public void childEvent(CuratorFramework curatorFramework, PathChildrenCacheEvent event) - throws Exception - { - if (CuratorUtils.isChildAdded(event)) { - final Task task = jsonMapper.readValue( - cf.getData().forPath(event.getData().getPath()), - Task.class - ); - - assignTask(task); - } - } - } - ); - } - - @LifecycleStop - @Override - public void stop() throws Exception - { - super.stop(); - - synchronized (lifecycleLock) { - Preconditions.checkState(started, "not started"); - - try { - started = false; - pathChildrenCache.close(); - - log.debug("Stopped WorkerTaskMonitor."); - } - catch (Exception e) { - log.makeAlert(e, "Exception stopping WorkerTaskMonitor") - .emit(); - } - } - } - - @Override - protected void taskStarted(String taskId) - { - try { - workerCuratorCoordinator.removeTaskRunZnode(taskId); - } - catch (Exception ex) { - log.error(ex, "Unknown exception while deleting task[%s] znode.", taskId); - } - } - - @Override - protected void taskAnnouncementChanged(TaskAnnouncement announcement) - { - try { - workerCuratorCoordinator.updateTaskStatusAnnouncement(announcement); - } - catch (Exception ex) { - log.makeAlert(ex, "Failed to update task announcement") - .addData("task", announcement.getTaskId()) - .emit(); - } - } -} diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/worker/http/WorkerResource.java b/indexing-service/src/main/java/org/apache/druid/indexing/worker/http/WorkerResource.java 
index 528c230857e4..5d81b14f2ee1 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/worker/http/WorkerResource.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/worker/http/WorkerResource.java @@ -25,14 +25,11 @@ import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; import com.google.inject.Inject; -import com.google.inject.Provider; import com.sun.jersey.spi.container.ResourceFilters; import org.apache.druid.common.utils.IdUtils; -import org.apache.druid.curator.ZkEnablementConfig; import org.apache.druid.indexing.overlord.TaskRunner; import org.apache.druid.indexing.overlord.TaskRunnerWorkItem; import org.apache.druid.indexing.worker.Worker; -import org.apache.druid.indexing.worker.WorkerCuratorCoordinator; import org.apache.druid.indexing.worker.WorkerTaskManager; import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.java.util.common.logger.Logger; @@ -41,7 +38,6 @@ import org.apache.druid.server.http.security.StateResourceFilter; import org.apache.druid.tasklogs.TaskLogStreamer; -import javax.annotation.Nullable; import javax.ws.rs.DefaultValue; import javax.ws.rs.GET; import javax.ws.rs.POST; @@ -60,35 +56,21 @@ public class WorkerResource { private static final Logger log = new Logger(WorkerResource.class); - private static String DISABLED_VERSION = ""; private final Worker enabledWorker; - - @Nullable // Null, if zk is disabled - private final WorkerCuratorCoordinator curatorCoordinator; - private final TaskRunner taskRunner; private final WorkerTaskManager workerTaskManager; @Inject public WorkerResource( Worker worker, - Provider curatorCoordinatorProvider, TaskRunner taskRunner, - WorkerTaskManager workerTaskManager, - ZkEnablementConfig zkEnablementConfig - + WorkerTaskManager workerTaskManager ) { this.enabledWorker = worker; this.taskRunner = taskRunner; this.workerTaskManager = workerTaskManager; - - if (zkEnablementConfig.isEnabled()) { - 
this.curatorCoordinator = curatorCoordinatorProvider.get(); - } else { - this.curatorCoordinator = null; - } } @@ -99,20 +81,6 @@ public WorkerResource( public Response doDisable() { try { - if (curatorCoordinator != null) { - // Dual-write disabled signal: legacy version="" for old overlords + disabled=true for new overlords. - // TODO: Safe to drop DISABLED_VERSION once backward compatibility with overlords is no longer required. - final Worker disabledWorker = new Worker( - enabledWorker.getScheme(), - enabledWorker.getHost(), - enabledWorker.getIp(), - enabledWorker.getCapacity(), - DISABLED_VERSION, - enabledWorker.getCategory(), - true - ); - curatorCoordinator.updateWorkerAnnouncement(disabledWorker); - } workerTaskManager.workerDisabled(); return Response.ok(ImmutableMap.of(enabledWorker.getHost(), "disabled")).build(); } @@ -128,9 +96,6 @@ public Response doDisable() public Response doEnable() { try { - if (curatorCoordinator != null) { - curatorCoordinator.updateWorkerAnnouncement(enabledWorker); - } workerTaskManager.workerEnabled(); return Response.ok(ImmutableMap.of(enabledWorker.getHost(), "enabled")).build(); } diff --git a/indexing-service/src/main/java/org/apache/druid/server/initialization/IndexerZkConfig.java b/indexing-service/src/main/java/org/apache/druid/server/initialization/IndexerZkConfig.java deleted file mode 100644 index 41f245e106ff..000000000000 --- a/indexing-service/src/main/java/org/apache/druid/server/initialization/IndexerZkConfig.java +++ /dev/null @@ -1,136 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.server.initialization; - -import com.fasterxml.jackson.annotation.JacksonInject; -import com.fasterxml.jackson.annotation.JsonCreator; -import com.fasterxml.jackson.annotation.JsonProperty; -import org.apache.curator.utils.ZKPaths; - -/** - * - */ -public class IndexerZkConfig -{ - @JsonCreator - public IndexerZkConfig( - @JacksonInject ZkPathsConfig zkPathsConfig, - @JsonProperty("base") String base, - @JsonProperty("announcementsPath") String announcementsPath, - @JsonProperty("tasksPath") String tasksPath, - @JsonProperty("statusPath") String statusPath - ) - { - this.zkPathsConfig = zkPathsConfig; - this.base = base; - this.announcementsPath = announcementsPath; - this.tasksPath = tasksPath; - this.statusPath = statusPath; - } - - @JacksonInject - private final ZkPathsConfig zkPathsConfig; - - @JsonProperty - private final String base; - - @JsonProperty - private final String announcementsPath; - - @JsonProperty - private final String tasksPath; - - @JsonProperty - private final String statusPath; - - private String defaultIndexerPath(final String subPath) - { - return ZKPaths.makePath(getBase(), subPath); - } - - public String getBase() - { - return base == null ? getZkPathsConfig().defaultPath("indexer") : base; - } - - public String getAnnouncementsPath() - { - return announcementsPath == null ? defaultIndexerPath("announcements") : announcementsPath; - } - - public String getTasksPath() - { - return tasksPath == null ? 
defaultIndexerPath("tasks") : tasksPath; - } - - public String getStatusPath() - { - return statusPath == null ? defaultIndexerPath("status") : statusPath; - } - - public ZkPathsConfig getZkPathsConfig() - { - return zkPathsConfig; - } - - @Override - public boolean equals(Object o) - { - if (this == o) { - return true; - } - if (o == null || getClass() != o.getClass()) { - return false; - } - - IndexerZkConfig that = (IndexerZkConfig) o; - - if (announcementsPath != null - ? !announcementsPath.equals(that.announcementsPath) - : that.announcementsPath != null) { - return false; - } - if (base != null ? !base.equals(that.base) : that.base != null) { - return false; - } - if (statusPath != null ? !statusPath.equals(that.statusPath) : that.statusPath != null) { - return false; - } - if (tasksPath != null ? !tasksPath.equals(that.tasksPath) : that.tasksPath != null) { - return false; - } - if (zkPathsConfig != null ? !zkPathsConfig.equals(that.zkPathsConfig) : that.zkPathsConfig != null) { - return false; - } - - return true; - } - - @Override - public int hashCode() - { - int result = zkPathsConfig != null ? zkPathsConfig.hashCode() : 0; - result = 31 * result + (base != null ? base.hashCode() : 0); - result = 31 * result + (announcementsPath != null ? announcementsPath.hashCode() : 0); - result = 31 * result + (tasksPath != null ? tasksPath.hashCode() : 0); - result = 31 * result + (statusPath != null ? statusPath.hashCode() : 0); - return result; - } -} diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/OverlordBlinkLeadershipTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/OverlordBlinkLeadershipTest.java deleted file mode 100644 index af3b81977998..000000000000 --- a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/OverlordBlinkLeadershipTest.java +++ /dev/null @@ -1,93 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.indexing.overlord; - -import com.google.common.base.Supplier; -import org.apache.druid.indexing.overlord.autoscaling.ProvisioningSchedulerConfig; -import org.apache.druid.indexing.overlord.autoscaling.SimpleWorkerProvisioningConfig; -import org.apache.druid.indexing.overlord.autoscaling.SimpleWorkerProvisioningStrategy; -import org.apache.druid.indexing.overlord.setup.DefaultWorkerBehaviorConfig; -import org.apache.druid.indexing.overlord.setup.WorkerBehaviorConfig; -import org.joda.time.Period; -import org.junit.After; -import org.junit.Assert; -import org.junit.Before; -import org.junit.Test; - -public class OverlordBlinkLeadershipTest -{ - private RemoteTaskRunnerTestUtils rtrUtils; - private final TestRemoteTaskRunnerConfig remoteTaskRunnerConfig = new TestRemoteTaskRunnerConfig(new Period("PT5M")); - private final DefaultWorkerBehaviorConfig defaultWorkerBehaviourConfig = DefaultWorkerBehaviorConfig.defaultConfig(); - private final Supplier workerBehaviorConfigSupplier = new Supplier<>() - { - @Override - public DefaultWorkerBehaviorConfig get() - { - return defaultWorkerBehaviourConfig; - } - }; - private final SimpleWorkerProvisioningStrategy resourceManagement = new SimpleWorkerProvisioningStrategy( - new 
SimpleWorkerProvisioningConfig(), - workerBehaviorConfigSupplier, - new ProvisioningSchedulerConfig() - ); - - @Before - public void setUp() throws Exception - { - rtrUtils = new RemoteTaskRunnerTestUtils(); - rtrUtils.setUp(); - } - - @After - public void tearDown() throws Exception - { - rtrUtils.tearDown(); - } - - /** - * Test that we can start taskRunner, then stop it (emulating "losing leadership", see {@link - * TaskMaster#stopBeingLeader()}), then creating a new taskRunner from {@link - * org.apache.curator.framework.recipes.leader.LeaderSelectorListener#takeLeadership} implementation in - * {@link TaskMaster} and start it again. - */ - @Test(timeout = 60_000L) - public void testOverlordBlinkLeadership() - { - try { - RemoteTaskRunner remoteTaskRunner1 = rtrUtils.makeRemoteTaskRunner( - remoteTaskRunnerConfig, - resourceManagement, - null - ); - remoteTaskRunner1.stop(); - RemoteTaskRunner remoteTaskRunner2 = rtrUtils.makeRemoteTaskRunner( - remoteTaskRunnerConfig, - resourceManagement, - null - ); - remoteTaskRunner2.stop(); - } - catch (Exception e) { - Assert.fail("Should have not thrown any exceptions, thrown: " + e); - } - } -} diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/RemoteTaskRunnerFactoryTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/RemoteTaskRunnerFactoryTest.java deleted file mode 100644 index f62c86b14c1f..000000000000 --- a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/RemoteTaskRunnerFactoryTest.java +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.indexing.overlord; - -import org.apache.curator.framework.CuratorFramework; -import org.apache.druid.indexing.overlord.autoscaling.NoopProvisioningStrategy; -import org.apache.druid.indexing.overlord.autoscaling.ProvisioningSchedulerConfig; -import org.apache.druid.indexing.overlord.config.RemoteTaskRunnerConfig; -import org.apache.druid.server.initialization.IndexerZkConfig; -import org.apache.druid.server.initialization.ZkPathsConfig; -import org.junit.Assert; -import org.junit.Test; -import org.mockito.Mockito; - -public class RemoteTaskRunnerFactoryTest -{ - @Test - public void testBuildWithAutoScale() - { - ProvisioningSchedulerConfig provisioningSchedulerConfig = Mockito.mock(ProvisioningSchedulerConfig.class); - Mockito.when(provisioningSchedulerConfig.isDoAutoscale()).thenReturn(true); - - RemoteTaskRunnerFactory remoteTaskRunnerFactory = getTestRemoteTaskRunnerFactory(provisioningSchedulerConfig); - - Assert.assertNull(remoteTaskRunnerFactory.build().getProvisioningStrategy()); - } - - @Test - public void testBuildWithoutAutoScale() - { - ProvisioningSchedulerConfig provisioningSchedulerConfig = Mockito.mock(ProvisioningSchedulerConfig.class); - Mockito.when(provisioningSchedulerConfig.isDoAutoscale()).thenReturn(false); - - RemoteTaskRunnerFactory remoteTaskRunnerFactory = getTestRemoteTaskRunnerFactory(provisioningSchedulerConfig); - - Assert.assertTrue(remoteTaskRunnerFactory.build().getProvisioningStrategy() instanceof NoopProvisioningStrategy); - } - - private RemoteTaskRunnerFactory 
getTestRemoteTaskRunnerFactory(ProvisioningSchedulerConfig provisioningSchedulerConfig) - { - CuratorFramework curator = Mockito.mock(CuratorFramework.class); - Mockito.when(curator.newWatcherRemoveCuratorFramework()).thenReturn(null); - return new RemoteTaskRunnerFactory( - curator, - new RemoteTaskRunnerConfig(), - new IndexerZkConfig(new ZkPathsConfig(), null, null, null, null), - null, - null, - null, - provisioningSchedulerConfig, - null, - null - ); - } -} diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/RemoteTaskRunnerRunPendingTasksConcurrencyTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/RemoteTaskRunnerRunPendingTasksConcurrencyTest.java deleted file mode 100644 index 62c18d7bd232..000000000000 --- a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/RemoteTaskRunnerRunPendingTasksConcurrencyTest.java +++ /dev/null @@ -1,179 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.druid.indexing.overlord; - -import com.google.common.util.concurrent.ListenableFuture; -import org.apache.druid.indexer.TaskState; -import org.apache.druid.indexer.TaskStatus; -import org.apache.druid.indexing.common.TestTasks; -import org.apache.druid.indexing.common.task.Task; -import org.apache.druid.java.util.common.ISE; -import org.apache.zookeeper.ZooKeeper; -import org.joda.time.Period; -import org.junit.After; -import org.junit.Assert; -import org.junit.Before; -import org.junit.Test; - -/** - */ -public class RemoteTaskRunnerRunPendingTasksConcurrencyTest -{ - private RemoteTaskRunner remoteTaskRunner; - private final RemoteTaskRunnerTestUtils rtrTestUtils = new RemoteTaskRunnerTestUtils(); - - @Before - public void setUp() throws Exception - { - rtrTestUtils.setUp(); - } - - @After - public void tearDown() throws Exception - { - if (remoteTaskRunner != null) { - remoteTaskRunner.stop(); - } - rtrTestUtils.tearDown(); - } - - // This task reproduces the races described in https://github.com/apache/druid/issues/2842 - @Test(timeout = 60_000L) - public void testConcurrency() throws Exception - { - rtrTestUtils.makeWorker("worker0", 3); - rtrTestUtils.makeWorker("worker1", 3); - - remoteTaskRunner = rtrTestUtils.makeRemoteTaskRunner( - new TestRemoteTaskRunnerConfig(new Period("PT3600S")) - { - @Override - public int getPendingTasksRunnerNumThreads() - { - return 2; - } - }, - null - ); - - int numTasks = 6; - ListenableFuture[] results = new ListenableFuture[numTasks]; - Task[] tasks = new Task[numTasks]; - - //2 tasks - for (int i = 0; i < 2; i++) { - tasks[i] = TestTasks.unending("task" + i); - results[i] = (remoteTaskRunner.run(tasks[i])); - } - - waitForBothWorkersToHaveUnackedTasks(); - - //3 more tasks, all of which get queued up - for (int i = 2; i < 5; i++) { - tasks[i] = TestTasks.unending("task" + i); - results[i] = (remoteTaskRunner.run(tasks[i])); - } - - //simulate completion of task0 and task1 - 
mockWorkerRunningAndCompletionSuccessfulTasks(tasks[0], tasks[1]); - - Assert.assertEquals(TaskState.SUCCESS, results[0].get().getStatusCode()); - Assert.assertEquals(TaskState.SUCCESS, results[1].get().getStatusCode()); - - // now both threads race to run the last 3 tasks. task2 and task3 are being assigned - waitForBothWorkersToHaveUnackedTasks(); - - if (remoteTaskRunner.getWorkersWithUnacknowledgedTask().containsValue(tasks[2].getId()) - && remoteTaskRunner.getWorkersWithUnacknowledgedTask().containsValue(tasks[3].getId())) { - remoteTaskRunner.shutdown("task4", "test"); - mockWorkerRunningAndCompletionSuccessfulTasks(tasks[3], tasks[2]); - Assert.assertEquals(TaskState.SUCCESS, results[3].get().getStatusCode()); - Assert.assertEquals(TaskState.SUCCESS, results[2].get().getStatusCode()); - } else if (remoteTaskRunner.getWorkersWithUnacknowledgedTask().containsValue(tasks[3].getId()) - && remoteTaskRunner.getWorkersWithUnacknowledgedTask().containsValue(tasks[4].getId())) { - remoteTaskRunner.shutdown("task2", "test"); - mockWorkerRunningAndCompletionSuccessfulTasks(tasks[4], tasks[3]); - Assert.assertEquals(TaskState.SUCCESS, results[4].get().getStatusCode()); - Assert.assertEquals(TaskState.SUCCESS, results[3].get().getStatusCode()); - } else if (remoteTaskRunner.getWorkersWithUnacknowledgedTask().containsValue(tasks[4].getId()) - && remoteTaskRunner.getWorkersWithUnacknowledgedTask().containsValue(tasks[2].getId())) { - remoteTaskRunner.shutdown("task3", "test"); - mockWorkerRunningAndCompletionSuccessfulTasks(tasks[4], tasks[2]); - Assert.assertEquals(TaskState.SUCCESS, results[4].get().getStatusCode()); - Assert.assertEquals(TaskState.SUCCESS, results[2].get().getStatusCode()); - } else { - throw new ISE("two out of three tasks 2,3 and 4 must be waiting for ack."); - } - - //ensure that RTR is doing OK and still making progress - tasks[5] = TestTasks.unending("task5"); - results[5] = remoteTaskRunner.run(tasks[5]); - waitForOneWorkerToHaveUnackedTasks(); - 
if (rtrTestUtils.taskAssigned("worker0", tasks[5].getId())) { - rtrTestUtils.mockWorkerRunningTask("worker0", tasks[5]); - rtrTestUtils.mockWorkerCompleteSuccessfulTask("worker0", tasks[5]); - } else { - rtrTestUtils.mockWorkerRunningTask("worker1", tasks[5]); - rtrTestUtils.mockWorkerCompleteSuccessfulTask("worker1", tasks[5]); - } - Assert.assertEquals(TaskState.SUCCESS, results[5].get().getStatusCode()); - } - - private void mockWorkerRunningAndCompletionSuccessfulTasks(Task t1, Task t2) throws Exception - { - if (rtrTestUtils.taskAssigned("worker0", t1.getId())) { - rtrTestUtils.mockWorkerRunningTask("worker0", t1); - rtrTestUtils.mockWorkerCompleteSuccessfulTask("worker0", t1); - rtrTestUtils.mockWorkerRunningTask("worker1", t2); - rtrTestUtils.mockWorkerCompleteSuccessfulTask("worker1", t2); - } else { - rtrTestUtils.mockWorkerRunningTask("worker1", t1); - rtrTestUtils.mockWorkerCompleteSuccessfulTask("worker1", t1); - rtrTestUtils.mockWorkerRunningTask("worker0", t2); - rtrTestUtils.mockWorkerCompleteSuccessfulTask("worker0", t2); - } - } - - private void waitForOneWorkerToHaveUnackedTasks() throws Exception - { - while (remoteTaskRunner.getWorkersWithUnacknowledgedTask().size() < 1) { - Thread.sleep(5); - } - - ZooKeeper zk = rtrTestUtils.getCuratorFramework().getZookeeperClient().getZooKeeper(); - while (zk.getChildren(RemoteTaskRunnerTestUtils.TASKS_PATH + "/worker0", false).size() < 1 - && zk.getChildren(RemoteTaskRunnerTestUtils.TASKS_PATH + "/worker1", false).size() < 1) { - Thread.sleep(5); - } - } - - private void waitForBothWorkersToHaveUnackedTasks() throws Exception - { - while (remoteTaskRunner.getWorkersWithUnacknowledgedTask().size() < 2) { - Thread.sleep(5); - } - - ZooKeeper zk = rtrTestUtils.getCuratorFramework().getZookeeperClient().getZooKeeper(); - while (zk.getChildren(RemoteTaskRunnerTestUtils.TASKS_PATH + "/worker0", false).size() < 1 - || zk.getChildren(RemoteTaskRunnerTestUtils.TASKS_PATH + "/worker1", false).size() < 1) { - 
Thread.sleep(5); - } - } -} diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/RemoteTaskRunnerTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/RemoteTaskRunnerTest.java deleted file mode 100644 index 2a94d4f21ed1..000000000000 --- a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/RemoteTaskRunnerTest.java +++ /dev/null @@ -1,1303 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.druid.indexing.overlord; - -import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.common.base.Function; -import com.google.common.base.Joiner; -import com.google.common.base.Optional; -import com.google.common.base.Predicate; -import com.google.common.collect.ImmutableSet; -import com.google.common.collect.Iterables; -import com.google.common.collect.Lists; -import com.google.common.collect.Sets; -import com.google.common.io.ByteStreams; -import com.google.common.util.concurrent.Futures; -import com.google.common.util.concurrent.ListenableFuture; -import org.apache.curator.framework.CuratorFramework; -import org.apache.curator.framework.api.CuratorWatcher; -import org.apache.curator.framework.recipes.cache.PathChildrenCache; -import org.apache.druid.indexer.TaskLocation; -import org.apache.druid.indexer.TaskState; -import org.apache.druid.indexer.TaskStatus; -import org.apache.druid.indexing.common.IndexingServiceCondition; -import org.apache.druid.indexing.common.TaskLockType; -import org.apache.druid.indexing.common.TestIndexTask; -import org.apache.druid.indexing.common.TestTasks; -import org.apache.druid.indexing.common.TestUtils; -import org.apache.druid.indexing.common.actions.SegmentTransactionalAppendAction; -import org.apache.druid.indexing.common.actions.SegmentTransactionalInsertAction; -import org.apache.druid.indexing.common.actions.SegmentTransactionalReplaceAction; -import org.apache.druid.indexing.common.task.Task; -import org.apache.druid.indexing.common.task.TaskResource; -import org.apache.druid.indexing.overlord.config.RemoteTaskRunnerConfig; -import org.apache.druid.indexing.overlord.setup.DefaultWorkerBehaviorConfig; -import org.apache.druid.indexing.overlord.setup.EqualDistributionWorkerSelectStrategy; -import org.apache.druid.indexing.worker.Worker; -import org.apache.druid.indexing.worker.config.WorkerConfig; -import org.apache.druid.java.util.common.DateTimes; -import 
org.apache.druid.java.util.common.StringUtils; -import org.apache.druid.java.util.common.logger.Logger; -import org.apache.druid.java.util.emitter.EmittingLogger; -import org.apache.druid.java.util.emitter.service.ServiceEmitter; -import org.apache.druid.java.util.http.client.HttpClient; -import org.apache.druid.java.util.http.client.Request; -import org.apache.druid.java.util.http.client.response.InputStreamFullResponseHolder; -import org.apache.druid.server.metrics.NoopServiceEmitter; -import org.apache.druid.testing.DeadlockDetectingTimeout; -import org.apache.zookeeper.Watcher; -import org.easymock.Capture; -import org.easymock.EasyMock; -import org.jboss.netty.handler.codec.http.DefaultHttpResponse; -import org.jboss.netty.handler.codec.http.HttpResponseStatus; -import org.jboss.netty.handler.codec.http.HttpVersion; -import org.joda.time.Period; -import org.junit.After; -import org.junit.Assert; -import org.junit.Before; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TestRule; -import org.junit.rules.TestWatcher; -import org.junit.runner.Description; -import org.mockito.Mockito; - -import java.io.ByteArrayOutputStream; -import java.io.InputStream; -import java.util.ArrayList; -import java.util.Collection; -import java.util.Collections; -import java.util.Map; -import java.util.Set; -import java.util.concurrent.CountDownLatch; -import java.util.concurrent.Future; -import java.util.concurrent.TimeUnit; - -public class RemoteTaskRunnerTest -{ - private static final Logger LOG = new Logger(RemoteTaskRunnerTest.class); - private static final Joiner JOINER = RemoteTaskRunnerTestUtils.JOINER; - private static final String WORKER_HOST = "worker"; - private static final String ANNOUCEMENTS_PATH = JOINER.join( - RemoteTaskRunnerTestUtils.ANNOUNCEMENTS_PATH, - WORKER_HOST - ); - private static final String STATUS_PATH = JOINER.join(RemoteTaskRunnerTestUtils.STATUS_PATH, WORKER_HOST); - - // higher timeout to reduce flakiness on CI pipeline - 
private static final Period TIMEOUT_PERIOD = Period.millis(30000); - - private RemoteTaskRunner remoteTaskRunner; - private HttpClient httpClient; - private RemoteTaskRunnerTestUtils rtrTestUtils = new RemoteTaskRunnerTestUtils(); - private ObjectMapper jsonMapper; - private CuratorFramework cf; - - private Task task; - private Worker worker; - - @Rule - public TestRule watcher = new TestWatcher() - { - @Override - protected void starting(Description description) - { - LOG.info("Starting test: " + description.getMethodName()); - } - - @Override - protected void finished(Description description) - { - LOG.info("Finishing test: " + description.getMethodName()); - } - }; - - @Rule - public final TestRule timeout = new DeadlockDetectingTimeout(60, TimeUnit.SECONDS); - - @Before - public void setUp() throws Exception - { - rtrTestUtils.setUp(); - jsonMapper = rtrTestUtils.getObjectMapper(); - cf = rtrTestUtils.getCuratorFramework(); - - task = TestTasks.unending("task id with spaces"); - EmittingLogger.registerEmitter(new NoopServiceEmitter()); - } - - @After - public void tearDown() throws Exception - { - if (remoteTaskRunner != null) { - remoteTaskRunner.stop(); - } - rtrTestUtils.tearDown(); - } - - @Test - public void testRun() throws Exception - { - doSetup(); - - Assert.assertEquals(3, remoteTaskRunner.getTotalTaskSlotCount().get(WorkerConfig.DEFAULT_CATEGORY).longValue()); - Assert.assertEquals(3, remoteTaskRunner.getIdleTaskSlotCount().get(WorkerConfig.DEFAULT_CATEGORY).longValue()); - Assert.assertEquals(0, remoteTaskRunner.getUsedTaskSlotCount().get(WorkerConfig.DEFAULT_CATEGORY).longValue()); - Assert.assertEquals(3, remoteTaskRunner.getTotalCapacity()); - Assert.assertEquals(-1, remoteTaskRunner.getMaximumCapacityWithAutoscale()); - Assert.assertEquals(0, remoteTaskRunner.getUsedCapacity()); - - - ListenableFuture result = remoteTaskRunner.run(task); - - Assert.assertTrue(taskAnnounced(task.getId())); - mockWorkerRunningTask(task); - 
Assert.assertTrue(workerRunningTask(task.getId())); - - mockWorkerCompleteSuccessfulTask(task); - Assert.assertTrue(workerCompletedTask(result)); - Assert.assertEquals(task.getId(), result.get().getId()); - Assert.assertEquals(TaskState.SUCCESS, result.get().getStatusCode()); - - cf.delete().guaranteed().forPath(JOINER.join(STATUS_PATH, task.getId())); - - Assert.assertEquals(3, remoteTaskRunner.getTotalTaskSlotCount().get(WorkerConfig.DEFAULT_CATEGORY).longValue()); - Assert.assertEquals(3, remoteTaskRunner.getIdleTaskSlotCount().get(WorkerConfig.DEFAULT_CATEGORY).longValue()); - Assert.assertEquals(0, remoteTaskRunner.getUsedTaskSlotCount().get(WorkerConfig.DEFAULT_CATEGORY).longValue()); - Assert.assertEquals(3, remoteTaskRunner.getTotalCapacity()); - Assert.assertEquals(0, remoteTaskRunner.getUsedCapacity()); - } - - @Test - public void testRunTaskThatAlreadyPending() throws Exception - { - doSetup(); - remoteTaskRunner.addPendingTask(task); - remoteTaskRunner.runPendingTasks(); - Assert.assertFalse(workerRunningTask(task.getId())); - - ListenableFuture result = remoteTaskRunner.run(task); - - Assert.assertTrue(taskAnnounced(task.getId())); - mockWorkerRunningTask(task); - Assert.assertTrue(workerRunningTask(task.getId())); - mockWorkerCompleteSuccessfulTask(task); - Assert.assertTrue(workerCompletedTask(result)); - - Assert.assertEquals(task.getId(), result.get().getId()); - Assert.assertEquals(TaskState.SUCCESS, result.get().getStatusCode()); - } - - @Test - public void testStartWithNoWorker() - { - makeRemoteTaskRunner(new TestRemoteTaskRunnerConfig(TIMEOUT_PERIOD)); - } - - @Test - public void testRunExistingTaskThatHasntStartedRunning() throws Exception - { - doSetup(); - - remoteTaskRunner.run(task); - Assert.assertTrue(taskAnnounced(task.getId())); - - ListenableFuture result = remoteTaskRunner.run(task); - - Assert.assertFalse(result.isDone()); - mockWorkerRunningTask(task); - Assert.assertTrue(workerRunningTask(task.getId())); - 
mockWorkerCompleteSuccessfulTask(task); - Assert.assertTrue(workerCompletedTask(result)); - - Assert.assertEquals(task.getId(), result.get().getId()); - Assert.assertEquals(TaskState.SUCCESS, result.get().getStatusCode()); - } - - @Test - public void testRunExistingTaskThatHasStartedRunning() throws Exception - { - doSetup(); - - remoteTaskRunner.run(task); - Assert.assertTrue(taskAnnounced(task.getId())); - mockWorkerRunningTask(task); - Assert.assertTrue(workerRunningTask(task.getId())); - - ListenableFuture result = remoteTaskRunner.run(task); - - Assert.assertFalse(result.isDone()); - - mockWorkerCompleteSuccessfulTask(task); - Assert.assertTrue(workerCompletedTask(result)); - - Assert.assertEquals(task.getId(), result.get().getId()); - Assert.assertEquals(TaskState.SUCCESS, result.get().getStatusCode()); - } - - @Test - public void testRunTooMuchZKData() throws Exception - { - ServiceEmitter emitter = EasyMock.createMock(ServiceEmitter.class); - EmittingLogger.registerEmitter(emitter); - EasyMock.replay(emitter); - - doSetup(); - - remoteTaskRunner.run(TestTasks.unending(new String(new char[5000]))); - - EasyMock.verify(emitter); - } - - @Test - public void testRunSameAvailabilityGroup() throws Exception - { - doSetup(); - - TestIndexTask task1 = new TestIndexTask( - "rt1", - new TaskResource("rt1", 1), - "foo", - TaskStatus.running("rt1"), - jsonMapper - ); - remoteTaskRunner.run(task1); - Assert.assertTrue(taskAnnounced(task1.getId())); - mockWorkerRunningTask(task1); - - TestIndexTask task2 = new TestIndexTask( - "rt2", - new TaskResource("rt1", 1), - "foo", - TaskStatus.running("rt2"), - jsonMapper - ); - remoteTaskRunner.run(task2); - - TestIndexTask task3 = new TestIndexTask( - "rt3", - new TaskResource("rt2", 1), - "foo", - TaskStatus.running("rt3"), - jsonMapper - ); - remoteTaskRunner.run(task3); - - Assert.assertTrue( - TestUtils.conditionValid( - new IndexingServiceCondition() - { - @Override - public boolean isValid() - { - return 
remoteTaskRunner.getRunningTasks().size() == 2; - } - } - ) - ); - - Assert.assertTrue( - TestUtils.conditionValid( - new IndexingServiceCondition() - { - @Override - public boolean isValid() - { - return remoteTaskRunner.getPendingTasks().size() == 1; - } - } - ) - ); - - Assert.assertTrue(remoteTaskRunner.getPendingTasks().iterator().next().getTaskId().equals("rt2")); - } - - @Test - public void testRunWithCapacity() throws Exception - { - doSetup(); - - TestIndexTask task1 = new TestIndexTask( - "rt1", - new TaskResource("rt1", 1), - "foo", - TaskStatus.running("rt1"), - jsonMapper - ); - remoteTaskRunner.run(task1); - Assert.assertTrue(taskAnnounced(task1.getId())); - mockWorkerRunningTask(task1); - - TestIndexTask task2 = new TestIndexTask( - "rt2", - new TaskResource("rt2", 3), - "foo", - TaskStatus.running("rt2"), - jsonMapper - ); - remoteTaskRunner.run(task2); - - TestIndexTask task3 = new TestIndexTask( - "rt3", - new TaskResource("rt3", 2), - "foo", - TaskStatus.running("rt3"), - jsonMapper - ); - remoteTaskRunner.run(task3); - Assert.assertTrue(taskAnnounced(task3.getId())); - mockWorkerRunningTask(task3); - - Assert.assertTrue( - TestUtils.conditionValid( - new IndexingServiceCondition() - { - @Override - public boolean isValid() - { - return remoteTaskRunner.getRunningTasks().size() == 2; - } - } - ) - ); - - Assert.assertTrue( - TestUtils.conditionValid( - new IndexingServiceCondition() - { - @Override - public boolean isValid() - { - return remoteTaskRunner.getPendingTasks().size() == 1; - } - } - ) - ); - - Assert.assertTrue(remoteTaskRunner.getPendingTasks().iterator().next().getTaskId().equals("rt2")); - } - - @Test - public void testStatusRemoved() throws Exception - { - doSetup(); - CountDownLatch deletionLatch = new CountDownLatch(1); - ListenableFuture future = remoteTaskRunner.run(task); - Assert.assertTrue(taskAnnounced(task.getId())); - mockWorkerRunningTask(task); - - Assert.assertTrue(workerRunningTask(task.getId())); - - 
Assert.assertTrue(remoteTaskRunner.getRunningTasks().iterator().next().getTaskId().equals(task.getId())); - - String taskStatusPath = JOINER.join(STATUS_PATH, task.getId()); - cf.checkExists().usingWatcher((CuratorWatcher) event -> { - if (event.getType() == Watcher.Event.EventType.NodeDeleted) { - deletionLatch.countDown(); - } - }).forPath(taskStatusPath); - - cf.delete().forPath(taskStatusPath); - - Assert.assertTrue("Deletion event not received", deletionLatch.await(5, TimeUnit.SECONDS)); - - TaskStatus status = future.get(); - - Assert.assertEquals(status.getStatusCode(), TaskState.FAILED); - Assert.assertNotNull(status.getErrorMsg()); - Assert.assertTrue(status.getErrorMsg().contains("The worker that this task was assigned disappeared")); - } - - @Test - public void testBootstrap() throws Exception - { - makeWorker(); - - RemoteTaskRunnerConfig rtrConfig = new TestRemoteTaskRunnerConfig(TIMEOUT_PERIOD); - rtrConfig.setMaxPercentageBlacklistWorkers(100); - - makeRemoteTaskRunner(rtrConfig); - - TestIndexTask task1 = new TestIndexTask( - "first", - new TaskResource("first", 1), - "foo", - TaskStatus.running("first"), - jsonMapper - ); - remoteTaskRunner.run(task1); - Assert.assertTrue(taskAnnounced(task1.getId())); - mockWorkerRunningTask(task1); - - TestIndexTask task = new TestIndexTask( - "second", - new TaskResource("task", 2), - "foo", - TaskStatus.running("task"), - jsonMapper - ); - remoteTaskRunner.run(task); - - TestIndexTask task2 = new TestIndexTask( - "second", - new TaskResource("second", 2), - "foo", - TaskStatus.running("second"), - jsonMapper - ); - remoteTaskRunner.run(task2); - Assert.assertTrue(taskAnnounced(task2.getId())); - mockWorkerRunningTask(task2); - - final Set runningTasks = Sets.newHashSet( - Iterables.transform( - remoteTaskRunner.getRunningTasks(), - new Function<>() - { - @Override - public String apply(RemoteTaskRunnerWorkItem input) - { - return input.getTaskId(); - } - } - ) - ); - Assert.assertEquals("runningTasks", 
ImmutableSet.of("first", "second"), runningTasks); - } - - @Test - public void testRunWithTaskComplete() throws Exception - { - doSetup(); - TestIndexTask task1 = new TestIndexTask( - "testTask", - new TaskResource("testTask", 2), - "foo", - TaskStatus.success("testTask"), - jsonMapper - ); - remoteTaskRunner.run(task1); - Assert.assertTrue(taskAnnounced(task1.getId())); - mockWorkerRunningTask(task1); - mockWorkerCompleteSuccessfulTask(task1); - - Assert.assertEquals(TaskState.SUCCESS, remoteTaskRunner.run(task1).get().getStatusCode()); - } - - @Test - public void testWorkerRemoved() throws Exception - { - doSetup(); - Assert.assertEquals(3, remoteTaskRunner.getTotalTaskSlotCount().get(WorkerConfig.DEFAULT_CATEGORY).longValue()); - Assert.assertEquals(3, remoteTaskRunner.getIdleTaskSlotCount().get(WorkerConfig.DEFAULT_CATEGORY).longValue()); - - Future future = remoteTaskRunner.run(task); - - Assert.assertTrue(taskAnnounced(task.getId())); - mockWorkerRunningTask(task); - - Assert.assertTrue(workerRunningTask(task.getId())); - - cf.delete().forPath(ANNOUCEMENTS_PATH); - - TaskStatus status = future.get(); - - Assert.assertEquals(TaskState.FAILED, status.getStatusCode()); - Assert.assertNotNull(status.getErrorMsg()); - Assert.assertTrue(status.getErrorMsg().contains("Canceled for worker cleanup")); - RemoteTaskRunnerConfig config = remoteTaskRunner.getRemoteTaskRunnerConfig(); - Assert.assertTrue( - TestUtils.conditionValid( - new IndexingServiceCondition() - { - @Override - public boolean isValid() - { - return remoteTaskRunner.getRemovedWorkerCleanups().isEmpty(); - } - }, - // cleanup task is independently scheduled by event listener. we need to wait some more time. 
- config.getTaskCleanupTimeout().toStandardDuration().getMillis() * 2 - ) - ); - Assert.assertNull(cf.checkExists().forPath(STATUS_PATH)); - - Assert.assertFalse(remoteTaskRunner.getTotalTaskSlotCount().containsKey(WorkerConfig.DEFAULT_CATEGORY)); - Assert.assertFalse(remoteTaskRunner.getIdleTaskSlotCount().containsKey(WorkerConfig.DEFAULT_CATEGORY)); - } - - @Test - public void testWorkerDisabled() throws Exception - { - doSetup(); - final ListenableFuture result = remoteTaskRunner.run(task); - - Assert.assertTrue(taskAnnounced(task.getId())); - mockWorkerRunningTask(task); - Assert.assertTrue(workerRunningTask(task.getId())); - - // Disable while task running - disableWorker(); - - // Continue test - mockWorkerCompleteSuccessfulTask(task); - Assert.assertTrue(workerCompletedTask(result)); - Assert.assertEquals(task.getId(), result.get().getId()); - Assert.assertEquals(TaskState.SUCCESS, result.get().getStatusCode()); - - // Confirm RTR thinks the worker is disabled. - Assert.assertTrue(Iterables.getOnlyElement(remoteTaskRunner.getWorkers()).getWorker().isDisabled()); - } - - @Test - public void testRestartRemoteTaskRunner() throws Exception - { - doSetup(); - remoteTaskRunner.run(task); - - Assert.assertTrue(taskAnnounced(task.getId())); - mockWorkerRunningTask(task); - Assert.assertTrue(workerRunningTask(task.getId())); - - remoteTaskRunner.stop(); - makeRemoteTaskRunner(new TestRemoteTaskRunnerConfig(TIMEOUT_PERIOD)); - final RemoteTaskRunnerWorkItem newWorkItem = remoteTaskRunner - .getKnownTasks() - .stream() - .filter(workItem -> workItem.getTaskId().equals(task.getId())) - .findFirst() - .orElse(null); - final ListenableFuture result = newWorkItem.getResult(); - - mockWorkerCompleteSuccessfulTask(task); - Assert.assertTrue(workerCompletedTask(result)); - - Assert.assertEquals(task.getId(), result.get().getId()); - Assert.assertEquals(TaskState.SUCCESS, result.get().getStatusCode()); - } - - @Test - public void testRunPendingTaskFailToAssignTask() throws 
Exception - { - doSetup(); - Thread.sleep(100); - RemoteTaskRunnerWorkItem originalItem = remoteTaskRunner.addPendingTask(task); - // modify taskId to make task assignment failed - RemoteTaskRunnerWorkItem wankyItem = Mockito.mock(RemoteTaskRunnerWorkItem.class); - Mockito.when(wankyItem.getTaskId()).thenReturn(originalItem.getTaskId()).thenReturn("wrongId"); - remoteTaskRunner.runPendingTask(wankyItem); - TaskStatus taskStatus = originalItem.getResult().get(0, TimeUnit.MILLISECONDS); - Assert.assertEquals(TaskState.FAILED, taskStatus.getStatusCode()); - Assert.assertEquals( - "Failed to assign this task. See overlord logs for more details.", - taskStatus.getErrorMsg() - ); - } - - @Test - public void testRunPendingTaskTimeoutToAssign() throws Exception - { - makeWorker(); - makeRemoteTaskRunner(new TestRemoteTaskRunnerConfig(TIMEOUT_PERIOD)); - RemoteTaskRunnerWorkItem workItem = remoteTaskRunner.addPendingTask(task); - remoteTaskRunner.runPendingTask(workItem); - TaskStatus taskStatus = workItem.getResult().get(0, TimeUnit.MILLISECONDS); - Assert.assertEquals(TaskState.FAILED, taskStatus.getStatusCode()); - Assert.assertNotNull(taskStatus.getErrorMsg()); - Assert.assertTrue( - taskStatus.getErrorMsg().startsWith("The worker that this task is assigned did not start it in timeout") - ); - } - - @Test - public void testGetMaximumCapacity_noWorkerConfig() - { - httpClient = EasyMock.createMock(HttpClient.class); - remoteTaskRunner = rtrTestUtils.makeRemoteTaskRunner( - new TestRemoteTaskRunnerConfig(TIMEOUT_PERIOD), - new TestProvisioningStrategy<>(), - httpClient, - null - ); - Assert.assertEquals(-1, remoteTaskRunner.getMaximumCapacityWithAutoscale()); - } - - @Test - public void testGetMaximumCapacity_noAutoScaler() - { - httpClient = EasyMock.createMock(HttpClient.class); - remoteTaskRunner = rtrTestUtils.makeRemoteTaskRunner( - new TestRemoteTaskRunnerConfig(TIMEOUT_PERIOD), - new TestProvisioningStrategy<>(), - httpClient, - new DefaultWorkerBehaviorConfig(new 
EqualDistributionWorkerSelectStrategy(null, null), null) - ); - Assert.assertEquals(-1, remoteTaskRunner.getMaximumCapacityWithAutoscale()); - } - - @Test - public void testGetMaximumCapacity_withAutoScaler() - { - httpClient = EasyMock.createMock(HttpClient.class); - remoteTaskRunner = rtrTestUtils.makeRemoteTaskRunner( - new TestRemoteTaskRunnerConfig(TIMEOUT_PERIOD), - new TestProvisioningStrategy<>(), - httpClient, - DefaultWorkerBehaviorConfig.defaultConfig() - ); - // Default autoscaler has max workers of 0 - Assert.assertEquals(0, remoteTaskRunner.getMaximumCapacityWithAutoscale()); - } - - private void doSetup() throws Exception - { - makeWorker(); - makeRemoteTaskRunner(new TestRemoteTaskRunnerConfig(TIMEOUT_PERIOD)); - } - - private void makeRemoteTaskRunner(RemoteTaskRunnerConfig config) - { - httpClient = EasyMock.createMock(HttpClient.class); - remoteTaskRunner = rtrTestUtils.makeRemoteTaskRunner(config, httpClient); - } - - private void makeWorker() throws Exception - { - worker = rtrTestUtils.makeWorker(WORKER_HOST, 3); - } - - private void disableWorker() throws Exception - { - rtrTestUtils.disableWorker(worker); - } - - private boolean taskAnnounced(final String taskId) - { - return rtrTestUtils.taskAssigned(WORKER_HOST, taskId); - } - - private boolean workerRunningTask(final String taskId) - { - return rtrTestUtils.workerRunningTask(WORKER_HOST, taskId); - } - - private boolean workerCompletedTask(final ListenableFuture result) - { - return TestUtils.conditionValid( - new IndexingServiceCondition() - { - @Override - public boolean isValid() - { - return result.isDone(); - } - } - ); - } - - private void mockWorkerRunningTask(final Task task) throws Exception - { - rtrTestUtils.mockWorkerRunningTask("worker", task); - } - - private void mockWorkerCompleteSuccessfulTask(final Task task) throws Exception - { - rtrTestUtils.mockWorkerCompleteSuccessfulTask("worker", task); - } - - private void mockWorkerCompleteFailedTask(final Task task) throws 
Exception - { - rtrTestUtils.mockWorkerCompleteFailedTask("worker", task); - } - - @Test - public void testFindLazyWorkerTaskRunning() throws Exception - { - doSetup(); - remoteTaskRunner.start(); - remoteTaskRunner.run(task); - Assert.assertTrue(taskAnnounced(task.getId())); - mockWorkerRunningTask(task); - Collection lazyworkers = remoteTaskRunner.markWorkersLazy( - new Predicate<>() - { - @Override - public boolean apply(ImmutableWorkerInfo input) - { - return true; - } - }, 1 - ); - Assert.assertTrue(lazyworkers.isEmpty()); - Assert.assertTrue(remoteTaskRunner.getLazyWorkers().isEmpty()); - Assert.assertEquals(1, remoteTaskRunner.getWorkers().size()); - } - - @Test - public void testFindLazyWorkerForWorkerJustAssignedTask() throws Exception - { - doSetup(); - remoteTaskRunner.run(task); - Assert.assertTrue(taskAnnounced(task.getId())); - Collection lazyworkers = remoteTaskRunner.markWorkersLazy( - new Predicate<>() - { - @Override - public boolean apply(ImmutableWorkerInfo input) - { - return true; - } - }, 1 - ); - Assert.assertTrue(lazyworkers.isEmpty()); - Assert.assertTrue(remoteTaskRunner.getLazyWorkers().isEmpty()); - Assert.assertEquals(1, remoteTaskRunner.getWorkers().size()); - } - - @Test - public void testFindLazyWorkerNotRunningAnyTask() throws Exception - { - doSetup(); - Collection lazyworkers = remoteTaskRunner.markWorkersLazy( - new Predicate<>() - { - @Override - public boolean apply(ImmutableWorkerInfo input) - { - return true; - } - }, 1 - ); - Assert.assertEquals(1, lazyworkers.size()); - Assert.assertEquals(1, remoteTaskRunner.getLazyWorkers().size()); - Assert.assertEquals(3, remoteTaskRunner.getTotalTaskSlotCount().get(WorkerConfig.DEFAULT_CATEGORY).longValue()); - Assert.assertFalse(remoteTaskRunner.getIdleTaskSlotCount().containsKey(WorkerConfig.DEFAULT_CATEGORY)); - Assert.assertEquals(3, remoteTaskRunner.getLazyTaskSlotCount().get(WorkerConfig.DEFAULT_CATEGORY).longValue()); - } - - @Test - public void 
testFindLazyWorkerNotRunningAnyTaskButWithZeroMaxWorkers() throws Exception - { - doSetup(); - Collection lazyworkers = remoteTaskRunner.markWorkersLazy( - new Predicate<>() - { - @Override - public boolean apply(ImmutableWorkerInfo input) - { - return true; - } - }, 0 - ); - Assert.assertEquals(0, lazyworkers.size()); - Assert.assertEquals(0, remoteTaskRunner.getLazyWorkers().size()); - } - - @Test - public void testWorkerZKReconnect() throws Exception - { - makeWorker(); - makeRemoteTaskRunner(new TestRemoteTaskRunnerConfig(new Period("PT5M"))); - Future future = remoteTaskRunner.run(task); - - Assert.assertTrue(taskAnnounced(task.getId())); - mockWorkerRunningTask(task); - - Assert.assertTrue(workerRunningTask(task.getId())); - byte[] bytes = cf.getData().forPath(ANNOUCEMENTS_PATH); - cf.delete().forPath(ANNOUCEMENTS_PATH); - // worker task cleanup scheduled - Assert.assertTrue( - TestUtils.conditionValid( - new IndexingServiceCondition() - { - @Override - public boolean isValid() - { - return remoteTaskRunner.getRemovedWorkerCleanups().containsKey(worker.getHost()); - } - } - ) - ); - - // Worker got reconnected - cf.create().forPath(ANNOUCEMENTS_PATH, bytes); - - // worker task cleanup should get cancelled and removed - Assert.assertTrue( - TestUtils.conditionValid( - new IndexingServiceCondition() - { - @Override - public boolean isValid() - { - return !remoteTaskRunner.getRemovedWorkerCleanups().containsKey(worker.getHost()); - } - } - ) - ); - - mockWorkerCompleteSuccessfulTask(task); - TaskStatus status = future.get(); - Assert.assertEquals(status.getStatusCode(), TaskState.SUCCESS); - Assert.assertEquals(TaskState.SUCCESS, status.getStatusCode()); - } - - @Test - public void testSortByInsertionTime() - { - RemoteTaskRunnerWorkItem item1 = new RemoteTaskRunnerWorkItem("b", "t", null, null, "ds_test") - .withQueueInsertionTime(DateTimes.of("2015-01-01T00:00:03Z")); - RemoteTaskRunnerWorkItem item2 = new RemoteTaskRunnerWorkItem("a", "t", null, null, 
"ds_test") - .withQueueInsertionTime(DateTimes.of("2015-01-01T00:00:02Z")); - RemoteTaskRunnerWorkItem item3 = new RemoteTaskRunnerWorkItem("c", "t", null, null, "ds_test") - .withQueueInsertionTime(DateTimes.of("2015-01-01T00:00:01Z")); - ArrayList workItems = Lists.newArrayList(item1, item2, item3); - RemoteTaskRunner.sortByInsertionTime(workItems); - Assert.assertEquals(item3, workItems.get(0)); - Assert.assertEquals(item2, workItems.get(1)); - Assert.assertEquals(item1, workItems.get(2)); - } - - @Test - public void testBlacklistZKWorkers() throws Exception - { - makeWorker(); - - RemoteTaskRunnerConfig rtrConfig = new TestRemoteTaskRunnerConfig(TIMEOUT_PERIOD); - rtrConfig.setMaxPercentageBlacklistWorkers(100); - - makeRemoteTaskRunner(rtrConfig); - - TestIndexTask task1 = new TestIndexTask( - "test_index1", - new TaskResource("test_index1", 1), - "foo", - TaskStatus.success("test_index1"), - jsonMapper - ); - Future taskFuture1 = remoteTaskRunner.run(task1); - Assert.assertTrue(taskAnnounced(task1.getId())); - mockWorkerRunningTask(task1); - mockWorkerCompleteFailedTask(task1); - Assert.assertTrue(taskFuture1.get().isFailure()); - Assert.assertEquals(0, remoteTaskRunner.getBlackListedWorkers().size()); - Assert.assertEquals( - 1, - remoteTaskRunner.findWorkerRunningTask(task1.getId()).getContinuouslyFailedTasksCount() - ); - - TestIndexTask task2 = new TestIndexTask( - "test_index2", - new TaskResource("test_index2", 1), - "foo", - TaskStatus.running("test_index2"), - jsonMapper - ); - Future taskFuture2 = remoteTaskRunner.run(task2); - Assert.assertTrue(taskAnnounced(task2.getId())); - mockWorkerRunningTask(task2); - mockWorkerCompleteFailedTask(task2); - Assert.assertTrue(taskFuture2.get().isFailure()); - Assert.assertEquals(1, remoteTaskRunner.getBlackListedWorkers().size()); - Assert.assertEquals( - 2, - remoteTaskRunner.findWorkerRunningTask(task2.getId()).getContinuouslyFailedTasksCount() - ); - - ((RemoteTaskRunnerTestUtils.TestableRemoteTaskRunner) 
remoteTaskRunner) - .setCurrentTimeMillis(System.currentTimeMillis()); - remoteTaskRunner.checkBlackListedNodes(); - - Assert.assertEquals(1, remoteTaskRunner.getBlackListedWorkers().size()); - - ((RemoteTaskRunnerTestUtils.TestableRemoteTaskRunner) remoteTaskRunner) - .setCurrentTimeMillis(System.currentTimeMillis() + 2 * TIMEOUT_PERIOD.toStandardDuration().getMillis()); - remoteTaskRunner.checkBlackListedNodes(); - - // After backOffTime the nodes are removed from blacklist - Assert.assertEquals(0, remoteTaskRunner.getBlackListedWorkers().size()); - Assert.assertEquals( - 0, - remoteTaskRunner.findWorkerRunningTask(task2.getId()).getContinuouslyFailedTasksCount() - ); - - TestIndexTask task3 = new TestIndexTask( - "test_index3", - new TaskResource("test_index3", 1), - "foo", - TaskStatus.running("test_index3"), - jsonMapper - ); - Future taskFuture3 = remoteTaskRunner.run(task3); - Assert.assertTrue(taskAnnounced(task3.getId())); - mockWorkerRunningTask(task3); - mockWorkerCompleteSuccessfulTask(task3); - Assert.assertTrue(taskFuture3.get().isSuccess()); - Assert.assertEquals(0, remoteTaskRunner.getBlackListedWorkers().size()); - Assert.assertEquals( - 0, - remoteTaskRunner.findWorkerRunningTask(task3.getId()).getContinuouslyFailedTasksCount() - ); - } - - /** - * With 2 workers and maxPercentageBlacklistWorkers(25), no worker should be blacklisted even after exceeding - * maxRetriesBeforeBlacklist. 
- */ - @Test - public void testBlacklistZKWorkers25Percent() throws Exception - { - rtrTestUtils.makeWorker("worker", 10); - rtrTestUtils.makeWorker("worker2", 10); - - RemoteTaskRunnerConfig rtrConfig = new TestRemoteTaskRunnerConfig(TIMEOUT_PERIOD); - rtrConfig.setMaxPercentageBlacklistWorkers(25); - - makeRemoteTaskRunner(rtrConfig); - - String assignedWorker = null; - - for (int i = 1; i < 13; i++) { - String taskId = StringUtils.format("rt-%d", i); - TestIndexTask task = new TestIndexTask( - taskId, - new TaskResource(taskId, 1), - "foo", - TaskStatus.success(taskId), - jsonMapper - ); - - Future taskFuture = remoteTaskRunner.run(task); - - if (i == 1) { - if (rtrTestUtils.taskAssigned("worker2", task.getId())) { - assignedWorker = "worker2"; - } else { - assignedWorker = "worker"; - } - } - - Assert.assertTrue(rtrTestUtils.taskAssigned(assignedWorker, task.getId())); - rtrTestUtils.mockWorkerRunningTask(assignedWorker, task); - rtrTestUtils.mockWorkerCompleteFailedTask(assignedWorker, task); - - Assert.assertTrue(taskFuture.get().isFailure()); - Assert.assertEquals(0, remoteTaskRunner.getBlackListedWorkers().size()); - Assert.assertEquals( - i, - remoteTaskRunner.findWorkerId("worker").getContinuouslyFailedTasksCount() - + remoteTaskRunner.findWorkerId("worker2").getContinuouslyFailedTasksCount() - ); - } - } - - /** - * With 2 workers and maxPercentageBlacklistWorkers(50), one worker should get blacklisted after the second failure - * and the second worker should never be blacklisted even after exceeding maxRetriesBeforeBlacklist. 
- */ - @Test - public void testBlacklistZKWorkers50Percent() throws Exception - { - rtrTestUtils.makeWorker("worker", 10); - rtrTestUtils.makeWorker("worker2", 10); - - RemoteTaskRunnerConfig rtrConfig = new TestRemoteTaskRunnerConfig(TIMEOUT_PERIOD); - rtrConfig.setMaxPercentageBlacklistWorkers(50); - - makeRemoteTaskRunner(rtrConfig); - - String firstWorker = null; - String secondWorker = null; - - for (int i = 1; i < 13; i++) { - String taskId = StringUtils.format("rt-%d", i); - TestIndexTask task = new TestIndexTask( - taskId, - new TaskResource(taskId, 1), - "foo", - TaskStatus.success(taskId), - jsonMapper - ); - - Future taskFuture = remoteTaskRunner.run(task); - - if (i == 1) { - if (rtrTestUtils.taskAssigned("worker2", task.getId())) { - firstWorker = "worker2"; - secondWorker = "worker"; - } else { - firstWorker = "worker"; - secondWorker = "worker2"; - } - } - - final String expectedWorker = i > 2 ? secondWorker : firstWorker; - - Assert.assertTrue( - StringUtils.format("Task[%s] assigned to worker[%s]", i, expectedWorker), - rtrTestUtils.taskAssigned(expectedWorker, task.getId()) - ); - rtrTestUtils.mockWorkerRunningTask(expectedWorker, task); - rtrTestUtils.mockWorkerCompleteFailedTask(expectedWorker, task); - - Assert.assertTrue(taskFuture.get().isFailure()); - Assert.assertEquals( - StringUtils.format("Blacklisted workers after task[%s]", i), - i >= 2 ? 
1 : 0, - remoteTaskRunner.getBlackListedWorkers().size() - ); - Assert.assertEquals( - StringUtils.format("Continuously failed tasks after task[%s]", i), - i, - remoteTaskRunner.findWorkerId("worker").getContinuouslyFailedTasksCount() - + remoteTaskRunner.findWorkerId("worker2").getContinuouslyFailedTasksCount() - ); - } - } - - @Test - public void testSuccessfulTaskOnBlacklistedWorker() throws Exception - { - makeWorker(); - - RemoteTaskRunnerConfig rtrConfig = new TestRemoteTaskRunnerConfig(TIMEOUT_PERIOD); - rtrConfig.setMaxPercentageBlacklistWorkers(100); - - makeRemoteTaskRunner(rtrConfig); - - TestIndexTask task1 = new TestIndexTask( - "test_index1", new TaskResource("test_index1", 1), "foo", TaskStatus.success("test_index1"), jsonMapper - ); - TestIndexTask task2 = new TestIndexTask( - "test_index2", new TaskResource("test_index2", 1), "foo", TaskStatus.success("test_index2"), jsonMapper - ); - TestIndexTask task3 = new TestIndexTask( - "test_index3", new TaskResource("test_index3", 1), "foo", TaskStatus.success("test_index3"), jsonMapper - ); - - Future taskFuture1 = remoteTaskRunner.run(task1); - Assert.assertTrue(taskAnnounced(task1.getId())); - mockWorkerRunningTask(task1); - mockWorkerCompleteFailedTask(task1); - Assert.assertTrue(taskFuture1.get().isFailure()); - Assert.assertEquals(0, remoteTaskRunner.getBlackListedWorkers().size()); - Assert.assertFalse(remoteTaskRunner.getBlacklistedTaskSlotCount().containsKey(WorkerConfig.DEFAULT_CATEGORY)); - - Future taskFuture2 = remoteTaskRunner.run(task2); - Assert.assertTrue(taskAnnounced(task2.getId())); - mockWorkerRunningTask(task2); - Assert.assertFalse(remoteTaskRunner.getBlacklistedTaskSlotCount().containsKey(WorkerConfig.DEFAULT_CATEGORY)); - - Future taskFuture3 = remoteTaskRunner.run(task3); - Assert.assertTrue(taskAnnounced(task3.getId())); - mockWorkerRunningTask(task3); - mockWorkerCompleteFailedTask(task3); - Assert.assertTrue(taskFuture3.get().isFailure()); - Assert.assertEquals(1, 
remoteTaskRunner.getBlackListedWorkers().size()); - Assert.assertEquals( - 3, - remoteTaskRunner.getBlacklistedTaskSlotCount().get(WorkerConfig.DEFAULT_CATEGORY).longValue() - ); - - mockWorkerCompleteSuccessfulTask(task2); - Assert.assertTrue(taskFuture2.get().isSuccess()); - Assert.assertEquals(0, remoteTaskRunner.getBlackListedWorkers().size()); - Assert.assertFalse(remoteTaskRunner.getBlacklistedTaskSlotCount().containsKey(WorkerConfig.DEFAULT_CATEGORY)); - } - - @Test - public void testStatusListenerEventDataNullShouldNotThrowException() throws Exception - { - // Set up mock emitter to verify log alert when exception is thrown inside the status listener - Worker worker = EasyMock.createMock(Worker.class); - EasyMock.expect(worker.getHost()).andReturn("host").atLeastOnce(); - EasyMock.replay(worker); - ServiceEmitter emitter = EasyMock.createMock(ServiceEmitter.class); - Capture capturedArgument = Capture.newInstance(); - emitter.emit(EasyMock.capture(capturedArgument)); - EasyMock.expectLastCall().atLeastOnce(); - EmittingLogger.registerEmitter(emitter); - EasyMock.replay(emitter); - - PathChildrenCache cache = new PathChildrenCache(cf, "/test", true); - testStartWithNoWorker(); - cache.getListenable() - .addListener(remoteTaskRunner.getStatusListener(worker, new ZkWorker(worker, cache, jsonMapper), null)); - cache.start(PathChildrenCache.StartMode.POST_INITIALIZED_EVENT); - - // Status listener will recieve event with null data - Assert.assertTrue( - TestUtils.conditionValid(() -> cache.getCurrentData().size() == 1) - ); - - // Verify that the log emitter was called - EasyMock.verify(worker); - EasyMock.verify(emitter); - Map alertDataMap = capturedArgument.getValue().build(null).getDataMap(); - Assert.assertTrue(alertDataMap.containsKey("znode")); - Assert.assertNull(alertDataMap.get("znode")); - // Status listener should successfully completes without throwing exception - } - - @Test - public void testStreamTaskReportsUnknownTask() throws Exception - { - 
doSetup(); - Assert.assertEquals(Optional.absent(), remoteTaskRunner.streamTaskReports("foo")); - } - - @Test - public void testStreamTaskReportsKnownTask() throws Exception - { - doSetup(); - final Capture capturedRequest = Capture.newInstance(); - final String reportString = "my report!"; - final InputStreamFullResponseHolder reportResponse = taskReportResponse(HttpResponseStatus.OK, reportString); - EasyMock.expect(httpClient.go(EasyMock.capture(capturedRequest), EasyMock.anyObject())) - .andReturn(Futures.immediateFuture(reportResponse)); - EasyMock.replay(httpClient); - - ListenableFuture result = remoteTaskRunner.run(task); - Assert.assertTrue(taskAnnounced(task.getId())); - mockWorkerRunningTask(task); - - // Wait for the task to have a known location. - Assert.assertTrue( - TestUtils.conditionValid( - () -> - !remoteTaskRunner.getRunningTasks().isEmpty() - && !Iterables.getOnlyElement(remoteTaskRunner.getRunningTasks()) - .getLocation() - .equals(TaskLocation.unknown()) - ) - ); - - // Stream task reports from a running task. - final InputStream in = remoteTaskRunner.streamTaskReports(task.getId()).get(); - final ByteArrayOutputStream baos = new ByteArrayOutputStream(); - ByteStreams.copy(in, baos); - Assert.assertEquals(reportString, StringUtils.fromUtf8(baos.toByteArray())); - - // Stream task reports from a completed task. - mockWorkerCompleteSuccessfulTask(task); - Assert.assertTrue(workerCompletedTask(result)); - Assert.assertEquals(Optional.absent(), remoteTaskRunner.streamTaskReports(task.getId())); - - // Verify the HTTP request. 
- EasyMock.verify(httpClient); - Assert.assertEquals( - "http://dummy:9000/druid/worker/v1/chat/task%20id%20with%20spaces/liveReports", - capturedRequest.getValue().getUrl().toString() - ); - } - - @Test - public void testStreamTaskReportsUnavailableFromWorker() throws Exception - { - doSetup(); - final Capture capturedRequest = Capture.newInstance(); - final InputStreamFullResponseHolder reportResponse = taskReportResponse( - HttpResponseStatus.SERVICE_UNAVAILABLE, - "{\"error\":\"Can't find chatHandler for handler[task]\"}" - ); - EasyMock.expect(httpClient.go(EasyMock.capture(capturedRequest), EasyMock.anyObject())) - .andReturn(Futures.immediateFuture(reportResponse)); - EasyMock.replay(httpClient); - - remoteTaskRunner.run(task); - Assert.assertTrue(taskAnnounced(task.getId())); - mockWorkerRunningTask(task); - - // Wait for the task to have a known location. - Assert.assertTrue( - TestUtils.conditionValid( - () -> - !remoteTaskRunner.getRunningTasks().isEmpty() - && !Iterables.getOnlyElement(remoteTaskRunner.getRunningTasks()) - .getLocation() - .equals(TaskLocation.unknown()) - ) - ); - - Assert.assertEquals(Optional.absent(), remoteTaskRunner.streamTaskReports(task.getId())); - - EasyMock.verify(httpClient); - Assert.assertEquals( - "http://dummy:9000/druid/worker/v1/chat/task%20id%20with%20spaces/liveReports", - capturedRequest.getValue().getUrl().toString() - ); - } - - @Test - public void testBuildPublishAction() - { - TestIndexTask task = new TestIndexTask( - "test_index1", - new TaskResource("test_index1", 1), - "foo", - TaskStatus.success("test_index1"), - jsonMapper - ); - - Assert.assertEquals( - SegmentTransactionalAppendAction.class, - task.buildPublishActionForTest( - Collections.emptySet(), - Collections.emptySet(), - null, - TaskLockType.APPEND - ).getClass() - ); - - Assert.assertEquals( - SegmentTransactionalReplaceAction.class, - task.buildPublishActionForTest( - Collections.emptySet(), - Collections.emptySet(), - null, - 
TaskLockType.REPLACE - ).getClass() - ); - - Assert.assertEquals( - SegmentTransactionalInsertAction.class, - task.buildPublishActionForTest( - Collections.emptySet(), - Collections.emptySet(), - null, - TaskLockType.EXCLUSIVE - ).getClass() - ); - } - - private static InputStreamFullResponseHolder taskReportResponse( - final HttpResponseStatus status, - final String content - ) - { - final InputStreamFullResponseHolder response = new InputStreamFullResponseHolder( - new DefaultHttpResponse(HttpVersion.HTTP_1_1, status) - ); - response.addChunk(StringUtils.toUtf8(content)); - response.done(); - return response; - } -} diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/RemoteTaskRunnerTestUtils.java b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/RemoteTaskRunnerTestUtils.java deleted file mode 100644 index af33b6fc9196..000000000000 --- a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/RemoteTaskRunnerTestUtils.java +++ /dev/null @@ -1,307 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.druid.indexing.overlord; - -import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.common.base.Joiner; -import com.google.common.base.Preconditions; -import com.google.common.base.Supplier; -import org.apache.curator.framework.CuratorFramework; -import org.apache.curator.framework.CuratorFrameworkFactory; -import org.apache.curator.retry.ExponentialBackoffRetry; -import org.apache.curator.test.TestingCluster; -import org.apache.druid.common.guava.DSuppliers; -import org.apache.druid.curator.PotentiallyGzippedCompressionProvider; -import org.apache.druid.curator.cache.PathChildrenCacheFactory; -import org.apache.druid.indexer.TaskLocation; -import org.apache.druid.indexer.TaskStatus; -import org.apache.druid.indexing.common.IndexingServiceCondition; -import org.apache.druid.indexing.common.TestUtils; -import org.apache.druid.indexing.common.task.Task; -import org.apache.druid.indexing.overlord.autoscaling.NoopProvisioningStrategy; -import org.apache.druid.indexing.overlord.autoscaling.ProvisioningStrategy; -import org.apache.druid.indexing.overlord.config.RemoteTaskRunnerConfig; -import org.apache.druid.indexing.overlord.setup.DefaultWorkerBehaviorConfig; -import org.apache.druid.indexing.overlord.setup.WorkerBehaviorConfig; -import org.apache.druid.indexing.worker.TaskAnnouncement; -import org.apache.druid.indexing.worker.Worker; -import org.apache.druid.indexing.worker.config.WorkerConfig; -import org.apache.druid.java.util.common.StringUtils; -import org.apache.druid.java.util.http.client.HttpClient; -import org.apache.druid.server.initialization.IndexerZkConfig; -import org.apache.druid.server.initialization.ZkPathsConfig; -import org.apache.druid.server.metrics.NoopServiceEmitter; -import org.apache.zookeeper.CreateMode; - -import java.util.concurrent.atomic.AtomicReference; - -/** - */ -public class RemoteTaskRunnerTestUtils -{ - static final Joiner JOINER = Joiner.on("/"); - static final String BASE_PATH = 
"/test/druid"; - static final String ANNOUNCEMENTS_PATH = StringUtils.format("%s/indexer/announcements", BASE_PATH); - static final String TASKS_PATH = StringUtils.format("%s/indexer/tasks", BASE_PATH); - static final String STATUS_PATH = StringUtils.format("%s/indexer/status", BASE_PATH); - static final TaskLocation DUMMY_LOCATION = TaskLocation.create("dummy", 9000, -1); - - private TestingCluster testingCluster; - - private CuratorFramework cf; - private ObjectMapper jsonMapper; - - RemoteTaskRunnerTestUtils() - { - TestUtils testUtils = new TestUtils(); - jsonMapper = testUtils.getTestObjectMapper(); - } - - CuratorFramework getCuratorFramework() - { - return cf; - } - - ObjectMapper getObjectMapper() - { - return jsonMapper; - } - - void setUp() throws Exception - { - testingCluster = new TestingCluster(1); - testingCluster.start(); - - cf = CuratorFrameworkFactory.builder() - .connectString(testingCluster.getConnectString()) - .retryPolicy(new ExponentialBackoffRetry(1, 10)) - .compressionProvider(new PotentiallyGzippedCompressionProvider(false)) - .build(); - cf.start(); - cf.blockUntilConnected(); - cf.create().creatingParentsIfNeeded().forPath(BASE_PATH); - cf.create().creatingParentsIfNeeded().forPath(TASKS_PATH); - } - - void tearDown() throws Exception - { - cf.close(); - testingCluster.stop(); - } - - RemoteTaskRunner makeRemoteTaskRunner(RemoteTaskRunnerConfig config, HttpClient httpClient) - { - NoopProvisioningStrategy resourceManagement = new NoopProvisioningStrategy<>(); - return makeRemoteTaskRunner(config, resourceManagement, httpClient); - } - - public RemoteTaskRunner makeRemoteTaskRunner( - RemoteTaskRunnerConfig config, - ProvisioningStrategy provisioningStrategy, - HttpClient httpClient - ) - { - return makeRemoteTaskRunner( - config, - provisioningStrategy, - httpClient, - DefaultWorkerBehaviorConfig.defaultConfig() - ); - } - - public RemoteTaskRunner makeRemoteTaskRunner( - RemoteTaskRunnerConfig config, - ProvisioningStrategy 
provisioningStrategy, - HttpClient httpClient, - WorkerBehaviorConfig workerBehaviorConfig - ) - { - RemoteTaskRunner remoteTaskRunner = new TestableRemoteTaskRunner( - jsonMapper, - config, - new IndexerZkConfig( - new ZkPathsConfig() - { - @Override - public String getBase() - { - return BASE_PATH; - } - }, null, null, null, null - ), - cf, - new PathChildrenCacheFactory.Builder(), - httpClient, - DSuppliers.of(new AtomicReference<>(workerBehaviorConfig)), - provisioningStrategy - ); - - remoteTaskRunner.start(); - return remoteTaskRunner; - } - - Worker makeWorker(final String workerId, final int capacity) throws Exception - { - Worker worker = new Worker( - "http", - workerId, - workerId, - capacity, - "0", - WorkerConfig.DEFAULT_CATEGORY - ); - - cf.create().creatingParentsIfNeeded().withMode(CreateMode.EPHEMERAL).forPath( - JOINER.join(ANNOUNCEMENTS_PATH, workerId), - jsonMapper.writeValueAsBytes(worker) - ); - cf.create().creatingParentsIfNeeded().forPath(JOINER.join(TASKS_PATH, workerId)); - - return worker; - } - - void disableWorker(Worker worker) throws Exception - { - cf.setData().forPath( - JOINER.join(ANNOUNCEMENTS_PATH, worker.getHost()), - jsonMapper.writeValueAsBytes(new Worker( - worker.getScheme(), - worker.getHost(), - worker.getIp(), - worker.getCapacity(), - worker.getVersion(), - worker.getCategory(), - true - )) - ); - } - - void mockWorkerRunningTask(final String workerId, final Task task) throws Exception - { - cf.delete().forPath(JOINER.join(TASKS_PATH, workerId, task.getId())); - - final String taskStatusPath = JOINER.join(STATUS_PATH, workerId, task.getId()); - TaskAnnouncement taskAnnouncement = TaskAnnouncement.create(task, TaskStatus.running(task.getId()), DUMMY_LOCATION); - cf.create() - .creatingParentsIfNeeded() - .forPath(taskStatusPath, jsonMapper.writeValueAsBytes(taskAnnouncement)); - - Preconditions.checkNotNull( - cf.checkExists().forPath(taskStatusPath), - "Failed to write status on [%s]", - taskStatusPath - ); - } - - void 
mockWorkerCompleteSuccessfulTask(final String workerId, final Task task) throws Exception - { - TaskAnnouncement taskAnnouncement = TaskAnnouncement.create(task, TaskStatus.success(task.getId()), DUMMY_LOCATION); - cf.setData().forPath(JOINER.join(STATUS_PATH, workerId, task.getId()), jsonMapper.writeValueAsBytes(taskAnnouncement)); - } - - void mockWorkerCompleteFailedTask(final String workerId, final Task task) throws Exception - { - TaskAnnouncement taskAnnouncement = TaskAnnouncement.create( - task, - TaskStatus.failure( - task.getId(), - "Dummy task status failure for testing" - ), - DUMMY_LOCATION - ); - cf.setData() - .forPath(JOINER.join(STATUS_PATH, workerId, task.getId()), jsonMapper.writeValueAsBytes(taskAnnouncement)); - } - - boolean workerRunningTask(final String workerId, final String taskId) - { - return pathExists(JOINER.join(STATUS_PATH, workerId, taskId)); - } - - boolean taskAssigned(final String workerId, final String taskId) - { - return pathExists(JOINER.join(TASKS_PATH, workerId, taskId)); - } - - boolean pathExists(final String path) - { - return TestUtils.conditionValid( - new IndexingServiceCondition() - { - @Override - public boolean isValid() - { - try { - return cf.checkExists().forPath(path) != null; - } - catch (Exception e) { - throw new RuntimeException(e); - } - } - - @Override - public String toString() - { - return StringUtils.format("Path[%s] exists", path); - } - } - ); - } - - public static class TestableRemoteTaskRunner extends RemoteTaskRunner - { - private long currentTimeMillis = System.currentTimeMillis(); - - public TestableRemoteTaskRunner( - ObjectMapper jsonMapper, - RemoteTaskRunnerConfig config, - IndexerZkConfig indexerZkConfig, - CuratorFramework cf, - PathChildrenCacheFactory.Builder pathChildrenCacheFactory, - HttpClient httpClient, - Supplier workerConfigRef, - ProvisioningStrategy provisioningStrategy - ) - { - super( - jsonMapper, - config, - indexerZkConfig, - cf, - pathChildrenCacheFactory, - httpClient, - 
workerConfigRef, - provisioningStrategy, - new NoopServiceEmitter() - ); - } - - void setCurrentTimeMillis(long currentTimeMillis) - { - this.currentTimeMillis = currentTimeMillis; - } - - @Override - protected long getCurrentTimeMillis() - { - return currentTimeMillis; - } - } -} diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskLockConfigTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskLockConfigTest.java index f257691fdd87..4936243c886f 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskLockConfigTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskLockConfigTest.java @@ -30,6 +30,7 @@ import org.apache.druid.indexing.overlord.config.DefaultTaskConfig; import org.apache.druid.indexing.overlord.config.TaskLockConfig; import org.apache.druid.indexing.overlord.config.TaskQueueConfig; +import org.apache.druid.indexing.overlord.hrtr.HttpRemoteTaskRunner; import org.apache.druid.indexing.test.TestIndexerMetadataStorageCoordinator; import org.apache.druid.jackson.DefaultObjectMapper; import org.apache.druid.java.util.emitter.service.ServiceEmitter; @@ -110,7 +111,7 @@ public boolean isForceTimeChunkLock() lockConfig = new TaskLockConfig(); } final TaskQueueConfig queueConfig = new TaskQueueConfig(null, null, null, null, null, null); - final TaskRunner taskRunner = EasyMock.createNiceMock(RemoteTaskRunner.class); + final TaskRunner taskRunner = EasyMock.createNiceMock(HttpRemoteTaskRunner.class); final TaskActionClientFactory actionClientFactory = EasyMock.createNiceMock(LocalTaskActionClientFactory.class); final GlobalTaskLockbox lockbox = new GlobalTaskLockbox(taskStorage, new TestIndexerMetadataStorageCoordinator()); final ServiceEmitter emitter = new NoopServiceEmitter(); diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskQueueTest.java 
b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskQueueTest.java index bc409064e97c..d64b40039347 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskQueueTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskQueueTest.java @@ -27,7 +27,6 @@ import com.google.common.collect.ImmutableMap; import com.google.common.util.concurrent.Futures; import com.google.common.util.concurrent.ListenableFuture; -import org.apache.curator.framework.CuratorFramework; import org.apache.druid.common.guava.DSuppliers; import org.apache.druid.data.input.impl.DimensionsSpec; import org.apache.druid.data.input.impl.HttpInputSource; @@ -88,8 +87,6 @@ import org.apache.druid.segment.indexing.DataSchema; import org.apache.druid.server.DruidNode; import org.apache.druid.server.coordinator.stats.CoordinatorRunStats; -import org.apache.druid.server.initialization.IndexerZkConfig; -import org.apache.druid.server.initialization.ZkPathsConfig; import org.apache.druid.timeline.DataSegment; import org.easymock.EasyMock; import org.joda.time.Interval; @@ -810,8 +807,6 @@ private HttpRemoteTaskRunner createHttpRemoteTaskRunner() new NoopProvisioningStrategy<>(), druidNodeDiscoveryProvider, EasyMock.createNiceMock(TaskStorage.class), - EasyMock.createNiceMock(CuratorFramework.class), - new IndexerZkConfig(new ZkPathsConfig(), null, null, null, null), serviceEmitter ); } diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TestRemoteTaskRunnerConfig.java b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TestRemoteTaskRunnerConfig.java deleted file mode 100644 index a634294637c7..000000000000 --- a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TestRemoteTaskRunnerConfig.java +++ /dev/null @@ -1,84 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.indexing.overlord; - -import org.apache.druid.indexing.overlord.config.RemoteTaskRunnerConfig; -import org.joda.time.Period; - -/** - */ -public class TestRemoteTaskRunnerConfig extends RemoteTaskRunnerConfig -{ - private final Period timeout; - - public TestRemoteTaskRunnerConfig(Period timeout) - { - this.timeout = timeout; - } - - @Override - public Period getTaskAssignmentTimeout() - { - return timeout; - } - - @Override - public Period getTaskCleanupTimeout() - { - return timeout; - } - - @Override - public int getMaxZnodeBytes() - { - // make sure this is large enough, otherwise RemoteTaskRunnerTest might fail unexpectedly - return 10 * 1024; - } - - @Override - public Period getTaskShutdownLinkTimeout() - { - return timeout; - } - - @Override - public String getMinWorkerVersion() - { - return ""; - } - - @Override - public int getMaxRetriesBeforeBlacklist() - { - return 1; - } - - @Override - public Period getWorkerBlackListBackoffTime() - { - return timeout; - } - - @Override - public Period getWorkerBlackListCleanupPeriod() - { - return timeout; - } -} diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/ZkWorkerTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/ZkWorkerTest.java 
deleted file mode 100644 index fac929912088..000000000000 --- a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/ZkWorkerTest.java +++ /dev/null @@ -1,148 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.indexing.overlord; - -import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.databind.ObjectMapper; -import org.apache.curator.framework.recipes.cache.ChildData; -import org.apache.druid.indexer.TaskLocation; -import org.apache.druid.indexer.TaskStatus; -import org.apache.druid.indexing.common.task.NoopTask; -import org.apache.druid.indexing.common.task.Task; -import org.apache.druid.indexing.worker.TaskAnnouncement; -import org.apache.druid.jackson.DefaultObjectMapper; -import org.apache.druid.java.util.common.StringUtils; -import org.apache.zookeeper.data.Stat; -import org.junit.Assert; -import org.junit.Before; -import org.junit.Test; - -import java.util.function.Function; - -public class ZkWorkerTest -{ - Function extract; - - @Before - public void setup() - { - ObjectMapper mapper = new DefaultObjectMapper(); - extract = ZkWorker.createTaskIdExtractor(mapper); - } - - ChildData prepare(String input) - { - String 
replaced = StringUtils.format(StringUtils.replaceChar(input, '\'', "\""), TaskAnnouncement.TASK_ID_KEY); - byte[] data = StringUtils.toUtf8(replaced); - return new ChildData("/a/b/c", new Stat(), data); - } - - @Test - public void testShallowObjectWithIdFirst() - { - ChildData input = prepare("{'%s': 'abcd', 'status': 'RUNNING'}"); - String actual = extract.apply(input); - Assert.assertEquals("abcd", actual); - } - - @Test - public void testShallowObjectWithIdMiddle() - { - ChildData input = prepare("{'before': 'something', '%s': 'abcd', 'status': 'RUNNING'}"); - String actual = extract.apply(input); - Assert.assertEquals("abcd", actual); - } - - @Test - public void testShallowObjectWithIdLast() - { - ChildData input = prepare("{'before': 'something', 'status': 'RUNNING', '%s': 'abcd'}"); - String actual = extract.apply(input); - Assert.assertEquals("abcd", actual); - } - - @Test - public void testShallowObjectWithNoId() - { - ChildData input = prepare("{'before': 'something', 'status': 'RUNNING'}"); - String actual = extract.apply(input); - Assert.assertNull(actual); - } - - @Test - public void testDeepObjectWithIdFirst() - { - ChildData input = prepare("{'%s': 'abcd', 'subobject': { 'subkey': 'subvalue' }, 'subarray': [{'key': 'val'}, 2, 3], 'status': 'RUNNING'}"); - String actual = extract.apply(input); - Assert.assertEquals("abcd", actual); - } - - @Test - public void testDeepObjectWithIdLast() - { - ChildData input = prepare("{'subobject': { 'subkey': 'subvalue' }, 'subarray': [{'key': 'val'}, 2, 3], 'status': 'RUNNING', '%s': 'abcd'}"); - String actual = extract.apply(input); - Assert.assertEquals("abcd", actual); - } - - @Test - public void testDeepObjectWithIdInNestedOnly() - { - ChildData input = prepare("{'subobject': { '%s': 'defg' }, 'subarray': [{'key': 'val'}, 2, 3], 'status': 'RUNNING'}"); - String actual = extract.apply(input); - Assert.assertNull(actual); - } - - @Test - public void testDeepObjectWithIdInNestedAndOuter() - { - ChildData input = 
prepare("{'subobject': { '%s': 'defg' }, 'subarray': [{'key': 'val'}, 2, 3], 'status': 'RUNNING', '%1$s': 'abcd'}"); - String actual = extract.apply(input); - Assert.assertEquals("abcd", actual); - } - - @Test - public void testIdWithWrongTypeReturnsNull() - { - ChildData input = prepare("{'%s': {'nested': 'obj'}'"); - String actual = extract.apply(input); - Assert.assertNull(actual); - } - - @Test - public void testCanReadIdFromAJacksonSerializedTaskAnnouncement() throws JsonProcessingException - { - Task task0 = NoopTask.create(); - TaskAnnouncement taskAnnouncement = TaskAnnouncement.create( - task0, - TaskStatus.running(task0.getId()), - TaskLocation.unknown() - ); - - ObjectMapper objectMapper = new ObjectMapper(); - - byte[] serialized = objectMapper.writeValueAsBytes(taskAnnouncement); - - ChildData zkNode = new ChildData("/a/b/c", new Stat(), serialized); - - String actualExtractedTaskId = extract.apply(zkNode); - Assert.assertEquals(task0.getId(), actualExtractedTaskId); - } -} diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/autoscaling/PendingTaskBasedProvisioningStrategyTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/autoscaling/PendingTaskBasedProvisioningStrategyTest.java index eaba6f9e6f9a..69867b1ba38b 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/autoscaling/PendingTaskBasedProvisioningStrategyTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/autoscaling/PendingTaskBasedProvisioningStrategyTest.java @@ -29,17 +29,15 @@ import org.apache.druid.indexing.common.task.NoopTask; import org.apache.druid.indexing.common.task.Task; import org.apache.druid.indexing.overlord.ImmutableWorkerInfo; -import org.apache.druid.indexing.overlord.RemoteTaskRunner; import org.apache.druid.indexing.overlord.RemoteTaskRunnerWorkItem; -import org.apache.druid.indexing.overlord.ZkWorker; -import 
org.apache.druid.indexing.overlord.config.RemoteTaskRunnerConfig; +import org.apache.druid.indexing.overlord.config.HttpRemoteTaskRunnerConfig; +import org.apache.druid.indexing.overlord.hrtr.HttpRemoteTaskRunner; import org.apache.druid.indexing.overlord.setup.DefaultWorkerBehaviorConfig; import org.apache.druid.indexing.overlord.setup.FillCapacityWorkerSelectStrategy; import org.apache.druid.indexing.overlord.setup.WorkerBehaviorConfig; import org.apache.druid.indexing.worker.TaskAnnouncement; import org.apache.druid.indexing.worker.Worker; import org.apache.druid.indexing.worker.config.WorkerConfig; -import org.apache.druid.jackson.DefaultObjectMapper; import org.apache.druid.java.util.common.DateTimes; import org.apache.druid.java.util.common.concurrent.Execs; import org.apache.druid.java.util.emitter.EmittingLogger; @@ -240,7 +238,7 @@ public void testSuccessfulInitialMinWorkersProvision() EasyMock.expect(autoScaler.getMaxNumWorkers()).andReturn(5); EasyMock.expect(autoScaler.ipToIdLookup(EasyMock.anyObject())) .andReturn(new ArrayList()); - RemoteTaskRunner runner = EasyMock.createMock(RemoteTaskRunner.class); + HttpRemoteTaskRunner runner = EasyMock.createMock(HttpRemoteTaskRunner.class); // No pending tasks EasyMock.expect(runner.getPendingTaskPayloads()).andReturn( new ArrayList<>() @@ -248,7 +246,7 @@ public void testSuccessfulInitialMinWorkersProvision() EasyMock.expect(runner.getWorkers()).andReturn( Collections.emptyList() ); - EasyMock.expect(runner.getConfig()).andReturn(new RemoteTaskRunnerConfig()); + EasyMock.expect(runner.getConfig()).andReturn(new HttpRemoteTaskRunnerConfig()); EasyMock.expect(autoScaler.provision()).andReturn( new AutoScalingData(Collections.singletonList("aNode")) ).times(3); @@ -291,7 +289,7 @@ public ScheduledExecutorService get() EasyMock.expect(autoScaler.getMaxNumWorkers()).andReturn(5); EasyMock.expect(autoScaler.ipToIdLookup(EasyMock.anyObject())) .andReturn(new ArrayList()); - RemoteTaskRunner runner = 
EasyMock.createMock(RemoteTaskRunner.class); + HttpRemoteTaskRunner runner = EasyMock.createMock(HttpRemoteTaskRunner.class); // No pending tasks EasyMock.expect(runner.getPendingTaskPayloads()).andReturn( new ArrayList<>() @@ -299,7 +297,7 @@ public ScheduledExecutorService get() EasyMock.expect(runner.getWorkers()).andReturn( Collections.emptyList() ); - EasyMock.expect(runner.getConfig()).andReturn(new RemoteTaskRunnerConfig()); + EasyMock.expect(runner.getConfig()).andReturn(new HttpRemoteTaskRunnerConfig()); EasyMock.expect(autoScaler.provision()).andReturn( new AutoScalingData(Collections.singletonList("aNode")) ).times(3); @@ -343,7 +341,7 @@ public ScheduledExecutorService get() EasyMock.expect(autoScaler.getMaxNumWorkers()).andReturn(5); EasyMock.expect(autoScaler.ipToIdLookup(EasyMock.anyObject())) .andReturn(new ArrayList()); - RemoteTaskRunner runner = EasyMock.createMock(RemoteTaskRunner.class); + HttpRemoteTaskRunner runner = EasyMock.createMock(HttpRemoteTaskRunner.class); // No pending tasks EasyMock.expect(runner.getPendingTaskPayloads()).andReturn( new ArrayList<>() @@ -351,7 +349,7 @@ public ScheduledExecutorService get() EasyMock.expect(runner.getWorkers()).andReturn( Collections.emptyList() ); - EasyMock.expect(runner.getConfig()).andReturn(new RemoteTaskRunnerConfig()); + EasyMock.expect(runner.getConfig()).andReturn(new HttpRemoteTaskRunnerConfig()); EasyMock.replay(runner, autoScaler); Provisioner provisioner = strategy.makeProvisioner(runner); boolean provisionedSomething = provisioner.doProvision(); @@ -366,7 +364,7 @@ public void testSuccessfulMinWorkersProvision() EasyMock.expect(autoScaler.getMaxNumWorkers()).andReturn(5); EasyMock.expect(autoScaler.ipToIdLookup(EasyMock.anyObject())) .andReturn(new ArrayList()); - RemoteTaskRunner runner = EasyMock.createMock(RemoteTaskRunner.class); + HttpRemoteTaskRunner runner = EasyMock.createMock(HttpRemoteTaskRunner.class); // No pending tasks 
EasyMock.expect(runner.getPendingTaskPayloads()).andReturn( new ArrayList<>() @@ -374,10 +372,10 @@ public void testSuccessfulMinWorkersProvision() // 1 node already running, only provision 2 more. EasyMock.expect(runner.getWorkers()).andReturn( Collections.singletonList( - new TestZkWorker(testTask).toImmutable() + workerWithTask(testTask) ) ); - EasyMock.expect(runner.getConfig()).andReturn(new RemoteTaskRunnerConfig()); + EasyMock.expect(runner.getConfig()).andReturn(new HttpRemoteTaskRunnerConfig()); EasyMock.expect(autoScaler.provision()).andReturn( new AutoScalingData(Collections.singletonList("aNode")) ).times(2); @@ -400,7 +398,7 @@ public void testSuccessfulMinWorkersProvisionWithOldVersionNodeRunning() EasyMock.expect(autoScaler.getMaxNumWorkers()).andReturn(5); EasyMock.expect(autoScaler.ipToIdLookup(EasyMock.anyObject())) .andReturn(new ArrayList()); - RemoteTaskRunner runner = EasyMock.createMock(RemoteTaskRunner.class); + HttpRemoteTaskRunner runner = EasyMock.createMock(HttpRemoteTaskRunner.class); // No pending tasks EasyMock.expect(runner.getPendingTaskPayloads()).andReturn( new ArrayList<>() @@ -408,11 +406,11 @@ public void testSuccessfulMinWorkersProvisionWithOldVersionNodeRunning() // 1 node already running, only provision 2 more. 
EasyMock.expect(runner.getWorkers()).andReturn( Arrays.asList( - new TestZkWorker(testTask).toImmutable(), - new TestZkWorker(testTask, "http", "h1", "n1", INVALID_VERSION).toImmutable() // Invalid version node + workerWithTask(testTask), + workerWithTask(testTask, "http", "h1", "n1", INVALID_VERSION) // Invalid version node ) ); - EasyMock.expect(runner.getConfig()).andReturn(new RemoteTaskRunnerConfig()); + EasyMock.expect(runner.getConfig()).andReturn(new HttpRemoteTaskRunnerConfig()); EasyMock.expect(autoScaler.provision()).andReturn( new AutoScalingData(Collections.singletonList("aNode")) ).times(2); @@ -438,7 +436,7 @@ public void testProvisioning() EasyMock.expect(autoScaler.provision()).andReturn( new AutoScalingData(Collections.singletonList("fake")) ); - RemoteTaskRunner runner = EasyMock.createMock(RemoteTaskRunner.class); + HttpRemoteTaskRunner runner = EasyMock.createMock(HttpRemoteTaskRunner.class); EasyMock.expect(runner.getPendingTaskPayloads()).andReturn( Collections.singletonList( NoopTask.create() @@ -446,11 +444,11 @@ public void testProvisioning() ).times(2); EasyMock.expect(runner.getWorkers()).andReturn( Arrays.asList( - new TestZkWorker(testTask).toImmutable(), - new TestZkWorker(testTask, "http", "h1", "n1", INVALID_VERSION).toImmutable() // Invalid version node + workerWithTask(testTask), + workerWithTask(testTask, "http", "h1", "n1", INVALID_VERSION) // Invalid version node ) ).times(2); - EasyMock.expect(runner.getConfig()).andReturn(new RemoteTaskRunnerConfig()).times(1); + EasyMock.expect(runner.getConfig()).andReturn(new HttpRemoteTaskRunnerConfig()).times(1); EasyMock.replay(runner); EasyMock.replay(autoScaler); @@ -509,7 +507,7 @@ public ScheduledExecutorService get() EasyMock.expect(autoScaler.provision()).andReturn( new AutoScalingData(Collections.singletonList("fake")) ).times(2); - RemoteTaskRunner runner = EasyMock.createMock(RemoteTaskRunner.class); + HttpRemoteTaskRunner runner = 
EasyMock.createMock(HttpRemoteTaskRunner.class); // two pending tasks EasyMock.expect(runner.getPendingTaskPayloads()).andReturn( ImmutableList.of( @@ -520,11 +518,11 @@ public ScheduledExecutorService get() // Capacity for current worker is 1 EasyMock.expect(runner.getWorkers()).andReturn( Arrays.asList( - new TestZkWorker(testTask).toImmutable(), - new TestZkWorker(testTask, "http", "h1", "n1", INVALID_VERSION).toImmutable() // Invalid version node + workerWithTask(testTask), + workerWithTask(testTask, "http", "h1", "n1", INVALID_VERSION) // Invalid version node ) ).times(2); - EasyMock.expect(runner.getConfig()).andReturn(new RemoteTaskRunnerConfig()).times(1); + EasyMock.expect(runner.getConfig()).andReturn(new HttpRemoteTaskRunnerConfig()).times(1); EasyMock.replay(runner); EasyMock.replay(autoScaler); @@ -584,7 +582,7 @@ public ScheduledExecutorService get() EasyMock.expect(autoScaler.provision()).andReturn( new AutoScalingData(Collections.singletonList("fake")) ).times(1); - RemoteTaskRunner runner = EasyMock.createMock(RemoteTaskRunner.class); + HttpRemoteTaskRunner runner = EasyMock.createMock(HttpRemoteTaskRunner.class); // two pending tasks EasyMock.expect(runner.getPendingTaskPayloads()).andReturn( ImmutableList.of( @@ -597,7 +595,7 @@ public ScheduledExecutorService get() Collections.emptyList() ).times(2); - EasyMock.expect(runner.getConfig()).andReturn(new RemoteTaskRunnerConfig()).times(1); + EasyMock.expect(runner.getConfig()).andReturn(new HttpRemoteTaskRunnerConfig()).times(1); EasyMock.replay(runner); EasyMock.replay(autoScaler); @@ -645,7 +643,7 @@ public void testProvisionAlert() throws Exception new AutoScalingData(Collections.singletonList("fake")) ); EasyMock.replay(autoScaler); - RemoteTaskRunner runner = EasyMock.createMock(RemoteTaskRunner.class); + HttpRemoteTaskRunner runner = EasyMock.createMock(HttpRemoteTaskRunner.class); EasyMock.expect(runner.getPendingTaskPayloads()).andReturn( Collections.singletonList( NoopTask.create() @@ 
-653,12 +651,12 @@ public void testProvisionAlert() throws Exception ).times(2); EasyMock.expect(runner.getWorkers()).andReturn( Arrays.asList( - new TestZkWorker(testTask, "http", "hi", "lo", MIN_VERSION, 1).toImmutable(), - new TestZkWorker(testTask, "http", "h1", "n1", INVALID_VERSION).toImmutable(), // Invalid version node - new TestZkWorker(testTask, "http", "h2", "n1", INVALID_VERSION).toImmutable() // Invalid version node + workerWithTask(testTask, "http", "hi", "lo", MIN_VERSION, 1), + workerWithTask(testTask, "http", "h1", "n1", INVALID_VERSION), // Invalid version node + workerWithTask(testTask, "http", "h2", "n1", INVALID_VERSION) // Invalid version node ) ).times(2); - EasyMock.expect(runner.getConfig()).andReturn(new RemoteTaskRunnerConfig()); + EasyMock.expect(runner.getConfig()).andReturn(new HttpRemoteTaskRunnerConfig()); EasyMock.replay(runner); Provisioner provisioner = strategy.makeProvisioner(runner); @@ -699,9 +697,9 @@ public void testDoSuccessfulTerminate() new AutoScalingData(new ArrayList<>()) ); EasyMock.replay(autoScaler); - RemoteTaskRunner runner = EasyMock.createMock(RemoteTaskRunner.class); + HttpRemoteTaskRunner runner = EasyMock.createMock(HttpRemoteTaskRunner.class); EasyMock.expect(runner.getPendingTasks()).andReturn( - Collections.singletonList( + (Collection) Collections.singletonList( new RemoteTaskRunnerWorkItem( testTask.getId(), testTask.getType(), @@ -713,12 +711,12 @@ public void testDoSuccessfulTerminate() ).times(2); EasyMock.expect(runner.getWorkers()).andReturn( ImmutableList.of( - new TestZkWorker(testTask).toImmutable(), - new TestZkWorker(testTask).toImmutable() + workerWithTask(testTask), + workerWithTask(testTask) ) ).times(2); EasyMock.expect(runner.markWorkersLazy(EasyMock.anyObject(), EasyMock.anyInt())) - .andReturn(Collections.singletonList(new TestZkWorker(testTask).getWorker())); + .andReturn(Collections.singletonList(workerWithTask(testTask).getWorker())); 
EasyMock.expect(runner.getLazyWorkers()).andReturn(new ArrayList<>()); EasyMock.replay(runner); @@ -745,17 +743,17 @@ public void testSomethingTerminating() ); EasyMock.replay(autoScaler); - RemoteTaskRunner runner = EasyMock.createMock(RemoteTaskRunner.class); + HttpRemoteTaskRunner runner = EasyMock.createMock(HttpRemoteTaskRunner.class); EasyMock.expect(runner.getWorkers()).andReturn( ImmutableList.of( - new TestZkWorker(testTask).toImmutable(), - new TestZkWorker(testTask).toImmutable(), - new TestZkWorker(testTask).toImmutable() + workerWithTask(testTask), + workerWithTask(testTask), + workerWithTask(testTask) ) ).times(2); EasyMock.expect(runner.getLazyWorkers()).andReturn(new ArrayList<>()).times(2); EasyMock.expect(runner.markWorkersLazy(EasyMock.anyObject(), EasyMock.anyInt())) - .andReturn(Collections.singletonList(new TestZkWorker(testTask).toImmutable().getWorker())); + .andReturn(Collections.singletonList(workerWithTask(testTask).getWorker())); EasyMock.replay(runner); Provisioner provisioner = strategy.makeProvisioner(runner); @@ -788,7 +786,7 @@ public void testNoActionNeeded() .andReturn(Collections.singletonList("ip")); EasyMock.replay(autoScaler); - RemoteTaskRunner runner = EasyMock.createMock(RemoteTaskRunner.class); + HttpRemoteTaskRunner runner = EasyMock.createMock(HttpRemoteTaskRunner.class); EasyMock.expect(runner.getPendingTaskPayloads()).andReturn( Collections.singletonList( (Task) NoopTask.create() @@ -796,11 +794,11 @@ public void testNoActionNeeded() ).times(1); EasyMock.expect(runner.getWorkers()).andReturn( Arrays.asList( - new TestZkWorker(NoopTask.create()).toImmutable(), - new TestZkWorker(NoopTask.create()).toImmutable() + workerWithTask(NoopTask.create()), + workerWithTask(NoopTask.create()) ) ).times(2); - EasyMock.expect(runner.getConfig()).andReturn(new RemoteTaskRunnerConfig()); + EasyMock.expect(runner.getConfig()).andReturn(new HttpRemoteTaskRunnerConfig()); EasyMock.expect(runner.getLazyWorkers()).andReturn(new 
ArrayList<>()); EasyMock.expect(runner.markWorkersLazy(EasyMock.anyObject(), EasyMock.anyInt())) @@ -836,16 +834,16 @@ public void testMinCountIncrease() EasyMock.expect(autoScaler.ipToIdLookup(EasyMock.anyObject())) .andReturn(Collections.singletonList("ip")); EasyMock.replay(autoScaler); - RemoteTaskRunner runner = EasyMock.createMock(RemoteTaskRunner.class); + HttpRemoteTaskRunner runner = EasyMock.createMock(HttpRemoteTaskRunner.class); EasyMock.expect(runner.getPendingTaskPayloads()).andReturn( Collections.emptyList() ).times(2); EasyMock.expect(runner.getWorkers()).andReturn( Collections.singletonList( - new TestZkWorker(NoopTask.create(), "http", "h1", "i1", MIN_VERSION).toImmutable() + workerWithTask(NoopTask.create(), "http", "h1", "i1", MIN_VERSION) ) ).times(3); - EasyMock.expect(runner.getConfig()).andReturn(new RemoteTaskRunnerConfig()).times(2); + EasyMock.expect(runner.getConfig()).andReturn(new HttpRemoteTaskRunnerConfig()).times(2); EasyMock.expect(runner.getLazyWorkers()).andReturn(new ArrayList<>()); EasyMock.expect(runner.markWorkersLazy(EasyMock.anyObject(), EasyMock.anyInt())) @@ -894,7 +892,7 @@ public void testNullWorkerConfig() workerConfig.set(null); EasyMock.replay(autoScaler); - RemoteTaskRunner runner = EasyMock.createMock(RemoteTaskRunner.class); + HttpRemoteTaskRunner runner = EasyMock.createMock(HttpRemoteTaskRunner.class); EasyMock.expect(runner.getPendingTaskPayloads()).andReturn( Collections.singletonList( NoopTask.create() @@ -902,7 +900,7 @@ public void testNullWorkerConfig() ).times(1); EasyMock.expect(runner.getWorkers()).andReturn( Collections.singletonList( - new TestZkWorker(null).toImmutable() + workerWithTask(null) ) ).times(2); EasyMock.replay(runner); @@ -919,56 +917,38 @@ public void testNullWorkerConfig() EasyMock.verify(runner); } - private static class TestZkWorker extends ZkWorker + private static ImmutableWorkerInfo workerWithTask(Task task) { - private final Task testTask; - - public TestZkWorker( - Task testTask 
- ) - { - this(testTask, "http", "host", "ip", MIN_VERSION); - } - - public TestZkWorker( - Task testTask, - String scheme, - String host, - String ip, - String version - ) - { - this(testTask, scheme, host, ip, version, 1); - } + return workerWithTask(task, "http", "host", "ip", MIN_VERSION, 1); + } - public TestZkWorker( - Task testTask, - String scheme, - String host, - String ip, - String version, - int capacity - ) - { - super(new Worker(scheme, host, ip, capacity, version, WorkerConfig.DEFAULT_CATEGORY), null, new DefaultObjectMapper()); - - this.testTask = testTask; - } + private static ImmutableWorkerInfo workerWithTask( + Task task, + String scheme, + String host, + String ip, + String version + ) + { + return workerWithTask(task, scheme, host, ip, version, 1); + } - @Override - public Map getRunningTasks() - { - if (testTask == null) { - return new HashMap<>(); - } - return ImmutableMap.of( - testTask.getId(), - TaskAnnouncement.create( - testTask, - TaskStatus.running(testTask.getId()), - TaskLocation.unknown() - ) - ); - } + private static ImmutableWorkerInfo workerWithTask( + Task task, + String scheme, + String host, + String ip, + String version, + int capacity + ) + { + Worker worker = new Worker(scheme, host, ip, capacity, version, WorkerConfig.DEFAULT_CATEGORY); + Map running = task == null + ? 
new HashMap<>() + : ImmutableMap.of( + task.getId(), + TaskAnnouncement.create(task, TaskStatus.running(task.getId()), TaskLocation.unknown()) + ); + return ImmutableWorkerInfo.fromWorkerAnnouncements(worker, running, DateTimes.EPOCH, null); } } diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/autoscaling/SimpleProvisioningStrategyTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/autoscaling/SimpleProvisioningStrategyTest.java index 0799b3ba6e43..05b5f7ff1657 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/autoscaling/SimpleProvisioningStrategyTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/autoscaling/SimpleProvisioningStrategyTest.java @@ -27,15 +27,14 @@ import org.apache.druid.indexing.common.TestTasks; import org.apache.druid.indexing.common.task.NoopTask; import org.apache.druid.indexing.common.task.Task; -import org.apache.druid.indexing.overlord.RemoteTaskRunner; +import org.apache.druid.indexing.overlord.ImmutableWorkerInfo; import org.apache.druid.indexing.overlord.RemoteTaskRunnerWorkItem; -import org.apache.druid.indexing.overlord.ZkWorker; +import org.apache.druid.indexing.overlord.hrtr.HttpRemoteTaskRunner; import org.apache.druid.indexing.overlord.setup.DefaultWorkerBehaviorConfig; import org.apache.druid.indexing.overlord.setup.WorkerBehaviorConfig; import org.apache.druid.indexing.worker.TaskAnnouncement; import org.apache.druid.indexing.worker.Worker; import org.apache.druid.indexing.worker.config.WorkerConfig; -import org.apache.druid.jackson.DefaultObjectMapper; import org.apache.druid.java.util.common.DateTimes; import org.apache.druid.java.util.common.concurrent.Execs; import org.apache.druid.java.util.emitter.EmittingLogger; @@ -51,6 +50,7 @@ import java.util.ArrayList; import java.util.Arrays; +import java.util.Collection; import java.util.Collections; import java.util.HashMap; import java.util.Map; @@ -120,16 +120,16 @@ 
public void testSuccessfulProvision() EasyMock.expect(autoScaler.provision()).andReturn( new AutoScalingData(Collections.singletonList("aNode")) ); - RemoteTaskRunner runner = EasyMock.createMock(RemoteTaskRunner.class); + HttpRemoteTaskRunner runner = EasyMock.createMock(HttpRemoteTaskRunner.class); EasyMock.expect(runner.getPendingTasks()).andReturn( - Collections.singletonList( + (Collection) Collections.singletonList( new RemoteTaskRunnerWorkItem(testTask.getId(), testTask.getType(), null, null, testTask.getDataSource()) .withQueueInsertionTime(DateTimes.nowUtc()) ) ); EasyMock.expect(runner.getWorkers()).andReturn( Collections.singletonList( - new TestZkWorker(testTask).toImmutable() + workerWithTask(testTask) ) ); EasyMock.replay(runner); @@ -158,16 +158,16 @@ public void testSomethingProvisioning() EasyMock.expect(autoScaler.provision()).andReturn( new AutoScalingData(Collections.singletonList("fake")) ); - RemoteTaskRunner runner = EasyMock.createMock(RemoteTaskRunner.class); + HttpRemoteTaskRunner runner = EasyMock.createMock(HttpRemoteTaskRunner.class); EasyMock.expect(runner.getPendingTasks()).andReturn( - Collections.singletonList( + (Collection) Collections.singletonList( new RemoteTaskRunnerWorkItem(testTask.getId(), testTask.getType(), null, null, testTask.getDataSource()) .withQueueInsertionTime(DateTimes.nowUtc()) ) ).times(2); EasyMock.expect(runner.getWorkers()).andReturn( Collections.singletonList( - new TestZkWorker(testTask).toImmutable() + workerWithTask(testTask) ) ).times(2); EasyMock.replay(runner); @@ -217,16 +217,16 @@ public void testProvisionAlert() throws Exception new AutoScalingData(Collections.singletonList("fake")) ); EasyMock.replay(autoScaler); - RemoteTaskRunner runner = EasyMock.createMock(RemoteTaskRunner.class); + HttpRemoteTaskRunner runner = EasyMock.createMock(HttpRemoteTaskRunner.class); EasyMock.expect(runner.getPendingTasks()).andReturn( - Collections.singletonList( + (Collection) Collections.singletonList( new 
RemoteTaskRunnerWorkItem(testTask.getId(), testTask.getType(), null, null, testTask.getDataSource()) .withQueueInsertionTime(DateTimes.nowUtc()) ) ).times(2); EasyMock.expect(runner.getWorkers()).andReturn( Collections.singletonList( - new TestZkWorker(testTask).toImmutable() + workerWithTask(testTask) ) ).times(2); EasyMock.replay(runner); @@ -270,20 +270,20 @@ public void testDoSuccessfulTerminate() new AutoScalingData(new ArrayList<>()) ); EasyMock.replay(autoScaler); - RemoteTaskRunner runner = EasyMock.createMock(RemoteTaskRunner.class); + HttpRemoteTaskRunner runner = EasyMock.createMock(HttpRemoteTaskRunner.class); EasyMock.expect(runner.getPendingTasks()).andReturn( - Collections.singletonList( + (Collection) Collections.singletonList( new RemoteTaskRunnerWorkItem(testTask.getId(), testTask.getType(), null, null, testTask.getDataSource()) .withQueueInsertionTime(DateTimes.nowUtc()) ) ).times(2); EasyMock.expect(runner.getWorkers()).andReturn( Collections.singletonList( - new TestZkWorker(testTask).toImmutable() + workerWithTask(testTask) ) ).times(2); EasyMock.expect(runner.markWorkersLazy(EasyMock.anyObject(), EasyMock.anyInt())) - .andReturn(Collections.singletonList(new TestZkWorker(testTask).getWorker())); + .andReturn(Collections.singletonList(workerWithTask(testTask).getWorker())); EasyMock.expect(runner.getLazyWorkers()).andReturn(new ArrayList<>()); EasyMock.replay(runner); @@ -311,21 +311,21 @@ public void testSomethingTerminating() ); EasyMock.replay(autoScaler); - RemoteTaskRunner runner = EasyMock.createMock(RemoteTaskRunner.class); + HttpRemoteTaskRunner runner = EasyMock.createMock(HttpRemoteTaskRunner.class); EasyMock.expect(runner.getPendingTasks()).andReturn( - Collections.singletonList( + (Collection) Collections.singletonList( new RemoteTaskRunnerWorkItem(testTask.getId(), testTask.getType(), null, null, testTask.getDataSource()) .withQueueInsertionTime(DateTimes.nowUtc()) ) ).times(2); EasyMock.expect(runner.getWorkers()).andReturn( 
Collections.singletonList( - new TestZkWorker(testTask).toImmutable() + workerWithTask(testTask) ) ).times(2); EasyMock.expect(runner.getLazyWorkers()).andReturn(new ArrayList<>()).times(2); EasyMock.expect(runner.markWorkersLazy(EasyMock.anyObject(), EasyMock.anyInt())) - .andReturn(Collections.singletonList(new TestZkWorker(testTask).getWorker())); + .andReturn(Collections.singletonList(workerWithTask(testTask).getWorker())); EasyMock.replay(runner); Provisioner provisioner = strategy.makeProvisioner(runner); @@ -359,17 +359,17 @@ public void testNoActionNeeded() .andReturn(Collections.singletonList("ip")); EasyMock.replay(autoScaler); - RemoteTaskRunner runner = EasyMock.createMock(RemoteTaskRunner.class); + HttpRemoteTaskRunner runner = EasyMock.createMock(HttpRemoteTaskRunner.class); EasyMock.expect(runner.getPendingTasks()).andReturn( - Collections.singletonList( + (Collection) Collections.singletonList( new RemoteTaskRunnerWorkItem(testTask.getId(), testTask.getType(), null, null, testTask.getDataSource()) .withQueueInsertionTime(DateTimes.nowUtc()) ) ).times(2); EasyMock.expect(runner.getWorkers()).andReturn( Arrays.asList( - new TestZkWorker(NoopTask.create()).toImmutable(), - new TestZkWorker(NoopTask.create()).toImmutable() + workerWithTask(NoopTask.create()), + workerWithTask(NoopTask.create()) ) ).times(2); EasyMock.expect(runner.getLazyWorkers()).andReturn(new ArrayList<>()); @@ -407,13 +407,13 @@ public void testMinCountIncrease() EasyMock.expect(autoScaler.ipToIdLookup(EasyMock.anyObject())) .andReturn(Collections.singletonList("ip")); EasyMock.replay(autoScaler); - RemoteTaskRunner runner = EasyMock.createMock(RemoteTaskRunner.class); + HttpRemoteTaskRunner runner = EasyMock.createMock(HttpRemoteTaskRunner.class); EasyMock.expect(runner.getPendingTasks()).andReturn( Collections.emptyList() ).times(3); EasyMock.expect(runner.getWorkers()).andReturn( Collections.singletonList( - new TestZkWorker(NoopTask.create(), "http", "h1", "i1", 
"0").toImmutable() + workerWithTask(NoopTask.create(), "http", "h1", "i1", "0") ) ).times(3); EasyMock.expect(runner.getLazyWorkers()).andReturn(new ArrayList<>()); @@ -463,16 +463,16 @@ public void testNullWorkerConfig() workerConfig.set(null); EasyMock.replay(autoScaler); - RemoteTaskRunner runner = EasyMock.createMock(RemoteTaskRunner.class); + HttpRemoteTaskRunner runner = EasyMock.createMock(HttpRemoteTaskRunner.class); EasyMock.expect(runner.getPendingTasks()).andReturn( - Collections.singletonList( + (Collection) Collections.singletonList( new RemoteTaskRunnerWorkItem(testTask.getId(), testTask.getType(), null, null, testTask.getDataSource()) .withQueueInsertionTime(DateTimes.nowUtc()) ) ).times(2); EasyMock.expect(runner.getWorkers()).andReturn( Collections.singletonList( - new TestZkWorker(null).toImmutable() + workerWithTask(null) ) ).times(1); EasyMock.replay(runner); @@ -489,44 +489,26 @@ public void testNullWorkerConfig() EasyMock.verify(runner); } - private static class TestZkWorker extends ZkWorker + private static ImmutableWorkerInfo workerWithTask(Task task) + { + return workerWithTask(task, "http", "host", "ip", "0"); + } + + private static ImmutableWorkerInfo workerWithTask( + Task task, + String scheme, + String host, + String ip, + String version + ) { - private final Task testTask; - - public TestZkWorker( - Task testTask - ) - { - this(testTask, "http", "host", "ip", "0"); - } - - public TestZkWorker( - Task testTask, - String scheme, - String host, - String ip, - String version - ) - { - super(new Worker(scheme, host, ip, 3, version, WorkerConfig.DEFAULT_CATEGORY), null, new DefaultObjectMapper()); - - this.testTask = testTask; - } - - @Override - public Map getRunningTasks() - { - if (testTask == null) { - return new HashMap<>(); - } - return ImmutableMap.of( - testTask.getId(), - TaskAnnouncement.create( - testTask, - TaskStatus.running(testTask.getId()), - TaskLocation.unknown() - ) - ); - } + Worker worker = new Worker(scheme, host, ip, 
3, version, WorkerConfig.DEFAULT_CATEGORY); + Map running = task == null + ? new HashMap<>() + : ImmutableMap.of( + task.getId(), + TaskAnnouncement.create(task, TaskStatus.running(task.getId()), TaskLocation.unknown()) + ); + return ImmutableWorkerInfo.fromWorkerAnnouncements(worker, running, DateTimes.EPOCH, null); } } diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/config/RemoteTaskRunnerConfigTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/config/RemoteTaskRunnerConfigTest.java deleted file mode 100644 index 1cc72346a0a2..000000000000 --- a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/config/RemoteTaskRunnerConfigTest.java +++ /dev/null @@ -1,880 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.druid.indexing.overlord.config; - -import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.common.collect.ImmutableList; -import com.google.inject.Injector; -import com.google.inject.ProvisionException; -import org.apache.druid.guice.GuiceInjectors; -import org.apache.druid.guice.IndexingServiceModuleHelper; -import org.apache.druid.guice.JsonConfigProvider; -import org.apache.druid.guice.JsonConfigurator; -import org.apache.druid.jackson.DefaultObjectMapper; -import org.joda.time.Period; -import org.junit.Assert; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.ExpectedException; - -import java.io.IOException; -import java.util.HashMap; -import java.util.Map; -import java.util.Properties; - -public class RemoteTaskRunnerConfigTest -{ - @Rule - public ExpectedException expectedException = ExpectedException.none(); - - private static final ObjectMapper MAPPER = new DefaultObjectMapper(); - private static final Period DEFAULT_TIMEOUT = Period.ZERO; - private static final String DEFAULT_VERSION = ""; - private static final long DEFAULT_MAX_ZNODE = 10 * 1024; - private static final int DEFAULT_PENDING_TASKS_RUNNER_NUM_THREADS = 5; - private static final int DEFAULT_MAX_RETRIES_BEFORE_BLACKLIST = 5; - private static final Period DEFAULT_TASK_BACKOFF = new Period("PT10M"); - private static final Period DEFAULT_BLACKLIST_CLEANUP_PERIOD = new Period("PT5M"); - - @Test - public void testIsJsonConfiguratable() - { - JsonConfigurator.verifyClazzIsConfigurable(MAPPER, RemoteTaskRunnerConfig.class, null); - } - - @Test - public void testGetTaskAssignmentTimeout() throws Exception - { - final Period timeout = Period.hours(1); - Assert.assertEquals( - timeout, - reflect(generateRemoteTaskRunnerConfig( - timeout, - DEFAULT_TIMEOUT, - DEFAULT_VERSION, - DEFAULT_MAX_ZNODE, - DEFAULT_TIMEOUT, - DEFAULT_PENDING_TASKS_RUNNER_NUM_THREADS, - DEFAULT_MAX_RETRIES_BEFORE_BLACKLIST, - DEFAULT_TASK_BACKOFF, - 
DEFAULT_BLACKLIST_CLEANUP_PERIOD - )).getTaskAssignmentTimeout() - ); - } - - @Test - public void testGetPendingTasksRunnerNumThreads() throws Exception - { - final int pendingTasksRunnerNumThreads = 20; - Assert.assertEquals( - pendingTasksRunnerNumThreads, - reflect(generateRemoteTaskRunnerConfig( - DEFAULT_TIMEOUT, - DEFAULT_TIMEOUT, - DEFAULT_VERSION, - DEFAULT_MAX_ZNODE, - DEFAULT_TIMEOUT, - pendingTasksRunnerNumThreads, - DEFAULT_MAX_RETRIES_BEFORE_BLACKLIST, - DEFAULT_TASK_BACKOFF, - DEFAULT_BLACKLIST_CLEANUP_PERIOD - )).getPendingTasksRunnerNumThreads() - ); - } - - @Test - public void testGetMinWorkerVersion() throws Exception - { - final String version = "some version"; - Assert.assertEquals( - version, - reflect(generateRemoteTaskRunnerConfig( - DEFAULT_TIMEOUT, - DEFAULT_TIMEOUT, - version, - DEFAULT_MAX_ZNODE, - DEFAULT_TIMEOUT, - DEFAULT_PENDING_TASKS_RUNNER_NUM_THREADS, - DEFAULT_MAX_RETRIES_BEFORE_BLACKLIST, - DEFAULT_TASK_BACKOFF, - DEFAULT_BLACKLIST_CLEANUP_PERIOD - )).getMinWorkerVersion() - ); - } - - @Test - public void testGetMaxZnodeBytes() throws Exception - { - final long max = 20 * 1024; - Assert.assertEquals( - max, - reflect(generateRemoteTaskRunnerConfig( - DEFAULT_TIMEOUT, - DEFAULT_TIMEOUT, - DEFAULT_VERSION, - max, - DEFAULT_TIMEOUT, - DEFAULT_PENDING_TASKS_RUNNER_NUM_THREADS, - DEFAULT_MAX_RETRIES_BEFORE_BLACKLIST, - DEFAULT_TASK_BACKOFF, - DEFAULT_BLACKLIST_CLEANUP_PERIOD - )).getMaxZnodeBytes() - ); - } - - @Test - public void testGetTaskShutdownLinkTimeout() throws Exception - { - final Period timeout = Period.hours(1); - Assert.assertEquals( - timeout, - reflect(generateRemoteTaskRunnerConfig( - DEFAULT_TIMEOUT, - DEFAULT_TIMEOUT, - DEFAULT_VERSION, - DEFAULT_MAX_ZNODE, - timeout, - DEFAULT_PENDING_TASKS_RUNNER_NUM_THREADS, - DEFAULT_MAX_RETRIES_BEFORE_BLACKLIST, - DEFAULT_TASK_BACKOFF, - DEFAULT_BLACKLIST_CLEANUP_PERIOD - )).getTaskShutdownLinkTimeout() - ); - } - - @Test - public void testGetTaskCleanupTimeout() throws 
Exception - { - final Period timeout = Period.hours(1); - Assert.assertEquals( - timeout, - reflect(generateRemoteTaskRunnerConfig( - DEFAULT_TIMEOUT, - timeout, - DEFAULT_VERSION, - DEFAULT_MAX_ZNODE, - DEFAULT_TIMEOUT, - DEFAULT_PENDING_TASKS_RUNNER_NUM_THREADS, - DEFAULT_MAX_RETRIES_BEFORE_BLACKLIST, - DEFAULT_TASK_BACKOFF, - DEFAULT_BLACKLIST_CLEANUP_PERIOD - )).getTaskCleanupTimeout() - ); - } - - @Test - public void testGetMaxRetriesBeforeBlacklist() throws Exception - { - final int maxRetriesBeforeBlacklist = 2; - Assert.assertEquals( - maxRetriesBeforeBlacklist, - reflect(generateRemoteTaskRunnerConfig( - DEFAULT_TIMEOUT, - DEFAULT_TIMEOUT, - DEFAULT_VERSION, - DEFAULT_MAX_ZNODE, - DEFAULT_TIMEOUT, - DEFAULT_PENDING_TASKS_RUNNER_NUM_THREADS, - maxRetriesBeforeBlacklist, - DEFAULT_TASK_BACKOFF, - DEFAULT_BLACKLIST_CLEANUP_PERIOD - )).getMaxRetriesBeforeBlacklist() - ); - } - - @Test - public void testGetWorkerBlackListBackoffTime() throws Exception - { - final Period taskBlackListBackoffTime = new Period("PT1M"); - Assert.assertEquals( - taskBlackListBackoffTime, - reflect(generateRemoteTaskRunnerConfig( - DEFAULT_TIMEOUT, - DEFAULT_TIMEOUT, - DEFAULT_VERSION, - DEFAULT_MAX_ZNODE, - DEFAULT_TIMEOUT, - DEFAULT_PENDING_TASKS_RUNNER_NUM_THREADS, - DEFAULT_MAX_RETRIES_BEFORE_BLACKLIST, - taskBlackListBackoffTime, - DEFAULT_BLACKLIST_CLEANUP_PERIOD - )).getWorkerBlackListBackoffTime() - ); - } - - @Test - public void testGetTaskBlackListCleanupPeriod() throws Exception - { - final Period taskBlackListCleanupPeriod = Period.years(100); - Assert.assertEquals( - taskBlackListCleanupPeriod, - reflect(generateRemoteTaskRunnerConfig( - DEFAULT_TIMEOUT, - DEFAULT_TIMEOUT, - DEFAULT_VERSION, - DEFAULT_MAX_ZNODE, - DEFAULT_TIMEOUT, - DEFAULT_PENDING_TASKS_RUNNER_NUM_THREADS, - DEFAULT_MAX_RETRIES_BEFORE_BLACKLIST, - DEFAULT_TASK_BACKOFF, - taskBlackListCleanupPeriod - )).getWorkerBlackListCleanupPeriod() - ); - } - - @Test - public void testEquals() throws Exception - { - 
Assert.assertEquals( - reflect(generateRemoteTaskRunnerConfig( - DEFAULT_TIMEOUT, - DEFAULT_TIMEOUT, - DEFAULT_VERSION, - DEFAULT_MAX_ZNODE, - DEFAULT_TIMEOUT, - DEFAULT_PENDING_TASKS_RUNNER_NUM_THREADS, - DEFAULT_MAX_RETRIES_BEFORE_BLACKLIST, - DEFAULT_TASK_BACKOFF, - DEFAULT_BLACKLIST_CLEANUP_PERIOD - )), - reflect(generateRemoteTaskRunnerConfig( - DEFAULT_TIMEOUT, - DEFAULT_TIMEOUT, - DEFAULT_VERSION, - DEFAULT_MAX_ZNODE, - DEFAULT_TIMEOUT, - DEFAULT_PENDING_TASKS_RUNNER_NUM_THREADS, - DEFAULT_MAX_RETRIES_BEFORE_BLACKLIST, - DEFAULT_TASK_BACKOFF, - DEFAULT_BLACKLIST_CLEANUP_PERIOD - )) - ); - final Period timeout = Period.years(999); - final String version = "someVersion"; - final long max = 20 * 1024; - final int pendingTasksRunnerNumThreads = 20; - final int maxRetriesBeforeBlacklist = 1; - final Period taskBlackListBackoffTime = new Period("PT1M"); - final Period taskBlackListCleanupPeriod = Period.years(10); - Assert.assertEquals( - reflect(generateRemoteTaskRunnerConfig( - timeout, - timeout, - version, - max, - timeout, - pendingTasksRunnerNumThreads, - maxRetriesBeforeBlacklist, - taskBlackListBackoffTime, - taskBlackListCleanupPeriod - )), - reflect(generateRemoteTaskRunnerConfig( - timeout, - timeout, - version, - max, - timeout, - pendingTasksRunnerNumThreads, - maxRetriesBeforeBlacklist, - taskBlackListBackoffTime, - taskBlackListCleanupPeriod - )) - ); - Assert.assertNotEquals( - reflect(generateRemoteTaskRunnerConfig( - timeout, - timeout, - version, - max, - timeout, - pendingTasksRunnerNumThreads, - maxRetriesBeforeBlacklist, - taskBlackListBackoffTime, - taskBlackListCleanupPeriod - )), - reflect(generateRemoteTaskRunnerConfig( - DEFAULT_TIMEOUT, - timeout, - version, - max, - timeout, - pendingTasksRunnerNumThreads, - maxRetriesBeforeBlacklist, - taskBlackListBackoffTime, - taskBlackListCleanupPeriod - )) - ); - Assert.assertNotEquals( - reflect(generateRemoteTaskRunnerConfig( - timeout, - timeout, - version, - max, - timeout, - 
pendingTasksRunnerNumThreads, - maxRetriesBeforeBlacklist, - taskBlackListBackoffTime, - taskBlackListCleanupPeriod - )), - reflect(generateRemoteTaskRunnerConfig( - timeout, - DEFAULT_TIMEOUT, - version, - max, - timeout, - pendingTasksRunnerNumThreads, - maxRetriesBeforeBlacklist, - taskBlackListBackoffTime, - taskBlackListCleanupPeriod - )) - ); - Assert.assertNotEquals( - reflect(generateRemoteTaskRunnerConfig( - timeout, - timeout, - version, - max, - timeout, - pendingTasksRunnerNumThreads, - maxRetriesBeforeBlacklist, - taskBlackListBackoffTime, - taskBlackListCleanupPeriod - )), - reflect(generateRemoteTaskRunnerConfig( - timeout, - timeout, - DEFAULT_VERSION, - max, - timeout, - pendingTasksRunnerNumThreads, - maxRetriesBeforeBlacklist, - taskBlackListBackoffTime, - taskBlackListCleanupPeriod - )) - ); - - Assert.assertNotEquals( - reflect(generateRemoteTaskRunnerConfig( - timeout, - timeout, - version, - max, - timeout, - pendingTasksRunnerNumThreads, - maxRetriesBeforeBlacklist, - taskBlackListBackoffTime, - taskBlackListCleanupPeriod - )), - reflect(generateRemoteTaskRunnerConfig( - timeout, - timeout, - version, - DEFAULT_MAX_ZNODE, - timeout, - pendingTasksRunnerNumThreads, - maxRetriesBeforeBlacklist, - taskBlackListBackoffTime, - taskBlackListCleanupPeriod - )) - ); - - - Assert.assertNotEquals( - reflect(generateRemoteTaskRunnerConfig( - timeout, - timeout, - version, - max, - timeout, - pendingTasksRunnerNumThreads, - maxRetriesBeforeBlacklist, - taskBlackListBackoffTime, - taskBlackListCleanupPeriod - )), - reflect(generateRemoteTaskRunnerConfig( - timeout, - timeout, - version, - max, - DEFAULT_TIMEOUT, - pendingTasksRunnerNumThreads, - maxRetriesBeforeBlacklist, - taskBlackListBackoffTime, - taskBlackListCleanupPeriod - )) - ); - - Assert.assertNotEquals( - reflect(generateRemoteTaskRunnerConfig( - timeout, - timeout, - version, - max, - timeout, - pendingTasksRunnerNumThreads, - maxRetriesBeforeBlacklist, - taskBlackListBackoffTime, - 
taskBlackListCleanupPeriod - )), - reflect(generateRemoteTaskRunnerConfig( - timeout, - timeout, - version, - max, - timeout, - DEFAULT_PENDING_TASKS_RUNNER_NUM_THREADS, - maxRetriesBeforeBlacklist, - taskBlackListBackoffTime, - taskBlackListCleanupPeriod - )) - ); - - Assert.assertNotEquals( - reflect(generateRemoteTaskRunnerConfig( - timeout, - timeout, - version, - max, - timeout, - pendingTasksRunnerNumThreads, - maxRetriesBeforeBlacklist, - taskBlackListBackoffTime, - taskBlackListCleanupPeriod - )), - reflect(generateRemoteTaskRunnerConfig( - timeout, - timeout, - version, - max, - timeout, - pendingTasksRunnerNumThreads, - DEFAULT_MAX_RETRIES_BEFORE_BLACKLIST, - taskBlackListBackoffTime, - taskBlackListCleanupPeriod - )) - ); - - Assert.assertNotEquals( - reflect(generateRemoteTaskRunnerConfig( - timeout, - timeout, - version, - max, - timeout, - pendingTasksRunnerNumThreads, - maxRetriesBeforeBlacklist, - taskBlackListBackoffTime, - taskBlackListCleanupPeriod - )), - reflect(generateRemoteTaskRunnerConfig( - timeout, - timeout, - version, - max, - timeout, - pendingTasksRunnerNumThreads, - maxRetriesBeforeBlacklist, - DEFAULT_TASK_BACKOFF, - taskBlackListCleanupPeriod - )) - ); - - Assert.assertNotEquals( - reflect(generateRemoteTaskRunnerConfig( - timeout, - timeout, - version, - max, - timeout, - pendingTasksRunnerNumThreads, - maxRetriesBeforeBlacklist, - taskBlackListBackoffTime, - taskBlackListCleanupPeriod - )), - reflect(generateRemoteTaskRunnerConfig( - timeout, - timeout, - version, - max, - timeout, - pendingTasksRunnerNumThreads, - maxRetriesBeforeBlacklist, - taskBlackListBackoffTime, - DEFAULT_BLACKLIST_CLEANUP_PERIOD - )) - ); - } - - @Test - public void testHashCode() throws Exception - { - Assert.assertEquals( - reflect(generateRemoteTaskRunnerConfig( - DEFAULT_TIMEOUT, - DEFAULT_TIMEOUT, - DEFAULT_VERSION, - DEFAULT_MAX_ZNODE, - DEFAULT_TIMEOUT, - DEFAULT_PENDING_TASKS_RUNNER_NUM_THREADS, - DEFAULT_MAX_RETRIES_BEFORE_BLACKLIST, - 
DEFAULT_TASK_BACKOFF, - DEFAULT_BLACKLIST_CLEANUP_PERIOD - )).hashCode(), - reflect(generateRemoteTaskRunnerConfig( - DEFAULT_TIMEOUT, - DEFAULT_TIMEOUT, - DEFAULT_VERSION, - DEFAULT_MAX_ZNODE, - DEFAULT_TIMEOUT, - DEFAULT_PENDING_TASKS_RUNNER_NUM_THREADS, - DEFAULT_MAX_RETRIES_BEFORE_BLACKLIST, - DEFAULT_TASK_BACKOFF, - DEFAULT_BLACKLIST_CLEANUP_PERIOD - )).hashCode() - ); - final Period timeout = Period.years(999); - final String version = "someVersion"; - final long max = 20 * 1024; - final int pendingTasksRunnerNumThreads = 20; - final int maxRetriesBeforeBlacklist = 80; - final Period taskBlackListBackoffTime = new Period("PT1M"); - final Period taskBlackListCleanupPeriod = Period.years(10); - Assert.assertEquals( - reflect(generateRemoteTaskRunnerConfig( - timeout, - timeout, - version, - max, - timeout, - pendingTasksRunnerNumThreads, - maxRetriesBeforeBlacklist, - taskBlackListBackoffTime, - taskBlackListCleanupPeriod - )).hashCode(), - reflect(generateRemoteTaskRunnerConfig( - timeout, - timeout, - version, - max, - timeout, - pendingTasksRunnerNumThreads, - maxRetriesBeforeBlacklist, - taskBlackListBackoffTime, - taskBlackListCleanupPeriod - )).hashCode() - ); - Assert.assertNotEquals( - reflect(generateRemoteTaskRunnerConfig( - timeout, - timeout, - version, - max, - timeout, - pendingTasksRunnerNumThreads, - maxRetriesBeforeBlacklist, - taskBlackListBackoffTime, - taskBlackListCleanupPeriod - )).hashCode(), - reflect(generateRemoteTaskRunnerConfig( - DEFAULT_TIMEOUT, - timeout, - version, - max, - timeout, - pendingTasksRunnerNumThreads, - maxRetriesBeforeBlacklist, - taskBlackListBackoffTime, - taskBlackListCleanupPeriod - )).hashCode() - ); - Assert.assertNotEquals( - reflect(generateRemoteTaskRunnerConfig( - timeout, - timeout, - version, - max, - timeout, - pendingTasksRunnerNumThreads, - maxRetriesBeforeBlacklist, - taskBlackListBackoffTime, - taskBlackListCleanupPeriod - )).hashCode(), - reflect(generateRemoteTaskRunnerConfig( - timeout, - 
DEFAULT_TIMEOUT, - version, - max, - timeout, - pendingTasksRunnerNumThreads, - maxRetriesBeforeBlacklist, - taskBlackListBackoffTime, - taskBlackListCleanupPeriod - )).hashCode() - ); - Assert.assertNotEquals( - reflect(generateRemoteTaskRunnerConfig( - timeout, - timeout, - version, - max, - timeout, - pendingTasksRunnerNumThreads, - maxRetriesBeforeBlacklist, - taskBlackListBackoffTime, - taskBlackListCleanupPeriod - )).hashCode(), - reflect(generateRemoteTaskRunnerConfig( - timeout, - timeout, - DEFAULT_VERSION, - max, - timeout, - pendingTasksRunnerNumThreads, - maxRetriesBeforeBlacklist, - taskBlackListBackoffTime, - taskBlackListCleanupPeriod - )).hashCode() - ); - - Assert.assertNotEquals( - reflect(generateRemoteTaskRunnerConfig( - timeout, - timeout, - version, - max, - timeout, - pendingTasksRunnerNumThreads, - maxRetriesBeforeBlacklist, - taskBlackListBackoffTime, - taskBlackListCleanupPeriod - )).hashCode(), - reflect(generateRemoteTaskRunnerConfig( - timeout, - timeout, - version, - DEFAULT_MAX_ZNODE, - timeout, - pendingTasksRunnerNumThreads, - maxRetriesBeforeBlacklist, - taskBlackListBackoffTime, - taskBlackListCleanupPeriod - )).hashCode() - ); - - - Assert.assertNotEquals( - reflect(generateRemoteTaskRunnerConfig( - timeout, - timeout, - version, - max, - timeout, - pendingTasksRunnerNumThreads, - maxRetriesBeforeBlacklist, - taskBlackListBackoffTime, - taskBlackListCleanupPeriod - )).hashCode(), - reflect(generateRemoteTaskRunnerConfig( - timeout, - timeout, - version, - max, - DEFAULT_TIMEOUT, - pendingTasksRunnerNumThreads, - maxRetriesBeforeBlacklist, - taskBlackListBackoffTime, - taskBlackListCleanupPeriod - )).hashCode() - ); - - Assert.assertNotEquals( - reflect(generateRemoteTaskRunnerConfig( - timeout, - timeout, - version, - max, - timeout, - pendingTasksRunnerNumThreads, - maxRetriesBeforeBlacklist, - taskBlackListBackoffTime, - taskBlackListCleanupPeriod - )).hashCode(), - reflect(generateRemoteTaskRunnerConfig( - timeout, - timeout, 
- version, - max, - timeout, - DEFAULT_PENDING_TASKS_RUNNER_NUM_THREADS, - maxRetriesBeforeBlacklist, - taskBlackListBackoffTime, - taskBlackListCleanupPeriod - )).hashCode() - ); - - Assert.assertNotEquals( - reflect(generateRemoteTaskRunnerConfig( - timeout, - timeout, - version, - max, - timeout, - pendingTasksRunnerNumThreads, - maxRetriesBeforeBlacklist, - taskBlackListBackoffTime, - taskBlackListCleanupPeriod - )).hashCode(), - reflect(generateRemoteTaskRunnerConfig( - timeout, - timeout, - version, - max, - timeout, - pendingTasksRunnerNumThreads, - DEFAULT_MAX_RETRIES_BEFORE_BLACKLIST, - taskBlackListBackoffTime, - taskBlackListCleanupPeriod - )).hashCode() - ); - - Assert.assertNotEquals( - reflect(generateRemoteTaskRunnerConfig( - timeout, - timeout, - version, - max, - timeout, - pendingTasksRunnerNumThreads, - maxRetriesBeforeBlacklist, - taskBlackListBackoffTime, - taskBlackListCleanupPeriod - )).hashCode(), - reflect(generateRemoteTaskRunnerConfig( - timeout, - timeout, - version, - max, - timeout, - pendingTasksRunnerNumThreads, - maxRetriesBeforeBlacklist, - DEFAULT_TASK_BACKOFF, - taskBlackListCleanupPeriod - )).hashCode() - ); - - Assert.assertNotEquals( - reflect(generateRemoteTaskRunnerConfig( - timeout, - timeout, - version, - max, - timeout, - pendingTasksRunnerNumThreads, - maxRetriesBeforeBlacklist, - taskBlackListBackoffTime, - taskBlackListCleanupPeriod - )).hashCode(), - reflect(generateRemoteTaskRunnerConfig( - timeout, - timeout, - version, - max, - timeout, - pendingTasksRunnerNumThreads, - maxRetriesBeforeBlacklist, - taskBlackListBackoffTime, - DEFAULT_BLACKLIST_CLEANUP_PERIOD - )).hashCode() - ); - } - - @Test - public void testMaxZnodeBytesLowerThanExpected() - { - final Injector injector = GuiceInjectors.makeStartupInjectorWithModules(ImmutableList.of( - binder -> IndexingServiceModuleHelper.configureTaskRunnerConfigs(binder)) - ); - - this.expectedException.expect(ProvisionException.class); - 
this.expectedException.expectMessage("maxZnodeBytes must be in the range of [10KiB, 2GiB)"); - - Properties props = new Properties(); - props.put(IndexingServiceModuleHelper.INDEXER_RUNNER_PROPERTY_PREFIX + ".maxZnodeBytes", "9KiB"); - - JsonConfigProvider configProvider = JsonConfigProvider.of( - IndexingServiceModuleHelper.INDEXER_RUNNER_PROPERTY_PREFIX, - RemoteTaskRunnerConfig.class - ); - configProvider.inject(props, injector.getBinding(JsonConfigurator.class).getProvider().get()); - configProvider.get(); - } - - @Test - public void testMaxZnodeBytesGreaterThanExpected() - { - final Injector injector = GuiceInjectors.makeStartupInjectorWithModules(ImmutableList.of( - binder -> IndexingServiceModuleHelper.configureTaskRunnerConfigs(binder)) - ); - - this.expectedException.expect(ProvisionException.class); - this.expectedException.expectMessage("maxZnodeBytes must be in the range of [10KiB, 2GiB)"); - - Properties props = new Properties(); - props.put(IndexingServiceModuleHelper.INDEXER_RUNNER_PROPERTY_PREFIX + ".maxZnodeBytes", "2GiB"); - - JsonConfigProvider configProvider = JsonConfigProvider.of( - IndexingServiceModuleHelper.INDEXER_RUNNER_PROPERTY_PREFIX, - RemoteTaskRunnerConfig.class - ); - configProvider.inject(props, injector.getBinding(JsonConfigurator.class).getProvider().get()); - configProvider.get(); - } - - - private RemoteTaskRunnerConfig reflect(RemoteTaskRunnerConfig config) throws IOException - { - return MAPPER.readValue(MAPPER.writeValueAsString(config), RemoteTaskRunnerConfig.class); - } - - private RemoteTaskRunnerConfig generateRemoteTaskRunnerConfig( - Period taskAssignmentTimeout, - Period taskCleanupTimeout, - String minWorkerVersion, - long maxZnodeBytes, - Period taskShutdownLinkTimeout, - int pendingTasksRunnerNumThreads, - int maxRetriesBeforeBlacklist, - Period taskBlackListBackoffTime, - Period taskBlackListCleanupPeriod - ) - { - final Map objectMap = new HashMap<>(); - objectMap.put("taskAssignmentTimeout", 
taskAssignmentTimeout); - objectMap.put("taskCleanupTimeout", taskCleanupTimeout); - objectMap.put("minWorkerVersion", minWorkerVersion); - objectMap.put("maxZnodeBytes", maxZnodeBytes); - objectMap.put("taskShutdownLinkTimeout", taskShutdownLinkTimeout); - objectMap.put("pendingTasksRunnerNumThreads", pendingTasksRunnerNumThreads); - objectMap.put("maxRetriesBeforeBlacklist", maxRetriesBeforeBlacklist); - objectMap.put("workerBlackListBackoffTime", taskBlackListBackoffTime); - objectMap.put("workerBlackListCleanupPeriod", taskBlackListCleanupPeriod); - return MAPPER.convertValue(objectMap, RemoteTaskRunnerConfig.class); - } -} diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/hrtr/HttpRemoteTaskRunnerTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/hrtr/HttpRemoteTaskRunnerTest.java index 8e4d1d3aacd7..578bb7d9491d 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/hrtr/HttpRemoteTaskRunnerTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/hrtr/HttpRemoteTaskRunnerTest.java @@ -28,7 +28,6 @@ import com.google.common.collect.Iterables; import com.google.common.io.ByteStreams; import com.google.common.util.concurrent.Futures; -import org.apache.curator.framework.CuratorFramework; import org.apache.druid.common.guava.DSuppliers; import org.apache.druid.concurrent.LifecycleLock; import org.apache.druid.discovery.DiscoveryDruidNode; @@ -67,8 +66,6 @@ import org.apache.druid.segment.TestHelper; import org.apache.druid.server.DruidNode; import org.apache.druid.server.coordination.ChangeRequestHttpSyncer; -import org.apache.druid.server.initialization.IndexerZkConfig; -import org.apache.druid.server.initialization.ZkPathsConfig; import org.apache.druid.server.metrics.NoopServiceEmitter; import org.easymock.Capture; import org.easymock.EasyMock; @@ -296,8 +293,6 @@ public int getPendingTasksRunnerNumThreads() provisioningStrategy, druidNodeDiscoveryProvider, 
EasyMock.createNiceMock(TaskStorage.class), - EasyMock.createNiceMock(CuratorFramework.class), - new IndexerZkConfig(new ZkPathsConfig(), null, null, null, null), new NoopServiceEmitter() ) { @@ -365,8 +360,6 @@ public int getPendingTasksRunnerNumThreads() new NoopProvisioningStrategy<>(), druidNodeDiscoveryProvider, EasyMock.createNiceMock(TaskStorage.class), - EasyMock.createNiceMock(CuratorFramework.class), - new IndexerZkConfig(new ZkPathsConfig(), null, null, null, null), new NoopServiceEmitter() ) { @@ -470,8 +463,6 @@ public int getPendingTasksRunnerNumThreads() new NoopProvisioningStrategy<>(), druidNodeDiscoveryProvider, taskStorageMock, - EasyMock.createNiceMock(CuratorFramework.class), - new IndexerZkConfig(new ZkPathsConfig(), null, null, null, null), new NoopServiceEmitter() ) { @@ -613,8 +604,6 @@ public int getPendingTasksRunnerNumThreads() new NoopProvisioningStrategy<>(), druidNodeDiscoveryProvider, EasyMock.createNiceMock(TaskStorage.class), - EasyMock.createNiceMock(CuratorFramework.class), - new IndexerZkConfig(new ZkPathsConfig(), null, null, null, null), new NoopServiceEmitter() ) { @@ -789,8 +778,6 @@ public Period getTaskCleanupTimeout() new NoopProvisioningStrategy<>(), druidNodeDiscoveryProvider, EasyMock.createNiceMock(TaskStorage.class), - EasyMock.createNiceMock(CuratorFramework.class), - new IndexerZkConfig(new ZkPathsConfig(), null, null, null, null), new NoopServiceEmitter() ) { @@ -987,8 +974,6 @@ public int getPendingTasksRunnerNumThreads() new NoopProvisioningStrategy<>(), druidNodeDiscoveryProvider, EasyMock.createNiceMock(TaskStorage.class), - EasyMock.createNiceMock(CuratorFramework.class), - new IndexerZkConfig(new ZkPathsConfig(), null, null, null, null), new NoopServiceEmitter() ) { @@ -1500,8 +1485,6 @@ public Period getTaskAssignmentTimeout() new NoopProvisioningStrategy<>(), druidNodeDiscoveryProvider, EasyMock.createNiceMock(TaskStorage.class), - EasyMock.createNiceMock(CuratorFramework.class), - new IndexerZkConfig(new 
ZkPathsConfig(), null, null, null, null), new NoopServiceEmitter() ) { @@ -1613,8 +1596,6 @@ public Period getTaskAssignmentTimeout() new NoopProvisioningStrategy<>(), druidNodeDiscoveryProvider, EasyMock.createNiceMock(TaskStorage.class), - EasyMock.createNiceMock(CuratorFramework.class), - new IndexerZkConfig(new ZkPathsConfig(), null, null, null, null), new NoopServiceEmitter() ) { @@ -1774,8 +1755,6 @@ public void testSyncMonitoring_finiteIteration() new NoopProvisioningStrategy<>(), druidNodeDiscoveryProvider, EasyMock.createMock(TaskStorage.class), - EasyMock.createNiceMock(CuratorFramework.class), - new IndexerZkConfig(new ZkPathsConfig(), null, null, null, null), new NoopServiceEmitter() ) { @@ -1820,8 +1799,6 @@ public void testGetMaximumCapacity_noWorkerConfig() new TestProvisioningStrategy<>(), druidNodeDiscoveryProvider, EasyMock.createMock(TaskStorage.class), - EasyMock.createNiceMock(CuratorFramework.class), - new IndexerZkConfig(new ZkPathsConfig(), null, null, null, null), new NoopServiceEmitter() ); Assert.assertEquals(-1, taskRunner.getMaximumCapacityWithAutoscale()); @@ -1844,8 +1821,6 @@ public void testGetMaximumCapacity_noAutoScaler() new TestProvisioningStrategy<>(), druidNodeDiscoveryProvider, EasyMock.createMock(TaskStorage.class), - EasyMock.createNiceMock(CuratorFramework.class), - new IndexerZkConfig(new ZkPathsConfig(), null, null, null, null), new NoopServiceEmitter() ); Assert.assertEquals(-1, taskRunner.getMaximumCapacityWithAutoscale()); @@ -1868,8 +1843,6 @@ public void testGetMaximumCapacity_withAutoScaler() new TestProvisioningStrategy<>(), druidNodeDiscoveryProvider, EasyMock.createMock(TaskStorage.class), - EasyMock.createNiceMock(CuratorFramework.class), - new IndexerZkConfig(new ZkPathsConfig(), null, null, null, null), new NoopServiceEmitter() ); // Default autoscaler has max workers of 0 @@ -1902,8 +1875,6 @@ public int getPendingTasksRunnerNumThreads() new NoopProvisioningStrategy<>(), druidNodeDiscoveryProvider, 
taskStorage, - EasyMock.createNiceMock(CuratorFramework.class), - new IndexerZkConfig(new ZkPathsConfig(), null, null, null, null), new NoopServiceEmitter() ); @@ -2325,8 +2296,6 @@ public int getPendingTasksRunnerNumThreads() provisioningStrategy, druidNodeDiscoveryProvider, EasyMock.createNiceMock(TaskStorage.class), - EasyMock.createNiceMock(CuratorFramework.class), - new IndexerZkConfig(new ZkPathsConfig(), null, null, null, null), new NoopServiceEmitter() ) { diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/setup/EqualDistributionWithAffinityWorkerSelectStrategyTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/setup/EqualDistributionWithAffinityWorkerSelectStrategyTest.java index 1e81be9cc978..3450aa8d4807 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/setup/EqualDistributionWithAffinityWorkerSelectStrategyTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/setup/EqualDistributionWithAffinityWorkerSelectStrategyTest.java @@ -24,7 +24,7 @@ import com.google.common.collect.ImmutableSet; import org.apache.druid.indexing.common.task.NoopTask; import org.apache.druid.indexing.overlord.ImmutableWorkerInfo; -import org.apache.druid.indexing.overlord.config.RemoteTaskRunnerConfig; +import org.apache.druid.indexing.overlord.config.HttpRemoteTaskRunnerConfig; import org.apache.druid.indexing.worker.Worker; import org.apache.druid.indexing.worker.config.WorkerConfig; import org.apache.druid.java.util.common.DateTimes; @@ -49,7 +49,7 @@ public void testFindWorkerForTask() NoopTask noopTask = NoopTask.forDatasource("foo"); ImmutableWorkerInfo worker = strategy.findWorkerForTask( - new RemoteTaskRunnerConfig(), + new HttpRemoteTaskRunnerConfig(), ImmutableMap.of( "localhost0", new ImmutableWorkerInfo( @@ -94,7 +94,7 @@ public void testFindWorkerForTaskWithNulls() ); ImmutableWorkerInfo worker = strategy.findWorkerForTask( - new RemoteTaskRunnerConfig(), + 
new HttpRemoteTaskRunnerConfig(), ImmutableMap.of( "lhost", new ImmutableWorkerInfo( @@ -125,7 +125,7 @@ public void testIsolation() ); ImmutableWorkerInfo worker = strategy.findWorkerForTask( - new RemoteTaskRunnerConfig(), + new HttpRemoteTaskRunnerConfig(), ImmutableMap.of( "localhost", new ImmutableWorkerInfo( @@ -170,7 +170,7 @@ public void testFindWorkerForTaskWithGlobalLimits() NoopTask noopTask = NoopTask.forDatasource("foo"); ImmutableWorkerInfo worker = strategy.findWorkerForTask( - new RemoteTaskRunnerConfig(), + new HttpRemoteTaskRunnerConfig(), ImmutableMap.of( "localhost0", new ImmutableWorkerInfo( @@ -194,7 +194,7 @@ public void testFindWorkerForTaskWithGlobalLimits() Assert.assertNotNull(worker); ImmutableWorkerInfo worker1 = strategy.findWorkerForTask( - new RemoteTaskRunnerConfig(), + new HttpRemoteTaskRunnerConfig(), ImmutableMap.of( "localhost0", new ImmutableWorkerInfo( @@ -237,7 +237,7 @@ public void testFindWorkerForTaskWithGlobalRatios() NoopTask noopTask = NoopTask.forDatasource("foo"); ImmutableWorkerInfo worker = strategy.findWorkerForTask( - new RemoteTaskRunnerConfig(), + new HttpRemoteTaskRunnerConfig(), ImmutableMap.of( "localhost0", new ImmutableWorkerInfo( @@ -261,7 +261,7 @@ public void testFindWorkerForTaskWithGlobalRatios() Assert.assertNotNull(worker); ImmutableWorkerInfo worker1 = strategy.findWorkerForTask( - new RemoteTaskRunnerConfig(), + new HttpRemoteTaskRunnerConfig(), ImmutableMap.of( "localhost0", new ImmutableWorkerInfo( diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/setup/EqualDistributionWithCategorySpecWorkerSelectStrategyTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/setup/EqualDistributionWithCategorySpecWorkerSelectStrategyTest.java index 3c22a6c4c6d1..442a64ae068d 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/setup/EqualDistributionWithCategorySpecWorkerSelectStrategyTest.java +++ 
b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/setup/EqualDistributionWithCategorySpecWorkerSelectStrategyTest.java @@ -26,7 +26,7 @@ import org.apache.druid.indexing.common.task.NoopTask; import org.apache.druid.indexing.common.task.Task; import org.apache.druid.indexing.overlord.ImmutableWorkerInfo; -import org.apache.druid.indexing.overlord.config.RemoteTaskRunnerConfig; +import org.apache.druid.indexing.overlord.config.HttpRemoteTaskRunnerConfig; import org.apache.druid.indexing.seekablestream.SeekableStreamIndexTaskIOConfig; import org.apache.druid.indexing.seekablestream.SeekableStreamIndexTaskTuningConfig; import org.apache.druid.indexing.seekablestream.TestSeekableStreamIndexTask; @@ -212,7 +212,7 @@ public void testSupervisorIdCategoryAffinity() new EqualDistributionWithCategorySpecWorkerSelectStrategy(workerCategorySpec, null); ImmutableWorkerInfo worker = strategy.findWorkerForTask( - new RemoteTaskRunnerConfig(), + new HttpRemoteTaskRunnerConfig(), WORKERS_FOR_TIER_TESTS, taskWithSupervisor ); @@ -241,7 +241,7 @@ public void testSupervisorIdCategoryAffinityFallbackToDatasource() new EqualDistributionWithCategorySpecWorkerSelectStrategy(workerCategorySpec, null); ImmutableWorkerInfo worker = strategy.findWorkerForTask( - new RemoteTaskRunnerConfig(), + new HttpRemoteTaskRunnerConfig(), WORKERS_FOR_TIER_TESTS, taskWithSupervisor ); @@ -271,7 +271,7 @@ public void testSupervisorIdCategoryAffinityFallbackToDefault() new EqualDistributionWithCategorySpecWorkerSelectStrategy(workerCategorySpec, null); ImmutableWorkerInfo worker = strategy.findWorkerForTask( - new RemoteTaskRunnerConfig(), + new HttpRemoteTaskRunnerConfig(), WORKERS_FOR_TIER_TESTS, taskWithSupervisor ); @@ -286,7 +286,7 @@ private ImmutableWorkerInfo selectWorker(WorkerCategorySpec workerCategorySpec) new EqualDistributionWithCategorySpecWorkerSelectStrategy(workerCategorySpec, null); ImmutableWorkerInfo worker = strategy.findWorkerForTask( - new RemoteTaskRunnerConfig(), 
+ new HttpRemoteTaskRunnerConfig(), WORKERS_FOR_TIER_TESTS, NoopTask.forDatasource("ds1") ); diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/setup/EqualDistributionWorkerSelectStrategyTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/setup/EqualDistributionWorkerSelectStrategyTest.java index 05b1ab36f497..2568217042b2 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/setup/EqualDistributionWorkerSelectStrategyTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/setup/EqualDistributionWorkerSelectStrategyTest.java @@ -23,7 +23,7 @@ import com.google.common.collect.ImmutableSet; import org.apache.druid.indexing.common.task.NoopTask; import org.apache.druid.indexing.overlord.ImmutableWorkerInfo; -import org.apache.druid.indexing.overlord.config.RemoteTaskRunnerConfig; +import org.apache.druid.indexing.overlord.config.HttpRemoteTaskRunnerConfig; import org.apache.druid.indexing.worker.Worker; import org.apache.druid.indexing.worker.config.WorkerConfig; import org.apache.druid.java.util.common.DateTimes; @@ -72,7 +72,7 @@ public void testFindWorkerForTask() final EqualDistributionWorkerSelectStrategy strategy = new EqualDistributionWorkerSelectStrategy(null, null); ImmutableWorkerInfo worker = strategy.findWorkerForTask( - new RemoteTaskRunnerConfig(), + new HttpRemoteTaskRunnerConfig(), ImmutableMap.of( "lhost", new ImmutableWorkerInfo( @@ -100,7 +100,7 @@ public void testFindWorkerForTaskWhenSameCurrCapacityUsed() final EqualDistributionWorkerSelectStrategy strategy = new EqualDistributionWorkerSelectStrategy(null, null); ImmutableWorkerInfo worker = strategy.findWorkerForTask( - new RemoteTaskRunnerConfig(), + new HttpRemoteTaskRunnerConfig(), ImmutableMap.of( "lhost", new ImmutableWorkerInfo( @@ -128,7 +128,7 @@ public void testOneDisableWorkerDifferentUsedCapacity() final EqualDistributionWorkerSelectStrategy strategy = new 
EqualDistributionWorkerSelectStrategy(null, null); ImmutableWorkerInfo worker = strategy.findWorkerForTask( - new RemoteTaskRunnerConfig(), + new HttpRemoteTaskRunnerConfig(), ImmutableMap.of( "lhost", new ImmutableWorkerInfo( @@ -156,7 +156,7 @@ public void testOneDisableWorkerSameUsedCapacity() final EqualDistributionWorkerSelectStrategy strategy = new EqualDistributionWorkerSelectStrategy(null, null); ImmutableWorkerInfo worker = strategy.findWorkerForTask( - new RemoteTaskRunnerConfig(), + new HttpRemoteTaskRunnerConfig(), ImmutableMap.of( "lhost", new ImmutableWorkerInfo( @@ -193,7 +193,7 @@ public void testWeakAffinity() ); ImmutableWorkerInfo workerFoo = strategy.findWorkerForTask( - new RemoteTaskRunnerConfig(), + new HttpRemoteTaskRunnerConfig(), WORKERS_FOR_AFFINITY_TESTS, createDummyTask("foo") ); @@ -201,14 +201,14 @@ public void testWeakAffinity() // With weak affinity, bar (which has no affinity workers available) can use a non-affinity worker. ImmutableWorkerInfo workerBar = strategy.findWorkerForTask( - new RemoteTaskRunnerConfig(), + new HttpRemoteTaskRunnerConfig(), WORKERS_FOR_AFFINITY_TESTS, createDummyTask("bar") ); Assert.assertEquals("localhost0", workerBar.getWorker().getHost()); ImmutableWorkerInfo workerBaz = strategy.findWorkerForTask( - new RemoteTaskRunnerConfig(), + new HttpRemoteTaskRunnerConfig(), WORKERS_FOR_AFFINITY_TESTS, createDummyTask("baz") ); @@ -230,7 +230,7 @@ public void testStrongAffinity() ); ImmutableWorkerInfo workerFoo = strategy.findWorkerForTask( - new RemoteTaskRunnerConfig(), + new HttpRemoteTaskRunnerConfig(), WORKERS_FOR_AFFINITY_TESTS, createDummyTask("foo") ); @@ -238,14 +238,14 @@ public void testStrongAffinity() // With strong affinity, no workers can be found for bar. 
ImmutableWorkerInfo workerBar = strategy.findWorkerForTask( - new RemoteTaskRunnerConfig(), + new HttpRemoteTaskRunnerConfig(), WORKERS_FOR_AFFINITY_TESTS, createDummyTask("bar") ); Assert.assertNull(workerBar); ImmutableWorkerInfo workerBaz = strategy.findWorkerForTask( - new RemoteTaskRunnerConfig(), + new HttpRemoteTaskRunnerConfig(), WORKERS_FOR_AFFINITY_TESTS, createDummyTask("baz") ); diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/setup/FillCapacityWithAffinityWorkerSelectStrategyTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/setup/FillCapacityWithAffinityWorkerSelectStrategyTest.java index 207c6e43dda3..0455e394937b 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/setup/FillCapacityWithAffinityWorkerSelectStrategyTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/setup/FillCapacityWithAffinityWorkerSelectStrategyTest.java @@ -23,7 +23,7 @@ import com.google.common.collect.ImmutableSet; import org.apache.druid.indexing.common.task.NoopTask; import org.apache.druid.indexing.overlord.ImmutableWorkerInfo; -import org.apache.druid.indexing.overlord.config.RemoteTaskRunnerConfig; +import org.apache.druid.indexing.overlord.config.HttpRemoteTaskRunnerConfig; import org.apache.druid.indexing.worker.Worker; import org.apache.druid.indexing.worker.config.WorkerConfig; import org.apache.druid.java.util.common.DateTimes; @@ -45,7 +45,7 @@ public void testFindWorkerForTask() ); ImmutableWorkerInfo worker = strategy.findWorkerForTask( - new RemoteTaskRunnerConfig(), + new HttpRemoteTaskRunnerConfig(), ImmutableMap.of( "lhost", new ImmutableWorkerInfo( @@ -76,7 +76,7 @@ public void testFindWorkerForTaskWithNulls() ); ImmutableWorkerInfo worker = strategy.findWorkerForTask( - new RemoteTaskRunnerConfig(), + new HttpRemoteTaskRunnerConfig(), ImmutableMap.of( "lhost", new ImmutableWorkerInfo( @@ -107,7 +107,7 @@ public void testIsolation() ); 
ImmutableWorkerInfo worker = strategy.findWorkerForTask( - new RemoteTaskRunnerConfig(), + new HttpRemoteTaskRunnerConfig(), ImmutableMap.of( "localhost", new ImmutableWorkerInfo( @@ -137,7 +137,7 @@ public void testFindWorkerForTaskWithGlobalLimits() NoopTask noopTask = NoopTask.forDatasource("foo"); ImmutableWorkerInfo worker = strategy.findWorkerForTask( - new RemoteTaskRunnerConfig(), + new HttpRemoteTaskRunnerConfig(), ImmutableMap.of( "localhost0", new ImmutableWorkerInfo( @@ -161,7 +161,7 @@ public void testFindWorkerForTaskWithGlobalLimits() Assert.assertNotNull(worker); ImmutableWorkerInfo worker1 = strategy.findWorkerForTask( - new RemoteTaskRunnerConfig(), + new HttpRemoteTaskRunnerConfig(), ImmutableMap.of( "localhost0", new ImmutableWorkerInfo( @@ -204,7 +204,7 @@ public void testFindWorkerForTaskWithGlobalRatios() NoopTask noopTask = NoopTask.forDatasource("foo"); ImmutableWorkerInfo worker = strategy.findWorkerForTask( - new RemoteTaskRunnerConfig(), + new HttpRemoteTaskRunnerConfig(), ImmutableMap.of( "localhost0", new ImmutableWorkerInfo( @@ -228,7 +228,7 @@ public void testFindWorkerForTaskWithGlobalRatios() Assert.assertNotNull(worker); ImmutableWorkerInfo worker1 = strategy.findWorkerForTask( - new RemoteTaskRunnerConfig(), + new HttpRemoteTaskRunnerConfig(), ImmutableMap.of( "localhost0", new ImmutableWorkerInfo( diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/setup/FillCapacityWithCategorySpecWorkerSelectStrategyTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/setup/FillCapacityWithCategorySpecWorkerSelectStrategyTest.java index 27fde352caba..a63dde25b25c 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/setup/FillCapacityWithCategorySpecWorkerSelectStrategyTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/setup/FillCapacityWithCategorySpecWorkerSelectStrategyTest.java @@ -26,7 +26,7 @@ import 
org.apache.druid.indexing.common.task.NoopTask; import org.apache.druid.indexing.common.task.Task; import org.apache.druid.indexing.overlord.ImmutableWorkerInfo; -import org.apache.druid.indexing.overlord.config.RemoteTaskRunnerConfig; +import org.apache.druid.indexing.overlord.config.HttpRemoteTaskRunnerConfig; import org.apache.druid.indexing.seekablestream.SeekableStreamIndexTaskIOConfig; import org.apache.druid.indexing.seekablestream.SeekableStreamIndexTaskTuningConfig; import org.apache.druid.indexing.seekablestream.TestSeekableStreamIndexTask; @@ -214,7 +214,7 @@ public void testSupervisorIdCategoryAffinity() new FillCapacityWithCategorySpecWorkerSelectStrategy(workerCategorySpec, null); ImmutableWorkerInfo worker = strategy.findWorkerForTask( - new RemoteTaskRunnerConfig(), + new HttpRemoteTaskRunnerConfig(), WORKERS_FOR_TIER_TESTS, taskWithSupervisor ); @@ -245,7 +245,7 @@ public void testSupervisorIdCategoryAffinityFallbackToDatasource() new FillCapacityWithCategorySpecWorkerSelectStrategy(workerCategorySpec, null); ImmutableWorkerInfo worker = strategy.findWorkerForTask( - new RemoteTaskRunnerConfig(), + new HttpRemoteTaskRunnerConfig(), WORKERS_FOR_TIER_TESTS, taskWithSupervisor ); @@ -276,7 +276,7 @@ public void testSupervisorIdCategoryAffinityFallbackToDefault() new FillCapacityWithCategorySpecWorkerSelectStrategy(workerCategorySpec, null); ImmutableWorkerInfo worker = strategy.findWorkerForTask( - new RemoteTaskRunnerConfig(), + new HttpRemoteTaskRunnerConfig(), WORKERS_FOR_TIER_TESTS, taskWithSupervisor ); @@ -291,7 +291,7 @@ private ImmutableWorkerInfo selectWorker(WorkerCategorySpec workerCategorySpec) new FillCapacityWithCategorySpecWorkerSelectStrategy(workerCategorySpec, null); ImmutableWorkerInfo worker = strategy.findWorkerForTask( - new RemoteTaskRunnerConfig(), + new HttpRemoteTaskRunnerConfig(), WORKERS_FOR_TIER_TESTS, NoopTask.forDatasource("ds1") ); diff --git 
a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/setup/JavaScriptWorkerSelectStrategyTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/setup/JavaScriptWorkerSelectStrategyTest.java index 222c26ae5bd3..fb01549951a5 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/setup/JavaScriptWorkerSelectStrategyTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/setup/JavaScriptWorkerSelectStrategyTest.java @@ -25,12 +25,11 @@ import com.google.common.collect.ImmutableMap; import org.apache.druid.indexing.common.task.Task; import org.apache.druid.indexing.overlord.ImmutableWorkerInfo; -import org.apache.druid.indexing.overlord.TestRemoteTaskRunnerConfig; +import org.apache.druid.indexing.overlord.config.HttpRemoteTaskRunnerConfig; import org.apache.druid.jackson.DefaultObjectMapper; import org.apache.druid.js.JavaScriptConfig; import org.easymock.EasyMock; import org.hamcrest.CoreMatchers; -import org.joda.time.Period; import org.junit.Assert; import org.junit.Rule; import org.junit.Test; @@ -123,7 +122,7 @@ public void testFindWorkerForTask() ); ImmutableWorkerInfo workerForBatchTask = STRATEGY.findWorkerForTask( - new TestRemoteTaskRunnerConfig(new Period("PT1S")), + new HttpRemoteTaskRunnerConfig(), workerMap, createMockTask("index_parallel") ); @@ -131,7 +130,7 @@ public void testFindWorkerForTask() Assert.assertEquals(worker1, workerForBatchTask); ImmutableWorkerInfo workerForOtherTask = STRATEGY.findWorkerForTask( - new TestRemoteTaskRunnerConfig(new Period("PT1S")), + new HttpRemoteTaskRunnerConfig(), workerMap, createMockTask("other_type") ); @@ -147,7 +146,7 @@ public void testIsolationOfBatchWorker() "10.0.0.2", createMockWorker(1, true, true) ); ImmutableWorkerInfo workerForOtherTask = STRATEGY.findWorkerForTask( - new TestRemoteTaskRunnerConfig(new Period("PT1S")), + new HttpRemoteTaskRunnerConfig(), workerMap, createMockTask("other_type") ); @@ -162,14 +161,14 
@@ public void testNoValidWorker() "10.0.0.4", createMockWorker(1, true, false) ); ImmutableWorkerInfo workerForBatchTask = STRATEGY.findWorkerForTask( - new TestRemoteTaskRunnerConfig(new Period("PT1S")), + new HttpRemoteTaskRunnerConfig(), workerMap, createMockTask("index_parallel") ); Assert.assertNull(workerForBatchTask); ImmutableWorkerInfo workerForOtherTask = STRATEGY.findWorkerForTask( - new TestRemoteTaskRunnerConfig(new Period("PT1S")), + new HttpRemoteTaskRunnerConfig(), workerMap, createMockTask("otherTask") ); @@ -185,14 +184,14 @@ public void testNoWorkerCanRunTask() "10.0.0.4", createMockWorker(1, false, true) ); ImmutableWorkerInfo workerForBatchTask = STRATEGY.findWorkerForTask( - new TestRemoteTaskRunnerConfig(new Period("PT1S")), + new HttpRemoteTaskRunnerConfig(), workerMap, createMockTask("index_parallel") ); Assert.assertNull(workerForBatchTask); ImmutableWorkerInfo workerForOtherTask = STRATEGY.findWorkerForTask( - new TestRemoteTaskRunnerConfig(new Period("PT1S")), + new HttpRemoteTaskRunnerConfig(), workerMap, createMockTask("otherTask") ); @@ -209,7 +208,7 @@ public void testFillWorkerCapacity() "10.0.0.2", createMockWorker(5, true, true) ); ImmutableWorkerInfo workerForBatchTask = STRATEGY.findWorkerForTask( - new TestRemoteTaskRunnerConfig(new Period("PT1S")), + new HttpRemoteTaskRunnerConfig(), workerMap, createMockTask("index_parallel") ); diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/worker/WorkerTaskManagerTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/worker/WorkerTaskManagerTest.java index 75cfaf68dc22..0f072d384cc9 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/worker/WorkerTaskManagerTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/worker/WorkerTaskManagerTest.java @@ -192,18 +192,7 @@ private WorkerTaskManager createWorkerTaskManager(File baseDir, WorkerConfig wor taskConfig, workerConfig, overlordClient - ) - { - @Override - protected 
void taskStarted(String taskId) - { - } - - @Override - protected void taskAnnouncementChanged(TaskAnnouncement announcement) - { - } - }; + ); } @Before diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/worker/WorkerTaskMonitorTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/worker/WorkerTaskMonitorTest.java deleted file mode 100644 index 7663d38ef3e9..000000000000 --- a/indexing-service/src/test/java/org/apache/druid/indexing/worker/WorkerTaskMonitorTest.java +++ /dev/null @@ -1,406 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.druid.indexing.worker; - -import com.fasterxml.jackson.databind.ObjectMapper; -import com.fasterxml.jackson.databind.jsontype.NamedType; -import com.google.common.base.Joiner; -import org.apache.curator.framework.CuratorFramework; -import org.apache.curator.framework.CuratorFrameworkFactory; -import org.apache.curator.retry.ExponentialBackoffRetry; -import org.apache.curator.test.TestingCluster; -import org.apache.druid.client.coordinator.NoopCoordinatorClient; -import org.apache.druid.curator.PotentiallyGzippedCompressionProvider; -import org.apache.druid.curator.announcement.NodeAnnouncer; -import org.apache.druid.indexer.TaskState; -import org.apache.druid.indexing.common.IndexingServiceCondition; -import org.apache.druid.indexing.common.SegmentCacheManagerFactory; -import org.apache.druid.indexing.common.TaskToolboxFactory; -import org.apache.druid.indexing.common.TestIndexTask; -import org.apache.druid.indexing.common.TestTasks; -import org.apache.druid.indexing.common.TestUtils; -import org.apache.druid.indexing.common.actions.TaskActionClient; -import org.apache.druid.indexing.common.actions.TaskActionClientFactory; -import org.apache.druid.indexing.common.config.TaskConfig; -import org.apache.druid.indexing.common.config.TaskConfigBuilder; -import org.apache.druid.indexing.common.task.NoopTestTaskReportFileWriter; -import org.apache.druid.indexing.common.task.Task; -import org.apache.druid.indexing.common.task.TestAppenderatorsManager; -import org.apache.druid.indexing.overlord.SingleTaskBackgroundRunner; -import org.apache.druid.indexing.overlord.TestRemoteTaskRunnerConfig; -import org.apache.druid.indexing.worker.config.WorkerConfig; -import org.apache.druid.java.util.common.FileUtils; -import org.apache.druid.java.util.common.StringUtils; -import org.apache.druid.java.util.common.concurrent.Execs; -import org.apache.druid.query.policy.NoopPolicyEnforcer; -import org.apache.druid.rpc.indexing.NoopOverlordClient; -import 
org.apache.druid.rpc.indexing.OverlordClient; -import org.apache.druid.segment.IndexIO; -import org.apache.druid.segment.IndexMergerV9Factory; -import org.apache.druid.segment.TestIndex; -import org.apache.druid.segment.handoff.SegmentHandoffNotifierFactory; -import org.apache.druid.segment.join.NoopJoinableFactory; -import org.apache.druid.segment.metadata.CentralizedDatasourceSchemaConfig; -import org.apache.druid.segment.realtime.NoopChatHandlerProvider; -import org.apache.druid.server.DruidNode; -import org.apache.druid.server.initialization.IndexerZkConfig; -import org.apache.druid.server.initialization.ServerConfig; -import org.apache.druid.server.initialization.ZkPathsConfig; -import org.apache.druid.server.metrics.NoopServiceEmitter; -import org.apache.druid.server.security.AuthTestUtils; -import org.apache.druid.utils.JvmUtils; -import org.easymock.EasyMock; -import org.joda.time.Period; -import org.junit.After; -import org.junit.Assert; -import org.junit.Before; -import org.junit.Test; - -import java.util.List; - -/** - * - */ -public class WorkerTaskMonitorTest -{ - private static final Joiner JOINER = Joiner.on("/"); - private static final String BASE_PATH = "/test/druid"; - private static final String TASKS_PATH = StringUtils.format("%s/indexer/tasks/worker", BASE_PATH); - private static final String STATUS_PATH = StringUtils.format("%s/indexer/status/worker", BASE_PATH); - private static final DruidNode DUMMY_NODE = new DruidNode("dummy", "dummy", false, 9000, null, true, false); - - private TestingCluster testingCluster; - private CuratorFramework cf; - private WorkerCuratorCoordinator workerCuratorCoordinator; - private WorkerTaskMonitor workerTaskMonitor; - - private Task task; - - private Worker worker; - private final TestUtils testUtils; - private ObjectMapper jsonMapper; - private IndexMergerV9Factory indexMergerV9Factory; - private IndexIO indexIO; - - public WorkerTaskMonitorTest() - { - testUtils = new TestUtils(); - jsonMapper = 
testUtils.getTestObjectMapper(); - indexMergerV9Factory = testUtils.getIndexMergerV9Factory(); - indexIO = testUtils.getTestIndexIO(); - } - - @Before - public void setUp() throws Exception - { - testingCluster = new TestingCluster(1); - testingCluster.start(); - - cf = CuratorFrameworkFactory.builder() - .connectString(testingCluster.getConnectString()) - .retryPolicy(new ExponentialBackoffRetry(1, 10)) - .compressionProvider(new PotentiallyGzippedCompressionProvider(false)) - .build(); - cf.start(); - cf.blockUntilConnected(); - cf.create().creatingParentsIfNeeded().forPath(BASE_PATH); - - worker = new Worker( - "http", - "worker", - "localhost", - 3, - "0", - WorkerConfig.DEFAULT_CATEGORY - ); - - workerCuratorCoordinator = new WorkerCuratorCoordinator( - jsonMapper, - new IndexerZkConfig( - new ZkPathsConfig() - { - @Override - public String getBase() - { - return BASE_PATH; - } - }, null, null, null, null - ), - new TestRemoteTaskRunnerConfig(new Period("PT1S")), - cf, - new NodeAnnouncer(cf, Execs.directExecutor()), - worker - ); - workerCuratorCoordinator.start(); - - - // Start a task monitor - workerTaskMonitor = createTaskMonitor(); - TestTasks.registerSubtypes(jsonMapper); - jsonMapper.registerSubtypes(new NamedType(TestIndexTask.class, "test_index")); - workerTaskMonitor.start(); - - task = TestTasks.immediateSuccess("test"); - } - - private WorkerTaskMonitor createTaskMonitor() - { - final TaskConfig taskConfig = new TaskConfigBuilder() - .setBaseDir(FileUtils.createTempDir().toString()) - .build(); - - TaskActionClientFactory taskActionClientFactory = EasyMock.createNiceMock(TaskActionClientFactory.class); - TaskActionClient taskActionClient = EasyMock.createNiceMock(TaskActionClient.class); - EasyMock.expect(taskActionClientFactory.create(EasyMock.anyObject())).andReturn(taskActionClient).anyTimes(); - SegmentHandoffNotifierFactory notifierFactory = EasyMock.createNiceMock(SegmentHandoffNotifierFactory.class); - 
EasyMock.replay(taskActionClientFactory, taskActionClient, notifierFactory); - return new WorkerTaskMonitor( - jsonMapper, - new SingleTaskBackgroundRunner( - new TaskToolboxFactory( - null, - taskConfig, - null, - taskActionClientFactory, - null, - NoopPolicyEnforcer.instance(), - null, - null, - null, - null, - null, - notifierFactory, - null, - null, - null, - NoopJoinableFactory.INSTANCE, - null, - new SegmentCacheManagerFactory(TestIndex.INDEX_IO, jsonMapper), - jsonMapper, - indexIO, - null, - null, - null, - indexMergerV9Factory, - null, - null, - null, - null, - null, - new NoopTestTaskReportFileWriter(), - null, - AuthTestUtils.TEST_AUTHORIZER_MAPPER, - new NoopChatHandlerProvider(), - testUtils.getRowIngestionMetersFactory(), - new TestAppenderatorsManager(), - new NoopOverlordClient(), - new NoopCoordinatorClient(), - null, - null, - null, - "1", - CentralizedDatasourceSchemaConfig.create(), - JvmUtils.getRuntimeInfo() - ), - taskConfig, - new NoopServiceEmitter(), - DUMMY_NODE, - new ServerConfig() - ), - taskConfig, - new WorkerConfig(), - cf, - workerCuratorCoordinator, - EasyMock.createNiceMock(OverlordClient.class) - ); - } - - @After - public void tearDown() throws Exception - { - workerCuratorCoordinator.stop(); - workerTaskMonitor.stop(); - cf.close(); - testingCluster.stop(); - } - - @Test(timeout = 60_000L) - public void testRunTask() throws Exception - { - Assert.assertTrue( - TestUtils.conditionValid( - new IndexingServiceCondition() - { - @Override - public boolean isValid() - { - try { - return cf.checkExists().forPath(JOINER.join(TASKS_PATH, task.getId())) == null; - } - catch (Exception e) { - return false; - } - } - } - ) - ); - - cf.create() - .creatingParentsIfNeeded() - .forPath(JOINER.join(TASKS_PATH, task.getId()), jsonMapper.writeValueAsBytes(task)); - - Assert.assertTrue( - TestUtils.conditionValid( - new IndexingServiceCondition() - { - @Override - public boolean isValid() - { - try { - final byte[] bytes = 
cf.getData().forPath(JOINER.join(STATUS_PATH, task.getId())); - final TaskAnnouncement announcement = jsonMapper.readValue( - bytes, - TaskAnnouncement.class - ); - return announcement.getTaskStatus().isComplete(); - } - catch (Exception e) { - return false; - } - } - } - ) - ); - - TaskAnnouncement taskAnnouncement = jsonMapper.readValue( - cf.getData().forPath(JOINER.join(STATUS_PATH, task.getId())), TaskAnnouncement.class - ); - - Assert.assertEquals(task.getId(), taskAnnouncement.getTaskStatus().getId()); - Assert.assertEquals(TaskState.SUCCESS, taskAnnouncement.getTaskStatus().getStatusCode()); - } - - @Test(timeout = 60_000L) - public void testGetAnnouncements() throws Exception - { - cf.create() - .creatingParentsIfNeeded() - .forPath(JOINER.join(TASKS_PATH, task.getId()), jsonMapper.writeValueAsBytes(task)); - - Assert.assertTrue( - TestUtils.conditionValid( - new IndexingServiceCondition() - { - @Override - public boolean isValid() - { - try { - final byte[] bytes = cf.getData().forPath(JOINER.join(STATUS_PATH, task.getId())); - final TaskAnnouncement announcement = jsonMapper.readValue( - bytes, - TaskAnnouncement.class - ); - return announcement.getTaskStatus().isComplete(); - } - catch (Exception e) { - return false; - } - } - } - ) - ); - - List announcements = workerCuratorCoordinator.getAnnouncements(); - Assert.assertEquals(1, announcements.size()); - Assert.assertEquals(task.getId(), announcements.get(0).getTaskStatus().getId()); - Assert.assertEquals(TaskState.SUCCESS, announcements.get(0).getTaskStatus().getStatusCode()); - Assert.assertEquals(DUMMY_NODE.getHost(), announcements.get(0).getTaskLocation().getHost()); - Assert.assertEquals(DUMMY_NODE.getPlaintextPort(), announcements.get(0).getTaskLocation().getPort()); - } - - @Test(timeout = 60_000L) - public void testRestartCleansOldStatus() throws Exception - { - task = TestTasks.unending("test"); - - cf.create() - .creatingParentsIfNeeded() - .forPath(JOINER.join(TASKS_PATH, task.getId()), 
jsonMapper.writeValueAsBytes(task)); - - Assert.assertTrue( - TestUtils.conditionValid( - new IndexingServiceCondition() - { - @Override - public boolean isValid() - { - try { - return cf.checkExists().forPath(JOINER.join(STATUS_PATH, task.getId())) != null; - } - catch (Exception e) { - return false; - } - } - } - ) - ); - // simulate node restart - workerTaskMonitor.stop(); - workerTaskMonitor = createTaskMonitor(); - workerTaskMonitor.start(); - List announcements = workerCuratorCoordinator.getAnnouncements(); - Assert.assertEquals(1, announcements.size()); - Assert.assertEquals(task.getId(), announcements.get(0).getTaskStatus().getId()); - Assert.assertEquals(TaskState.FAILED, announcements.get(0).getTaskStatus().getStatusCode()); - Assert.assertEquals( - "Canceled as unknown task. See middleManager or indexer logs for more details.", - announcements.get(0).getTaskStatus().getErrorMsg() - ); - } - - @Test(timeout = 60_000L) - public void testStatusAnnouncementsArePersistent() throws Exception - { - cf.create() - .creatingParentsIfNeeded() - .forPath(JOINER.join(TASKS_PATH, task.getId()), jsonMapper.writeValueAsBytes(task)); - - Assert.assertTrue( - TestUtils.conditionValid( - new IndexingServiceCondition() - { - @Override - public boolean isValid() - { - try { - return cf.checkExists().forPath(JOINER.join(STATUS_PATH, task.getId())) != null; - } - catch (Exception e) { - return false; - } - } - } - ) - ); - // ephemeral owner is 0 is created node is PERSISTENT - Assert.assertEquals(0, cf.checkExists().forPath(JOINER.join(STATUS_PATH, task.getId())).getEphemeralOwner()); - - } -} diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/worker/http/WorkerResourceTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/worker/http/WorkerResourceTest.java index 096688e5eb35..3421f4645b9f 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/worker/http/WorkerResourceTest.java +++ 
b/indexing-service/src/test/java/org/apache/druid/indexing/worker/http/WorkerResourceTest.java @@ -19,26 +19,11 @@ package org.apache.druid.indexing.worker.http; -import com.fasterxml.jackson.databind.ObjectMapper; -import org.apache.curator.framework.CuratorFramework; -import org.apache.curator.framework.CuratorFrameworkFactory; -import org.apache.curator.retry.ExponentialBackoffRetry; -import org.apache.curator.test.TestingCluster; -import org.apache.druid.curator.PotentiallyGzippedCompressionProvider; -import org.apache.druid.curator.ZkEnablementConfig; -import org.apache.druid.curator.announcement.NodeAnnouncer; -import org.apache.druid.indexing.overlord.config.RemoteTaskRunnerConfig; +import org.apache.druid.indexing.overlord.TaskRunner; import org.apache.druid.indexing.worker.Worker; -import org.apache.druid.indexing.worker.WorkerCuratorCoordinator; -import org.apache.druid.indexing.worker.WorkerTaskMonitor; +import org.apache.druid.indexing.worker.WorkerTaskManager; import org.apache.druid.indexing.worker.config.WorkerConfig; -import org.apache.druid.jackson.DefaultObjectMapper; -import org.apache.druid.java.util.common.StringUtils; -import org.apache.druid.java.util.common.concurrent.Execs; -import org.apache.druid.server.initialization.IndexerZkConfig; -import org.apache.druid.server.initialization.ZkPathsConfig; import org.easymock.EasyMock; -import org.junit.After; import org.junit.Assert; import org.junit.Before; import org.junit.Test; @@ -49,33 +34,13 @@ */ public class WorkerResourceTest { - private static final ObjectMapper JSON_MAPPER = new DefaultObjectMapper(); - private static final String BASE_PATH = "/test/druid"; - private static final String ANNOUNCEMENT_PATH = StringUtils.format("%s/indexer/announcements/host", BASE_PATH); - - private TestingCluster testingCluster; - private CuratorFramework cf; - private Worker worker; - - private WorkerCuratorCoordinator curatorCoordinator; + private WorkerTaskManager workerTaskManager; private 
WorkerResource workerResource; @Before - public void setUp() throws Exception + public void setUp() { - testingCluster = new TestingCluster(1); - testingCluster.start(); - - cf = CuratorFrameworkFactory.builder() - .connectString(testingCluster.getConnectString()) - .retryPolicy(new ExponentialBackoffRetry(1, 10)) - .compressionProvider(new PotentiallyGzippedCompressionProvider(false)) - .build(); - cf.start(); - cf.blockUntilConnected(); - cf.create().creatingParentsIfNeeded().forPath(BASE_PATH); - worker = new Worker( "http", "host", @@ -84,71 +49,49 @@ public void setUp() throws Exception "v1", WorkerConfig.DEFAULT_CATEGORY ); - - curatorCoordinator = new WorkerCuratorCoordinator( - JSON_MAPPER, - new IndexerZkConfig(new ZkPathsConfig() - { - @Override - public String getBase() - { - return BASE_PATH; - } - }, null, null, null, null), - new RemoteTaskRunnerConfig(), - cf, - new NodeAnnouncer(cf, Execs.directExecutor()), - worker - ); - curatorCoordinator.start(); - + workerTaskManager = EasyMock.createMock(WorkerTaskManager.class); workerResource = new WorkerResource( worker, - () -> curatorCoordinator, - null, - EasyMock.createNiceMock(WorkerTaskMonitor.class), - ZkEnablementConfig.ENABLED + EasyMock.createNiceMock(TaskRunner.class), + workerTaskManager ); } - @After - public void tearDown() throws Exception + @Test + public void testDoDisable() { - curatorCoordinator.stop(); - cf.close(); - testingCluster.close(); + workerTaskManager.workerDisabled(); + EasyMock.expectLastCall(); + EasyMock.replay(workerTaskManager); + + Response res = workerResource.doDisable(); + Assert.assertEquals(Response.Status.OK.getStatusCode(), res.getStatus()); + + EasyMock.verify(workerTaskManager); } @Test - public void testDoDisable() throws Exception + public void testDoEnable() { - Worker theWorker = JSON_MAPPER.readValue(cf.getData().forPath(ANNOUNCEMENT_PATH), Worker.class); - Assert.assertEquals("v1", theWorker.getVersion()); - Assert.assertFalse(theWorker.isDisabled()); + 
workerTaskManager.workerEnabled(); + EasyMock.expectLastCall(); + EasyMock.replay(workerTaskManager); - Response res = workerResource.doDisable(); + Response res = workerResource.doEnable(); Assert.assertEquals(Response.Status.OK.getStatusCode(), res.getStatus()); - theWorker = JSON_MAPPER.readValue(cf.getData().forPath(ANNOUNCEMENT_PATH), Worker.class); - Assert.assertTrue(theWorker.getVersion().isEmpty()); - Assert.assertTrue(theWorker.isDisabled()); + EasyMock.verify(workerTaskManager); } @Test - public void testDoEnable() throws Exception + public void testIsEnabled() { - // Disable the worker - Response res = workerResource.doDisable(); - Assert.assertEquals(Response.Status.OK.getStatusCode(), res.getStatus()); - Worker theWorker = JSON_MAPPER.readValue(cf.getData().forPath(ANNOUNCEMENT_PATH), Worker.class); - Assert.assertTrue(theWorker.getVersion().isEmpty()); - Assert.assertTrue(theWorker.isDisabled()); + EasyMock.expect(workerTaskManager.isWorkerEnabled()).andReturn(true); + EasyMock.replay(workerTaskManager); - // Enable the worker - res = workerResource.doEnable(); + Response res = workerResource.isEnabled(); Assert.assertEquals(Response.Status.OK.getStatusCode(), res.getStatus()); - theWorker = JSON_MAPPER.readValue(cf.getData().forPath(ANNOUNCEMENT_PATH), Worker.class); - Assert.assertEquals("v1", theWorker.getVersion()); - Assert.assertFalse(theWorker.isDisabled()); + + EasyMock.verify(workerTaskManager); } } diff --git a/indexing-service/src/test/java/org/apache/druid/server/initialization/IndexerZkConfigTest.java b/indexing-service/src/test/java/org/apache/druid/server/initialization/IndexerZkConfigTest.java deleted file mode 100644 index 92ddb1b63caa..000000000000 --- a/indexing-service/src/test/java/org/apache/druid/server/initialization/IndexerZkConfigTest.java +++ /dev/null @@ -1,278 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.server.initialization; - -import com.fasterxml.jackson.annotation.JsonProperty; -import com.google.common.collect.ImmutableList; -import com.google.inject.Binder; -import com.google.inject.Injector; -import com.google.inject.Module; -import com.google.inject.name.Names; -import org.apache.druid.guice.GuiceInjectors; -import org.apache.druid.guice.JsonConfigProvider; -import org.apache.druid.guice.JsonConfigurator; -import org.apache.druid.initialization.Initialization; -import org.apache.druid.jackson.DefaultObjectMapper; -import org.apache.druid.java.util.common.StringUtils; -import org.apache.druid.java.util.common.jackson.JacksonUtils; -import org.junit.Assert; -import org.junit.Before; -import org.junit.BeforeClass; -import org.junit.Test; - -import java.lang.reflect.Field; -import java.lang.reflect.InvocationTargetException; -import java.lang.reflect.Method; -import java.util.Collection; -import java.util.HashSet; -import java.util.Map; -import java.util.Properties; -import java.util.UUID; - -/** - * - */ -public class IndexerZkConfigTest -{ - private static final String INDEXER_PROPERTY_STRING = "test.druid.zk.paths.indexer"; - private static final String ZK_SERVICE_CONFIG_STRING = "test.druid.zk.paths"; - private static final 
Collection CLOBBERABLE_PROPERTIES = new HashSet<>(); - - private static final Module SIMPLE_ZK_CONFIG_MODULE = new Module() - { - @Override - public void configure(Binder binder) - { - binder.bindConstant().annotatedWith(Names.named("serviceName")).to("druid/test"); - binder.bindConstant().annotatedWith(Names.named("servicePort")).to(0); - binder.bindConstant().annotatedWith(Names.named("tlsServicePort")).to(-1); - // See IndexingServiceModuleHelper - JsonConfigProvider.bind(binder, INDEXER_PROPERTY_STRING, IndexerZkConfig.class); - JsonConfigProvider.bind(binder, ZK_SERVICE_CONFIG_STRING, ZkPathsConfig.class); - } - }; - - @BeforeClass - public static void setup() - { - for (Field field : IndexerZkConfig.class.getDeclaredFields()) { - if (null != field.getAnnotation(JsonProperty.class)) { - CLOBBERABLE_PROPERTIES.add(StringUtils.format("%s.%s", INDEXER_PROPERTY_STRING, field.getName())); - } - } - for (Field field : ZkPathsConfig.class.getDeclaredFields()) { - if (null != field.getAnnotation(JsonProperty.class)) { - CLOBBERABLE_PROPERTIES.add(StringUtils.format("%s.%s", ZK_SERVICE_CONFIG_STRING, field.getName())); - } - } - } - - private Properties propertyValues = new Properties(); - private int assertions = 0; - - @Before - public void setupTest() - { - for (String property : CLOBBERABLE_PROPERTIES) { - propertyValues.put(property, UUID.randomUUID().toString()); - } - assertions = 0; - } - - - private void validateEntries(ZkPathsConfig zkPathsConfig) - throws IllegalAccessException, NoSuchMethodException, InvocationTargetException - { - for (Field field : ZkPathsConfig.class.getDeclaredFields()) { - if (null != field.getAnnotation(JsonProperty.class)) { - String property = StringUtils.format("%s.%s", ZK_SERVICE_CONFIG_STRING, field.getName()); - String getter = StringUtils.format( - "get%s%s", - StringUtils.toUpperCase(field.getName().substring(0, 1)), - field.getName().substring(1) - ); - Method method = ZkPathsConfig.class.getDeclaredMethod(getter); - 
Assert.assertEquals(propertyValues.getProperty(property), method.invoke(zkPathsConfig)); - ++assertions; - } - } - } - - private void validateEntries(IndexerZkConfig indexerZkConfig) - throws IllegalAccessException, NoSuchMethodException, InvocationTargetException - { - for (Field field : IndexerZkConfig.class.getDeclaredFields()) { - if (null != field.getAnnotation(JsonProperty.class)) { - String property = StringUtils.format("%s.%s", INDEXER_PROPERTY_STRING, field.getName()); - String getter = StringUtils.format( - "get%s%s", - StringUtils.toUpperCase(field.getName().substring(0, 1)), - field.getName().substring(1) - ); - Method method = IndexerZkConfig.class.getDeclaredMethod(getter); - Assert.assertEquals(propertyValues.getProperty(property), method.invoke(indexerZkConfig)); - ++assertions; - } - } - } - - @Test - public void testNullConfig() - { - propertyValues.clear(); - - final Injector injector = Initialization.makeInjectorWithModules( - GuiceInjectors.makeStartupInjector(), - ImmutableList.of(SIMPLE_ZK_CONFIG_MODULE) - ); - JsonConfigurator configurator = injector.getBinding(JsonConfigurator.class).getProvider().get(); - - JsonConfigProvider zkPathsConfig = JsonConfigProvider.of(ZK_SERVICE_CONFIG_STRING, ZkPathsConfig.class); - zkPathsConfig.inject(propertyValues, configurator); - - JsonConfigProvider indexerZkConfig = JsonConfigProvider.of( - INDEXER_PROPERTY_STRING, - IndexerZkConfig.class - ); - indexerZkConfig.inject(propertyValues, configurator); - - Assert.assertEquals("/druid/indexer/tasks", indexerZkConfig.get().getTasksPath()); - } - - @Test - public void testSimpleConfig() throws IllegalAccessException, NoSuchMethodException, InvocationTargetException - { - final Injector injector = Initialization.makeInjectorWithModules( - GuiceInjectors.makeStartupInjector(), - ImmutableList.of(SIMPLE_ZK_CONFIG_MODULE) - ); - JsonConfigurator configurator = injector.getBinding(JsonConfigurator.class).getProvider().get(); - - JsonConfigProvider zkPathsConfig = 
JsonConfigProvider.of(ZK_SERVICE_CONFIG_STRING, ZkPathsConfig.class); - zkPathsConfig.inject(propertyValues, configurator); - - JsonConfigProvider indexerZkConfig = JsonConfigProvider.of( - INDEXER_PROPERTY_STRING, - IndexerZkConfig.class - ); - indexerZkConfig.inject(propertyValues, configurator); - - - IndexerZkConfig zkConfig = indexerZkConfig.get(); - ZkPathsConfig zkPathsConfig1 = zkPathsConfig.get(); - - validateEntries(zkConfig); - validateEntries(zkPathsConfig1); - Assert.assertEquals(CLOBBERABLE_PROPERTIES.size(), assertions); - } - - - - @Test - public void testIndexerBaseOverride() - { - final String overrideValue = "/foo/bar/baz"; - final String indexerPropertyKey = INDEXER_PROPERTY_STRING + ".base"; - final String priorValue = System.getProperty(indexerPropertyKey); - System.setProperty(indexerPropertyKey, overrideValue); // Set it here so that the binding picks it up - final Injector injector = Initialization.makeInjectorWithModules( - GuiceInjectors.makeStartupInjector(), - ImmutableList.of(SIMPLE_ZK_CONFIG_MODULE) - ); - propertyValues.clear(); - propertyValues.setProperty(indexerPropertyKey, overrideValue); // Have to set it here as well annoyingly enough - - - JsonConfigurator configurator = injector.getBinding(JsonConfigurator.class).getProvider().get(); - - JsonConfigProvider indexerPathsConfig = JsonConfigProvider.of( - INDEXER_PROPERTY_STRING, - IndexerZkConfig.class - ); - indexerPathsConfig.inject(propertyValues, configurator); - IndexerZkConfig indexerZkConfig = indexerPathsConfig.get(); - - - // Rewind value before we potentially fail - if (priorValue == null) { - System.clearProperty(indexerPropertyKey); - } else { - System.setProperty(indexerPropertyKey, priorValue); - } - - Assert.assertEquals(overrideValue, indexerZkConfig.getBase()); - Assert.assertEquals(overrideValue + "/announcements", indexerZkConfig.getAnnouncementsPath()); - } - - @Test - public void testExactConfig() - { - final Injector injector = 
Initialization.makeInjectorWithModules( - GuiceInjectors.makeStartupInjector(), - ImmutableList.of(SIMPLE_ZK_CONFIG_MODULE) - ); - propertyValues.setProperty(ZK_SERVICE_CONFIG_STRING + ".base", "/druid/metrics"); - - - JsonConfigurator configurator = injector.getBinding(JsonConfigurator.class).getProvider().get(); - - JsonConfigProvider zkPathsConfig = JsonConfigProvider.of( - ZK_SERVICE_CONFIG_STRING, - ZkPathsConfig.class - ); - - zkPathsConfig.inject(propertyValues, configurator); - - ZkPathsConfig zkPathsConfig1 = zkPathsConfig.get(); - - IndexerZkConfig indexerZkConfig = new IndexerZkConfig(zkPathsConfig1, null, null, null, null); - - Assert.assertEquals("/druid/metrics/indexer", indexerZkConfig.getBase()); - Assert.assertEquals("/druid/metrics/indexer/announcements", indexerZkConfig.getAnnouncementsPath()); - } - - @Test - public void testFullOverride() throws Exception - { - final DefaultObjectMapper mapper = new DefaultObjectMapper(); - final ZkPathsConfig zkPathsConfig = new ZkPathsConfig(); - - IndexerZkConfig indexerZkConfig = new IndexerZkConfig( - zkPathsConfig, - "/druid/prod", - "/druid/prod/a", - "/druid/prod/t", - "/druid/prod/s" - ); - - Map value = mapper.readValue( - mapper.writeValueAsString(indexerZkConfig), JacksonUtils.TYPE_REFERENCE_MAP_STRING_STRING - ); - IndexerZkConfig newConfig = new IndexerZkConfig( - zkPathsConfig, - value.get("base"), - value.get("announcementsPath"), - value.get("tasksPath"), - value.get("statusPath") - ); - - Assert.assertEquals(indexerZkConfig, newConfig); - } -} diff --git a/multi-stage-query/src/main/java/org/apache/druid/msq/rpc/ControllerResource.java b/multi-stage-query/src/main/java/org/apache/druid/msq/rpc/ControllerResource.java index cc570ec992ad..90b259ad39dd 100644 --- a/multi-stage-query/src/main/java/org/apache/druid/msq/rpc/ControllerResource.java +++ b/multi-stage-query/src/main/java/org/apache/druid/msq/rpc/ControllerResource.java @@ -199,8 +199,8 @@ public Response httpGetTaskList(@Context final 
HttpServletRequest req) } /** - * See {@link org.apache.druid.indexing.overlord.RemoteTaskRunner#streamTaskReports} for the client-side code that - * calls this API. + * See {@link org.apache.druid.indexing.overlord.hrtr.HttpRemoteTaskRunner#streamTaskReports} for the client-side code + * that calls this API. */ @GET @Path("/liveReports") diff --git a/processing/src/main/java/org/apache/druid/guice/StartupInjectorBuilder.java b/processing/src/main/java/org/apache/druid/guice/StartupInjectorBuilder.java index 954610c4e287..70dd1cc51519 100644 --- a/processing/src/main/java/org/apache/druid/guice/StartupInjectorBuilder.java +++ b/processing/src/main/java/org/apache/druid/guice/StartupInjectorBuilder.java @@ -55,6 +55,8 @@ public class StartupInjectorBuilder extends BaseInjectorBuildercurator-x-discovery-server + * curator-x-discovery-server. * - * This method is marked protected because it should never be used outside of the org.apache.druid.curator.discovery - * package. If you are tempted to use this method anywhere else you are most likely doing something wrong. - * Mapping the actual service name to the name used within curator should be left to {@link CuratorServiceAnnouncer} - * and {@link ServerDiscoveryFactory} + *

This method is marked protected because it should never be used outside of the + * {@code org.apache.druid.curator.discovery} package. If you are tempted to use this method anywhere else you are + * most likely doing something wrong. Mapping the actual service name to the name used within curator should be left + * to {@link CuratorServiceAnnouncer}. * * @see CuratorServiceAnnouncer - * @see ServerDiscoveryFactory - * - * @param serviceName - * @return */ protected static String makeCanonicalServiceName(String serviceName) { diff --git a/server/src/main/java/org/apache/druid/curator/discovery/DiscoveryModule.java b/server/src/main/java/org/apache/druid/curator/discovery/DiscoveryModule.java index bd1ad64aacaa..d1fbcf4672e4 100644 --- a/server/src/main/java/org/apache/druid/curator/discovery/DiscoveryModule.java +++ b/server/src/main/java/org/apache/druid/curator/discovery/DiscoveryModule.java @@ -322,15 +322,6 @@ public void stop() return serviceDiscovery; } - @Provides - @LazySingleton - public ServerDiscoveryFactory getServerDiscoveryFactory( - ServiceDiscovery serviceDiscovery - ) - { - return new ServerDiscoveryFactory(serviceDiscovery); - } - private static class NoopServiceDiscovery implements ServiceDiscovery { @Override diff --git a/server/src/main/java/org/apache/druid/curator/discovery/ServerDiscoveryFactory.java b/server/src/main/java/org/apache/druid/curator/discovery/ServerDiscoveryFactory.java deleted file mode 100644 index ca3cba0132ca..000000000000 --- a/server/src/main/java/org/apache/druid/curator/discovery/ServerDiscoveryFactory.java +++ /dev/null @@ -1,91 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.curator.discovery; - -import com.google.inject.Inject; -import org.apache.curator.x.discovery.ServiceDiscovery; -import org.apache.curator.x.discovery.ServiceInstance; -import org.apache.curator.x.discovery.ServiceProvider; - -import java.util.Collection; - -/** - * Use {@link org.apache.druid.discovery.DruidNodeDiscovery} for discovery. - */ -@Deprecated -public class ServerDiscoveryFactory -{ - private final ServiceDiscovery serviceDiscovery; - - @Inject - public ServerDiscoveryFactory( - ServiceDiscovery serviceDiscovery - ) - { - this.serviceDiscovery = serviceDiscovery; - } - - public ServerDiscoverySelector createSelector(String serviceName) - { - if (serviceName == null) { - return new ServerDiscoverySelector(new NoopServiceProvider(), serviceName); - } - - final ServiceProvider serviceProvider = serviceDiscovery - .serviceProviderBuilder() - .serviceName(CuratorServiceUtils.makeCanonicalServiceName(serviceName)) - .build(); - return new ServerDiscoverySelector(serviceProvider, serviceName); - } - - private static class NoopServiceProvider implements ServiceProvider - { - @Override - public void start() - { - // do nothing - } - - @Override - public ServiceInstance getInstance() - { - return null; - } - - @Override - public Collection> getAllInstances() - { - return null; - } - - @Override - public void noteError(ServiceInstance tServiceInstance) - { - // do nothing - } - - @Override - public void close() - { - // do nothing - } - } - -} diff --git 
a/server/src/main/java/org/apache/druid/curator/discovery/ServerDiscoverySelector.java b/server/src/main/java/org/apache/druid/curator/discovery/ServerDiscoverySelector.java deleted file mode 100644 index 84f4fe81ce0f..000000000000 --- a/server/src/main/java/org/apache/druid/curator/discovery/ServerDiscoverySelector.java +++ /dev/null @@ -1,146 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.druid.curator.discovery; - -import com.google.common.base.Function; -import com.google.common.base.Preconditions; -import com.google.common.collect.Collections2; -import com.google.common.net.HostAndPort; -import org.apache.curator.x.discovery.ServiceInstance; -import org.apache.curator.x.discovery.ServiceProvider; -import org.apache.druid.client.selector.DiscoverySelector; -import org.apache.druid.client.selector.Server; -import org.apache.druid.java.util.common.lifecycle.LifecycleStart; -import org.apache.druid.java.util.common.lifecycle.LifecycleStop; -import org.apache.druid.java.util.common.logger.Logger; - -import javax.annotation.Nullable; -import java.io.IOException; -import java.util.Collection; -import java.util.Collections; - -/** - * Use {@link org.apache.druid.discovery.DruidNodeDiscovery} for discovery. - */ -@Deprecated -public class ServerDiscoverySelector implements DiscoverySelector -{ - private static final Logger log = new Logger(ServerDiscoverySelector.class); - - private final ServiceProvider serviceProvider; - private final String name; - - public ServerDiscoverySelector(ServiceProvider serviceProvider, String name) - { - this.serviceProvider = serviceProvider; - this.name = name; - } - - private static final Function TO_SERVER = new Function<>() - { - @Override - public Server apply(final ServiceInstance instance) - { - Preconditions.checkState( - instance.getPort() >= 0 || (instance.getSslPort() != null && instance.getSslPort() >= 0), - "Both port and sslPort not set" - ); - final int port; - final String scheme; - if (instance.getSslPort() == null) { - port = instance.getPort(); - scheme = "http"; - } else { - port = instance.getSslPort() >= 0 ? instance.getSslPort() : instance.getPort(); - scheme = instance.getSslPort() >= 0 ? 
"https" : "http"; - } - return new Server() - { - @Override - public String getHost() - { - return HostAndPort.fromParts(getAddress(), getPort()).toString(); - } - - @Override - public String getAddress() - { - return instance.getAddress(); - } - - @Override - public int getPort() - { - return port; - } - - @Override - public String getScheme() - { - return scheme; - } - }; - } - }; - - @Nullable - @Override - public Server pick() - { - final ServiceInstance instance; - try { - instance = serviceProvider.getInstance(); - } - catch (Exception e) { - log.info(e, "Exception getting instance for [%s]", name); - return null; - } - - if (instance == null) { - log.error("No server instance found for [%s]", name); - return null; - } - - return TO_SERVER.apply(instance); - } - - public Collection getAll() - { - try { - return Collections2.transform(serviceProvider.getAllInstances(), TO_SERVER); - } - catch (Exception e) { - log.info(e, "Unable to get all instances"); - return Collections.emptyList(); - } - } - - @LifecycleStart - public void start() throws Exception - { - serviceProvider.start(); - } - - @LifecycleStop - public void stop() throws IOException - { - serviceProvider.close(); - } -} diff --git a/server/src/main/java/org/apache/druid/guice/AnnouncerModule.java b/server/src/main/java/org/apache/druid/guice/AnnouncerModule.java index e48822afab6c..2476c7d9af92 100644 --- a/server/src/main/java/org/apache/druid/guice/AnnouncerModule.java +++ b/server/src/main/java/org/apache/druid/guice/AnnouncerModule.java @@ -27,7 +27,6 @@ import org.apache.druid.curator.announcement.NodeAnnouncer; import org.apache.druid.curator.announcement.PathChildrenAnnouncer; import org.apache.druid.curator.announcement.ServiceAnnouncer; -import org.apache.druid.guice.annotations.DirectExecutorAnnouncer; import org.apache.druid.guice.annotations.SingleThreadedAnnouncer; import org.apache.druid.java.util.common.concurrent.Execs; import 
org.apache.druid.server.coordination.BatchDataSegmentAnnouncer; @@ -57,16 +56,4 @@ public ServiceAnnouncer getAnnouncerWithSingleThreadedExecutorService(CuratorFra } } - @Provides - @DirectExecutorAnnouncer - @ManageLifecycleAnnouncements - public ServiceAnnouncer getAnnouncerWithDirectExecutorService(CuratorFramework curator, CuratorConfig config) - { - boolean usingPathChildrenCacheAnnouncer = config.getPathChildrenCacheStrategy(); - if (usingPathChildrenCacheAnnouncer) { - return new PathChildrenAnnouncer(curator, Execs.directExecutor()); - } else { - return new NodeAnnouncer(curator, Execs.directExecutor()); - } - } } diff --git a/server/src/main/java/org/apache/druid/guice/annotations/DirectExecutorAnnouncer.java b/server/src/main/java/org/apache/druid/guice/annotations/DirectExecutorAnnouncer.java deleted file mode 100644 index 0d675469222b..000000000000 --- a/server/src/main/java/org/apache/druid/guice/annotations/DirectExecutorAnnouncer.java +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.druid.guice.annotations; - -import com.google.inject.BindingAnnotation; - -import java.lang.annotation.ElementType; -import java.lang.annotation.Retention; -import java.lang.annotation.RetentionPolicy; -import java.lang.annotation.Target; - -@BindingAnnotation -@Target({ElementType.FIELD, ElementType.PARAMETER, ElementType.METHOD}) -@Retention(RetentionPolicy.RUNTIME) -public @interface DirectExecutorAnnouncer -{ -} diff --git a/server/src/main/java/org/apache/druid/server/coordinator/InlineSchemaDataSourceCompactionConfig.java b/server/src/main/java/org/apache/druid/server/coordinator/InlineSchemaDataSourceCompactionConfig.java index 55f73758b69a..23eccb3f4326 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/InlineSchemaDataSourceCompactionConfig.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/InlineSchemaDataSourceCompactionConfig.java @@ -49,8 +49,8 @@ public static Builder builder() } /** - * The number of input segments is limited because the byte size of a serialized task spec is limited by - * org.apache.druid.indexing.overlord.config.RemoteTaskRunnerConfig.maxZnodeBytes. + * The number of input segments is limited because the byte size of a serialized task spec is bounded by the + * maximum payload size accepted by the task runner. */ @Nullable private final Integer maxRowsPerSegment; diff --git a/server/src/test/java/org/apache/druid/curator/discovery/ServerDiscoverySelectorTest.java b/server/src/test/java/org/apache/druid/curator/discovery/ServerDiscoverySelectorTest.java deleted file mode 100644 index 0d0180725d1f..000000000000 --- a/server/src/test/java/org/apache/druid/curator/discovery/ServerDiscoverySelectorTest.java +++ /dev/null @@ -1,237 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.curator.discovery; - -import org.apache.curator.x.discovery.ServiceInstance; -import org.apache.curator.x.discovery.ServiceProvider; -import org.apache.druid.client.selector.Server; -import org.apache.druid.java.util.common.StringUtils; -import org.easymock.EasyMock; -import org.junit.Assert; -import org.junit.Before; -import org.junit.Test; - -import java.io.IOException; -import java.net.URI; - -public class ServerDiscoverySelectorTest -{ - - private ServiceProvider serviceProvider; - private ServerDiscoverySelector serverDiscoverySelector; - private ServiceInstance instance; - private static final int PORT = 8080; - private static final int SSL_PORT = 8280; - private static final String ADDRESS = "localhost"; - - @Before - public void setUp() - { - serviceProvider = EasyMock.createMock(ServiceProvider.class); - instance = EasyMock.createMock(ServiceInstance.class); - serverDiscoverySelector = new ServerDiscoverySelector(serviceProvider, "test"); - } - - @Test - public void testPick() throws Exception - { - EasyMock.expect(serviceProvider.getInstance()).andReturn(instance).anyTimes(); - EasyMock.expect(instance.getAddress()).andReturn(ADDRESS).anyTimes(); - EasyMock.expect(instance.getPort()).andReturn(PORT).anyTimes(); - EasyMock.expect(instance.getSslPort()).andReturn(-1).anyTimes(); - EasyMock.replay(instance, serviceProvider); - Server server = 
serverDiscoverySelector.pick(); - Assert.assertEquals(PORT, server.getPort()); - Assert.assertEquals(ADDRESS, server.getAddress()); - Assert.assertTrue(server.getHost().contains(Integer.toString(PORT))); - Assert.assertTrue(server.getHost().contains(ADDRESS)); - Assert.assertEquals("http", server.getScheme()); - EasyMock.verify(instance, serviceProvider); - final URI uri = new URI( - server.getScheme(), - null, - server.getAddress(), - server.getPort(), - "/druid/indexer/v1/action", - null, - null - ); - Assert.assertEquals(PORT, uri.getPort()); - Assert.assertEquals(ADDRESS, uri.getHost()); - Assert.assertEquals("http", uri.getScheme()); - } - - @Test - public void testPickWithNullSslPort() throws Exception - { - EasyMock.expect(serviceProvider.getInstance()).andReturn(instance).anyTimes(); - EasyMock.expect(instance.getAddress()).andReturn(ADDRESS).anyTimes(); - EasyMock.expect(instance.getPort()).andReturn(PORT).anyTimes(); - EasyMock.expect(instance.getSslPort()).andReturn(null).anyTimes(); - EasyMock.replay(instance, serviceProvider); - Server server = serverDiscoverySelector.pick(); - Assert.assertEquals(PORT, server.getPort()); - Assert.assertEquals(ADDRESS, server.getAddress()); - Assert.assertTrue(server.getHost().contains(Integer.toString(PORT))); - Assert.assertTrue(server.getHost().contains(ADDRESS)); - Assert.assertEquals("http", server.getScheme()); - EasyMock.verify(instance, serviceProvider); - final URI uri = new URI( - server.getScheme(), - null, - server.getAddress(), - server.getPort(), - "/druid/indexer/v1/action", - null, - null - ); - Assert.assertEquals(PORT, uri.getPort()); - Assert.assertEquals(ADDRESS, uri.getHost()); - Assert.assertEquals("http", uri.getScheme()); - } - - @Test - public void testPickWithSslPort() throws Exception - { - EasyMock.expect(serviceProvider.getInstance()).andReturn(instance).anyTimes(); - EasyMock.expect(instance.getAddress()).andReturn(ADDRESS).anyTimes(); - 
EasyMock.expect(instance.getPort()).andReturn(PORT).anyTimes(); - EasyMock.expect(instance.getSslPort()).andReturn(SSL_PORT).anyTimes(); - EasyMock.replay(instance, serviceProvider); - Server server = serverDiscoverySelector.pick(); - Assert.assertEquals(SSL_PORT, server.getPort()); - Assert.assertEquals(ADDRESS, server.getAddress()); - Assert.assertTrue(server.getHost().contains(Integer.toString(SSL_PORT))); - Assert.assertTrue(server.getHost().contains(ADDRESS)); - Assert.assertEquals("https", server.getScheme()); - EasyMock.verify(instance, serviceProvider); - final URI uri = new URI( - server.getScheme(), - null, - server.getAddress(), - server.getPort(), - "/druid/indexer/v1/action", - null, - null - ); - Assert.assertEquals(SSL_PORT, uri.getPort()); - Assert.assertEquals(ADDRESS, uri.getHost()); - Assert.assertEquals("https", uri.getScheme()); - } - - @Test - public void testPickIPv6() throws Exception - { - final String address = "2001:0db8:0000:0000:0000:ff00:0042:8329"; - EasyMock.expect(serviceProvider.getInstance()).andReturn(instance).anyTimes(); - EasyMock.expect(instance.getAddress()).andReturn(address).anyTimes(); - EasyMock.expect(instance.getPort()).andReturn(PORT).anyTimes(); - EasyMock.expect(instance.getSslPort()).andReturn(-1).anyTimes(); - EasyMock.replay(instance, serviceProvider); - Server server = serverDiscoverySelector.pick(); - Assert.assertEquals(PORT, server.getPort()); - Assert.assertEquals(address, server.getAddress()); - Assert.assertTrue(server.getHost().contains(Integer.toString(PORT))); - Assert.assertTrue(server.getHost().contains(address)); - Assert.assertEquals("http", server.getScheme()); - EasyMock.verify(instance, serviceProvider); - final URI uri = new URI( - server.getScheme(), - null, - server.getAddress(), - server.getPort(), - "/druid/indexer/v1/action", - null, - null - ); - Assert.assertEquals(PORT, uri.getPort()); - Assert.assertEquals(StringUtils.format("[%s]", address), uri.getHost()); - 
Assert.assertEquals("http", uri.getScheme()); - } - - - @Test - public void testPickIPv6Bracket() throws Exception - { - final String address = "[2001:0db8:0000:0000:0000:ff00:0042:8329]"; - EasyMock.expect(serviceProvider.getInstance()).andReturn(instance).anyTimes(); - EasyMock.expect(instance.getAddress()).andReturn(address).anyTimes(); - EasyMock.expect(instance.getPort()).andReturn(PORT).anyTimes(); - EasyMock.expect(instance.getSslPort()).andReturn(-1).anyTimes(); - EasyMock.replay(instance, serviceProvider); - Server server = serverDiscoverySelector.pick(); - Assert.assertEquals(PORT, server.getPort()); - Assert.assertEquals(address, server.getAddress()); - Assert.assertTrue(server.getHost().contains(Integer.toString(PORT))); - Assert.assertTrue(server.getHost().contains(address)); - Assert.assertEquals("http", server.getScheme()); - EasyMock.verify(instance, serviceProvider); - final URI uri = new URI( - server.getScheme(), - null, - server.getAddress(), - server.getPort(), - "/druid/indexer/v1/action", - null, - null - ); - Assert.assertEquals(PORT, uri.getPort()); - Assert.assertEquals(address, uri.getHost()); - Assert.assertEquals("http", uri.getScheme()); - } - - @Test - public void testPickWithNullInstance() throws Exception - { - EasyMock.expect(serviceProvider.getInstance()).andReturn(null).anyTimes(); - EasyMock.replay(serviceProvider); - Server server = serverDiscoverySelector.pick(); - Assert.assertNull(server); - EasyMock.verify(serviceProvider); - } - - @Test - public void testPickWithException() throws Exception - { - EasyMock.expect(serviceProvider.getInstance()).andThrow(new Exception()).anyTimes(); - EasyMock.replay(serviceProvider); - Server server = serverDiscoverySelector.pick(); - Assert.assertNull(server); - EasyMock.verify(serviceProvider); - } - - @Test - public void testStart() throws Exception - { - serviceProvider.start(); - EasyMock.replay(serviceProvider); - serverDiscoverySelector.start(); - EasyMock.verify(serviceProvider); - } 
- - @Test - public void testStop() throws IOException - { - serviceProvider.close(); - EasyMock.replay(serviceProvider); - serverDiscoverySelector.stop(); - EasyMock.verify(serviceProvider); - } -} diff --git a/services/src/main/java/org/apache/druid/cli/CliIndexer.java b/services/src/main/java/org/apache/druid/cli/CliIndexer.java index acbeae18f6ad..7839ba7e6b46 100644 --- a/services/src/main/java/org/apache/druid/cli/CliIndexer.java +++ b/services/src/main/java/org/apache/druid/cli/CliIndexer.java @@ -30,7 +30,6 @@ import com.google.inject.name.Names; import org.apache.druid.client.DruidServer; import org.apache.druid.client.DruidServerConfig; -import org.apache.druid.curator.ZkEnablementConfig; import org.apache.druid.discovery.DataNodeService; import org.apache.druid.discovery.NodeRole; import org.apache.druid.discovery.WorkerNodeService; @@ -102,7 +101,6 @@ public class CliIndexer extends ServerRunnable private static final Logger log = new Logger(CliIndexer.class); private Properties properties; - private boolean isZkEnabled = true; public CliIndexer() { @@ -113,7 +111,6 @@ public CliIndexer() public void configure(Properties properties) { this.properties = properties; - isZkEnabled = ZkEnablementConfig.isEnabled(properties); } @Override @@ -166,7 +163,7 @@ public void configure(Binder binder) CliPeon.bindPeonDataSegmentHandlers(binder); CliPeon.bindRealtimeCache(binder); CliPeon.bindCoordinatorHandoffNotifer(binder); - binder.install(CliMiddleManager.makeWorkerManagementModule(isZkEnabled)); + binder.install(CliMiddleManager.makeWorkerManagementModule()); binder.bind(AppenderatorsManager.class) .to(UnifiedIndexerAppenderatorsManager.class) diff --git a/services/src/main/java/org/apache/druid/cli/CliMiddleManager.java b/services/src/main/java/org/apache/druid/cli/CliMiddleManager.java index 57bd4672b287..e95506ebf89f 100644 --- a/services/src/main/java/org/apache/druid/cli/CliMiddleManager.java +++ 
b/services/src/main/java/org/apache/druid/cli/CliMiddleManager.java @@ -25,7 +25,6 @@ import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; import com.google.inject.Binder; -import com.google.inject.Inject; import com.google.inject.Key; import com.google.inject.Module; import com.google.inject.Provides; @@ -33,7 +32,6 @@ import com.google.inject.name.Named; import com.google.inject.name.Names; import com.google.inject.util.Providers; -import org.apache.druid.curator.ZkEnablementConfig; import org.apache.druid.discovery.NodeRole; import org.apache.druid.discovery.WorkerNodeService; import org.apache.druid.guice.IndexingServiceInputSourceModule; @@ -57,9 +55,7 @@ import org.apache.druid.indexing.overlord.ForkingTaskRunner; import org.apache.druid.indexing.overlord.TaskRunner; import org.apache.druid.indexing.worker.Worker; -import org.apache.druid.indexing.worker.WorkerCuratorCoordinator; import org.apache.druid.indexing.worker.WorkerTaskManager; -import org.apache.druid.indexing.worker.WorkerTaskMonitor; import org.apache.druid.indexing.worker.config.WorkerConfig; import org.apache.druid.indexing.worker.http.TaskManagementResource; import org.apache.druid.indexing.worker.http.WorkerResource; @@ -104,19 +100,11 @@ public class CliMiddleManager extends ServerRunnable { private static final Logger log = new Logger(CliMiddleManager.class); - private boolean isZkEnabled = true; - public CliMiddleManager() { super(log); } - @Inject - public void configure(Properties properties) - { - isZkEnabled = ZkEnablementConfig.isEnabled(properties); - } - @Override protected Set getNodeRoles(Properties properties) { @@ -167,7 +155,7 @@ public void configure(Binder binder) .in(LazySingleton.class); binder.bind(DropwizardRowIngestionMetersFactory.class).in(LazySingleton.class); - binder.install(makeWorkerManagementModule(isZkEnabled)); + binder.install(makeWorkerManagementModule()); binder.bind(JettyServerInitializer.class) 
.to(MiddleManagerJettyServerInitializer.class) @@ -260,21 +248,14 @@ public WorkerNodeService getWorkerNodeService(WorkerConfig workerConfig) ); } - public static Module makeWorkerManagementModule(boolean isZkEnabled) + public static Module makeWorkerManagementModule() { return new Module() { @Override public void configure(Binder binder) { - if (isZkEnabled) { - binder.bind(WorkerTaskManager.class).to(WorkerTaskMonitor.class); - binder.bind(WorkerTaskMonitor.class).in(ManageLifecycle.class); - binder.bind(WorkerCuratorCoordinator.class).in(ManageLifecycle.class); - LifecycleModule.register(binder, WorkerTaskMonitor.class); - } else { - binder.bind(WorkerTaskManager.class).in(ManageLifecycle.class); - } + binder.bind(WorkerTaskManager.class).in(ManageLifecycle.class); Jerseys.addResource(binder, WorkerResource.class); Jerseys.addResource(binder, TaskManagementResource.class); diff --git a/services/src/main/java/org/apache/druid/cli/CliOverlord.java b/services/src/main/java/org/apache/druid/cli/CliOverlord.java index b96f299dfcd5..78161ed098e5 100644 --- a/services/src/main/java/org/apache/druid/cli/CliOverlord.java +++ b/services/src/main/java/org/apache/druid/cli/CliOverlord.java @@ -77,7 +77,6 @@ import org.apache.druid.indexing.overlord.HeapMemoryTaskStorage; import org.apache.druid.indexing.overlord.IndexerMetadataStorageAdapter; import org.apache.druid.indexing.overlord.MetadataTaskStorage; -import org.apache.druid.indexing.overlord.RemoteTaskRunnerFactory; import org.apache.druid.indexing.overlord.TaskMaster; import org.apache.druid.indexing.overlord.TaskQueryTool; import org.apache.druid.indexing.overlord.TaskRunnerFactory; @@ -388,11 +387,6 @@ public void configure(Binder binder) biddy.addBinding("local").to(ForkingTaskRunnerFactory.class); binder.bind(ForkingTaskRunnerFactory.class).in(LazySingleton.class); - biddy.addBinding(RemoteTaskRunnerFactory.TYPE_NAME) - .to(RemoteTaskRunnerFactory.class) - .in(LazySingleton.class); - 
binder.bind(RemoteTaskRunnerFactory.class).in(LazySingleton.class); - biddy.addBinding(HttpRemoteTaskRunnerFactory.TYPE_NAME) .to(HttpRemoteTaskRunnerFactory.class) .in(LazySingleton.class); From f0d06c45ada0ce6177e0e0663e27a79a45b917db Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Fri, 22 May 2026 23:34:14 -0700 Subject: [PATCH 04/12] ci: fix unit tests, bump actions-timeline (#19509) * fix unit tests, bump actions-timeline CI is failing to startup to run unit tests, complaining about actions-timeline version not being allowed, switched to latest per https://github.com/apache/infrastructure-actions/blob/main/actions.yml * fix S3InputSourceTest --- .github/workflows/unit-and-integration-tests-unified.yml | 2 +- .../org/apache/druid/data/input/s3/S3InputSourceTest.java | 6 ++---- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/.github/workflows/unit-and-integration-tests-unified.yml b/.github/workflows/unit-and-integration-tests-unified.yml index b5860b494557..e960493dba31 100644 --- a/.github/workflows/unit-and-integration-tests-unified.yml +++ b/.github/workflows/unit-and-integration-tests-unified.yml @@ -64,4 +64,4 @@ jobs: runs-on: ubuntu-latest if: ${{ !cancelled() }} steps: - - uses: Kesin11/actions-timeline@54d513e0b5ff1158f1cf8321108d666a5a6c1fca + - uses: Kesin11/actions-timeline@44c9c178ffb2fb1d9859614a3ffa79ccfb77565e diff --git a/extensions-core/s3-extensions/src/test/java/org/apache/druid/data/input/s3/S3InputSourceTest.java b/extensions-core/s3-extensions/src/test/java/org/apache/druid/data/input/s3/S3InputSourceTest.java index 96db444aa088..e9291c739249 100644 --- a/extensions-core/s3-extensions/src/test/java/org/apache/druid/data/input/s3/S3InputSourceTest.java +++ b/extensions-core/s3-extensions/src/test/java/org/apache/druid/data/input/s3/S3InputSourceTest.java @@ -408,10 +408,8 @@ public void testSerdeWithCloudConfigPropertiesWithSessionToken() throws Exceptio public void 
testSchemelessEndpointConfigUrlWithNullClientConfigResolvesSupplier() throws Exception { EasyMock.reset(SERVER_SIDE_ENCRYPTING_AMAZON_S3_BUILDER); - EasyMock.expect(SERVER_SIDE_ENCRYPTING_AMAZON_S3_BUILDER.setS3ClientSupplier(EasyMock.anyObject())) - .andReturn(SERVER_SIDE_ENCRYPTING_AMAZON_S3_BUILDER); - EasyMock.expect(SERVER_SIDE_ENCRYPTING_AMAZON_S3_BUILDER.build()) - .andReturn(SERVICE); + EasyMock.expect(SERVER_SIDE_ENCRYPTING_AMAZON_S3_BUILDER.getS3StorageConfig()) + .andStubReturn(S3_STORAGE_CONFIG); EasyMock.replay(SERVER_SIDE_ENCRYPTING_AMAZON_S3_BUILDER); final AWSEndpointConfig schemelessEndpoint = MAPPER.readValue( From 215f415e5a20d58074ab1e86ab139d6db09be8a7 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Sat, 23 May 2026 13:55:48 -0700 Subject: [PATCH 05/12] feat: partial segment cache infrastructure (#19496) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit changes: * add `PartialSegmentMetadataCacheEntry` a `CacheEntry` that range-reads the V10 header on mount, constructs `PartialSegmentFileMapperV10`, and shrinks its reservation to actual on-disk size * add `PartialSegmentBundleCacheEntry` and `PartialSegmentBundleCacheEntryIdentifier` are `CacheEntry` associated with each file bundle of a v10 segment that sparse-allocates and evicts its containers as a unit; places holds metadata and transitive parent bundle entries holds via the `StorageLocation` methods (weak reference holds on the parent cache entries) and reference-counted usage references * add `PartialSegmentCacheBootstrap` a helper that restores partial-format entries from on-disk layout on historical startup (not wired up yet); cleans orphaned bundles * add `ResizableCacheEntry` interface and `StorageLocation.adjustReservation` (shrink-only) so the metadata entry can tighten its reservation post-mount * rename `SegmentFileBuilder.startFileGroup` → `startFileBundle`; introduce `ROOT_BUNDLE_NAME` as the default bundle for containers written without 
an explicit declaration * rename json field `SegmentFileContainerMetadata.fileGroup` → `bundle`; now non-null via getter, normalizes to `ROOT_BUNDLE_NAME` in the constructor, default value omitted from JSON using a custom `JsonInclude` filter * Extract shared `DirectoryBackedRangeReader` and `CountingRangeReader` test helpers; consolidate duplicates across processing + server tests --- .../apache/druid/segment/IndexMergerBase.java | 2 +- .../apache/druid/segment/IndexMergerV10.java | 2 +- .../file/PartialSegmentFileMapperV10.java | 221 ++++- .../segment/file/SegmentFileBuilder.java | 20 +- .../segment/file/SegmentFileBuilderV10.java | 145 +-- .../file/SegmentFileContainerMetadata.java | 58 +- .../segment/PartialQueryableIndexTest.java | 68 +- .../segment/file/CountingRangeReader.java | 77 ++ .../file/DirectoryBackedRangeReader.java | 56 ++ .../file/PartialSegmentFileMapperV10Test.java | 58 -- .../file/SegmentFileBuilderV10Test.java | 116 ++- .../SegmentFileContainerMetadataTest.java | 37 +- .../PartialSegmentBundleCacheEntry.java | 630 +++++++++++++ ...tialSegmentBundleCacheEntryIdentifier.java | 41 + .../loading/PartialSegmentCacheBootstrap.java | 395 +++++++++ .../PartialSegmentMetadataCacheEntry.java | 576 ++++++++++++ .../segment/loading/ResizableCacheEntry.java | 39 + .../loading/SegmentCacheEntryIdentifier.java | 29 +- .../segment/loading/SegmentLoaderConfig.java | 17 + .../segment/loading/StorageLocation.java | 72 ++ ...SegmentBundleCacheEntryIdentifierTest.java | 48 + .../PartialSegmentBundleCacheEntryTest.java | 832 ++++++++++++++++++ .../PartialSegmentCacheBootstrapTest.java | 506 +++++++++++ .../PartialSegmentMetadataCacheEntryTest.java | 423 +++++++++ .../segment/loading/StorageLocationTest.java | 187 ++++ 25 files changed, 4352 insertions(+), 303 deletions(-) create mode 100644 processing/src/test/java/org/apache/druid/segment/file/CountingRangeReader.java create mode 100644 
processing/src/test/java/org/apache/druid/segment/file/DirectoryBackedRangeReader.java create mode 100644 server/src/main/java/org/apache/druid/segment/loading/PartialSegmentBundleCacheEntry.java create mode 100644 server/src/main/java/org/apache/druid/segment/loading/PartialSegmentBundleCacheEntryIdentifier.java create mode 100644 server/src/main/java/org/apache/druid/segment/loading/PartialSegmentCacheBootstrap.java create mode 100644 server/src/main/java/org/apache/druid/segment/loading/PartialSegmentMetadataCacheEntry.java create mode 100644 server/src/main/java/org/apache/druid/segment/loading/ResizableCacheEntry.java create mode 100644 server/src/test/java/org/apache/druid/segment/loading/PartialSegmentBundleCacheEntryIdentifierTest.java create mode 100644 server/src/test/java/org/apache/druid/segment/loading/PartialSegmentBundleCacheEntryTest.java create mode 100644 server/src/test/java/org/apache/druid/segment/loading/PartialSegmentCacheBootstrapTest.java create mode 100644 server/src/test/java/org/apache/druid/segment/loading/PartialSegmentMetadataCacheEntryTest.java diff --git a/processing/src/main/java/org/apache/druid/segment/IndexMergerBase.java b/processing/src/main/java/org/apache/druid/segment/IndexMergerBase.java index 1283b4475b44..50c68de30c35 100644 --- a/processing/src/main/java/org/apache/druid/segment/IndexMergerBase.java +++ b/processing/src/main/java/org/apache/druid/segment/IndexMergerBase.java @@ -630,7 +630,7 @@ protected Metadata makeProjections( final String section2 = "build projection[" + projectionSchema.getName() + "] inverted index and columns"; progress.startSection(section2); - segmentFileBuilder.startFileGroup(projectionSchema.getName()); + segmentFileBuilder.startFileBundle(projectionSchema.getName()); if (projectionSchema.getTimeColumnName() != null) { makeTimeColumn( segmentFileBuilder, diff --git a/processing/src/main/java/org/apache/druid/segment/IndexMergerV10.java 
b/processing/src/main/java/org/apache/druid/segment/IndexMergerV10.java index 91d2661841ec..28d9ee56345c 100644 --- a/processing/src/main/java/org/apache/druid/segment/IndexMergerV10.java +++ b/processing/src/main/java/org/apache/druid/segment/IndexMergerV10.java @@ -218,7 +218,7 @@ protected File makeIndexFiles( /************ Create Inverted Indexes and Finalize Build Columns *************/ final String section = "build inverted index and columns"; progress.startSection(section); - v10Smoosher.startFileGroup(Projections.BASE_TABLE_PROJECTION_NAME); + v10Smoosher.startFileBundle(Projections.BASE_TABLE_PROJECTION_NAME); makeTimeColumn(v10Smoosher, progress, timeWriter, indexSpec, basePrefix + ColumnHolder.TIME_COLUMN_NAME); makeMetricsColumns( v10Smoosher, diff --git a/processing/src/main/java/org/apache/druid/segment/file/PartialSegmentFileMapperV10.java b/processing/src/main/java/org/apache/druid/segment/file/PartialSegmentFileMapperV10.java index c622ec756418..5db4914a464a 100644 --- a/processing/src/main/java/org/apache/druid/segment/file/PartialSegmentFileMapperV10.java +++ b/processing/src/main/java/org/apache/druid/segment/file/PartialSegmentFileMapperV10.java @@ -28,6 +28,7 @@ import org.apache.druid.java.util.common.FileUtils; import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.java.util.common.io.Closer; +import org.apache.druid.java.util.common.logger.Logger; import org.apache.druid.segment.data.CompressionStrategy; import org.apache.druid.segment.loading.SegmentRangeReader; import org.apache.druid.utils.CloseableUtils; @@ -83,7 +84,13 @@ */ public class PartialSegmentFileMapperV10 implements SegmentFileMapper { - static final String METADATA_HEADER_SUFFIX = ".header"; + private static final Logger LOG = new Logger(PartialSegmentFileMapperV10.class); + + /** + * Suffix appended to the target filename to form the local header file. 
Public so cache-manager components can + * recognize the partial-download on-disk layout during bootstrap restore and reservation cleanup. + */ + public static final String METADATA_HEADER_SUFFIX = ".header"; /** * Create (or restore) a lazy mapper for the main segment file with attached external file mappers. If persisted state @@ -146,9 +153,16 @@ static PartialSegmentFileMapperV10 createForFile( bitmapBuffer = mmapBitmap(headerFile, result); } catch (Exception e) { - // corrupted file (partial write, truncated bitmap, bad JSON, etc.) — delete and re-fetch + // corrupted file (partial write, truncated bitmap, bad JSON, etc.), delete and re-fetch result = null; - headerFile.delete(); + if (!headerFile.delete()) { + LOG.warn( + e, + "Failed to delete corrupted header file[%s] for [%s]; will be overwritten by re-fetch", + headerFile, + targetFilename + ); + } } } @@ -167,7 +181,32 @@ static PartialSegmentFileMapperV10 createForFile( bitmapBuffer ); - // restore downloaded files from the bitmap + // bitmap-vs-container repair pre-pass: if the bitmap claims a file is downloaded but its container file is + // missing on disk, the bitmap is lying (e.g. partial-cache eviction that cleared containers but couldn't atomically + // clear bits, or external file-system damage). Clear those bits before the restore loop so we don't spuriously + // sparse-allocate empty containers in the restore loop's ensureContainerInitialized call and treat their files as + // downloaded. 
+ for (int i = 0; i < mapper.sortedFileNames.size(); i++) { + final int byteIndex = i / 8; + final int bitMask = 1 << (i % 8); + if ((bitmapBuffer.get(byteIndex) & bitMask) == 0) { + continue; + } + final String name = mapper.sortedFileNames.get(i); + final SegmentInternalFileMetadata fileMetadata = result.getMetadata().getFiles().get(name); + if (fileMetadata == null) { + continue; + } + final File containerFile = new File( + localCacheDir, + StringUtils.format("%s.container.%05d", targetFilename, fileMetadata.getContainer()) + ); + if (!containerFile.exists()) { + bitmapBuffer.put(byteIndex, (byte) (bitmapBuffer.get(byteIndex) & ~bitMask)); + } + } + + // restore downloaded files from the (now-repaired) bitmap for (int i = 0; i < mapper.sortedFileNames.size(); i++) { final int byteIndex = i / 8; final int bitIndex = i % 8; @@ -249,6 +288,57 @@ public SegmentFileMetadata getSegmentFileMetadata() return metadata; } + /** + * Names of the external segment files attached to this mapper (each one is its own {@link PartialSegmentFileMapperV10} + * accessible via {@link #getExternalMapper}). Empty for mappers with no externals. + */ + public Set<String> getExternalFilenames() + { + return externalMappers.keySet(); + } + + /** + * Look up the child mapper for an external segment file. Returns {@code null} if no external with that name is + * attached. Cache-layer callers use this to walk external files' {@link SegmentFileMetadata} and route + * {@link #initializeContainer} / {@link #evictContainer} calls to the right physical file. + */ + @Nullable + public PartialSegmentFileMapperV10 getExternalMapper(String externalFilename) + { + return externalMappers.get(externalFilename); + } + + /** + * Resolve {@code this} when {@code externalFilename} is null (main file), otherwise the named external child + * mapper. Throws if the external is not attached. Useful for routing container operations from cache-layer code + * that holds {@code (externalFilename, containerIndex)} refs. 
+ */ + public PartialSegmentFileMapperV10 mapperForContainer(@Nullable String externalFilename) + { + if (externalFilename == null) { + return this; + } + final PartialSegmentFileMapperV10 external = externalMappers.get(externalFilename); + if (external == null) { + throw DruidException.defensive( + "External mapper[%s] is not attached to this mapper for [%s]", + externalFilename, + targetFilename + ); + } + return external; + } + + /** + * The {@code targetFilename} this mapper writes/reads to/from inside the cache directory. For the entry-point + * mapper this is e.g. {@link org.apache.druid.segment.IndexIO#V10_FILE_NAME}; for an external child mapper it's + * the external file's name. + */ + public String getTargetFilename() + { + return targetFilename; + } + @Override public Set<String> getInternalFilenames() { @@ -290,8 +380,8 @@ public ByteBuffer mapExternalFile(String filename, String name) throws IOExcepti /** * Pre-download a set of internal files so that subsequent {@link #mapFile(String)} calls for these files will not - * trigger individual downloads. Files that are already downloaded are skipped. This is useful for batch-downloading - * all files for a projection at once. + * trigger individual downloads. Files that are already downloaded are skipped. Useful for batch-downloading all + * files in a bundle at once (see {@link SegmentFileBuilder#startFileBundle}). */ public void ensureFilesAvailable(Set<String> fileNames) throws IOException { @@ -303,6 +393,27 @@ public void ensureFilesAvailable(Set<String> fileNames) throws IOException } } + /** + * Total on-disk size of the header file(s) backing this mapper, summed across the main file and any external file + * mappers. This is the actual reservation size that should be charged against the local cache once the metadata has + * been fetched and persisted; callers can compare it against an up-front pessimistic estimate to decide whether to + * shrink the reservation. 
+ */ + public long getOnDiskHeaderSize() + { + long total = headerFileSize(localCacheDir, targetFilename); + for (PartialSegmentFileMapperV10 ext : externalMappers.values()) { + total += headerFileSize(ext.localCacheDir, ext.targetFilename); + } + return total; + } + + private static long headerFileSize(File dir, String filename) + { + final File header = new File(dir, filename + METADATA_HEADER_SUFFIX); + return header.exists() ? header.length() : 0; + } + /** * Total bytes downloaded so far across all internal files, including external mappers. */ @@ -384,6 +495,104 @@ private void ensureFileDownloaded(String name, SegmentInternalFileMetadata fileM } } + /** + * Public entry point for cache-layer code that wants to ensure a container is materialized before any data is + * downloaded into it (e.g. when a per-bundle cache entry is mounted, the entry pre-allocates its container files + * so that subsequent {@link #mapFile} calls have somewhere to write into and the cache layer can charge the + * reservation up front). + */ + public void initializeContainer(int containerIndex) throws IOException + { + checkClosed(); + ensureContainerInitialized(containerIndex); + } + + /** + * Reverse of {@link #initializeContainer(int)}: unmap the in-memory view of the container, delete the local + * container file, and clear the bitmap bits + {@link #downloadedFiles} entries for every internal file that lived + * in this container. + *
<p>
+ * Used by per-bundle cache entries on unmount/eviction to release the disk and memory footprint of one bundle + * without affecting other bundles sharing the same {@link PartialSegmentFileMapperV10}. After eviction, subsequent + * {@link #mapFile} calls for files in this container will re-trigger downloads via {@link #initializeContainer} + * and the bitmap will be repopulated incrementally. + *
<p>
+ * Concurrency contract. The caller is responsible for ensuring no concurrent {@link #mapFile} (or + * {@link #ensureFilesAvailable}) call is in flight for any file in this container. This is enforced one layer up + * by the cache-entry refcount: {@code PartialSegmentBundleCacheEntry} only invokes {@code evictContainer} from its + * {@code doActualUnmount} callback, which fires only after every reference acquired via {@code acquireReference()} + * has been closed. Bypassing that gate is dangerous, {@link ByteBufferUtils#unmap} frees the off-heap mapping, so a + * {@link ByteBuffer#slice} from a concurrent reader is a JVM SIGSEGV, not a recoverable error. + *
<p>
+ * No-op if the container has not been initialized. + */ + public void evictContainer(int containerIndex) + { + checkClosed(); + containerLocks[containerIndex].lock(); + try { + final MappedByteBuffer existing = containers[containerIndex]; + if (existing != null) { + ByteBufferUtils.unmap(existing); + containers[containerIndex] = null; + } + // Try the cached containerFiles[i] first. If it's null, the container was never initialized in this mapper + // instance (typical right after create() with an empty bitmap), but the on-disk file may still exist from a + // previous run. Fall back to the deterministic path so eviction is always effective. + File containerFile = containerFiles[containerIndex]; + if (containerFile == null) { + containerFile = new File( + localCacheDir, + StringUtils.format("%s.container.%05d", targetFilename, containerIndex) + ); + } + if (containerFile.exists() && !containerFile.delete()) { + LOG.warn( + "Failed to delete container file[%s] during eviction of container[%d] for [%s]; leaking on disk", + containerFile, + containerIndex, + targetFilename + ); + } + containerFiles[containerIndex] = null; + } + finally { + containerLocks[containerIndex].unlock(); + } + + // clear bitmap bits + downloadedFiles entries for files that lived in this container. Iterates + // metadata.getFiles() without external synchronization: SegmentFileMetadata is constructed once at mapper + // creation and its file map is effectively immutable for the mapper's lifetime, so concurrent iteration is safe. 
+ for (Map.Entry<String, SegmentInternalFileMetadata> entry : metadata.getFiles().entrySet()) { + if (entry.getValue().getContainer() != containerIndex) { + continue; + } + final String fileName = entry.getKey(); + if (downloadedFiles.remove(fileName)) { + downloadedBytes.addAndGet(-entry.getValue().getSize()); + } + clearBitmapBit(fileName); + } + } + + private void clearBitmapBit(String name) + { + final Integer index = fileNameToIndex.get(name); + if (index == null) { + return; + } + final int byteIndex = index / 8; + final int bitMask = 1 << (index % 8); + bitmapLock.lock(); + try { + final byte existing = bitmapBuffer.get(byteIndex); + bitmapBuffer.put(byteIndex, (byte) (existing & ~bitMask)); + } + finally { + bitmapLock.unlock(); + } + } + /** * Initialize a local container file if not already done. Creates a sparse file at the original container size * and memory-maps it. The channel is closed immediately after mapping, the mmap persists independently, backed by diff --git a/processing/src/main/java/org/apache/druid/segment/file/SegmentFileBuilder.java b/processing/src/main/java/org/apache/druid/segment/file/SegmentFileBuilder.java index 6d5aea47374c..d589213f6054 100644 --- a/processing/src/main/java/org/apache/druid/segment/file/SegmentFileBuilder.java +++ b/processing/src/main/java/org/apache/druid/segment/file/SegmentFileBuilder.java @@ -41,25 +41,33 @@ */ public interface SegmentFileBuilder extends Closeable { + /** + * Default bundle name for containers written without an explicit {@link #startFileBundle} call. Thinking of file + * bundles as directories, this is the root directory that sits above any named subdirectories the writer declares. + * Containers always carry a non-null bundle name; if the writer never calls {@code startFileBundle}, they are + * tagged with this default. Cache-layer readers treat all containers sharing this name as one mount/evict unit. 
+ */ + String ROOT_BUNDLE_NAME = "__root__"; + /** * Add a column to the metadata of this segment file */ void addColumn(String name, ColumnDescriptor columnDescriptor); /** - * Declare that subsequent writes belong to a named group of files that should be stored together. This is a hint + * Declare that subsequent writes belong to a named bundle of files that should be stored together. This is a hint * about physical layout, it does not constrain the names of files subsequently added, and implementations are free * to ignore it entirely (the default is a no-op for formats that don't organize data into coarse-grained * groupings). Projections are the primary caller today, but the mechanism is generic, it's equally applicable to * grouping internal metadata, data shared across columns, etc. *

- * Callers should invoke this before writing each group's files; passing {@code null} clears the current group. - * Callers should not invoke this while a writer returned by {@link #addWithChannel} is still open (implementations - * may reject such calls). + * Callers should invoke this before writing each bundle's files; passing {@code null} resets the current bundle to + * the {@link #ROOT_BUNDLE_NAME} default. Callers should not invoke this while a writer returned by + * {@link #addWithChannel} is still open (implementations may reject such calls). * - * @see SegmentFileBuilderV10#startFileGroup(String) for the V10 semantics + * @see SegmentFileBuilderV10#startFileBundle(String) for the V10 semantics */ - default void startFileGroup(@Nullable String groupName) + default void startFileBundle(@Nullable String bundleName) { } diff --git a/processing/src/main/java/org/apache/druid/segment/file/SegmentFileBuilderV10.java b/processing/src/main/java/org/apache/druid/segment/file/SegmentFileBuilderV10.java index 0b17960aa573..ba4a79cb0c8c 100644 --- a/processing/src/main/java/org/apache/druid/segment/file/SegmentFileBuilderV10.java +++ b/processing/src/main/java/org/apache/druid/segment/file/SegmentFileBuilderV10.java @@ -50,7 +50,6 @@ import java.util.ArrayList; import java.util.List; import java.util.Map; -import java.util.Objects; import java.util.TreeMap; /** @@ -61,20 +60,21 @@ * V10 file format: * | version (byte) | meta compression (byte) | meta length (int) | meta json | container 0 | ... | container n | *

- * Containers are scoped to at most one declared file group. Callers declare which group they are writing via - * {@link #startFileGroup(String)} before writing its files; a new container is started when the declared group - * changes or the current container would exceed {@link #maxContainerSize}. A group whose total size exceeds the max - * container size spans multiple containers, all tagged with the same group. This gives readers a clean 1:1 (or 1:N) - * mapping between groups and containers, which supports per-group partial loading without any read-side reorganization. - * Projections are the primary caller today, but the mechanism is equally usable for other organizational needs - * (shared data across columns, internal metadata, etc.). + * Containers are scoped to exactly one declared bundle. Callers declare which bundle they are writing via + * {@link #startFileBundle(String)} before writing its files; a new container is started when the declared bundle + * changes or the current container would exceed {@link #maxContainerSize}. A bundle whose total size exceeds the max + * container size spans multiple containers, all tagged with the same bundle. This gives readers a clean 1:1 (or 1:N) + * mapping between bundles and containers, which supports per-bundle partial loading without any read-side + * reorganization. Projections are the primary caller today, but the mechanism is equally usable for other + * organizational needs (shared data across columns, internal metadata, etc.). *

- * Callers that never invoke {@link #startFileGroup(String)} are mapped to a null-group container. + * Callers that never invoke {@link #startFileBundle(String)} have all writes tagged with the + * {@link SegmentFileBuilder#ROOT_BUNDLE_NAME} default bundle. *

* Much of the logic here was ported from {@link org.apache.druid.java.util.common.io.smoosh.FileSmoosher} of the V9 * format and there is a fair bit of overlap. In fact, the initial implementation of this class wrapped a V9 smoosher * to build the files before combining them into the V10 format. The main difference is that V9 fills each container to - * the max while here we organize with file groups. + * the max while here we organize with bundles. */ public class SegmentFileBuilderV10 implements SegmentFileBuilder { @@ -115,8 +115,8 @@ public static SegmentFileBuilderV10 create(ObjectMapper jsonMapper, File baseDir // Nested addWithChannel calls (for example a serializer that, while being written, emits sub-files for its own // columnar parts) can't write into the current container concurrently with the outer writer. These nested writes are // redirected to temporary files and merged back into container(s) once the outer writer completes. Each entry - // carries the file group that was active when the delegate was created so that the merge routes it into the - // correct container even if the active group has since changed. + // carries the bundle that was active when the delegate was created so that the merge routes it into the correct + // container even if the active bundle has since changed. private final List completedDelegates = new ArrayList<>(); private final List inProgressDelegates = new ArrayList<>(); private long delegateFileCounter = 0; @@ -124,11 +124,11 @@ public static SegmentFileBuilderV10 create(ObjectMapper jsonMapper, File baseDir @Nullable private ContainerWriter currentContainer = null; private boolean writerCurrentlyInUse = false; - // The file group declared by the most recent {@link #startFileGroup} call. Writes are routed into containers - // tagged with this group. Remains {@code null} if the caller never declares one, in which case all writes share - // a single null-group container. 
- @Nullable - private String currentFileGroup = null; + + /** + * The bundle declared by the most recent {@link #startFileBundle} call + */ + private String currentBundle = SegmentFileBuilder.ROOT_BUNDLE_NAME; @Nullable private String interval = null; @@ -189,7 +189,7 @@ public SegmentFileChannel addWithChannel(final String name, final long size) thr if (internalFiles.containsKey(name)) { throw new IAE("Cannot add files of the same name, already have [%s]", name); } - ensureNameMatchesActiveGroup(name); + ensureNameMatchesActiveBundle(name); if (size > maxContainerSize) { throw DruidException.forPersona(DruidException.Persona.ADMIN) .ofCategory(DruidException.Category.RUNTIME_FAILURE) @@ -207,7 +207,7 @@ public SegmentFileChannel addWithChannel(final String name, final long size) thr return delegateChannel(name, size); } - ensureContainer(currentFileGroup, size); + ensureContainer(currentBundle, size); final ContainerWriter target = currentContainer; final long startOffset = target.currOffset; writerCurrentlyInUse = true; @@ -284,59 +284,69 @@ public SegmentFileBuilder getExternalBuilder(String externalFile) { return externalSegmentFileBuilders.computeIfAbsent( externalFile, - (k) -> new SegmentFileBuilderV10(jsonMapper, externalFile, baseDir, maxContainerSize, metadataCompression) + (k) -> { + final SegmentFileBuilderV10 fresh = + new SegmentFileBuilderV10(jsonMapper, externalFile, baseDir, maxContainerSize, metadataCompression); + // A late-attached external inherits the parent's currently-active bundle on creation only; subsequent + // bundle changes flow through the parent's startFileBundle broadcast. Re-applying on every fetch would + // close the external's in-progress container, since V10 bundles cannot currently be re-entered. 
+ if (!SegmentFileBuilder.ROOT_BUNDLE_NAME.equals(currentBundle)) { + fresh.startFileBundle(currentBundle); + } + return fresh; + } ); } @Override public void addColumn(String name, ColumnDescriptor columnDescriptor) { - ensureNameMatchesActiveGroup(name); + ensureNameMatchesActiveBundle(name); this.columns.put(name, columnDescriptor); } /** - * If a file group is currently active (set by the most recent {@link #startFileGroup} call), enforce that names of - * files and columns added under it are prefixed by {@code groupName + "/"}. Prevents silent collisions where two - * groups write a file/column of the same bare name and the second silently overwrites the first in the metadata - * maps. Existing production callers (e.g. {@code IndexMergerV10} via - * {@code Projections.getProjectionSegmentInternalFileName}) already construct prefixed names, so this is a no-op - * for them; it catches new writers that forget the convention. + * If a named bundle is currently active (set by the most recent {@link #startFileBundle} call to a non-root value), + * enforce that names of files and columns added under it are prefixed by {@code bundleName + "/"}. The root bundle + * is unconstrained. */ - private void ensureNameMatchesActiveGroup(String name) + private void ensureNameMatchesActiveBundle(String name) { - if (currentFileGroup != null && !name.startsWith(currentFileGroup + "/")) { + if (!SegmentFileBuilder.ROOT_BUNDLE_NAME.equals(currentBundle) && !name.startsWith(currentBundle + "/")) { throw DruidException.defensive( - "Name[%s] must start with the active file group prefix[%s/]", + "Name[%s] must start with the active bundle prefix[%s/]", name, - currentFileGroup + currentBundle ); } } /** - * Declare the file group that subsequent writes belong to. Writes are routed into a container tagged with the - * declared group; a new container is rolled when the group changes or the incoming file won't fit. 
A group whose - * total size exceeds {@link #maxContainerSize} is split across multiple consecutive containers, all tagged with - * the same group. Passing {@code null} clears the current group; subsequent writes are then routed into a - * null-group container until the next call. + * Declare the bundle that subsequent writes belong to. Writes are routed into a container tagged with the declared + * bundle; a new container is rolled when the bundle changes or the incoming file won't fit. A bundle whose total + * size exceeds {@link #maxContainerSize} is split across multiple consecutive containers, all tagged with the same + * bundle. Passing {@code null} resets to {@link SegmentFileBuilder#ROOT_BUNDLE_NAME}; subsequent writes are then + * routed into a root-bundle container until the next call. *

* Current V10-specific limitations worth knowing: *

    - *
  • Groups cannot be re-entered. Once a different group (or {@code null}) has been declared, the previous - * group's container is closed, and you cannot go back and append more files to it, any such writes would - * open a fresh container for the re-declared group, so the group's files would end up in non-contiguous - * containers. If all of a group's files must land in the same container(s), write them contiguously.
  • + *
  • Bundles cannot be re-entered. Once a different bundle has been declared the previous bundle's container is + * closed, and you cannot go back and append more files to it; any such writes would open a fresh container + * for the re-declared bundle, so the bundle's files would end up in non-contiguous containers. If all of a + * bundle's files must land in the same container(s), write them contiguously.
  • *
  • Throws if called while a writer returned by {@link #addWithChannel} is still open.
  • *
*/ @Override - public void startFileGroup(@Nullable String groupName) + public void startFileBundle(@Nullable String bundleName) { if (writerCurrentlyInUse) { - throw DruidException.defensive("Cannot start file group[%s] while a writer is in progress", groupName); + throw DruidException.defensive("Cannot start file bundle[%s] while a writer is in progress", bundleName); + } + this.currentBundle = bundleName == null ? SegmentFileBuilder.ROOT_BUNDLE_NAME : bundleName; + for (SegmentFileBuilderV10 externalFile : externalSegmentFileBuilders.values()) { + externalFile.startFileBundle(bundleName); } - this.currentFileGroup = groupName; } public void addInterval(String interval) @@ -464,35 +474,35 @@ private List buildContainerMetadata() long offset = 0; for (ContainerWriter container : containers) { final long length = container.file.length(); - result.add(new SegmentFileContainerMetadata(offset, length, container.group)); + result.add(new SegmentFileContainerMetadata(offset, length, container.bundle)); offset += length; } return result; } /** - * Ensure that {@link #currentContainer} is ready to accept {@code size} bytes of a file belonging to {@code group}. + * Ensure that {@link #currentContainer} is ready to accept {@code size} bytes of a file belonging to {@code bundle}. * Rolls the current container and starts a new one when: *
    *
  • there is no current container, or
  • - *
  • the current container is for a different group, or
  • + *
  • the current container is for a different bundle, or
  • *
  • the current container cannot fit the incoming bytes within {@link #maxContainerSize}.
  • *
*/ - private void ensureContainer(@Nullable String group, long size) throws IOException + private void ensureContainer(String bundle, long size) throws IOException { if (currentContainer == null - || !Objects.equals(currentContainer.group, group) + || !currentContainer.bundle.equals(bundle) || !currentContainer.canFit(size)) { if (currentContainer != null) { currentContainer.close(); } - currentContainer = openNewContainer(group); + currentContainer = openNewContainer(bundle); containers.add(currentContainer); } } - private ContainerWriter openNewContainer(@Nullable String group) throws IOException + private ContainerWriter openNewContainer(String bundle) throws IOException { FileUtils.mkdirp(baseDir); final int fileNum = containers.size(); @@ -500,7 +510,7 @@ private ContainerWriter openNewContainer(@Nullable String group) throws IOExcept baseDir, StringUtils.format("%s-%05d.container", outputFileName, fileNum) ); - return new ContainerWriter(fileNum, containerFile, group, maxContainerSize); + return new ContainerWriter(fileNum, containerFile, bundle, maxContainerSize); } private SegmentFileChannel delegateChannel(final String name, final long size) throws IOException @@ -509,9 +519,9 @@ private SegmentFileChannel delegateChannel(final String name, final long size) t // cannot collide, since main and external always have distinct output file names. final String delegateName = StringUtils.format("%s-delegate-%d", outputFileName, delegateFileCounter++); final File tmpFile = new File(baseDir, delegateName); - // Snapshot the active group now so that if this delegate is merged after the outer writer has advanced past - // the group it was created under, it still routes into the correct container. - final DelegateEntry entry = new DelegateEntry(tmpFile, name, currentFileGroup); + // Snapshot the active bundle now so that if this delegate is merged after the outer writer has advanced past + // the bundle it was created under, it still routes into the correct container. 
+ final DelegateEntry entry = new DelegateEntry(tmpFile, name, currentBundle); inProgressDelegates.add(entry); return new SegmentFileChannel() @@ -576,9 +586,9 @@ public void close() throws IOException /** * Move completed delegate temp files into containers by replaying them as regular {@link #add} calls. Only called - * when no outer writer is currently holding the builder. Each entry's snapshotted group is restored as - * {@link #currentFileGroup} during its replay so the file lands in the container that was active when the - * nested write was originally requested, not whichever group happens to be active at merge time. + * when no outer writer is currently holding the builder. Each entry's snapshotted bundle is restored as + * {@link #currentBundle} during its replay so the file lands in the container that was active when the nested + * write was originally requested, not whichever bundle happens to be active at merge time. */ private void mergeDelegatedFiles() throws IOException { @@ -587,10 +597,10 @@ private void mergeDelegatedFiles() throws IOException } final List toProcess = new ArrayList<>(completedDelegates); completedDelegates.clear(); - final String savedGroup = currentFileGroup; + final String savedBundle = currentBundle; try { for (DelegateEntry entry : toProcess) { - currentFileGroup = entry.group; + currentBundle = entry.bundle; add(entry.name, entry.file); if (!entry.file.delete()) { LOG.warn("Unable to delete delegate file[%s]", entry.file); @@ -598,33 +608,32 @@ private void mergeDelegatedFiles() throws IOException } } finally { - currentFileGroup = savedGroup; + currentBundle = savedBundle; } } - private record DelegateEntry(File file, String name, @Nullable String group) + private record DelegateEntry(File file, String name, String bundle) { } /** - * Low-level writer for a single container chunk file. One container holds internal files from at most one group. + * Low-level writer for a single container chunk file. 
One container holds internal files from exactly one bundle. */ private static class ContainerWriter implements GatheringByteChannel { private final int fileNum; private final File file; - @Nullable - private final String group; + private final String bundle; private final long maxSize; private final Closer closer = Closer.create(); private final GatheringByteChannel channel; private long currOffset = 0; - ContainerWriter(int fileNum, File file, @Nullable String group, long maxSize) throws IOException + ContainerWriter(int fileNum, File file, String bundle, long maxSize) throws IOException { this.fileNum = fileNum; this.file = file; - this.group = group; + this.bundle = bundle; this.maxSize = maxSize; final FileOutputStream outStream = closer.register(new FileOutputStream(file)); this.channel = closer.register(outStream.getChannel()); @@ -675,9 +684,9 @@ public void close() throws IOException closer.close(); if (LOG.isDebugEnabled()) { LOG.debug( - "Created container file[%s] for group[%s] of size[%,d] bytes.", + "Created container file[%s] for bundle[%s] of size[%,d] bytes.", file.getAbsolutePath(), - group, + bundle, file.length() ); } diff --git a/processing/src/main/java/org/apache/druid/segment/file/SegmentFileContainerMetadata.java b/processing/src/main/java/org/apache/druid/segment/file/SegmentFileContainerMetadata.java index 3739eb7718b8..45005a5a9f4d 100644 --- a/processing/src/main/java/org/apache/druid/segment/file/SegmentFileContainerMetadata.java +++ b/processing/src/main/java/org/apache/druid/segment/file/SegmentFileContainerMetadata.java @@ -22,6 +22,7 @@ import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonInclude; import com.fasterxml.jackson.annotation.JsonProperty; +import org.apache.druid.annotations.SuppressFBWarnings; import javax.annotation.Nullable; import java.util.Objects; @@ -30,30 +31,29 @@ * Starting offset and size of a 'container' stored in a V10 segment file; think the V10 equivalent of V9's 
external * 'smoosh' files, e.g. 00000.smoosh. *

- * Each container holds internal files belonging to at most one named file group, as declared at write time via - * {@link SegmentFileBuilder#startFileGroup}. The {@link #fileGroup} field records that name so readers can attribute - * a container to its group without parsing internal-file names. The field is {@code null} for containers written - * without a {@code startFileGroup} call (or with {@code startFileGroup(null)}), and for containers from segments - * produced by writers that pre-date this field; null serializes as a Jackson-omitted property so old segments - * round-trip unchanged. + * Each container holds internal files belonging to exactly one named bundle, as declared at write time via + * {@link SegmentFileBuilder#startFileBundle}. The {@link #bundle} field records that name so readers can attribute a + * container to its bundle without parsing internal-file names. Containers written without an explicit + * {@code startFileBundle} call are tagged with {@link SegmentFileBuilder#ROOT_BUNDLE_NAME}; that default value is + * omitted from JSON output, so segments produced by writers pre-dating this field deserialize cleanly (missing + * property normalizes to the default in the constructor). */ public class SegmentFileContainerMetadata { private final long startOffset; private final long size; - @Nullable - private final String fileGroup; + private final String bundle; @JsonCreator public SegmentFileContainerMetadata( @JsonProperty("startOffset") long startOffset, @JsonProperty("size") long size, - @JsonProperty("fileGroup") @Nullable String fileGroup + @JsonProperty("bundle") @Nullable String bundle ) { this.startOffset = startOffset; this.size = size; - this.fileGroup = fileGroup; + this.bundle = bundle == null ? 
SegmentFileBuilder.ROOT_BUNDLE_NAME : bundle; } @JsonProperty @@ -69,11 +69,10 @@ public long getSize() } @JsonProperty - @JsonInclude(JsonInclude.Include.NON_NULL) - @Nullable - public String getFileGroup() + @JsonInclude(value = JsonInclude.Include.CUSTOM, valueFilter = DefaultBundleFilter.class) + public String getBundle() { - return fileGroup; + return bundle; } @Override @@ -88,13 +87,13 @@ public boolean equals(Object o) SegmentFileContainerMetadata that = (SegmentFileContainerMetadata) o; return startOffset == that.startOffset && size == that.size - && Objects.equals(fileGroup, that.fileGroup); + && Objects.equals(bundle, that.bundle); } @Override public int hashCode() { - return Objects.hash(startOffset, size, fileGroup); + return Objects.hash(startOffset, size, bundle); } @Override @@ -103,7 +102,32 @@ public String toString() return "SegmentFileContainerMetadata{" + "startOffset=" + startOffset + ", size=" + size - + ", fileGroup=" + fileGroup + + ", bundle=" + bundle + '}'; } + + /** + * Jackson {@code valueFilter} that omits the {@code bundle} field from JSON when it carries the + * {@link SegmentFileBuilder#ROOT_BUNDLE_NAME} default. Jackson invokes {@code equals(value)} against the filter + * instance with the property value (a {@link String} here, not another filter): returning {@code true} means + * "value equals default, omit it." The asymmetric equals contract is intentional and required by Jackson's filter + * API, so the standard same-class check would defeat the mechanism. 
+ */ + static final class DefaultBundleFilter + { + + @Override + @SuppressWarnings("EqualsDoesntCheckParameterClass") + @SuppressFBWarnings("EQ_CHECK_FOR_OPERAND_NOT_COMPATIBLE_WITH_THIS") + public boolean equals(Object value) + { + return SegmentFileBuilder.ROOT_BUNDLE_NAME.equals(value); + } + + @Override + public int hashCode() + { + return 0; + } + } } diff --git a/processing/src/test/java/org/apache/druid/segment/PartialQueryableIndexTest.java b/processing/src/test/java/org/apache/druid/segment/PartialQueryableIndexTest.java index bb3e865fa118..6e82fb4a76a8 100644 --- a/processing/src/test/java/org/apache/druid/segment/PartialQueryableIndexTest.java +++ b/processing/src/test/java/org/apache/druid/segment/PartialQueryableIndexTest.java @@ -37,9 +37,10 @@ import org.apache.druid.segment.column.RowSignature; import org.apache.druid.segment.column.ValueType; import org.apache.druid.segment.data.CompressionStrategy; +import org.apache.druid.segment.file.CountingRangeReader; +import org.apache.druid.segment.file.DirectoryBackedRangeReader; import org.apache.druid.segment.file.PartialSegmentFileMapperV10; import org.apache.druid.segment.incremental.IncrementalIndexSchema; -import org.apache.druid.segment.loading.SegmentRangeReader; import org.apache.druid.segment.projections.QueryableProjection; import org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMediumFactory; import org.apache.druid.testing.InitializedNullHandlingTest; @@ -49,19 +50,14 @@ import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; -import java.io.ByteArrayInputStream; import java.io.File; import java.io.IOException; -import java.io.InputStream; -import java.io.RandomAccessFile; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.List; import java.util.Set; -import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ThreadLocalRandom; -import java.util.concurrent.atomic.AtomicInteger; class 
PartialQueryableIndexTest extends InitializedNullHandlingTest { @@ -421,7 +417,7 @@ void testMatchesEagerQueryableIndex() throws IOException // verify that the partial index produces the same schema info as the eager (full) index final IndexIO indexIO = TestHelper.getTestIndexIO(); final File cacheDir = newCacheDir("match_eager"); - final DirectoryRangeReader rangeReader = new DirectoryRangeReader(segmentDir); + final DirectoryBackedRangeReader rangeReader = new DirectoryBackedRangeReader(segmentDir); try ( QueryableIndex eagerIndex = indexIO.loadIndex(segmentDir); @@ -470,62 +466,4 @@ private File newCacheDir(String name) throws IOException FileUtils.mkdirp(dir); return dir; } - - static class DirectoryRangeReader implements SegmentRangeReader - { - private final File directory; - - DirectoryRangeReader(File directory) - { - this.directory = directory; - } - - @Override - public InputStream readRange(String filename, long offset, long length) throws IOException - { - File target = new File(directory, filename); - try (RandomAccessFile raf = new RandomAccessFile(target, "r")) { - final int available = (int) Math.min(length, Math.max(0, raf.length() - offset)); - byte[] data = new byte[available]; - raf.seek(offset); - raf.readFully(data); - return new ByteArrayInputStream(data); - } - } - } - - static class CountingRangeReader extends DirectoryRangeReader - { - private final AtomicInteger readCount = new AtomicInteger(0); - private final Set readFilenames = ConcurrentHashMap.newKeySet(); - - CountingRangeReader(File directory) - { - super(directory); - } - - int getReadCount() - { - return readCount.get(); - } - - Set getReadFilenames() - { - return Set.copyOf(readFilenames); - } - - void resetCount() - { - readCount.set(0); - readFilenames.clear(); - } - - @Override - public InputStream readRange(String filename, long offset, long length) throws IOException - { - readCount.incrementAndGet(); - readFilenames.add(filename); - return super.readRange(filename, offset, 
length); - } - } } diff --git a/processing/src/test/java/org/apache/druid/segment/file/CountingRangeReader.java b/processing/src/test/java/org/apache/druid/segment/file/CountingRangeReader.java new file mode 100644 index 000000000000..37f50250abd0 --- /dev/null +++ b/processing/src/test/java/org/apache/druid/segment/file/CountingRangeReader.java @@ -0,0 +1,77 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.file; + +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.atomic.AtomicInteger; + +/** + * A {@link DirectoryBackedRangeReader} that tracks range-read activity across the partial-segment test suite. Records + * total reads, header-only reads (offset == 0, which corresponds to V10 header preamble fetches), and the set of + * filenames that have been read. Each call site reads only the metric(s) it cares about. 
+ */ +public class CountingRangeReader extends DirectoryBackedRangeReader +{ + private final AtomicInteger readCount = new AtomicInteger(0); + private final AtomicInteger headerReadCount = new AtomicInteger(0); + private final Set readFilenames = ConcurrentHashMap.newKeySet(); + + public CountingRangeReader(File directory) + { + super(directory); + } + + public int getReadCount() + { + return readCount.get(); + } + + public int getHeaderReadCount() + { + return headerReadCount.get(); + } + + public Set getReadFilenames() + { + return Set.copyOf(readFilenames); + } + + public void resetCount() + { + readCount.set(0); + headerReadCount.set(0); + readFilenames.clear(); + } + + @Override + public InputStream readRange(String filename, long offset, long length) throws IOException + { + readCount.incrementAndGet(); + if (offset == 0) { + headerReadCount.incrementAndGet(); + } + readFilenames.add(filename); + return super.readRange(filename, offset, length); + } +} diff --git a/processing/src/test/java/org/apache/druid/segment/file/DirectoryBackedRangeReader.java b/processing/src/test/java/org/apache/druid/segment/file/DirectoryBackedRangeReader.java new file mode 100644 index 000000000000..bc1df52946a6 --- /dev/null +++ b/processing/src/test/java/org/apache/druid/segment/file/DirectoryBackedRangeReader.java @@ -0,0 +1,56 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.file; + +import org.apache.druid.segment.loading.SegmentRangeReader; + +import java.io.ByteArrayInputStream; +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.io.RandomAccessFile; + +/** + * A {@link SegmentRangeReader} backed by a directory of files. Used across the partial-segment test suite (processing + * + server modules) to simulate deep-storage range reads against an on-disk layout produced by + * {@link SegmentFileBuilderV10} or {@link org.apache.druid.segment.IndexMergerV10}. + */ +public class DirectoryBackedRangeReader implements SegmentRangeReader +{ + private final File directory; + + public DirectoryBackedRangeReader(File directory) + { + this.directory = directory; + } + + @Override + public InputStream readRange(String filename, long offset, long length) throws IOException + { + final File target = new File(directory, filename); + try (RandomAccessFile raf = new RandomAccessFile(target, "r")) { + final int available = (int) Math.min(length, Math.max(0, raf.length() - offset)); + final byte[] data = new byte[available]; + raf.seek(offset); + raf.readFully(data); + return new ByteArrayInputStream(data); + } + } +} diff --git a/processing/src/test/java/org/apache/druid/segment/file/PartialSegmentFileMapperV10Test.java b/processing/src/test/java/org/apache/druid/segment/file/PartialSegmentFileMapperV10Test.java index 790ba10ece73..0294f65e20bf 100644 --- a/processing/src/test/java/org/apache/druid/segment/file/PartialSegmentFileMapperV10Test.java +++ 
b/processing/src/test/java/org/apache/druid/segment/file/PartialSegmentFileMapperV10Test.java @@ -33,11 +33,9 @@ import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; -import java.io.ByteArrayInputStream; import java.io.File; import java.io.FileInputStream; import java.io.IOException; -import java.io.InputStream; import java.io.RandomAccessFile; import java.nio.ByteBuffer; import java.util.ArrayList; @@ -48,7 +46,6 @@ import java.util.concurrent.ExecutorService; import java.util.concurrent.Future; import java.util.concurrent.ThreadLocalRandom; -import java.util.concurrent.atomic.AtomicInteger; class PartialSegmentFileMapperV10Test { @@ -554,59 +551,4 @@ private static PartialSegmentFileMapperV10 createMapper( ); } - /** - * A {@link SegmentRangeReader} backed by a directory of files, supporting both main and external file reads. - */ - static class DirectoryBackedRangeReader implements SegmentRangeReader - { - private final File directory; - - DirectoryBackedRangeReader(File directory) - { - this.directory = directory; - } - - @Override - public InputStream readRange(String filename, long offset, long length) throws IOException - { - File target = new File(directory, filename); - try (RandomAccessFile raf = new RandomAccessFile(target, "r")) { - final int available = (int) Math.min(length, Math.max(0, raf.length() - offset)); - byte[] data = new byte[available]; - raf.seek(offset); - raf.readFully(data); - return new ByteArrayInputStream(data); - } - } - } - - /** - * A {@link DirectoryBackedRangeReader} that counts range reads (excluding metadata fetches). 
- */ - static class CountingRangeReader extends DirectoryBackedRangeReader - { - private final AtomicInteger readCount = new AtomicInteger(0); - - CountingRangeReader(File directory) - { - super(directory); - } - - int getReadCount() - { - return readCount.get(); - } - - void resetCount() - { - readCount.set(0); - } - - @Override - public InputStream readRange(String filename, long offset, long length) throws IOException - { - readCount.incrementAndGet(); - return super.readRange(filename, offset, length); - } - } } diff --git a/processing/src/test/java/org/apache/druid/segment/file/SegmentFileBuilderV10Test.java b/processing/src/test/java/org/apache/druid/segment/file/SegmentFileBuilderV10Test.java index 6dd01d8e5bd9..8f065990012b 100644 --- a/processing/src/test/java/org/apache/druid/segment/file/SegmentFileBuilderV10Test.java +++ b/processing/src/test/java/org/apache/druid/segment/file/SegmentFileBuilderV10Test.java @@ -53,13 +53,13 @@ void testOneContainerPerProjection() throws IOException { final File baseDir = newBaseDir(); - // matches the production usage pattern in IndexMergerV10: call startFileGroup then write that projection's + // matches the production usage pattern in IndexMergerV10: call startFileBundle then write that projection's // columns, then move on to the next projection. 
final String[] projections = {"__base", "projA", "projB"}; final int colCount = 3; try (SegmentFileBuilderV10 builder = SegmentFileBuilderV10.create(JSON_MAPPER, baseDir)) { for (String projection : projections) { - builder.startFileGroup(projection); + builder.startFileBundle(projection); for (int col = 0; col < colCount; col++) { final String name = projection + "/col" + col; final File tmpFile = new File(tempDir, StringUtils.format("%s-%s.bin", projection, col)); @@ -89,14 +89,14 @@ void testProjectionNameWithSlashRoutesCorrectly() throws IOException final String slashyProjection = "nested/projection"; final int colCount = 3; try (SegmentFileBuilderV10 builder = SegmentFileBuilderV10.create(JSON_MAPPER, baseDir)) { - builder.startFileGroup("__base"); + builder.startFileBundle("__base"); for (int col = 0; col < colCount; col++) { final String name = "__base/col" + col; final File tmpFile = new File(tempDir, StringUtils.format("base-%s.bin", col)); Files.write(Ints.toByteArray(name.hashCode()), tmpFile); builder.add(name, tmpFile); } - builder.startFileGroup(slashyProjection); + builder.startFileBundle(slashyProjection); for (int col = 0; col < colCount; col++) { final String name = slashyProjection + "/col" + col; final File tmpFile = new File(tempDir, StringUtils.format("slashy-%s.bin", col)); @@ -133,7 +133,7 @@ void testAddWithoutGroupPrefixThrowsWhenGroupActive() throws IOException final File baseDir = newBaseDir(); try (SegmentFileBuilderV10 builder = SegmentFileBuilderV10.create(JSON_MAPPER, baseDir)) { - builder.startFileGroup("projA"); + builder.startFileBundle("projA"); final File tmp = new File(tempDir, "no-prefix.bin"); Files.write(Ints.toByteArray(1), tmp); // file name doesn't start with "projA/", so add must throw @@ -147,7 +147,7 @@ void testAddWithChannelWithoutGroupPrefixThrowsWhenGroupActive() throws IOExcept final File baseDir = newBaseDir(); try (SegmentFileBuilderV10 builder = SegmentFileBuilderV10.create(JSON_MAPPER, baseDir)) { - 
builder.startFileGroup("projA"); + builder.startFileBundle("projA"); Assertions.assertThrows(RuntimeException.class, () -> builder.addWithChannel("wrong/col0", 4)); } } @@ -158,7 +158,7 @@ void testAddColumnWithoutGroupPrefixThrowsWhenGroupActive() throws IOException final File baseDir = newBaseDir(); try (SegmentFileBuilderV10 builder = SegmentFileBuilderV10.create(JSON_MAPPER, baseDir)) { - builder.startFileGroup("projA"); + builder.startFileBundle("projA"); Assertions.assertThrows( RuntimeException.class, () -> builder.addColumn("wrong_no_prefix", new ColumnDescriptor.Builder() @@ -169,12 +169,12 @@ void testAddColumnWithoutGroupPrefixThrowsWhenGroupActive() throws IOException } @Test - void testAddWithoutPrefixIsAllowedWhenNoGroupActive() throws IOException + void testAddWithoutPrefixIsAllowedInRootBundle() throws IOException { final File baseDir = newBaseDir(); try (SegmentFileBuilderV10 builder = SegmentFileBuilderV10.create(JSON_MAPPER, baseDir)) { - // never call startFileGroup; bare names are fine + // never call startFileBundle; bare names are fine under the default root bundle final File tmp = new File(tempDir, "bare.bin"); Files.write(Ints.toByteArray(1), tmp); builder.add("col0", tmp); @@ -183,7 +183,7 @@ void testAddWithoutPrefixIsAllowedWhenNoGroupActive() throws IOException } @Test - void testContainerMetadataCarriesFileGroup() throws IOException + void testContainerMetadataCarriesBundle() throws IOException { final File baseDir = newBaseDir(); @@ -191,7 +191,7 @@ void testContainerMetadataCarriesFileGroup() throws IOException final int colCount = 2; try (SegmentFileBuilderV10 builder = SegmentFileBuilderV10.create(JSON_MAPPER, baseDir)) { for (String projection : projections) { - builder.startFileGroup(projection); + builder.startFileBundle(projection); for (int col = 0; col < colCount; col++) { final String name = projection + "/col" + col; final File tmpFile = new File(tempDir, StringUtils.format("%s-%s.bin", projection, col)); @@ -206,35 +206,34 
@@ void testContainerMetadataCarriesFileGroup() throws IOException final SegmentFileMetadata metadata = mapper.getSegmentFileMetadata(); Assertions.assertEquals(projections.length, metadata.getContainers().size()); - // Each container's fileGroup must equal the group active when it was written. Build the expected list by - // walking the files: each container holds files from exactly one group, so the first file's group prefix is - // authoritative. + // Each container's bundle must equal the bundle active when it was written. Each container holds files from + // exactly one bundle, so the first file's name prefix is authoritative. for (int ci = 0; ci < metadata.getContainers().size(); ci++) { final int containerIdx = ci; - final String expectedGroup = metadata.getFiles().entrySet().stream() + final String expectedBundle = metadata.getFiles().entrySet().stream() .filter(e -> e.getValue().getContainer() == containerIdx) .map(e -> e.getKey().substring(0, e.getKey().indexOf('/'))) .findFirst() .orElseThrow(); Assertions.assertEquals( - expectedGroup, - metadata.getContainers().get(ci).getFileGroup(), - "container " + ci + " fileGroup mismatch" + expectedBundle, + metadata.getContainers().get(ci).getBundle(), + "container " + ci + " bundle mismatch" ); } } } @Test - void testContainerWrittenWithoutStartFileGroupHasNullFileGroup() throws IOException + void testContainerWrittenWithoutStartFileBundleDefaultsToRoot() throws IOException { final File baseDir = newBaseDir(); try (SegmentFileBuilderV10 builder = SegmentFileBuilderV10.create(JSON_MAPPER, baseDir)) { - // never call startFileGroup; the single container should carry fileGroup == null + // never call startFileBundle; the single container should be tagged with ROOT_BUNDLE_NAME for (int col = 0; col < 3; col++) { final String name = "col" + col; - final File tmpFile = new File(tempDir, StringUtils.format("nogroup-%s.bin", col)); + final File tmpFile = new File(tempDir, StringUtils.format("nobundle-%s.bin", col)); 
Files.write(Ints.toByteArray(name.hashCode()), tmpFile); builder.add(name, tmpFile); } @@ -244,50 +243,81 @@ void testContainerWrittenWithoutStartFileGroupHasNullFileGroup() throws IOExcept try (SegmentFileMapperV10 mapper = SegmentFileMapperV10.create(segmentFile, JSON_MAPPER)) { final SegmentFileMetadata metadata = mapper.getSegmentFileMetadata(); Assertions.assertEquals(1, metadata.getContainers().size()); - Assertions.assertNull(metadata.getContainers().get(0).getFileGroup()); + Assertions.assertEquals( + SegmentFileBuilder.ROOT_BUNDLE_NAME, + metadata.getContainers().get(0).getBundle() + ); } } @Test - void testStartFileGroupNullClearsCurrentGroup() throws IOException + void testStartFileBundleNullResetsToRoot() throws IOException { final File baseDir = newBaseDir(); try (SegmentFileBuilderV10 builder = SegmentFileBuilderV10.create(JSON_MAPPER, baseDir)) { - builder.startFileGroup("first"); + builder.startFileBundle("first"); final File firstFile = new File(tempDir, "first.bin"); Files.write(Ints.toByteArray(1), firstFile); builder.add("first/a", firstFile); - builder.startFileGroup(null); - final File noGroupFile = new File(tempDir, "ng.bin"); - Files.write(Ints.toByteArray(2), noGroupFile); - builder.add("ng/a", noGroupFile); + // Passing null resets to ROOT_BUNDLE_NAME; subsequent writes go in a root-bundle container. 
+ builder.startFileBundle(null); + final File rootFile = new File(tempDir, "root.bin"); + Files.write(Ints.toByteArray(2), rootFile); + builder.add("root_a", rootFile); } final File segmentFile = new File(baseDir, IndexIO.V10_FILE_NAME); try (SegmentFileMapperV10 mapper = SegmentFileMapperV10.create(segmentFile, JSON_MAPPER)) { final SegmentFileMetadata metadata = mapper.getSegmentFileMetadata(); Assertions.assertEquals(2, metadata.getContainers().size()); - Assertions.assertEquals("first", metadata.getContainers().get(0).getFileGroup()); - Assertions.assertNull(metadata.getContainers().get(1).getFileGroup()); + Assertions.assertEquals("first", metadata.getContainers().get(0).getBundle()); + Assertions.assertEquals( + SegmentFileBuilder.ROOT_BUNDLE_NAME, + metadata.getContainers().get(1).getBundle() + ); } } @Test - void testStartFileGroupWhileWriterInUseThrows() throws IOException + void testStartFileBundleWhileWriterInUseThrows() throws IOException { final File baseDir = newBaseDir(); try (SegmentFileBuilderV10 builder = SegmentFileBuilderV10.create(JSON_MAPPER, baseDir)) { - builder.startFileGroup("__base"); + builder.startFileBundle("__base"); try (SegmentFileChannel outer = builder.addWithChannel("__base/col0", 4)) { - Assertions.assertThrows(RuntimeException.class, () -> builder.startFileGroup("projA")); + Assertions.assertThrows(RuntimeException.class, () -> builder.startFileBundle("projA")); outer.write(ByteBuffer.wrap(new byte[]{1, 2, 3, 4})); } } } + @Test + void testStartFileBundleWithRootNameIsSameAsNull() throws IOException + { + final File baseDir = newBaseDir(); + + try (SegmentFileBuilderV10 builder = SegmentFileBuilderV10.create(JSON_MAPPER, baseDir)) { + // Explicit ROOT_BUNDLE_NAME and null are equivalent; both resolve to the default root bundle. 
+ builder.startFileBundle(SegmentFileBuilder.ROOT_BUNDLE_NAME); + final File tmp = new File(baseDir, "tmp.bin"); + Files.write(new byte[]{1, 2, 3, 4}, tmp); + builder.add("col0", tmp); + } + + final File segmentFile = new File(baseDir, IndexIO.V10_FILE_NAME); + try (SegmentFileMapperV10 mapper = SegmentFileMapperV10.create(segmentFile, JSON_MAPPER)) { + final SegmentFileMetadata metadata = mapper.getSegmentFileMetadata(); + Assertions.assertEquals(1, metadata.getContainers().size()); + Assertions.assertEquals( + SegmentFileBuilder.ROOT_BUNDLE_NAME, + metadata.getContainers().get(0).getBundle() + ); + } + } + @Test void testExternalBuilderAlsoSplitsContainersByProjection() throws IOException { @@ -300,7 +330,7 @@ void testExternalBuilderAlsoSplitsContainersByProjection() throws IOException try (SegmentFileBuilderV10 builder = SegmentFileBuilderV10.create(JSON_MAPPER, baseDir)) { for (String projection : mainProjections) { - builder.startFileGroup(projection); + builder.startFileBundle(projection); for (int col = 0; col < colCount; col++) { final String name = projection + "/col" + col; final File tmpFile = new File(tempDir, StringUtils.format("main-%s-%s.bin", projection, col)); @@ -313,7 +343,7 @@ void testExternalBuilderAlsoSplitsContainersByProjection() throws IOException // sub-file with its own header + containers. Projection-per-container splitting must apply there too. 
final SegmentFileBuilder external = builder.getExternalBuilder(externalName); for (String projection : externalProjections) { - external.startFileGroup(projection); + external.startFileBundle(projection); for (int col = 0; col < colCount; col++) { final String name = projection + "/col" + (col + 1000); final File tmpFile = new File(tempDir, StringUtils.format("ext-%s-%s.bin", projection, col)); @@ -370,7 +400,7 @@ void testNestedAddWithChannelDelegatesPerBuilder() throws IOException final byte[] nestedBytes = new byte[]{5, 6, 7, 8}; try (SegmentFileBuilderV10 builder = SegmentFileBuilderV10.create(JSON_MAPPER, baseDir)) { - builder.startFileGroup("__base"); + builder.startFileBundle("__base"); try (SegmentFileChannel outer = builder.addWithChannel("__base/outer", outerBytes.length)) { // nested write while outer is in use → forced into delegate temp file try (SegmentFileChannel nested = builder.addWithChannel("__base/nested", nestedBytes.length)) { @@ -380,7 +410,7 @@ void testNestedAddWithChannelDelegatesPerBuilder() throws IOException } final SegmentFileBuilder external = builder.getExternalBuilder(externalName); - external.startFileGroup("extProj"); + external.startFileBundle("extProj"); try (SegmentFileChannel extOuter = external.addWithChannel("extProj/outer", outerBytes.length)) { try (SegmentFileChannel extNested = external.addWithChannel("extProj/nested", nestedBytes.length)) { extNested.write(ByteBuffer.wrap(nestedBytes)); @@ -399,12 +429,12 @@ void testNestedAddWithChannelDelegatesPerBuilder() throws IOException } @Test - void testNestedDelegateClosedAfterOuterRoutesToOriginalGroup() throws IOException + void testNestedDelegateClosedAfterOuterRoutesToOriginalBundle() throws IOException { // doing something like this is weird and probably should happen in practice, but if a nested write was requested - // while file group "groupA" was active; even if the caller switches to "groupB" before finally closing the nested - // channel, the delegated bytes must 
still land in groupA's container, not groupB's. Otherwise the grouping breaks, - // and files from other groups end up in the same container. + // while bundle "groupA" was active; even if the caller switches to "groupB" before finally closing the nested + // channel, the delegated bytes must still land in groupA's container, not groupB's. Otherwise bundles break and + // files from other bundles end up in the same container. final File baseDir = newBaseDir(); final byte[] outerBytes = new byte[]{1, 2, 3, 4}; @@ -412,7 +442,7 @@ void testNestedDelegateClosedAfterOuterRoutesToOriginalGroup() throws IOExceptio final byte[] groupBBytes = new byte[]{9, 10, 11, 12}; try (SegmentFileBuilderV10 builder = SegmentFileBuilderV10.create(JSON_MAPPER, baseDir)) { - builder.startFileGroup("groupA"); + builder.startFileBundle("groupA"); final SegmentFileChannel outer = builder.addWithChannel("groupA/outer", outerBytes.length); final SegmentFileChannel nested = builder.addWithChannel("groupA/nested", nestedBytes.length); @@ -423,7 +453,7 @@ void testNestedDelegateClosedAfterOuterRoutesToOriginalGroup() throws IOExceptio outer.close(); // switch group before closing the still-open nested delegate; merge must use the snapshotted "groupA" - builder.startFileGroup("groupB"); + builder.startFileBundle("groupB"); nested.close(); // and a real groupB file so we can verify groupB's container is independent of the nested file diff --git a/processing/src/test/java/org/apache/druid/segment/file/SegmentFileContainerMetadataTest.java b/processing/src/test/java/org/apache/druid/segment/file/SegmentFileContainerMetadataTest.java index 5a56dcd7faf2..24374071d148 100644 --- a/processing/src/test/java/org/apache/druid/segment/file/SegmentFileContainerMetadataTest.java +++ b/processing/src/test/java/org/apache/druid/segment/file/SegmentFileContainerMetadataTest.java @@ -36,33 +36,48 @@ void testEqualsAndHashCode() } @Test - void testSerdeWithFileGroup() throws Exception + void 
testSerdeWithNamedBundle() throws Exception { final SegmentFileContainerMetadata metadata = new SegmentFileContainerMetadata(100, 4096, "projA"); final String json = JSON_MAPPER.writeValueAsString(metadata); - Assertions.assertTrue(json.contains("\"fileGroup\":\"projA\""), "fileGroup must be present in serialized JSON: " + json); + Assertions.assertTrue(json.contains("\"bundle\":\"projA\""), "bundle must be present in serialized JSON: " + json); Assertions.assertEquals(metadata, JSON_MAPPER.readValue(json, SegmentFileContainerMetadata.class)); } @Test - void testSerdeWithNullFileGroupOmitsField() throws Exception + void testNullBundleNormalizesToRootAndOmitsFromJson() throws Exception { - // Old-format segments don't have fileGroup; serializing null must omit the property so older readers (and - // future versions reading old segments) round-trip unchanged. + // Null in the constructor is the writer-side equivalent of "no explicit startFileBundle call"; the field + // normalizes to ROOT_BUNDLE_NAME, and the default value is omitted from JSON so segments without explicit + // bundles stay compact on disk. final SegmentFileContainerMetadata metadata = new SegmentFileContainerMetadata(0, 1024, null); + Assertions.assertEquals(SegmentFileBuilder.ROOT_BUNDLE_NAME, metadata.getBundle()); final String json = JSON_MAPPER.writeValueAsString(metadata); - Assertions.assertFalse(json.contains("fileGroup"), "null fileGroup must be omitted from JSON, got: " + json); + Assertions.assertFalse(json.contains("bundle"), "default bundle must be omitted from JSON, got: " + json); Assertions.assertEquals(metadata, JSON_MAPPER.readValue(json, SegmentFileContainerMetadata.class)); } @Test - void testDeserializeLegacyJsonWithoutFileGroup() throws Exception + void testExplicitRootBundleAlsoOmitsFromJson() throws Exception { - // Bytes produced by a writer pre-dating the fileGroup field must deserialize cleanly with fileGroup == null. 
- final String legacyJson = "{\"startOffset\":42,\"size\":8192}"; - final SegmentFileContainerMetadata metadata = JSON_MAPPER.readValue(legacyJson, SegmentFileContainerMetadata.class); + // Passing ROOT_BUNDLE_NAME explicitly is equivalent to passing null; both normalize to the default and both + // omit the field from JSON. + final SegmentFileContainerMetadata metadata = + new SegmentFileContainerMetadata(0, 1024, SegmentFileBuilder.ROOT_BUNDLE_NAME); + final String json = JSON_MAPPER.writeValueAsString(metadata); + Assertions.assertFalse(json.contains("bundle"), "explicit root bundle must be omitted from JSON, got: " + json); + Assertions.assertEquals(metadata, JSON_MAPPER.readValue(json, SegmentFileContainerMetadata.class)); + } + + @Test + void testDeserializeJsonWithoutBundleFieldDefaultsToRoot() throws Exception + { + // Bytes produced by a writer that didn't include a bundle field (old segments, or new segments without + // explicit startFileBundle) must deserialize to the ROOT_BUNDLE_NAME default. + final String json = "{\"startOffset\":42,\"size\":8192}"; + final SegmentFileContainerMetadata metadata = JSON_MAPPER.readValue(json, SegmentFileContainerMetadata.class); Assertions.assertEquals(42, metadata.getStartOffset()); Assertions.assertEquals(8192, metadata.getSize()); - Assertions.assertNull(metadata.getFileGroup()); + Assertions.assertEquals(SegmentFileBuilder.ROOT_BUNDLE_NAME, metadata.getBundle()); } } diff --git a/server/src/main/java/org/apache/druid/segment/loading/PartialSegmentBundleCacheEntry.java b/server/src/main/java/org/apache/druid/segment/loading/PartialSegmentBundleCacheEntry.java new file mode 100644 index 000000000000..ea7312cd26f6 --- /dev/null +++ b/server/src/main/java/org/apache/druid/segment/loading/PartialSegmentBundleCacheEntry.java @@ -0,0 +1,630 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.loading; + +import com.google.common.util.concurrent.SettableFuture; +import com.google.errorprone.annotations.concurrent.GuardedBy; +import org.apache.druid.error.DruidException; +import org.apache.druid.java.util.emitter.EmittingLogger; +import org.apache.druid.segment.ReferenceCountingCloseableObject; +import org.apache.druid.segment.file.PartialSegmentFileMapperV10; +import org.apache.druid.segment.file.SegmentFileBuilder; +import org.apache.druid.segment.file.SegmentFileContainerMetadata; +import org.apache.druid.segment.file.SegmentFileMetadata; +import org.apache.druid.timeline.SegmentId; + +import javax.annotation.Nullable; +import java.io.Closeable; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.atomic.AtomicReference; +import java.util.concurrent.locks.ReentrantLock; + +/** + * Cache entry for a single named bundle within a partial-loaded V10 segment. 
A bundle is a group of containers + * declared at write time via {@link SegmentFileBuilder#startFileBundle}; the cache layer reads each container's + * {@link SegmentFileContainerMetadata#getBundle} field and treats every container in the named bundle as one + * mount/evict unit. Containers written without an explicit {@code startFileBundle} call (and containers from older + * segments that did not carry a bundle name) default to {@link SegmentFileBuilder#ROOT_BUNDLE_NAME}, the implicit + * root bundle that sits above any named ones. A bundle may also span multiple physical files: when the writer + * propagates {@code startFileBundle} to attached external segment files, the cache layer transparently includes their + * matching containers in the same bundle. + *

+ * Mounting a bundle entry sparse-allocates its containers locally via + * {@link PartialSegmentFileMapperV10#initializeContainer}; unmounting evicts them (unmap + delete + clear bitmap) via + * {@link PartialSegmentFileMapperV10#evictContainer}. + *

+ * Dependency holds + references. A bundle entry holds two layers of protection on its metadata cache entry + * plus every transitive parent bundle entry passed in at construction time via {@code parentEntryIds}. The first + * layer is a {@link StorageLocation.ReservationHold} acquired via + * {@link StorageLocation#addWeakReservationHoldIfExists}, which prevents evicting weak dependencies while this bundle + * is mounted (a no-op for statically reserved dependencies). The second layer is a reference acquired via + * {@link PartialSegmentMetadataCacheEntry#acquireReference} / {@link #acquireReference} on each dependency, which + * defers each dependency's actual unmap-and-delete work until this bundle's own {@link #unmount} runs; this is the + * protection that matters for statically reserved dependencies where the cache hold is a no-op. Both are acquired + * during {@link #mount} and released during {@link #unmount}; if a parent is missing or cannot be acquired, mount + * fails and any holds/references already taken are released. + *

+ * Reference-counted deferred cleanup of this bundle. {@link #unmount()} doesn't necessarily release resources + * synchronously. While any references acquired via {@link #acquireReference()} are outstanding (e.g. an in-flight + * cursor reading this bundle's columns), the actual evict-containers-and-release-dependencies work is deferred until + * the last reference releases. The same instance can be re-mounted after a previous cleanup completes; a fresh + * internal Phaser is installed on the next successful mount. + *

+ * Mount-time dedup. Concurrent {@link #mount} calls are deduplicated via a {@link AtomicReference} of + * {@link SettableFuture}; one thread does the work, the rest wait on the same future. On failure the gate is + * cleared so a subsequent caller gets a fresh attempt; on success the gate stays set until {@link #unmount}. + */ +public class PartialSegmentBundleCacheEntry implements CacheEntry +{ + private static final EmittingLogger LOG = new EmittingLogger(PartialSegmentBundleCacheEntry.class); + + /** + * Build a bundle entry given an already-mounted metadata entry and the bundle's name (as declared at write time via + * {@link SegmentFileBuilder#startFileBundle}, or implicitly {@link SegmentFileBuilder#ROOT_BUNDLE_NAME} for + * containers written without an explicit call). Walks the main file's containers plus each attached external + * file's containers, picking every container whose {@link SegmentFileContainerMetadata#getBundle bundle} equals + * {@code bundleName}. + */ + public static PartialSegmentBundleCacheEntry forBundle( + PartialSegmentMetadataCacheEntry metadataEntry, + String bundleName, + List parentEntryIds + ) + { + final PartialSegmentFileMapperV10 fileMapper = metadataEntry.getFileMapper(); + if (fileMapper == null) { + throw DruidException.defensive( + "Cannot create bundle entry for [%s/%s]: metadata entry is not mounted", + metadataEntry.getSegmentId(), + bundleName + ); + } + + final List refs = findContainersForBundle(fileMapper, bundleName); + if (refs.isEmpty()) { + throw DruidException.defensive( + "Bundle[%s] has no containers in segment[%s]", + bundleName, + metadataEntry.getSegmentId() + ); + } + + long size = 0; + for (BundleContainerRef ref : refs) { + size += fileMapper.mapperForContainer(ref.externalFilename()) + .getSegmentFileMetadata() + .getContainers() + .get(ref.containerIndex()) + .getSize(); + } + + return new PartialSegmentBundleCacheEntry( + metadataEntry.getSegmentId(), + bundleName, + refs, + size, + metadataEntry, + 
List.copyOf(parentEntryIds) + ); + } + + /** + * Find every {@link BundleContainerRef} that the named bundle owns across the main file and each external file: + * any container whose {@link SegmentFileContainerMetadata#getBundle bundle} equals {@code bundleName}. Shared by + * {@link #forBundle} and the bootstrap path so both observe the same definition of bundle membership. + */ + public static List findContainersForBundle( + PartialSegmentFileMapperV10 fileMapper, + String bundleName + ) + { + final List refs = new ArrayList<>(); + collectMatchingContainers(fileMapper.getSegmentFileMetadata(), bundleName, null, refs); + for (String externalFilename : fileMapper.getExternalFilenames()) { + collectMatchingContainers( + fileMapper.getExternalMapper(externalFilename).getSegmentFileMetadata(), + bundleName, + externalFilename, + refs + ); + } + return List.copyOf(refs); + } + + private final PartialSegmentBundleCacheEntryIdentifier id; + private final SegmentId segmentId; + private final String bundleName; + private final List containerRefs; + private final long size; + private final PartialSegmentMetadataCacheEntry metadataEntry; + private final List parentEntryIds; + + private final ReentrantLock entryLock = new ReentrantLock(); + private final AtomicReference> mountFuture = new AtomicReference<>(); + + @GuardedBy("entryLock") + @Nullable + private StorageLocation location; + @GuardedBy("entryLock") + private final List> holds = new ArrayList<>(); + // references this bundle holds on its metadata entry and each transitive parent bundle for the duration of its + // mounted lifetime. Released in doActualUnmount. Distinct from `holds` (cache-eviction protection): these references + // gate deferred cleanup on the dependencies, so an in-flight query that holds a reference on this bundle keeps + // metadata + parents safe from drop-time unmap even if the dependency is statically reserved. 
+ @GuardedBy("entryLock") + private final List dependencyReferences = new ArrayList<>(); + @GuardedBy("entryLock") + private boolean mounted; + + // Reference-counted gate over the actual cleanup work (evict containers, release parent holds, unregister from + // metadata). Set on successful mount; unmount() closes the wrapper which defers running cleanup until all outstanding + // references (acquired via acquireReference()) are released. Re-created on mount-after-cleanup-completion. Null when + // the entry has never been mounted. + private final AtomicReference> references = new AtomicReference<>(); + + PartialSegmentBundleCacheEntry( + SegmentId segmentId, + String bundleName, + List containerRefs, + long size, + PartialSegmentMetadataCacheEntry metadataEntry, + List parentEntryIds + ) + { + this.segmentId = segmentId; + this.bundleName = bundleName; + this.id = new PartialSegmentBundleCacheEntryIdentifier(segmentId, bundleName); + this.containerRefs = containerRefs; + this.size = size; + this.metadataEntry = metadataEntry; + this.parentEntryIds = parentEntryIds; + } + + @Override + public PartialSegmentBundleCacheEntryIdentifier getId() + { + return id; + } + + @Override + public long getSize() + { + return size; + } + + @Override + public boolean isMounted() + { + entryLock.lock(); + try { + return mounted; + } + finally { + entryLock.unlock(); + } + } + + public SegmentId getSegmentId() + { + return segmentId; + } + + public String getBundleName() + { + return bundleName; + } + + /** + * The list of {@link BundleContainerRef} this bundle owns, across the main file and any external files. + */ + public List getContainerRefs() + { + return containerRefs; + } + + public List getParentEntryIds() + { + return parentEntryIds; + } + + /** + * Mount this bundle entry: acquire holds on the metadata entry and all transitive parent bundle entries, then + * sparse-allocate every container this bundle owns. 
Concurrent calls are deduplicated via the {@link #mountFuture} + * CAS gate, only one thread runs the work; the rest wait on the same future. + *

+ * On failure, any holds taken are released and the gate is cleared so a subsequent retry gets a fresh attempt. + */ + @Override + public void mount(StorageLocation mountLocation) throws IOException + { + while (true) { + final SettableFuture existing = mountFuture.get(); + if (existing != null) { + awaitMount(existing); + // The completed mount may have been for a different location. Verify the requested location matches. + entryLock.lock(); + try { + if (location != null && !location.equals(mountLocation)) { + throw DruidException.defensive( + "Already mounted[%s] in location[%s] which differs from requested[%s]", + id, + location.getPath(), + mountLocation.getPath() + ); + } + } + finally { + entryLock.unlock(); + } + verifyStillReservedOrRollback(mountLocation); + return; + } + final SettableFuture ours = SettableFuture.create(); + if (!mountFuture.compareAndSet(null, ours)) { + continue; + } + try { + doMount(mountLocation); + ours.set(null); + } + catch (Throwable t) { + // clear the gate so the next caller gets a fresh attempt + mountFuture.set(null); + ours.setException(t); + switch (t) { + case IOException ioException -> throw ioException; + case RuntimeException runtimeException -> throw runtimeException; + case Error error -> throw error; + default -> throw DruidException.defensive(t, "Failed to mount bundle entry[%s]", id); + } + } + verifyStillReservedOrRollback(mountLocation); + return; + } + } + + /** + * Post-mount safety check: confirm the entry is still registered with the location, otherwise roll back. Handles + * the race where a concurrent canceler releases the hold that was keeping this weak entry in {@code + * weakCacheEntries} and the cache evicts it while mount() is still working. Without this check, mount would commit + * local state (sparse-allocated containers on disk, parent holds + references) for an entry the cache manager no + * longer knows about, leaking those resources. 
Mirrors the same defensive check in {@code SegmentCacheEntry.mount}. + * Returns normally if rollback fires; callers detect via {@link #isMounted}. + */ + private void verifyStillReservedOrRollback(StorageLocation mountLocation) + { + if (!mountLocation.isReserved(id) && !mountLocation.isWeakReserved(id)) { + LOG.debug( + "Aborting mount of bundle[%s] in location[%s]; entry was evicted while mounting", + id, + mountLocation.getPath() + ); + unmount(); + } + } + + private void doMount(StorageLocation mountLocation) throws IOException + { + // Pre-check inside entryLock; after this we release entryLock so the hold-acquisition + container-init work below + // doesn't nest location.readLock under entryLock; same lock-order rule as the metadata entry's mount, which is + // the inverse of StorageLocation.release's writeLock -> entryLock. The CAS+SettableFuture gate in mount() + // guarantees only one thread runs this method at a time per entry, so we don't need entryLock to keep two + // concurrent mounters out. + entryLock.lock(); + try { + if (mounted) { + if (location != null && !location.equals(mountLocation)) { + throw DruidException.defensive( + "Already mounted[%s] in location[%s] which differs from requested[%s]", + id, + location.getPath(), + mountLocation.getPath() + ); + } + return; + } + } + finally { + entryLock.unlock(); + } + + final PartialSegmentFileMapperV10 fileMapper = metadataEntry.getFileMapper(); + if (fileMapper == null) { + throw DruidException.defensive( + "Cannot mount bundle[%s]: metadata entry[%s] is not mounted", + id, + metadataEntry.getId() + ); + } + + final List> acquired = new ArrayList<>(); + final List acquiredRefs = new ArrayList<>(); + boolean registered = false; + boolean committed = false; + try { + // 1. 
Cache holds on metadata + parents (prevents cache eviction of weak dependencies) + final StorageLocation.ReservationHold metadataHold = + mountLocation.addWeakReservationHoldIfExists(metadataEntry.getId()); + if (metadataHold == null) { + throw DruidException.defensive( + "Cannot acquire metadata hold for [%s]; metadata entry not registered with location[%s]", + metadataEntry.getId(), + mountLocation.getPath() + ); + } + acquired.add(metadataHold); + + for (PartialSegmentBundleCacheEntryIdentifier parentId : parentEntryIds) { + final StorageLocation.ReservationHold parentHold = + mountLocation.addWeakReservationHoldIfExists(parentId); + if (parentHold == null) { + throw DruidException.defensive( + "Cannot acquire parent hold for [%s]; parent entry not registered with location[%s]", + parentId, + mountLocation.getPath() + ); + } + acquired.add(parentHold); + } + + // 2. References on metadata + parents (gates their deferred cleanup on this bundle's lifetime; matters for + // statically-reserved dependencies where a drop fires `release()` directly without going through cache) + acquiredRefs.add(metadataEntry.acquireReference()); + for (PartialSegmentBundleCacheEntryIdentifier parentId : parentEntryIds) { + final CacheEntry parentEntry = mountLocation.getCacheEntry(parentId); + if (!(parentEntry instanceof PartialSegmentBundleCacheEntry)) { + throw DruidException.defensive( + "Parent entry[%s] of bundle[%s] is missing or not a bundle entry; cannot acquire reference", + parentId, + id + ); + } + acquiredRefs.add(((PartialSegmentBundleCacheEntry) parentEntry).acquireReference()); + } + + // 3. Sparse-allocate this bundle's containers, routing to the main mapper or the appropriate external mapper + // depending on each ref's externalFilename. + for (BundleContainerRef ref : containerRefs) { + fileMapper.mapperForContainer(ref.externalFilename()).initializeContainer(ref.containerIndex()); + } + + // Register with metadata BEFORE the state commit. 
If this throws (it shouldn't, but just in case), no state has + // been committed yet and the catch path releases the holds without leaving an orphaned-but-mounted bundle + metadataEntry.registerBundle(this); + registered = true; + + // Commit state under entryLock. Hold and reference ownership transfers from local lists to fields here. Also + // install (or re-install, after a prior mount/unmount cycle terminated the previous Phaser) the reference- + // counted gate over cleanup; future acquireReference() and unmount() calls operate on this instance. + entryLock.lock(); + try { + location = mountLocation; + holds.addAll(acquired); + dependencyReferences.addAll(acquiredRefs); + mounted = true; + references.set(new ReferenceCountingCloseableObject(this::doActualUnmount) {}); + } + finally { + entryLock.unlock(); + } + committed = true; + } + finally { + if (!committed) { + // Evict any containers that were successfully initialized before the failure. Mirrors the eager + // SegmentCacheEntry behavior: retry from a clean slate is simpler than reasoning about partial on-disk state. + // evictContainer is a no-op for containers that were never initialized, so we can iterate the full set + // without tracking how far the initialization loop got. 
+ for (BundleContainerRef ref : containerRefs) { + try { + fileMapper.mapperForContainer(ref.externalFilename()).evictContainer(ref.containerIndex()); + } + catch (Throwable t) { + LOG.warn(t, "Failed to evict container[%s/%d] for bundle[%s] during mount rollback", + ref.externalFilename(), ref.containerIndex(), id); + } + } + if (registered) { + try { + metadataEntry.unregisterBundle(this); + } + catch (Throwable t) { + LOG.warn(t, "Failed to unregister bundle[%s] during mount rollback", id); + } + } + for (Closeable ref : acquiredRefs) { + try { + ref.close(); + } + catch (Throwable t) { + LOG.warn(t, "Failed to release dependency reference during mount rollback for bundle[%s]", id); + } + } + for (StorageLocation.ReservationHold hold : acquired) { + try { + hold.close(); + } + catch (Throwable t) { + LOG.warn(t, "Failed to release hold[%s] during mount rollback", hold); + } + } + } + } + } + + /** + * Triggers cleanup of this bundle. If any references acquired via {@link #acquireReference()} are still outstanding, + * the actual evict/release work is deferred until the last reference releases; in that case this method returns + * immediately and {@link #doActualUnmount} will fire later on the thread that closes the last reference. With no + * outstanding references, cleanup runs synchronously on the caller's thread. + */ + @Override + public void unmount() + { + final ReferenceCountingCloseableObject current = references.get(); + if (current != null && !current.isClosed()) { + current.close(); + } + } + + /** + * Acquire a reference that keeps this bundle's resources (container files, parent bundle holds) alive across an + * intervening {@link #unmount} call. The returned {@link Closeable} must be closed when the caller is done; at + * that point if {@code unmount()} has already been called and no other references remain, the deferred cleanup + * fires on the closing thread. 
+ * + * @throws DruidException if the bundle has never been mounted, or has already been cleaned up + */ + public Closeable acquireReference() + { + final ReferenceCountingCloseableObject current = references.get(); + if (current == null) { + throw DruidException.defensive( + "Cannot acquire reference on bundle[%s] before it has been mounted", + id + ); + } + return current.incrementReferenceAndDecrementOnceCloseable() + .orElseThrow(() -> DruidException.defensive( + "Cannot acquire reference on bundle[%s]; already being unmounted", + id + )); + } + + /** + * The actual unmount work, invoked by the reference-counted gate's {@code onAdvance} once every outstanding + * reference (plus the wrapper's own initial party) has been released. Evicts owned containers, releases parent + * holds + dependency references, unregisters from the metadata entry, and clears the mount-dedup gate so a fresh + * mount can run. + *

+ * Dependency reference + cache hold releases happen OUTSIDE entryLock so that any cascading parent cleanup (a + * parent whose last reference is this bundle's, draining the parent's Phaser) doesn't run under our lock and keeps + * the entry-lock-then-location-lock convention intact even when the cascade re-enters StorageLocation. + */ + private void doActualUnmount() + { + final List refsToRelease; + final List> holdsToRelease; + entryLock.lock(); + try { + if (!mounted) { + return; + } + final PartialSegmentFileMapperV10 fileMapper = metadataEntry.getFileMapper(); + // file mapper may be null if metadata was already unmounted (out-of-order shutdown); evictContainer would NPE + if (fileMapper != null) { + for (BundleContainerRef ref : containerRefs) { + try { + fileMapper.mapperForContainer(ref.externalFilename()).evictContainer(ref.containerIndex()); + } + catch (Throwable t) { + LOG.warn(t, "Failed to evict container[%s/%d] for bundle[%s]", ref.externalFilename(), ref.containerIndex(), id); + } + } + } + refsToRelease = new ArrayList<>(dependencyReferences); + dependencyReferences.clear(); + holdsToRelease = new ArrayList<>(holds); + holds.clear(); + location = null; + mounted = false; + mountFuture.set(null); + } + finally { + entryLock.unlock(); + } + + // Release dependency references first so any cascading parent cleanup runs before we drop cache holds. The order + // is mostly informational since the two layers are independent, but matches the acquisition order in doMount. 
+ for (Closeable ref : refsToRelease) { + try { + ref.close(); + } + catch (Throwable t) { + LOG.warn(t, "Failed to release dependency reference for bundle[%s]", id); + } + } + releaseHolds(holdsToRelease); + metadataEntry.unregisterBundle(this); + } + + private static void collectMatchingContainers( + SegmentFileMetadata fileMeta, + String bundleName, + @Nullable String externalFilename, + List out + ) + { + final List containers = fileMeta.getContainers(); + for (int ci = 0; ci < containers.size(); ci++) { + if (bundleName.equals(containers.get(ci).getBundle())) { + out.add(new BundleContainerRef(externalFilename, ci)); + } + } + } + + private static void awaitMount(SettableFuture future) throws IOException + { + try { + future.get(); + } + catch (InterruptedException e) { + Thread.currentThread().interrupt(); + throw new IOException("Interrupted while waiting for mount", e); + } + catch (ExecutionException e) { + final Throwable cause = e.getCause() == null ? e : e.getCause(); + switch (cause) { + case IOException ioException -> throw ioException; + case RuntimeException runtimeException -> throw runtimeException; + case Error error -> throw error; + default -> throw DruidException.defensive(e, "mount failed"); + } + } + } + + private static void releaseHolds(Collection> holds) + { + for (StorageLocation.ReservationHold hold : holds) { + try { + hold.close(); + } + catch (Throwable t) { + LOG.warn(t, "Failed to release hold[%s]", hold); + } + } + } + + /** + * Reference to a single container that this bundle owns. {@code externalFilename} is {@code null} when the + * container lives in the main V10 file, or the external file's name when the container lives in an attached + * external file. {@code containerIndex} is the position within that file's + * {@link SegmentFileMetadata#getContainers()} list. 
A single logical bundle (one named group) can span containers + * across the main file and one or more external files when the writer propagates {@code startFileBundle} to both, + * the cache layer treats them as one mount/evict unit regardless. + */ + public record BundleContainerRef(@Nullable String externalFilename, int containerIndex) + { + } +} diff --git a/server/src/main/java/org/apache/druid/segment/loading/PartialSegmentBundleCacheEntryIdentifier.java b/server/src/main/java/org/apache/druid/segment/loading/PartialSegmentBundleCacheEntryIdentifier.java new file mode 100644 index 000000000000..2d7a5b823c9a --- /dev/null +++ b/server/src/main/java/org/apache/druid/segment/loading/PartialSegmentBundleCacheEntryIdentifier.java @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.loading; + +import org.apache.druid.segment.file.SegmentFileBuilder; +import org.apache.druid.timeline.SegmentId; + +/** + * Identifier for a {@link PartialSegmentBundleCacheEntry}; a named group of containers within a partial-loaded V10 + * segment that gets mounted and evicted as a unit. 
The {@code bundleName} is the group name declared at write time + * via {@link SegmentFileBuilder#startFileBundle}. + *

+ * Each partial segment is split across multiple {@link CacheEntry}, with one {@link SegmentCacheEntryIdentifier}-keyed + * metadata entry plus one of these per bundle. + */ +public record PartialSegmentBundleCacheEntryIdentifier(SegmentId segmentId, String bundleName) + implements CacheEntryIdentifier +{ + @Override + public String toString() + { + return segmentId + ":" + bundleName; + } +} diff --git a/server/src/main/java/org/apache/druid/segment/loading/PartialSegmentCacheBootstrap.java b/server/src/main/java/org/apache/druid/segment/loading/PartialSegmentCacheBootstrap.java new file mode 100644 index 000000000000..7793d2437206 --- /dev/null +++ b/server/src/main/java/org/apache/druid/segment/loading/PartialSegmentCacheBootstrap.java @@ -0,0 +1,395 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.segment.loading; + +import com.fasterxml.jackson.databind.ObjectMapper; +import org.apache.druid.error.DruidException; +import org.apache.druid.java.util.common.StringUtils; +import org.apache.druid.java.util.emitter.EmittingLogger; +import org.apache.druid.segment.file.PartialSegmentFileMapperV10; +import org.apache.druid.segment.file.SegmentFileBuilder; +import org.apache.druid.segment.file.SegmentFileContainerMetadata; +import org.apache.druid.segment.file.SegmentFileMetadata; +import org.apache.druid.segment.projections.Projections; +import org.apache.druid.timeline.SegmentId; + +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.util.ArrayList; +import java.util.Comparator; +import java.util.HashSet; +import java.util.List; +import java.util.Objects; +import java.util.Set; + +/** + * Bootstraps partial-segment cache entries from existing on-disk state. Called by the cache manager on historical + * startup for each segment directory that contains the partial-download layout (`{targetFilename}.header` plus one + * or more `{targetFilename}.container.NNNNN` files). + *

+ * The bootstrap is read-only with respect to deep storage; it never issues a range read. The on-disk header file is + * parsed in-place by {@link PartialSegmentFileMapperV10#create} (which detects header corruption and, for that one + * case, may delete the local copy; bootstrap callers should treat that as "no restorable state" and fall back to a + * cold start). Bundle entries are registered as weak entries on the storage location, mounted (which sparse-allocates + * any container files that weren't already present and re-establishes parent holds), and returned to the caller. + *

+ * Parent-set inference is delegated to {@link PartialSegmentMetadataCacheEntry#inferParentBundles}. A bundle whose + * inferred parent isn't itself present on disk is treated as orphaned: its on-disk container files are deleted + * (via {@link PartialSegmentFileMapperV10#evictContainer}, which also clears the relevant bitmap bits) and the bundle + * is not restored. The next access through the cache manager acquire path then triggers a clean cold re-fetch, the + * same fall-back as when the cache manager finds a segment listed in the info directory but missing on disk. + */ +public final class PartialSegmentCacheBootstrap +{ + private static final EmittingLogger LOG = new EmittingLogger(PartialSegmentCacheBootstrap.class); + + /** + * Restore a single partial segment's cache entries from its local on-disk layout. + * + * @param segmentId the segment whose entries are being restored + * @param localCacheDir the per-segment directory containing the header + container files + * @param targetFilename the V10 entry-point filename + * @param externalFilenames any external segment file names that were registered as children of the entry-point + * file + * @param jsonMapper used to parse the header + * @param location the storage location these entries belong to; the metadata entry is registered as + * static and bundle entries are registered as weak + * @throws DruidException if the expected header file is missing, if a metadata or bundle reservation cannot be + * established on the location, or if the metadata entry mount produces a null file mapper + * @throws IOException propagated from {@link CacheEntry#mount} (metadata or bundle) or from on-disk header/bitmap + * I/O performed during restore + */ + public static RestoreResult restoreFromDisk( + SegmentId segmentId, + File localCacheDir, + String targetFilename, + List externalFilenames, + ObjectMapper jsonMapper, + StorageLocation location + ) throws IOException + { + final File headerFile = new File(localCacheDir, 
targetFilename + PartialSegmentFileMapperV10.METADATA_HEADER_SUFFIX); + if (!headerFile.exists()) { + throw DruidException.defensive( + "No on-disk header for partial segment[%s] at [%s]; nothing to restore", + segmentId, + headerFile + ); + } + + // size the metadata reservation to the actual on-disk size so the location accounting is correct from the start + final long actualMetadataSize = computeOnDiskHeaderSize(localCacheDir, targetFilename, externalFilenames); + final PartialSegmentMetadataCacheEntry metadata = new PartialSegmentMetadataCacheEntry( + segmentId, + localCacheDir, + targetFilename, + externalFilenames, + BootstrapRangeReader.INSTANCE, + jsonMapper, + actualMetadataSize + ); + + if (!location.reserve(metadata)) { + throw DruidException.defensive( + "Failed to reserve metadata entry for partial segment[%s] at location[%s]", + segmentId, + location.getPath() + ); + } + + // From here, any throw must roll back: unmount any successfully-mounted bundles (releases their references on + // metadata) and then release the metadata entry from the location. Without this, a mid-bootstrap failure would + // leave the location partially reserved + mounted, which would confuse later restores and leak disk/memory. + final List mountedBundles = new ArrayList<>(); + boolean success = false; + try { + metadata.mount(location); + + // Discover bundle names across the main file and every external file, then keep only those whose owned + // container files actually exist on disk. Walks via the file mapper so the external mappers' SegmentFileMetadata + // are visited too — bundles can legitimately span the main file and one or more externals when the writer + // propagates startFileBundle across them. 
+ final PartialSegmentFileMapperV10 fileMapper = metadata.getFileMapper(); + if (fileMapper == null) { + throw DruidException.defensive( + "Metadata entry mount produced null file mapper for segment[%s]", + segmentId + ); + } + final Set candidateBundleNames = discoverBundleNames(fileMapper); + final List presentBundleNames = filterByContainerPresence( + candidateBundleNames, + fileMapper, + localCacheDir + ); + + // Classify each present bundle as either mountable or orphaned. A bundle is orphaned when its inferred parent + // set includes a bundle that isn't itself present on disk; restoring it would only produce a degenerate state + // where column reads that resolve into the missing parent would fail at query time. Instead, delete the + // orphan's on-disk containers so the next access triggers a clean cold re-fetch from deep storage + final List mountableBundleNames = new ArrayList<>(); + final Set orphanedBundleNames = new HashSet<>(); + for (String name : presentBundleNames) { + boolean orphaned = false; + for (PartialSegmentBundleCacheEntryIdentifier parent : metadata.inferParentBundles(name)) { + if (!presentBundleNames.contains(parent.bundleName())) { + orphaned = true; + break; + } + } + if (orphaned) { + orphanedBundleNames.add(name); + } else { + mountableBundleNames.add(name); + } + } + + for (String orphanName : orphanedBundleNames) { + for (PartialSegmentBundleCacheEntry.BundleContainerRef ref : + PartialSegmentBundleCacheEntry.findContainersForBundle(fileMapper, orphanName)) { + fileMapper.mapperForContainer(ref.externalFilename()).evictContainer(ref.containerIndex()); + } + LOG.debug( + "Deleted on-disk state of orphaned bundle[%s] for segment[%s] (parent unrestorable); next access " + + "will trigger cold re-fetch", + orphanName, + segmentId + ); + } + + // mount base bundle before any dependent bundle so its hold is available when dependents acquire parent holds + mountableBundleNames.sort(Comparator.comparing(name -> 
!Projections.BASE_TABLE_PROJECTION_NAME.equals(name))); + + for (String bundleName : mountableBundleNames) { + // Mountable bundles have all parents present by construction (orphans were filtered out above), so the + // inferred parent set is exactly what we want, no further filtering needed. + final List parentIds = metadata.inferParentBundles(bundleName); + final PartialSegmentBundleCacheEntry bundle = PartialSegmentBundleCacheEntry.forBundle( + metadata, + bundleName, + parentIds + ); + // weak-reserve with a temporary hold so the mount call's own parent-hold acquisition can succeed; release the + // bootstrap hold immediately after, if the entry should remain alive for query-side access, the runtime hold + // chain (transitive parents from aggregates, segment-level holds from acquire APIs) keeps it pinned. + try (StorageLocation.ReservationHold bootstrapHold = + location.addWeakReservationHold(bundle.getId(), () -> bundle)) { + if (bootstrapHold == null) { + throw DruidException.defensive( + "Failed to reserve bundle entry[%s] in location[%s] during bootstrap", + bundle.getId(), + location.getPath() + ); + } + bundle.mount(location); + } + mountedBundles.add(bundle); + } + + LOG.debug( + "Restored partial segment[%s] from [%s]: metadata size[%d], bundles[%s], orphans[%s]", + segmentId, + localCacheDir, + actualMetadataSize, + mountableBundleNames, + orphanedBundleNames + ); + success = true; + return new RestoreResult(metadata, mountedBundles); + } + finally { + if (!success) { + // Roll back in reverse-dependency order: bundles first (so they release references on metadata + parents) + // then the metadata entry itself. The bundle/metadata cleanup is best-effort, log and continue rather than + // shadow the original failure. 
+ for (PartialSegmentBundleCacheEntry bundle : mountedBundles) { + try { + bundle.unmount(); + } + catch (Throwable t) { + LOG.warn(t, "Failed to roll back bundle[%s] during bootstrap failure for [%s]", bundle.getId(), segmentId); + } + } + try { + location.release(metadata); + } + catch (Throwable t) { + LOG.warn(t, "Failed to roll back metadata entry for partial segment[%s]", segmentId); + } + } + } + } + + /** + * Check whether a directory looks like a partial-segment cache layout for the given target filename. + */ + public static boolean isPartialSegmentLayout(File localCacheDir, String targetFilename) + { + if (localCacheDir == null || !localCacheDir.isDirectory()) { + return false; + } + final File header = new File( + localCacheDir, + targetFilename + PartialSegmentFileMapperV10.METADATA_HEADER_SUFFIX + ); + return header.exists(); + } + + private static long computeOnDiskHeaderSize(File localCacheDir, String targetFilename, List externalFilenames) + { + long total = sizeOf(new File( + localCacheDir, + targetFilename + PartialSegmentFileMapperV10.METADATA_HEADER_SUFFIX + )); + for (String external : externalFilenames) { + total += sizeOf(new File( + localCacheDir, + external + PartialSegmentFileMapperV10.METADATA_HEADER_SUFFIX + )); + } + if (total <= 0) { + // PartialSegmentMetadataCacheEntry requires a positive reservation; if all headers are zero-length the local + // layout is degenerate and should not be restored + throw DruidException.defensive( + "Zero-sized header files in [%s]; refusing to restore", + localCacheDir + ); + } + return total; + } + + private static long sizeOf(File f) + { + return f.exists() ? f.length() : 0; + } + + /** + * Discover the bundle names present in a segment by walking each container's + * {@link SegmentFileContainerMetadata#getBundle bundle} across the main file and every attached external file. 
+ * The bundle field is always non-null — containers written without an explicit {@code startFileBundle} call + * (including those from older segments) default to {@link SegmentFileBuilder#ROOT_BUNDLE_NAME}. + */ + private static Set discoverBundleNames(PartialSegmentFileMapperV10 fileMapper) + { + final Set names = new HashSet<>(); + collectBundleNames(fileMapper.getSegmentFileMetadata(), names); + for (String externalFilename : fileMapper.getExternalFilenames()) { + collectBundleNames(fileMapper.getExternalMapper(externalFilename).getSegmentFileMetadata(), names); + } + return names; + } + + private static void collectBundleNames(SegmentFileMetadata fileMeta, Set out) + { + for (SegmentFileContainerMetadata container : fileMeta.getContainers()) { + out.add(container.getBundle()); + } + } + + /** + * Keep only bundles whose every owned container file exists on disk. The on-disk path for a container is + * {@code {mapperTargetFilename}.container.{containerIndex:05d}} where {@code mapperTargetFilename} is the main + * V10 filename for refs in the main mapper, or the external filename for refs in an external mapper. 
+ */ + private static List filterByContainerPresence( + Set candidateBundleNames, + PartialSegmentFileMapperV10 fileMapper, + File localCacheDir + ) + { + final List restorable = new ArrayList<>(); + for (String bundleName : candidateBundleNames) { + final List refs = + PartialSegmentBundleCacheEntry.findContainersForBundle(fileMapper, bundleName); + if (refs.isEmpty()) { + continue; + } + boolean allPresent = true; + for (PartialSegmentBundleCacheEntry.BundleContainerRef ref : refs) { + final String mapperFilename = fileMapper.mapperForContainer(ref.externalFilename()).getTargetFilename(); + final File cf = new File( + localCacheDir, + StringUtils.format("%s.container.%05d", mapperFilename, ref.containerIndex()) + ); + if (!cf.exists()) { + allPresent = false; + break; + } + } + if (allPresent) { + restorable.add(bundleName); + } + } + return restorable; + } + + private PartialSegmentCacheBootstrap() + { + // utility class + } + + /** + * Stub range reader used during bootstrap: the on-disk header is expected to exist and parse, so no fetch is needed. + * If for any reason {@link PartialSegmentFileMapperV10#create} decides to re-fetch (e.g. header corruption), this + * reader throws so we fail loudly rather than silently re-downloading without the operator's knowledge. + */ + private static final class BootstrapRangeReader implements SegmentRangeReader + { + static final BootstrapRangeReader INSTANCE = new BootstrapRangeReader(); + + @Override + public InputStream readRange(String filename, long offset, long length) + { + throw DruidException.defensive( + "BootstrapRangeReader was asked to fetch [%s] @[%d:%d]; bootstrap should only read from local disk", + Objects.toString(filename), + offset, + length + ); + } + } + + /** + * Hold-acquire result of a partial-segment restore: the always-static metadata entry plus the list of bundle + * entries (already mounted, registered as weak entries) discovered on disk for this segment. 
+ */ + public static final class RestoreResult + { + private final PartialSegmentMetadataCacheEntry metadata; + private final List bundles; + + RestoreResult(PartialSegmentMetadataCacheEntry metadata, List bundles) + { + this.metadata = metadata; + this.bundles = List.copyOf(bundles); + } + + public PartialSegmentMetadataCacheEntry getMetadata() + { + return metadata; + } + + public List getBundles() + { + return bundles; + } + } +} diff --git a/server/src/main/java/org/apache/druid/segment/loading/PartialSegmentMetadataCacheEntry.java b/server/src/main/java/org/apache/druid/segment/loading/PartialSegmentMetadataCacheEntry.java new file mode 100644 index 000000000000..c301123dd38a --- /dev/null +++ b/server/src/main/java/org/apache/druid/segment/loading/PartialSegmentMetadataCacheEntry.java @@ -0,0 +1,576 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.segment.loading; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.util.concurrent.SettableFuture; +import com.google.errorprone.annotations.concurrent.GuardedBy; +import org.apache.druid.error.DruidException; +import org.apache.druid.java.util.emitter.EmittingLogger; +import org.apache.druid.segment.ReferenceCountingCloseableObject; +import org.apache.druid.segment.file.PartialSegmentFileMapperV10; +import org.apache.druid.segment.file.SegmentFileBuilder; +import org.apache.druid.segment.file.SegmentFileMetadata; +import org.apache.druid.segment.projections.Projections; +import org.apache.druid.timeline.SegmentId; + +import javax.annotation.Nullable; +import java.io.Closeable; +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.atomic.AtomicReference; +import java.util.concurrent.locks.ReentrantLock; + +/** + * Cache entry for the metadata header of a V10 segment loaded via partial download. Mounting this entry range-reads + * the V10 header from deep storage, parses {@link SegmentFileMetadata}, and constructs a + * {@link PartialSegmentFileMapperV10} that can later download individual internal files on demand. + *
<p>
+ * Reservation is sized via a configurable up-front estimate at construction time, then shrunk to the actual on-disk + * header size after mount via {@link StorageLocation#adjustReservation}. Mount fails fast if the actual size exceeds + * the estimate; the operator must increase the knob to recover. + *
<p>
+ * Per-bundle cache entries created downstream of this one share the same {@link PartialSegmentFileMapperV10} + * instance via {@link #getFileMapper()}; closing the metadata entry closes the file mapper, which unmaps all + * containers and external file mappers. + *
<p>
+ * Reference-counted deferred cleanup. {@link #unmount()} does not necessarily release resources synchronously. + * Callers that need the file mapper to stay alive across an intervening drop (e.g. a query reading column data + * through {@link PartialSegmentBundleCacheEntry}, or another component that needs the parsed + * {@link SegmentFileMetadata}) acquire a reference via {@link #acquireReference()}; while any references are + * outstanding, the actual close-file-mapper work is deferred. When the last reference releases the cleanup fires on + * that thread. Bundle entries hold one such reference per active mount, so the typical pattern is: mount metadata, + * mount bundle (which acquires a reference on metadata), use the bundle, unmount bundle (releases its reference and + * triggers metadata cleanup if it was the last reference and metadata's own unmount has been called). The same + * instance can be re-mounted after a previous cleanup completes; a fresh internal Phaser is installed on the next + * successful mount. + *
<p>
+ * Deferred cleanup hook. Callers can attach a {@link Runnable} via {@link #setOnUnmount} that fires once after + * the mapper is closed in {@link #doActualUnmount}. This is the right place to schedule work that should run only when + * the entry is truly purged. + */ +public class PartialSegmentMetadataCacheEntry implements ResizableCacheEntry +{ + private static final EmittingLogger LOG = new EmittingLogger(PartialSegmentMetadataCacheEntry.class); + + private final SegmentCacheEntryIdentifier id; + private final SegmentId segmentId; + private final File localCacheDir; + private final String targetFilename; + private final List externalFilenames; + private final SegmentRangeReader rangeReader; + private final ObjectMapper jsonMapper; + private final long reservationEstimate; + + // ReentrantLock instead of synchronized to avoid pinning virtual threads pre-JEP 491 + private final ReentrantLock entryLock = new ReentrantLock(); + + // current size for accounting; starts at the estimate, shrunk to actual on-disk size after mount + @GuardedBy("entryLock") + private long currentSize; + + // null until mounted + @GuardedBy("entryLock") + @Nullable + private StorageLocation location; + @GuardedBy("entryLock") + @Nullable + private PartialSegmentFileMapperV10 fileMapper; + + // Optional deferred-cleanup hook invoked by doActualUnmount after the mapper is closed. + private final AtomicReference onUnmount = new AtomicReference<>(); + + // bundle entries that are currently mounted against this segment, registered by PartialSegmentBundleCacheEntry on + // successful mount and removed on unmount. Lets the drop path enumerate bundles for cascade-close without scanning + // the StorageLocation's entry maps. + private final Set linkedBundles = ConcurrentHashMap.newKeySet(); + + // Reference-counted gate over the actual cleanup work (close file mapper, delete header files). 
Set on + // successful mount; unmount() closes the wrapper which defers running cleanup until all outstanding references + // (acquired via acquireReference()) are released. Re-created on mount-after-cleanup-completion. Null when the entry + // has never been mounted. + private final AtomicReference> references = new AtomicReference<>(); + + // CAS+SettableFuture mount-dedup gate, mirroring the bundle entry's pattern. Without this, mount()'s slow range-read + // would have to hold entryLock for its full duration, blocking concurrent status reads (isMounted, getSize, ...). + // With it: one thread wins the CAS and runs doMount; the rest wait on the same future. On failure the gate is + // cleared so retries get a fresh attempt; on success the gate stays set until doActualUnmount clears it. + private final AtomicReference> mountFuture = new AtomicReference<>(); + + public PartialSegmentMetadataCacheEntry( + SegmentId segmentId, + File localCacheDir, + String targetFilename, + List externalFilenames, + SegmentRangeReader rangeReader, + ObjectMapper jsonMapper, + long reservationEstimate + ) + { + if (reservationEstimate <= 0) { + throw DruidException.defensive( + "Reservation estimate for partial metadata entry[%s] must be positive, got [%d]", + segmentId, + reservationEstimate + ); + } + this.segmentId = segmentId; + this.id = new SegmentCacheEntryIdentifier(segmentId); + this.localCacheDir = localCacheDir; + this.targetFilename = targetFilename; + this.externalFilenames = List.copyOf(externalFilenames); + this.rangeReader = rangeReader; + this.jsonMapper = jsonMapper; + this.reservationEstimate = reservationEstimate; + this.currentSize = reservationEstimate; + } + + @Override + public SegmentCacheEntryIdentifier getId() + { + return id; + } + + public SegmentId getSegmentId() + { + return segmentId; + } + + @Override + public long getSize() + { + entryLock.lock(); + try { + return currentSize; + } + finally { + entryLock.unlock(); + } + } + + @Override + public 
boolean isMounted() + { + entryLock.lock(); + try { + return fileMapper != null; + } + finally { + entryLock.unlock(); + } + } + + @Override + public void resizeReservation(long newSize) + { + // Called from StorageLocation.adjustReservation under the location's writeLock. Acquires entryLock here as a + // real (non-reentrant) acquisition: mount() releases entryLock BEFORE calling adjustReservation precisely so the + // overall path runs writeLock -> entryLock (matching StorageLocation.release -> unmount), avoiding the + // entryLock -> writeLock inversion that would deadlock. + entryLock.lock(); + try { + this.currentSize = newSize; + } + finally { + entryLock.unlock(); + } + } + + @Override + public void mount(StorageLocation mountLocation) throws IOException + { + while (true) { + final SettableFuture existing = mountFuture.get(); + if (existing != null) { + awaitMount(existing); + // The completed mount may have been for a different location. Verify the requested location matches. + entryLock.lock(); + try { + if (location != null && !location.equals(mountLocation)) { + throw DruidException.defensive( + "Already mounted[%s] in location[%s] which differs from requested[%s]", + id, + location.getPath(), + mountLocation.getPath() + ); + } + } + finally { + entryLock.unlock(); + } + verifyStillReservedOrRollback(mountLocation); + return; + } + final SettableFuture ours = SettableFuture.create(); + if (!mountFuture.compareAndSet(null, ours)) { + continue; + } + try { + doMount(mountLocation); + ours.set(null); + } + catch (Throwable t) { + // clear the future so the next caller gets a fresh attempt + mountFuture.set(null); + ours.setException(t); + if (t instanceof IOException) { + throw (IOException) t; + } + if (t instanceof RuntimeException) { + throw (RuntimeException) t; + } + if (t instanceof Error) { + throw (Error) t; + } + throw DruidException.defensive(t, "Failed to mount metadata entry[%s]", id); + } + verifyStillReservedOrRollback(mountLocation); + 
return; + } + } + + /** + * Post-mount safety check: confirm the entry is still registered with the location, otherwise roll back. Handles + * the race where the entry's reservation gets evicted (e.g. cache picks a weak entry whose lone hold was released + * by a concurrent canceler, or {@link StorageLocation#release} fires on the static entry from a coordinator drop) + * while mount() is still in progress. Without this check, mount would commit local state for an entry the cache + * manager no longer knows about, leaking files on disk and memory mappings. Mirrors the same defensive check in + * {@code SegmentCacheEntry.mount}. Returns normally if rollback fires; callers detect via {@link #isMounted}. + */ + private void verifyStillReservedOrRollback(StorageLocation mountLocation) + { + if (!mountLocation.isReserved(id) && !mountLocation.isWeakReserved(id)) { + LOG.debug( + "Aborting mount of metadata entry[%s] in location[%s]; entry was evicted while mounting", + id, + mountLocation.getPath() + ); + unmount(); + } + } + + private void doMount(StorageLocation mountLocation) throws IOException + { + // The CAS+SettableFuture gate in mount() guarantees only one thread runs this method at a time per entry, so + // entryLock is only held briefly for state mutations. The slow PartialSegmentFileMapperV10.create() call (which + // may issue a deep-storage range read on first mount) runs outside entryLock so concurrent status reads are not + // blocked on it. adjustReservation also runs outside entryLock: StorageLocation.release goes + // writeLock -> entryLock (via release -> unmount), so entryLock -> writeLock here would be a deadlock-prone + // lock-order inversion. 
+ entryLock.lock(); + try { + if (location != null && fileMapper != null) { + if (!location.equals(mountLocation)) { + throw DruidException.defensive( + "Already mounted[%s] in location[%s] which differs from requested[%s]", + id, + location.getPath(), + mountLocation.getPath() + ); + } + return; + } + } + finally { + entryLock.unlock(); + } + + final PartialSegmentFileMapperV10 mapper = PartialSegmentFileMapperV10.create( + rangeReader, + jsonMapper, + localCacheDir, + targetFilename, + externalFilenames + ); + + final long sizeToAdjust; + try { + final long actualSize = mapper.getOnDiskHeaderSize(); + if (actualSize > reservationEstimate) { + throw DruidException.forPersona(DruidException.Persona.OPERATOR) + .ofCategory(DruidException.Category.RUNTIME_FAILURE) + .build( + "Partial segment metadata for [%s] is [%d] bytes on disk, exceeding the " + + "configured reservation estimate of [%d] bytes. Increase " + + "druid.segmentCache.virtualStorageMetadataReservationEstimate.", + segmentId, + actualSize, + reservationEstimate + ); + } + sizeToAdjust = actualSize < reservationEstimate ? actualSize : -1; + + entryLock.lock(); + try { + location = mountLocation; + fileMapper = mapper; + // Install (or re-install, after a previous mount/unmount cycle terminated the prior Phaser) the + // reference-counted gate over cleanup. Future acquireReference() / unmount() calls operate on this instance. + references.set(new ReferenceCountingCloseableObject(this::doActualUnmount) {}); + } + finally { + entryLock.unlock(); + } + } + catch (Throwable t) { + // mount failed; close mmaps and delete the on-disk header files so a retry starts clean. Mirrors the eager + // SegmentCacheEntry behavior: simpler to redo a small header range-read than to reason about whatever partial + // on-disk state the failure left. 
Crash-mid-mount across JVM restarts is still handled by the mapper's own + // corruption recovery when bootstrap runs at next startup; this path covers the in-process retry case. + try { + mapper.close(); + } + catch (Throwable closeError) { + t.addSuppressed(closeError); + } + try { + deleteHeaderFiles(); + } + catch (Throwable deleteError) { + t.addSuppressed(deleteError); + } + throw t; + } + + // Only shrink the reservation if the entry is still registered with the location. If we lost the reservation + // mid-mount (concurrent canceler / drop), adjustReservation would throw; defer to the post-mount check in + // mount() to roll back cleanly instead. + if (sizeToAdjust >= 0 && (mountLocation.isReserved(id) || mountLocation.isWeakReserved(id))) { + mountLocation.adjustReservation(id, sizeToAdjust); + } + } + + private static void awaitMount(SettableFuture future) throws IOException + { + try { + future.get(); + } + catch (InterruptedException e) { + Thread.currentThread().interrupt(); + throw new IOException("Interrupted while waiting for mount", e); + } + catch (ExecutionException e) { + final Throwable cause = e.getCause() == null ? e : e.getCause(); + switch (cause) { + case IOException ioException -> throw ioException; + case RuntimeException runtimeException -> throw runtimeException; + case Error error -> throw error; + default -> throw DruidException.defensive(e, "mount failed"); + } + } + } + + /** + * Triggers cleanup of this entry. If any references acquired via {@link #acquireReference()} are still outstanding, + * the actual unmap-and-delete work is deferred until the last reference releases; in that case this method returns + * immediately and {@link #doActualUnmount} will fire later on the thread that closes the last reference. With no + * outstanding references, cleanup runs synchronously on the caller's thread. 
+ */ + @Override + public void unmount() + { + final ReferenceCountingCloseableObject current = references.get(); + if (current != null && !current.isClosed()) { + current.close(); + } + } + + /** + * Acquire a reference that keeps this entry's resources (the file mapper, on-disk header files) alive across an + * intervening {@link #unmount} call. The returned {@link Closeable} must be closed when the caller is done; at + * that point if {@code unmount()} has already been called and no other references remain, the deferred cleanup + * fires on the closing thread. + * + * @throws DruidException if the entry has never been mounted, or has already been cleaned up + */ + public Closeable acquireReference() + { + final ReferenceCountingCloseableObject current = references.get(); + if (current == null) { + throw DruidException.defensive( + "Cannot acquire reference on partial segment metadata entry[%s] before it has been mounted", + id + ); + } + return current.incrementReferenceAndDecrementOnceCloseable() + .orElseThrow(() -> DruidException.defensive( + "Cannot acquire reference on partial segment metadata entry[%s]; already being unmounted", + id + )); + } + + /** + * The actual unmount work, invoked by the reference-counted gate's {@code onAdvance} once every outstanding + * reference (plus the wrapper's own initial party) has been released. Closes the file mapper, deletes the on-disk + * header files (the entry owns its storage-location footprint), and runs the optional {@link #setOnUnmount + * onUnmount} hook. + */ + private void doActualUnmount() + { + final Runnable hook; + entryLock.lock(); + try { + if (fileMapper == null) { + return; + } + try { + fileMapper.close(); + } + catch (Throwable t) { + LOG.warn(t, "Failed to close partial segment file mapper for [%s]", segmentId); + } + fileMapper = null; + location = null; + // Clear the mount-dedup gate so a subsequent mount() on this same instance starts a fresh attempt. 
+ mountFuture.set(null); + deleteHeaderFiles(); + hook = onUnmount.getAndSet(null); + } + finally { + entryLock.unlock(); + } + // Run the hook outside entryLock so it can touch the file system / cache manager without contending with + // concurrent status reads, and so a slow or buggy hook can't deadlock against acquireReference paths. + if (hook != null) { + try { + hook.run(); + } + catch (Throwable t) { + LOG.warn(t, "onUnmount hook failed for partial segment metadata entry[%s]", segmentId); + } + } + } + + /** + * Returns the file mapper held by this entry while mounted, or null if the entry has not been mounted. + */ + @Nullable + public PartialSegmentFileMapperV10 getFileMapper() + { + entryLock.lock(); + try { + return fileMapper; + } + finally { + entryLock.unlock(); + } + } + + /** + * Returns the parsed segment file metadata while mounted, or null if not yet mounted. + */ + @Nullable + public SegmentFileMetadata getSegmentFileMetadata() + { + final PartialSegmentFileMapperV10 mapper = getFileMapper(); + return mapper == null ? null : mapper.getSegmentFileMetadata(); + } + + /** + * Structural inference of the parent bundles that the given {@code bundleName} depends on within this segment. + * Single source of truth for both bootstrap (which post-filters by what's actually restorable on disk) and the + * query-time acquire path (which uses the result directly to seed + * {@link PartialSegmentBundleCacheEntry#forBundle}'s {@code parentEntryIds}). + *
<p>
+ * Today's rule is structural and trivial: any non-base bundle depends on the base bundle. The base bundle and the + * {@link SegmentFileBuilder#ROOT_BUNDLE_NAME root bundle} have no parents, the root bundle owns everything written + * without an explicit {@code startFileBundle} call (older fileGroup-less segments, or shared internal metadata) and + * is structurally a peer of the base. If future writers introduce richer dependency graphs, the rule will need to + * grow, likely by reading dependency metadata that the writer records explicitly rather than by inference here. + */ + public List inferParentBundles(String bundleName) + { + if (Projections.BASE_TABLE_PROJECTION_NAME.equals(bundleName) + || SegmentFileBuilder.ROOT_BUNDLE_NAME.equals(bundleName)) { + return List.of(); + } + return List.of( + new PartialSegmentBundleCacheEntryIdentifier( + segmentId, + Projections.BASE_TABLE_PROJECTION_NAME + ) + ); + } + + /** + * Register a bundle entry as a current dependent of this metadata entry. Called by + * {@link PartialSegmentBundleCacheEntry} after a successful mount; the drop path uses {@link #snapshotLinkedBundles} + * to enumerate dependents for cascade-close. + */ + void registerBundle(PartialSegmentBundleCacheEntry bundle) + { + linkedBundles.add(bundle); + } + + /** + * Reverse of {@link #registerBundle}. Called by {@link PartialSegmentBundleCacheEntry#unmount} so the metadata's + * view stays consistent with which bundles are actually mounted. + */ + void unregisterBundle(PartialSegmentBundleCacheEntry bundle) + { + linkedBundles.remove(bundle); + } + + /** + * Snapshot of bundle entries currently mounted against this segment. Returned as a defensive copy; callers can + * iterate freely without risk of concurrent-modification surprises while bundles concurrently mount/unmount. Used + * by the drop path to cascade-close bundles before releasing the metadata entry. 
+ */ + public Collection snapshotLinkedBundles() + { + return new ArrayList<>(linkedBundles); + } + + /** + * Attach a deferred-cleanup hook to run when this entry is finally purged. {@link #doActualUnmount} invokes the + * hook after closing the file mapper and deleting the entry's storage-location files, outside the entry lock. + * Replaces any previously-set hook. Pass {@code null} to clear. + */ + public void setOnUnmount(@Nullable Runnable hook) + { + onUnmount.set(hook); + } + + /** + * Delete the on-disk header files this entry owns (main + any externals). Called from both + * {@link #doActualUnmount} on successful purge and the mount-failure cleanup path; safe to invoke independently of + * mount state. + */ + private void deleteHeaderFiles() + { + deleteIfExists(new File(localCacheDir, targetFilename + PartialSegmentFileMapperV10.METADATA_HEADER_SUFFIX)); + for (String filename : externalFilenames) { + deleteIfExists(new File(localCacheDir, filename + PartialSegmentFileMapperV10.METADATA_HEADER_SUFFIX)); + } + } + + private void deleteIfExists(File file) + { + if (file.exists() && !file.delete()) { + LOG.warn("Failed to delete header file[%s] during unmount of partial segment[%s]", file, segmentId); + } + } +} diff --git a/server/src/main/java/org/apache/druid/segment/loading/ResizableCacheEntry.java b/server/src/main/java/org/apache/druid/segment/loading/ResizableCacheEntry.java new file mode 100644 index 000000000000..e5ce93e3e319 --- /dev/null +++ b/server/src/main/java/org/apache/druid/segment/loading/ResizableCacheEntry.java @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.loading; + +/** + * Opt-in extension of {@link CacheEntry} that supports in-place adjustment of the reservation size after the entry is + * already registered with a {@link StorageLocation}. Used by entry types whose final size is not known at registration + * time and is determined later (e.g., a partial-segment metadata entry that reserves a pessimistic estimate up front + * and shrinks to the actual on-disk header size after the header has been downloaded). + *
<p>
+ * Implementations must mutate the field backing {@link #getSize()} so subsequent calls see the new size. Only + * {@link StorageLocation#adjustReservation(CacheEntryIdentifier, long)} should call {@link #resizeReservation(long)}; + * direct calls bypass the location's bookkeeping atomics and will leave reservation accounting incorrect. + */ +public interface ResizableCacheEntry extends CacheEntry +{ + /** + * Mutate this entry's size to {@code newSize}. If you are not calling this method from within + * {@link StorageLocation}, you should be calling {@link StorageLocation#adjustReservation} instead. + */ + void resizeReservation(long newSize); +} diff --git a/server/src/main/java/org/apache/druid/segment/loading/SegmentCacheEntryIdentifier.java b/server/src/main/java/org/apache/druid/segment/loading/SegmentCacheEntryIdentifier.java index c58e04a70302..6f982fc80beb 100644 --- a/server/src/main/java/org/apache/druid/segment/loading/SegmentCacheEntryIdentifier.java +++ b/server/src/main/java/org/apache/druid/segment/loading/SegmentCacheEntryIdentifier.java @@ -21,36 +21,11 @@ import org.apache.druid.timeline.SegmentId; -import java.util.Objects; - /** - * Use a {@link SegmentId} as a {@link CacheEntryIdentifier} + * Use a {@link SegmentId} as a {@link CacheEntryIdentifier}. 
*/ -public final class SegmentCacheEntryIdentifier implements CacheEntryIdentifier +public record SegmentCacheEntryIdentifier(SegmentId segmentId) implements CacheEntryIdentifier { - private final SegmentId segmentId; - - public SegmentCacheEntryIdentifier(SegmentId segmentId) - { - this.segmentId = segmentId; - } - - @Override - public boolean equals(Object o) - { - if (o == null || getClass() != o.getClass()) { - return false; - } - SegmentCacheEntryIdentifier that = (SegmentCacheEntryIdentifier) o; - return Objects.equals(segmentId, that.segmentId); - } - - @Override - public int hashCode() - { - return segmentId.hashCode(); - } - @Override public String toString() { diff --git a/server/src/main/java/org/apache/druid/segment/loading/SegmentLoaderConfig.java b/server/src/main/java/org/apache/druid/segment/loading/SegmentLoaderConfig.java index 3f259eb0e155..a8d3981ad70c 100644 --- a/server/src/main/java/org/apache/druid/segment/loading/SegmentLoaderConfig.java +++ b/server/src/main/java/org/apache/druid/segment/loading/SegmentLoaderConfig.java @@ -94,6 +94,17 @@ public class SegmentLoaderConfig @JsonProperty("virtualStorageIsEphemeral") private boolean virtualStorageIsEphemeral = false; + /** + * Up-front size reservation (in bytes) used when mounting a partial-segment metadata cache entry. The entry + * range-reads the V10 header from deep storage at mount time, then calls + * {@link StorageLocation#adjustReservation} to shrink to the actual on-disk size. If the actual header exceeds this + * estimate, the mount fails with an operator-facing error directing them to raise this value. Defaults to 16 MiB, + * which comfortably covers the metadata of typical V10 segments; outliers with many columns and/or projections may + * need a higher value. 
+ */ + @JsonProperty("virtualStorageMetadataReservationEstimate") + private long virtualStorageMetadataReservationEstimate = 16L * 1024L * 1024L; + private long combinedMaxSize = 0; public List getLocations() @@ -181,6 +192,11 @@ public boolean isVirtualStorageEphemeral() return virtualStorageIsEphemeral; } + public long getVirtualStorageMetadataReservationEstimate() + { + return virtualStorageMetadataReservationEstimate; + } + public SegmentLoaderConfig setLocations(List locations) { this.locations = Lists.newArrayList(locations); @@ -234,6 +250,7 @@ public String toString() ", virtualStorageLoadThreads=" + virtualStorageLoadThreads + ", virtualStorageUseVirtualThreads=" + virtualStorageUseVirtualThreads + ", virtualStorageIsEphemeral=" + virtualStorageIsEphemeral + + ", virtualStorageMetadataReservationEstimate=" + virtualStorageMetadataReservationEstimate + ", combinedMaxSize=" + combinedMaxSize + '}'; } diff --git a/server/src/main/java/org/apache/druid/segment/loading/StorageLocation.java b/server/src/main/java/org/apache/druid/segment/loading/StorageLocation.java index 711d1a5d1021..67f23c2545b3 100644 --- a/server/src/main/java/org/apache/druid/segment/loading/StorageLocation.java +++ b/server/src/main/java/org/apache/druid/segment/loading/StorageLocation.java @@ -423,6 +423,78 @@ public ReservationHold addWeakReservationHold( } } + /** + * Adjusts the reservation size of an already-registered {@link ResizableCacheEntry} downward. Used when an entry's + * final size is not known at registration time (e.g. a partial-segment metadata entry that reserves a pessimistic + * estimate and shrinks to the actual on-disk header size once the header has been downloaded). Returns reclaimed + * capacity to the location's available budget; never triggers eviction. + *
<p>
+ * Throws if {@code newSize} is greater than the entry's current size: grow semantics require checking the location's + * available budget and possibly evicting other entries, and aren't needed by the current callers. + */ + public void adjustReservation(CacheEntryIdentifier id, long newSize) + { + lock.writeLock().lock(); + try { + final CacheEntry entry; + final WeakCacheEntry weak; + if (staticCacheEntries.containsKey(id)) { + entry = staticCacheEntries.get(id); + weak = null; + } else { + weak = weakCacheEntries.get(id); + if (weak == null) { + throw DruidException.defensive( + "Cannot adjust reservation for unknown cache entry[%s]", + id + ); + } + entry = weak.cacheEntry; + } + + if (!(entry instanceof ResizableCacheEntry)) { + throw DruidException.defensive( + "Cache entry[%s] of type[%s] does not support reservation adjustment", + id, + entry.getClass().getSimpleName() + ); + } + + final long oldSize = entry.getSize(); + final long delta = oldSize - newSize; + if (delta < 0) { + throw DruidException.defensive( + "Cannot grow reservation for cache entry[%s] from [%d] to [%d] bytes; only shrink is supported", + id, + oldSize, + newSize + ); + } + if (delta == 0) { + return; + } + + ((ResizableCacheEntry) entry).resizeReservation(newSize); + currSizeBytes.getAndAdd(-delta); + if (weak == null) { + currStaticSizeBytes.getAndAdd(-delta); + } else { + currWeakSizeBytes.getAndAdd(-delta); + // Each active hold contributed entry.getSize() to currHoldBytes via trackWeakHold; shrink each hold's + // contribution by the same delta so a future trackWeakRelease (which subtracts the new smaller size) lands + // on the correct total. Clamp at 0 defensively against any pre-existing drift. 
+ final long activeHolds = weak.holdReferents.getRegisteredParties() - 1L; + if (activeHolds > 0) { + final long holdDelta = delta * activeHolds; + currHoldBytes.updateAndGet(v -> Math.max(0L, v - holdDelta)); + } + } + } + finally { + lock.writeLock().unlock(); + } + } + /** * Removes an item from {@link #staticCacheEntries}, reducing {@link #currSizeBytes} by {@link CacheEntry#getSize()}. * If the cache entry exists in {@link #weakCacheEntries}, it is left in place to be removed by diff --git a/server/src/test/java/org/apache/druid/segment/loading/PartialSegmentBundleCacheEntryIdentifierTest.java b/server/src/test/java/org/apache/druid/segment/loading/PartialSegmentBundleCacheEntryIdentifierTest.java new file mode 100644 index 000000000000..831b03e76fa6 --- /dev/null +++ b/server/src/test/java/org/apache/druid/segment/loading/PartialSegmentBundleCacheEntryIdentifierTest.java @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.segment.loading; + +import nl.jqno.equalsverifier.EqualsVerifier; +import org.apache.druid.java.util.common.Intervals; +import org.apache.druid.timeline.SegmentId; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +class PartialSegmentBundleCacheEntryIdentifierTest +{ + @Test + void testEqualsAndHashCode() + { + EqualsVerifier.forClass(PartialSegmentBundleCacheEntryIdentifier.class).usingGetClass().verify(); + } + + @Test + void testNotEqualToSegmentCacheEntryIdentifierWithSameSegmentId() + { + final SegmentId segmentId = SegmentId.of("ds", Intervals.of("2025/2026"), "v1", 0); + final PartialSegmentBundleCacheEntryIdentifier bundle = new PartialSegmentBundleCacheEntryIdentifier( + segmentId, + "__base" + ); + final SegmentCacheEntryIdentifier segment = new SegmentCacheEntryIdentifier(segmentId); + Assertions.assertNotEquals(bundle, segment); + Assertions.assertNotEquals(segment, bundle); + } +} diff --git a/server/src/test/java/org/apache/druid/segment/loading/PartialSegmentBundleCacheEntryTest.java b/server/src/test/java/org/apache/druid/segment/loading/PartialSegmentBundleCacheEntryTest.java new file mode 100644 index 000000000000..e794c3abcd65 --- /dev/null +++ b/server/src/test/java/org/apache/druid/segment/loading/PartialSegmentBundleCacheEntryTest.java @@ -0,0 +1,832 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.loading; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.io.Files; +import com.google.common.primitives.Ints; +import org.apache.druid.data.input.InputRow; +import org.apache.druid.data.input.ListBasedInputRow; +import org.apache.druid.data.input.impl.AggregateProjectionSpec; +import org.apache.druid.data.input.impl.DimensionsSpec; +import org.apache.druid.data.input.impl.LongDimensionSchema; +import org.apache.druid.data.input.impl.StringDimensionSchema; +import org.apache.druid.error.DruidException; +import org.apache.druid.java.util.common.DateTimes; +import org.apache.druid.java.util.common.FileUtils; +import org.apache.druid.java.util.common.Intervals; +import org.apache.druid.java.util.common.StringUtils; +import org.apache.druid.java.util.common.concurrent.Execs; +import org.apache.druid.query.aggregation.CountAggregatorFactory; +import org.apache.druid.query.aggregation.LongSumAggregatorFactory; +import org.apache.druid.segment.IndexBuilder; +import org.apache.druid.segment.IndexIO; +import org.apache.druid.segment.IndexSpec; +import org.apache.druid.segment.TestHelper; +import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.column.RowSignature; +import org.apache.druid.segment.data.CompressionStrategy; +import org.apache.druid.segment.file.DirectoryBackedRangeReader; +import org.apache.druid.segment.file.PartialSegmentFileMapperV10; +import org.apache.druid.segment.file.SegmentFileBuilder; +import 
org.apache.druid.segment.file.SegmentFileBuilderV10; +import org.apache.druid.segment.incremental.IncrementalIndexSchema; +import org.apache.druid.segment.projections.Projections; +import org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMediumFactory; +import org.apache.druid.timeline.SegmentId; +import org.joda.time.DateTime; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +import java.io.Closeable; +import java.io.File; +import java.io.IOException; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.ThreadLocalRandom; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; + +class PartialSegmentBundleCacheEntryTest +{ + private static final ObjectMapper JSON_MAPPER = TestHelper.makeJsonMapper(); + private static final SegmentId SEGMENT_ID = SegmentId.of("test", Intervals.of("2025/2026"), "v1", 0); + private static final String AGG_BUNDLE = "dim1_metric1_sum"; + private static final long ESTIMATE = 16 * 1024 * 1024L; + + private static final DateTime TIME = DateTimes.of("2025-01-01"); + private static final RowSignature ROW_SIGNATURE = RowSignature.builder() + .add("dim1", ColumnType.STRING) + .add("metric1", ColumnType.LONG) + .build(); + + private static final List<AggregateProjectionSpec> PROJECTIONS = Collections.singletonList( + AggregateProjectionSpec.builder(AGG_BUNDLE) + .groupingColumns(new StringDimensionSchema("dim1")) + .aggregators( + new LongSumAggregatorFactory("_metric1_sum", "metric1"), + new CountAggregatorFactory("_count") + ) + .build() + ); + + private static final List<InputRow> ROWS = Arrays.asList( + new ListBasedInputRow(ROW_SIGNATURE, TIME, ROW_SIGNATURE.getColumnNames(), Arrays.asList("a", 1L)), + new
ListBasedInputRow(ROW_SIGNATURE, TIME.plusMinutes(1), ROW_SIGNATURE.getColumnNames(), Arrays.asList("a", 2L)), + new ListBasedInputRow(ROW_SIGNATURE, TIME.plusMinutes(2), ROW_SIGNATURE.getColumnNames(), Arrays.asList("b", 3L)), + new ListBasedInputRow(ROW_SIGNATURE, TIME.plusMinutes(3), ROW_SIGNATURE.getColumnNames(), Arrays.asList("b", 4L)) + ); + + @TempDir + static File sharedTempDir; + + private static File segmentDir; + + @TempDir + File perTestTempDir; + + private File cacheDir; + private File deepStorageDir; + + @BeforeAll + static void buildSegment() + { + final File tmp = new File(sharedTempDir, "build_" + ThreadLocalRandom.current().nextInt()); + segmentDir = IndexBuilder.create() + .useV10() + .tmpDir(tmp) + .segmentWriteOutMediumFactory(OffHeapMemorySegmentWriteOutMediumFactory.instance()) + .schema( + IncrementalIndexSchema.builder() + .withDimensionsSpec( + DimensionsSpec.builder() + .setDimensions( + List.of( + new StringDimensionSchema("dim1"), + new LongDimensionSchema("metric1") + ) + ) + .build() + ) + .withRollup(false) + .withMinTimestamp(TIME.getMillis()) + .withProjections(PROJECTIONS) + .build() + ) + .indexSpec(IndexSpec.builder().withMetadataCompression(CompressionStrategy.NONE).build()) + .rows(ROWS) + .buildMMappedIndexFile(); + } + + @BeforeEach + void setup() throws IOException + { + deepStorageDir = segmentDir; + cacheDir = new File(perTestTempDir, "cache_" + ThreadLocalRandom.current().nextInt(Integer.MAX_VALUE)); + FileUtils.mkdirp(cacheDir); + } + + @Test + void testForBundleDerivesContainerIndicesAndSize() throws IOException + { + final StorageLocation location = new StorageLocation(cacheDir, ESTIMATE * 8, null); + final PartialSegmentMetadataCacheEntry metadata = newMetadataEntry(); + Assertions.assertTrue(location.reserve(metadata)); + metadata.mount(location); + + final PartialSegmentBundleCacheEntry baseEntry = PartialSegmentBundleCacheEntry.forBundle( + metadata, + Projections.BASE_TABLE_PROJECTION_NAME, + List.of() + ); + 
Assertions.assertFalse(baseEntry.getContainerRefs().isEmpty()); + Assertions.assertTrue(baseEntry.getSize() > 0); + Assertions.assertEquals(SEGMENT_ID, baseEntry.getSegmentId()); + Assertions.assertEquals(Projections.BASE_TABLE_PROJECTION_NAME, baseEntry.getBundleName()); + } + + @Test + void testForBundleFailsIfMetadataNotMounted() + { + final PartialSegmentMetadataCacheEntry metadata = newMetadataEntry(); + Assertions.assertThrows( + DruidException.class, + () -> PartialSegmentBundleCacheEntry.forBundle(metadata, Projections.BASE_TABLE_PROJECTION_NAME, List.of()) + ); + } + + @Test + void testForBundleFailsIfBundleUnknown() throws IOException + { + final StorageLocation location = new StorageLocation(cacheDir, ESTIMATE * 8, null); + final PartialSegmentMetadataCacheEntry metadata = newMetadataEntry(); + Assertions.assertTrue(location.reserve(metadata)); + metadata.mount(location); + + Assertions.assertThrows( + DruidException.class, + () -> PartialSegmentBundleCacheEntry.forBundle(metadata, "no_such_bundle", List.of()) + ); + } + + @Test + void testMountSparseAllocatesContainerFiles() throws IOException + { + final StorageLocation location = new StorageLocation(cacheDir, ESTIMATE * 8, null); + final PartialSegmentMetadataCacheEntry metadata = newMetadataEntry(); + Assertions.assertTrue(location.reserve(metadata)); + metadata.mount(location); + + final PartialSegmentBundleCacheEntry baseEntry = PartialSegmentBundleCacheEntry.forBundle( + metadata, + Projections.BASE_TABLE_PROJECTION_NAME, + List.of() + ); + Assertions.assertNotNull(location.addWeakReservationHold(baseEntry.getId(), () -> baseEntry)); + baseEntry.mount(location); + + Assertions.assertTrue(baseEntry.isMounted()); + for (PartialSegmentBundleCacheEntry.BundleContainerRef ref : baseEntry.getContainerRefs()) { + final String mapperFilename = + ref.externalFilename() != null ? 
ref.externalFilename() : IndexIO.V10_FILE_NAME; + final File containerFile = new File( + cacheDir, + StringUtils.format("%s.container.%05d", mapperFilename, ref.containerIndex()) + ); + Assertions.assertTrue(containerFile.exists(), "container " + ref + " should be sparse-allocated"); + } + } + + @Test + void testMountAcquiresParentHoldsForAggregateBundle() throws IOException + { + final StorageLocation location = new StorageLocation(cacheDir, ESTIMATE * 8, null); + final PartialSegmentMetadataCacheEntry metadata = newMetadataEntry(); + Assertions.assertTrue(location.reserve(metadata)); + metadata.mount(location); + + final PartialSegmentBundleCacheEntry baseEntry = PartialSegmentBundleCacheEntry.forBundle( + metadata, + Projections.BASE_TABLE_PROJECTION_NAME, + List.of() + ); + final var baseHold = location.addWeakReservationHold(baseEntry.getId(), () -> baseEntry); + Assertions.assertNotNull(baseHold); + baseEntry.mount(location); + // close the bootstrap hold so cache could in principle evict, but the aggregate's transitive hold should keep it + baseHold.close(); + + final PartialSegmentBundleCacheEntry aggEntry = PartialSegmentBundleCacheEntry.forBundle( + metadata, + AGG_BUNDLE, + List.of(baseEntry.getId()) + ); + final var aggHold = location.addWeakReservationHold(aggEntry.getId(), () -> aggEntry); + Assertions.assertNotNull(aggHold); + aggEntry.mount(location); + + // base must still be held by the aggregate's transitive hold, so its bytes must remain charged to the location + final long baseSize = baseEntry.getSize(); + Assertions.assertTrue( + location.currentSizeBytes() >= baseSize, + "base entry size should remain charged to the location while held by the aggregate" + ); + + // unmounting the aggregate releases the parent hold; base is then evictable + aggHold.close(); + aggEntry.unmount(); + } + + @Test + void testMountFailsIfMetadataNotRegisteredWithLocation() throws IOException + { + final StorageLocation location = new StorageLocation(cacheDir, ESTIMATE * 8,
null); + final PartialSegmentMetadataCacheEntry metadata = newMetadataEntry(); + // mount metadata standalone without registering with the location, so no hold can be acquired below + final File anotherDir = new File(perTestTempDir, "adhoc"); + FileUtils.mkdirp(anotherDir); + final StorageLocation otherLocation = new StorageLocation(anotherDir, ESTIMATE * 8, null); + Assertions.assertTrue(otherLocation.reserve(metadata)); + metadata.mount(otherLocation); + + final PartialSegmentBundleCacheEntry baseEntry = PartialSegmentBundleCacheEntry.forBundle( + metadata, + Projections.BASE_TABLE_PROJECTION_NAME, + List.of() + ); + + Assertions.assertThrows(DruidException.class, () -> baseEntry.mount(location)); + Assertions.assertFalse(baseEntry.isMounted()); + } + + @Test + void testMountIsIdempotent() throws IOException + { + final StorageLocation location = new StorageLocation(cacheDir, ESTIMATE * 8, null); + final PartialSegmentMetadataCacheEntry metadata = newMetadataEntry(); + Assertions.assertTrue(location.reserve(metadata)); + metadata.mount(location); + + final PartialSegmentBundleCacheEntry baseEntry = PartialSegmentBundleCacheEntry.forBundle( + metadata, + Projections.BASE_TABLE_PROJECTION_NAME, + List.of() + ); + Assertions.assertNotNull(location.addWeakReservationHold(baseEntry.getId(), () -> baseEntry)); + + baseEntry.mount(location); + Assertions.assertTrue(baseEntry.isMounted()); + baseEntry.mount(location); + Assertions.assertTrue(baseEntry.isMounted()); + } + + @Test + void testUnmountEvictsContainersAndAllowsRemount() throws IOException + { + final StorageLocation location = new StorageLocation(cacheDir, ESTIMATE * 8, null); + final PartialSegmentMetadataCacheEntry metadata = newMetadataEntry(); + Assertions.assertTrue(location.reserve(metadata)); + metadata.mount(location); + + final PartialSegmentBundleCacheEntry baseEntry = PartialSegmentBundleCacheEntry.forBundle( + metadata, + Projections.BASE_TABLE_PROJECTION_NAME, + List.of() + ); + 
Assertions.assertNotNull(location.addWeakReservationHold(baseEntry.getId(), () -> baseEntry)); + baseEntry.mount(location); + + // download a file so the bitmap has something set, then verify unmount clears the container file + final PartialSegmentFileMapperV10 mapper = metadata.getFileMapper(); + Assertions.assertNotNull(mapper); + final String anyFile = mapper.getInternalFilenames().stream().findFirst().orElseThrow(); + Assertions.assertNotNull(mapper.mapFile(anyFile)); + Assertions.assertEquals(1, mapper.getDownloadedFiles().size()); + final PartialSegmentBundleCacheEntry.BundleContainerRef evictedRef = baseEntry.getContainerRefs().getFirst(); + final String evictedMapperFilename = + evictedRef.externalFilename() != null ? evictedRef.externalFilename() : IndexIO.V10_FILE_NAME; + final File evictedFile = new File( + cacheDir, + StringUtils.format("%s.container.%05d", evictedMapperFilename, evictedRef.containerIndex()) + ); + Assertions.assertTrue(evictedFile.exists()); + + baseEntry.unmount(); + + Assertions.assertFalse(baseEntry.isMounted()); + Assertions.assertFalse(evictedFile.exists(), "container file should be deleted on unmount"); + + // remount works (e.g. 
after cache eviction + re-acquire) + Assertions.assertNotNull(location.addWeakReservationHold(baseEntry.getId(), () -> baseEntry)); + baseEntry.mount(location); + Assertions.assertTrue(baseEntry.isMounted()); + } + + @Test + void testConcurrentMountIsDeduplicated() throws Exception + { + final StorageLocation location = new StorageLocation(cacheDir, ESTIMATE * 8, null); + final PartialSegmentMetadataCacheEntry metadata = newMetadataEntry(); + Assertions.assertTrue(location.reserve(metadata)); + metadata.mount(location); + + final PartialSegmentBundleCacheEntry baseEntry = PartialSegmentBundleCacheEntry.forBundle( + metadata, + Projections.BASE_TABLE_PROJECTION_NAME, + List.of() + ); + Assertions.assertNotNull(location.addWeakReservationHold(baseEntry.getId(), () -> baseEntry)); + + final int threads = 8; + final CountDownLatch start = new CountDownLatch(1); + final CountDownLatch done = new CountDownLatch(threads); + final AtomicInteger errors = new AtomicInteger(); + final ExecutorService exec = Execs.multiThreaded(threads, "partial-segment-tests-%d"); + try { + for (int i = 0; i < threads; i++) { + exec.submit(() -> { + try { + start.await(); + baseEntry.mount(location); + } + catch (Throwable t) { + errors.incrementAndGet(); + } + finally { + done.countDown(); + } + }); + } + start.countDown(); + Assertions.assertTrue(done.await(30, TimeUnit.SECONDS)); + Assertions.assertEquals(0, errors.get()); + Assertions.assertTrue(baseEntry.isMounted()); + } + finally { + exec.shutdownNow(); + } + } + + @Test + void testFailedMountClearsGateForRetry() throws IOException + { + final StorageLocation location = new StorageLocation(cacheDir, ESTIMATE * 8, null); + final PartialSegmentMetadataCacheEntry metadata = newMetadataEntry(); + Assertions.assertTrue(location.reserve(metadata)); + metadata.mount(location); + + // ask for an aggregate entry but pass a parent that was never registered -> mount should fail + final PartialSegmentBundleCacheEntry agg = 
PartialSegmentBundleCacheEntry.forBundle( + metadata, + AGG_BUNDLE, + List.of(new PartialSegmentBundleCacheEntryIdentifier(SEGMENT_ID, "ghost")) + ); + Assertions.assertNotNull(location.addWeakReservationHold(agg.getId(), () -> agg)); + Assertions.assertThrows(DruidException.class, () -> agg.mount(location)); + Assertions.assertFalse(agg.isMounted()); + + // a subsequent retry with a valid parent should succeed (gate must have been cleared) + final PartialSegmentBundleCacheEntry baseEntry = PartialSegmentBundleCacheEntry.forBundle( + metadata, + Projections.BASE_TABLE_PROJECTION_NAME, + List.of() + ); + Assertions.assertNotNull(location.addWeakReservationHold(baseEntry.getId(), () -> baseEntry)); + baseEntry.mount(location); + + final PartialSegmentBundleCacheEntry retry = PartialSegmentBundleCacheEntry.forBundle( + metadata, + AGG_BUNDLE, + List.of(baseEntry.getId()) + ); + Assertions.assertNotNull(location.addWeakReservationHold(retry.getId(), () -> retry)); + retry.mount(location); + Assertions.assertTrue(retry.isMounted()); + } + + @Test + void testUnmountDefersContainerEvictionWhileReferenceHeld() throws Exception + { + final StorageLocation location = new StorageLocation(cacheDir, ESTIMATE * 8, null); + final PartialSegmentMetadataCacheEntry metadata = newMetadataEntry(); + Assertions.assertTrue(location.reserve(metadata)); + metadata.mount(location); + + final PartialSegmentBundleCacheEntry base = PartialSegmentBundleCacheEntry.forBundle( + metadata, + Projections.BASE_TABLE_PROJECTION_NAME, + List.of() + ); + Assertions.assertNotNull(location.addWeakReservationHold(base.getId(), () -> base)); + base.mount(location); + + // download a file so its container is materialized on disk + final PartialSegmentFileMapperV10 mapper = metadata.getFileMapper(); + Assertions.assertNotNull(mapper); + final String anyFile = mapper.getInternalFilenames().stream().findFirst().orElseThrow(); + Assertions.assertNotNull(mapper.mapFile(anyFile)); + final 
PartialSegmentBundleCacheEntry.BundleContainerRef containerRef = base.getContainerRefs().getFirst(); + final String mapperFilename = + containerRef.externalFilename() != null ? containerRef.externalFilename() : IndexIO.V10_FILE_NAME; + final File containerFile = new File( + cacheDir, + StringUtils.format("%s.container.%05d", mapperFilename, containerRef.containerIndex()) + ); + Assertions.assertTrue(containerFile.exists()); + + final Closeable ref = base.acquireReference(); + base.unmount(); + Assertions.assertTrue( + containerFile.exists(), + "container file should persist while a reference is held, even after unmount" + ); + Assertions.assertTrue(base.isMounted(), "bundle should not have been cleaned up while reference is held"); + + ref.close(); + Assertions.assertFalse(containerFile.exists(), "container file should be deleted after last reference releases"); + Assertions.assertFalse(base.isMounted()); + } + + @Test + void testForBundleAcceptsBundleNameContainingSlash() throws IOException + { + // Bundle names are matched by exact equality against the container's explicit bundle field, so names with '/' + // are unambiguous. forBundle should accept them as long as a container exists with that exact bundle name. + // Build a V10 segment with a slashy bundle name and verify the cache layer attributes containers correctly. 
+ final File deepDir = writeSlashyGroupSegment("nested/group"); + final File cache = new File(perTestTempDir, "slashy_cache_" + ThreadLocalRandom.current().nextInt(Integer.MAX_VALUE)); + FileUtils.mkdirp(cache); + final StorageLocation location = new StorageLocation(cache, ESTIMATE * 8, null); + final PartialSegmentMetadataCacheEntry metadata = new PartialSegmentMetadataCacheEntry( + SEGMENT_ID, + cache, + IndexIO.V10_FILE_NAME, + List.of(), + new DirectoryBackedRangeReader(deepDir), + JSON_MAPPER, + ESTIMATE + ); + Assertions.assertTrue(location.reserve(metadata)); + metadata.mount(location); + + final PartialSegmentBundleCacheEntry bundle = PartialSegmentBundleCacheEntry.forBundle( + metadata, + "nested/group", + List.of() + ); + Assertions.assertEquals("nested/group", bundle.getBundleName()); + Assertions.assertFalse(bundle.getContainerRefs().isEmpty()); + } + + @Test + void testForBundleSpansMainAndExternalContainers() throws IOException + { + // Bundle "proj1" lives in BOTH the main file and an external file. forBundle should pick up containers from + // both via the explicit bundle field, producing a single logical bundle spanning multiple physical files. + final String externalName = "ext.segment"; + final File deepDir = new File(perTestTempDir, "multi_deep_" + ThreadLocalRandom.current().nextInt(Integer.MAX_VALUE)); + FileUtils.mkdirp(deepDir); + try (SegmentFileBuilderV10 builder = + SegmentFileBuilderV10.create(JSON_MAPPER, deepDir)) { + // Attach the external builder BEFORE startFileBundle so the group propagates to it. 
+ final org.apache.druid.segment.file.SegmentFileBuilder external = builder.getExternalBuilder(externalName); + builder.startFileBundle("proj1"); + + final File mainTmp = new File(perTestTempDir, "main-col.bin"); + Files.write(Ints.toByteArray(1), mainTmp); + builder.add("proj1/main_col", mainTmp); + + final File extTmp = new File(perTestTempDir, "ext-col.bin"); + Files.write(Ints.toByteArray(2), extTmp); + external.add("proj1/ext_col", extTmp); + } + + final File cache = new File(perTestTempDir, "multi_cache_" + ThreadLocalRandom.current().nextInt(Integer.MAX_VALUE)); + FileUtils.mkdirp(cache); + final StorageLocation location = new StorageLocation(cache, ESTIMATE * 8, null); + final PartialSegmentMetadataCacheEntry metadata = new PartialSegmentMetadataCacheEntry( + SEGMENT_ID, + cache, + IndexIO.V10_FILE_NAME, + List.of(externalName), + new DirectoryBackedRangeReader(deepDir), + JSON_MAPPER, + ESTIMATE + ); + Assertions.assertTrue(location.reserve(metadata)); + metadata.mount(location); + + final PartialSegmentBundleCacheEntry bundle = + PartialSegmentBundleCacheEntry.forBundle(metadata, "proj1", List.of()); + // Expect exactly two refs: one in main (externalFilename == null), one in external. + Assertions.assertEquals(2, bundle.getContainerRefs().size()); + final long mainRefCount = bundle.getContainerRefs().stream() + .filter(r -> r.externalFilename() == null).count(); + final long extRefCount = bundle.getContainerRefs().stream() + .filter(r -> externalName.equals(r.externalFilename())).count(); + Assertions.assertEquals(1, mainRefCount, "expected one main-file container ref"); + Assertions.assertEquals(1, extRefCount, "expected one external-file container ref"); + + // Mount the bundle and verify both containers are sparse-allocated under their respective targetFilenames. 
+ Assertions.assertNotNull(location.addWeakReservationHold(bundle.getId(), () -> bundle)); + bundle.mount(location); + for (PartialSegmentBundleCacheEntry.BundleContainerRef ref : bundle.getContainerRefs()) { + final String mf = ref.externalFilename() != null ? ref.externalFilename() : IndexIO.V10_FILE_NAME; + final File cf = new File(cache, StringUtils.format("%s.container.%05d", mf, ref.containerIndex())); + Assertions.assertTrue(cf.exists(), "expected container file " + cf); + } + } + + @Test + void testForBundleRootOwnsAllUngroupedContainers() throws IOException + { + // A V10 segment written without any startFileBundle calls produces containers tagged with ROOT_BUNDLE_NAME. + // forBundle(ROOT_BUNDLE_NAME) must own every such container. + final File deepDir = writeRootOnlySegment(); + final File cache = new File(perTestTempDir, "root_cache_" + ThreadLocalRandom.current().nextInt(Integer.MAX_VALUE)); + FileUtils.mkdirp(cache); + final StorageLocation location = new StorageLocation(cache, ESTIMATE * 8, null); + final PartialSegmentMetadataCacheEntry metadata = new PartialSegmentMetadataCacheEntry( + SEGMENT_ID, + cache, + IndexIO.V10_FILE_NAME, + List.of(), + new DirectoryBackedRangeReader(deepDir), + JSON_MAPPER, + ESTIMATE + ); + Assertions.assertTrue(location.reserve(metadata)); + metadata.mount(location); + + final PartialSegmentBundleCacheEntry root = PartialSegmentBundleCacheEntry.forBundle( + metadata, + SegmentFileBuilder.ROOT_BUNDLE_NAME, + List.of() + ); + Assertions.assertEquals( + metadata.getSegmentFileMetadata().getContainers().size(), + root.getContainerRefs().size(), + "root bundle should own every container in a no-startFileBundle segment" + ); + } + + @Test + void testMountRollsBackIfEntryNoLongerWeakReservedAtLocation() throws Exception + { + final StorageLocation location = new StorageLocation(cacheDir, ESTIMATE * 8, null); + // ephemeral mode: when a hold drops and no others remain, the weak entry is evicted from weakCacheEntries + // 
immediately. Lets us simulate the race where the bundle's reservation goes away before mount() finishes. + location.setAreWeakEntriesEphemeral(true); + + final PartialSegmentMetadataCacheEntry metadata = newMetadataEntry(); + Assertions.assertTrue(location.reserve(metadata)); + metadata.mount(location); + + final PartialSegmentBundleCacheEntry base = PartialSegmentBundleCacheEntry.forBundle( + metadata, + Projections.BASE_TABLE_PROJECTION_NAME, + List.of() + ); + + // Reserve + immediately release the bootstrap hold. In ephemeral mode this evicts the bundle from + // weakCacheEntries. The bundle itself was never mounted, so its `mounted` flag is still false. + try (StorageLocation.ReservationHold hold = location.addWeakReservationHold(base.getId(), () -> base)) { + Assertions.assertNotNull(hold); + } + Assertions.assertFalse(location.isWeakReserved(base.getId()), "ephemeral release should have evicted"); + + // Mount without re-reserving. doMount's work succeeds (parents are fine, containers sparse-allocate), but the + // post-mount check must detect that this entry is no longer in the location's weak map and roll back. 
+ base.mount(location); + Assertions.assertFalse( + base.isMounted(), + "post-mount check should roll back when entry was evicted from the location during mount" + ); + } + + @Test + void testAcquireReferenceBeforeMountThrows() throws IOException + { + final StorageLocation location = new StorageLocation(cacheDir, ESTIMATE * 8, null); + final PartialSegmentMetadataCacheEntry metadata = newMetadataEntry(); + Assertions.assertTrue(location.reserve(metadata)); + metadata.mount(location); + final PartialSegmentBundleCacheEntry base = PartialSegmentBundleCacheEntry.forBundle( + metadata, + Projections.BASE_TABLE_PROJECTION_NAME, + List.of() + ); + Assertions.assertThrows(DruidException.class, base::acquireReference); + } + + @Test + void testAggregateBundleHoldsReferenceOnBaseAndMetadata() throws Exception + { + final StorageLocation location = new StorageLocation(cacheDir, ESTIMATE * 8, null); + final PartialSegmentMetadataCacheEntry metadata = newMetadataEntry(); + Assertions.assertTrue(location.reserve(metadata)); + metadata.mount(location); + + final PartialSegmentBundleCacheEntry base = PartialSegmentBundleCacheEntry.forBundle( + metadata, + Projections.BASE_TABLE_PROJECTION_NAME, + List.of() + ); + Assertions.assertNotNull(location.addWeakReservationHold(base.getId(), () -> base)); + base.mount(location); + + final PartialSegmentBundleCacheEntry agg = PartialSegmentBundleCacheEntry.forBundle( + metadata, + AGG_BUNDLE, + List.of(base.getId()) + ); + Assertions.assertNotNull(location.addWeakReservationHold(agg.getId(), () -> agg)); + agg.mount(location); + + // Unmount metadata while bundles are still mounted (and holding references on it). Metadata's actual cleanup + // must defer until the bundles' references on metadata are released. 
+ final File headerFile = new File( + cacheDir, + IndexIO.V10_FILE_NAME + PartialSegmentFileMapperV10.METADATA_HEADER_SUFFIX + ); + Assertions.assertTrue(headerFile.exists()); + metadata.unmount(); + Assertions.assertTrue(headerFile.exists(), "metadata header must persist while bundles reference it"); + Assertions.assertTrue(metadata.isMounted(), "metadata must stay mounted while bundles reference it"); + + // Unmount base. Cleanup is deferred because agg still references it. + base.unmount(); + Assertions.assertTrue(base.isMounted(), "base must stay mounted while agg references it"); + Assertions.assertTrue(headerFile.exists(), "metadata still alive via agg's indirect chain"); + + // Unmount agg. Its cleanup fires (releasing references on base + metadata), which cascades base's cleanup + // (releasing its remaining reference on metadata), which finally fires metadata's cleanup. + agg.unmount(); + Assertions.assertFalse(agg.isMounted()); + Assertions.assertFalse(base.isMounted()); + Assertions.assertFalse(metadata.isMounted()); + Assertions.assertFalse(headerFile.exists(), "metadata header deleted after full cascade"); + } + + @Test + void testMountedBundleIsLinkedToMetadata() throws IOException + { + final StorageLocation location = new StorageLocation(cacheDir, ESTIMATE * 8, null); + final PartialSegmentMetadataCacheEntry metadata = newMetadataEntry(); + Assertions.assertTrue(location.reserve(metadata)); + metadata.mount(location); + + Assertions.assertTrue(metadata.snapshotLinkedBundles().isEmpty()); + + final PartialSegmentBundleCacheEntry base = PartialSegmentBundleCacheEntry.forBundle( + metadata, + Projections.BASE_TABLE_PROJECTION_NAME, + List.of() + ); + Assertions.assertNotNull(location.addWeakReservationHold(base.getId(), () -> base)); + base.mount(location); + + Assertions.assertEquals(1, metadata.snapshotLinkedBundles().size()); + Assertions.assertTrue(metadata.snapshotLinkedBundles().contains(base)); + + final PartialSegmentBundleCacheEntry agg = 
PartialSegmentBundleCacheEntry.forBundle( + metadata, + AGG_BUNDLE, + List.of(base.getId()) + ); + Assertions.assertNotNull(location.addWeakReservationHold(agg.getId(), () -> agg)); + agg.mount(location); + + Assertions.assertEquals(2, metadata.snapshotLinkedBundles().size()); + + // Unmounting base while agg still holds a dependency reference on it must NOT actually clean up base. agg's + // ref keeps base alive (deferred cleanup). Both remain in the linked set. + base.unmount(); + Assertions.assertTrue( + metadata.snapshotLinkedBundles().contains(base), + "base must stay linked while agg holds its reference" + ); + Assertions.assertTrue(metadata.snapshotLinkedBundles().contains(agg)); + + // Unmounting agg releases its reference on base; base's deferred cleanup then cascades on the same thread. + agg.unmount(); + Assertions.assertTrue(metadata.snapshotLinkedBundles().isEmpty()); + } + + @Test + void testFailedMountDoesNotLeaveDanglingLink() throws IOException + { + final StorageLocation location = new StorageLocation(cacheDir, ESTIMATE * 8, null); + final PartialSegmentMetadataCacheEntry metadata = newMetadataEntry(); + Assertions.assertTrue(location.reserve(metadata)); + metadata.mount(location); + + final PartialSegmentBundleCacheEntry agg = PartialSegmentBundleCacheEntry.forBundle( + metadata, + AGG_BUNDLE, + List.of(new PartialSegmentBundleCacheEntryIdentifier(SEGMENT_ID, "ghost")) + ); + Assertions.assertNotNull(location.addWeakReservationHold(agg.getId(), () -> agg)); + Assertions.assertThrows(DruidException.class, () -> agg.mount(location)); + + Assertions.assertTrue(metadata.snapshotLinkedBundles().isEmpty(), "failed mount must not register a link"); + } + + @Test + void testUnmountIsIdempotent() throws IOException + { + final StorageLocation location = new StorageLocation(cacheDir, ESTIMATE * 8, null); + final PartialSegmentMetadataCacheEntry metadata = newMetadataEntry(); + Assertions.assertTrue(location.reserve(metadata)); + metadata.mount(location); + 
+ final PartialSegmentBundleCacheEntry baseEntry = PartialSegmentBundleCacheEntry.forBundle( + metadata, + Projections.BASE_TABLE_PROJECTION_NAME, + List.of() + ); + Assertions.assertNotNull(location.addWeakReservationHold(baseEntry.getId(), () -> baseEntry)); + baseEntry.mount(location); + baseEntry.unmount(); + baseEntry.unmount(); // no-op + } + + private PartialSegmentMetadataCacheEntry newMetadataEntry() + { + return new PartialSegmentMetadataCacheEntry( + SEGMENT_ID, + cacheDir, + IndexIO.V10_FILE_NAME, + List.of(), + new DirectoryBackedRangeReader(deepStorageDir), + JSON_MAPPER, + ESTIMATE + ); + } + + /** + * Build a small V10 segment using {@link SegmentFileBuilderV10} directly (i.e., + * without going through IndexMergerV10) with a single bundle whose name contains a {@code /}. Used to verify the + * cache layer attributes containers via the explicit {@code bundle} field and tolerates slashy names. + */ + private File writeSlashyGroupSegment(String groupName) throws IOException + { + final File deepDir = new File(perTestTempDir, "slashy_deep_" + ThreadLocalRandom.current().nextInt(Integer.MAX_VALUE)); + FileUtils.mkdirp(deepDir); + try (SegmentFileBuilderV10 builder = + SegmentFileBuilderV10.create(JSON_MAPPER, deepDir)) { + builder.startFileBundle(groupName); + for (int i = 0; i < 3; i++) { + final File tmp = new File(perTestTempDir, "slashy-col" + i + ".bin"); + Files.write(Ints.toByteArray(i), tmp); + builder.add(groupName + "/col" + i, tmp); + } + } + return deepDir; + } + + /** + * Build a small V10 segment using {@link SegmentFileBuilderV10} directly with NO + * {@code startFileBundle} calls. Containers default to {@link SegmentFileBuilder#ROOT_BUNDLE_NAME}, simulating an + * older unnamed segment whose containers all live under the root bundle. 
+ */ + private File writeRootOnlySegment() throws IOException + { + final File deepDir = new File(perTestTempDir, "root_deep_" + ThreadLocalRandom.current().nextInt(Integer.MAX_VALUE)); + FileUtils.mkdirp(deepDir); + try (SegmentFileBuilderV10 builder = + SegmentFileBuilderV10.create(JSON_MAPPER, deepDir)) { + // Never call startFileBundle; all writes default to the root bundle. + for (int i = 0; i < 3; i++) { + final File tmp = new File(perTestTempDir, "root-col" + i + ".bin"); + Files.write(Ints.toByteArray(i), tmp); + builder.add("col" + i, tmp); + } + } + return deepDir; + } + +} diff --git a/server/src/test/java/org/apache/druid/segment/loading/PartialSegmentCacheBootstrapTest.java b/server/src/test/java/org/apache/druid/segment/loading/PartialSegmentCacheBootstrapTest.java new file mode 100644 index 000000000000..93776d6a6cd5 --- /dev/null +++ b/server/src/test/java/org/apache/druid/segment/loading/PartialSegmentCacheBootstrapTest.java @@ -0,0 +1,506 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.segment.loading; + +import com.fasterxml.jackson.databind.ObjectMapper; +import org.apache.druid.data.input.InputRow; +import org.apache.druid.data.input.ListBasedInputRow; +import org.apache.druid.data.input.impl.AggregateProjectionSpec; +import org.apache.druid.data.input.impl.DimensionsSpec; +import org.apache.druid.data.input.impl.LongDimensionSchema; +import org.apache.druid.data.input.impl.StringDimensionSchema; +import org.apache.druid.error.DruidException; +import org.apache.druid.java.util.common.DateTimes; +import org.apache.druid.java.util.common.FileUtils; +import org.apache.druid.java.util.common.Intervals; +import org.apache.druid.java.util.common.StringUtils; +import org.apache.druid.query.aggregation.CountAggregatorFactory; +import org.apache.druid.query.aggregation.LongSumAggregatorFactory; +import org.apache.druid.segment.IndexBuilder; +import org.apache.druid.segment.IndexIO; +import org.apache.druid.segment.IndexSpec; +import org.apache.druid.segment.TestHelper; +import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.column.RowSignature; +import org.apache.druid.segment.data.CompressionStrategy; +import org.apache.druid.segment.file.DirectoryBackedRangeReader; +import org.apache.druid.segment.file.PartialSegmentFileMapperV10; +import org.apache.druid.segment.incremental.IncrementalIndexSchema; +import org.apache.druid.segment.projections.Projections; +import org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMediumFactory; +import org.apache.druid.timeline.SegmentId; +import org.joda.time.DateTime; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashSet; +import 
java.util.List; +import java.util.Set; +import java.util.concurrent.ThreadLocalRandom; +import java.util.stream.Collectors; + +class PartialSegmentCacheBootstrapTest +{ + private static final ObjectMapper JSON_MAPPER = TestHelper.makeJsonMapper(); + private static final SegmentId SEGMENT_ID = SegmentId.of("test", Intervals.of("2025/2026"), "v1", 0); + private static final String AGG_BUNDLE = "dim1_metric1_sum"; + private static final long ESTIMATE = 16 * 1024 * 1024L; + + private static final DateTime TIME = DateTimes.of("2025-01-01"); + private static final RowSignature ROW_SIGNATURE = RowSignature.builder() + .add("dim1", ColumnType.STRING) + .add("metric1", ColumnType.LONG) + .build(); + + private static final List PROJECTIONS = Collections.singletonList( + AggregateProjectionSpec.builder(AGG_BUNDLE) + .groupingColumns(new StringDimensionSchema("dim1")) + .aggregators( + new LongSumAggregatorFactory("_metric1_sum", "metric1"), + new CountAggregatorFactory("_count") + ) + .build() + ); + + private static final List ROWS = Arrays.asList( + new ListBasedInputRow(ROW_SIGNATURE, TIME, ROW_SIGNATURE.getColumnNames(), Arrays.asList("a", 1L)), + new ListBasedInputRow(ROW_SIGNATURE, TIME.plusMinutes(1), ROW_SIGNATURE.getColumnNames(), Arrays.asList("a", 2L)), + new ListBasedInputRow(ROW_SIGNATURE, TIME.plusMinutes(2), ROW_SIGNATURE.getColumnNames(), Arrays.asList("b", 3L)), + new ListBasedInputRow(ROW_SIGNATURE, TIME.plusMinutes(3), ROW_SIGNATURE.getColumnNames(), Arrays.asList("b", 4L)) + ); + + @TempDir + static File sharedTempDir; + + private static File deepStorageDir; + + @TempDir + File perTestTempDir; + + private File cacheDir; + + @BeforeAll + static void buildSegment() + { + final File tmp = new File(sharedTempDir, "build_" + ThreadLocalRandom.current().nextInt()); + deepStorageDir = IndexBuilder.create() + .useV10() + .tmpDir(tmp) + .segmentWriteOutMediumFactory(OffHeapMemorySegmentWriteOutMediumFactory.instance()) + .schema( + IncrementalIndexSchema.builder() 
+ .withDimensionsSpec( + DimensionsSpec.builder() + .setDimensions( + List.of( + new StringDimensionSchema("dim1"), + new LongDimensionSchema("metric1") + ) + ) + .build() + ) + .withRollup(false) + .withMinTimestamp(TIME.getMillis()) + .withProjections(PROJECTIONS) + .build() + ) + .indexSpec(IndexSpec.builder().withMetadataCompression(CompressionStrategy.NONE).build()) + .rows(ROWS) + .buildMMappedIndexFile(); + } + + @BeforeEach + void setupPerTest() throws IOException + { + cacheDir = new File(perTestTempDir, "cache_" + ThreadLocalRandom.current().nextInt(Integer.MAX_VALUE)); + FileUtils.mkdirp(cacheDir); + } + + @Test + void testRestoreRebuildsBothEntriesFromDisk() throws IOException + { + primeOnDiskState(); + + final StorageLocation location = new StorageLocation(cacheDir, ESTIMATE * 8, null); + final PartialSegmentCacheBootstrap.RestoreResult result = PartialSegmentCacheBootstrap.restoreFromDisk( + SEGMENT_ID, + cacheDir, + IndexIO.V10_FILE_NAME, + List.of(), + JSON_MAPPER, + location + ); + + Assertions.assertNotNull(result.getMetadata()); + Assertions.assertTrue(result.getMetadata().isMounted()); + // metadata entry size matches on-disk header size, NOT a pessimistic estimate + final long headerSize = new File( + cacheDir, + IndexIO.V10_FILE_NAME + PartialSegmentFileMapperV10.METADATA_HEADER_SUFFIX + ).length(); + Assertions.assertEquals(headerSize, result.getMetadata().getSize()); + + // bundles: should have at least __base; aggregate may or may not exist on disk depending on what we primed + Assertions.assertFalse(result.getBundles().isEmpty()); + final Set bundleNames = result.getBundles().stream() + .map(PartialSegmentBundleCacheEntry::getBundleName) + .collect(Collectors.toSet()); + Assertions.assertTrue(bundleNames.contains(Projections.BASE_TABLE_PROJECTION_NAME)); + Assertions.assertTrue(bundleNames.contains(AGG_BUNDLE)); + for (PartialSegmentBundleCacheEntry bundle : result.getBundles()) { + Assertions.assertTrue(bundle.isMounted(), "bundle " + 
bundle.getBundleName() + " should be mounted"); + } + } + + @Test + void testRestoreEstablishesParentHoldOnBase() throws IOException + { + primeOnDiskState(); + + final StorageLocation location = new StorageLocation(cacheDir, ESTIMATE * 8, null); + final PartialSegmentCacheBootstrap.RestoreResult result = PartialSegmentCacheBootstrap.restoreFromDisk( + SEGMENT_ID, + cacheDir, + IndexIO.V10_FILE_NAME, + List.of(), + JSON_MAPPER, + location + ); + + // aggregate bundle should declare __base as its parent + final PartialSegmentBundleCacheEntry agg = result.getBundles().stream() + .filter(b -> AGG_BUNDLE.equals(b.getBundleName())) + .findFirst().orElseThrow(); + Assertions.assertEquals(1, agg.getParentEntryIds().size()); + Assertions.assertEquals( + Projections.BASE_TABLE_PROJECTION_NAME, + agg.getParentEntryIds().get(0).bundleName() + ); + } + + @Test + void testRestoreSkipsBundlesWithMissingContainers() throws IOException + { + primeOnDiskState(); + + // remove the aggregate bundle's container file(s), base's containers stay + final PartialSegmentFileMapperV10 introspect = PartialSegmentFileMapperV10.create( + new DirectoryBackedRangeReader(deepStorageDir), + JSON_MAPPER, + cacheDir, + IndexIO.V10_FILE_NAME, + List.of() + ); + final List aggContainers = new ArrayList<>(); + final String prefix = AGG_BUNDLE + "/"; + for (var entry : introspect.getSegmentFileMetadata().getFiles().entrySet()) { + if (entry.getKey().startsWith(prefix)) { + aggContainers.add(entry.getValue().getContainer()); + } + } + introspect.close(); + for (Integer ci : aggContainers) { + final File cf = new File(cacheDir, StringUtils.format("%s.container.%05d", IndexIO.V10_FILE_NAME, ci)); + // only delete if no other bundle shares this container + if (cf.exists()) { + cf.delete(); + } + } + + final StorageLocation location = new StorageLocation(cacheDir, ESTIMATE * 8, null); + final PartialSegmentCacheBootstrap.RestoreResult result = PartialSegmentCacheBootstrap.restoreFromDisk( + SEGMENT_ID, + 
cacheDir, + IndexIO.V10_FILE_NAME, + List.of(), + JSON_MAPPER, + location + ); + + // base should still be restored; aggregate is skipped because its containers were removed + final Set bundleNames = result.getBundles().stream() + .map(PartialSegmentBundleCacheEntry::getBundleName) + .collect(Collectors.toSet()); + Assertions.assertTrue(bundleNames.contains(Projections.BASE_TABLE_PROJECTION_NAME)); + // Note: base and aggregate may legitimately share a container (small test segment with sub-cap data), in that + // case both end up restored. Don't assert absence of aggregate; assert presence of base. + } + + @Test + void testRestoreDeletesOrphanedBundleAndSkipsIt() throws IOException + { + primeOnDiskState(); + + // Remove base's container files. After this, base is unrestorable on disk, which makes the aggregate (which + // depends on base) an orphan that must be deleted rather than restored in a degenerate state. + final PartialSegmentFileMapperV10 introspect = PartialSegmentFileMapperV10.create( + new DirectoryBackedRangeReader(deepStorageDir), + JSON_MAPPER, + cacheDir, + IndexIO.V10_FILE_NAME, + List.of() + ); + final Set baseContainers = new HashSet<>(); + final Set aggContainers = new HashSet<>(); + for (var entry : introspect.getSegmentFileMetadata().getFiles().entrySet()) { + final String fileName = entry.getKey(); + final int slash = fileName.indexOf('/'); + if (slash < 0) { + continue; + } + final String group = fileName.substring(0, slash); + if (Projections.BASE_TABLE_PROJECTION_NAME.equals(group)) { + baseContainers.add(entry.getValue().getContainer()); + } else if (AGG_BUNDLE.equals(group)) { + aggContainers.add(entry.getValue().getContainer()); + } + } + introspect.close(); + + // Only delete base containers; we want to verify the bootstrap deletes the aggregate's containers itself. + for (Integer ci : baseContainers) { + // Skip containers that base shares with aggregate (test segment is small enough that they may share). 
+ if (aggContainers.contains(ci)) { + continue; + } + final File cf = new File(cacheDir, StringUtils.format("%s.container.%05d", IndexIO.V10_FILE_NAME, ci)); + Assertions.assertTrue(cf.exists()); + Assertions.assertTrue(cf.delete()); + } + + // If base shared all its containers with aggregate, this test scenario isn't reachable skip in that case. + final Set orphanContainers = new HashSet<>(aggContainers); + orphanContainers.removeAll(baseContainers); + if (orphanContainers.isEmpty()) { + // base and aggregate share all containers; aggregate isn't truly orphaned. skip. + return; + } + + final StorageLocation location = new StorageLocation(cacheDir, ESTIMATE * 8, null); + final PartialSegmentCacheBootstrap.RestoreResult result = PartialSegmentCacheBootstrap.restoreFromDisk( + SEGMENT_ID, + cacheDir, + IndexIO.V10_FILE_NAME, + List.of(), + JSON_MAPPER, + location + ); + + // Neither base (containers missing) nor aggregate (orphaned, parent missing) should be restored. + final Set restoredNames = result.getBundles().stream() + .map(PartialSegmentBundleCacheEntry::getBundleName) + .collect(Collectors.toSet()); + Assertions.assertFalse(restoredNames.contains(AGG_BUNDLE), "orphan must not be restored"); + + // Aggregate's container files (those exclusively owned by it) must have been deleted by the orphan cleanup. 
+ for (Integer ci : orphanContainers) { + final File cf = new File(cacheDir, StringUtils.format("%s.container.%05d", IndexIO.V10_FILE_NAME, ci)); + Assertions.assertFalse( + cf.exists(), + "orphan's exclusive container " + ci + " must have been deleted, but " + cf + " still exists" + ); + } + } + + @Test + void testRestoreRollsBackOnBundleReservationFailure() throws IOException + { + primeOnDiskState(); + + final File headerFile = new File( + cacheDir, + IndexIO.V10_FILE_NAME + PartialSegmentFileMapperV10.METADATA_HEADER_SUFFIX + ); + // size location exactly to the header size: metadata reservation fits, but the first bundle's weak reservation + // has 0 bytes of remaining budget and no weak entries to reclaim, so addWeakReservationHold returns null + final StorageLocation location = new StorageLocation(cacheDir, headerFile.length(), null); + + Assertions.assertThrows( + DruidException.class, + () -> PartialSegmentCacheBootstrap.restoreFromDisk( + SEGMENT_ID, + cacheDir, + IndexIO.V10_FILE_NAME, + List.of(), + JSON_MAPPER, + location + ) + ); + + // rollback must release the metadata reservation and leave no static/weak entries behind + Assertions.assertEquals(0, location.currentSizeBytes(), "metadata reservation must be released on bootstrap failure"); + Assertions.assertFalse( + location.isReserved(new SegmentCacheEntryIdentifier(SEGMENT_ID)), + "metadata entry must be removed from the static map on bootstrap failure" + ); + Assertions.assertEquals(0, location.getWeakEntryCount(), "no bundle entries should linger on bootstrap failure"); + // rollback flows through location.release -> metadata.unmount, which unconditionally clears the entry's + // storage-location footprint. A subsequent acquire will cold-fetch from deep storage. 
+ Assertions.assertFalse(headerFile.exists(), "bootstrap failure deletes the header via the unmount cleanup path"); + } + + @Test + void testRestoreFailsWhenHeaderMissing() + { + // no priming: cacheDir is empty + final StorageLocation location = new StorageLocation(cacheDir, ESTIMATE * 8, null); + Assertions.assertThrows( + DruidException.class, + () -> PartialSegmentCacheBootstrap.restoreFromDisk( + SEGMENT_ID, + cacheDir, + IndexIO.V10_FILE_NAME, + List.of(), + JSON_MAPPER, + location + ) + ); + } + + @Test + void testIsPartialSegmentLayoutDetectsHeader() throws IOException + { + Assertions.assertFalse(PartialSegmentCacheBootstrap.isPartialSegmentLayout(cacheDir, IndexIO.V10_FILE_NAME)); + primeOnDiskState(); + Assertions.assertTrue(PartialSegmentCacheBootstrap.isPartialSegmentLayout(cacheDir, IndexIO.V10_FILE_NAME)); + Assertions.assertFalse(PartialSegmentCacheBootstrap.isPartialSegmentLayout(null, IndexIO.V10_FILE_NAME)); + Assertions.assertFalse(PartialSegmentCacheBootstrap.isPartialSegmentLayout( + new File(perTestTempDir, "nonexistent"), + IndexIO.V10_FILE_NAME + )); + } + + @Test + void testBitmapRepairClearsBitsForMissingContainers() throws IOException + { + primeOnDiskState(); + // download a file in the aggregate bundle to set a bit, then close (persists the bitmap) + final PartialSegmentFileMapperV10 mapper = PartialSegmentFileMapperV10.create( + new DirectoryBackedRangeReader(deepStorageDir), + JSON_MAPPER, + cacheDir, + IndexIO.V10_FILE_NAME, + List.of() + ); + final String prefix = AGG_BUNDLE + "/"; + String fileInAgg = null; + int aggContainerIdx = -1; + for (var entry : mapper.getSegmentFileMetadata().getFiles().entrySet()) { + if (entry.getKey().startsWith(prefix)) { + fileInAgg = entry.getKey(); + aggContainerIdx = entry.getValue().getContainer(); + // ensure this container is exclusively the aggregate's, otherwise we won't be able to test + // the repair behavior; check that __base doesn't also use this container + final int idx = 
aggContainerIdx; + boolean baseSharesContainer = mapper.getSegmentFileMetadata().getFiles().entrySet().stream() + .anyMatch(e -> e.getKey().startsWith(Projections.BASE_TABLE_PROJECTION_NAME + "/") + && e.getValue().getContainer() == idx); + if (!baseSharesContainer) { + break; + } + fileInAgg = null; + } + } + if (fileInAgg == null) { + // small segment: base + aggregate share container 0. Repair behavior is then a no-op, just skip. + mapper.close(); + return; + } + Assertions.assertNotNull(mapper.mapFile(fileInAgg), "expected file " + fileInAgg + " to be downloadable"); + Assertions.assertTrue(mapper.getDownloadedFiles().contains(fileInAgg)); + mapper.close(); + + // now delete the aggregate container file out from under the bitmap + final File aggContainer = new File( + cacheDir, + StringUtils.format("%s.container.%05d", IndexIO.V10_FILE_NAME, aggContainerIdx) + ); + Assertions.assertTrue(aggContainer.delete()); + + // re-open the mapper: the bitmap-vs-container repair should clear the bit for the missing file + try (PartialSegmentFileMapperV10 restored = PartialSegmentFileMapperV10.create( + new DirectoryBackedRangeReader(deepStorageDir), + JSON_MAPPER, + cacheDir, + IndexIO.V10_FILE_NAME, + List.of() + )) { + Assertions.assertFalse( + restored.getDownloadedFiles().contains(fileInAgg), + "bitmap repair should have cleared the bit for " + fileInAgg + ); + } + } + + /** + * Populate the per-segment cache dir with the on-disk artifacts a previous historical run would have left behind: + * the V10 header file plus sparse-allocated container files for the base and aggregate bundles. 
+ */ + private void primeOnDiskState() throws IOException + { + final StorageLocation seedLocation = new StorageLocation(cacheDir, ESTIMATE * 8, null); + final PartialSegmentMetadataCacheEntry seedMeta = new PartialSegmentMetadataCacheEntry( + SEGMENT_ID, + cacheDir, + IndexIO.V10_FILE_NAME, + List.of(), + new DirectoryBackedRangeReader(deepStorageDir), + JSON_MAPPER, + ESTIMATE + ); + Assertions.assertTrue(seedLocation.reserve(seedMeta)); + seedMeta.mount(seedLocation); + + final PartialSegmentBundleCacheEntry base = PartialSegmentBundleCacheEntry.forBundle( + seedMeta, + Projections.BASE_TABLE_PROJECTION_NAME, + List.of() + ); + final var baseHold = seedLocation.addWeakReservationHold(base.getId(), () -> base); + Assertions.assertNotNull(baseHold); + base.mount(seedLocation); + + final PartialSegmentBundleCacheEntry agg = PartialSegmentBundleCacheEntry.forBundle( + seedMeta, + AGG_BUNDLE, + List.of(base.getId()) + ); + final var aggHold = seedLocation.addWeakReservationHold(agg.getId(), () -> agg); + Assertions.assertNotNull(aggHold); + agg.mount(seedLocation); + + // Leave on-disk state behind: unmount the bundles (which deletes container files!), that's the wrong final + // state. Instead, we want containers ON disk, so leave bundles mounted but close the file mapper. Since the + // restore path re-opens via PartialSegmentFileMapperV10.create which is idempotent w.r.t. on-disk files, + // un-mount on the SEED side AFTER files are sparse-allocated would also delete them. So we just leave the + // seed mounted: at test end @TempDir cleans up. 
+ aggHold.close(); + baseHold.close(); + } + +} diff --git a/server/src/test/java/org/apache/druid/segment/loading/PartialSegmentMetadataCacheEntryTest.java b/server/src/test/java/org/apache/druid/segment/loading/PartialSegmentMetadataCacheEntryTest.java new file mode 100644 index 000000000000..4beaf9ba4ffa --- /dev/null +++ b/server/src/test/java/org/apache/druid/segment/loading/PartialSegmentMetadataCacheEntryTest.java @@ -0,0 +1,423 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.segment.loading; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.io.Files; +import com.google.common.primitives.Ints; +import org.apache.druid.error.DruidException; +import org.apache.druid.java.util.common.FileUtils; +import org.apache.druid.java.util.common.Intervals; +import org.apache.druid.java.util.common.StringUtils; +import org.apache.druid.java.util.common.concurrent.Execs; +import org.apache.druid.segment.IndexIO; +import org.apache.druid.segment.TestHelper; +import org.apache.druid.segment.data.CompressionStrategy; +import org.apache.druid.segment.file.CountingRangeReader; +import org.apache.druid.segment.file.DirectoryBackedRangeReader; +import org.apache.druid.segment.file.PartialSegmentFileMapperV10; +import org.apache.druid.segment.file.SegmentFileBuilderV10; +import org.apache.druid.segment.projections.Projections; +import org.apache.druid.timeline.SegmentId; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +import java.io.Closeable; +import java.io.File; +import java.io.IOException; +import java.util.List; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicReference; + +class PartialSegmentMetadataCacheEntryTest +{ + private static final ObjectMapper JSON_MAPPER = TestHelper.makeJsonMapper(); + private static final SegmentId SEGMENT_ID = SegmentId.of("test", Intervals.of("2025/2026"), "v1", 0); + private static final long ESTIMATE = 16 * 1024 * 1024L; + + @TempDir + File tempDir; + + private File segmentFile; + private File cacheDir; + + @BeforeEach + void setup() throws IOException + { + segmentFile = buildTestSegment(20); + cacheDir = new File(tempDir, "cache"); + FileUtils.mkdirp(cacheDir); + } + + 
@Test + void testMountFetchesHeaderAndShrinksReservation() throws IOException + { + final StorageLocation location = new StorageLocation(cacheDir, ESTIMATE * 4, null); + final PartialSegmentMetadataCacheEntry entry = newEntry(ESTIMATE); + Assertions.assertTrue(location.reserve(entry)); + Assertions.assertEquals(ESTIMATE, entry.getSize()); + Assertions.assertEquals(ESTIMATE, location.currentSizeBytes()); + + entry.mount(location); + + Assertions.assertTrue(entry.isMounted()); + Assertions.assertNotNull(entry.getFileMapper()); + Assertions.assertNotNull(entry.getSegmentFileMetadata()); + final long actualSize = entry.getSize(); + Assertions.assertTrue(actualSize > 0 && actualSize < ESTIMATE, "expected shrink, got " + actualSize); + Assertions.assertEquals(actualSize, location.currentSizeBytes()); + + final File headerFile = new File(cacheDir, IndexIO.V10_FILE_NAME + PartialSegmentFileMapperV10.METADATA_HEADER_SUFFIX); + Assertions.assertTrue(headerFile.exists()); + Assertions.assertEquals(headerFile.length(), actualSize); + } + + @Test + void testMountFailsWhenActualExceedsEstimate() + { + final StorageLocation location = new StorageLocation(cacheDir, ESTIMATE * 4, null); + // estimate of 8 bytes is way too small for any real V10 header + final PartialSegmentMetadataCacheEntry entry = newEntry(8); + Assertions.assertTrue(location.reserve(entry)); + + final DruidException thrown = Assertions.assertThrows( + DruidException.class, + () -> entry.mount(location) + ); + Assertions.assertTrue( + thrown.getMessage().contains("virtualStorageMetadataReservationEstimate"), + "expected operator-facing config hint, got: " + thrown.getMessage() + ); + Assertions.assertFalse(entry.isMounted()); + Assertions.assertNull(entry.getFileMapper()); + // reservation accounting is unchanged + Assertions.assertEquals(8, entry.getSize()); + Assertions.assertEquals(8, location.currentSizeBytes()); + // mount failure must delete the on-disk header so a retry starts clean (matches eager 
SegmentCacheEntry behavior) + final File headerFile = new File(cacheDir, IndexIO.V10_FILE_NAME + PartialSegmentFileMapperV10.METADATA_HEADER_SUFFIX); + Assertions.assertFalse(headerFile.exists(), "mount failure must delete the on-disk header file"); + } + + @Test + void testMountIsIdempotentInSameLocation() throws IOException + { + final StorageLocation location = new StorageLocation(cacheDir, ESTIMATE * 4, null); + final PartialSegmentMetadataCacheEntry entry = newEntry(ESTIMATE); + Assertions.assertTrue(location.reserve(entry)); + + entry.mount(location); + final PartialSegmentFileMapperV10 firstMapper = entry.getFileMapper(); + Assertions.assertNotNull(firstMapper); + + entry.mount(location); + Assertions.assertSame(firstMapper, entry.getFileMapper()); + } + + @Test + void testMountInDifferentLocationThrows() throws IOException + { + final StorageLocation location1 = new StorageLocation(cacheDir, ESTIMATE * 4, null); + final File otherDir = new File(tempDir, "other"); + FileUtils.mkdirp(otherDir); + final StorageLocation location2 = new StorageLocation(otherDir, ESTIMATE * 4, null); + + final PartialSegmentMetadataCacheEntry entry = newEntry(ESTIMATE); + Assertions.assertTrue(location1.reserve(entry)); + entry.mount(location1); + + Assertions.assertThrows(DruidException.class, () -> entry.mount(location2)); + } + + @Test + void testUnmountClearsState() throws IOException + { + final StorageLocation location = new StorageLocation(cacheDir, ESTIMATE * 4, null); + final PartialSegmentMetadataCacheEntry entry = newEntry(ESTIMATE); + Assertions.assertTrue(location.reserve(entry)); + entry.mount(location); + Assertions.assertTrue(entry.isMounted()); + + entry.unmount(); + + Assertions.assertFalse(entry.isMounted()); + Assertions.assertNull(entry.getFileMapper()); + Assertions.assertNull(entry.getSegmentFileMetadata()); + } + + @Test + void testUnmountIsIdempotent() throws IOException + { + final StorageLocation location = new StorageLocation(cacheDir, ESTIMATE * 4, 
null); + final PartialSegmentMetadataCacheEntry entry = newEntry(ESTIMATE); + Assertions.assertTrue(location.reserve(entry)); + entry.mount(location); + entry.unmount(); + entry.unmount(); // second call is a no-op + } + + @Test + void testUnmountDeletesHeaderFile() throws IOException + { + final StorageLocation location = new StorageLocation(cacheDir, ESTIMATE * 4, null); + final PartialSegmentMetadataCacheEntry entry = newEntry(ESTIMATE); + Assertions.assertTrue(location.reserve(entry)); + entry.mount(location); + + final File headerFile = new File(cacheDir, IndexIO.V10_FILE_NAME + PartialSegmentFileMapperV10.METADATA_HEADER_SUFFIX); + Assertions.assertTrue(headerFile.exists()); + + entry.unmount(); + Assertions.assertFalse(headerFile.exists(), "unmount must delete the entry's storage-location header file"); + } + + @Test + void testOnUnmountHookRunsAfterStorageLocationCleanup() throws IOException + { + final StorageLocation location = new StorageLocation(cacheDir, ESTIMATE * 4, null); + final PartialSegmentMetadataCacheEntry entry = newEntry(ESTIMATE); + Assertions.assertTrue(location.reserve(entry)); + entry.mount(location); + + final File headerFile = new File(cacheDir, IndexIO.V10_FILE_NAME + PartialSegmentFileMapperV10.METADATA_HEADER_SUFFIX); + final AtomicReference headerExistsWhenHookFired = new AtomicReference<>(); + final AtomicReference hookFired = new AtomicReference<>(false); + entry.setOnUnmount(() -> { + hookFired.set(true); + headerExistsWhenHookFired.set(headerFile.exists()); + }); + + entry.unmount(); + Assertions.assertTrue(hookFired.get(), "onUnmount hook must run"); + Assertions.assertEquals( + Boolean.FALSE, + headerExistsWhenHookFired.get(), + "hook must observe header already deleted (storage-location cleanup runs first)" + ); + } + + @Test + void testConstructorRejectsNonPositiveEstimate() + { + Assertions.assertThrows( + DruidException.class, + () -> new PartialSegmentMetadataCacheEntry( + SEGMENT_ID, + cacheDir, + IndexIO.V10_FILE_NAME, 
+ List.of(), + new DirectoryBackedRangeReader(segmentFile.getParentFile()), + JSON_MAPPER, + 0 + ) + ); + } + + @Test + void testGettersReturnNullBeforeMount() + { + final PartialSegmentMetadataCacheEntry entry = newEntry(ESTIMATE); + Assertions.assertFalse(entry.isMounted()); + Assertions.assertNull(entry.getFileMapper()); + Assertions.assertNull(entry.getSegmentFileMetadata()); + } + + @Test + void testUnmountDefersHeaderDeleteWhileReferenceHeld() throws Exception + { + final StorageLocation location = new StorageLocation(cacheDir, ESTIMATE * 4, null); + final PartialSegmentMetadataCacheEntry entry = newEntry(ESTIMATE); + Assertions.assertTrue(location.reserve(entry)); + entry.mount(location); + + final File headerFile = new File( + cacheDir, + IndexIO.V10_FILE_NAME + PartialSegmentFileMapperV10.METADATA_HEADER_SUFFIX + ); + Assertions.assertTrue(headerFile.exists()); + + final Closeable ref = entry.acquireReference(); + Assertions.assertTrue(entry.isMounted()); + + entry.unmount(); + // Header file MUST persist while the reference is held, even though unmount has been called. + Assertions.assertTrue(headerFile.exists(), "header file should persist while reference is held"); + Assertions.assertTrue(entry.isMounted(), "fileMapper should not be closed while reference is held"); + + ref.close(); + // Last reference released, deferred cleanup fires on this thread. 
+ Assertions.assertFalse(headerFile.exists(), "header file should be deleted after last reference releases"); + Assertions.assertFalse(entry.isMounted()); + } + + @Test + void testConcurrentMountIsDeduplicated() throws Exception + { + final StorageLocation location = new StorageLocation(cacheDir, ESTIMATE * 4, null); + final CountingRangeReader rangeReader = new CountingRangeReader(segmentFile.getParentFile()); + final PartialSegmentMetadataCacheEntry entry = new PartialSegmentMetadataCacheEntry( + SEGMENT_ID, + cacheDir, + IndexIO.V10_FILE_NAME, + List.of(), + rangeReader, + JSON_MAPPER, + ESTIMATE + ); + Assertions.assertTrue(location.reserve(entry)); + + final int threads = 8; + final CountDownLatch start = new CountDownLatch(1); + final CountDownLatch done = new CountDownLatch(threads); + final AtomicInteger errors = new AtomicInteger(); + final ExecutorService exec = Execs.multiThreaded(threads, "partial-segment-tests-%d"); + try { + for (int i = 0; i < threads; i++) { + exec.submit(() -> { + try { + start.await(); + entry.mount(location); + } + catch (Throwable t) { + errors.incrementAndGet(); + } + finally { + done.countDown(); + } + }); + } + start.countDown(); + Assertions.assertTrue(done.await(30, TimeUnit.SECONDS)); + Assertions.assertEquals(0, errors.get()); + Assertions.assertTrue(entry.isMounted()); + // Dedup proof: even with 8 concurrent mount() callers, the slow PartialSegmentFileMapperV10.create() path + // (which range-reads the header) ran exactly once. Without CAS+SettableFuture dedup, every caller would + // serialize through entryLock and each would still skip the actual fetch (early-return on already-mounted), + // but the FIRST few callers racing past the pre-check would re-fetch, counting range reads is the cleanest + // way to assert the slow work was deduped end to end. 
+ Assertions.assertEquals( + 1, + rangeReader.getHeaderReadCount(), + "expected exactly one range-read of the header across 8 concurrent mounters" + ); + } + finally { + exec.shutdownNow(); + } + } + + @Test + void testMountRollsBackIfEntryNoLongerReservedAtLocation() throws Exception + { + final StorageLocation location = new StorageLocation(cacheDir, ESTIMATE * 4, null); + final PartialSegmentMetadataCacheEntry entry = newEntry(ESTIMATE); + Assertions.assertTrue(location.reserve(entry)); + + // Externally evict the entry by releasing it (without going through entry.unmount() ourselves). This simulates + // the race where a hold/reservation gets dropped (concurrent cancellation, coordinator drop) and the location + // no longer knows about the entry by the time mount() finishes its work. + location.release(entry); + Assertions.assertFalse(entry.isMounted(), "release should have triggered cleanup"); + Assertions.assertFalse(location.isReserved(entry.getId())); + + // Call mount() again without re-reserving. doMount will succeed (on-disk header is still present and the + // file mapper opens), but the post-mount check should detect the missing reservation and roll back. 
+ entry.mount(location); + Assertions.assertFalse( + entry.isMounted(), + "mount must roll back when post-mount check detects the entry is no longer reserved with the location" + ); + Assertions.assertEquals(0, location.currentSizeBytes(), "no reservation should linger after rollback"); + } + + @Test + void testAcquireReferenceBeforeMountThrows() + { + final PartialSegmentMetadataCacheEntry entry = newEntry(ESTIMATE); + Assertions.assertThrows(DruidException.class, entry::acquireReference); + } + + @Test + void testAcquireReferenceAfterCleanupCompletesThrows() throws Exception + { + final StorageLocation location = new StorageLocation(cacheDir, ESTIMATE * 4, null); + final PartialSegmentMetadataCacheEntry entry = newEntry(ESTIMATE); + Assertions.assertTrue(location.reserve(entry)); + entry.mount(location); + entry.unmount(); // no references; cleanup runs synchronously + Assertions.assertFalse(entry.isMounted()); + Assertions.assertThrows(DruidException.class, entry::acquireReference); + } + + @Test + void testInferParentBundlesForBaseReturnsEmpty() + { + final PartialSegmentMetadataCacheEntry entry = newEntry(ESTIMATE); + Assertions.assertEquals( + List.of(), + entry.inferParentBundles(Projections.BASE_TABLE_PROJECTION_NAME) + ); + } + + @Test + void testInferParentBundlesForAggregateReturnsBase() + { + final PartialSegmentMetadataCacheEntry entry = newEntry(ESTIMATE); + final List parents = entry.inferParentBundles("some_aggregate_projection"); + Assertions.assertEquals(1, parents.size()); + Assertions.assertEquals(SEGMENT_ID, parents.getFirst().segmentId()); + Assertions.assertEquals( + Projections.BASE_TABLE_PROJECTION_NAME, + parents.getFirst().bundleName() + ); + } + + private PartialSegmentMetadataCacheEntry newEntry(long estimate) + { + return new PartialSegmentMetadataCacheEntry( + SEGMENT_ID, + cacheDir, + IndexIO.V10_FILE_NAME, + List.of(), + new DirectoryBackedRangeReader(segmentFile.getParentFile()), + JSON_MAPPER, + estimate + ); + } + + private File 
buildTestSegment(int numFiles) throws IOException + { + final File baseDir = new File(tempDir, "deep_storage"); + FileUtils.mkdirp(baseDir); + try (SegmentFileBuilderV10 builder = SegmentFileBuilderV10.create(JSON_MAPPER, baseDir, CompressionStrategy.NONE)) { + for (int i = 0; i < numFiles; ++i) { + File tmpFile = new File(tempDir, StringUtils.format("smoosh-%d.bin", i)); + Files.write(Ints.toByteArray(i), tmpFile); + builder.add(StringUtils.format("%d", i), tmpFile); + } + } + return new File(baseDir, IndexIO.V10_FILE_NAME); + } + +} diff --git a/server/src/test/java/org/apache/druid/segment/loading/StorageLocationTest.java b/server/src/test/java/org/apache/druid/segment/loading/StorageLocationTest.java index 30a4b69b97d1..2a0d8d858478 100644 --- a/server/src/test/java/org/apache/druid/segment/loading/StorageLocationTest.java +++ b/server/src/test/java/org/apache/druid/segment/loading/StorageLocationTest.java @@ -20,6 +20,7 @@ package org.apache.druid.segment.loading; import com.google.common.collect.ImmutableMap; +import org.apache.druid.error.DruidException; import org.apache.druid.java.util.common.Intervals; import org.apache.druid.java.util.common.concurrent.Execs; import org.apache.druid.java.util.common.io.Closer; @@ -33,6 +34,7 @@ import org.junit.jupiter.api.io.TempDir; import java.io.File; +import java.io.IOException; import java.util.ArrayList; import java.util.Collections; import java.util.List; @@ -414,6 +416,142 @@ public void testReclaimRestoreDoesNotCreateZombieEntries() hold2.close(); } + @Test + public void testAdjustReservationStaticEntry() + { + final StorageLocation location = new StorageLocation(tempDir, 100L, null); + final TestResizableCacheEntry entry = new TestResizableCacheEntry("a", 50); + Assertions.assertTrue(location.reserve(entry)); + Assertions.assertEquals(50, location.currentSizeBytes()); + Assertions.assertEquals(50, location.availableSizeBytes()); + + location.adjustReservation(entry.getId(), 10); + Assertions.assertEquals(10, 
entry.getSize()); + Assertions.assertEquals(10, location.currentSizeBytes()); + Assertions.assertEquals(90, location.availableSizeBytes()); + + // after shrink, location can host new entries that wouldn't have fit at the original size + final TestResizableCacheEntry entry2 = new TestResizableCacheEntry("b", 80); + Assertions.assertTrue(location.reserve(entry2)); + + // release accounting still uses the (post-shrink) size + location.release(entry); + Assertions.assertEquals(80, location.currentSizeBytes()); + } + + @Test + public void testAdjustReservationWeakEntry() + { + final StorageLocation location = new StorageLocation(tempDir, 100L, null); + final TestResizableCacheEntry entry = new TestResizableCacheEntry("a", 80); + Assertions.assertTrue(location.reserveWeak(entry)); + Assertions.assertEquals(80, location.currentWeakSizeBytes()); + + location.adjustReservation(entry.getId(), 30); + Assertions.assertEquals(30, entry.getSize()); + Assertions.assertEquals(30, location.currentWeakSizeBytes()); + Assertions.assertEquals(30, location.currentSizeBytes()); + } + + @Test + public void testAdjustReservationGrowThrows() + { + final StorageLocation location = new StorageLocation(tempDir, 100L, null); + final TestResizableCacheEntry entry = new TestResizableCacheEntry("a", 30); + Assertions.assertTrue(location.reserve(entry)); + + Assertions.assertThrows( + DruidException.class, + () -> location.adjustReservation(entry.getId(), 60) + ); + // entry size and location accounting unchanged + Assertions.assertEquals(30, entry.getSize()); + Assertions.assertEquals(30, location.currentSizeBytes()); + } + + @Test + public void testAdjustReservationUnknownEntryThrows() + { + final StorageLocation location = new StorageLocation(tempDir, 100L, null); + Assertions.assertThrows( + DruidException.class, + () -> location.adjustReservation(new StringCacheIdentifier("nope"), 10) + ); + } + + @Test + public void testAdjustReservationNonResizableEntryThrows() + { + final StorageLocation 
location = new StorageLocation(tempDir, 100L, null); + final CacheEntry entry = new TestCacheEntry("a", 30); + Assertions.assertTrue(location.reserve(entry)); + + Assertions.assertThrows( + DruidException.class, + () -> location.adjustReservation(entry.getId(), 10) + ); + } + + @Test + public void testAdjustReservationToSameSizeIsNoOp() + { + final StorageLocation location = new StorageLocation(tempDir, 100L, null); + final TestResizableCacheEntry entry = new TestResizableCacheEntry("a", 50); + Assertions.assertTrue(location.reserve(entry)); + + location.adjustReservation(entry.getId(), 50); + Assertions.assertEquals(50, entry.getSize()); + Assertions.assertEquals(50, location.currentSizeBytes()); + } + + @Test + public void testAdjustReservationWeakEntryShrinksHeldBytes() throws IOException + { + final StorageLocation location = new StorageLocation(tempDir, 100L, null); + final TestResizableCacheEntry entry = new TestResizableCacheEntry("a", 80); + Assertions.assertTrue(location.reserveWeak(entry)); + + // Acquire a hold BEFORE shrinking. trackWeakHold records 80 bytes against currHoldBytes. + final StorageLocation.ReservationHold hold = location.addWeakReservationHold(entry.getId(), () -> entry); + Assertions.assertNotNull(hold); + Assertions.assertEquals(1, location.getWeakStats().getHoldCount()); + Assertions.assertEquals(80, location.getWeakStats().getHoldBytes()); + + // Shrink to 30: hold-bytes contribution from the active hold must shrink in lockstep so the eventual + // trackWeakRelease (which subtracts the new smaller size) leaves currHoldBytes at 0. 
+ location.adjustReservation(entry.getId(), 30); + Assertions.assertEquals(30, entry.getSize()); + Assertions.assertEquals(30, location.currentWeakSizeBytes()); + Assertions.assertEquals(30, location.getWeakStats().getHoldBytes()); + + hold.close(); + Assertions.assertEquals(0, location.getWeakStats().getHoldCount()); + Assertions.assertEquals(0, location.getWeakStats().getHoldBytes()); + } + + @Test + public void testAdjustReservationWeakEntryShrinksHeldBytesWithMultipleHolds() throws IOException + { + final StorageLocation location = new StorageLocation(tempDir, 100L, null); + final TestResizableCacheEntry entry = new TestResizableCacheEntry("a", 50); + Assertions.assertTrue(location.reserveWeak(entry)); + + // Two concurrent holds: trackWeakHold fires twice, so currHoldBytes = 2 * 50 = 100. + final StorageLocation.ReservationHold hold1 = location.addWeakReservationHold(entry.getId(), () -> entry); + final StorageLocation.ReservationHold hold2 = location.addWeakReservationHold(entry.getId(), () -> entry); + Assertions.assertEquals(2, location.getWeakStats().getHoldCount()); + Assertions.assertEquals(100, location.getWeakStats().getHoldBytes()); + + // Shrink by 30 (50 → 20): each of the two active holds contributes -30, so currHoldBytes drops by 60. 
+ location.adjustReservation(entry.getId(), 20); + Assertions.assertEquals(40, location.getWeakStats().getHoldBytes()); + + hold1.close(); + Assertions.assertEquals(20, location.getWeakStats().getHoldBytes()); + hold2.close(); + Assertions.assertEquals(0, location.getWeakStats().getHoldBytes()); + } + @SuppressWarnings({"GuardedBy", "FieldAccessNotGuarded"}) private void verifyLoc(long maxSize, StorageLocation loc) { @@ -543,6 +681,55 @@ public void unmount() } } + private static final class TestResizableCacheEntry implements ResizableCacheEntry + { + private final StringCacheIdentifier id; + private long size; + private boolean isMounted = false; + + private TestResizableCacheEntry(String id, long size) + { + this.id = new StringCacheIdentifier(id); + this.size = size; + } + + @Override + public StringCacheIdentifier getId() + { + return id; + } + + @Override + public long getSize() + { + return size; + } + + @Override + public boolean isMounted() + { + return isMounted; + } + + @Override + public void mount(StorageLocation location) + { + isMounted = true; + } + + @Override + public void unmount() + { + isMounted = false; + } + + @Override + public void resizeReservation(long newSize) + { + this.size = newSize; + } + } + public static final class StringCacheIdentifier implements CacheEntryIdentifier { private final String string; From 06ef24c1e09a8000c91dd0767e09278b34066252 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Sat, 23 May 2026 14:40:06 -0700 Subject: [PATCH 06/12] feat: ingest support for numeric typed ExpressionLambdaAggregatorFactory (#19508) --- docs/querying/aggregations.md | 6 +- .../embedded/compact/CompactionTaskTest.java | 90 ++++++ .../ExpressionLambdaAggregatorFactory.java | 82 +++++ .../ExpressionLambdaAggregationTest.java | 215 +++++++++++++ ...ExpressionLambdaAggregatorFactoryTest.java | 304 ++++++++++++++++++ 5 files changed, 696 insertions(+), 1 deletion(-) create mode 100644 
processing/src/test/java/org/apache/druid/query/aggregation/ExpressionLambdaAggregationTest.java diff --git a/docs/querying/aggregations.md b/docs/querying/aggregations.md index c7b7d4e4efc2..3add7863c46a 100644 --- a/docs/querying/aggregations.md +++ b/docs/querying/aggregations.md @@ -471,7 +471,11 @@ For these reasons, we have deprecated this aggregator and recommend using the Da ### Expression aggregator -Aggregator applicable only at query time. Aggregates results using [Druid expressions](./math-expr.md) functions to facilitate building custom functions. +Aggregates results using [Druid expression](./math-expr.md) functions to facilitate building custom functions. + +The expression aggregator can be used at query time with any intermediate type. It can also be used at ingest time, but +only when the type of `initialValue` is a primitive numeric type (`LONG` or `DOUBLE`) and matches the type of +`initialCombineValue`. Other intermediate types, such as strings, arrays, and complex types, are query-time only.
| Property | Description | Required | | --- | --- | --- | diff --git a/embedded-tests/src/test/java/org/apache/druid/testing/embedded/compact/CompactionTaskTest.java b/embedded-tests/src/test/java/org/apache/druid/testing/embedded/compact/CompactionTaskTest.java index 84ee947c8467..4692ec0715f5 100644 --- a/embedded-tests/src/test/java/org/apache/druid/testing/embedded/compact/CompactionTaskTest.java +++ b/embedded-tests/src/test/java/org/apache/druid/testing/embedded/compact/CompactionTaskTest.java @@ -33,9 +33,12 @@ import org.apache.druid.java.util.common.granularity.Granularity; import org.apache.druid.java.util.common.jackson.JacksonUtils; import org.apache.druid.query.Druids; +import org.apache.druid.query.aggregation.CountAggregatorFactory; +import org.apache.druid.query.aggregation.ExpressionLambdaAggregatorFactory; import org.apache.druid.query.aggregation.datasketches.hll.HllSketchModule; import org.apache.druid.query.aggregation.datasketches.quantiles.DoublesSketchModule; import org.apache.druid.query.aggregation.datasketches.theta.SketchModule; +import org.apache.druid.query.expression.TestExprMacroTable; import org.apache.druid.query.metadata.metadata.SegmentMetadataQuery; import org.apache.druid.segment.TestHelper; import org.apache.druid.testing.embedded.EmbeddedClusterApis; @@ -55,6 +58,7 @@ import java.util.ArrayList; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.function.Supplier; import java.util.stream.Collectors; @@ -107,6 +111,65 @@ public class CompactionTaskTest extends CompactionTestBase "namespace", "continent", "country", "region", "city", "timestamp" ); + /** + * Index task identical in shape to {@link MoreResources.Task#INDEX_TASK_WITH_AGGREGATORS} but with a pair of + * {@link ExpressionLambdaAggregatorFactory} metrics over the {@code added} long field. Used by + * {@link #testCompactionWithExpressionLambdaAggregator} to verify that an expression aggregator works correctly. 
+ */ + private static final Supplier INDEX_TASK_WITH_EXPR_AGG = () -> + TaskBuilder + .ofTypeIndex() + .jsonInputFormat() + .localInputSourceWithFiles( + Resources.DataFile.tinyWiki1Json(), + Resources.DataFile.tinyWiki2Json(), + Resources.DataFile.tinyWiki3Json() + ) + .timestampColumn("timestamp") + .dimensions( + "page", + "language", "tags", "user", "unpatrolled", "newPage", "robot", + "anonymous", "namespace", "continent", "country", "region", "city" + ) + .metricAggregates( + new CountAggregatorFactory("ingested_events"), + new ExpressionLambdaAggregatorFactory( + "added_sum_expr", + Set.of("added"), + null, + "0", + null, + null, + false, + false, + "__acc + added", + null, + null, + null, + null, + TestExprMacroTable.INSTANCE + ), + new ExpressionLambdaAggregatorFactory( + "added_or_expr", + Set.of("added"), + null, + "0", + null, + null, + false, + false, + "bitwiseOr(\"__acc\", \"added\")", + null, + null, + null, + null, + TestExprMacroTable.INSTANCE + ) + ) + .dynamicPartitionWithMaxRows(3) + .granularitySpec("DAY", "SECOND", true) + .appendToExisting(false); + private String fullDatasourceName; @BeforeEach @@ -259,6 +322,33 @@ public void testCompactionWithTimestampDimension() throws Exception loadDataAndCompact(INDEX_TASK_WITH_TIMESTAMP.get(), COMPACTION_TASK.get(), null); } + @Test + public void testCompactionWithExpressionLambdaAggregator() throws Exception + { + try (final Closeable ignored = unloader(fullDatasourceName)) { + runTask(INDEX_TASK_WITH_EXPR_AGG.get()); + verifySegmentsCount(4); + + // Snapshot metric values prior to compaction. + final String preCompact = cluster.runSql( + "SELECT SUM(added_sum_expr), SUM(added_or_expr) FROM %s", + fullDatasourceName + ); + + // Compact 4 segments -> 2; this performs cross-segment rollup which drives RowCombiningTimeAndDimsIterator + // into ExpressionLambdaAggregatorFactory.makeAggregateCombiner(). 
+      compactData(COMPACTION_TASK.get(), null, null);
+      verifySegmentsCount(2);
+
+      // Metric values must round-trip through compaction unchanged.
+      final String postCompact = cluster.runSql(
+          "SELECT SUM(added_sum_expr), SUM(added_or_expr) FROM %s",
+          fullDatasourceName
+      );
+      Assertions.assertEquals(preCompact, postCompact);
+    }
+  }
+
   private void loadDataAndCompact(
       TaskBuilder.Index indexTask,
       TaskBuilder.Compact compactionResource,
diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/ExpressionLambdaAggregatorFactory.java b/processing/src/main/java/org/apache/druid/query/aggregation/ExpressionLambdaAggregatorFactory.java
index 3235d709eee6..c901b52962fc 100644
--- a/processing/src/main/java/org/apache/druid/query/aggregation/ExpressionLambdaAggregatorFactory.java
+++ b/processing/src/main/java/org/apache/druid/query/aggregation/ExpressionLambdaAggregatorFactory.java
@@ -40,9 +40,11 @@
 import org.apache.druid.query.cache.CacheKeyBuilder;
 import org.apache.druid.segment.ColumnInspector;
 import org.apache.druid.segment.ColumnSelectorFactory;
+import org.apache.druid.segment.ColumnValueSelector;
 import org.apache.druid.segment.column.ColumnCapabilities;
 import org.apache.druid.segment.column.ColumnCapabilitiesImpl;
 import org.apache.druid.segment.column.ColumnType;
+import org.apache.druid.segment.column.ValueType;
 import org.apache.druid.segment.virtual.ExpressionPlan;
 import org.apache.druid.segment.virtual.ExpressionPlanner;
 import org.apache.druid.segment.virtual.ExpressionSelectors;
@@ -347,6 +349,86 @@ public Object combine(@Nullable Object lhs, @Nullable Object rhs)
     ).value();
   }
 
+  @Override
+  public AggregateCombiner makeAggregateCombiner()
+  {
+    final ColumnType intermediateType = getIntermediateType();
+    // The combiner delegates to combine(), which feeds inputs into combineExpression typed against initialCombineValue.
+    // If the fold-side intermediate type (what's stored in the segment column) differs from the combine-side type,
+    // the primitive selector would silently feed wrong-typed values into the expression. Fall through to UOE.
+    if (!intermediateType.equals(ExpressionType.toColumnType(initialCombineValue.get().type()))) {
+      return super.makeAggregateCombiner();
+    }
+    if (intermediateType.is(ValueType.LONG)) {
+      return new LongAggregateCombiner()
+      {
+        private long state;
+        private boolean isNull;
+
+        @Override
+        public void reset(ColumnValueSelector selector)
+        {
+          isNull = selector.isNull(); // consult the null flag before touching the primitive getter
+          state = isNull ? 0L : selector.getLong(); // same 0L placeholder fold() stores for a null result
+        }
+
+        @Override
+        public void fold(ColumnValueSelector selector)
+        {
+          final Object combined = combine(isNull ? null : state, selector.getObject());
+          isNull = combined == null;
+          state = combined == null ? 0L : ((Number) combined).longValue();
+        }
+
+        @Override
+        public long getLong()
+        {
+          return state;
+        }
+
+        @Override
+        public boolean isNull()
+        {
+          return isNull;
+        }
+      };
+    } else if (intermediateType.is(ValueType.DOUBLE)) {
+      return new DoubleAggregateCombiner()
+      {
+        private double state;
+        private boolean isNull;
+
+        @Override
+        public void reset(ColumnValueSelector selector)
+        {
+          isNull = selector.isNull(); // consult the null flag before touching the primitive getter
+          state = isNull ? 0.0 : selector.getDouble(); // same 0.0 placeholder fold() stores for a null result
+        }
+
+        @Override
+        public void fold(ColumnValueSelector selector)
+        {
+          final Object combined = combine(isNull ? null : state, selector.getObject());
+          isNull = combined == null;
+          state = combined == null ? 0.0 : ((Number) combined).doubleValue();
+        }
+
+        @Override
+        public double getDouble()
+        {
+          return state;
+        }
+
+        @Override
+        public boolean isNull()
+        {
+          return isNull;
+        }
+      };
+    }
+    return super.makeAggregateCombiner();
+  }
+
   @Override
   public Object deserialize(Object object)
   {
diff --git a/processing/src/test/java/org/apache/druid/query/aggregation/ExpressionLambdaAggregationTest.java b/processing/src/test/java/org/apache/druid/query/aggregation/ExpressionLambdaAggregationTest.java
new file mode 100644
index 000000000000..baef2de3a0a2
--- /dev/null
+++ b/processing/src/test/java/org/apache/druid/query/aggregation/ExpressionLambdaAggregationTest.java
@@ -0,0 +1,215 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */ + +package org.apache.druid.query.aggregation; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableSet; +import com.google.common.collect.Iterables; +import org.apache.druid.data.input.InputRow; +import org.apache.druid.data.input.MapBasedInputRow; +import org.apache.druid.data.input.impl.DimensionsSpec; +import org.apache.druid.data.input.impl.StringDimensionSchema; +import org.apache.druid.java.util.common.DateTimes; +import org.apache.druid.java.util.common.granularity.Granularities; +import org.apache.druid.java.util.common.guava.Sequence; +import org.apache.druid.query.Druids; +import org.apache.druid.query.Result; +import org.apache.druid.query.expression.TestExprMacroTable; +import org.apache.druid.query.timeseries.TimeseriesQuery; +import org.apache.druid.query.timeseries.TimeseriesResultValue; +import org.apache.druid.segment.IndexBuilder; +import org.apache.druid.segment.QueryableIndex; +import org.apache.druid.segment.QueryableIndexSegment; +import org.apache.druid.segment.Segment; +import org.apache.druid.segment.incremental.IncrementalIndexSchema; +import org.apache.druid.testing.InitializedNullHandlingTest; +import org.apache.druid.timeline.SegmentId; +import org.apache.druid.utils.CloseableUtils; +import org.joda.time.DateTime; +import org.junit.After; +import org.junit.Assert; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; + +import java.util.Collections; +import java.util.List; +import java.util.Set; + +/** + * Verifies that {@link ExpressionLambdaAggregatorFactory} can be used as an ingest-time metric for primitive numeric + * types. 
+ */ +public class ExpressionLambdaAggregationTest extends InitializedNullHandlingTest +{ + private static final String DIM = "groupKey"; + private static final String LONG_FIELD = "longField"; + private static final String DOUBLE_FIELD = "doubleField"; + private static final DateTime TIMESTAMP = DateTimes.of("2020-01-01"); + + @Rule + public final TemporaryFolder tempFolder = new TemporaryFolder(); + + private QueryableIndex mergedIndex; + private Segment segment; + + @After + public void tearDown() + { + if (segment != null) { + CloseableUtils.closeAndWrapExceptions(segment); + } + if (mergedIndex != null) { + CloseableUtils.closeAndWrapExceptions(mergedIndex); + } + } + + @Test + public void testNumericExpressionLambdaIngestRollupViaMerge() throws Exception + { + // Three rows sharing the same (timestamp, dim) so they roll up into a single output row during merge. + // longField values: 1 (0b001), 2 (0b010), 4 (0b100) -> sum=7, bitwiseOr=7 + // doubleField values: 1.5, 2.0, 0.25 -> sum=3.75 + final List rows = List.of( + row(1L, 1.5), + row(2L, 2.0), + row(4L, 0.25) + ); + + final ExpressionLambdaAggregatorFactory longSum = new ExpressionLambdaAggregatorFactory( + "long_sum", + Set.of(LONG_FIELD), + null, + "0", + null, + null, + false, + false, + "__acc + " + LONG_FIELD, + null, + null, + null, + null, + TestExprMacroTable.INSTANCE + ); + + // BitwiseSqlAggregator-style: same single-field, op("__acc", field) fold + final ExpressionLambdaAggregatorFactory bitwiseOr = new ExpressionLambdaAggregatorFactory( + "bitwise_or", + ImmutableSet.of(LONG_FIELD), + null, + "0", + null, + null, + false, + false, + "bitwiseOr(\"__acc\", \"" + LONG_FIELD + "\")", + null, + null, + null, + null, + TestExprMacroTable.INSTANCE + ); + + final ExpressionLambdaAggregatorFactory doubleSum = new ExpressionLambdaAggregatorFactory( + "double_sum", + ImmutableSet.of(DOUBLE_FIELD), + null, + "0.0", + null, + null, + false, + false, + "__acc + " + DOUBLE_FIELD, + null, + null, + null, + 
null, + TestExprMacroTable.INSTANCE + ); + + final IncrementalIndexSchema schema = IncrementalIndexSchema.builder() + .withQueryGranularity(Granularities.NONE) + .withRollup(true) + .withDimensionsSpec( + DimensionsSpec.builder() + .setDimensions(ImmutableList.of(new StringDimensionSchema(DIM))) + .build() + ) + .withMetrics( + new CountAggregatorFactory("count"), + longSum, + bitwiseOr, + doubleSum + ) + .build(); + + mergedIndex = IndexBuilder.create() + .tmpDir(tempFolder.newFolder()) + .schema(schema) + .intermediaryPersistSize(1) + .rows(rows) + .buildMMappedMergedIndex(); + + segment = new QueryableIndexSegment(mergedIndex, SegmentId.dummy("test")); + + final TimeseriesQuery query = Druids.newTimeseriesQueryBuilder() + .dataSource("test") + .granularity(Granularities.ALL) + .intervals("1970/2050") + .aggregators( + new LongSumAggregatorFactory("count", "count"), + longSum.getCombiningFactory(), + bitwiseOr.getCombiningFactory(), + doubleSum.getCombiningFactory() + ) + .build(); + + try (final AggregationTestHelper helper = + AggregationTestHelper.createTimeseriesQueryAggregationTestHelper(Collections.emptyList(), tempFolder)) { + + final Sequence> seq = helper.runQueryOnSegmentsObjs( + ImmutableList.of(segment), + query + ); + final TimeseriesResultValue result = Iterables.getOnlyElement(seq.toList()).getValue(); + + // Three input rows rolled up into one, count reflects rollup happened + Assert.assertEquals(3L, result.getLongMetric("count").longValue()); + Assert.assertEquals(7L, result.getLongMetric("long_sum").longValue()); + Assert.assertEquals(7L, result.getLongMetric("bitwise_or").longValue()); + Assert.assertEquals(3.75, result.getDoubleMetric("double_sum").doubleValue(), 0.0); + } + } + + private static InputRow row(long longVal, double doubleVal) + { + return new MapBasedInputRow( + TIMESTAMP, + ImmutableList.of(DIM), + ImmutableMap.of( + DIM, "a", + LONG_FIELD, longVal, + DOUBLE_FIELD, doubleVal + ) + ); + } +} diff --git 
a/processing/src/test/java/org/apache/druid/query/aggregation/ExpressionLambdaAggregatorFactoryTest.java b/processing/src/test/java/org/apache/druid/query/aggregation/ExpressionLambdaAggregatorFactoryTest.java index 499bcef08fe2..29bf850d3d44 100644 --- a/processing/src/test/java/org/apache/druid/query/aggregation/ExpressionLambdaAggregatorFactoryTest.java +++ b/processing/src/test/java/org/apache/druid/query/aggregation/ExpressionLambdaAggregatorFactoryTest.java @@ -24,24 +24,31 @@ import com.google.common.collect.ImmutableSet; import nl.jqno.equalsverifier.EqualsVerifier; import org.apache.druid.java.util.common.HumanReadableBytes; +import org.apache.druid.java.util.common.UOE; import org.apache.druid.java.util.common.granularity.Granularities; import org.apache.druid.query.Druids; import org.apache.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory; import org.apache.druid.query.aggregation.post.FieldAccessPostAggregator; import org.apache.druid.query.aggregation.post.FinalizingFieldAccessPostAggregator; import org.apache.druid.query.expression.TestExprMacroTable; +import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; import org.apache.druid.query.timeseries.TimeseriesQuery; import org.apache.druid.query.timeseries.TimeseriesQueryQueryToolChest; +import org.apache.druid.segment.ColumnValueSelector; import org.apache.druid.segment.TestHelper; import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.column.RowSignature; +import org.apache.druid.segment.selector.TestColumnValueSelector; import org.apache.druid.testing.InitializedNullHandlingTest; import org.junit.Assert; import org.junit.Rule; import org.junit.Test; import org.junit.rules.ExpectedException; +import javax.annotation.Nullable; import java.io.IOException; +import java.util.Arrays; +import java.util.List; public class ExpressionLambdaAggregatorFactoryTest extends InitializedNullHandlingTest { @@ -545,6 +552,303 @@ public void 
testComplexTypeFinalized() Assert.assertEquals(ColumnType.DOUBLE, agg.getResultType()); } + @Test + public void testLongAggregateCombiner() + { + ExpressionLambdaAggregatorFactory agg = new ExpressionLambdaAggregatorFactory( + "expr_agg_name", + ImmutableSet.of("x"), + null, + "0", + null, + true, + false, + false, + "__acc + x", + null, + null, + null, + null, + TestExprMacroTable.INSTANCE + ); + + AggregateCombiner combiner = agg.makeAggregateCombiner(); + TestColumnValueSelector selector = TestColumnValueSelector.of( + Long.class, + Arrays.asList(1L, 2L, 3L) + ); + selector.advance(); + combiner.reset(selector); + Assert.assertEquals(1L, combiner.getLong()); + + selector.advance(); + combiner.fold(selector); + Assert.assertEquals(3L, combiner.getLong()); + + selector.advance(); + combiner.fold(selector); + Assert.assertEquals(6L, combiner.getLong()); + } + + @Test + public void testDoubleAggregateCombiner() + { + ExpressionLambdaAggregatorFactory agg = new ExpressionLambdaAggregatorFactory( + "expr_agg_name", + ImmutableSet.of("x"), + null, + "0.0", + null, + true, + false, + false, + "__acc + x", + null, + null, + null, + null, + TestExprMacroTable.INSTANCE + ); + + AggregateCombiner combiner = agg.makeAggregateCombiner(); + TestColumnValueSelector selector = TestColumnValueSelector.of( + Double.class, + Arrays.asList(1.5, 2.25, 0.25) + ); + selector.advance(); + combiner.reset(selector); + Assert.assertEquals(1.5, combiner.getDouble(), 0.0); + + selector.advance(); + combiner.fold(selector); + Assert.assertEquals(3.75, combiner.getDouble(), 0.0); + + selector.advance(); + combiner.fold(selector); + Assert.assertEquals(4.0, combiner.getDouble(), 0.0); + } + + @Test + public void testNullableAggregateCombinerSkipsNulls() + { + ExpressionLambdaAggregatorFactory agg = new ExpressionLambdaAggregatorFactory( + "expr_agg_name", + ImmutableSet.of("x"), + null, + "0", + null, + true, + false, + false, + "__acc + x", + null, + null, + null, + null, + 
TestExprMacroTable.INSTANCE + ); + + AggregateCombiner combiner = agg.makeNullableAggregateCombiner(); + NullableLongSelector selector = new NullableLongSelector(Arrays.asList(null, 5L, null, 7L)); + selector.advance(); + combiner.reset(selector); + Assert.assertTrue(combiner.isNull()); + + selector.advance(); + combiner.fold(selector); + Assert.assertFalse(combiner.isNull()); + Assert.assertEquals(5L, combiner.getLong()); + + selector.advance(); + combiner.fold(selector); + Assert.assertEquals(5L, combiner.getLong()); + + selector.advance(); + combiner.fold(selector); + Assert.assertEquals(12L, combiner.getLong()); + } + + @Test + public void testNullableAggregateCombinerWhenCombineAggregatesNullsExpressionSeesNulls() + { + // shouldCombineAggregateNullInputs=true means the combine expression sees null inputs directly. The expression + // itself is responsible for handling them; here `nvl` coalesces nulls to 0 so the accumulator keeps advancing. + ExpressionLambdaAggregatorFactory agg = new ExpressionLambdaAggregatorFactory( + "expr_agg_name", + ImmutableSet.of("x"), + null, + "0", + null, + true, + true, + true, + "nvl(__acc, 0) + nvl(x, 0)", + null, + null, + null, + null, + TestExprMacroTable.INSTANCE + ); + + AggregateCombiner combiner = agg.makeNullableAggregateCombiner(); + NullableLongSelector selector = new NullableLongSelector(Arrays.asList(1L, null, 3L)); + selector.advance(); + combiner.reset(selector); + Assert.assertEquals(1L, combiner.getLong()); + + // null is passed through to the expression, which coalesces to 0 + selector.advance(); + combiner.fold(selector); + Assert.assertEquals(1L, combiner.getLong()); + + selector.advance(); + combiner.fold(selector); + Assert.assertEquals(4L, combiner.getLong()); + } + + @Test + public void testNullableAggregateCombinerNullExpressionResultPropagates() + { + // shouldCombineAggregateNullInputs=true with an expression that doesn't handle nulls: `__acc + null` evaluates + // to null in Druid expression 
semantics, and the combiner reports isNull accordingly. + ExpressionLambdaAggregatorFactory agg = new ExpressionLambdaAggregatorFactory( + "expr_agg_name", + ImmutableSet.of("x"), + null, + "0", + null, + true, + true, + true, + "__acc + x", + null, + null, + null, + null, + TestExprMacroTable.INSTANCE + ); + + AggregateCombiner combiner = agg.makeNullableAggregateCombiner(); + NullableLongSelector selector = new NullableLongSelector(Arrays.asList(1L, null)); + selector.advance(); + combiner.reset(selector); + Assert.assertFalse(combiner.isNull()); + Assert.assertEquals(1L, combiner.getLong()); + + selector.advance(); + combiner.fold(selector); + Assert.assertTrue(combiner.isNull()); + } + + + private static final class NullableLongSelector implements ColumnValueSelector + { + private final List values; + private int index = -1; + + NullableLongSelector(List values) + { + this.values = values; + } + + void advance() + { + index++; + } + + @Override + public long getLong() + { + Long v = values.get(index); + return v == null ? 
0L : v; + } + + @Override + public double getDouble() + { + return getLong(); + } + + @Override + public float getFloat() + { + return getLong(); + } + + @Override + public boolean isNull() + { + return values.get(index) == null; + } + + @Nullable + @Override + public Long getObject() + { + return values.get(index); + } + + @Override + public Class classOfObject() + { + return Long.class; + } + + @Override + public void inspectRuntimeShape(RuntimeShapeInspector inspector) + { + } + } + + @Test(expected = UOE.class) + public void testAggregateCombinerNotSupportedForNonNumericTypes() + { + ExpressionLambdaAggregatorFactory agg = new ExpressionLambdaAggregatorFactory( + "expr_agg_name", + ImmutableSet.of("x"), + null, + "''", + "''", + true, + true, + true, + "concat(__acc, x)", + "concat(__acc, expr_agg_name)", + null, + null, + new HumanReadableBytes(2048), + TestExprMacroTable.INSTANCE + ); + + agg.makeAggregateCombiner(); + } + + @Test(expected = UOE.class) + public void testAggregateCombinerNotSupportedWhenFoldAndCombineTypesDiffer() + { + // fold seed is LONG (intermediate column type), but combine seed is LONG_ARRAY — combining a long segment column + // with an expression that expects arrays would silently produce wrong values, so the combiner refuses to handle it. 
+ ExpressionLambdaAggregatorFactory agg = new ExpressionLambdaAggregatorFactory( + "expr_agg_name", + ImmutableSet.of("x"), + null, + "0", + "ARRAY[]", + null, + false, + false, + "__acc + x", + "array_set_add(__acc, expr_agg_name)", + null, + null, + new HumanReadableBytes(2048), + TestExprMacroTable.INSTANCE + ); + + Assert.assertEquals(ColumnType.LONG, agg.getIntermediateType()); + agg.makeAggregateCombiner(); + } + @Test public void testResultArraySignature() { From 177cada0f3470a1db64ff781fc89bde2764b14b3 Mon Sep 17 00:00:00 2001 From: Shekhar Prasad Rajak <5774448+Shekharrajak@users.noreply.github.com> Date: Tue, 26 May 2026 00:34:27 +0530 Subject: [PATCH 07/12] fix: OrcInputFormat concurrent FileSystem init race condition (#19491) (#19497) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit OrcInputFormat.initialize() — which swaps Thread.currentThread().setContextClassLoader() and calls FileSystem.get(conf) — was invoked on every createReader() call. 
When a ParallelIndexTask runs multiple ORC subtasks concurrently in the same JVM (as in embedded tests) --- .../druid/data/input/orc/OrcInputFormat.java | 19 +++++++++++-------- .../data/input/orc/OrcInputFormatTest.java | 1 + 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/extensions-core/orc-extensions/src/main/java/org/apache/druid/data/input/orc/OrcInputFormat.java b/extensions-core/orc-extensions/src/main/java/org/apache/druid/data/input/orc/OrcInputFormat.java index 7474a79a15eb..96bfc03214dc 100644 --- a/extensions-core/orc-extensions/src/main/java/org/apache/druid/data/input/orc/OrcInputFormat.java +++ b/extensions-core/orc-extensions/src/main/java/org/apache/druid/data/input/orc/OrcInputFormat.java @@ -36,12 +36,14 @@ import java.io.File; import java.io.IOException; import java.util.Objects; +import java.util.concurrent.atomic.AtomicBoolean; public class OrcInputFormat extends NestedInputFormat { static final long SCALE_FACTOR = 8L; private final boolean binaryAsString; private final Configuration conf; + private final AtomicBoolean fileSystemInitialized = new AtomicBoolean(false); @JsonCreator public OrcInputFormat( @@ -55,19 +57,20 @@ public OrcInputFormat( this.conf = conf; } - private void initialize(Configuration conf) + // Init FileSystem once under this class's classloader to avoid concurrent setContextClassLoader races. 
+ private void ensureFileSystemInitialized() { - //Initializing seperately since during eager initialization, resolving - //namenode hostname throws an error if nodes are ephemeral - - // Ensure that FileSystem class level initialization happens with correct CL - // See https://github.com/apache/druid/issues/1714 - ClassLoader currCtxCl = Thread.currentThread().getContextClassLoader(); + if (!fileSystemInitialized.compareAndSet(false, true)) { + return; + } + final ClassLoader currCtxCl = Thread.currentThread().getContextClassLoader(); try { Thread.currentThread().setContextClassLoader(getClass().getClassLoader()); FileSystem.get(conf); } catch (IOException ex) { + // Reset so a subsequent createReader can retry init instead of skipping it. + fileSystemInitialized.set(false); throw new RuntimeException(ex); } finally { @@ -91,7 +94,7 @@ public boolean getBinaryAsString() @Override public InputEntityReader createReader(InputRowSchema inputRowSchema, InputEntity source, File temporaryDirectory) { - initialize(conf); + ensureFileSystemInitialized(); return new OrcReader(conf, inputRowSchema, source, temporaryDirectory, getFlattenSpec(), binaryAsString); } diff --git a/extensions-core/orc-extensions/src/test/java/org/apache/druid/data/input/orc/OrcInputFormatTest.java b/extensions-core/orc-extensions/src/test/java/org/apache/druid/data/input/orc/OrcInputFormatTest.java index a7f6e5131c3a..555d1de2c998 100644 --- a/extensions-core/orc-extensions/src/test/java/org/apache/druid/data/input/orc/OrcInputFormatTest.java +++ b/extensions-core/orc-extensions/src/test/java/org/apache/druid/data/input/orc/OrcInputFormatTest.java @@ -73,6 +73,7 @@ public void testEquals() { EqualsVerifier.forClass(OrcInputFormat.class) .withPrefabValues(Configuration.class, new Configuration(), new Configuration()) + .withIgnoredFields("fileSystemInitialized") .usingGetClass() .verify(); } From ab12df66fbe63bd892ade1302f11e718508515c8 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Wed, 27 May 
2026 17:21:53 -0700 Subject: [PATCH 08/12] feat: add getDimensionRangeSet support to LikeDimFilter for equality and prefix cases (#19524) --- .../druid/query/filter/LikeDimFilter.java | 53 +++++++ .../druid/query/filter/LikeDimFilterTest.java | 131 ++++++++++++++++++ 2 files changed, 184 insertions(+) diff --git a/processing/src/main/java/org/apache/druid/query/filter/LikeDimFilter.java b/processing/src/main/java/org/apache/druid/query/filter/LikeDimFilter.java index b5f67595ffa4..96668b306886 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/LikeDimFilter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/LikeDimFilter.java @@ -24,10 +24,13 @@ import com.fasterxml.jackson.annotation.JsonProperty; import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableRangeSet; import com.google.common.collect.ImmutableSet; +import com.google.common.collect.Range; import com.google.common.collect.RangeSet; import com.google.common.io.BaseEncoding; import com.google.common.primitives.Chars; +import org.apache.druid.error.DruidException; import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.query.extraction.ExtractionFn; import org.apache.druid.segment.filter.LikeFilter; @@ -154,6 +157,20 @@ public Filter toFilter() @Override public RangeSet getDimensionRangeSet(String dimension) { + if (!this.dimension.equals(dimension) || extractionFn != null) { + return null; + } + final LikeDimFilter.LikeMatcher.SuffixMatch suffixMatch = likeMatcher.getSuffixMatch(); + final String prefix = likeMatcher.getPrefix(); + if (suffixMatch == LikeMatcher.SuffixMatch.MATCH_EMPTY) { + // The full pattern was a literal (no wildcards); LIKE acts as equality on `prefix`. 
+ return ImmutableRangeSet.of(Range.singleton(prefix)); + } + if (suffixMatch == LikeMatcher.SuffixMatch.MATCH_ANY) { + // LIKE 'prefix%' matches every string starting with `prefix`; bare LIKE '%' matches everything + return ImmutableRangeSet.of(prefix.isEmpty() ? Range.all() : prefixRange(prefix)); + } + // mid-string wildcards aren't expressible as a single Range. return null; } @@ -197,6 +214,42 @@ public String toString() return builder.appendFilterTuning(filterTuning).build(); } + /** + * Range covering every string that starts with {@code prefix} + */ + public static Range prefixRange(String prefix) + { + if (prefix.isEmpty()) { + throw DruidException.defensive("prefix is empty; use Range.all() explicitly for the match-everything case"); + } + final String successor = lexicographicSuccessor(prefix); + return successor == null ? Range.atLeast(prefix) : Range.closedOpen(prefix, successor); + } + + /** + * Smallest string strictly greater than {@code s} in lexicographic (UTF-16) order: increment the last + * non-{@link Character#MAX_VALUE} char and truncate everything after it. Returns {@code null} when {@code s} + * is a non-empty run of {@code MAX_VALUE} chars and the carry would overflow. 
+ */ + @Nullable + @VisibleForTesting + static String lexicographicSuccessor(String s) + { + if (s.isEmpty()) { + return "\u0000"; + } + final char[] chars = s.toCharArray(); + int i = chars.length - 1; + while (i >= 0 && chars[i] == Character.MAX_VALUE) { + i--; + } + if (i < 0) { + return null; + } + chars[i]++; + return new String(chars, 0, i + 1); + } + public static class LikeMatcher { public enum SuffixMatch diff --git a/processing/src/test/java/org/apache/druid/query/filter/LikeDimFilterTest.java b/processing/src/test/java/org/apache/druid/query/filter/LikeDimFilterTest.java index afa450bc471c..d122963f2efc 100644 --- a/processing/src/test/java/org/apache/druid/query/filter/LikeDimFilterTest.java +++ b/processing/src/test/java/org/apache/druid/query/filter/LikeDimFilterTest.java @@ -20,8 +20,11 @@ package org.apache.druid.query.filter; import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.collect.ImmutableRangeSet; +import com.google.common.collect.Range; import com.google.common.collect.Sets; import nl.jqno.equalsverifier.EqualsVerifier; +import org.apache.druid.error.DruidException; import org.apache.druid.jackson.DefaultObjectMapper; import org.apache.druid.query.extraction.SubstringDimExtractionFn; import org.apache.druid.segment.column.ColumnIndexSupplier; @@ -322,6 +325,134 @@ public void testPatternFindsCorrectMiddleMatch() assertMatch("1 _ 5%6", "1 2 3 1 4 5 6", DruidPredicateMatch.FALSE); } + @Test + public void testGetDimensionRangeSet_literalPattern() + { + final LikeDimFilter filter = new LikeDimFilter("foo", "bar", null, null); + Assert.assertEquals( + ImmutableRangeSet.of(Range.singleton("bar")), + filter.getDimensionRangeSet("foo") + ); + } + + @Test + public void testGetDimensionRangeSet_prefixPattern() + { + final LikeDimFilter filter = new LikeDimFilter("foo", "bar%", null, null); + Assert.assertEquals( + ImmutableRangeSet.of(Range.closedOpen("bar", "bas")), + filter.getDimensionRangeSet("foo") + ); + } + + @Test + 
public void testGetDimensionRangeSet_midPatternWildcard_returnsNull() + { + final LikeDimFilter filter = new LikeDimFilter("foo", "bar%baz", null, null); + Assert.assertNull(filter.getDimensionRangeSet("foo")); + } + + @Test + public void testGetDimensionRangeSet_suffixPattern_returnsNull() + { + final LikeDimFilter filter = new LikeDimFilter("foo", "%bar", null, null); + Assert.assertNull(filter.getDimensionRangeSet("foo")); + } + + @Test + public void testGetDimensionRangeSet_singleWildcard_returnsAll() + { + final LikeDimFilter filter = new LikeDimFilter("foo", "%", null, null); + Assert.assertEquals( + ImmutableRangeSet.of(Range.all()), + filter.getDimensionRangeSet("foo") + ); + } + + @Test + public void testGetDimensionRangeSet_otherDimension_returnsNull() + { + final LikeDimFilter filter = new LikeDimFilter("foo", "bar%", null, null); + Assert.assertNull(filter.getDimensionRangeSet("other")); + } + + @Test + public void testGetDimensionRangeSet_withExtractionFn_returnsNull() + { + final LikeDimFilter filter = new LikeDimFilter("foo", "bar%", null, new SubstringDimExtractionFn(0, 3)); + Assert.assertNull(filter.getDimensionRangeSet("foo")); + } + + @Test + public void testPrefixRange_singleLowercaseChar() + { + Assert.assertEquals(Range.closedOpen("foo", "fop"), LikeDimFilter.prefixRange("foo")); + } + + @Test + public void testPrefixRange_uppercaseCarryStaysWithinAscii() + { + Assert.assertEquals(Range.closedOpen("foZ", "fo["), LikeDimFilter.prefixRange("foZ")); + } + + @Test + public void testPrefixRange_trailingMaxValue_carriesPastIt() + { + Assert.assertEquals( + Range.closedOpen("foo￿", "fop"), + LikeDimFilter.prefixRange("foo￿") + ); + } + + @Test + public void testPrefixRange_allMaxValue_fallsBackToAtLeast() + { + Assert.assertEquals(Range.atLeast("￿￿"), LikeDimFilter.prefixRange("￿￿")); + } + + @Test + public void testPrefixRange_empty_throws() + { + Assert.assertThrows(DruidException.class, () -> LikeDimFilter.prefixRange("")); + } + + @Test + public 
void testPrefixRange_enclosesAllPrefixedStrings() + { + final Range range = LikeDimFilter.prefixRange("foo"); + Assert.assertTrue(range.contains("foo")); + Assert.assertTrue(range.contains("foo0")); + Assert.assertTrue(range.contains("foobar")); + Assert.assertTrue(range.contains("foozzz")); + Assert.assertFalse(range.contains("fo")); + Assert.assertFalse(range.contains("fop")); + Assert.assertFalse(range.contains("fox")); + } + + @Test + public void testLexicographicSuccessor_basic() + { + Assert.assertEquals("fop", LikeDimFilter.lexicographicSuccessor("foo")); + } + + @Test + public void testLexicographicSuccessor_empty_returnsNullChar() + { + Assert.assertEquals("\u0000", LikeDimFilter.lexicographicSuccessor("")); + } + + @Test + public void testLexicographicSuccessor_singleMaxValue_returnsNull() + { + Assert.assertNull(LikeDimFilter.lexicographicSuccessor("￿")); + } + + @Test + public void testLexicographicSuccessor_trailingMaxValues_truncatedAndCarried() + { + Assert.assertEquals("fop", LikeDimFilter.lexicographicSuccessor("foo￿￿")); + } + private void assertCompilation(String pattern, String expected) { LikeDimFilter.LikeMatcher matcher = LikeDimFilter.LikeMatcher.from(pattern, '\\'); From 7bdbc2d6b8100c930ce65a48676e1bc9f567b00e Mon Sep 17 00:00:00 2001 From: Virushade Date: Thu, 28 May 2026 20:46:38 +0800 Subject: [PATCH 09/12] Remove raw types in ChainedExecutionQueryRunner (#19529) --- .../org/apache/druid/query/ChainedExecutionQueryRunner.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/query/ChainedExecutionQueryRunner.java b/processing/src/main/java/org/apache/druid/query/ChainedExecutionQueryRunner.java index 74f2ffc634a3..fc56504b90e9 100644 --- a/processing/src/main/java/org/apache/druid/query/ChainedExecutionQueryRunner.java +++ b/processing/src/main/java/org/apache/druid/query/ChainedExecutionQueryRunner.java @@ -79,7 +79,7 @@ public Sequence run(final QueryPlus queryPlus, final 
ResponseContext respo { Query query = queryPlus.getQuery(); final int priority = query.context().getPriority(); - final Ordering ordering = query.getResultOrdering(); + final Ordering ordering = query.getResultOrdering(); final QueryPlus threadSafeQueryPlus = queryPlus.withoutThreadUnsafeState(); final QueryContext context = query.context(); @@ -91,7 +91,7 @@ public Sequence run(final QueryPlus queryPlus, final ResponseContext respo @Override public Iterator make() { - // Make it a List<> to materialize all of the values (so that it will submit everything to the executor) + // Make it a List<> to materialize all the values (so that it will submit everything to the executor) List>> futures = Lists.newArrayList( Iterables.transform( From 5ba191be9f689a13a16001b3f1b20cc31bd559dd Mon Sep 17 00:00:00 2001 From: Virushade Date: Thu, 28 May 2026 20:47:03 +0800 Subject: [PATCH 10/12] perf: disable processing thread renaming by default (#19518) * Add default value for thread enabling * Peon disable thread renaming * Add benchmark query types * Add groupby benchmark * Specify query type * Docs for thread --- .../SinkQuerySegmentWalkerBenchmark.java | 358 ++++++++++++++++-- docs/querying/query-context-reference.md | 3 +- .../spec/SpecificSegmentQueryRunner.java | 3 +- .../spec/SpecificSegmentQueryRunnerTest.java | 99 ++++- 4 files changed, 427 insertions(+), 36 deletions(-) diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/SinkQuerySegmentWalkerBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/SinkQuerySegmentWalkerBenchmark.java index 6b36d72c672e..777dd297ecb9 100644 --- a/benchmarks/src/test/java/org/apache/druid/benchmark/SinkQuerySegmentWalkerBenchmark.java +++ b/benchmarks/src/test/java/org/apache/druid/benchmark/SinkQuerySegmentWalkerBenchmark.java @@ -19,29 +19,87 @@ package org.apache.druid.benchmark; +import com.fasterxml.jackson.databind.InjectableValues; +import com.fasterxml.jackson.databind.ObjectMapper; import 
com.google.common.base.Suppliers; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; +import org.apache.druid.client.cache.CacheConfig; +import org.apache.druid.client.cache.CachePopulatorStats; +import org.apache.druid.client.cache.MapCache; import org.apache.druid.data.input.MapBasedInputRow; +import org.apache.druid.data.input.impl.DimensionsSpec; +import org.apache.druid.data.input.impl.TimestampSpec; +import org.apache.druid.guice.BuiltInTypesModule; +import org.apache.druid.indexer.granularity.UniformGranularitySpec; +import org.apache.druid.jackson.AggregatorsModule; import org.apache.druid.jackson.DefaultObjectMapper; import org.apache.druid.java.util.common.DateTimes; import org.apache.druid.java.util.common.FileUtils; import org.apache.druid.java.util.common.Intervals; +import org.apache.druid.java.util.common.concurrent.Execs; import org.apache.druid.java.util.common.granularity.Granularities; import org.apache.druid.java.util.common.logger.Logger; +import org.apache.druid.java.util.emitter.EmittingLogger; import org.apache.druid.java.util.emitter.core.LoggingEmitter; import org.apache.druid.java.util.emitter.service.ServiceEmitter; +import org.apache.druid.math.expr.ExprMacroTable; +import org.apache.druid.query.DefaultGenericQueryMetricsFactory; +import org.apache.druid.query.DefaultQueryRunnerFactoryConglomerate; import org.apache.druid.query.Druids; +import org.apache.druid.query.ForwardingQueryProcessingPool; +import org.apache.druid.query.Query; import org.apache.druid.query.QueryPlus; -import org.apache.druid.query.Result; +import org.apache.druid.query.QueryRunnerFactory; +import org.apache.druid.query.QueryRunnerFactoryConglomerate; +import org.apache.druid.query.QueryRunnerTestHelper; +import org.apache.druid.query.aggregation.AggregatorFactory; +import org.apache.druid.query.aggregation.CountAggregatorFactory; import org.apache.druid.query.aggregation.LongSumAggregatorFactory; import 
org.apache.druid.query.context.ResponseContext; +import org.apache.druid.query.expression.TestExprMacroTable; +import org.apache.druid.query.groupby.GroupByQuery; +import org.apache.druid.query.groupby.GroupByQueryConfig; +import org.apache.druid.query.groupby.GroupByQueryRunnerTest; +import org.apache.druid.query.groupby.TestGroupByBuffers; +import org.apache.druid.query.metadata.SegmentMetadataQueryConfig; +import org.apache.druid.query.metadata.SegmentMetadataQueryQueryToolChest; +import org.apache.druid.query.metadata.SegmentMetadataQueryRunnerFactory; +import org.apache.druid.query.metadata.metadata.ListColumnIncluderator; +import org.apache.druid.query.metadata.metadata.SegmentMetadataQuery; +import org.apache.druid.query.policy.NoopPolicyEnforcer; +import org.apache.druid.query.scan.ScanQuery; +import org.apache.druid.query.scan.ScanQueryConfig; +import org.apache.druid.query.scan.ScanQueryEngine; +import org.apache.druid.query.scan.ScanQueryQueryToolChest; +import org.apache.druid.query.scan.ScanQueryRunnerFactory; +import org.apache.druid.query.spec.MultipleIntervalSegmentSpec; import org.apache.druid.query.timeseries.TimeseriesQuery; -import org.apache.druid.query.timeseries.TimeseriesResultValue; +import org.apache.druid.query.timeseries.TimeseriesQueryEngine; +import org.apache.druid.query.timeseries.TimeseriesQueryQueryToolChest; +import org.apache.druid.query.timeseries.TimeseriesQueryRunnerFactory; +import org.apache.druid.segment.IndexIO; +import org.apache.druid.segment.IndexMerger; +import org.apache.druid.segment.IndexMergerV9; +import org.apache.druid.segment.IndexSpec; +import org.apache.druid.segment.column.ColumnConfig; +import org.apache.druid.segment.incremental.ParseExceptionHandler; +import org.apache.druid.segment.incremental.RowIngestionMeters; +import org.apache.druid.segment.incremental.SimpleRowIngestionMeters; +import org.apache.druid.segment.indexing.DataSchema; +import org.apache.druid.segment.indexing.TuningConfig; +import 
org.apache.druid.segment.loading.DataSegmentPusher; +import org.apache.druid.segment.metadata.CentralizedDatasourceSchemaConfig; +import org.apache.druid.segment.realtime.SegmentGenerationMetrics; import org.apache.druid.segment.realtime.appenderator.Appenderator; +import org.apache.druid.segment.realtime.appenderator.AppenderatorConfig; +import org.apache.druid.segment.realtime.appenderator.Appenderators; import org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec; -import org.apache.druid.segment.realtime.appenderator.StreamAppenderatorTester; +import org.apache.druid.segment.realtime.appenderator.TestAppenderatorConfig; import org.apache.druid.segment.realtime.sink.Committers; +import org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMediumFactory; +import org.apache.druid.server.coordination.NoopDataSegmentAnnouncer; +import org.apache.druid.timeline.DataSegment; import org.apache.druid.timeline.partition.LinearShardSpec; import org.openjdk.jmh.annotations.Benchmark; import org.openjdk.jmh.annotations.BenchmarkMode; @@ -59,8 +117,11 @@ import org.openjdk.jmh.infra.Blackhole; import java.io.File; +import java.net.URI; import java.util.Arrays; import java.util.List; +import java.util.Map; +import java.util.concurrent.ExecutorService; import java.util.concurrent.TimeUnit; @State(Scope.Benchmark) @@ -71,6 +132,18 @@ @OutputTimeUnit(TimeUnit.MILLISECONDS) public class SinkQuerySegmentWalkerBenchmark { + private static final String DATASOURCE = "foo"; + private static final List QUERY_COLUMNS = ImmutableList.of("__time", "dim", "count", "met"); + private static final MultipleIntervalSegmentSpec QUERY_INTERVALS = + new MultipleIntervalSegmentSpec(ImmutableList.of(Intervals.of("2000/2001"))); + private static final String SET_PROCESSING_THREAD_NAMES = "setProcessingThreadNames"; + + @Param({"timeseries", "scan", "segmentMetadata", "groupBy"}) + private String queryType; + + @Param({"false", "true"}) + private boolean 
setProcessingThreadNames; + @Param({"10", "50", "100", "200"}) private int numFireHydrants; @@ -78,24 +151,66 @@ public class SinkQuerySegmentWalkerBenchmark private final ServiceEmitter serviceEmitter = new ServiceEmitter("test", "test", loggingEmitter); private File cacheDir; + private ExecutorService queryExecutor; private Appenderator appenderator; + private TestGroupByBuffers groupByBuffers; @Setup(Level.Trial) public void setup() throws Exception { final String userConfiguredCacheDir = System.getProperty("druid.benchmark.cacheDir", System.getenv("DRUID_BENCHMARK_CACHE_DIR")); cacheDir = new File(userConfiguredCacheDir); - final StreamAppenderatorTester tester = - new StreamAppenderatorTester.Builder().maxRowsInMemory(1) - .basePersistDirectory(cacheDir) - .withServiceEmitter(serviceEmitter) - .build(); + FileUtils.deleteDirectory(cacheDir); + final ObjectMapper objectMapper = makeObjectMapper(); + final IndexIO indexIO = new IndexIO( + objectMapper, + new ColumnConfig() + { + } + ); + final IndexMergerV9 indexMerger = new IndexMergerV9( + objectMapper, + indexIO, + OffHeapMemorySegmentWriteOutMediumFactory.instance() + ); + final DataSchema schema = makeDataSchema(); + final RowIngestionMeters rowIngestionMeters = new SimpleRowIngestionMeters(); + final AppenderatorConfig tuningConfig = makeTuningConfig(); + + queryExecutor = Execs.singleThreaded("queryExecutor(%d)"); + groupByBuffers = TestGroupByBuffers.createDefault(); - appenderator = tester.getAppenderator(); + serviceEmitter.start(); + EmittingLogger.registerEmitter(serviceEmitter); + + final QueryRunnerFactoryConglomerate conglomerate = makeQueryRunnerFactoryConglomerate(); + appenderator = Appenderators.createRealtime( + null, + schema.getDataSource(), + schema, + tuningConfig, + new SegmentGenerationMetrics(), + makeDataSegmentPusher(), + objectMapper, + indexIO, + indexMerger, + conglomerate, + new NoopDataSegmentAnnouncer(), + serviceEmitter, + new ForwardingQueryProcessingPool(queryExecutor), + 
MapCache.create(2048), + new CacheConfig(), + new CachePopulatorStats(), + NoopPolicyEnforcer.instance(), + rowIngestionMeters, + new ParseExceptionHandler(rowIngestionMeters, false, Integer.MAX_VALUE, 0), + CentralizedDatasourceSchemaConfig.create(), + interval -> {} + ); appenderator.startJob(); final SegmentIdWithShardSpec segmentIdWithShardSpec = new SegmentIdWithShardSpec( - StreamAppenderatorTester.DATASOURCE, + DATASOURCE, Intervals.of("2000/2001"), "A", new LinearShardSpec(0) @@ -119,33 +234,214 @@ public void setup() throws Exception @TearDown(Level.Trial) public void tearDown() throws Exception { - appenderator.close(); - FileUtils.deleteDirectory(cacheDir); + try { + if (appenderator != null) { + appenderator.close(); + } + } + finally { + if (queryExecutor != null) { + queryExecutor.shutdownNow(); + } + try { + if (groupByBuffers != null) { + groupByBuffers.close(); + } + } + finally { + FileUtils.deleteDirectory(cacheDir); + } + } } @Benchmark @BenchmarkMode(Mode.AverageTime) @OutputTimeUnit(TimeUnit.MILLISECONDS) - public void emitSinkMetrics(Blackhole blackhole) throws Exception + public void runSinkQuery(Blackhole blackhole) throws Exception { - { - final TimeseriesQuery query1 = Druids.newTimeseriesQueryBuilder() - .dataSource(StreamAppenderatorTester.DATASOURCE) - .intervals(ImmutableList.of(Intervals.of("2000/2001"))) - .aggregators( - Arrays.asList( - new LongSumAggregatorFactory("count", "count"), - new LongSumAggregatorFactory("met", "met") - ) - ) - .granularity(Granularities.DAY) - .build(); - - final List> results = - QueryPlus.wrap(query1).run(appenderator, ResponseContext.createEmpty()).toList(); - blackhole.consume(results); - - serviceEmitter.flush(); + final Query query = makeQuery(); + final List results = QueryPlus.wrap(query).run(appenderator, ResponseContext.createEmpty()).toList(); + blackhole.consume(results); + + serviceEmitter.flush(); + } + + private Query makeQuery() + { + switch (queryType) { + case "timeseries": + return 
makeTimeseriesQuery(); + case "scan": + return makeScanQuery(); + case "segmentMetadata": + return makeSegmentMetadataQuery(); + case "groupBy": + return makeGroupByQuery(); + default: + throw new IllegalStateException("Unsupported query type[" + queryType + "]"); } } + + private QueryRunnerFactoryConglomerate makeQueryRunnerFactoryConglomerate() + { + return DefaultQueryRunnerFactoryConglomerate.buildFromQueryRunnerFactories( + ImmutableMap., QueryRunnerFactory>builder() + .put( + TimeseriesQuery.class, + new TimeseriesQueryRunnerFactory( + new TimeseriesQueryQueryToolChest(), + new TimeseriesQueryEngine(), + QueryRunnerTestHelper.NOOP_QUERYWATCHER + ) + ) + .put( + ScanQuery.class, + new ScanQueryRunnerFactory( + new ScanQueryQueryToolChest(DefaultGenericQueryMetricsFactory.instance()), + new ScanQueryEngine(), + new ScanQueryConfig() + ) + ) + .put( + SegmentMetadataQuery.class, + new SegmentMetadataQueryRunnerFactory( + new SegmentMetadataQueryQueryToolChest(new SegmentMetadataQueryConfig()), + QueryRunnerTestHelper.NOOP_QUERYWATCHER + ) + ) + .put( + GroupByQuery.class, + GroupByQueryRunnerTest.makeQueryRunnerFactory(new GroupByQueryConfig(), groupByBuffers) + ) + .build() + ); + } + + private TimeseriesQuery makeTimeseriesQuery() + { + return Druids.newTimeseriesQueryBuilder() + .dataSource(DATASOURCE) + .intervals(QUERY_INTERVALS) + .aggregators(makeAggregators()) + .granularity(Granularities.DAY) + .context(makeQueryContext()) + .build(); + } + + private ScanQuery makeScanQuery() + { + return Druids.newScanQueryBuilder() + .dataSource(DATASOURCE) + .intervals(QUERY_INTERVALS) + .columns(QUERY_COLUMNS) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .context(makeQueryContext()) + .build(); + } + + private SegmentMetadataQuery makeSegmentMetadataQuery() + { + return Druids.newSegmentMetadataQueryBuilder() + .dataSource(DATASOURCE) + .intervals(QUERY_INTERVALS) + .toInclude(new ListColumnIncluderator(QUERY_COLUMNS)) + .analysisTypes( + 
SegmentMetadataQuery.AnalysisType.CARDINALITY, + SegmentMetadataQuery.AnalysisType.SIZE, + SegmentMetadataQuery.AnalysisType.INTERVAL, + SegmentMetadataQuery.AnalysisType.MINMAX, + SegmentMetadataQuery.AnalysisType.AGGREGATORS + ) + .merge(true) + .context(makeQueryContext()) + .build(); + } + + private GroupByQuery makeGroupByQuery() + { + return GroupByQuery.builder() + .setDataSource(DATASOURCE) + .setInterval("2000/2001") + .setGranularity(Granularities.ALL) + .setAggregatorSpecs(makeAggregators()) + .setContext(makeQueryContext()) + .build(); + } + + private List makeAggregators() + { + return Arrays.asList( + new LongSumAggregatorFactory("count", "count"), + new LongSumAggregatorFactory("met", "met") + ); + } + + private Map makeQueryContext() + { + return ImmutableMap.of(SET_PROCESSING_THREAD_NAMES, setProcessingThreadNames); + } + + private static ObjectMapper makeObjectMapper() + { + final ObjectMapper objectMapper = new DefaultObjectMapper(); + objectMapper.registerSubtypes(LinearShardSpec.class); + objectMapper.registerModules(new AggregatorsModule()); + objectMapper.registerModules(new BuiltInTypesModule().getJacksonModules()); + objectMapper.setInjectableValues( + new InjectableValues.Std() + .addValue(ExprMacroTable.class.getName(), TestExprMacroTable.INSTANCE) + .addValue(ObjectMapper.class.getName(), objectMapper) + ); + return objectMapper; + } + + private static DataSchema makeDataSchema() + { + return DataSchema.builder() + .withDataSource(DATASOURCE) + .withTimestamp(new TimestampSpec("ts", "auto", null)) + .withDimensions(DimensionsSpec.EMPTY) + .withAggregators( + new CountAggregatorFactory("count"), + new LongSumAggregatorFactory("met", "met") + ) + .withGranularity(new UniformGranularitySpec(Granularities.MINUTE, Granularities.NONE, null)) + .build(); + } + + private AppenderatorConfig makeTuningConfig() + { + return new TestAppenderatorConfig( + TuningConfig.DEFAULT_APPENDABLE_INDEX, + 1, + Runtime.getRuntime().totalMemory() / 3, + false, + 
IndexSpec.getDefault(), + 0, + false, + 0L, + OffHeapMemorySegmentWriteOutMediumFactory.instance(), + IndexMerger.UNLIMITED_MAX_COLUMNS_TO_MERGE, + cacheDir, + false + ); + } + + private static DataSegmentPusher makeDataSegmentPusher() + { + return new DataSegmentPusher() + { + @Override + public DataSegment push(File file, DataSegment segment, boolean useUniquePath) + { + return segment; + } + + @Override + public Map makeLoadSpec(URI uri) + { + throw new UnsupportedOperationException(); + } + }; + } } diff --git a/docs/querying/query-context-reference.md b/docs/querying/query-context-reference.md index c485c0231c06..41bd206199e7 100644 --- a/docs/querying/query-context-reference.md +++ b/docs/querying/query-context-reference.md @@ -68,7 +68,7 @@ Unless otherwise noted, the following parameters apply to all query types, and t |`useFilterCNF`|`false`| If true, Druid will attempt to convert the query filter to Conjunctive Normal Form (CNF). During query processing, columns can be pre-filtered by intersecting the bitmap indexes of all values that match the eligible filters, often greatly reducing the raw number of rows which need to be scanned. But this effect only happens for the top level filter, or individual clauses of a top level 'and' filter. As such, filters in CNF potentially have a higher chance to utilize a large amount of bitmap indexes on string columns during pre-filtering. However, this setting should be used with great caution, as it can sometimes have a negative effect on performance, and in some cases, the act of computing CNF of a filter can be expensive. We recommend hand tuning your filters to produce an optimal form if possible, or at least verifying through experimentation that using this parameter actually improves your query performance with no ill-effects.| |`secondaryPartitionPruning`|`true`|Enable secondary partition pruning on the Broker. 
The Broker will always prune unnecessary segments from the input scan based on a filter on time intervals, but if the data is further partitioned with hash or range partitioning, this option will enable additional pruning based on a filter on secondary partition dimensions.| |`debug`| `false` | Flag indicating whether to enable debugging outputs for the query. When set to false, no additional logs will be produced (logs produced will be entirely dependent on your logging level). When set to true, the following addition logs will be produced:
- Log the stack trace of the exception (if any) produced by the query | -|`setProcessingThreadNames`|`true`| Whether processing thread names will be set to `queryType_dataSource_intervals` while processing a query. This aids in interpreting thread dumps, and is on by default. Query overhead can be reduced slightly by setting this to `false`. This has a tiny effect in most scenarios, but can be meaningful in high-QPS, low-per-segment-processing-time scenarios. | +|`setProcessingThreadNames`|`false`| Flag indicating whether processing thread names will be set to `processing_<queryId>` while processing a query. Thread renaming aids in interpreting thread dumps, but adds measurable overhead when segment scans are very quick. | |`sqlPlannerBloat`|`1000`|Calcite parameter which controls whether to merge two Project operators when inlining expressions causes complexity to increase. Implemented as a workaround to exception `There are not enough rules to produce a node with desired properties: convention=DRUID, sort=[]` thrown after rejecting the merge of two projects.| |`cloneQueryMode`|`excludeClones`| Indicates whether clone Historicals should be queried by brokers. Clone servers are created by the `cloneServers` Coordinator dynamic configuration. Possible values are `excludeClones`, `includeClones` and `preferClones`. `excludeClones` means that clone Historicals are not queried by the broker. `preferClones` indicates that when given a choice between the clone Historical and the original Historical which is being cloned, the broker chooses the clones. Historicals which are not involved in the cloning process will still be queried. `includeClones` means that broker queries any Historical without regarding clone status. This parameter only affects native queries. MSQ does not query Historicals directly.| |`realtimeSegmentsMode` |`include`| Controls whether realtime segments are queried. `include` queries all segments, including realtime.
`exclude` skips realtime segments. `exclusive` queries only realtime segments. | @@ -140,4 +140,3 @@ For more information, see the following topics: - [Set query context](./query-context.md) to learn how to configure query context parameters. - [SQL query context](sql-query-context.md) for query context parameters specific to Druid SQL. - [SQL-based ingestion reference](../multi-stage-query/reference/#context-parameters) for context parameters used in SQL-based ingestion (MSQ). - diff --git a/processing/src/main/java/org/apache/druid/query/spec/SpecificSegmentQueryRunner.java b/processing/src/main/java/org/apache/druid/query/spec/SpecificSegmentQueryRunner.java index 7f1a37f61e63..f888bf87e473 100644 --- a/processing/src/main/java/org/apache/druid/query/spec/SpecificSegmentQueryRunner.java +++ b/processing/src/main/java/org/apache/druid/query/spec/SpecificSegmentQueryRunner.java @@ -48,6 +48,7 @@ public class SpecificSegmentQueryRunner implements QueryRunner @VisibleForTesting static final String CTX_SET_THREAD_NAME = "setProcessingThreadNames"; + static final boolean DEFAULT_SET_THREAD_NAME_ENABLED = false; public SpecificSegmentQueryRunner( QueryRunner base, @@ -68,7 +69,7 @@ public Sequence run(final QueryPlus input, final ResponseContext responseC ) ); - final boolean setName = input.getQuery().context().getBoolean(CTX_SET_THREAD_NAME, true); + final boolean setName = input.getQuery().context().getBoolean(CTX_SET_THREAD_NAME, DEFAULT_SET_THREAD_NAME_ENABLED); final Query query = queryPlus.getQuery(); diff --git a/processing/src/test/java/org/apache/druid/query/spec/SpecificSegmentQueryRunnerTest.java b/processing/src/test/java/org/apache/druid/query/spec/SpecificSegmentQueryRunnerTest.java index 5ab3783869ed..0101ce990b49 100644 --- a/processing/src/test/java/org/apache/druid/query/spec/SpecificSegmentQueryRunnerTest.java +++ b/processing/src/test/java/org/apache/druid/query/spec/SpecificSegmentQueryRunnerTest.java @@ -51,6 +51,8 @@ import java.util.ArrayList; 
import java.util.Collections; import java.util.List; +import java.util.Map; +import java.util.concurrent.atomic.AtomicReference; public class SpecificSegmentQueryRunnerTest { @@ -182,8 +184,6 @@ public void run() new CountAggregatorFactory("rows") ) ) - // Do one test with CTX_SET_THREAD_NAME = false. - .context(ImmutableMap.of(SpecificSegmentQueryRunner.CTX_SET_THREAD_NAME, false)) .build(); Sequence results = queryRunner.run(QueryPlus.wrap(query), responseContext); List> res = results.toList(); @@ -197,6 +197,101 @@ public void run() validate(mapper, descriptor, responseContext); } + @Test + public void testSetThreadName() + { + assertThreadNameDuringProcessing(null, "original-test-thread"); + assertThreadNameDuringProcessing(false, "original-test-thread"); + assertThreadNameDuringProcessing(true, "processing_thread-name-query"); + } + + private void assertThreadNameDuringProcessing( + final Boolean setProcessingThreadNames, + final String expectedThreadNameDuringProcessing + ) + { + final String originalThreadName = Thread.currentThread().getName(); + + try { + Thread.currentThread().setName("original-test-thread"); + + final AtomicReference runnerThreadName = new AtomicReference<>(); + final AtomicReference sequenceThreadName = new AtomicReference<>(); + final Result value = makeResult(); + final SegmentDescriptor descriptor = new SegmentDescriptor( + Intervals.of("2012-01-01T00:00:00Z/P1D"), + "version", + 0 + ); + + final SpecificSegmentQueryRunner> queryRunner = new SpecificSegmentQueryRunner<>( + new QueryRunner<>() + { + @Override + public Sequence> run( + QueryPlus> queryPlus, + ResponseContext responseContext + ) + { + runnerThreadName.set(Thread.currentThread().getName()); + return Sequences.withEffect( + Sequences.simple(Collections.singletonList(value)), + () -> sequenceThreadName.set(Thread.currentThread().getName()), + Execs.directExecutor() + ); + } + }, + new SpecificSegmentSpec(descriptor) + ); + + final TimeseriesQuery query = 
Druids.newTimeseriesQueryBuilder() + .dataSource("foo") + .granularity(Granularities.ALL) + .intervals(ImmutableList.of(Intervals.of("2012-01-01T00:00:00Z/P1D"))) + .aggregators( + ImmutableList.of( + new CountAggregatorFactory("rows") + ) + ) + .context(makeThreadNameContext(setProcessingThreadNames)) + .queryId("thread-name-query") + .build(); + + final Sequence> results = queryRunner.run( + QueryPlus.wrap(query), + ResponseContext.createEmpty() + ); + results.toList(); + + Assertions.assertEquals(expectedThreadNameDuringProcessing, runnerThreadName.get()); + Assertions.assertEquals(expectedThreadNameDuringProcessing, sequenceThreadName.get()); + Assertions.assertEquals("original-test-thread", Thread.currentThread().getName()); + } + finally { + Thread.currentThread().setName(originalThreadName); + } + } + + private static Map makeThreadNameContext(final Boolean setProcessingThreadNames) + { + if (setProcessingThreadNames == null) { + return Collections.emptyMap(); + } else { + return ImmutableMap.of(SpecificSegmentQueryRunner.CTX_SET_THREAD_NAME, setProcessingThreadNames); + } + } + + private static Result makeResult() + { + final TimeseriesResultBuilder builder = new TimeseriesResultBuilder( + DateTimes.of("2012-01-01T00:00:00Z") + ); + final CountAggregator rows = new CountAggregator(); + rows.aggregate(); + builder.addMetric("rows", rows.get()); + return builder.build(); + } + private void validate(ObjectMapper mapper, SegmentDescriptor descriptor, ResponseContext responseContext) throws IOException { From 2f35573537a46f2afca1e239f232db56f57284e7 Mon Sep 17 00:00:00 2001 From: Andreas Maechler Date: Fri, 29 May 2026 07:50:04 -0600 Subject: [PATCH 11/12] build(deps): Bump jackson to `2.21.3` (#19528) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Bump jackson to 2.21.3 Jackson 2.21 (issue #1381) changed the default resolution of @JacksonInject when combined with @JsonProperty on the same parameter: the injected 
value now wins over the JSON value, where 2.20 treated the inject as a fallback used only when JSON did not supply one. DruidNode's serviceName, port, and tlsPort parameters carry both annotations, with JSON expected to win when supplied — this is how DruidNode JSON config files have always worked. Add the explicit useInput = OptBoolean.TRUE to restore that contract. A repo-wide audit confirmed DruidNode's three parameters are the only sites in Druid where @JacksonInject and @JsonProperty annotate the same parameter; everywhere else the annotations are on distinct parameters and are unaffected. Also adds the previously-missing license entry for org.jspecify:jspecify 1.0.0 in extensions-core/kubernetes-extensions, which the check-licenses dependency report flagged. * Preserve @JacksonInject metadata in GuiceAnnotationIntrospector findInjectableValue was returning JacksonInject.Value.forId(id), which strips useInput and optional from the original annotation. Production deserialization happens to remain correct under jackson 2.21 because AnnotationIntrospectorPair.findInjectableValue falls back to the secondary (default Jackson) introspector and merges the recovered useInput onto the primary's Value via withUseInput. That fallback is undocumented as part of the introspector contract and would silently regress if the pair semantics change, or if this introspector were ever installed standalone for a special-purpose mapper. Construct the Value via JacksonInject.Value.from(annotation) .withId(id) so the introspector returns a complete Value on its own and no longer relies on the pair to fix it up. The annotation lookup is hoisted to the top of findInjectableValue so the non-null contract between it and findGuiceInjectId is explicit — findGuiceInjectId now documents the precondition and trusts the caller to verify, eliminating the duplicate getAnnotation call. Defensive cleanup motivated by FasterXML/jackson-databind#1381; no observable behavior change. 
--- licenses.yaml | 20 ++++++++++++++----- pom.xml | 2 +- .../guice/GuiceAnnotationIntrospector.java | 18 +++++++++++------ .../org/apache/druid/server/DruidNode.java | 7 ++++--- 4 files changed, 32 insertions(+), 15 deletions(-) diff --git a/licenses.yaml b/licenses.yaml index 23f58a0d3b3d..8032c582975a 100644 --- a/licenses.yaml +++ b/licenses.yaml @@ -321,7 +321,7 @@ name: Jackson license_category: binary module: java-core license_name: Apache License version 2.0 -version: 2.20.2 +version: 2.21.3 libraries: - com.fasterxml.jackson.core: jackson-core - com.fasterxml.jackson.core: jackson-annotations @@ -364,7 +364,7 @@ name: Jackson license_category: binary module: java-core license_name: Apache License version 2.0 -version: "2.20" +version: "2.21" libraries: - com.fasterxml.jackson.core: jackson-annotations @@ -374,7 +374,7 @@ name: Jackson license_category: binary module: extensions-contrib/druid-deltalake-extensions license_name: Apache License version 2.0 -version: 2.20.2 +version: 2.21.3 libraries: - com.fasterxml.jackson.core: jackson-databind notice: | @@ -1002,7 +1002,7 @@ name: Jackson license_category: binary module: extensions-core/kubernetes-overlord-extensions license_name: Apache License version 2.0 -version: 2.20.2 +version: 2.21.3 libraries: - com.fasterxml.jackson.dataformat: jackson-dataformat-properties notice: | @@ -1107,6 +1107,16 @@ libraries: --- +name: org.jspecify jspecify +license_category: binary +module: extensions-core/kubernetes-extensions +license_name: Apache License version 2.0 +version: 1.0.0 +libraries: + - org.jspecify: jspecify + +--- + name: io.gsonfire gson-fire license_category: binary module: extensions-core/kubernetes-extensions @@ -3069,7 +3079,7 @@ libraries: --- name: Jackson Dataformat Yaml -version: 2.20.2 +version: 2.21.3 license_category: binary module: extensions/druid-avro-extensions license_name: Apache License version 2.0 diff --git a/pom.xml b/pom.xml index aabb17d07f08..fa07aa254c1d 100644 --- a/pom.xml +++ 
b/pom.xml @@ -105,7 +105,7 @@ 1.10.0 12.1.8 1.19.4 - 2.20.2 + 2.21.3 1.9.13 2.25.4 8.2.0 diff --git a/processing/src/main/java/org/apache/druid/guice/GuiceAnnotationIntrospector.java b/processing/src/main/java/org/apache/druid/guice/GuiceAnnotationIntrospector.java index fd8ee5e9e02a..890e3c233a04 100644 --- a/processing/src/main/java/org/apache/druid/guice/GuiceAnnotationIntrospector.java +++ b/processing/src/main/java/org/apache/druid/guice/GuiceAnnotationIntrospector.java @@ -44,19 +44,25 @@ public class GuiceAnnotationIntrospector extends NopAnnotationIntrospector @Override public JacksonInject.Value findInjectableValue(AnnotatedMember m) { - Object id = findGuiceInjectId(m); + // Preserve useInput / optional from the annotation. The simpler Value.forId(id) drops + // them and relies on AnnotationIntrospectorPair's fallback. See FasterXML/jackson-databind#1381. + final JacksonInject annotation = m.getAnnotation(JacksonInject.class); + if (annotation == null) { + return null; + } + final Object id = findGuiceInjectId(m); if (id == null) { return null; } - return JacksonInject.Value.forId(id); + return JacksonInject.Value.from(annotation).withId(id); } + /** + * Resolves the Guice {@link Key} for an annotated member. Callers must verify that {@code m} + * carries a {@link JacksonInject} annotation before invoking; this method does not re-check. 
+ */ private Object findGuiceInjectId(AnnotatedMember m) { - if (m.getAnnotation(JacksonInject.class) == null) { - return null; - } - Type genericType = null; Annotation guiceAnnotation = null; diff --git a/server/src/main/java/org/apache/druid/server/DruidNode.java b/server/src/main/java/org/apache/druid/server/DruidNode.java index 820d8d32a08a..19252fde755c 100644 --- a/server/src/main/java/org/apache/druid/server/DruidNode.java +++ b/server/src/main/java/org/apache/druid/server/DruidNode.java @@ -22,6 +22,7 @@ import com.fasterxml.jackson.annotation.JacksonInject; import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.annotation.OptBoolean; import com.google.common.base.Preconditions; import com.google.common.net.HostAndPort; import com.google.inject.name.Named; @@ -130,12 +131,12 @@ public DruidNode( */ @JsonCreator public DruidNode( - @JacksonInject @Named("serviceName") @JsonProperty("service") String serviceName, + @JacksonInject(useInput = OptBoolean.TRUE) @Named("serviceName") @JsonProperty("service") String serviceName, @JsonProperty("host") String host, @JsonProperty("bindOnHost") boolean bindOnHost, @JsonProperty("plaintextPort") Integer plaintextPort, - @JacksonInject @Named("servicePort") @JsonProperty("port") Integer port, - @JacksonInject @Named("tlsServicePort") @JsonProperty("tlsPort") Integer tlsPort, + @JacksonInject(useInput = OptBoolean.TRUE) @Named("servicePort") @JsonProperty("port") Integer port, + @JacksonInject(useInput = OptBoolean.TRUE) @Named("tlsServicePort") @JsonProperty("tlsPort") Integer tlsPort, @JsonProperty("enablePlaintextPort") Boolean enablePlaintextPort, @JsonProperty("enableTlsPort") boolean enableTlsPort, @JsonProperty("labels") @Nullable Map labels From cf275580199e93485062a7b1d7e1dbb4e2c6e2bf Mon Sep 17 00:00:00 2001 From: aho135 Date: Fri, 29 May 2026 18:06:27 -0700 Subject: [PATCH 12/12] feat: resetOffsetsAndBackfill using bounded stream 
supervisor (#19477) * resetOffsetsAndBackfill using bounded stream supervisor * Reject non-positive backfillTaskCount * Reset supervisor after backfill Supervisor has already been started * Add helper method specHasConcurrentLocks * Fix doc reference * Move validations into helper function * Add embedded-test for resetSupervisorAndBackfill * Remove flaky waitUntilPublishedRecordsAreIngested * Update KafkaBoundedSupervisorTest.java * Wait for supervisor to be RUNNING * Use checkpointed offset if > requested reset offset to prevent duplicate ingestion * Update KafkaBoundedSupervisorTest.java * Revert "Use checkpointed offset if > requested reset offset to prevent duplicate ingestion" resetOffsetsForwardOnly does not fully close the race it targets (the write is still unconditional) and the duplicate scenario it addresses is narrower than the overlap case, which cannot be solved without suspending the main supervisor. Accepting the limitation and documenting it is preferable to the added complexity. This reverts commit 89b5fec25e3a7dc88441d6e995564723557f2312. 
* Doc update - duplication notice and Kinesis callout * Rename endpoint from resetOffsetsAndBackfill to resetToLatestAndBackfill * Update test name to reflect new endpoint * Address clean up from review comments * Log out start/end offsets * Add abstract createBackfillSpec * Unit test createBackfillSpec * Fix deprecation notices * Rename functions to align with new endpoint name * Add null check and rename for consistency --- docs/api-reference/supervisor-api.md | 103 +++++++ .../indexing/KafkaBoundedSupervisorTest.java | 51 +++ .../supervisor/RabbitStreamSupervisor.java | 6 +- .../RabbitStreamSupervisorSpec.java | 50 +++ .../kafka/supervisor/KafkaSupervisor.java | 10 +- .../kafka/supervisor/KafkaSupervisorSpec.java | 54 ++++ .../supervisor/KafkaSupervisorSpecTest.java | 33 ++ .../kinesis/supervisor/KinesisSupervisor.java | 4 +- .../supervisor/KinesisSupervisorSpec.java | 54 ++++ .../supervisor/SupervisorManager.java | 169 ++++++++-- .../supervisor/SupervisorResource.java | 44 +++ .../supervisor/SeekableStreamSupervisor.java | 10 +- .../SeekableStreamSupervisorSpec.java | 6 + .../supervisor/SupervisorManagerTest.java | 290 ++++++++++++++++++ .../supervisor/SupervisorResourceTest.java | 105 +++++++ .../SeekableStreamSupervisorSpecTest.java | 10 + .../SeekableStreamSupervisorStateTest.java | 8 +- .../SeekableStreamSupervisorTestBase.java | 14 +- .../MSQWorkerTaskLauncherRetryTest.java | 6 + .../rpc/indexing/NoopOverlordClient.java | 6 + .../druid/rpc/indexing/OverlordClient.java | 9 + .../rpc/indexing/OverlordClientImpl.java | 17 + .../testing/embedded/EmbeddedClusterApis.java | 10 + 23 files changed, 1021 insertions(+), 48 deletions(-) diff --git a/docs/api-reference/supervisor-api.md b/docs/api-reference/supervisor-api.md index d321af143020..8f9c5c36dc5c 100644 --- a/docs/api-reference/supervisor-api.md +++ b/docs/api-reference/supervisor-api.md @@ -3539,6 +3539,109 @@ when the supervisor's tasks restart, they resume reading from `{"0": 100, "1": 1 ``` +### Reset 
offsets to latest and start a backfill supervisor + +This endpoint is supported for Apache Kafka and RabbitMQ Stream supervisors. Amazon Kinesis is not supported yet. + +Resets the supervisor to the latest available stream offsets and starts a new bounded backfill supervisor to ingest the data in the skipped range. + +This endpoint is useful when a supervisor has fallen behind and you want to catch it up to the latest offsets without losing the skipped data. The main supervisor resumes ingesting from the latest offsets, while the backfill supervisor processes the range from the previously checkpointed offsets up to the latest offsets at the time of the reset. + +**Duplicate ingestion notice:** The main supervisor is not quiesced before the reset. This means duplicate data can occur in two ways: +- **Backfill overlap:** Any tasks that were in-flight at the time of the reset may publish segments covering part of the backfill range before being shut down. +- **Reset race:** If a task checkpoint is written to the metadata store between when this endpoint captures the current offsets and when it applies the reset, that checkpoint can be overwritten, causing the main supervisor to re-ingest already-processed data. + +Both windows are narrow in practice, but cannot be fully eliminated without manually suspending the main supervisor before calling this endpoint and waiting for all pending tasks to complete. + +The following requirements must be met before calling this endpoint: + +- The supervisor must be a [streaming supervisor](../ingestion/supervisor.md). +- The supervisor's `useEarliestSequenceNumber` property must be `false`. +- The supervisor context must have `useConcurrentLocks` set to `true` to allow the backfill supervisor's tasks to write concurrently with the main supervisor's tasks. +- The supervisor must be in a `RUNNING` state. 
+ +The backfill supervisor has the same configuration as the source supervisor except for its ID, which takes the form `{supervisorId}_backfill_{randomSuffix}`, and its `boundedStreamConfig`, which is set to the skipped offset range. If `backfillTaskCount` is specified, it overrides the `taskCount` for the backfill supervisor only. + +#### URL + +`POST` `/druid/indexer/v1/supervisor/{supervisorId}/resetToLatestAndBackfill` + +#### Query parameters + +| Parameter | Type | Description | Default | +|---------|---------|---------|---------| +| `backfillTaskCount` | Integer | Number of parallel tasks for the backfill supervisor. | Defaults to `taskCount` from the source supervisor if not specified | + +#### Responses + + + + + + +*Successfully reset and started backfill supervisor* + + + + + +*Supervisor does not meet requirements (wrong type, `useEarliestSequenceNumber` is true, `useConcurrentLocks` not enabled, or supervisor not RUNNING)* + + + + + +*Invalid supervisor ID* + + + + + +*Failed to retrieve stream offsets or serialize the backfill spec* + + + + +--- + +#### Sample request + +The following example resets a supervisor named `social_media` and starts a backfill supervisor with 2 tasks. + + + + + + +```shell +curl --request POST "http://ROUTER_IP:ROUTER_PORT/druid/indexer/v1/supervisor/social_media/resetToLatestAndBackfill?backfillTaskCount=2" +``` + + + + + +```HTTP +POST /druid/indexer/v1/supervisor/social_media/resetToLatestAndBackfill?backfillTaskCount=2 HTTP/1.1 +Host: http://ROUTER_IP:ROUTER_PORT +``` + + + + +#### Sample response + +

+ View the response + + ```json +{ + "id": "social_media", + "backfillSupervisorId": "social_media_backfill_abcdefgh" +} + ``` +
+ ### Terminate a supervisor Terminates a supervisor and its associated indexing tasks, triggering the publishing of their segments. When you terminate a supervisor, Druid places a tombstone marker in the metadata store to prevent reloading on restart. diff --git a/embedded-tests/src/test/java/org/apache/druid/testing/embedded/indexing/KafkaBoundedSupervisorTest.java b/embedded-tests/src/test/java/org/apache/druid/testing/embedded/indexing/KafkaBoundedSupervisorTest.java index 7e22d85d9cab..fa184418df52 100644 --- a/embedded-tests/src/test/java/org/apache/druid/testing/embedded/indexing/KafkaBoundedSupervisorTest.java +++ b/embedded-tests/src/test/java/org/apache/druid/testing/embedded/indexing/KafkaBoundedSupervisorTest.java @@ -292,6 +292,48 @@ public void test_boundedSupervisor_doesNotSilentlyCompleteWhenStaleOffsetExceeds Assertions.assertEquals("UNHEALTHY_SUPERVISOR", status2.getState(), "Supervisor state should be UNHEALTHY_SUPERVISOR"); } + @Test + public void test_resetToLatestAndBackfill() + { + final String topic = IdUtils.getRandomId(); + kafkaServer.createTopicWithPartitions(topic, 2); + + // Create a streaming supervisor with concurrent locks and withUseEarliestSequenceNumber=false + final KafkaSupervisorSpec supervisor = createKafkaSupervisor(kafkaServer) + .withContext(Map.of("useConcurrentLocks", true)) + .withIoConfig(io -> io + .withKafkaInputFormat(new JsonInputFormat(null, null, null, null, null)) + .withUseEarliestSequenceNumber(false) + ) + .build(dataSource, topic); + + cluster.callApi().postSupervisor(supervisor); + + waitForSupervisorDetailedState(supervisor.getId(), "RUNNING"); + + final int totalRecords = publish1kRecords(topic, false); + waitUntilPublishedRecordsAreIngested(totalRecords); + + // Reset the main supervisor and spin up a backfill supervisor. + // Since all records are already ingested before the call, the backfill + // supervisor will complete immediately without ingesting anything. 
+ final Map result = cluster.callApi().resetToLatestAndBackfill(supervisor.getId()); + Assertions.assertEquals(supervisor.getId(), result.get("id")); + final String backfillSupervisorId = (String) result.get("backfillSupervisorId"); + + // Wait for the backfill to finish + waitForSupervisorToComplete(backfillSupervisorId); + + // Main supervisor should still be running + final SupervisorStatus mainStatus = cluster.callApi().getSupervisorStatus(supervisor.getId()); + Assertions.assertEquals("RUNNING", mainStatus.getState()); + Assertions.assertTrue(mainStatus.isHealthy()); + + final SupervisorStatus backfillStatus = cluster.callApi().getSupervisorStatus(backfillSupervisorId); + Assertions.assertEquals("COMPLETED", backfillStatus.getState()); + Assertions.assertTrue(backfillStatus.isHealthy()); + } + private void waitForSupervisorToComplete(String supervisorId) { overlord.latchableEmitter().waitForEvent( @@ -301,6 +343,15 @@ private void waitForSupervisorToComplete(String supervisorId) ); } + private void waitForSupervisorDetailedState(String supervisorId, String detailedState) + { + overlord.latchableEmitter().waitForEvent( + event -> event.hasMetricName("supervisor/count") + .hasDimension(DruidMetrics.SUPERVISOR_ID, supervisorId) + .hasDimension("detailedState", detailedState) + ); + } + private void waitForSupervisorToBeUnhealthy(String supervisorId) { overlord.latchableEmitter().waitForEvent( diff --git a/extensions-contrib/rabbit-stream-indexing-service/src/main/java/org/apache/druid/indexing/rabbitstream/supervisor/RabbitStreamSupervisor.java b/extensions-contrib/rabbit-stream-indexing-service/src/main/java/org/apache/druid/indexing/rabbitstream/supervisor/RabbitStreamSupervisor.java index 6099105b3374..04973a5272fd 100644 --- a/extensions-contrib/rabbit-stream-indexing-service/src/main/java/org/apache/druid/indexing/rabbitstream/supervisor/RabbitStreamSupervisor.java +++ 
b/extensions-contrib/rabbit-stream-indexing-service/src/main/java/org/apache/druid/indexing/rabbitstream/supervisor/RabbitStreamSupervisor.java @@ -322,7 +322,7 @@ protected Map getTimeLagPerPartition(Map currentOffs } @Override - protected RabbitStreamDataSourceMetadata createDataSourceMetaDataForReset(String topic, Map map) + public RabbitStreamDataSourceMetadata createDataSourceMetaDataForReset(String topic, Map map) { return new RabbitStreamDataSourceMetadata(new SeekableStreamEndSequenceNumbers<>(topic, map)); } @@ -408,7 +408,7 @@ public LagStats computeLagStats() } @Override - protected void updatePartitionLagFromStream() + public void updatePartitionLagFromStream() { getRecordSupplierLock().lock(); @@ -435,7 +435,7 @@ protected void updatePartitionLagFromStream() } @Override - protected Map getLatestSequencesFromStream() + public Map getLatestSequencesFromStream() { return latestSequenceFromStream != null ? latestSequenceFromStream : new HashMap<>(); } diff --git a/extensions-contrib/rabbit-stream-indexing-service/src/main/java/org/apache/druid/indexing/rabbitstream/supervisor/RabbitStreamSupervisorSpec.java b/extensions-contrib/rabbit-stream-indexing-service/src/main/java/org/apache/druid/indexing/rabbitstream/supervisor/RabbitStreamSupervisorSpec.java index 4a445f6f1c11..4763a949a615 100644 --- a/extensions-contrib/rabbit-stream-indexing-service/src/main/java/org/apache/druid/indexing/rabbitstream/supervisor/RabbitStreamSupervisorSpec.java +++ b/extensions-contrib/rabbit-stream-indexing-service/src/main/java/org/apache/druid/indexing/rabbitstream/supervisor/RabbitStreamSupervisorSpec.java @@ -30,6 +30,7 @@ import org.apache.druid.indexing.overlord.supervisor.Supervisor; import org.apache.druid.indexing.overlord.supervisor.SupervisorStateManagerConfig; import org.apache.druid.indexing.rabbitstream.RabbitStreamIndexTaskClientFactory; +import org.apache.druid.indexing.seekablestream.supervisor.BoundedStreamConfig; import 
org.apache.druid.indexing.seekablestream.supervisor.SeekableStreamSupervisorSpec; import org.apache.druid.java.util.emitter.service.ServiceEmitter; import org.apache.druid.java.util.metrics.DruidMonitorSchedulerConfig; @@ -155,6 +156,55 @@ protected RabbitStreamSupervisorSpec toggleSuspend(boolean suspend) supervisorStateManagerConfig); } + @Override + public RabbitStreamSupervisorSpec createBackfillSpec( + String backfillId, + BoundedStreamConfig boundedStreamConfig, + @Nullable Integer taskCount + ) + { + RabbitStreamSupervisorIOConfig ioConfig = getSpec().getIOConfig(); + RabbitStreamSupervisorIOConfig backfillIoConfig = new RabbitStreamSupervisorIOConfig( + ioConfig.getStream(), + ioConfig.getUri(), + ioConfig.getInputFormat(), + ioConfig.getReplicas(), + taskCount != null ? taskCount : ioConfig.getTaskCount(), + ioConfig.getTaskDuration().toPeriod(), + ioConfig.getConsumerProperties(), + ioConfig.getAutoScalerConfig(), + ioConfig.getPollTimeout(), + ioConfig.getStartDelay().toPeriod(), + ioConfig.getPeriod().toPeriod(), + ioConfig.getCompletionTimeout().toPeriod(), + ioConfig.isUseEarliestSequenceNumber(), + ioConfig.getLateMessageRejectionPeriod().isPresent() ? ioConfig.getLateMessageRejectionPeriod().get().toPeriod() : null, + ioConfig.getEarlyMessageRejectionPeriod().isPresent() ? ioConfig.getEarlyMessageRejectionPeriod().get().toPeriod() : null, + ioConfig.getLateMessageRejectionStartDateTime().isPresent() ? 
ioConfig.getLateMessageRejectionStartDateTime().get() : null, + ioConfig.getStopTaskCount(), + ioConfig.getServerPriorityToReplicas(), + boundedStreamConfig + ); + return new RabbitStreamSupervisorSpec( + backfillId, + null, + getSpec().getDataSchema(), + getSpec().getTuningConfig(), + backfillIoConfig, + getContext(), + isSuspended(), + taskStorage, + taskMaster, + indexerMetadataStorageCoordinator, + (RabbitStreamIndexTaskClientFactory) indexTaskClientFactory, + mapper, + emitter, + monitorSchedulerConfig, + rowIngestionMetersFactory, + supervisorStateManagerConfig + ); + } + @Override public String toString() { diff --git a/extensions-core/kafka-indexing-service/src/main/java/org/apache/druid/indexing/kafka/supervisor/KafkaSupervisor.java b/extensions-core/kafka-indexing-service/src/main/java/org/apache/druid/indexing/kafka/supervisor/KafkaSupervisor.java index 727eb52db272..5863284cc2d9 100644 --- a/extensions-core/kafka-indexing-service/src/main/java/org/apache/druid/indexing/kafka/supervisor/KafkaSupervisor.java +++ b/extensions-core/kafka-indexing-service/src/main/java/org/apache/druid/indexing/kafka/supervisor/KafkaSupervisor.java @@ -356,7 +356,7 @@ protected Map getTimeLagPerPartition(Map map) + public KafkaDataSourceMetadata createDataSourceMetaDataForReset(String topic, Map map) { return new KafkaDataSourceMetadata(new SeekableStreamEndSequenceNumbers<>(topic, map)); } @@ -548,7 +548,7 @@ private Map getTimestampPerPartitionAtCurrentOffset(S *

*/ @Override - protected void updatePartitionLagFromStream() + public void updatePartitionLagFromStream() { if (getIoConfig().isEmitTimeLagMetrics()) { updatePartitionTimeAndRecordLagFromStream(); @@ -597,7 +597,7 @@ private void updateOffsetSnapshot( } @Override - protected Map getLatestSequencesFromStream() + public Map getLatestSequencesFromStream() { return offsetSnapshotRef.get().getLatestOffsetsFromStream(); } @@ -630,7 +630,7 @@ protected boolean isMultiTopic() * Gets the offsets as stored in the metadata store. The map returned will only contain * offsets from topic partitions that match the current supervisor config stream. This * override is needed because in the case of multi-topic, a user could have updated the supervisor - * config from single topic to mult-topic, where the new multi-topic pattern regex matches the + * config from single topic to multi-topic, where the new multi-topic pattern regex matches the * old config single topic. Without this override, the previously stored metadata for the single * topic would be deemed as different from the currently configure stream, and not be included in * the offset map returned. This implementation handles these cases appropriately. @@ -640,7 +640,7 @@ protected boolean isMultiTopic() * updated to single topic or multi-topic depending on the supervisor config, as needed. 
*/ @Override - protected Map getOffsetsFromMetadataStorage() + public Map getOffsetsFromMetadataStorage() { final DataSourceMetadata dataSourceMetadata = retrieveDataSourceMetadata(); if (checkSourceMetadataMatch(dataSourceMetadata)) { diff --git a/extensions-core/kafka-indexing-service/src/main/java/org/apache/druid/indexing/kafka/supervisor/KafkaSupervisorSpec.java b/extensions-core/kafka-indexing-service/src/main/java/org/apache/druid/indexing/kafka/supervisor/KafkaSupervisorSpec.java index b607ade1acfe..31d3e8fad691 100644 --- a/extensions-core/kafka-indexing-service/src/main/java/org/apache/druid/indexing/kafka/supervisor/KafkaSupervisorSpec.java +++ b/extensions-core/kafka-indexing-service/src/main/java/org/apache/druid/indexing/kafka/supervisor/KafkaSupervisorSpec.java @@ -36,6 +36,7 @@ import org.apache.druid.indexing.overlord.supervisor.Supervisor; import org.apache.druid.indexing.overlord.supervisor.SupervisorSpec; import org.apache.druid.indexing.overlord.supervisor.SupervisorStateManagerConfig; +import org.apache.druid.indexing.seekablestream.supervisor.BoundedStreamConfig; import org.apache.druid.indexing.seekablestream.supervisor.SeekableStreamSupervisorSpec; import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.java.util.emitter.service.ServiceEmitter; @@ -173,6 +174,59 @@ protected KafkaSupervisorSpec toggleSuspend(boolean suspend) ); } + @Override + public KafkaSupervisorSpec createBackfillSpec( + String backfillId, + BoundedStreamConfig boundedStreamConfig, + @Nullable Integer taskCount + ) + { + KafkaSupervisorIOConfig ioConfig = getSpec().getIOConfig(); + KafkaSupervisorIOConfig backfillIoConfig = new KafkaSupervisorIOConfig( + ioConfig.getTopic(), + ioConfig.getTopicPattern(), + ioConfig.getInputFormat(), + ioConfig.getReplicas(), + taskCount != null ? 
taskCount : ioConfig.getTaskCount(), + ioConfig.getTaskDuration().toPeriod(), + ioConfig.getConsumerProperties(), + ioConfig.getAutoScalerConfig(), + ioConfig.getLagAggregator(), + ioConfig.getPollTimeout(), + ioConfig.getStartDelay().toPeriod(), + ioConfig.getPeriod().toPeriod(), + ioConfig.isUseEarliestSequenceNumber(), + ioConfig.getCompletionTimeout().toPeriod(), + ioConfig.getLateMessageRejectionPeriod().isPresent() ? ioConfig.getLateMessageRejectionPeriod().get().toPeriod() : null, + ioConfig.getEarlyMessageRejectionPeriod().isPresent() ? ioConfig.getEarlyMessageRejectionPeriod().get().toPeriod() : null, + ioConfig.getLateMessageRejectionStartDateTime().isPresent() ? ioConfig.getLateMessageRejectionStartDateTime().get() : null, + ioConfig.getConfigOverrides(), + ioConfig.getIdleConfig(), + ioConfig.getStopTaskCount(), + ioConfig.isEmitTimeLagMetrics(), + ioConfig.getServerPriorityToReplicas(), + boundedStreamConfig + ); + return new KafkaSupervisorSpec( + backfillId, + null, + getSpec().getDataSchema(), + getSpec().getTuningConfig(), + backfillIoConfig, + getContext(), + isSuspended(), + taskStorage, + taskMaster, + indexerMetadataStorageCoordinator, + (KafkaIndexTaskClientFactory) indexTaskClientFactory, + mapper, + emitter, + monitorSchedulerConfig, + rowIngestionMetersFactory, + supervisorStateManagerConfig + ); + } + /** * Extends {@link SeekableStreamSupervisorSpec#validateSpecUpdateTo} to ensure that the proposed spec and current spec are either both multi-topic or both single-topic. *

diff --git a/extensions-core/kafka-indexing-service/src/test/java/org/apache/druid/indexing/kafka/supervisor/KafkaSupervisorSpecTest.java b/extensions-core/kafka-indexing-service/src/test/java/org/apache/druid/indexing/kafka/supervisor/KafkaSupervisorSpecTest.java index 8879ff6d9753..06ca9b64ced5 100644 --- a/extensions-core/kafka-indexing-service/src/test/java/org/apache/druid/indexing/kafka/supervisor/KafkaSupervisorSpecTest.java +++ b/extensions-core/kafka-indexing-service/src/test/java/org/apache/druid/indexing/kafka/supervisor/KafkaSupervisorSpecTest.java @@ -32,6 +32,7 @@ import org.apache.druid.indexing.overlord.TaskStorage; import org.apache.druid.indexing.overlord.supervisor.SupervisorSpec; import org.apache.druid.indexing.overlord.supervisor.SupervisorStateManagerConfig; +import org.apache.druid.indexing.seekablestream.supervisor.BoundedStreamConfig; import org.apache.druid.indexing.seekablestream.supervisor.LagAggregator; import org.apache.druid.indexing.seekablestream.supervisor.autoscaler.CostBasedAutoScalerConfig; import org.apache.druid.jackson.DefaultObjectMapper; @@ -564,6 +565,38 @@ public void test_validateSpecUpdateTo() sourceSpec.validateSpecUpdateTo(validDestSpec); } + @Test + public void testCreateBackfillSpec() + { + KafkaSupervisorSpec spec = new KafkaSupervisorSpecBuilder() + .withDataSchema( + schema -> schema + .withTimestamp(TimestampSpec.DEFAULT) + .withAggregators(new CountAggregatorFactory("rows")) + .withGranularity(new UniformGranularitySpec(Granularities.HOUR, Granularities.NONE, null)) + ) + .withIoConfig( + ioConfig -> ioConfig + .withJsonInputFormat() + .withConsumerProperties(Map.of("bootstrap.servers", "localhost:9092")) + .withTaskCount(3) + ) + .build("testDs", "metrics"); + + BoundedStreamConfig boundedStreamConfig = new BoundedStreamConfig( + Map.of("0", 100L, "1", 200L), + Map.of("0", 500L, "1", 600L) + ); + + KafkaSupervisorSpec backfill = (KafkaSupervisorSpec) spec.createBackfillSpec("backfill-id", boundedStreamConfig, 
2); + + Assert.assertEquals("backfill-id", backfill.getId()); + Assert.assertEquals("testDs", backfill.getSpec().getDataSchema().getDataSource()); + Assert.assertEquals("metrics", backfill.getSpec().getIOConfig().getTopic()); + Assert.assertEquals(2, backfill.getSpec().getIOConfig().getTaskCount()); + Assert.assertEquals(boundedStreamConfig, backfill.getSpec().getIOConfig().getBoundedStreamConfig()); + } + private KafkaSupervisorSpec getSpec(String topic, String topicPattern) { KafkaSupervisorSpecBuilder builder = new KafkaSupervisorSpecBuilder() diff --git a/extensions-core/kinesis-indexing-service/src/main/java/org/apache/druid/indexing/kinesis/supervisor/KinesisSupervisor.java b/extensions-core/kinesis-indexing-service/src/main/java/org/apache/druid/indexing/kinesis/supervisor/KinesisSupervisor.java index 0f91fc0965db..3f1f4034f3ce 100644 --- a/extensions-core/kinesis-indexing-service/src/main/java/org/apache/druid/indexing/kinesis/supervisor/KinesisSupervisor.java +++ b/extensions-core/kinesis-indexing-service/src/main/java/org/apache/druid/indexing/kinesis/supervisor/KinesisSupervisor.java @@ -321,7 +321,7 @@ protected Map getTimeLagPerPartition(Map currentOf } @Override - protected SeekableStreamDataSourceMetadata createDataSourceMetaDataForReset( + public SeekableStreamDataSourceMetadata createDataSourceMetaDataForReset( String stream, Map map ) @@ -336,7 +336,7 @@ protected OrderedSequenceNumber makeSequenceNumber(String seq, boolean i } @Override - protected void updatePartitionLagFromStream() + public void updatePartitionLagFromStream() { KinesisRecordSupplier supplier = (KinesisRecordSupplier) recordSupplier; // this recordSupplier method is thread safe, so does not need to acquire the recordSupplierLock diff --git a/extensions-core/kinesis-indexing-service/src/main/java/org/apache/druid/indexing/kinesis/supervisor/KinesisSupervisorSpec.java 
b/extensions-core/kinesis-indexing-service/src/main/java/org/apache/druid/indexing/kinesis/supervisor/KinesisSupervisorSpec.java index 8e6615716809..4899337797bf 100644 --- a/extensions-core/kinesis-indexing-service/src/main/java/org/apache/druid/indexing/kinesis/supervisor/KinesisSupervisorSpec.java +++ b/extensions-core/kinesis-indexing-service/src/main/java/org/apache/druid/indexing/kinesis/supervisor/KinesisSupervisorSpec.java @@ -35,6 +35,7 @@ import org.apache.druid.indexing.overlord.TaskStorage; import org.apache.druid.indexing.overlord.supervisor.Supervisor; import org.apache.druid.indexing.overlord.supervisor.SupervisorStateManagerConfig; +import org.apache.druid.indexing.seekablestream.supervisor.BoundedStreamConfig; import org.apache.druid.indexing.seekablestream.supervisor.SeekableStreamSupervisorSpec; import org.apache.druid.java.util.emitter.service.ServiceEmitter; import org.apache.druid.java.util.metrics.DruidMonitorSchedulerConfig; @@ -193,4 +194,57 @@ protected KinesisSupervisorSpec toggleSuspend(boolean suspend) supervisorStateManagerConfig ); } + + @Override + public KinesisSupervisorSpec createBackfillSpec( + String backfillId, + BoundedStreamConfig boundedStreamConfig, + @Nullable Integer taskCount + ) + { + KinesisSupervisorIOConfig ioConfig = getSpec().getIOConfig(); + KinesisSupervisorIOConfig backfillIoConfig = new KinesisSupervisorIOConfig( + ioConfig.getStream(), + ioConfig.getInputFormat(), + ioConfig.getEndpoint(), + null, + ioConfig.getReplicas(), + taskCount != null ? taskCount : ioConfig.getTaskCount(), + ioConfig.getTaskDuration().toPeriod(), + ioConfig.getStartDelay().toPeriod(), + ioConfig.getPeriod().toPeriod(), + ioConfig.isUseEarliestSequenceNumber(), + ioConfig.getCompletionTimeout().toPeriod(), + ioConfig.getLateMessageRejectionPeriod().isPresent() ? ioConfig.getLateMessageRejectionPeriod().get().toPeriod() : null, + ioConfig.getEarlyMessageRejectionPeriod().isPresent() ? 
ioConfig.getEarlyMessageRejectionPeriod().get().toPeriod() : null, + ioConfig.getLateMessageRejectionStartDateTime().isPresent() ? ioConfig.getLateMessageRejectionStartDateTime().get() : null, + ioConfig.getRecordsPerFetch(), + ioConfig.getFetchDelayMillis(), + ioConfig.getAwsAssumedRoleArn(), + ioConfig.getAwsExternalId(), + ioConfig.getAutoScalerConfig(), + ioConfig.isDeaggregate(), + ioConfig.getServerPriorityToReplicas(), + boundedStreamConfig + ); + return new KinesisSupervisorSpec( + backfillId, + null, + getSpec().getDataSchema(), + getSpec().getTuningConfig(), + backfillIoConfig, + getContext(), + isSuspended(), + taskStorage, + taskMaster, + indexerMetadataStorageCoordinator, + (KinesisIndexTaskClientFactory) indexTaskClientFactory, + mapper, + emitter, + monitorSchedulerConfig, + rowIngestionMetersFactory, + awsCredentialsConfig, + supervisorStateManagerConfig + ); + } } diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/supervisor/SupervisorManager.java b/indexing-service/src/main/java/org/apache/druid/indexing/overlord/supervisor/SupervisorManager.java index 52f3cba7fc11..fa7d96634ae6 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/supervisor/SupervisorManager.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/overlord/supervisor/SupervisorManager.java @@ -23,9 +23,11 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.base.Optional; import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableMap; import com.google.common.util.concurrent.ListenableFuture; import com.google.inject.Inject; import org.apache.druid.common.guava.FutureUtils; +import org.apache.druid.common.utils.IdUtils; import org.apache.druid.error.DruidException; import org.apache.druid.error.InvalidInput; import org.apache.druid.error.NotFound; @@ -35,8 +37,11 @@ import org.apache.druid.indexing.overlord.DataSourceMetadata; import 
org.apache.druid.indexing.overlord.supervisor.autoscaler.SupervisorTaskAutoScaler; import org.apache.druid.indexing.seekablestream.SeekableStreamDataSourceMetadata; +import org.apache.druid.indexing.seekablestream.supervisor.BoundedStreamConfig; import org.apache.druid.indexing.seekablestream.supervisor.SeekableStreamSupervisor; import org.apache.druid.indexing.seekablestream.supervisor.SeekableStreamSupervisorSpec; +import org.apache.druid.java.util.common.IAE; +import org.apache.druid.java.util.common.ISE; import org.apache.druid.java.util.common.Pair; import org.apache.druid.java.util.common.lifecycle.LifecycleStart; import org.apache.druid.java.util.common.lifecycle.LifecycleStop; @@ -129,33 +134,8 @@ public Optional getActiveSupervisorIdForDatasourceWithAppendLock(String final Supervisor supervisor = entry.getValue().lhs; final SupervisorSpec supervisorSpec = entry.getValue().rhs; - boolean hasAppendLock = Tasks.DEFAULT_USE_CONCURRENT_LOCKS; - if (supervisorSpec instanceof SeekableStreamSupervisorSpec) { - SeekableStreamSupervisorSpec seekableStreamSupervisorSpec = (SeekableStreamSupervisorSpec) supervisorSpec; - Map context = seekableStreamSupervisorSpec.getContext(); - if (context != null) { - Boolean useConcurrentLocks = QueryContexts.getAsBoolean( - Tasks.USE_CONCURRENT_LOCKS, - context.get(Tasks.USE_CONCURRENT_LOCKS) - ); - if (useConcurrentLocks == null) { - TaskLockType taskLockType = QueryContexts.getAsEnum( - Tasks.TASK_LOCK_TYPE, - context.get(Tasks.TASK_LOCK_TYPE), - TaskLockType.class - ); - if (taskLockType == null) { - hasAppendLock = Tasks.DEFAULT_USE_CONCURRENT_LOCKS; - } else if (taskLockType == TaskLockType.APPEND) { - hasAppendLock = true; - } else { - hasAppendLock = false; - } - } else { - hasAppendLock = useConcurrentLocks; - } - } - } + boolean hasAppendLock = supervisorSpec instanceof SeekableStreamSupervisorSpec + && specHasConcurrentLocks((SeekableStreamSupervisorSpec) supervisorSpec); if (supervisor instanceof 
SeekableStreamSupervisor && !supervisorSpec.isSuspended() @@ -393,6 +373,116 @@ public boolean resetSupervisor(String id, @Nullable DataSourceMetadata resetData return true; } + /** + * Resets a supervisor to the latest stream offsets and starts a bounded backfill supervisor to + * process the skipped range from the previously checkpointed offsets up to the latest offsets. + * + * @param id supervisor ID + * @param backfillTaskCount number of tasks for the backfill supervisor, or null to inherit from the source spec + * @return map with {@code "id"} (the original supervisor ID) and {@code "backfillSupervisorId"} + * @throws IllegalArgumentException if the supervisor is not a {@link SeekableStreamSupervisor}, + * if {@code useEarliestSequenceNumber} is true, + * if {@code useConcurrentLocks} is not set to true in the supervisor context, + * or if the supervisor is not in a RUNNING state + * @throws IllegalStateException if the latest or checkpointed offsets cannot be retrieved, + * or if the backfill spec cannot be serialized + */ + public Map resetToLatestAndBackfill(String id, @Nullable Integer backfillTaskCount) + { + Preconditions.checkState(started, "SupervisorManager not started"); + Preconditions.checkNotNull(id, "id"); + + Pair supervisor = supervisors.get(id); + + if (supervisor == null) { + throw new IAE("Supervisor[%s] does not exist", id); + } + + if (!(supervisor.lhs instanceof SeekableStreamSupervisor)) { + throw new IAE("Supervisor[%s] is not a streaming supervisor", id); + } + + SeekableStreamSupervisor streamSupervisor = (SeekableStreamSupervisor) supervisor.lhs; + SeekableStreamSupervisorSpec streamSpec = (SeekableStreamSupervisorSpec) supervisor.rhs; + + validateResetAndBackfill(id, streamSupervisor, streamSpec); + + log.info("Capturing latest offsets from stream for supervisor[%s]", id); + streamSupervisor.updatePartitionLagFromStream(); + Map endOffsets = streamSupervisor.getLatestSequencesFromStream(); + + log.info("Capturing checkpointed 
offsets for supervisor[%s]", id); + Map startOffsets = streamSupervisor.getOffsetsFromMetadataStorage(); + + if (endOffsets == null || endOffsets.isEmpty()) { + throw new ISE("Skipping reset: Failed to get latest offsets from stream for supervisor[%s]", id); + } + if (startOffsets == null || startOffsets.isEmpty()) { + throw new ISE("Skipping reset: Failed to get checkpointed offsets for supervisor[%s]", id); + } + + String backfillSupervisorId = IdUtils.getRandomIdWithPrefix(id + "_backfill"); + + try { + Map normalizedStartOffsets = jsonMapper.readValue(jsonMapper.writeValueAsString(startOffsets), Map.class); + Map normalizedEndOffsets = jsonMapper.readValue(jsonMapper.writeValueAsString(endOffsets), Map.class); + BoundedStreamConfig boundedStreamConfig = new BoundedStreamConfig(normalizedStartOffsets, normalizedEndOffsets); + SupervisorSpec backfillSpec = streamSpec.createBackfillSpec(backfillSupervisorId, boundedStreamConfig, backfillTaskCount); + createOrUpdateAndStartSupervisor(backfillSpec); + } + catch (JsonProcessingException e) { + throw new ISE(e, "Failed to serialize offsets for backfill supervisor[%s]", backfillSupervisorId); + } + + log.info( + "Started backfill supervisor[%s] for supervisor[%s] with startOffsets[%s] and endOffsets[%s]", + backfillSupervisorId, + id, + startOffsets, + endOffsets + ); + + log.info("Resetting supervisor[%s] metadata to latest offsets", id); + DataSourceMetadata resetMetadata = streamSupervisor.createDataSourceMetaDataForReset( + streamSupervisor.getIoConfig().getStream(), + endOffsets + ); + + streamSupervisor.resetOffsets(resetMetadata); + + // Reset autoscaler if present + SupervisorTaskAutoScaler autoscaler = autoscalers.get(id); + if (autoscaler != null) { + autoscaler.reset(); + } + + return ImmutableMap.of( + "id", id, + "backfillSupervisorId", backfillSupervisorId + ); + } + + private void validateResetAndBackfill( + String id, + SeekableStreamSupervisor streamSupervisor, + SeekableStreamSupervisorSpec streamSpec 
+ ) + { + if (streamSupervisor.getIoConfig().isUseEarliestSequenceNumber()) { + throw new IAE("Reset with skipped offsets is not supported when useEarliestOffset is true."); + } + + if (!specHasConcurrentLocks(streamSpec)) { + throw new IAE( + "Backfill tasks require 'useConcurrentLocks' to be set to true in the supervisor context to allow concurrent writes with the main supervisor tasks" + ); + } + + if (streamSupervisor.getState() != SupervisorStateManager.BasicState.RUNNING) { + throw new IAE("Supervisor[%s] must be in a RUNNING state to perform a reset and backfill", id); + } + } + public boolean checkPointDataSourceMetadata( String supervisorId, int taskGroupId, @@ -631,4 +721,29 @@ private SupervisorSpec getSpec(String id) return supervisor == null ? null : supervisor.rhs; } } + + /** + * Returns true if the spec's context enables concurrent (append) locks, accepting both + * {@code useConcurrentLocks: true} (or any truthy string) and {@code taskLockType: APPEND}. + */ + private static boolean specHasConcurrentLocks(SeekableStreamSupervisorSpec spec) + { + Map context = spec.getContext(); + if (context == null) { + return Tasks.DEFAULT_USE_CONCURRENT_LOCKS; + } + Boolean useConcurrentLocks = QueryContexts.getAsBoolean( + Tasks.USE_CONCURRENT_LOCKS, + context.get(Tasks.USE_CONCURRENT_LOCKS) + ); + if (useConcurrentLocks != null) { + return useConcurrentLocks; + } + TaskLockType taskLockType = QueryContexts.getAsEnum( + Tasks.TASK_LOCK_TYPE, + context.get(Tasks.TASK_LOCK_TYPE), + TaskLockType.class + ); + return taskLockType == TaskLockType.APPEND; + } } diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/supervisor/SupervisorResource.java b/indexing-service/src/main/java/org/apache/druid/indexing/overlord/supervisor/SupervisorResource.java index aff9edf19af9..8d0e04eb7988 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/supervisor/SupervisorResource.java +++ 
b/indexing-service/src/main/java/org/apache/druid/indexing/overlord/supervisor/SupervisorResource.java @@ -640,6 +640,50 @@ private Response handleResetRequest( ); } + @POST + @Path("/{id}/resetToLatestAndBackfill") + @Produces(MediaType.APPLICATION_JSON) + @ResourceFilters(SupervisorResourceFilter.class) + public Response resetToLatestAndBackfill( + @PathParam("id") final String id, + @QueryParam("backfillTaskCount") @Nullable final Integer backfillTaskCount + ) + { + return handleResetToLatestAndBackfill(id, backfillTaskCount); + } + + private Response handleResetToLatestAndBackfill(final String id, @Nullable final Integer backfillTaskCount) + { + if (backfillTaskCount != null && backfillTaskCount < 1) { + return Response.status(Response.Status.BAD_REQUEST) + .entity(ImmutableMap.of("error", "backfillTaskCount must be a positive integer")) + .build(); + } + return asLeaderWithSupervisorManager( + manager -> { + if (!manager.getSupervisorIds().contains(id)) { + return Response.status(Response.Status.NOT_FOUND) + .entity(ImmutableMap.of("error", StringUtils.format("[%s] does not exist", id))) + .build(); + } + try { + Map result = manager.resetToLatestAndBackfill(id, backfillTaskCount); + return Response.ok(result).build(); + } + catch (IllegalArgumentException e) { + return Response.status(Response.Status.BAD_REQUEST) + .entity(ImmutableMap.of("error", e.getMessage())) + .build(); + } + catch (Exception e) { + return Response.serverError() + .entity(ImmutableMap.of("error", e.getMessage())) + .build(); + } + } + ); + } + private Response asLeaderWithSupervisorManager(Function f) { Optional supervisorManager = taskMaster.getSupervisorManager(); diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/supervisor/SeekableStreamSupervisor.java b/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/supervisor/SeekableStreamSupervisor.java index 9bef543496b6..74329c68e1d2 100644 --- 
a/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/supervisor/SeekableStreamSupervisor.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/supervisor/SeekableStreamSupervisor.java @@ -2195,7 +2195,7 @@ public void resetOffsetsInternal(@Nonnull final DataSourceMetadata dataSourceMet final boolean metadataUpdateSuccess; final DataSourceMetadata metadata = indexerMetadataStorageCoordinator.retrieveDataSourceMetadata(supervisorId); if (metadata == null) { - log.info("Checkpointed metadata in null for supervisor[%s] for dataSource[%s] - inserting metadata[%s]", supervisorId, dataSource, resetMetadata); + log.info("Checkpointed metadata is null for supervisor[%s] for dataSource[%s] - inserting metadata[%s]", supervisorId, dataSource, resetMetadata); metadataUpdateSuccess = indexerMetadataStorageCoordinator.insertDataSourceMetadata(supervisorId, resetMetadata); } else { if (!checkSourceMetadataMatch(metadata)) { @@ -3311,7 +3311,7 @@ private boolean updatePartitionDataFromStream() /** * gets mapping of partitions in stream to their latest offsets. */ - protected Map getLatestSequencesFromStream() + public Map getLatestSequencesFromStream() { return new HashMap<>(); } @@ -4589,7 +4589,7 @@ private OrderedSequenceNumber getOffsetFromStorageForPartiti } } - protected Map getOffsetsFromMetadataStorage() + public Map getOffsetsFromMetadataStorage() { final DataSourceMetadata dataSourceMetadata = retrieveDataSourceMetadata(); if (dataSourceMetadata instanceof SeekableStreamDataSourceMetadata @@ -4976,7 +4976,7 @@ private void updateCurrentOffsets() throws InterruptedException, ExecutionExcept coalesceAndAwait(futures); } - protected abstract void updatePartitionLagFromStream(); + public abstract void updatePartitionLagFromStream(); /** * Gets 'lag' of currently processed offset behind latest offset as a measure of difference between offsets. 
@@ -5233,7 +5233,7 @@ protected abstract List sequence * @return specific instance of datasource metadata */ - protected abstract SeekableStreamDataSourceMetadata createDataSourceMetaDataForReset( + public abstract SeekableStreamDataSourceMetadata createDataSourceMetaDataForReset( String stream, Map map ); diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/supervisor/SeekableStreamSupervisorSpec.java b/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/supervisor/SeekableStreamSupervisorSpec.java index 842f0de4774e..ecbd51757c37 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/supervisor/SeekableStreamSupervisorSpec.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/supervisor/SeekableStreamSupervisorSpec.java @@ -297,4 +297,10 @@ public void merge(@Nullable SupervisorSpec existingSpec) protected abstract SeekableStreamSupervisorSpec toggleSuspend(boolean suspend); + public abstract SeekableStreamSupervisorSpec createBackfillSpec( + String backfillId, + BoundedStreamConfig boundedStreamConfig, + @Nullable Integer taskCount + ); + } diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/supervisor/SupervisorManagerTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/supervisor/SupervisorManagerTest.java index 525444e23dea..199e004b4243 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/supervisor/SupervisorManagerTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/supervisor/SupervisorManagerTest.java @@ -19,6 +19,9 @@ package org.apache.druid.indexing.overlord.supervisor; +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.annotation.JsonTypeName; import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.base.Optional; import 
com.google.common.collect.ImmutableList; @@ -26,6 +29,8 @@ import com.google.common.collect.ImmutableSet; import com.google.common.util.concurrent.SettableFuture; import org.apache.druid.data.input.impl.ByteEntity; +import org.apache.druid.data.input.impl.DimensionsSpec; +import org.apache.druid.data.input.impl.TimestampSpec; import org.apache.druid.error.DruidException; import org.apache.druid.error.DruidExceptionMatcher; import org.apache.druid.error.InvalidInput; @@ -35,7 +40,11 @@ import org.apache.druid.indexing.overlord.ObjectMetadata; import org.apache.druid.indexing.seekablestream.SeekableStreamStartSequenceNumbers; import org.apache.druid.indexing.seekablestream.TestSeekableStreamDataSourceMetadata; +import org.apache.druid.indexing.seekablestream.supervisor.BoundedStreamConfig; +import org.apache.druid.indexing.seekablestream.supervisor.LagAggregator; import org.apache.druid.indexing.seekablestream.supervisor.SeekableStreamSupervisor; +import org.apache.druid.indexing.seekablestream.supervisor.SeekableStreamSupervisorIOConfig; +import org.apache.druid.indexing.seekablestream.supervisor.SeekableStreamSupervisorIngestionSpec; import org.apache.druid.indexing.seekablestream.supervisor.SeekableStreamSupervisorSpec; import org.apache.druid.jackson.DefaultObjectMapper; import org.apache.druid.java.util.common.DateTimes; @@ -43,6 +52,7 @@ import org.apache.druid.java.util.common.Pair; import org.apache.druid.metadata.MetadataSupervisorManager; import org.apache.druid.metadata.PendingSegmentRecord; +import org.apache.druid.segment.indexing.DataSchema; import org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec; import org.apache.druid.server.metrics.SupervisorStatsProvider; import org.apache.druid.timeline.partition.NumberedShardSpec; @@ -59,6 +69,7 @@ import org.junit.rules.ExpectedException; import org.junit.runner.RunWith; +import javax.annotation.Nullable; import java.lang.reflect.Field; import java.util.Collection; import 
java.util.Collections; @@ -1068,6 +1079,186 @@ public void test_isAnotherTaskGroupPublishingToPartitions() ); } + @Test + public void testResetToLatestAndBackfill() throws Exception + { + EasyMock.expect(metadataSupervisorManager.getLatest()).andReturn(ImmutableMap.of()); + replayAll(); + manager.start(); + + final ConcurrentHashMap> supervisorsMap = getSupervisorsMap(); + final SeekableStreamSupervisorSpec streamSpec = EasyMock.createNiceMock(SeekableStreamSupervisorSpec.class); + final SeekableStreamSupervisor streamSupervisor = EasyMock.createNiceMock(SeekableStreamSupervisor.class); + final SeekableStreamSupervisorIOConfig ioConfig = EasyMock.createNiceMock(SeekableStreamSupervisorIOConfig.class); + + // non-SeekableStream supervisor → IAE + // Use a concrete anonymous Supervisor (not a mock) to reliably fail instanceof SeekableStreamSupervisor + final Supervisor nonStreamSupervisor = new Supervisor() + { + @Override + public void start() + { + } + + @Override + public void stop(boolean stopGracefully) + { + } + + @Override + public SupervisorReport getStatus() + { + return null; + } + + @Override + public SupervisorStateManager.State getState() + { + return null; + } + + @Override + public void reset(DataSourceMetadata dataSourceMetadata) + { + } + }; + supervisorsMap.put("id1", Pair.of(nonStreamSupervisor, streamSpec)); + Assert.assertThrows( + IllegalArgumentException.class, + () -> manager.resetToLatestAndBackfill("id1", null) + ); + + // useEarliestSequenceNumber=true → IAE + supervisorsMap.put("id1", Pair.of(streamSupervisor, streamSpec)); + EasyMock.expect(streamSupervisor.getIoConfig()).andReturn(ioConfig).anyTimes(); + EasyMock.expect(ioConfig.isUseEarliestSequenceNumber()).andReturn(true).once(); + EasyMock.replay(streamSupervisor, streamSpec, ioConfig); + Assert.assertThrows( + IllegalArgumentException.class, + () -> manager.resetToLatestAndBackfill("id1", null) + ); + EasyMock.reset(streamSupervisor, streamSpec, ioConfig); + + // useConcurrentLocks 
not set (null context) → IAE + EasyMock.expect(streamSupervisor.getIoConfig()).andReturn(ioConfig).anyTimes(); + EasyMock.expect(ioConfig.isUseEarliestSequenceNumber()).andReturn(false).once(); + EasyMock.expect(streamSpec.getContext()).andReturn(null).once(); + EasyMock.replay(streamSupervisor, streamSpec, ioConfig); + Assert.assertThrows( + IllegalArgumentException.class, + () -> manager.resetToLatestAndBackfill("id1", null) + ); + EasyMock.reset(streamSupervisor, streamSpec, ioConfig); + + // useConcurrentLocks=false → IAE + EasyMock.expect(streamSupervisor.getIoConfig()).andReturn(ioConfig).anyTimes(); + EasyMock.expect(ioConfig.isUseEarliestSequenceNumber()).andReturn(false).once(); + EasyMock.expect(streamSpec.getContext()).andReturn(ImmutableMap.of("useConcurrentLocks", false)).once(); + EasyMock.replay(streamSupervisor, streamSpec, ioConfig); + Assert.assertThrows( + IllegalArgumentException.class, + () -> manager.resetToLatestAndBackfill("id1", null) + ); + EasyMock.reset(streamSupervisor, streamSpec, ioConfig); + + // useConcurrentLocks="true" (string) → accepted, fails at next guard (not RUNNING) + EasyMock.expect(streamSupervisor.getIoConfig()).andReturn(ioConfig).anyTimes(); + EasyMock.expect(ioConfig.isUseEarliestSequenceNumber()).andReturn(false).once(); + EasyMock.expect(streamSpec.getContext()).andReturn(ImmutableMap.of("useConcurrentLocks", "true")).once(); + EasyMock.expect(streamSupervisor.getState()).andReturn(SupervisorStateManager.BasicState.SUSPENDED).once(); + EasyMock.replay(streamSupervisor, streamSpec, ioConfig); + Assert.assertThrows( + IllegalArgumentException.class, + () -> manager.resetToLatestAndBackfill("id1", null) + ); + EasyMock.reset(streamSupervisor, streamSpec, ioConfig); + + // taskLockType=APPEND → accepted, fails at next guard (not RUNNING) + EasyMock.expect(streamSupervisor.getIoConfig()).andReturn(ioConfig).anyTimes(); + EasyMock.expect(ioConfig.isUseEarliestSequenceNumber()).andReturn(false).once(); + 
EasyMock.expect(streamSpec.getContext()).andReturn(ImmutableMap.of("taskLockType", "APPEND")).once(); + EasyMock.expect(streamSupervisor.getState()).andReturn(SupervisorStateManager.BasicState.SUSPENDED).once(); + EasyMock.replay(streamSupervisor, streamSpec, ioConfig); + Assert.assertThrows( + IllegalArgumentException.class, + () -> manager.resetToLatestAndBackfill("id1", null) + ); + EasyMock.reset(streamSupervisor, streamSpec, ioConfig); + + // supervisor not RUNNING → IAE + EasyMock.expect(streamSupervisor.getIoConfig()).andReturn(ioConfig).anyTimes(); + EasyMock.expect(ioConfig.isUseEarliestSequenceNumber()).andReturn(false).once(); + EasyMock.expect(streamSpec.getContext()).andReturn(ImmutableMap.of("useConcurrentLocks", true)).once(); + EasyMock.expect(streamSupervisor.getState()).andReturn(SupervisorStateManager.BasicState.SUSPENDED).once(); + EasyMock.replay(streamSupervisor, streamSpec, ioConfig); + Assert.assertThrows( + IllegalArgumentException.class, + () -> manager.resetToLatestAndBackfill("id1", null) + ); + EasyMock.reset(streamSupervisor, streamSpec, ioConfig); + + // empty latest offsets → ISE + EasyMock.expect(streamSupervisor.getIoConfig()).andReturn(ioConfig).anyTimes(); + EasyMock.expect(ioConfig.isUseEarliestSequenceNumber()).andReturn(false).once(); + EasyMock.expect(streamSpec.getContext()).andReturn(ImmutableMap.of("useConcurrentLocks", true)).once(); + EasyMock.expect(streamSupervisor.getState()).andReturn(SupervisorStateManager.BasicState.RUNNING).once(); + streamSupervisor.updatePartitionLagFromStream(); + EasyMock.expectLastCall().once(); + EasyMock.expect(streamSupervisor.getLatestSequencesFromStream()).andReturn(ImmutableMap.of()).once(); + EasyMock.replay(streamSupervisor, streamSpec, ioConfig); + Assert.assertThrows( + IllegalStateException.class, + () -> manager.resetToLatestAndBackfill("id1", null) + ); + EasyMock.reset(streamSupervisor, streamSpec, ioConfig); + + // empty start offsets from metadata → ISE + 
EasyMock.expect(streamSupervisor.getIoConfig()).andReturn(ioConfig).anyTimes(); + EasyMock.expect(ioConfig.isUseEarliestSequenceNumber()).andReturn(false).once(); + EasyMock.expect(streamSpec.getContext()).andReturn(ImmutableMap.of("useConcurrentLocks", true)).once(); + EasyMock.expect(streamSupervisor.getState()).andReturn(SupervisorStateManager.BasicState.RUNNING).once(); + streamSupervisor.updatePartitionLagFromStream(); + EasyMock.expectLastCall().once(); + EasyMock.expect(streamSupervisor.getLatestSequencesFromStream()).andReturn(ImmutableMap.of("0", 100L)).once(); + EasyMock.expect(streamSupervisor.getOffsetsFromMetadataStorage()).andReturn(ImmutableMap.of()).once(); + EasyMock.replay(streamSupervisor, streamSpec, ioConfig); + Assert.assertThrows( + IllegalStateException.class, + () -> manager.resetToLatestAndBackfill("id1", null) + ); + + verifyAll(); + } + + @Test + public void testCreateBackfillSpec() + { + final TestBackfillSupervisorSpec.IOConfig ioConfig = new TestBackfillSupervisorSpec.IOConfig("test-stream", null, null); + final TestBackfillSupervisorSpec.IngestionSpec ingestionSpec = new TestBackfillSupervisorSpec.IngestionSpec(ioConfig); + final SeekableStreamSupervisorSpec sourceSpec = new TestBackfillSupervisorSpec("original-id", ingestionSpec); + + final BoundedStreamConfig boundedStreamConfig = new BoundedStreamConfig( + ImmutableMap.of("0", 100L), + ImmutableMap.of("0", 200L) + ); + + // Without overriding taskCount + final SupervisorSpec backfillSpec = sourceSpec.createBackfillSpec("backfill-id", boundedStreamConfig, null); + Assert.assertEquals("backfill-id", backfillSpec.getId()); + final TestBackfillSupervisorSpec backfillCast = (TestBackfillSupervisorSpec) backfillSpec; + final BoundedStreamConfig actualConfig = backfillCast.getIoConfig().getBoundedStreamConfig(); + Assert.assertNotNull(actualConfig); + Assert.assertEquals(ImmutableMap.of("0", 100L), actualConfig.getStartSequenceNumbers()); + Assert.assertEquals(ImmutableMap.of("0", 200L), 
actualConfig.getEndSequenceNumbers()); + Assert.assertEquals(1, backfillCast.getIoConfig().getTaskCount()); + + // With overriding taskCount + final SupervisorSpec backfillSpecWithCount = sourceSpec.createBackfillSpec("backfill-id-2", boundedStreamConfig, 5); + Assert.assertEquals("backfill-id-2", backfillSpecWithCount.getId()); + final TestBackfillSupervisorSpec backfillWithCount = (TestBackfillSupervisorSpec) backfillSpecWithCount; + Assert.assertEquals(5, backfillWithCount.getIoConfig().getTaskCount()); + } + private static class TestSupervisorSpec implements SupervisorSpec { private final String id; @@ -1137,4 +1328,103 @@ public List getDataSources() return Collections.singletonList(id); } } + + @JsonTypeName("testBackfill") + private static class TestBackfillSupervisorSpec extends SeekableStreamSupervisorSpec + { + @JsonCreator + TestBackfillSupervisorSpec( + @JsonProperty("id") String id, + @JsonProperty("spec") IngestionSpec ingestionSpec + ) + { + super( + id, + ingestionSpec, + ImmutableMap.of("useConcurrentLocks", true), + false, + null, null, null, null, + MAPPER, + null, null, null, null + ); + } + + @Override + public Supervisor createSupervisor() + { + return null; + } + + @Override + public String getType() + { + return "testBackfill"; + } + + @Override + public String getSource() + { + return "test-stream"; + } + + @Override + protected SeekableStreamSupervisorSpec toggleSuspend(boolean suspend) + { + return this; + } + + @Override + public SeekableStreamSupervisorSpec createBackfillSpec( + String backfillId, + BoundedStreamConfig boundedStreamConfig, + @Nullable Integer taskCount + ) + { + return new TestBackfillSupervisorSpec( + backfillId, + new IngestionSpec(new IOConfig(getIoConfig().getStream(), taskCount, boundedStreamConfig)) + ); + } + + @Override + public SeekableStreamSupervisorIOConfig getIoConfig() + { + return getSpec().getIOConfig(); + } + + @JsonTypeName("testBackfillIngestionSpec") + static class IngestionSpec extends 
SeekableStreamSupervisorIngestionSpec + { + @JsonCreator + IngestionSpec( + @JsonProperty("ioConfig") IOConfig ioConfig + ) + { + super( + new DataSchema( + "testDS", + new TimestampSpec("time", "auto", null), + new DimensionsSpec(Collections.emptyList()), + null, null, null, null, null + ), + ioConfig, + null + ); + } + } + + @JsonTypeName("testBackfillIOConfig") + static class IOConfig extends SeekableStreamSupervisorIOConfig + { + @JsonCreator + IOConfig( + @JsonProperty("stream") String stream, + @JsonProperty("taskCount") Integer taskCount, + @JsonProperty("boundedStreamConfig") BoundedStreamConfig boundedStreamConfig + ) + { + super(stream, null, 1, taskCount, null, null, null, false, null, null, null, null, LagAggregator.DEFAULT, null, null, null, null, boundedStreamConfig); + } + } + } } diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/supervisor/SupervisorResourceTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/supervisor/SupervisorResourceTest.java index 4ccf4659994f..31e0d604a222 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/supervisor/SupervisorResourceTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/supervisor/SupervisorResourceTest.java @@ -34,6 +34,7 @@ import org.apache.druid.indexing.seekablestream.SeekableStreamIndexTaskClientFactory; import org.apache.druid.indexing.seekablestream.SeekableStreamStartSequenceNumbers; import org.apache.druid.indexing.seekablestream.TestSeekableStreamDataSourceMetadata; +import org.apache.druid.indexing.seekablestream.supervisor.BoundedStreamConfig; import org.apache.druid.indexing.seekablestream.supervisor.SeekableStreamSupervisorIOConfig; import org.apache.druid.indexing.seekablestream.supervisor.SeekableStreamSupervisorIngestionSpec; import org.apache.druid.indexing.seekablestream.supervisor.SeekableStreamSupervisorSpec; @@ -1379,6 +1380,100 @@ public void testResetOffsets() verifyAll(); } 
+ @Test + public void testResetToLatestAndBackfill() + { + // 200 - success + EasyMock.expect(taskMaster.getSupervisorManager()).andReturn(Optional.of(supervisorManager)); + EasyMock.expect(supervisorManager.getSupervisorIds()).andReturn(ImmutableSet.of("my-id")); + EasyMock.expect(supervisorManager.resetToLatestAndBackfill("my-id", null)) + .andReturn(ImmutableMap.of("id", "my-id", "backfillSupervisorId", "my-id_backfill_abcdefgh")); + replayAll(); + + Response response = supervisorResource.resetToLatestAndBackfill("my-id", null); + Assert.assertEquals(200, response.getStatus()); + Assert.assertEquals( + ImmutableMap.of("id", "my-id", "backfillSupervisorId", "my-id_backfill_abcdefgh"), + response.getEntity() + ); + verifyAll(); + resetAll(); + + // 404 - supervisor does not exist + EasyMock.expect(taskMaster.getSupervisorManager()).andReturn(Optional.of(supervisorManager)); + EasyMock.expect(supervisorManager.getSupervisorIds()).andReturn(ImmutableSet.of()); + replayAll(); + + response = supervisorResource.resetToLatestAndBackfill("my-id", null); + Assert.assertEquals(404, response.getStatus()); + verifyAll(); + resetAll(); + + // 400 - IAE (e.g. supervisor not running) + EasyMock.expect(taskMaster.getSupervisorManager()).andReturn(Optional.of(supervisorManager)); + EasyMock.expect(supervisorManager.getSupervisorIds()).andReturn(ImmutableSet.of("my-id")); + EasyMock.expect(supervisorManager.resetToLatestAndBackfill("my-id", null)) + .andThrow(new IllegalArgumentException("Supervisor[my-id] must be in a RUNNING state")); + replayAll(); + + response = supervisorResource.resetToLatestAndBackfill("my-id", null); + Assert.assertEquals(400, response.getStatus()); + Assert.assertEquals( + ImmutableMap.of("error", "Supervisor[my-id] must be in a RUNNING state"), + response.getEntity() + ); + verifyAll(); + resetAll(); + + // 500 - ISE (e.g. 
failed to retrieve offsets) + EasyMock.expect(taskMaster.getSupervisorManager()).andReturn(Optional.of(supervisorManager)); + EasyMock.expect(supervisorManager.getSupervisorIds()).andReturn(ImmutableSet.of("my-id")); + EasyMock.expect(supervisorManager.resetToLatestAndBackfill("my-id", null)) + .andThrow(new IllegalStateException("Failed to get latest offsets from stream")); + replayAll(); + + response = supervisorResource.resetToLatestAndBackfill("my-id", null); + Assert.assertEquals(500, response.getStatus()); + Assert.assertEquals( + ImmutableMap.of("error", "Failed to get latest offsets from stream"), + response.getEntity() + ); + verifyAll(); + resetAll(); + + // 400 - invalid backfillTaskCount (zero) + replayAll(); + + response = supervisorResource.resetToLatestAndBackfill("my-id", 0); + Assert.assertEquals(400, response.getStatus()); + Assert.assertEquals( + ImmutableMap.of("error", "backfillTaskCount must be a positive integer"), + response.getEntity() + ); + verifyAll(); + resetAll(); + + // 400 - invalid backfillTaskCount (negative) + replayAll(); + + response = supervisorResource.resetToLatestAndBackfill("my-id", -1); + Assert.assertEquals(400, response.getStatus()); + Assert.assertEquals( + ImmutableMap.of("error", "backfillTaskCount must be a positive integer"), + response.getEntity() + ); + verifyAll(); + resetAll(); + + // 503 - no supervisor manager (not leader) + EasyMock.expect(taskMaster.getSupervisorManager()).andReturn(Optional.absent()); + replayAll(); + + response = supervisorResource.resetToLatestAndBackfill("my-id", null); + Assert.assertEquals(503, response.getStatus()); + verifyAll(); + } + @Test public void testNoopSupervisorSpecSerde() throws Exception { @@ -1668,6 +1763,16 @@ protected SeekableStreamSupervisorSpec toggleSuspend(boolean suspend) return null; } + @Override + public SeekableStreamSupervisorSpec createBackfillSpec( + String backfillId, + BoundedStreamConfig boundedStreamConfig, + @Nullable Integer taskCount + ) + { + 
return null; + } + @JsonIgnore @Nonnull @Override diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/seekablestream/supervisor/SeekableStreamSupervisorSpecTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/seekablestream/supervisor/SeekableStreamSupervisorSpecTest.java index 8d1b5350e8ce..80120d07fdf5 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/seekablestream/supervisor/SeekableStreamSupervisorSpecTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/seekablestream/supervisor/SeekableStreamSupervisorSpecTest.java @@ -942,6 +942,16 @@ protected SeekableStreamSupervisorSpec toggleSuspend(boolean suspend) return null; } + @Override + public SeekableStreamSupervisorSpec createBackfillSpec( + String backfillId, + BoundedStreamConfig boundedStreamConfig, + @Nullable Integer taskCount + ) + { + return null; + } + @Override public String getType() { diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/seekablestream/supervisor/SeekableStreamSupervisorStateTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/seekablestream/supervisor/SeekableStreamSupervisorStateTest.java index d61049777c8f..9e45920ad719 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/seekablestream/supervisor/SeekableStreamSupervisorStateTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/seekablestream/supervisor/SeekableStreamSupervisorStateTest.java @@ -3059,7 +3059,7 @@ public String toString() final TestSeekableStreamSupervisor supervisor = new TestSeekableStreamSupervisor() { @Override - protected SeekableStreamDataSourceMetadata createDataSourceMetaDataForReset( + public SeekableStreamDataSourceMetadata createDataSourceMetaDataForReset( String stream, Map map ) @@ -3284,7 +3284,7 @@ protected String baseTaskName() } @Override - protected void updatePartitionLagFromStream() + public void updatePartitionLagFromStream() { // do nothing } @@ 
-3381,7 +3381,7 @@ protected boolean doesTaskMatchSupervisor(Task task) } @Override - protected SeekableStreamDataSourceMetadata createDataSourceMetaDataForReset( + public SeekableStreamDataSourceMetadata createDataSourceMetaDataForReset( String stream, Map map ) @@ -3521,7 +3521,7 @@ public LagStats computeLagStats() } @Override - protected Map getLatestSequencesFromStream() + public Map getLatestSequencesFromStream() { return streamOffsets; } diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/seekablestream/supervisor/SeekableStreamSupervisorTestBase.java b/indexing-service/src/test/java/org/apache/druid/indexing/seekablestream/supervisor/SeekableStreamSupervisorTestBase.java index 4eefaed9bd99..c96a64211b97 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/seekablestream/supervisor/SeekableStreamSupervisorTestBase.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/seekablestream/supervisor/SeekableStreamSupervisorTestBase.java @@ -124,7 +124,7 @@ protected String baseTaskName() } @Override - protected void updatePartitionLagFromStream() + public void updatePartitionLagFromStream() { // do nothing } @@ -205,7 +205,7 @@ protected boolean doesTaskMatchSupervisor(Task task) } @Override - protected SeekableStreamDataSourceMetadata createDataSourceMetaDataForReset( + public SeekableStreamDataSourceMetadata createDataSourceMetaDataForReset( String stream, Map map ) @@ -436,6 +436,16 @@ protected SeekableStreamSupervisorSpec toggleSuspend(boolean suspend) { return null; } + + @Override + public SeekableStreamSupervisorSpec createBackfillSpec( + String backfillId, + BoundedStreamConfig boundedStreamConfig, + @Nullable Integer taskCount + ) + { + return null; + } } protected static SeekableStreamSupervisorTuningConfig getTuningConfig() diff --git a/multi-stage-query/src/test/java/org/apache/druid/msq/indexing/MSQWorkerTaskLauncherRetryTest.java 
b/multi-stage-query/src/test/java/org/apache/druid/msq/indexing/MSQWorkerTaskLauncherRetryTest.java index 0ca643f109f7..35488be081b1 100644 --- a/multi-stage-query/src/test/java/org/apache/druid/msq/indexing/MSQWorkerTaskLauncherRetryTest.java +++ b/multi-stage-query/src/test/java/org/apache/druid/msq/indexing/MSQWorkerTaskLauncherRetryTest.java @@ -322,6 +322,12 @@ public ListenableFuture> terminateSupervisor(String supervis throw new UOE("Not implemented"); } + @Override + public ListenableFuture> resetToLatestAndBackfill(String supervisorId) + { + throw new UOE("Not implemented"); + } + @Override public ListenableFuture> supervisorStatuses() { diff --git a/server/src/main/java/org/apache/druid/rpc/indexing/NoopOverlordClient.java b/server/src/main/java/org/apache/druid/rpc/indexing/NoopOverlordClient.java index 81fccf19f131..2b1ad6a555a7 100644 --- a/server/src/main/java/org/apache/druid/rpc/indexing/NoopOverlordClient.java +++ b/server/src/main/java/org/apache/druid/rpc/indexing/NoopOverlordClient.java @@ -114,6 +114,12 @@ public ListenableFuture> terminateSupervisor(String supervis throw new UnsupportedOperationException(); } + @Override + public ListenableFuture> resetToLatestAndBackfill(String supervisorId) + { + throw new UnsupportedOperationException(); + } + @Override public ListenableFuture> supervisorStatuses() { diff --git a/server/src/main/java/org/apache/druid/rpc/indexing/OverlordClient.java b/server/src/main/java/org/apache/druid/rpc/indexing/OverlordClient.java index c4d348997779..baf7e4297c9d 100644 --- a/server/src/main/java/org/apache/druid/rpc/indexing/OverlordClient.java +++ b/server/src/main/java/org/apache/druid/rpc/indexing/OverlordClient.java @@ -197,6 +197,15 @@ ListenableFuture> taskStatuses( */ ListenableFuture> terminateSupervisor(String supervisorId); + /** + * Resets a supervisor to the latest stream offsets and starts a bounded backfill supervisor. + *

+ * API: {@code POST /druid/indexer/v1/supervisor/{supervisorId}/resetToLatestAndBackfill} + * + * @return Map containing "id" and "backfillSupervisorId" + */ + ListenableFuture> resetToLatestAndBackfill(String supervisorId); + /** * Returns all current supervisor statuses. */ diff --git a/server/src/main/java/org/apache/druid/rpc/indexing/OverlordClientImpl.java b/server/src/main/java/org/apache/druid/rpc/indexing/OverlordClientImpl.java index 0499a62f090a..3657d8b83a6f 100644 --- a/server/src/main/java/org/apache/druid/rpc/indexing/OverlordClientImpl.java +++ b/server/src/main/java/org/apache/druid/rpc/indexing/OverlordClientImpl.java @@ -265,6 +265,23 @@ public ListenableFuture> terminateSupervisor(String supervis ); } + @Override + public ListenableFuture> resetToLatestAndBackfill(String supervisorId) + { + final String path = StringUtils.format( + "/druid/indexer/v1/supervisor/%s/resetToLatestAndBackfill", + StringUtils.urlEncode(supervisorId) + ); + + return FutureUtils.transform( + client.asyncRequest( + new RequestBuilder(HttpMethod.POST, path), + new BytesFullResponseHandler() + ), + holder -> JacksonUtils.readValue(jsonMapper, holder.getContent(), new TypeReference<>() {}) + ); + } + @Override public ListenableFuture> supervisorStatuses() { diff --git a/services/src/test/java/org/apache/druid/testing/embedded/EmbeddedClusterApis.java b/services/src/test/java/org/apache/druid/testing/embedded/EmbeddedClusterApis.java index 257533aecbd0..6ae8750b8d8e 100644 --- a/services/src/test/java/org/apache/druid/testing/embedded/EmbeddedClusterApis.java +++ b/services/src/test/java/org/apache/druid/testing/embedded/EmbeddedClusterApis.java @@ -430,6 +430,16 @@ public String postSupervisor(SupervisorSpec supervisor) return onLeaderOverlord(o -> o.postSupervisor(supervisor)).get("id"); } + /** + * Resets a supervisor to the latest stream offsets and starts a bounded backfill supervisor.
+ * + * @return Map containing "id" and "backfillSupervisorId" + */ + public Map resetToLatestAndBackfill(String supervisorId) + { + return onLeaderOverlord(o -> o.resetToLatestAndBackfill(supervisorId)); + } + /** * Fetches the current status of the given supervisor ID. */