Skip to content

Commit 2d563de

Browse files
authored
By default don't allow index_hadoop tasks to run on a cluster, forcing operators to acknowledge that they are using a deprecated feature (apache#18239)
* By default don't allow index_hadoop tasks to run on a cluster, forcing operators to acknowledge that they are using a deprecated feature * update unclear recommendation from log * Fixup codeql warning * fix UT
1 parent edf2162 commit 2d563de

5 files changed

Lines changed: 88 additions & 5 deletions

File tree

docs/configuration/index.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1414,6 +1414,7 @@ Additional Peon configs include:
14141414
|`druid.indexer.task.ignoreTimestampSpecForDruidInputSource`|If true, tasks using the [Druid input source](../ingestion/input-sources.md) will ignore the provided timestampSpec, and will use the `__time` column of the input datasource. This option is provided for compatibility with ingestion specs written before Druid 0.22.0.|false|
14151415
|`druid.indexer.task.storeEmptyColumns`|Boolean value for whether or not to store empty columns during ingestion. When set to true, Druid stores every column specified in the [`dimensionsSpec`](../ingestion/ingestion-spec.md#dimensionsspec). If you use the string-based schemaless ingestion and don't specify any dimensions to ingest, you must also set [`includeAllDimensions`](../ingestion/ingestion-spec.md#dimensionsspec) for Druid to store empty columns.<br/><br/>If you set `storeEmptyColumns` to false, Druid SQL queries referencing empty columns will fail. If you intend to leave `storeEmptyColumns` disabled, you should either ingest placeholder data for empty columns or else not query on empty columns.<br/><br/>You can overwrite this configuration by setting `storeEmptyColumns` in the [task context](../ingestion/tasks.md#context-parameters).|true|
14161416
|`druid.indexer.task.tmpStorageBytesPerTask`|Maximum number of bytes per task to be used to store temporary files on disk. This config is generally intended for internal usage. Attempts to set it are very likely to be overwritten by the TaskRunner that executes the task, so be sure of what you expect to happen before directly adjusting this configuration parameter. The config is documented here primarily to provide an understanding of what it means if/when someone sees that it has been set. A value of -1 disables this limit. |-1|
1417+
|`druid.indexer.task.allowHadoopTaskExecution`|Boolean flag controlling whether the cluster allows `index_hadoop` tasks to be executed. `index_hadoop` is deprecated; defaulting this to false forces cluster operators to acknowledge the deprecation and consciously opt in to using `index_hadoop`, with the understanding that it will be removed in a future release.|false|
14171418
|`druid.indexer.server.maxChatRequests`|Maximum number of concurrent requests served by a task's chat handler. Set to 0 to disable limiting.|0|
14181419

14191420
If the Peon is running in remote mode, there must be an Overlord up and running. Peons in remote mode can set the following configurations:

indexing-hadoop/src/main/java/org/apache/druid/indexer/HadoopIndexTask.java

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -295,6 +295,22 @@ public TaskStatus runTask(TaskToolbox toolbox)
295295
{
296296
try {
297297
taskConfig = toolbox.getConfig();
298+
if (!taskConfig.isAllowHadoopTaskExecution()) {
299+
errorMsg = StringUtils.format(
300+
"Hadoop tasks are deprecated and will be removed in a future release. "
301+
+ "Currently, they are not allowed to run on this cluster. If you wish to run them despite deprecation, "
302+
+ "please set [%s] to true.",
303+
TaskConfig.ALLOW_HADOOP_TASK_EXECUTION_KEY
304+
);
305+
log.error(errorMsg);
306+
toolbox.getTaskReportFileWriter().write(getId(), getTaskCompletionReports());
307+
return TaskStatus.failure(getId(), errorMsg);
308+
}
309+
log.warn("Running deprecated index_hadoop task [%s]. "
310+
+ "Hadoop batch indexing is deprecated and will be removed in a future release. "
311+
+ "Please plan your migration to one of Druid's supported indexing patterns.",
312+
getId()
313+
);
298314
if (chatHandlerProvider.isPresent()) {
299315
log.info("Found chat handler of class[%s]", chatHandlerProvider.get().getClass().getName());
300316
chatHandlerProvider.get().register(getId(), this, false);

indexing-hadoop/src/test/java/org/apache/druid/indexer/HadoopIndexTaskTest.java

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,9 @@
2323
import com.google.common.collect.ImmutableList;
2424
import com.google.common.collect.ImmutableMap;
2525
import org.apache.druid.indexer.granularity.UniformGranularitySpec;
26+
import org.apache.druid.indexing.common.TaskToolbox;
27+
import org.apache.druid.indexing.common.config.TaskConfigBuilder;
28+
import org.apache.druid.indexing.overlord.TestTaskToolboxFactory;
2629
import org.apache.druid.jackson.DefaultObjectMapper;
2730
import org.apache.druid.java.util.common.Intervals;
2831
import org.apache.druid.java.util.common.granularity.Granularities;
@@ -41,6 +44,45 @@ public class HadoopIndexTaskTest
4144
{
4245
private final ObjectMapper jsonMapper = new DefaultObjectMapper();
4346

47+
@Test
48+
public void testHadoopTaskWontRunWithDefaultTaskConfig()
49+
{
50+
final HadoopIndexTask task = new HadoopIndexTask(
51+
null,
52+
new HadoopIngestionSpec(
53+
DataSchema.builder()
54+
.withDataSource("foo")
55+
.withGranularity(
56+
new UniformGranularitySpec(
57+
Granularities.DAY,
58+
null,
59+
ImmutableList.of(Intervals.of("2010-01-01/P1D"))
60+
)
61+
)
62+
.withObjectMapper(jsonMapper)
63+
.build(),
64+
new HadoopIOConfig(ImmutableMap.of("paths", "bar"), null, null),
65+
null
66+
),
67+
null,
68+
null,
69+
"blah",
70+
jsonMapper,
71+
null,
72+
AuthTestUtils.TEST_AUTHORIZER_MAPPER,
73+
null,
74+
new HadoopTaskConfig(null, null)
75+
);
76+
77+
TestTaskToolboxFactory.Builder builder = new TestTaskToolboxFactory.Builder().setConfig(new TaskConfigBuilder().build());
78+
TaskToolbox toolbox = new TestTaskToolboxFactory(builder).build(task);
79+
80+
Assert.assertEquals("Hadoop tasks are deprecated and will be removed in a future release. Currently, "
81+
+ "they are not allowed to run on this cluster. If you wish to run them despite deprecation, "
82+
+ "please set [druid.indexer.task.allowHadoopTaskExecution] to true.",
83+
task.runTask(toolbox).getErrorMsg());
84+
}
85+
4486
@Test
4587
public void testCorrectInputSourceResources()
4688
{

indexing-service/src/main/java/org/apache/druid/indexing/common/config/TaskConfig.java

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141
*/
4242
public class TaskConfig implements TaskDirectory
4343
{
44+
public static final String ALLOW_HADOOP_TASK_EXECUTION_KEY = "druid.indexer.task.allowHadoopTaskExecution";
4445
private static final Period DEFAULT_DIRECTORY_LOCK_TIMEOUT = new Period("PT10M");
4546
private static final Period DEFAULT_GRACEFUL_SHUTDOWN_TIMEOUT = new Period("PT5M");
4647
private static final boolean DEFAULT_STORE_EMPTY_COLUMNS = true;
@@ -76,6 +77,9 @@ public class TaskConfig implements TaskDirectory
7677
@JsonProperty
7778
private final long tmpStorageBytesPerTask;
7879

80+
@JsonProperty
81+
private final boolean allowHadoopTaskExecution;
82+
7983
@JsonCreator
8084
public TaskConfig(
8185
@JsonProperty("baseDir") String baseDir,
@@ -87,7 +91,8 @@ public TaskConfig(
8791
@JsonProperty("ignoreTimestampSpecForDruidInputSource") boolean ignoreTimestampSpecForDruidInputSource,
8892
@JsonProperty("storeEmptyColumns") @Nullable Boolean storeEmptyColumns,
8993
@JsonProperty("encapsulatedTask") boolean enableTaskLevelLogPush,
90-
@JsonProperty("tmpStorageBytesPerTask") @Nullable Long tmpStorageBytesPerTask
94+
@JsonProperty("tmpStorageBytesPerTask") @Nullable Long tmpStorageBytesPerTask,
95+
@JsonProperty("allowHadoopTaskExecution") boolean allowHadoopTaskExecution
9196
)
9297
{
9398
this.baseDir = Configs.valueOrDefault(baseDir, System.getProperty("java.io.tmpdir"));
@@ -113,6 +118,7 @@ public TaskConfig(
113118

114119
this.storeEmptyColumns = Configs.valueOrDefault(storeEmptyColumns, DEFAULT_STORE_EMPTY_COLUMNS);
115120
this.tmpStorageBytesPerTask = Configs.valueOrDefault(tmpStorageBytesPerTask, DEFAULT_TMP_STORAGE_BYTES_PER_TASK);
121+
this.allowHadoopTaskExecution = allowHadoopTaskExecution;
116122
}
117123

118124
private TaskConfig(
@@ -125,7 +131,8 @@ private TaskConfig(
125131
boolean ignoreTimestampSpecForDruidInputSource,
126132
boolean storeEmptyColumns,
127133
boolean encapsulatedTask,
128-
long tmpStorageBytesPerTask
134+
long tmpStorageBytesPerTask,
135+
boolean allowHadoopTaskExecution
129136
)
130137
{
131138
this.baseDir = baseDir;
@@ -138,6 +145,7 @@ private TaskConfig(
138145
this.storeEmptyColumns = storeEmptyColumns;
139146
this.encapsulatedTask = encapsulatedTask;
140147
this.tmpStorageBytesPerTask = tmpStorageBytesPerTask;
148+
this.allowHadoopTaskExecution = allowHadoopTaskExecution;
141149
}
142150

143151
@JsonProperty
@@ -230,6 +238,12 @@ public long getTmpStorageBytesPerTask()
230238
return tmpStorageBytesPerTask;
231239
}
232240

241+
@JsonProperty
242+
public boolean isAllowHadoopTaskExecution()
243+
{
244+
return allowHadoopTaskExecution;
245+
}
246+
233247
private String defaultDir(@Nullable String configParameter, final String defaultVal)
234248
{
235249
if (configParameter == null) {
@@ -251,7 +265,8 @@ public TaskConfig withBaseTaskDir(File baseTaskDir)
251265
ignoreTimestampSpecForDruidInputSource,
252266
storeEmptyColumns,
253267
encapsulatedTask,
254-
tmpStorageBytesPerTask
268+
tmpStorageBytesPerTask,
269+
allowHadoopTaskExecution
255270
);
256271
}
257272

@@ -267,7 +282,8 @@ public TaskConfig withTmpStorageBytesPerTask(long tmpStorageBytesPerTask)
267282
ignoreTimestampSpecForDruidInputSource,
268283
storeEmptyColumns,
269284
encapsulatedTask,
270-
tmpStorageBytesPerTask
285+
tmpStorageBytesPerTask,
286+
allowHadoopTaskExecution
271287
);
272288
}
273289
}

indexing-service/src/test/java/org/apache/druid/indexing/common/config/TaskConfigBuilder.java

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ public class TaskConfigBuilder
3636
private Boolean storeEmptyColumns;
3737
private boolean enableTaskLevelLogPush;
3838
private Long tmpStorageBytesPerTask;
39+
private boolean allowHadoopTaskExecution;
3940

4041
public TaskConfigBuilder setBaseDir(String baseDir)
4142
{
@@ -97,6 +98,12 @@ public TaskConfigBuilder setTmpStorageBytesPerTask(Long tmpStorageBytesPerTask)
9798
return this;
9899
}
99100

101+
public TaskConfigBuilder setAllowHadoopTaskExecution(boolean allowHadoopTaskExecution)
102+
{
103+
this.allowHadoopTaskExecution = allowHadoopTaskExecution;
104+
return this;
105+
}
106+
100107
public TaskConfig build()
101108
{
102109
return new TaskConfig(
@@ -109,7 +116,8 @@ public TaskConfig build()
109116
ignoreTimestampSpecForDruidInputSource,
110117
storeEmptyColumns,
111118
enableTaskLevelLogPush,
112-
tmpStorageBytesPerTask
119+
tmpStorageBytesPerTask,
120+
allowHadoopTaskExecution
113121
);
114122
}
115123
}

0 commit comments

Comments
 (0)