diff --git a/multi-stage-query/src/main/java/org/apache/druid/msq/dart/controller/DartControllerContext.java b/multi-stage-query/src/main/java/org/apache/druid/msq/dart/controller/DartControllerContext.java index a4a4ff8945a4..52936b8e0f87 100644 --- a/multi-stage-query/src/main/java/org/apache/druid/msq/dart/controller/DartControllerContext.java +++ b/multi-stage-query/src/main/java/org/apache/druid/msq/dart/controller/DartControllerContext.java @@ -254,4 +254,10 @@ public int targetPartitionsPerWorker() DEFAULT_TARGET_PARTITIONS_PER_WORKER ); } + + @Override + public boolean isDebug() + { + return context.isDebug(); + } } diff --git a/multi-stage-query/src/main/java/org/apache/druid/msq/dart/worker/DartWorkerContext.java b/multi-stage-query/src/main/java/org/apache/druid/msq/dart/worker/DartWorkerContext.java index 6aaddba48d5a..bde9f8968a0d 100644 --- a/multi-stage-query/src/main/java/org/apache/druid/msq/dart/worker/DartWorkerContext.java +++ b/multi-stage-query/src/main/java/org/apache/druid/msq/dart/worker/DartWorkerContext.java @@ -274,6 +274,12 @@ public boolean includeAllCounters() return true; } + @Override + public boolean isDebug() + { + return queryContext.isDebug(); + } + @Override public DruidNode selfNode() { diff --git a/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerContext.java b/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerContext.java index 2d14c10788b2..e12c5c3b3362 100644 --- a/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerContext.java +++ b/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerContext.java @@ -135,4 +135,9 @@ default File taskTempDir() * shuffle specs that have {@link ShuffleSpec#isAdjustable()} set to true. */ int targetPartitionsPerWorker(); + + /** + * Whether the controller should log full stack traces on error. + */ + boolean isDebug(); } diff --git a/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java b/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java index f616db7a032a..83568629be1d 100644 --- a/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java +++ b/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java @@ -487,11 +487,11 @@ private MSQTaskReportPayload runInternal(final QueryListener queryListener, fina // Log the errors we encountered. if (controllerError != null) { - log.warn("Controller: %s", MSQTasks.errorReportToLogMessage(controllerError)); + log.warn("Controller: %s", MSQTasks.errorReportToLogMessage(controllerError, context.isDebug())); } if (workerError != null) { - log.warn("Worker: %s", MSQTasks.errorReportToLogMessage(workerError)); + log.warn("Worker: %s", MSQTasks.errorReportToLogMessage(workerError, context.isDebug())); } } if (queryKernel != null && queryKernel.isSuccess()) { diff --git a/multi-stage-query/src/main/java/org/apache/druid/msq/exec/MSQTasks.java b/multi-stage-query/src/main/java/org/apache/druid/msq/exec/MSQTasks.java index b47124aef6b4..ea47ded0c922 100644 --- a/multi-stage-query/src/main/java/org/apache/druid/msq/exec/MSQTasks.java +++ b/multi-stage-query/src/main/java/org/apache/druid/msq/exec/MSQTasks.java @@ -203,9 +203,10 @@ static MSQErrorReport makeErrorReport( } /** - * Returns a string form of a {@link MSQErrorReport} suitable for logging. + * Returns a string form of a {@link MSQErrorReport} suitable for logging. When {@code forceFullStackTrace} is true, + * the full stack trace is always included (when available), regardless of fault type. */ - static String errorReportToLogMessage(final MSQErrorReport errorReport) + static String errorReportToLogMessage(final MSQErrorReport errorReport, final boolean forceFullStackTrace) { final StringBuilder logMessage = new StringBuilder("Work failed"); @@ -222,7 +223,7 @@ static String errorReportToLogMessage(final MSQErrorReport errorReport) logMessage.append(": ").append(MSQFaultUtils.generateMessageWithErrorCode(errorReport.getFault())); if (errorReport.getExceptionStackTrace() != null) { - if (logFullStackTrace(errorReport.getFault())) { + if (forceFullStackTrace || logFullStackTrace(errorReport.getFault())) { logMessage.append('\n').append(errorReport.getExceptionStackTrace()); } else { // Log first line only (error class, message) for known faults, to avoid polluting logs. diff --git a/multi-stage-query/src/main/java/org/apache/druid/msq/exec/WorkerContext.java b/multi-stage-query/src/main/java/org/apache/druid/msq/exec/WorkerContext.java index 61c1dd9654e3..9e696a16cce6 100644 --- a/multi-stage-query/src/main/java/org/apache/druid/msq/exec/WorkerContext.java +++ b/multi-stage-query/src/main/java/org/apache/druid/msq/exec/WorkerContext.java @@ -116,6 +116,11 @@ public interface WorkerContext extends Closeable */ boolean includeAllCounters(); + /** + * Whether to log full stack traces for all errors. + */ + boolean isDebug(); + @Override void close(); } diff --git a/multi-stage-query/src/main/java/org/apache/druid/msq/exec/WorkerImpl.java b/multi-stage-query/src/main/java/org/apache/druid/msq/exec/WorkerImpl.java index 0b3bfdc235ae..fa7d5c232ce6 100644 --- a/multi-stage-query/src/main/java/org/apache/druid/msq/exec/WorkerImpl.java +++ b/multi-stage-query/src/main/java/org/apache/druid/msq/exec/WorkerImpl.java @@ -195,7 +195,7 @@ public void run() if (maybeErrorReport.isPresent()) { final MSQErrorReport errorReport = maybeErrorReport.get(); - final String logMessage = MSQTasks.errorReportToLogMessage(errorReport); + final String logMessage = MSQTasks.errorReportToLogMessage(errorReport, context.isDebug()); log.warn("%s", logMessage); // Inform controller of any errors that occur, unless we were canceled. This prevents attempting to contact diff --git a/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/IndexerControllerContext.java b/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/IndexerControllerContext.java index cd91376e5c9f..18aebe0bcd21 100644 --- a/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/IndexerControllerContext.java +++ b/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/IndexerControllerContext.java @@ -259,6 +259,12 @@ public int targetPartitionsPerWorker() ); } + @Override + public boolean isDebug() + { + return taskQuerySpecContext.isDebug(); + } + /** * Helper method for {@link #queryKernelConfig(MSQSpec)}. Also used in tests. */ diff --git a/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/IndexerWorkerContext.java b/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/IndexerWorkerContext.java index 1e445f5e2c9b..3a50cdc71f20 100644 --- a/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/IndexerWorkerContext.java +++ b/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/IndexerWorkerContext.java @@ -93,6 +93,7 @@ public class IndexerWorkerContext implements WorkerContext private final int maxConcurrentStages; private final boolean liveReportCounters; private final boolean includeAllCounters; + private final boolean debug; private final int threadCount; // Written under synchronized(this) using double-checked locking. @@ -134,6 +135,7 @@ public IndexerWorkerContext( ); this.liveReportCounters = MultiStageQueryContext.getLiveReportCounters(queryContext, DEFAULT_LIVE_REPORT_COUNTERS); this.includeAllCounters = MultiStageQueryContext.getIncludeAllCounters(queryContext); + this.debug = queryContext.isDebug(); // Compute thread count once in constructor final int baseThreadCount = memoryIntrospector.numProcessingThreads(); @@ -327,6 +329,12 @@ public boolean includeAllCounters() return includeAllCounters; } + @Override + public boolean isDebug() + { + return debug; + } + public ServiceLocator controllerLocator() { return controllerLocator; diff --git a/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/error/DruidExceptionFault.java b/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/error/DruidExceptionFault.java index 32dded914ee3..d58578e7f29f 100644 --- a/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/error/DruidExceptionFault.java +++ b/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/error/DruidExceptionFault.java @@ -106,6 +106,7 @@ public DruidException toDruidException() return DruidException.forPersona(personaEnum) .ofCategory(categoryEnum) .withErrorCode(druidErrorCode) + .wasDeserialized() .build(getErrorMessage()) .withContext(context); } diff --git a/multi-stage-query/src/test/java/org/apache/druid/msq/test/MSQTestControllerContext.java b/multi-stage-query/src/test/java/org/apache/druid/msq/test/MSQTestControllerContext.java index 5bae51c53e3f..6f09c43ff028 100644 --- a/multi-stage-query/src/test/java/org/apache/druid/msq/test/MSQTestControllerContext.java +++ b/multi-stage-query/src/test/java/org/apache/druid/msq/test/MSQTestControllerContext.java @@ -485,6 +485,12 @@ public int targetPartitionsPerWorker() return 1; } + @Override + public boolean isDebug() + { + return true; + } + @Override public ControllerContext newContext(QueryContext context) { diff --git a/multi-stage-query/src/test/java/org/apache/druid/msq/test/MSQTestWorkerContext.java b/multi-stage-query/src/test/java/org/apache/druid/msq/test/MSQTestWorkerContext.java index a926aabee6f7..a8773635484c 100644 --- a/multi-stage-query/src/test/java/org/apache/druid/msq/test/MSQTestWorkerContext.java +++ b/multi-stage-query/src/test/java/org/apache/druid/msq/test/MSQTestWorkerContext.java @@ -206,6 +206,12 @@ public boolean includeAllCounters() return true; } + @Override + public boolean isDebug() + { + return true; + } + @Override public void close() { diff --git a/multi-stage-query/src/test/java/org/apache/druid/msq/test/TestDartControllerContextFactoryImpl.java b/multi-stage-query/src/test/java/org/apache/druid/msq/test/TestDartControllerContextFactoryImpl.java index 6032afc853bc..c3eaf6e9c9eb 100644 --- a/multi-stage-query/src/test/java/org/apache/druid/msq/test/TestDartControllerContextFactoryImpl.java +++ b/multi-stage-query/src/test/java/org/apache/druid/msq/test/TestDartControllerContextFactoryImpl.java @@ -108,6 +108,12 @@ public void emitMetric(MSQMetricEventBuilder metricBuilder) { serviceEmitter.emit(metricBuilder.build("controller", queryId())); } + + @Override + public boolean isDebug() + { + return true; + } }; } diff --git a/processing/src/main/java/org/apache/druid/error/DruidException.java b/processing/src/main/java/org/apache/druid/error/DruidException.java index 4883c8115627..54382f5274b6 100644 --- a/processing/src/main/java/org/apache/druid/error/DruidException.java +++ b/processing/src/main/java/org/apache/druid/error/DruidException.java @@ -477,7 +477,7 @@ public DruidExceptionBuilder ofCategory(Category category) * * @return the builder */ - DruidExceptionBuilder wasDeserialized() + public DruidExceptionBuilder wasDeserialized() { this.deserialized = true; return this;