
Log stack traces on data nodes before they are cleared for transport #125732

Open · wants to merge 9 commits into main
2 changes: 1 addition & 1 deletion CONTRIBUTING.md
@@ -467,7 +467,7 @@ expensive messages that will usually be discarded:

Logging is an important behaviour of the system and sometimes deserves its own
unit tests, especially if there is complex logic for computing what is logged
-and when to log it. You can use a `org.elasticsearch.test.MockLogAppender` to
+and when to log it. You can use a `org.elasticsearch.test.MockLog` to
make assertions about the logs that are being emitted.

Logging is a powerful diagnostic technique, but it is not the only possibility.
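For background on the MockLog pattern this change points to: a test captures the logger for a class, registers expectations, runs the code under test, and then verifies the expectations. A minimal sketch, assuming MockLog's SeenEventExpectation API; the logger, level, and message pattern here are illustrative, not taken from this PR:

    import org.apache.logging.log4j.Level;
    import org.elasticsearch.search.SearchService;
    import org.elasticsearch.test.MockLog;

    // Minimal sketch: assert that the code under test emits a matching log event.
    try (var mockLog = MockLog.capture(SearchService.class)) {
        mockLog.addExpectation(
            new MockLog.SeenEventExpectation(
                "failure is logged",                     // expectation name
                SearchService.class.getCanonicalName(),  // logger to watch
                Level.DEBUG,                             // expected level
                "*failed to execute search request*"     // message pattern (assumed)
            )
        );
        // ... trigger the code path that should log ...
        mockLog.assertAllExpectationsMatched();
    }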
5 changes: 5 additions & 0 deletions docs/changelog/125732.yaml
@@ -0,0 +1,5 @@
pr: 125732
summary: Log stack traces on data nodes before they are cleared for transport
area: Search
type: bug
issues: []
@@ -11,16 +11,25 @@

import org.apache.http.entity.ContentType;
import org.apache.http.nio.entity.NByteArrayEntity;
import org.apache.logging.log4j.Level;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.core.config.Configurator;
import org.elasticsearch.action.search.MultiSearchRequest;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.client.Request;
import org.elasticsearch.cluster.metadata.IndexMetadata;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.util.CollectionUtils;
import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.search.ErrorTraceHelper;
import org.elasticsearch.search.SearchService;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.test.MockLog;
import org.elasticsearch.test.transport.MockTransportService;
import org.elasticsearch.xcontent.XContentType;
import org.junit.AfterClass;
import org.junit.Before;
import org.junit.BeforeClass;

import java.io.IOException;
import java.nio.charset.Charset;
@@ -32,24 +41,41 @@
public class SearchErrorTraceIT extends HttpSmokeTestCase {
private BooleanSupplier hasStackTrace;

private static final String loggerName = "org.elasticsearch.search.SearchService";
Contributor: I'd probably use SearchService.class here.

Contributor Author: Good call, done.

private static Level originalLogLevel;

@Override
protected Collection<Class<? extends Plugin>> nodePlugins() {
return CollectionUtils.appendToCopyNoNullElements(super.nodePlugins(), MockTransportService.TestPlugin.class);
}

@BeforeClass
public static void setDebugLogLevel() {
originalLogLevel = LogManager.getLogger(loggerName).getLevel();
Configurator.setLevel(loggerName, Level.DEBUG);
}

@AfterClass
public static void resetLogLevel() {
Configurator.setLevel(loggerName, originalLogLevel);
}

@Before
public void setupMessageListener() {
hasStackTrace = ErrorTraceHelper.setupErrorTraceListener(internalCluster());
}

-private void setupIndexWithDocs() {
-    createIndex("test1", "test2");
+private int setupIndexWithDocs() {
+    int numShards = between(DEFAULT_MIN_NUM_SHARDS, DEFAULT_MAX_NUM_SHARDS);
Member: The number of shards is already randomized if you call createIndex. See ESIntegTestCase#numberOfShards.

createIndex("test1", Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, numShards).build());
createIndex("test2", Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, numShards).build());
indexRandom(
true,
prepareIndex("test1").setId("1").setSource("field", "foo"),
prepareIndex("test2").setId("10").setSource("field", 5)
);
refresh();
+return numShards;
}

public void testSearchFailingQueryErrorTraceDefault() throws IOException {
@@ -108,6 +134,80 @@ public void testSearchFailingQueryErrorTraceFalse() throws IOException {
assertFalse(hasStackTrace.getAsBoolean());
}

public void testLoggingInSearchFailingQueryErrorTraceDefault() throws IOException {
Member: You could probably fold this test into the error_trace=true test? You could randomly set the flag to true, otherwise not; the result should be the same?

Contributor Author: (done)
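One possible fold, as a hypothetical sketch: the default (no error_trace parameter) and an explicit error_trace=false both expect the stack trace to be logged on the data node, so a single test can randomize between them. randomBoolean and the helper calls below appear in this diff; the folded structure itself is the assumption.

    // Hypothetical merged test body: randomize between the default and explicit
    // error_trace=false, since both paths expect identical data-node logging.
    if (randomBoolean()) {
        searchRequest.addParameter("error_trace", "false");
    }
    try (var mockLog = MockLog.capture(SearchService.class)) {
        ErrorTraceHelper.addSeenLoggingExpectations(numShards, mockLog, errorTriggeringIndex);
        getRestClient().performRequest(searchRequest);
        mockLog.assertAllExpectationsMatched();
    }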

int numShards = setupIndexWithDocs();

Request searchRequest = new Request("POST", "/_search");
searchRequest.setJsonEntity("""
{
"query": {
"simple_query_string" : {
"query": "foo",
"fields": ["field"]
}
}
}
""");

String errorTriggeringIndex = "test2";
try (var mockLog = MockLog.capture(SearchService.class)) {
ErrorTraceHelper.addSeenLoggingExpectations(numShards, mockLog, errorTriggeringIndex);

getRestClient().performRequest(searchRequest);
mockLog.assertAllExpectationsMatched();
}
}

public void testNoLoggingInSearchFailingQueryErrorTraceTrue() throws IOException {
int numShards = setupIndexWithDocs();

Request searchRequest = new Request("POST", "/_search");
searchRequest.setJsonEntity("""
{
"query": {
"simple_query_string" : {
"query": "foo",
"fields": ["field"]
}
}
}
""");

String errorTriggeringIndex = "test2";
try (var mockLog = MockLog.capture(SearchService.class)) {
ErrorTraceHelper.addUnseenLoggingExpectations(numShards, mockLog, errorTriggeringIndex);

searchRequest.addParameter("error_trace", "true");
getRestClient().performRequest(searchRequest);
mockLog.assertAllExpectationsMatched();
}
}

public void testLoggingInSearchFailingQueryErrorTraceFalse() throws IOException {
int numShards = setupIndexWithDocs();

Request searchRequest = new Request("POST", "/_search");
searchRequest.setJsonEntity("""
{
"query": {
"simple_query_string" : {
"query": "foo",
"fields": ["field"]
}
}
}
""");

String errorTriggeringIndex = "test2";
try (var mockLog = MockLog.capture(SearchService.class)) {
ErrorTraceHelper.addSeenLoggingExpectations(numShards, mockLog, errorTriggeringIndex);

searchRequest.addParameter("error_trace", "false");
getRestClient().performRequest(searchRequest);
mockLog.assertAllExpectationsMatched();
}
}

public void testMultiSearchFailingQueryErrorTraceDefault() throws IOException {
setupIndexWithDocs();

@@ -158,4 +258,73 @@ public void testMultiSearchFailingQueryErrorTraceFalse() throws IOException {

assertFalse(hasStackTrace.getAsBoolean());
}

public void testLoggingInMultiSearchFailingQueryErrorTraceDefault() throws IOException {
int numShards = setupIndexWithDocs();

XContentType contentType = XContentType.JSON;
MultiSearchRequest multiSearchRequest = new MultiSearchRequest().add(
new SearchRequest("test*").source(new SearchSourceBuilder().query(simpleQueryStringQuery("foo").field("field")))
);
Request searchRequest = new Request("POST", "/_msearch");
byte[] requestBody = MultiSearchRequest.writeMultiLineFormat(multiSearchRequest, contentType.xContent());
searchRequest.setEntity(
new NByteArrayEntity(requestBody, ContentType.create(contentType.mediaTypeWithoutParameters(), (Charset) null))
);

String errorTriggeringIndex = "test2";
try (var mockLog = MockLog.capture(SearchService.class)) {
ErrorTraceHelper.addSeenLoggingExpectations(numShards, mockLog, errorTriggeringIndex);

getRestClient().performRequest(searchRequest);
mockLog.assertAllExpectationsMatched();
}
}

public void testLoggingInMultiSearchFailingQueryErrorTraceTrue() throws IOException {
int numShards = setupIndexWithDocs();

XContentType contentType = XContentType.JSON;
MultiSearchRequest multiSearchRequest = new MultiSearchRequest().add(
new SearchRequest("test*").source(new SearchSourceBuilder().query(simpleQueryStringQuery("foo").field("field")))
);
Request searchRequest = new Request("POST", "/_msearch");
byte[] requestBody = MultiSearchRequest.writeMultiLineFormat(multiSearchRequest, contentType.xContent());
searchRequest.setEntity(
new NByteArrayEntity(requestBody, ContentType.create(contentType.mediaTypeWithoutParameters(), (Charset) null))
);

searchRequest.addParameter("error_trace", "true");

String errorTriggeringIndex = "test2";
try (var mockLog = MockLog.capture(SearchService.class)) {
ErrorTraceHelper.addUnseenLoggingExpectations(numShards, mockLog, errorTriggeringIndex);

getRestClient().performRequest(searchRequest);
mockLog.assertAllExpectationsMatched();
}
}

public void testLoggingInMultiSearchFailingQueryErrorTraceFalse() throws IOException {
int numShards = setupIndexWithDocs();

XContentType contentType = XContentType.JSON;
MultiSearchRequest multiSearchRequest = new MultiSearchRequest().add(
new SearchRequest("test*").source(new SearchSourceBuilder().query(simpleQueryStringQuery("foo").field("field")))
);
Request searchRequest = new Request("POST", "/_msearch");
byte[] requestBody = MultiSearchRequest.writeMultiLineFormat(multiSearchRequest, contentType.xContent());
searchRequest.setEntity(
new NByteArrayEntity(requestBody, ContentType.create(contentType.mediaTypeWithoutParameters(), (Charset) null))
);
searchRequest.addParameter("error_trace", "false");

String errorTriggeringIndex = "test2";
try (var mockLog = MockLog.capture(SearchService.class)) {
ErrorTraceHelper.addSeenLoggingExpectations(numShards, mockLog, errorTriggeringIndex);

getRestClient().performRequest(searchRequest);
mockLog.assertAllExpectationsMatched();
}
}
}
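The ErrorTraceHelper.addSeenLoggingExpectations and addUnseenLoggingExpectations helpers used above are not shown in this diff. A plausible sketch of the "seen" variant, assuming MockLog's SeenEventExpectation API and an illustrative message pattern, with one expectation per shard of the failing index:

    // Hypothetical reconstruction (not part of this diff): register one "seen"
    // expectation per shard of the index that triggers the failure.
    public static void addSeenLoggingExpectations(int numShards, MockLog mockLog, String errorTriggeringIndex) {
        for (int shardId = 0; shardId < numShards; shardId++) {
            mockLog.addExpectation(
                new MockLog.SeenEventExpectation(
                    "stack trace logged for [" + errorTriggeringIndex + "][" + shardId + "]",
                    SearchService.class.getCanonicalName(),
                    Level.DEBUG,
                    "*[" + errorTriggeringIndex + "][" + shardId + "]*" // exact message text is an assumption
                )
            );
        }
    }

The "unseen" variant would presumably register MockLog.UnseenEventExpectation instances instead, asserting that no such events are emitted when error_trace=true.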
@@ -456,8 +456,7 @@ public static void registerRequestHandler(TransportService transportService, Sea
(request, channel, task) -> searchService.executeQueryPhase(
request,
(SearchShardTask) task,
-new ChannelActionListener<>(channel),
-channel.getVersion()
+new ChannelActionListener<>(channel)
)
);
TransportActionProxy.registerProxyAction(transportService, QUERY_ID_ACTION_NAME, true, QuerySearchResult::new);
@@ -469,8 +468,7 @@ public static void registerRequestHandler(TransportService transportService, Sea
(request, channel, task) -> searchService.executeQueryPhase(
request,
(SearchShardTask) task,
-new ChannelActionListener<>(channel),
-channel.getVersion()
+new ChannelActionListener<>(channel)
)
);
TransportActionProxy.registerProxyAction(transportService, QUERY_SCROLL_ACTION_NAME, true, ScrollQuerySearchResult::new);
38 changes: 19 additions & 19 deletions server/src/main/java/org/elasticsearch/search/SearchService.java
@@ -18,7 +18,6 @@
import org.apache.lucene.search.TopDocs;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.ExceptionsHelper;
-import org.elasticsearch.TransportVersion;
import org.elasticsearch.action.ActionListener;
import org.elasticsearch.action.ActionRunnable;
import org.elasticsearch.action.ResolvedIndices;
@@ -156,6 +155,7 @@
import java.util.function.Supplier;

import static org.elasticsearch.TransportVersions.ERROR_TRACE_IN_TRANSPORT_HEADER;
+import static org.elasticsearch.common.Strings.format;
import static org.elasticsearch.core.TimeValue.timeValueHours;
import static org.elasticsearch.core.TimeValue.timeValueMillis;
import static org.elasticsearch.core.TimeValue.timeValueMinutes;
@@ -537,21 +537,27 @@ protected void doClose() {
*
* @param <T> the type of the response
* @param listener the action listener to be wrapped
-* @param version channel version of the request
+* @param request the shard request being executed
+* @param threadPool with context where to write the new header
+* @param clusterService the cluster service
* @return the wrapped action listener
*/
static <T> ActionListener<T> maybeWrapListenerForStackTrace(
ActionListener<T> listener,
-TransportVersion version,
-ThreadPool threadPool
+ShardSearchRequest request,
+ThreadPool threadPool,
+ClusterService clusterService
) {
boolean header = true;
-if (version.onOrAfter(ERROR_TRACE_IN_TRANSPORT_HEADER) && threadPool.getThreadContext() != null) {
+if (request.getChannelVersion().onOrAfter(ERROR_TRACE_IN_TRANSPORT_HEADER) && threadPool.getThreadContext() != null) {
header = Boolean.parseBoolean(threadPool.getThreadContext().getHeaderOrDefault("error_trace", "false"));
}
if (header == false) {
Member: I do not see where we are logging the trace? It seems we are only logging that it's actually removed.

I am thinking we should log the exception as a WARN if:

  • header == false (meaning the trace wouldn't be provided user side)
  • the exception isn't a user exception (e.g. should be considered a 5xx)

However, I defer to @javanna here. He has much better context around what we are trying to achieve.

Contributor Author: Passing e to the logger here outputs the stack trace. Maybe my log message isn't clear... would something like "search failed with exception:" be better?

Interesting point on WARN - do you think a WARN log per shard for the same underlying error is acceptable here? Luca and I decided it would be difficult to dedupe these at the moment. It might become easier with batched query execution.

Member: Ah, I see. I think this shouldn't be debug. Maybe debug for all exceptions, but we should log WARN for things that are 5xx, indicating the failure.

I think adding a "Clearing stack trace before..." message is unnecessary. Logging isn't just for our debugging, but also for users. I am not sure indicating that the trace is being removed is overly useful here.

Contributor Author: I've updated the log message to be clearer for users, and raised the level to WARN for 5xx. Thanks for the idea there - I think it makes sense for this new message to log at the same level as rest.suppressed.
return listener.delegateResponse((l, e) -> {
+logger.debug(
+() -> format("[%s]%s Clearing stack trace before transport:", clusterService.localNode().getId(), request.shardId()),
Member: This looks like the best place to add the logging indeed, because we ensure that we do the additional logging exclusively in the cases where we suppress the stack trace, before doing so.

If we log this at debug, we are not going to see it with the default log level, are we? I think we should use warn instead, at least?

The error message looks a little misleading also; all we are interested in is the error itself, so I would log the same thing we'd get on the coord node, but this time with the stack trace.

There are a couple more aspects that deserve attention, I think:

  1. If we keep on logging on the coord node, we should probably only log on the data nodes when the error trace is not requested, otherwise we just add redundant logging?
  2. If we keep on logging on the coord node, it may happen that the node acting as coord node also acts as a data node as part of serving a search request. That would lead to duplicated logging on that node, which may be ok but is not ideal.

Contributor Author: I've updated the log message to be clearer for users and raised the level to WARN on the same condition under which the rest.suppressed logger logs at WARN.

  1. Agreed, and that is the current behavior, as this log is only emitted inside the if (header == false) block.
  2. That is true. I think the shard failure logs on the coord node (see my example below) are important, but an argument could be made to remove the rest.suppressed log when error_trace=false. Then again, rest.suppressed is only one log line. But I imagine removing any of these logs would count as a breaking change (?), as alerts out there (like our own) might rely on them.
+e
+);
ExceptionsHelper.unwrapCausesAndSuppressed(e, err -> {
err.setStackTrace(EMPTY_STACK_TRACE_ARRAY);
return false;
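Per the resolution in the thread above, the level selection presumably mirrors rest.suppressed: WARN when the failure maps to a 5xx status, DEBUG otherwise. A sketch of that logic, assuming ExceptionsHelper.status is used to classify the exception; this is not the final PR code:

    // Sketch only: WARN for server-side (5xx) failures, DEBUG for everything else,
    // matching the level rest.suppressed would use on the coordinating node.
    RestStatus status = ExceptionsHelper.status(e);
    Level level = status.getStatus() >= 500 ? Level.WARN : Level.DEBUG;
    logger.log(
        level,
        () -> format("[%s]%s: failed to execute search request", clusterService.localNode().getId(), request.shardId()),
        e
    );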
@@ -563,7 +569,7 @@ static <T> ActionListener<T> maybeWrapListenerForStackTrace(
}

public void executeDfsPhase(ShardSearchRequest request, SearchShardTask task, ActionListener<SearchPhaseResult> listener) {
-listener = maybeWrapListenerForStackTrace(listener, request.getChannelVersion(), threadPool);
+listener = maybeWrapListenerForStackTrace(listener, request, threadPool, clusterService);
final IndexShard shard = getShard(request);
rewriteAndFetchShardRequest(shard, request, listener.delegateFailure((l, rewritten) -> {
// fork the execution in the search thread pool
@@ -607,7 +613,7 @@ public void executeQueryPhase(ShardSearchRequest request, CancellableTask task,
rewriteAndFetchShardRequest(
shard,
request,
-maybeWrapListenerForStackTrace(listener, request.getChannelVersion(), threadPool).delegateFailure((l, orig) -> {
+maybeWrapListenerForStackTrace(listener, request, threadPool, clusterService).delegateFailure((l, orig) -> {
// check if we can shortcut the query phase entirely.
if (orig.canReturnNullResponseIfMatchNoDocs()) {
assert orig.scroll() == null;
@@ -805,9 +811,9 @@ private SearchPhaseResult executeQueryPhase(ShardSearchRequest request, Cancella
}

public void executeRankFeaturePhase(RankFeatureShardRequest request, SearchShardTask task, ActionListener<RankFeatureResult> listener) {
-listener = maybeWrapListenerForStackTrace(listener, request.getShardSearchRequest().getChannelVersion(), threadPool);
final ReaderContext readerContext = findReaderContext(request.contextId(), request);
final ShardSearchRequest shardSearchRequest = readerContext.getShardSearchRequest(request.getShardSearchRequest());
+listener = maybeWrapListenerForStackTrace(listener, shardSearchRequest, threadPool, clusterService);
final Releasable markAsUsed = readerContext.markAsUsed(getKeepAlive(shardSearchRequest));
runAsync(getExecutor(readerContext.indexShard()), () -> {
try (SearchContext searchContext = createContext(readerContext, shardSearchRequest, task, ResultsType.RANK_FEATURE, false)) {
@@ -853,11 +859,11 @@ private QueryFetchSearchResult executeFetchPhase(ReaderContext reader, SearchCon
public void executeQueryPhase(
InternalScrollSearchRequest request,
SearchShardTask task,
ActionListener<ScrollQuerySearchResult> listener,
TransportVersion version
ActionListener<ScrollQuerySearchResult> listener
) {
listener = maybeWrapListenerForStackTrace(listener, version, threadPool);
final LegacyReaderContext readerContext = (LegacyReaderContext) findReaderContext(request.contextId(), request);
final ShardSearchRequest shardSearchRequest = readerContext.getShardSearchRequest(null);
listener = maybeWrapListenerForStackTrace(listener, shardSearchRequest, threadPool, clusterService);
final Releasable markAsUsed;
try {
markAsUsed = readerContext.markAsUsed(getScrollKeepAlive(request.scroll()));
@@ -867,7 +873,6 @@ public void executeQueryPhase(
throw e;
}
runAsync(getExecutor(readerContext.indexShard()), () -> {
-final ShardSearchRequest shardSearchRequest = readerContext.getShardSearchRequest(null);
try (SearchContext searchContext = createContext(readerContext, shardSearchRequest, task, ResultsType.QUERY, false);) {
var opsListener = searchContext.indexShard().getSearchOperationListener();
final long beforeQueryTime = System.nanoTime();
@@ -899,15 +904,10 @@
* It is the responsibility of the caller to ensure that the ref count is correctly decremented
* when the object is no longer needed.
*/
-public void executeQueryPhase(
-    QuerySearchRequest request,
-    SearchShardTask task,
-    ActionListener<QuerySearchResult> listener,
-    TransportVersion version
-) {
-    listener = maybeWrapListenerForStackTrace(listener, version, threadPool);
+public void executeQueryPhase(QuerySearchRequest request, SearchShardTask task, ActionListener<QuerySearchResult> listener) {
final ReaderContext readerContext = findReaderContext(request.contextId(), request.shardSearchRequest());
final ShardSearchRequest shardSearchRequest = readerContext.getShardSearchRequest(request.shardSearchRequest());
+listener = maybeWrapListenerForStackTrace(listener, shardSearchRequest, threadPool, clusterService);
final Releasable markAsUsed = readerContext.markAsUsed(getKeepAlive(shardSearchRequest));
rewriteAndFetchShardRequest(readerContext.indexShard(), shardSearchRequest, listener.delegateFailure((l, rewritten) -> {
// fork the execution in the search thread pool
@@ -2630,8 +2630,7 @@ public void testDfsQueryPhaseRewrite() {
service.executeQueryPhase(
new QuerySearchRequest(null, context.id(), request, new AggregatedDfs(Map.of(), Map.of(), 10)),
new SearchShardTask(42L, "", "", "", null, emptyMap()),
-plainActionFuture,
-TransportVersion.current()
+plainActionFuture
);

plainActionFuture.actionGet();