-
Notifications
You must be signed in to change notification settings - Fork 25.3k
Log stack traces on data nodes before they are cleared for transport #125732
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from 7 commits
Commits
Show all changes
12 commits
Select commit
Hold shift + click to select a range
fdaba56
Debug log stack trace before it is cleared
benchaplin 79a06aa
Update docs/changelog/125732.yaml
benchaplin e38d20c
Just set log level to debug
benchaplin 6cb677f
Reword log message, pass lighter params, address test fixes
benchaplin 78d58a4
Fix tests
benchaplin b34afc1
Log at warn level for server errors, update tests
benchaplin 9f527eb
Clarify tests
benchaplin 8eca23f
Merge branch 'main' into log_data_node_failures
benchaplin 93eddcc
Add task ID to log
benchaplin 47751a8
Rename tests, grab numShards differently
benchaplin b4c5baa
[CI] Auto commit changes from spotless
elasticsearchmachine 93e4625
Merge branch 'main' into log_data_node_failures
benchaplin File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
pr: 125732 | ||
summary: Log stack traces on data nodes before they are cleared for transport | ||
area: Search | ||
type: bug | ||
issues: [] |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -156,6 +156,7 @@ | |
import java.util.function.Supplier; | ||
|
||
import static org.elasticsearch.TransportVersions.ERROR_TRACE_IN_TRANSPORT_HEADER; | ||
import static org.elasticsearch.common.Strings.format; | ||
import static org.elasticsearch.core.TimeValue.timeValueHours; | ||
import static org.elasticsearch.core.TimeValue.timeValueMillis; | ||
import static org.elasticsearch.core.TimeValue.timeValueMinutes; | ||
|
@@ -538,12 +539,16 @@ protected void doClose() { | |
* @param <T> the type of the response | ||
* @param listener the action listener to be wrapped | ||
* @param version channel version of the request | ||
* @param nodeId id of the current node | ||
* @param shardId id of the shard being searched | ||
* @param threadPool with context where to write the new header | ||
* @return the wrapped action listener | ||
*/ | ||
static <T> ActionListener<T> maybeWrapListenerForStackTrace( | ||
ActionListener<T> listener, | ||
TransportVersion version, | ||
String nodeId, | ||
ShardId shardId, | ||
ThreadPool threadPool | ||
) { | ||
boolean header = true; | ||
|
@@ -552,6 +557,16 @@ static <T> ActionListener<T> maybeWrapListenerForStackTrace( | |
} | ||
if (header == false) { | ||
javanna marked this conversation as resolved.
Show resolved
Hide resolved
|
||
return listener.delegateResponse((l, e) -> { | ||
org.apache.logging.log4j.util.Supplier<String> messageSupplier = () -> format( | ||
"[%s]%s: failed to execute search request", | ||
nodeId, | ||
shardId | ||
); | ||
if (ExceptionsHelper.status(e).getStatus() < 500 || ExceptionsHelper.isNodeOrShardUnavailableTypeException(e)) { | ||
javanna marked this conversation as resolved.
Show resolved
Hide resolved
|
||
logger.debug(messageSupplier, e); | ||
} else { | ||
logger.warn(messageSupplier, e); | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 😍 |
||
ExceptionsHelper.unwrapCausesAndSuppressed(e, err -> { | ||
err.setStackTrace(EMPTY_STACK_TRACE_ARRAY); | ||
return false; | ||
|
@@ -563,7 +578,13 @@ static <T> ActionListener<T> maybeWrapListenerForStackTrace( | |
} | ||
|
||
public void executeDfsPhase(ShardSearchRequest request, SearchShardTask task, ActionListener<SearchPhaseResult> listener) { | ||
listener = maybeWrapListenerForStackTrace(listener, request.getChannelVersion(), threadPool); | ||
listener = maybeWrapListenerForStackTrace( | ||
listener, | ||
request.getChannelVersion(), | ||
clusterService.localNode().getId(), | ||
request.shardId(), | ||
threadPool | ||
); | ||
final IndexShard shard = getShard(request); | ||
rewriteAndFetchShardRequest(shard, request, listener.delegateFailure((l, rewritten) -> { | ||
// fork the execution in the search thread pool | ||
|
@@ -607,7 +628,13 @@ public void executeQueryPhase(ShardSearchRequest request, CancellableTask task, | |
rewriteAndFetchShardRequest( | ||
shard, | ||
request, | ||
maybeWrapListenerForStackTrace(listener, request.getChannelVersion(), threadPool).delegateFailure((l, orig) -> { | ||
maybeWrapListenerForStackTrace( | ||
listener, | ||
request.getChannelVersion(), | ||
clusterService.localNode().getId(), | ||
request.shardId(), | ||
threadPool | ||
).delegateFailure((l, orig) -> { | ||
// check if we can shortcut the query phase entirely. | ||
if (orig.canReturnNullResponseIfMatchNoDocs()) { | ||
assert orig.scroll() == null; | ||
|
@@ -805,9 +832,15 @@ private SearchPhaseResult executeQueryPhase(ShardSearchRequest request, Cancella | |
} | ||
|
||
public void executeRankFeaturePhase(RankFeatureShardRequest request, SearchShardTask task, ActionListener<RankFeatureResult> listener) { | ||
listener = maybeWrapListenerForStackTrace(listener, request.getShardSearchRequest().getChannelVersion(), threadPool); | ||
final ReaderContext readerContext = findReaderContext(request.contextId(), request); | ||
final ShardSearchRequest shardSearchRequest = readerContext.getShardSearchRequest(request.getShardSearchRequest()); | ||
listener = maybeWrapListenerForStackTrace( | ||
listener, | ||
shardSearchRequest.getChannelVersion(), | ||
clusterService.localNode().getId(), | ||
shardSearchRequest.shardId(), | ||
threadPool | ||
); | ||
final Releasable markAsUsed = readerContext.markAsUsed(getKeepAlive(shardSearchRequest)); | ||
runAsync(getExecutor(readerContext.indexShard()), () -> { | ||
try (SearchContext searchContext = createContext(readerContext, shardSearchRequest, task, ResultsType.RANK_FEATURE, false)) { | ||
|
@@ -856,8 +889,14 @@ public void executeQueryPhase( | |
ActionListener<ScrollQuerySearchResult> listener, | ||
TransportVersion version | ||
) { | ||
listener = maybeWrapListenerForStackTrace(listener, version, threadPool); | ||
final LegacyReaderContext readerContext = (LegacyReaderContext) findReaderContext(request.contextId(), request); | ||
listener = maybeWrapListenerForStackTrace( | ||
listener, | ||
version, | ||
clusterService.localNode().getId(), | ||
readerContext.indexShard().shardId(), | ||
threadPool | ||
); | ||
final Releasable markAsUsed; | ||
try { | ||
markAsUsed = readerContext.markAsUsed(getScrollKeepAlive(request.scroll())); | ||
|
@@ -905,9 +944,15 @@ public void executeQueryPhase( | |
ActionListener<QuerySearchResult> listener, | ||
TransportVersion version | ||
) { | ||
listener = maybeWrapListenerForStackTrace(listener, version, threadPool); | ||
final ReaderContext readerContext = findReaderContext(request.contextId(), request.shardSearchRequest()); | ||
final ShardSearchRequest shardSearchRequest = readerContext.getShardSearchRequest(request.shardSearchRequest()); | ||
listener = maybeWrapListenerForStackTrace( | ||
listener, | ||
version, | ||
clusterService.localNode().getId(), | ||
shardSearchRequest.shardId(), | ||
threadPool | ||
); | ||
final Releasable markAsUsed = readerContext.markAsUsed(getKeepAlive(shardSearchRequest)); | ||
rewriteAndFetchShardRequest(readerContext.indexShard(), shardSearchRequest, listener.delegateFailure((l, rewritten) -> { | ||
// fork the execution in the search thread pool | ||
|
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.