Open
Description
Describe the bug
When I try to use the GQL query searchAcrossEntities for more than 900 entities, a NullPointerException is thrown in DataHub GMS.
To Reproduce
Steps to reproduce the behavior:
- Go to https://{datahubDomain}/api/graphiql
- Execute searchAcrossEntities with:
query {
searchAcrossEntities(
input: {
types: [DATASET, DASHBOARD],
query: "*",
start:0,
count:900
}
)
{
searchResults{
entity {
type
... on Dataset{
lastIngested
name
platform{
name
}
urn
}
},
entity{
type
... on Dashboard{
properties{
name
}
lastIngested
platform{
name
}
urn
}
}
}
}
}
- See error
- (The same error occurs with scrollAcrossEntities when the start option is removed.)
Expected behavior
We should get the entities if count is less than 10,000.
Desktop (please complete the following information):
- OS: Ubuntu 20.04.5 LTS
- Version 1.0.0
- Docker dev
Additional context
datahub-gms | 2025-03-28 12:52:01,289 [ForkJoinPool.commonPool-worker-2282] INFO c.datahub.graphql.GraphQLController:143 - Executing operation graphql for qtp765326134-2060
datahub-gms | 2025-03-28 12:52:01,290 [ForkJoinPool.commonPool-worker-2282] INFO c.l.d.g.i.DataHubFieldComplexityCalculator:38 - Query complexity for query: searchAcrossEntities is 17
datahub-gms | 2025-03-28 12:52:01,464 [ForkJoinPool.commonPool-worker-2282] ERROR c.l.m.s.e.query.ESSearchDAO:165 - Search query failed
datahub-gms | java.lang.NullPointerException: null
datahub-gms | 2025-03-28 12:52:01,465 [ForkJoinPool.commonPool-worker-2282] ERROR c.l.d.g.r.s.SearchAcrossEntitiesResolver:119 - Failed to execute search for multiple entities: entity types [DATASET, DASHBOARD], query *, filters: null, start: 0, count: 900
datahub-gms | 2025-03-28 12:52:01,465 [ForkJoinPool.commonPool-worker-2282] ERROR c.l.d.g.e.DataHubDataFetcherExceptionHandler:45 - Failed to execute
datahub-gms | java.util.concurrent.CompletionException: java.lang.RuntimeException: Failed to execute search: entity types [DATASET, DASHBOARD], query *, filters: null, start: 0, count: 900
datahub-gms | at java.base/java.util.concurrent.CompletableFuture.encodeThrowable(CompletableFuture.java:315)
datahub-gms | at java.base/java.util.concurrent.CompletableFuture.completeThrowable(CompletableFuture.java:320)
datahub-gms | at java.base/java.util.concurrent.CompletableFuture$AsyncSupply.run(CompletableFuture.java:1770)
datahub-gms | at java.base/java.util.concurrent.CompletableFuture$AsyncSupply.exec(CompletableFuture.java:1760)
datahub-gms | at java.base/java.util.concurrent.ForkJoinTask.doExec(ForkJoinTask.java:373)
datahub-gms | at java.base/java.util.concurrent.ForkJoinPool$WorkQueue.helpAsyncBlocker(ForkJoinPool.java:1253)
datahub-gms | at java.base/java.util.concurrent.ForkJoinPool.helpAsyncBlocker(ForkJoinPool.java:2237)
datahub-gms | at java.base/java.util.concurrent.CompletableFuture.waitingGet(CompletableFuture.java:1887)
datahub-gms | at java.base/java.util.concurrent.CompletableFuture.join(CompletableFuture.java:2117)
datahub-gms | at graphql.GraphQL.execute(GraphQL.java:366)
datahub-gms | at com.linkedin.datahub.graphql.GraphQLEngine.execute(GraphQLEngine.java:119)
datahub-gms | at com.datahub.graphql.GraphQLController.lambda$postGraphQL$0(GraphQLController.java:149)
datahub-gms | at java.base/java.util.concurrent.CompletableFuture$AsyncSupply.run(CompletableFuture.java:1768)
datahub-gms | at java.base/java.util.concurrent.CompletableFuture$AsyncSupply.exec(CompletableFuture.java:1760)
datahub-gms | at java.base/java.util.concurrent.ForkJoinTask.doExec(ForkJoinTask.java:373)
datahub-gms | at java.base/java.util.concurrent.ForkJoinPool$WorkQueue.topLevelExec(ForkJoinPool.java:1182)
datahub-gms | at java.base/java.util.concurrent.ForkJoinPool.scan(ForkJoinPool.java:1655)
datahub-gms | at java.base/java.util.concurrent.ForkJoinPool.runWorker(ForkJoinPool.java:1622)
datahub-gms | at java.base/java.util.concurrent.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:165)
datahub-gms | Caused by: java.lang.RuntimeException: Failed to execute search: entity types [DATASET, DASHBOARD], query *, filters: null, start: 0, count: 900
datahub-gms | at com.linkedin.datahub.graphql.resolvers.search.SearchAcrossEntitiesResolver.lambda$get$1(SearchAcrossEntitiesResolver.java:128)
datahub-gms | at java.base/java.util.concurrent.CompletableFuture$AsyncSupply.run(CompletableFuture.java:1768)
datahub-gms | ... 16 common frames omitted
datahub-gms | Caused by: com.datahub.util.exception.ESQueryException: Search query failed:
datahub-gms | at com.linkedin.metadata.search.elasticsearch.query.ESSearchDAO.lambda$executeAndExtract$1(ESSearchDAO.java:166)
datahub-gms | at io.datahubproject.metadata.context.TraceContext.withSpan(TraceContext.java:110)
datahub-gms | at io.datahubproject.metadata.context.OperationContext.withSpan(OperationContext.java:391)
datahub-gms | at com.linkedin.metadata.search.elasticsearch.query.ESSearchDAO.executeAndExtract(ESSearchDAO.java:147)
datahub-gms | at com.linkedin.metadata.search.elasticsearch.query.ESSearchDAO.search(ESSearchDAO.java:338)
datahub-gms | at com.linkedin.metadata.search.elasticsearch.ElasticSearchService.search(ElasticSearchService.java:173)
datahub-gms | at com.linkedin.metadata.search.client.CachingEntitySearchService.getRawSearchResults(CachingEntitySearchService.java:359)
datahub-gms | at com.linkedin.metadata.search.client.CachingEntitySearchService.lambda$getCachedSearchResults$0(CachingEntitySearchService.java:160)
datahub-gms | at com.linkedin.metadata.search.cache.CacheableSearcher.lambda$getBatch$1(CacheableSearcher.java:123)
datahub-gms | at io.datahubproject.metadata.context.TraceContext.withSpan(TraceContext.java:110)
datahub-gms | at io.datahubproject.metadata.context.OperationContext.withSpan(OperationContext.java:391)
datahub-gms | at com.linkedin.metadata.search.cache.CacheableSearcher.getBatch(CacheableSearcher.java:94)
datahub-gms | at com.linkedin.metadata.search.cache.CacheableSearcher.lambda$getSearchResults$0(CacheableSearcher.java:56)
datahub-gms | at io.datahubproject.metadata.context.TraceContext.withSpan(TraceContext.java:110)
datahub-gms | at io.datahubproject.metadata.context.OperationContext.withSpan(OperationContext.java:391)
datahub-gms | at com.linkedin.metadata.search.cache.CacheableSearcher.getSearchResults(CacheableSearcher.java:46)
datahub-gms | at com.linkedin.metadata.search.client.CachingEntitySearchService.getCachedSearchResults(CachingEntitySearchService.java:179)
datahub-gms | at com.linkedin.metadata.search.client.CachingEntitySearchService.search(CachingEntitySearchService.java:68)
datahub-gms | at com.linkedin.metadata.search.SearchService.searchAcrossEntities(SearchService.java:152)
datahub-gms | at com.linkedin.metadata.client.JavaEntityClient.searchAcrossEntities(JavaEntityClient.java:490)
datahub-gms | at com.linkedin.datahub.graphql.resolvers.search.SearchAcrossEntitiesResolver.lambda$get$1(SearchAcrossEntitiesResolver.java:106)
datahub-gms | ... 17 common frames omitted
datahub-gms | Caused by: java.lang.NullPointerException: null
datahub-gms | 2025-03-28 12:52:01,465 [ForkJoinPool.commonPool-worker-2282] ERROR c.datahub.graphql.GraphQLController:153 - Errors while executing query: # Welcome to GraphiQL
datahub-gms | #
datahub-gms | # GraphiQL is an in-browser tool for writing, validating, and
datahub-gms | # testing GraphQL queries.
datahub-gms | #
datahub-gms | # Type queries into this side of the screen, and you will see intelligent
datahub-gms | # typeaheads aware of the current GraphQL type schema and live syntax and
datahub-gms | # validation errors highlighted within the text.
datahub-gms | #
datahub-gms | # GraphQL queries typically start with a "{" character. Lines that start
datahub-gms | # with a # are ignored.
datahub-gms | #
datahub-gms | # An example GraphQL query might look like:
datahub-gms | #
datahub-gms | # {
datahub-gms | # field(arg: "value") {
datahub-gms | # su..., result: {errors=[{message=An unknown error occurred., locations=[{line=33, column=15}], path=[searchAcrossEntities], extensions={code=500, type=SERVER_ERROR, classification=DataFetchingException}}], data={searchAcrossEntities=null}, extensions={tracing={version=1, startTime=2025-03-28T12:52:01.289812957Z, endTime=2025-03-28T12:52:01.465679570Z, duration=175868081, parsing={startOffset=348874, duration=302094}, validation={startOffset=625431, duration=267114}, execution={resolvers=[{path=[searchAcrossEntities], parentType=Query, returnType=SearchResults, fieldName=searchAcrossEntities, startOffset=1202111, duration=174345935}]}}}}, errors: [DataHubGraphQLError{path=[searchAcrossEntities], code=SERVER_ERROR, locations=[SourceLocation{line=33, column=15}]}]