|
21 | 21 | import io.quarkus.search.app.quarkiverseio.QuarkiverseIO; |
22 | 22 | import io.quarkus.search.app.quarkusio.QuarkusIO; |
23 | 23 |
|
| 24 | +import io.quarkus.logging.Log; |
| 25 | + |
24 | 26 | import org.hibernate.search.backend.elasticsearch.ElasticsearchExtension; |
| 27 | +import org.hibernate.search.backend.elasticsearch.search.query.ElasticsearchSearchResult; |
25 | 28 | import org.hibernate.search.engine.search.common.BooleanOperator; |
26 | 29 | import org.hibernate.search.engine.search.common.ValueModel; |
27 | 30 | import org.hibernate.search.engine.search.predicate.dsl.MatchPredicateOptionsStep; |
28 | 31 | import org.hibernate.search.engine.search.predicate.dsl.PredicateFinalStep; |
29 | 32 | import org.hibernate.search.engine.search.predicate.dsl.SearchPredicateFactory; |
30 | 33 | import org.hibernate.search.engine.search.predicate.dsl.SimpleQueryFlag; |
31 | 34 | import org.hibernate.search.mapper.pojo.standalone.mapping.SearchMapping; |
| 35 | +import org.hibernate.search.mapper.pojo.standalone.session.SearchSession; |
32 | 36 |
|
33 | 37 | import org.eclipse.microprofile.openapi.annotations.Operation; |
34 | 38 | import org.jboss.resteasy.reactive.RestQuery; |
35 | 39 |
|
| 40 | +import com.google.gson.JsonArray; |
36 | 41 | import com.google.gson.JsonObject; |
37 | 42 |
|
38 | 43 | @ApplicationScoped |
@@ -62,79 +67,97 @@ public SearchResult<GuideSearchHit> search(@RestQuery @DefaultValue(QuarkusVersi |
62 | 67 | @RestQuery @DefaultValue("1") @Min(0) @Max(value = 10, message = MAX_FOR_PERF_MESSAGE) int contentSnippets, |
63 | 68 | @RestQuery @DefaultValue("100") @Min(0) @Max(value = 200, message = MAX_FOR_PERF_MESSAGE) int contentSnippetsLength) { |
64 | 69 | try (var session = searchMapping.createSession()) { |
65 | | - var result = session.search(Guide.class) |
66 | | - .extension(ElasticsearchExtension.get()) |
67 | | - .select(f -> f.composite().from( |
68 | | - f.id(), |
69 | | - f.field("type"), |
70 | | - f.field("origin"), |
71 | | - f.highlight(language.addSuffix("title")).highlighter("highlighter_title_or_summary").single(), |
72 | | - f.highlight(language.addSuffix("summary")).highlighter("highlighter_title_or_summary").single(), |
73 | | - f.highlight(language.addSuffix("fullContent")).highlighter("highlighter_content")) |
74 | | - .asList(GuideSearchHit::new)) |
75 | | - .where((f, root) -> { |
76 | | - // Match all documents by default |
77 | | - root.add(f.matchAll()); |
78 | | - |
79 | | - if (categories != null && !categories.isEmpty()) { |
80 | | - root.add(f.terms().field("categories").matchingAny(categories)); |
81 | | - } |
82 | | - |
83 | | - if (origin != null && !origin.isEmpty()) { |
84 | | - root.add(f.match().field("origin").matching(origin)); |
85 | | - } |
86 | | - |
87 | | - if (q != null && !q.isBlank()) { |
88 | | - root.add(f.or( |
89 | | - // Duplicate the query so that we apply a multiplicative boost to quarkus.io guides. |
90 | | - // The end result is that a low-relevance match on quarkus.io _can_ be scored |
91 | | - // lower than a high-relevance match on quarkiverse.io, |
92 | | - // if it's significantly more relevant. |
93 | | - // Note that we could, alternatively, |
94 | | - // do something like bool().must(textMatch()).should(origin(quarkusio).boost(2f))), |
95 | | - // but then the boost would be additive, so we would ignore relative relevance |
96 | | - // of quarkus.io/quarkiverse.io results. |
97 | | - f.bool().must(textMatch(f, q, language)) |
98 | | - .filter(originMatch(f, QuarkusIO.QUARKUS_ORIGIN)) |
99 | | - // Always score lower for compatibility (legacy) guides. |
100 | | - // TODO: Maybe we should use a duplicate query with multiplicative boost for this too? |
101 | | - .should(f.not(f.match().field(language.addSuffix("topics")) |
102 | | - .matching("compatibility", ValueModel.INDEX)) |
103 | | - .boost(50.0f)) |
104 | | - .boost(2.0f), |
105 | | - f.bool().must(textMatch(f, q, language)) |
106 | | - .filter(originMatch(f, QuarkiverseIO.QUARKIVERSE_ORIGIN)))); |
107 | | - } |
108 | | - }) |
109 | | - .highlighter(f -> f.fastVector() |
110 | | - // Highlighters are going to use spans-with-classes so that we will have more control over styling the visual on the search results screen. |
111 | | - .tag("<span class=\"" + highlightCssClass + "\">", "</span>")) |
112 | | - .highlighter("highlighter_title_or_summary", f -> f.fastVector() |
113 | | - // We want the whole text of the field, regardless of whether it has a match or not. |
114 | | - .noMatchSize(TITLE_OR_SUMMARY_MAX_SIZE) |
115 | | - .fragmentSize(TITLE_OR_SUMMARY_MAX_SIZE) |
116 | | - // We want the whole text as a single fragment |
117 | | - .numberOfFragments(1)) |
118 | | - .highlighter("highlighter_content", f -> f.fastVector() |
119 | | - // If there's no match in the full content we don't want to return anything. |
120 | | - .noMatchSize(0) |
121 | | - // Content is really huge, so we want to only get small parts of the sentences. |
122 | | - // We give control to the caller on the content snippet length and the number of these fragments |
123 | | - .numberOfFragments(contentSnippets) |
124 | | - .fragmentSize(contentSnippetsLength) |
125 | | - // The rest of fragment configuration is static |
126 | | - .orderByScore(true) |
127 | | - // We don't use sentence boundaries because those can result in huge fragments |
128 | | - .boundaryScanner().chars().boundaryMaxScan(10).end()) |
129 | | - .sort(f -> f.score().then().field(language.addSuffix("title_sort"))) |
130 | | - .routing(QuarkusVersionAndLanguageRoutingBinder.searchKeys(version, language)) |
131 | | - .totalHitCountThreshold(TOTAL_HIT_COUNT_THRESHOLD + (page + 1) * PAGE_SIZE) |
132 | | - .requestTransformer(context -> requestSuggestion(context.body(), q, language, highlightCssClass)) |
133 | | - .fetch(page * PAGE_SIZE, PAGE_SIZE); |
134 | | - return new SearchResult<>(result); |
| 70 | + var result = performSearch(version, categories, q, origin, language, highlightCssClass, page, contentSnippets, |
| 71 | + contentSnippetsLength, session); |
| 72 | + if (result.total().hitCount() > 0) { |
| 73 | + return new SearchResult<>(result); |
| 74 | + } else { |
| 75 | + SearchResult.Suggestion suggestion = extractSuggestion(result); |
| 76 | + if (suggestion != null) { |
| 77 | + result = performSearch(version, categories, suggestion.query(), origin, language, highlightCssClass, page, |
| 78 | + contentSnippets, contentSnippetsLength, session); |
| 79 | + } |
| 80 | + return new SearchResult<>(result, suggestion); |
| 81 | + } |
135 | 82 | } |
136 | 83 | } |
137 | 84 |
|
| 85 | + private ElasticsearchSearchResult<GuideSearchHit> performSearch(String version, List<String> categories, String q, |
| 86 | + String origin, Language language, String highlightCssClass, int page, int contentSnippets, |
| 87 | + int contentSnippetsLength, SearchSession session) { |
| 88 | + return session.search(Guide.class) |
| 89 | + .extension(ElasticsearchExtension.get()) |
| 90 | + .select(f -> f.composite().from( |
| 91 | + f.id(), |
| 92 | + f.field("type"), |
| 93 | + f.field("origin"), |
| 94 | + f.highlight(language.addSuffix("title")).highlighter("highlighter_title_or_summary").single(), |
| 95 | + f.highlight(language.addSuffix("summary")).highlighter("highlighter_title_or_summary").single(), |
| 96 | + f.highlight(language.addSuffix("fullContent")).highlighter("highlighter_content")) |
| 97 | + .asList(GuideSearchHit::new)) |
| 98 | + .where((f, root) -> { |
| 99 | + // Match all documents by default |
| 100 | + root.add(f.matchAll()); |
| 101 | + |
| 102 | + if (categories != null && !categories.isEmpty()) { |
| 103 | + root.add(f.terms().field("categories").matchingAny(categories)); |
| 104 | + } |
| 105 | + |
| 106 | + if (origin != null && !origin.isEmpty()) { |
| 107 | + root.add(f.match().field("origin").matching(origin)); |
| 108 | + } |
| 109 | + |
| 110 | + if (q != null && !q.isBlank()) { |
| 111 | + root.add(f.or( |
| 112 | + // Duplicate the query so that we apply a multiplicative boost to quarkus.io guides. |
| 113 | + // The end result is that a low-relevance match on quarkus.io _can_ be scored |
| 114 | + // lower than a high-relevance match on quarkiverse.io, |
| 115 | + // if it's significantly more relevant. |
| 116 | + // Note that we could, alternatively, |
| 117 | + // do something like bool().must(textMatch()).should(origin(quarkusio).boost(2f))), |
| 118 | + // but then the boost would be additive, so we would ignore relative relevance |
| 119 | + // of quarkus.io/quarkiverse.io results. |
| 120 | + f.bool().must(textMatch(f, q, language)) |
| 121 | + .filter(originMatch(f, QuarkusIO.QUARKUS_ORIGIN)) |
| 122 | + // Always score lower for compatibility (legacy) guides. |
| 123 | + // TODO: Maybe we should use a duplicate query with multiplicative boost for this too? |
| 124 | + .should(f.not(f.match().field(language.addSuffix("topics")) |
| 125 | + .matching("compatibility", ValueModel.INDEX)) |
| 126 | + .boost(50.0f)) |
| 127 | + .boost(2.0f), |
| 128 | + f.bool().must(textMatch(f, q, language)) |
| 129 | + .filter(originMatch(f, QuarkiverseIO.QUARKIVERSE_ORIGIN)))); |
| 130 | + } |
| 131 | + }) |
| 132 | + .highlighter(f -> f.fastVector() |
| 133 | + // Highlighters are going to use spans-with-classes so that we will have more control over styling the visual on the search results screen. |
| 134 | + .tag("<span class=\"" + highlightCssClass + "\">", "</span>")) |
| 135 | + .highlighter( |
| 136 | + "highlighter_title_or_summary", f -> f.fastVector() |
| 137 | + // We want the whole text of the field, regardless of whether it has a match or not. |
| 138 | + .noMatchSize(TITLE_OR_SUMMARY_MAX_SIZE) |
| 139 | + .fragmentSize(TITLE_OR_SUMMARY_MAX_SIZE) |
| 140 | + // We want the whole text as a single fragment |
| 141 | + .numberOfFragments(1)) |
| 142 | + .highlighter( |
| 143 | + "highlighter_content", f -> f.fastVector() |
| 144 | + // If there's no match in the full content we don't want to return anything. |
| 145 | + .noMatchSize(0) |
| 146 | + // Content is really huge, so we want to only get small parts of the sentences. |
| 147 | + // We give control to the caller on the content snippet length and the number of these fragments |
| 148 | + .numberOfFragments(contentSnippets) |
| 149 | + .fragmentSize(contentSnippetsLength) |
| 150 | + // The rest of fragment configuration is static |
| 151 | + .orderByScore(true) |
| 152 | + // We don't use sentence boundaries because those can result in huge fragments |
| 153 | + .boundaryScanner().chars().boundaryMaxScan(10).end()) |
| 154 | + .sort(f -> f.score().then().field(language.addSuffix("title_sort"))) |
| 155 | + .routing(QuarkusVersionAndLanguageRoutingBinder.searchKeys(version, language)) |
| 156 | + .totalHitCountThreshold(TOTAL_HIT_COUNT_THRESHOLD + (page + 1) * PAGE_SIZE) |
| 157 | + .requestTransformer(context -> requestSuggestion(context.body(), q, language, highlightCssClass)) |
| 158 | + .fetch(page * PAGE_SIZE, PAGE_SIZE); |
| 159 | + } |
| 160 | + |
138 | 161 | private PredicateFinalStep textMatch(SearchPredicateFactory f, String q, Language language) { |
139 | 162 | return f.simpleQueryString() |
140 | 163 | .field(language.addSuffix("title")).boost(10.0f) |
@@ -178,4 +201,25 @@ private void requestSuggestion(JsonObject payload, String q, Language language, |
178 | 201 | highlight.addProperty("post_tag", "</span>"); |
179 | 202 | } |
180 | 203 |
|
| 204 | + private static SearchResult.Suggestion extractSuggestion(ElasticsearchSearchResult<?> result) { |
| 205 | + try { |
| 206 | + JsonObject suggest = result.responseBody().getAsJsonObject("suggest"); |
| 207 | + if (suggest != null) { |
| 208 | + JsonArray options = suggest |
| 209 | + .getAsJsonArray("didYouMean") |
| 210 | + .get(0).getAsJsonObject() |
| 211 | + .getAsJsonArray("options"); |
| 212 | + if (options != null && !options.isEmpty()) { |
| 213 | + JsonObject suggestion = options.get(0).getAsJsonObject(); |
| 214 | + return new SearchResult.Suggestion(suggestion.get("text").getAsString(), |
| 215 | + suggestion.get("highlighted").getAsString()); |
| 216 | + } |
| 217 | + } |
| 218 | + } catch (RuntimeException e) { |
| 219 | + // Though it shouldn't happen, just in case we will catch any exceptions and return no suggestions: |
| 220 | + Log.warnf(e, "Failed to extract suggestion: %s" + e.getMessage()); |
| 221 | + } |
| 222 | + return null; |
| 223 | + } |
| 224 | + |
181 | 225 | } |
0 commit comments