Skip to content

Commit a5a62db

Browse files
committed
Larger snippets
1 parent 3228047 commit a5a62db

7 files changed

Lines changed: 158 additions & 204 deletions

File tree

common/src/java/com/github/oeuvres/alix/lucene/spans/DocSnipHiliter.java renamed to common/src/java/com/github/oeuvres/alix/lucene/spans/HiliteSnippets.java

Lines changed: 18 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@
6464
* analysis. Tag injection is a linear merge that never re-parses HTML.
6565
* </p>
6666
*/
67-
public final class DocSnipHiliter
67+
public final class HiliteSnippets
6868
{
6969
/** Snippet opens; outermost at this offset. */
7070
private static final int SNIP_OPEN = 0;
@@ -110,7 +110,7 @@ public final class DocSnipHiliter
110110
* @throws IllegalArgumentException if {@code mergeGap} is negative
111111
* @throws NullPointerException if {@code searcher} or {@code spanQuery} is {@code null}
112112
*/
113-
public DocSnipHiliter(
113+
public HiliteSnippets(
114114
final IndexSearcher searcher,
115115
final SpanQuery spanQuery,
116116
final int mergeGap) throws IOException
@@ -175,7 +175,7 @@ public String highlight(final int docId, final String content) throws IOExceptio
175175
sb.append(content, cursor, offset);
176176
cursor = offset;
177177
}
178-
writeTag(sb, kind, snipOrd);
178+
writeTag(sb, kind, snipOrd, offset);
179179
}
180180
sb.append(content, cursor, content.length());
181181
return sb.toString();
@@ -208,9 +208,8 @@ private void emitSnippetEvents()
208208
if (snipCharStart < 0) {
209209
continue;
210210
}
211-
final int userOrd = snipOrd + 1;
212-
addEvent(snipCharStart, SNIP_OPEN, userOrd);
213-
addEvent(snipCharEnd, SNIP_CLOSE, userOrd);
211+
addEvent(snipCharStart, SNIP_OPEN, snipOrd);
212+
addEvent(snipCharEnd, SNIP_CLOSE, snipOrd);
214213
}
215214
}
216215

@@ -308,12 +307,21 @@ private void populateSnippets(
308307
/**
309308
* Writes one tag for the given event kind into the output buffer.
310309
*/
311-
private static void writeTag(final StringBuilder sb, final int kind, final int snipOrd)
310+
private static void writeTag(final StringBuilder sb, final int kind, final int snipOrd, final int offset)
312311
{
312+
final int snipAnchor = snipOrd + 1;
313313
switch (kind) {
314314
case SNIP_OPEN:
315-
sb.append("<wbr id=\"snippet").append(snipOrd)
316-
.append("\" class=\"hl-start\" data-hl=\"").append(snipOrd).append("\"/>");
315+
sb
316+
.append("<span")
317+
.append(" class=\"hl-anchor\"")
318+
.append(" data-hl=\"").append(snipAnchor).append("\"")
319+
.append(" id=\"snippet-").append(snipAnchor).append("\"")
320+
.append("></span>")
321+
.append("<wbr")
322+
.append(" class=\"hl-start\"")
323+
.append(" data-hl=\"").append(snipAnchor).append("\"")
324+
.append("/>");
317325
break;
318326
case MATCH_OPEN:
319327
sb.append("<mark class=\"term match\">");
@@ -328,7 +336,7 @@ private static void writeTag(final StringBuilder sb, final int kind, final int s
328336
sb.append("</mark>");
329337
break;
330338
case SNIP_CLOSE:
331-
sb.append("<wbr class=\"hl-end\" data-hl=\"").append(snipOrd).append("\"/>");
339+
sb.append("<wbr class=\"hl-end\" data-hl=\"").append(snipAnchor).append("\"/>");
332340
break;
333341
default:
334342
throw new AssertionError("unknown event kind: " + kind);

common/src/java/com/github/oeuvres/alix/lucene/spans/HtmlSnippets.java renamed to common/src/java/com/github/oeuvres/alix/lucene/spans/ResultsSnippets.java

Lines changed: 43 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@
5555
* this renderer. This class is not thread-safe.
5656
* </p>
5757
*/
58-
public class HtmlSnippets implements SnippetsConsumer
58+
public class ResultsSnippets implements SnippetsConsumer
5959
{
6060
private final String contentFieldName;
6161
private final Detagger detagger = new Detagger(Set.of("i", "em"));
@@ -93,7 +93,7 @@ public class HtmlSnippets implements SnippetsConsumer
9393
* @throws IllegalArgumentException if {@code snipLimit < 0}
9494
* @throws NullPointerException if any non-primitive argument is {@code null}
9595
*/
96-
public HtmlSnippets(
96+
public ResultsSnippets(
9797
final Writer writer,
9898
final StoredFields storedFields,
9999
final String contentFieldName,
@@ -106,12 +106,14 @@ public HtmlSnippets(
106106
this.contentFieldName = Objects.requireNonNull(contentFieldName, "contentFieldName");
107107
this.rail = Objects.requireNonNull(rail, "rail");
108108
this.termWeights = Objects.requireNonNull(termWeights, "termWeights");
109-
if (snipLimit < 0) {
110-
throw new IllegalArgumentException("snipLimit=" + snipLimit + ", expected >= 0");
111-
}
112109
this.snipLimit = snipLimit;
113110
this.termDedup = new int[termWeights.length];
114-
this.topSnips = new TopArray(snipLimit);
111+
if (snipLimit > 0) {
112+
this.topSnips = new TopArray(snipLimit);
113+
}
114+
else {
115+
this.topSnips = null; // no ranking buffer needed: docSnippets() reaches topSnips only in its snipLimit > 0 branch
116+
}
115117
}
116118

117119
/**
@@ -131,7 +133,7 @@ public int ctx()
131133
* @param ctx context width in words
132134
* @return this instance
133135
*/
134-
public HtmlSnippets ctx(final int ctx)
136+
public ResultsSnippets ctx(final int ctx)
135137
{
136138
this.ctx = ctx;
137139
return this;
@@ -169,7 +171,7 @@ public String doclineFieldName()
169171
* @param doclineFieldName stored-field name, or {@code null}
170172
* @return this instance
171173
*/
172-
public HtmlSnippets doclineFieldName(final String doclineFieldName)
174+
public ResultsSnippets doclineFieldName(final String doclineFieldName)
173175
{
174176
this.doclineFieldName = doclineFieldName;
175177
return this;
@@ -183,28 +185,29 @@ public HtmlSnippets doclineFieldName(final String doclineFieldName)
183185
* @param docId Lucene document id
184186
* @throws IOException if the writer or stored-fields access fails
185187
*/
186-
public void docOpen(final int docId) throws IOException
188+
public void docOpen(final int docId, String css) throws IOException
187189
{
190+
if (css == null || css.isBlank()) {css=""; }
191+
else { css = " " + css;};
188192
doc = storedFields.document(docId);
189193
id = doc.get(ALIX_ID);
190194
writer.append("<article")
191195
.append(" id=\"").append(id).append("\"")
192196
.append(" data-docid=\"").append(String.valueOf(docId)).append("\"")
193-
.append(" class=\"result\"")
197+
.append(" class=\"result").append(css).append("\"")
194198
.append(">\n");
195199

196200
String url = hrefBase + id + hrefExt + hrefSearch;
197201
if (doclineFieldName != null) {
198202
final String docline = doc.get(doclineFieldName);
199203
if (docline != null) {
200204
writer.append("<h4")
201-
.append(" data-href=\"").append(url).append("\"")
205+
.append(" class=\"result-title\"")
202206
.append(">\n")
203-
.append("<span>").append(docline).append("</span>\n")
204207
.append("<a")
205208
.append(" href=\"").append(url).append("\"")
206-
.append(" class=\"result-open\"")
207-
.append(">→</a>\n")
209+
.append(">").append(docline)
210+
.append("</a>\n")
208211
.append("</h4>\n");
209212
}
210213
}
@@ -227,10 +230,27 @@ public void docOpen(final int docId) throws IOException
227230
@Override
228231
public void docSnippets(final int docId, final Snippets snippets) throws IOException
229232
{
230-
docOpen(docId);
231-
content = doc.get(contentFieldName);
232233
final int snipCount = snippets.snips4doc();
233-
if (snipLimit > 0 && content != null && snipCount > 0) {
234+
docOpen(docId, "hassnippets");
235+
content = doc.get(contentFieldName);
236+
if (content == null) {
237+
writer.append("<!-- No text stored for field: '" + contentFieldName + "' -->");
238+
}
239+
else if (snipCount <= 0) {
240+
writer.append("<!-- No snippets found -->");
241+
}
242+
else if (snipLimit < 0) {
243+
// list all snippets in document order
244+
writer.append("<ol class=\"snippets\">\n");
245+
for (int snipOrd = 0; snipOrd < snipCount; snipOrd++) {
246+
print(snippets, snipOrd);
247+
}
248+
writer.append("</ol>\n");
249+
}
250+
else if (snipLimit == 0) {
251+
252+
}
253+
else {
234254
topSnips.clear();
235255
for (int snipOrd = 0; snipOrd < snipCount; snipOrd++) {
236256
final int startPos = Math.max(0, snippets.snipStartPosition(snipOrd) - ctx);
@@ -262,7 +282,7 @@ public String hrefBase()
262282
* @param hrefBase URL prefix
263283
* @return this instance
264284
*/
265-
public HtmlSnippets hrefBase(final String hrefBase)
285+
public ResultsSnippets hrefBase(final String hrefBase)
266286
{
267287
this.hrefBase = hrefBase;
268288
return this;
@@ -284,7 +304,7 @@ public String hrefExt()
284304
* @param hrefExt URL suffix
285305
* @return this instance
286306
*/
287-
public HtmlSnippets hrefExt(final String hrefExt)
307+
public ResultsSnippets hrefExt(final String hrefExt)
288308
{
289309
this.hrefExt = hrefExt;
290310
return this;
@@ -306,7 +326,7 @@ public String hrefSearch()
306326
* @param hrefSearch query-string fragment
307327
* @return this instance
308328
*/
309-
public HtmlSnippets hrefSearch(final String hrefSearch)
329+
public ResultsSnippets hrefSearch(final String hrefSearch)
310330
{
311331
this.hrefSearch = hrefSearch;
312332
return this;
@@ -345,7 +365,8 @@ private void print(final Snippets snippets, final int snipOrd) throws IOExceptio
345365
final int leftMatchStartOffset = snippets.matchStartOffset(leftMatchOrd);
346366
final int rightMatchEndOffset = snippets.matchEndOffset(rightMatchOrd);
347367

348-
final String url = hrefBase + id + hrefExt + hrefSearch + "#snippet" + snipOrd;
368+
final int snipAnchor = snipOrd + 1;
369+
final String url = hrefBase + id + hrefExt + hrefSearch + "#snippet-" + snipAnchor;
349370
writer
350371
.append("<li")
351372
.append(" class=\"snippet\"")
@@ -380,7 +401,7 @@ private void print(final Snippets snippets, final int snipOrd) throws IOExceptio
380401
.append("\n<a")
381402
.append(" class=\"snippet-open\"")
382403
.append(" href=\"").append(url).append("\"")
383-
.append("\">→</a>");
404+
.append(">→</a>");
384405
writer.append("</li>\n");
385406
}
386407

test/src/main/java/com/github/oeuvres/alix/lucene/SpanDemo.java

Lines changed: 0 additions & 152 deletions
This file was deleted.

0 commit comments

Comments
 (0)