Skip to content

Commit 2681a69

Browse files
committed
Fix tabular view
Change-Id: I86f35ccc811833f896b68daccd1c68d0a6b2fa9c
1 parent 7aac424 commit 2681a69

File tree

2 files changed

+52
-19
lines changed

2 files changed

+52
-19
lines changed

src/main/java/de/ids_mannheim/korap/response/Match.java

Lines changed: 37 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1635,6 +1635,10 @@ public ObjectNode getSnippetTokens () {
16351635
PositionsToOffset pto = this.positionsToOffset;
16361636
int ldid = this.localDocID;
16371637

1638+
// Work on local copies of the match bounds so that this.startPos and this.endPos are not mutated
1639+
int matchStartTok = this.startPos;
1640+
int matchEndTok = this.endPos; // exclusive bound
1641+
16381642
int startContext = -1;
16391643
int endContext = -1;
16401644
int startContextChar = -1;
@@ -1707,20 +1711,20 @@ public ObjectNode getSnippetTokens () {
17071711
int kwicMax = KrillProperties.getMaxTokenKwicSize();
17081712
if (kwicMax > 0) {
17091713
// Convert endContext to an exclusive bound to simplify iteration
1710-
int leftLen = (startContext < this.startPos) ? (this.startPos - startContext) : 0;
1711-
int matchLen = (this.endPos > this.startPos) ? (this.endPos - this.startPos) : 0;
1712-
int rightLen = (endContext > this.endPos) ? (endContext - this.endPos) : 0;
1714+
int leftLen = (startContext < matchStartTok) ? (matchStartTok - startContext) : 0;
1715+
int matchLen = (matchEndTok > matchStartTok) ? (matchEndTok - matchStartTok) : 0;
1716+
int rightLen = (endContext > matchEndTok) ? (endContext - matchEndTok) : 0;
17131717
int total = leftLen + matchLen + rightLen;
17141718
if (DEBUG)
17151719
log.info("KWIC tokens pre-cap: total={} (L/M/R={}/{}/{}) cap={} id={} uid={}",
17161720
total, leftLen, matchLen, rightLen, kwicMax, this.getID(), this.getUID());
17171721

17181722
if (matchLen >= kwicMax) {
17191723
// Cut match to kwicMax, drop all context
1720-
this.endPos = this.startPos + kwicMax;
1724+
matchEndTok = matchStartTok + kwicMax;
17211725
this.endCutted = true;
1722-
startContext = this.startPos;
1723-
endContext = this.endPos; // exclusive bound
1726+
startContext = matchStartTok;
1727+
endContext = matchEndTok; // exclusive bound
17241728
}
17251729
else if (total > kwicMax) {
17261730
int toReduce = total - kwicMax;
@@ -1749,10 +1753,10 @@ else if (total > kwicMax) {
17491753
// Trim remaining from match end
17501754
int newMatchLen = matchLen - rest;
17511755
if (newMatchLen < 0) newMatchLen = 0;
1752-
this.endPos = this.startPos + newMatchLen;
1756+
matchEndTok = matchStartTok + newMatchLen;
17531757
this.endCutted = true;
1754-
if (endContext < this.endPos)
1755-
endContext = this.endPos;
1758+
if (endContext < matchEndTok)
1759+
endContext = matchEndTok;
17561760
}
17571761
}
17581762
else {
@@ -1806,7 +1810,7 @@ else if (total > kwicMax) {
18061810
};
18071811

18081812
tokens = json.putArray("match");
1809-
for (i = this.startPos; i < this.endPos; i++) {
1813+
for (i = matchStartTok; i < matchEndTok; i++) {
18101814
offsets = pto.span(ldid,i);
18111815
if (offsets == null) {
18121816
continue;
@@ -1820,7 +1824,7 @@ else if (total > kwicMax) {
18201824
// Create right context token list
18211825
if (endContext > this.endPos) {
18221826
tokens = null;
1823-
for (i = this.endPos; i < endContext; i++) {
1827+
for (i = matchEndTok; i < endContext; i++) {
18241828
offsets = pto.span(ldid,i);
18251829
if (offsets == null) {
18261830
break;
@@ -1849,14 +1853,28 @@ else if (total > kwicMax) {
18491853
if (highlight.end == PB_MARKER || highlight.end == ALL_MARKER)
18501854
continue;
18511855

1852-
if (classes == null)
1853-
classes = json.putArray("classes");
1854-
1855-
ArrayNode cls = mapper.createArrayNode();
1856-
cls.add(highlight.number);
1857-
cls.add(highlight.start - this.startPos);
1858-
cls.add(highlight.end - this.startPos);
1859-
classes.add(cls);
1856+
// Clamp class span to visible match window
1857+
int clsStart = highlight.start - matchStartTok;
1858+
int clsEnd = highlight.end - matchStartTok; // inclusive
1859+
1860+
int visibleMatchLen = Math.max(0, matchEndTok - matchStartTok);
1861+
int maxIdx = visibleMatchLen > 0 ? (visibleMatchLen - 1) : -1;
1862+
1863+
if (maxIdx >= 0) {
1864+
if (clsStart < 0) clsStart = 0;
1865+
if (clsEnd > maxIdx) clsEnd = maxIdx;
1866+
}
1867+
1868+
if (clsStart <= clsEnd && maxIdx >= 0) {
1869+
if (classes == null)
1870+
classes = json.putArray("classes");
1871+
1872+
ArrayNode cls = mapper.createArrayNode();
1873+
cls.add(highlight.number);
1874+
cls.add(clsStart);
1875+
cls.add(clsEnd);
1876+
classes.add(cls);
1877+
}
18601878
};
18611879
};
18621880

src/main/resources/krill.properties.info

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,21 @@ krill.index.textSize.max = 20000000
2222
# 2*krill.context.max.token + krill.match.max.token and log a deprecation warning.
2323
# krill.kwic.max.token = 100
2424

25+
# For tokenized KWIC (tabular) views, optionally include a class entry
26+
# that covers the whole match span. Frontends can use this to highlight
27+
# the complete match in the token window in addition to annotation classes.
28+
# Default: false
29+
# krill.kwic.tokens.match.class = false
30+
31+
# If true, align the HTML KWIC window strictly to the token window produced
32+
# by the tokenized KWIC. This can help ensure both representations show the
33+
# exact same left/match/right boundaries. Default: false
34+
# krill.kwic.enforce.html = false
35+
36+
# If true, clamp HTML snippet character ranges to safe boundaries when
37+
# offsets are incomplete. Default: false
38+
# krill.snippet.safeCharBounds = false
39+
2540
# Deprecated: These are ignored when 'krill.kwic.max.token' is set and will be removed.
2641
# They were previously used to cap match length and per-side context lengths, but
2742
# licensing limits apply to the total snippet size, not to the match alone.

0 commit comments

Comments
 (0)