@@ -1354,10 +1354,24 @@ private boolean _processHighlight () {
13541354 log .trace ("PTO will retrieve {} & {} (Match boundary)" ,
13551355 this .getStartPos (), this .getEndPos ());
13561356
1357- // Set inner match
1358- if (this .innerMatchEndPos != 1 )
1359- this .addHighlight (this .innerMatchStartPos , this .innerMatchEndPos ,
1360- -1 );
1357+ // Set inner match (ensure it's not added twice)
1358+ if (this .innerMatchEndPos != 1 ) {
1359+ boolean alreadyHasInnerMatch = false ;
1360+ if (this .highlight != null ) {
1361+ for (Highlight hl : this .highlight ) {
1362+ if (hl .number == -1 &&
1363+ hl .start == this .innerMatchStartPos &&
1364+ hl .end == this .innerMatchEndPos ) {
1365+ alreadyHasInnerMatch = true ;
1366+ break ;
1367+ }
1368+ }
1369+ }
1370+
1371+ if (!alreadyHasInnerMatch ) {
1372+ this .addHighlight (this .innerMatchStartPos , this .innerMatchEndPos , -1 );
1373+ }
1374+ }
13611375
13621376 // Add all highlights for character retrieval
13631377 if (this .highlight != null ) {
@@ -1656,7 +1670,8 @@ public ObjectNode getSnippetTokens () {
16561670 };
16571671
16581672 if (this .context .right .isToken () && this .context .right .getLength () > 0 ) {
1659- endContext = this .endPos + this .context .right .getLength () - 1 ;
1673+ // Use exclusive bound for endContext to simplify iteration
1674+ endContext = this .endPos + this .context .right .getLength ();
16601675 };
16611676 };
16621677
@@ -1748,14 +1763,12 @@ else if (total > kwicMax) {
17481763
17491764 if (endContextChar == -1 || endContextChar == 0 || endContextChar > pdl ) {
17501765 this .tempSnippet = this .getPrimaryData (startContextChar );
1751- this . endMore = false ;
1766+ // Do not alter endMore here; HTML/Brackets decide based on char offsets
17521767 } else {
1753- this .tempSnippet = this .getPrimaryData (startContextChar ,endContextChar );
1768+ this .tempSnippet = this .getPrimaryData (startContextChar , endContextChar );
17541769 }
17551770
1756- if (startContext == 0 ) {
1757- this .startMore = false ;
1758- }
1771+ // Do not alter startMore here; HTML/Brackets decide based on char offsets
17591772
17601773 Integer [] offsets ;
17611774 ArrayNode tokens ;
@@ -1843,51 +1856,22 @@ else if (total > kwicMax) {
18431856
18441857 @ JsonIgnore
18451858 public String getSnippetHTML () {
1846- // Failsafe: enforce total KWIC cap by rebuilding context if necessary
1847- int kwicMaxFS = KrillProperties .getMaxTokenKwicSize ();
1848- if (kwicMaxFS > 0 ) {
1849- // Build tokens once to measure current KWIC size
1850- ObjectNode tok = this .getSnippetTokens ();
1851- if (tok != null ) {
1852- int left = tok .has ("left" ) ? tok .get ("left" ).size () : 0 ;
1853- int match = tok .has ("match" ) ? tok .get ("match" ).size () : 0 ;
1854- int right = tok .has ("right" ) ? tok .get ("right" ).size () : 0 ;
1855- int total = left + match + right ;
1856- if (total > kwicMaxFS ) {
1857- log .info ("KWIC failsafe (HTML): total={} > cap={}, left={}, match={}, right={}, id={} uid={}" ,
1858- total , kwicMaxFS , left , match , right , this .getID (), this .getUID ());
1859- int allowedCtx = Math .max (kwicMaxFS - match , 0 );
1860- int leftAllowed = Math .min (left , (allowedCtx + 1 ) / 2 );
1861- int rightAllowed = Math .min (right , allowedCtx - leftAllowed );
1862- int rest = allowedCtx - (leftAllowed + rightAllowed );
1863- if (rest > 0 ) {
1864- int extraRight = Math .min (rest , right - rightAllowed );
1865- rightAllowed += extraRight ;
1866- rest -= extraRight ;
1867- }
1868- if (rest > 0 ) {
1869- int extraLeft = Math .min (rest , left - leftAllowed );
1870- leftAllowed += extraLeft ;
1871- }
1872- // Force token-based context (disable span) and rebuild
1873- log .info ("KWIC failsafe (HTML): leftAllowed={}, rightAllowed={}, allowedCtx={}" ,
1874- leftAllowed , rightAllowed , allowedCtx );
1875- this .context = new SearchContext ();
1876- this .context .left .setToken (true ).setLength (leftAllowed );
1877- this .context .right .setToken (true ).setLength (rightAllowed );
1878- this ._reset ();
1879- }
1880- else if (DEBUG ) {
1881- log .debug ("KWIC failsafe (HTML): within cap (total={} ≤ {}) id={}" , total , kwicMaxFS , this .getID ());
1882- }
1883- }
1884- else {
1885- log .warn ("KWIC failsafe (HTML): tokens unavailable (pto/localDocID missing?) id={} uid={}" , this .getID (), this .getUID ());
1886- }
1887- }
1888-
1889- if (!this ._processHighlight ())
1859+ // Entry log: Show context and cap (helps verify HTML path executes)
1860+ log .info (
1861+ "Enter getSnippetHTML: id={} uid={} spanDefined={} left(token={},len={}) right(token={},len={}) cap={}" ,
1862+ this .getID (), this .getUID (), this .getContext ().isSpanDefined (),
1863+ this .getContext ().left .isToken (), this .getContext ().left .getLength (),
1864+ this .getContext ().right .isToken (), this .getContext ().right .getLength (),
1865+ KrillProperties .getMaxTokenKwicSize ()
1866+ );
1867+
1868+ // Note: HTML KWIC enforcement is applied in _processOffsetChars();
1869+ // we don't mutate the context here to avoid diverging from char-based contexts
1870+
1871+ if (!this ._processHighlight ()) {
1872+ log .warn ("getSnippetHTML: _processHighlight() returned false id={} uid={}" , this .getID (), this .getUID ());
18901873 return null ;
1874+ }
18911875
18921876 if (this .processed && this .snippetHTML != null )
18931877 return this .snippetHTML ;
@@ -2019,45 +2003,42 @@ else if (DEBUG) {
20192003
20202004 @ JsonIgnore
20212005 public String getSnippetBrackets () {
2022- // Failsafe: enforce total KWIC cap also for bracket snippets
2023- int kwicMaxFS = KrillProperties .getMaxTokenKwicSize ();
2024- if (kwicMaxFS > 0 ) {
2025- ObjectNode tok = this .getSnippetTokens ();
2026- if (tok != null ) {
2027- int left = tok .has ("left" ) ? tok .get ("left" ).size () : 0 ;
2028- int match = tok .has ("match" ) ? tok .get ("match" ).size () : 0 ;
2029- int right = tok .has ("right" ) ? tok .get ("right" ).size () : 0 ;
2030- int total = left + match + right ;
2031- if (total > kwicMaxFS ) {
2032- log .info ("KWIC failsafe (Brackets): total={} > cap={}, left={}, match={}, right={}, id={} uid={}" ,
2033- total , kwicMaxFS , left , match , right , this .getID (), this .getUID ());
2034- int allowedCtx = Math .max (kwicMaxFS - match , 0 );
2035- int leftAllowed = Math .min (left , (allowedCtx + 1 ) / 2 );
2036- int rightAllowed = Math .min (right , allowedCtx - leftAllowed );
2037- int rest = allowedCtx - (leftAllowed + rightAllowed );
2038- if (rest > 0 ) {
2039- int extraRight = Math .min (rest , right - rightAllowed );
2040- rightAllowed += extraRight ;
2041- rest -= extraRight ;
2042- }
2043- if (rest > 0 ) {
2044- int extraLeft = Math .min (rest , left - leftAllowed );
2045- leftAllowed += extraLeft ;
2006+ // Failsafe: Only adjust context for brackets when not span-defined
2007+ // (e.g., when extendToSentence is active, keep span context intact)
2008+ if (!this .getContext ().isSpanDefined ()) {
2009+ int kwicMaxFS = KrillProperties .getMaxTokenKwicSize ();
2010+ if (kwicMaxFS > 0 ) {
2011+ ObjectNode tok = this .getSnippetTokens ();
2012+ if (tok != null ) {
2013+ int left = tok .has ("left" ) ? tok .get ("left" ).size () : 0 ;
2014+ int match = tok .has ("match" ) ? tok .get ("match" ).size () : 0 ;
2015+ int right = tok .has ("right" ) ? tok .get ("right" ).size () : 0 ;
2016+ int total = left + match + right ;
2017+ if (total > kwicMaxFS ) {
2018+ log .info ("KWIC failsafe (Brackets): total={} > cap={}, left={}, match={}, right={}, id={} uid={}" ,
2019+ total , kwicMaxFS , left , match , right , this .getID (), this .getUID ());
2020+ int allowedCtx = Math .max (kwicMaxFS - match , 0 );
2021+ int leftAllowed = Math .min (left , (allowedCtx + 1 ) / 2 );
2022+ int rightAllowed = Math .min (right , allowedCtx - leftAllowed );
2023+ int rest = allowedCtx - (leftAllowed + rightAllowed );
2024+ if (rest > 0 ) {
2025+ int extraRight = Math .min (rest , right - rightAllowed );
2026+ rightAllowed += extraRight ;
2027+ rest -= extraRight ;
2028+ }
2029+ if (rest > 0 ) {
2030+ int extraLeft = Math .min (rest , left - leftAllowed );
2031+ leftAllowed += extraLeft ;
2032+ }
2033+ log .info ("KWIC failsafe (Brackets): leftAllowed={}, rightAllowed={}, allowedCtx={}" ,
2034+ leftAllowed , rightAllowed , allowedCtx );
2035+ this .context = new SearchContext ();
2036+ this .context .left .setToken (true ).setLength (leftAllowed );
2037+ this .context .right .setToken (true ).setLength (rightAllowed );
2038+ this ._reset ();
20462039 }
2047- log .info ("KWIC failsafe (Brackets): leftAllowed={}, rightAllowed={}, allowedCtx={}" ,
2048- leftAllowed , rightAllowed , allowedCtx );
2049- this .context = new SearchContext ();
2050- this .context .left .setToken (true ).setLength (leftAllowed );
2051- this .context .right .setToken (true ).setLength (rightAllowed );
2052- this ._reset ();
2053- }
2054- else if (DEBUG ) {
2055- log .debug ("KWIC failsafe (Brackets): within cap (total={} ≤ {}) id={}" , total , kwicMaxFS , this .getID ());
20562040 }
20572041 }
2058- else {
2059- log .warn ("KWIC failsafe (Brackets): tokens unavailable (pto/localDocID missing?) id={} uid={}" , this .getID (), this .getUID ());
2060- }
20612042 }
20622043
20632044 if (!this ._processHighlight ())
@@ -2606,6 +2587,7 @@ else if (total > kwicMax) {
26062587 if (endOffset != -1 )
26072588 endOffset = Math .max (endOffset , this .endPos - 1 );
26082589 }
2590+ //
26092591 else {
26102592 if (DEBUG )
26112593 log .debug ("KWIC cap not reached (offset path): total={} ≤ cap={}" ,
@@ -2661,6 +2643,14 @@ else if (total > kwicMax) {
26612643 if (endOffset != -1 )
26622644 endOffset = Math .max (endOffset , this .endPos - 1 );
26632645 }
2646+
2647+ // Log cap application for HTML path
2648+ log .info ("KWIC cap applied (offset path): total={} cap={} reduce L/R={}/{} rest={} new L/M/R={}/{}/{} id={} uid={}" ,
2649+ total , kwicMax , reduceLeft , reduceRight , rest ,
2650+ Math .max (0 , (this .startPos - (startOffset == -1 ? this .startPos : startOffset ))),
2651+ (this .endPos > this .startPos ) ? (this .endPos - this .startPos ) : 0 ,
2652+ Math .max (0 , (endOffset == -1 ? 0 : (endOffset - (this .endPos - 1 )))),
2653+ this .getID (), this .getUID ());
26642654 }
26652655 else {
26662656 if (DEBUG )
@@ -2674,9 +2664,14 @@ else if (total > kwicMax) {
26742664 if (endOffset != -1 )
26752665 endOffsetChar = pto .end (ldid , endOffset );
26762666
2667+ // Diagnostic: show computed offsets and context (debug only)
26772668 if (DEBUG )
2678- log .trace ("Premature found offsets at {}-{}" , startOffsetChar ,
2679- endOffsetChar );
2669+ log .trace ("_processOffsetChars: startOffset={} endOffset={} startOffsetChar={} endOffsetChar={} startPos={} endPos={} leftTok?{} leftLen={} rightTok?{} rightLen={} id={}" ,
2670+ startOffset , endOffset , startOffsetChar , endOffsetChar ,
2671+ this .startPos , this .endPos ,
2672+ this .context .left .isToken (), this .context .left .getLength (),
2673+ this .context .right .isToken (), this .context .right .getLength (),
2674+ this .getID ());
26802675
26812676 // Ensure zero-context means match-only and not full document
26822677 if (startOffset == -1 && (startOffsetChar < 0 || this .context .left .getLength () == 0 ))
@@ -2685,6 +2680,7 @@ else if (total > kwicMax) {
26852680 endOffsetChar = endPosChar ;
26862681
26872682
2683+
26882684 // This can happen in case of non-token characters
26892685 // in the match and null offsets
26902686 if (startOffsetChar > startPosChar )
0 commit comments