@@ -1884,49 +1884,30 @@ func (e *Engine) FindAllSubmatch(haystack []byte, n int) []*MatchWithCaptures {
18841884 return matches
18851885}
18861886
1887- // digitPrefilterAdaptiveThreshold is the number of consecutive false positives
1888- // (digit positions that don't lead to matches) before switching to DFA-only mode.
1889- // This implements runtime adaptive switching based on Rust regex's insight:
1890- // "if a prefilter has a high false positive rate and produces lots of candidates,
1891- // then a prefilter can overall make a regex search slower."
1892- //
1893- // Value rationale:
1894- // - Too low (e.g., 8): May switch prematurely on sparse data
1895- // - Too high (e.g., 256): Wastes time on dense data with many FPs
1896- // - 64: Good balance - gives prefilter fair chance while limiting overhead
1897- const digitPrefilterAdaptiveThreshold = 64
1898-
18991887// findDigitPrefilter searches using SIMD digit scanning + DFA verification.
1900- // Used for digit-lead patterns like IP addresses where literal extraction fails
1888+ // Used for simple digit-lead patterns where literal extraction fails
19011889// but all alternation branches must start with a digit.
19021890//
1891+ // Note: Complex digit-lead patterns (like IP addresses with 74 NFA states) are
1892+ // handled by UseBoth/UseDFA strategies instead. See digitPrefilterMaxNFAStates.
1893+ //
19031894// Algorithm:
19041895// 1. Use SIMD to find next digit position in haystack
19051896// 2. Verify match at digit position using lazy DFA + PikeVM
19061897// 3. If no match, continue from digit position + 1
1907- // 4. ADAPTIVE: If too many consecutive FPs, switch to DFA-only mode
19081898//
19091899// Performance:
1910- // - Sparse data: Skips non-digit regions with SIMD (15-20x faster)
1911- // - Dense data: Adaptively switches to DFA when FP rate is high
1900+ // - Skips non-digit regions with SIMD (15-20x faster for sparse data)
19121901// - Total: O(n) for scan + O(k*m) for k digit candidates, where m is the verification cost per candidate
19131902func (e * Engine ) findDigitPrefilter (haystack []byte ) * Match {
19141903 if e .digitPrefilter == nil {
19151904 return e .findNFA (haystack )
19161905 }
19171906
1918- e .stats .PrefilterHits ++ // Count prefilter usage
1907+ e .stats .PrefilterHits ++
19191908 pos := 0
1920- consecutiveFPs := 0 // Track consecutive false positives
19211909
19221910 for pos < len (haystack ) {
1923- // ADAPTIVE: If too many consecutive FPs, abandon prefilter and use DFA directly
1924- // This prevents pathological slowdown on dense digit data (like IP-heavy text)
1925- if consecutiveFPs >= digitPrefilterAdaptiveThreshold {
1926- e .stats .PrefilterAbandoned ++
1927- return e .findAdaptiveAt (haystack , pos )
1928- }
1929-
19301911 // Use SIMD to find next digit position
19311912 digitPos := e .digitPrefilter .Find (haystack , pos )
19321913 if digitPos < 0 {
@@ -1944,17 +1925,13 @@ func (e *Engine) findDigitPrefilter(haystack []byte) *Match {
19441925 return NewMatch (start , end , haystack )
19451926 }
19461927 }
1947- // DFA rejected - count as false positive
1948- consecutiveFPs ++
19491928 } else {
19501929 // No DFA - use PikeVM directly
19511930 e .stats .NFASearches ++
19521931 start , end , found := e .pikevm .SearchAt (haystack , digitPos )
19531932 if found {
19541933 return NewMatch (start , end , haystack )
19551934 }
1956- // NFA rejected - count as false positive
1957- consecutiveFPs ++
19581935 }
19591936
19601937 // No match at this digit position, continue searching
@@ -1965,23 +1942,15 @@ func (e *Engine) findDigitPrefilter(haystack []byte) *Match {
19651942}
19661943
19671944// findDigitPrefilterAt searches using digit prefilter starting at position 'at'.
1968- // Uses adaptive switching like findDigitPrefilter.
19691945func (e * Engine ) findDigitPrefilterAt (haystack []byte , at int ) * Match {
19701946 if e .digitPrefilter == nil || at >= len (haystack ) {
19711947 return e .findNFAAt (haystack , at )
19721948 }
19731949
19741950 e .stats .PrefilterHits ++
19751951 pos := at
1976- consecutiveFPs := 0
19771952
19781953 for pos < len (haystack ) {
1979- // ADAPTIVE: Switch to DFA if too many consecutive FPs
1980- if consecutiveFPs >= digitPrefilterAdaptiveThreshold {
1981- e .stats .PrefilterAbandoned ++
1982- return e .findAdaptiveAt (haystack , pos )
1983- }
1984-
19851954 digitPos := e .digitPrefilter .Find (haystack , pos )
19861955 if digitPos < 0 {
19871956 return nil
@@ -1996,14 +1965,12 @@ func (e *Engine) findDigitPrefilterAt(haystack []byte, at int) *Match {
19961965 return NewMatch (start , end , haystack )
19971966 }
19981967 }
1999- consecutiveFPs ++
20001968 } else {
20011969 e .stats .NFASearches ++
20021970 start , end , found := e .pikevm .SearchAt (haystack , digitPos )
20031971 if found {
20041972 return NewMatch (start , end , haystack )
20051973 }
2006- consecutiveFPs ++
20071974 }
20081975
20091976 pos = digitPos + 1
@@ -2014,23 +1981,15 @@ func (e *Engine) findDigitPrefilterAt(haystack []byte, at int) *Match {
20141981
20151982// isMatchDigitPrefilter checks for match using digit prefilter.
20161983// Optimized for boolean matching with early termination.
2017- // Uses adaptive switching like findDigitPrefilter.
20181984func (e * Engine ) isMatchDigitPrefilter (haystack []byte ) bool {
20191985 if e .digitPrefilter == nil {
20201986 return e .isMatchNFA (haystack )
20211987 }
20221988
20231989 e .stats .PrefilterHits ++
20241990 pos := 0
2025- consecutiveFPs := 0
20261991
20271992 for pos < len (haystack ) {
2028- // ADAPTIVE: Switch to DFA if too many consecutive FPs
2029- if consecutiveFPs >= digitPrefilterAdaptiveThreshold {
2030- e .stats .PrefilterAbandoned ++
2031- return e .isMatchAdaptive (haystack [pos :])
2032- }
2033-
20341993 digitPos := e .digitPrefilter .Find (haystack , pos )
20351994 if digitPos < 0 {
20361995 return false // No more digits
@@ -2039,18 +1998,15 @@ func (e *Engine) isMatchDigitPrefilter(haystack []byte) bool {
20391998 // Use DFA for fast boolean check if available
20401999 if e .dfa != nil {
20412000 e .stats .DFASearches ++
2042- // DFA.FindAt returns end position if match, -1 otherwise
20432001 if e .dfa .FindAt (haystack , digitPos ) != - 1 {
20442002 return true
20452003 }
2046- consecutiveFPs ++
20472004 } else {
20482005 e .stats .NFASearches ++
20492006 _ , _ , found := e .pikevm .SearchAt (haystack , digitPos )
20502007 if found {
20512008 return true
20522009 }
2053- consecutiveFPs ++
20542010 }
20552011
20562012 pos = digitPos + 1
@@ -2060,45 +2016,34 @@ func (e *Engine) isMatchDigitPrefilter(haystack []byte) bool {
20602016}
20612017
20622018// findIndicesDigitPrefilter returns indices using digit prefilter - zero alloc.
2063- // Uses adaptive switching like findDigitPrefilter.
20642019func (e * Engine ) findIndicesDigitPrefilter (haystack []byte ) (int , int , bool ) {
20652020 if e .digitPrefilter == nil {
20662021 return e .findIndicesNFA (haystack )
20672022 }
20682023
20692024 e .stats .PrefilterHits ++
20702025 pos := 0
2071- consecutiveFPs := 0
20722026
20732027 for pos < len (haystack ) {
2074- // ADAPTIVE: Switch to DFA if too many consecutive FPs
2075- if consecutiveFPs >= digitPrefilterAdaptiveThreshold {
2076- e .stats .PrefilterAbandoned ++
2077- return e .findIndicesAdaptiveAt (haystack , pos )
2078- }
2079-
20802028 digitPos := e .digitPrefilter .Find (haystack , pos )
20812029 if digitPos < 0 {
20822030 return - 1 , - 1 , false
20832031 }
20842032
20852033 if e .dfa != nil {
20862034 e .stats .DFASearches ++
2087- endPos := e .dfa .FindAt (haystack , digitPos )
2035+ // Use anchored search - the match MUST start at digitPos, so the start is already known.
2036+ // The anchored DFA yields the end position directly, which is much faster than running PikeVM for patterns that require a digit start.
2037+ endPos := e .dfa .SearchAtAnchored (haystack , digitPos )
20882038 if endPos != - 1 {
2089- start , end , found := e .pikevm .SearchAt (haystack , digitPos )
2090- if found {
2091- return start , end , true
2092- }
2039+ return digitPos , endPos , true
20932040 }
2094- consecutiveFPs ++
20952041 } else {
20962042 e .stats .NFASearches ++
20972043 start , end , found := e .pikevm .SearchAt (haystack , digitPos )
20982044 if found {
20992045 return start , end , true
21002046 }
2101- consecutiveFPs ++
21022047 }
21032048
21042049 pos = digitPos + 1
@@ -2108,45 +2053,34 @@ func (e *Engine) findIndicesDigitPrefilter(haystack []byte) (int, int, bool) {
21082053}
21092054
21102055// findIndicesDigitPrefilterAt returns indices starting at position 'at' - zero alloc.
2111- // Uses adaptive switching like findDigitPrefilter.
21122056func (e * Engine ) findIndicesDigitPrefilterAt (haystack []byte , at int ) (int , int , bool ) {
21132057 if e .digitPrefilter == nil || at >= len (haystack ) {
21142058 return e .findIndicesNFAAt (haystack , at )
21152059 }
21162060
21172061 e .stats .PrefilterHits ++
21182062 pos := at
2119- consecutiveFPs := 0
21202063
21212064 for pos < len (haystack ) {
2122- // ADAPTIVE: Switch to DFA if too many consecutive FPs
2123- if consecutiveFPs >= digitPrefilterAdaptiveThreshold {
2124- e .stats .PrefilterAbandoned ++
2125- return e .findIndicesAdaptiveAt (haystack , pos )
2126- }
2127-
21282065 digitPos := e .digitPrefilter .Find (haystack , pos )
21292066 if digitPos < 0 {
21302067 return - 1 , - 1 , false
21312068 }
21322069
21332070 if e .dfa != nil {
21342071 e .stats .DFASearches ++
2135- endPos := e .dfa .FindAt (haystack , digitPos )
2072+ // Use anchored search - the match MUST start at digitPos, so the start is already known.
2073+ // The anchored DFA yields the end position directly, which is much faster than running PikeVM for patterns that require a digit start.
2074+ endPos := e .dfa .SearchAtAnchored (haystack , digitPos )
21362075 if endPos != - 1 {
2137- start , end , found := e .pikevm .SearchAt (haystack , digitPos )
2138- if found {
2139- return start , end , true
2140- }
2076+ return digitPos , endPos , true
21412077 }
2142- consecutiveFPs ++
21432078 } else {
21442079 e .stats .NFASearches ++
21452080 start , end , found := e .pikevm .SearchAt (haystack , digitPos )
21462081 if found {
21472082 return start , end , true
21482083 }
2149- consecutiveFPs ++
21502084 }
21512085
21522086 pos = digitPos + 1
0 commit comments