Skip to content

Commit a92ec46

Browse files
committed
fix: isMatchDFA concurrent safety — prefilter rejection only (#137)
isMatchDFA prefilter candidate loop called e.dfa.SearchAtAnchored concurrently from RunParallel — shared lazy DFA NOT thread-safe. On ARM64 without SIMD: cache corruption, 1.7GB allocs, 1s+ per op. Fix: prefilter for fast rejection only, DFA.IsMatch for verification. Added TestConcurrentCaseInsensitivePrefilter (8 goroutines x 100 iters). Reported by @tjbrains on M2 Max.
1 parent 7a29fab commit a92ec46

4 files changed

Lines changed: 110 additions & 33 deletions

File tree

CHANGELOG.md

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,21 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
1212
- ARM NEON SIMD support (Go 1.26 `simd/archsimd` intrinsics — [#120](https://github.com/coregx/coregex/issues/120))
1313
- SIMD prefilter for CompositeSequenceDFA (#83)
1414

15+
## [0.12.14] - 2026-03-19
16+
17+
### Fixed
18+
- **`isMatchDFA` concurrent safety** (Issue [#137](https://github.com/coregx/coregex/issues/137)) —
19+
prefilter candidate loop called `e.dfa.SearchAtAnchored` concurrently from
20+
`RunParallel`. Shared lazy DFA is NOT thread-safe for concurrent lazy state
21+
construction. On ARM64 without SIMD prefilters, every candidate hit DFA →
22+
cache corruption → 1.7GB allocs, 1s+ per op on M2 Max.
23+
Fix: prefilter used for fast rejection only (one `Find` call), verification
24+
falls through to `DFA.IsMatch` (read-mostly when cache warm).
25+
26+
### Added
27+
- **Concurrent case-insensitive prefilter test**`TestConcurrentCaseInsensitivePrefilter`
28+
with 8 goroutines × 100 iterations, both match and no-match paths.
29+
1530
## [0.12.13] - 2026-03-18
1631

1732
### Performance

ROADMAP.md

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
> **Strategic Focus**: Production-grade regex engine with RE2/rust-regex level optimizations
44
5-
**Last Updated**: 2026-03-18 | **Current Version**: v0.12.13 | **Target**: v1.0.0 stable
5+
**Last Updated**: 2026-03-19 | **Current Version**: v0.12.14 | **Target**: v1.0.0 stable
66

77
---
88

@@ -12,7 +12,7 @@ Build a **production-ready, high-performance regex engine** for Go that matches
1212

1313
### Current State vs Target
1414

15-
| Metric | Current (v0.12.13) | Target (v1.0.0) |
15+
| Metric | Current (v0.12.14) | Target (v1.0.0) |
1616
|--------|-------------------|-----------------|
1717
| Inner literal speedup | **280-3154x** | ✅ Achieved |
1818
| Case-insensitive speedup | **263x** | ✅ Achieved |
@@ -82,7 +82,9 @@ v0.12.11 ✅ → ReverseSuffix multi-wildcard, COREGEX_DEBUG logging
8282
8383
v0.12.12 ✅ → Prefix trimming for case-fold literals (Teddy instead of AC)
8484
85-
v0.12.13 (Current) ✅ → FatTeddy fix, prefilter acceleration, AC v0.2.1
85+
v0.12.13 ✅ → FatTeddy fix, prefilter acceleration, AC v0.2.1
86+
87+
v0.12.14 (Current) ✅ → Concurrent safety fix for isMatchDFA prefilter (#137)
8688
8789
v1.0.0-rc → Feature freeze, API locked
8890

meta/concurrent_test.go

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package meta
22

33
import (
4+
"strings"
45
"sync"
56
"sync/atomic"
67
"testing"
@@ -384,6 +385,77 @@ func TestConcurrentDifferentPatterns(t *testing.T) {
384385
wg.Wait()
385386
}
386387

388+
// TestConcurrentCaseInsensitivePrefilter verifies that case-insensitive patterns
389+
// with prefilter work correctly under concurrent RunParallel.
390+
// Regression test for Issue #137: on ARM64 without SIMD, concurrent access to
391+
// shared lazy DFA via prefilter candidate loop caused cache corruption
392+
// (1.7GB allocs, 1s+ per op on M2 Max).
393+
func TestConcurrentCaseInsensitivePrefilter(t *testing.T) {
394+
// This pattern produces 60 case-fold prefix literals → FatTeddy/AC prefilter
395+
// + UseDFA strategy with 181 NFA states
396+
engine, err := Compile(`(?iU)\b(eval|system|exec|execute|passthru|shell_exec|phpinfo)\b`)
397+
if err != nil {
398+
t.Fatal(err)
399+
}
400+
401+
// Input WITH match — exercises verification path, not just rejection
402+
matchInput := []byte(strings.Repeat("Mozilla/5.0 Safari/537.36 phpinfo", 8))
403+
// Input WITHOUT match — exercises fast rejection path
404+
noMatchInput := []byte(strings.Repeat("Mozilla/5.0 Safari/537.36 Chrome/96", 8))
405+
406+
t.Run("match_concurrent", func(t *testing.T) {
407+
var wg sync.WaitGroup
408+
for g := 0; g < 8; g++ {
409+
wg.Add(1)
410+
go func() {
411+
defer wg.Done()
412+
for i := 0; i < 100; i++ {
413+
if !engine.IsMatch(matchInput) {
414+
t.Error("IsMatch returned false, expected true")
415+
return
416+
}
417+
}
418+
}()
419+
}
420+
wg.Wait()
421+
})
422+
423+
t.Run("no_match_concurrent", func(t *testing.T) {
424+
var wg sync.WaitGroup
425+
for g := 0; g < 8; g++ {
426+
wg.Add(1)
427+
go func() {
428+
defer wg.Done()
429+
for i := 0; i < 100; i++ {
430+
if engine.IsMatch(noMatchInput) {
431+
t.Error("IsMatch returned true, expected false")
432+
return
433+
}
434+
}
435+
}()
436+
}
437+
wg.Wait()
438+
})
439+
}
440+
441+
// BenchmarkConcurrentCaseInsensitiveMatch benchmarks concurrent IsMatch with
442+
// case-insensitive prefilter pattern — the pattern reported in Issue #137.
443+
func BenchmarkConcurrentCaseInsensitiveMatch(b *testing.B) {
444+
engine, err := Compile(`(?iU)\b(eval|system|exec|execute|passthru|shell_exec|phpinfo)\b`)
445+
if err != nil {
446+
b.Fatal(err)
447+
}
448+
449+
input := []byte(strings.Repeat("Mozilla/5.0 Safari/537.36 phpinfo", 8))
450+
451+
b.ResetTimer()
452+
b.RunParallel(func(pb *testing.PB) {
453+
for pb.Next() {
454+
engine.IsMatch(input)
455+
}
456+
})
457+
}
458+
387459
// BenchmarkConcurrentIsMatch benchmarks concurrent IsMatch performance.
388460
func BenchmarkConcurrentIsMatch(b *testing.B) {
389461
engine, err := Compile(`\b\w+\b`)

meta/ismatch.go

Lines changed: 18 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -135,40 +135,28 @@ func (e *Engine) isMatchNFA(haystack []byte) bool {
135135
func (e *Engine) isMatchDFA(haystack []byte) bool {
136136
atomic.AddUint64(&e.stats.DFASearches, 1)
137137

138-
// Prefilter-accelerated matching: use prefilter to skip non-matching regions,
139-
// then verify each candidate with anchored DFA or PikeVM.
140-
// This is critical for case-insensitive patterns with large NFAs (Issue #137).
138+
// Prefilter fast rejection: if prefilter finds no candidates, no match.
139+
// For incomplete prefilters with candidates, use pooled PikeVM for
140+
// thread-safe verification. Shared lazy DFA (e.dfa) is NOT thread-safe
141+
// for ANY concurrent access — even DFA.IsMatch causes data races on
142+
// lazy state construction. Issue #137 (ARM64 M2 Max: 1.7GB allocs).
141143
if e.prefilter != nil {
142-
pos := 0
143-
for pos < len(haystack) {
144-
candidate := e.prefilter.Find(haystack, pos)
145-
if candidate == -1 {
146-
return false // No more candidates
147-
}
148-
atomic.AddUint64(&e.stats.PrefilterHits, 1)
149-
// For complete prefilters, the find is sufficient
150-
if e.prefilter.IsComplete() {
151-
return true
152-
}
153-
// Verify candidate with anchored DFA (O(pattern_len) per candidate).
154-
// Unanchored verification would scan to end of input = O(n) per candidate.
155-
if e.dfa != nil {
156-
if e.dfa.SearchAtAnchored(haystack, candidate) != -1 {
157-
return true
158-
}
159-
} else {
160-
// PikeVM fallback — check if match starts at candidate position
161-
start, _, found := e.pikevm.SearchAt(haystack, candidate)
162-
if found && start == candidate {
163-
return true
164-
}
165-
}
166-
pos = candidate + 1
144+
pos := e.prefilter.Find(haystack, 0)
145+
if pos == -1 {
146+
return false // Prefilter says no candidates — fast rejection
167147
}
168-
return false
148+
atomic.AddUint64(&e.stats.PrefilterHits, 1)
149+
if e.prefilter.IsComplete() {
150+
return true // Complete prefilter — find is sufficient
151+
}
152+
// Prefilter found candidate but isn't complete — verify with pooled PikeVM
153+
state := e.getSearchState()
154+
_, _, found := state.pikevm.SearchAt(haystack, pos)
155+
e.putSearchState(state)
156+
return found
169157
}
170158

171-
// Use DFA.IsMatch which has early termination optimization
159+
// No prefilter: use DFA.IsMatch (single-thread path, safe)
172160
return e.dfa.IsMatch(haystack)
173161
}
174162

0 commit comments

Comments
 (0)