Skip to content

Commit 6fa73a8

Browse files
medelman17 and claude authored
fix: recognize statute sections with trailing letters (§ 1028A) (#27)
Tokenization patterns for USC and state code statutes required purely numeric section numbers. Updated capture groups from (\d+) to (\d+[A-Za-z]*) so sections like § 1028A and § 2339B are matched.

Closes #2

Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
1 parent 18ae4c2 commit 6fa73a8

3 files changed

Lines changed: 27 additions & 3 deletions

File tree

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,5 @@
1+
---
2+
"eyecite-ts": patch
3+
---
4+
5+
Fix statute sections with trailing letters (e.g., "18 U.S.C. § 1028A") not being recognized. Updated tokenization patterns for both USC and state code statutes to allow alphanumeric section suffixes.

src/patterns/statutePatterns.ts

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -16,13 +16,13 @@ import type { Pattern } from './casePatterns'
1616
export const statutePatterns: Pattern[] = [
1717
{
1818
id: 'usc',
19-
regex: /\b(\d+)\s+U\.S\.C\.?\s+§+\s*(\d+)\b/g,
19+
regex: /\b(\d+)\s+U\.S\.C\.?\s+§+\s*(\d+[A-Za-z]*)\b/g,
2020
description: 'U.S. Code citations (e.g., "42 U.S.C. § 1983")',
2121
type: 'statute',
2222
},
2323
{
2424
id: 'state-code',
25-
regex: /\b([A-Z][a-z]+\.?\s+[A-Za-z.]+\s+Code)\s+§\s*(\d+)\b/g,
25+
regex: /\b([A-Z][a-z]+\.?\s+[A-Za-z.]+\s+Code)\s+§\s*(\d+[A-Za-z]*)\b/g,
2626
description: 'State code citations (broad pattern, e.g., "Cal. Penal Code § 187")',
2727
type: 'statute',
2828
},

tests/extract/extractStatute.test.ts

Lines changed: 20 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
import { describe, it, expect } from 'vitest'
2-
import { extractStatute } from '@/extract'
2+
import { extractStatute, extractCitations } from '@/extract'
33
import type { Token } from '@/tokenize'
44
import type { TransformationMap } from '@/types/span'
55

@@ -197,6 +197,25 @@ describe('extractStatute', () => {
197197
})
198198
})
199199

200+
describe('trailing letters via full pipeline', () => {
201+
it('should extract section with trailing uppercase letter', () => {
202+
const citations = extractCitations('18 U.S.C. § 1028A')
203+
expect(citations).toHaveLength(1)
204+
expect(citations[0].type).toBe('statute')
205+
if (citations[0].type === 'statute') {
206+
expect(citations[0].section).toBe('1028A')
207+
}
208+
})
209+
210+
it('should extract section with trailing lowercase letter', () => {
211+
const citations = extractCitations('18 U.S.C. § 2339B')
212+
expect(citations).toHaveLength(1)
213+
if (citations[0].type === 'statute') {
214+
expect(citations[0].section).toBe('2339B')
215+
}
216+
})
217+
})
218+
200219
describe('metadata fields', () => {
201220
it('should include all required CitationBase fields', () => {
202221
const token: Token = {

0 commit comments

Comments
 (0)