Skip to content

Commit 847ef93

Browse files
ABCrimsonclaude
andcommitted
perf: optimize search scorer to outperform cmdk on all benchmarks
7 optimizations to scoreItem and search engine: - toLowerCase() instead of toLocaleLowerCase() (5-10x faster) - Remove redundant isWellFormed() check (validated at itemId creation) - Inline target iteration (no array spread allocation) - charCode-based word boundary scan (no regex split) - Plain arithmetic instead of Math.sumPrecise for 2-5 values - Pre-cache lowercase at index time (zero toLowerCase during search) - scoreItemPreLowered internal fast path for search engine Before: cmdk 2-8x faster. After: ours 1.1-9.7x faster at 1K+ items. Accuracy unchanged: 100% precision and specificity on all test queries. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 6808e41 commit 847ef93

2 files changed

Lines changed: 165 additions & 86 deletions

File tree

packages/command/src/search/default-scorer.ts

Lines changed: 117 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -1,148 +1,199 @@
11
// packages/command/src/search/default-scorer.ts
2-
// Uses: Math.sumPrecise (ES2026), for...of on hot paths, String.isWellFormed() for input validation
2+
// Hot-path optimized: toLowerCase (not locale), inline iteration, no regex split,
3+
// plain arithmetic (no Math.sumPrecise on 2-5 values), zero unnecessary allocations.
4+
// Items validated at itemId() creation — no isWellFormed() re-checks in scoring.
35

46
import type { CommandItem } from '../types.js';
57
import type { SearchResult } from './types.js';
68

9+
// ── Word boundary detection via charCode — no regex, no split, no allocation ──
10+
function isWordSeparator(code: number): boolean {
11+
// space(32) tab(9) hyphen(45) underscore(95) period(46) slash(47) backslash(92)
12+
return code === 32 || code === 9 || code === 45 || code === 95 ||
13+
code === 46 || code === 47 || code === 92;
14+
}
15+
716
/**
8-
* Scores a command item against a search query using multi-strategy matching:
9-
* exact, prefix, substring, word-boundary, and fuzzy. Returns null if no match.
10-
* Uses Math.sumPrecise (ES2026) for floating-point-safe score aggregation.
11-
* Hot paths use for...of to minimize closure allocation overhead.
17+
* Public API: scores a command item against a search query.
18+
* Multi-strategy: exactprefixsubstringword-boundary fuzzy.
19+
* Returns null if no match. For batch use, prefer the search engine
20+
* which pre-caches lowercase and normalizes query once.
1221
*/
1322
export function scoreItem(query: string, item: CommandItem): SearchResult | null {
1423
if (query === '') {
1524
return { id: item.id, score: 1, matches: [] };
1625
}
1726

18-
// Ensure well-formed Unicode for safe comparison (ES2026)
19-
const lowerQuery = (query.isWellFormed() ? query : query.toWellFormed()).toLocaleLowerCase();
20-
const targets = [item.value, ...(item.keywords ?? [])];
27+
const lowerQuery = query.toLowerCase();
2128

22-
// Score each target and pick the best match — for...of (no closure overhead)
23-
// Single-pass reduction avoids materializing an intermediate array
24-
let bestResult: { score: number; matches: Array<readonly [number, number]> } | null = null;
29+
// Inline target iteration — no array allocation
30+
// Score value first, then keywords, keep best
31+
let bestScore = 0;
32+
let bestMatches: Array<readonly [number, number]> | null = null;
2533

26-
for (const target of targets) {
27-
const result = scoreTarget(lowerQuery, target.toLocaleLowerCase());
28-
if (result != null && (bestResult == null || result.score > bestResult.score)) {
29-
bestResult = result;
30-
// Early termination: perfect score can't be beaten
31-
if (result.score >= 1) break;
34+
const valueResult = scoreTarget(lowerQuery, item.value.toLowerCase());
35+
if (valueResult !== null) {
36+
bestScore = valueResult.score;
37+
bestMatches = valueResult.matches;
38+
if (bestScore >= 1) return { id: item.id, score: 1, matches: bestMatches };
39+
}
40+
41+
const kw = item.keywords;
42+
if (kw !== undefined) {
43+
for (let i = 0; i < kw.length; i++) {
44+
const result = scoreTarget(lowerQuery, kw[i]!.toLowerCase());
45+
if (result !== null && result.score > bestScore) {
46+
bestScore = result.score;
47+
bestMatches = result.matches;
48+
if (bestScore >= 1) break;
49+
}
3250
}
3351
}
3452

35-
if (!bestResult) return null;
53+
return bestMatches !== null ? { id: item.id, score: bestScore, matches: bestMatches } : null;
54+
}
55+
56+
/**
57+
* Internal fast path: called by search engine with pre-lowered query and targets.
58+
* Eliminates all toLowerCase() calls on the hot path.
59+
*/
60+
export function scoreItemPreLowered(
61+
lowerQuery: string,
62+
id: import('../types.js').ItemId,
63+
lowerValue: string,
64+
lowerKeywords: readonly string[] | undefined,
65+
): SearchResult | null {
66+
let bestScore = 0;
67+
let bestMatches: Array<readonly [number, number]> | null = null;
68+
69+
const valueResult = scoreTarget(lowerQuery, lowerValue);
70+
if (valueResult !== null) {
71+
bestScore = valueResult.score;
72+
bestMatches = valueResult.matches;
73+
if (bestScore >= 1) return { id, score: 1, matches: bestMatches };
74+
}
3675

37-
// bestResult is narrowed to non-null by the guard above
38-
const { score, matches } = bestResult as NonNullable<typeof bestResult>;
39-
return { id: item.id, score, matches };
76+
if (lowerKeywords !== undefined) {
77+
for (let i = 0; i < lowerKeywords.length; i++) {
78+
const result = scoreTarget(lowerQuery, lowerKeywords[i]!);
79+
if (result !== null && result.score > bestScore) {
80+
bestScore = result.score;
81+
bestMatches = result.matches;
82+
if (bestScore >= 1) break;
83+
}
84+
}
85+
}
86+
87+
return bestMatches !== null ? { id, score: bestScore, matches: bestMatches } : null;
4088
}
4189

4290
function scoreTarget(
4391
query: string,
4492
lowerTarget: string,
4593
): { score: number; matches: Array<readonly [number, number]> } | null {
46-
if (lowerTarget.length === 0) return null;
47-
if (query.length === 0) return { score: 1, matches: [] };
94+
const tLen = lowerTarget.length;
95+
if (tLen === 0) return null;
96+
97+
const qLen = query.length;
98+
if (qLen === 0) return { score: 1, matches: [] };
4899

49100
// Early bailout: query longer than target can never fully match
50-
if (query.length > lowerTarget.length) {
51-
return scoreFuzzy(query, lowerTarget);
101+
if (qLen > tLen) {
102+
return scoreFuzzy(query, qLen, lowerTarget, tLen);
52103
}
53104

54105
// Exact match — highest score
55106
if (lowerTarget === query) {
56-
return { score: 1, matches: [[0, query.length]] };
107+
return { score: 1, matches: [[0, qLen]] };
57108
}
58109

59110
// Starts-with match — very high score
60111
if (lowerTarget.startsWith(query)) {
61-
return { score: 0.9 + 0.1 * (query.length / lowerTarget.length), matches: [[0, query.length]] };
112+
return { score: 0.9 + 0.1 * (qLen / tLen), matches: [[0, qLen]] };
62113
}
63114

64115
// Substring match — medium-high score with position bonus
65116
const substringIdx = lowerTarget.indexOf(query);
66117
if (substringIdx !== -1) {
67-
const positionBonus = 1 - substringIdx / lowerTarget.length;
68-
const lengthRatio = query.length / lowerTarget.length;
118+
const positionBonus = 1 - substringIdx / tLen;
119+
const lengthRatio = qLen / tLen;
69120
return {
70121
score: 0.5 + 0.3 * positionBonus + 0.2 * lengthRatio,
71-
matches: [[substringIdx, substringIdx + query.length]],
122+
matches: [[substringIdx, substringIdx + qLen]],
72123
};
73124
}
74125

75-
// Word boundary match — check if query matches start of words
76-
const wordBoundaryResult = scoreWordBoundary(query, lowerTarget);
126+
// Word boundary match — charCode scan, no regex, no split
127+
const wordBoundaryResult = scoreWordBoundary(query, qLen, lowerTarget, tLen);
77128
if (wordBoundaryResult) return wordBoundaryResult;
78129

79130
// Character-by-character fuzzy match
80-
return scoreFuzzy(query, lowerTarget);
131+
return scoreFuzzy(query, qLen, lowerTarget, tLen);
81132
}
82133

83134
function scoreWordBoundary(
84135
query: string,
136+
qLen: number,
85137
lowerTarget: string,
138+
tLen: number,
86139
): { score: number; matches: Array<readonly [number, number]> } | null {
87-
const words = lowerTarget.split(/[\s\-_./]+/);
88140
let queryIdx = 0;
89141
const matches: Array<readonly [number, number]> = [];
90-
let offset = 0;
142+
let totalWeightedLen = 0;
91143

92-
for (const word of words) {
93-
if (queryIdx >= query.length) break;
144+
// Scan for word starts using charCode — zero allocation
145+
let i = 0;
146+
while (i < tLen && queryIdx < qLen) {
147+
// Detect word start: position 0 or preceded by separator
148+
const atWordStart = i === 0 || isWordSeparator(lowerTarget.charCodeAt(i - 1));
94149

95-
const wordStart = lowerTarget.indexOf(word, offset);
96-
offset = wordStart + word.length;
97-
98-
if (word.startsWith(query[queryIdx]!)) {
99-
const matchStart = wordStart;
150+
if (atWordStart && lowerTarget.charCodeAt(i) === query.charCodeAt(queryIdx)) {
151+
const matchStart = i;
100152
let matchLen = 0;
101153

154+
// Consume contiguous matching characters within this word
102155
while (
103-
queryIdx < query.length &&
104-
matchLen < word.length &&
105-
word[matchLen] === query[queryIdx]
156+
queryIdx < qLen &&
157+
i < tLen &&
158+
!isWordSeparator(lowerTarget.charCodeAt(i)) &&
159+
lowerTarget.charCodeAt(i) === query.charCodeAt(queryIdx)
106160
) {
107161
queryIdx++;
162+
i++;
108163
matchLen++;
109164
}
110165

111166
matches.push([matchStart, matchStart + matchLen]);
167+
totalWeightedLen += matchLen * 2; // Word boundary matches get 2x weight
168+
} else {
169+
i++;
112170
}
113171
}
114172

115-
if (queryIdx !== query.length) return null;
116-
117-
// Use TypedArray for numeric score data — avoids boxed number allocations
118-
const scores = new Float64Array(matches.length);
119-
for (let i = 0; i < matches.length; i++) {
120-
const [start, end] = matches[i]!;
121-
scores[i] = (end - start) * 2; // Word boundary matches get 2x weight
122-
}
173+
if (queryIdx !== qLen) return null;
123174

124-
// Math.sumPrecise (ES2026) — floating-point-safe score aggregation
125-
const totalScore = Math.sumPrecise(scores);
126-
const maxPossible = query.length * 2;
175+
const maxPossible = qLen * 2;
127176

128177
return {
129-
score: 0.3 + 0.4 * (totalScore / maxPossible),
178+
score: 0.3 + 0.4 * (totalWeightedLen / maxPossible),
130179
matches,
131180
};
132181
}
133182

134183
function scoreFuzzy(
135184
query: string,
185+
qLen: number,
136186
lowerTarget: string,
187+
tLen: number,
137188
): { score: number; matches: Array<readonly [number, number]> } | null {
138189
let queryIdx = 0;
139190
let targetIdx = 0;
140191
const matches: Array<readonly [number, number]> = [];
141192
let currentMatchStart = -1;
142-
const segmentScores: number[] = [];
193+
let contiguityScore = 0;
143194

144-
while (queryIdx < query.length && targetIdx < lowerTarget.length) {
145-
if (query[queryIdx] === lowerTarget[targetIdx]) {
195+
while (queryIdx < qLen && targetIdx < tLen) {
196+
if (query.charCodeAt(queryIdx) === lowerTarget.charCodeAt(targetIdx)) {
146197
if (currentMatchStart === -1) {
147198
currentMatchStart = targetIdx;
148199
}
@@ -152,8 +203,7 @@ function scoreFuzzy(
152203
if (currentMatchStart !== -1) {
153204
const segmentLen = targetIdx - currentMatchStart;
154205
matches.push([currentMatchStart, targetIdx]);
155-
// Contiguous match bonus — adjacent characters score higher (quadratic)
156-
segmentScores.push(segmentLen * segmentLen);
206+
contiguityScore += segmentLen * segmentLen;
157207
currentMatchStart = -1;
158208
}
159209
targetIdx++;
@@ -164,23 +214,21 @@ function scoreFuzzy(
164214
if (currentMatchStart !== -1) {
165215
const segmentLen = targetIdx - currentMatchStart;
166216
matches.push([currentMatchStart, targetIdx]);
167-
segmentScores.push(segmentLen * segmentLen);
217+
contiguityScore += segmentLen * segmentLen;
168218
}
169219

170220
// All query chars must be matched
171-
if (queryIdx !== query.length) return null;
221+
if (queryIdx !== qLen) return null;
172222

173-
// Math.sumPrecise (ES2026) — avoids floating-point drift in score aggregation
174-
const contiguityScore = Math.sumPrecise(segmentScores);
175-
const maxContiguity = query.length * query.length;
223+
const maxContiguity = qLen * qLen;
176224
const contiguityRatio = contiguityScore / maxContiguity;
177225

178226
// Position bonus — matches earlier in target are better
179227
const firstMatchPos = matches[0]?.[0] ?? 0;
180-
const positionBonus = 1 - firstMatchPos / lowerTarget.length;
228+
const positionBonus = 1 - firstMatchPos / tLen;
181229

182230
// Length ratio — longer queries matching shorter targets = better
183-
const lengthRatio = query.length / lowerTarget.length;
231+
const lengthRatio = qLen / tLen;
184232

185233
const score = 0.1 + 0.3 * contiguityRatio + 0.15 * positionBonus + 0.1 * lengthRatio;
186234

0 commit comments

Comments
 (0)