|
/**
 * One hit returned by a find/recall query.
 *
 * Only `uri` is mandatory; every other field may be missing and the ranking
 * helpers in this module tolerate that.
 */
export interface FindResultItem {
  /** Identifier of the memory. Ranking code also inspects it for path segments such as `/events/`. */
  uri: string
  /** Retrieval score. Ranking code clamps it into [0, 1] and treats a missing or NaN value as 0. */
  score?: number
  /** Category label; compared against values such as `events` and `preferences`. */
  category?: string
  /** Short summary text. Preferred over `overview` for lexical overlap and de-duplication. */
  abstract?: string
  /** Fallback summary text, used only when `abstract` is null or undefined. */
  overview?: string
  /** Hierarchy level; the value `2` is treated as a leaf-like memory. */
  level?: number
}
| 9 | + |
/**
 * Features extracted from a recall query, used to adjust ranking.
 */
export interface RecallQueryProfile {
  /** Query tokens used for the lexical-overlap boost. */
  tokens: string[]
  /** When true, preference memories receive an extra boost. */
  wantsPreference: boolean
  /** When true, event memories receive an extra boost. */
  wantsTemporal: boolean
}
| 15 | + |
/** Added to the rank of items whose `level` is 2 (leaf-like memories). */
const LEAF_BOOST = 0.12
/** Added to event memories when the query profile has `wantsTemporal` set. */
const EVENT_BOOST = 0.10
/** Added to preference memories when the query profile has `wantsPreference` set. */
const PREFERENCE_BOOST = 0.08
/** Upper bound of the lexical-overlap boost. */
const OVERLAP_BOOST_MAX = 0.20
/** At most this many query tokens are checked against an item's text. */
const OVERLAP_TOKEN_MAX = 8
/** Cap on the denominator of the matched-token ratio. */
const OVERLAP_DENOM_CAP = 4
| 22 | + |
// Both patterns are case-insensitive and match anywhere in the query text.
// NOTE(review): there are no word boundaries, so e.g. `like` also matches
// inside "unlikely" and `date` inside "update" - confirm this is acceptable.

/** Detects queries that ask about preferences (English and Chinese cues). */
const PREFERENCE_QUERY_RE = /prefer|favorite|favourite|like|偏好|喜欢|爱好|更倾向/i

/** Detects queries that ask about time or dates (English and Chinese cues). */
const TEMPORAL_QUERY_RE = /when|what time|date|day|month|year|yesterday|today|tomorrow|last|next|什么时候|何时|哪天|几月|几年|昨天|今天|明天|上周|下周|上个月|下个月|去年|明年/i
| 25 | + |
/** Extracts runs of two or more ASCII letters or digits (global, case-insensitive). */
const TOKEN_REGEX = /[a-z0-9]{2,}/gi

/** Lower-case words that are dropped from the query before overlap scoring. */
const STOPWORDS = new Set([
  // interrogatives
  'what', 'when', 'where', 'which', 'who', 'whom', 'whose', 'why', 'how',
  // auxiliaries
  'did', 'does', 'is', 'are', 'was', 'were',
  // articles, conjunctions, prepositions, pronouns
  'the', 'and', 'for', 'with', 'from', 'that', 'this', 'your', 'you',
])
| 53 | + |
/**
 * Normalises a raw score into the closed interval [0, 1].
 *
 * @param value Raw score; may be missing.
 * @returns 0 for a non-number or NaN input, otherwise the value limited to [0, 1].
 */
function clampScore(value: number | undefined): number {
  if (typeof value === 'number' && !Number.isNaN(value)) {
    // `<=` (not `<`) so that negative zero is reported as plain 0.
    if (value <= 0) {
      return 0
    }
    return value >= 1 ? 1 : value
  }
  return 0
}
| 60 | + |
/**
 * Reports whether an item is a leaf-like memory.
 *
 * @param item Result item to inspect.
 * @returns true only when `item.level` is exactly 2.
 */
function isLeafLikeMemory(item: FindResultItem): boolean {
  const { level } = item
  return level === 2
}
| 64 | + |
/**
 * Reports whether an item represents an event memory.
 *
 * @param item Result item to inspect.
 * @returns true when the category equals `events` (case-insensitively) or
 *   the URI contains the `/events/` path segment.
 */
function isEventMemory(item: FindResultItem): boolean {
  const normalizedCategory = (item.category ?? '').toLowerCase()
  if (normalizedCategory === 'events') {
    return true
  }
  return item.uri.includes('/events/')
}
| 69 | + |
/**
 * Reports whether an item represents a preferences memory.
 *
 * The category is compared case-insensitively, matching how event memories
 * are detected, so a category such as `Preferences` is recognised too.
 *
 * @param item Result item to inspect.
 * @returns true when the category equals `preferences` (ignoring case), or
 *   the URI contains `/preferences/` or ends with `/preferences`.
 */
function isPreferencesMemory(item: FindResultItem): boolean {
  const category = (item.category ?? '').toLowerCase()
  return (
    category === 'preferences'
    || item.uri.includes('/preferences/')
    || item.uri.endsWith('/preferences')
  )
}
| 77 | + |
/**
 * Computes a small ranking boost from how many query tokens occur in `text`.
 *
 * Matching is a case-insensitive substring test. Tokens are lower-cased,
 * empty tokens are discarded, and repeated tokens are counted once so a
 * query that repeats a word cannot inflate the boost.
 *
 * @param tokens Query tokens.
 * @param text Text to search (for example an item's URI plus its summary).
 * @returns A value in [0, OVERLAP_BOOST_MAX]; 0 when there is nothing to compare.
 */
function lexicalOverlapBoost(tokens: string[], text: string): number {
  if (tokens.length === 0 || !text) {
    return 0
  }
  const distinctTokens = Array.from(new Set(tokens.map(token => token.toLowerCase())))
    .filter(token => token.length > 0)
  if (distinctTokens.length === 0) {
    return 0
  }
  const haystack = text.toLowerCase()
  let matched = 0
  // Only the first OVERLAP_TOKEN_MAX distinct tokens are considered.
  for (const token of distinctTokens.slice(0, OVERLAP_TOKEN_MAX)) {
    if (haystack.includes(token)) {
      matched += 1
    }
  }
  // The denominator is capped so that long queries can still reach the full boost.
  const denominator = Math.min(distinctTokens.length, OVERLAP_DENOM_CAP)
  return Math.min(OVERLAP_BOOST_MAX, (matched / denominator) * OVERLAP_BOOST_MAX)
}
| 91 | + |
/**
 * Derives ranking hints from a raw query string.
 *
 * @param query The user's query text.
 * @returns The lower-cased, stopword-free tokens of the query, plus flags
 *   telling whether the query matches the preference and temporal patterns.
 */
export function buildRecallQueryProfile(query: string): RecallQueryProfile {
  const candidates = query.toLowerCase().match(TOKEN_REGEX) ?? []
  return {
    tokens: candidates.filter(word => !STOPWORDS.has(word)),
    // The cue patterns are case-insensitive, so they run on the original text.
    wantsPreference: PREFERENCE_QUERY_RE.test(query),
    wantsTemporal: TEMPORAL_QUERY_RE.test(query),
  }
}
| 107 | + |
/**
 * Computes the injection rank of an item for a given query profile.
 *
 * The rank is the clamped base score plus, in this order: the leaf boost,
 * the event boost (temporal queries only), the preference boost (preference
 * queries only), and the lexical-overlap boost over the URI and summary text.
 *
 * @param item Result item to rank.
 * @param profile Profile of the query being answered.
 * @returns The combined rank; higher means more relevant.
 */
export function rankForInjection(item: FindResultItem, profile: RecallQueryProfile): number {
  let total = clampScore(item.score)
  if (isLeafLikeMemory(item)) {
    total += LEAF_BOOST
  }
  if (profile.wantsTemporal && isEventMemory(item)) {
    total += EVENT_BOOST
  }
  if (profile.wantsPreference && isPreferencesMemory(item)) {
    total += PREFERENCE_BOOST
  }
  // `overview` is consulted only when `abstract` is null or undefined.
  const summary = item.abstract ?? item.overview ?? ''
  total += lexicalOverlapBoost(profile.tokens, `${item.uri} ${summary}`)
  return total
}
| 118 | + |
/**
 * Selects up to `limit` memories to inject for a query.
 *
 * Items are ranked with `rankForInjection`, de-duplicated by their normalised
 * summary text (falling back to the URI when the summary is empty), and then
 * chosen leaf-first: leaf-like items fill the result before any other item.
 * Remaining slots go to non-leaf items whose clamped score is at least
 * `scoreThreshold`. The threshold is not applied to leaf-like items.
 *
 * @param items Candidate result items.
 * @param limit Maximum number of items to return.
 * @param queryText Query used to build the ranking profile.
 * @param scoreThreshold Minimum clamped score for non-leaf items (default 0).
 * @returns The selected items, leaf-like ones first, each group in rank order.
 */
export function pickMemoriesForInjection(
  items: FindResultItem[],
  limit: number,
  queryText: string,
  scoreThreshold = 0,
): FindResultItem[] {
  if (items.length === 0 || limit <= 0) {
    return []
  }

  const profile = buildRecallQueryProfile(queryText)
  const ranked = items
    .map(item => ({ item, score: rankForInjection(item, profile) }))
    .sort((left, right) => right.score - left.score)

  // Keep only the best-ranked item for each distinct summary (or URI).
  const seenKeys = new Set<string>()
  const unique = ranked
    .map(entry => entry.item)
    .filter((item) => {
      const summaryKey = (item.abstract ?? item.overview ?? '').trim().toLowerCase()
      const key = summaryKey || item.uri
      if (seenKeys.has(key)) {
        return false
      }
      seenKeys.add(key)
      return true
    })

  const leaves = unique.filter(item => isLeafLikeMemory(item))
  if (leaves.length >= limit) {
    return leaves.slice(0, limit)
  }

  // Top up with non-leaf items that clear the score threshold.
  const picked = leaves.slice()
  for (const candidate of unique) {
    if (picked.length >= limit) {
      break
    }
    if (isLeafLikeMemory(candidate)) {
      continue
    }
    if (clampScore(candidate.score) >= scoreThreshold) {
      picked.push(candidate)
    }
  }
  return picked
}
0 commit comments