Skip to content

Commit 01eb36d

Browse files
yiwangclaude
and authored
fix: improve memory search with token AND matching and rank-based scoring (#10)
Replace phrase-match FTS queries with tokenized AND-joined queries so searches like "database concurrency locks" match chunks containing all three words in any order, not just as an exact consecutive phrase. Replace max-normalization scoring in hybrid search with rank-based scoring (1/(1+rank)) for both FTS and vector results, preventing pathological cases where a single weak match dominates strong results. https://claude.ai/code/session_0193Gh5J9fEu65cyKyzUp5vT Co-authored-by: Claude <noreply@anthropic.com>
1 parent 5450611 commit 01eb36d

1 file changed

Lines changed: 35 additions & 25 deletions

File tree

src/memory/index.rs

Lines changed: 35 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -337,8 +337,10 @@ impl MemoryIndex {
337337

338338
/// Search using FTS5
339339
pub fn search(&self, query: &str, limit: usize) -> Result<Vec<MemoryChunk>> {
340-
// Escape special FTS5 characters
341-
let escaped_query = escape_fts_query(query);
340+
let fts_query = match build_fts_query(query) {
341+
Some(q) => q,
342+
None => return Ok(Vec::new()),
343+
};
342344

343345
let conn = self
344346
.conn
@@ -356,7 +358,7 @@ impl MemoryIndex {
356358
"#,
357359
)?;
358360

359-
let rows = stmt.query_map(params![&escaped_query, limit as i64], |row| {
361+
let rows = stmt.query_map(params![&fts_query, limit as i64], |row| {
360362
Ok(MemoryChunk {
361363
file: row.get(0)?,
362364
line_start: row.get(1)?,
@@ -870,32 +872,24 @@ impl MemoryIndex {
870872
Vec::new()
871873
};
872874

873-
// Merge results using weighted scores
875+
// Merge results using rank-based scoring (OpenClaw-compatible)
874876
let mut merged: std::collections::HashMap<String, (f32, MemoryChunk)> =
875877
std::collections::HashMap::new();
876878

877-
// Add FTS results (normalize BM25 score to 0-1 range)
878-
let max_fts_score = fts_results
879-
.iter()
880-
.map(|r| r.score)
881-
.fold(0.0f64, |a, b| a.max(b));
882-
let max_fts_score = if max_fts_score > 0.0 {
883-
max_fts_score
884-
} else {
885-
1.0
886-
};
887-
888-
for result in fts_results {
879+
// Add FTS results using rank-based scoring (OpenClaw-compatible)
880+
// BM25 results are already ordered by relevance (best first)
881+
for (rank, result) in fts_results.into_iter().enumerate() {
889882
let key = format!("{}:{}:{}", result.file, result.line_start, result.line_end);
890-
let normalized_score = (result.score / max_fts_score) as f32;
891-
let weighted_score = normalized_score * text_weight;
883+
let rank_score = 1.0 / (1.0 + rank as f32); // rank 0 → 1.0, rank 1 → 0.5, rank 9 → 0.1
884+
let weighted_score = rank_score * text_weight;
892885
merged.insert(key, (weighted_score, result));
893886
}
894887

895-
// Add/merge vector results
896-
for result in vector_results {
888+
// Add/merge vector results using rank-based scoring
889+
for (rank, result) in vector_results.into_iter().enumerate() {
897890
let key = format!("{}:{}:{}", result.file, result.line_start, result.line_end);
898-
let weighted_score = result.score as f32 * vector_weight;
891+
let rank_score = 1.0 / (1.0 + rank as f32);
892+
let weighted_score = rank_score * vector_weight;
899893

900894
if let Some((existing_score, existing_chunk)) = merged.get_mut(&key) {
901895
*existing_score += weighted_score;
@@ -950,10 +944,26 @@ fn embedding_to_blob(embedding: &[f32]) -> Vec<u8> {
950944
blob
951945
}
952946

953-
fn escape_fts_query(query: &str) -> String {
954-
// Wrap in quotes to treat as phrase, escape internal quotes
955-
let escaped = query.replace('"', "\"\"");
956-
format!("\"{}\"", escaped)
947+
/// Build FTS5 query from raw input (OpenClaw-compatible).
///
/// Tokenizes the input on every character that is not alphanumeric and not
/// `_`, then joins the tokens with `AND`, so a matching chunk must contain
/// all terms in any order (rather than as one consecutive phrase).
///
/// Returns `None` when the input yields no usable tokens, signalling the
/// caller to skip the FTS search entirely.
fn build_fts_query(raw: &str) -> Option<String> {
    // Because the split predicate rejects everything outside [alphanumeric, '_'],
    // tokens can never contain whitespace or double quotes — no trimming or
    // quote-escaping is needed before wrapping them in FTS5 string quotes below.
    let tokens: Vec<&str> = raw
        .split(|c: char| !c.is_alphanumeric() && c != '_')
        .filter(|t| !t.is_empty())
        .collect();

    if tokens.is_empty() {
        return None;
    }

    // Quote each token individually (FTS5 string syntax) and require all of
    // them with AND.
    let quoted: Vec<String> = tokens
        .iter()
        .map(|t| format!("\"{}\"", t))
        .collect();

    Some(quoted.join(" AND "))
}
958968

959969
struct ChunkInfo {

0 commit comments

Comments
 (0)