Skip to content

Commit a33c3f6

Browse files
danielkovclaude
andcommitted
fix: sanitize FTS5 search queries to handle special characters
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 602fbf1 commit a33c3f6

File tree

3 files changed

+109
-4
lines changed

3 files changed

+109
-4
lines changed
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
### FTS5 search no longer fails on special characters
2+
3+
Queries containing FTS5 operator characters like `-`, `+`, `*`, or keywords like `NOT`/`OR`/`AND` are now properly escaped before being passed to SQLite's FTS5 `MATCH` clause.
4+
5+
Previously, running something like `granary plan "Fix TypeScript v2 build failures - 11 root causes from SDK battery tests"` would fail because the `-` was interpreted as FTS5's NOT operator. Now, each token is individually quoted and purely-punctuation tokens (like a bare `-`) are dropped, so the search works regardless of what characters appear in the plan name.
6+
7+
Search queries also now use `OR` semantics instead of implicit `AND`, which makes prior-art matching more lenient — a project only needs to match *some* of the query terms to appear in results, rather than requiring all of them.

src/cli/plan.rs

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -431,4 +431,34 @@ mod tests {
431431
"prior art should be empty when the only match is excluded"
432432
);
433433
}
434+
435+
#[tokio::test]
436+
async fn test_find_prior_art_with_special_characters() {
437+
let (pool, _temp) = setup_test_db().await;
438+
439+
let project = services::create_project(
440+
&pool,
441+
CreateProject {
442+
name: "TypeScript SDK battery tests".to_string(),
443+
..Default::default()
444+
},
445+
)
446+
.await
447+
.unwrap();
448+
449+
// Query contains `-` which is an FTS5 NOT operator when unescaped
450+
let results = find_prior_art(
451+
&pool,
452+
"Fix TypeScript v2 build failures - 11 root causes from SDK battery tests",
453+
"nonexistent-id",
454+
)
455+
.await
456+
.unwrap();
457+
458+
let result_ids: Vec<&str> = results.iter().map(|r| r.project.id.as_str()).collect();
459+
assert!(
460+
result_ids.contains(&project.id.as_str()),
461+
"search with special characters should still find matching projects"
462+
);
463+
}
434464
}

src/db/mod.rs

Lines changed: 72 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1852,8 +1852,27 @@ pub mod search {
18521852
pub rank: f64,
18531853
}
18541854

1855+
/// Turn free-form user text into an FTS5 `MATCH` expression that contains
/// no live query syntax.
///
/// The input is split on whitespace. Tokens without a single alphanumeric
/// character (for example a bare `-`) are discarded. Each remaining token has
/// its embedded `"` characters doubled and is then wrapped in double quotes,
/// which keeps FTS5 from interpreting `-`, `+`, `*`, `NOT`, `OR`, `AND` or
/// `NEAR` as operators. The quoted tokens are joined with ` OR ` so that a
/// long query still matches rows containing only some of its terms.
///
/// Returns an empty string when no token survives the filter.
fn sanitize_fts5_query(query: &str) -> String {
    let mut expr = String::new();

    for word in query.split_whitespace() {
        // Purely-punctuation tokens carry nothing searchable; skip them.
        if !word.chars().any(char::is_alphanumeric) {
            continue;
        }

        if !expr.is_empty() {
            expr.push_str(" OR ");
        }

        // Emit the token as a quoted string, doubling any inner quote.
        expr.push('"');
        for ch in word.chars() {
            if ch == '"' {
                expr.push('"');
            }
            expr.push(ch);
        }
        expr.push('"');
    }

    expr
}
1872+
18551873
/// Search all entity types using FTS5 full-text search with BM25 ranking
18561874
pub async fn search_all(pool: &SqlitePool, query: &str, limit: i32) -> Result<Vec<FtsMatch>> {
1875+
let sanitized = sanitize_fts5_query(query);
18571876
let rows = sqlx::query_as::<_, FtsMatch>(
18581877
r#"
18591878
SELECT entity_type, entity_id, rank
@@ -1863,7 +1882,7 @@ pub mod search {
18631882
LIMIT ?
18641883
"#,
18651884
)
1866-
.bind(query)
1885+
.bind(&sanitized)
18671886
.bind(limit)
18681887
.fetch_all(pool)
18691888
.await?;
@@ -1872,6 +1891,7 @@ pub mod search {
18721891

18731892
/// Search projects using FTS5 full-text search
18741893
pub async fn search_projects(pool: &SqlitePool, query: &str) -> Result<Vec<Project>> {
1894+
let sanitized = sanitize_fts5_query(query);
18751895
let ids = sqlx::query_scalar::<_, String>(
18761896
r#"
18771897
SELECT entity_id
@@ -1881,7 +1901,7 @@ pub mod search {
18811901
LIMIT 50
18821902
"#,
18831903
)
1884-
.bind(query)
1904+
.bind(&sanitized)
18851905
.fetch_all(pool)
18861906
.await?;
18871907

@@ -1901,6 +1921,7 @@ pub mod search {
19011921

19021922
/// Search tasks using FTS5 full-text search
19031923
pub async fn search_tasks(pool: &SqlitePool, query: &str) -> Result<Vec<Task>> {
1924+
let sanitized = sanitize_fts5_query(query);
19041925
let ids = sqlx::query_scalar::<_, String>(
19051926
r#"
19061927
SELECT entity_id
@@ -1910,7 +1931,7 @@ pub mod search {
19101931
LIMIT 50
19111932
"#,
19121933
)
1913-
.bind(query)
1934+
.bind(&sanitized)
19141935
.fetch_all(pool)
19151936
.await?;
19161937

@@ -1933,6 +1954,7 @@ pub mod search {
19331954
pool: &SqlitePool,
19341955
query: &str,
19351956
) -> Result<Vec<crate::models::Initiative>> {
1957+
let sanitized = sanitize_fts5_query(query);
19361958
let ids = sqlx::query_scalar::<_, String>(
19371959
r#"
19381960
SELECT entity_id
@@ -1942,7 +1964,7 @@ pub mod search {
19421964
LIMIT 50
19431965
"#,
19441966
)
1945-
.bind(query)
1967+
.bind(&sanitized)
19461968
.fetch_all(pool)
19471969
.await?;
19481970

@@ -1959,6 +1981,52 @@ pub mod search {
19591981
.await?;
19601982
Ok(initiatives)
19611983
}
1984+
1985+
#[cfg(test)]
mod tests {
    use super::*;

    /// Plain words are quoted one by one and joined with `OR`.
    #[test]
    fn sanitize_plain_words() {
        assert_eq!(sanitize_fts5_query("hello world"), r#""hello" OR "world""#);
    }

    #[test]
    fn sanitize_dash_operator() {
        // The bare `-` is dropped (no alphanumeric chars)
        assert_eq!(
            sanitize_fts5_query("Fix TypeScript v2 build failures - 11 root causes"),
            r#""Fix" OR "TypeScript" OR "v2" OR "build" OR "failures" OR "11" OR "root" OR "causes""#
        );
    }

    #[test]
    fn sanitize_embedded_quotes() {
        // Each `"` inside a token is doubled before the token is wrapped.
        assert_eq!(
            sanitize_fts5_query(r#"say "hello""#),
            "\"say\" OR \"\"\"hello\"\"\"",
        );
    }

    #[test]
    fn sanitize_empty_query() {
        assert_eq!(sanitize_fts5_query(""), "");
    }

    #[test]
    fn sanitize_single_word() {
        assert_eq!(sanitize_fts5_query("auth"), r#""auth""#);
    }

    #[test]
    fn sanitize_various_operators() {
        // NOT, OR, AND and NEAR are FTS5 keywords — quoting neutralises them
        assert_eq!(
            sanitize_fts5_query("NOT this OR that"),
            r#""NOT" OR "this" OR "OR" OR "that""#
        );
        assert_eq!(
            sanitize_fts5_query("this AND that NEAR other"),
            r#""this" OR "AND" OR "that" OR "NEAR" OR "other""#
        );
    }

    #[test]
    fn sanitize_punctuation_only_query() {
        // Every token is filtered out, leaving nothing to join.
        assert_eq!(sanitize_fts5_query("- + * ()"), "");
        assert_eq!(sanitize_fts5_query("   \t\n"), "");
    }

    #[test]
    fn sanitize_keeps_punctuation_inside_tokens() {
        // Only tokens with no alphanumeric character at all are dropped;
        // punctuation attached to a word stays inside the quoted token.
        assert_eq!(
            sanitize_fts5_query("foo-bar baz*"),
            r#""foo-bar" OR "baz*""#
        );
    }
}
19622030
}
19632031

19642032
/// Database operations for getting next tasks across an initiative

0 commit comments

Comments
 (0)