Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions gateway/mirror.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ def _append_to_jsonl(session_id: str, message: dict) -> None:

def _append_to_sqlite(session_id: str, message: dict) -> None:
"""Append a message to the SQLite session database."""
db = None
try:
from hermes_state import SessionDB
db = SessionDB()
Expand All @@ -121,3 +122,6 @@ def _append_to_sqlite(session_id: str, message: dict) -> None:
)
except Exception as e:
logger.debug("Mirror SQLite write failed: %s", e)
finally:
if db is not None:
db.close()
37 changes: 36 additions & 1 deletion hermes_state.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

import json
import os
import re
import sqlite3
import time
from pathlib import Path
Expand Down Expand Up @@ -322,6 +323,32 @@ def get_messages_as_conversation(self, session_id: str) -> List[Dict[str, Any]]:
# Search
# =========================================================================

@staticmethod
def _sanitize_fts5_query(query: str) -> str:
    """Sanitize user input for safe use in an FTS5 ``MATCH`` expression.

    FTS5 has its own query grammar in which ``"``, ``(``, ``)``, ``+``,
    ``*``, ``{``, ``}``, ``^`` and the bare *uppercase* keywords ``AND``,
    ``OR`` and ``NOT`` carry special meaning.  Feeding raw user input to
    ``MATCH`` can therefore raise ``sqlite3.OperationalError``.

    The sanitizer:

    * replaces the operator characters ``+ { } ( ) " ^`` with spaces;
    * collapses runs of ``*`` and drops any ``*`` that has no term in
      front of it (a prefix query needs at least one character);
    * repeatedly strips boolean operators left dangling at either end,
      so ``"AND OR hello"`` and ``"hello AND OR"`` both become
      ``"hello"``.

    Only uppercase ``AND``/``OR``/``NOT`` are stripped: FTS5 treats the
    lowercase words as ordinary search terms, so they are preserved.

    Args:
        query: Raw search text supplied by the user.

    Returns:
        The cleaned query, stripped of surrounding whitespace.  It may be
        the empty string when nothing searchable remains.  Operators that
        are malformed in the *interior* of the query (e.g. ``a AND OR b``)
        are left untouched and may still be rejected by SQLite.
    """
    # Operator characters that are of no use in plain keyword search.
    sanitized = re.sub(r'[+{}()"^]', " ", query)
    # "***" -> "*": only a single trailing star is meaningful.
    sanitized = re.sub(r"\*+", "*", sanitized)

    # Iterate to a fixed point: removing one dangling token can expose
    # another (e.g. "AND OR x" -> "OR x" -> "x", or "AND*" -> "*" -> "").
    # Every pass either shortens the string or leaves it unchanged, so the
    # loop always terminates.
    previous = None
    while sanitized != previous:
        previous = sanitized
        # A star at the start of the string or after whitespace has no
        # term to act as a prefix for.
        sanitized = re.sub(r"(^|\s)\*", r"\1", sanitized).strip()
        # Case-sensitive on purpose: lowercase and/or/not are plain terms.
        sanitized = re.sub(r"^(?:AND|OR|NOT)\b\s*", "", sanitized)
        sanitized = re.sub(r"\s+(?:AND|OR|NOT)$", "", sanitized)
    return sanitized

def search_messages(
self,
query: str,
Expand All @@ -345,6 +372,10 @@ def search_messages(
if not query or not query.strip():
return []

query = self._sanitize_fts5_query(query)
if not query:
return []

if source_filter is None:
source_filter = ["cli", "telegram", "discord", "whatsapp", "slack"]

Expand Down Expand Up @@ -384,7 +415,11 @@ def search_messages(
LIMIT ? OFFSET ?
"""

cursor = self._conn.execute(sql, params)
try:
cursor = self._conn.execute(sql, params)
except sqlite3.OperationalError:
# FTS5 query syntax error despite sanitization — return empty
return []
matches = [dict(row) for row in cursor.fetchall()]

# Add surrounding context (1 message before + after each match)
Expand Down
24 changes: 24 additions & 0 deletions tests/gateway/test_mirror.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,3 +160,27 @@ def test_error_returns_false(self, tmp_path):
result = mirror_to_session("telegram", "123", "msg")

assert result is False


class TestAppendToSqlite:
    """Checks that ``_append_to_sqlite`` always releases its SessionDB."""

    def test_connection_is_closed_after_use(self, tmp_path):
        """A successful append writes once and then closes the database."""
        from gateway.mirror import _append_to_sqlite

        fake_session_db = MagicMock()
        payload = {"role": "assistant", "content": "hello"}

        # _append_to_sqlite imports SessionDB from hermes_state at call
        # time, so patching the attribute on that module is sufficient.
        with patch("hermes_state.SessionDB", return_value=fake_session_db):
            _append_to_sqlite("sess_1", payload)

        fake_session_db.append_message.assert_called_once()
        fake_session_db.close.assert_called_once()

    def test_connection_closed_even_on_error(self, tmp_path):
        """A failing append must still result in exactly one close()."""
        from gateway.mirror import _append_to_sqlite

        fake_session_db = MagicMock()
        fake_session_db.append_message.side_effect = Exception("db error")
        payload = {"role": "assistant", "content": "hello"}

        with patch("hermes_state.SessionDB", return_value=fake_session_db):
            _append_to_sqlite("sess_1", payload)

        fake_session_db.close.assert_called_once()
48 changes: 48 additions & 0 deletions tests/test_hermes_state.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,54 @@ def test_search_returns_context(self, db):
assert isinstance(results[0]["context"], list)
assert len(results[0]["context"]) > 0

def test_search_special_chars_do_not_crash(self, db):
    """Queries containing FTS5 syntax must never raise OperationalError."""
    db.create_session(session_id="s1", source="cli")
    db.append_message("s1", role="user", content="How do I use C++ templates?")

    # Inputs that are malformed under the FTS5 query grammar.
    dangerous_queries = (
        'C++',            # + is the FTS5 phrase-concatenation operator
        '"unterminated',  # double quote that is never closed
        '(problem',       # parenthesis that is never closed
        'hello AND',      # boolean operator with no right-hand side
        '***',            # wildcard with no term in front of it
        '{test}',         # curly braces (column-list syntax)
        'OR hello',       # boolean operator with no left-hand side
        'a AND OR b',     # two operators back to back
    )
    for query in dangerous_queries:
        # The result may be empty; it only has to be a list, not an error.
        outcome = db.search_messages(query)
        assert isinstance(outcome, list), f"Query {query!r} did not return a list"

def test_search_sanitized_query_still_finds_content(self, db):
    """Searching for ``C++`` after sanitization must complete normally."""
    db.create_session(session_id="s1", source="cli")
    db.append_message("s1", role="user", content="Learning C++ templates today")

    # The sanitizer reduces "C++" to "C" before it reaches MATCH.
    found = db.search_messages("C++")

    # NOTE(review): only the return type is asserted here; the test does
    # not verify that the stored message is actually among the results.
    assert isinstance(found, list)

def test_sanitize_fts5_query_strips_dangerous_chars(self):
    """Direct unit test of the ``SessionDB._sanitize_fts5_query`` helper."""
    from hermes_state import SessionDB

    sanitize = SessionDB._sanitize_fts5_query

    # Plain keyword input passes through untouched.
    assert sanitize('hello world') == 'hello world'

    # Each operator character must be gone from the sanitized output.
    for forbidden, raw_query in (
        ('+', 'C++'),
        ('"', '"unterminated'),
        ('(', '(problem'),
        ('{', '{test}'),
    ):
        assert forbidden not in sanitize(raw_query)

    # Exact outputs: dangling operators dropped, bare stars removed,
    # and a valid prefix query left alone.
    for raw_query, expected in (
        ('hello AND', 'hello'),
        ('OR world', 'world'),
        ('***', ''),
        ('deploy*', 'deploy*'),
    ):
        assert sanitize(raw_query) == expected


# =========================================================================
# Session search and listing
Expand Down