@staticmethod
def _sanitize_fts5_query(query: str) -> str:
    """Reduce free-form user text to a query that FTS5 MATCH can parse.

    Unquoted FTS5 query text may only contain *barewords*: ASCII letters,
    digits, underscore and any non-ASCII character. Every other ASCII
    character (``"``, ``(``, ``)``, ``+``, ``{``, ``}``, ``^``, ``:``,
    ``-``, ``.``, ``'``, ``@`` ...) is either query syntax or a syntax
    error, and the bare words ``AND``, ``OR`` and ``NOT`` are operators
    that need an operand on both sides.

    Sanitization rules, applied in order:

    1. Each ASCII character outside the bareword alphabet (other than
       whitespace and ``*``) becomes a space, so ``file.py`` turns into
       the two keywords ``file py`` instead of an unparsable query.
    2. Runs of ``*`` collapse to one, and a ``*`` with no keyword
       directly in front of it is dropped; ``deploy*`` (prefix search)
       is kept.
    3. Operators with a missing operand are removed: leading and
       trailing ones are dropped, and a run of adjacent operators keeps
       only its last member (``a AND NOT b`` becomes ``a NOT b``).
    4. Whitespace is normalised to single spaces.

    Operator detection is case-sensitive because FTS5 only recognises
    the upper-case spellings; lower-case ``and`` / ``or`` / ``not`` are
    ordinary search terms and are preserved.

    Args:
        query: Raw search text as typed by the user.

    Returns:
        The sanitized query string. It is empty when nothing searchable
        remains (for example ``"***"`` or ``"AND"``).
    """
    # Replace every ASCII character FTS5 would reject in an unquoted
    # string. Non-ASCII characters (U+0080 and above) are all legal
    # bareword characters, so they are left untouched.
    cleaned = re.sub(r"[^A-Za-z0-9_*\s\u0080-\U0010ffff]", " ", query)

    # "***" -> "*", then drop any "*" that does not directly follow a
    # keyword character (start of string or right after whitespace).
    cleaned = re.sub(r"\*+", "*", cleaned)
    cleaned = re.sub(r"(?<!\S)\*", "", cleaned)

    operators = {"AND", "OR", "NOT"}
    terms = []
    for token in cleaned.split():
        if token in operators:
            if not terms:
                # Leading operator: there is no left-hand operand.
                continue
            if terms[-1] in operators:
                # Adjacent operators: keep only the most recent one.
                terms[-1] = token
                continue
        terms.append(token)

    # Runs were collapsed above, so at most one trailing operator remains.
    if terms and terms[-1] in operators:
        terms.pop()

    # NOTE: a "*" glued to an operator word (e.g. "AND*") is not
    # rewritten here, so callers should still guard the MATCH statement
    # against sqlite3.OperationalError.
    return " ".join(terms)
def test_search_special_chars_do_not_crash(self, db):
    """Searching with FTS5 syntax characters must return a list, not raise.

    One session with a single message is stored, then each malformed
    input is passed to ``search_messages``. The only requirement is that
    the call completes and yields a list (which may be empty).
    """
    db.create_session(session_id="s1", source="cli")
    db.append_message("s1", role="user", content="How do I use C++ templates?")

    # Inputs reported to trigger sqlite3.OperationalError when handed to
    # MATCH unsanitized.
    hostile_inputs = (
        'C++',             # trailing plus signs
        '"unterminated',   # unbalanced double-quote
        '(problem',        # unbalanced parenthesis
        'hello AND',       # operator with no right-hand operand
        '***',             # nothing but wildcards
        '{test}',          # curly braces
        'OR hello',        # operator with no left-hand operand
        'a AND OR b',      # two operators back to back
    )
    for raw in hostile_inputs:
        found = db.search_messages(raw)
        assert isinstance(found, list), f"Query {raw!r} did not return a list"
def test_sanitize_fts5_query_strips_dangerous_chars(self):
    """Exercise ``SessionDB._sanitize_fts5_query`` directly on sample inputs."""
    from hermes_state import SessionDB

    sanitize = SessionDB._sanitize_fts5_query

    # Ordinary keywords pass through unchanged.
    assert sanitize('hello world') == 'hello world'

    # The listed character must be gone from the sanitized text.
    stripped_cases = (
        ('+', 'C++'),
        ('"', '"unterminated'),
        ('(', '(problem'),
        ('{', '{test}'),
    )
    for forbidden, raw in stripped_cases:
        assert forbidden not in sanitize(raw)

    # Exact expected results.
    exact_cases = (
        ('hello AND', 'hello'),    # dangling trailing operator removed
        ('OR world', 'world'),     # dangling leading operator removed
        ('***', ''),               # bare wildcards removed entirely
        ('deploy*', 'deploy*'),    # valid prefix query kept
    )
    for raw, expected in exact_cases:
        assert sanitize(raw) == expected