From 33cfe1515dc2312aa306cce97e1de473119b01b2 Mon Sep 17 00:00:00 2001 From: 0xbyt4 <35742124+0xbyt4@users.noreply.github.com> Date: Sat, 7 Mar 2026 04:24:45 +0300 Subject: [PATCH] fix: sanitize FTS5 queries and close mirror DB connections Two bugs fixed: 1. search_messages() crashes with OperationalError when user queries contain FTS5 special characters (+, ", (, {, dangling AND/OR, etc). Added _sanitize_fts5_query() to strip dangerous operators and a fallback try-except for edge cases. 2. _append_to_sqlite() in mirror.py creates a new SessionDB per call but never closes it, leaking SQLite connections. Added finally block to ensure db.close() is always called. --- gateway/mirror.py | 4 +++ hermes_state.py | 37 ++++++++++++++++++++++++++- tests/gateway/test_mirror.py | 24 ++++++++++++++++++ tests/test_hermes_state.py | 48 ++++++++++++++++++++++++++++++++++++ 4 files changed, 112 insertions(+), 1 deletion(-) diff --git a/gateway/mirror.py b/gateway/mirror.py index 8c2f39983..8ee39f4a7 100644 --- a/gateway/mirror.py +++ b/gateway/mirror.py @@ -111,6 +111,7 @@ def _append_to_jsonl(session_id: str, message: dict) -> None: def _append_to_sqlite(session_id: str, message: dict) -> None: """Append a message to the SQLite session database.""" + db = None try: from hermes_state import SessionDB db = SessionDB() @@ -121,3 +122,6 @@ def _append_to_sqlite(session_id: str, message: dict) -> None: ) except Exception as e: logger.debug("Mirror SQLite write failed: %s", e) + finally: + if db is not None: + db.close() diff --git a/hermes_state.py b/hermes_state.py index 1d1f951c0..eadabf099 100644 --- a/hermes_state.py +++ b/hermes_state.py @@ -16,6 +16,7 @@ Key design decisions: import json import os +import re import sqlite3 import time from pathlib import Path @@ -322,6 +323,32 @@ class SessionDB: # Search # ========================================================================= + @staticmethod + def _sanitize_fts5_query(query: str) -> str: + """Sanitize user input for safe use in FTS5 MATCH queries. + + FTS5 has its own query syntax where characters like ``"``, ``(``, ``)``, + ``+``, ``*``, ``{``, ``}`` and bare boolean operators (``AND``, ``OR``, + ``NOT``) have special meaning. Passing raw user input directly to + MATCH can cause ``sqlite3.OperationalError``. + + Strategy: strip characters that are only meaningful as FTS5 operators + and would otherwise cause syntax errors. This preserves normal keyword + search while preventing crashes on inputs like ``C++``, ``"unterminated``, + or ``hello AND``. + """ + # Remove FTS5-special characters that are not useful in keyword search + sanitized = re.sub(r'[+{}()"^]', " ", query) + # Collapse repeated * (e.g. "***") into a single one, and remove + # leading * (prefix-only matching requires at least one char before *) + sanitized = re.sub(r"\*+", "*", sanitized) + sanitized = re.sub(r"(^|\s)\*", r"\1", sanitized) + # Remove dangling boolean operators at start/end that would cause + # syntax errors (e.g. "hello AND" or "OR world") + sanitized = re.sub(r"(?i)^(AND|OR|NOT)\b\s*", "", sanitized.strip()) + sanitized = re.sub(r"(?i)\s+(AND|OR|NOT)\s*$", "", sanitized.strip()) + return sanitized.strip() + def search_messages( self, query: str, @@ -345,6 +372,10 @@ class SessionDB: if not query or not query.strip(): return [] + query = self._sanitize_fts5_query(query) + if not query: + return [] + if source_filter is None: source_filter = ["cli", "telegram", "discord", "whatsapp", "slack"] @@ -384,7 +415,11 @@ class SessionDB: LIMIT ? OFFSET ? """ - cursor = self._conn.execute(sql, params) + try: + cursor = self._conn.execute(sql, params) + except sqlite3.OperationalError: + # FTS5 query syntax error despite sanitization — return empty + return [] matches = [dict(row) for row in cursor.fetchall()] # Add surrounding context (1 message before + after each match) diff --git a/tests/gateway/test_mirror.py b/tests/gateway/test_mirror.py index efd652188..928f4eac2 100644 --- a/tests/gateway/test_mirror.py +++ b/tests/gateway/test_mirror.py @@ -160,3 +160,27 @@ class TestMirrorToSession: result = mirror_to_session("telegram", "123", "msg") assert result is False + + +class TestAppendToSqlite: + def test_connection_is_closed_after_use(self, tmp_path): + """Verify _append_to_sqlite closes the SessionDB connection.""" + from gateway.mirror import _append_to_sqlite + mock_db = MagicMock() + + with patch("hermes_state.SessionDB", return_value=mock_db): + _append_to_sqlite("sess_1", {"role": "assistant", "content": "hello"}) + + mock_db.append_message.assert_called_once() + mock_db.close.assert_called_once() + + def test_connection_closed_even_on_error(self, tmp_path): + """Verify connection is closed even when append_message raises.""" + from gateway.mirror import _append_to_sqlite + mock_db = MagicMock() + mock_db.append_message.side_effect = Exception("db error") + + with patch("hermes_state.SessionDB", return_value=mock_db): + _append_to_sqlite("sess_1", {"role": "assistant", "content": "hello"}) + + mock_db.close.assert_called_once() diff --git a/tests/test_hermes_state.py b/tests/test_hermes_state.py index 734db494f..d0bfd0f06 100644 --- a/tests/test_hermes_state.py +++ b/tests/test_hermes_state.py @@ -179,6 +179,54 @@ class TestFTS5Search: assert isinstance(results[0]["context"], list) assert len(results[0]["context"]) > 0 + def test_search_special_chars_do_not_crash(self, db): + """FTS5 special characters in queries must not raise OperationalError.""" + db.create_session(session_id="s1", source="cli") + db.append_message("s1", role="user", content="How do I use C++ templates?") + + # Each of these previously caused sqlite3.OperationalError + dangerous_queries = [ + 'C++', # + is FTS5 column filter + '"unterminated', # unbalanced double-quote + '(problem', # unbalanced parenthesis + 'hello AND', # dangling boolean operator + '***', # repeated wildcard + '{test}', # curly braces (column reference) + 'OR hello', # leading boolean operator + 'a AND OR b', # adjacent operators + ] + for query in dangerous_queries: + # Must not raise — should return list (possibly empty) + results = db.search_messages(query) + assert isinstance(results, list), f"Query {query!r} did not return a list" + + def test_search_sanitized_query_still_finds_content(self, db): + """Sanitization must not break normal keyword search.""" + db.create_session(session_id="s1", source="cli") + db.append_message("s1", role="user", content="Learning C++ templates today") + + # "C++" sanitized to "C" should still match "C++" + results = db.search_messages("C++") + # The word "C" appears in the content, so FTS5 should find it + assert isinstance(results, list) + + def test_sanitize_fts5_query_strips_dangerous_chars(self): + """Unit test for _sanitize_fts5_query static method.""" + from hermes_state import SessionDB + s = SessionDB._sanitize_fts5_query + assert s('hello world') == 'hello world' + assert '+' not in s('C++') + assert '"' not in s('"unterminated') + assert '(' not in s('(problem') + assert '{' not in s('{test}') + # Dangling operators removed + assert s('hello AND') == 'hello' + assert s('OR world') == 'world' + # Leading bare * removed + assert s('***') == '' + # Valid prefix kept + assert s('deploy*') == 'deploy*' + # ========================================================================= # Session search and listing