fix(state): quote dotted terms in FTS5 queries

FTS5 queries containing dots (e.g. P2.2, simulate.p2.test.ts) can trigger query parse edge cases that yield OperationalError or empty results unless quoted. Extend _sanitize_fts5_query to wrap dotted tokens in double quotes (similar to hyphenated terms) and add regression tests.
This commit is contained in:
Lume
2026-03-31 14:52:41 +01:00
committed by Teknium
parent c4e626b1fa
commit 9825cd7b1e
2 changed files with 38 additions and 1 deletions

View File

@@ -1035,7 +1035,13 @@ class SessionDB:
sanitized = re.sub(r"(?i)^(AND|OR|NOT)\b\s*", "", sanitized.strip())
sanitized = re.sub(r"(?i)\s+(AND|OR|NOT)\s*$", "", sanitized.strip())
# Step 5: Wrap unquoted hyphenated terms (e.g. ``chat-send``) in
# Step 5: Wrap unquoted dotted terms (e.g. ``P2.2``, ``simulate.p2.test.ts``)
# in double quotes. In practice, FTS5 query parsing can treat dots as
# syntax boundaries, which may produce OperationalError or zero results.
# Quoting forces phrase semantics and avoids query parse edge cases.
sanitized = re.sub(r"\b([\w-]+(?:\.[\w-]+)+)\b", r'"\1"', sanitized)
# Step 6: Wrap unquoted hyphenated terms (e.g. ``chat-send``) in
# double quotes. FTS5's tokenizer splits on hyphens, turning
# ``chat-send`` into ``chat AND send``. Quoting preserves the
# intended phrase match.

View File

@@ -376,6 +376,20 @@ class TestFTS5Search:
assert any("chat-send" in (r.get("snippet") or r.get("content", "")).lower()
for r in results)
def test_search_dotted_term_does_not_crash(self, db):
"""Dotted terms like 'P2.2' or 'simulate.p2.test.ts' should not crash FTS5."""
db.create_session(session_id="s1", source="cli")
db.append_message("s1", role="user", content="Working on P2.2 session_search edge cases")
db.append_message("s1", role="assistant", content="See simulate.p2.test.ts for details")
results = db.search_messages("P2.2")
assert isinstance(results, list)
assert len(results) >= 1
results2 = db.search_messages("simulate.p2.test.ts")
assert isinstance(results2, list)
assert len(results2) >= 1
def test_search_quoted_phrase_preserved(self, db):
"""User-provided quoted phrases should be preserved for exact matching."""
db.create_session(session_id="s1", source="cli")
@@ -443,6 +457,23 @@ class TestFTS5Search:
# Hyphenated inside a quoted phrase stays as-is
assert s('"my chat-send thing"') == '"my chat-send thing"'
def test_sanitize_fts5_quotes_dotted_terms(self):
"""Dotted terms should be wrapped in quotes to avoid FTS5 query parse edge cases."""
from hermes_state import SessionDB
s = SessionDB._sanitize_fts5_query
assert s('P2.2') == '"P2.2"'
assert s('simulate.p2') == '"simulate.p2"'
assert s('simulate.p2.test.ts') == '"simulate.p2.test.ts"'
# Already quoted — no double quoting
assert s('"P2.2"') == '"P2.2"'
# Works with boolean syntax
result = s('P2.2 OR simulate.p2')
assert '"P2.2"' in result
assert '"simulate.p2"' in result
# =========================================================================
# Session search and listing