fix(state): quote dotted terms in FTS5 queries
FTS5 queries containing dots (e.g. `P2.2`, `simulate.p2.test.ts`) can hit query-parser edge cases that raise `OperationalError` or return no results unless the dotted term is quoted. Extend `_sanitize_fts5_query` to wrap dotted tokens in double quotes (as is already done for hyphenated terms) and add regression tests.
This commit is contained in:
@@ -1035,7 +1035,13 @@ class SessionDB:
|
||||
sanitized = re.sub(r"(?i)^(AND|OR|NOT)\b\s*", "", sanitized.strip())
|
||||
sanitized = re.sub(r"(?i)\s+(AND|OR|NOT)\s*$", "", sanitized.strip())
|
||||
|
||||
# Step 5: Wrap unquoted hyphenated terms (e.g. ``chat-send``) in
|
||||
# Step 5: Wrap unquoted dotted terms (e.g. ``P2.2``, ``simulate.p2.test.ts``)
|
||||
# in double quotes. In practice, FTS5 query parsing can treat dots as
|
||||
# syntax boundaries, which may produce OperationalError or zero results.
|
||||
# Quoting forces phrase semantics and avoids query parse edge cases.
|
||||
sanitized = re.sub(r"\b([\w-]+(?:\.[\w-]+)+)\b", r'"\1"', sanitized)
|
||||
|
||||
# Step 6: Wrap unquoted hyphenated terms (e.g. ``chat-send``) in
|
||||
# double quotes. FTS5's tokenizer splits on hyphens, turning
|
||||
# ``chat-send`` into ``chat AND send``. Quoting preserves the
|
||||
# intended phrase match.
|
||||
|
||||
@@ -376,6 +376,20 @@ class TestFTS5Search:
|
||||
assert any("chat-send" in (r.get("snippet") or r.get("content", "")).lower()
|
||||
for r in results)
|
||||
|
||||
def test_search_dotted_term_does_not_crash(self, db):
    """Searching for a dotted term returns a non-empty list instead of failing."""
    db.create_session(session_id="s1", source="cli")

    # Seed one message per dotted term that is searched for below.
    seeded = (
        ("user", "Working on P2.2 session_search edge cases"),
        ("assistant", "See simulate.p2.test.ts for details"),
    )
    for role, content in seeded:
        db.append_message("s1", role=role, content=content)

    # Each dotted query must yield a list with at least one hit.
    for query in ("P2.2", "simulate.p2.test.ts"):
        hits = db.search_messages(query)
        assert isinstance(hits, list)
        assert len(hits) >= 1
|
||||
|
||||
def test_search_quoted_phrase_preserved(self, db):
|
||||
"""User-provided quoted phrases should be preserved for exact matching."""
|
||||
db.create_session(session_id="s1", source="cli")
|
||||
@@ -443,6 +457,23 @@ class TestFTS5Search:
|
||||
# Hyphenated inside a quoted phrase stays as-is
|
||||
assert s('"my chat-send thing"') == '"my chat-send thing"'
|
||||
|
||||
def test_sanitize_fts5_quotes_dotted_terms(self):
    """Check that the sanitizer double-quotes dotted terms exactly once."""
    from hermes_state import SessionDB

    sanitize = SessionDB._sanitize_fts5_query

    # Bare dotted terms come back wrapped in one pair of double quotes.
    for term in ("P2.2", "simulate.p2", "simulate.p2.test.ts"):
        assert sanitize(term) == f'"{term}"'

    # A term the caller already quoted is returned unchanged.
    assert sanitize('"P2.2"') == '"P2.2"'

    # Both operands of a boolean query end up quoted.
    combined = sanitize('P2.2 OR simulate.p2')
    for quoted in ('"P2.2"', '"simulate.p2"'):
        assert quoted in combined
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Session search and listing
|
||||
|
||||
Reference in New Issue
Block a user