diff --git a/hermes_state.py b/hermes_state.py index af74ed6f..5a38e73f 100644 --- a/hermes_state.py +++ b/hermes_state.py @@ -1035,7 +1035,13 @@ class SessionDB: sanitized = re.sub(r"(?i)^(AND|OR|NOT)\b\s*", "", sanitized.strip()) sanitized = re.sub(r"(?i)\s+(AND|OR|NOT)\s*$", "", sanitized.strip()) - # Step 5: Wrap unquoted hyphenated terms (e.g. ``chat-send``) in + # Step 5: Wrap unquoted dotted terms (e.g. ``P2.2``, ``simulate.p2.test.ts``) + # in double quotes. In practice, FTS5 query parsing can treat dots as + # syntax boundaries, which may produce OperationalError or zero results. + # Quoting forces phrase semantics and avoids query parse edge cases. + sanitized = re.sub(r"\b([\w-]+(?:\.[\w-]+)+)\b", r'"\1"', sanitized) + + # Step 6: Wrap unquoted hyphenated terms (e.g. ``chat-send``) in # double quotes. FTS5's tokenizer splits on hyphens, turning # ``chat-send`` into ``chat AND send``. Quoting preserves the # intended phrase match. diff --git a/tests/test_hermes_state.py b/tests/test_hermes_state.py index e79c7f4f..f38d9701 100644 --- a/tests/test_hermes_state.py +++ b/tests/test_hermes_state.py @@ -376,6 +376,20 @@ class TestFTS5Search: assert any("chat-send" in (r.get("snippet") or r.get("content", "")).lower() for r in results) + def test_search_dotted_term_does_not_crash(self, db): + """Dotted terms like 'P2.2' or 'simulate.p2.test.ts' should not crash FTS5.""" + db.create_session(session_id="s1", source="cli") + db.append_message("s1", role="user", content="Working on P2.2 session_search edge cases") + db.append_message("s1", role="assistant", content="See simulate.p2.test.ts for details") + + results = db.search_messages("P2.2") + assert isinstance(results, list) + assert len(results) >= 1 + + results2 = db.search_messages("simulate.p2.test.ts") + assert isinstance(results2, list) + assert len(results2) >= 1 + def test_search_quoted_phrase_preserved(self, db): """User-provided quoted phrases should be preserved for exact matching.""" db.create_session(session_id="s1", source="cli") @@ -443,6 +457,23 @@ class TestFTS5Search: # Hyphenated inside a quoted phrase stays as-is assert s('"my chat-send thing"') == '"my chat-send thing"' + def test_sanitize_fts5_quotes_dotted_terms(self): + """Dotted terms should be wrapped in quotes to avoid FTS5 query parse edge cases.""" + from hermes_state import SessionDB + s = SessionDB._sanitize_fts5_query + + assert s('P2.2') == '"P2.2"' + assert s('simulate.p2') == '"simulate.p2"' + assert s('simulate.p2.test.ts') == '"simulate.p2.test.ts"' + + # Already quoted — no double quoting + assert s('"P2.2"') == '"P2.2"' + + # Works with boolean syntax + result = s('P2.2 OR simulate.p2') + assert '"P2.2"' in result + assert '"simulate.p2"' in result + # ========================================================================= # Session search and listing