From 52c5e491f58f0a685002273206b5f8c294073c77 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Wed, 25 Mar 2026 11:10:19 -0700 Subject: [PATCH] fix(session): surface silent SessionDB failures that cause session data loss (#2999) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix(session): surface silent SessionDB failures that cause session data loss SessionDB initialization and operation failures were logged at debug level or silently swallowed, causing sessions to never be indexed in the FTS5 database. This made session_search unable to find affected conversations. In practice, ~48% of sessions can be lost without any visible indication. The JSON session files are still written (separate code path), but the SQLite/FTS5 index gets nothing — making session_search return empty results for affected sessions. Changes: - cli.py: Log warnings (not debug) when SessionDB init fails at both __init__ and _start_session entry points - run_agent.py: Log warnings on create_session, append_message, and compression split failures - run_agent.py: Set _session_db = None after create_session failure to fail fast instead of silently dropping every message for the session Root cause: When the gateway restarts or DB lock contention occurs during SessionDB() init, the exception is caught and swallowed. The agent continues running normally — JSON session logs are written to disk — but no messages reach the FTS5 index. * fix: use module logger instead of root logging for SessionDB warnings Follow-up to cherry-picked PR #2939 — the original used logging.warning() (root logger) instead of logger.warning() (module logger) in the 5 new warning calls. The module logger preserves the logger hierarchy and shows the correct module name in log output. 
--------- Co-authored-by: LucidPaths --- cli.py | 6 +++--- run_agent.py | 7 ++++--- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/cli.py b/cli.py index 9e82e03d3..5e256f275 100644 --- a/cli.py +++ b/cli.py @@ -1186,8 +1186,8 @@ class HermesCLI: try: from hermes_state import SessionDB self._session_db = SessionDB() - except Exception: - pass + except Exception as e: + logger.warning("Failed to initialize SessionDB — session will NOT be indexed for search: %s", e) # Deferred title: stored in memory until the session is created in the DB self._pending_title: Optional[str] = None @@ -1852,7 +1852,7 @@ class HermesCLI: from hermes_state import SessionDB self._session_db = SessionDB() except Exception as e: - logger.debug("SQLite session store not available: %s", e) + logger.warning("SQLite session store not available — session will NOT be indexed: %s", e) # If resuming, validate the session exists and load its history. # _preload_resumed_session() may have already loaded it (called from diff --git a/run_agent.py b/run_agent.py index 80c5fe3e9..6c0262ccf 100644 --- a/run_agent.py +++ b/run_agent.py @@ -887,7 +887,8 @@ class AIAgent: user_id=None, ) except Exception as e: - logger.debug("Session DB create_session failed: %s", e) + logger.warning("Session DB create_session failed — messages will NOT be indexed: %s", e) + self._session_db = None # prevent silent data loss on every subsequent flush # In-memory todo list for task planning (one per agent/session) from tools.todo_tool import TodoStore @@ -1546,7 +1547,7 @@ class AIAgent: ) self._last_flushed_db_idx = len(messages) except Exception as e: - logger.debug("Session DB append_message failed: %s", e) + logger.warning("Session DB append_message failed: %s", e) def _get_messages_up_to_last_assistant(self, messages: List[Dict]) -> List[Dict]: """ @@ -4687,7 +4688,7 @@ class AIAgent: # Reset flush cursor — new session starts with no messages written self._last_flushed_db_idx = 0 except Exception as e: - 
logger.debug("Session DB compression split failed: %s", e) + logger.warning("Session DB compression split failed — new session will NOT be indexed: %s", e) # Reset context pressure warning and token estimate — usage drops # after compaction. Without this, the stale last_prompt_tokens from