From 52c5e491f58f0a685002273206b5f8c294073c77 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Wed, 25 Mar 2026 11:10:19 -0700 Subject: [PATCH] fix(session): surface silent SessionDB failures that cause session data loss (#2999) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix(session): surface silent SessionDB failures that cause session data loss SessionDB initialization and operation failures were logged at debug level or silently swallowed, causing sessions to never be indexed in the FTS5 database. This made session_search unable to find affected conversations. In practice, ~48% of sessions can be lost without any visible indication. The JSON session files are still written (separate code path), but the SQLite/FTS5 index gets nothing — making session_search return empty results for affected sessions. Changes: - cli.py: Log warnings (not debug) when SessionDB init fails at both __init__ and _start_session entry points - run_agent.py: Log warnings on create_session, append_message, and compression split failures - run_agent.py: Set _session_db = None after create_session failure to fail fast instead of silently dropping every message for the session Root cause: When the gateway restarts or DB lock contention occurs during SessionDB() init, the exception is caught and swallowed. The agent continues running normally — JSON session logs are written to disk — but no messages reach the FTS5 index. * fix: use module logger instead of root logging for SessionDB warnings Follow-up to cherry-picked PR #2939 — the original used logging.warning() (root logger) instead of logger.warning() (module logger) in the 5 new warning calls. The module logger preserves the logger hierarchy and shows the correct module name in log output. 
--------- Co-authored-by: LucidPaths --- cli.py | 6 +++--- run_agent.py | 7 ++++--- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/cli.py b/cli.py index 9e82e03d3..5e256f275 100644 --- a/cli.py +++ b/cli.py @@ -1186,8 +1186,8 @@ class HermesCLI: try: from hermes_state import SessionDB self._session_db = SessionDB() - except Exception: - pass + except Exception as e: + logger.warning("Failed to initialize SessionDB — session will NOT be indexed for search: %s", e) # Deferred title: stored in memory until the session is created in the DB self._pending_title: Optional[str] = None @@ -1852,7 +1852,7 @@ class HermesCLI: from hermes_state import SessionDB self._session_db = SessionDB() except Exception as e: - logger.debug("SQLite session store not available: %s", e) + logger.warning("SQLite session store not available — session will NOT be indexed: %s", e) # If resuming, validate the session exists and load its history. # _preload_resumed_session() may have already loaded it (called from diff --git a/run_agent.py b/run_agent.py index 80c5fe3e9..6c0262ccf 100644 --- a/run_agent.py +++ b/run_agent.py @@ -887,7 +887,8 @@ class AIAgent: user_id=None, ) except Exception as e: - logger.debug("Session DB create_session failed: %s", e) + logger.warning("Session DB create_session failed — messages will NOT be indexed: %s", e) + self._session_db = None # prevent silent data loss on every subsequent flush # In-memory todo list for task planning (one per agent/session) from tools.todo_tool import TodoStore @@ -1546,7 +1547,7 @@ class AIAgent: ) self._last_flushed_db_idx = len(messages) except Exception as e: - logger.debug("Session DB append_message failed: %s", e) + logger.warning("Session DB append_message failed: %s", e) def _get_messages_up_to_last_assistant(self, messages: List[Dict]) -> List[Dict]: """ @@ -4687,7 +4688,7 @@ class AIAgent: # Reset flush cursor — new session starts with no messages written self._last_flushed_db_idx = 0 except Exception as e: - 
logger.debug("Session DB compression split failed: %s", e) + logger.warning("Session DB compression split failed — new session will NOT be indexed: %s", e) # Reset context pressure warning and token estimate — usage drops # after compaction. Without this, the stale last_prompt_tokens from