fix: backfill model on gateway sessions after agent runs

Gateway sessions end up with model=NULL because the session row is created before AIAgent is constructed. After the agent responds, update_session() writes token counts but never fills in the model. This change threads agent.model through _run_agent()'s return dict into update_session() → update_token_counts(). The SQL uses COALESCE(model, ?), so it only fills rows where model is NULL and never overwrites a model already set at creation time (e.g. CLI sessions). If the agent falls back to a different provider, agent.model is updated in place by _try_activate_fallback(), so the recorded value reflects whichever model actually produced the response. Fixes #987.
2026-03-11 17:44:37 -07:00
parent c1cca65168
commit 2046a4c08c
3 changed files with 13 additions and 6 deletions
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -1578,10 +1578,11 @@ class GatewayRunner:
                        skip_db=agent_persisted,
                    )
            
-            # Update session with actual prompt token count from the agent
+            # Update session with actual prompt token count and model from the agent
            self.session_store.update_session(
                session_entry.session_key,
                last_prompt_tokens=agent_result.get("last_prompt_tokens", 0),
+                model=agent_result.get("model"),
            )
            
            return response
@@ -3586,6 +3587,7 @@ class GatewayRunner:
                    "tools": tools_holder[0] or [],
                    "history_offset": len(agent_history),
                    "last_prompt_tokens": _last_prompt_toks,
+                    "model": agent_holder[0].model if agent_holder[0] else None,
                }
            
            # Scan tool results for MEDIA:<path> tags that need to be delivered
@@ -3648,6 +3650,7 @@ class GatewayRunner:
                "tools": tools_holder[0] or [],
                "history_offset": len(agent_history),
                "last_prompt_tokens": _last_prompt_toks,
+                "model": agent_holder[0].model if agent_holder[0] else None,
                "session_id": effective_session_id,
            }
        
--- a/gateway/session.py
+++ b/gateway/session.py
@@ -590,6 +590,7 @@ class SessionStore:
        input_tokens: int = 0,
        output_tokens: int = 0,
        last_prompt_tokens: int = None,
+        model: str = None,
    ) -> None:
        """Update a session's metadata after an interaction."""
        self._ensure_loaded()
@@ -607,7 +608,8 @@ class SessionStore:
            if self._db:
                try:
                    self._db.update_token_counts(
-                        entry.session_id, input_tokens, output_tokens
+                        entry.session_id, input_tokens, output_tokens,
+                        model=model,
                    )
                except Exception as e:
                    logger.debug("Session DB operation failed: %s", e)
--- a/hermes_state.py
+++ b/hermes_state.py
@@ -227,15 +227,17 @@ class SessionDB:
        self._conn.commit()

    def update_token_counts(
-        self, session_id: str, input_tokens: int = 0, output_tokens: int = 0
+        self, session_id: str, input_tokens: int = 0, output_tokens: int = 0,
+        model: str = None,
    ) -> None:
-        """Increment token counters on a session."""
+        """Increment token counters and backfill model if not already set."""
        self._conn.execute(
            """UPDATE sessions SET
               input_tokens = input_tokens + ?,
-               output_tokens = output_tokens + ?
+               output_tokens = output_tokens + ?,
+               model = COALESCE(model, ?)
               WHERE id = ?""",
-            (input_tokens, output_tokens, session_id),
+            (input_tokens, output_tokens, model, session_id),
        )
        self._conn.commit()