diff --git a/gateway/session.py b/gateway/session.py index d22c6d043..2d5376b07 100644 --- a/gateway/session.py +++ b/gateway/session.py @@ -762,14 +762,16 @@ class SessionStore: if session_key in self._entries: entry = self._entries[session_key] entry.updated_at = _now() - entry.input_tokens += input_tokens - entry.output_tokens += output_tokens - entry.cache_read_tokens += cache_read_tokens - entry.cache_write_tokens += cache_write_tokens + # Direct assignment — the gateway receives cumulative totals + # from the cached agent, not per-call deltas. + entry.input_tokens = input_tokens + entry.output_tokens = output_tokens + entry.cache_read_tokens = cache_read_tokens + entry.cache_write_tokens = cache_write_tokens if last_prompt_tokens is not None: entry.last_prompt_tokens = last_prompt_tokens if estimated_cost_usd is not None: - entry.estimated_cost_usd += estimated_cost_usd + entry.estimated_cost_usd = estimated_cost_usd if cost_status: entry.cost_status = cost_status entry.total_tokens = ( @@ -795,6 +797,7 @@ class SessionStore: billing_provider=provider, billing_base_url=base_url, model=model, + absolute=True, ) except Exception as e: logger.debug("Session DB operation failed: %s", e) diff --git a/hermes_state.py b/hermes_state.py index 31ed12190..cf03951c7 100644 --- a/hermes_state.py +++ b/hermes_state.py @@ -319,11 +319,39 @@ class SessionDB: billing_provider: Optional[str] = None, billing_base_url: Optional[str] = None, billing_mode: Optional[str] = None, + absolute: bool = False, ) -> None: - """Increment token counters and backfill model if not already set.""" - with self._lock: - self._conn.execute( - """UPDATE sessions SET + """Update token counters and backfill model if not already set. + + When *absolute* is False (default), values are **incremented** — use + this for per-API-call deltas (CLI path). + + When *absolute* is True, values are **set directly** — use this when + the caller already holds cumulative totals (gateway path, where the + cached agent accumulates across messages). + """ + if absolute: + sql = """UPDATE sessions SET + input_tokens = ?, + output_tokens = ?, + cache_read_tokens = ?, + cache_write_tokens = ?, + reasoning_tokens = ?, + estimated_cost_usd = COALESCE(?, 0), + actual_cost_usd = CASE + WHEN ? IS NULL THEN actual_cost_usd + ELSE ? + END, + cost_status = COALESCE(?, cost_status), + cost_source = COALESCE(?, cost_source), + pricing_version = COALESCE(?, pricing_version), + billing_provider = COALESCE(billing_provider, ?), + billing_base_url = COALESCE(billing_base_url, ?), + billing_mode = COALESCE(billing_mode, ?), + model = COALESCE(model, ?) + WHERE id = ?""" + else: + sql = """UPDATE sessions SET input_tokens = input_tokens + ?, output_tokens = output_tokens + ?, cache_read_tokens = cache_read_tokens + ?, @@ -341,7 +369,10 @@ class SessionDB: billing_base_url = COALESCE(billing_base_url, ?), billing_mode = COALESCE(billing_mode, ?), model = COALESCE(model, ?) - WHERE id = ?""", + WHERE id = ?""" + with self._lock: + self._conn.execute( + sql, ( input_tokens, output_tokens, diff --git a/tests/gateway/test_session.py b/tests/gateway/test_session.py index 8d4131ab1..226e50593 100644 --- a/tests/gateway/test_session.py +++ b/tests/gateway/test_session.py @@ -858,6 +858,7 @@ class TestLastPromptTokens: billing_provider=None, billing_base_url=None, model="openai/gpt-5.4", + absolute=True, )