From c9dc6c474990b36c9997f348fca7c4e9ec597690 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Wed, 1 Apr 2026 03:06:47 -0700 Subject: [PATCH] fix(insights): show cache tokens in overview so total adds up (#4428) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The total_tokens field includes cache_read + cache_write tokens, but the display only showed input + output — making the math look wrong (e.g. 765K + 134K displayed but total said 9.2M). Now shows a cache line when cache tokens are present so all visible numbers sum to the displayed total. Affects both terminal (hermes insights) and gateway (/insights) formats. --- agent/insights.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/agent/insights.py b/agent/insights.py index e6875c40b..d529ffedf 100644 --- a/agent/insights.py +++ b/agent/insights.py @@ -644,6 +644,9 @@ class InsightsEngine: lines.append(f" Sessions: {o['total_sessions']:<12} Messages: {o['total_messages']:,}") lines.append(f" Tool calls: {o['total_tool_calls']:<12,} User messages: {o['user_messages']:,}") lines.append(f" Input tokens: {o['total_input_tokens']:<12,} Output tokens: {o['total_output_tokens']:,}") + cache_total = o.get("total_cache_read_tokens", 0) + o.get("total_cache_write_tokens", 0) + if cache_total > 0: + lines.append(f" Cache read: {o['total_cache_read_tokens']:<12,} Cache write: {o['total_cache_write_tokens']:,}") cost_str = f"${o['estimated_cost']:.2f}" if o.get("models_without_pricing"): cost_str += " *" @@ -746,7 +749,11 @@ class InsightsEngine: # Overview lines.append(f"**Sessions:** {o['total_sessions']} | **Messages:** {o['total_messages']:,} | **Tool calls:** {o['total_tool_calls']:,}") - lines.append(f"**Tokens:** {o['total_tokens']:,} (in: {o['total_input_tokens']:,} / out: {o['total_output_tokens']:,})") + cache_total = o.get("total_cache_read_tokens", 0) + o.get("total_cache_write_tokens", 0) + if cache_total > 0: + lines.append(f"**Tokens:** {o['total_tokens']:,} (in: {o['total_input_tokens']:,} / out: {o['total_output_tokens']:,} / cache: {cache_total:,})") + else: + lines.append(f"**Tokens:** {o['total_tokens']:,} (in: {o['total_input_tokens']:,} / out: {o['total_output_tokens']:,})") cost_note = "" if o.get("models_without_pricing"): cost_note = " _(excludes custom/self-hosted models)_"