From c9dc6c474990b36c9997f348fca7c4e9ec597690 Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Wed, 1 Apr 2026 03:06:47 -0700
Subject: [PATCH] fix(insights): show cache tokens in overview so total adds up
 (#4428)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The total_tokens field includes cache_read + cache_write tokens, but
the display only showed input + output — making the math look wrong
(e.g. 765K + 134K displayed but total said 9.2M). Now shows a cache
line when cache tokens are present so all visible numbers sum to the
displayed total.

Affects both terminal (hermes insights) and gateway (/insights)
formats.
---
 agent/insights.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/agent/insights.py b/agent/insights.py
index e6875c40b..d529ffedf 100644
--- a/agent/insights.py
+++ b/agent/insights.py
@@ -644,6 +644,9 @@ class InsightsEngine:
         lines.append(f"  Sessions:          {o['total_sessions']:<12}  Messages:        {o['total_messages']:,}")
         lines.append(f"  Tool calls:        {o['total_tool_calls']:<12,}  User messages:   {o['user_messages']:,}")
         lines.append(f"  Input tokens:      {o['total_input_tokens']:<12,}  Output tokens:   {o['total_output_tokens']:,}")
+        cache_total = o.get("total_cache_read_tokens", 0) + o.get("total_cache_write_tokens", 0)
+        if cache_total > 0:
+            lines.append(f"  Cache read:        {o['total_cache_read_tokens']:<12,}  Cache write:     {o['total_cache_write_tokens']:,}")
         cost_str = f"${o['estimated_cost']:.2f}"
         if o.get("models_without_pricing"):
             cost_str += " *"
@@ -746,7 +749,11 @@ class InsightsEngine:
 
         # Overview
         lines.append(f"**Sessions:** {o['total_sessions']} | **Messages:** {o['total_messages']:,} | **Tool calls:** {o['total_tool_calls']:,}")
-        lines.append(f"**Tokens:** {o['total_tokens']:,} (in: {o['total_input_tokens']:,} / out: {o['total_output_tokens']:,})")
+        cache_total = o.get("total_cache_read_tokens", 0) + o.get("total_cache_write_tokens", 0)
+        if cache_total > 0:
+            lines.append(f"**Tokens:** {o['total_tokens']:,} (in: {o['total_input_tokens']:,} / out: {o['total_output_tokens']:,} / cache: {cache_total:,})")
+        else:
+            lines.append(f"**Tokens:** {o['total_tokens']:,} (in: {o['total_input_tokens']:,} / out: {o['total_output_tokens']:,})")
         cost_note = ""
         if o.get("models_without_pricing"):
             cost_note = " _(excludes custom/self-hosted models)_"