From 15cfd2082083099bff7e6d7f61544f802ad06170 Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Fri, 27 Mar 2026 21:42:09 -0700
Subject: [PATCH] fix: cap context pressure percentage at 100% in display
 (#3480)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* fix: cap context pressure percentage at 100% in display

The forward-looking token estimate can overshoot the compaction threshold
(e.g. a large tool result pushes it from 70% to 109% in one step). The
progress bar was already capped via min(), but pct_int was not — causing
the user to see '109% to compaction' which is confusing.

Cap pct_int at 100 in both CLI and gateway display functions.

Reported by @JoshExile82.

* refactor: use real API token counts for compression decisions

Replace the rough chars/3 estimation with actual prompt_tokens +
completion_tokens from the API response. The estimation was needed to
predict whether tool results would push context past the threshold, but
the default 50% threshold leaves ample headroom — if tool results push
past it, the next API call reports real usage and triggers compression
then.

This removes all estimation from the compression and context pressure
paths, making both 100% data-driven from provider-reported token counts.

Also removes the dead _msg_count_before_tools variable.
---
 agent/display.py               |  4 ++--
 run_agent.py                   | 23 +++++++++++------------
 tests/test_context_pressure.py | 11 ++++++++++-
 3 files changed, 23 insertions(+), 15 deletions(-)

diff --git a/agent/display.py b/agent/display.py
index 84a554176..22b918e1b 100644
--- a/agent/display.py
+++ b/agent/display.py
@@ -699,7 +699,7 @@ def format_context_pressure(
         threshold_percent: Compaction threshold as a fraction of context window.
         compression_enabled: Whether auto-compression is active.
     """
-    pct_int = int(compaction_progress * 100)
+    pct_int = min(int(compaction_progress * 100), 100)
     filled = min(int(compaction_progress * _BAR_WIDTH), _BAR_WIDTH)
     bar = _BAR_FILLED * filled + _BAR_EMPTY * (_BAR_WIDTH - filled)
 
@@ -729,7 +729,7 @@ def format_context_pressure_gateway(
     No ANSI — just Unicode and plain text suitable for Telegram/Discord/etc.
     The percentage shows progress toward the compaction threshold.
     """
-    pct_int = int(compaction_progress * 100)
+    pct_int = min(int(compaction_progress * 100), 100)
     filled = min(int(compaction_progress * _BAR_WIDTH), _BAR_WIDTH)
     bar = _BAR_FILLED * filled + _BAR_EMPTY * (_BAR_WIDTH - filled)
 
diff --git a/run_agent.py b/run_agent.py
index dd3d1156a..54546ab7b 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -7311,7 +7311,6 @@ class AIAgent:
                         except Exception:
                             pass
 
-                    _msg_count_before_tools = len(messages)
                     self._execute_tool_calls(assistant_message, messages, effective_task_id, api_call_count)
 
                     # Signal that a paragraph break is needed before the next
@@ -7329,18 +7328,18 @@ class AIAgent:
                     if _tc_names == {"execute_code"}:
                         self.iteration_budget.refund()
                     
-                    # Estimate next prompt size using real token counts from the
-                    # last API response + rough estimate of newly appended tool
-                    # results.  This catches cases where tool results push the
-                    # context past the limit that last_prompt_tokens alone misses
-                    # (e.g. large file reads, web extractions).
+                    # Use real token counts from the API response to decide
+                    # compression.  prompt_tokens + completion_tokens is the
+                    # actual context size the provider reported plus the
+                    # assistant turn — a tight lower bound for the next prompt.
+                    # Tool results appended above aren't counted yet, but the
+                    # threshold (default 50%) leaves ample headroom; if tool
+                    # results push past it, the next API call will report the
+                    # real total and trigger compression then.
                     _compressor = self.context_compressor
-                    _new_tool_msgs = messages[_msg_count_before_tools:]
-                    _new_chars = sum(len(str(m.get("content", "") or "")) for m in _new_tool_msgs)
-                    _estimated_next_prompt = (
+                    _real_tokens = (
                         _compressor.last_prompt_tokens
                         + _compressor.last_completion_tokens
-                        + _new_chars // 3  # conservative: JSON-heavy tool results ≈ 3 chars/token
                     )
 
                     # ── Context pressure warnings (user-facing only) ──────────
@@ -7350,12 +7349,12 @@ class AIAgent:
                     # Does not inject into messages — just prints to CLI output
                     # and fires status_callback for gateway platforms.
                     if _compressor.threshold_tokens > 0:
-                        _compaction_progress = _estimated_next_prompt / _compressor.threshold_tokens
+                        _compaction_progress = _real_tokens / _compressor.threshold_tokens
                         if _compaction_progress >= 0.85 and not self._context_pressure_warned:
                             self._context_pressure_warned = True
                             self._emit_context_pressure(_compaction_progress, _compressor)
 
-                    if self.compression_enabled and _compressor.should_compress(_estimated_next_prompt):
+                    if self.compression_enabled and _compressor.should_compress(_real_tokens):
                         messages, active_system_prompt = self._compress_context(
                             messages, system_message,
                             approx_tokens=self.context_compressor.last_prompt_tokens,
diff --git a/tests/test_context_pressure.py b/tests/test_context_pressure.py
index f89daef52..522603fdb 100644
--- a/tests/test_context_pressure.py
+++ b/tests/test_context_pressure.py
@@ -69,10 +69,12 @@ class TestFormatContextPressure:
         assert isinstance(result, str)
 
     def test_over_100_percent_capped(self):
-        """Progress > 1.0 should not break the bar."""
+        """Progress > 1.0 should cap both bar and percentage text at 100%."""
         line = format_context_pressure(1.05, 100_000, 0.50)
         assert "▰" in line
         assert line.count("▰") == 20
+        assert "100%" in line
+        assert "105%" not in line
 
 
 class TestFormatContextPressureGateway:
@@ -100,6 +102,13 @@ class TestFormatContextPressureGateway:
         msg = format_context_pressure_gateway(0.80, 0.50)
         assert "▰" in msg
 
+    def test_over_100_percent_capped(self):
+        """Progress > 1.0 should cap percentage text at 100%."""
+        msg = format_context_pressure_gateway(1.09, 0.50)
+        assert "100% to compaction" in msg
+        assert "109%" not in msg
+        assert msg.count("▰") == 20
+
 
 # ---------------------------------------------------------------------------
 # AIAgent context pressure flag tests