From 15cfd2082083099bff7e6d7f61544f802ad06170 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Fri, 27 Mar 2026 21:42:09 -0700 Subject: [PATCH] fix: cap context pressure percentage at 100% in display (#3480) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix: cap context pressure percentage at 100% in display The forward-looking token estimate can overshoot the compaction threshold (e.g. a large tool result pushes it from 70% to 109% in one step). The progress bar was already capped via min(), but pct_int was not — causing the user to see '109% to compaction' which is confusing. Cap pct_int at 100 in both CLI and gateway display functions. Reported by @JoshExile82. * refactor: use real API token counts for compression decisions Replace the rough chars/3 estimation with actual prompt_tokens + completion_tokens from the API response. The estimation was needed to predict whether tool results would push context past the threshold, but the default 50% threshold leaves ample headroom — if tool results push past it, the next API call reports real usage and triggers compression then. This removes all estimation from the compression and context pressure paths, making both 100% data-driven from provider-reported token counts. Also removes the dead _msg_count_before_tools variable. --- agent/display.py | 4 ++-- run_agent.py | 23 +++++++++++------------ tests/test_context_pressure.py | 11 ++++++++++- 3 files changed, 23 insertions(+), 15 deletions(-) diff --git a/agent/display.py b/agent/display.py index 84a554176..22b918e1b 100644 --- a/agent/display.py +++ b/agent/display.py @@ -699,7 +699,7 @@ def format_context_pressure( threshold_percent: Compaction threshold as a fraction of context window. compression_enabled: Whether auto-compression is active. """ - pct_int = int(compaction_progress * 100) + pct_int = min(int(compaction_progress * 100), 100) filled = min(int(compaction_progress * _BAR_WIDTH), _BAR_WIDTH) bar = _BAR_FILLED * filled + _BAR_EMPTY * (_BAR_WIDTH - filled) @@ -729,7 +729,7 @@ def format_context_pressure_gateway( No ANSI — just Unicode and plain text suitable for Telegram/Discord/etc. The percentage shows progress toward the compaction threshold. """ - pct_int = int(compaction_progress * 100) + pct_int = min(int(compaction_progress * 100), 100) filled = min(int(compaction_progress * _BAR_WIDTH), _BAR_WIDTH) bar = _BAR_FILLED * filled + _BAR_EMPTY * (_BAR_WIDTH - filled) diff --git a/run_agent.py b/run_agent.py index dd3d1156a..54546ab7b 100644 --- a/run_agent.py +++ b/run_agent.py @@ -7311,7 +7311,6 @@ class AIAgent: except Exception: pass - _msg_count_before_tools = len(messages) self._execute_tool_calls(assistant_message, messages, effective_task_id, api_call_count) # Signal that a paragraph break is needed before the next @@ -7329,18 +7328,18 @@ class AIAgent: if _tc_names == {"execute_code"}: self.iteration_budget.refund() - # Estimate next prompt size using real token counts from the - # last API response + rough estimate of newly appended tool - # results. This catches cases where tool results push the - # context past the limit that last_prompt_tokens alone misses - # (e.g. large file reads, web extractions). + # Use real token counts from the API response to decide + # compression. prompt_tokens + completion_tokens is the + # actual context size the provider reported plus the + # assistant turn — a tight lower bound for the next prompt. + # Tool results appended above aren't counted yet, but the + # threshold (default 50%) leaves ample headroom; if tool + # results push past it, the next API call will report the + # real total and trigger compression then. _compressor = self.context_compressor - _new_tool_msgs = messages[_msg_count_before_tools:] - _new_chars = sum(len(str(m.get("content", "") or "")) for m in _new_tool_msgs) - _estimated_next_prompt = ( + _real_tokens = ( _compressor.last_prompt_tokens + _compressor.last_completion_tokens - + _new_chars // 3 # conservative: JSON-heavy tool results ≈ 3 chars/token ) # ── Context pressure warnings (user-facing only) ────────── @@ -7350,12 +7349,12 @@ class AIAgent: # Does not inject into messages — just prints to CLI output # and fires status_callback for gateway platforms. if _compressor.threshold_tokens > 0: - _compaction_progress = _estimated_next_prompt / _compressor.threshold_tokens + _compaction_progress = _real_tokens / _compressor.threshold_tokens if _compaction_progress >= 0.85 and not self._context_pressure_warned: self._context_pressure_warned = True self._emit_context_pressure(_compaction_progress, _compressor) - if self.compression_enabled and _compressor.should_compress(_estimated_next_prompt): + if self.compression_enabled and _compressor.should_compress(_real_tokens): messages, active_system_prompt = self._compress_context( messages, system_message, approx_tokens=self.context_compressor.last_prompt_tokens, diff --git a/tests/test_context_pressure.py b/tests/test_context_pressure.py index f89daef52..522603fdb 100644 --- a/tests/test_context_pressure.py +++ b/tests/test_context_pressure.py @@ -69,10 +69,12 @@ class TestFormatContextPressure: assert isinstance(result, str) def test_over_100_percent_capped(self): - """Progress > 1.0 should not break the bar.""" + """Progress > 1.0 should cap both bar and percentage text at 100%.""" line = format_context_pressure(1.05, 100_000, 0.50) assert "▰" in line assert line.count("▰") == 20 + assert "100%" in line + assert "105%" not in line class TestFormatContextPressureGateway: @@ -100,6 +102,13 @@ class TestFormatContextPressureGateway: msg = format_context_pressure_gateway(0.80, 0.50) assert "▰" in msg + def test_over_100_percent_capped(self): + """Progress > 1.0 should cap percentage text at 100%.""" + msg = format_context_pressure_gateway(1.09, 0.50) + assert "100% to compaction" in msg + assert "109%" not in msg + assert msg.count("▰") == 20 + # --------------------------------------------------------------------------- # AIAgent context pressure flag tests