Files
hermes-agent/tests/test_token_budget.py
Alexander Whitestone 07c5b5b83d
Some checks failed
Contributor Attribution Check / check-attribution (pull_request) Failing after 44s
Docker Build and Publish / build-and-push (pull_request) Has been skipped
Supply Chain Audit / Scan PR for supply chain risks (pull_request) Successful in 45s
Tests / test (pull_request) Failing after 25m21s
Tests / e2e (pull_request) Successful in 3m18s
test: add token budget poka-yoke tests (#925)
2026-04-21 11:41:39 +00:00

238 lines
8.1 KiB
Python

#!/usr/bin/env python3
"""
Tests for agent/token_budget.py — Poka-yoke context overflow guard.
"""
import sys
from pathlib import Path
import pytest
sys.path.insert(0, str(Path(__file__).parent.parent))
from agent.token_budget import (
TokenBudget,
BudgetLevel,
BudgetStatus,
WARN_PERCENT,
CAUTION_PERCENT,
CRITICAL_PERCENT,
STOP_PERCENT,
)
@pytest.fixture
def budget():
    """Provide a fresh ``TokenBudget`` built for a 128K-token context."""
    standard_context = 128_000
    return TokenBudget(context_length=standard_context)
@pytest.fixture
def small_budget():
    """Provide a fresh ``TokenBudget`` with a 4K-token context for tight tests."""
    tight_context = 4_000
    return TokenBudget(context_length=tight_context)
# ── Threshold Levels ──────────────────────────────────────────────────
class TestThresholds:
    """Check the level and action flags ``check()`` reports per usage band."""

    def test_normal_below_60(self, budget):
        """50,000 used tokens report NORMAL with every action flag off."""
        budget.update(50_000)  # 39%
        report = budget.check()
        assert report.level == BudgetLevel.NORMAL
        assert not report.should_compress
        assert not report.should_block_tools
        assert not report.should_terminate

    def test_warning_at_60(self, budget):
        """62% usage reports WARNING without compression or tool blocking."""
        used = int(128_000 * 0.62)  # 62%
        budget.update(used)
        report = budget.check()
        assert report.level == BudgetLevel.WARNING
        assert not report.should_compress
        assert not report.should_block_tools

    def test_caution_at_80(self, budget):
        """82% usage reports CAUTION and requests compression only."""
        used = int(128_000 * 0.82)  # 82%
        budget.update(used)
        report = budget.check()
        assert report.level == BudgetLevel.CAUTION
        assert report.should_compress
        assert not report.should_block_tools
        assert not report.should_terminate

    def test_critical_at_90(self, budget):
        """91% usage reports CRITICAL: compress and block tools, no stop."""
        used = int(128_000 * 0.91)  # 91%
        budget.update(used)
        report = budget.check()
        assert report.level == BudgetLevel.CRITICAL
        assert report.should_compress
        assert report.should_block_tools
        assert not report.should_terminate

    def test_stop_at_95(self, budget):
        """96% usage reports STOP with all three action flags set."""
        used = int(128_000 * 0.96)  # 96%
        budget.update(used)
        report = budget.check()
        assert report.level == BudgetLevel.STOP
        assert report.should_compress
        assert report.should_block_tools
        assert report.should_terminate

    def test_small_context_thresholds(self, small_budget):
        """The 4K budget steps through WARNING, CAUTION, CRITICAL and STOP."""
        # Each pair is (tokens used, level expected right after that update).
        steps = (
            (2450, BudgetLevel.WARNING),   # 4K * 0.60 = 2400
            (3250, BudgetLevel.CAUTION),   # 4K * 0.81
            (3650, BudgetLevel.CRITICAL),  # 4K * 0.91
            (3850, BudgetLevel.STOP),      # 4K * 0.96
        )
        for used, expected_level in steps:
            small_budget.update(used)
            assert small_budget.check().level == expected_level
# ── Convenience Methods ───────────────────────────────────────────────
class TestConvenienceMethods:
    """Check the boolean shortcut methods on either side of their cut-over."""

    def test_should_compress(self, budget):
        """``should_compress()`` is false at 79% usage and true at 80%."""
        just_below = int(128_000 * 0.79)
        at_cutover = int(128_000 * 0.80)
        budget.update(just_below)
        assert not budget.should_compress()
        budget.update(at_cutover)
        assert budget.should_compress()

    def test_should_block_tools(self, budget):
        """``should_block_tools()`` is false at 89% usage and true at 90%."""
        just_below = int(128_000 * 0.89)
        at_cutover = int(128_000 * 0.90)
        budget.update(just_below)
        assert not budget.should_block_tools()
        budget.update(at_cutover)
        assert budget.should_block_tools()

    def test_should_terminate(self, budget):
        """``should_terminate()`` is false at 94% usage and true at 95%."""
        just_below = int(128_000 * 0.94)
        at_cutover = int(128_000 * 0.95)
        budget.update(just_below)
        assert not budget.should_terminate()
        budget.update(at_cutover)
        assert budget.should_terminate()
# ── Tool Output Budgeting ─────────────────────────────────────────────
class TestToolOutputBudget:
    """Check the per-level tool output allowance and output truncation."""

    def test_normal_budget(self, budget):
        """At 50% usage the tool output budget is 50,000."""
        used = int(128_000 * 0.50)
        budget.update(used)
        assert budget.tool_output_budget() == 50_000

    def test_warning_budget(self, budget):
        """At 65% usage the tool output budget is 20,000."""
        used = int(128_000 * 0.65)
        budget.update(used)
        assert budget.tool_output_budget() == 20_000

    def test_caution_budget(self, budget):
        """At 85% usage the tool output budget is 8,000."""
        used = int(128_000 * 0.85)
        budget.update(used)
        assert budget.tool_output_budget() == 8_000

    def test_critical_budget(self, budget):
        """At 92% usage the tool output budget is 2,000."""
        used = int(128_000 * 0.92)
        budget.update(used)
        assert budget.tool_output_budget() == 2_000

    def test_truncate_short_unchanged(self, budget):
        """Text shorter than ``max_chars`` comes back untouched."""
        original = "short text"
        assert budget.truncate_tool_output(original, max_chars=1000) == "short text"

    def test_truncate_long(self, budget):
        """Oversized text is cut near ``max_chars`` and flagged as truncated."""
        payload = "A" * 100_000
        shortened = budget.truncate_tool_output(payload, max_chars=5_000)
        assert len(shortened) <= 5_100  # small overhead for notice
        assert "truncated" in shortened
        leading, trailing = shortened[:2500], shortened[-2500:]
        assert "A" in leading  # head preserved
        assert "A" in trailing  # tail preserved

    def test_truncate_very_small(self, budget):
        """A very small ``max_chars`` still yields a bounded, flagged result."""
        payload = "X" * 1000
        shortened = budget.truncate_tool_output(payload, max_chars=50)
        assert len(shortened) <= 50 + 20
        assert "truncated" in shortened
# ── Growth Tracking ───────────────────────────────────────────────────
class TestGrowthTracking:
    """Check growth-rate and turns-remaining estimates from update history."""

    def test_growth_rate(self, budget):
        """Updates rising by 5,000 each give a growth rate of 5,000.0."""
        for used in (10_000, 15_000, 20_000):
            budget.update(used)
        assert budget.growth_rate() == 5_000.0

    def test_turns_remaining(self, budget):
        """With the same history, the turns estimate lands between 18 and 24."""
        for used in (10_000, 15_000, 20_000):
            budget.update(used)
        # rate=5000, remaining=108000, turns=~21
        estimate = budget.turns_remaining()
        assert estimate is not None
        assert 18 <= estimate <= 24

    def test_no_history(self, budget):
        """With no updates recorded, both estimates are ``None``."""
        rate = budget.growth_rate()
        assert rate is None
        turns = budget.turns_remaining()
        assert turns is None
# ── Status Indicators ─────────────────────────────────────────────────
class TestStatusIndicators:
    """Check the text renderings of a status and the budget summary."""

    def test_indicator_normal(self, budget):
        """At 50% usage the indicator text contains ``50``."""
        budget.update(int(128_000 * 0.50))
        rendered = budget.check().to_indicator()
        assert "50" in rendered

    def test_indicator_warning(self, budget):
        """At 65% usage the indicator shows the warning sign or ``65``."""
        budget.update(int(128_000 * 0.65))
        rendered = budget.check().to_indicator()
        assert any(marker in rendered for marker in ("\u26a0", "65"))

    def test_bar(self, budget):
        """At 50% usage the bar text contains ``50``."""
        budget.update(int(128_000 * 0.50))
        rendered = budget.check().to_bar()
        assert "50" in rendered

    def test_summary(self, budget):
        """The summary shows used tokens, context size and the level name."""
        budget.update(50_000)
        text = budget.summary()
        for fragment in ("50,000", "128,000", "NORMAL"):
            assert fragment in text
# ── Reset ─────────────────────────────────────────────────────────────
class TestReset:
    """Check that ``reset()`` returns the budget to its initial state."""

    def test_reset_clears_state(self, budget):
        """After a 90% update, ``reset()`` clears usage, level and history."""
        used = int(128_000 * 0.90)
        budget.update(used)
        budget.reset()
        assert budget.tokens_used == 0
        level_after_reset = budget.check().level
        assert level_after_reset == BudgetLevel.NORMAL
        assert budget.growth_rate() is None
# ── Edge Cases ────────────────────────────────────────────────────────
class TestEdgeCases:
    """Check boundary values: exact threshold, empty context, response room."""

    def test_exact_threshold_boundary(self, budget):
        """Usage sitting exactly on 60% already counts as WARNING."""
        # Exactly at 60%
        on_threshold = int(128_000 * 0.60)
        budget.update(on_threshold)
        assert budget.check().level == BudgetLevel.WARNING

    def test_zero_context(self):
        """A zero-length context reports 0 percent used."""
        empty = TokenBudget(context_length=0)
        report = empty.check()
        assert report.percent_used == 0

    def test_remaining_for_response(self, budget):
        """With 100,000 tokens used, response room is positive and below 128,000."""
        budget.update(100_000)
        room = budget.remaining_for_response()
        # 128000 - 100000 - 6400 (5% reserve) = 21600
        assert 0 < room < 128_000
if __name__ == "__main__":
    # Hand pytest's return code to the shell so that running this file
    # directly reports failure; a bare pytest.main() call discards the code
    # and the process would exit 0 even when tests fail.
    sys.exit(pytest.main([__file__, "-v"]))