Files
hermes-agent/tests/hermes_cli/test_tool_token_estimation.py
Teknium 2d264a4562 fix(tests): resolve 10 CI failures across hooks, tiktoken, plugins (#3848)
test_hooks.py (7 failures): Built-in boot-md hook was always loaded
by _register_builtin_hooks(), adding +1 to every expected hook count.
Mock out built-in registration in TestDiscoverAndLoad so tests isolate
user-hook discovery logic.

test_tool_token_estimation.py (2 failures): tiktoken is not in
core/[all] dependencies. The estimation function gracefully returns {}
when tiktoken is missing, but tests expected non-empty results. Added
skipif markers for tests that need tiktoken.

test_plugins_cmd.py (1 failure): bare 'hermes plugins' now dispatches
to cmd_toggle() (interactive curses UI) instead of cmd_list(). Updated
test to match the new behavior.
2026-03-29 20:05:59 -07:00

284 lines
9.3 KiB
Python

"""Tests for tool token estimation and curses_ui status_fn support."""
from unittest.mock import patch
import pytest
# tiktoken is not in core/[all] deps — skip estimation tests when unavailable
_has_tiktoken = True
try:
import tiktoken # noqa: F401
except ImportError:
_has_tiktoken = False
_needs_tiktoken = pytest.mark.skipif(not _has_tiktoken, reason="tiktoken not installed")
# ─── Token Estimation Tests ──────────────────────────────────────────────────
@_needs_tiktoken
def test_estimate_tool_tokens_returns_positive_counts():
"""_estimate_tool_tokens should return a non-empty dict with positive values."""
from hermes_cli.tools_config import _estimate_tool_tokens, _tool_token_cache
# Clear cache to force fresh computation
import hermes_cli.tools_config as tc
tc._tool_token_cache = None
tokens = _estimate_tool_tokens()
assert isinstance(tokens, dict)
assert len(tokens) > 0
for name, count in tokens.items():
assert isinstance(name, str)
assert isinstance(count, int)
assert count > 0, f"Tool {name} has non-positive token count: {count}"
@_needs_tiktoken
def test_estimate_tool_tokens_is_cached():
"""Second call should return the same cached dict object."""
import hermes_cli.tools_config as tc
tc._tool_token_cache = None
first = tc._estimate_tool_tokens()
second = tc._estimate_tool_tokens()
assert first is second
def test_estimate_tool_tokens_returns_empty_when_tiktoken_unavailable(monkeypatch):
"""Graceful degradation when tiktoken cannot be imported."""
import hermes_cli.tools_config as tc
tc._tool_token_cache = None
import builtins
real_import = builtins.__import__
def mock_import(name, *args, **kwargs):
if name == "tiktoken":
raise ImportError("mocked")
return real_import(name, *args, **kwargs)
monkeypatch.setattr(builtins, "__import__", mock_import)
result = tc._estimate_tool_tokens()
assert result == {}
# Reset cache for other tests
tc._tool_token_cache = None
@_needs_tiktoken
def test_estimate_tool_tokens_covers_known_tools():
"""Should include schemas for well-known tools like terminal, web_search."""
import hermes_cli.tools_config as tc
tc._tool_token_cache = None
tokens = tc._estimate_tool_tokens()
# These tools should always be discoverable
for expected in ("terminal", "web_search", "read_file"):
assert expected in tokens, f"Expected {expected!r} in token estimates"
# ─── Status Function Tests ───────────────────────────────────────────────────
def test_prompt_toolset_checklist_passes_status_fn(monkeypatch):
"""_prompt_toolset_checklist should pass a status_fn to curses_checklist."""
import hermes_cli.tools_config as tc
captured_kwargs = {}
def fake_checklist(title, items, selected, *, cancel_returns=None, status_fn=None):
captured_kwargs["status_fn"] = status_fn
captured_kwargs["title"] = title
return selected # Return pre-selected unchanged
monkeypatch.setattr("hermes_cli.curses_ui.curses_checklist", fake_checklist)
tc._prompt_toolset_checklist("CLI", {"web", "terminal"})
assert "status_fn" in captured_kwargs
# If tiktoken is available, status_fn should be set
tokens = tc._estimate_tool_tokens()
if tokens:
assert captured_kwargs["status_fn"] is not None
def test_status_fn_returns_formatted_token_count(monkeypatch):
"""The status_fn should return a human-readable token count string."""
import hermes_cli.tools_config as tc
from hermes_cli.tools_config import CONFIGURABLE_TOOLSETS
captured = {}
def fake_checklist(title, items, selected, *, cancel_returns=None, status_fn=None):
captured["status_fn"] = status_fn
return selected
monkeypatch.setattr("hermes_cli.curses_ui.curses_checklist", fake_checklist)
tc._prompt_toolset_checklist("CLI", {"web", "terminal"})
status_fn = captured.get("status_fn")
if status_fn is None:
pytest.skip("tiktoken unavailable; status_fn not created")
# Find the indices for web and terminal
idx_map = {ts_key: i for i, (ts_key, _, _) in enumerate(CONFIGURABLE_TOOLSETS)}
# Call status_fn with web + terminal selected
result = status_fn({idx_map["web"], idx_map["terminal"]})
assert "tokens" in result
assert "Est. tool context" in result
def test_status_fn_deduplicates_overlapping_tools(monkeypatch):
"""When toolsets overlap (browser includes web_search), tokens should not double-count."""
import hermes_cli.tools_config as tc
from hermes_cli.tools_config import CONFIGURABLE_TOOLSETS
captured = {}
def fake_checklist(title, items, selected, *, cancel_returns=None, status_fn=None):
captured["status_fn"] = status_fn
return selected
monkeypatch.setattr("hermes_cli.curses_ui.curses_checklist", fake_checklist)
tc._prompt_toolset_checklist("CLI", {"web"})
status_fn = captured.get("status_fn")
if status_fn is None:
pytest.skip("tiktoken unavailable; status_fn not created")
idx_map = {ts_key: i for i, (ts_key, _, _) in enumerate(CONFIGURABLE_TOOLSETS)}
# web alone
web_only = status_fn({idx_map["web"]})
# browser includes web_search, so browser + web should not double-count web_search
browser_only = status_fn({idx_map["browser"]})
both = status_fn({idx_map["web"], idx_map["browser"]})
# Extract numeric token counts from strings like "~8.3k tokens" or "~350 tokens"
import re
def parse_tokens(s):
m = re.search(r"~([\d.]+)k?\s+tokens", s)
if not m:
return 0
val = float(m.group(1))
if "k" in s[m.start():m.end()]:
val *= 1000
return val
web_tok = parse_tokens(web_only)
browser_tok = parse_tokens(browser_only)
both_tok = parse_tokens(both)
# Both together should be LESS than naive sum (due to web_search dedup)
naive_sum = web_tok + browser_tok
assert both_tok < naive_sum, (
f"Expected deduplication: web({web_tok}) + browser({browser_tok}) = {naive_sum} "
f"but combined = {both_tok}"
)
def test_status_fn_empty_selection():
"""Status function with no tools selected should return ~0 tokens."""
import hermes_cli.tools_config as tc
tc._tool_token_cache = None
tokens = tc._estimate_tool_tokens()
if not tokens:
pytest.skip("tiktoken unavailable")
from hermes_cli.tools_config import CONFIGURABLE_TOOLSETS
from toolsets import resolve_toolset
ts_keys = [ts_key for ts_key, _, _ in CONFIGURABLE_TOOLSETS]
def status_fn(chosen: set) -> str:
all_tools: set = set()
for idx in chosen:
all_tools.update(resolve_toolset(ts_keys[idx]))
total = sum(tokens.get(name, 0) for name in all_tools)
if total >= 1000:
return f"Est. tool context: ~{total / 1000:.1f}k tokens"
return f"Est. tool context: ~{total} tokens"
result = status_fn(set())
assert "~0 tokens" in result
# ─── Curses UI Status Bar Tests ──────────────────────────────────────────────
def test_curses_checklist_numbered_fallback_shows_status(monkeypatch, capsys):
"""The numbered fallback should print the status_fn output."""
from hermes_cli.curses_ui import _numbered_fallback
def my_status(chosen):
return f"Selected {len(chosen)} items"
# Simulate user pressing Enter immediately (empty input → confirm)
monkeypatch.setattr("builtins.input", lambda _prompt="": "")
result = _numbered_fallback(
"Test title",
["Item A", "Item B", "Item C"],
{0, 2},
{0, 2},
status_fn=my_status,
)
captured = capsys.readouterr()
assert "Selected 2 items" in captured.out
assert result == {0, 2}
def test_curses_checklist_numbered_fallback_without_status(monkeypatch, capsys):
"""The numbered fallback should work fine without status_fn."""
from hermes_cli.curses_ui import _numbered_fallback
monkeypatch.setattr("builtins.input", lambda _prompt="": "")
result = _numbered_fallback(
"Test title",
["Item A", "Item B"],
{0},
{0},
)
captured = capsys.readouterr()
assert "Est. tool context" not in captured.out
assert result == {0}
# ─── Registry get_schema Tests ───────────────────────────────────────────────
def test_registry_get_schema_returns_schema():
"""registry.get_schema() should return a tool's schema dict."""
from tools.registry import registry
# Import to trigger discovery
import model_tools # noqa: F401
schema = registry.get_schema("terminal")
assert schema is not None
assert "name" in schema
assert schema["name"] == "terminal"
assert "parameters" in schema
def test_registry_get_schema_returns_none_for_unknown():
"""registry.get_schema() should return None for unknown tools."""
from tools.registry import registry
assert registry.get_schema("nonexistent_tool_xyz") is None