Files
hermes-agent/tests/gateway/test_session_info.py
Teknium 58ca875e19 feat(gateway): surface session config on /new, /reset, and auto-reset (#3321)
When a new session starts in the gateway (via /new, /reset, or
auto-reset), send the user a summary of the detected configuration:

   Session reset! Starting fresh.

  ◆ Model: qwen3.5:27b-q4_K_M
  ◆ Provider: custom
  ◆ Context: 8K tokens (config)
  ◆ Endpoint: http://localhost:11434/v1

This makes misconfigured context length immediately visible — a user
running a local 8K model that falls to the 128K default will see:

  ◆ Context: 128K tokens (default — set model.context_length in config to override)

Instead of silently getting no compression and degrading responses.

- _format_session_info() resolves model, provider, context length,
  and endpoint from config + runtime, matching the hygiene code's
  resolution chain
- Local/custom endpoints shown; cloud endpoints hidden (not useful)
- Context source annotated: config, detected, or default with hint
- Appended to /new and /reset responses, and auto-reset notifications
- 9 tests covering all formatting paths and failure resilience

Addresses the user-facing side of #2708 — instead of trying to fix
every edge case in context detection, surface the values so users
can immediately see when something is wrong.
2026-03-26 19:27:58 -07:00

111 lines
4.9 KiB
Python

"""Tests for GatewayRunner._format_session_info — session config surfacing."""
import pytest
from unittest.mock import patch, MagicMock
from pathlib import Path
from gateway.run import GatewayRunner
@pytest.fixture()
def runner():
"""Create a bare GatewayRunner without __init__."""
return GatewayRunner.__new__(GatewayRunner)
def _patch_info(tmp_path, config_yaml, model, runtime):
"""Return a context-manager stack that patches _format_session_info deps."""
cfg_path = tmp_path / "config.yaml"
if config_yaml is not None:
cfg_path.write_text(config_yaml)
return (
patch("gateway.run._hermes_home", tmp_path),
patch("gateway.run._resolve_gateway_model", return_value=model),
patch("gateway.run._resolve_runtime_agent_kwargs", return_value=runtime),
)
class TestFormatSessionInfo:
def test_includes_model_name(self, runner, tmp_path):
p1, p2, p3 = _patch_info(tmp_path, "model:\n default: anthropic/claude-opus-4.6\n provider: openrouter\n",
"anthropic/claude-opus-4.6",
{"provider": "openrouter", "base_url": "https://openrouter.ai/api/v1", "api_key": "k"})
with p1, p2, p3:
info = runner._format_session_info()
assert "claude-opus-4.6" in info
def test_includes_provider(self, runner, tmp_path):
p1, p2, p3 = _patch_info(tmp_path, "model:\n default: test-model\n provider: openrouter\n",
"test-model",
{"provider": "openrouter", "base_url": "", "api_key": ""})
with p1, p2, p3:
info = runner._format_session_info()
assert "openrouter" in info
def test_config_context_length(self, runner, tmp_path):
p1, p2, p3 = _patch_info(tmp_path, "model:\n default: test-model\n context_length: 32768\n",
"test-model",
{"provider": "custom", "base_url": "", "api_key": ""})
with p1, p2, p3:
info = runner._format_session_info()
assert "32K" in info
assert "config" in info
def test_default_fallback_hint(self, runner, tmp_path):
p1, p2, p3 = _patch_info(tmp_path, "model:\n default: unknown-model-xyz\n",
"unknown-model-xyz",
{"provider": "", "base_url": "", "api_key": ""})
with p1, p2, p3:
info = runner._format_session_info()
assert "128K" in info
assert "model.context_length" in info
def test_local_endpoint_shown(self, runner, tmp_path):
p1, p2, p3 = _patch_info(
tmp_path,
"model:\n default: qwen3:8b\n provider: custom\n base_url: http://localhost:11434/v1\n context_length: 8192\n",
"qwen3:8b",
{"provider": "custom", "base_url": "http://localhost:11434/v1", "api_key": ""})
with p1, p2, p3:
info = runner._format_session_info()
assert "localhost:11434" in info
assert "8K" in info
def test_cloud_endpoint_hidden(self, runner, tmp_path):
p1, p2, p3 = _patch_info(tmp_path, "model:\n default: test-model\n provider: openrouter\n",
"test-model",
{"provider": "openrouter", "base_url": "https://openrouter.ai/api/v1", "api_key": "k"})
with p1, p2, p3:
info = runner._format_session_info()
assert "Endpoint" not in info
def test_million_context_format(self, runner, tmp_path):
p1, p2, p3 = _patch_info(tmp_path, "model:\n default: test-model\n context_length: 1000000\n",
"test-model",
{"provider": "", "base_url": "", "api_key": ""})
with p1, p2, p3:
info = runner._format_session_info()
assert "1.0M" in info
def test_missing_config(self, runner, tmp_path):
"""No config.yaml should not crash."""
p1, p2, p3 = _patch_info(tmp_path, None, # don't create config
"anthropic/claude-sonnet-4.6",
{"provider": "openrouter", "base_url": "", "api_key": ""})
with p1, p2, p3:
info = runner._format_session_info()
assert "Model" in info
assert "Context" in info
def test_runtime_resolution_failure_doesnt_crash(self, runner, tmp_path):
"""If runtime resolution raises, should still produce output."""
cfg_path = tmp_path / "config.yaml"
cfg_path.write_text("model:\n default: test-model\n context_length: 4096\n")
with patch("gateway.run._hermes_home", tmp_path), \
patch("gateway.run._resolve_gateway_model", return_value="test-model"), \
patch("gateway.run._resolve_runtime_agent_kwargs", side_effect=RuntimeError("no creds")):
info = runner._format_session_info()
assert "4K" in info
assert "config" in info