tests/conftest.py

"""Shared fixtures for the hermes-agent test suite."""

import asyncio
import os
import signal
import sys
import tempfile
from pathlib import Path
from unittest.mock import patch

import pytest

# Ensure project root is importable
PROJECT_ROOT = Path(__file__).parent.parent
if str(PROJECT_ROOT) not in sys.path:
    sys.path.insert(0, str(PROJECT_ROOT))


@pytest.fixture(autouse=True)
def _isolate_hermes_home(tmp_path, monkeypatch):
    """Redirect HERMES_HOME to a temp dir so tests never write to ~/.hermes/."""
    fake_home = tmp_path / "hermes_test"
    fake_home.mkdir()
    (fake_home / "sessions").mkdir()
    (fake_home / "cron").mkdir()
    (fake_home / "memories").mkdir()
    (fake_home / "skills").mkdir()
    monkeypatch.setenv("HERMES_HOME", str(fake_home))
    # Reset plugin singleton so tests don't leak plugins from ~/.hermes/plugins/
    try:
        import hermes_cli.plugins as _plugins_mod
        monkeypatch.setattr(_plugins_mod, "_plugin_manager", None)
    except Exception:
        pass
    # Tests should not inherit the agent's current gateway/messaging surface.
    # Individual tests that need gateway behavior set these explicitly.
    monkeypatch.delenv("HERMES_SESSION_PLATFORM", raising=False)
    monkeypatch.delenv("HERMES_SESSION_CHAT_ID", raising=False)
    monkeypatch.delenv("HERMES_SESSION_CHAT_NAME", raising=False)
    monkeypatch.delenv("HERMES_GATEWAY_SESSION", raising=False)
    # Avoid making real calls during tests if this key is set in the env files
    monkeypatch.delenv("OPENROUTER_API_KEY", raising=False)


@pytest.fixture()
def tmp_dir(tmp_path):
    """Provide a temporary directory that is cleaned up automatically."""
    return tmp_path


@pytest.fixture()
def mock_config():
    """Return a minimal hermes config dict suitable for unit tests."""
    return {
        "model": "test/mock-model",
        "toolsets": ["terminal", "file"],
        "max_turns": 10,
        "terminal": {
            "backend": "local",
            "cwd": "/tmp",
            "timeout": 30,
        },
        "compression": {"enabled": False},
        "memory": {"memory_enabled": False, "user_profile_enabled": False},
        "command_allowlist": [],
    }


# ── Global test timeout ─────────────────────────────────────────────────────
# Kill any individual test that takes longer than 30 seconds.
# Prevents hanging tests (subprocess spawns, blocking I/O) from stalling the
# entire test suite.

def _timeout_handler(signum, frame):
    raise TimeoutError("Test exceeded 30 second timeout")

@pytest.fixture(autouse=True)
def _ensure_current_event_loop(request):
    """Provide a default event loop for sync tests that call get_event_loop().

    Python 3.11+ no longer guarantees a current loop for plain synchronous tests.
    A number of gateway tests still use asyncio.get_event_loop().run_until_complete(...).
    Ensure they always have a usable loop without interfering with pytest-asyncio's
    own loop management for @pytest.mark.asyncio tests.
    """
    if request.node.get_closest_marker("asyncio") is not None:
        yield
        return

    try:
        loop = asyncio.get_event_loop_policy().get_event_loop()
    except RuntimeError:
        loop = None

    created = loop is None or loop.is_closed()
    if created:
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)

    try:
        yield
    finally:
        if created and loop is not None:
            try:
                loop.close()
            finally:
                asyncio.set_event_loop(None)


@pytest.fixture(autouse=True)
def _enforce_test_timeout():
    """Kill any individual test that takes longer than 30 seconds.
    SIGALRM is Unix-only; skip on Windows."""
    if sys.platform == "win32":
        yield
        return
    old = signal.signal(signal.SIGALRM, _timeout_handler)
    signal.alarm(30)
    yield
    signal.alarm(0)
    signal.signal(signal.SIGALRM, old)
test: reorganize test structure and add missing unit tests Reorganize flat tests/ directory to mirror source code structure (tools/, gateway/, hermes_cli/, integration/). Add 11 new test files covering previously untested modules: registry, patch_parser, fuzzy_match, todo_tool, approval, file_tools, gateway session/config/ delivery, and hermes_cli config/models. Total: 147 unit tests passing, 9 integration tests gated behind pytest marker. 2026-02-26 03:20:08 +03:00			`"""Shared fixtures for the hermes-agent test suite."""`

test: restore default event loop for sync tests 2026-03-14 03:14:34 -07:00			`import asyncio`
test: reorganize test structure and add missing unit tests Reorganize flat tests/ directory to mirror source code structure (tools/, gateway/, hermes_cli/, integration/). Add 11 new test files covering previously untested modules: registry, patch_parser, fuzzy_match, todo_tool, approval, file_tools, gateway session/config/ delivery, and hermes_cli config/models. Total: 147 unit tests passing, 9 integration tests gated behind pytest marker. 2026-02-26 03:20:08 +03:00			`import os`
fix: skip hanging tests + add global test timeout 4 test files spawn real processes or make live API calls that hang indefinitely in batch/CI runs. Skip them with pytestmark: - tests/tools/test_code_execution.py (subprocess spawns) - tests/tools/test_file_tools_live.py (live LocalEnvironment) - tests/test_413_compression.py (blocks on process) - tests/test_agent_loop_tool_calling.py (live OpenRouter API calls) Also added global 30s signal.alarm timeout in conftest.py as a safety net, and removed stale nous-api test that hung on OAuth browser login. Suite now runs in ~55s with no hangs. 2026-03-12 01:23:28 -07:00			`import signal`
test: reorganize test structure and add missing unit tests Reorganize flat tests/ directory to mirror source code structure (tools/, gateway/, hermes_cli/, integration/). Add 11 new test files covering previously untested modules: registry, patch_parser, fuzzy_match, todo_tool, approval, file_tools, gateway session/config/ delivery, and hermes_cli config/models. Total: 147 unit tests passing, 9 integration tests gated behind pytest marker. 2026-02-26 03:20:08 +03:00			`import sys`
			`import tempfile`
			`from pathlib import Path`
			`from unittest.mock import patch`

			`import pytest`

			`# Ensure project root is importable`
			`PROJECT_ROOT = Path(__file__).parent.parent`
			`if str(PROJECT_ROOT) not in sys.path:`
			`sys.path.insert(0, str(PROJECT_ROOT))`


fix(tests): isolate HERMES_HOME in tests and adjust log directory for debug session Added a fixture to redirect HERMES_HOME to a temporary directory during tests, preventing writes to the user's home directory. Updated the test for DebugSession to create a dedicated log directory for saving logs, ensuring test isolation and accuracy in assertions. 2026-03-02 04:34:21 -08:00			`@pytest.fixture(autouse=True)`
			`def _isolate_hermes_home(tmp_path, monkeypatch):`
			`"""Redirect HERMES_HOME to a temp dir so tests never write to ~/.hermes/."""`
			`fake_home = tmp_path / "hermes_test"`
			`fake_home.mkdir()`
			`(fake_home / "sessions").mkdir()`
			`(fake_home / "cron").mkdir()`
			`(fake_home / "memories").mkdir()`
			`(fake_home / "skills").mkdir()`
			`monkeypatch.setenv("HERMES_HOME", str(fake_home))`
feat: first-class plugin architecture (#1555) Plugin system for extending Hermes with custom tools, hooks, and integrations — no source code changes required. Core system (hermes_cli/plugins.py): - Plugin discovery from ~/.hermes/plugins/, .hermes/plugins/, and pip entry_points (hermes_agent.plugins group) - PluginContext with register_tool() and register_hook() - 6 lifecycle hooks: pre/post tool_call, pre/post llm_call, on_session_start/end - Namespace package handling for relative imports in plugins - Graceful error isolation — broken plugins never crash the agent Integration (model_tools.py): - Plugin discovery runs after built-in + MCP tools - Plugin tools bypass toolset filter via get_plugin_tool_names() - Pre/post tool call hooks fire in handle_function_call() CLI: - /plugins command shows loaded plugins, tool counts, status - Added to COMMANDS dict for autocomplete Docs: - Getting started guide (build-a-hermes-plugin.md) — full tutorial building a calculator plugin step by step - Reference page (features/plugins.md) — quick overview + tables - Covers: file structure, schemas, handlers, hooks, data files, bundled skills, env var gating, pip distribution, common mistakes Tests: 16 tests covering discovery, loading, hooks, tool visibility. 2026-03-16 07:17:36 -07:00			`# Reset plugin singleton so tests don't leak plugins from ~/.hermes/plugins/`
			`try:`
			`import hermes_cli.plugins as _plugins_mod`
			`monkeypatch.setattr(_plugins_mod, "_plugin_manager", None)`
			`except Exception:`
			`pass`
feat: add direct endpoint overrides for auxiliary and delegation Add base_url/api_key overrides for auxiliary tasks and delegation so users can route those flows straight to a custom OpenAI-compatible endpoint without having to rely on provider=main or named custom providers. Also clear gateway session env vars in test isolation so the full suite stays deterministic when run from a messaging-backed agent session. 2026-03-14 20:48:29 -07:00			`# Tests should not inherit the agent's current gateway/messaging surface.`
			`# Individual tests that need gateway behavior set these explicitly.`
			`monkeypatch.delenv("HERMES_SESSION_PLATFORM", raising=False)`
			`monkeypatch.delenv("HERMES_SESSION_CHAT_ID", raising=False)`
			`monkeypatch.delenv("HERMES_SESSION_CHAT_NAME", raising=False)`
			`monkeypatch.delenv("HERMES_GATEWAY_SESSION", raising=False)`
fix(tests): fix several failing/flaky tests on main (#6777) * fix(tests): mock is_safe_url in tests that use example.com Tests using example.com URLs were failing because is_safe_url does a real DNS lookup which fails in environments where example.com doesn't resolve, causing the request to be blocked before reaching the already-mocked HTTP client. This should fix around 17 failing tests. These tests test logic, caching, etc. so mocking this method should not modify them in any way. TestMattermostSendUrlAsFile was already doing this so we follow the same pattern. * fix(test): use case-insensitive lookup for model context length check DEFAULT_CONTEXT_LENGTHS uses inconsistent casing (MiniMax keys are lowercase, Qwen keys are mixed-case) so the test was broken in some cases since it couldn't find the model. * fix(test): patch is_linux in systemd gateway restart test The test only patched is_macos to False but didn't patch is_linux to True. On macOS hosts, is_linux() returns False and the systemd restart code path is skipped entirely, making the assertion fail. * fix(test): use non-blocklisted env var in docker forward_env tests GITHUB_TOKEN is in api_key_env_vars and thus in _HERMES_PROVIDER_ENV_BLOCKLIST so the env var is silently dropped, we replace it with a non-blocked one like DATABASE_URL so the tests actually work. * fix(test): fully isolate _has_any_provider_configured from host env _has_any_provider_configured() checks all env vars from PROVIDER_REGISTRY (not just the 5 the tests were clearing) and also calls get_auth_status() which detects gh auth token for Copilot. On machines with any of these set, the function returns True before reaching the code path under test. Clear all registry vars and mock get_auth_status so host credentials don't interfere. * fix(test): correct path to hermes_base_env.py in tool parser tests Path(__file__).parent.parent resolved to tests/, not the project root. The file lives at environments/hermes_base_env.py so we need one more parent level. * fix(test): accept optional HTML fields in Matrix send payload _send_matrix sometimes adds format and formatted_body when the markdown library is installed. The test was doing an exact dict equality check which broke. Check required fields instead. * fix(test): add config.yaml to codex vision requirements test The test only wrote auth.json but not config.yaml, so _read_main_provider() returned empty and vision auto-detect never tried the codex provider. Add a config.yaml pointing at openai-codex so the fallback path actually resolves the client. * fix(test): clear OPENROUTER_API_KEY in _isolate_hermes_home run_agent.py calls load_hermes_dotenv() at import time, which injects API keys from ~/.hermes/.env into os.environ before any test fixture runs. This caused test_agent_loop_tool_calling to make real API calls instead of skipping, which ends up making some tests fail. * fix(test): add get_rate_limit_state to agent mock in usage report tests _show_usage now calls agent.get_rate_limit_state() for rate limit display. The SimpleNamespace mock was missing this method. * fix(test): update expected Camofox config version from 12 to 13 * fix(test): mock _get_enabled_platforms in nous managed defaults test Importing gateway.run leaks DISCORD_BOT_TOKEN into os.environ, which makes _get_enabled_platforms() return ["cli", "discord"] instead of just ["cli"]. tools_command loops per platform, so apply_nous_managed_defaults runs twice: the first call sets config values, the second sees them as already configured and returns an empty set, causing the assertion to fail. 2026-04-09 17:17:06 -03:00			`# Avoid making real calls during tests if this key is set in the env files`
			`monkeypatch.delenv("OPENROUTER_API_KEY", raising=False)`
fix(tests): isolate HERMES_HOME in tests and adjust log directory for debug session Added a fixture to redirect HERMES_HOME to a temporary directory during tests, preventing writes to the user's home directory. Updated the test for DebugSession to create a dedicated log directory for saving logs, ensuring test isolation and accuracy in assertions. 2026-03-02 04:34:21 -08:00

test: reorganize test structure and add missing unit tests Reorganize flat tests/ directory to mirror source code structure (tools/, gateway/, hermes_cli/, integration/). Add 11 new test files covering previously untested modules: registry, patch_parser, fuzzy_match, todo_tool, approval, file_tools, gateway session/config/ delivery, and hermes_cli config/models. Total: 147 unit tests passing, 9 integration tests gated behind pytest marker. 2026-02-26 03:20:08 +03:00			`@pytest.fixture()`
			`def tmp_dir(tmp_path):`
			`"""Provide a temporary directory that is cleaned up automatically."""`
			`return tmp_path`


			`@pytest.fixture()`
			`def mock_config():`
			`"""Return a minimal hermes config dict suitable for unit tests."""`
			`return {`
			`"model": "test/mock-model",`
			`"toolsets": ["terminal", "file"],`
			`"max_turns": 10,`
			`"terminal": {`
			`"backend": "local",`
			`"cwd": "/tmp",`
			`"timeout": 30,`
			`},`
			`"compression": {"enabled": False},`
			`"memory": {"memory_enabled": False, "user_profile_enabled": False},`
			`"command_allowlist": [],`
			`}`
fix: skip hanging tests + add global test timeout 4 test files spawn real processes or make live API calls that hang indefinitely in batch/CI runs. Skip them with pytestmark: - tests/tools/test_code_execution.py (subprocess spawns) - tests/tools/test_file_tools_live.py (live LocalEnvironment) - tests/test_413_compression.py (blocks on process) - tests/test_agent_loop_tool_calling.py (live OpenRouter API calls) Also added global 30s signal.alarm timeout in conftest.py as a safety net, and removed stale nous-api test that hung on OAuth browser login. Suite now runs in ~55s with no hangs. 2026-03-12 01:23:28 -07:00

			`# ── Global test timeout ─────────────────────────────────────────────────────`
			`# Kill any individual test that takes longer than 30 seconds.`
			`# Prevents hanging tests (subprocess spawns, blocking I/O) from stalling the`
			`# entire test suite.`

			`def _timeout_handler(signum, frame):`
			`raise TimeoutError("Test exceeded 30 second timeout")`

test: restore default event loop for sync tests 2026-03-14 03:14:34 -07:00			`@pytest.fixture(autouse=True)`
			`def _ensure_current_event_loop(request):`
			`"""Provide a default event loop for sync tests that call get_event_loop().`

			`Python 3.11+ no longer guarantees a current loop for plain synchronous tests.`
			`A number of gateway tests still use asyncio.get_event_loop().run_until_complete(...).`
			`Ensure they always have a usable loop without interfering with pytest-asyncio's`
			`own loop management for @pytest.mark.asyncio tests.`
			`"""`
			`if request.node.get_closest_marker("asyncio") is not None:`
			`yield`
			`return`

			`try:`
			`loop = asyncio.get_event_loop_policy().get_event_loop()`
			`except RuntimeError:`
			`loop = None`

			`created = loop is None or loop.is_closed()`
			`if created:`
			`loop = asyncio.new_event_loop()`
			`asyncio.set_event_loop(loop)`

			`try:`
			`yield`
			`finally:`
			`if created and loop is not None:`
			`try:`
			`loop.close()`
			`finally:`
			`asyncio.set_event_loop(None)`


fix: skip hanging tests + add global test timeout 4 test files spawn real processes or make live API calls that hang indefinitely in batch/CI runs. Skip them with pytestmark: - tests/tools/test_code_execution.py (subprocess spawns) - tests/tools/test_file_tools_live.py (live LocalEnvironment) - tests/test_413_compression.py (blocks on process) - tests/test_agent_loop_tool_calling.py (live OpenRouter API calls) Also added global 30s signal.alarm timeout in conftest.py as a safety net, and removed stale nous-api test that hung on OAuth browser login. Suite now runs in ~55s with no hangs. 2026-03-12 01:23:28 -07:00			`@pytest.fixture(autouse=True)`
			`def _enforce_test_timeout():`
fix(approval): show full command in dangerous command approval (#1553) * fix: prevent infinite 400 failure loop on context overflow (#1630) When a gateway session exceeds the model's context window, Anthropic may return a generic 400 invalid_request_error with just 'Error' as the message. This bypassed the phrase-based context-length detection, causing the agent to treat it as a non-retryable client error. Worse, the failed user message was still persisted to the transcript, making the session even larger on each attempt — creating an infinite loop. Three-layer fix: 1. run_agent.py — Fallback heuristic: when a 400 error has a very short generic message AND the session is large (>40% of context or >80 messages), treat it as a probable context overflow and trigger compression instead of aborting. 2. run_agent.py + gateway/run.py — Don't persist failed messages: when the agent returns failed=True before generating any response, skip writing the user's message to the transcript/DB. This prevents the session from growing on each failure. 3. gateway/run.py — Smarter error messages: detect context-overflow failures and suggest /compact or /reset specifically, instead of a generic 'try again' that will fail identically. * fix(skills): detect prompt injection patterns and block cache file reads Adds two security layers to prevent prompt injection via skills hub cache files (#1558): 1. read_file: blocks direct reads of ~/.hermes/skills/.hub/ directory (index-cache, catalog files). The 3.5MB clawhub_catalog_v1.json was the original injection vector — untrusted skill descriptions in the catalog contained adversarial text that the model executed. 2. skill_view: warns when skills are loaded from outside the trusted ~/.hermes/skills/ directory, and detects common injection patterns in skill content ("ignore previous instructions", "<system>", etc.). Cherry-picked from PR #1562 by ygd58. * fix(tools): chunk long messages in send_message_tool before dispatch (#1552) Long messages sent via send_message tool or cron delivery silently failed when exceeding platform limits. Gateway adapters handle this via truncate_message(), but the standalone senders in send_message_tool bypassed that entirely. - Apply truncate_message() chunking in _send_to_platform() before dispatching to individual platform senders - Remove naive message[i:i+2000] character split in _send_discord() in favor of centralized smart splitting - Attach media files to last chunk only for Telegram - Add regression tests for chunking and media placement Cherry-picked from PR #1557 by llbn. * fix(approval): show full command in dangerous command approval (#1553) Previously the command was truncated to 80 chars in CLI (with a [v]iew full option), 500 chars in Discord embeds, and missing entirely in Telegram/Slack approval messages. Now the full command is always displayed everywhere: - CLI: removed 80-char truncation and [v]iew full menu option - Gateway (TG/Slack): approval_required message includes full command in a code block - Discord: embed shows full command up to 4096-char limit - Windows: skip SIGALRM-based test timeout (Unix-only) - Updated tests: replaced view-flow tests with direct approval tests Cherry-picked from PR #1566 by crazywriter1. --------- Co-authored-by: buray <ygd58@users.noreply.github.com> Co-authored-by: lbn <llbn@users.noreply.github.com> Co-authored-by: crazywriter1 <53251494+crazywriter1@users.noreply.github.com> 2026-03-17 02:02:33 -07:00			`"""Kill any individual test that takes longer than 30 seconds.`
			`SIGALRM is Unix-only; skip on Windows."""`
			`if sys.platform == "win32":`
			`yield`
			`return`
fix: skip hanging tests + add global test timeout 4 test files spawn real processes or make live API calls that hang indefinitely in batch/CI runs. Skip them with pytestmark: - tests/tools/test_code_execution.py (subprocess spawns) - tests/tools/test_file_tools_live.py (live LocalEnvironment) - tests/test_413_compression.py (blocks on process) - tests/test_agent_loop_tool_calling.py (live OpenRouter API calls) Also added global 30s signal.alarm timeout in conftest.py as a safety net, and removed stale nous-api test that hung on OAuth browser login. Suite now runs in ~55s with no hangs. 2026-03-12 01:23:28 -07:00			`old = signal.signal(signal.SIGALRM, _timeout_handler)`
			`signal.alarm(30)`
			`yield`
			`signal.alarm(0)`
			`signal.signal(signal.SIGALRM, old)`