# tests/conftest.py

"""Shared fixtures for the hermes-agent test suite."""

import asyncio
import os
import signal
import sys
import tempfile
from pathlib import Path
from unittest.mock import patch

import pytest

# Put the repository root (the directory two levels above this file) at the
# front of sys.path, unless it is already listed there.
PROJECT_ROOT = Path(__file__).parent.parent
if sys.path.count(str(PROJECT_ROOT)) == 0:
    sys.path.insert(0, str(PROJECT_ROOT))


@pytest.fixture(autouse=True)
def _isolate_hermes_home(tmp_path, monkeypatch):
    """Point HERMES_HOME at a per-test temp directory instead of ~/.hermes/.

    Creates ``<tmp_path>/hermes_test`` with ``sessions``, ``cron``,
    ``memories`` and ``skills`` subdirectories, exports it as ``HERMES_HOME``,
    resets the cached plugin manager, and removes the gateway/session
    environment variables for the duration of the test.
    """
    hermes_dir = tmp_path / "hermes_test"
    hermes_dir.mkdir()
    for subdir in ("sessions", "cron", "memories", "skills"):
        (hermes_dir / subdir).mkdir()
    monkeypatch.setenv("HERMES_HOME", str(hermes_dir))

    # Reset the plugin singleton so plugins loaded earlier do not leak into
    # this test. Best-effort: any failure to import or patch is ignored.
    try:
        import hermes_cli.plugins as plugins_module
        monkeypatch.setattr(plugins_module, "_plugin_manager", None)
    except Exception:
        pass

    # Tests must not inherit the gateway/messaging surface of the process that
    # launched them; tests that need these variables set them explicitly.
    inherited_session_vars = (
        "HERMES_SESSION_PLATFORM",
        "HERMES_SESSION_CHAT_ID",
        "HERMES_SESSION_CHAT_NAME",
        "HERMES_GATEWAY_SESSION",
    )
    for var_name in inherited_session_vars:
        monkeypatch.delenv(var_name, raising=False)


@pytest.fixture()
def tmp_dir(tmp_path):
    """Alias for pytest's built-in ``tmp_path`` fixture.

    Returns the very same per-test directory object it receives; creation and
    cleanup of that directory are handled entirely by ``tmp_path``.
    """
    return tmp_path


@pytest.fixture()
def mock_config():
    """Build a small hermes config dict for unit tests.

    A fresh dict is constructed on every call, so a test may mutate the
    result without affecting other tests.
    """
    terminal_section = {
        "backend": "local",
        "cwd": "/tmp",
        "timeout": 30,
    }
    compression_section = {"enabled": False}
    memory_section = {"memory_enabled": False, "user_profile_enabled": False}

    config = {
        "model": "test/mock-model",
        "toolsets": ["terminal", "file"],
        "max_turns": 10,
        "terminal": terminal_section,
        "compression": compression_section,
        "memory": memory_section,
        "command_allowlist": [],
    }
    return config


# ── Global test timeout ─────────────────────────────────────────────────────
# Kill any individual test that takes longer than 30 seconds.
# Prevents hanging tests (subprocess spawns, blocking I/O) from stalling the
# entire test suite.

def _timeout_handler(signum, frame):
    raise TimeoutError("Test exceeded 30 second timeout")

@pytest.fixture(autouse=True)
def _ensure_current_event_loop(request):
    """Provide a default event loop for sync tests that call get_event_loop().

    Python 3.11+ no longer guarantees a current loop for plain synchronous tests.
    A number of gateway tests still use asyncio.get_event_loop().run_until_complete(...).
    Ensure they always have a usable loop without interfering with pytest-asyncio's
    own loop management for @pytest.mark.asyncio tests.

    Tests carrying the ``asyncio`` marker are left untouched. For every other
    test, a new loop is installed only when no usable (open) loop is current,
    and only a loop created here is closed and unset again on teardown.
    """
    # Local import keeps this fixture self-contained.
    import warnings

    if request.node.get_closest_marker("asyncio") is not None:
        yield
        return

    try:
        # The policy-based lookup is deprecated on newer Python versions and
        # may emit DeprecationWarning. With warnings promoted to errors that
        # would escape the RuntimeError handler below and fail every sync test
        # at setup, so silence it for this probe only.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", DeprecationWarning)
            loop = asyncio.get_event_loop_policy().get_event_loop()
    except RuntimeError:
        loop = None

    # A closed loop is as unusable as a missing one: replace it in both cases.
    created = loop is None or loop.is_closed()
    if created:
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)

    try:
        yield
    finally:
        # Only tear down what this fixture created; a pre-existing loop is
        # left exactly as it was found.
        if created:
            try:
                loop.close()
            finally:
                asyncio.set_event_loop(None)


@pytest.fixture(autouse=True)
def _enforce_test_timeout():
    """Kill any individual test that takes longer than 30 seconds.

    Installs ``_timeout_handler`` for SIGALRM and arms a 30 second alarm before
    the test runs; afterwards the alarm is cancelled and the previous handler
    is restored. SIGALRM is Unix-only, so the fixture is a no-op on Windows and
    on any other platform whose ``signal`` module lacks SIGALRM.
    """
    if sys.platform == "win32" or not hasattr(signal, "SIGALRM"):
        yield
        return

    previous_handler = signal.signal(signal.SIGALRM, _timeout_handler)
    signal.alarm(30)
    try:
        yield
    finally:
        # Always disarm and restore, even if the generator is closed without
        # being resumed, so a stale alarm cannot fire later.
        signal.alarm(0)
        signal.signal(signal.SIGALRM, previous_handler)
test: reorganize test structure and add missing unit tests Reorganize flat tests/ directory to mirror source code structure (tools/, gateway/, hermes_cli/, integration/). Add 11 new test files covering previously untested modules: registry, patch_parser, fuzzy_match, todo_tool, approval, file_tools, gateway session/config/ delivery, and hermes_cli config/models. Total: 147 unit tests passing, 9 integration tests gated behind pytest marker. 2026-02-26 03:20:08 +03:00			`"""Shared fixtures for the hermes-agent test suite."""`

test: restore default event loop for sync tests 2026-03-14 03:14:34 -07:00			`import asyncio`
test: reorganize test structure and add missing unit tests Reorganize flat tests/ directory to mirror source code structure (tools/, gateway/, hermes_cli/, integration/). Add 11 new test files covering previously untested modules: registry, patch_parser, fuzzy_match, todo_tool, approval, file_tools, gateway session/config/ delivery, and hermes_cli config/models. Total: 147 unit tests passing, 9 integration tests gated behind pytest marker. 2026-02-26 03:20:08 +03:00			`import os`
fix: skip hanging tests + add global test timeout 4 test files spawn real processes or make live API calls that hang indefinitely in batch/CI runs. Skip them with pytestmark: - tests/tools/test_code_execution.py (subprocess spawns) - tests/tools/test_file_tools_live.py (live LocalEnvironment) - tests/test_413_compression.py (blocks on process) - tests/test_agent_loop_tool_calling.py (live OpenRouter API calls) Also added global 30s signal.alarm timeout in conftest.py as a safety net, and removed stale nous-api test that hung on OAuth browser login. Suite now runs in ~55s with no hangs. 2026-03-12 01:23:28 -07:00			`import signal`
test: reorganize test structure and add missing unit tests Reorganize flat tests/ directory to mirror source code structure (tools/, gateway/, hermes_cli/, integration/). Add 11 new test files covering previously untested modules: registry, patch_parser, fuzzy_match, todo_tool, approval, file_tools, gateway session/config/ delivery, and hermes_cli config/models. Total: 147 unit tests passing, 9 integration tests gated behind pytest marker. 2026-02-26 03:20:08 +03:00			`import sys`
			`import tempfile`
			`from pathlib import Path`
			`from unittest.mock import patch`

			`import pytest`

			`# Ensure project root is importable`
			`PROJECT_ROOT = Path(__file__).parent.parent`
			`if str(PROJECT_ROOT) not in sys.path:`
			`sys.path.insert(0, str(PROJECT_ROOT))`


fix(tests): isolate HERMES_HOME in tests and adjust log directory for debug session Added a fixture to redirect HERMES_HOME to a temporary directory during tests, preventing writes to the user's home directory. Updated the test for DebugSession to create a dedicated log directory for saving logs, ensuring test isolation and accuracy in assertions. 2026-03-02 04:34:21 -08:00			`@pytest.fixture(autouse=True)`
			`def _isolate_hermes_home(tmp_path, monkeypatch):`
			`"""Redirect HERMES_HOME to a temp dir so tests never write to ~/.hermes/."""`
			`fake_home = tmp_path / "hermes_test"`
			`fake_home.mkdir()`
			`(fake_home / "sessions").mkdir()`
			`(fake_home / "cron").mkdir()`
			`(fake_home / "memories").mkdir()`
			`(fake_home / "skills").mkdir()`
			`monkeypatch.setenv("HERMES_HOME", str(fake_home))`
feat: first-class plugin architecture (#1555) Plugin system for extending Hermes with custom tools, hooks, and integrations — no source code changes required. Core system (hermes_cli/plugins.py): - Plugin discovery from ~/.hermes/plugins/, .hermes/plugins/, and pip entry_points (hermes_agent.plugins group) - PluginContext with register_tool() and register_hook() - 6 lifecycle hooks: pre/post tool_call, pre/post llm_call, on_session_start/end - Namespace package handling for relative imports in plugins - Graceful error isolation — broken plugins never crash the agent Integration (model_tools.py): - Plugin discovery runs after built-in + MCP tools - Plugin tools bypass toolset filter via get_plugin_tool_names() - Pre/post tool call hooks fire in handle_function_call() CLI: - /plugins command shows loaded plugins, tool counts, status - Added to COMMANDS dict for autocomplete Docs: - Getting started guide (build-a-hermes-plugin.md) — full tutorial building a calculator plugin step by step - Reference page (features/plugins.md) — quick overview + tables - Covers: file structure, schemas, handlers, hooks, data files, bundled skills, env var gating, pip distribution, common mistakes Tests: 16 tests covering discovery, loading, hooks, tool visibility. 2026-03-16 07:17:36 -07:00			`# Reset plugin singleton so tests don't leak plugins from ~/.hermes/plugins/`
			`try:`
			`import hermes_cli.plugins as _plugins_mod`
			`monkeypatch.setattr(_plugins_mod, "_plugin_manager", None)`
			`except Exception:`
			`pass`
feat: add direct endpoint overrides for auxiliary and delegation Add base_url/api_key overrides for auxiliary tasks and delegation so users can route those flows straight to a custom OpenAI-compatible endpoint without having to rely on provider=main or named custom providers. Also clear gateway session env vars in test isolation so the full suite stays deterministic when run from a messaging-backed agent session. 2026-03-14 20:48:29 -07:00			`# Tests should not inherit the agent's current gateway/messaging surface.`
			`# Individual tests that need gateway behavior set these explicitly.`
			`monkeypatch.delenv("HERMES_SESSION_PLATFORM", raising=False)`
			`monkeypatch.delenv("HERMES_SESSION_CHAT_ID", raising=False)`
			`monkeypatch.delenv("HERMES_SESSION_CHAT_NAME", raising=False)`
			`monkeypatch.delenv("HERMES_GATEWAY_SESSION", raising=False)`
fix(tests): isolate HERMES_HOME in tests and adjust log directory for debug session Added a fixture to redirect HERMES_HOME to a temporary directory during tests, preventing writes to the user's home directory. Updated the test for DebugSession to create a dedicated log directory for saving logs, ensuring test isolation and accuracy in assertions. 2026-03-02 04:34:21 -08:00

test: reorganize test structure and add missing unit tests Reorganize flat tests/ directory to mirror source code structure (tools/, gateway/, hermes_cli/, integration/). Add 11 new test files covering previously untested modules: registry, patch_parser, fuzzy_match, todo_tool, approval, file_tools, gateway session/config/ delivery, and hermes_cli config/models. Total: 147 unit tests passing, 9 integration tests gated behind pytest marker. 2026-02-26 03:20:08 +03:00			`@pytest.fixture()`
			`def tmp_dir(tmp_path):`
			`"""Provide a temporary directory that is cleaned up automatically."""`
			`return tmp_path`


			`@pytest.fixture()`
			`def mock_config():`
			`"""Return a minimal hermes config dict suitable for unit tests."""`
			`return {`
			`"model": "test/mock-model",`
			`"toolsets": ["terminal", "file"],`
			`"max_turns": 10,`
			`"terminal": {`
			`"backend": "local",`
			`"cwd": "/tmp",`
			`"timeout": 30,`
			`},`
			`"compression": {"enabled": False},`
			`"memory": {"memory_enabled": False, "user_profile_enabled": False},`
			`"command_allowlist": [],`
			`}`
fix: skip hanging tests + add global test timeout 4 test files spawn real processes or make live API calls that hang indefinitely in batch/CI runs. Skip them with pytestmark: - tests/tools/test_code_execution.py (subprocess spawns) - tests/tools/test_file_tools_live.py (live LocalEnvironment) - tests/test_413_compression.py (blocks on process) - tests/test_agent_loop_tool_calling.py (live OpenRouter API calls) Also added global 30s signal.alarm timeout in conftest.py as a safety net, and removed stale nous-api test that hung on OAuth browser login. Suite now runs in ~55s with no hangs. 2026-03-12 01:23:28 -07:00

			`# ── Global test timeout ─────────────────────────────────────────────────────`
			`# Kill any individual test that takes longer than 30 seconds.`
			`# Prevents hanging tests (subprocess spawns, blocking I/O) from stalling the`
			`# entire test suite.`

			`def _timeout_handler(signum, frame):`
			`raise TimeoutError("Test exceeded 30 second timeout")`

test: restore default event loop for sync tests 2026-03-14 03:14:34 -07:00			`@pytest.fixture(autouse=True)`
			`def _ensure_current_event_loop(request):`
			`"""Provide a default event loop for sync tests that call get_event_loop().`

			`Python 3.11+ no longer guarantees a current loop for plain synchronous tests.`
			`A number of gateway tests still use asyncio.get_event_loop().run_until_complete(...).`
			`Ensure they always have a usable loop without interfering with pytest-asyncio's`
			`own loop management for @pytest.mark.asyncio tests.`
			`"""`
			`if request.node.get_closest_marker("asyncio") is not None:`
			`yield`
			`return`

			`try:`
			`loop = asyncio.get_event_loop_policy().get_event_loop()`
			`except RuntimeError:`
			`loop = None`

			`created = loop is None or loop.is_closed()`
			`if created:`
			`loop = asyncio.new_event_loop()`
			`asyncio.set_event_loop(loop)`

			`try:`
			`yield`
			`finally:`
			`if created and loop is not None:`
			`try:`
			`loop.close()`
			`finally:`
			`asyncio.set_event_loop(None)`


fix: skip hanging tests + add global test timeout 4 test files spawn real processes or make live API calls that hang indefinitely in batch/CI runs. Skip them with pytestmark: - tests/tools/test_code_execution.py (subprocess spawns) - tests/tools/test_file_tools_live.py (live LocalEnvironment) - tests/test_413_compression.py (blocks on process) - tests/test_agent_loop_tool_calling.py (live OpenRouter API calls) Also added global 30s signal.alarm timeout in conftest.py as a safety net, and removed stale nous-api test that hung on OAuth browser login. Suite now runs in ~55s with no hangs. 2026-03-12 01:23:28 -07:00			`@pytest.fixture(autouse=True)`
			`def _enforce_test_timeout():`
fix(approval): show full command in dangerous command approval (#1553) * fix: prevent infinite 400 failure loop on context overflow (#1630) When a gateway session exceeds the model's context window, Anthropic may return a generic 400 invalid_request_error with just 'Error' as the message. This bypassed the phrase-based context-length detection, causing the agent to treat it as a non-retryable client error. Worse, the failed user message was still persisted to the transcript, making the session even larger on each attempt — creating an infinite loop. Three-layer fix: 1. run_agent.py — Fallback heuristic: when a 400 error has a very short generic message AND the session is large (>40% of context or >80 messages), treat it as a probable context overflow and trigger compression instead of aborting. 2. run_agent.py + gateway/run.py — Don't persist failed messages: when the agent returns failed=True before generating any response, skip writing the user's message to the transcript/DB. This prevents the session from growing on each failure. 3. gateway/run.py — Smarter error messages: detect context-overflow failures and suggest /compact or /reset specifically, instead of a generic 'try again' that will fail identically. * fix(skills): detect prompt injection patterns and block cache file reads Adds two security layers to prevent prompt injection via skills hub cache files (#1558): 1. read_file: blocks direct reads of ~/.hermes/skills/.hub/ directory (index-cache, catalog files). The 3.5MB clawhub_catalog_v1.json was the original injection vector — untrusted skill descriptions in the catalog contained adversarial text that the model executed. 2. skill_view: warns when skills are loaded from outside the trusted ~/.hermes/skills/ directory, and detects common injection patterns in skill content ("ignore previous instructions", "<system>", etc.). Cherry-picked from PR #1562 by ygd58. 
* fix(tools): chunk long messages in send_message_tool before dispatch (#1552) Long messages sent via send_message tool or cron delivery silently failed when exceeding platform limits. Gateway adapters handle this via truncate_message(), but the standalone senders in send_message_tool bypassed that entirely. - Apply truncate_message() chunking in _send_to_platform() before dispatching to individual platform senders - Remove naive message[i:i+2000] character split in _send_discord() in favor of centralized smart splitting - Attach media files to last chunk only for Telegram - Add regression tests for chunking and media placement Cherry-picked from PR #1557 by llbn. * fix(approval): show full command in dangerous command approval (#1553) Previously the command was truncated to 80 chars in CLI (with a [v]iew full option), 500 chars in Discord embeds, and missing entirely in Telegram/Slack approval messages. Now the full command is always displayed everywhere: - CLI: removed 80-char truncation and [v]iew full menu option - Gateway (TG/Slack): approval_required message includes full command in a code block - Discord: embed shows full command up to 4096-char limit - Windows: skip SIGALRM-based test timeout (Unix-only) - Updated tests: replaced view-flow tests with direct approval tests Cherry-picked from PR #1566 by crazywriter1. --------- Co-authored-by: buray <ygd58@users.noreply.github.com> Co-authored-by: lbn <llbn@users.noreply.github.com> Co-authored-by: crazywriter1 <53251494+crazywriter1@users.noreply.github.com> 2026-03-17 02:02:33 -07:00			`"""Kill any individual test that takes longer than 30 seconds.`
			`SIGALRM is Unix-only; skip on Windows."""`
			`if sys.platform == "win32":`
			`yield`
			`return`
fix: skip hanging tests + add global test timeout 4 test files spawn real processes or make live API calls that hang indefinitely in batch/CI runs. Skip them with pytestmark: - tests/tools/test_code_execution.py (subprocess spawns) - tests/tools/test_file_tools_live.py (live LocalEnvironment) - tests/test_413_compression.py (blocks on process) - tests/test_agent_loop_tool_calling.py (live OpenRouter API calls) Also added global 30s signal.alarm timeout in conftest.py as a safety net, and removed stale nous-api test that hung on OAuth browser login. Suite now runs in ~55s with no hangs. 2026-03-12 01:23:28 -07:00			`old = signal.signal(signal.SIGALRM, _timeout_handler)`
			`signal.alarm(30)`
			`yield`
			`signal.alarm(0)`
			`signal.signal(signal.SIGALRM, old)`