Files

120 lines
3.8 KiB
Python
Raw Permalink Normal View History

"""Shared fixtures for the hermes-agent test suite."""
import asyncio
import os
import signal
import sys
import tempfile
from pathlib import Path
from unittest.mock import patch
import pytest
# Ensure project root is importable
PROJECT_ROOT = Path(__file__).parent.parent
if str(PROJECT_ROOT) not in sys.path:
sys.path.insert(0, str(PROJECT_ROOT))
@pytest.fixture(autouse=True)
def _isolate_hermes_home(tmp_path, monkeypatch):
"""Redirect HERMES_HOME to a temp dir so tests never write to ~/.hermes/."""
fake_home = tmp_path / "hermes_test"
fake_home.mkdir()
(fake_home / "sessions").mkdir()
(fake_home / "cron").mkdir()
(fake_home / "memories").mkdir()
(fake_home / "skills").mkdir()
monkeypatch.setenv("HERMES_HOME", str(fake_home))
# Reset plugin singleton so tests don't leak plugins from ~/.hermes/plugins/
try:
import hermes_cli.plugins as _plugins_mod
monkeypatch.setattr(_plugins_mod, "_plugin_manager", None)
except Exception:
pass
# Tests should not inherit the agent's current gateway/messaging surface.
# Individual tests that need gateway behavior set these explicitly.
monkeypatch.delenv("HERMES_SESSION_PLATFORM", raising=False)
monkeypatch.delenv("HERMES_SESSION_CHAT_ID", raising=False)
monkeypatch.delenv("HERMES_SESSION_CHAT_NAME", raising=False)
monkeypatch.delenv("HERMES_GATEWAY_SESSION", raising=False)
@pytest.fixture()
def tmp_dir(tmp_path):
"""Provide a temporary directory that is cleaned up automatically."""
return tmp_path
@pytest.fixture()
def mock_config():
"""Return a minimal hermes config dict suitable for unit tests."""
return {
"model": "test/mock-model",
"toolsets": ["terminal", "file"],
"max_turns": 10,
"terminal": {
"backend": "local",
"cwd": "/tmp",
"timeout": 30,
},
"compression": {"enabled": False},
"memory": {"memory_enabled": False, "user_profile_enabled": False},
"command_allowlist": [],
}
# ── Global test timeout ─────────────────────────────────────────────────────
# Kill any individual test that takes longer than 30 seconds.
# Prevents hanging tests (subprocess spawns, blocking I/O) from stalling the
# entire test suite.
def _timeout_handler(signum, frame):
raise TimeoutError("Test exceeded 30 second timeout")
@pytest.fixture(autouse=True)
def _ensure_current_event_loop(request):
"""Provide a default event loop for sync tests that call get_event_loop().
Python 3.11+ no longer guarantees a current loop for plain synchronous tests.
A number of gateway tests still use asyncio.get_event_loop().run_until_complete(...).
Ensure they always have a usable loop without interfering with pytest-asyncio's
own loop management for @pytest.mark.asyncio tests.
"""
if request.node.get_closest_marker("asyncio") is not None:
yield
return
try:
loop = asyncio.get_event_loop_policy().get_event_loop()
except RuntimeError:
loop = None
created = loop is None or loop.is_closed()
if created:
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
try:
yield
finally:
if created and loop is not None:
try:
loop.close()
finally:
asyncio.set_event_loop(None)
@pytest.fixture(autouse=True)
def _enforce_test_timeout():
fix(approval): show full command in dangerous command approval (#1553) * fix: prevent infinite 400 failure loop on context overflow (#1630) When a gateway session exceeds the model's context window, Anthropic may return a generic 400 invalid_request_error with just 'Error' as the message. This bypassed the phrase-based context-length detection, causing the agent to treat it as a non-retryable client error. Worse, the failed user message was still persisted to the transcript, making the session even larger on each attempt — creating an infinite loop. Three-layer fix: 1. run_agent.py — Fallback heuristic: when a 400 error has a very short generic message AND the session is large (>40% of context or >80 messages), treat it as a probable context overflow and trigger compression instead of aborting. 2. run_agent.py + gateway/run.py — Don't persist failed messages: when the agent returns failed=True before generating any response, skip writing the user's message to the transcript/DB. This prevents the session from growing on each failure. 3. gateway/run.py — Smarter error messages: detect context-overflow failures and suggest /compact or /reset specifically, instead of a generic 'try again' that will fail identically. * fix(skills): detect prompt injection patterns and block cache file reads Adds two security layers to prevent prompt injection via skills hub cache files (#1558): 1. read_file: blocks direct reads of ~/.hermes/skills/.hub/ directory (index-cache, catalog files). The 3.5MB clawhub_catalog_v1.json was the original injection vector — untrusted skill descriptions in the catalog contained adversarial text that the model executed. 2. skill_view: warns when skills are loaded from outside the trusted ~/.hermes/skills/ directory, and detects common injection patterns in skill content ("ignore previous instructions", "<system>", etc.). Cherry-picked from PR #1562 by ygd58. * fix(tools): chunk long messages in send_message_tool before dispatch (#1552) Long messages sent via send_message tool or cron delivery silently failed when exceeding platform limits. Gateway adapters handle this via truncate_message(), but the standalone senders in send_message_tool bypassed that entirely. - Apply truncate_message() chunking in _send_to_platform() before dispatching to individual platform senders - Remove naive message[i:i+2000] character split in _send_discord() in favor of centralized smart splitting - Attach media files to last chunk only for Telegram - Add regression tests for chunking and media placement Cherry-picked from PR #1557 by llbn. * fix(approval): show full command in dangerous command approval (#1553) Previously the command was truncated to 80 chars in CLI (with a [v]iew full option), 500 chars in Discord embeds, and missing entirely in Telegram/Slack approval messages. Now the full command is always displayed everywhere: - CLI: removed 80-char truncation and [v]iew full menu option - Gateway (TG/Slack): approval_required message includes full command in a code block - Discord: embed shows full command up to 4096-char limit - Windows: skip SIGALRM-based test timeout (Unix-only) - Updated tests: replaced view-flow tests with direct approval tests Cherry-picked from PR #1566 by crazywriter1. --------- Co-authored-by: buray <ygd58@users.noreply.github.com> Co-authored-by: lbn <llbn@users.noreply.github.com> Co-authored-by: crazywriter1 <53251494+crazywriter1@users.noreply.github.com>
2026-03-17 02:02:33 -07:00
"""Kill any individual test that takes longer than 30 seconds.
SIGALRM is Unix-only; skip on Windows."""
if sys.platform == "win32":
yield
return
old = signal.signal(signal.SIGALRM, _timeout_handler)
signal.alarm(30)
yield
signal.alarm(0)
signal.signal(signal.SIGALRM, old)