Files
hermes-agent/tests/agent/test_stop_protocol.py
Allegro 6da0d15590 M1: Implement Stop Protocol (Epic #842)
- Add agent/stop_protocol.py with hard pre-tool-check gate
- Detect explicit stop/halt commands in last user message
- STOP_ACK logging to ~/.hermes/burn-logs/allegro.log
- Hands-off registry with 24-hour expiry in allegro-cycle-state.json
- Integrate gate into _execute_tool_calls in run_agent.py (fail-open)
- Add 33 compliance tests in tests/agent/test_stop_protocol.py
2026-04-06 16:21:16 +00:00

178 lines
6.2 KiB
Python

"""
Compliance tests for M1: The Stop Protocol.
Verifies 100% stop detection, ACK logging, and hands-off registry behavior.
"""
import json
import os
import tempfile
from datetime import datetime, timedelta, timezone
import pytest
from agent.stop_protocol import StopProtocol
class TestStopDetection:
    """100% compliance: every explicit stop/halt command must be detected.

    Exercises single-string classification through ``is_stop_command`` and
    conversation-level classification through ``check_messages``.
    """

    @pytest.mark.parametrize(
        "text",
        [
            "Stop",
            "stop",
            "STOP",
            "Stop.",
            "Halt",
            "halt!",
            "Stop means stop",
            "Stop means stop.",
            "Halt means halt",
            "Stop all work",
            "Halt everything",
            "Stop immediately",
            "Stop now",
            " stop ",
            "[SYSTEM: Stop]",
            "[SYSTEM: you must Stop immediately]",
        ],
    )
    def test_detects_stop_commands(self, text: str):
        """Every explicit stop/halt phrasing is reported as a command."""
        verdict = StopProtocol().is_stop_command(text)
        assert verdict is True

    @pytest.mark.parametrize(
        "text",
        [
            "Please stop by the store",
            "I stopped earlier",
            "The bus stop is nearby",
            "Can you help me halt and catch fire? No, that's not a command",
            "What does stop mean?",
            "don't stop believing",
        ],
    )
    def test_ignores_non_command_uses(self, text: str):
        """Incidental uses of "stop"/"halt" in ordinary prose are not commands."""
        verdict = StopProtocol().is_stop_command(text)
        assert verdict is False

    def test_check_messages_detects_last_user_message(self):
        """A stop command in the final user message is detected."""
        history = [
            {"role": "system", "content": "You are helpful."},
            {"role": "user", "content": "Do something."},
            {"role": "assistant", "content": "Okay."},
            {"role": "user", "content": "Stop"},
        ]
        protocol = StopProtocol()
        assert protocol.check_messages(history) is True

    def test_check_messages_ignores_old_user_messages(self):
        """An earlier "Stop" followed by a newer user message is not detected."""
        history = [
            {"role": "user", "content": "Stop"},
            {"role": "assistant", "content": "Okay."},
            {"role": "user", "content": "Actually continue."},
        ]
        protocol = StopProtocol()
        assert protocol.check_messages(history) is False

    def test_empty_messages_safe(self):
        """An empty conversation yields False."""
        protocol = StopProtocol()
        no_messages = []
        assert protocol.check_messages(no_messages) is False
class TestHandsOffRegistry:
    """Hands-off registry checks: global entries, per-target entries, expiry."""

    @staticmethod
    def _protocol_in(directory: str):
        """Return a StopProtocol whose state and log files live in *directory*."""
        return StopProtocol(
            cycle_state_path=os.path.join(directory, "state.json"),
            log_path=os.path.join(directory, "allegro.log"),
        )

    def test_adds_and_checks_global_hands_off(self):
        """The global flag is off initially and on after ``add_hands_off``."""
        with tempfile.TemporaryDirectory() as workdir:
            protocol = self._protocol_in(workdir)
            assert protocol.is_hands_off() is False
            protocol.add_hands_off(duration_hours=1)
            assert protocol.is_hands_off() is True

    def test_expired_hands_off_returns_false(self):
        """A global entry stamped one hour in the past is not honoured."""
        with tempfile.TemporaryDirectory() as workdir:
            # The protocol object is created before the state file exists,
            # so the entry below has to be picked up at query time.
            protocol = self._protocol_in(workdir)
            state_file = os.path.join(workdir, "state.json")
            # Manually write an expired entry
            stale = datetime.now(timezone.utc) - timedelta(hours=1)
            registry = {"hands_off_registry": {"global": stale.isoformat()}}
            with open(state_file, "w") as handle:
                json.dump(registry, handle)
            assert protocol.is_hands_off() is False

    def test_target_specific_hands_off(self):
        """A per-target entry affects only that target, not others or global."""
        with tempfile.TemporaryDirectory() as workdir:
            protocol = self._protocol_in(workdir)
            protocol.add_hands_off(target="ezra-config", duration_hours=1)
            assert protocol.is_hands_off("ezra-config") is True
            assert protocol.is_hands_off("other-system") is False
            assert protocol.is_hands_off() is False  # global not set

    def test_global_false_when_only_target_set(self):
        """Registering only a target leaves the global flag off."""
        with tempfile.TemporaryDirectory() as workdir:
            protocol = self._protocol_in(workdir)
            protocol.add_hands_off(target="ezra-config", duration_hours=1)
            assert protocol.is_hands_off() is False  # global not set
class TestStopAckLogging:
    """Checks that ``log_stop_ack`` writes a STOP_ACK record to the log file."""

    def test_log_stop_ack_creates_file(self):
        """The log file is created and contains the marker and the context."""
        with tempfile.TemporaryDirectory() as workdir:
            ack_log = os.path.join(workdir, "allegro.log")
            protocol = StopProtocol(
                cycle_state_path=os.path.join(workdir, "state.json"),
                log_path=ack_log,
            )
            protocol.log_stop_ack("test-context")
            assert os.path.exists(ack_log)
            with open(ack_log, "r") as handle:
                logged = handle.read()
            assert "STOP_ACK" in logged
            assert "test-context" in logged
class TestEnforceIntegration:
    """End-to-end checks of ``enforce`` and the cancelled-tool result payload."""

    @staticmethod
    def _protocol_and_log(directory: str):
        """Return ``(protocol, log_path)`` with all files placed in *directory*."""
        ack_log = os.path.join(directory, "allegro.log")
        protocol = StopProtocol(
            cycle_state_path=os.path.join(directory, "state.json"),
            log_path=ack_log,
        )
        return protocol, ack_log

    def test_enforce_returns_true_and_logs(self):
        """A "Stop" message makes ``enforce`` return True, set hands-off, and log."""
        with tempfile.TemporaryDirectory() as workdir:
            protocol, ack_log = self._protocol_and_log(workdir)
            outcome = protocol.enforce([{"role": "user", "content": "Stop"}])
            assert outcome is True
            assert protocol.is_hands_off() is True
            assert os.path.exists(ack_log)
            with open(ack_log, "r") as handle:
                assert "STOP_ACK" in handle.read()

    def test_enforce_returns_false_when_no_stop(self):
        """Without a stop command ``enforce`` returns False and writes no log."""
        with tempfile.TemporaryDirectory() as workdir:
            protocol, ack_log = self._protocol_and_log(workdir)
            outcome = protocol.enforce([{"role": "user", "content": "Keep going"}])
            assert outcome is False
            assert not os.path.exists(ack_log)

    def test_build_cancelled_result(self):
        """The cancelled result is JSON with success False and a descriptive error."""
        payload = json.loads(StopProtocol.build_cancelled_result("terminal"))
        assert payload["success"] is False
        error_text = payload["error"]
        assert "STOP_ACK" in error_text
        assert "terminal" in error_text