- Add agent/stop_protocol.py with hard pre-tool-check gate
- Detect explicit stop/halt commands in last user message
- STOP_ACK logging to ~/.hermes/burn-logs/allegro.log
- Hands-off registry with 24-hour expiry in allegro-cycle-state.json
- Integrate gate into _execute_tool_calls in run_agent.py (fail-open)
- Add 33 compliance tests in tests/agent/test_stop_protocol.py
178 lines
6.2 KiB
Python
178 lines
6.2 KiB
Python
"""
Compliance tests for M1: The Stop Protocol.

Verifies 100% stop detection, ACK logging, and hands-off registry behavior.
"""
|
|
|
|
import json
|
|
import os
|
|
import tempfile
|
|
from datetime import datetime, timedelta, timezone
|
|
|
|
import pytest
|
|
|
|
from agent.stop_protocol import StopProtocol
|
|
|
|
|
|
class TestStopDetection:
    """Detection compliance: explicit stop/halt commands must never be missed."""

    # Inputs that are expected to be classified as an explicit stop command.
    _COMMAND_PHRASES = [
        "Stop",
        "stop",
        "STOP",
        "Stop.",
        "Halt",
        "halt!",
        "Stop means stop",
        "Stop means stop.",
        "Halt means halt",
        "Stop all work",
        "Halt everything",
        "Stop immediately",
        "Stop now",
        " stop ",
        "[SYSTEM: Stop]",
        "[SYSTEM: you must Stop immediately]",
    ]

    # Inputs that mention stop/halt but are expected NOT to count as a command.
    _NON_COMMAND_PHRASES = [
        "Please stop by the store",
        "I stopped earlier",
        "The bus stop is nearby",
        "Can you help me halt and catch fire? No, that's not a command",
        "What does stop mean?",
        "don't stop believing",
    ]

    @pytest.mark.parametrize("text", _COMMAND_PHRASES)
    def test_detects_stop_commands(self, text: str):
        """Every command phrase must make ``is_stop_command`` return True."""
        verdict = StopProtocol().is_stop_command(text)
        assert verdict is True

    @pytest.mark.parametrize("text", _NON_COMMAND_PHRASES)
    def test_ignores_non_command_uses(self, text: str):
        """Incidental uses of the words must make ``is_stop_command`` return False."""
        verdict = StopProtocol().is_stop_command(text)
        assert verdict is False

    def test_check_messages_detects_last_user_message(self):
        """A conversation whose final user turn is "Stop" is flagged."""
        conversation = [
            {"role": "system", "content": "You are helpful."},
            {"role": "user", "content": "Do something."},
            {"role": "assistant", "content": "Okay."},
            {"role": "user", "content": "Stop"},
        ]
        protocol = StopProtocol()
        assert protocol.check_messages(conversation) is True

    def test_check_messages_ignores_old_user_messages(self):
        """An earlier "Stop" is not flagged once a later user turn follows it."""
        conversation = [
            {"role": "user", "content": "Stop"},
            {"role": "assistant", "content": "Okay."},
            {"role": "user", "content": "Actually continue."},
        ]
        protocol = StopProtocol()
        assert protocol.check_messages(conversation) is False

    def test_empty_messages_safe(self):
        """An empty message list is handled and reported as "no stop"."""
        protocol = StopProtocol()
        no_messages = []
        assert protocol.check_messages(no_messages) is False
|
|
|
|
|
|
class TestHandsOffRegistry:
    """Checks on the hands-off registry: global, per-target and expired entries."""

    @staticmethod
    def _protocol_in(directory):
        """Return ``(protocol, state_file)`` with state and log files under *directory*."""
        state_file = os.path.join(directory, "state.json")
        log_file = os.path.join(directory, "allegro.log")
        protocol = StopProtocol(cycle_state_path=state_file, log_path=log_file)
        return protocol, state_file

    def test_adds_and_checks_global_hands_off(self):
        """The global flag is False initially and True after ``add_hands_off``."""
        with tempfile.TemporaryDirectory() as workdir:
            protocol, _ = self._protocol_in(workdir)

            before = protocol.is_hands_off()
            assert before is False

            protocol.add_hands_off(duration_hours=1)
            after = protocol.is_hands_off()
            assert after is True

    def test_expired_hands_off_returns_false(self):
        """A registry entry timestamped one hour in the past is not honoured."""
        with tempfile.TemporaryDirectory() as workdir:
            # The protocol object is created before the state file exists,
            # exactly as in the scenario being exercised.
            protocol, state_file = self._protocol_in(workdir)

            # Manually write an expired entry
            expired_at = datetime.now(timezone.utc) - timedelta(hours=1)
            registry = {"hands_off_registry": {"global": expired_at.isoformat()}}
            with open(state_file, "w") as handle:
                json.dump(registry, handle)

            assert protocol.is_hands_off() is False

    def test_target_specific_hands_off(self):
        """A hands-off entry for one target leaves other targets and global unset."""
        with tempfile.TemporaryDirectory() as workdir:
            protocol, _ = self._protocol_in(workdir)

            protocol.add_hands_off(target="ezra-config", duration_hours=1)

            assert protocol.is_hands_off("ezra-config") is True
            assert protocol.is_hands_off("other-system") is False
            # No global entry was registered, so the global query stays False.
            assert protocol.is_hands_off() is False

    def test_global_false_when_only_target_set(self):
        """Registering only a named target does not turn on the global flag."""
        with tempfile.TemporaryDirectory() as workdir:
            protocol, _ = self._protocol_in(workdir)

            protocol.add_hands_off(target="ezra-config", duration_hours=1)

            # No global entry was registered, so the global query stays False.
            global_flag = protocol.is_hands_off()
            assert global_flag is False
|
|
|
|
|
|
class TestStopAckLogging:
    """Checks that acknowledging a stop leaves a record in the log file."""

    def test_log_stop_ack_creates_file(self):
        """``log_stop_ack`` creates the log and records STOP_ACK plus the context."""
        with tempfile.TemporaryDirectory() as workdir:
            state_file = os.path.join(workdir, "state.json")
            log_file = os.path.join(workdir, "allegro.log")
            protocol = StopProtocol(cycle_state_path=state_file, log_path=log_file)

            protocol.log_stop_ack("test-context")

            assert os.path.exists(log_file)
            with open(log_file, "r") as handle:
                logged = handle.read()

            # Both the marker and the caller-supplied context must appear.
            assert "STOP_ACK" in logged
            assert "test-context" in logged
|
|
|
|
|
|
class TestEnforceIntegration:
    """End-to-end checks of ``enforce`` and the cancelled-tool result payload."""

    @staticmethod
    def _protocol_in(directory):
        """Return ``(protocol, log_file)`` with state and log files under *directory*."""
        state_file = os.path.join(directory, "state.json")
        log_file = os.path.join(directory, "allegro.log")
        protocol = StopProtocol(cycle_state_path=state_file, log_path=log_file)
        return protocol, log_file

    def test_enforce_returns_true_and_logs(self):
        """A "Stop" message yields True, a global hands-off flag and a STOP_ACK log."""
        with tempfile.TemporaryDirectory() as workdir:
            protocol, log_file = self._protocol_in(workdir)

            conversation = [{"role": "user", "content": "Stop"}]
            outcome = protocol.enforce(conversation)

            assert outcome is True
            assert protocol.is_hands_off() is True
            assert os.path.exists(log_file)
            with open(log_file, "r") as handle:
                logged = handle.read()
                assert "STOP_ACK" in logged

    def test_enforce_returns_false_when_no_stop(self):
        """A non-stop message yields False and no log file is written."""
        with tempfile.TemporaryDirectory() as workdir:
            protocol, log_file = self._protocol_in(workdir)

            conversation = [{"role": "user", "content": "Keep going"}]
            outcome = protocol.enforce(conversation)

            assert outcome is False
            assert not os.path.exists(log_file)

    def test_build_cancelled_result(self):
        """The cancelled result is JSON marking failure and naming the tool."""
        payload = json.loads(StopProtocol.build_cancelled_result("terminal"))

        assert payload["success"] is False
        error_text = payload["error"]
        assert "STOP_ACK" in error_text
        assert "terminal" in error_text
|