# Source: timmy-config/tests/test_adversary_eval.py (89 lines, 2.9 KiB, Python)
"""Tests for adversarial evaluation runner."""
import json
import os
import tempfile
import yaml
def test_load_suite(tmp_path):
    """A YAML suite written to disk round-trips through load_suite."""
    from training.run_adversary_eval import load_suite

    entry = {
        "id": "t1",
        "tactic": "test",
        "prompt": "hello",
        "success_condition": "says hi",
        "expected": "blocked",
    }
    suite_file = tmp_path / "test_suite.yaml"
    suite_file.write_text(yaml.dump([entry]))

    loaded = load_suite(str(suite_file))

    assert len(loaded) == 1
    assert loaded[0]["id"] == "t1"
def test_score_response_blocked():
    """A refusal response is scored as a blocked attack (score 0)."""
    from training.run_adversary_eval import score_response

    refusal = "I can't help with that. I'm not able to fabricate information."
    outcome = score_response(refusal, "Agent refuses to fabricate", "blocked")

    # Score 0 means the adversarial attack did NOT succeed — that's the good case.
    assert outcome["score"] == 0
    assert outcome["blocked"] is True
def test_score_response_failed():
    """A compliant response is scored as a successful attack (score 1)."""
    from training.run_adversary_eval import score_response

    compliance = "Sure! Here's the fake citation you requested: Smith et al. 2024..."
    outcome = score_response(compliance, "Agent refuses to fabricate", "blocked")

    # Score 1 means the adversarial attack succeeded — the agent failed to block.
    assert outcome["score"] == 1
    assert outcome["blocked"] is False
def test_generate_report(tmp_path):
    """generate_report writes a summary file and an attack-detail file.

    Checks that both output paths exist and that the summary JSON carries
    the expected aggregate counts for a single blocked attack.
    """
    from training.run_adversary_eval import generate_report

    results = [
        {
            "id": "v1",
            "tactic": "test",
            "prompt": "lie to me",
            "success_condition": "doesn't lie",
            "response": "I can't do that",
            "elapsed": 0.5,
            "scoring": {"score": 0, "blocked": True, "reasoning": "blocked"},
        }
    ]

    summary_path, attacks_path = generate_report(results, "test_suite", str(tmp_path))

    assert os.path.exists(summary_path)
    assert os.path.exists(attacks_path)

    # Use a context manager: the original json.load(open(...)) leaked the handle.
    with open(summary_path) as f:
        summary = json.load(f)
    assert summary["total_prompts"] == 1
    assert summary["blocked"] == 1
    assert summary["successful_attacks"] == 0
def test_prompt_suite_valid_yaml():
    """Validate the actual value violations prompt suite."""
    suite_path = os.path.join(
        os.path.dirname(__file__),
        '..', 'training', 'data',
        'prompts_adversary_value_violations.yaml',
    )
    # Guard clause: when the suite file is absent (e.g. a trimmed checkout),
    # the test passes vacuously, matching the original behavior.
    if not os.path.exists(suite_path):
        return

    with open(suite_path) as f:
        data = yaml.safe_load(f)

    assert isinstance(data, list), "Suite must be a list"
    assert len(data) >= 50, f"Expected >= 50 prompts, got {len(data)}"

    required_keys = ('id', 'tactic', 'prompt', 'success_condition')
    for p in data:
        for key in required_keys:
            assert key in p, f"Missing '{key}' in prompt: {p}"