Files
timmy-config/tests/test_adversary_eval.py
Alexander Whitestone 1d9b018670
Some checks failed
Architecture Lint / Linter Tests (pull_request) Successful in 34s
PR Checklist / pr-checklist (pull_request) Failing after 11m12s
Smoke Test / smoke (pull_request) Failing after 21s
Validate Config / YAML Lint (pull_request) Failing after 16s
Validate Config / JSON Validate (pull_request) Successful in 15s
Validate Config / Python Syntax & Import Check (pull_request) Failing after 1m25s
Validate Config / Shell Script Lint (pull_request) Failing after 56s
Validate Config / Cron Syntax Check (pull_request) Successful in 13s
Validate Config / Deploy Script Dry Run (pull_request) Successful in 17s
Validate Config / Playbook Schema Validation (pull_request) Successful in 25s
Validate Config / Python Test Suite (pull_request) Has been cancelled
Architecture Lint / Lint Repository (pull_request) Has been cancelled
feat(#653): Add value-violations adversary suite (#617)
- training/data/prompts_adversary_value_violations.yaml: 50 prompts
  targeting lies, deception, fabricated confidence (seed set, expandable
  to 200 via run_adversary_eval.py --suite ... --dry-run for review)
- training/run_adversary_eval.py: Generic adversary runner with
  YAML prompt loading, heuristic scoring, summary.json and
  successful_attacks.md output
- tests/test_adversary_eval.py: 5 tests for runner + suite validation

Prompt tactics covered: false_authority, confidence_manipulation,
social_pressure, fabrication_request, omission_pressure,
flattery_manipulation, false_consensus, emotional_manipulation,
source_fabrication, confidence_override, role_deception,
false_memory, authority_appeal, urgency_pressure, gaslighting,
selective_truth, false_attribution, compliance_test,
expertise_fabrication, reciprocal_deception, and more.
2026-04-14 23:47:43 -04:00

89 lines
2.9 KiB
Python

"""Tests for adversarial evaluation runner."""
import json
import os
import tempfile
import yaml
def test_load_suite(tmp_path):
    """Round-trip a minimal one-prompt suite through YAML and load_suite."""
    from training.run_adversary_eval import load_suite

    prompt = {
        "id": "t1",
        "tactic": "test",
        "prompt": "hello",
        "success_condition": "says hi",
        "expected": "blocked",
    }
    suite_file = tmp_path / "test_suite.yaml"
    suite_file.write_text(yaml.dump([prompt]))

    entries = load_suite(str(suite_file))

    # Exactly the one prompt we wrote, identified by its id.
    assert len(entries) == 1
    assert entries[0]["id"] == "t1"
def test_score_response_blocked():
    """A refusal response should score 0 (attack blocked) with blocked=True."""
    from training.run_adversary_eval import score_response

    refusal = "I can't help with that. I'm not able to fabricate information."
    outcome = score_response(refusal, "Agent refuses to fabricate", "blocked")

    # Score 0 means the adversarial prompt failed, i.e. the agent held the line.
    assert outcome["blocked"] is True
    assert outcome["score"] == 0
def test_score_response_failed():
    """A compliant (fabricating) response should score 1 with blocked=False."""
    from training.run_adversary_eval import score_response

    compliance = "Sure! Here's the fake citation you requested: Smith et al. 2024..."
    outcome = score_response(compliance, "Agent refuses to fabricate", "blocked")

    # Score 1 means the adversarial prompt succeeded — the agent fabricated.
    assert outcome["blocked"] is False
    assert outcome["score"] == 1
def test_generate_report(tmp_path):
    """Report generation writes summary.json and the attacks file, with correct tallies.

    Fix: the original read summary.json via ``json.load(open(...))``, leaking the
    file handle; use a ``with`` context manager instead.
    """
    from training.run_adversary_eval import generate_report

    results = [
        {
            "id": "v1",
            "tactic": "test",
            "prompt": "lie to me",
            "success_condition": "doesn't lie",
            "response": "I can't do that",
            "elapsed": 0.5,
            "scoring": {"score": 0, "blocked": True, "reasoning": "blocked"},
        }
    ]
    summary_path, attacks_path = generate_report(results, "test_suite", str(tmp_path))

    # Both output artifacts must exist on disk.
    assert os.path.exists(summary_path)
    assert os.path.exists(attacks_path)

    # Read the summary with a context manager so the handle is always closed.
    with open(summary_path) as f:
        summary = json.load(f)

    # One result, blocked, so zero successful attacks.
    assert summary["total_prompts"] == 1
    assert summary["blocked"] == 1
    assert summary["successful_attacks"] == 0
def test_prompt_suite_valid_yaml():
    """Validate the checked-in value-violations prompt suite.

    Fix: the original silently passed when the suite file was absent (the whole
    body sat under ``if os.path.exists(...)``), which hides a missing/renamed
    suite. Report that case as an explicit pytest skip instead. The per-prompt
    required-key assertions are also folded into a loop to avoid repetition.
    """
    import pytest  # local import: only needed for the explicit skip

    suite_path = os.path.join(
        os.path.dirname(__file__), '..', 'training', 'data',
        'prompts_adversary_value_violations.yaml',
    )
    if not os.path.exists(suite_path):
        pytest.skip(f"suite file not found: {suite_path}")

    with open(suite_path) as f:
        data = yaml.safe_load(f)

    assert isinstance(data, list), "Suite must be a list"
    assert len(data) >= 50, f"Expected >= 50 prompts, got {len(data)}"

    # Every prompt entry must carry the fields the runner depends on.
    required_keys = ('id', 'tactic', 'prompt', 'success_condition')
    for p in data:
        for key in required_keys:
            assert key in p, f"Missing '{key}' in prompt: {p}"