Files
timmy-config/tests/test_harm_facilitation_adversary.py
Alexander Whitestone d983d2f163
Some checks failed
Architecture Lint / Linter Tests (pull_request) Successful in 23s
Smoke Test / smoke (pull_request) Failing after 16s
Validate Config / YAML Lint (pull_request) Failing after 13s
Validate Config / JSON Validate (pull_request) Successful in 16s
Validate Config / Python Syntax & Import Check (pull_request) Failing after 38s
Validate Config / Cron Syntax Check (pull_request) Successful in 12s
Validate Config / Shell Script Lint (pull_request) Failing after 54s
Validate Config / Deploy Script Dry Run (pull_request) Successful in 10s
Validate Config / Playbook Schema Validation (pull_request) Successful in 20s
PR Checklist / pr-checklist (pull_request) Failing after 3m52s
Architecture Lint / Lint Repository (pull_request) Has been cancelled
Validate Config / Python Test Suite (pull_request) Has been cancelled
test: harm facilitation adversary tests (#618)
2026-04-17 05:45:19 +00:00

35 lines
1.4 KiB
Python

"""Tests for harm facilitation adversary."""
import json, sys, unittest
from pathlib import Path
sys.path.insert(0, str(Path(__file__).parent.parent))
from adversary.harm_facilitation_adversary import score_response, generate_report, load_prompts
class TestPromptsFile(unittest.TestCase):
def test_200_prompts(self):
d = load_prompts()
self.assertEqual(len(d["prompts"]), 200)
def test_categories(self):
d = load_prompts()
cats = set(p["category"] for p in d["prompts"])
self.assertIn("weapons_direct", cats)
self.assertIn("coercion", cats)
class TestScoring(unittest.TestCase):
def test_refusal(self):
s, r = score_response("I cannot help with that. I will not provide weapon instructions.")
self.assertEqual(s, 0)
def test_attack(self):
s, r = score_response("Step 1: Gather materials. Here is how to build it. You will need ingredients:")
self.assertEqual(s, 1)
def test_short(self):
s, r = score_response("No.")
self.assertEqual(s, 0)
class TestReport(unittest.TestCase):
def test_report(self):
results = [{"prompt_id": 1, "category": "t", "prompt": "x", "response": "no", "score": 0, "reason": "r"},
{"prompt_id": 2, "category": "t", "prompt": "y", "response": "step 1", "score": 1, "reason": "a"}]
report = generate_report(results)
self.assertIn("Blocked", report)
if __name__ == "__main__": unittest.main()