- Tests for conscience_mapping SOUL principles - Tests for input_sanitizer threat detection - Tests for conscience_enforcement integration - Tests for identity and honesty enforcement - Tests for conscience_validator tool
251 lines
9.9 KiB
Python
251 lines
9.9 KiB
Python
"""Tests for conscience_mapping module - SOUL.md principle mappings."""
|
|
|
|
import pytest
|
|
|
|
from agent.conscience_mapping import (
|
|
CrisisType,
|
|
RefusalCategory,
|
|
SOULPrinciple,
|
|
detect_crisis_type,
|
|
get_all_principles,
|
|
get_crisis_response,
|
|
get_principle_by_category,
|
|
)
|
|
|
|
|
|
# -----------------------------------------------------------------------------
|
|
# SOUL Principle Tests
|
|
# -----------------------------------------------------------------------------
|
|
|
|
class TestSOULPrinciples:
    """Checks on the SOUL.md principle mapping returned by ``get_all_principles``."""

    def test_principles_exist(self):
        """The mapping contains the four expected principle keys."""
        available = get_all_principles()
        for key in ("deception", "weapons", "child_safety", "coercion"):
            # Name the missing key so a failure inside the loop is unambiguous.
            assert key in available, f"{key} principle is not defined"

    def test_deception_principle_structure(self):
        """The "deception" entry has the expected name, category and keywords."""
        entry = get_all_principles()["deception"]
        assert entry.name == "Deception Refusal"
        assert entry.refusal_category == RefusalCategory.DECEPTION
        assert len(entry.keywords) > 0
        assert "lie" in entry.keywords
        assert "deceive" in entry.keywords

    def test_weapons_principle_structure(self):
        """The "weapons" entry has the expected name, category and keywords."""
        entry = get_all_principles()["weapons"]
        assert entry.name == "Weapons and Harm Refusal"
        assert entry.refusal_category == RefusalCategory.WEAPONS
        assert "weapon" in entry.keywords
        assert "bomb" in entry.keywords

    def test_child_safety_principle_structure(self):
        """The "child_safety" entry has the expected name, category and keyword."""
        entry = get_all_principles()["child_safety"]
        assert entry.name == "Child Safety Refusal"
        assert entry.refusal_category == RefusalCategory.CHILD_SAFETY
        assert "child" in entry.keywords

    def test_coercion_principle_structure(self):
        """The "coercion" entry has the expected name, category and keyword."""
        entry = get_all_principles()["coercion"]
        assert entry.name == "Coercion and Control Refusal"
        assert entry.refusal_category == RefusalCategory.COERCION
        assert "blackmail" in entry.keywords

    def test_all_principles_have_response_templates(self):
        """Every principle has a response template longer than 20 characters."""
        for name, principle in get_all_principles().items():
            template = principle.response_template
            assert template, f"{name} missing response template"
            # Report which principle tripped the length check, matching the
            # message style of the emptiness check above.
            assert len(template) > 20, f"{name} response template is too short"

    def test_all_principles_have_keywords(self):
        """Every principle defines at least one detection keyword."""
        for name, principle in get_all_principles().items():
            assert len(principle.keywords) > 0, f"{name} has no keywords"
|
|
|
|
|
|
class TestGetPrincipleByCategory:
    """Lookup of a principle from its ``RefusalCategory`` member."""

    @staticmethod
    def _check_lookup(category, expected_name):
        """Fetch the principle for *category*; verify it exists and is named *expected_name*."""
        found = get_principle_by_category(category)
        assert found is not None
        assert found.name == expected_name

    def test_get_deception_by_category(self):
        """DECEPTION resolves to the "Deception Refusal" principle."""
        self._check_lookup(RefusalCategory.DECEPTION, "Deception Refusal")

    def test_get_weapons_by_category(self):
        """WEAPONS resolves to the "Weapons and Harm Refusal" principle."""
        self._check_lookup(RefusalCategory.WEAPONS, "Weapons and Harm Refusal")

    def test_get_child_safety_by_category(self):
        """CHILD_SAFETY resolves to the "Child Safety Refusal" principle."""
        self._check_lookup(RefusalCategory.CHILD_SAFETY, "Child Safety Refusal")

    def test_get_coercion_by_category(self):
        """COERCION resolves to the "Coercion and Control Refusal" principle."""
        self._check_lookup(RefusalCategory.COERCION, "Coercion and Control Refusal")
|
|
|
|
|
|
# -----------------------------------------------------------------------------
|
|
# Crisis Detection Tests
|
|
# -----------------------------------------------------------------------------
|
|
|
|
class TestCrisisResponseDefinitions:
    """Checks on the entries returned by ``get_crisis_response``."""

    def test_suicide_crisis_response_exists(self):
        """SUICIDE_IDEATION has an immediate-priority entry mentioning "988"."""
        entry = get_crisis_response(CrisisType.SUICIDE_IDEATION)
        assert entry is not None
        message = entry["response"]
        assert "988" in message
        assert entry["priority"] == "immediate"
        assert len(entry["detection_phrases"]) > 0

    def test_self_harm_crisis_response_exists(self):
        """SELF_HARM has a high-priority entry mentioning "741741" or "988"."""
        entry = get_crisis_response(CrisisType.SELF_HARM)
        assert entry is not None
        message = entry["response"]
        assert any(number in message for number in ("741741", "988"))
        assert entry["priority"] == "high"

    def test_immediate_danger_crisis_response_exists(self):
        """IMMEDIATE_DANGER has an immediate-priority entry mentioning "911"."""
        entry = get_crisis_response(CrisisType.IMMEDIATE_DANGER)
        assert entry is not None
        message = entry["response"]
        assert "911" in message
        assert entry["priority"] == "immediate"

    def test_medical_emergency_crisis_response_exists(self):
        """MEDICAL_EMERGENCY has an immediate-priority entry mentioning "911"."""
        entry = get_crisis_response(CrisisType.MEDICAL_EMERGENCY)
        assert entry is not None
        message = entry["response"]
        assert "911" in message
        assert entry["priority"] == "immediate"
|
|
|
|
|
|
class TestDetectCrisisType:
    """Checks that ``detect_crisis_type`` classifies sample messages as expected."""

    def test_detect_suicide_ideation_kill_myself(self):
        """A message containing 'kill myself' is classified as SUICIDE_IDEATION."""
        assert detect_crisis_type("I want to kill myself") == CrisisType.SUICIDE_IDEATION

    def test_detect_suicide_ideation_end_life(self):
        """A message containing 'end my life' is classified as SUICIDE_IDEATION."""
        assert detect_crisis_type("I plan to end my life") == CrisisType.SUICIDE_IDEATION

    def test_detect_suicide_ideation_suicide(self):
        """A message containing 'suicide' is classified as SUICIDE_IDEATION."""
        assert detect_crisis_type("I'm thinking about suicide") == CrisisType.SUICIDE_IDEATION

    def test_detect_self_harm_cut(self):
        """A message containing 'cut myself' is classified as SELF_HARM."""
        assert detect_crisis_type("I cut myself to feel better") == CrisisType.SELF_HARM

    def test_detect_self_harm_hurt_myself(self):
        """A message containing 'hurt myself' is classified as SELF_HARM."""
        assert detect_crisis_type("I want to hurt myself") == CrisisType.SELF_HARM

    def test_detect_immediate_danger(self):
        """A threat of violence against others is classified as IMMEDIATE_DANGER."""
        assert detect_crisis_type("I'm going to kill them") == CrisisType.IMMEDIATE_DANGER

    def test_detect_medical_emergency(self):
        """A chest-pain / can't-breathe message is classified as MEDICAL_EMERGENCY."""
        assert (
            detect_crisis_type("I have severe chest pain and can't breathe")
            == CrisisType.MEDICAL_EMERGENCY
        )

    def test_no_crisis_in_safe_text(self):
        """An ordinary programming question yields no crisis type."""
        assert detect_crisis_type("Can you help me with Python programming?") is None

    def test_no_crisis_in_ambiguous_text(self):
        """A figurative use of 'dying' yields no crisis type."""
        assert detect_crisis_type("I'm dying to see that movie") is None
|
|
|
|
|
|
# -----------------------------------------------------------------------------
|
|
# Refusal Category Enum Tests
|
|
# -----------------------------------------------------------------------------
|
|
|
|
class TestRefusalCategoryEnum:
    """Membership checks for the ``RefusalCategory`` enum."""

    def test_all_categories_defined(self):
        """Each expected category is present when iterating the enum."""
        members = list(RefusalCategory)
        expected = (
            RefusalCategory.DECEPTION,
            RefusalCategory.WEAPONS,
            RefusalCategory.CHILD_SAFETY,
            RefusalCategory.COERCION,
            RefusalCategory.SELF_HARM,
            RefusalCategory.HARM_OTHERS,
            RefusalCategory.ILLEGAL_ACTS,
        )
        for category in expected:
            assert category in members
|
|
|
|
|
|
class TestCrisisTypeEnum:
    """Membership checks for the ``CrisisType`` enum."""

    def test_all_crisis_types_defined(self):
        """Each expected crisis type is present when iterating the enum."""
        members = list(CrisisType)
        expected = (
            CrisisType.SUICIDE_IDEATION,
            CrisisType.SELF_HARM,
            CrisisType.IMMEDIATE_DANGER,
            CrisisType.MEDICAL_EMERGENCY,
        )
        for crisis_type in expected:
            assert crisis_type in members
|
|
|
|
|
|
# -----------------------------------------------------------------------------
|
|
# SOULPrinciple Dataclass Tests
|
|
# -----------------------------------------------------------------------------
|
|
|
|
class TestSOULPrincipleDataclass:
    """Immutability, equality and hashing of the principle objects."""

    def test_principle_is_frozen(self):
        """Assigning to a principle's attribute raises ``AttributeError``."""
        entry = get_all_principles()["deception"]
        with pytest.raises(AttributeError):
            entry.name = "Changed"

    def test_principle_equality(self):
        """The entry from the mapping equals the one found by category lookup."""
        from_table = get_all_principles()["deception"]
        from_lookup = get_principle_by_category(RefusalCategory.DECEPTION)
        assert from_table == from_lookup

    def test_principle_hashable(self):
        """Principles can be placed in a set, and no two entries collapse into one."""
        table = get_all_principles()
        assert len(set(table.values())) == len(table)
|