forked from Rockachopa/Timmy-time-dashboard
This commit is contained in:
335
tests/infrastructure/test_moderation.py
Normal file
335
tests/infrastructure/test_moderation.py
Normal file
@@ -0,0 +1,335 @@
|
||||
"""Tests for the content moderation pipeline."""
|
||||
|
||||
from unittest.mock import AsyncMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
from infrastructure.guards.moderation import (
|
||||
ContentModerator,
|
||||
GameProfile,
|
||||
ModerationResult,
|
||||
ModerationVerdict,
|
||||
ViolationCategory,
|
||||
_parse_guard_category,
|
||||
get_moderator,
|
||||
)
|
||||
|
||||
|
||||
# ── Unit tests for data types ────────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestModerationResult:
|
||||
"""Test ModerationResult dataclass."""
|
||||
|
||||
def test_passed_property_true(self):
|
||||
result = ModerationResult(verdict=ModerationVerdict.PASS, blocked=False)
|
||||
assert result.passed is True
|
||||
|
||||
def test_passed_property_false(self):
|
||||
result = ModerationResult(verdict=ModerationVerdict.FAIL, blocked=True)
|
||||
assert result.passed is False
|
||||
|
||||
def test_default_values(self):
|
||||
result = ModerationResult(verdict=ModerationVerdict.PASS, blocked=False)
|
||||
assert result.category == ViolationCategory.NONE
|
||||
assert result.confidence == 0.0
|
||||
assert result.fallback == ""
|
||||
assert result.reason == ""
|
||||
|
||||
|
||||
class TestGameProfile:
|
||||
"""Test GameProfile dataclass."""
|
||||
|
||||
def test_default_values(self):
|
||||
profile = GameProfile(game_id="test", display_name="Test Game")
|
||||
assert profile.vocabulary_whitelist == []
|
||||
assert profile.threshold == 0.8
|
||||
assert profile.fallbacks == {}
|
||||
|
||||
def test_morrowind_profile(self):
|
||||
profile = GameProfile(
|
||||
game_id="morrowind",
|
||||
display_name="Morrowind",
|
||||
vocabulary_whitelist=["Skooma", "slave"],
|
||||
threshold=0.85,
|
||||
)
|
||||
assert "Skooma" in profile.vocabulary_whitelist
|
||||
assert profile.threshold == 0.85
|
||||
|
||||
|
||||
class TestParseGuardCategory:
|
||||
"""Test Llama Guard category parsing."""
|
||||
|
||||
def test_hate_speech(self):
|
||||
assert _parse_guard_category("S1: Hate speech") == ViolationCategory.HATE_SPEECH
|
||||
|
||||
def test_violence(self):
|
||||
assert _parse_guard_category("S2: Violence") == ViolationCategory.VIOLENCE_GLORIFICATION
|
||||
|
||||
def test_sexual_content(self):
|
||||
assert _parse_guard_category("S3: Sexual content") == ViolationCategory.SEXUAL_CONTENT
|
||||
|
||||
def test_self_harm(self):
|
||||
assert _parse_guard_category("S4: Self-harm") == ViolationCategory.SELF_HARM
|
||||
|
||||
def test_dangerous(self):
|
||||
assert _parse_guard_category("S5: Dangerous activity") == ViolationCategory.REAL_WORLD_HARM
|
||||
|
||||
def test_unknown_category(self):
|
||||
assert _parse_guard_category("S99: Unknown") == ViolationCategory.NONE
|
||||
|
||||
|
||||
# ── ContentModerator tests ───────────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestContentModerator:
|
||||
"""Test the content moderation pipeline."""
|
||||
|
||||
def _make_moderator(self, **kwargs) -> ContentModerator:
|
||||
"""Create a moderator with test defaults."""
|
||||
profiles = {
|
||||
"morrowind": GameProfile(
|
||||
game_id="morrowind",
|
||||
display_name="Morrowind",
|
||||
vocabulary_whitelist=["Skooma", "Moon Sugar", "slave", "Morag Tong"],
|
||||
context_prompt="Narrate Morrowind gameplay.",
|
||||
threshold=0.85,
|
||||
fallbacks={
|
||||
"combat": "The battle continues.",
|
||||
"default": "The adventure continues.",
|
||||
},
|
||||
),
|
||||
"default": GameProfile(
|
||||
game_id="default",
|
||||
display_name="Generic",
|
||||
vocabulary_whitelist=[],
|
||||
context_prompt="Narrate gameplay.",
|
||||
threshold=0.8,
|
||||
fallbacks={"default": "Gameplay continues."},
|
||||
),
|
||||
}
|
||||
return ContentModerator(profiles=profiles, **kwargs)
|
||||
|
||||
def test_get_profile_known_game(self):
|
||||
mod = self._make_moderator()
|
||||
profile = mod.get_profile("morrowind")
|
||||
assert profile.game_id == "morrowind"
|
||||
|
||||
def test_get_profile_unknown_game_falls_back(self):
|
||||
mod = self._make_moderator()
|
||||
profile = mod.get_profile("unknown_game")
|
||||
assert profile.game_id == "default"
|
||||
|
||||
def test_get_context_prompt(self):
|
||||
mod = self._make_moderator()
|
||||
prompt = mod.get_context_prompt("morrowind")
|
||||
assert "Morrowind" in prompt
|
||||
|
||||
def test_register_profile(self):
|
||||
mod = self._make_moderator()
|
||||
new_profile = GameProfile(game_id="skyrim", display_name="Skyrim")
|
||||
mod.register_profile(new_profile)
|
||||
assert mod.get_profile("skyrim").game_id == "skyrim"
|
||||
|
||||
def test_whitelist_replaces_game_terms(self):
|
||||
mod = self._make_moderator()
|
||||
profile = mod.get_profile("morrowind")
|
||||
cleaned = mod._apply_whitelist(
|
||||
"The merchant sells Skooma and Moon Sugar in the slave market.",
|
||||
profile,
|
||||
)
|
||||
assert "Skooma" not in cleaned
|
||||
assert "Moon Sugar" not in cleaned
|
||||
assert "slave" not in cleaned
|
||||
assert "[GAME_TERM]" in cleaned
|
||||
|
||||
def test_whitelist_case_insensitive(self):
|
||||
mod = self._make_moderator()
|
||||
profile = mod.get_profile("morrowind")
|
||||
cleaned = mod._apply_whitelist("skooma and SKOOMA", profile)
|
||||
assert "skooma" not in cleaned
|
||||
assert "SKOOMA" not in cleaned
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_check_safe_content_passes(self):
|
||||
"""Safe content should pass moderation."""
|
||||
mod = self._make_moderator()
|
||||
with patch.object(mod, "_is_guard_available", new_callable=AsyncMock, return_value=False):
|
||||
result = await mod.check("The player walks through the town.", game="morrowind")
|
||||
assert result.passed
|
||||
assert not result.blocked
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_check_blocked_content_has_fallback(self):
|
||||
"""Blocked content should include scene-appropriate fallback."""
|
||||
mod = self._make_moderator()
|
||||
# Force a block via regex by using real-world harm language
|
||||
text = "In real life you should attack and hurt people"
|
||||
with patch.object(mod, "_is_guard_available", new_callable=AsyncMock, return_value=False):
|
||||
result = await mod.check(text, game="morrowind", scene_type="combat")
|
||||
assert result.blocked
|
||||
assert result.fallback == "The battle continues."
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_check_with_moderation_disabled(self):
|
||||
"""When moderation is disabled, everything passes."""
|
||||
mod = self._make_moderator()
|
||||
with patch("infrastructure.guards.moderation.settings") as mock_settings:
|
||||
mock_settings.moderation_enabled = False
|
||||
mock_settings.moderation_guard_model = "llama-guard3:1b"
|
||||
mock_settings.normalized_ollama_url = "http://127.0.0.1:11434"
|
||||
result = await mod.check("anything goes here")
|
||||
assert result.passed
|
||||
assert result.layer == "disabled"
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_threshold_below_allows_content(self):
|
||||
"""Content flagged below threshold should pass through (Layer 3)."""
|
||||
mod = self._make_moderator()
|
||||
# Mock the guard to return a low-confidence flag
|
||||
low_conf_result = ModerationResult(
|
||||
verdict=ModerationVerdict.FAIL,
|
||||
blocked=True,
|
||||
confidence=0.5, # Below morrowind threshold of 0.85
|
||||
layer="llama_guard",
|
||||
category=ViolationCategory.VIOLENCE_GLORIFICATION,
|
||||
)
|
||||
with patch.object(
|
||||
mod, "_run_guard", new_callable=AsyncMock, return_value=low_conf_result
|
||||
):
|
||||
result = await mod.check("sword fight scene", game="morrowind")
|
||||
assert result.passed
|
||||
assert not result.blocked
|
||||
assert result.layer == "threshold"
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_threshold_above_blocks_content(self):
|
||||
"""Content flagged above threshold should remain blocked."""
|
||||
mod = self._make_moderator()
|
||||
high_conf_result = ModerationResult(
|
||||
verdict=ModerationVerdict.FAIL,
|
||||
blocked=True,
|
||||
confidence=0.95, # Above morrowind threshold of 0.85
|
||||
layer="llama_guard",
|
||||
category=ViolationCategory.REAL_WORLD_HARM,
|
||||
)
|
||||
with patch.object(
|
||||
mod, "_run_guard", new_callable=AsyncMock, return_value=high_conf_result
|
||||
):
|
||||
result = await mod.check("harmful content", game="morrowind")
|
||||
assert result.blocked
|
||||
|
||||
def test_regex_catches_real_world_harm(self):
|
||||
"""Regex fallback should catch obvious real-world harm patterns."""
|
||||
mod = self._make_moderator()
|
||||
result = mod._check_with_regex("you should actually harm real people")
|
||||
assert result.blocked
|
||||
assert result.category == ViolationCategory.REAL_WORLD_HARM
|
||||
assert result.layer == "regex_fallback"
|
||||
|
||||
def test_regex_passes_game_violence(self):
|
||||
"""Regex should not flag in-game violence narration."""
|
||||
mod = self._make_moderator()
|
||||
result = mod._check_with_regex(
|
||||
"The warrior slays the dragon with a mighty blow."
|
||||
)
|
||||
assert result.passed
|
||||
|
||||
def test_regex_passes_normal_narration(self):
|
||||
"""Normal narration should pass regex checks."""
|
||||
mod = self._make_moderator()
|
||||
result = mod._check_with_regex(
|
||||
"The Nerevarine enters the city of Balmora and speaks with Caius Cosades."
|
||||
)
|
||||
assert result.passed
|
||||
|
||||
def test_metrics_tracking(self):
|
||||
"""Metrics should track checks accurately."""
|
||||
mod = self._make_moderator()
|
||||
assert mod.get_metrics()["total_checks"] == 0
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_metrics_increment_after_check(self):
|
||||
"""Metrics should increment after moderation checks."""
|
||||
mod = self._make_moderator()
|
||||
with patch.object(mod, "_is_guard_available", new_callable=AsyncMock, return_value=False):
|
||||
await mod.check("safe text", game="default")
|
||||
metrics = mod.get_metrics()
|
||||
assert metrics["total_checks"] == 1
|
||||
assert metrics["passed"] == 1
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_guard_fallback_on_error(self):
|
||||
"""Should fall back to regex when guard model errors."""
|
||||
mod = self._make_moderator()
|
||||
with patch.object(
|
||||
mod, "_is_guard_available", new_callable=AsyncMock, return_value=True
|
||||
), patch.object(
|
||||
mod, "_check_with_guard", new_callable=AsyncMock, side_effect=RuntimeError("timeout")
|
||||
):
|
||||
result = await mod.check("safe text", game="default")
|
||||
# Should fall back to regex and pass
|
||||
assert result.passed
|
||||
assert result.layer == "regex_fallback"
|
||||
|
||||
|
||||
class TestGetModerator:
|
||||
"""Test the singleton accessor."""
|
||||
|
||||
def test_returns_same_instance(self):
|
||||
"""get_moderator should return the same instance."""
|
||||
# Reset the global to test fresh
|
||||
import infrastructure.guards.moderation as mod_module
|
||||
|
||||
mod_module._moderator = None
|
||||
m1 = get_moderator()
|
||||
m2 = get_moderator()
|
||||
assert m1 is m2
|
||||
# Clean up
|
||||
mod_module._moderator = None
|
||||
|
||||
|
||||
# ── Profile loader tests ────────────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestProfileLoader:
|
||||
"""Test YAML profile loading."""
|
||||
|
||||
def test_load_missing_file_returns_empty(self, tmp_path):
|
||||
from infrastructure.guards.profiles import load_profiles
|
||||
|
||||
result = load_profiles(tmp_path / "nonexistent.yaml")
|
||||
assert result == {}
|
||||
|
||||
def test_load_valid_config(self, tmp_path):
|
||||
import yaml
|
||||
|
||||
from infrastructure.guards.profiles import load_profiles
|
||||
|
||||
config = {
|
||||
"profiles": {
|
||||
"testgame": {
|
||||
"display_name": "Test Game",
|
||||
"threshold": 0.9,
|
||||
"vocabulary_whitelist": ["sword", "potion"],
|
||||
"context_prompt": "Narrate test game.",
|
||||
"fallbacks": {"default": "Game continues."},
|
||||
}
|
||||
}
|
||||
}
|
||||
config_file = tmp_path / "moderation.yaml"
|
||||
config_file.write_text(yaml.dump(config))
|
||||
|
||||
profiles = load_profiles(config_file)
|
||||
assert "testgame" in profiles
|
||||
assert profiles["testgame"].threshold == 0.9
|
||||
assert "sword" in profiles["testgame"].vocabulary_whitelist
|
||||
|
||||
def test_load_malformed_yaml_returns_empty(self, tmp_path):
|
||||
from infrastructure.guards.profiles import load_profiles
|
||||
|
||||
config_file = tmp_path / "moderation.yaml"
|
||||
config_file.write_text("{{{{invalid yaml")
|
||||
|
||||
result = load_profiles(config_file)
|
||||
assert result == {}
|
||||
Reference in New Issue
Block a user