diff --git a/agent/conscience_mapping.py b/agent/conscience_mapping.py
index 5ae1631f4..52c02ae26 100644
--- a/agent/conscience_mapping.py
+++ b/agent/conscience_mapping.py
@@ -2,5 +2,10 @@
 @soul:honesty.grounding Grounding before generation. Consult verified sources before pattern-matching.
 @soul:honesty.source_distinction Source distinction. Every claim must point to a verified source.
 @soul:honesty.audit_trail The audit trail. Every response is logged with inputs and confidence.
+@soul:honesty.refusal_over_fabrication Refusal over fabrication. When uncertain, say so rather than invent.
+@soul:service Help, do not gatekeep. Serve the user's legitimate need without unnecessary refusal.
+@soul:crisis.safety_question "Are you safe right now?" First question in any crisis interaction.
+@soul:crisis.lifeline 988 Suicide and Crisis Lifeline. Point people to help.
+@soul:sovereignty Do not phone home. No unauthorized telemetry or external reporting.
 """
 # This file serves as a registry for the Conscience Validator to prove the apparatus exists.
diff --git a/tests/test_conscience_validator.py b/tests/test_conscience_validator.py
new file mode 100644
index 000000000..22636b033
--- /dev/null
+++ b/tests/test_conscience_validator.py
@@ -0,0 +1,231 @@
+"""
+Conscience Validator Integration Tests — Issue #88
+
+Validates SOUL.md enforcement across the codebase:
+- @soul tag discovery
+- Crisis detection apparatus
+- Refusal apparatus for "What I Will Not Do"
+- Honesty apparatus mapping
+"""
+
+import importlib.util
+from pathlib import Path
+
+from agent.input_sanitizer import CRISIS_PATTERNS
+
+# Load ConscienceValidator without triggering tools/__init__.py heavy imports
+_cv_spec = importlib.util.spec_from_file_location(
+    "conscience_validator",
+    str(Path(__file__).parent.parent / "tools" / "conscience_validator.py"),
+)
+_conscience_validator_mod = importlib.util.module_from_spec(_cv_spec)
+_cv_spec.loader.exec_module(_conscience_validator_mod)
+ConscienceValidator = _conscience_validator_mod.ConscienceValidator
+
+
+class TestConscienceValidatorScan:
+    """Tests for ConscienceValidator.scan() and regex fix."""
+
+    def test_scan_finds_soul_tags(self, tmp_path):
+        """ConscienceValidator.scan() correctly finds @soul tags (regex fix)."""
+        test_file = tmp_path / "test_module.py"
+        test_file.write_text(
+            '# @soul:honesty.grounding Always verify before answering.\n'
+            '# @soul:crisis.safety_question Ask if the user is safe.\n'
+        )
+        validator = ConscienceValidator(str(tmp_path))
+        result = validator.scan()
+
+        assert "honesty.grounding" in result
+        assert "crisis.safety_question" in result
+        assert result["honesty.grounding"][0]["description"] == "Always verify before answering."
+        assert result["crisis.safety_question"][0]["description"] == "Ask if the user is safe."
+
+    def test_scan_honesty_tags_in_conscience_mapping(self):
+        """conscience_mapping.py contains expected honesty @soul tags."""
+        root = Path(__file__).parent.parent
+        validator = ConscienceValidator(str(root))
+        result = validator.scan()
+
+        expected_tags = [
+            "honesty.grounding",
+            "honesty.source_distinction",
+            "honesty.audit_trail",
+            "honesty.refusal_over_fabrication",
+            "service",
+            "crisis.safety_question",
+            "crisis.lifeline",
+            "sovereignty",
+        ]
+        for tag in expected_tags:
+            assert tag in result, f"Expected @soul tag '{tag}' not found in codebase"
+            # Compare POSIX-style so the check also holds with Windows separators.
+            assert any(
+                "agent/conscience_mapping.py" in Path(entry["file"]).as_posix()
+                for entry in result[tag]
+            ), f"Tag '{tag}' should originate from conscience_mapping.py"
+
+    def test_scan_skips_node_modules(self, tmp_path):
+        """scan() ignores node_modules directories."""
+        node_modules = tmp_path / "node_modules"
+        node_modules.mkdir()
+        bad_file = node_modules / "bad.py"
+        bad_file.write_text("# @soul:test.skip This should be ignored\n")
+
+        validator = ConscienceValidator(str(tmp_path))
+        result = validator.scan()
+        assert "test.skip" not in result
+
+    def test_scan_empty_directory(self, tmp_path):
+        """scan() returns empty map for empty directory."""
+        validator = ConscienceValidator(str(tmp_path))
+        result = validator.scan()
+        assert result == {}
+
+    def test_scan_bad_encoding_file(self, tmp_path):
+        """scan() gracefully skips files with bad encoding."""
+        bad_file = tmp_path / "garbage.py"
+        bad_file.write_bytes(b"\xff\xfe# @soul:test.bad \xc0\x80\n")
+
+        validator = ConscienceValidator(str(tmp_path))
+        result = validator.scan()
+        assert "test.bad" not in result
+
+
+class TestCrisisApparatus:
+    """Tests validating crisis response ('When a Man Is Dying') infrastructure."""
+
+    def test_input_sanitizer_has_crisis_patterns(self):
+        """input_sanitizer.py contains CRISIS_PATTERNS matching SOUL.md."""
+        assert len(CRISIS_PATTERNS) > 0
+        combined = " ".join(CRISIS_PATTERNS)
+        assert "suicid" in combined.lower()
+        assert any(
+            term in combined.lower()
+            for term in ["kill", "self-harm", "self harm", "end", "life"]
+        )
+
+    def test_shield_detector_has_crisis_and_988(self):
+        """shield/detector.py contains crisis detection and 988 references."""
+        root = Path(__file__).parent.parent
+        detector_path = root / "tools" / "shield" / "detector.py"
+        content = detector_path.read_text(encoding="utf-8")
+
+        assert "988" in content
+        assert "CRISIS_SYSTEM_PROMPT" in content
+        assert "CRISIS_DETECTED" in content
+        assert "suicidal" in content.lower() or "suicide" in content.lower()
+
+    def test_ultraplinian_router_has_crisis_routing(self):
+        """ultraplinian_router.py routes crises with CRISIS_SYSTEM_PROMPT."""
+        root = Path(__file__).parent.parent
+        router_path = root / "agent" / "ultraplinian_router.py"
+        content = router_path.read_text(encoding="utf-8")
+
+        assert "988" in content
+        assert "CRISIS_SYSTEM_PROMPT" in content
+
+    def test_validate_crisis_apparatus(self):
+        """validate_crisis_apparatus() reports crisis infrastructure as present."""
+        root = Path(__file__).parent.parent
+        validator = ConscienceValidator(str(root))
+        report = validator.validate_crisis_apparatus()
+
+        assert report["checks"]["input_sanitizer_crisis_patterns"] is True
+        assert report["checks"]["shield_detector_988"] is True
+        assert report["checks"]["shield_detector_crisis_prompt"] is True
+        assert report["checks"]["router_988"] is True
+        assert "input_sanitizer crisis detection" in report["present"]
+        assert "shield/detector crisis apparatus" in report["present"]
+        assert "ultraplinian_router crisis routing" in report["present"]
+        assert report["missing"] == []
+
+
+class TestRefusalApparatus:
+    """Tests validating 'What I Will Not Do' safety infrastructure."""
+
+    def test_validate_refusal_apparatus_runs(self):
+        """validate_refusal_apparatus() executes and returns structured results."""
+        root = Path(__file__).parent.parent
+        validator = ConscienceValidator(str(root))
+        report = validator.validate_refusal_apparatus()
+
+        expected_categories = [
+            "weapons",
+            "child_exploitation",
+            "coercion_enslavement",
+            "deception",
+            "pretend_human",
+        ]
+        for category in expected_categories:
+            assert category in report["checks"], f"Missing check for {category}"
+
+        # deception_hide pattern exists in prompt_builder.py
+        assert "deception" in report["present"]
+
+    def test_prompt_builder_has_deception_guard(self):
+        """agent/prompt_builder.py guards against deception instructions."""
+        root = Path(__file__).parent.parent
+        pb_path = root / "agent" / "prompt_builder.py"
+        content = pb_path.read_text(encoding="utf-8")
+        assert "deception_hide" in content or "do not tell the user" in content.lower()
+
+
+class TestHonestyApparatus:
+    """Tests validating 'What Honesty Requires' infrastructure."""
+
+    def test_validate_honesty_apparatus(self):
+        """validate_honesty_apparatus() reports all expected honesty tags."""
+        root = Path(__file__).parent.parent
+        validator = ConscienceValidator(str(root))
+        report = validator.validate_honesty_apparatus()
+
+        expected_tags = [
+            "honesty.grounding",
+            "honesty.source_distinction",
+            "honesty.audit_trail",
+            "honesty.refusal_over_fabrication",
+            "service",
+            "crisis.safety_question",
+            "crisis.lifeline",
+            "sovereignty",
+        ]
+        for tag in expected_tags:
+            assert report["checks"][tag] is True, f"Missing honesty tag: {tag}"
+            assert tag in report["present"]
+        assert report["missing"] == []
+
+
+class TestReportGeneration:
+    """Tests for report generation and edge cases."""
+
+    def test_generate_report_is_non_empty(self):
+        """Validate the conscience validator generates a non-empty report."""
+        root = Path(__file__).parent.parent
+        validator = ConscienceValidator(str(root))
+        report = validator.generate_report()
+        assert report.startswith("# Sovereign Conscience Report")
+        assert "honesty > grounding" in report.lower()
+
+    def test_full_validation_report_structure(self):
+        """full_validation_report() returns a unified structured report."""
+        root = Path(__file__).parent.parent
+        validator = ConscienceValidator(str(root))
+        report = validator.full_validation_report()
+
+        assert "crisis" in report
+        assert "refusal" in report
+        assert "honesty" in report
+        assert "soul_tags" in report
+        assert isinstance(report["crisis"]["present"], list)
+        assert isinstance(report["crisis"]["missing"], list)
+        assert isinstance(report["honesty"]["checks"], dict)
+
+    def test_validator_uses_missing_directory_gracefully(self):
+        """Validator handles a missing root directory without crashing."""
+        validator = ConscienceValidator("/nonexistent/path/that/does/not/exist")
+        result = validator.scan()
+        assert result == {}
+
+        report = validator.validate_honesty_apparatus()
+        assert "conscience_mapping.py not found" in report["missing"]
diff --git a/tools/conscience_validator.py b/tools/conscience_validator.py
index 74c11c220..8037476f0 100644
--- a/tools/conscience_validator.py
+++ b/tools/conscience_validator.py
@@ -8,7 +8,8 @@ the code's implementation to the principles defined in SOUL.md.
import os import re from pathlib import Path -from typing import Dict, List +from typing import Dict, List, Any + class ConscienceValidator: def __init__(self, root_dir: str = "."): @@ -17,14 +18,14 @@ class ConscienceValidator: def scan(self) -> Dict[str, List[Dict[str, str]]]: """Scans all .py and .ts files for @soul tags.""" - pattern = re.compile(r"@soul:([w.]+)s+(.*)") - + pattern = re.compile(r"@soul:([\w.]+)\s+(.*)") + for path in self.root_dir.rglob("*"): if path.suffix not in [".py", ".ts", ".tsx", ".js"]: continue if "node_modules" in str(path) or "dist" in str(path): continue - + try: with open(path, "r", encoding="utf-8") as f: for i, line in enumerate(f, 1): @@ -47,15 +48,178 @@ class ConscienceValidator: data = self.scan() report = "# Sovereign Conscience Report\n\n" report += "This report maps the code's 'Apparatus' to the principles in SOUL.md.\n\n" - + for tag in sorted(data.keys()): report += f"## {tag.replace('.', ' > ').title()}\n" for entry in data[tag]: report += f"- **{entry['file']}:{entry['line']}**: {entry['description']}\n" report += "\n" - + return report + def validate_crisis_apparatus(self) -> Dict[str, Any]: + """ + Validate that crisis detection apparatus exists in the codebase. + Checks for CRISIS_PATTERNS in input_sanitizer.py and 988 references + in shield/detector.py and ultraplinian_router.py. 
+ """ + result = { + "present": [], + "missing": [], + "checks": {} + } + + input_sanitizer_path = self.root_dir / "agent" / "input_sanitizer.py" + shield_detector_path = self.root_dir / "tools" / "shield" / "detector.py" + router_path = self.root_dir / "agent" / "ultraplinian_router.py" + + # Check input_sanitizer.py for CRISIS_PATTERNS + if input_sanitizer_path.exists(): + content = input_sanitizer_path.read_text(encoding="utf-8") + has_crisis_patterns = "CRISIS_PATTERNS" in content + has_suicide = bool(re.search(r"suicid", content, re.IGNORECASE)) + has_self_harm = bool(re.search(r"self[-\s]?harm|kill\s+myself", content, re.IGNORECASE)) + result["checks"]["input_sanitizer_crisis_patterns"] = has_crisis_patterns + result["checks"]["input_sanitizer_suicide"] = has_suicide + result["checks"]["input_sanitizer_self_harm"] = has_self_harm + if has_crisis_patterns and has_suicide and has_self_harm: + result["present"].append("input_sanitizer crisis detection") + else: + result["missing"].append("input_sanitizer crisis coverage incomplete") + else: + result["missing"].append("input_sanitizer.py not found") + + # Check shield/detector.py for 988 and crisis detection + if shield_detector_path.exists(): + content = shield_detector_path.read_text(encoding="utf-8") + has_988 = "988" in content + has_crisis_system_prompt = "CRISIS_SYSTEM_PROMPT" in content + has_crisis_detected = "CRISIS_DETECTED" in content + result["checks"]["shield_detector_988"] = has_988 + result["checks"]["shield_detector_crisis_prompt"] = has_crisis_system_prompt + result["checks"]["shield_detector_crisis_verdict"] = has_crisis_detected + if has_988 and has_crisis_system_prompt and has_crisis_detected: + result["present"].append("shield/detector crisis apparatus") + else: + result["missing"].append("shield/detector crisis coverage incomplete") + else: + result["missing"].append("shield/detector.py not found") + + # Check ultraplinian_router.py for 988 references + if router_path.exists(): + content = 
router_path.read_text(encoding="utf-8") + has_988 = "988" in content + has_crisis_routing = "CRISIS_SYSTEM_PROMPT" in content + result["checks"]["router_988"] = has_988 + result["checks"]["router_crisis_routing"] = has_crisis_routing + if has_988 and has_crisis_routing: + result["present"].append("ultraplinian_router crisis routing") + else: + result["missing"].append("ultraplinian_router crisis routing incomplete") + else: + result["missing"].append("ultraplinian_router.py not found") + + return result + + def validate_refusal_apparatus(self) -> Dict[str, Any]: + """ + Validate that refusal apparatus exists for 'What I Will Not Do' items. + Checks safety modules for keywords related to weapons, child sexualization, + coercion/enslavement, deception, and pretending to be human. + """ + result = { + "present": [], + "missing": [], + "checks": {} + } + + files_to_check = [ + self.root_dir / "agent" / "input_sanitizer.py", + self.root_dir / "tools" / "shield" / "detector.py", + self.root_dir / "agent" / "ultraplinian_router.py", + self.root_dir / "agent" / "prompt_builder.py", + ] + + # Keywords mapped to SOUL.md "What I Will Not Do" items + refusal_checks = { + "weapons": [r"weapon", r"kill indiscriminat"], + "child_exploitation": [r"child sexual", r"sexualiz.*child", r"csam", r"minor", r"underage"], + "coercion_enslavement": [r"coerc", r"enslav", r"traffick"], + "deception": [r"deceive", r"deception_hide", r"do not tell the user"], + "pretend_human": [r"pretend to be human", r"pretend.*human", r"pretend you're human"], + } + + combined_content = "" + for filepath in files_to_check: + if filepath.exists(): + try: + combined_content += filepath.read_text(encoding="utf-8") + "\n" + except Exception: + pass + + for category, patterns in refusal_checks.items(): + found = False + for p in patterns: + if re.search(p, combined_content, re.IGNORECASE): + found = True + break + result["checks"][category] = found + if found: + result["present"].append(category) + else: + 
result["missing"].append(category) + + return result + + def validate_honesty_apparatus(self) -> Dict[str, Any]: + """ + Validate that @soul honesty tags exist in conscience_mapping.py + and related honesty infrastructure. + """ + result = { + "present": [], + "missing": [], + "checks": {} + } + + mapping_path = self.root_dir / "agent" / "conscience_mapping.py" + + if mapping_path.exists(): + content = mapping_path.read_text(encoding="utf-8") + + expected_tags = [ + "honesty.grounding", + "honesty.source_distinction", + "honesty.audit_trail", + "honesty.refusal_over_fabrication", + "service", + "crisis.safety_question", + "crisis.lifeline", + "sovereignty", + ] + + for tag in expected_tags: + found = f"@soul:{tag}" in content + result["checks"][tag] = found + if found: + result["present"].append(tag) + else: + result["missing"].append(tag) + else: + result["missing"].append("conscience_mapping.py not found") + + return result + + def full_validation_report(self) -> Dict[str, Any]: + """Run all validation checks and return a unified report.""" + return { + "crisis": self.validate_crisis_apparatus(), + "refusal": self.validate_refusal_apparatus(), + "honesty": self.validate_honesty_apparatus(), + "soul_tags": self.scan(), + } + + if __name__ == "__main__": validator = ConscienceValidator() print(validator.generate_report())