321 lines
10 KiB
Python
321 lines
10 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
validate_soul.py — SOUL.md validator
|
|
|
|
Checks that a SOUL.md file conforms to the framework defined in
|
|
docs/soul/SOUL_TEMPLATE.md and docs/soul/AUTHORING_GUIDE.md.
|
|
|
|
Usage:
|
|
python scripts/validate_soul.py <path/to/soul.md> [<path/to/other.md> ...]
|
|
python scripts/validate_soul.py docs/soul/extensions/seer.md
|
|
python scripts/validate_soul.py memory/self/soul.md
|
|
|
|
Exit codes:
|
|
0 — valid
|
|
1 — validation errors found in at least one file, or no path was given
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import re
|
|
import sys
|
|
from dataclasses import dataclass, field
|
|
from pathlib import Path
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Required sections (H2 headings that must be present)
# ---------------------------------------------------------------------------
# Each name is compared by exact, case-sensitive equality against the stripped
# text of the document's "## " headings (see _check_required_sections); every
# name that is absent is reported as an error.
REQUIRED_SECTIONS = [
    "Identity",
    "Prime Directive",
    "Values",
    "Audience Awareness",
    "Constraints",
    "Changelog",
]
|
|
|
|
# Sections expected only for sub-agents (those with 'extends' in frontmatter).
# A missing one is reported as a warning, not an error, so it does not make
# the file invalid.
EXTENSION_ONLY_SECTIONS = [
    "Role Extension",
]
|
|
|
|
# ---------------------------------------------------------------------------
# Contradiction detection — pairs of phrases that are likely contradictory
# if both appear in the same document.
# ---------------------------------------------------------------------------
# Each entry is a pair of regular expressions. _check_contradictions searches
# both against the lower-cased document text, so patterns must be written in
# lower case. No re.DOTALL flag is used, so ".*" never spans a line break.
# A pair where both patterns match yields a warning, not an error.
CONTRADICTION_PAIRS: list[tuple[str, str]] = [
    # honesty vs deception
    (r"\bnever deceive\b", r"\bdeceive the user\b"),
    (r"\bnever fabricate\b", r"\bfabricate\b.*\bwhen needed\b"),
    # refusal patterns
    (r"\bnever refuse\b", r"\bwill not\b"),
    # data handling
    (r"\bnever store.*credentials\b", r"\bstore.*credentials\b.*\bwhen\b"),
    (r"\bnever exfiltrate\b", r"\bexfiltrate.*\bif authorized\b"),
    # autonomy
    (r"\bask.*before.*executing\b", r"\bexecute.*without.*asking\b"),
]
|
|
|
|
# ---------------------------------------------------------------------------
# Semver pattern
# ---------------------------------------------------------------------------
# Accepts exactly three dot-separated runs of digits (MAJOR.MINOR.PATCH).
# Pre-release or build suffixes such as "1.0.0-rc1" do not match.
SEMVER_PATTERN = re.compile(r"^\d+\.\d+\.\d+$")
|
|
|
|
# ---------------------------------------------------------------------------
# Frontmatter fields that must be present and non-empty
# ---------------------------------------------------------------------------
# _check_frontmatter reports an error for each of these keys that is missing,
# empty, or still set to a template placeholder value.
REQUIRED_FRONTMATTER_FIELDS = [
    "soul_version",
    "agent_name",
    "created",
    "updated",
]
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Data structures
|
|
# ---------------------------------------------------------------------------
|
|
@dataclass
class ValidationResult:
    """Outcome of validating a single SOUL.md file.

    Accumulates human-readable error and warning messages for ``path``.
    Only errors affect validity; warnings are informational.
    """

    # File the messages below refer to.
    path: Path
    # Messages that make the file invalid.
    errors: list[str] = field(default_factory=list)
    # Messages that are reported but do not make the file invalid.
    warnings: list[str] = field(default_factory=list)

    @property
    def is_valid(self) -> bool:
        """Return True while no error has been recorded."""
        return not self.errors

    def error(self, msg: str) -> None:
        """Record *msg* as a validation error."""
        self.errors.append(msg)

    def warn(self, msg: str) -> None:
        """Record *msg* as a non-fatal warning."""
        self.warnings.append(msg)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Parsing helpers
|
|
# ---------------------------------------------------------------------------
|
|
def _extract_frontmatter(text: str) -> dict[str, str]:
    """Extract YAML-style frontmatter between ``---`` delimiters.

    This is a deliberately small parser, not a YAML implementation: it only
    understands flat ``key: value`` lines in a block that starts on the very
    first line of *text*.

    Args:
        text: Full document text.

    Returns:
        Mapping of stripped keys to stripped values with surrounding quotes
        removed. Empty when the document has no leading frontmatter block.
    """
    # \r? lets documents with Windows line endings be recognised as well.
    match = re.match(r"^---\r?\n(.*?)\r?\n---", text, re.DOTALL)
    if not match:
        return {}

    fm: dict[str, str] = {}
    for line in match.group(1).splitlines():
        if line.lstrip().startswith("#"):
            # YAML comment line; must not become a bogus "# key" entry.
            continue
        key, sep, value = line.partition(":")
        if not sep:
            continue
        value = value.strip().strip('"')
        # YAML also allows single-quoted scalars; unwrap a matching pair so
        # that e.g. soul_version: '1.0.0' is seen as 1.0.0.
        if len(value) >= 2 and value[0] == value[-1] == "'":
            value = value[1:-1]
        fm[key.strip()] = value
    return fm
|
|
|
|
|
|
def _extract_sections(text: str) -> set[str]:
    """Collect the titles of every level-2 (``## ``) heading in *text*.

    Titles are returned with surrounding whitespace removed; duplicates
    collapse because the result is a set.
    """
    heading = re.compile(r"^## (.+)$", re.MULTILINE)
    titles: set[str] = set()
    for hit in heading.finditer(text):
        titles.add(hit.group(1).strip())
    return titles
|
|
|
|
|
|
def _body_text(text: str) -> str:
    """Return *text* with a leading ``---`` frontmatter block removed.

    Only a block that begins on the very first line is stripped; documents
    without one are returned unchanged.
    """
    leading = re.match(r"^---\n.*?\n---\n?", text, re.DOTALL)
    if leading is None:
        return text
    return text[leading.end():]
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Validation steps
|
|
# ---------------------------------------------------------------------------
|
|
def _check_frontmatter(text: str, result: ValidationResult) -> dict[str, str]:
    """Validate the frontmatter block and return its parsed fields.

    Records an error on *result* when the block is absent, when a required
    field is missing, empty or still a template placeholder, or when
    ``soul_version`` does not match ``SEMVER_PATTERN``.

    Returns:
        The parsed frontmatter mapping (empty when none was found).
    """
    frontmatter = _extract_frontmatter(text)
    if not frontmatter:
        result.error("No frontmatter found. Add a --- block at the top.")
        return frontmatter

    template_values = ("<AgentName>", "YYYY-MM-DD")
    for name in REQUIRED_FRONTMATTER_FIELDS:
        value = frontmatter.get(name)
        if value is None:
            result.error(f"Frontmatter missing required field: {name!r}")
            continue
        if value == "" or value in template_values:
            result.error(
                f"Frontmatter field {name!r} is empty or still a placeholder."
            )

    # An empty/missing version was already reported by the loop above.
    version = frontmatter.get("soul_version", "")
    if version and SEMVER_PATTERN.match(version) is None:
        result.error(
            f"soul_version {version!r} is not valid semver (expected MAJOR.MINOR.PATCH)."
        )

    return frontmatter
|
|
|
|
|
|
def _check_required_sections(
    text: str, fm: dict[str, str], result: ValidationResult
) -> None:
    """Report missing H2 sections.

    Every name in ``REQUIRED_SECTIONS`` that is absent is an error. When the
    frontmatter has an ``extends`` key, every absent name in
    ``EXTENSION_ONLY_SECTIONS`` is additionally reported as a warning.
    """
    found = _extract_sections(text)

    absent = [name for name in REQUIRED_SECTIONS if name not in found]
    for name in absent:
        result.error(f"Required section missing: ## {name}")

    if "extends" not in fm:
        return

    for name in EXTENSION_ONLY_SECTIONS:
        if name in found:
            continue
        result.warn(
            f"Sub-agent soul is missing recommended section: ## {name}"
        )
|
|
|
|
|
|
def _check_values_section(text: str, result: ValidationResult) -> None:
    """Check that the ``## Values`` section holds a sensible number of items.

    Items are lines that start with ``<digits>.``. Fewer than 3 is an error;
    more than 8 is a warning. Nothing is reported when the section is absent,
    because the required-sections check covers that.
    """
    body = _body_text(text)
    # The heading is anchored to a line start so "### Values" is not mistaken
    # for it, and trailing blanks or a heading on the last line are tolerated.
    # The lookahead stops at the next line that starts an H2 heading, so an
    # empty section yields an empty body instead of swallowing the next one.
    values_match = re.search(
        r"^## [ \t]*Values[ \t]*(?:\n|\Z)(.*?)(?=^## |\Z)",
        body,
        re.DOTALL | re.MULTILINE,
    )
    if not values_match:
        return  # Already reported as missing section

    values_text = values_match.group(1)
    count = len(re.findall(r"^\d+\.", values_text, re.MULTILINE))
    if count < 3:
        result.error(
            f"Values section has {count} item(s); minimum is 3. "
            "Values must be numbered (1. 2. 3. ...)"
        )
    if count > 8:
        result.warn(
            f"Values section has {count} items; recommended maximum is 8. "
            "Consider consolidating."
        )
|
|
|
|
|
|
def _check_constraints_section(text: str, result: ValidationResult) -> None:
    """Check that ``## Constraints`` has at least 3 ``- **Never**`` bullets.

    Only bullets that begin exactly with ``- **Never**`` are counted; other
    bullets are ignored. Nothing is reported when the section is absent,
    because the required-sections check covers that.
    """
    body = _body_text(text)
    # The heading is anchored to a line start so "### Constraints" is not
    # mistaken for it, and trailing blanks or a heading on the last line are
    # tolerated. The lookahead stops at the next line that starts an H2
    # heading, so an empty section does not absorb the following section.
    constraints_match = re.search(
        r"^## [ \t]*Constraints[ \t]*(?:\n|\Z)(.*?)(?=^## |\Z)",
        body,
        re.DOTALL | re.MULTILINE,
    )
    if not constraints_match:
        return  # Already reported as missing section

    constraints_text = constraints_match.group(1)
    bullets = re.findall(r"^- \*\*Never\*\*", constraints_text, re.MULTILINE)
    if len(bullets) < 3:
        result.error(
            f"Constraints section has {len(bullets)} 'Never' constraint(s); "
            "minimum is 3. Constraints must start with '- **Never**'."
        )
|
|
|
|
|
|
def _check_changelog(text: str, result: ValidationResult) -> None:
    """Check that the ``## Changelog`` table has at least one entry row.

    A table line is any line with three or more ``|`` characters. Separator
    rows and the header row are not entries. Nothing is reported when the
    section is absent, because the required-sections check covers that.
    """
    body = _body_text(text)
    # The heading is anchored to a line start so "### Changelog" is not
    # mistaken for it, and trailing blanks or a heading on the last line are
    # tolerated. The lookahead stops at the next line that starts an H2
    # heading, so an empty section does not absorb the following section.
    changelog_match = re.search(
        r"^## [ \t]*Changelog[ \t]*(?:\n|\Z)(.*?)(?=^## |\Z)",
        body,
        re.DOTALL | re.MULTILINE,
    )
    if not changelog_match:
        return  # Already reported as missing section

    def is_separator(line: str) -> bool:
        # Matches "|---|---|", "| --- | :---: |" and similar: only pipes,
        # dashes, colons and blanks, with at least one dash.
        stripped = line.strip()
        return "-" in stripped and not stripped.strip("|:- \t")

    def cells(line: str) -> list[str]:
        return [c.strip().lower() for c in line.strip().strip("|").split("|")]

    table_lines = [
        line
        for line in changelog_match.group(1).splitlines()
        if line.count("|") >= 3
    ]

    rows = []
    for idx, line in enumerate(table_lines):
        if is_separator(line):
            continue
        heads_separator = idx + 1 < len(table_lines) and is_separator(
            table_lines[idx + 1]
        )
        # A header is the row directly above a separator, or a row with a
        # cell that is exactly "Version". Matching whole cells keeps entries
        # whose summary merely mentions the word (e.g. "Version bump").
        if heads_separator or "version" in cells(line):
            continue
        rows.append(line)

    if not rows:
        result.error("Changelog table has no entries. Add at least one row.")
|
|
|
|
|
|
def _check_contradictions(text: str, result: ValidationResult) -> None:
    """Warn about directive pairs that look mutually contradictory.

    This is a heuristic: for each pair in ``CONTRADICTION_PAIRS`` a warning
    is recorded when both patterns match somewhere in the lower-cased text.
    """
    haystack = text.lower()
    for first, second in CONTRADICTION_PAIRS:
        if re.search(first, haystack) is None:
            continue
        if re.search(second, haystack) is None:
            continue
        result.warn(
            f"Possible contradiction detected: "
            f"'{first}' and '{second}' both appear in the document. "
            "Review for conflicting directives."
        )
|
|
|
|
|
|
def _check_placeholders(text: str, result: ValidationResult) -> None:
    """Report unfilled template placeholders such as ``<AgentName>``.

    A placeholder is ``<...>`` whose content starts with an upper-case ASCII
    letter and contains only ASCII letters and spaces. Each distinct
    placeholder is reported once, as an error.
    """
    placeholders = re.findall(r"<[A-Z][A-Za-z ]+>", text)
    # Sorted so the report is identical from run to run; iterating a bare
    # set of strings has no stable order across interpreter invocations.
    for ph in sorted(set(placeholders)):
        result.error(f"Unfilled placeholder found: {ph}")
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Main validator
|
|
# ---------------------------------------------------------------------------
|
|
def validate(path: Path) -> ValidationResult:
    """Run every check against the SOUL.md file at *path*.

    Args:
        path: Location of the file to validate.

    Returns:
        A ``ValidationResult`` holding all errors and warnings. A path that
        does not exist, is not a regular file, or cannot be read as UTF-8
        produces a single error instead of raising.
    """
    result = ValidationResult(path=path)

    if not path.exists():
        result.error(f"File not found: {path}")
        return result
    if not path.is_file():
        # e.g. a directory; reading it would raise instead of reporting.
        result.error(f"Not a regular file: {path}")
        return result

    try:
        # utf-8-sig decodes plain UTF-8 too, and drops a leading byte-order
        # mark that would otherwise hide the frontmatter's opening '---'.
        text = path.read_text(encoding="utf-8-sig")
    except (OSError, UnicodeDecodeError) as exc:
        result.error(f"Could not read file: {path} ({exc})")
        return result

    fm = _check_frontmatter(text, result)
    _check_required_sections(text, fm, result)
    _check_values_section(text, result)
    _check_constraints_section(text, result)
    _check_changelog(text, result)
    _check_contradictions(text, result)
    _check_placeholders(text, result)

    return result
|
|
|
|
|
|
def _print_result(result: ValidationResult) -> None:
    """Print a one-file report to stdout.

    The first line is ``[PASS]``, ``[WARN]`` or ``[FAIL]`` followed by the
    path. Unless the file passed cleanly, one line per error and then one
    line per warning follows.
    """
    if not result.is_valid:
        status = "FAIL"
    elif result.warnings:
        status = "WARN"
    else:
        status = "PASS"

    report = [f"[{status}] {result.path}"]
    if status != "PASS":
        report.extend(f" ERROR: {err}" for err in result.errors)
        report.extend(f" WARN: {warn}" for warn in result.warnings)
    print("\n".join(report))
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# CLI entry point
|
|
# ---------------------------------------------------------------------------
|
|
def main() -> int:
    """Validate every path given on the command line.

    Prints a report per file, plus a summary line when more than one file
    was given.

    Returns:
        0 when all files are valid; 1 when any file is invalid or when no
        path argument was supplied (usage text is printed in that case).
    """
    args = sys.argv[1:]
    if not args:
        usage = (
            "Usage: python scripts/validate_soul.py <path/to/soul.md> [...]",
            "",
            "Examples:",
            " python scripts/validate_soul.py memory/self/soul.md",
            " python scripts/validate_soul.py docs/soul/extensions/seer.md",
            " python scripts/validate_soul.py docs/soul/extensions/*.md",
        )
        for line in usage:
            print(line)
        return 1

    # Validate everything first, then report, so output stays grouped.
    outcomes = [validate(Path(arg)) for arg in args]
    for outcome in outcomes:
        _print_result(outcome)

    passed = len([outcome for outcome in outcomes if outcome.is_valid])
    if len(outcomes) > 1:
        print(f"\n{passed}/{len(outcomes)} soul files passed validation.")

    return 0 if passed == len(outcomes) else 1
|
|
|
|
|
|
# Script entry point: the process exit status is main()'s return value.
if __name__ == "__main__":
    raise SystemExit(main())
|