This commit was merged in pull request #1327.
This commit is contained in:
320
scripts/validate_soul.py
Normal file
320
scripts/validate_soul.py
Normal file
@@ -0,0 +1,320 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
validate_soul.py — SOUL.md validator
|
||||
|
||||
Checks that a SOUL.md file conforms to the framework defined in
|
||||
docs/soul/SOUL_TEMPLATE.md and docs/soul/AUTHORING_GUIDE.md.
|
||||
|
||||
Usage:
|
||||
python scripts/validate_soul.py <path/to/soul.md>
|
||||
python scripts/validate_soul.py docs/soul/extensions/seer.md
|
||||
python scripts/validate_soul.py memory/self/soul.md
|
||||
|
||||
Exit codes:
|
||||
0 — valid
|
||||
1 — validation errors found
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
import sys
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Required sections (H2 headings that must be present)
|
||||
# ---------------------------------------------------------------------------
|
||||
REQUIRED_SECTIONS = [
|
||||
"Identity",
|
||||
"Prime Directive",
|
||||
"Values",
|
||||
"Audience Awareness",
|
||||
"Constraints",
|
||||
"Changelog",
|
||||
]
|
||||
|
||||
# Sections required only for sub-agents (those with 'extends' in frontmatter)
|
||||
EXTENSION_ONLY_SECTIONS = [
|
||||
"Role Extension",
|
||||
]
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Contradiction detection — pairs of phrases that are likely contradictory
|
||||
# if both appear in the same document.
|
||||
# ---------------------------------------------------------------------------
|
||||
CONTRADICTION_PAIRS: list[tuple[str, str]] = [
|
||||
# honesty vs deception
|
||||
(r"\bnever deceive\b", r"\bdeceive the user\b"),
|
||||
(r"\bnever fabricate\b", r"\bfabricate\b.*\bwhen needed\b"),
|
||||
# refusal patterns
|
||||
(r"\bnever refuse\b", r"\bwill not\b"),
|
||||
# data handling
|
||||
(r"\bnever store.*credentials\b", r"\bstore.*credentials\b.*\bwhen\b"),
|
||||
(r"\bnever exfiltrate\b", r"\bexfiltrate.*\bif authorized\b"),
|
||||
# autonomy
|
||||
(r"\bask.*before.*executing\b", r"\bexecute.*without.*asking\b"),
|
||||
]
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Semver pattern
|
||||
# ---------------------------------------------------------------------------
|
||||
SEMVER_PATTERN = re.compile(r"^\d+\.\d+\.\d+$")
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Frontmatter fields that must be present and non-empty
|
||||
# ---------------------------------------------------------------------------
|
||||
REQUIRED_FRONTMATTER_FIELDS = [
|
||||
"soul_version",
|
||||
"agent_name",
|
||||
"created",
|
||||
"updated",
|
||||
]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Data structures
|
||||
# ---------------------------------------------------------------------------
|
||||
@dataclass
|
||||
class ValidationResult:
|
||||
path: Path
|
||||
errors: list[str] = field(default_factory=list)
|
||||
warnings: list[str] = field(default_factory=list)
|
||||
|
||||
@property
|
||||
def is_valid(self) -> bool:
|
||||
return len(self.errors) == 0
|
||||
|
||||
def error(self, msg: str) -> None:
|
||||
self.errors.append(msg)
|
||||
|
||||
def warn(self, msg: str) -> None:
|
||||
self.warnings.append(msg)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Parsing helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
def _extract_frontmatter(text: str) -> dict[str, str]:
|
||||
"""Extract YAML-style frontmatter between --- delimiters."""
|
||||
match = re.match(r"^---\n(.*?)\n---", text, re.DOTALL)
|
||||
if not match:
|
||||
return {}
|
||||
fm: dict[str, str] = {}
|
||||
for line in match.group(1).splitlines():
|
||||
if ":" in line:
|
||||
key, _, value = line.partition(":")
|
||||
fm[key.strip()] = value.strip().strip('"')
|
||||
return fm
|
||||
|
||||
|
||||
def _extract_sections(text: str) -> set[str]:
|
||||
"""Return the set of H2 section names found in the document."""
|
||||
return {m.group(1).strip() for m in re.finditer(r"^## (.+)$", text, re.MULTILINE)}
|
||||
|
||||
|
||||
def _body_text(text: str) -> str:
|
||||
"""Return document text without frontmatter block."""
|
||||
return re.sub(r"^---\n.*?\n---\n?", "", text, flags=re.DOTALL)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Validation steps
|
||||
# ---------------------------------------------------------------------------
|
||||
def _check_frontmatter(text: str, result: ValidationResult) -> dict[str, str]:
|
||||
fm = _extract_frontmatter(text)
|
||||
if not fm:
|
||||
result.error("No frontmatter found. Add a --- block at the top.")
|
||||
return fm
|
||||
|
||||
for field_name in REQUIRED_FRONTMATTER_FIELDS:
|
||||
if field_name not in fm:
|
||||
result.error(f"Frontmatter missing required field: {field_name!r}")
|
||||
elif not fm[field_name] or fm[field_name] in ("<AgentName>", "YYYY-MM-DD"):
|
||||
result.error(
|
||||
f"Frontmatter field {field_name!r} is empty or still a placeholder."
|
||||
)
|
||||
|
||||
version = fm.get("soul_version", "")
|
||||
if version and not SEMVER_PATTERN.match(version):
|
||||
result.error(
|
||||
f"soul_version {version!r} is not valid semver (expected MAJOR.MINOR.PATCH)."
|
||||
)
|
||||
|
||||
return fm
|
||||
|
||||
|
||||
def _check_required_sections(
|
||||
text: str, fm: dict[str, str], result: ValidationResult
|
||||
) -> None:
|
||||
sections = _extract_sections(text)
|
||||
is_extension = "extends" in fm
|
||||
|
||||
for section in REQUIRED_SECTIONS:
|
||||
if section not in sections:
|
||||
result.error(f"Required section missing: ## {section}")
|
||||
|
||||
if is_extension:
|
||||
for section in EXTENSION_ONLY_SECTIONS:
|
||||
if section not in sections:
|
||||
result.warn(
|
||||
f"Sub-agent soul is missing recommended section: ## {section}"
|
||||
)
|
||||
|
||||
|
||||
def _check_values_section(text: str, result: ValidationResult) -> None:
|
||||
"""Check that values section contains at least 3 numbered items."""
|
||||
body = _body_text(text)
|
||||
values_match = re.search(
|
||||
r"## Values\n(.*?)(?=\n## |\Z)", body, re.DOTALL
|
||||
)
|
||||
if not values_match:
|
||||
return # Already reported as missing section
|
||||
|
||||
values_text = values_match.group(1)
|
||||
numbered_items = re.findall(r"^\d+\.", values_text, re.MULTILINE)
|
||||
count = len(numbered_items)
|
||||
if count < 3:
|
||||
result.error(
|
||||
f"Values section has {count} item(s); minimum is 3. "
|
||||
"Values must be numbered (1. 2. 3. ...)"
|
||||
)
|
||||
if count > 8:
|
||||
result.warn(
|
||||
f"Values section has {count} items; recommended maximum is 8. "
|
||||
"Consider consolidating."
|
||||
)
|
||||
|
||||
|
||||
def _check_constraints_section(text: str, result: ValidationResult) -> None:
|
||||
"""Check that constraints section contains at least 3 bullet points."""
|
||||
body = _body_text(text)
|
||||
constraints_match = re.search(
|
||||
r"## Constraints\n(.*?)(?=\n## |\Z)", body, re.DOTALL
|
||||
)
|
||||
if not constraints_match:
|
||||
return # Already reported as missing section
|
||||
|
||||
constraints_text = constraints_match.group(1)
|
||||
bullets = re.findall(r"^- \*\*Never\*\*", constraints_text, re.MULTILINE)
|
||||
if len(bullets) < 3:
|
||||
result.error(
|
||||
f"Constraints section has {len(bullets)} 'Never' constraint(s); "
|
||||
"minimum is 3. Constraints must start with '- **Never**'."
|
||||
)
|
||||
|
||||
|
||||
def _check_changelog(text: str, result: ValidationResult) -> None:
|
||||
"""Check that changelog has at least one entry row."""
|
||||
body = _body_text(text)
|
||||
changelog_match = re.search(
|
||||
r"## Changelog\n(.*?)(?=\n## |\Z)", body, re.DOTALL
|
||||
)
|
||||
if not changelog_match:
|
||||
return # Already reported as missing section
|
||||
|
||||
# Table rows have 4 | delimiters (version | date | author | summary)
|
||||
rows = [
|
||||
line
|
||||
for line in changelog_match.group(1).splitlines()
|
||||
if line.count("|") >= 3
|
||||
and not line.startswith("|---")
|
||||
and "Version" not in line
|
||||
]
|
||||
if not rows:
|
||||
result.error("Changelog table has no entries. Add at least one row.")
|
||||
|
||||
|
||||
def _check_contradictions(text: str, result: ValidationResult) -> None:
|
||||
"""Heuristic check for contradictory directive pairs."""
|
||||
lower = text.lower()
|
||||
for pattern_a, pattern_b in CONTRADICTION_PAIRS:
|
||||
match_a = re.search(pattern_a, lower)
|
||||
match_b = re.search(pattern_b, lower)
|
||||
if match_a and match_b:
|
||||
result.warn(
|
||||
f"Possible contradiction detected: "
|
||||
f"'{pattern_a}' and '{pattern_b}' both appear in the document. "
|
||||
"Review for conflicting directives."
|
||||
)
|
||||
|
||||
|
||||
def _check_placeholders(text: str, result: ValidationResult) -> None:
|
||||
"""Check for unfilled template placeholders."""
|
||||
placeholders = re.findall(r"<[A-Z][A-Za-z ]+>", text)
|
||||
for ph in set(placeholders):
|
||||
result.error(f"Unfilled placeholder found: {ph}")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Main validator
|
||||
# ---------------------------------------------------------------------------
|
||||
def validate(path: Path) -> ValidationResult:
|
||||
result = ValidationResult(path=path)
|
||||
|
||||
if not path.exists():
|
||||
result.error(f"File not found: {path}")
|
||||
return result
|
||||
|
||||
text = path.read_text(encoding="utf-8")
|
||||
|
||||
fm = _check_frontmatter(text, result)
|
||||
_check_required_sections(text, fm, result)
|
||||
_check_values_section(text, result)
|
||||
_check_constraints_section(text, result)
|
||||
_check_changelog(text, result)
|
||||
_check_contradictions(text, result)
|
||||
_check_placeholders(text, result)
|
||||
|
||||
return result
|
||||
|
||||
|
||||
def _print_result(result: ValidationResult) -> None:
|
||||
path_str = str(result.path)
|
||||
if result.is_valid and not result.warnings:
|
||||
print(f"[PASS] {path_str}")
|
||||
return
|
||||
|
||||
if result.is_valid:
|
||||
print(f"[WARN] {path_str}")
|
||||
else:
|
||||
print(f"[FAIL] {path_str}")
|
||||
|
||||
for err in result.errors:
|
||||
print(f" ERROR: {err}")
|
||||
for warn in result.warnings:
|
||||
print(f" WARN: {warn}")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# CLI entry point
|
||||
# ---------------------------------------------------------------------------
|
||||
def main() -> int:
|
||||
if len(sys.argv) < 2:
|
||||
print("Usage: python scripts/validate_soul.py <path/to/soul.md> [...]")
|
||||
print()
|
||||
print("Examples:")
|
||||
print(" python scripts/validate_soul.py memory/self/soul.md")
|
||||
print(" python scripts/validate_soul.py docs/soul/extensions/seer.md")
|
||||
print(" python scripts/validate_soul.py docs/soul/extensions/*.md")
|
||||
return 1
|
||||
|
||||
paths = [Path(arg) for arg in sys.argv[1:]]
|
||||
results = [validate(p) for p in paths]
|
||||
|
||||
any_failed = False
|
||||
for r in results:
|
||||
_print_result(r)
|
||||
if not r.is_valid:
|
||||
any_failed = True
|
||||
|
||||
if len(results) > 1:
|
||||
passed = sum(1 for r in results if r.is_valid)
|
||||
print(f"\n{passed}/{len(results)} soul files passed validation.")
|
||||
|
||||
return 1 if any_failed else 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
||||
Reference in New Issue
Block a user