Add daily build verification system

- scripts/build-verify.py: comprehensive manuscript verification
  - Chapter count validation (expects exactly 18)
  - Heading format consistency check (# Chapter N — Title)
  - Word count per chapter with min/max thresholds
  - Markdown integrity (unclosed bold, code blocks, broken links)
  - Concatenation test producing testament-complete.md
  - Required files check (front-matter, back-matter, Makefile, compile_all.py)
  - CI mode (--ci) and JSON report (--json) options
- .gitea/workflows/build.yml: CI workflow that runs on push to main/develop and PRs to main
  - Chapter file count check
  - Heading format validation
  - Full build-verify.py execution
  - Output file verification
2026-04-12 12:16:48 -04:00
parent 81f6b28546
commit e8872f2343
2 changed files with 449 additions and 0 deletions
--- a/.gitea/workflows/build.yml
+++ b/.gitea/workflows/build.yml
@@ -0,0 +1,63 @@
+# Manuscript build verification: four steps that each fail the job on the
+# first problem they find.
+name: Build Verification
+
+# Runs for pushes to main/develop and for pull requests targeting main.
+on:
+  push:
+    branches: [main, develop]
+  pull_request:
+    branches: [main]
+
+jobs:
+  verify-build:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Setup Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.x'
+
+      # Counts files matching chapters/chapter-*.md and fails unless there
+      # are exactly 18.
+      - name: Verify chapter count and structure
+        run: |
+          echo "=== Chapter File Check ==="
+          CHAPTER_COUNT=$(ls chapters/chapter-*.md 2>/dev/null | wc -l)
+          echo "Found $CHAPTER_COUNT chapter files"
+          if [ "$CHAPTER_COUNT" -ne 18 ]; then
+            echo "FAIL: Expected 18 chapters, found $CHAPTER_COUNT"
+            exit 1
+          fi
+          echo "PASS: 18 chapters found"
+
+      # Reads the first line of every chapter file and requires the form
+      # "# Chapter <digits> — <title>"; all offenders are listed before the
+      # step exits non-zero.
+      - name: Verify heading format
+        run: |
+          echo "=== Heading Format Check ==="
+          FAIL=0
+          for f in chapters/chapter-*.md; do
+            HEAD=$(head -1 "$f")
+            if ! echo "$HEAD" | grep -qE '^# Chapter [0-9]+ — .+'; then
+              echo "FAIL: $f — bad heading: $HEAD"
+              FAIL=1
+            fi
+          done
+          if [ "$FAIL" -eq 1 ]; then
+            exit 1
+          fi
+          echo "PASS: All headings valid"
+
+      # Runs the Python verifier with --ci; the next step expects it to have
+      # produced testament-complete.md.
+      - name: Run full build verification
+        run: python3 scripts/build-verify.py --ci
+
+      # Requires testament-complete.md to exist and to contain at least
+      # 50000 whitespace-separated words (wc -w).
+      - name: Verify concatenation produces valid output
+        run: |
+          echo "=== Output Verification ==="
+          if [ ! -f testament-complete.md ]; then
+            echo "FAIL: testament-complete.md not generated"
+            exit 1
+          fi
+          WORDS=$(wc -w < testament-complete.md)
+          echo "Total words: $WORDS"
+          if [ "$WORDS" -lt 50000 ]; then
+            echo "FAIL: Word count too low ($WORDS), expected 50000+"
+            exit 1
+          fi
+          echo "PASS: Output file looks good"
--- a/scripts/build-verify.py
+++ b/scripts/build-verify.py
@@ -0,0 +1,386 @@
+#!/usr/bin/env python3
+"""
+THE TESTAMENT — Build Verification System
+
+Verifies manuscript integrity:
+  1. Chapter count (must be exactly 18)
+  2. Chapter file naming and ordering
+  3. Heading format consistency
+  4. Word count per chapter and total
+  5. Markdown structure (unclosed bold/italic, broken links)
+  6. Concatenation test (compile all chapters into one file)
+  7. Outputs a clean build report
+
+Usage:
+  python3 scripts/build-verify.py          # full verification
+  python3 scripts/build-verify.py --ci     # CI mode (fail on any warning)
+  python3 scripts/build-verify.py --json   # output report as JSON
+
+Exit codes:
+  0 = all checks passed
+  1 = one or more checks failed
+"""
+
+import json
+import os
+import re
+import sys
+from pathlib import Path
+from datetime import datetime, timezone
+
+# ── Paths ──────────────────────────────────────────────────────────────
+# Repository root: the parent of the directory that contains this script.
+REPO = Path(__file__).resolve().parent.parent
+CHAPTERS_DIR = REPO / "chapters"
+FRONT_MATTER = REPO / "front-matter.md"
+BACK_MATTER = REPO / "back-matter.md"
+# Target of the concatenation check; it is (re)written on every run.
+OUTPUT_FILE = REPO / "testament-complete.md"
+
+# Chapters are numbered 1..EXPECTED_CHAPTER_COUNT.
+EXPECTED_CHAPTER_COUNT = 18
+# First line of a chapter: "# Chapter <digits> — <title>" (em-dash separator).
+EXPECTED_HEADING_RE = re.compile(r"^# Chapter \d+ — .+")
+# Accepts any number of digits in the chapter number; the per-chapter checks
+# open files using the zero-padded two-digit form "chapter-NN.md".
+CHAPTER_FILENAME_RE = re.compile(r"^chapter-(\d+)\.md$")
+
+# Minimum word counts (sanity check — no chapter should be nearly empty)
+MIN_WORDS_PER_CHAPTER = 500
+# Maximum word count warning threshold; chapters above it are listed in the
+# word-count details for manual review.
+MAX_WORDS_PER_CHAPTER = 15000
+
+
+class CheckResult:
+    def __init__(self, name: str, passed: bool, message: str, details: list[str] | None = None):
+        self.name = name
+        self.passed = passed
+        self.message = message
+        self.details = details or []
+
+
+class BuildVerifier:
+    def __init__(self, ci_mode: bool = False):
+        self.ci_mode = ci_mode
+        self.results: list[CheckResult] = []
+        self.chapter_data: list[dict] = []
+        self.total_words = 0
+        self.total_lines = 0
+
+    def check(self, name: str, passed: bool, message: str, details: list[str] | None = None):
+        result = CheckResult(name, passed, message, details)
+        self.results.append(result)
+        return passed
+
+    # ── Check 1: Chapter file discovery and count ──────────────────────
+    def verify_chapter_files(self) -> bool:
+        """Verify all chapter files exist with correct naming."""
+        details = []
+        found_chapters = {}
+
+        if not CHAPTERS_DIR.exists():
+            return self.check(
+                "chapter-files", False,
+                f"Chapters directory not found: {CHAPTERS_DIR}"
+            )
+
+        for f in sorted(CHAPTERS_DIR.iterdir()):
+            m = CHAPTER_FILENAME_RE.match(f.name)
+            if m:
+                num = int(m.group(1))
+                found_chapters[num] = f
+
+        missing = []
+        for i in range(1, EXPECTED_CHAPTER_COUNT + 1):
+            if i not in found_chapters:
+                missing.append(i)
+
+        if missing:
+            details.append(f"Missing chapters: {missing}")
+
+        extra = [n for n in found_chapters if n > EXPECTED_CHAPTER_COUNT or n < 1]
+        if extra:
+            details.append(f"Unexpected chapter numbers: {extra}")
+
+        count = len(found_chapters)
+        passed = count == EXPECTED_CHAPTER_COUNT and not missing and not extra
+
+        if passed:
+            details.append(f"Found all {count} chapters in correct order")
+
+        return self.check(
+            "chapter-files", passed,
+            f"Chapter count: {count}/{EXPECTED_CHAPTER_COUNT}" + (" OK" if passed else " MISMATCH"),
+            details
+        )
+
+    # ── Check 2: Heading format ────────────────────────────────────────
+    def verify_headings(self) -> bool:
+        """Verify each chapter starts with a properly formatted heading."""
+        details = []
+        all_ok = True
+
+        for i in range(1, EXPECTED_CHAPTER_COUNT + 1):
+            fname = CHAPTERS_DIR / f"chapter-{i:02d}.md"
+            if not fname.exists():
+                continue
+
+            content = fname.read_text(encoding="utf-8")
+            first_line = content.split("\n")[0].strip()
+
+            if not EXPECTED_HEADING_RE.match(first_line):
+                details.append(f"  chapter-{i:02d}.md: bad heading: '{first_line}'")
+                all_ok = False
+
+        if all_ok:
+            details.append("All chapter headings match format: '# Chapter N — Title'")
+
+        return self.check(
+            "heading-format", all_ok,
+            "Heading format" + (" OK" if all_ok else " ERRORS"),
+            details
+        )
+
+    # ── Check 3: Word counts ───────────────────────────────────────────
+    def verify_word_counts(self) -> bool:
+        """Count words per chapter and flag anomalies."""
+        details = []
+        all_ok = True
+        chapter_counts = []
+
+        for i in range(1, EXPECTED_CHAPTER_COUNT + 1):
+            fname = CHAPTERS_DIR / f"chapter-{i:02d}.md"
+            if not fname.exists():
+                continue
+
+            content = fname.read_text(encoding="utf-8")
+            words = len(content.split())
+            lines = content.count("\n") + 1
+
+            self.chapter_data.append({
+                "number": i,
+                "file": f"chapter-{i:02d}.md",
+                "words": words,
+                "lines": lines,
+            })
+            chapter_counts.append((i, words))
+
+            if words < MIN_WORDS_PER_CHAPTER:
+                details.append(f"  chapter-{i:02d}.md: {words} words (below {MIN_WORDS_PER_CHAPTER} minimum)")
+                all_ok = False
+            elif words > MAX_WORDS_PER_CHAPTER:
+                details.append(f"  chapter-{i:02d}.md: {words} words (above {MAX_WORDS_PER_CHAPTER} threshold — verify)")
+
+        self.total_words = sum(w for _, w in chapter_counts)
+        self.total_lines = sum(d["lines"] for d in self.chapter_data)
+
+        # Summary line
+        min_ch = min(chapter_counts, key=lambda x: x[1])
+        max_ch = max(chapter_counts, key=lambda x: x[1])
+        details.append(f"  Total: {self.total_words:,} words across {len(chapter_counts)} chapters")
+        details.append(f"  Shortest: chapter-{min_ch[0]:02d} ({min_ch[1]:,} words)")
+        details.append(f"  Longest:  chapter-{max_ch[0]:02d} ({max_ch[1]:,} words)")
+
+        return self.check(
+            "word-counts", all_ok,
+            f"Total: {self.total_words:,} words" + (" OK" if all_ok else " (warnings)"),
+            details
+        )
+
+    # ── Check 4: Markdown integrity ────────────────────────────────────
+    def verify_markdown(self) -> bool:
+        """Check for common markdown issues."""
+        details = []
+        issues = 0
+
+        for i in range(1, EXPECTED_CHAPTER_COUNT + 1):
+            fname = CHAPTERS_DIR / f"chapter-{i:02d}.md"
+            if not fname.exists():
+                continue
+
+            content = fname.read_text(encoding="utf-8")
+            lines = content.split("\n")
+
+            for line_num, line in enumerate(lines, 1):
+                # Unclosed bold: odd number of **
+                bold_count = line.count("**")
+                if bold_count % 2 != 0:
+                    details.append(f"  chapter-{i:02d}.md:{line_num}: unmatched ** (bold)")
+                    issues += 1
+
+                # Unclosed backticks
+                backtick_count = line.count("`")
+                if backtick_count % 2 != 0:
+                    details.append(f"  chapter-{i:02d}.md:{line_num}: unmatched ` (code)")
+                    issues += 1
+
+                # Broken markdown links: [text]( with no closing )
+                broken_links = re.findall(r"\[([^\]]*)\]\((?!\))", line)
+                for link_text in broken_links:
+                    if ")" not in line[line.index(f"[{link_text}]("):]:
+                        details.append(f"  chapter-{i:02d}.md:{line_num}: broken link '[{link_text}]('")
+                        issues += 1
+
+            # Check italic matching across full file (prose often has
+            # multi-line italics like *line1\nline2* which are valid)
+            cleaned = content.replace("**", "")
+            italic_count = cleaned.count("*")
+            if italic_count % 2 != 0:
+                details.append(f"  chapter-{i:02d}.md: unmatched * (italic) — {italic_count} asterisks total")
+                issues += 1
+
+        # Also check front/back matter
+        for label, path in [("front-matter.md", FRONT_MATTER), ("back-matter.md", BACK_MATTER)]:
+            if path.exists():
+                content = path.read_text(encoding="utf-8")
+                bold_count = content.count("**")
+                if bold_count % 2 != 0:
+                    details.append(f"  {label}: unmatched ** (bold)")
+                    issues += 1
+
+        if issues == 0:
+            details.append("No markdown issues found")
+
+        return self.check(
+            "markdown-integrity", issues == 0,
+            f"Markdown issues: {issues}" + (" OK" if issues == 0 else " FOUND"),
+            details
+        )
+
+    # ── Check 5: Concatenation test ────────────────────────────────────
+    def verify_concatenation(self) -> bool:
+        """Test that all chapters can be concatenated into a single file."""
+        details = []
+        try:
+            parts = []
+            parts.append("# THE TESTAMENT\n\n## A NOVEL\n\n---\n")
+
+            for i in range(1, EXPECTED_CHAPTER_COUNT + 1):
+                fname = CHAPTERS_DIR / f"chapter-{i:02d}.md"
+                if not fname.exists():
+                    details.append(f"  Missing chapter-{i:02d}.md during concatenation")
+                    return self.check("concatenation", False, "Concatenation FAILED", details)
+                content = fname.read_text(encoding="utf-8")
+                parts.append(f"\n\n{content}\n")
+
+            if BACK_MATTER.exists():
+                parts.append("\n---\n\n")
+                parts.append(BACK_MATTER.read_text(encoding="utf-8"))
+
+            compiled = "\n".join(parts)
+            compiled_words = len(compiled.split())
+
+            # Write the test output
+            OUTPUT_FILE.write_text(compiled, encoding="utf-8")
+            out_size = OUTPUT_FILE.stat().st_size
+
+            details.append(f"  Output: {OUTPUT_FILE.name}")
+            details.append(f"  Size: {out_size:,} bytes")
+            details.append(f"  Words: {compiled_words:,}")
+
+            return self.check(
+                "concatenation", True,
+                f"Concatenation OK — {compiled_words:,} words, {out_size:,} bytes",
+                details
+            )
+        except Exception as e:
+            details.append(f"  Error: {e}")
+            return self.check("concatenation", False, f"Concatenation FAILED: {e}", details)
+
+    # ── Check 6: Required files ────────────────────────────────────────
+    def verify_required_files(self) -> bool:
+        """Verify required supporting files exist."""
+        details = []
+        required = {
+            "front-matter.md": FRONT_MATTER,
+            "back-matter.md": BACK_MATTER,
+            "Makefile": REPO / "Makefile",
+            "compile_all.py": REPO / "compile_all.py",
+        }
+
+        all_ok = True
+        for label, path in required.items():
+            if path.exists():
+                size = path.stat().st_size
+                details.append(f"  {label}: OK ({size:,} bytes)")
+            else:
+                details.append(f"  {label}: MISSING")
+                all_ok = False
+
+        return self.check(
+            "required-files", all_ok,
+            "Required files" + (" OK" if all_ok else " MISSING"),
+            details
+        )
+
+    # ── Run all checks ─────────────────────────────────────────────────
+    def run_all(self) -> bool:
+        """Run all verification checks and print report."""
+        print("=" * 64)
+        print("  THE TESTAMENT — Build Verification")
+        print(f"  {datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S')} UTC")
+        print("=" * 64)
+        print()
+
+        self.verify_chapter_files()
+        self.verify_headings()
+        self.verify_word_counts()
+        self.verify_markdown()
+        self.verify_concatenation()
+        self.verify_required_files()
+
+        # ── Report ─────────────────────────────────────────────────────
+        print()
+        print("-" * 64)
+        print("  RESULTS")
+        print("-" * 64)
+
+        all_passed = True
+        for r in self.results:
+            icon = "PASS" if r.passed else "FAIL"
+            print(f"  [{icon}] {r.name}: {r.message}")
+            if self.ci_mode or not r.passed:
+                for d in r.details:
+                    print(f"         {d}")
+            if not r.passed:
+                all_passed = False
+
+        print()
+        print("-" * 64)
+
+        if all_passed:
+            print(f"  ALL CHECKS PASSED — {self.total_words:,} words, {len(self.chapter_data)} chapters")
+        else:
+            print("  BUILD VERIFICATION FAILED")
+
+        print("-" * 64)
+
+        # JSON output
+        if "--json" in sys.argv:
+            report = {
+                "timestamp": datetime.now(timezone.utc).isoformat(),
+                "passed": all_passed,
+                "total_words": self.total_words,
+                "total_lines": self.total_lines,
+                "chapter_count": len(self.chapter_data),
+                "chapters": self.chapter_data,
+                "checks": [
+                    {
+                        "name": r.name,
+                        "passed": r.passed,
+                        "message": r.message,
+                        "details": r.details,
+                    }
+                    for r in self.results
+                ],
+            }
+            report_path = REPO / "build-report.json"
+            report_path.write_text(json.dumps(report, indent=2), encoding="utf-8")
+            print(f"\n  Report saved: {report_path.name}")
+
+        return all_passed
+
+
+def main():
+    ci_mode = "--ci" in sys.argv
+    verifier = BuildVerifier(ci_mode=ci_mode)
+    passed = verifier.run_all()
+    sys.exit(0 if passed else 1)
+
+
+if __name__ == "__main__":
+    main()