Merge pull request 'burn: add chapter validation to build pipeline (closes #24)' (#26) from burn/20260410-chapter-validation into main

Merged PR #26: burn: add chapter validation to build pipeline
2026-04-11 00:43:38 +00:00
parent 8ba9f58e96 948d520b83
commit ba9fd0ba08
1 changed file with 152 additions and 12 deletions
--- a/compile.py
+++ b/compile.py
@@ -8,12 +8,15 @@ Uses chapters, front matter, back matter, and references illustrations.
 Requirements: pip install markdown weasyprint (or use pandoc)

 Usage:
-  python3 compile.py                    # generates testament-complete.md
+  python3 compile.py                    # validate then compile
+  python3 compile.py --validate         # validate only, no compile
+  python3 compile.py --no-validate      # skip validation, compile directly
  pandoc testament-complete.md -o testament.pdf --pdf-engine=weasyprint
 """

 import os
 import re
+import sys

 BASE = os.path.dirname(os.path.abspath(__file__))
 CHAPTERS_DIR = os.path.join(BASE, "chapters")
@@ -28,17 +31,147 @@ PARTS = {
    11: ("THE LIGHT", "Thomas at the door. The network. The story breaks. The green light."),
 }

+
 def read_file(path):
    with open(path, 'r') as f:
        return f.read()

+
 def get_chapter_number(filename):
    match = re.search(r'chapter-(\d+)', filename)
    return int(match.group(1)) if match else 0

-def compile():
+
+def validate_chapters(chapters_dir=CHAPTERS_DIR):
+    """Validate chapter files before compilation.
+
+    Checks:
+      - No empty chapter files (whitespace-only counts as empty)
+      - Every chapter starts with an H1 header (# Title)
+      - No gaps in chapter numbering (sequential from 1)
+      - No duplicate chapter numbers
+
+    Returns:
+      (is_valid, errors) where errors is a list of human-readable strings.
+    """
+    errors = []
+    warnings = []
+
+    if not os.path.isdir(chapters_dir):
+        errors.append(f"Chapters directory not found: {chapters_dir}")
+        return False, errors
+
+    # Collect chapter files
+    chapter_files = []
+    for f in sorted(os.listdir(chapters_dir)):
+        if f.startswith("chapter-") and f.endswith(".md"):
+            num = get_chapter_number(f)
+            chapter_files.append((num, f))
+
+    if not chapter_files:
+        errors.append("No chapter files found in chapters/ directory")
+        return False, errors
+
+    chapter_files.sort()
+
+    # Check for duplicates
+    seen_numbers = {}
+    for num, filename in chapter_files:
+        if num in seen_numbers:
+            errors.append(
+                f"Duplicate chapter number {num}: {filename} and {seen_numbers[num]}"
+            )
+        seen_numbers[num] = filename
+
+    # Check for gaps in numbering
+    if chapter_files:
+        expected = list(range(1, chapter_files[-1][0] + 1))
+        found = [num for num, _ in chapter_files]
+        missing = sorted(set(expected) - set(found))
+        if missing:
+            errors.append(
+                f"Missing chapter(s): {', '.join(str(n) for n in missing)}"
+            )
+
+    # Validate individual chapter files
+    for num, filename in chapter_files:
+        filepath = os.path.join(chapters_dir, filename)
+
+        # Check file is not empty
+        try:
+            content = read_file(filepath)
+        except Exception as e:
+            errors.append(f"{filename}: cannot read — {e}")
+            continue
+
+        if not content.strip():
+            errors.append(f"{filename}: file is empty")
+            continue
+
+        # Check word count (warn if suspiciously short)
+        word_count = len(content.split())
+        if word_count < 50:
+            warnings.append(
+                f"{filename}: only {word_count} words (possible truncation)"
+            )
+
+        # Check starts with H1 header
+        first_line = content.strip().split('\n')[0]
+        if not first_line.startswith('# '):
+            errors.append(
+                f"{filename}: missing H1 header — "
+                f"expected '# Chapter {num} — Title', got '{first_line[:60]}'"
+            )
+        else:
+            # Verify the H1 matches expected chapter number
+            header_match = re.match(r'^#\s+Chapter\s+(\d+)', first_line)
+            if header_match:
+                header_num = int(header_match.group(1))
+                if header_num != num:
+                    errors.append(
+                        f"{filename}: header says Chapter {header_num} "
+                        f"but filename says Chapter {num}"
+                    )
+            else:
+                warnings.append(
+                    f"{filename}: H1 header doesn't follow "
+                    f"'# Chapter N — Title' pattern: '{first_line[:60]}'"
+                )
+
+    # Report
+    valid = len(errors) == 0
+
+    if warnings:
+        print(f"Validation: {len(warnings)} warning(s)")
+        for w in warnings:
+            print(f"  ⚠ {w}")
+
+    if errors:
+        print(f"Validation: FAILED — {len(errors)} error(s)")
+        for e in errors:
+            print(f"  ✗ {e}")
+    else:
+        print(
+            f"Validation: PASSED — {len(chapter_files)} chapters, "
+            f"chapters {chapter_files[0][0]}–{chapter_files[-1][0]}"
+        )
+
+    return valid, errors
+
+
+def compile(skip_validation=False):
+    """Compile all chapters into a single markdown file."""
+
+    # Pre-compilation validation
+    if not skip_validation:
+        valid, errors = validate_chapters()
+        if not valid:
+            print("\nCompilation aborted. Fix the errors above and try again.")
+            sys.exit(1)
+        print()
+
    output = []
-    
+
    # Title page
    output.append("""---
 title: "The Testament"
@@ -66,7 +199,7 @@ with Timmy

 ---
 """)
-    
+
    # Get all chapters sorted
    chapters = []
    for f in os.listdir(CHAPTERS_DIR):
@@ -74,7 +207,7 @@ with Timmy
            num = get_chapter_number(f)
            chapters.append((num, f))
    chapters.sort()
-    
+
    current_part = 0
    for num, filename in chapters:
        # Insert part divider if needed
@@ -82,28 +215,28 @@ with Timmy
            part_name, part_desc = PARTS[num]
            current_part += 1
            output.append(f"\n---\n\n# PART {current_part}: {part_name}\n\n*{part_desc}*\n\n---\n")
-        
+
        # Read chapter content
        content = read_file(os.path.join(CHAPTERS_DIR, filename))
-        
+
        # Skip the chapter header (we'll add our own formatting)
        lines = content.split('\n')
        body = '\n'.join(lines[1:]).strip()  # Skip "# Chapter X — Title"
-        
+
        # Add chapter
        output.append(f"\n{lines[0]}\n\n{body}\n")
-    
+
    # Back matter
    output.append("\n---\n")
    back = read_file(BACK_MATTER)
    # Clean up the back matter for print
    output.append(back)
-    
+
    # Write compiled markdown
    compiled = '\n'.join(output)
    with open(OUTPUT, 'w') as f:
        f.write(compiled)
-    
+
    # Stats
    words = len(compiled.split())
    lines_count = compiled.count('\n')
@@ -116,5 +249,12 @@ with Timmy
    print(f"  # or")
    print(f"  pandoc {OUTPUT} -o testament.epub --epub-cover-image=cover-art.jpg")

+
 if __name__ == "__main__":
-    compile()
+    if "--validate" in sys.argv:
+        valid, _ = validate_chapters()
+        sys.exit(0 if valid else 1)
+    elif "--no-validate" in sys.argv:
+        compile(skip_validation=True)
+    else:
+        compile()