Merge pull request 'burn: add chapter validation to build pipeline (closes #24)' (#26) from burn/20260410-chapter-validation into main

Merged PR #26: burn: add chapter validation to build pipeline
This commit was merged in pull request #26.
This commit is contained in:
2026-04-11 00:43:38 +00:00

View File

@@ -8,12 +8,15 @@ Uses chapters, front matter, back matter, and references illustrations.
Requirements: pip install markdown weasyprint (or use pandoc)
Usage:
python3 compile.py # generates testament-complete.md
python3 compile.py # validate then compile
python3 compile.py --validate # validate only, no compile
python3 compile.py --no-validate # skip validation, compile directly
pandoc testament-complete.md -o testament.pdf --pdf-engine=weasyprint
"""
import os
import re
import sys
BASE = os.path.dirname(os.path.abspath(__file__))
CHAPTERS_DIR = os.path.join(BASE, "chapters")
@@ -28,17 +31,147 @@ PARTS = {
11: ("THE LIGHT", "Thomas at the door. The network. The story breaks. The green light."),
}
def read_file(path):
    """Return the full text content of the file at *path*.

    The file is decoded as UTF-8 explicitly rather than with the platform
    default encoding: chapter text contains non-ASCII punctuation (for
    example the em dash in ``# Chapter N — Title``), which would otherwise
    be mis-decoded or raise on systems whose default is not UTF-8.

    Raises:
        OSError: if the file cannot be opened or read.
        UnicodeDecodeError: if the file is not valid UTF-8.
    """
    with open(path, 'r', encoding='utf-8') as f:
        return f.read()
def get_chapter_number(filename):
    """Extract the chapter number embedded in a chapter filename.

    Searches *filename* for the first ``chapter-<digits>`` occurrence and
    returns the digits as an int. Returns 0 when the name contains no such
    pattern.
    """
    found = re.search(r'chapter-(\d+)', filename)
    if found is None:
        return 0
    return int(found.group(1))
def compile():
def validate_chapters(chapters_dir=CHAPTERS_DIR):
    """Validate chapter files before compilation.

    Errors (any one makes validation fail):
        - The chapters directory does not exist, or contains no
          ``chapter-*.md`` files
        - A chapter file cannot be read
        - A chapter file is empty (whitespace-only counts as empty)
        - A chapter does not start with an H1 header (``# Title``)
        - An H1 of the form ``# Chapter N`` disagrees with the number in
          the filename
        - Gaps in chapter numbering (must be sequential from 1)
        - Duplicate chapter numbers

    Warnings (reported, but do not fail validation):
        - A chapter with fewer than 50 words (possible truncation)
        - An H1 that does not follow the ``# Chapter N — Title`` pattern
        - A filename without a chapter number of 1 or higher

    A report is printed to stdout on every path, including the
    missing-directory and no-files cases, because callers direct the user
    to the errors printed above their own message.

    Args:
        chapters_dir: Directory to scan for ``chapter-*.md`` files.

    Returns:
        (is_valid, errors) where errors is a list of human-readable strings.
    """
    errors = []
    warnings = []
    chapter_files = []

    # Collect (number, filename) pairs, ordered by number then filename.
    if not os.path.isdir(chapters_dir):
        errors.append(f"Chapters directory not found: {chapters_dir}")
    else:
        chapter_files = sorted(
            (get_chapter_number(name), name)
            for name in os.listdir(chapters_dir)
            if name.startswith("chapter-") and name.endswith(".md")
        )
        if not chapter_files:
            errors.append(f"No chapter files found in {chapters_dir}")

    # Check for duplicates
    seen_numbers = {}
    for num, filename in chapter_files:
        if num in seen_numbers:
            errors.append(
                f"Duplicate chapter number {num}: {filename} and {seen_numbers[num]}"
            )
        seen_numbers[num] = filename

    # Check for gaps in numbering (every number from 1 up to the highest)
    if chapter_files:
        highest = chapter_files[-1][0]
        missing = sorted(set(range(1, highest + 1)) - set(seen_numbers))
        if missing:
            errors.append(
                f"Missing chapter(s): {', '.join(str(n) for n in missing)}"
            )

    # Validate individual chapter files
    header_pattern = re.compile(r'^#\s+Chapter\s+(\d+)')
    for num, filename in chapter_files:
        # get_chapter_number() yields 0 when the name carries no digits, and
        # the gap check above only covers 1..highest, so call this out here.
        if num < 1:
            warnings.append(
                f"{filename}: no chapter number of 1 or higher in filename"
            )

        try:
            content = read_file(os.path.join(chapters_dir, filename))
        except (OSError, UnicodeDecodeError) as e:
            errors.append(f"{filename}: cannot read — {e}")
            continue

        stripped = content.strip()
        if not stripped:
            errors.append(f"{filename}: file is empty")
            continue

        # Check word count (warn if suspiciously short)
        word_count = len(content.split())
        if word_count < 50:
            warnings.append(
                f"{filename}: only {word_count} words (possible truncation)"
            )

        # Check starts with H1 header
        first_line = stripped.split('\n', 1)[0]
        if not first_line.startswith('# '):
            errors.append(
                f"{filename}: missing H1 header — "
                f"expected '# Chapter {num} — Title', got '{first_line[:60]}'"
            )
            continue

        # Verify the H1 matches the chapter number taken from the filename
        header_match = header_pattern.match(first_line)
        if header_match is None:
            warnings.append(
                f"{filename}: H1 header doesn't follow "
                f"'# Chapter N — Title' pattern: '{first_line[:60]}'"
            )
        elif int(header_match.group(1)) != num:
            errors.append(
                f"{filename}: header says Chapter {header_match.group(1)} "
                f"but filename says Chapter {num}"
            )

    # Report
    valid = not errors
    if warnings:
        print(f"Validation: {len(warnings)} warning(s)")
        for w in warnings:
            print(w)
    if errors:
        print(f"Validation: FAILED — {len(errors)} error(s)")
        for e in errors:
            print(e)
    else:
        # No errors implies at least one chapter file was found.
        print(
            f"Validation: PASSED — {len(chapter_files)} chapters, "
            f"chapters {chapter_files[0][0]}–{chapter_files[-1][0]}"
        )
    return valid, errors
def compile(skip_validation=False):
"""Compile all chapters into a single markdown file."""
# Pre-compilation validation
if not skip_validation:
valid, errors = validate_chapters()
if not valid:
print("\nCompilation aborted. Fix the errors above and try again.")
sys.exit(1)
print()
output = []
# Title page
output.append("""---
title: "The Testament"
@@ -66,7 +199,7 @@ with Timmy
---
""")
# Get all chapters sorted
chapters = []
for f in os.listdir(CHAPTERS_DIR):
@@ -74,7 +207,7 @@ with Timmy
num = get_chapter_number(f)
chapters.append((num, f))
chapters.sort()
current_part = 0
for num, filename in chapters:
# Insert part divider if needed
@@ -82,28 +215,28 @@ with Timmy
part_name, part_desc = PARTS[num]
current_part += 1
output.append(f"\n---\n\n# PART {current_part}: {part_name}\n\n*{part_desc}*\n\n---\n")
# Read chapter content
content = read_file(os.path.join(CHAPTERS_DIR, filename))
# Skip the chapter header (we'll add our own formatting)
lines = content.split('\n')
body = '\n'.join(lines[1:]).strip() # Skip "# Chapter X — Title"
# Add chapter
output.append(f"\n{lines[0]}\n\n{body}\n")
# Back matter
output.append("\n---\n")
back = read_file(BACK_MATTER)
# Clean up the back matter for print
output.append(back)
# Write compiled markdown
compiled = '\n'.join(output)
with open(OUTPUT, 'w') as f:
f.write(compiled)
# Stats
words = len(compiled.split())
lines_count = compiled.count('\n')
@@ -116,5 +249,12 @@ with Timmy
print(f" # or")
print(f" pandoc {OUTPUT} -o testament.epub --epub-cover-image=cover-art.jpg")
if __name__ == "__main__":
    # Dispatch on the command-line flag. There must be no unconditional
    # compile() call ahead of this: it would run a validated compile before
    # the flags are looked at, defeating both --validate and --no-validate.
    cli_args = sys.argv[1:]
    if "--validate" in cli_args:
        # Validate only; the exit status reports the outcome to the shell.
        valid, _ = validate_chapters()
        sys.exit(0 if valid else 1)
    elif "--no-validate" in cli_args:
        compile(skip_validation=True)
    else:
        compile()