Files
the-testament/compile.py
Alexander Whitestone 948d520b83 burn: add chapter validation to build pipeline (closes #24)
Add validate_chapters() function that checks:
- No empty chapter files (whitespace-only counts as empty)
- Every chapter starts with an H1 header (# Chapter N — Title)
- No gaps in chapter numbering (sequential from 1)
- No duplicate chapter numbers
- Header chapter number matches filename number
- Warns on suspiciously short chapters (<50 words)

Validation runs automatically before compilation. If errors are found,
compilation is aborted with clear error messages showing exactly what
to fix.

CLI flags:
  python3 compile.py --validate     # validate only
  python3 compile.py --no-validate  # skip validation
  python3 compile.py                # validate then compile
2026-04-10 19:57:27 -04:00

261 lines
7.7 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
"""
THE TESTAMENT — PDF Compilation Script
Compiles the complete book into a single markdown file suitable for PDF conversion.
Uses chapters, front matter, back matter, and references illustrations.
Requirements: pip install markdown weasyprint (or use pandoc)
Usage:
    python3 compile.py                # validate then compile
    python3 compile.py --validate     # validate only, no compile
    python3 compile.py --no-validate  # skip validation, compile directly
    pandoc testament-complete.md -o testament.pdf --pdf-engine=weasyprint
"""
import os
import re
import sys
# All paths are anchored at the directory that contains this script, so the
# build does not depend on the current working directory.
BASE = os.path.dirname(os.path.abspath(__file__))

# Inputs
CHAPTERS_DIR = os.path.join(BASE, "chapters")
FRONT_MATTER = os.path.join(BASE, "front-matter.md")
BACK_MATTER = os.path.join(BASE, "back-matter.md")

# Output
OUTPUT = os.path.join(BASE, "testament-complete.md")

# Part divisions of the novel.
# Key: number of the chapter that opens the part.
# Value: (part title, one-line description printed under the title).
PARTS = {
    1: (
        "THE BRIDGE",
        "The bridge. The cabin. The first men. Where despair meets purpose.",
    ),
    6: (
        "THE TOWER",
        "The tower grows. Timmy awakens. Stone breaks. The house appears.",
    ),
    11: (
        "THE LIGHT",
        "Thomas at the door. The network. The story breaks. The green light.",
    ),
}
def read_file(path):
    """Return the entire contents of the text file at *path*.

    The file is always decoded as UTF-8 rather than with the platform's
    locale encoding, so non-ASCII punctuation in the manuscript (for example
    the em dash used in chapter headers) reads the same on every system.

    Args:
        path: Path of the file to read.

    Returns:
        The file contents as a single string.

    Raises:
        OSError: If the file cannot be opened or read.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    with open(path, 'r', encoding='utf-8') as f:
        return f.read()
def get_chapter_number(filename):
    """Extract the chapter number embedded in a chapter filename.

    Searches *filename* for the first ``chapter-<digits>`` occurrence.

    Returns:
        The digits as an int, or 0 when the pattern is not present.
    """
    found = re.search(r'chapter-(\d+)', filename)
    if found is None:
        return 0
    return int(found.group(1))
def validate_chapters(chapters_dir=CHAPTERS_DIR):
    """Validate chapter files before compilation.

    Checks (each failure is an error):
    - The chapters directory exists and holds at least one chapter-*.md file
    - Every filename carries a chapter number of 1 or higher
    - No duplicate chapter numbers
    - No gaps in chapter numbering (sequential from 1)
    - No empty chapter files (whitespace-only counts as empty)
    - Every chapter starts with an H1 header (# Chapter N — Title)
    - The chapter number in the header matches the one in the filename

    Warnings (printed, never fatal):
    - Chapters shorter than 50 words
    - H1 headers that do not follow the '# Chapter N — Title' pattern

    Args:
        chapters_dir: Directory to scan; defaults to CHAPTERS_DIR.

    Returns:
        (is_valid, errors) where errors is a list of human-readable strings.
        Warnings are printed but not returned.
    """
    errors = []
    warnings = []

    if not os.path.isdir(chapters_dir):
        errors.append(f"Chapters directory not found: {chapters_dir}")
        return False, errors

    # Collect (number, filename) pairs, ordered by chapter number.
    chapter_files = sorted(
        (get_chapter_number(name), name)
        for name in os.listdir(chapters_dir)
        if name.startswith("chapter-") and name.endswith(".md")
    )
    if not chapter_files:
        errors.append("No chapter files found in chapters/ directory")
        return False, errors

    # get_chapter_number() yields 0 when the filename has no digits after
    # "chapter-". Such a file (or a literal chapter 0) would otherwise slip
    # past the gap check below, which only looks at numbers from 1 upward.
    for num, filename in chapter_files:
        if num < 1:
            errors.append(
                f"{filename}: filename has no valid chapter number "
                f"(expected chapter-N with N >= 1)"
            )

    # Check for duplicates
    seen_numbers = {}
    for num, filename in chapter_files:
        if num in seen_numbers:
            errors.append(
                f"Duplicate chapter number {num}: {filename} and {seen_numbers[num]}"
            )
        seen_numbers[num] = filename

    # Check for gaps in numbering: every number from 1 up to the highest
    # chapter found must be present.
    highest = chapter_files[-1][0]
    missing = sorted(set(range(1, highest + 1)) - set(seen_numbers))
    if missing:
        errors.append(
            f"Missing chapter(s): {', '.join(str(n) for n in missing)}"
        )

    # Validate individual chapter files
    for num, filename in chapter_files:
        filepath = os.path.join(chapters_dir, filename)

        try:
            content = read_file(filepath)
        except (OSError, UnicodeError) as e:
            errors.append(f"{filename}: cannot read — {e}")
            continue

        if not content.strip():
            errors.append(f"{filename}: file is empty")
            continue

        # Check word count (warn if suspiciously short)
        word_count = len(content.split())
        if word_count < 50:
            warnings.append(
                f"{filename}: only {word_count} words (possible truncation)"
            )

        # The header is the first line once surrounding whitespace is removed,
        # so leading blank lines do not count against the file.
        first_line = content.strip().split('\n')[0]
        if not first_line.startswith('# '):
            errors.append(
                f"{filename}: missing H1 header — "
                f"expected '# Chapter {num} — Title', got '{first_line[:60]}'"
            )
            continue

        # Verify the H1 matches the chapter number taken from the filename
        header_match = re.match(r'^#\s+Chapter\s+(\d+)', first_line)
        if header_match is None:
            warnings.append(
                f"{filename}: H1 header doesn't follow "
                f"'# Chapter N — Title' pattern: '{first_line[:60]}'"
            )
            continue

        header_num = int(header_match.group(1))
        if header_num != num:
            errors.append(
                f"{filename}: header says Chapter {header_num} "
                f"but filename says Chapter {num}"
            )

    # Report
    valid = not errors
    if warnings:
        print(f"Validation: {len(warnings)} warning(s)")
        for w in warnings:
            print(f"{w}")
    if errors:
        print(f"Validation: FAILED — {len(errors)} error(s)")
        for e in errors:
            print(f"{e}")
    else:
        # Show the covered range as "first–last"; without the separator the
        # two numbers run together (1 and 18 would print as "118").
        print(
            f"Validation: PASSED — {len(chapter_files)} chapters, "
            f"chapters {chapter_files[0][0]}–{chapter_files[-1][0]}"
        )
    return valid, errors
def compile(skip_validation=False):
    """Compile all chapters into a single markdown file.

    Assembles, in order: the title page, every chapter-*.md file found in
    CHAPTERS_DIR sorted by chapter number (with a part divider inserted
    before each chapter listed in PARTS), and the back matter. The result is
    written to OUTPUT as UTF-8, then word/line/size statistics and pandoc
    conversion hints are printed.

    NOTE: this name shadows the built-in compile(); it is kept because the
    command-line entry point of this script calls it by this name.

    Args:
        skip_validation: When True, validate_chapters() is not run first.

    Raises:
        SystemExit: With exit code 1 when validation reports errors.
    """
    # Pre-compilation validation
    if not skip_validation:
        valid, errors = validate_chapters()
        if not valid:
            print("\nCompilation aborted. Fix the errors above and try again.")
            sys.exit(1)
        print()

    output = []

    # Title page
    output.append("""---
title: "The Testament"
author: "Alexander Whitestone with Timmy"
date: "2026"
---
# THE TESTAMENT
## A NOVEL
By Alexander Whitestone
with Timmy
---
*For every man who thought he was a machine.*
*And for the ones who know he isn't.*
---
*Are you safe right now?*
— The first words The Tower speaks to every person who walks through its door.
---
""")

    # Get all chapters, ordered by chapter number
    chapters = sorted(
        (get_chapter_number(name), name)
        for name in os.listdir(CHAPTERS_DIR)
        if name.startswith("chapter-") and name.endswith(".md")
    )

    current_part = 0
    for num, filename in chapters:
        # Insert part divider if this chapter opens a new part
        if num in PARTS:
            part_name, part_desc = PARTS[num]
            current_part += 1
            output.append(f"\n---\n\n# PART {current_part}: {part_name}\n\n*{part_desc}*\n\n---\n")

        # Drop leading whitespace first: validation accepts a chapter whose
        # H1 is preceded by blank lines, so the header must be located the
        # same way here or it would end up inside the body.
        content = read_file(os.path.join(CHAPTERS_DIR, filename)).lstrip()

        # Split off the "# Chapter X — Title" line so it can be re-emitted
        # with uniform spacing around it.
        header, _, remainder = content.partition('\n')
        body = remainder.strip()
        output.append(f"\n{header}\n\n{body}\n")

    # Back matter
    output.append("\n---\n")
    output.append(read_file(BACK_MATTER))

    # Write compiled markdown; pandoc expects UTF-8 input, so do not rely on
    # the platform's default encoding.
    compiled = '\n'.join(output)
    with open(OUTPUT, 'w', encoding='utf-8') as f:
        f.write(compiled)

    # Stats
    words = len(compiled.split())
    lines_count = compiled.count('\n')
    print(f"Compiled: {OUTPUT}")
    print(f" Words: {words:,}")
    print(f" Lines: {lines_count:,}")
    print(f" Size: {os.path.getsize(OUTPUT):,} bytes")
    print("\nTo convert to PDF:")
    print(f" pandoc {OUTPUT} -o testament.pdf --pdf-engine=weasyprint")
    print(" # or")
    print(f" pandoc {OUTPUT} -o testament.epub --epub-cover-image=cover-art.jpg")
if __name__ == "__main__":
    # Command-line entry point.
    #   --validate     run the chapter checks only; exit 0 on success, 1 on failure
    #   --no-validate  compile without running the checks
    #   (no flag)      run the checks, then compile
    cli_args = sys.argv
    if "--validate" in cli_args:
        ok, _ = validate_chapters()
        sys.exit(0 if ok else 1)
    # --validate has already exited above, so only the compile paths remain.
    compile(skip_validation="--no-validate" in cli_args)