the-testament/compile_all.py

#!/usr/bin/env python3
"""
THE TESTAMENT — Unified Compilation Pipeline

Single script that builds ALL distributable formats:
  1. testament-complete.md    — full novel as one markdown file
  2. testament.epub           — EPUB with cover art + CSS
  3. testament.pdf            — PDF via reportlab (pure Python) with QR codes
  4. testament.html           — standalone styled HTML
  5. website/chapters.json    — chapter data for the web reader
  6. build-manifest.json      — SHA256 checksums of all outputs

Usage:
  python3 compile_all.py              # build everything
  python3 compile_all.py --md         # markdown only
  python3 compile_all.py --epub       # markdown + EPUB
  python3 compile_all.py --pdf        # markdown + PDF
  python3 compile_all.py --html       # markdown + HTML
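  python3 compile_all.py --json       # chapters.json only (built directly from chapters/*.md)
  python3 compile_all.py --manifest   # build-manifest.json only (checksums of outputs that already exist)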
  python3 compile_all.py --check      # verify dependencies
  python3 compile_all.py --clean      # remove all build artifacts

Requirements:
  - pandoc          (brew install pandoc)    — for EPUB and HTML
  - reportlab       (pip install reportlab)  — for PDF (pure Python)
  - qrcode          (pip install qrcode)     — for QR codes in PDF
"""

import hashlib
import json
import os
import re
import subprocess
import sys
import time
from pathlib import Path

# ── Paths ──────────────────────────────────────────────────────────────
# Everything is resolved relative to the directory containing this script,
# so the build behaves the same regardless of the current working directory.
REPO = Path(__file__).resolve().parent
CHAPTERS_DIR = REPO / "chapters"          # source chapters: chapter-NN.md
# NOTE(review): FRONT_MATTER is defined but not read by any function visible
# in this file; the title page is an inline string in compile_markdown().
FRONT_MATTER = REPO / "front-matter.md"
BACK_MATTER = REPO / "back-matter.md"     # appended verbatim by compile_markdown()
WEBSITE_DIR = REPO / "website"            # created on demand by compile_chapters_json()
BUILD_DIR = REPO / "build"
OUTPUT_DIR = BUILD_DIR / "output"         # created by compile_pdf(); emptied by clean()

# Output files
# Every distributable is written to the repo root except chapters.json,
# which lives under website/.
OUT_MD = REPO / "testament-complete.md"
OUT_EPUB = REPO / "testament.epub"
OUT_HTML = REPO / "testament.html"
OUT_PDF = REPO / "testament.pdf"
OUT_JSON = WEBSITE_DIR / "chapters.json"
OUT_MANIFEST = REPO / "build-manifest.json"

# Optional inputs: the EPUB/HTML builders only pass these to pandoc when the
# files exist.
STYLESHEET = REPO / "book-style.css"
COVER_IMAGE = REPO / "cover" / "cover-art.jpg"

# ── Part divisions ─────────────────────────────────────────────────────
# Maps the chapter number that OPENS a part to (part title, one-line blurb).
# compile_markdown() emits a "PART n" heading immediately before each listed
# chapter; n is a running counter, not the dictionary key.
PARTS = {
    1: ("THE BRIDGE", "The bridge. The cabin. The first men. Where despair meets purpose."),
    6: ("THE TOWER", "The tower grows. Timmy awakens. Stone breaks. The house appears."),
    11: ("THE LIGHT", "Thomas at the door. The network. The story breaks. The green light."),
}

# QR code destinations embedded in the PDF
# Maps the caption printed under each code to the URL it encodes. compile_pdf()
# lays these out two per row, in insertion order, on a final page.
QR_LINKS = {
    "Read Online": "https://timmyfoundation.org/the-testament",
    "The Door (Game)": "https://timmyfoundation.org/the-door",
    "Soundtrack": "https://timmyfoundation.org/soundtrack",
    "Source Code": "https://forge.alexanderwhitestone.com/Timmy_Foundation/the-testament",
}


# ── Helpers ─────────────────────────────────────────────────────────────
def get_chapter_num(filename: str) -> int:
    """Return the number following ``chapter-`` in *filename*, or 0 if absent."""
    found = re.search(r"chapter-(\d+)", filename)
    if found is None:
        return 0
    return int(found.group(1))


def read_file(path: Path) -> str:
    """Return the entire contents of *path*, decoded as UTF-8."""
    with path.open("r", encoding="utf-8") as handle:
        return handle.read()


def sha256_file(path: Path) -> str:
    """Return the hex SHA-256 digest of the file at *path*.

    The file is read in 8 KiB pieces so large outputs are never held in
    memory all at once.
    """
    digest = hashlib.sha256()
    with open(path, "rb") as stream:
        while piece := stream.read(8192):
            digest.update(piece)
    return digest.hexdigest()


def get_sorted_chapters() -> list[tuple[int, str]]:
    """List chapter files as ``(number, filename)`` pairs in ascending order.

    Only entries of ``CHAPTERS_DIR`` named ``chapter-*.md`` are considered.
    """
    candidates = (
        name
        for name in os.listdir(CHAPTERS_DIR)
        if name.startswith("chapter-") and name.endswith(".md")
    )
    return sorted((get_chapter_num(name), name) for name in candidates)


# ── 1. Markdown Compilation ───────────────────────────────────────────
def compile_markdown() -> int:
    """Assemble the whole novel into ``OUT_MD`` and return its word count.

    The output is, in order: an inline title page (including a YAML metadata
    header), every chapter from ``get_sorted_chapters()`` with a "PART n"
    divider inserted before each chapter number listed in ``PARTS``, a
    horizontal rule, and the contents of ``BACK_MATTER``.

    Returns:
        Number of whitespace-separated words in the compiled text.

    Raises:
        OSError: if the chapters directory, a chapter file or the back-matter
            file cannot be read, or the output cannot be written.
    """
    sections = []

    # Title page
    sections.append("""---
title: "The Testament"
author: "Alexander Whitestone with Timmy"
date: "2026"
lang: en
---

# THE TESTAMENT

## A NOVEL

By Alexander Whitestone
with Timmy

---

*For every man who thought he was a machine.*
*And for the ones who know he isn't.*

---

*Are you safe right now?*

— The first words The Tower speaks to every person who walks through its door.

---
""")

    current_part = 0
    for num, filename in get_sorted_chapters():
        if num in PARTS:
            part_name, part_desc = PARTS[num]
            # Parts are numbered by order of appearance, not by chapter number.
            current_part += 1
            sections.append(f"\n---\n\n# PART {current_part}: {part_name}\n\n*{part_desc}*\n\n---\n")

        # The first line of a chapter file is its heading; the remainder is
        # the body, re-joined with normalised surrounding whitespace.
        heading, _, remainder = read_file(CHAPTERS_DIR / filename).partition("\n")
        sections.append(f"\n{heading}\n\n{remainder.strip()}\n")

    # Back matter
    sections.append("\n---\n")
    sections.append(read_file(BACK_MATTER))

    compiled = "\n".join(sections)
    OUT_MD.write_text(compiled, encoding="utf-8")

    words = len(compiled.split())
    size = OUT_MD.stat().st_size
    print(f"  📄 {OUT_MD.name:30s} {words:>8,} words  {size:>10,} bytes")
    return words


# ── 2. EPUB Compilation ────────────────────────────────────────────────
def compile_epub() -> bool:
    """Build ``OUT_EPUB`` from the compiled markdown by invoking pandoc.

    Returns True on success. Returns False, after printing the reason, when
    the markdown has not been compiled, pandoc is not on PATH, or pandoc exits
    with a non-zero status.
    """
    if not OUT_MD.exists():
        print("  ⚠️  Markdown not compiled yet — skipping EPUB")
        return False

    if not shutil_which("pandoc"):
        print("  ⚠️  pandoc not found — skipping EPUB (brew install pandoc)")
        return False

    command = ["pandoc", str(OUT_MD), "-o", str(OUT_EPUB), "--toc", "--toc-depth=2"]
    for entry in (
        "title=The Testament",
        "author=Alexander Whitestone with Timmy",
        "lang=en",
        "date=2026",
    ):
        command += ["--metadata", entry]

    # Stylesheet and cover are optional; pass them only when present.
    if STYLESHEET.exists():
        command += ["--css", str(STYLESHEET)]
    if COVER_IMAGE.exists():
        command += ["--epub-cover-image", str(COVER_IMAGE)]

    proc = subprocess.run(command, capture_output=True, text=True)
    if proc.returncode != 0:
        print(f"  ❌ EPUB failed: {proc.stderr[:200]}")
        return False

    size = OUT_EPUB.stat().st_size
    print(f"  📖 {OUT_EPUB.name:30s} {'':>8s}  {size:>10,} bytes  ({size/1024:.0f} KB)")
    return True


# ── 3. PDF via Reportlab ──────────────────────────────────────────────
def compile_pdf() -> bool:
    """Generate ``OUT_PDF`` with reportlab — pure Python, no system tools.

    The compiled markdown (``OUT_MD``) is read — and built first if it does
    not exist yet — and converted line by line into reportlab flowables. Its
    leading YAML metadata header is dropped so it does not appear as body
    text. When the optional ``qrcode`` package is importable, a final
    "Experience More" page with one QR code per ``QR_LINKS`` entry is added.

    Returns:
        True when the PDF was written; False when reportlab is not installed
        or assembling/building the document raised.
    """
    try:
        from reportlab.lib.pagesizes import letter
        from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
        from reportlab.lib.units import inch
        from reportlab.lib.colors import HexColor
        from reportlab.platypus import (
            SimpleDocTemplate, Paragraph, Spacer, PageBreak,
            Image as RLImage, Table, TableStyle, HRFlowable,
        )
        from reportlab.lib.enums import TA_CENTER, TA_JUSTIFY
    except ImportError:
        print("  ⚠️  reportlab not installed — skipping PDF (pip install reportlab)")
        return False

    # qrcode is optional: without it the PDF is built minus the QR page.
    try:
        import qrcode
        has_qrcode = True
    except ImportError:
        has_qrcode = False

    import io

    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    print("  ⏳ Building PDF (reportlab)...")

    # ── Styles ──
    styles = getSampleStyleSheet()
    styles.add(ParagraphStyle(
        "BookTitle", parent=styles["Title"],
        fontSize=28, leading=34, spaceAfter=20,
        textColor=HexColor("#1a1a2e"), alignment=TA_CENTER,
    ))
    styles.add(ParagraphStyle(
        "BookAuthor", parent=styles["Normal"],
        fontSize=14, leading=18, spaceAfter=40,
        textColor=HexColor("#555555"), alignment=TA_CENTER,
    ))
    styles.add(ParagraphStyle(
        "PartTitle", parent=styles["Heading1"],
        fontSize=22, leading=28, spaceBefore=40, spaceAfter=12,
        textColor=HexColor("#16213e"), alignment=TA_CENTER,
    ))
    # Italics for this style come from the <i> markup added where it is used;
    # ParagraphStyle has no "italics" attribute.
    styles.add(ParagraphStyle(
        "PartDesc", parent=styles["Normal"],
        fontSize=11, leading=15, spaceAfter=30,
        textColor=HexColor("#666666"), alignment=TA_CENTER,
    ))
    styles.add(ParagraphStyle(
        "ChapterTitle", parent=styles["Heading1"],
        fontSize=20, leading=26, spaceBefore=30, spaceAfter=16,
        textColor=HexColor("#1a1a2e"), alignment=TA_CENTER,
    ))
    styles.add(ParagraphStyle(
        "BodyText2", parent=styles["Normal"],
        fontSize=11, leading=16, spaceAfter=8,
        alignment=TA_JUSTIFY, firstLineIndent=24,
    ))
    styles.add(ParagraphStyle(
        "Footer", parent=styles["Normal"],
        fontSize=9, textColor=HexColor("#888888"), alignment=TA_CENTER,
    ))

    def _escape(text: str) -> str:
        """Escape the characters that are special in Paragraph markup."""
        return text.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")

    def _md_inline_to_rml(text: str) -> str:
        """Escape *text*, then turn **bold** / *italic* into <b> / <i> tags."""
        text = _escape(text)
        # Bold first so the single-asterisk pattern does not consume "**".
        text = re.sub(r"\*\*(.+?)\*\*", r"<b>\1</b>", text)
        text = re.sub(r"\*(.+?)\*", r"<i>\1</i>", text)
        return text

    def _strip_front_matter(md_text: str) -> str:
        """Drop a leading YAML metadata block ("---" … "---" or "...").

        Only a block that opens on the very first line and is immediately
        followed by a "key: value"-looking line is removed, so a document that
        merely starts with a horizontal rule is left untouched.
        """
        lines = md_text.split("\n")
        if len(lines) < 3 or lines[0].strip() != "---" or ":" not in lines[1]:
            return md_text
        for idx in range(1, len(lines)):
            if lines[idx].strip() in ("---", "..."):
                return "\n".join(lines[idx + 1:])
        return md_text  # unterminated block: treat it as ordinary text

    def _make_qr(data: str, size: int = 80):
        """Return an in-memory QR image flowable for *data* (None without qrcode)."""
        if not has_qrcode:
            return None
        qr = qrcode.QRCode(version=1, box_size=4, border=1)
        qr.add_data(data)
        qr.make(fit=True)
        img = qr.make_image(fill_color="black", back_color="white")
        buf = io.BytesIO()
        img.save(buf, format="PNG")
        buf.seek(0)
        return RLImage(buf, width=size, height=size)

    def _parse_md_to_flowables(md_text: str) -> list:
        """Convert markdown to flowables; each non-blank line is one element."""
        flowables = []
        for line in md_text.split("\n"):
            stripped = line.strip()

            # Blank lines carry no content of their own.
            if not stripped:
                continue

            # Horizontal rule
            if stripped in ("---", "***", "___"):
                flowables.append(HRFlowable(
                    width="60%", thickness=1,
                    spaceAfter=20, spaceBefore=20, color=HexColor("#cccccc"),
                ))
                continue

            # H1 — classified on the raw text, rendered escaped.
            if stripped.startswith("# "):
                raw = stripped[2:].strip()
                text = _escape(raw)
                upper = raw.upper()
                if upper.startswith("PART "):
                    flowables.append(PageBreak())
                    flowables.append(Paragraph(text, styles["PartTitle"]))
                elif upper.startswith("CHAPTER "):
                    flowables.append(PageBreak())
                    flowables.append(Paragraph(text, styles["ChapterTitle"]))
                elif "THE TESTAMENT" in upper:
                    flowables.append(Spacer(1, 2 * inch))
                    flowables.append(Paragraph(text, styles["BookTitle"]))
                else:
                    flowables.append(Spacer(1, 0.3 * inch))
                    flowables.append(Paragraph(text, styles["Heading1"]))
                continue

            # H2
            if stripped.startswith("## "):
                text = _escape(stripped[3:].strip())
                flowables.append(Spacer(1, 0.2 * inch))
                flowables.append(Paragraph(text, styles["Heading2"]))
                continue

            # Italic-only line
            if stripped.startswith("*") and stripped.endswith("*") and len(stripped) > 2:
                text = stripped.strip("*").strip()
                flowables.append(Paragraph(f"<i>{_escape(text)}</i>", styles["PartDesc"]))
                continue

            # Regular paragraph
            flowables.append(Paragraph(_md_inline_to_rml(stripped), styles["BodyText2"]))

        return flowables

    def _qr_page() -> list:
        """Flowables for the closing page of QR codes, two per table row."""
        page = [
            PageBreak(),
            Paragraph("Experience More", styles["PartTitle"]),
            Spacer(1, 0.3 * inch),
        ]

        qr_items = []
        for label, url in QR_LINKS.items():
            qr_img = _make_qr(url, size=72)
            if qr_img:
                qr_items.append([
                    qr_img,
                    Spacer(1, 6),
                    Paragraph(f"<b>{_escape(label)}</b>", styles["Footer"]),
                ])

        if qr_items:
            rows = []
            for j in range(0, len(qr_items), 2):
                row = qr_items[j:j + 2]
                if len(row) == 1:
                    row.append("")  # pad an odd final row to two columns
                rows.append(row)
            qr_table = Table(rows, colWidths=[2.5 * inch, 2.5 * inch])
            qr_table.setStyle(TableStyle([
                ("ALIGN", (0, 0), (-1, -1), "CENTER"),
                ("VALIGN", (0, 0), (-1, -1), "TOP"),
                ("TOPPADDING", (0, 0), (-1, -1), 12),
                ("BOTTOMPADDING", (0, 0), (-1, -1), 12),
            ]))
            page.append(qr_table)
        return page

    # ── Build PDF ──
    doc = SimpleDocTemplate(
        str(OUT_PDF),
        pagesize=letter,
        leftMargin=1.0 * inch,
        rightMargin=1.0 * inch,
        topMargin=0.8 * inch,
        bottomMargin=0.8 * inch,
        title="The Testament",
        author="Alexander Whitestone with Timmy",
    )

    if not OUT_MD.exists():
        compile_markdown()

    md_text = OUT_MD.read_text(encoding="utf-8")

    try:
        # Paragraph() can reject malformed markup while the story is being
        # assembled, so assembly sits under the same guard as the build.
        story = _parse_md_to_flowables(_strip_front_matter(md_text))
        if has_qrcode:
            story.extend(_qr_page())

        doc.build(story)
        size = OUT_PDF.stat().st_size
        print(f"  📕 {OUT_PDF.name:30s} {'':>8s}  {size:>10,} bytes  ({size / (1024 * 1024):.1f} MB)")
        return True
    except Exception as e:
        # Pipeline boundary: report the failure and let the other formats
        # continue instead of aborting the whole run with a traceback.
        print(f"  ❌ PDF failed: {e}")
        return False


# ── 4. HTML Compilation ────────────────────────────────────────────────
def compile_html() -> bool:
    """Build the standalone ``OUT_HTML`` from the compiled markdown via pandoc.

    Returns True on success. Returns False, after printing the reason, when
    the markdown has not been compiled, pandoc is not on PATH, or pandoc exits
    with a non-zero status.
    """
    if not OUT_MD.exists():
        print("  ⚠️  Markdown not compiled yet — skipping HTML")
        return False

    if shutil_which("pandoc") is None:
        print("  ⚠️  pandoc not found — skipping HTML")
        return False

    command = [
        "pandoc", str(OUT_MD),
        "-o", str(OUT_HTML),
        "--standalone",
        "--toc", "--toc-depth=2",
    ]
    command += ["--metadata", "title=The Testament"]
    command += ["--metadata", "author=Alexander Whitestone with Timmy"]
    command += ["-V", "lang=en"]

    # With a stylesheet available, inline it so the HTML is a single file.
    if STYLESHEET.exists():
        command += ["--css", str(STYLESHEET), "--embed-resources"]

    proc = subprocess.run(command, capture_output=True, text=True)
    if proc.returncode != 0:
        print(f"  ❌ HTML failed: {proc.stderr[:200]}")
        return False

    size = OUT_HTML.stat().st_size
    print(f"  🌐 {OUT_HTML.name:30s} {'':>8s}  {size:>10,} bytes  ({size / 1024:.0f} KB)")
    return True


# ── 5. chapters.json for Web Reader ────────────────────────────────────
def _chapter_body_to_html(body: str) -> str:
    """Convert a chapter body (markdown minus its title line) to simple HTML.

    Blocks are separated by blank lines. A block starting with ">" becomes a
    <blockquote>, one starting with "###"/"####" becomes <h3>/<h4>, and
    anything else becomes a <p> with inline emphasis converted and single
    newlines turned into <br>.
    """
    html_parts = []
    for block in body.split("\n\n"):
        block = block.strip()
        if not block:
            continue
        if block.startswith(">"):
            quoted = [line.lstrip("> ").strip() for line in block.split("\n")]
            html_parts.append(f'<blockquote>{"<br>".join(quoted)}</blockquote>')
        elif block.startswith("####"):
            html_parts.append(f"<h4>{block.lstrip('# ').strip()}</h4>")
        elif block.startswith("###"):
            html_parts.append(f"<h3>{block.lstrip('# ').strip()}</h3>")
        else:
            # Longest delimiter first: otherwise the single-asterisk pattern
            # eats half of "**bold**" and leaves stray asterisks behind.
            block = re.sub(r"\*\*\*(.+?)\*\*\*", r"<strong><em>\1</em></strong>", block)
            block = re.sub(r"\*\*(.+?)\*\*", r"<strong>\1</strong>", block)
            block = re.sub(r"\*(.+?)\*", r"<em>\1</em>", block)
            block = block.replace("\n", "<br>")
            html_parts.append(f"<p>{block}</p>")
    return "\n".join(html_parts)


def compile_chapters_json() -> bool:
    """Build ``OUT_JSON`` (website/chapters.json) from ``chapters/*.md``.

    Each entry of the written JSON array has ``number``, ``title`` (the text
    of the leading "# " heading, or "Chapter N" when the file does not start
    with one) and ``html`` (the converted body). Missing chapter files are
    reported and skipped.

    Returns:
        Always True once the file has been written.
    """
    WEBSITE_DIR.mkdir(parents=True, exist_ok=True)

    chapters = []
    # NOTE(review): the chapter range is fixed at 1–18 with zero-padded names,
    # unlike get_sorted_chapters(), which discovers files dynamically.
    for i in range(1, 19):
        fname = CHAPTERS_DIR / f"chapter-{i:02d}.md"
        if not fname.exists():
            print(f"  ⚠️  {fname.name} not found, skipping")
            continue

        text = fname.read_text(encoding="utf-8")
        # re.match only looks at the very start of the file, so the title
        # must be the first line.
        title_match = re.match(r"# (.+)", text)
        if title_match:
            title = title_match.group(1)
            body = text[title_match.end():].strip()
        else:
            title = f"Chapter {i}"
            body = text.strip()

        chapters.append({
            "number": i,
            "title": title,
            "html": _chapter_body_to_html(body),
        })

    OUT_JSON.write_text(json.dumps(chapters, indent=2), encoding="utf-8")
    size = OUT_JSON.stat().st_size
    print(f"  📋 {str(OUT_JSON.relative_to(REPO)):30s} {len(chapters):>4} chapters {size:>10,} bytes")
    return True


# ── 6. Build Manifest ─────────────────────────────────────────────────
def generate_manifest() -> bool:
    """Write ``OUT_MANIFEST`` listing size and SHA-256 of each existing output.

    Outputs that have not been built are simply left out of the ``files``
    section. Always returns True.
    """
    candidates = (
        ("testament-complete.md", OUT_MD),
        ("testament.epub", OUT_EPUB),
        ("testament.pdf", OUT_PDF),
        ("testament.html", OUT_HTML),
        ("website/chapters.json", OUT_JSON),
    )

    files = {}
    for label, location in candidates:
        if not location.exists():
            continue
        files[label] = {
            "path": label,
            "size_bytes": location.stat().st_size,
            "sha256": sha256_file(location),
        }

    # UTC build timestamp in ISO-8601 form.
    built_at = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
    manifest = {
        "project": "The Testament",
        "author": "Alexander Whitestone with Timmy",
        "built_at": built_at,
        "compiler": "compile_all.py",
        "files": files,
    }

    OUT_MANIFEST.write_text(json.dumps(manifest, indent=2), encoding="utf-8")
    print(f"  📜 {str(OUT_MANIFEST.relative_to(REPO)):30s} {len(manifest['files']):>4} files")
    return True


# ── Dependency Check ───────────────────────────────────────────────────
def shutil_which(name: str) -> str | None:
    """Minimal which without importing shutil for everything."""
    import shutil
    return shutil.which(name)


def check_dependencies():
    """Print a status line for each external tool, package and optional asset.

    Reports pandoc (looked up on PATH), the ``reportlab`` and ``qrcode``
    packages (by attempting to import them) and whether the optional
    stylesheet and cover image exist. Nothing is returned; a missing
    dependency is reported, never raised.
    """
    import shutil as _shutil
    from importlib import metadata as _metadata

    def _package_version(module, attr: str, dist_name: str) -> str:
        """Best-effort version string for an already-imported package.

        Prefers the module's own *attr*; not every package exposes one, so
        fall back to the installed distribution metadata rather than letting
        an AttributeError abort the whole report.
        """
        value = getattr(module, attr, None)
        if value:
            return str(value)
        try:
            return _metadata.version(dist_name)
        except _metadata.PackageNotFoundError:
            return "installed (version unknown)"

    print("\n📋 Dependency Check:")
    print(f"{'─' * 55}")

    pandoc = _shutil.which("pandoc")
    print(f"  {'✅' if pandoc else '❌'} pandoc          {pandoc or 'NOT FOUND (brew install pandoc)'}")

    try:
        import reportlab
    except ImportError:
        print("  ❌ reportlab      NOT FOUND (pip install reportlab)")
    else:
        print(f"  ✅ reportlab      {_package_version(reportlab, 'Version', 'reportlab')}")

    try:
        import qrcode
    except ImportError:
        print("  ❌ qrcode        NOT FOUND (pip install qrcode)")
    else:
        print(f"  ✅ qrcode        {_package_version(qrcode, '__version__', 'qrcode')}")

    style = STYLESHEET.exists()
    print(f"  {'✅' if style else '⚠️ '} stylesheet     {STYLESHEET if style else 'NOT FOUND (optional)'}")

    cover = COVER_IMAGE.exists()
    print(f"  {'✅' if cover else '⚠️ '} cover art      {COVER_IMAGE if cover else 'NOT FOUND (optional)'}")


# ── Clean ──────────────────────────────────────────────────────────────
def clean():
    """Delete every generated output plus any regular file in ``OUTPUT_DIR``.

    Each removed path is printed relative to the repo root, followed by a
    count (or a note that there was nothing to remove).
    """
    targets = [OUT_MD, OUT_EPUB, OUT_HTML, OUT_PDF, OUT_JSON, OUT_MANIFEST]
    # Also clean build/output/
    targets.extend(entry for entry in OUTPUT_DIR.glob("*") if entry.is_file())

    removed = 0
    for target in targets:
        if not target.exists():
            continue
        target.unlink()
        removed += 1
        print(f"  🗑️  {target.relative_to(REPO)}")

    if removed:
        print(f"  Removed {removed} files.")
    else:
        print("  (nothing to clean)")


# ── Main ───────────────────────────────────────────────────────────────
def main():
    """Command-line entry point: parse flags, run the requested build steps.

    ``--check`` and ``--clean`` short-circuit everything else. With no
    ``--`` flag at all, every format plus the manifest is built. Otherwise
    only the requested formats are built, and the compiled markdown is
    produced first whenever EPUB, PDF or HTML is requested. A summary of
    built and failed steps is printed at the end.
    """
    argv = sys.argv[1:]
    started = time.time()

    if "--check" in argv:
        check_dependencies()
        return

    if "--clean" in argv:
        print("🧹 Cleaning build artifacts...")
        clean()
        return

    # No option flags at all means "build everything".
    build_everything = all(not arg.startswith("--") for arg in argv)

    def requested(flag: str) -> bool:
        return flag in argv or build_everything

    want_md = requested("--md")
    want_epub = requested("--epub")
    want_pdf = requested("--pdf")
    want_html = requested("--html")
    want_json = requested("--json")

    banner = "=" * 65
    print(banner)
    print("  THE TESTAMENT — Unified Compilation Pipeline")
    print(banner)

    # (result key, should run, builder) in execution order. Markdown goes
    # first because the EPUB, PDF and HTML builders read it.
    steps = (
        ("markdown", want_md or want_epub or want_pdf or want_html, compile_markdown),
        ("epub", want_epub, compile_epub),
        ("pdf", want_pdf, compile_pdf),
        ("html", want_html, compile_html),
        ("chapters_json", want_json or build_everything, compile_chapters_json),
        ("manifest", build_everything or "--manifest" in argv, generate_manifest),
    )

    results = {}
    for key, enabled, builder in steps:
        if enabled:
            results[key] = builder()

    # Summary
    elapsed = time.time() - started
    print(f"\n{'─' * 65}")
    built = [key for key, outcome in results.items() if outcome]
    failed = [key for key, outcome in results.items() if not outcome]
    if built:
        print(f"  ✅ Built: {', '.join(built)}")
    if failed:
        print(f"  ❌ Failed: {', '.join(failed)}")
    print(f"  ⏱️  Completed in {elapsed:.1f}s")
    print(f"{'=' * 65}")


if __name__ == "__main__":
    main()