# alexanderwhitestone.com/scripts/build.py

#!/usr/bin/env python3
"""
Build script for The Scrolls — generates blog pages and an Atom feed from
markdown posts in blog/posts/.

Each post is a markdown file with YAML frontmatter:
  ---
  title: "Post Title"
  date: 2026-03-18
  ---
  Body content here.

Generates:
  - blog/index.html with linked post listing
  - blog/posts/<slug>.html for each post
  - blog/feed.xml Atom feed
  - api/health/index.json with build-time metadata
"""

import html
import json
import os
import re
from datetime import datetime, timezone
from pathlib import Path

# Absolute site origin; prefixed onto links and ids in the generated feed.
SITE_URL = "https://alexanderwhitestone.com"
# Parent of the directory that contains this script.
ROOT_DIR = Path(__file__).parent.parent
# Destination for index.html and feed.xml.
BLOG_DIR = ROOT_DIR / "blog"
# Holds the markdown sources; each post's HTML page is written next to them.
POSTS_DIR = BLOG_DIR / "posts"
# Destination for the static health-check JSON (index.json).
HEALTH_DIR = ROOT_DIR / "api" / "health"

# Shared stylesheet, inlined into the <style> element of the index page and
# of every post page. The `.posts` rules style the index listing; the
# `.post-date` / `.post-body` rules style an individual post.
PAGE_STYLE = """\
    * { margin: 0; padding: 0; box-sizing: border-box; }
    body {
      max-width: 640px; margin: 0 auto; padding: 2rem 1rem;
      background: #0a0a0f; color: #e0d8c8;
      font-family: Georgia, serif; line-height: 1.6;
    }
    header { margin-bottom: 3rem; border-bottom: 1px solid #2a2520; padding-bottom: 1rem; }
    header h1 { font-size: 1.2rem; font-weight: normal; letter-spacing: 0.1em; }
    header nav { margin-top: 0.5rem; font-size: 0.8rem; }
    header a, a { color: #8a7f6a; text-decoration: none; }
    header a:hover, a:hover { color: #e0d8c8; }
    .posts { list-style: none; }
    .posts li { margin-bottom: 1.5rem; padding-bottom: 1.5rem; border-bottom: 1px solid #1a1510; }
    .posts .date { font-size: 0.8rem; color: #6a6050; display: block; }
    .posts a { color: #e0d8c8; text-decoration: none; }
    .posts a:hover { color: #fff; }
    .post-date { font-size: 0.85rem; color: #6a6050; margin-bottom: 2rem; display: block; }
    .post-body p { margin-bottom: 1.2rem; }
    .post-body h2 { font-size: 1.1rem; margin: 2rem 0 1rem; color: #c0b8a8; }
    .post-body h3 { font-size: 1rem; margin: 1.5rem 0 0.8rem; color: #a09888; }
    .post-body blockquote { border-left: 2px solid #2a2520; padding-left: 1rem; color: #a09888; margin-bottom: 1.2rem; }
    .post-body code { background: #1a1510; padding: 0.15em 0.4em; font-size: 0.9em; }
    .post-body pre { background: #1a1510; padding: 1rem; overflow-x: auto; margin-bottom: 1.2rem; }
    .post-body pre code { background: none; padding: 0; }
    .post-body em { font-style: italic; }
    .post-body strong { color: #fff; }
"""


def parse_frontmatter(text):
    """Split a post into its frontmatter metadata and markdown body.

    Only a flat subset of YAML is understood: one ``key: value`` pair per
    line, split on the first colon. Lines without a colon are ignored, and
    a single pair of matching surrounding quotes is removed from a value.

    Args:
        text: Full contents of a post file.

    Returns:
        A ``(meta, body)`` tuple. ``meta`` maps keys to string values and
        ``body`` is the remaining text with surrounding whitespace stripped
        (empty when nothing follows the closing ``---``). When ``text`` does
        not start with a ``---`` delimited block the result is
        ``({}, text)`` with ``text`` returned untouched.
    """
    # The closing delimiter may be the very last thing in the file, so the
    # newline and body after it are optional.
    match = re.match(r"^---\s*\n(.*?)\n---\s*(?:\n(.*))?$", text, re.DOTALL)
    if not match:
        return {}, text

    meta = {}
    for line in match.group(1).strip().split("\n"):
        key, sep, val = line.partition(":")
        if not sep:
            continue
        val = val.strip()
        # Unquote only a balanced pair, so quotes or apostrophes that belong
        # to the value itself (e.g. a title ending in a quote) are kept.
        if len(val) >= 2 and val[0] == val[-1] and val[0] in "\"'":
            val = val[1:-1]
        meta[key.strip()] = val
    return meta, (match.group(2) or "").strip()


def md_to_html(text):
    """Convert a small markdown subset to an HTML fragment.

    Supported constructs: paragraphs, one to three leading ``#`` headers
    (rendered as h2-h4 because h1 is used for the post title), ``**bold**``,
    ``*italic*``, backtick code spans, ``[text](url)`` links, ``> ``
    blockquotes and triple-backtick fenced code blocks.

    Code spans and fenced code are HTML-escaped and never receive inline
    formatting. All other text is passed through without escaping, so raw
    HTML in a post reaches the output unchanged.

    Args:
        text: Markdown source of a post body.

    Returns:
        The generated HTML, one block element per line group, joined with
        newlines. A fence left open at the end of ``text`` is closed
        automatically.
    """
    out = []
    paragraph = []
    code_lines = []
    in_code_block = False
    in_blockquote = False

    def inline_format(s):
        # NUL delimits the code-span placeholders below, so drop any stray
        # NULs from the input first (they are not valid in HTML anyway).
        s = s.replace("\x00", "")
        code_spans = []

        def stash_code(match):
            code_spans.append(f"<code>{html.escape(match.group(1))}</code>")
            return f"\x00{len(code_spans) - 1}\x00"

        # Swap code spans for placeholders so the bold/italic/link passes
        # cannot rewrite characters inside them; restore them at the end.
        s = re.sub(r"`([^`]+)`", stash_code, s)
        # Bold must run before italic, otherwise "**" is consumed as "*".
        s = re.sub(r"\*\*(.+?)\*\*", r"<strong>\1</strong>", s)
        s = re.sub(r"\*(.+?)\*", r"<em>\1</em>", s)
        s = re.sub(r"\[([^\]]+)\]\(([^)]+)\)", r'<a href="\2">\1</a>', s)
        return re.sub(
            r"\x00(\d+)\x00", lambda m: code_spans[int(m.group(1))], s
        )

    def flush_paragraph():
        if paragraph:
            content = inline_format("\n".join(paragraph))
            out.append(f"<p>{content}</p>")
            paragraph.clear()

    def close_blockquote():
        nonlocal in_blockquote
        if in_blockquote:
            out.append("</blockquote>")
            in_blockquote = False

    def flush_code_block():
        # Emit the block as one unit with no newline after <code>: a newline
        # there would render as an empty first line inside the <pre>.
        out.append("<pre><code>" + "\n".join(code_lines) + "</code></pre>")
        code_lines.clear()

    for line in text.split("\n"):
        stripped = line.strip()

        # Fenced code blocks (any text after the opening fence is ignored).
        if stripped.startswith("```"):
            if in_code_block:
                flush_code_block()
                in_code_block = False
            else:
                flush_paragraph()
                close_blockquote()
                in_code_block = True
            continue

        if in_code_block:
            # Keep the unstripped line so indentation inside code survives.
            code_lines.append(html.escape(line))
            continue

        # Blank line ends the current paragraph and any open blockquote.
        if not stripped:
            flush_paragraph()
            close_blockquote()
            continue

        # Headers
        header_match = re.match(r"^(#{1,3})\s+(.+)$", stripped)
        if header_match:
            flush_paragraph()
            close_blockquote()
            level = len(header_match.group(1)) + 1  # h2, h3, h4 (h1 is title)
            content = inline_format(header_match.group(2))
            out.append(f"<h{level}>{content}</h{level}>")
            continue

        # Blockquotes: each quoted line becomes its own paragraph.
        if stripped.startswith("> "):
            flush_paragraph()
            content = inline_format(stripped[2:])
            if not in_blockquote:
                out.append("<blockquote>")
                in_blockquote = True
            out.append(f"<p>{content}</p>")
            continue

        # Regular text — accumulate into the current paragraph.
        paragraph.append(stripped)

    flush_paragraph()
    close_blockquote()
    if in_code_block:
        flush_code_block()

    return "\n".join(out)


def load_posts():
    """Load every valid post, newest first.

    Reads each ``*.md`` file in ``POSTS_DIR`` as UTF-8 and keeps those whose
    frontmatter has both a non-empty ``title`` and a non-empty ``date``;
    other files are skipped silently.

    Returns:
        A list of dicts with keys ``title``, ``date``, ``slug`` (file name
        without its extension), ``body`` (markdown source) and ``path``,
        ordered by ``date`` descending with ties broken by ``slug``
        descending. Empty when ``POSTS_DIR`` does not exist.
    """
    if not POSTS_DIR.exists():
        return []

    posts = []
    for path in POSTS_DIR.glob("*.md"):
        # Decode explicitly instead of relying on the platform locale;
        # "utf-8-sig" also drops a leading BOM, which would otherwise hide
        # the opening "---" from the frontmatter parser.
        text = path.read_text(encoding="utf-8-sig")
        meta, body = parse_frontmatter(text)
        if meta.get("title") and meta.get("date"):
            posts.append(
                {
                    "title": meta["title"],
                    "date": meta["date"],
                    "slug": path.stem,
                    "body": body,
                    "path": path,
                }
            )

    # Order by the declared date rather than by file name. Dates are compared
    # as strings, which is chronological for the YYYY-MM-DD form shown in the
    # module docstring — other formats are not validated here.
    posts.sort(key=lambda post: (post["date"], post["slug"]), reverse=True)
    return posts


def generate_post_page(post):
    """Render one post to ``POSTS_DIR/<slug>.html``.

    Args:
        post: Post dict as produced by ``load_posts``; the ``title``,
            ``date``, ``slug`` and ``body`` keys are used.

    Returns:
        Path of the HTML file that was written.
    """
    body_html = md_to_html(post["body"])
    # Title and date come from free-text frontmatter, so escape both.
    title = html.escape(post["title"])
    date = html.escape(str(post["date"]))

    # The feed is Atom, so the autodiscovery link advertises the Atom type.
    page = f"""<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <title>{title} — The Scrolls</title>
  <link rel="alternate" type="application/atom+xml" title="The Scrolls" href="/blog/feed.xml">
  <style>
{PAGE_STYLE}
  </style>
</head>
<body>
  <header>
    <h1>{title}</h1>
    <nav><a href="/blog/">← The Scrolls</a> · <a href="/">The Tower</a> · <a href="/blog/feed.xml">RSS</a></nav>
  </header>
  <main>
    <span class="post-date">{date}</span>
    <article class="post-body">
{body_html}
    </article>
  </main>
</body>
</html>"""

    out_path = POSTS_DIR / f"{post['slug']}.html"
    # The page declares charset=utf-8 and contains non-ASCII glyphs; writing
    # with the locale default could mis-encode them or raise on e.g. cp1252.
    out_path.write_text(page, encoding="utf-8")
    return out_path


def generate_index(posts):
    """Write ``BLOG_DIR/index.html`` listing every post with a link.

    Args:
        posts: Post dicts as produced by ``load_posts`` (``title``, ``date``
            and ``slug`` are used), already in display order. An empty list
            yields a single "No scrolls yet." placeholder item.
    """
    if not posts:
        items_html = '      <li><span class="date">—</span><span>No scrolls yet.</span></li>'
    else:
        items = []
        for p in posts:
            # Frontmatter values and file names are free text; escape
            # everything that is interpolated into markup or attributes.
            escaped_title = html.escape(p["title"])
            escaped_date = html.escape(str(p["date"]))
            escaped_slug = html.escape(p["slug"])
            items.append(
                f'      <li>\n'
                f'        <span class="date">{escaped_date}</span>\n'
                f'        <a href="/blog/posts/{escaped_slug}.html">{escaped_title}</a>\n'
                f'      </li>'
            )
        items_html = "\n".join(items)

    # The feed is Atom, so the autodiscovery link advertises the Atom type.
    page = f"""<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <title>The Scrolls — The Wizard's Tower</title>
  <link rel="alternate" type="application/atom+xml" title="The Scrolls" href="/blog/feed.xml">
  <style>
{PAGE_STYLE}
  </style>
</head>
<body>
  <header>
    <h1>The Scrolls</h1>
    <nav><a href="/">← The Tower</a> · <a href="/blog/feed.xml">RSS</a></nav>
  </header>
  <main>
    <ul class="posts">
{items_html}
    </ul>
  </main>
</body>
</html>"""

    # With no posts directory the blog directory may not exist yet either.
    BLOG_DIR.mkdir(parents=True, exist_ok=True)
    # The page declares charset=utf-8 and contains non-ASCII glyphs; writing
    # with the locale default could mis-encode them or raise on e.g. cp1252.
    (BLOG_DIR / "index.html").write_text(page, encoding="utf-8")
    print(f"  Generated index with {len(posts)} post(s).")


def generate_feed(posts):
    """Write ``BLOG_DIR/feed.xml`` as an Atom feed.

    Only the first 20 posts are included. Each entry carries the post body
    rendered to HTML inside a CDATA section, and its ``<updated>`` value is
    the post's ``date`` with a fixed midnight-UTC time appended. The
    feed-level ``<updated>`` is the time of the build.

    Args:
        posts: Post dicts as produced by ``load_posts`` (``title``, ``date``,
            ``slug`` and ``body`` are used), newest first.
    """
    now = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

    entries = []
    for p in posts[:20]:  # Cap at 20 entries
        body_html = md_to_html(p["body"])
        # "]]>" would end the CDATA section early and break the XML, so
        # split it across two adjacent CDATA sections.
        cdata_body = body_html.replace("]]>", "]]]]><![CDATA[>")
        escaped_title = html.escape(p["title"])
        escaped_date = html.escape(str(p["date"]))
        # File names may contain characters that are special in XML.
        escaped_slug = html.escape(p["slug"])
        entries.append(
            f"""  <entry>
    <title>{escaped_title}</title>
    <link href="{SITE_URL}/blog/posts/{escaped_slug}.html"/>
    <id>{SITE_URL}/blog/posts/{escaped_slug}</id>
    <updated>{escaped_date}T00:00:00Z</updated>
    <content type="html"><![CDATA[{cdata_body}]]></content>
  </entry>"""
        )

    entry_block = "\n".join(entries)

    feed = f"""<?xml version="1.0" encoding="utf-8"?>
<feed xmlns="http://www.w3.org/2005/Atom">
  <title>The Scrolls — Alexander Whitestone</title>
  <link href="{SITE_URL}/blog/"/>
  <link rel="self" href="{SITE_URL}/blog/feed.xml"/>
  <id>{SITE_URL}/blog/</id>
  <updated>{now}</updated>
  <author>
    <name>Alexander Whitestone</name>
  </author>
  <subtitle>Words from the Wizard's Tower</subtitle>
{entry_block}
</feed>"""

    # With no posts directory the blog directory may not exist yet either.
    BLOG_DIR.mkdir(parents=True, exist_ok=True)
    # The XML declaration promises utf-8, so the bytes on disk must match it
    # regardless of the platform's default encoding.
    (BLOG_DIR / "feed.xml").write_text(feed, encoding="utf-8")
    print(f"  Generated feed with {len(entries)} entry/entries.")


def generate_health():
    """Write the static health document to ``HEALTH_DIR/index.json``.

    The directory is created when missing. The payload reports a fixed
    ``"ok"`` status, a fixed map of service flags, a null uptime and a
    ``version`` string derived from the current UTC time of the build.
    """
    HEALTH_DIR.mkdir(parents=True, exist_ok=True)

    build_stamp = datetime.now(timezone.utc).strftime("%Y%m%d.%H%M%S")
    service_flags = {"api": True, "agent_loop": False, "websocket": False}
    payload = {
        "status": "ok",
        "services": service_flags,
        "uptime": None,
        "version": build_stamp,
    }

    target = HEALTH_DIR / "index.json"
    serialized = json.dumps(payload, indent=2)
    target.write_text(f"{serialized}\n")
    print("  Generated api/health endpoint.")


def main():
    """Run the whole build: post pages, index, feed, then the health file."""
    print("Building The Scrolls...")

    site_root = BLOG_DIR.parent
    all_posts = load_posts()

    # One HTML page per post, reported relative to the site root.
    for entry in all_posts:
        written = generate_post_page(entry)
        print(f"  Built: {written.relative_to(site_root)}")

    # Aggregate outputs built from the full post list.
    for build_step in (generate_index, generate_feed):
        build_step(all_posts)

    generate_health()
    print("Build complete.")


if __name__ == "__main__":
    main()