From 948d520b83afa387a13ce07b2409b8f3eecc27ce Mon Sep 17 00:00:00 2001 From: Alexander Whitestone Date: Fri, 10 Apr 2026 19:57:27 -0400 Subject: [PATCH] burn: add chapter validation to build pipeline (closes #24) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add validate_chapters() function that checks: - No empty chapter files (whitespace-only counts as empty) - Every chapter starts with an H1 header (# Chapter N — Title) - No gaps in chapter numbering (sequential from 1) - No duplicate chapter numbers - Header chapter number matches filename number - Warns on suspiciously short chapters (<50 words) Validation runs automatically before compilation. If errors are found, compilation is aborted with clear error messages showing exactly what to fix. CLI flags: python3 compile.py --validate # validate only python3 compile.py --no-validate # skip validation python3 compile.py # validate then compile --- compile.py | 164 +++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 152 insertions(+), 12 deletions(-) diff --git a/compile.py b/compile.py index 2d68273..de9bdb2 100644 --- a/compile.py +++ b/compile.py @@ -8,12 +8,15 @@ Uses chapters, front matter, back matter, and references illustrations. Requirements: pip install markdown weasyprint (or use pandoc) Usage: - python3 compile.py # generates testament-complete.md + python3 compile.py # validate then compile + python3 compile.py --validate # validate only, no compile + python3 compile.py --no-validate # skip validation, compile directly pandoc testament-complete.md -o testament.pdf --pdf-engine=weasyprint """ import os import re +import sys BASE = os.path.dirname(os.path.abspath(__file__)) CHAPTERS_DIR = os.path.join(BASE, "chapters") @@ -28,17 +31,147 @@ PARTS = { 11: ("THE LIGHT", "Thomas at the door. The network. The story breaks. 
def read_file(path):
    """Return the full text of *path*, decoded as UTF-8.

    The encoding is pinned because the expected chapter header
    ("# Chapter N — Title") contains a non-ASCII dash; relying on the
    platform default would make decoding locale-dependent.
    """
    with open(path, 'r', encoding='utf-8') as f:
        return f.read()


def get_chapter_number(filename):
    """Return the integer following ``chapter-`` in *filename*, or 0 if none."""
    match = re.search(r'chapter-(\d+)', filename)
    return int(match.group(1)) if match else 0


def _check_chapter(num, filename, chapters_dir, errors, warnings):
    """Run the per-file checks for one chapter, appending to the given lists."""
    try:
        content = read_file(os.path.join(chapters_dir, filename))
    except (OSError, UnicodeDecodeError) as e:
        # Unreadable or undecodable file: report it and keep checking the rest.
        errors.append(f"{filename}: cannot read — {e}")
        return

    if not content.strip():
        errors.append(f"{filename}: file is empty")
        return

    # Suspiciously short chapters are only a warning, never a failure.
    word_count = len(content.split())
    if word_count < 50:
        warnings.append(
            f"{filename}: only {word_count} words (possible truncation)"
        )

    # First non-blank line must be the H1 header.
    first_line = content.strip().split('\n', 1)[0]
    if not first_line.startswith('# '):
        errors.append(
            f"{filename}: missing H1 header — "
            f"expected '# Chapter {num} — Title', got '{first_line[:60]}'"
        )
        return

    header_match = re.match(r'^#\s+Chapter\s+(\d+)', first_line)
    if header_match is None:
        # An H1 exists but is not in the conventional form: warn only.
        warnings.append(
            f"{filename}: H1 header doesn't follow "
            f"'# Chapter N — Title' pattern: '{first_line[:60]}'"
        )
        return

    header_num = int(header_match.group(1))
    if header_num != num:
        errors.append(
            f"{filename}: header says Chapter {header_num} "
            f"but filename says Chapter {num}"
        )


def _report_validation(errors, warnings, chapter_files):
    """Print warnings and the pass/fail summary; return True when no errors."""
    if warnings:
        print(f"Validation: {len(warnings)} warning(s)")
        for w in warnings:
            print(f"  ⚠ {w}")

    if errors:
        print(f"Validation: FAILED — {len(errors)} error(s)")
        for e in errors:
            print(f"  ✗ {e}")
    else:
        # Only reachable with at least one chapter file, so indexing is safe.
        print(
            f"Validation: PASSED — {len(chapter_files)} chapters, "
            f"chapters {chapter_files[0][0]}–{chapter_files[-1][0]}"
        )

    return not errors


def validate_chapters(chapters_dir=None):
    """Validate chapter files before compilation.

    Only files named ``chapter-*.md`` inside *chapters_dir* are considered.

    Errors (make the result invalid):
      - the chapters directory does not exist, or holds no chapter files
      - duplicate chapter numbers
      - gaps in chapter numbering (counted from 1 up to the highest number)
      - a chapter file that cannot be read or is empty/whitespace-only
      - a chapter whose first non-blank line is not an H1 header (``# ...``)
      - a ``# Chapter N`` header whose N differs from the filename's number

    Warnings (printed, but do not fail validation):
      - chapters shorter than 50 words
      - H1 headers that do not follow the ``# Chapter N — Title`` pattern

    A summary is always printed to stdout, including when the directory is
    missing or empty, so callers that tell the user to "fix the errors above"
    always have something above to point at.

    Args:
        chapters_dir: Directory to scan. ``None`` (the default) means the
            module-level ``CHAPTERS_DIR``, looked up at call time.

    Returns:
        (is_valid, errors) where errors is a list of human-readable strings.
    """
    if chapters_dir is None:
        chapters_dir = CHAPTERS_DIR

    errors = []
    warnings = []

    if not os.path.isdir(chapters_dir):
        errors.append(f"Chapters directory not found: {chapters_dir}")
        return _report_validation(errors, warnings, []), errors

    # (number, filename) pairs, ordered by number and then by name.
    chapter_files = sorted(
        (get_chapter_number(name), name)
        for name in os.listdir(chapters_dir)
        if name.startswith("chapter-") and name.endswith(".md")
    )

    if not chapter_files:
        errors.append(f"No chapter files found in {chapters_dir}")
        return _report_validation(errors, warnings, chapter_files), errors

    # Duplicate chapter numbers.
    seen_numbers = {}
    for num, filename in chapter_files:
        if num in seen_numbers:
            errors.append(
                f"Duplicate chapter number {num}: {filename} and {seen_numbers[num]}"
            )
        seen_numbers[num] = filename

    # Gaps in numbering, counted from 1 up to the highest chapter present.
    highest = chapter_files[-1][0]
    missing = [n for n in range(1, highest + 1) if n not in seen_numbers]
    if missing:
        errors.append(
            f"Missing chapter(s): {', '.join(str(n) for n in missing)}"
        )

    # Per-file content checks.
    for num, filename in chapter_files:
        _check_chapter(num, filename, chapters_dir, errors, warnings)

    return _report_validation(errors, warnings, chapter_files), errors
Fix the errors above and try again.") + sys.exit(1) + print() + output = [] - + # Title page output.append("""--- title: "The Testament" @@ -66,7 +199,7 @@ with Timmy --- """) - + # Get all chapters sorted chapters = [] for f in os.listdir(CHAPTERS_DIR): @@ -74,7 +207,7 @@ with Timmy num = get_chapter_number(f) chapters.append((num, f)) chapters.sort() - + current_part = 0 for num, filename in chapters: # Insert part divider if needed @@ -82,28 +215,28 @@ with Timmy part_name, part_desc = PARTS[num] current_part += 1 output.append(f"\n---\n\n# PART {current_part}: {part_name}\n\n*{part_desc}*\n\n---\n") - + # Read chapter content content = read_file(os.path.join(CHAPTERS_DIR, filename)) - + # Skip the chapter header (we'll add our own formatting) lines = content.split('\n') body = '\n'.join(lines[1:]).strip() # Skip "# Chapter X — Title" - + # Add chapter output.append(f"\n{lines[0]}\n\n{body}\n") - + # Back matter output.append("\n---\n") back = read_file(BACK_MATTER) # Clean up the back matter for print output.append(back) - + # Write compiled markdown compiled = '\n'.join(output) with open(OUTPUT, 'w') as f: f.write(compiled) - + # Stats words = len(compiled.split()) lines_count = compiled.count('\n') @@ -116,5 +249,12 @@ with Timmy print(f" # or") print(f" pandoc {OUTPUT} -o testament.epub --epub-cover-image=cover-art.jpg") + if __name__ == "__main__": - compile() + if "--validate" in sys.argv: + valid, _ = validate_chapters() + sys.exit(0 if valid else 1) + elif "--no-validate" in sys.argv: + compile(skip_validation=True) + else: + compile()