timmy-home/scripts/know_thy_father/epic_pipeline.py

#!/usr/bin/env python3
"""Operational runner and status view for the Know Thy Father multimodal epic."""

import argparse
import json
from pathlib import Path
from subprocess import run


# Ordered table of the epic's phases; list order is the order in which the
# plan is built and printed.
#
# Each entry carries:
#   id               - key used to look a step up (e.g. via --run-step) and as
#                      the key in the status snapshot.
#   name             - human-readable title.
#   script           - repo-relative path of the script; only used for the
#                      existence check in the status snapshot.
#   command_template - command line for the phase. It is rendered with
#                      str.format(batch_size=...), so any literal "{" or "}"
#                      added here must be doubled. Only phase 2 currently
#                      uses the {batch_size} placeholder.
#   outputs          - repo-relative paths whose existence is reported in the
#                      status snapshot.
#   description      - one-sentence summary of the phase.
PHASES = [
    {
        "id": "phase1_media_indexing",
        "name": "Phase 1 — Media Indexing",
        "script": "scripts/know_thy_father/index_media.py",
        "command_template": "python3 scripts/know_thy_father/index_media.py --tweets twitter-archive/extracted/tweets.jsonl --output twitter-archive/know-thy-father/media_manifest.jsonl",
        "outputs": ["twitter-archive/know-thy-father/media_manifest.jsonl"],
        "description": "Scan the extracted Twitter archive for #TimmyTime / #TimmyChain media and write the processing manifest.",
    },
    {
        "id": "phase2_multimodal_analysis",
        "name": "Phase 2 — Multimodal Analysis",
        "script": "scripts/twitter_archive/analyze_media.py",
        "command_template": "python3 scripts/twitter_archive/analyze_media.py --batch {batch_size}",
        "outputs": [
            "twitter-archive/know-thy-father/analysis.jsonl",
            "twitter-archive/know-thy-father/meaning-kernels.jsonl",
            "twitter-archive/know-thy-father/pipeline-status.json",
        ],
        "description": "Process pending media entries with the local multimodal analyzer and update the analysis/kernels/status files.",
    },
    {
        "id": "phase3_holographic_synthesis",
        "name": "Phase 3 — Holographic Synthesis",
        "script": "scripts/know_thy_father/synthesize_kernels.py",
        # NOTE(review): --input below is twitter-archive/media/manifest.jsonl,
        # which is not listed as an output of any phase in this table (phase 1
        # writes twitter-archive/know-thy-father/media_manifest.jsonl).
        # Confirm that this file is produced elsewhere and the path is intended.
        "command_template": "python3 scripts/know_thy_father/synthesize_kernels.py --input twitter-archive/media/manifest.jsonl --output twitter-archive/knowledge/fathers_ledger.jsonl --summary twitter-archive/knowledge/fathers_ledger.summary.json",
        "outputs": [
            "twitter-archive/knowledge/fathers_ledger.jsonl",
            "twitter-archive/knowledge/fathers_ledger.summary.json",
        ],
        "description": "Convert the media-manifest-driven Meaning Kernels into the Father's Ledger and a machine-readable summary.",
    },
    {
        "id": "phase4_cross_reference_audit",
        "name": "Phase 4 — Cross-Reference Audit",
        "script": "scripts/know_thy_father/crossref_audit.py",
        "command_template": "python3 scripts/know_thy_father/crossref_audit.py --soul SOUL.md --kernels twitter-archive/notes/know_thy_father_crossref.md --output twitter-archive/notes/crossref_report.md",
        "outputs": ["twitter-archive/notes/crossref_report.md"],
        "description": "Compare Know Thy Father kernels against SOUL.md and related canon, then emit a Markdown audit report.",
    },
    {
        "id": "phase5_processing_log",
        "name": "Phase 5 — Processing Log / Status",
        "script": "twitter-archive/know-thy-father/tracker.py",
        "command_template": "python3 twitter-archive/know-thy-father/tracker.py report",
        "outputs": ["twitter-archive/know-thy-father/REPORT.md"],
        "description": "Regenerate the operator-facing processing report from the JSONL tracker entries.",
    },
]


def build_pipeline_plan(batch_size: int = 10):
    """Return the ordered list of pipeline steps with rendered commands.

    Args:
        batch_size: Value substituted for ``{batch_size}`` in each phase's
            command template.

    Returns:
        A new list with one dict per entry in ``PHASES`` (same order), holding
        ``id``, ``name``, ``script``, ``command``, ``outputs`` and
        ``description``. ``outputs`` is a fresh list, so callers may mutate
        the result without touching ``PHASES``.
    """
    return [
        {
            "id": spec["id"],
            "name": spec["name"],
            "script": spec["script"],
            # Every template goes through format(), placeholder or not.
            "command": spec["command_template"].format(batch_size=batch_size),
            "outputs": list(spec["outputs"]),
            "description": spec["description"],
        }
        for spec in PHASES
    ]


def build_status_snapshot(repo_root: Path):
    """Report which phase scripts and output files exist under *repo_root*.

    Args:
        repo_root: Directory against which the repo-relative script and
            output paths of each step are resolved.

    Returns:
        A dict keyed by step id. Each value holds the step's ``name`` and
        ``script``, a ``script_exists`` flag, and an ``outputs`` list of
        ``{"path": ..., "exists": ...}`` dicts, one per declared output.
    """
    report = {}
    for step in build_pipeline_plan():
        script_present = (repo_root / step["script"]).exists()

        artifacts = []
        for rel_path in step["outputs"]:
            artifacts.append(
                {
                    "path": rel_path,
                    "exists": (repo_root / rel_path).exists(),
                }
            )

        report[step["id"]] = {
            "name": step["name"],
            "script": step["script"],
            "script_exists": script_present,
            "outputs": artifacts,
        }
    return report


def run_step(repo_root: Path, step_id: str, batch_size: int = 10):
    """Execute a single pipeline step and return its completed process.

    Args:
        repo_root: Working directory for the command; the step commands refer
            to scripts and data files by repo-relative path.
        step_id: ``id`` of the step to run, as found in the plan returned by
            ``build_pipeline_plan``.
        batch_size: Forwarded to ``build_pipeline_plan`` when rendering the
            step commands.

    Returns:
        The ``subprocess.CompletedProcess`` of the command. A non-zero exit
        status does not raise (``check=False``); inspect ``returncode``.
        ``args`` on the result is the argument list that was executed.

    Raises:
        SystemExit: If ``step_id`` does not match any step in the plan.
        FileNotFoundError: If the command's executable cannot be found (there
            is no intermediate shell to turn that into an exit status).
    """
    # Function-scope import so this block does not depend on the file header.
    import shlex

    plan = {step["id"]: step for step in build_pipeline_plan(batch_size=batch_size)}
    step = plan.get(step_id)
    if step is None:
        raise SystemExit(f"Unknown step: {step_id}")

    # Tokenize the command ourselves and run it without a shell: batch_size is
    # interpolated into the command string, and with shell=True any shell
    # metacharacters in it would be interpreted. The templates use no shell
    # features (pipes, redirects, globs), so no shell is needed.
    argv = shlex.split(step["command"])
    return run(argv, cwd=repo_root, check=False)


def main():
    """Command-line entry point.

    Modes, in order of precedence:
      * ``--run-step ID``: run that step and exit with its return code.
      * ``--status``: print the script/output existence snapshot as JSON.
      * default: print the pipeline plan, as JSON with ``--json`` or as one
        ``[id] command`` line per step otherwise.

    ``--batch-size`` (default 10) is used when rendering step commands.
    """
    cli = argparse.ArgumentParser(description="Know Thy Father epic orchestration helper")
    cli.add_argument("--batch-size", type=int, default=10)
    cli.add_argument("--status", action="store_true")
    cli.add_argument("--run-step", default=None)
    cli.add_argument("--json", action="store_true")
    options = cli.parse_args()

    # Two directories above the one containing this file.
    root = Path(__file__).resolve().parents[2]

    if options.run_step:
        completed = run_step(root, options.run_step, batch_size=options.batch_size)
        raise SystemExit(completed.returncode)

    if options.status:
        # The status snapshot is always emitted as JSON, with or without --json.
        print(json.dumps(build_status_snapshot(root), indent=2))
        return

    steps = build_pipeline_plan(batch_size=options.batch_size)
    if options.json:
        print(json.dumps(steps, indent=2))
        return

    for entry in steps:
        print(f"[{entry['id']}] {entry['command']}")

# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()