#!/usr/bin/env python3
"""Nightly runner for the codebase genome pipeline."""

from __future__ import annotations

import argparse
import json
import os
import subprocess
import sys
import urllib.parse
import urllib.request
from pathlib import Path
from typing import NamedTuple


class RunPlan(NamedTuple):
    """Everything needed to run the genome pipeline against one repository."""

    # Repository record (keys used in this file: name, full_name, clone_url,
    # default_branch).
    repo: dict
    # Local checkout directory: <workspace_root>/<repo name>.
    repo_dir: Path
    # Destination file: <output_root>/<repo name>/GENOME.md.
    output_path: Path
    # Full argv handed to subprocess.run().
    command: list[str]


def _repo_sort_key(repo: dict) -> str:
    """Return the case-insensitive key that defines the rotation order."""
    return repo.get("full_name", repo.get("name", "")).lower()


def load_state(path: Path) -> dict:
    """Return the persisted rotation state, or ``{}`` if *path* does not exist."""
    if not path.exists():
        return {}
    return json.loads(path.read_text(encoding="utf-8"))


def save_state(path: Path, state: dict) -> None:
    """Persist *state* as pretty-printed JSON at *path*, creating parent dirs.

    The payload is written to a sibling temporary file and then moved into
    place with ``os.replace`` so an interrupted run cannot leave a truncated
    state file behind for the next nightly run to choke on.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    tmp_path = path.with_name(f"{path.name}.tmp")
    tmp_path.write_text(json.dumps(state, indent=2, sort_keys=True), encoding="utf-8")
    os.replace(tmp_path, path)


def select_next_repo(repos: list[dict], state: dict) -> dict:
    """Pick the repository that follows the previously processed one.

    Repositories are ordered case-insensitively by ``full_name`` (falling back
    to ``name``). If ``state["last_repo"]`` matches a repo's ``name`` or
    ``full_name``, the next repo in that order is returned (wrapping around).
    Otherwise ``state["last_index"]`` is used, defaulting to the first repo.

    Raises:
        ValueError: if *repos* is empty.
    """
    if not repos:
        raise ValueError("no repositories available for nightly genome run")

    ordered = sorted(repos, key=_repo_sort_key)

    last_repo = state.get("last_repo")
    # Only match by name when a previous repo was actually recorded; otherwise
    # a repo that lacks "name" would compare equal to the missing value (None)
    # and the rotation would wrongly skip the first repository.
    if last_repo is not None:
        for index, repo in enumerate(ordered):
            if last_repo in (repo.get("name"), repo.get("full_name")):
                return ordered[(index + 1) % len(ordered)]

    # Fall back to the stored position; an unusable value restarts the cycle.
    try:
        last_index = int(state.get("last_index", -1))
    except (TypeError, ValueError):
        last_index = -1
    return ordered[(last_index + 1) % len(ordered)]


def build_run_plan(repo: dict, workspace_root: Path, output_root: Path, pipeline_script: Path) -> RunPlan:
    """Build the paths and command line for running the pipeline on *repo*.

    The command runs *pipeline_script* with the current interpreter and passes
    ``--repo-root``, ``--repo-name`` (``full_name`` if present, else ``name``)
    and ``--output``. Nothing is executed or created here.
    """
    repo_dir = workspace_root / repo["name"]
    output_path = output_root / repo["name"] / "GENOME.md"
    command = [
        sys.executable,
        str(pipeline_script),
        "--repo-root",
        str(repo_dir),
        "--repo-name",
        repo.get("full_name", repo["name"]),
        "--output",
        str(output_path),
    ]
    return RunPlan(repo=repo, repo_dir=repo_dir, output_path=output_path, command=command)


def fetch_org_repos(org: str, host: str, token_file: Path, include_archived: bool = False) -> list[dict]:
    """List the repositories of *org* via ``<host>/api/v1/orgs/<org>/repos``.

    Pages of up to 100 entries are requested until an empty page is returned.
    Archived repositories are skipped unless *include_archived* is true.

    Returns:
        One dict per repository with the keys ``name``, ``full_name``,
        ``clone_url`` and ``default_branch`` (``"main"`` when the API value is
        missing or empty).
    """
    token = token_file.read_text(encoding="utf-8").strip()
    # Escape the org so unusual characters cannot alter the request path.
    endpoint = f"{host.rstrip('/')}/api/v1/orgs/{urllib.parse.quote(org, safe='')}/repos"
    headers = {"Authorization": f"token {token}", "Accept": "application/json"}

    page = 1
    repos: list[dict] = []
    while True:
        req = urllib.request.Request(f"{endpoint}?limit=100&page={page}", headers=headers)
        with urllib.request.urlopen(req, timeout=30) as resp:
            chunk = json.loads(resp.read().decode("utf-8"))
        if not chunk:
            break
        for item in chunk:
            if item.get("archived") and not include_archived:
                continue
            repos.append(
                {
                    "name": item["name"],
                    "full_name": item["full_name"],
                    "clone_url": item["clone_url"],
                    "default_branch": item.get("default_branch") or "main",
                }
            )
        page += 1
    return repos


def _authenticated_clone_url(clone_url: str, token_file: Path) -> str:
    """Return *clone_url* with the token embedded as userinfo for HTTPS URLs.

    Non-HTTPS URLs are returned unchanged.
    """
    # NOTE(review): the token becomes part of the URL passed to git, so it is
    # visible in the git argv (and in any CalledProcessError raised for that
    # command). Consider a credential helper if that exposure matters.
    token = token_file.read_text(encoding="utf-8").strip()
    scheme = "https://"
    if clone_url.startswith(scheme):
        # Percent-encode so characters such as "@" or "/" cannot corrupt the URL.
        return f"{scheme}{urllib.parse.quote(token, safe='')}@{clone_url[len(scheme):]}"
    return clone_url


def ensure_checkout(repo: dict, workspace_root: Path, token_file: Path) -> Path:
    """Make ``<workspace_root>/<repo name>`` a fresh shallow checkout of the default branch.

    An existing checkout is fetched and hard-reset to ``origin/<branch>``;
    otherwise the repository is cloned with depth 1.

    Returns:
        The checkout directory.

    Raises:
        subprocess.CalledProcessError: if any git command fails.
    """
    workspace_root.mkdir(parents=True, exist_ok=True)
    repo_dir = workspace_root / repo["name"]
    branch = repo.get("default_branch") or "main"
    clone_url = _authenticated_clone_url(repo["clone_url"], token_file)

    if (repo_dir / ".git").exists():
        git = ["git", "-C", str(repo_dir)]
        # The initial clone is --single-branch, so its configured refspec only
        # tracks the branch cloned at the time. Fetch with an explicit refspec
        # so origin/<branch> exists even if the default branch has changed.
        refspec = f"+refs/heads/{branch}:refs/remotes/origin/{branch}"
        subprocess.run([*git, "fetch", "--depth", "1", "origin", refspec], check=True)
        # -B (re)creates the local branch at origin/<branch>; --force discards
        # local modifications, which the hard reset below would drop anyway.
        subprocess.run([*git, "checkout", "--force", "-B", branch, f"origin/{branch}"], check=True)
        subprocess.run([*git, "reset", "--hard", f"origin/{branch}"], check=True)
    else:
        subprocess.run(
            ["git", "clone", "--depth", "1", "--single-branch", "--branch", branch, clone_url, str(repo_dir)],
            check=True,
        )
    return repo_dir


def run_plan(plan: RunPlan) -> None:
    """Create the output directory and run the pipeline command.

    Raises:
        subprocess.CalledProcessError: if the pipeline exits non-zero.
    """
    plan.output_path.parent.mkdir(parents=True, exist_ok=True)
    subprocess.run(plan.command, check=True)


def main() -> None:
    """Run one nightly genome pass for the next repository in the rotation.

    With ``--dry-run`` the plan is printed as JSON and nothing is checked out,
    executed or saved. Otherwise the repo is checked out, the pipeline is run,
    and the rotation state is saved only after the pipeline succeeds.
    """
    parser = argparse.ArgumentParser(description="Run one nightly codebase genome pass for the next repo in an org")
    parser.add_argument("--org", default="Timmy_Foundation")
    parser.add_argument("--host", default="https://forge.alexanderwhitestone.com")
    parser.add_argument("--token-file", default=os.path.expanduser("~/.config/gitea/token"))
    parser.add_argument("--workspace-root", default=os.path.expanduser("~/timmy-foundation-repos"))
    parser.add_argument("--output-root", default=os.path.expanduser("~/.timmy/codebase-genomes"))
    parser.add_argument("--state-path", default=os.path.expanduser("~/.timmy/codebase_genome_state.json"))
    parser.add_argument(
        "--pipeline-script",
        default=str(Path(__file__).resolve().parents[1] / "pipelines" / "codebase_genome.py"),
    )
    parser.add_argument("--include-archived", action="store_true")
    parser.add_argument("--dry-run", action="store_true")
    args = parser.parse_args()

    token_file = Path(args.token_file).expanduser()
    workspace_root = Path(args.workspace_root).expanduser()
    output_root = Path(args.output_root).expanduser()
    state_path = Path(args.state_path).expanduser()
    pipeline_script = Path(args.pipeline_script).expanduser()

    repos = fetch_org_repos(args.org, args.host, token_file, include_archived=args.include_archived)
    state = load_state(state_path)
    repo = select_next_repo(repos, state)
    plan = build_run_plan(repo, workspace_root=workspace_root, output_root=output_root, pipeline_script=pipeline_script)

    if args.dry_run:
        print(
            json.dumps(
                {
                    "repo": repo,
                    "repo_dir": str(plan.repo_dir),
                    "output_path": str(plan.output_path),
                    "command": plan.command,
                },
                indent=2,
            )
        )
        return

    ensure_checkout(repo, workspace_root=workspace_root, token_file=token_file)
    run_plan(plan)
    # Record progress only after a successful run so a failed repo is retried.
    save_state(
        state_path,
        {
            "last_index": sorted(repos, key=_repo_sort_key).index(repo),
            "last_repo": repo.get("name"),
        },
    )
    print(f"Completed genome run for {repo['full_name']} -> {plan.output_path}")


if __name__ == "__main__":
    main()