diff --git a/docs/CODEBASE_GENOME_PIPELINE.md b/docs/CODEBASE_GENOME_PIPELINE.md index f73cb6d..ec7edea 100644 --- a/docs/CODEBASE_GENOME_PIPELINE.md +++ b/docs/CODEBASE_GENOME_PIPELINE.md @@ -9,6 +9,7 @@ This pipeline gives Timmy a repeatable way to generate a deterministic `GENOME.m - `pipelines/codebase_genome.py` — static analyzer that writes `GENOME.md` - `pipelines/codebase-genome.py` — thin CLI wrapper matching the expected pipeline-style entrypoint - `scripts/codebase_genome_nightly.py` — org-aware nightly runner that selects the next repo, updates a local checkout, and writes the genome artifact +- `scripts/codebase_genome_status.py` — rollup/status reporter for artifact coverage, duplicate paths, and next uncovered repo - `GENOME.md` — generated analysis for `timmy-home` itself ## Genome output diff --git a/scripts/codebase_genome_status.py b/scripts/codebase_genome_status.py new file mode 100644 index 0000000..8664c81 --- /dev/null +++ b/scripts/codebase_genome_status.py @@ -0,0 +1,218 @@ +#!/usr/bin/env python3 +"""Status/reporting helper for the codebase genome pipeline. + +This lands a parent-epic slice for timmy-home #665 by making the current genome +coverage across repos inspectable: which repos have artifacts, which have tests, +what duplicates exist, and which repo is still uncovered next. 
from __future__ import annotations

import argparse
import json
from datetime import datetime, timezone
from pathlib import Path
from typing import Iterable
import urllib.request


def artifact_repo_name(path: Path, host_repo_name: str = 'timmy-home') -> str | None:
    """Map a repo-relative markdown path to the repo whose genome it holds.

    Recognised layouts (checked in this order):

    * ``GENOME.md`` at the root              -> ``host_repo_name``
    * ``genomes/<repo>/GENOME.md``           -> ``<repo>``
    * ``genomes/**/<repo>-GENOME.md``        -> ``<repo>``
    * ``GENOME-<repo>.md`` at the root       -> ``<repo>``
    * ``<repo>-GENOME.md`` at the root       -> ``<repo>``

    Args:
        path: Path relative to the repository root.
        host_repo_name: Name reported for the root-level ``GENOME.md``.

    Returns:
        The repo name, or ``None`` when the path is not a genome artifact or
        the name part is empty (for example ``GENOME-.md``).
    """
    if path.as_posix() == 'GENOME.md':
        return host_repo_name

    name = path.name
    parts = path.parts
    under_genomes = parts[:1] == ('genomes',)
    at_root = path.parent == Path('.')

    candidate = None
    if under_genomes and name == 'GENOME.md' and len(parts) == 3:
        candidate = parts[1]
    elif under_genomes and name.endswith('-GENOME.md'):
        candidate = name[:-len('-GENOME.md')]
    elif at_root and name.startswith('GENOME-') and name.endswith('.md'):
        candidate = name[len('GENOME-'):-len('.md')]
    elif at_root and name.endswith('-GENOME.md'):
        candidate = name[:-len('-GENOME.md')]

    # An empty name (e.g. from ``-GENOME.md``) is not a repo; report "no match"
    # the same way test_repo_name does instead of returning ''.
    return candidate or None


def test_repo_name(path: Path, host_repo_name: str = 'timmy-home') -> str | None:
    """Map a genome test file path to the repo it covers.

    ``test_codebase_genome_pipeline.py`` maps to ``host_repo_name``. A file
    named ``test_<name>_genome.py`` maps to ``<name>`` with underscores turned
    into hyphens. Anything else, or an empty ``<name>``, yields ``None``.
    """
    if path.name == 'test_codebase_genome_pipeline.py':
        return host_repo_name
    stem = path.stem
    if not (stem.startswith('test_') and stem.endswith('_genome')):
        return None
    middle = stem[len('test_'):-len('_genome')]
    return middle.replace('_', '-') if middle else None


# This helper's name matches pytest's default ``test_*`` function pattern, so a
# test module that imports it would have it collected as a test and error on
# the unknown ``path`` fixture. Opt it out of collection explicitly.
test_repo_name.__test__ = False  # type: ignore[attr-defined]


def scan_artifacts(repo_root: Path, host_repo_name: str = 'timmy-home') -> dict[str, list[str]]:
    """Collect genome artifact paths under ``repo_root``, grouped by repo name.

    Every ``*.md`` file below ``repo_root`` is visited in sorted order and
    classified with :func:`artifact_repo_name`; unrecognised files are skipped.

    Returns:
        Mapping of repo name to the POSIX-style relative paths of its
        artifacts, in sorted path order.
    """
    found: dict[str, list[str]] = {}
    for md_path in sorted(repo_root.rglob('*.md')):
        rel = md_path.relative_to(repo_root)
        repo = artifact_repo_name(rel, host_repo_name=host_repo_name)
        if repo is None:
            continue
        found.setdefault(repo, []).append(rel.as_posix())
    return found


def scan_tests(repo_root: Path, host_repo_name: str = 'timmy-home') -> set[str]:
    """Return the repo names that have a genome test under ``repo_root/tests``.

    Files matching ``test_*.py`` anywhere below ``tests`` are classified with
    :func:`test_repo_name`. A missing ``tests`` directory yields an empty set.
    """
    tests: set[str] = set()
    tests_root = repo_root / 'tests'
    if not tests_root.exists():
        return tests
    for test_path in sorted(tests_root.rglob('test_*.py')):
        repo = test_repo_name(test_path.relative_to(repo_root), host_repo_name=host_repo_name)
        if repo:
            tests.add(repo)
    return tests
def build_status_summary(
    *,
    repo_root: str | Path,
    expected_repos: Iterable[str],
    state: dict | None = None,
    host_repo_name: str = 'timmy-home',
) -> dict:
    """Build the genome coverage summary for ``repo_root``.

    Args:
        repo_root: Checkout to scan for genome artifacts and tests.
        expected_repos: Repo names that are expected to have an artifact.
        state: Optional state mapping; only its ``last_repo`` key is read.
        host_repo_name: Name used for the host repo's own artifact and test.

    Returns:
        A dict with the keys ``generated_at`` (UTC ISO timestamp ending in
        ``Z``), ``total_expected_repos``, ``artifact_count``,
        ``tested_artifact_count``, ``last_repo``, ``next_uncovered_repo``,
        ``missing_repos``, ``duplicates`` and ``artifacts``.
    """
    root = Path(repo_root)
    expected = list(expected_repos)
    artifacts = scan_artifacts(root, host_repo_name=host_repo_name)
    tested_repos = scan_tests(root, host_repo_name=host_repo_name)

    coverage = {}
    duplicates = {}
    for repo in sorted(artifacts):
        paths = artifacts[repo]
        coverage[repo] = {
            'artifact_paths': paths,
            'has_test': repo in tested_repos,
        }
        # More than one artifact path for the same repo is reported as a duplicate.
        if len(paths) > 1:
            duplicates[repo] = paths

    # Keep the caller's ordering so the "next" repo is the first expected one
    # that still has no artifact.
    missing_repos = [repo for repo in expected if repo not in artifacts]
    next_uncovered_repo = missing_repos[0] if missing_repos else None

    return {
        'generated_at': datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z'),
        'total_expected_repos': len(expected),
        'artifact_count': len(artifacts),
        'tested_artifact_count': sum(1 for repo in artifacts if repo in tested_repos),
        'last_repo': (state or {}).get('last_repo'),
        'next_uncovered_repo': next_uncovered_repo,
        'missing_repos': missing_repos,
        'duplicates': duplicates,
        'artifacts': coverage,
    }


def render_markdown(summary: dict) -> str:
    """Render a summary from :func:`build_status_summary` as a Markdown report.

    The report has a summary list, a coverage table, the missing repos and the
    duplicate artifact paths. The returned text ends with a single newline.
    """
    lines = [
        '# Codebase Genome Status',
        '',
        f"Generated: {summary['generated_at']}",
        '',
        '## Summary',
        '',
        f"- expected repos: {summary['total_expected_repos']}",
        f"- repos with genome artifacts: {summary['artifact_count']}",
        f"- repos with genome tests: {summary['tested_artifact_count']}",
    ]
    if summary.get('last_repo'):
        lines.append(f"- last repo processed by nightly rotation: {summary['last_repo']}")
    if summary.get('next_uncovered_repo'):
        lines.append(f"- next uncovered repo: {summary['next_uncovered_repo']}")

    lines += [
        '',
        '## Coverage Matrix',
        '',
        '| Repo | Artifact Paths | Test? |',
        '|------|----------------|-------|',
    ]
    for repo, data in summary['artifacts'].items():
        # A table row must stay on one physical line, so multiple paths are
        # separated with <br>. Each path gets its own code span because an
        # HTML tag inside a code span would be shown literally.
        path_cell = '<br>'.join(f'`{path}`' for path in data['artifact_paths'])
        has_test = 'yes' if data['has_test'] else 'no'
        lines.append(f'| `{repo}` | {path_cell} | {has_test} |')

    lines += ['', '## Missing Repo Artifacts', '']
    if summary['missing_repos']:
        for repo in summary['missing_repos']:
            lines.append(f'- `{repo}`')
    else:
        lines.append('- none')

    lines += ['', '## Duplicate Artifact Paths', '']
    if summary['duplicates']:
        for repo, paths in summary['duplicates'].items():
            lines.append(f'- `{repo}`')
            for path in paths:
                # Two-space indent so the paths nest under the repo bullet.
                lines.append(f'  - `{path}`')
    else:
        lines.append('- none')

    return '\n'.join(lines) + '\n'


def load_state(path: str | Path | None) -> dict:
    """Load the JSON state file at ``path``.

    Returns an empty dict when ``path`` is falsy or the file does not exist.
    ``~`` in the path is expanded. Invalid JSON propagates as the exception
    raised by ``json.loads``.
    """
    if not path:
        return {}
    state_path = Path(path).expanduser()
    if not state_path.exists():
        return {}
    return json.loads(state_path.read_text(encoding='utf-8'))


def fetch_org_repo_names(org: str, host: str, token_file: str | Path, *, include_archived: bool = False) -> list[str]:
    """List the repo names of ``org`` via the host's ``/api/v1/orgs/.../repos`` API.

    Args:
        org: Organisation name used in the request path.
        host: Base URL of the server; a trailing slash is ignored.
        token_file: File holding the API token (``~`` is expanded); its
            stripped content is sent in the ``Authorization`` header.
        include_archived: Keep repos whose ``archived`` field is truthy.

    Returns:
        Sorted, de-duplicated repo names. Names starting with ``.`` are
        always skipped.
    """
    token = Path(token_file).expanduser().read_text(encoding='utf-8').strip()
    headers = {'Authorization': f'token {token}', 'Accept': 'application/json'}
    base_url = f"{host.rstrip('/')}/api/v1/orgs/{org}/repos"
    repos = []
    page = 1
    while True:
        req = urllib.request.Request(f'{base_url}?limit=100&page={page}', headers=headers)
        with urllib.request.urlopen(req, timeout=30) as resp:
            batch = json.loads(resp.read().decode('utf-8'))
        # Only an empty page marks the end. The server may cap the page size
        # below the requested 100 (e.g. Gitea's MAX_RESPONSE_ITEMS setting),
        # so a short page does not prove that the listing is exhausted.
        if not batch:
            break
        for repo in batch:
            if repo.get('archived') and not include_archived:
                continue
            name = repo['name']
            if name.startswith('.'):
                continue
            repos.append(name)
        page += 1
    return sorted(set(repos))


def main(argv: list[str] | None = None) -> int:
    """CLI entry point: print the Markdown status report and optionally save it.

    Expected repos come from ``--expected-repos-json`` when given, otherwise
    from an org scan via :func:`fetch_org_repo_names`. The report is printed to
    stdout and, with ``--output``, also written to that path (parent
    directories are created).

    Returns:
        ``0`` on success. Invalid ``--expected-repos-json`` input exits through
        ``parser.error`` (exit status 2).
    """
    parser = argparse.ArgumentParser(description='Summarize codebase genome coverage across repos')
    parser.add_argument('--repo-root', default='.', help='timmy-home repo root')
    parser.add_argument('--expected-repos-json', help='JSON array of expected repo names')
    parser.add_argument('--org', default='Timmy_Foundation', help='Gitea org to inspect when expected repos are not provided')
    parser.add_argument('--host', default='https://forge.alexanderwhitestone.com', help='Gitea host')
    parser.add_argument('--token-file', default='~/.config/gitea/token', help='Gitea token file for org scan fallback')
    parser.add_argument('--include-archived', action='store_true', help='Include archived repos in org scan fallback')
    parser.add_argument('--state-path', default='~/.timmy/codebase_genome_state.json')
    parser.add_argument('--output', help='Optional markdown output path')
    args = parser.parse_args(argv)

    if args.expected_repos_json:
        try:
            expected = json.loads(args.expected_repos_json)
        except json.JSONDecodeError as exc:
            parser.error(f'--expected-repos-json is not valid JSON: {exc}')
        # A bare JSON string would otherwise be split into single characters
        # by list(...) downstream and reported as one-letter "repos".
        if not isinstance(expected, list) or not all(isinstance(name, str) for name in expected):
            parser.error('--expected-repos-json must be a JSON array of repo names')
    else:
        expected = fetch_org_repo_names(args.org, args.host, args.token_file, include_archived=args.include_archived)
    summary = build_status_summary(repo_root=args.repo_root, expected_repos=expected, state=load_state(args.state_path))
    rendered = render_markdown(summary)
    print(rendered)
    if args.output:
        out = Path(args.output)
        out.parent.mkdir(parents=True, exist_ok=True)
        out.write_text(rendered, encoding='utf-8')
    return 0


if __name__ == '__main__':
    raise SystemExit(main())