Compare commits

..

1 Commits

Author SHA1 Message Date
Alexander Whitestone
b8b8bb65fd feat: GENOME.md template + single-repo analyzer (#666)
Some checks failed
Agent PR Gate / gate (pull_request) Has been cancelled
Agent PR Gate / report (pull_request) Has been cancelled
Self-Healing Smoke / self-healing-smoke (pull_request) Has been cancelled
Smoke Test / smoke (pull_request) Has been cancelled
Template and analyzer for the Codebase Genome batch (issues #667-683).

templates/GENOME-template.md:
  Reusable template with placeholders for repo name, overview,
  architecture, entry points, data flow, abstractions, API surface,
  test coverage, security, and design decisions.

scripts/genome_analyzer.py:
  Auto-generates GENOME.md skeleton from a codebase scan:
  - File counts by extension
  - Directory structure (depth 2)
  - Entry point detection (main.py, scripts/, Makefile, etc.)
  - Test file enumeration
  - README first-paragraph extraction

Usage:
  python3 scripts/genome_analyzer.py /path/to/repo --output GENOME.md
  python3 scripts/genome_analyzer.py /path/to/repo --dry-run
2026-04-16 01:19:41 -04:00
5 changed files with 217 additions and 285 deletions

View File

@@ -43,18 +43,6 @@ Override at runtime if needed:
### 1. `scripts/verify_big_brain.py`
Checks the configured provider using the right protocol for the chosen backend.
### 1b. `scripts/timmy_gemma4_mac.py`
Timmy-specific prove-it helper for Mac Hermes.
Refs #543.
What it adds beyond the generic verifier:
- targets the root config.yaml used by Timmy's Mac Hermes
- reports whether RunPod / Vertex credential files are present without leaking them
- derives a RunPod `/v1` endpoint from a pod id when supplied
- previews the Big Brain provider config update for Timmy
- emits the exact Hermes chat probe command to run once a live endpoint exists
- only spends money if `--apply-runpod` is explicitly passed
For `openai` backends it verifies:
- `GET /models`
- `POST /chat/completions`

171
scripts/genome_analyzer.py Executable file
View File

@@ -0,0 +1,171 @@
#!/usr/bin/env python3
"""
genome_analyzer.py — Generate a GENOME.md from a codebase.
Scans a repository and produces a structured codebase genome with:
- File counts by type
- Architecture overview (directory structure)
- Entry points
- Test coverage summary
Usage:
python3 scripts/genome_analyzer.py /path/to/repo
python3 scripts/genome_analyzer.py /path/to/repo --output GENOME.md
python3 scripts/genome_analyzer.py /path/to/repo --dry-run
Part of #666: GENOME.md Template + Single-Repo Analyzer.
"""
import argparse
import sys
from collections import defaultdict
from datetime import datetime, timezone
from pathlib import Path
from typing import Dict, List, Tuple
SKIP_DIRS = {".git", "__pycache__", ".venv", "venv", "node_modules", ".tox", ".pytest_cache", ".DS_Store"}


def count_files(repo_path: Path) -> Dict[str, int]:
    """Tally files beneath *repo_path* by extension, most common first.

    Files inside any SKIP_DIRS component (VCS, virtualenvs, caches) are
    ignored.  Files without an extension are grouped under ``(no ext)``.
    """
    tally: Dict[str, int] = defaultdict(int)
    for entry in repo_path.rglob("*"):
        if any(part in SKIP_DIRS for part in entry.parts):
            continue  # skip whole VCS/venv/cache trees
        if entry.is_file():
            tally[entry.suffix or "(no ext)"] += 1
    # Descending by count; ties keep insertion order.
    return dict(sorted(tally.items(), key=lambda kv: -kv[1]))
def find_entry_points(repo_path: Path) -> List[str]:
    """Detect likely entry-point files in *repo_path* (at most 15).

    Checks a fixed list of conventional top-level names, then adds any
    ``scripts/*.py`` / ``scripts/*.sh`` files that are not tests.
    """
    known = (
        "main.py", "app.py", "server.py", "cli.py", "manage.py",
        "index.html", "index.js", "index.ts",
        "Makefile", "Dockerfile", "docker-compose.yml",
        "README.md", "deploy.sh", "setup.py", "pyproject.toml",
    )
    found = [name for name in known if (repo_path / name).exists()]
    scripts_dir = repo_path / "scripts"
    if scripts_dir.is_dir():
        found.extend(
            f"scripts/{entry.name}"
            for entry in sorted(scripts_dir.iterdir())
            if entry.suffix in (".py", ".sh") and not entry.name.startswith("test_")
        )
    return found[:15]
def find_tests(repo_path: Path) -> Tuple[List[str], int]:
    """Return (sorted repo-relative test file paths, their count).

    A file counts as a test when its name starts with ``test_`` or ends
    with ``_test.py`` / ``_test.js``; SKIP_DIRS trees are ignored.
    """
    def _is_test(p: Path) -> bool:
        return p.name.startswith("test_") or p.name.endswith(("_test.py", "_test.js"))

    found = [
        str(p.relative_to(repo_path))
        for p in repo_path.rglob("*")
        if not any(part in SKIP_DIRS for part in p.parts)
        and p.is_file()
        and _is_test(p)
    ]
    return sorted(found), len(found)
def find_directories(repo_path: Path, max_depth: int = 2) -> List[str]:
    """List up to 30 repo-relative directories no deeper than *max_depth*.

    SKIP_DIRS trees are excluded; results follow sorted rglob order.
    """
    found: List[str] = []
    for entry in sorted(repo_path.rglob("*")):
        if not entry.is_dir():
            continue
        if any(part in SKIP_DIRS for part in entry.parts):
            continue
        rel = entry.relative_to(repo_path)
        if len(rel.parts) > max_depth:
            continue
        rel_str = str(rel)
        if rel_str != ".":
            found.append(rel_str)
    return found[:30]
def read_readme(repo_path: Path) -> str:
    """Return the first prose paragraph from the repo's README.

    Tries the common README names in order.  Markdown heading lines before
    any prose are skipped; the first run of non-blank lines (up to five) is
    joined into one string.  A heading or blank line after prose ends the
    paragraph.

    Fix over the original: a README that exists but contains no prose
    (empty, or headings only) no longer short-circuits to an empty string —
    the next candidate is tried, and "(no README found)" is returned when
    nothing usable exists.  Heading lines are also no longer glued into the
    extracted paragraph.
    """
    for name in ("README.md", "README.rst", "README.txt", "README"):
        readme = repo_path / name
        if not readme.exists():
            continue
        para: List[str] = []
        for line in readme.read_text(encoding="utf-8", errors="replace").splitlines():
            if line.startswith("#"):
                if para:
                    break  # a new heading ends the first paragraph
                continue  # skip title headings before any prose
            stripped = line.strip()
            if stripped:
                para.append(stripped)
            elif para:
                break  # blank line after prose ends the paragraph
        if para:
            return " ".join(para[:5])
    return "(no README found)"
def generate_genome(repo_path: Path, repo_name: str = "") -> str:
    """Render the full GENOME.md document for the repo at *repo_path*.

    Gathers the README description, file-type counts, entry points, tests,
    and directory layout, then assembles the Markdown sections.  Security
    and design-decision sections are emitted as placeholders for manual
    analysis.
    """
    name = repo_name or repo_path.name
    today = datetime.now(timezone.utc).strftime("%Y-%m-%d")
    description = read_readme(repo_path)
    counts = count_files(repo_path)
    entry_points = find_entry_points(repo_path)
    test_files, test_count = find_tests(repo_path)
    dirs = find_directories(repo_path)

    out: List[str] = [
        f"# GENOME.md — {name}", "",
        f"> Codebase analysis generated {today}. {description[:100]}.", "",
        "## Project Overview", "",
        description, "",
        f"**{sum(counts.values())} files** across {len(counts)} file types.", "",
        "## Architecture", "",
        "```",
    ]
    out.extend(f" {d}/" for d in dirs[:20])
    out.append("```")
    out.extend(["", "### File Types", "", "| Type | Count |", "|------|-------|"])
    out.extend(f"| {ext} | {count} |" for ext, count in list(counts.items())[:15])
    out.extend(["", "## Entry Points", ""])
    out.extend(f"- `{ep}`" for ep in entry_points)
    out.extend(["", "## Test Coverage", "", f"**{test_count} test files** found.", ""])
    if not test_files:
        out.append("No test files found.")
    else:
        out.extend(f"- `{tf}`" for tf in test_files[:10])
        hidden = len(test_files) - 10
        if hidden > 0:
            out.append(f"- ... and {hidden} more")
    out.extend(["", "## Security Considerations", "", "(To be filled during analysis)", ""])
    out.extend(["## Design Decisions", "", "(To be filled during analysis)", ""])
    return "\n".join(out)
def main():
    """CLI entry point: validate the repo path and emit GENOME.md.

    Modes:
      --dry-run  print file/test statistics only and exit 0
      --output   write the generated document to a file
      (default)  print the generated document to stdout

    Exits with status 1 when the given path is not a directory.
    """
    parser = argparse.ArgumentParser(description="Generate GENOME.md from a codebase")
    parser.add_argument("repo_path", help="Path to repository")
    parser.add_argument("--output", default="", help="Output file (default: stdout)")
    parser.add_argument("--name", default="", help="Repository name")
    parser.add_argument("--dry-run", action="store_true", help="Print stats only")
    args = parser.parse_args()
    repo_path = Path(args.repo_path).resolve()
    if not repo_path.is_dir():
        print(f"ERROR: {repo_path} is not a directory", file=sys.stderr)
        sys.exit(1)
    repo_name = args.name or repo_path.name
    if args.dry_run:
        counts = count_files(repo_path)
        _, test_count = find_tests(repo_path)
        print(f"Repo: {repo_name}")
        print(f"Total files: {sum(counts.values())}")
        print(f"Test files: {test_count}")
        print(f"Top types: {', '.join(f'{k}={v}' for k, v in list(counts.items())[:5])}")
        sys.exit(0)
    genome = generate_genome(repo_path, repo_name)
    if args.output:
        # Fix: write UTF-8 explicitly.  READMEs are read as UTF-8, so the
        # document may contain non-ASCII; relying on the locale encoding
        # can fail (e.g. cp1252 on Windows).
        with open(args.output, "w", encoding="utf-8") as f:
            f.write(genome)
        print(f"Written: {args.output}")
    else:
        print(genome)


if __name__ == "__main__":
    main()

View File

@@ -1,194 +0,0 @@
#!/usr/bin/env python3
"""Timmy-specific RunPod/Vertex Gemma 4 prove-it helper for Mac Hermes.
Refs: timmy-home #543
Safe by default:
- reports whether RunPod / Vertex credential files exist
- derives a RunPod OpenAI-compatible base URL from a pod id if provided
- previews the root `config.yaml` Big Brain provider update for Timmy's Mac Hermes
- emits the exact Hermes chat probe command to run once a live endpoint exists
- can call the existing RunPod deployment helper only when --apply-runpod is explicitly used
- can write the repo-root config only when --write-config is explicitly used
- can verify an OpenAI-compatible endpoint only when --verify-chat is explicitly used
"""
from __future__ import annotations
import argparse
import json
import sys
from pathlib import Path
from typing import Any
REPO_ROOT = Path(__file__).resolve().parents[1]
if str(REPO_ROOT) not in sys.path:
sys.path.insert(0, str(REPO_ROOT))
from scripts.bezalel_gemma4_vps import (
DEFAULT_CLOUD_TYPE,
DEFAULT_GPU_TYPE,
DEFAULT_MODEL,
DEFAULT_PROVIDER_NAME,
build_runpod_endpoint,
deploy_runpod,
update_config_text,
verify_openai_chat,
write_config_file,
)
DEFAULT_RUNPOD_TOKEN_FILE = Path.home() / ".config" / "runpod" / "access_key"
DEFAULT_VERTEX_KEY_FILE = Path.home() / ".config" / "vertex" / "key"
DEFAULT_CONFIG_PATH = Path(__file__).resolve().parents[1] / "config.yaml"
DEFAULT_VERTEX_BASE_URL = "https://YOUR_VERTEX_BRIDGE_HOST/v1"


def detect_credential_files(
    *,
    runpod_file: Path = DEFAULT_RUNPOD_TOKEN_FILE,
    vertex_key_file: Path = DEFAULT_VERTEX_KEY_FILE,
) -> dict[str, Any]:
    """Report which credential files exist, without reading secret material.

    Only presence booleans and the file *paths* are returned — file contents
    are never touched, so no secrets can leak into logs or JSON output.
    """
    status: dict[str, Any] = {
        "runpod_key_present": runpod_file.exists(),
        "vertex_key_present": vertex_key_file.exists(),
    }
    status["runpod_token_file"] = str(runpod_file)
    status["vertex_key_file"] = str(vertex_key_file)
    return status
def build_hermes_chat_probe_command(
    provider_name: str = DEFAULT_PROVIDER_NAME,
    model: str = DEFAULT_MODEL,
) -> str:
    """Return the exact Hermes CLI command that probes the Big Brain provider."""
    # The probe asks for a fixed sentinel reply so success is unambiguous.
    parts = [
        'hermes chat -q "Reply with exactly: BIG_BRAIN_READY" -Q ',
        f'--provider "{provider_name}" --model {model}',
    ]
    return "".join(parts)
def build_timmy_proof_summary(
    *,
    config_text: str,
    config_path: Path = DEFAULT_CONFIG_PATH,
    pod_id: str | None = None,
    base_url: str | None = None,
    vertex_base_url: str | None = None,
    model: str = DEFAULT_MODEL,
    provider_name: str = DEFAULT_PROVIDER_NAME,
    runpod_file: Path = DEFAULT_RUNPOD_TOKEN_FILE,
    vertex_key_file: Path = DEFAULT_VERTEX_KEY_FILE,
) -> dict[str, Any]:
    """Assemble the dry-run proof summary for Timmy's Mac Hermes setup.

    Resolves the endpoint (priority: explicit base_url > pod id > vertex
    bridge > placeholder), previews the config update, and records which
    credential files are present — all without side effects.
    """
    actions: list[str] = []
    resolved = base_url
    if not resolved and pod_id:
        resolved = build_runpod_endpoint(pod_id)
        actions.append("computed_base_url_from_pod_id")
    if not resolved and vertex_base_url:
        resolved = vertex_base_url.rstrip("/")
        actions.append("using_vertex_base_url")
    if not resolved:
        resolved = DEFAULT_VERTEX_BASE_URL
        actions.append("using_placeholder_vertex_bridge")
    summary: dict[str, Any] = {
        "config_path": str(config_path),
        "provider_name": provider_name,
        "model": model,
        "base_url": resolved,
        "config_preview": update_config_text(
            config_text,
            base_url=resolved,
            model=model,
            provider_name=provider_name,
        ),
        "verify_script_command": "python3 scripts/verify_big_brain.py",
        "hermes_chat_probe_command": build_hermes_chat_probe_command(provider_name=provider_name, model=model),
        "actions": actions,
    }
    # Credential presence flags merge in last, matching the original layout.
    summary.update(detect_credential_files(runpod_file=runpod_file, vertex_key_file=vertex_key_file))
    return summary
def parse_args() -> argparse.Namespace:
    """Define and parse the CLI surface of the Timmy prove-it helper."""
    ap = argparse.ArgumentParser(description="Timmy-specific RunPod/Vertex Gemma 4 prove-it helper for Mac Hermes.")
    # Endpoint resolution inputs.
    ap.add_argument("--pod-id", help="Existing RunPod pod id to derive the /v1 endpoint")
    ap.add_argument("--base-url", help="Existing OpenAI-compatible base URL to wire directly")
    ap.add_argument("--vertex-base-url", help="Vertex/OpenAI bridge base URL (for example https://host/v1)")
    # Deployment knobs (used only with --apply-runpod).
    ap.add_argument("--pod-name", default="timmy-gemma4")
    ap.add_argument("--gpu-type", default=DEFAULT_GPU_TYPE)
    ap.add_argument("--cloud-type", default=DEFAULT_CLOUD_TYPE)
    ap.add_argument("--model", default=DEFAULT_MODEL)
    ap.add_argument("--provider-name", default=DEFAULT_PROVIDER_NAME)
    # Local credential/config files.
    ap.add_argument("--runpod-token-file", type=Path, default=DEFAULT_RUNPOD_TOKEN_FILE)
    ap.add_argument("--vertex-key-file", type=Path, default=DEFAULT_VERTEX_KEY_FILE)
    ap.add_argument("--config-path", type=Path, default=DEFAULT_CONFIG_PATH)
    # Side effects are opt-in; without these flags everything is a dry run.
    ap.add_argument("--apply-runpod", action="store_true", help="Call the RunPod API using --runpod-token-file")
    ap.add_argument("--write-config", action="store_true", help="Write the updated Timmy config to --config-path")
    ap.add_argument("--verify-chat", action="store_true", help="Verify the OpenAI-compatible endpoint with a chat probe")
    ap.add_argument("--json", action="store_true", help="Emit machine-readable JSON")
    return ap.parse_args()
def main() -> None:
    """Orchestrate the prove-it flow.

    Sequence: parse CLI args -> optionally deploy a RunPod pod
    (--apply-runpod) -> build the dry-run summary -> optionally write the
    config (--write-config) and probe the endpoint (--verify-chat) -> emit
    JSON or a human-readable report.
    """
    args = parse_args()
    # Existing config text feeds the preview; a missing file yields an empty base.
    config_text = args.config_path.read_text() if args.config_path.exists() else ""
    base_url = args.base_url
    actions: list[str] = []
    deployment: dict[str, Any] | None = None
    if args.apply_runpod:
        # Money-spending path: only entered when --apply-runpod is explicit.
        if not args.runpod_token_file.exists():
            raise SystemExit(f"RunPod token file not found: {args.runpod_token_file}")
        api_key = args.runpod_token_file.read_text().strip()
        deployment = deploy_runpod(
            api_key=api_key,
            name=args.pod_name,
            gpu_type=args.gpu_type,
            cloud_type=args.cloud_type,
            model=args.model,
        )
        # The freshly deployed pod overrides any --base-url the caller passed.
        base_url = deployment["base_url"]
        actions.append("deployed_runpod_pod")
    summary = build_timmy_proof_summary(
        config_text=config_text,
        config_path=args.config_path,
        pod_id=args.pod_id,
        base_url=base_url,
        vertex_base_url=args.vertex_base_url,
        model=args.model,
        provider_name=args.provider_name,
        runpod_file=args.runpod_token_file,
        vertex_key_file=args.vertex_key_file,
    )
    # Deployment actions come first so the log reads chronologically.
    summary["actions"] = actions + summary["actions"]
    if deployment is not None:
        summary["deployment"] = deployment
    if args.write_config:
        write_config_file(args.config_path, base_url=summary["base_url"], model=args.model, provider_name=args.provider_name)
        summary["actions"].append("wrote_config")
    if args.verify_chat:
        summary["verify_response"] = verify_openai_chat(summary["base_url"], model=args.model)
        summary["actions"].append("verified_chat")
    if args.json:
        # Machine-readable mode: emit the whole summary and stop.
        print(json.dumps(summary, indent=2))
        return
    # Human-readable report.
    print("--- Timmy Gemma4 Mac Prove-It ---")
    print(f"Config path: {summary['config_path']}")
    print(f"Base URL: {summary['base_url']}")
    print(f"Model: {summary['model']}")
    print(f"RunPod key present: {summary['runpod_key_present']}")
    print(f"Vertex key present: {summary['vertex_key_present']}")
    print(f"Verify command: {summary['verify_script_command']}")
    print(f"Hermes chat probe: {summary['hermes_chat_probe_command']}")
    if summary["actions"]:
        print("Actions: " + ", ".join(summary["actions"]))


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,46 @@
# GENOME.md — {{REPO_NAME}}
> Codebase analysis generated {{DATE}}. {{SHORT_DESCRIPTION}}.
## Project Overview
{{OVERVIEW}}
## Architecture
{{ARCHITECTURE_DIAGRAM}}
## Entry Points
{{ENTRY_POINTS}}
## Data Flow
{{DATA_FLOW}}
## Key Abstractions
{{ABSTRACTIONS}}
## API Surface
{{API_SURFACE}}
## Test Coverage
### Existing Tests
{{EXISTING_TESTS}}
### Coverage Gaps
{{COVERAGE_GAPS}}
### Critical Paths That Need Tests
{{CRITICAL_PATHS}}
## Security Considerations
{{SECURITY}}
## Design Decisions
{{DESIGN_DECISIONS}}

View File

@@ -1,79 +0,0 @@
from __future__ import annotations
import json
from pathlib import Path
import yaml
from scripts.timmy_gemma4_mac import (
DEFAULT_CONFIG_PATH,
build_hermes_chat_probe_command,
build_timmy_proof_summary,
detect_credential_files,
)
def test_detect_credential_files_reports_presence_without_secret_material(tmp_path: Path) -> None:
    """Presence flags and paths are reported, but never the secret itself."""
    rp = tmp_path / "runpod_access_key"
    vx = tmp_path / "vertex_key"
    rp.write_text("rp_secret_123")
    result = detect_credential_files(runpod_file=rp, vertex_key_file=vx)
    assert result["runpod_key_present"] is True
    assert result["vertex_key_present"] is False
    assert result["runpod_token_file"] == str(rp)
    assert result["vertex_key_file"] == str(vx)
    # The token contents must not appear anywhere in the status payload.
    assert "rp_secret_123" not in json.dumps(result)
def test_build_timmy_proof_summary_targets_repo_root_config_and_derives_runpod_url(tmp_path: Path) -> None:
    """A pod id resolves to the RunPod /v1 proxy URL and flows into the preview."""
    config_path = tmp_path / "config.yaml"
    seed_config = {
        "custom_providers": [
            {
                "name": "Big Brain",
                "base_url": "https://YOUR_BIG_BRAIN_HOST/v1",
                "api_key": "",
                "model": "gemma4:latest",
            }
        ]
    }
    config_path.write_text(yaml.safe_dump(seed_config))
    summary = build_timmy_proof_summary(
        config_text=config_path.read_text(),
        config_path=config_path,
        pod_id="podxyz",
    )
    expected_url = "https://podxyz-11434.proxy.runpod.net/v1"
    assert summary["base_url"] == expected_url
    assert summary["config_path"] == str(config_path)
    provider = yaml.safe_load(summary["config_preview"])["custom_providers"][0]
    assert provider["name"] == "Big Brain"
    assert provider["base_url"] == expected_url
    assert provider["model"] == "gemma4:latest"
    assert "computed_base_url_from_pod_id" in summary["actions"]
def test_build_hermes_chat_probe_command_uses_big_brain_provider_contract() -> None:
    """The default probe command targets the Big Brain provider and sentinel."""
    command = build_hermes_chat_probe_command()
    assert command.startswith("hermes chat ")
    expected_fragments = (
        '--provider "Big Brain"',
        "--model gemma4:latest",
        "BIG_BRAIN_READY",
    )
    for fragment in expected_fragments:
        assert fragment in command
def test_repo_readme_mentions_timmy_specific_prove_it_script() -> None:
    """The Big Brain README documents the Timmy helper and its issue ref."""
    readme_text = Path("scripts/README_big_brain.md").read_text()
    for needle in ("scripts/timmy_gemma4_mac.py", "root config.yaml", "Refs #543"):
        assert needle in readme_text
def test_default_config_path_is_repo_root_config() -> None:
    """DEFAULT_CONFIG_PATH must point at the repo-root config.yaml."""
    repo_root = Path(__file__).resolve().parents[1]
    assert DEFAULT_CONFIG_PATH == repo_root / "config.yaml"