# compounding-intelligence/scripts/dependency_graph.py

#!/usr/bin/env python3
"""
Cross-Repo Dependency Graph Builder

Scans repos for import/require/reference patterns and builds a directed
dependency graph. Detects circular dependencies. Outputs DOT and Mermaid.

Usage:
  python3 scripts/dependency_graph.py /path/to/repos/
  python3 scripts/dependency_graph.py --repos repo1,repo2,repo3 --format mermaid
  python3 scripts/dependency_graph.py /path/to/ --format dot --output deps.dot

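Patterns detected (detection is textual: each scanned file is searched for
the names of known repos, so a reference is found in any of these forms
whenever it mentions a repo by name):
  - Python: import X, from X import Y
  - JavaScript: require("X"), import ... from "X"
  - Go: import "X"
  - Ansible: include_role, import_role
  - Docker/Compose: image: X, depends_on
  - Config references: repo-name in YAML/TOML/JSON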
"""

import argparse
import json
import os
import re
import sys
from collections import defaultdict
from pathlib import Path


# Canonical (hyphenated) names of the repos whose cross-references are tracked.
# Only names listed here can ever show up as a dependency.
KNOWN_REPOS = [
    "hermes-agent",
    "timmy-config",
    "timmy-home",
    "the-nexus",
    "the-door",
    "the-beacon",
    "fleet-ops",
    "burn-fleet",
    "timmy-dispatch",
    "turboquant",
    "compounding-intelligence",
    "the-playground",
    "second-son-of-timmy",
    "ai-safety-review",
    "the-echo-pattern",
    "timmy-academy",
    "wolf",
    "the-testament",
]


def normalize_repo_name(name: str) -> str:
    """Normalize a repo name for comparison.

    Surrounding whitespace is stripped, the name is lower-cased, underscores
    become hyphens, and a single trailing ``.git`` (as found at the end of
    clone URLs) is dropped.

    Args:
        name: Raw repo name, e.g. a directory name or the tail of a clone URL.

    Returns:
        The canonical form used when comparing two repo names for equality.
    """
    normalized = name.strip().lower().replace("_", "-")
    # Only a trailing ".git" is a clone-URL artifact; removing the substring
    # anywhere would corrupt names such as "foo.github.io".
    if normalized.endswith(".git"):
        normalized = normalized[: -len(".git")]
    return normalized


def scan_file_for_deps(filepath: str, content: str, own_repo: str) -> set:
    """Scan a file's content for references to other known repos.

    A repo counts as referenced when its name -- as written in KNOWN_REPOS,
    with hyphens replaced by underscores, or with hyphens removed -- occurs
    in ``content`` and is not embedded in a longer alphanumeric word. This
    keeps quoted names, path segments (``/repo/``, ``Org/repo``) and dotted
    module paths (``pkg.repo``) matching, while short names such as "wolf"
    no longer match inside unrelated words like "werewolf".

    Args:
        filepath: Path of the file being scanned. Not used by the matching
            itself; kept so the call signature stays stable.
        content: Full text of the file.
        own_repo: Name of the repo that owns the file; never reported as a
            dependency of itself.

    Returns:
        Set of KNOWN_REPOS entries referenced in ``content``.
    """
    deps = set()
    own_norm = normalize_repo_name(own_repo)

    for repo in KNOWN_REPOS:
        if normalize_repo_name(repo) == own_norm:
            continue

        # A set removes duplicates for names without hyphens; sorting
        # longest-first keeps the generated pattern deterministic.
        variants = {repo, repo.replace("-", "_"), repo.replace("-", "")}
        alternation = "|".join(
            re.escape(variant)
            for variant in sorted(variants, key=lambda v: (-len(v), v))
        )
        # Lookarounds reject matches glued to a letter or digit on either
        # side; punctuation such as "/", ".", "_", "-" and quotes is allowed.
        pattern = "(?<![A-Za-z0-9])(?:" + alternation + ")(?![A-Za-z0-9])"
        if re.search(pattern, content):
            deps.add(repo)

    return deps


def scan_repo(repo_path: str, repo_name: str = None) -> dict:
    """Scan a repo directory for references to other known repos.

    Every regular file below ``repo_path`` whose suffix is in the scanned
    extension set, or whose name is exactly ``Dockerfile``, is read and passed
    to ``scan_file_for_deps``. Files inside VCS, dependency, cache and build
    directories of the repo are skipped.

    Args:
        repo_path: Path to the repo's root directory.
        repo_name: Name to report for the repo; defaults to the final
            component of ``repo_path``.

    Returns:
        ``{"error": message}`` when ``repo_path`` is not a directory, otherwise
        a dict with keys ``"repo"`` (name), ``"dependencies"`` (sorted list of
        referenced repo names) and ``"files_scanned"`` (number of files read).
    """
    path = Path(repo_path)
    if not path.is_dir():
        return {"error": f"Not a directory: {repo_path}"}

    if not repo_name:
        repo_name = path.name

    deps = set()
    files_scanned = 0
    exts = {".py", ".js", ".ts", ".go", ".yaml", ".yml", ".toml", ".json",
            ".md", ".sh", ".bash", ".Dockerfile", ".tf", ".hcl"}
    # Common non-source directories
    skip_dirs = {".git", "node_modules", "__pycache__", ".venv", "venv",
                 "vendor", "dist", "build", ".tox"}

    for fpath in path.rglob("*"):
        if not fpath.is_file():
            continue
        # A plain "Dockerfile" has no suffix, so it is matched by name.
        if fpath.suffix not in exts and fpath.name != "Dockerfile":
            continue
        # Only directories *inside* the repo decide whether a file is skipped;
        # checking the full path would drop every file of a repo that happens
        # to be checked out under e.g. ".../build/" or ".../vendor/".
        if skip_dirs.intersection(fpath.relative_to(path).parent.parts):
            continue

        try:
            content = fpath.read_text(encoding="utf-8", errors="ignore")
        except OSError:
            # Best effort: unreadable files are skipped, but interrupts and
            # programming errors are no longer swallowed.
            continue

        deps.update(scan_file_for_deps(str(fpath), content, repo_name))
        files_scanned += 1

    return {
        "repo": repo_name,
        "dependencies": sorted(deps),
        "files_scanned": files_scanned,
    }


def detect_cycles(graph: dict) -> list:
    """Detect circular dependencies using depth-first search.

    Args:
        graph: Mapping of repo name to a dict whose ``"dependencies"`` entry
            lists the repos it depends on. Dependencies that are not keys of
            ``graph`` are treated as nodes without outgoing edges.

    Returns:
        A list of cycles, one per back edge met during the traversal. Each
        cycle is a list of repo names that starts and ends with the same
        repo, e.g. ``["a", "b", "a"]``. Empty when the graph is acyclic.
    """
    cycles = []
    visited = set()
    on_stack = set()  # nodes on the current DFS path; always equals set(path)

    def dfs(node, path):
        visited.add(node)
        on_stack.add(node)

        for neighbor in graph.get(node, {}).get("dependencies", []):
            if neighbor in on_stack:
                # Back edge: the cycle is the tail of the current path
                # starting at the neighbor, closed by the neighbor itself.
                cycles.append(path[path.index(neighbor):] + [neighbor])
            elif neighbor not in visited:
                dfs(neighbor, path + [neighbor])

        # Always unwind, even after a cycle was recorded. Leaving stale
        # entries behind would make a later root mistake a finished node for
        # one on its own path and fail in path.index().
        on_stack.discard(node)

    for node in graph:
        if node not in visited:
            dfs(node, [node])

    return cycles


def to_dot(graph: dict) -> str:
    """Render the dependency graph as a Graphviz DOT digraph.

    Repos are emitted in sorted order. Each repo gets a node statement whose
    fill colour depends on whether it has more than two dependencies,
    followed by one edge statement per dependency.

    Args:
        graph: Mapping of repo name to a dict with a ``"dependencies"`` list.

    Returns:
        The DOT source as a single newline-joined string.
    """
    preamble = [
        "digraph dependencies {",
        "  rankdir=LR;",
        '  node [shape=box, style=filled, fillcolor="#1a1a2e", fontcolor="#e6edf3"];',
        '  edge [color="#4a4a6a"];',
        "",
    ]

    statements = []
    for repo in sorted(graph):
        dependencies = graph[repo].get("dependencies", [])
        # Repos with more than two dependencies get the highlight colour.
        if len(dependencies) > 2:
            fill = "#2d1b69"
        else:
            fill = "#16213e"
        statements.append(f'  "{repo}" [fillcolor="{fill}"];')
        statements.extend(f'  "{repo}" -> "{dep}";' for dep in dependencies)

    return "\n".join(preamble + statements + ["}"])


def to_mermaid(graph: dict) -> str:
    """Render the dependency graph as a Mermaid ``graph LR`` diagram.

    Node ids are the repo names with hyphens replaced by underscores. After
    the edges and a blank line, every node is given a label carrying its
    original name. This includes repos that appear only as a dependency, so
    they are not shown under their underscore id.

    Args:
        graph: Mapping of repo name to a dict with a ``"dependencies"`` list.

    Returns:
        The Mermaid source as a single newline-joined string.
    """
    def node_id(name):
        return name.replace("-", "_")

    lines = ["graph LR"]
    nodes = set(graph)

    for repo, data in sorted(graph.items()):
        for dep in data.get("dependencies", []):
            lines.append(f"    {node_id(repo)} --> {node_id(dep)}")
            nodes.add(dep)

    # Add node labels
    lines.append("")
    for name in sorted(nodes):
        lines.append(f"    {node_id(name)}[{name}]")

    return "\n".join(lines)


def main():
    """Command-line entry point: scan repos and report the dependency graph.

    Repos come either from ``--repos`` (comma-separated paths) or from the
    sub-directories of the positional ``repos_dir`` (hidden ones excluded).
    Each repo is scanned and cycles are detected. The graph is then printed
    or written to ``--output`` as JSON, DOT or Mermaid.

    With ``--cycles-only`` only the cycle report is printed, and the process
    exits with status 1 when cycles exist and 0 otherwise. With no repo
    source the help text is printed and the exit status is 1. A ``repos_dir``
    that is not a directory is reported as a usage error.
    """
    parser = argparse.ArgumentParser(description="Build cross-repo dependency graph")
    parser.add_argument("repos_dir", nargs="?", help="Directory containing repos")
    parser.add_argument("--repos", help="Comma-separated list of repo paths")
    parser.add_argument("--format", choices=["dot", "mermaid", "json"], default="json")
    parser.add_argument("--output", "-o", help="Output file (default: stdout)")
    parser.add_argument("--cycles-only", action="store_true", help="Only report cycles")
    args = parser.parse_args()

    if args.repos:
        # Drop empty entries produced by stray or trailing commas.
        repo_paths = [p.strip() for p in args.repos.split(",") if p.strip()]
    elif args.repos_dir:
        base = Path(args.repos_dir)
        if not base.is_dir():
            parser.error(f"Not a directory: {args.repos_dir}")
        # Sorted so that scan order, and therefore output, is deterministic.
        repo_paths = sorted(
            str(p) for p in base.iterdir()
            if p.is_dir() and not p.name.startswith(".")
        )
    else:
        parser.print_help()
        sys.exit(1)

    results = {}
    for rpath in repo_paths:
        # Paths such as "." have an empty final component; fall back to the
        # resolved directory name so the repo still gets a usable key.
        name = Path(rpath).name or Path(rpath).resolve().name
        print(f"Scanning {name}...", file=sys.stderr)
        result = scan_repo(rpath, name)
        if "error" in result:
            print(f"Skipping {rpath}: {result['error']}", file=sys.stderr)
            continue
        results[name] = result

    # Detect cycles
    cycles = detect_cycles(results)

    if args.cycles_only:
        if cycles:
            print("CIRCULAR DEPENDENCIES DETECTED:")
            for cycle in cycles:
                print(f"  {' -> '.join(cycle)}")
            sys.exit(1)
        else:
            print("No circular dependencies found.")
            sys.exit(0)

    # Output
    if args.format == "dot":
        output = to_dot(results)
    elif args.format == "mermaid":
        output = to_mermaid(results)
    else:
        output = json.dumps({
            "repos": results,
            "cycles": cycles,
            "summary": {
                "total_repos": len(results),
                "total_deps": sum(len(r["dependencies"]) for r in results.values()),
                "cycles_found": len(cycles),
            }
        }, indent=2)

    if args.output:
        Path(args.output).write_text(output, encoding="utf-8")
        print(f"Written to {args.output}", file=sys.stderr)
    else:
        print(output)


# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()