compounding-intelligence/scripts/graph_visualizer.py

#!/usr/bin/env python3
"""
graph_visualizer.py — Generate visual graph representations of the knowledge graph.

Reads knowledge/index.json and renders the fact relationship graph.
Supports ASCII terminal output and DOT export for Graphviz.

Usage:
    python3 scripts/graph_visualizer.py                  # ASCII, all nodes
    python3 scripts/graph_visualizer.py --format dot     # DOT output
    python3 scripts/graph_visualizer.py --seed root --max-depth 2
    python3 scripts/graph_visualizer.py --filter-domain hermes-agent
    python3 scripts/graph_visualizer.py --filter-category pitfall

Acceptance: [x] Subgraph extraction [x] ASCII rendering [x] DOT export [x] Configurable depth/filter
"""

import argparse
import json
import sys
from collections import defaultdict, deque
from pathlib import Path
from typing import Optional


def load_index(index_path: Path):
    """Load and return the parsed JSON content of the knowledge index.

    Args:
        index_path: Location of the index JSON file.

    Returns:
        Whatever ``json.load`` produces for the file.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    # JSON is UTF-8 by specification; do not depend on the platform's
    # default locale encoding (which is not UTF-8 everywhere).
    with open(index_path, encoding="utf-8") as f:
        return json.load(f)


def build_adjacency(facts):
    """Build the forward adjacency map of the fact graph.

    Args:
        facts: Iterable of fact dicts. Each may carry an ``id`` and a
            ``related`` list of other fact ids.

    Returns:
        A plain dict mapping a fact id to the list of related ids that exist
        in ``facts``. Facts without outgoing edges have no entry; facts with
        a missing or falsy ``id`` are skipped.
    """
    all_ids = {f['id'] for f in facts if 'id' in f}
    adj = defaultdict(list)
    for f in facts:
        fid = f.get('id')
        if not fid:
            continue
        # `related` may be absent or an explicit JSON null; treat both as
        # "no outgoing edges" instead of crashing on iteration over None.
        for rel in f.get('related') or []:
            # Dangling references to unknown ids are dropped.
            if rel in all_ids:
                adj[fid].append(rel)
    return dict(adj)


def build_reverse_adjacency(adj):
    """Invert a forward adjacency map.

    Args:
        adj: Mapping of source id -> list of target ids.

    Returns:
        A plain dict mapping each target id to the list of source ids that
        point at it, in the order the edges are encountered.
    """
    incoming = {}
    for origin in adj:
        for destination in adj[origin]:
            incoming.setdefault(destination, []).append(origin)
    return incoming


def extract_subgraph(
    facts,
    adj,
    rev_adj,
    seeds=None,
    max_depth=None,
    filter_domain=None,
    filter_category=None,
):
    """Select the set of fact ids to render.

    Args:
        facts: Iterable of fact dicts.
        adj: Forward adjacency map (id -> list of ids).
        rev_adj: Reverse adjacency map (id -> list of ids).
        seeds: Optional iterable of starting ids. When ``None`` every node
            that passes the filters is returned.
        max_depth: Optional maximum number of hops from any seed. Only
            consulted when ``seeds`` is given; ``None`` means unlimited.
        filter_domain: If set, keep only facts whose ``domain`` equals it.
        filter_category: If set, keep only facts whose ``category`` equals it.

    Returns:
        A set of fact ids. Empty when nothing matches the filters or when
        none of the seeds passes the filters.
    """
    eligible = set()
    for f in facts:
        fid = f.get('id')
        if not fid:
            continue
        if filter_domain and f.get('domain') != filter_domain:
            continue
        if filter_category and f.get('category') != filter_category:
            continue
        eligible.add(fid)

    if seeds is None:
        # An empty result must stay empty: falling back to "all nodes" here
        # would silently ignore a filter that matched nothing.
        return eligible

    valid_seeds = [s for s in seeds if s in eligible]
    if not valid_seeds:
        return set()

    # Breadth-first walk over edges in both directions, so each node is
    # first reached at its minimum hop count from a seed.
    visited = set()
    queue = deque((s, 0) for s in valid_seeds)
    while queue:
        node, depth = queue.popleft()
        if node in visited:
            continue
        visited.add(node)
        if max_depth is not None and depth >= max_depth:
            # Node is kept, but its neighbours are not expanded.
            continue
        for neighbor in (*adj.get(node, ()), *rev_adj.get(node, ())):
            if neighbor in eligible and neighbor not in visited:
                queue.append((neighbor, depth + 1))
    return visited


def build_fact_map(facts):
    """Index fact dicts by their id.

    Only entries that carry both an ``id`` key and a ``fact`` key are
    included; when an id occurs more than once the last entry wins.
    """
    by_id = {}
    for entry in facts:
        if 'id' not in entry or 'fact' not in entry:
            continue
        by_id[entry['id']] = entry
    return by_id


def render_ascii(subgraph_ids, adj, fact_map):
    """Render the subgraph as an indented ASCII tree.

    Nodes with no inbound edge inside the subgraph are used as roots (all
    nodes, sorted, if there is no such node). Each root is walked depth-first
    in pre-order so that every child line appears directly beneath its
    parent; a node reachable through several parents is printed once, under
    the first parent that reaches it.

    Args:
        subgraph_ids: Set of node ids to render.
        adj: Forward adjacency map (id -> list of ids).
        fact_map: Mapping id -> fact dict, used for label, category, domain.

    Returns:
        The rendered text. Nodes never reached from a root are listed in a
        trailing "Disconnected nodes" section.
    """
    # Count inbound edges restricted to the subgraph to find the roots.
    inbound = defaultdict(int)
    for src in subgraph_ids:
        for tgt in adj.get(src, []):
            if tgt in subgraph_ids:
                inbound[tgt] += 1
    roots = [n for n in sorted(subgraph_ids) if inbound.get(n, 0) == 0]
    if not roots:
        roots = sorted(subgraph_ids)

    lines = []
    visited = set()
    # Explicit stack instead of recursion so deep chains cannot hit the
    # recursion limit. Entries are pushed in reverse so they pop in order.
    stack = [(root, 0) for root in reversed(roots)]
    while stack:
        node, depth = stack.pop()
        if node in visited:
            continue
        visited.add(node)
        fact = fact_map.get(node, {})
        # str() guards the slice against non-string fact values or ids.
        label = str(fact.get('fact', node))[:80]
        category = fact.get('category', 'fact')
        domain = fact.get('domain', 'global')
        # Only entries pushed as roots have depth 0.
        connector = '┌─' if depth == 0 else '├─'
        lines.append(f"{'  ' * depth}{connector} {domain}/{category}: {label}")
        children = [c for c in adj.get(node, []) if c in subgraph_ids]
        for child in reversed(children):
            if child not in visited:
                stack.append((child, depth + 1))

    if len(visited) < len(subgraph_ids):
        lines.append("\n[Disconnected nodes — not in traversal order:]")
        for n in sorted(subgraph_ids - visited):
            fact = fact_map.get(n, {})
            label = str(fact.get('fact', n))[:60]
            lines.append(f"  {n} — {label}")
    return "\n".join(lines)


def _dot_escape(text):
    """Escape *text* for placement inside a double-quoted DOT string."""
    return (
        str(text)
        .replace('\\', '\\\\')   # backslashes first, so later escapes survive
        .replace('"', '\\"')
        .replace('\r\n', '\\n')  # raw line breaks become DOT's \n escape
        .replace('\n', '\\n')
        .replace('\r', '\\n')
    )


def render_dot(subgraph_ids, adj, fact_map):
    """Render the subgraph as a Graphviz DOT digraph.

    Args:
        subgraph_ids: Set of node ids to render.
        adj: Forward adjacency map (id -> list of ids).
        fact_map: Mapping id -> fact dict, used for label, category, domain.

    Returns:
        DOT source text. Each node is a filled box coloured by category and
        labelled with domain, category and the first 80 characters of the
        fact text; edges are emitted only between nodes of the subgraph.
    """
    lines = ["digraph knowledge_graph {", "  rankdir=LR;"]
    cat_colors = {
        'fact': '#3498db',
        'pitfall': '#e74c3c',
        'pattern': '#2ecc71',
        'tool-quirk': '#f39c12',
        'question': '#9b59b6',
    }
    for nid in sorted(subgraph_ids):
        fact = fact_map.get(nid, {})
        category = fact.get('category', 'fact')
        domain = fact.get('domain', 'global')
        # Truncate BEFORE escaping: slicing escaped text can cut an escape
        # pair in half and leave a backslash that swallows the closing quote.
        label = _dot_escape(str(fact.get('fact', nid))[:80])
        fillcolor = cat_colors.get(category, '#666666')
        node_id = _dot_escape(nid)
        domain_text = _dot_escape(domain)
        category_text = _dot_escape(category)
        lines.append(
            f'  "{node_id}" [label="{domain_text}\\n{category_text}\\n{label}", '
            f'fillcolor="{fillcolor}", style=filled, shape=box];'
        )
    lines.append("")
    for src in sorted(subgraph_ids):
        for tgt in adj.get(src, []):
            if tgt in subgraph_ids:
                lines.append(f'  "{_dot_escape(src)}" -> "{_dot_escape(tgt)}";')
    lines.append("}")
    return "\n".join(lines)


def main():
    """Command-line entry point: parse options, build the graph, emit output.

    Reads the index file, builds forward and reverse adjacency, selects the
    subgraph according to the seed/depth/filter options and writes the ASCII
    or DOT rendering to stdout or to the ``--output`` file.

    Exits with status 1 when the index cannot be read or parsed, or when no
    node matches the requested seeds and filters.
    """
    parser = argparse.ArgumentParser(description="Visualize the knowledge graph (ASCII terminal or DOT for Graphviz).")
    parser.add_argument("--index", type=Path, default=Path(__file__).parent.parent / "knowledge" / "index.json",
        help="Path to knowledge/index.json")
    parser.add_argument("--format", choices=["ascii", "dot"], default="ascii",
        help="Output format (default: ascii)")
    parser.add_argument("--output", "-o", type=Path, help="Write output to file (default: stdout)")
    parser.add_argument("--seed", help="Starting fact ID (comma-sep). Omit to render full graph.")
    parser.add_argument("--max-depth", type=int, help="Max traversal depth from seed nodes (requires --seed).")
    parser.add_argument("--filter-domain", help="Only include facts from this domain.")
    parser.add_argument("--filter-category", help="Only include facts of this category.")
    args = parser.parse_args()

    try:
        index = load_index(args.index)
    except (OSError, ValueError) as exc:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        print(f"Cannot read index {args.index}: {exc}", file=sys.stderr)
        sys.exit(1)
    if not isinstance(index, dict):
        print(f"Cannot read index {args.index}: top-level JSON value is not an object.", file=sys.stderr)
        sys.exit(1)

    facts = index.get('facts') or []
    adj = build_adjacency(facts)
    rev_adj = build_reverse_adjacency(adj)
    fact_map = build_fact_map(facts)

    # Tolerate "a, b" style lists: strip whitespace and drop empty items.
    seeds = None
    if args.seed:
        seeds = [part.strip() for part in args.seed.split(',') if part.strip()]
    if args.max_depth is not None and seeds is None:
        # The depth limit only applies to a seeded traversal.
        print("Warning: --max-depth has no effect without --seed.", file=sys.stderr)

    subgraph_ids = extract_subgraph(facts=facts, adj=adj, rev_adj=rev_adj, seeds=seeds,
                                     max_depth=args.max_depth,
                                     filter_domain=args.filter_domain,
                                     filter_category=args.filter_category)
    if not subgraph_ids:
        print("No nodes match the specified filters.", file=sys.stderr)
        sys.exit(1)
    if args.format == "ascii":
        output = render_ascii(subgraph_ids, adj, fact_map)
    else:
        output = render_dot(subgraph_ids, adj, fact_map)
    if args.output:
        # The ASCII rendering contains box-drawing characters; write UTF-8
        # explicitly instead of relying on the platform default encoding.
        args.output.write_text(output, encoding="utf-8")
        print(f"Written: {args.output}", file=sys.stderr)
    else:
        print(output)

# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()