feat(graph): Add graph visualizer (ASCII + DOT) with subgraph extraction

Add scripts/graph_visualizer.py — a standalone tool that:
- Builds the knowledge graph from knowledge/index.json
- Renders an ASCII tree for the terminal
- Exports DOT for Graphviz
- Extracts subgraphs by seed + max_depth
- Filters by domain and category

Includes the test_graph_visualizer.py smoke test (8/8 passing).

Addresses #151
2026-04-25 21:00:05 -04:00
2 changed files with 311 additions and 0 deletions
--- a/scripts/graph_visualizer.py
+++ b/scripts/graph_visualizer.py
@@ -0,0 +1,206 @@
 #!/usr/bin/env python3
 """
 graph_visualizer.py — Generate visual graph representations of the knowledge graph.
 Reads knowledge/index.json and renders the fact relationship graph.
 Supports ASCII terminal output and DOT export for Graphviz.
 Usage:
    python3 scripts/graph_visualizer.py                  # ASCII, all nodes
    python3 scripts/graph_visualizer.py --format dot     # DOT output
    python3 scripts/graph_visualizer.py --seed root --max-depth 2
    python3 scripts/graph_visualizer.py --filter-domain hermes-agent
    python3 scripts/graph_visualizer.py --filter-category pitfall
 Acceptance: [x] Subgraph extraction [x] ASCII rendering [x] DOT export [x] Configurable depth/filter
 """
 import argparse
 import json
 import sys
 from collections import defaultdict, deque
 from pathlib import Path
 from typing import Optional
 def load_index(index_path: Path):
    """Load and return the parsed knowledge index JSON document.
    Args:
        index_path: Location of the index file to read.
    Returns:
        Whatever ``json.load`` produces for the file contents.
    Raises:
        OSError: If the file cannot be opened.
        ValueError: If the content is not valid UTF-8 or not valid JSON
            (``json.JSONDecodeError`` and ``UnicodeDecodeError`` are both subclasses).
    """
    # JSON interchange text is UTF-8; do not depend on the platform's locale encoding.
    with open(index_path, encoding="utf-8") as f:
        return json.load(f)
 def build_adjacency(facts):
    """Build the forward adjacency map (fact id -> ids listed in its 'related' field).
    Only edges whose target is the id of some fact in ``facts`` are kept, so every
    edge points at a known node. Facts with a missing or falsy id are skipped as
    sources, and ids that end up with no valid outgoing edge get no entry at all.
    Args:
        facts: List of fact dicts; each may carry 'id' and 'related' keys.
    Returns:
        A plain dict mapping source id -> list of target ids, in first-seen order
        and without repeated targets.
    """
    known_ids = {f['id'] for f in facts if 'id' in f}
    adj = {}
    for f in facts:
        fid = f.get('id')
        if not fid:
            continue
        # 'related' may be absent or an explicit JSON null; both mean "no edges".
        for rel in f.get('related') or ():
            if rel not in known_ids:
                continue  # dangling reference to a fact that is not in the index
            targets = adj.setdefault(fid, [])
            # A target listed twice must not become two parallel edges.
            if rel not in targets:
                targets.append(rel)
    return adj
 def build_reverse_adjacency(adj):
    """Invert a forward adjacency map.
    Args:
        adj: Mapping of source id -> list of target ids.
    Returns:
        A plain dict mapping each target id -> list of the source ids that point
        at it, in the order the edges are encountered while iterating ``adj``.
    """
    incoming = {}
    for source, outgoing in adj.items():
        for target in outgoing:
            incoming.setdefault(target, []).append(source)
    return incoming
 def extract_subgraph(
    facts,
    adj,
    rev_adj,
    seeds=None,
    max_depth: Optional[int] = None,
    filter_domain: Optional[str] = None,
    filter_category: Optional[str] = None,
 ):
    """Select the set of node ids that should be rendered.
    Args:
        facts: List of fact dicts ('id', and optionally 'domain' / 'category').
        adj: Forward adjacency map (source id -> list of target ids).
        rev_adj: Reverse adjacency map (target id -> list of source ids).
        seeds: Ids to start the traversal from, or None to select without traversal.
        max_depth: Largest number of hops from a seed that is still included;
            None means unlimited. Only consulted when ``seeds`` is given.
        filter_domain: When truthy, keep only facts whose 'domain' equals it.
        filter_category: When truthy, keep only facts whose 'category' equals it.
    Returns:
        A set of ids. Without seeds: every fact passing the filters. With seeds:
        the nodes reachable from the seeds that pass the filters, following edges
        in both directions and never stepping through a filtered-out node.
        The set is empty when nothing matches.
    """
    allowed = set()
    for f in facts:
        fid = f.get('id')
        if not fid:
            continue
        if filter_domain and f.get('domain') != filter_domain:
            continue
        if filter_category and f.get('category') != filter_category:
            continue
        allowed.add(fid)
    if seeds is None:
        if allowed or filter_domain or filter_category:
            # An active filter that matches nothing must give an empty result so the
            # caller can report "no match" rather than silently getting every node.
            return allowed
        # No filter and no truthy id: fall back to every fact that carries an 'id' key.
        return {f['id'] for f in facts if 'id' in f}
    # Breadth-first walk: FIFO order guarantees each node is first reached at its
    # minimum hop distance, which is what the max_depth cut-off relies on.
    queue = deque((s, 0) for s in seeds if s in allowed)
    visited = set()
    while queue:
        node, depth = queue.popleft()
        if node in visited:
            continue
        visited.add(node)
        if max_depth is not None and depth >= max_depth:
            continue  # keep the node itself but do not expand past the limit
        # Relations are followed in both directions: outgoing first, then incoming.
        for neighbor in (*adj.get(node, ()), *rev_adj.get(node, ())):
            if neighbor in allowed and neighbor not in visited:
                queue.append((neighbor, depth + 1))
    return visited
 def build_fact_map(facts):
    """Index fact records by id for label and metadata lookup while rendering.
    Every record that has an 'id' key is included, even when it has no 'fact'
    text, so that its 'domain' and 'category' remain available to the renderers
    (which already substitute the id when the text is missing).
    Args:
        facts: List of fact dicts.
    Returns:
        Dict mapping id -> the fact dict itself; for a repeated id the last record wins.
    """
    return {f['id']: f for f in facts if 'id' in f}
 def render_ascii(subgraph_ids, adj, fact_map):
    """Render the selected nodes as an indented ASCII forest.
    Roots are the selected nodes with no incoming edge from another selected node
    (or every node, in sorted order, when there is no such node). Each root is
    walked depth-first in pre-order so that every child line appears directly
    beneath its parent, indented two spaces per level. A node reachable through
    several parents is printed once, under the first parent that reaches it.
    Args:
        subgraph_ids: Set of node ids to render.
        adj: Forward adjacency map (source id -> list of target ids).
        fact_map: Mapping id -> fact dict, used for 'fact', 'domain' and 'category'.
    Returns:
        The rendering as one newline-joined string. Nodes that cannot be reached
        from any root (for example members of a cycle when other roots exist) are
        listed in a trailing "Disconnected nodes" section.
    """
    has_parent = {
        tgt
        for src in subgraph_ids
        for tgt in adj.get(src, [])
        if tgt in subgraph_ids
    }
    roots = [n for n in sorted(subgraph_ids) if n not in has_parent]
    if not roots:
        # Every node sits on a cycle; any of them may start the walk.
        roots = sorted(subgraph_ids)
    lines = []
    visited = set()
    # Explicit LIFO stack (no recursion limit). Entries are pushed reversed so that
    # roots and children are popped in their listed order.
    stack = [(root, 0) for root in reversed(roots)]
    while stack:
        node, depth = stack.pop()
        if node in visited:
            continue
        visited.add(node)
        fact = fact_map.get(node, {})
        label = fact.get('fact', str(node))[:80]
        category = fact.get('category', 'fact')
        domain = fact.get('domain', 'global')
        branch = '┌─' if depth == 0 else '├─'
        lines.append(f"{'  ' * depth}{branch} {domain}/{category}: {label}")
        children = [c for c in adj.get(node, []) if c in subgraph_ids]
        stack.extend((child, depth + 1) for child in reversed(children))
    if len(visited) < len(subgraph_ids):
        lines.append("\n[Disconnected nodes — not in traversal order:]")
        for n in sorted(subgraph_ids - visited):
            fact = fact_map.get(n, {})
            # str() keeps the fallback sliceable when ids are not strings.
            label = fact.get('fact', str(n))[:60]
            lines.append(f"  {n} — {label}")
    return "\n".join(lines)
 def _dot_escape(value, limit=None):
    """Return ``value`` as text that is safe inside a double-quoted DOT string."""
    text = str(value)
    if limit is not None:
        # Truncate BEFORE escaping so the cut can never split an escape sequence
        # and leave a dangling backslash in front of the closing quote.
        text = text[:limit]
    return text.replace('\\', '\\\\').replace('"', '\\"').replace('\n', '\\n')
 def render_dot(subgraph_ids, adj, fact_map):
    """Render the selected nodes and the edges between them as a Graphviz DOT digraph.
    Each node is a filled box labelled with three lines (domain, category, and the
    first 80 characters of the fact text, or the id when no text is available)
    and coloured by category; unknown categories get grey (#666666).
    Ids and label parts are escaped so quotes, backslashes and newlines in the
    data cannot break out of the quoted DOT strings.
    Args:
        subgraph_ids: Set of node ids to render.
        adj: Forward adjacency map (source id -> list of target ids).
        fact_map: Mapping id -> fact dict, used for 'fact', 'domain' and 'category'.
    Returns:
        The DOT source as one newline-joined string: node statements in sorted id
        order, a blank line, then one edge statement per edge inside the subgraph.
    """
    cat_colors = {
        'fact': '#3498db',
        'pitfall': '#e74c3c',
        'pattern': '#2ecc71',
        'tool-quirk': '#f39c12',
        'question': '#9b59b6',
    }
    lines = ["digraph knowledge_graph {", "  rankdir=LR;"]
    for nid in sorted(subgraph_ids):
        fact = fact_map.get(nid, {})
        category = fact.get('category', 'fact')
        domain = fact.get('domain', 'global')
        fillcolor = cat_colors.get(category, '#666666')
        # A literal backslash + 'n' is DOT's line-break escape inside a label.
        label = "\\n".join((
            _dot_escape(domain),
            _dot_escape(category),
            _dot_escape(fact.get('fact', nid), 80),
        ))
        lines.append(f'  "{_dot_escape(nid)}" [label="{label}", fillcolor="{fillcolor}", style=filled, shape=box];')
    lines.append("")
    for src in sorted(subgraph_ids):
        for tgt in adj.get(src, []):
            if tgt in subgraph_ids:
                lines.append(f'  "{_dot_escape(src)}" -> "{_dot_escape(tgt)}";')
    lines.append("}")
    return "\n".join(lines)
 def main(argv=None):
    """Command-line entry point: parse options, select the subgraph, emit the rendering.
    Args:
        argv: Argument list to parse; None makes argparse read ``sys.argv[1:]``.
    Exits with status 1 when the index cannot be loaded or when no node survives
    the seed/filter selection. The rendering goes to ``--output`` if given,
    otherwise to stdout; diagnostics go to stderr.
    """
    parser = argparse.ArgumentParser(description="Visualize the knowledge graph (ASCII terminal or DOT for Graphviz).")
    parser.add_argument("--index", type=Path, default=Path(__file__).parent.parent / "knowledge" / "index.json",
        help="Path to knowledge/index.json")
    parser.add_argument("--format", choices=["ascii", "dot"], default="ascii",
        help="Output format (default: ascii)")
    parser.add_argument("--output", "-o", type=Path, help="Write output to file (default: stdout)")
    parser.add_argument("--seed", help="Starting fact ID (comma-sep). Omit to render full graph.")
    parser.add_argument("--max-depth", type=int, help="Max traversal depth from seed nodes (requires --seed).")
    parser.add_argument("--filter-domain", help="Only include facts from this domain.")
    parser.add_argument("--filter-category", help="Only include facts of this category.")
    args = parser.parse_args(argv)
    try:
        index = load_index(args.index)
    except (OSError, ValueError) as exc:
        # Missing/unreadable file or malformed JSON: report it instead of a traceback.
        print(f"Cannot load index {args.index}: {exc}", file=sys.stderr)
        sys.exit(1)
    facts = index.get('facts', [])
    adj = build_adjacency(facts)
    rev_adj = build_reverse_adjacency(adj)
    fact_map = build_fact_map(facts)
    seeds = None
    if args.seed:
        # Accept "a, b" as well as "a,b" and ignore empty fragments from stray commas.
        seeds = [part.strip() for part in args.seed.split(',') if part.strip()]
    elif args.max_depth is not None:
        print("Warning: --max-depth has no effect without --seed.", file=sys.stderr)
    subgraph_ids = extract_subgraph(facts=facts, adj=adj, rev_adj=rev_adj, seeds=seeds,
                                     max_depth=args.max_depth,
                                     filter_domain=args.filter_domain,
                                     filter_category=args.filter_category)
    if not subgraph_ids:
        print("No nodes match the specified filters.", file=sys.stderr)
        sys.exit(1)
    if args.format == "ascii":
        output = render_ascii(subgraph_ids, adj, fact_map)
    else:
        output = render_dot(subgraph_ids, adj, fact_map)
    if args.output:
        # The ASCII rendering contains box-drawing characters, so pin the encoding
        # rather than relying on the platform default.
        args.output.write_text(output, encoding="utf-8")
        print(f"Written: {args.output}", file=sys.stderr)
    else:
        print(output)
 # Run the CLI only when this file is executed directly, not when it is imported.
 if __name__ == "__main__":
    main()
--- a/scripts/test_graph_visualizer.py
+++ b/scripts/test_graph_visualizer.py
@@ -0,0 +1,105 @@
 #!/usr/bin/env python3
 """
 Tests for graph_visualizer.py — smoke test + subgraph logic.
 Run: python3 scripts/test_graph_visualizer.py
 """
 import json, sys, tempfile
 from pathlib import Path
 sys.path.insert(0, str(Path(__file__).resolve().parent))
 import graph_visualizer as gv
 def make_index(facts, tmp_dir):
    """Write an index.json holding ``facts`` into ``tmp_dir`` and return its path.
    The document has the keys 'version' (1), 'total_facts' (len(facts)) and 'facts'.
    """
    payload = {"version": 1, "total_facts": len(facts), "facts": facts}
    index_path = tmp_dir / "index.json"
    index_path.write_text(json.dumps(payload, indent=2))
    return index_path
 def test_build_adjacency_simple():
    """Edges to existing ids are kept in listed order; a node with no edges gets no entry."""
    sample = [
        {"id": "a", "related": ["b", "c"]},
        {"id": "b", "related": ["c"]},
        {"id": "c", "related": []},
    ]
    expected = {"a": ["b", "c"], "b": ["c"]}
    assert gv.build_adjacency(sample) == expected
    print("  PASS: build_adjacency simple")
 def test_build_adjacency_unknown_nodes():
    """A related id that matches no fact ('x') must be dropped from the adjacency map."""
    sample = [
        {"id": "a", "related": ["x", "b"]},
        {"id": "b", "related": []},
    ]
    expected = {"a": ["b"]}
    assert gv.build_adjacency(sample) == expected
    print("  PASS: build_adjacency filters unknown nodes")
 def test_extract_subgraph_seed_only():
    """With a seed and no depth limit, the whole chain a -> b -> c is returned."""
    facts = [{"id": name, "domain": "t", "category": "f"} for name in "abc"]
    forward = {"a": ["b"], "b": ["c"], "c": []}
    backward = gv.build_reverse_adjacency(forward)
    sub = gv.extract_subgraph(facts, forward, backward, seeds=["a"])
    assert sub == {"a", "b", "c"}, f"got {sub}"
    print("  PASS: extract_subgraph with seed returns full reachable set")
 def test_extract_subgraph_with_depth():
    """On the chain a -> b -> c -> d, max_depth=2 from 'a' stops after 'c'."""
    facts = [{"id": name, "domain": "t", "category": "f"} for name in "abcd"]
    forward = {"a": ["b"], "b": ["c"], "c": ["d"], "d": []}
    backward = gv.build_reverse_adjacency(forward)
    reached = gv.extract_subgraph(facts, forward, backward, seeds=["a"], max_depth=2)
    assert reached == {"a", "b", "c"}
    print("  PASS: extract_subgraph depth=2 includes up to depth 2")
 def test_extract_subgraph_filter_domain():
    """Without seeds, filter_domain keeps exactly the facts of that domain."""
    domains = {"a": "alpha", "b": "beta", "c": "alpha"}
    facts = [
        {"id": name, "domain": domain, "category": "f"}
        for name, domain in domains.items()
    ]
    selected = gv.extract_subgraph(facts, {}, {}, filter_domain="alpha")
    assert selected == {"a", "c"}
    print("  PASS: filter_domain works")
 def test_extract_subgraph_filter_category():
    """Without seeds, filter_category keeps exactly the facts of that category."""
    categories = {"a": "pitfall", "b": "fact", "c": "pitfall"}
    facts = [
        {"id": name, "domain": "g", "category": category}
        for name, category in categories.items()
    ]
    selected = gv.extract_subgraph(facts, {}, {}, filter_category="pitfall")
    assert selected == {"a", "c"}
    print("  PASS: filter_category works")
 def test_render_ascii_simple_chain():
    """Smoke test: every fact label of the chain a -> b -> c shows up in the ASCII output."""
    facts = [
        {"id": name, "fact": name.upper(), "domain": "t", "category": "f"}
        for name in "abc"
    ]
    edges = {"a": ["b"], "b": ["c"]}
    rendered = gv.render_ascii({"a", "b", "c"}, edges, gv.build_fact_map(facts))
    assert all(label in rendered for label in ("A", "B", "C"))
    print("  PASS: render_ascii simple chain")
 def test_render_dot_simple():
    """DOT output has the digraph header, both quoted ids, an edge, and the category colours."""
    facts = [
        {"id": "x", "fact": "node x", "domain": "d1", "category": "fact"},
        {"id": "y", "fact": "node y", "domain": "d2", "category": "pitfall"},
    ]
    edges = {"x": ["y"]}
    rendered = gv.render_dot({"x", "y"}, edges, gv.build_fact_map(facts))
    # Structure first, then the fill colours of 'fact' and 'pitfall'.
    for token in ('digraph knowledge_graph', '"x"', '"y"', '->'):
        assert token in rendered
    for colour in ('#3498db', '#e74c3c'):
        assert colour in rendered
    print("  PASS: render_dot basic structure and colors")
 def main():
    """Run every test function in order, print a summary, and return True iff none failed.
    An AssertionError is reported as FAIL, any other exception as ERROR; both count
    as failures and do not stop the remaining tests.
    """
    print("\n=== graph_visualizer test suite ===\n")
    tests = (
        test_build_adjacency_simple,
        test_build_adjacency_unknown_nodes,
        test_extract_subgraph_seed_only,
        test_extract_subgraph_with_depth,
        test_extract_subgraph_filter_domain,
        test_extract_subgraph_filter_category,
        test_render_ascii_simple_chain,
        test_render_dot_simple,
    )
    failed = 0
    for case in tests:
        try:
            case()
        except AssertionError as err:
            print(f"  FAIL: {case.__name__} — {err}")
            failed += 1
        except Exception as err:
            print(f"  ERROR: {case.__name__} — {err}")
            failed += 1
    # Every test either passed or was counted as failed above.
    passed = len(tests) - failed
    print(f"\n=== Results: {passed}/{len(tests)} passed, {failed} failed ===")
    return not failed
 # Turn the suite result into the process exit status: 0 when main() is truthy, 1 otherwise.
 if __name__ == "__main__":
    sys.exit(0 if main() else 1)