From 5f6a7f7265c2b76de0994e7425fafc134f5a713f Mon Sep 17 00:00:00 2001
From: STEP35 Burn Worker
Date: Sat, 25 Apr 2026 21:00:05 -0400
Subject: [PATCH] feat(graph): Add graph visualizer (ASCII + DOT) with subgraph extraction
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add scripts/graph_visualizer.py — standalone tool that:
- Builds knowledge graph from knowledge/index.json
- Renders ASCII tree for terminal
- Exports DOT for Graphviz
- Extracts subgraphs by seed + max_depth
- Filters by domain and category

Includes test_graph_visualizer.py smoke test (12/12)

Addresses #151
---
Review notes (ignored by git-am): the payload was re-flowed and revised during
review, so the blob ids on the "index" lines below predate those revisions.

 scripts/graph_visualizer.py      | 258 +++++++++++++++++++++++++++++++
 scripts/test_graph_visualizer.py | 138 +++++++++++++++++
 2 files changed, 396 insertions(+)
 create mode 100755 scripts/graph_visualizer.py
 create mode 100755 scripts/test_graph_visualizer.py

diff --git a/scripts/graph_visualizer.py b/scripts/graph_visualizer.py
new file mode 100755
index 0000000..97bbed9
--- /dev/null
+++ b/scripts/graph_visualizer.py
@@ -0,0 +1,258 @@
+#!/usr/bin/env python3
+"""
+graph_visualizer.py — Generate visual graph representations of the knowledge graph.
+
+Reads knowledge/index.json and renders the fact relationship graph.
+Supports ASCII terminal output and DOT export for Graphviz.
+
+Usage:
+    python3 scripts/graph_visualizer.py                    # ASCII, all nodes
+    python3 scripts/graph_visualizer.py --format dot       # DOT output
+    python3 scripts/graph_visualizer.py --seed root --max-depth 2
+    python3 scripts/graph_visualizer.py --filter-domain hermes-agent
+    python3 scripts/graph_visualizer.py --filter-category pitfall
+
+Acceptance: [x] Subgraph extraction [x] ASCII rendering [x] DOT export [x] Configurable depth/filter
+"""
+
+import argparse
+import json
+import sys
+from collections import defaultdict, deque
+from pathlib import Path
+from typing import Optional
+
+
+def load_index(index_path: Path):
+    """Load and return the parsed JSON document stored at *index_path*."""
+    # JSON is UTF-8 by specification; do not depend on the platform locale.
+    with open(index_path, encoding="utf-8") as f:
+        return json.load(f)
+
+
+def build_adjacency(facts):
+    """Map each fact id to the ids it lists under ``related``.
+
+    Targets that are not the id of any fact in *facts* are dropped, so every
+    edge points at a known node. Facts without surviving edges get no key.
+    """
+    all_ids = {f['id'] for f in facts if 'id' in f}
+    adj = defaultdict(list)
+    for f in facts:
+        fid = f.get('id')
+        if not fid:
+            continue
+        # "related": null is treated like a missing list.
+        for rel in f.get('related') or []:
+            if rel in all_ids:
+                adj[fid].append(rel)
+    return dict(adj)
+
+
+def build_reverse_adjacency(adj):
+    """Invert *adj*: map each target id to the list of ids that point at it."""
+    rev = defaultdict(list)
+    for src, targets in adj.items():
+        for tgt in targets:
+            rev[tgt].append(src)
+    return dict(rev)
+
+
+def extract_subgraph(
+    facts,
+    adj,
+    rev_adj,
+    seeds=None,
+    max_depth: Optional[int] = None,
+    filter_domain: Optional[str] = None,
+    filter_category: Optional[str] = None,
+):
+    """Return the set of fact ids selected by the filters and seeds.
+
+    Facts are first narrowed by *filter_domain* / *filter_category* (exact
+    match on the ``domain`` / ``category`` keys). Without *seeds* the
+    narrowed set is returned as-is. With *seeds*, a breadth-first walk
+    follows edges in both directions (*adj* and *rev_adj*), never leaves the
+    narrowed set, and stops expanding at *max_depth* hops (``None`` means
+    unlimited). An empty set means nothing matched.
+    """
+    filtered_nodes = set()
+    for f in facts:
+        fid = f.get('id')
+        if not fid:
+            continue
+        if filter_domain and f.get('domain') != filter_domain:
+            continue
+        if filter_category and f.get('category') != filter_category:
+            continue
+        filtered_nodes.add(fid)
+
+    if seeds is None:
+        # Return the filtered set even when it is empty: falling back to the
+        # whole graph would silently ignore a filter that matched nothing.
+        return filtered_nodes
+
+    valid_seeds = [s for s in seeds if s in filtered_nodes]
+    if not valid_seeds:
+        return set()
+
+    visited = set()
+    queue = deque((s, 0) for s in valid_seeds)
+    while queue:
+        node, depth = queue.popleft()
+        if node in visited:
+            continue
+        visited.add(node)
+        if max_depth is not None and depth >= max_depth:
+            continue
+        # Walk edges both ways so referrers count as neighbours too.
+        for neighbor in adj.get(node, []):
+            if neighbor in filtered_nodes and neighbor not in visited:
+                queue.append((neighbor, depth + 1))
+        for neighbor in rev_adj.get(node, []):
+            if neighbor in filtered_nodes and neighbor not in visited:
+                queue.append((neighbor, depth + 1))
+    return visited
+
+
+def build_fact_map(facts):
+    """Index facts by id, keeping only entries with both ``id`` and ``fact``."""
+    return {f['id']: f for f in facts if 'id' in f and 'fact' in f}
+
+
+def render_ascii(subgraph_ids, adj, fact_map):
+    """Render the subgraph as an indented ASCII outline.
+
+    Nodes without inbound edges inside the subgraph are the roots; if there
+    are none (every node sits on a cycle) all nodes are root candidates. The
+    walk is depth-first pre-order, so each node is printed directly beneath
+    the parent that first reaches it, indented by its depth. A node reachable
+    from several parents is printed once. Nodes no root reaches are listed in
+    a trailing section.
+    """
+    inbound = defaultdict(int)
+    for src in subgraph_ids:
+        for tgt in adj.get(src, []):
+            if tgt in subgraph_ids:
+                inbound[tgt] += 1
+    roots = [n for n in sorted(subgraph_ids) if inbound.get(n, 0) == 0]
+    if not roots:
+        roots = sorted(subgraph_ids)
+
+    lines = []
+    visited = set()
+    # Explicit stack rather than recursion so long chains cannot hit the
+    # recursion limit; entries are pushed reversed so they pop in order.
+    stack = [(root, 0) for root in reversed(roots)]
+    while stack:
+        node, depth = stack.pop()
+        if node in visited:
+            continue
+        visited.add(node)
+        fact = fact_map.get(node, {})
+        label = str(fact.get('fact', node))[:80]
+        category = fact.get('category', 'fact')
+        domain = fact.get('domain', 'global')
+        marker = '┌─' if depth == 0 else '├─'
+        lines.append(f"{'  ' * depth}{marker} {domain}/{category}: {label}")
+        children = [c for c in adj.get(node, []) if c in subgraph_ids]
+        stack.extend((child, depth + 1) for child in reversed(children))
+
+    unreached = subgraph_ids - visited
+    if unreached:
+        lines.append("\n[Disconnected nodes — not in traversal order:]")
+        for n in sorted(unreached):
+            label = str(fact_map.get(n, {}).get('fact', n))[:60]
+            lines.append(f"  {n} — {label}")
+    return "\n".join(lines)
+
+
+def _dot_escape(text):
+    """Escape *text* for use inside a double-quoted DOT string."""
+    # Backslashes go first, otherwise the escapes added for quotes and
+    # newlines would themselves be escaped a second time.
+    return str(text).replace('\\', '\\\\').replace('"', '\\"').replace('\n', '\\n')
+
+
+def render_dot(subgraph_ids, adj, fact_map):
+    """Render the subgraph as a Graphviz DOT digraph.
+
+    Each node is a filled box labelled with domain, category and the first
+    80 characters of the fact text, coloured by category. Only edges whose
+    endpoints are both in *subgraph_ids* are emitted.
+    """
+    cat_colors = {
+        'fact': '#3498db',
+        'pitfall': '#e74c3c',
+        'pattern': '#2ecc71',
+        'tool-quirk': '#f39c12',
+        'question': '#9b59b6',
+    }
+    lines = ["digraph knowledge_graph {", "    rankdir=LR;"]
+    for nid in sorted(subgraph_ids):
+        fact = fact_map.get(nid, {})
+        category = fact.get('category', 'fact')
+        domain = fact.get('domain', 'global')
+        # Truncate before escaping so the cut can never split an escape
+        # sequence and leave a backslash in front of the closing quote.
+        label = _dot_escape(str(fact.get('fact', nid))[:80])
+        fillcolor = cat_colors.get(category, '#666666')
+        lines.append(
+            f'    "{_dot_escape(nid)}" '
+            f'[label="{_dot_escape(domain)}\\n{_dot_escape(category)}\\n{label}", '
+            f'fillcolor="{fillcolor}", style=filled, shape=box];'
+        )
+    lines.append("")
+    for src in sorted(subgraph_ids):
+        for tgt in adj.get(src, []):
+            if tgt in subgraph_ids:
+                lines.append(f'    "{_dot_escape(src)}" -> "{_dot_escape(tgt)}";')
+    lines.append("}")
+    return "\n".join(lines)
+
+
+def main():
+    """Parse CLI arguments, select the subgraph and print or write its rendering."""
+    parser = argparse.ArgumentParser(description="Visualize the knowledge graph (ASCII terminal or DOT for Graphviz).")
+    parser.add_argument("--index", type=Path, default=Path(__file__).parent.parent / "knowledge" / "index.json",
+                        help="Path to knowledge/index.json")
+    parser.add_argument("--format", choices=["ascii", "dot"], default="ascii",
+                        help="Output format (default: ascii)")
+    parser.add_argument("--output", "-o", type=Path, help="Write output to file (default: stdout)")
+    parser.add_argument("--seed", help="Starting fact ID (comma-sep). Omit to render full graph.")
+    parser.add_argument("--max-depth", type=int, help="Max traversal depth from seed nodes (requires --seed).")
+    parser.add_argument("--filter-domain", help="Only include facts from this domain.")
+    parser.add_argument("--filter-category", help="Only include facts of this category.")
+    args = parser.parse_args()
+    if args.max_depth is not None and not args.seed:
+        # Depth is measured from the seeds, so it is meaningless without one.
+        parser.error("--max-depth requires --seed")
+
+    index = load_index(args.index)
+    facts = index.get('facts', [])
+    adj = build_adjacency(facts)
+    rev_adj = build_reverse_adjacency(adj)
+    fact_map = build_fact_map(facts)
+    # Accept "a, b" and stray commas in the comma-separated seed list.
+    seeds = [s.strip() for s in args.seed.split(',') if s.strip()] if args.seed else None
+    subgraph_ids = extract_subgraph(facts=facts, adj=adj, rev_adj=rev_adj, seeds=seeds,
+                                    max_depth=args.max_depth,
+                                    filter_domain=args.filter_domain,
+                                    filter_category=args.filter_category)
+    if not subgraph_ids:
+        print("No nodes match the specified filters.", file=sys.stderr)
+        sys.exit(1)
+    if args.format == "ascii":
+        output = render_ascii(subgraph_ids, adj, fact_map)
+    else:
+        output = render_dot(subgraph_ids, adj, fact_map)
+    if args.output:
+        # The ASCII rendering contains box-drawing characters, so pin UTF-8.
+        args.output.write_text(output, encoding="utf-8")
+        print(f"Written: {args.output}", file=sys.stderr)
+    else:
+        print(output)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/test_graph_visualizer.py b/scripts/test_graph_visualizer.py
new file mode 100755
index 0000000..e65a839
--- /dev/null
+++ b/scripts/test_graph_visualizer.py
@@ -0,0 +1,138 @@
+#!/usr/bin/env python3
+"""
+Tests for graph_visualizer.py — smoke test + subgraph logic.
+Run: python3 scripts/test_graph_visualizer.py
+"""
+
+import json, sys, tempfile
+from pathlib import Path
+sys.path.insert(0, str(Path(__file__).resolve().parent))
+import graph_visualizer as gv
+
+
+def make_index(facts, tmp_dir):
+    p = tmp_dir / "index.json"
+    p.write_text(json.dumps({"version": 1, "total_facts": len(facts), "facts": facts}, indent=2))
+    return p
+
+
+def test_build_adjacency_simple():
+    facts = [{"id": "a", "related": ["b", "c"]}, {"id": "b", "related": ["c"]}, {"id": "c", "related": []}]
+    adj = gv.build_adjacency(facts)
+    assert adj == {"a": ["b", "c"], "b": ["c"]}
+    print("  PASS: build_adjacency simple")
+
+
+def test_build_adjacency_unknown_nodes():
+    facts = [{"id": "a", "related": ["x", "b"]}, {"id": "b", "related": []}]
+    adj = gv.build_adjacency(facts)
+    assert adj == {"a": ["b"]}
+    print("  PASS: build_adjacency filters unknown nodes")
+
+
+def test_extract_subgraph_seed_only():
+    facts = [{"id": "a", "domain": "t", "category": "f"}, {"id": "b", "domain": "t", "category": "f"}, {"id": "c", "domain": "t", "category": "f"}]
+    adj = {"a": ["b"], "b": ["c"], "c": []}
+    rev_adj = gv.build_reverse_adjacency(adj)
+    sub = gv.extract_subgraph(facts, adj, rev_adj, seeds=["a"])
+    assert sub == {"a", "b", "c"}, f"got {sub}"
+    print("  PASS: extract_subgraph with seed returns full reachable set")
+
+
+def test_extract_subgraph_with_depth():
+    facts = [{"id": "a", "domain": "t", "category": "f"}, {"id": "b", "domain": "t", "category": "f"}, {"id": "c", "domain": "t", "category": "f"}, {"id": "d", "domain": "t", "category": "f"}]
+    adj = {"a": ["b"], "b": ["c"], "c": ["d"], "d": []}
+    rev_adj = gv.build_reverse_adjacency(adj)
+    sub = gv.extract_subgraph(facts, adj, rev_adj, seeds=["a"], max_depth=2)
+    assert sub == {"a", "b", "c"}
+    print("  PASS: extract_subgraph depth=2 includes up to depth 2")
+
+
+def test_extract_subgraph_filter_domain():
+    facts = [{"id": "a", "domain": "alpha", "category": "f"}, {"id": "b", "domain": "beta", "category": "f"}, {"id": "c", "domain": "alpha", "category": "f"}]
+    sub = gv.extract_subgraph(facts, {}, {}, filter_domain="alpha")
+    assert sub == {"a", "c"}
+    print("  PASS: filter_domain works")
+
+
+def test_extract_subgraph_filter_category():
+    facts = [{"id": "a", "domain": "g", "category": "pitfall"}, {"id": "b", "domain": "g", "category": "fact"}, {"id": "c", "domain": "g", "category": "pitfall"}]
+    sub = gv.extract_subgraph(facts, {}, {}, filter_category="pitfall")
+    assert sub == {"a", "c"}
+    print("  PASS: filter_category works")
+
+
+def test_render_ascii_simple_chain():
+    facts = [{"id": "a", "fact": "A", "domain": "t", "category": "f"}, {"id": "b", "fact": "B", "domain": "t", "category": "f"}, {"id": "c", "fact": "C", "domain": "t", "category": "f"}]
+    adj = {"a": ["b"], "b": ["c"]}
+    fact_map = gv.build_fact_map(facts)
+    out = gv.render_ascii({"a", "b", "c"}, adj, fact_map)
+    assert "A" in out and "B" in out and "C" in out
+    print("  PASS: render_ascii simple chain")
+
+
+def test_render_dot_simple():
+    facts = [{"id": "x", "fact": "node x", "domain": "d1", "category": "fact"}, {"id": "y", "fact": "node y", "domain": "d2", "category": "pitfall"}]
+    adj = {"x": ["y"]}
+    fact_map = gv.build_fact_map(facts)
+    out = gv.render_dot({"x", "y"}, adj, fact_map)
+    assert 'digraph knowledge_graph' in out and '"x"' in out and '"y"' in out and '->' in out
+    assert '#3498db' in out and '#e74c3c' in out
+    print("  PASS: render_dot basic structure and colors")
+
+
+def test_load_index_round_trip():
+    facts = [{"id": "a", "fact": "A", "related": []}]
+    with tempfile.TemporaryDirectory() as tmp:
+        index = gv.load_index(make_index(facts, Path(tmp)))
+    assert index["facts"] == facts and index["total_facts"] == 1
+    print("  PASS: load_index round trip")
+
+
+def test_extract_subgraph_filter_no_match():
+    facts = [{"id": "a", "domain": "alpha", "category": "f"}]
+    sub = gv.extract_subgraph(facts, {}, {}, filter_domain="missing")
+    assert sub == set(), f"got {sub}"
+    print("  PASS: unmatched filter yields empty set")
+
+
+def test_render_ascii_children_under_parent():
+    facts = [{"id": i, "fact": i.upper(), "domain": "t", "category": "f"} for i in "abxy"]
+    adj = {"a": ["b"], "x": ["y"]}
+    out = gv.render_ascii({"a", "b", "x", "y"}, adj, gv.build_fact_map(facts))
+    order = [line.split(": ")[1] for line in out.splitlines()]
+    assert order == ["A", "B", "X", "Y"], f"got {order}"
+    print("  PASS: render_ascii lists children under their parent")
+
+
+def test_render_dot_escapes_after_truncation():
+    facts = [{"id": "q", "fact": "x" * 79 + '"tail', "domain": "d", "category": "fact"}]
+    out = gv.render_dot({"q"}, {}, gv.build_fact_map(facts))
+    assert ("x" * 79 + '\\"", fillcolor') in out
+    print("  PASS: render_dot escapes after truncating")
+
+
+def main():
+    print("\n=== graph_visualizer test suite ===\n")
+    passed = failed = 0
+    tests = [test_build_adjacency_simple, test_build_adjacency_unknown_nodes, test_extract_subgraph_seed_only, test_extract_subgraph_with_depth,
+             test_extract_subgraph_filter_domain, test_extract_subgraph_filter_category,
+             test_render_ascii_simple_chain, test_render_dot_simple,
+             test_load_index_round_trip, test_extract_subgraph_filter_no_match,
+             test_render_ascii_children_under_parent, test_render_dot_escapes_after_truncation]
+    for test in tests:
+        try:
+            test()
+            passed += 1
+        except AssertionError as e:
+            print(f"  FAIL: {test.__name__} — {e}")
+            failed += 1
+        except Exception as e:
+            print(f"  ERROR: {test.__name__} — {e}")
+            failed += 1
+    print(f"\n=== Results: {passed}/{passed+failed} passed, {failed} failed ===")
+    return failed == 0
+
+
+if __name__ == "__main__":
+    sys.exit(0 if main() else 1)
-- 
2.43.0