Compare commits
1 Commits
step35/111
...
step35/103
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
ceb7e0bd0c |
@@ -180,89 +180,6 @@ def to_mermaid(graph: dict) -> str:
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
|
||||
def transitive_closure(graph: dict) -> dict:
|
||||
"""Compute transitive closure for each node (all indirect deps)."""
|
||||
closure = {}
|
||||
# Build adjacency list
|
||||
adj = {node: set(data.get("dependencies", [])) for node, data in graph.items()}
|
||||
all_nodes = set(adj.keys()) | set().union(*adj.values())
|
||||
|
||||
for node in all_nodes:
|
||||
visited = set()
|
||||
stack = list(adj.get(node, set()))
|
||||
while stack:
|
||||
current = stack.pop()
|
||||
if current not in visited:
|
||||
visited.add(current)
|
||||
stack.extend(adj.get(current, set()))
|
||||
# Remove self-reference: a node's transitive deps should not include itself
|
||||
visited.discard(node)
|
||||
closure[node] = visited
|
||||
|
||||
return closure
|
||||
|
||||
|
||||
def find_deep_chains(graph: dict) -> list[list[str]]:
|
||||
"""Find the longest simple paths in the dependency graph (ignoring cycles)."""
|
||||
from collections import defaultdict
|
||||
|
||||
adj = {node: list(data.get("dependencies", [])) for node, data in graph.items()}
|
||||
deepest = []
|
||||
max_len = 0
|
||||
|
||||
def dfs(node: str, path: list, visited: set):
|
||||
nonlocal deepest, max_len
|
||||
# Stop if we hit a cycle (node already in path)
|
||||
if node in path:
|
||||
return
|
||||
new_path = path + [node]
|
||||
if node not in adj or not adj[node]:
|
||||
# leaf
|
||||
if len(new_path) > max_len:
|
||||
max_len = len(new_path)
|
||||
deepest = [new_path.copy()]
|
||||
elif len(new_path) == max_len:
|
||||
deepest.append(new_path.copy())
|
||||
else:
|
||||
for neighbor in adj[node]:
|
||||
dfs(neighbor, new_path.copy(), visited | {node})
|
||||
|
||||
for start in graph:
|
||||
dfs(start, [], set())
|
||||
|
||||
return deepest
|
||||
|
||||
|
||||
def format_transitive_markdown(closure: dict) -> str:
|
||||
"""Render transitive closure as a markdown table."""
|
||||
lines = ["# Transitive Dependencies\n\n"]
|
||||
lines.append("| Node | Transitive Dependencies | Count |\n")
|
||||
lines.append("|------|------------------------|-------|\n")
|
||||
for node in sorted(closure.keys()):
|
||||
deps = closure[node]
|
||||
deps_str = ", ".join(sorted(deps)) if deps else "(none)"
|
||||
lines.append(f"| {node} | {deps_str} | {len(deps)} |\n")
|
||||
return "".join(lines)
|
||||
|
||||
|
||||
def format_deep_chains_markdown(chains: list[list[str]]) -> str:
|
||||
"""Render longest dependency chains as a markdown list."""
|
||||
lines = ["# Deepest Dependency Chains\n\n"]
|
||||
if not chains:
|
||||
lines.append("No chains found.\n")
|
||||
return "".join(lines)
|
||||
max_len = max(len(c) for c in chains)
|
||||
lines.append(f"*Longest chain length:* {max_len}\n\n")
|
||||
for i, chain in enumerate(sorted(chains, key=lambda c: (-len(c), " -> ".join(c))), 1):
|
||||
lines.append(f"**Chain {i}** ({len(chain)} nodes)\n\n")
|
||||
indent = " "
|
||||
for j, node in enumerate(chain):
|
||||
arrow = " → " if j < len(chain)-1 else " • "
|
||||
lines.append(f"{indent}{arrow}{node}\n")
|
||||
lines.append("\n")
|
||||
return "".join(lines)
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description="Build cross-repo dependency graph")
|
||||
parser.add_argument("repos_dir", nargs="?", help="Directory containing repos")
|
||||
@@ -311,20 +228,13 @@ def main():
|
||||
elif args.format == "mermaid":
|
||||
output = to_mermaid(results)
|
||||
else:
|
||||
# Compute transitive and deep chains
|
||||
closure = transitive_closure(results)
|
||||
deep_chains = find_deep_chains(results)
|
||||
output = json.dumps({
|
||||
"repos": results,
|
||||
"cycles": cycles,
|
||||
"transitive": {node: sorted(deps) for node, deps in closure.items()},
|
||||
"deep_chains": [chain for chain in deep_chains if len(chain) > 1],
|
||||
"summary": {
|
||||
"total_repos": len(results),
|
||||
"total_deps": sum(len(r["dependencies"]) for r in results.values()),
|
||||
"cycles_found": len(cycles),
|
||||
"transitive_pairs": sum(len(deps) for deps in closure.values()),
|
||||
"longest_chain_length": max((len(c) for c in deep_chains), default=0),
|
||||
}
|
||||
}, indent=2)
|
||||
|
||||
|
||||
@@ -1,155 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Tests for dependency_graph.py — transitive closure and deep chain detection."""
|
||||
|
||||
import json
|
||||
import sys
|
||||
import os
|
||||
import tempfile
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
|
||||
sys.path.insert(0, os.path.dirname(__file__) or ".")
|
||||
|
||||
import importlib.util
|
||||
spec = importlib.util.spec_from_file_location(
|
||||
"dg", os.path.join(os.path.dirname(__file__) or ".", "dependency_graph.py")
|
||||
)
|
||||
mod = importlib.util.module_from_spec(spec)
|
||||
spec.loader.exec_module(mod)
|
||||
|
||||
transitive_closure = mod.transitive_closure
|
||||
find_deep_chains = mod.find_deep_chains
|
||||
detect_cycles = mod.detect_cycles
|
||||
|
||||
|
||||
def make_graph(edges: dict[str, list[str]]) -> dict:
|
||||
"""Build graph dict in expected format: {repo: {"dependencies": [...]}}."""
|
||||
return {
|
||||
node: {"dependencies": sorted(deps), "files_scanned": 1}
|
||||
for node, deps in edges.items()
|
||||
}
|
||||
|
||||
|
||||
def test_transitive_closure_simple_chain():
|
||||
graph = make_graph({
|
||||
"A": ["B"],
|
||||
"B": ["C"],
|
||||
"C": [],
|
||||
})
|
||||
closure = transitive_closure(graph)
|
||||
assert closure["A"] == {"B", "C"}
|
||||
assert closure["B"] == {"C"}
|
||||
assert closure["C"] == set()
|
||||
print("✅ Simple chain transitive closure")
|
||||
|
||||
|
||||
def test_transitive_closure_diamond():
|
||||
graph = make_graph({
|
||||
"A": ["B", "C"],
|
||||
"B": ["D"],
|
||||
"C": ["D"],
|
||||
"D": [],
|
||||
})
|
||||
closure = transitive_closure(graph)
|
||||
assert closure["A"] == {"B", "C", "D"}
|
||||
assert closure["B"] == {"D"}
|
||||
assert closure["C"] == {"D"}
|
||||
assert closure["D"] == set()
|
||||
print("✅ Diamond closure")
|
||||
|
||||
|
||||
def test_transitive_closure_with_cycle():
|
||||
graph = make_graph({
|
||||
"A": ["B"],
|
||||
"B": ["C"],
|
||||
"C": ["A"], # cycle
|
||||
})
|
||||
closure = transitive_closure(graph)
|
||||
assert closure["A"] == {"B", "C"}
|
||||
assert closure["B"] == {"C", "A"}
|
||||
assert closure["C"] == {"A", "B"}
|
||||
print("✅ Cycle in transitive closure")
|
||||
|
||||
|
||||
def test_find_deep_chains_simple():
|
||||
graph = make_graph({
|
||||
"A": ["B"],
|
||||
"B": ["C"],
|
||||
"C": [],
|
||||
})
|
||||
chains = find_deep_chains(graph)
|
||||
chains_sorted = sorted(chains, key=len, reverse=True)
|
||||
assert len(chains_sorted) == 1
|
||||
assert chains_sorted[0] == ["A", "B", "C"]
|
||||
print("✅ Simple deep chain")
|
||||
|
||||
|
||||
def test_find_deep_chains_multiple():
|
||||
graph = make_graph({
|
||||
"A": ["B", "C"],
|
||||
"B": ["D"],
|
||||
"C": ["E"],
|
||||
"D": [],
|
||||
"E": [],
|
||||
})
|
||||
chains = find_deep_chains(graph)
|
||||
lengths = [len(c) for c in chains]
|
||||
assert max(lengths) == 3
|
||||
print("✅ Multiple chains detected")
|
||||
|
||||
|
||||
def test_find_deep_chains_with_cycle_does_not_infinite_loop():
|
||||
graph = make_graph({
|
||||
"A": ["B"],
|
||||
"B": ["C"],
|
||||
"C": ["A"],
|
||||
})
|
||||
chains = find_deep_chains(graph)
|
||||
print(f"✅ Cycle handled: found {len(chains)} chains")
|
||||
|
||||
|
||||
def test_empty_graph():
|
||||
graph = {}
|
||||
assert transitive_closure(graph) == {}
|
||||
assert find_deep_chains(graph) == []
|
||||
print("✅ Empty graph handled")
|
||||
|
||||
|
||||
def test_detect_cycles_shorthand():
|
||||
graph = make_graph({
|
||||
"A": ["B"],
|
||||
"B": ["C"],
|
||||
"C": ["A"],
|
||||
})
|
||||
cycles = detect_cycles(graph)
|
||||
assert len(cycles) == 1
|
||||
assert set(cycles[0]) == {"A", "B", "C"}
|
||||
print("✅ Cycle detection works")
|
||||
|
||||
|
||||
def test_chain_length_reporting():
|
||||
graph = make_graph({
|
||||
"root": ["a", "b"],
|
||||
"a": ["c"],
|
||||
"b": ["d"],
|
||||
"c": ["e"],
|
||||
"d": [],
|
||||
"e": [],
|
||||
})
|
||||
chains = find_deep_chains(graph)
|
||||
max_len = max(len(c) for c in chains)
|
||||
assert max_len == 4
|
||||
print(f"✅ Longest chain length: {max_len}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
test_transitive_closure_simple_chain()
|
||||
test_transitive_closure_diamond()
|
||||
test_transitive_closure_with_cycle()
|
||||
test_find_deep_chains_simple()
|
||||
test_find_deep_chains_multiple()
|
||||
test_find_deep_chains_with_cycle_does_not_infinite_loop()
|
||||
test_empty_graph()
|
||||
test_detect_cycles_shorthand()
|
||||
test_chain_length_reporting()
|
||||
print("\n✅ All dependency graph tests passed")
|
||||
131
scripts/validate_doc_links.py
Executable file
131
scripts/validate_doc_links.py
Executable file
@@ -0,0 +1,131 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Doc Link Validator — Extract and verify all documentation links.
|
||||
Issue: #103 — 4.8: Doc Link Validator
|
||||
|
||||
Acceptance:
|
||||
Extracts links from docs | HTTP HEAD check | Reports broken links
|
||||
(Weekly cron/CI integration out of scope for this minimal script)
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import re
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import List, Tuple, Optional
|
||||
from urllib.request import Request, urlopen
|
||||
from urllib.error import URLError, HTTPError
|
||||
from urllib.parse import urlparse
|
||||
|
||||
# Markdown link patterns
|
||||
INLINE_LINK_RE = re.compile(r'\[[^\]]*\]\(([^)\s]+)(?:\s+"[^"]*")?\)')
|
||||
AUTOLINK_RE = re.compile(r'<([^>]+)>')
|
||||
|
||||
|
||||
def extract_links(content: str) -> List[str]:
|
||||
urls = [m.group(1) for m in INLINE_LINK_RE.finditer(content)]
|
||||
urls += [m.group(1) for m in AUTOLINK_RE.finditer(content)]
|
||||
return urls
|
||||
|
||||
|
||||
def is_ignorable(url: str, ignore_prefixes: List[str]) -> bool:
|
||||
p = urlparse(url)
|
||||
if p.scheme not in ('http', 'https'):
|
||||
return True
|
||||
host = p.netloc.split(':')[0]
|
||||
if host in ('localhost', '127.0.0.1', '::1'):
|
||||
return True
|
||||
# Private IPv4 ranges
|
||||
if re.match(r'^(10\.|192\.168\.|172\.(1[6-9]|2[0-9]|3[01])\.)', host):
|
||||
return True
|
||||
for prefix in ignore_prefixes:
|
||||
if url.startswith(prefix):
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def check_url(url: str, timeout: float = 8.0) -> Tuple[bool, Optional[int], str]:
|
||||
try:
|
||||
req = Request(url, method='HEAD')
|
||||
req.add_header('User-Agent', 'DocLinkValidator/1.0')
|
||||
try:
|
||||
with urlopen(req, timeout=timeout) as resp:
|
||||
return True, resp.getcode(), "OK"
|
||||
except HTTPError as e:
|
||||
if e.code in (405, 403, 400):
|
||||
req2 = Request(url, method='GET')
|
||||
req2.add_header('User-Agent', 'DocLinkValidator/1.0')
|
||||
req2.add_header('Range', 'bytes=0-1')
|
||||
with urlopen(req2, timeout=timeout) as resp2:
|
||||
return True, resp2.getcode(), "OK via GET"
|
||||
return False, e.code, e.reason
|
||||
except URLError as e:
|
||||
return False, None, str(e.reason) if hasattr(e, 'reason') else str(e)
|
||||
except Exception as e:
|
||||
return False, None, str(e)
|
||||
|
||||
|
||||
def main() -> int:
|
||||
p = argparse.ArgumentParser(description="Validate documentation links")
|
||||
p.add_argument('--root', default='.', help='Repository root')
|
||||
p.add_argument('--fail-on-broken', action='store_true', help='Exit non-zero if broken links found')
|
||||
p.add_argument('--json', action='store_true', help='Emit JSON report')
|
||||
p.add_argument('--ignore', default='', help='Comma-separated URL prefixes to ignore')
|
||||
args = p.parse_args()
|
||||
|
||||
root = Path(args.root).resolve()
|
||||
ignore_prefixes = [x.strip() for x in args.ignore.split(',') if x.strip()]
|
||||
|
||||
md_files = list(root.rglob('*.md'))
|
||||
if not md_files:
|
||||
print("No markdown files found.", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
print(f"Scanning {len(md_files)} markdown files")
|
||||
|
||||
all_links: List[Tuple[Path, str]] = []
|
||||
for md in md_files:
|
||||
content = md.read_text(errors='replace')
|
||||
for m in INLINE_LINK_RE.finditer(content):
|
||||
all_links.append((md, m.group(1)))
|
||||
for m in AUTOLINK_RE.finditer(content):
|
||||
all_links.append((md, m.group(1)))
|
||||
|
||||
print(f"Raw link occurrences: {len(all_links)}")
|
||||
|
||||
# De-duplicate by URL, keep first file context
|
||||
first_file: dict[str, Path] = {}
|
||||
unique_urls: List[str] = []
|
||||
for file, url in all_links:
|
||||
if url not in first_file:
|
||||
first_file[url] = file
|
||||
unique_urls.append(url)
|
||||
|
||||
print(f"Unique URLs to check: {len(unique_urls)}")
|
||||
|
||||
broken: List[dict] = []
|
||||
ok_count = 0
|
||||
for url in unique_urls:
|
||||
if is_ignorable(url, ignore_prefixes):
|
||||
continue
|
||||
ok, code, reason = check_url(url)
|
||||
if ok:
|
||||
ok_count += 1
|
||||
else:
|
||||
broken.append({"url": url, "file": str(first_file[url]), "error": reason})
|
||||
|
||||
print(f"OK: {ok_count} Broken: {len(broken)}")
|
||||
if broken:
|
||||
print("\nBroken links:")
|
||||
for b in broken:
|
||||
print(f" [{b['file']}] {b['url']} — {b['error']}")
|
||||
|
||||
if args.json:
|
||||
print(json.dumps({"scanned": len(unique_urls), "ok": ok_count,
|
||||
"broken": len(broken), "broken_links": broken}, indent=2))
|
||||
|
||||
return 1 if (args.fail_on_broken and broken) else 0
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
sys.exit(main())
|
||||
Reference in New Issue
Block a user