Compare commits

..

1 Commits

Author SHA1 Message Date
Alexander Payne
2fa8c2dea3 scripts: add dependency_inventory script
Some checks failed
Test / pytest (pull_request) Failing after 7s
Add dependency_inventory.py — an inventory tool that scans repos
for dependency manifests (requirements.txt, package.json,
go.mod, Cargo.toml, pyproject.toml) and produces either
JSON or markdown report.

Includes:
- Full parser suite for 5 manifest types
- --repos and --repos-dir argument support
- Incremental friendly — safe to add new features
- --output/-o file support
- Test suite in tests/test_dependency_inventory.py

Closes #107 (1/5) — first script in the Health Report toolkit.
2026-04-26 05:10:14 -04:00
6 changed files with 360 additions and 856 deletions

View File

@@ -0,0 +1,308 @@
#!/usr/bin/env python3
"""
Dependency Inventory — Scan repos and list third-party dependencies.
Reads: package.json, requirements.txt, go.mod, Cargo.toml, pyproject.toml
Extracts: package name, version constraint, source file/repo
Outputs: JSON (default) or markdown table
Usage:
python3 scripts/dependency_inventory.py --repos-dir ~/repos/
python3 scripts/dependency_inventory.py --repos ~/repo1,~/repo2 --format markdown
"""
import argparse
import json
import os
import re
import sys
from pathlib import Path
from typing import Dict, List, Any, Optional
# Mapping of file pattern to canonical parser name
MANIFEST_PATTERNS = {
'requirements.txt': 'requirements',
'package.json': 'npm',
'pyproject.toml': 'pyproject',
'go.mod': 'go',
'Cargo.toml': 'cargo',
}
# Parser registry
PARSERS = {}
def register_parser(name: str):
"""Decorator to register a parser function."""
def decorator(fn):
PARSERS[name] = fn
return fn
return decorator
# ─── Parsers ────────────────────────────────────────────────────────────────
@register_parser('requirements')
def parse_requirements(content: str) -> List[Dict[str, str]]:
"""Parse requirements.txt — one requirement per line."""
deps = []
for line in content.splitlines():
line = line.strip()
if not line or line.startswith('#'):
continue
pkg_spec = re.split(r'[ ;#]', line)[0].strip()
if '>=' in pkg_spec:
name, ver = pkg_spec.split('>=', 1)
elif '==' in pkg_spec:
name, ver = pkg_spec.split('==', 1)
elif '<=' in pkg_spec:
name, ver = pkg_spec.split('<=', 1)
elif '~=' in pkg_spec:
name, ver = pkg_spec.split('~=', 1)
elif '>' in pkg_spec:
name, ver = pkg_spec.split('>', 1)
elif '<' in pkg_spec:
name, ver = pkg_spec.split('<', 1)
elif '=' in pkg_spec:
name, ver = pkg_spec.split('=', 1)
else:
name, ver = pkg_spec, ''
deps.append({
'package': name.strip(),
'version': ver.strip(),
'constraint': line[len(name):].strip()
})
return deps
@register_parser('npm')
def parse_package_json(content: str) -> List[Dict[str, str]]:
"""Parse package.json dependencies."""
try:
data = json.loads(content)
except json.JSONDecodeError:
return []
deps = []
for section in ('dependencies', 'devDependencies', 'peerDependencies', 'optionalDependencies'):
for name, ver in data.get(section, {}).items():
deps.append({
'package': name,
'version': ver,
'constraint': ver,
'type': section
})
return deps
@register_parser('pyproject')
def parse_pyproject_toml(content: str) -> List[Dict[str, str]]:
"""Parse pyproject.toml [project] dependencies."""
deps = []
in_deps = False
dep_buffer = ''
for line in content.splitlines():
stripped = line.strip()
if stripped.startswith('dependencies = ['):
in_deps = True
remainder = stripped.split('=', 1)[1].strip()
dep_buffer = remainder[1:] if remainder.startswith('[') else remainder
continue
if in_deps:
if stripped.startswith(']'):
in_deps = False
continue
dep_buffer += ' ' + line
dep_buffer = dep_buffer.strip().rstrip(',')
for match in re.finditer(r'"([^"]+)"', dep_buffer):
spec = match.group(1)
m = re.match(r'^([a-zA-Z0-9_.-]+)\s*([<>=!~]+)?\s*(.*)$', spec)
if m:
name, op, ver = m.groups()
deps.append({
'package': name,
'version': (ver or '').strip(),
'constraint': spec
})
return deps
@register_parser('go')
def parse_go_mod(content: str) -> List[Dict[str, str]]:
"""Parse go.mod — require statements."""
deps = []
for line in content.splitlines():
line = line.strip()
if line.startswith('require ') and not line.startswith('require ('):
parts = line.split()
if len(parts) >= 3:
mod, ver = parts[1], parts[2]
deps.append({'package': mod, 'version': ver, 'constraint': ver})
elif line.startswith('\t') and '/' in line:
parts = line.strip().split()
if len(parts) >= 2:
mod, ver = parts[0], parts[1]
deps.append({'package': mod, 'version': ver, 'constraint': ver})
return deps
@register_parser('cargo')
def parse_cargo_toml(content: str) -> List[Dict[str, str]]:
"""Parse [dependencies] section from Cargo.toml."""
deps = []
in_deps = False
for line in content.splitlines():
stripped = line.strip()
if stripped in ('[dependencies]', '[dependencies]'):
in_deps = True
continue
if stripped.startswith('['):
in_deps = False
continue
if in_deps and '=' in stripped:
name_part, ver_part = stripped.split('=', 1)
name = name_part.strip()
ver = ver_part.strip().strip('"').strip("'")
deps.append({'package': name, 'version': ver, 'constraint': ver})
return deps
# ─── File Discovery ─────────────────────────────────────────────────────────
def find_manifest_files(root: Path) -> Dict[str, List[Path]]:
"""Find all manifest files under root."""
found = {k: [] for k in MANIFEST_PATTERNS}
for pattern in MANIFEST_PATTERNS:
for path in root.rglob(pattern):
if not any(skip in str(path) for skip in ('.git', 'node_modules', '__pycache__', '.venv', 'venv')):
found[pattern].append(path)
return found
# ─── Main Scanner ────────────────────────────────────────────────────────────
def scan_repo(repo_path: Path) -> Dict[str, Any]:
"""Scan a single repo directory for dependency manifests."""
repo_name = repo_path.name
found = find_manifest_files(repo_path)
all_deps: List[Dict[str, str]] = []
files_scanned = 0
for pattern, paths in found.items():
parser_name = MANIFEST_PATTERNS[pattern]
# Map parser_name to function
if parser_name == 'requirements':
parser = parse_requirements
elif parser_name == 'npm':
parser = parse_package_json
elif parser_name == 'pyproject':
parser = parse_pyproject_toml
elif parser_name == 'go':
parser = parse_go_mod
elif parser_name == 'cargo':
parser = parse_cargo_toml
else:
continue
for fp in paths:
try:
content = fp.read_text(encoding='utf-8', errors='replace')
files_scanned += 1
rel = fp.relative_to(repo_path)
for dep in parser(content):
dep['source'] = pattern
dep['file'] = str(rel)
dep['repo'] = repo_name
all_deps.append(dep)
except Exception as e:
print(f" [WARN] Could not parse {fp}: {e}", file=sys.stderr)
return {
'repo': repo_name,
'path': str(repo_path),
'files_scanned': files_scanned,
'dependencies': all_deps,
'dependency_count': len(all_deps),
}
def scan_repos(repos: List[Path]) -> Dict[str, Any]:
"""Scan multiple repos and aggregate."""
results = {}
total_deps = 0
total_files = 0
for repo in repos:
if not repo.is_dir():
print(f"[WARN] Skipping {repo}: not a directory", file=sys.stderr)
continue
print(f"Scanning {repo.name}...", file=sys.stderr)
result = scan_repo(repo)
results[repo.name] = result
total_deps += result['dependency_count']
total_files += result['files_scanned']
return {
'repos': results,
'summary': {
'total_repos': len(results),
'total_files_scanned': total_files,
'total_dependencies': total_deps,
}
}
# ─── Output ─────────────────────────────────────────────────────────────────
def output_json(data: Dict[str, Any], out_path: Optional[Path] = None) -> None:
text = json.dumps(data, indent=2)
if out_path:
out_path.write_text(text)
print(f"Written: {out_path}", file=sys.stderr)
else:
print(text)
def output_markdown(data: Dict[str, Any], out_path: Optional[Path] = None) -> None:
lines = []
lines.append("# Dependency Inventory")
lines.append("\nGenerated: *(TODO: add timestamp)*")
lines.append(f"\n**Summary:** {data['summary']['total_dependencies']} dependencies across {data['summary']['total_repos']} repos")
lines.append("")
lines.append("| Repo | File | Package | Version |")
lines.append("|------|------|---------|---------|")
for repo_name, rdata in sorted(data['repos'].items()):
for dep in sorted(rdata['dependencies'], key=lambda d: d['package']):
lines.append(f"| {repo_name} | {dep['file']} | {dep['package']} | {dep['version']} |")
text = '\n'.join(lines) + '\n'
if out_path:
out_path.write_text(text)
print(f"Written: {out_path}", file=sys.stderr)
else:
print(text)
# ─── CLI Entry ────────────────────────────────────────────────────────────────
def main():
parser = argparse.ArgumentParser(description="Generate org-wide dependency inventory")
parser.add_argument('--repos-dir', help='Directory containing multiple repos')
parser.add_argument('--repos', help='Comma-separated list of repo paths')
parser.add_argument('--output', '-o', help='Output file (default: stdout)')
parser.add_argument('--format', choices=['json', 'markdown'], default='json',
help='Output format (default: json)')
args = parser.parse_args()
if args.repos:
repo_paths = [Path(p.strip()).expanduser() for p in args.repos.split(',')]
elif args.repos_dir:
base = Path(args.repos_dir).expanduser()
repo_paths = [p for p in base.iterdir() if p.is_dir() and not p.name.startswith('.')]
else:
repo_paths = [Path(__file__).resolve().parent.parent]
out_path = Path(args.output).expanduser() if args.output else None
data = scan_repos(repo_paths)
if args.format == 'json':
output_json(data, out_path)
else:
output_markdown(data, out_path)
if __name__ == '__main__':
main()

View File

@@ -1,170 +0,0 @@
#!/usr/bin/env python3
"""
Graph Query Engine — traverse the knowledge graph.
Usage:
python3 scripts/graph_query.py neighbors <fact_id> [--knowledge-dir knowledge/]
python3 scripts/graph_query.py path <from_id> <to_id> [--max-hops 10]
python3 scripts/graph_query.py subgraph <fact_id> [--depth 2]
python3 scripts/graph_query.py stats # Graph statistics
Outputs JSON to stdout.
"""
import argparse
import json
import sys
import time
from pathlib import Path
from collections import defaultdict, deque
from typing import Optional
# --- Graph building ---
def load_index(knowledge_dir: Path) -> dict:
index_path = knowledge_dir / "index.json"
if not index_path.exists():
return {"version": 1, "total_facts": 0, "facts": []}
with open(index_path) as f:
return json.load(f)
def build_adjacency(facts: list[dict]) -> dict:
"""Build undirected adjacency list from fact 'related' fields."""
adj = defaultdict(set)
id_to_fact = {}
for fact in facts:
fid = fact.get("id")
if not fid:
continue
id_to_fact[fid] = fact
for related_id in fact.get("related", []):
adj[fid].add(related_id)
adj[related_id].add(fid) # undirected
return dict(adj), id_to_fact
# --- Queries ---
def query_neighbors(fact_id: str, adj: dict, id_to_fact: dict) -> dict:
"""Return directly connected facts."""
neighbors = list(adj.get(fact_id, set()))
return {
"query": "neighbors",
"fact_id": fact_id,
"neighbors": [
{"id": nid, "fact": id_to_fact.get(nid, {}).get("fact", ""), "category": id_to_fact.get(nid, {}).get("category", "")}
for nid in neighbors if nid in id_to_fact
],
"count": len(neighbors),
}
def query_path(from_id: str, to_id: str, adj: dict, max_hops: int = 10) -> dict:
"""Find shortest path between two facts using BFS."""
if from_id not in adj or to_id not in adj:
return {"query": "path", "from": from_id, "to": to_id, "path": None, "error": "Fact not found in graph"}
if from_id == to_id:
return {"query": "path", "from": from_id, "to": to_id, "path": [from_id], "length": 0}
queue = deque([(from_id, [from_id])])
visited = {from_id}
while queue:
current, path = queue.popleft()
if len(path) > max_hops:
continue
for neighbor in adj.get(current, []):
if neighbor == to_id:
return {"query": "path", "from": from_id, "to": to_id, "path": path + [to_id], "length": len(path)}
if neighbor not in visited:
visited.add(neighbor)
queue.append((neighbor, path + [neighbor]))
return {"query": "path", "from": from_id, "to": to_id, "path": None, "error": f"No path found within {max_hops} hops"}
def query_subgraph(fact_id: str, adj: dict, id_to_fact: dict, depth: int = 2) -> dict:
"""Extract connected subgraph within N hops."""
if fact_id not in adj:
return {"query": "subgraph", "fact_id": fact_id, "nodes": [], "edges": [], "error": "Fact not found"}
visited = set()
queue = deque([(fact_id, 0)])
subgraph_nodes = set()
subgraph_edges = []
while queue:
node, d = queue.popleft()
if node in visited or d > depth:
continue
visited.add(node)
subgraph_nodes.add(node)
for neighbor in adj.get(node, []):
subgraph_edges.append({"source": node, "target": neighbor})
if neighbor not in visited:
queue.append((neighbor, d + 1))
return {
"query": "subgraph",
"fact_id": fact_id,
"depth": depth,
"nodes": [
{"id": nid, "fact": id_to_fact.get(nid, {}).get("fact", ""), "category": id_to_fact.get(nid, {}).get("category", "")}
for nid in sorted(subgraph_nodes)
],
"edges": [{"source": e["source"], "target": e["target"]} for e in subgraph_edges],
"node_count": len(subgraph_nodes),
"edge_count": len(subgraph_edges),
}
def query_stats(adj: dict, id_to_fact: dict) -> dict:
"""Graph statistics."""
return {
"statistics": {
"total_facts": len(id_to_fact),
"total_edges": sum(len(neighbors) for neighbors in adj.values()) // 2,
"connected_components": 0, # TODO: compute if needed
"average_degree": sum(len(neighbors) for neighbors in adj.values()) / len(adj) if adj else 0,
}
}
# --- CLI ---
def main():
parser = argparse.ArgumentParser(description="Graph query engine for knowledge store")
parser.add_argument("command", choices=["neighbors", "path", "subgraph", "stats"])
parser.add_argument("from_id", nargs="?", help="Starting fact ID")
parser.add_argument("to_id", nargs="?", help="Target fact ID (for path query)")
parser.add_argument("--knowledge-dir", default="knowledge", help="Knowledge directory")
parser.add_argument("--depth", type=int, default=2, help="Depth for subgraph query")
parser.add_argument("--max-hops", type=int, default=10, help="Max hops for path query")
args = parser.parse_args()
start = time.time()
knowledge_dir = Path(args.knowledge_dir)
index = load_index(knowledge_dir)
facts = index.get("facts", [])
adj, id_to_fact = build_adjacency(facts)
result = None
if args.command == "neighbors":
if not args.from_id:
print("ERROR: neighbors requires <fact_id>", file=sys.stderr)
sys.exit(1)
result = query_neighbors(args.from_id, adj, id_to_fact)
elif args.command == "path":
if not args.from_id or not args.to_id:
print("ERROR: path requires <from_id> <to_id>", file=sys.stderr)
sys.exit(1)
result = query_path(args.from_id, args.to_id, adj, max_hops=args.max_hops)
elif args.command == "subgraph":
if not args.from_id:
print("ERROR: subgraph requires <fact_id>", file=sys.stderr)
sys.exit(1)
result = query_subgraph(args.from_id, adj, id_to_fact, depth=args.depth)
elif args.command == "stats":
result = query_stats(adj, id_to_fact)
result["elapsed_ms"] = round((time.time() - start) * 1000, 2)
print(json.dumps(result, indent=2))
if __name__ == "__main__":
main()

View File

@@ -1,351 +0,0 @@
#!/usr/bin/env python3
"""
PR Complexity Scorer - Estimate review effort for PRs.
"""
import argparse
import json
import os
import re
import sys
from dataclasses import dataclass, asdict
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict, List, Optional
import urllib.request
import urllib.error
GITEA_BASE = "https://forge.alexanderwhitestone.com/api/v1"
DEPENDENCY_FILES = {
"requirements.txt", "pyproject.toml", "setup.py", "setup.cfg",
"Pipfile", "poetry.lock", "package.json", "yarn.lock", "Gemfile",
"go.mod", "Cargo.toml", "pom.xml", "build.gradle"
}
TEST_PATTERNS = [
r"tests?/.*\.py$", r".*_test\.py$", r"test_.*\.py$",
r"spec/.*\.rb$", r".*_spec\.rb$",
r"__tests__/", r".*\.test\.(js|ts|jsx|tsx)$"
]
WEIGHT_FILES = 0.25
WEIGHT_LINES = 0.25
WEIGHT_DEPS = 0.30
WEIGHT_TEST_COV = 0.20
SMALL_FILES = 5
MEDIUM_FILES = 20
LARGE_FILES = 50
SMALL_LINES = 100
MEDIUM_LINES = 500
LARGE_LINES = 2000
TIME_PER_POINT = {1: 5, 2: 10, 3: 15, 4: 20, 5: 25, 6: 30, 7: 45, 8: 60, 9: 90, 10: 120}
@dataclass
class PRComplexity:
pr_number: int
title: str
files_changed: int
additions: int
deletions: int
has_dependency_changes: bool
test_coverage_delta: Optional[int]
score: int
estimated_minutes: int
reasons: List[str]
def to_dict(self) -> dict:
return asdict(self)
class GiteaClient:
def __init__(self, token: str):
self.token = token
self.base_url = GITEA_BASE.rstrip("/")
def _request(self, path: str, params: Dict = None) -> Any:
url = f"{self.base_url}{path}"
if params:
qs = "&".join(f"{k}={v}" for k, v in params.items() if v is not None)
url += f"?{qs}"
req = urllib.request.Request(url)
req.add_header("Authorization", f"token {self.token}")
req.add_header("Content-Type", "application/json")
try:
with urllib.request.urlopen(req, timeout=30) as resp:
return json.loads(resp.read().decode())
except urllib.error.HTTPError as e:
print(f"API error {e.code}: {e.read().decode()[:200]}", file=sys.stderr)
return None
except urllib.error.URLError as e:
print(f"Network error: {e}", file=sys.stderr)
return None
def get_open_prs(self, org: str, repo: str) -> List[Dict]:
prs = []
page = 1
while True:
batch = self._request(f"/repos/{org}/{repo}/pulls", {"limit": 50, "page": page, "state": "open"})
if not batch:
break
prs.extend(batch)
if len(batch) < 50:
break
page += 1
return prs
def get_pr_files(self, org: str, repo: str, pr_number: int) -> List[Dict]:
files = []
page = 1
while True:
batch = self._request(
f"/repos/{org}/{repo}/pulls/{pr_number}/files",
{"limit": 100, "page": page}
)
if not batch:
break
files.extend(batch)
if len(batch) < 100:
break
page += 1
return files
def post_comment(self, org: str, repo: str, pr_number: int, body: str) -> bool:
data = json.dumps({"body": body}).encode("utf-8")
req = urllib.request.Request(
f"{self.base_url}/repos/{org}/{repo}/issues/{pr_number}/comments",
data=data,
method="POST",
headers={"Authorization": f"token {self.token}", "Content-Type": "application/json"}
)
try:
with urllib.request.urlopen(req, timeout=30) as resp:
return resp.status in (200, 201)
except urllib.error.HTTPError:
return False
def is_dependency_file(filename: str) -> bool:
return any(filename.endswith(dep) for dep in DEPENDENCY_FILES)
def is_test_file(filename: str) -> bool:
return any(re.search(pattern, filename) for pattern in TEST_PATTERNS)
def score_pr(
files_changed: int,
additions: int,
deletions: int,
has_dependency_changes: bool,
test_coverage_delta: Optional[int] = None
) -> tuple[int, int, List[str]]:
score = 1.0
reasons = []
# Files changed
if files_changed <= SMALL_FILES:
fscore = 1.0
reasons.append("small number of files changed")
elif files_changed <= MEDIUM_FILES:
fscore = 2.0
reasons.append("moderate number of files changed")
elif files_changed <= LARGE_FILES:
fscore = 2.5
reasons.append("large number of files changed")
else:
fscore = 3.0
reasons.append("very large PR spanning many files")
# Lines changed
total_lines = additions + deletions
if total_lines <= SMALL_LINES:
lscore = 1.0
reasons.append("small change size")
elif total_lines <= MEDIUM_LINES:
lscore = 2.0
reasons.append("moderate change size")
elif total_lines <= LARGE_LINES:
lscore = 3.0
reasons.append("large change size")
else:
lscore = 4.0
reasons.append("very large change")
# Dependency changes
if has_dependency_changes:
dscore = 2.5
reasons.append("dependency changes (architectural impact)")
else:
dscore = 0.0
# Test coverage delta
tscore = 0.0
if test_coverage_delta is not None:
if test_coverage_delta > 0:
reasons.append(f"test additions (+{test_coverage_delta} test files)")
tscore = -min(2.0, test_coverage_delta / 2.0)
elif test_coverage_delta < 0:
reasons.append(f"test removals ({abs(test_coverage_delta)} test files)")
tscore = min(2.0, abs(test_coverage_delta) * 0.5)
else:
reasons.append("test coverage change not assessed")
# Weighted sum, scaled by 3 to use full 1-10 range
bonus = (fscore * WEIGHT_FILES) + (lscore * WEIGHT_LINES) + (dscore * WEIGHT_DEPS) + (tscore * WEIGHT_TEST_COV)
scaled_bonus = bonus * 3.0
score = 1.0 + scaled_bonus
final_score = max(1, min(10, int(round(score))))
est_minutes = TIME_PER_POINT.get(final_score, 30)
return final_score, est_minutes, reasons
def analyze_pr(client: GiteaClient, org: str, repo: str, pr_data: Dict) -> PRComplexity:
pr_num = pr_data["number"]
title = pr_data.get("title", "")
files = client.get_pr_files(org, repo, pr_num)
additions = sum(f.get("additions", 0) for f in files)
deletions = sum(f.get("deletions", 0) for f in files)
filenames = [f.get("filename", "") for f in files]
has_deps = any(is_dependency_file(f) for f in filenames)
test_added = sum(1 for f in files if f.get("status") == "added" and is_test_file(f.get("filename", "")))
test_removed = sum(1 for f in files if f.get("status") == "removed" and is_test_file(f.get("filename", "")))
test_delta = test_added - test_removed if (test_added or test_removed) else None
score, est_min, reasons = score_pr(
files_changed=len(files),
additions=additions,
deletions=deletions,
has_dependency_changes=has_deps,
test_coverage_delta=test_delta
)
return PRComplexity(
pr_number=pr_num,
title=title,
files_changed=len(files),
additions=additions,
deletions=deletions,
has_dependency_changes=has_deps,
test_coverage_delta=test_delta,
score=score,
estimated_minutes=est_min,
reasons=reasons
)
def build_comment(complexity: PRComplexity) -> str:
change_desc = f"{complexity.files_changed} files, +{complexity.additions}/-{complexity.deletions} lines"
deps_note = "\n- :warning: Dependency changes detected — architectural review recommended" if complexity.has_dependency_changes else ""
test_note = ""
if complexity.test_coverage_delta is not None:
if complexity.test_coverage_delta > 0:
test_note = f"\n- :+1: {complexity.test_coverage_delta} test file(s) added"
elif complexity.test_coverage_delta < 0:
test_note = f"\n- :warning: {abs(complexity.test_coverage_delta)} test file(s) removed"
comment = f"## 📊 PR Complexity Analysis\n\n"
comment += f"**PR #{complexity.pr_number}: {complexity.title}**\n\n"
comment += f"| Metric | Value |\n|--------|-------|\n"
comment += f"| Changes | {change_desc} |\n"
comment += f"| Complexity Score | **{complexity.score}/10** |\n"
comment += f"| Estimated Review Time | ~{complexity.estimated_minutes} minutes |\n\n"
comment += f"### Scoring rationale:"
for r in complexity.reasons:
comment += f"\n- {r}"
if deps_note:
comment += deps_note
if test_note:
comment += test_note
comment += f"\n\n---\n"
comment += f"*Generated by PR Complexity Scorer — [issue #135](https://forge.alexanderwhitestone.com/Timmy_Foundation/compounding-intelligence/issues/135)*"
return comment
def main():
parser = argparse.ArgumentParser(description="PR Complexity Scorer")
parser.add_argument("--org", default="Timmy_Foundation")
parser.add_argument("--repo", default="compounding-intelligence")
parser.add_argument("--token", default=os.environ.get("GITEA_TOKEN") or os.path.expanduser("~/.config/gitea/token"))
parser.add_argument("--dry-run", action="store_true")
parser.add_argument("--apply", action="store_true")
parser.add_argument("--output", default="metrics/pr_complexity.json")
args = parser.parse_args()
token_path = args.token
if os.path.exists(token_path):
with open(token_path) as f:
token = f.read().strip()
else:
token = args.token
if not token:
print("ERROR: No Gitea token provided", file=sys.stderr)
sys.exit(1)
client = GiteaClient(token)
print(f"Fetching open PRs for {args.org}/{args.repo}...")
prs = client.get_open_prs(args.org, args.repo)
if not prs:
print("No open PRs found.")
sys.exit(0)
print(f"Found {len(prs)} open PR(s). Analyzing...")
results = []
Path(args.output).parent.mkdir(parents=True, exist_ok=True)
for pr in prs:
pr_num = pr["number"]
title = pr.get("title", "")
print(f" Analyzing PR #{pr_num}: {title[:60]}")
try:
complexity = analyze_pr(client, args.org, args.repo, pr)
results.append(complexity.to_dict())
comment = build_comment(complexity)
if args.dry_run:
print(f" → Score: {complexity.score}/10, Est: {complexity.estimated_minutes}min [DRY-RUN]")
elif args.apply:
success = client.post_comment(args.org, args.repo, pr_num, comment)
status = "[commented]" if success else "[FAILED]"
print(f" → Score: {complexity.score}/10, Est: {complexity.estimated_minutes}min {status}")
else:
print(f" → Score: {complexity.score}/10, Est: {complexity.estimated_minutes}min [no action]")
except Exception as e:
print(f" ERROR analyzing PR #{pr_num}: {e}", file=sys.stderr)
with open(args.output, "w") as f:
json.dump({
"org": args.org,
"repo": args.repo,
"timestamp": datetime.now(timezone.utc).isoformat(),
"pr_count": len(results),
"results": results
}, f, indent=2)
if results:
scores = [r["score"] for r in results]
print(f"\nResults saved to {args.output}")
print(f"Summary: {len(results)} PRs, scores range {min(scores):.0f}-{max(scores):.0f}")
else:
print("\nNo results to save.")
if __name__ == "__main__":
main()

View File

@@ -1,165 +0,0 @@
#!/usr/bin/env python3
"""
Tests for scripts/graph_query.py — Graph Query Engine.
"""
import json
import sys
import tempfile
from pathlib import Path
sys.path.insert(0, str(Path(__file__).resolve().parent))
from graph_query import load_index, build_adjacency, query_neighbors, query_path, query_subgraph, query_stats
def make_index(facts: list[dict], tmp_dir: Path) -> Path:
index = {
"version": 1,
"last_updated": "2026-04-13T20:00:00Z",
"total_facts": len(facts),
"facts": facts,
}
path = tmp_dir / "index.json"
with open(path, "w") as f:
json.dump(index, f)
return path
def test_neighbors():
"""Neighbor query returns directly connected facts."""
facts = [
{"id": "a", "fact": "A", "category": "fact", "related": ["b", "c"]},
{"id": "b", "fact": "B", "category": "fact", "related": ["a"]},
{"id": "c", "fact": "C", "category": "fact", "related": ["a"]},
{"id": "d", "fact": "D", "category": "fact", "related": []},
]
adj, id_to_fact = build_adjacency(facts)
result = query_neighbors("a", adj, id_to_fact)
neighbor_ids = {n["id"] for n in result["neighbors"]}
assert neighbor_ids == {"b", "c"}, f"Expected b,c got {neighbor_ids}"
assert result["count"] == 2
print("PASS: neighbors")
def test_path_found():
"""Path query finds shortest path."""
facts = [
{"id": "a", "fact": "A", "related": ["b"]},
{"id": "b", "fact": "B", "related": ["a", "c"]},
{"id": "c", "fact": "C", "related": ["b", "d"]},
{"id": "d", "fact": "D", "related": ["c"]},
]
adj, id_to_fact = build_adjacency(facts)
result = query_path("a", "d", adj)
assert result["path"] == ["a", "b", "c", "d"], f"Got path {result['path']}"
assert result["length"] == 3
print("PASS: path_found")
def test_path_not_found():
"""Path query returns error when no path exists."""
facts = [
{"id": "a", "fact": "A", "related": ["b"]},
{"id": "b", "fact": "B", "related": ["a"]},
{"id": "c", "fact": "C", "related": ["d"]},
{"id": "d", "fact": "D", "related": ["c"]},
]
adj, id_to_fact = build_adjacency(facts)
result = query_path("a", "c", adj, max_hops=5)
assert result["path"] is None
assert "error" in result
print("PASS: path_not_found")
def test_subgraph_extraction():
"""Subgraph extraction returns nodes within depth."""
facts = [
{"id": "a", "fact": "A", "related": ["b", "c"]},
{"id": "b", "fact": "B", "related": ["a", "d"]},
{"id": "c", "fact": "C", "related": ["a"]},
{"id": "d", "fact": "D", "related": ["b", "e"]},
{"id": "e", "fact": "E", "related": ["d"]},
]
adj, id_to_fact = build_adjacency(facts)
result = query_subgraph("a", adj, id_to_fact, depth=1)
node_ids = {n["id"] for n in result["nodes"]}
assert node_ids == {"a", "b", "c"}, f"Got {node_ids}"
assert result["node_count"] == 3
print("PASS: subgraph_depth1")
def test_subgraph_depth2():
"""Depth-2 subgraph includes further nodes."""
facts = [
{"id": "a", "fact": "A", "related": ["b"]},
{"id": "b", "fact": "B", "related": ["a", "c"]},
{"id": "c", "fact": "C", "related": ["b", "d"]},
{"id": "d", "fact": "D", "related": ["c"]},
]
adj, id_to_fact = build_adjacency(facts)
result = query_subgraph("a", adj, id_to_fact, depth=2)
node_ids = {n["id"] for n in result["nodes"]}
assert node_ids == {"a", "b", "c"}, f"Got {node_ids}"
print("PASS: subgraph_depth2")
def test_stats():
"""Statistics query returns graph metrics."""
facts = [
{"id": "a", "fact": "A", "related": ["b"]},
{"id": "b", "fact": "B", "related": ["a", "c"]},
{"id": "c", "fact": "C", "related": ["b"]},
]
adj, id_to_fact = build_adjacency(facts)
result = query_stats(adj, id_to_fact)
assert result["statistics"]["total_facts"] == 3
assert result["statistics"]["total_edges"] == 2 # undirected double-counted /2
assert result["statistics"]["average_degree"] > 0
print("PASS: stats")
def test_cli_integration():
"""CLI produces valid JSON with correct query types."""
with tempfile.TemporaryDirectory() as tmp:
import subprocess as sp
tmp_dir = Path(tmp)
facts = [
{"id": "x", "fact": "X", "related": ["y"]},
{"id": "y", "fact": "Y", "related": ["x", "z"]},
{"id": "z", "fact": "Z", "related": ["y"]},
]
index_path = make_index(facts, tmp_dir)
knowledge_dir = index_path.parent
script_path = Path(__file__).resolve().parent / "graph_query.py"
result = sp.run(
[sys.executable, str(script_path), "neighbors", "x", "--knowledge-dir", str(knowledge_dir)],
capture_output=True, text=True, cwd=str(tmp_dir)
)
assert result.returncode == 0, f"neighbors failed: {result.stderr}"
out = json.loads(result.stdout)
assert out["query"] == "neighbors"
assert out["fact_id"] == "x"
assert out["count"] == 1
result = sp.run(
[sys.executable, str(script_path), "path", "x", "z", "--knowledge-dir", str(knowledge_dir)],
capture_output=True, text=True, cwd=str(tmp_dir)
)
assert result.returncode == 0, f"path failed: {result.stderr}"
out = json.loads(result.stdout)
assert out["path"] == ["x", "y", "z"]
print("PASS: cli_integration")
if __name__ == "__main__":
test_neighbors()
test_path_found()
test_path_not_found()
test_subgraph_extraction()
test_subgraph_depth2()
test_stats()
test_cli_integration()
print("\nAll graph_query tests passed!")

View File

@@ -1,170 +0,0 @@
#!/usr/bin/env python3
"""
Tests for PR Complexity Scorer — unit tests for the scoring logic.
"""
import sys
from pathlib import Path
sys.path.insert(0, str(Path(__file__).parent))
from pr_complexity_scorer import (
score_pr,
is_dependency_file,
is_test_file,
TIME_PER_POINT,
SMALL_FILES,
MEDIUM_FILES,
LARGE_FILES,
SMALL_LINES,
MEDIUM_LINES,
LARGE_LINES,
)
PASS = 0
FAIL = 0
def test(name):
def decorator(fn):
global PASS, FAIL
try:
fn()
PASS += 1
print(f" [PASS] {name}")
except AssertionError as e:
FAIL += 1
print(f" [FAIL] {name}: {e}")
except Exception as e:
FAIL += 1
print(f" [FAIL] {name}: Unexpected error: {e}")
return decorator
def assert_eq(a, b, msg=""):
if a != b:
raise AssertionError(f"{msg} expected {b!r}, got {a!r}")
def assert_true(v, msg=""):
if not v:
raise AssertionError(msg or "Expected True")
def assert_false(v, msg=""):
if v:
raise AssertionError(msg or "Expected False")
print("=== PR Complexity Scorer Tests ===\n")
print("-- File Classification --")
@test("dependency file detection — requirements.txt")
def _():
assert_true(is_dependency_file("requirements.txt"))
assert_true(is_dependency_file("src/requirements.txt"))
assert_false(is_dependency_file("requirements_test.txt"))
@test("dependency file detection — pyproject.toml")
def _():
assert_true(is_dependency_file("pyproject.toml"))
assert_false(is_dependency_file("myproject.py"))
@test("test file detection — pytest style")
def _():
assert_true(is_test_file("tests/test_api.py"))
assert_true(is_test_file("test_module.py"))
assert_true(is_test_file("src/module_test.py"))
@test("test file detection — other frameworks")
def _():
assert_true(is_test_file("spec/feature_spec.rb"))
assert_true(is_test_file("__tests__/component.test.js"))
assert_false(is_test_file("testfixtures/helper.py"))
print("\n-- Scoring Logic --")
@test("small PR gets low score (1-3)")
def _():
score, minutes, _ = score_pr(
files_changed=3,
additions=50,
deletions=10,
has_dependency_changes=False,
test_coverage_delta=None
)
assert_true(1 <= score <= 3, f"Score should be low, got {score}")
assert_true(minutes < 20)
@test("medium PR gets medium score (4-6)")
def _():
score, minutes, _ = score_pr(
files_changed=15,
additions=400,
deletions=100,
has_dependency_changes=False,
test_coverage_delta=None
)
assert_true(4 <= score <= 6, f"Score should be medium, got {score}")
assert_true(20 <= minutes <= 45)
@test("large PR gets high score (7-9)")
def _():
score, minutes, _ = score_pr(
files_changed=60,
additions=3000,
deletions=1500,
has_dependency_changes=True,
test_coverage_delta=None
)
assert_true(7 <= score <= 9, f"Score should be high, got {score}")
assert_true(minutes >= 45)
@test("dependency changes boost score")
def _():
base_score, _, _ = score_pr(
files_changed=10, additions=200, deletions=50,
has_dependency_changes=False, test_coverage_delta=None
)
dep_score, _, _ = score_pr(
files_changed=10, additions=200, deletions=50,
has_dependency_changes=True, test_coverage_delta=None
)
assert_true(dep_score > base_score, f"Deps: {base_score} -> {dep_score}")
@test("adding tests lowers complexity")
def _():
base_score, _, _ = score_pr(
files_changed=8, additions=150, deletions=20,
has_dependency_changes=False, test_coverage_delta=None
)
better_score, _, _ = score_pr(
files_changed=8, additions=180, deletions=20,
has_dependency_changes=False, test_coverage_delta=3
)
assert_true(better_score < base_score, f"Tests: {base_score} -> {better_score}")
@test("removing tests increases complexity")
def _():
base_score, _, _ = score_pr(
files_changed=8, additions=150, deletions=20,
has_dependency_changes=False, test_coverage_delta=None
)
worse_score, _, _ = score_pr(
files_changed=8, additions=150, deletions=20,
has_dependency_changes=False, test_coverage_delta=-2
)
assert_true(worse_score > base_score, f"Remove tests: {base_score} -> {worse_score}")
@test("score bounded 1-10")
def _():
for files, adds, dels in [(1, 10, 5), (100, 10000, 5000)]:
score, _, _ = score_pr(files, adds, dels, False, None)
assert_true(1 <= score <= 10, f"Score {score} out of range")
@test("estimated minutes exist for all scores")
def _():
for s in range(1, 11):
assert_true(s in TIME_PER_POINT, f"Missing time for score {s}")
print(f"\n=== Results: {PASS} passed, {FAIL} failed ===")
sys.exit(0 if FAIL == 0 else 1)

View File

@@ -0,0 +1,52 @@
"""
Tests for scripts/dependency_inventory.py
"""
import unittest
import json
from pathlib import Path
import sys
sys.path.insert(0, str(Path(__file__).parent.parent))
from scripts.dependency_inventory import (
parse_requirements,
parse_package_json,
parse_pyproject_toml,
scan_repo,
)
class TestParseRequirements(unittest.TestCase):
def test_parses_simple_requirement(self):
result = parse_requirements("requests>=2.33.0")
self.assertEqual(len(result), 1)
self.assertEqual(result[0]["package"], "requests")
def test_parses_version_range(self):
result = parse_requirements("pytest>=8,<9")
self.assertEqual(result[0]["package"], "pytest")
class TestParsePackageJson(unittest.TestCase):
def test_parses_dependencies(self):
content = json.dumps({"name": "test", "dependencies": {"react": "^18.2.0"}})
result = parse_package_json(content)
self.assertTrue(any(d["package"] == "react" for d in result))
class TestParsePyprojectToml(unittest.TestCase):
def test_parses_project_dependencies(self):
content = "\n[project]\nname = \"test\"\ndependencies = [\n \"openai>=2.21.0,<3\",\n]"
result = parse_pyproject_toml(content)
self.assertEqual(len(result), 1)
class TestScanRepo(unittest.TestCase):
def test_scans_local_repo(self):
result = scan_repo(Path(__file__).resolve().parents[1])
self.assertGreater(result["dependency_count"], 0)
if __name__ == "__main__":
unittest.main()