Compare commits

..

2 Commits

3 changed files with 273 additions and 282 deletions

View File

@@ -1,282 +0,0 @@
#!/usr/bin/env python3
"""
Dead Code Detector for Python Codebases
AST-based analysis to find defined but never-called functions and classes.
Excludes entry points, plugin hooks, __init__ exports.
Usage:
python3 scripts/dead_code_detector.py /path/to/repo/
python3 scripts/dead_code_detector.py hermes-agent/ --format json
python3 scripts/dead_code_detector.py . --exclude tests/,venv/
Output: file:line, function/class name, last git author (if available)
"""
import argparse
import ast
import json
import os
import subprocess
import sys
from collections import defaultdict
from pathlib import Path
from typing import Optional
# Names that are expected to be unused (entry points, protocol methods, etc.)
# Entries ending in "_" (test_, pytest_, abc_ and the dunders) are treated as
# prefixes; everything else must match a definition name exactly.
SAFE_UNUSED_PATTERNS = {
    # Python dunders
    "__init__", "__str__", "__repr__", "__eq__", "__hash__", "__len__",
    "__getitem__", "__setitem__", "__contains__", "__iter__", "__next__",
    "__enter__", "__exit__", "__call__", "__bool__", "__del__",
    "__post_init__", "__class_getitem__",
    # Common entry points
    "main", "app", "handler", "setup", "teardown", "fixture",
    # pytest
    "conftest", "test_", "pytest_",  # prefix patterns
    # Protocols / abstract
    "abstractmethod", "abc_",
}


def is_safe_unused(name: str, filepath: str) -> bool:
    """Check if an unused name is expected to be unused.

    Patterns ending in an underscore (test_, pytest_, abc_, dunders) are
    prefix matches; all other patterns must match the name exactly.
    Matching every pattern as a prefix would wrongly mark names like
    "maintain_index" safe because of "main".
    """
    # Test files are exempt — their functions are invoked by the test runner.
    if "test" in filepath.lower():
        return True
    for pattern in SAFE_UNUSED_PATTERNS:
        if pattern.endswith("_"):
            if name.startswith(pattern):
                return True
        elif name == pattern:
            return True
    # __init__.py exports are often unused internally
    if filepath.endswith("__init__.py"):
        return True
    return False
def get_git_blame(filepath: str, lineno: int) -> Optional[str]:
    """Return the last author of a line via ``git blame``, or None.

    None is returned when git is unavailable, the file is untracked, or
    blame times out — blame info is best-effort decoration only, so all
    subprocess-level failures are deliberately swallowed.
    """
    try:
        result = subprocess.run(
            ["git", "blame", "-L", f"{lineno},{lineno}", "--porcelain", filepath],
            capture_output=True, text=True, timeout=5
        )
        # Porcelain format carries the author on a line of the form
        # "author <name>"; return the first one found.
        for line in result.stdout.split("\n"):
            if line.startswith("author "):
                return line[7:]
    except (OSError, subprocess.SubprocessError):
        # OSError: git binary missing/unreadable path;
        # SubprocessError also covers TimeoutExpired.  Never let a blame
        # failure abort the scan (was a bare except, which also trapped
        # KeyboardInterrupt/SystemExit).
        pass
    return None
class DefinitionCollector(ast.NodeVisitor):
    """Record every function, async function, and class definition.

    Results accumulate in ``definitions`` as (name, kind, lineno) tuples.
    """

    def __init__(self):
        # (name, kind, lineno) for each definition encountered
        self.definitions = []

    def _record(self, node, kind):
        # Shared bookkeeping for all three definition node types; keep
        # descending so nested defs/classes are collected too.
        self.definitions.append((node.name, kind, node.lineno))
        self.generic_visit(node)

    def visit_FunctionDef(self, node):
        self._record(node, "function")

    def visit_AsyncFunctionDef(self, node):
        self._record(node, "async_function")

    def visit_ClassDef(self, node):
        self._record(node, "class")
class NameUsageCollector(ast.NodeVisitor):
    """Gather every referenced identifier in a module.

    Populates three sets: ``names`` (bare identifiers and attribute
    bases), ``calls`` (function/method names invoked), and ``imports``
    (names bound by import statements).
    """

    def __init__(self):
        self.names = set()
        self.calls = set()
        self.imports = set()

    def visit_Name(self, node):
        self.names.add(node.id)
        self.generic_visit(node)

    def visit_Attribute(self, node):
        base = node.value
        # Only simple bases like ``obj.attr`` are tracked; chained or
        # computed bases fall through to the child visits.
        if isinstance(base, ast.Name):
            self.names.add(base.id)
        self.generic_visit(node)

    def visit_Call(self, node):
        target = node.func
        if isinstance(target, ast.Name):
            self.calls.add(target.id)
        elif isinstance(target, ast.Attribute):
            if isinstance(target.value, ast.Name):
                self.names.add(target.value.id)
            self.calls.add(target.attr)
        self.generic_visit(node)

    def _record_aliases(self, node):
        # Both import forms bind ``asname`` when present, else the raw name.
        for alias in node.names:
            self.imports.add(alias.asname or alias.name)
        self.generic_visit(node)

    visit_Import = _record_aliases
    visit_ImportFrom = _record_aliases
def analyze_file(filepath: str) -> dict:
    """Analyze a single Python file for dead code.

    Returns {"definitions": count, "dead": [...]} on success, or
    {"error": msg} when the file cannot be read or parsed.  Note that
    usage is only checked within this one file; repo-wide analysis is
    done by scan_repo().
    """
    path = Path(filepath)
    try:
        content = path.read_text()
        tree = ast.parse(content, filename=str(filepath))
    except (SyntaxError, UnicodeDecodeError, OSError):
        # OSError added: a missing/unreadable file should report an
        # error dict, not crash the caller.
        return {"error": f"Could not parse {filepath}"}
    # Every def / async def / class in the file.
    def_collector = DefinitionCollector()
    def_collector.visit(tree)
    definitions = def_collector.definitions
    # Every identifier referenced anywhere in the same file.
    usage_collector = NameUsageCollector()
    usage_collector.visit(tree)
    used_names = usage_collector.names | usage_collector.calls | usage_collector.imports
    # A definition is dead if its name is never referenced and it is not
    # a known-safe pattern (entry point, dunder, test, __init__ export).
    # (The original also had a no-op branch for single-underscore private
    # names — removed; it did nothing.)
    dead = []
    for name, def_type, lineno in definitions:
        if name in used_names:
            continue
        if is_safe_unused(name, filepath):
            continue
        dead.append({
            "name": name,
            "type": def_type,
            "file": filepath,
            "line": lineno,
        })
    return {"definitions": len(definitions), "dead": dead}
def scan_repo(repo_path: str, exclude_patterns: list = None) -> dict:
    """Scan an entire repo for dead code.

    Two-pass analysis: first collect every definition across all
    parseable .py files, then gather every referenced name; a definition
    whose name is referenced nowhere (and is not a known-safe pattern)
    is reported as dead.

    Args:
        repo_path: Root directory to scan recursively.
        exclude_patterns: Path components to skip; defaults to common
            virtualenv/vendored/build directories.

    Returns:
        Summary dict: repo, files_scanned, total_definitions,
        dead_code_count, and the dead_code list sorted by (file, line).
    """
    path = Path(repo_path)
    exclude = exclude_patterns or ["venv", ".venv", "node_modules", "__pycache__",
                                   ".git", "dist", "build", ".tox", "vendor"]
    all_definitions = defaultdict(list)  # name -> [{file, line, type}]
    parsed_files = []  # (filepath str, AST) — cached for the second pass
    dead_code = []
    # First pass: parse each file once and collect all definitions.
    for fpath in path.rglob("*.py"):
        if any(ex in fpath.parts for ex in exclude):
            continue
        if fpath.name.startswith("."):
            continue
        try:
            content = fpath.read_text(errors="ignore")
            tree = ast.parse(content, filename=str(fpath))
        except (SyntaxError, ValueError, OSError):
            # SyntaxError/ValueError: unparseable source (ValueError
            # covers e.g. null bytes); OSError: unreadable file.
            # Was a bare except.
            continue
        parsed_files.append((str(fpath), tree))
        collector = DefinitionCollector()
        collector.visit(tree)
        rel_path = str(fpath.relative_to(path))
        for name, def_type, lineno in collector.definitions:
            all_definitions[name].append({
                "file": rel_path,
                "line": lineno,
                "type": def_type,
            })
    # Second pass: gather every referenced name, reusing the cached ASTs
    # instead of re-reading and re-parsing every file from disk.
    all_used_names = set()
    for _, tree in parsed_files:
        usage = NameUsageCollector()
        usage.visit(tree)
        all_used_names |= usage.names | usage.calls | usage.imports
    # Find dead code: defined somewhere, referenced nowhere.
    for name, locations in all_definitions.items():
        if name in all_used_names:
            continue
        for loc in locations:
            if not is_safe_unused(name, loc["file"]):
                dead_code.append({
                    "name": name,
                    "type": loc["type"],
                    "file": loc["file"],
                    "line": loc["line"],
                })
    return {
        "repo": path.name,
        "files_scanned": len(parsed_files),
        "total_definitions": sum(len(v) for v in all_definitions.values()),
        "dead_code_count": len(dead_code),
        "dead_code": sorted(dead_code, key=lambda x: (x["file"], x["line"])),
    }
def main():
    """CLI entry point: scan a repo and print a dead-code report."""
    parser = argparse.ArgumentParser(description="Find dead code in Python codebases")
    parser.add_argument("repo", help="Repository path to scan")
    parser.add_argument("--format", choices=["text", "json"], default="text")
    parser.add_argument("--exclude", help="Comma-separated patterns to exclude")
    parser.add_argument("--git-blame", action="store_true", help="Include git blame info")
    args = parser.parse_args()

    patterns = args.exclude.split(",") if args.exclude else None
    report = scan_repo(args.repo, patterns)

    if args.format == "json":
        print(json.dumps(report, indent=2))
        return

    # Plain-text report: summary header, then a table of findings.
    print(f"Dead Code Report: {report['repo']}")
    print(f"Files scanned: {report['files_scanned']}")
    print(f"Total definitions: {report['total_definitions']}")
    print(f"Dead code found: {report['dead_code_count']}")
    print()
    if not report["dead_code"]:
        print("No dead code detected!")
        return
    print(f"{'File':<45} {'Line':>4} {'Type':<10} {'Name'}")
    print("-" * 85)
    for entry in report["dead_code"]:
        blame = ""
        if args.git_blame:
            blame = get_git_blame(
                os.path.join(args.repo, entry["file"]),
                entry["line"]
            ) or ""
        suffix = f" ({blame})" if blame else ""
        print(f"{entry['file']:<45} {entry['line']:>4} {entry['type']:<10} {entry['name']}{suffix}")


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,162 @@
#!/usr/bin/env python3
"""
Gitea Issue Body Parser
Extracts structured data from Gitea issue markdown bodies:
- Title
- Context section
- Acceptance criteria (checkboxes)
- Labels
- Epic/parent references
Usage:
python3 scripts/gitea_issue_parser.py <issue_body.txt
python3 scripts/gitea_issue_parser.py --url https://forge.../api/v1/repos/.../issues/123
echo "issue body" | python3 scripts/gitea_issue_parser.py --stdin
Output: JSON with {title, context, criteria[], labels[], epic_ref}
"""
import argparse
import json
import re
import sys
from typing import Optional
def _extract_epic_ref(body: str, title: str):
    """Return the parent/epic issue reference as "#N", or None.

    Precedence: explicit close keywords in the body, then part-of/epic
    keywords, then any #N in the title, then any #N in the first 200
    characters of the body.
    """
    close_match = re.search(r"(?:Closes?|Fixes?|Resolves?)\s+#(\d+)", body, re.IGNORECASE)
    if close_match:
        return f"#{close_match.group(1)}"
    part_match = re.search(r"(?:Part of|Epic|Parent|Blocks?)\s+#(\d+)", body, re.IGNORECASE)
    if part_match:
        return f"#{part_match.group(1)}"
    title_refs = re.findall(r"#(\d+)", title)
    if title_refs:
        return f"#{title_refs[0]}"
    body_refs = re.findall(r"#(\d+)", body[:200])  # only early refs count
    if body_refs:
        return f"#{body_refs[0]}"
    return None


def _split_sections(body: str) -> dict:
    """Split a markdown body on "## Header" lines.

    Returns {lowercased header: stripped content}.  Text before the
    first header is discarded (it typically holds only close refs,
    which _extract_epic_ref reads from the raw body).
    """
    sections = {}
    current = None
    buffer = []
    for line in body.split("\n"):
        header = re.match(r"^##\s+(.+)$", line)
        if header:
            if current:
                sections[current] = "\n".join(buffer).strip()
            current = header.group(1).strip().lower()
            buffer = []
        else:
            buffer.append(line)
    if current:
        sections[current] = "\n".join(buffer).strip()
    return sections


def _extract_context(sections: dict) -> str:
    """Pick the context text: a known section name, else the first section.

    The first-section fallback keeps bodies with ad-hoc headings
    (e.g. "## Changes") from yielding an empty context.
    """
    for key in ("context", "background", "description", "problem"):
        if key in sections:
            return sections[key]
    for text in sections.values():  # dicts preserve document order
        return text
    return ""


def _extract_criteria(sections: dict, body: str) -> list:
    """Collect acceptance-criteria items as stripped strings.

    Checkboxes in a recognized criteria section win; then plain
    numbered/bulleted items in that section; finally checkboxes found
    anywhere in the whole body.
    """
    checkbox = r"-\s*\[[ xX]?\]\s*(.+)"
    section = None
    for key in ("acceptance criteria", "acceptance_criteria", "criteria",
                "requirements", "definition of done"):
        if key in sections:
            section = sections[key]
            break
    criteria = []
    if section:
        criteria = [m.group(1).strip() for m in re.finditer(checkbox, section)]
        if not criteria:
            bullet = r"^\s*(?:\d+\.|-|\*)\s+(.+)"
            criteria = [m.group(1).strip()
                        for m in re.finditer(bullet, section, re.MULTILINE)]
    if not criteria:
        criteria = [m.group(1).strip() for m in re.finditer(checkbox, body)]
    return criteria


def parse_issue_body(body: str, title: str = "", labels: list = None) -> dict:
    """Parse a Gitea issue body into structured JSON-ready data.

    Args:
        body: Raw markdown issue body.
        title: Issue title, used as a fallback source for the epic ref.
        labels: Label names, passed through unchanged.

    Returns:
        Dict with keys: title, context, criteria[], labels[], epic_ref,
        sections{}.  An empty body yields the default structure.
    """
    result = {
        "title": title,
        "context": "",
        "criteria": [],
        "labels": labels or [],
        "epic_ref": None,
        "sections": {},
    }
    if not body:
        return result
    result["epic_ref"] = _extract_epic_ref(body, title)
    result["sections"] = _split_sections(body)
    result["context"] = _extract_context(result["sections"])
    result["criteria"] = _extract_criteria(result["sections"], body)
    return result
def parse_from_url(api_url: str, token: str = None) -> dict:
    """Fetch issue JSON from a Gitea API URL and parse its body.

    Args:
        api_url: Full issue endpoint, e.g.
            https://forge.../api/v1/repos/owner/repo/issues/123
        token: Optional Gitea API token, sent as "Authorization: token ...".

    Returns:
        The structured dict from parse_issue_body().

    Raises:
        urllib.error.URLError / HTTPError on network or API failure.
    """
    import urllib.request
    headers = {}
    if token:
        headers["Authorization"] = f"token {token}"
    req = urllib.request.Request(api_url, headers=headers)
    # Close the HTTP response deterministically instead of leaking the
    # socket (the original never closed it).
    with urllib.request.urlopen(req, timeout=30) as resp:
        issue = json.loads(resp.read())
    title = issue.get("title", "")
    body = issue.get("body", "")
    labels = [l["name"] for l in issue.get("labels", [])]
    return parse_issue_body(body, title, labels)
def main():
    """CLI entry point: read an issue body, print parsed JSON to stdout."""
    parser = argparse.ArgumentParser(description="Parse Gitea issue body into structured JSON")
    parser.add_argument("input", nargs="?", help="Issue body file (or - for stdin)")
    parser.add_argument("--url", help="Gitea API URL for the issue")
    parser.add_argument("--stdin", action="store_true", help="Read from stdin")
    parser.add_argument("--token", help="Gitea API token (or set GITEA_TOKEN env var)")
    parser.add_argument("--title", default="", help="Issue title (for epic ref extraction)")
    parser.add_argument("--labels", nargs="*", default=[], help="Issue labels")
    parser.add_argument("--pretty", action="store_true", help="Pretty-print JSON output")
    args = parser.parse_args()

    import os
    api_token = args.token or os.environ.get("GITEA_TOKEN")

    # Input precedence: API URL, then stdin, then a file path.
    if args.url:
        parsed = parse_from_url(args.url, api_token)
    elif args.stdin or args.input == "-":
        parsed = parse_issue_body(sys.stdin.read(), args.title, args.labels)
    elif args.input:
        with open(args.input) as handle:
            parsed = parse_issue_body(handle.read(), args.title, args.labels)
    else:
        parser.print_help()
        sys.exit(1)

    print(json.dumps(parsed, indent=2 if args.pretty else None))


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,111 @@
#!/usr/bin/env python3
"""Tests for gitea_issue_parser."""
import json
import sys
import os
sys.path.insert(0, os.path.dirname(__file__))
from gitea_issue_parser import parse_issue_body
def test_basic_structure():
    """Sections, checkbox criteria, and labels all parse from one body."""
    issue_body = """## Context
This is the background.
## Acceptance Criteria
- [ ] First criterion
- [x] Second criterion (already done)
- [ ] Third criterion
## Labels
`pipeline`, `extraction`
"""
    parsed = parse_issue_body(issue_body, "Test Issue", ["pipeline", "extraction"])
    assert parsed["title"] == "Test Issue"
    assert parsed["labels"] == ["pipeline", "extraction"]
    assert "background" in parsed["context"].lower()
    assert len(parsed["criteria"]) == 3
    assert "First criterion" in parsed["criteria"]
    print("PASS: test_basic_structure")
def test_epic_ref():
    """A "Closes #N" line in the body wins over a number in the title."""
    parsed = parse_issue_body("Closes #645\n\nSome description.", "feat: thing (#688)")
    assert parsed["epic_ref"] == "#645"
    print("PASS: test_epic_ref")
def test_epic_ref_from_title():
    """With no close/part reference in the body, the title's #N is used."""
    parsed = parse_issue_body("Some description without close ref.", "feat: scene descriptions (#645)")
    assert parsed["epic_ref"] == "#645"
    print("PASS: test_epic_ref_from_title")
def test_no_checkboxes():
    """Numbered list items count as criteria when no checkboxes exist."""
    numbered_body = """## Requirements
1. First thing
2. Second thing
3. Third thing
"""
    parsed = parse_issue_body(numbered_body)
    assert len(parsed["criteria"]) == 3
    print("PASS: test_no_checkboxes")
def test_empty_body():
    """An empty body yields the default structure with the title preserved."""
    parsed = parse_issue_body("", "Empty Issue")
    assert parsed["context"] == ""
    assert parsed["criteria"] == []
    assert parsed["title"] == "Empty Issue"
    print("PASS: test_empty_body")
def test_real_issue_format():
    """A realistic issue: leading close ref plus ad-hoc section headings."""
    realistic_body = """Closes #681
## Changes
Add `#!/usr/bin/env python3` shebang to 6 Python scripts.
## Verification
All 6 files confirmed missing shebangs before fix.
## Impact
Scripts can now be executed directly.
"""
    parsed = parse_issue_body(realistic_body, "fix: add python3 shebangs (#685)")
    assert parsed["epic_ref"] == "#681"
    assert "shebang" in parsed["context"].lower()
    print("PASS: test_real_issue_format")
def test_all_sections_captured():
    """Every ## header lands in the sections dict, lowercased."""
    sectioned_body = """## Context
Background info.
## Acceptance Criteria
- [ ] Do thing
## Labels
`test`
"""
    parsed = parse_issue_body(sectioned_body)
    assert "acceptance criteria" in parsed["sections"]
    assert "context" in parsed["sections"]
    print("PASS: test_all_sections_captured")
if __name__ == "__main__":
    # Run every test in definition order, then report success.
    for check in (
        test_basic_structure,
        test_epic_ref,
        test_epic_ref_from_title,
        test_no_checkboxes,
        test_empty_body,
        test_real_issue_format,
        test_all_sections_captured,
    ):
        check()
    print("\nAll tests passed.")