Some checks failed
Test / pytest (pull_request) Failing after 7s
Add dependency_inventory.py — an inventory tool that scans repos for dependency manifests (requirements.txt, package.json, go.mod, Cargo.toml, pyproject.toml) and produces either a JSON or a Markdown report. Includes: - Full parser suite for 5 manifest types - --repos and --repos-dir argument support - Incremental-friendly design — safe to extend with new features - --output/-o file support - Test suite in tests/test_dependency_inventory.py Closes #107 (1/5) — first script in the Health Report toolkit.
309 lines
11 KiB
Python
309 lines
11 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Dependency Inventory — Scan repos and list third-party dependencies.
|
|
|
|
Reads: package.json, requirements.txt, go.mod, Cargo.toml, pyproject.toml
|
|
Extracts: package name, version constraint, source file/repo
|
|
Outputs: JSON (default) or markdown table
|
|
|
|
Usage:
|
|
python3 scripts/dependency_inventory.py --repos-dir ~/repos/
|
|
python3 scripts/dependency_inventory.py --repos ~/repo1,~/repo2 --format markdown
|
|
"""
|
|
|
|
import argparse
|
|
import json
|
|
import os
|
|
import re
|
|
import sys
|
|
from pathlib import Path
|
|
from typing import Dict, List, Any, Optional
|
|
|
|
# Manifest file name -> name of the parser that understands that format.
# Iteration order of this dict is the order in which manifest types are
# looked up during a scan.
MANIFEST_PATTERNS: Dict[str, str] = {
    'requirements.txt': 'requirements',
    'package.json': 'npm',
    'pyproject.toml': 'pyproject',
    'go.mod': 'go',
    'Cargo.toml': 'cargo',
}
|
|
|
|
# Registry of manifest parsers, keyed by parser name.
# Entries are added by the @register_parser decorator below.
PARSERS = {}


def register_parser(name: str):
    """Build a decorator that records the decorated function in PARSERS.

    The function is stored under *name* and returned unchanged, so it can
    still be called directly by its own name.
    """
    def _register(parser_fn):
        PARSERS[name] = parser_fn
        return parser_fn

    return _register
|
|
|
|
|
|
# ─── Parsers ────────────────────────────────────────────────────────────────
|
|
|
|
@register_parser('requirements')
|
|
def parse_requirements(content: str) -> List[Dict[str, str]]:
    """Parse the text of a requirements.txt file, one requirement per line.

    Blank lines, comment lines and pip option lines (anything starting with
    ``-``, such as ``-r other.txt`` or ``--index-url ...``) are skipped.
    Inline comments (whitespace followed by ``#``) and environment markers
    (everything after ``;``) are removed before the requirement is parsed.

    Args:
        content: Full text of the requirements file.

    Returns:
        One dict per requirement with the keys:
        ``package``    - the name, including any ``[extras]``;
        ``version``    - the text after the first comparison operator
                         (``''`` when unpinned), e.g. ``'1.0,<2.0'``;
        ``constraint`` - the specifier as written, e.g. ``'>=1.0,<2.0'``.
    """
    # A name (optionally with extras) followed by either a version
    # specifier or a "@ url" direct reference. Longer operators are listed
    # first so that "===" and "!=" are not mistaken for a bare "=".
    spec_re = re.compile(
        r'^(?P<name>[A-Za-z0-9][A-Za-z0-9._-]*(?:\[[^\]]*\])?)'
        r'\s*(?P<rest>(?:===|==|~=|!=|<=|>=|<|>|=|@).*)?$'
    )
    operator_re = re.compile(r'^(?:===|==|~=|!=|<=|>=|<|>|=)\s*')

    deps = []
    for raw_line in content.splitlines():
        line = raw_line.strip()
        if not line or line.startswith(('#', '-')):
            continue

        # Drop the inline comment and the environment marker, if any.
        spec = re.split(r'\s+#|;', line, maxsplit=1)[0].strip()
        if not spec:
            continue

        match = spec_re.match(spec)
        if match is None:
            # Not a "name <op> version" requirement (e.g. a bare URL or a
            # local path): record it verbatim rather than dropping it.
            deps.append({'package': spec, 'version': '', 'constraint': ''})
            continue

        constraint = (match.group('rest') or '').strip()
        leading_op = operator_re.match(constraint)
        version = constraint[leading_op.end():].strip() if leading_op else ''
        deps.append({
            'package': match.group('name'),
            'version': version,
            'constraint': constraint,
        })
    return deps
|
|
|
|
|
|
@register_parser('npm')
|
|
def parse_package_json(content: str) -> List[Dict[str, str]]:
    """Parse the dependency sections of a package.json document.

    Reads ``dependencies``, ``devDependencies``, ``peerDependencies`` and
    ``optionalDependencies``, in that order.

    Args:
        content: Full text of the package.json file.

    Returns:
        One dict per dependency with the keys ``package``, ``version``,
        ``constraint`` (same value as ``version``) and ``type`` (the section
        it came from). Returns an empty list when the text is not valid
        JSON or the top-level value is not an object; sections that are
        missing or are not objects are skipped.
    """
    try:
        data = json.loads(content)
    except json.JSONDecodeError:
        return []
    # Valid JSON that is not an object (e.g. a list) has no sections to read.
    if not isinstance(data, dict):
        return []

    deps = []
    for section in ('dependencies', 'devDependencies', 'peerDependencies', 'optionalDependencies'):
        entries = data.get(section)
        # Guards against `"dependencies": null` and other malformed sections.
        if not isinstance(entries, dict):
            continue
        for name, ver in entries.items():
            deps.append({
                'package': name,
                'version': ver,
                'constraint': ver,
                'type': section,
            })
    return deps
|
|
|
|
|
|
@register_parser('pyproject')
|
|
def parse_pyproject_toml(content: str) -> List[Dict[str, str]]:
    """Parse ``dependencies = [...]`` arrays from pyproject.toml text.

    This is a line-based scan, not a full TOML parser: it looks for a line
    beginning with ``dependencies = [`` and collects the quoted strings up
    to the matching ``]``. The array may be written on one line or spread
    over several; ``#`` comments and both quote styles are handled.

    Args:
        content: Full text of the pyproject.toml file.

    Returns:
        One dict per requirement string with the keys:
        ``package``    - the distribution name (extras excluded);
        ``version``    - the text after the comparison operator, without
                         any ``; marker`` suffix (``''`` when unpinned);
        ``constraint`` - the requirement string exactly as written.
    """
    # Tokens that matter inside the array: a quoted string, the closing
    # bracket, or the start of a comment. Scanning token by token keeps a
    # "]" inside a string (e.g. "pkg[extra]") from ending the array.
    token_re = re.compile(r'"([^"]*)"|\'([^\']*)\'|(\])|(#)')
    start_re = re.compile(r'dependencies\s*=\s*\[')
    spec_re = re.compile(
        r'^([A-Za-z0-9_.-]+)\s*(?:\[[^\]]*\])?\s*(?:[<>=!~]+)?\s*([^;]*)'
    )

    specs = []
    in_deps = False
    for line in content.splitlines():
        text = line.strip()
        if not in_deps:
            start = start_re.match(text)
            if start is None:
                continue
            in_deps = True
            # Items may follow the opening bracket on the same line.
            text = text[start.end():]
        for token in token_re.finditer(text):
            if token.group(3):
                # Closing bracket: the array ends here, even mid-line.
                in_deps = False
                break
            if token.group(4):
                # Rest of the line is a comment.
                break
            spec = token.group(1) if token.group(1) is not None else token.group(2)
            if spec:
                specs.append(spec)

    deps = []
    for spec in specs:
        parsed = spec_re.match(spec)
        if parsed:
            deps.append({
                'package': parsed.group(1),
                'version': parsed.group(2).strip(),
                'constraint': spec,
            })
    return deps
|
|
|
|
|
|
@register_parser('go')
|
|
def parse_go_mod(content: str) -> List[Dict[str, str]]:
    """Parse the ``require`` directives of a go.mod file.

    Handles both the single-line form (``require module version``) and the
    block form (``require ( ... )``). Trailing ``//`` comments such as
    ``// indirect`` are ignored.

    Args:
        content: Full text of the go.mod file.

    Returns:
        One dict per required module with the keys ``package`` (module
        path), ``version`` and ``constraint`` (same value as ``version``).
    """
    deps = []
    in_block = False
    for raw_line in content.splitlines():
        # Strip the comment first so "// indirect" never reaches the parser.
        line = raw_line.split('//', 1)[0].strip()
        if not line:
            continue
        parts = line.split()

        if in_block:
            # Block membership is tracked explicitly; indentation cannot be
            # used because the line has already been stripped.
            if line.startswith(')'):
                in_block = False
            elif len(parts) >= 2:
                deps.append({'package': parts[0], 'version': parts[1], 'constraint': parts[1]})
        elif re.fullmatch(r'require\s*\(', line):
            in_block = True
        elif parts[0] == 'require' and len(parts) >= 3:
            deps.append({'package': parts[1], 'version': parts[2], 'constraint': parts[2]})
    return deps
|
|
|
|
|
|
@register_parser('cargo')
|
|
def parse_cargo_toml(content: str) -> List[Dict[str, str]]:
    """Parse the ``[dependencies]`` table of a Cargo.toml file.

    This is a line-based scan, not a full TOML parser. Only entries written
    directly under the ``[dependencies]`` header are read; any other table
    header ends the section.

    Args:
        content: Full text of the Cargo.toml file.

    Returns:
        One dict per dependency with the keys ``package``, ``version`` and
        ``constraint``. For a plain string value (``rand = "0.8"``) both
        ``version`` and ``constraint`` are that string. For an inline table
        (``serde = { version = "1.0", ... }``) they are the table's
        ``version`` value; when the table has no ``version`` (path or git
        dependencies) ``version`` is ``''`` and ``constraint`` holds the
        table text as written.
    """
    deps = []
    in_deps = False
    for line in content.splitlines():
        stripped = line.strip()
        # Comment lines can contain "=" and must not be read as entries.
        if not stripped or stripped.startswith('#'):
            continue
        if stripped.startswith('['):
            header = stripped.split('#', 1)[0].strip()
            in_deps = header == '[dependencies]'
            continue
        if not in_deps or '=' not in stripped:
            continue

        name_part, value = stripped.split('=', 1)
        value = value.strip()
        if value.startswith('{'):
            # Inline table: pull out just the version requirement.
            found = re.search(r'\bversion\s*=\s*(["\'])(.*?)\1', value)
            version = found.group(2) if found else ''
            constraint = version or value
        else:
            # Plain value: take the quoted string, ignoring trailing comments.
            quoted = re.match(r'(["\'])(.*?)\1', value)
            version = quoted.group(2) if quoted else value.split('#', 1)[0].strip()
            constraint = version
        deps.append({'package': name_part.strip(), 'version': version, 'constraint': constraint})
    return deps
|
|
|
|
|
|
# ─── File Discovery ─────────────────────────────────────────────────────────
|
|
|
|
def find_manifest_files(root: Path) -> Dict[str, List[Path]]:
    """Find every known manifest file under *root*.

    The tree is walked once. Directories named ``.git``, ``node_modules``,
    ``__pycache__``, ``.venv`` or ``venv`` are pruned, so their contents are
    never visited. The match is on the exact directory name, so look-alikes
    such as ``.github`` are still scanned, and the location of *root* itself
    has no influence on what is skipped.

    Args:
        root: Directory to search.

    Returns:
        A dict with one key per file name in MANIFEST_PATTERNS, each mapped
        to the list of matching paths (empty when none were found).
    """
    skip_dirs = {'.git', 'node_modules', '__pycache__', '.venv', 'venv'}
    found: Dict[str, List[Path]] = {name: [] for name in MANIFEST_PATTERNS}
    for dirpath, dirnames, filenames in os.walk(root):
        # Assigning in place tells os.walk not to descend into these
        # directories; sorting keeps the traversal order deterministic.
        dirnames[:] = sorted(d for d in dirnames if d not in skip_dirs)
        for filename in sorted(filenames):
            if filename in found:
                found[filename].append(Path(dirpath) / filename)
    return found
|
|
|
|
|
|
# ─── Main Scanner ────────────────────────────────────────────────────────────
|
|
|
|
def scan_repo(repo_path: Path) -> Dict[str, Any]:
    """Scan a single repo directory for dependency manifests.

    Every manifest found by find_manifest_files is read as UTF-8 (undecodable
    bytes are replaced) and handed to the parser registered in PARSERS for
    its manifest type. A file that cannot be read or parsed is reported on
    stderr and skipped; it does not abort the scan.

    Args:
        repo_path: Root directory of the repository.

    Returns:
        A dict with the keys ``repo`` (directory name), ``path``,
        ``files_scanned``, ``dependencies`` and ``dependency_count``. Each
        dependency dict is the parser's output plus ``source`` (manifest
        file name), ``file`` (path relative to *repo_path*) and ``repo``.
    """
    repo_name = repo_path.name
    all_deps: List[Dict[str, str]] = []
    files_scanned = 0

    for pattern, paths in find_manifest_files(repo_path).items():
        # Look the parser up in the registry instead of repeating the
        # name -> function mapping here.
        parser = PARSERS.get(MANIFEST_PATTERNS[pattern])
        if parser is None:
            continue

        for fp in paths:
            try:
                content = fp.read_text(encoding='utf-8', errors='replace')
                files_scanned += 1
                rel = fp.relative_to(repo_path)
                for dep in parser(content):
                    dep['source'] = pattern
                    dep['file'] = str(rel)
                    dep['repo'] = repo_name
                    all_deps.append(dep)
            except Exception as e:
                # Best-effort scan: one bad file must not lose the rest.
                print(f" [WARN] Could not parse {fp}: {e}", file=sys.stderr)

    return {
        'repo': repo_name,
        'path': str(repo_path),
        'files_scanned': files_scanned,
        'dependencies': all_deps,
        'dependency_count': len(all_deps),
    }
|
|
|
|
|
|
def scan_repos(repos: List[Path]) -> Dict[str, Any]:
    """Scan multiple repos and aggregate the results.

    Paths that are not directories are reported on stderr and skipped.
    Progress is printed to stderr.

    Args:
        repos: Repository root directories to scan.

    Returns:
        A dict with ``repos`` (per-repo results from scan_repo, keyed by
        directory name) and ``summary`` (``total_repos``,
        ``total_files_scanned``, ``total_dependencies``). When two repos
        share a directory name, the later one is keyed by its full path so
        that neither result is lost.
    """
    results: Dict[str, Any] = {}
    total_deps = 0
    total_files = 0
    for repo in repos:
        if not repo.is_dir():
            print(f"[WARN] Skipping {repo}: not a directory", file=sys.stderr)
            continue
        print(f"Scanning {repo.name}...", file=sys.stderr)
        result = scan_repo(repo)

        # Same basename in different parents would otherwise overwrite the
        # earlier entry while the totals below still counted both.
        key = repo.name
        if key in results:
            key = str(repo)
        results[key] = result

        total_deps += result['dependency_count']
        total_files += result['files_scanned']

    return {
        'repos': results,
        'summary': {
            'total_repos': len(results),
            'total_files_scanned': total_files,
            'total_dependencies': total_deps,
        },
    }
|
|
|
|
|
|
# ─── Output ─────────────────────────────────────────────────────────────────
|
|
|
|
def output_json(data: Dict[str, Any], out_path: Optional[Path] = None) -> None:
    """Serialize *data* as 2-space-indented JSON.

    With no *out_path* the JSON is printed to stdout. Otherwise it is
    written to *out_path* and a confirmation line is printed to stderr.
    """
    rendered = json.dumps(data, indent=2)

    if not out_path:
        print(rendered)
        return

    out_path.write_text(rendered)
    print(f"Written: {out_path}", file=sys.stderr)
|
|
|
|
|
|
def output_markdown(data: Dict[str, Any], out_path: Optional[Path] = None) -> None:
    """Render the scan result as a Markdown report.

    The report has a title, a UTC generation timestamp, a one-line summary
    and a table with one row per dependency, ordered by repo name and then
    by package name.

    Args:
        data: A dict with ``summary`` (``total_dependencies``,
            ``total_repos``) and ``repos`` (name -> dict whose
            ``dependencies`` entries have ``file``, ``package`` and
            ``version``).
        out_path: File to write (as UTF-8). When omitted the report is
            printed to stdout.
    """
    # Imported here so the function does not depend on a module-level import.
    from datetime import datetime, timezone

    def cell(value: Any) -> str:
        # A raw "|" (common in npm ranges like "^1 || ^2") or a newline
        # would break the table row, so escape/flatten them.
        return str(value).replace('|', '\\|').replace('\n', ' ')

    summary = data['summary']
    generated = datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S UTC')
    lines = [
        "# Dependency Inventory",
        f"\nGenerated: {generated}",
        f"\n**Summary:** {summary['total_dependencies']} dependencies across {summary['total_repos']} repos",
        "",
        "| Repo | File | Package | Version |",
        "|------|------|---------|---------|",
    ]
    for repo_name, rdata in sorted(data['repos'].items()):
        for dep in sorted(rdata['dependencies'], key=lambda d: d['package']):
            lines.append(
                f"| {cell(repo_name)} | {cell(dep['file'])} | {cell(dep['package'])} | {cell(dep['version'])} |"
            )

    text = '\n'.join(lines) + '\n'
    if out_path:
        out_path.write_text(text, encoding='utf-8')
        print(f"Written: {out_path}", file=sys.stderr)
    else:
        print(text)
|
|
|
|
|
|
# ─── CLI Entry ────────────────────────────────────────────────────────────────
|
|
|
|
def main(argv: Optional[List[str]] = None) -> None:
    """Command-line entry point: scan repos and emit the inventory.

    Repo selection, in order of precedence:
      1. ``--repos``: comma-separated repo paths (empty entries ignored);
      2. ``--repos-dir``: every non-hidden sub-directory, in sorted order;
      3. neither: the parent of the directory containing this script.

    Args:
        argv: Argument list to parse. Defaults to ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser(description="Generate org-wide dependency inventory")
    parser.add_argument('--repos-dir', help='Directory containing multiple repos')
    parser.add_argument('--repos', help='Comma-separated list of repo paths')
    parser.add_argument('--output', '-o', help='Output file (default: stdout)')
    parser.add_argument('--format', choices=['json', 'markdown'], default='json',
                        help='Output format (default: json)')
    args = parser.parse_args(argv)

    if args.repos:
        # Skip empty segments ("a,,b" or a trailing comma); Path('') would
        # otherwise silently mean the current directory.
        repo_paths = [
            Path(entry.strip()).expanduser()
            for entry in args.repos.split(',')
            if entry.strip()
        ]
    elif args.repos_dir:
        base = Path(args.repos_dir).expanduser()
        if not base.is_dir():
            # Exit with a usage error instead of an iterdir() traceback.
            parser.error(f"--repos-dir is not a directory: {base}")
        # Sorted so that output order does not depend on the filesystem.
        repo_paths = sorted(
            p for p in base.iterdir() if p.is_dir() and not p.name.startswith('.')
        )
    else:
        repo_paths = [Path(__file__).resolve().parent.parent]

    out_path = Path(args.output).expanduser() if args.output else None
    data = scan_repos(repo_paths)

    if args.format == 'json':
        output_json(data, out_path)
    else:
        output_markdown(data, out_path)


if __name__ == '__main__':
    main()
|