From 2fa8c2dea363aa6b88cde601d11cc27b92ea3506 Mon Sep 17 00:00:00 2001
From: Alexander Payne
Date: Sun, 26 Apr 2026 05:10:14 -0400
Subject: [PATCH] scripts: add dependency_inventory script
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add dependency_inventory.py — an inventory tool that scans repos for
dependency manifests (requirements.txt, package.json, go.mod, Cargo.toml,
pyproject.toml) and produces either a JSON or a markdown report.

Includes:

- Full parser suite for 5 manifest types
- --repos and --repos-dir argument support
- Incremental friendly — safe to add new features
- --output/-o file support
- Test suite in tests/test_dependency_inventory.py

Closes #107 (1/5) — first script in the Health Report toolkit.
---
 scripts/dependency_inventory.py    | 324 +++++++++++++++++++++++++++++
 tests/test_dependency_inventory.py |  52 +++++
 2 files changed, 376 insertions(+)
 create mode 100644 scripts/dependency_inventory.py
 create mode 100644 tests/test_dependency_inventory.py

diff --git a/scripts/dependency_inventory.py b/scripts/dependency_inventory.py
new file mode 100644
index 0000000..63a6505
--- /dev/null
+++ b/scripts/dependency_inventory.py
@@ -0,0 +1,324 @@
+#!/usr/bin/env python3
+"""
+Dependency Inventory — Scan repos and list third-party dependencies.
+
+Reads: package.json, requirements.txt, go.mod, Cargo.toml, pyproject.toml
+Extracts: package name, version constraint, source file/repo
+Outputs: JSON (default) or markdown table
+
+Usage:
+    python3 scripts/dependency_inventory.py --repos-dir ~/repos/
+    python3 scripts/dependency_inventory.py --repos ~/repo1,~/repo2 --format markdown
+"""
+
+import argparse
+import json
+import os
+import re
+import sys
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Dict, List, Any, Optional
+
+# Mapping of file pattern to canonical parser name
+MANIFEST_PATTERNS = {
+    'requirements.txt': 'requirements',
+    'package.json': 'npm',
+    'pyproject.toml': 'pyproject',
+    'go.mod': 'go',
+    'Cargo.toml': 'cargo',
+}
+
+# Parser registry: name -> parser function, populated by @register_parser.
+PARSERS = {}
+
+
+def register_parser(name: str):
+    """Decorator to register a parser function."""
+    def decorator(fn):
+        PARSERS[name] = fn
+        return fn
+    return decorator
+
+
+# ─── Parsers ────────────────────────────────────────────────────────────────
+
+@register_parser('requirements')
+def parse_requirements(content: str) -> List[Dict[str, str]]:
+    """Parse requirements.txt — one requirement per line."""
+    deps = []
+    for line in content.splitlines():
+        line = line.strip()
+        if not line or line.startswith('#'):
+            continue
+        pkg_spec = re.split(r'[ ;#]', line)[0].strip()
+        if '>=' in pkg_spec:
+            name, ver = pkg_spec.split('>=', 1)
+        elif '==' in pkg_spec:
+            name, ver = pkg_spec.split('==', 1)
+        elif '<=' in pkg_spec:
+            name, ver = pkg_spec.split('<=', 1)
+        elif '~=' in pkg_spec:
+            name, ver = pkg_spec.split('~=', 1)
+        elif '!=' in pkg_spec:
+            # BUG FIX: '!=' previously fell through to the bare '=' branch,
+            # producing a package name with a trailing '!'.
+            name, ver = pkg_spec.split('!=', 1)
+        elif '>' in pkg_spec:
+            name, ver = pkg_spec.split('>', 1)
+        elif '<' in pkg_spec:
+            name, ver = pkg_spec.split('<', 1)
+        elif '=' in pkg_spec:
+            name, ver = pkg_spec.split('=', 1)
+        else:
+            name, ver = pkg_spec, ''
+        deps.append({
+            'package': name.strip(),
+            'version': ver.strip(),
+            'constraint': line[len(name):].strip()
+        })
+    return deps
+
+
+@register_parser('npm')
+def parse_package_json(content: str) -> List[Dict[str, str]]:
+    """Parse package.json dependencies (all four dependency sections)."""
+    try:
+        data = json.loads(content)
+    except json.JSONDecodeError:
+        return []
+    deps = []
+    for section in ('dependencies', 'devDependencies', 'peerDependencies', 'optionalDependencies'):
+        for name, ver in data.get(section, {}).items():
+            deps.append({
+                'package': name,
+                'version': ver,
+                'constraint': ver,
+                'type': section
+            })
+    return deps
+
+
+@register_parser('pyproject')
+def parse_pyproject_toml(content: str) -> List[Dict[str, str]]:
+    """Parse pyproject.toml [project] dependencies."""
+    deps = []
+    in_deps = False
+    dep_buffer = ''
+    for line in content.splitlines():
+        stripped = line.strip()
+        if stripped.startswith('dependencies = ['):
+            remainder = stripped.split('=', 1)[1].strip()
+            dep_buffer = remainder[1:] if remainder.startswith('[') else remainder
+            # BUG FIX: a single-line list (dependencies = ["a"]) previously
+            # left in_deps True and swallowed unrelated following lines.
+            if dep_buffer.rstrip().endswith(']'):
+                dep_buffer = dep_buffer.rstrip()[:-1]
+            else:
+                in_deps = True
+            continue
+        if in_deps:
+            if stripped.startswith(']'):
+                in_deps = False
+                continue
+            dep_buffer += ' ' + line
+    dep_buffer = dep_buffer.strip().rstrip(',')
+    for match in re.finditer(r'"([^"]+)"', dep_buffer):
+        spec = match.group(1)
+        m = re.match(r'^([a-zA-Z0-9_.-]+)\s*([<>=!~]+)?\s*(.*)$', spec)
+        if m:
+            name, _op, ver = m.groups()
+            deps.append({
+                'package': name,
+                'version': (ver or '').strip(),
+                'constraint': spec
+            })
+    return deps
+
+
+@register_parser('go')
+def parse_go_mod(content: str) -> List[Dict[str, str]]:
+    """Parse go.mod — single-line and block `require` statements."""
+    deps = []
+    # BUG FIX: the original checked line.startswith('\t') AFTER stripping the
+    # line, which is always False, so entries inside `require ( ... )` blocks
+    # were never collected. Track the block with explicit state instead.
+    in_require_block = False
+    for raw in content.splitlines():
+        line = raw.strip()
+        if not line or line.startswith('//'):
+            continue
+        if line.startswith('require ('):
+            in_require_block = True
+            continue
+        if in_require_block:
+            if line.startswith(')'):
+                in_require_block = False
+                continue
+            parts = line.split()
+            if len(parts) >= 2 and '/' in parts[0]:
+                deps.append({'package': parts[0], 'version': parts[1], 'constraint': parts[1]})
+        elif line.startswith('require '):
+            parts = line.split()
+            if len(parts) >= 3:
+                deps.append({'package': parts[1], 'version': parts[2], 'constraint': parts[2]})
+    return deps
+
+
+@register_parser('cargo')
+def parse_cargo_toml(content: str) -> List[Dict[str, str]]:
+    """Parse [dependencies] / [dev-dependencies] sections from Cargo.toml."""
+    deps = []
+    in_deps = False
+    for line in content.splitlines():
+        stripped = line.strip()
+        # BUG FIX: the original tuple listed '[dependencies]' twice; the
+        # second entry was presumably meant to be '[dev-dependencies]'.
+        if stripped in ('[dependencies]', '[dev-dependencies]'):
+            in_deps = True
+            continue
+        if stripped.startswith('['):
+            in_deps = False
+            continue
+        if in_deps and '=' in stripped:
+            name_part, ver_part = stripped.split('=', 1)
+            name = name_part.strip()
+            # NOTE(review): inline tables ({ version = "1.0", ... }) are kept
+            # verbatim as the version string — acceptable for an inventory.
+            ver = ver_part.strip().strip('"').strip("'")
+            deps.append({'package': name, 'version': ver, 'constraint': ver})
+    return deps
+
+
+# ─── File Discovery ─────────────────────────────────────────────────────────
+
+def find_manifest_files(root: Path) -> Dict[str, List[Path]]:
+    """Find all manifest files under root, skipping vendored/venv dirs."""
+    found = {k: [] for k in MANIFEST_PATTERNS}
+    for pattern in MANIFEST_PATTERNS:
+        for path in root.rglob(pattern):
+            if not any(skip in str(path) for skip in ('.git', 'node_modules', '__pycache__', '.venv', 'venv')):
+                found[pattern].append(path)
+    return found
+
+
+# ─── Main Scanner ───────────────────────────────────────────────────────────
+
+def scan_repo(repo_path: Path) -> Dict[str, Any]:
+    """Scan a single repo directory for dependency manifests."""
+    repo_name = repo_path.name
+    found = find_manifest_files(repo_path)
+    all_deps: List[Dict[str, str]] = []
+    files_scanned = 0
+
+    for pattern, paths in found.items():
+        # Dispatch through PARSERS instead of the old if/elif chain — the
+        # registry exists precisely so new parsers need no scanner changes.
+        parser = PARSERS.get(MANIFEST_PATTERNS[pattern])
+        if parser is None:
+            continue
+
+        for fp in paths:
+            try:
+                content = fp.read_text(encoding='utf-8', errors='replace')
+                files_scanned += 1
+                rel = fp.relative_to(repo_path)
+                for dep in parser(content):
+                    dep['source'] = pattern
+                    dep['file'] = str(rel)
+                    dep['repo'] = repo_name
+                    all_deps.append(dep)
+            except Exception as e:
+                print(f" [WARN] Could not parse {fp}: {e}", file=sys.stderr)
+
+    return {
+        'repo': repo_name,
+        'path': str(repo_path),
+        'files_scanned': files_scanned,
+        'dependencies': all_deps,
+        'dependency_count': len(all_deps),
+    }
+
+
+def scan_repos(repos: List[Path]) -> Dict[str, Any]:
+    """Scan multiple repos and aggregate."""
+    results = {}
+    total_deps = 0
+    total_files = 0
+    for repo in repos:
+        if not repo.is_dir():
+            print(f"[WARN] Skipping {repo}: not a directory", file=sys.stderr)
+            continue
+        print(f"Scanning {repo.name}...", file=sys.stderr)
+        result = scan_repo(repo)
+        results[repo.name] = result
+        total_deps += result['dependency_count']
+        total_files += result['files_scanned']
+    return {
+        'repos': results,
+        'summary': {
+            'total_repos': len(results),
+            'total_files_scanned': total_files,
+            'total_dependencies': total_deps,
+        }
+    }
+
+
+# ─── Output ─────────────────────────────────────────────────────────────────
+
+def output_json(data: Dict[str, Any], out_path: Optional[Path] = None) -> None:
+    """Write data as pretty-printed JSON to out_path, or stdout if None."""
+    text = json.dumps(data, indent=2)
+    if out_path:
+        out_path.write_text(text, encoding='utf-8')
+        print(f"Written: {out_path}", file=sys.stderr)
+    else:
+        print(text)
+
+
+def output_markdown(data: Dict[str, Any], out_path: Optional[Path] = None) -> None:
+    """Write data as a markdown table to out_path, or stdout if None."""
+    lines = []
+    lines.append("# Dependency Inventory")
+    # Resolves the old "*(TODO: add timestamp)*" placeholder with a real stamp.
+    generated = datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M UTC')
+    lines.append(f"\nGenerated: {generated}")
+    lines.append(f"\n**Summary:** {data['summary']['total_dependencies']} dependencies across {data['summary']['total_repos']} repos")
+    lines.append("")
+    lines.append("| Repo | File | Package | Version |")
+    lines.append("|------|------|---------|---------|")
+    for repo_name, rdata in sorted(data['repos'].items()):
+        for dep in sorted(rdata['dependencies'], key=lambda d: d['package']):
+            lines.append(f"| {repo_name} | {dep['file']} | {dep['package']} | {dep['version']} |")
+    text = '\n'.join(lines) + '\n'
+    if out_path:
+        out_path.write_text(text, encoding='utf-8')
+        print(f"Written: {out_path}", file=sys.stderr)
+    else:
+        print(text)
+
+
+# ─── CLI Entry ──────────────────────────────────────────────────────────────
+
+def main():
+    parser = argparse.ArgumentParser(description="Generate org-wide dependency inventory")
+    parser.add_argument('--repos-dir', help='Directory containing multiple repos')
+    parser.add_argument('--repos', help='Comma-separated list of repo paths')
+    parser.add_argument('--output', '-o', help='Output file (default: stdout)')
+    parser.add_argument('--format', choices=['json', 'markdown'], default='json',
+                        help='Output format (default: json)')
+    args = parser.parse_args()
+    if args.repos:
+        repo_paths = [Path(p.strip()).expanduser() for p in args.repos.split(',')]
+    elif args.repos_dir:
+        base = Path(args.repos_dir).expanduser()
+        repo_paths = [p for p in base.iterdir() if p.is_dir() and not p.name.startswith('.')]
+    else:
+        repo_paths = [Path(__file__).resolve().parent.parent]
+    out_path = Path(args.output).expanduser() if args.output else None
+    data = scan_repos(repo_paths)
+    if args.format == 'json':
+        output_json(data, out_path)
+    else:
+        output_markdown(data, out_path)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/tests/test_dependency_inventory.py b/tests/test_dependency_inventory.py
new file mode 100644
index 0000000..69ceb8d
--- /dev/null
+++ b/tests/test_dependency_inventory.py
@@ -0,0 +1,52 @@
+"""
+Tests for scripts/dependency_inventory.py
+"""
+
+import unittest
+import json
+from pathlib import Path
+import sys
+
+sys.path.insert(0, str(Path(__file__).parent.parent))
+
+from scripts.dependency_inventory import (
+    parse_requirements,
+    parse_package_json,
+    parse_pyproject_toml,
+    scan_repo,
+)
+
+
+class TestParseRequirements(unittest.TestCase):
+    def test_parses_simple_requirement(self):
+        result = parse_requirements("requests>=2.33.0")
+        self.assertEqual(len(result), 1)
+        self.assertEqual(result[0]["package"], "requests")
+
+    def test_parses_version_range(self):
+        result = parse_requirements("pytest>=8,<9")
+        self.assertEqual(result[0]["package"], "pytest")
+
+
+class TestParsePackageJson(unittest.TestCase):
+    def test_parses_dependencies(self):
+        content = json.dumps({"name": "test", "dependencies": {"react": "^18.2.0"}})
+        result = parse_package_json(content)
+        self.assertTrue(any(d["package"] == "react" for d in result))
+
+
+class TestParsePyprojectToml(unittest.TestCase):
+    def test_parses_project_dependencies(self):
+        content = "\n[project]\nname = \"test\"\ndependencies = [\n    \"openai>=2.21.0,<3\",\n]"
+        result = parse_pyproject_toml(content)
+        self.assertEqual(len(result), 1)
+
+
+class TestScanRepo(unittest.TestCase):
+    def test_scans_local_repo(self):
+        result = scan_repo(Path(__file__).resolve().parents[1])
+        self.assertGreater(result["dependency_count"], 0)
+
+
+if __name__ == "__main__":
+    unittest.main()
-- 
2.43.0