#!/usr/bin/env python3
"""
Dependency Inventory — Scan repos and list third-party dependencies.

Reads:    package.json, requirements.txt, go.mod, Cargo.toml, pyproject.toml
Extracts: package name, version constraint, source file/repo
Outputs:  JSON (default) or markdown table

Usage:
    python3 scripts/dependency_inventory.py --repos-dir ~/repos/
    python3 scripts/dependency_inventory.py --repos ~/repo1,~/repo2 --format markdown
"""

import argparse
import json
import os
import re
import sys
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict, List, Optional

# Mapping of manifest filename to canonical parser name.  Keys are passed
# verbatim to Path.rglob(), so they must be exact filenames.
MANIFEST_PATTERNS = {
    'requirements.txt': 'requirements',
    'package.json': 'npm',
    'pyproject.toml': 'pyproject',
    'go.mod': 'go',
    'Cargo.toml': 'cargo',
}

# Parser registry: canonical name -> parser function, populated by
# @register_parser.  scan_repo() dispatches through this table.
PARSERS: Dict[str, Any] = {}


def register_parser(name: str):
    """Decorator that registers a parser function under *name* in PARSERS."""
    def decorator(fn):
        PARSERS[name] = fn
        return fn
    return decorator


# ─── Parsers ────────────────────────────────────────────────────────────────

# Requirement line shape: name[extras] <op> version [; markers] [# comment]
_REQ_NAME_RE = re.compile(r'^([A-Za-z0-9._-]+(?:\[[^\]]+\])?)\s*(.*)$')
# First version bound; handles ==, !=, <=, >=, ~=, ===, <, >, and bare '='.
_REQ_VER_RE = re.compile(r'^(?:===|==|!=|<=|>=|~=|<|>|=)\s*([^,\s]+)')


@register_parser('requirements')
def parse_requirements(content: str) -> List[Dict[str, str]]:
    """Parse requirements.txt — one requirement per line.

    Skips blank lines, comments, and pip option lines (``-r``, ``-e``,
    ``--hash`` ...).  Environment markers (after ';') and trailing inline
    comments are dropped before parsing.  Each dep record carries:
    ``package`` (name incl. extras), ``version`` (first version bound, may
    be empty), ``constraint`` (full specifier text after the name).
    """
    deps = []
    for raw in content.splitlines():
        line = raw.strip()
        if not line or line.startswith('#') or line.startswith('-'):
            continue
        # Strip environment markers and inline comments before parsing.
        spec = re.split(r'[;#]', line)[0].strip()
        m = _REQ_NAME_RE.match(spec)
        if not m or not m.group(1):
            continue  # not a name-based requirement (e.g. a bare URL)
        name, constraint = m.group(1), m.group(2).strip()
        vm = _REQ_VER_RE.match(constraint)
        deps.append({
            'package': name,
            'version': vm.group(1) if vm else '',
            'constraint': constraint,
        })
    return deps


@register_parser('npm')
def parse_package_json(content: str) -> List[Dict[str, str]]:
    """Parse package.json dependency sections.

    Covers dependencies, devDependencies, peerDependencies and
    optionalDependencies.  Returns [] on malformed or non-object JSON
    rather than raising, so one broken manifest never aborts a scan.
    """
    try:
        data = json.loads(content)
    except json.JSONDecodeError:
        return []
    if not isinstance(data, dict):
        return []
    deps = []
    for section in ('dependencies', 'devDependencies',
                    'peerDependencies', 'optionalDependencies'):
        entries = data.get(section, {})
        if not isinstance(entries, dict):
            continue
        for name, ver in entries.items():
            deps.append({
                'package': name,
                'version': ver,
                'constraint': ver,
                'type': section,
            })
    return deps


@register_parser('pyproject')
def parse_pyproject_toml(content: str) -> List[Dict[str, str]]:
    """Parse ``[project] dependencies`` from pyproject.toml.

    Line-based scan (avoids a TOML parser dependency): collects the string
    literals inside the first ``dependencies = [...]`` array, then splits
    each PEP 508 spec into name / version text.
    """
    dep_buffer = ''
    in_deps = False
    for line in content.splitlines():
        stripped = line.strip()
        if stripped.startswith('dependencies = ['):
            in_deps = True
            remainder = stripped.split('=', 1)[1].strip()
            # Drop the opening '['; a one-line array keeps its trailing ']'
            # but the quote-matching below is unaffected by it.
            dep_buffer = remainder[1:] if remainder.startswith('[') else remainder
            continue
        if in_deps:
            if stripped.startswith(']'):
                in_deps = False
                continue
            dep_buffer += ' ' + line

    deps = []
    for match in re.finditer(r'"([^"]+)"', dep_buffer.strip().rstrip(',')):
        spec = match.group(1)
        m = re.match(r'^([A-Za-z0-9_.-]+)\s*(?:[<>=!~]+)?\s*(.*)$', spec)
        if m:
            name, ver = m.groups()
            deps.append({
                'package': name,
                'version': (ver or '').strip(),
                'constraint': spec,
            })
    return deps


@register_parser('go')
def parse_go_mod(content: str) -> List[Dict[str, str]]:
    """Parse go.mod require statements.

    Handles both single-line ``require mod v1.2.3`` and the grouped
    ``require ( ... )`` block form.  (The block form is the common layout;
    entries are tracked with an explicit in-block flag.)
    """
    deps = []
    in_block = False
    for raw in content.splitlines():
        line = raw.strip()
        if line.startswith('require ('):
            in_block = True
            continue
        if in_block:
            if line == ')':
                in_block = False
                continue
            parts = line.split()
            # Module paths always contain '/'; skips blank/comment lines.
            if len(parts) >= 2 and '/' in parts[0]:
                deps.append({'package': parts[0], 'version': parts[1],
                             'constraint': parts[1]})
        elif line.startswith('require '):
            parts = line.split()
            if len(parts) >= 3:
                deps.append({'package': parts[1], 'version': parts[2],
                             'constraint': parts[2]})
    return deps


@register_parser('cargo')
def parse_cargo_toml(content: str) -> List[Dict[str, str]]:
    """Parse dependency sections from Cargo.toml.

    Covers [dependencies], [dev-dependencies] and [build-dependencies].
    For inline-table entries (``serde = { version = "1.0", ... }``) the
    ``version`` key is extracted when present.
    """
    deps = []
    in_deps = False
    for line in content.splitlines():
        stripped = line.strip()
        if stripped.startswith('['):
            # Any new section header either enters or leaves dependency mode.
            in_deps = stripped in ('[dependencies]', '[dev-dependencies]',
                                   '[build-dependencies]')
            continue
        if in_deps and '=' in stripped and not stripped.startswith('#'):
            name_part, ver_part = stripped.split('=', 1)
            name = name_part.strip()
            ver = ver_part.strip()
            if ver.startswith('{'):
                m = re.search(r'version\s*=\s*"([^"]+)"', ver)
                ver = m.group(1) if m else ver
            else:
                ver = ver.strip('"').strip("'")
            deps.append({'package': name, 'version': ver, 'constraint': ver})
    return deps


# ─── File Discovery ─────────────────────────────────────────────────────────

# Directory names whose contents are vendored/generated, never first-party.
_SKIP_DIRS = ('.git', 'node_modules', '__pycache__', '.venv', 'venv')


def find_manifest_files(root: Path) -> Dict[str, List[Path]]:
    """Find all manifest files under *root*, keyed by manifest filename.

    Paths inside vendored/generated directories (_SKIP_DIRS) are excluded.
    """
    found: Dict[str, List[Path]] = {k: [] for k in MANIFEST_PATTERNS}
    for pattern in MANIFEST_PATTERNS:
        for path in root.rglob(pattern):
            # Compare path *components*, not substrings, so a repo named
            # e.g. "my-venv-tool" is not skipped by accident.
            if not any(part in _SKIP_DIRS for part in path.parts):
                found[pattern].append(path)
    return found


# ─── Main Scanner ───────────────────────────────────────────────────────────

def scan_repo(repo_path: Path) -> Dict[str, Any]:
    """Scan a single repo directory for dependency manifests.

    Returns a dict with the repo name/path, number of files parsed, and a
    flat list of dependency records, each tagged with source/file/repo.
    Unreadable manifests are warned about on stderr and skipped.
    """
    repo_name = repo_path.name
    found = find_manifest_files(repo_path)
    all_deps: List[Dict[str, str]] = []
    files_scanned = 0
    for pattern, paths in found.items():
        # Dispatch through the registry built by @register_parser instead
        # of a hand-written if/elif chain duplicating the same mapping.
        parser = PARSERS.get(MANIFEST_PATTERNS[pattern])
        if parser is None:
            continue
        for fp in paths:
            try:
                content = fp.read_text(encoding='utf-8', errors='replace')
                files_scanned += 1
                rel = fp.relative_to(repo_path)
                for dep in parser(content):
                    dep['source'] = pattern
                    dep['file'] = str(rel)
                    dep['repo'] = repo_name
                    all_deps.append(dep)
            except Exception as e:
                # Best effort: one broken manifest must not abort the scan.
                print(f" [WARN] Could not parse {fp}: {e}", file=sys.stderr)
    return {
        'repo': repo_name,
        'path': str(repo_path),
        'files_scanned': files_scanned,
        'dependencies': all_deps,
        'dependency_count': len(all_deps),
    }


def scan_repos(repos: List[Path]) -> Dict[str, Any]:
    """Scan multiple repos and aggregate per-repo results plus a summary."""
    results = {}
    total_deps = 0
    total_files = 0
    for repo in repos:
        if not repo.is_dir():
            print(f"[WARN] Skipping {repo}: not a directory", file=sys.stderr)
            continue
        print(f"Scanning {repo.name}...", file=sys.stderr)
        result = scan_repo(repo)
        results[repo.name] = result
        total_deps += result['dependency_count']
        total_files += result['files_scanned']
    return {
        'repos': results,
        'summary': {
            'total_repos': len(results),
            'total_files_scanned': total_files,
            'total_dependencies': total_deps,
        }
    }


# ─── Output ─────────────────────────────────────────────────────────────────

def output_json(data: Dict[str, Any], out_path: Optional[Path] = None) -> None:
    """Write *data* as indented JSON to *out_path*, or stdout if None."""
    text = json.dumps(data, indent=2)
    if out_path:
        out_path.write_text(text)
        print(f"Written: {out_path}", file=sys.stderr)
    else:
        print(text)


def output_markdown(data: Dict[str, Any], out_path: Optional[Path] = None) -> None:
    """Write *data* as a markdown table to *out_path*, or stdout if None."""
    lines = []
    lines.append("# Dependency Inventory")
    # Real generation timestamp (was a shipped "TODO" placeholder).
    generated = datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M UTC')
    lines.append(f"\nGenerated: {generated}")
    lines.append(
        f"\n**Summary:** {data['summary']['total_dependencies']} dependencies "
        f"across {data['summary']['total_repos']} repos")
    lines.append("")
    lines.append("| Repo | File | Package | Version |")
    lines.append("|------|------|---------|---------|")
    for repo_name, rdata in sorted(data['repos'].items()):
        for dep in sorted(rdata['dependencies'], key=lambda d: d['package']):
            lines.append(
                f"| {repo_name} | {dep['file']} | {dep['package']} | {dep['version']} |")
    text = '\n'.join(lines) + '\n'
    if out_path:
        out_path.write_text(text)
        print(f"Written: {out_path}", file=sys.stderr)
    else:
        print(text)


# ─── CLI Entry ──────────────────────────────────────────────────────────────

def main():
    """CLI entry point: resolve repo paths, scan, emit in chosen format."""
    parser = argparse.ArgumentParser(
        description="Generate org-wide dependency inventory")
    parser.add_argument('--repos-dir', help='Directory containing multiple repos')
    parser.add_argument('--repos', help='Comma-separated list of repo paths')
    parser.add_argument('--output', '-o', help='Output file (default: stdout)')
    parser.add_argument('--format', choices=['json', 'markdown'], default='json',
                        help='Output format (default: json)')
    args = parser.parse_args()

    if args.repos:
        repo_paths = [Path(p.strip()).expanduser() for p in args.repos.split(',')]
    elif args.repos_dir:
        base = Path(args.repos_dir).expanduser()
        repo_paths = [p for p in base.iterdir()
                      if p.is_dir() and not p.name.startswith('.')]
    else:
        # Default: scan the repo this script lives in (scripts/..).
        repo_paths = [Path(__file__).resolve().parent.parent]

    out_path = Path(args.output).expanduser() if args.output else None
    data = scan_repos(repo_paths)
    if args.format == 'json':
        output_json(data, out_path)
    else:
        output_markdown(data, out_path)


if __name__ == '__main__':
    main()