Compare commits

..

2 Commits

4 changed files with 692 additions and 405 deletions

View File

@@ -1,216 +0,0 @@
#!/usr/bin/env python3
"""
Diff Analyzer — Parse unified diffs and categorize every change.
Pipeline 6.1 for Compounding Intelligence.
"""
import re
from dataclasses import dataclass, field, asdict
from enum import Enum
from typing import List, Dict, Any, Optional
class ChangeCategory(Enum):
    """How a hunk changes a file, derived from its +/- line mix."""
    ADDED = "added"        # hunk contains only '+' lines
    DELETED = "deleted"    # hunk contains only '-' lines
    MODIFIED = "modified"  # hunk contains both '+' and '-' lines
    MOVED = "moved"        # reserved; never assigned by DiffAnalyzer below
    CONTEXT = "context"    # hunk contains neither '+' nor '-' lines
@dataclass
class Hunk:
    """A single diff hunk: header text, old/new line ranges, raw lines, category."""
    header: str
    old_start: int
    old_count: int
    new_start: int
    new_count: int
    lines: List[str] = field(default_factory=list)
    category: ChangeCategory = ChangeCategory.CONTEXT

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a plain dict, flattening the category enum to its string value."""
        return {**asdict(self), "category": self.category.value}
@dataclass
class FileChange:
    """All changes to one file within a diff."""
    path: str
    old_path: Optional[str] = None  # original path when the file was renamed
    hunks: List[Hunk] = field(default_factory=list)
    added_lines: int = 0
    deleted_lines: int = 0
    is_new: bool = False
    is_deleted: bool = False
    is_renamed: bool = False
    is_binary: bool = False

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a plain dict; hunks are serialized via Hunk.to_dict()."""
        serialized = asdict(self)
        serialized["hunks"] = [hunk.to_dict() for hunk in self.hunks]
        return serialized
@dataclass
class ChangeSummary:
    """Aggregate stats plus the per-file breakdown."""
    files: List[FileChange] = field(default_factory=list)
    total_added: int = 0
    total_deleted: int = 0
    total_files_changed: int = 0
    total_hunks: int = 0
    new_files: int = 0
    deleted_files: int = 0
    renamed_files: int = 0
    binary_files: int = 0

    def to_dict(self) -> Dict[str, Any]:
        """Serialize aggregate counters first, then the per-file breakdown."""
        return dict(
            total_files_changed=self.total_files_changed,
            total_added=self.total_added,
            total_deleted=self.total_deleted,
            total_hunks=self.total_hunks,
            new_files=self.new_files,
            deleted_files=self.deleted_files,
            renamed_files=self.renamed_files,
            binary_files=self.binary_files,
            files=[fc.to_dict() for fc in self.files],
        )
class DiffAnalyzer:
    """Parses unified diff format and produces a structured ChangeSummary."""

    HUNK_HEADER_RE = re.compile(r"^@@\s+-(\d+)(?:,(\d+))?\s+\+(\d+)(?:,(\d+))?\s+@@(.*)$")
    DIFF_FILE_RE = re.compile(r"^diff --git a/(.*) b/(.*)")
    RENAME_RE = re.compile(r"^rename from (.+)$")
    RENAME_TO_RE = re.compile(r"^rename to (.+)$")
    NEW_FILE_RE = re.compile(r"^new file mode")
    DELETED_FILE_RE = re.compile(r"^deleted file mode")
    BINARY_RE = re.compile(r"^Binary files .* differ")
    # Fallback path source for diffs whose "diff --git" header is absent or
    # does not use the standard "a/... b/..." prefixes.
    NEW_PATH_RE = re.compile(r"^\+\+\+ (?:b/)?(.+)$")

    def analyze(self, diff_text: str) -> ChangeSummary:
        """Parse a unified diff and return a ChangeSummary.

        Empty or whitespace-only input yields an empty summary.
        """
        summary = ChangeSummary()
        if not diff_text or not diff_text.strip():
            return summary
        for file_diff in self._split_files(diff_text):
            fc = self._parse_file_diff(file_diff)
            summary.files.append(fc)
            summary.total_added += fc.added_lines
            summary.total_deleted += fc.deleted_lines
            summary.total_hunks += len(fc.hunks)
            # bools count as 0/1 here
            summary.new_files += fc.is_new
            summary.deleted_files += fc.is_deleted
            summary.renamed_files += fc.is_renamed
            summary.binary_files += fc.is_binary
        summary.total_files_changed = len(summary.files)
        return summary

    def _split_files(self, diff_text: str) -> List[str]:
        """Split a multi-file diff into per-file sections on "diff --git" markers."""
        chunks: List[str] = []
        current: List[str] = []
        for line in diff_text.split("\n"):
            if line.startswith("diff --git ") and current:
                chunks.append("\n".join(current))
                current = [line]
            else:
                current.append(line)
        if current:
            chunks.append("\n".join(current))
        return chunks

    def _parse_file_diff(self, diff_text: str) -> FileChange:
        """Parse a single file's diff section into a FileChange."""
        lines = diff_text.split("\n")
        fc = FileChange(path="")
        # File path: prefer the "diff --git a/X b/Y" header; fall back to the
        # "+++" header so non-standard headers still yield a path.
        for line in lines:
            m = self.DIFF_FILE_RE.match(line)
            if m:
                fc.path = m.group(2)
                break
        else:
            for line in lines:
                m = self.NEW_PATH_RE.match(line)
                if m and m.group(1) != "/dev/null":
                    fc.path = m.group(1)
                    break
        # Special states signalled by extended header lines.
        for line in lines:
            if self.NEW_FILE_RE.match(line):
                fc.is_new = True
            elif self.DELETED_FILE_RE.match(line):
                fc.is_deleted = True
            else:
                m = self.RENAME_RE.match(line)
                if m:
                    # Match once and reuse it (previously matched twice).
                    fc.old_path = m.group(1)
                    fc.is_renamed = True
                elif self.BINARY_RE.match(line):
                    fc.is_binary = True
                    return fc  # Binary diffs carry no hunks.
        if fc.is_renamed:
            for line in lines:
                m = self.RENAME_TO_RE.match(line)
                if m:
                    fc.path = m.group(1)
        # Parse hunks: a header opens a hunk; +/-/space lines belong to it.
        current_hunk: Optional[Hunk] = None
        for line in lines:
            m = self.HUNK_HEADER_RE.match(line)
            if m:
                if current_hunk:
                    self._classify_hunk(current_hunk, fc)
                    fc.hunks.append(current_hunk)
                current_hunk = Hunk(
                    header=m.group(5).strip(),
                    old_start=int(m.group(1)),
                    # A missing count means 1 per the unified diff format.
                    old_count=int(m.group(2) or 1),
                    new_start=int(m.group(3)),
                    new_count=int(m.group(4) or 1),
                )
            elif current_hunk and line[:1] in ("+", "-", " "):
                current_hunk.lines.append(line)
        if current_hunk:
            self._classify_hunk(current_hunk, fc)
            fc.hunks.append(current_hunk)
        return fc

    def _classify_hunk(self, hunk: Hunk, fc: FileChange) -> None:
        """Assign a category to the hunk and accumulate line counts onto fc."""
        added = sum(1 for l in hunk.lines if l.startswith("+"))
        deleted = sum(1 for l in hunk.lines if l.startswith("-"))
        fc.added_lines += added
        fc.deleted_lines += deleted
        if added and deleted:
            hunk.category = ChangeCategory.MODIFIED
        elif added:
            hunk.category = ChangeCategory.ADDED
        elif deleted:
            hunk.category = ChangeCategory.DELETED
        else:
            hunk.category = ChangeCategory.CONTEXT

506
scripts/license_checker.py Normal file
View File

@@ -0,0 +1,506 @@
#!/usr/bin/env python3
"""
License Checker — Pipeline 5.4
Scans dependency files for a project, resolves license info, flags incompatibilities.
Acceptance:
[x] Reads license for each dep
[x] Flags: GPL in MIT project, unknown licenses
[x] Output: license compatibility report
Usage:
python3 license_checker.py <project_dir> [--project-license MIT] [--format json|text]
python3 license_checker.py <project_dir> --scan-deps
"""
import argparse
import json
import os
import re
import subprocess
import sys
import urllib.request
import urllib.error
from dataclasses import dataclass, field, asdict
from enum import Enum
from pathlib import Path
from typing import Optional
class Severity(Enum):
    """Severity of a dependency's license-compatibility finding."""
    OK = "ok"            # compatible with the project license
    WARNING = "warning"  # needs manual review (e.g. unknown license)
    ERROR = "error"      # incompatible (e.g. copyleft dep in a permissive project)
    UNKNOWN = "unknown"  # license string not recognized at all
# SPDX license compatibility matrix
# Key: (dependency_license, project_license) -> compatible?
# Copyleft licenses are NOT compatible with permissive projects
COPYLEFT_FAMILIES = {
    "GPL-2.0", "GPL-2.0-only", "GPL-2.0-or-later",
    "GPL-3.0", "GPL-3.0-only", "GPL-3.0-or-later",
    "AGPL-3.0", "AGPL-3.0-only", "AGPL-3.0-or-later",
    "LGPL-2.0", "LGPL-2.1", "LGPL-3.0",
    "LGPL-2.0-only", "LGPL-2.1-only", "LGPL-3.0-only",
    "LGPL-2.0-or-later", "LGPL-2.1-or-later", "LGPL-3.0-or-later",
    "MPL-2.0",  # Weak copyleft — file-level
    "EUPL-1.1", "EUPL-1.2",
    "OSL-3.0",
    "SSPL-1.0",  # NOTE(review): source-available, not OSI-approved; grouped here to be flagged
    "CC-BY-SA-4.0", "CC-BY-SA-3.0",
    # NOTE(review): CC-BY-NC is non-commercial rather than copyleft in the
    # strict sense; grouped here so it is flagged in permissive projects —
    # confirm this is the intended policy.
    "CC-BY-NC-4.0", "CC-BY-NC-3.0",
}
# Licenses considered safe in any project covered by this checker.
PERMISSIVE_LICENSES = {
    "MIT", "BSD-2-Clause", "BSD-3-Clause", "Apache-2.0",
    "ISC", "Unlicense", "CC0-1.0", "0BSD", "BSL-1.0",
    "Zlib", "PSF-2.0", "Python-2.0",
}
# Common aliases: lower-cased free-form license strings -> SPDX id.
# Keys must stay lower-case; normalize_license() lower-cases before lookup.
LICENSE_ALIASES = {
    "mit": "MIT",
    "bsd": "BSD-3-Clause",
    "bsd-2": "BSD-2-Clause",
    "bsd-3": "BSD-3-Clause",
    "bsd license": "BSD-3-Clause",
    "apache": "Apache-2.0",
    "apache 2.0": "Apache-2.0",
    "apache-2.0": "Apache-2.0",
    "apache software license": "Apache-2.0",
    "apache software license 2.0": "Apache-2.0",
    "gpl": "GPL-3.0",
    "gpl-2": "GPL-2.0",
    "gpl-3": "GPL-3.0",
    "gplv2": "GPL-2.0",
    "gplv3": "GPL-3.0",
    "gnu general public license": "GPL-3.0",
    "gnu general public license v3": "GPL-3.0",
    "gnu general public license v2": "GPL-2.0",
    "gnu lesser general public license v2": "LGPL-2.1",
    "gnu lesser general public license v3": "LGPL-3.0",
    "lgpl": "LGPL-3.0",
    "lgpl-2.1": "LGPL-2.1",
    "lgpl-3": "LGPL-3.0",
    "agpl": "AGPL-3.0",
    "agpl-3.0": "AGPL-3.0",
    "agplv3": "AGPL-3.0",
    "isc": "ISC",
    "mpl": "MPL-2.0",
    "mpl-2.0": "MPL-2.0",
    "mozilla public license 2.0": "MPL-2.0",
    "unlicense": "Unlicense",
    "public domain": "Unlicense",
    "cc0": "CC0-1.0",
    "cc0-1.0": "CC0-1.0",
    "psf": "PSF-2.0",
    "python software foundation license": "PSF-2.0",
    "the mit license": "MIT",
    "mit license": "MIT",
}
@dataclass
class DepLicense:
    """One dependency plus its resolved license and compatibility verdict."""
    name: str
    version: str = ""         # version spec as found in the dep file, if any
    license: str = "UNKNOWN"  # normalized SPDX id once resolved
    source: str = ""  # where we found the dep (requirements.txt, package.json, etc.)
    severity: Severity = Severity.UNKNOWN  # set by generate_report()
    message: str = ""                      # human-readable compatibility note
@dataclass
class LicenseReport:
    """Full compatibility report for one project."""
    project_dir: str
    project_license: str = "MIT"  # SPDX id of the project's own license
    dependencies: list = field(default_factory=list)  # asdict(DepLicense) dicts
    summary: dict = field(default_factory=dict)       # per-severity counts + total
    errors: list = field(default_factory=list)        # "name: message" strings
    warnings: list = field(default_factory=list)      # "name: message" strings
def normalize_license(raw: str) -> str:
    """Normalize a free-form license string to an SPDX identifier.

    Returns "UNKNOWN" for empty/unknown input. Falls back to the stripped
    input when the string matches neither a known alias nor a known SPDX id.
    """
    if not raw or raw.strip() in ("UNKNOWN", "UNKNOWN:", ""):
        return "UNKNOWN"
    cleaned = raw.strip().lower()
    # Drop parentheticals like "MIT License (MIT)" and boilerplate words.
    cleaned = re.sub(r"\(.*?\)", "", cleaned).strip()
    cleaned = re.sub(r"\s+license$", "", cleaned).strip()
    cleaned = re.sub(r"^the\s+", "", cleaned).strip()
    if cleaned in LICENSE_ALIASES:
        return LICENSE_ALIASES[cleaned]
    stripped = raw.strip()
    # Already an exact SPDX id?
    if stripped in COPYLEFT_FAMILIES or stripped in PERMISSIVE_LICENSES:
        return stripped
    # Case-insensitive SPDX match (previously "gpl-3.0" or "Bsd-3-Clause"
    # escaped normalization because the check was case-sensitive).
    spdx_by_lower = {spdx.lower(): spdx
                     for spdx in (*COPYLEFT_FAMILIES, *PERMISSIVE_LICENSES)}
    if stripped.lower() in spdx_by_lower:
        return spdx_by_lower[stripped.lower()]
    return stripped
def check_compatibility(dep_license: str, project_license: str) -> tuple[Severity, str]:
    """Return (severity, message) for using dep_license inside project_license."""
    if dep_license == "UNKNOWN":
        return Severity.WARNING, "License unknown — manual review needed"
    if dep_license in PERMISSIVE_LICENSES:
        return Severity.OK, "Compatible (permissive)"
    if dep_license not in COPYLEFT_FAMILIES:
        return Severity.UNKNOWN, f"Unrecognized license: {dep_license}"
    # Copyleft dependency from here on.
    if project_license in PERMISSIVE_LICENSES:
        return Severity.ERROR, f"Copyleft ({dep_license}) in permissive ({project_license}) project"
    # Same copyleft family (e.g. GPL-* project with a GPL-* dep) is fine.
    family_prefix = project_license.split("-")[0]
    if dep_license.startswith(family_prefix):
        return Severity.OK, "Compatible (same copyleft family)"
    return Severity.WARNING, f"Review needed: {dep_license} with {project_license}"
def parse_requirements_txt(path: str) -> list[DepLicense]:
    """Parse requirements.txt: one dep per line, skipping comments and pip flags.

    Unlike the previous strict full-line regex, this also extracts the name
    from lines with extras ("pkg[extra]"), environment markers
    ("pkg; python_version..."), and trailing inline comments.
    """
    deps = []
    with open(path) as f:
        for line in f:
            line = line.strip()
            if not line or line.startswith("#") or line.startswith("-"):
                continue
            # URLs / VCS references carry no usable package name here.
            if "://" in line or line.startswith("git+"):
                continue
            # Drop env markers and inline comments before extracting the name.
            line = line.split(";", 1)[0].split("#", 1)[0].strip()
            match = re.match(r"^([a-zA-Z0-9_.-]+)", line)
            if match:
                deps.append(DepLicense(name=match.group(1), source="requirements.txt"))
    return deps
def parse_pyproject_toml(path: str) -> list[DepLicense]:
    """Parse [project].dependencies from pyproject.toml.

    Uses tomllib (Python 3.11+) when available; otherwise falls back to a
    regex scan for the dependencies array.
    """
    deps: list = []
    try:
        import tomllib
    except ImportError:
        tomllib = None
    if tomllib is not None:
        with open(path, "rb") as f:
            data = tomllib.load(f)
        dep_strings = data.get("project", {}).get("dependencies", [])
    else:
        with open(path) as f:
            content = f.read()
        # Regex fallback. The previous pattern required `dependencies` to be
        # the very first key after [project], which almost never holds (name,
        # version, etc. come first); match the key at line start instead.
        match = re.search(r"^dependencies\s*=\s*\[(.*?)\]",
                          content, re.DOTALL | re.MULTILINE)
        dep_strings = re.findall(r'"([^"]+)"', match.group(1)) if match else []
    for dep_str in dep_strings:
        name = re.match(r"^([a-zA-Z0-9_.-]+)", dep_str)
        if name:
            deps.append(DepLicense(name=name.group(1), source="pyproject.toml"))
    return deps
def parse_package_json(path: str) -> list[DepLicense]:
    """Parse runtime and dev dependencies out of package.json."""
    with open(path) as fh:
        manifest = json.load(fh)
    return [
        DepLicense(name=pkg, version=spec, source="package.json")
        for section in ("dependencies", "devDependencies")
        for pkg, spec in manifest.get(section, {}).items()
    ]
def parse_cargo_toml(path: str) -> list[DepLicense]:
    """Parse Cargo.toml dependencies (basic, line-oriented).

    Tracks TOML sections so that only lines inside [dependencies]-style
    tables are collected — previously keys under [package] such as
    name/version/edition were misreported as dependencies. Recognizes both
    'serde = "1.0"' and the inline-table form 'serde = { version = "1.0" }'.
    """
    deps = []
    in_deps_section = False
    with open(path) as f:
        for raw_line in f:
            line = raw_line.strip()
            if line.startswith("["):
                # Section header: [dependencies], [dev-dependencies],
                # [build-dependencies], [target.'cfg(...)'.dependencies], ...
                in_deps_section = line.rstrip("]").endswith("dependencies")
                continue
            if not in_deps_section:
                continue
            match = re.match(r'^([a-zA-Z0-9_-]+)\s*=\s*(?:"|\{)', line)
            if match:
                deps.append(DepLicense(name=match.group(1), source="Cargo.toml"))
    return deps
def parse_go_mod(path: str) -> list[DepLicense]:
    """Parse go.mod dependencies.

    Handles both the block form ("require ( ... )") and the single-line
    form ("require module v1.2.3"), which the previous version ignored.
    Pure comment lines ("// ...") inside a require block are skipped
    (previously they produced a bogus dep named "//").
    """
    deps = []
    with open(path) as f:
        in_require = False
        for line in f:
            line = line.strip()
            if line.startswith("//"):
                continue  # comment-only line
            if line == "require (":
                in_require = True
                continue
            if line == ")" and in_require:
                in_require = False
                continue
            if line.startswith("require ") and not in_require:
                # Single-line form: require <module> <version>
                parts = line.split()
                if len(parts) >= 3:
                    deps.append(DepLicense(name=parts[1], version=parts[2], source="go.mod"))
                continue
            if in_require:
                parts = line.split()
                if len(parts) >= 2:
                    deps.append(DepLicense(name=parts[0], version=parts[1], source="go.mod"))
    return deps
def scan_dep_files(project_dir: str) -> list[DepLicense]:
    """Find and parse all recognized dependency files in a project.

    Scans the project root plus one level of subdirectories (monorepos).
    Parse failures are reported as warnings, never fatal.
    """
    all_deps = []
    parsers = {
        "requirements.txt": parse_requirements_txt,
        "requirements-dev.txt": parse_requirements_txt,
        "requirements_prod.txt": parse_requirements_txt,
        "pyproject.toml": parse_pyproject_toml,
        "setup.py": None,  # TODO: parse setup.py
        "package.json": parse_package_json,
        "Cargo.toml": parse_cargo_toml,
        "go.mod": parse_go_mod,
    }
    for filename, parser in parsers.items():
        path = os.path.join(project_dir, filename)
        if os.path.exists(path) and parser:
            try:
                all_deps.extend(parser(path))
            except Exception as e:
                # Name the file that failed (message previously contained a
                # literal "(unknown)" placeholder instead of the filename).
                print(f"Warning: Failed to parse {filename}: {e}", file=sys.stderr)
    # Also check subdirectories for monorepos (one level deep); sorted for
    # a deterministic scan order.
    for entry in sorted(os.listdir(project_dir)):
        subdir = os.path.join(project_dir, entry)
        if not os.path.isdir(subdir) or entry.startswith("."):
            continue
        for filename, parser in parsers.items():
            path = os.path.join(subdir, filename)
            if os.path.exists(path) and parser:
                try:
                    deps = parser(path)
                    for dep in deps:
                        # Record the real relative source, e.g. "svc/go.mod"
                        # (was a literal "(unknown)" placeholder).
                        dep.source = f"{entry}/{filename}"
                    all_deps.extend(deps)
                except Exception:
                    pass
    return all_deps
def lookup_pypi_license(package_name: str) -> str:
    """Look up a package's license from the PyPI JSON API.

    Best-effort: returns "UNKNOWN" on any network or parse failure.
    """
    try:
        url = f"https://pypi.org/pypi/{package_name}/json"
        req = urllib.request.Request(url, headers={"Accept": "application/json"})
        # Close the HTTP response deterministically (it was leaked before).
        with urllib.request.urlopen(req, timeout=10) as resp:
            data = json.loads(resp.read())
        info = data.get("info", {})
        # Prefer trove classifiers, e.g. "License :: OSI Approved :: MIT License".
        for classifier in info.get("classifiers", []):
            if classifier.startswith("License ::"):
                parts = classifier.split(" :: ")
                if len(parts) >= 3:
                    return parts[-1]
        # Fall back to the free-text license field; skip huge embedded texts.
        lic = info.get("license", "")
        if lic and len(lic) < 100:
            return lic
        # Newer metadata: SPDX license_expression.
        expr = info.get("license_expression", "")
        return expr if expr else "UNKNOWN"
    except Exception:
        return "UNKNOWN"
def lookup_npm_license(package_name: str) -> str:
    """Look up a package's license from the npm registry (best-effort)."""
    try:
        url = f"https://registry.npmjs.org/{package_name}"
        req = urllib.request.Request(url, headers={"Accept": "application/json"})
        # Close the HTTP response deterministically (it was leaked before).
        with urllib.request.urlopen(req, timeout=10) as resp:
            data = json.loads(resp.read())
        lic = data.get("license", "UNKNOWN")
        if isinstance(lic, dict):
            # Legacy object form: {"type": "MIT", "url": ...}
            lic = lic.get("type", "UNKNOWN")
        return lic or "UNKNOWN"
    except Exception:
        return "UNKNOWN"
def detect_project_license(project_dir: str) -> str:
    """Detect the project's own license from LICENSE-style files or pyproject.toml.

    Heuristic text matching; returns an SPDX id or "UNKNOWN".
    """
    for name in ("LICENSE", "LICENSE.md", "LICENSE.txt", "LICENCE", "COPYING"):
        path = os.path.join(project_dir, name)
        if not os.path.exists(path):
            continue
        # errors="ignore": an oddly-encoded license file must not crash detection.
        with open(path, encoding="utf-8", errors="ignore") as f:
            content = f.read().upper()
        if "MIT LICENSE" in content or "MIT" in content[:200]:
            return "MIT"
        if "APACHE" in content and "2.0" in content:
            return "Apache-2.0"
        if "GNU GENERAL PUBLIC LICENSE" in content:
            if "VERSION 3" in content:
                return "GPL-3.0"
            if "VERSION 2" in content:
                return "GPL-2.0"
        if "BSD" in content[:500]:
            if "3-CLAUSE" in content or "THREE CLAUSE" in content:
                return "BSD-3-Clause"
            return "BSD-2-Clause"
        if "ISC" in content[:200]:
            return "ISC"
    # Fall back to pyproject.toml license metadata.
    pypath = os.path.join(project_dir, "pyproject.toml")
    if os.path.exists(pypath):
        with open(pypath, encoding="utf-8", errors="ignore") as f:
            content = f.read()
        # Table form: license = { text = "..." }
        match = re.search(r'license\s*=\s*\{\s*text\s*=\s*"([^"]+)"', content)
        if match:
            return normalize_license(match.group(1))
        # String form: license = "..."
        match = re.search(r'license\s*=\s*"([^"]+)"', content)
        if match:
            return normalize_license(match.group(1))
    return "UNKNOWN"
def resolve_licenses(deps: list[DepLicense], cache: dict = None) -> None:
    """Fill in dep.license for every dependency, memoizing lookups by name."""
    cache = {} if cache is None else cache
    for dep in deps:
        if dep.name in cache:
            dep.license = cache[dep.name]
            continue
        # package.json deps live on npm; everything else is assumed PyPI.
        lookup = lookup_npm_license if dep.source in ("package.json",) else lookup_pypi_license
        dep.license = normalize_license(lookup(dep.name))
        cache[dep.name] = dep.license
def generate_report(deps: list[DepLicense], project_license: str) -> LicenseReport:
    """Run compatibility checks over deps and assemble a LicenseReport."""
    report = LicenseReport(
        project_dir="",
        project_license=project_license,
        dependencies=[],
    )
    counts = {"ok": 0, "warning": 0, "error": 0, "unknown": 0}
    for dep in deps:
        dep.severity, dep.message = check_compatibility(dep.license, project_license)
        counts[dep.severity.value] += 1
        finding = f"{dep.name}: {dep.message}"
        if dep.severity == Severity.ERROR:
            report.errors.append(finding)
        elif dep.severity == Severity.WARNING:
            report.warnings.append(finding)
        report.dependencies.append(asdict(dep))
    report.summary = {
        "total": len(deps),
        **counts,
        "project_license": project_license,
    }
    return report
def format_text(report: LicenseReport) -> str:
    """Format the report as a human-readable text table."""
    # Severity -> row marker; built once instead of once per dependency.
    icon = {"ok": "[OK]", "warning": "[!!]", "error": "[XX]", "unknown": "[??]"}
    lines = []
    lines.append("=" * 60)
    lines.append(" LICENSE COMPATIBILITY REPORT")
    lines.append("=" * 60)
    lines.append(f" Project License: {report.project_license}")
    lines.append(f" Dependencies: {report.summary.get('total', 0)}")
    lines.append(f" OK: {report.summary.get('ok', 0)} "
                 f"WARN: {report.summary.get('warning', 0)} "
                 f"ERR: {report.summary.get('error', 0)} "
                 f"UNK: {report.summary.get('unknown', 0)}")
    lines.append("-" * 60)
    for dep in report.dependencies:
        sev = dep.get("severity", "unknown")
        # generate_report stores asdict(DepLicense), and dataclasses.asdict
        # leaves Enum members as-is — so sev is a Severity here, and the icon
        # lookup never matched. Map it to its string value first.
        sev = getattr(sev, "value", sev)
        name = dep.get("name", "?")
        lic = dep.get("license", "?")
        msg = dep.get("message", "")
        lines.append(f" {icon.get(sev, '[ ]')} {name:30s} {lic:20s} {msg}")
    if report.errors:
        lines.append("-" * 60)
        lines.append(" ERRORS:")
        for e in report.errors:
            lines.append(f" - {e}")
    if report.warnings:
        lines.append("-" * 60)
        lines.append(" WARNINGS:")
        for w in report.warnings:
            lines.append(f" - {w}")
    lines.append("=" * 60)
    return "\n".join(lines)
def main():
    """CLI entry point: scan a project, resolve dep licenses, print a report."""
    arg_parser = argparse.ArgumentParser(description="License Checker — Pipeline 5.4")
    arg_parser.add_argument("project_dir", help="Project directory to scan")
    arg_parser.add_argument("--project-license", default=None,
                            help="Project license SPDX id (auto-detected if omitted)")
    arg_parser.add_argument("--format", choices=["json", "text"], default="text",
                            help="Output format")
    arg_parser.add_argument("--scan-deps", action="store_true",
                            help="Only scan and list deps (skip license lookup)")
    args = arg_parser.parse_args()

    project_dir = os.path.abspath(args.project_dir)
    if not os.path.isdir(project_dir):
        print(f"Error: {project_dir} is not a directory", file=sys.stderr)
        sys.exit(1)

    # Project license: explicit flag wins, otherwise auto-detect.
    project_license = args.project_license or detect_project_license(project_dir)

    deps = scan_dep_files(project_dir)
    if not deps:
        print(f"No dependencies found in {project_dir}", file=sys.stderr)
        sys.exit(0)
    print(f"Found {len(deps)} dependencies", file=sys.stderr)

    if args.scan_deps:
        for dep in deps:
            print(f" {dep.name} ({dep.source})")
        sys.exit(0)

    print("Resolving licenses...", file=sys.stderr)
    resolve_licenses(deps)

    report = generate_report(deps, project_license)
    report.project_dir = project_dir
    if args.format == "json":
        print(json.dumps(asdict(report), indent=2, default=str))
    else:
        print(format_text(report))
    # Exit code: 1 if errors, 0 otherwise (CI-friendly).
    sys.exit(1 if report.errors else 0)
# Script entry point.
if __name__ == "__main__":
    main()

View File

@@ -1,189 +0,0 @@
#!/usr/bin/env python3
"""Tests for scripts/diff_analyzer.py — 10 tests."""
import sys
import os
# Make the scripts directory importable regardless of the current directory.
sys.path.insert(0, os.path.dirname(__file__) or ".")
# Load diff_analyzer.py by file path so these tests work without a package.
import importlib.util
spec = importlib.util.spec_from_file_location("da", os.path.join(os.path.dirname(__file__) or ".", "diff_analyzer.py"))
mod = importlib.util.module_from_spec(spec)
spec.loader.exec_module(mod)
# Re-export the names under test.
DiffAnalyzer = mod.DiffAnalyzer
ChangeCategory = mod.ChangeCategory
# A brand-new file: one hunk, additions only.
SAMPLE_ADD = """diff --git a/new.py b/new.py
new file mode 100644
--- /dev/null
+++ b/new.py
@@ -0,0 +1,3 @@
+def hello():
+ print("world")
+ return True
"""
# A deleted file: one hunk, deletions only.
SAMPLE_DELETE = """diff --git a/old.py b/old.py
deleted file mode 100644
--- a/old.py
+++ /dev/null
@@ -1,2 +0,0 @@
-def goodbye():
- pass
"""
# An in-place edit: mixed additions and deletions in one hunk.
SAMPLE_MODIFY = """diff --git a/app.py b/app.py
--- a/app.py
+++ b/app.py
@@ -1,3 +1,4 @@
def main():
- print("old")
+ print("new")
+ print("extra")
return 0
"""
# A rename with a one-line content change.
SAMPLE_RENAME = """diff --git a/old_name.py b/new_name.py
rename from old_name.py
rename to new_name.py
--- a/old_name.py
+++ b/new_name.py
@@ -1,1 +1,1 @@
-old content
+new content
"""
# Two files in one diff; note the second header's non-standard "b/" prefix.
SAMPLE_MULTI = """diff --git a/a.py b/a.py
--- a/a.py
+++ b/a.py
@@ -1,1 +1,2 @@
existing
+added line
diff --git b/b.py b/b.py
new file mode 100644
--- /dev/null
+++ b/b.py
@@ -0,0 +1,1 @@
+new file
"""
# A binary change: no hunks to parse.
SAMPLE_BINARY = """diff --git a/img.png b/img.png
Binary files a/img.png and b/img.png differ
"""
def test_empty():
    # An empty diff must yield a summary with no files.
    summary = DiffAnalyzer().analyze("")
    assert summary.total_files_changed == 0
    print("PASS: test_empty")
def test_addition():
    # New-file diff: 3 added lines, no deletions, hunk categorized ADDED.
    summary = DiffAnalyzer().analyze(SAMPLE_ADD)
    assert summary.total_files_changed == 1
    assert summary.total_added == 3
    assert summary.total_deleted == 0
    assert summary.new_files == 1
    assert summary.files[0].hunks[0].category == ChangeCategory.ADDED
    print("PASS: test_addition")
def test_deletion():
    # Deleted-file diff: 2 removed lines, hunk categorized DELETED.
    summary = DiffAnalyzer().analyze(SAMPLE_DELETE)
    assert summary.total_deleted == 2
    assert summary.deleted_files == 1
    assert summary.files[0].hunks[0].category == ChangeCategory.DELETED
    print("PASS: test_deletion")
def test_modification():
    # Mixed hunk: additions and deletions => MODIFIED.
    summary = DiffAnalyzer().analyze(SAMPLE_MODIFY)
    assert summary.total_added == 2
    assert summary.total_deleted == 1
    assert summary.files[0].hunks[0].category == ChangeCategory.MODIFIED
    print("PASS: test_modification")
def test_rename():
    # Rename headers must populate old_path and the new path.
    summary = DiffAnalyzer().analyze(SAMPLE_RENAME)
    renamed = summary.files[0]
    assert summary.renamed_files == 1
    assert renamed.old_path == "old_name.py"
    assert renamed.path == "new_name.py"
    assert renamed.is_renamed == True
    print("PASS: test_rename")
def test_multiple_files():
    # A multi-file diff splits on "diff --git" boundaries.
    summary = DiffAnalyzer().analyze(SAMPLE_MULTI)
    assert summary.total_files_changed == 2
    assert summary.new_files == 1
    print("PASS: test_multiple_files")
def test_binary():
    # Binary diffs are flagged and carry no hunks.
    summary = DiffAnalyzer().analyze(SAMPLE_BINARY)
    binary_file = summary.files[0]
    assert summary.binary_files == 1
    assert binary_file.is_binary == True
    assert len(binary_file.hunks) == 0
    print("PASS: test_binary")
def test_to_dict():
    # Serialization keeps the aggregate keys plus a list of per-file dicts.
    serialized = DiffAnalyzer().analyze(SAMPLE_MODIFY).to_dict()
    assert "total_files_changed" in serialized
    assert "files" in serialized
    assert isinstance(serialized["files"], list)
    print("PASS: test_to_dict")
def test_context_only():
    # Despite the name: the hunk has both '+' and '-' lines, so MODIFIED.
    diff = """diff --git a/f.py b/f.py
--- a/f.py
+++ b/f.py
@@ -1,3 +1,3 @@
line1
-old
+new
line3
"""
    summary = DiffAnalyzer().analyze(diff)
    # Has both added and deleted = MODIFIED
    assert summary.files[0].hunks[0].category == ChangeCategory.MODIFIED
    print("PASS: test_context_only")
def test_multi_hunk():
    # Two hunks in one file: both counted, additions summed across hunks.
    diff = """diff --git a/f.py b/f.py
--- a/f.py
+++ b/f.py
@@ -1,1 +1,2 @@
existing
+first addition
@@ -10,1 +11,2 @@
more
+second addition
"""
    summary = DiffAnalyzer().analyze(diff)
    assert summary.total_hunks == 2
    assert summary.total_added == 2
    print("PASS: test_multi_hunk")
def run_all():
    """Run every test in order and report overall success."""
    tests = [
        test_empty, test_addition, test_deletion, test_modification,
        test_rename, test_multiple_files, test_binary, test_to_dict,
        test_context_only, test_multi_hunk,
    ]
    for test in tests:
        test()
    print("\nAll 10 tests passed!")
# Script entry point.
if __name__ == "__main__":
    run_all()

View File

@@ -0,0 +1,186 @@
#!/usr/bin/env python3
"""Tests for license_checker.py — Pipeline 5.4"""
import json
import os
import sys
import tempfile
import unittest
# Add the scripts dir to sys.path so license_checker imports without a package.
sys.path.insert(0, os.path.dirname(__file__))
from license_checker import (
normalize_license,
check_compatibility,
parse_requirements_txt,
parse_package_json,
parse_pyproject_toml,
parse_go_mod,
detect_project_license,
scan_dep_files,
generate_report,
format_text,
Severity,
DepLicense,
)
class TestNormalizeLicense(unittest.TestCase):
    """normalize_license(): free-form strings map onto SPDX identifiers."""

    def test_mit_aliases(self):
        for raw in ("mit", "MIT License", "The MIT License", "MIT license"):
            self.assertEqual(normalize_license(raw), "MIT")

    def test_apache_aliases(self):
        for raw in ("Apache 2.0", "Apache-2.0", "apache software license"):
            self.assertEqual(normalize_license(raw), "Apache-2.0")

    def test_gpl_aliases(self):
        self.assertEqual(normalize_license("GPL-3.0"), "GPL-3.0")
        self.assertEqual(normalize_license("gplv3"), "GPL-3.0")

    def test_unknown(self):
        self.assertEqual(normalize_license(""), "UNKNOWN")
        self.assertEqual(normalize_license("UNKNOWN"), "UNKNOWN")

    def test_already_spdx(self):
        self.assertEqual(normalize_license("BSD-3-Clause"), "BSD-3-Clause")
class TestCheckCompatibility(unittest.TestCase):
    """check_compatibility(): severity rules for dep vs. project license."""

    def test_permissive_ok(self):
        severity, _ = check_compatibility("MIT", "MIT")
        self.assertEqual(severity, Severity.OK)

    def test_gpl_in_mit_error(self):
        severity, _ = check_compatibility("GPL-3.0", "MIT")
        self.assertEqual(severity, Severity.ERROR)

    def test_unknown_warning(self):
        severity, _ = check_compatibility("UNKNOWN", "MIT")
        self.assertEqual(severity, Severity.WARNING)

    def test_apache_in_mit_ok(self):
        severity, _ = check_compatibility("Apache-2.0", "MIT")
        self.assertEqual(severity, Severity.OK)

    def test_lgpl_in_mit_error(self):
        severity, _ = check_compatibility("LGPL-3.0", "MIT")
        self.assertEqual(severity, Severity.ERROR)
class TestParseRequirements(unittest.TestCase):
    """parse_requirements_txt(): names extracted, comments and flags skipped."""

    def test_basic(self):
        with tempfile.NamedTemporaryFile(mode="w", suffix=".txt", delete=False) as tmp:
            tmp.write("requests>=2.28.0\nflask==2.3.0\n# comment\npytest\n")
            tmp.flush()
        deps = parse_requirements_txt(tmp.name)
        os.unlink(tmp.name)
        names = [dep.name for dep in deps]
        self.assertIn("requests", names)
        self.assertIn("flask", names)
        self.assertIn("pytest", names)
        self.assertEqual(len(deps), 3)

    def test_skip_flags(self):
        with tempfile.NamedTemporaryFile(mode="w", suffix=".txt", delete=False) as tmp:
            tmp.write("-r other.txt\n--index-url https://pypi.org\nreal-dep\n")
            tmp.flush()
        deps = parse_requirements_txt(tmp.name)
        os.unlink(tmp.name)
        self.assertEqual(len(deps), 1)
        self.assertEqual(deps[0].name, "real-dep")
class TestParsePackageJson(unittest.TestCase):
    """parse_package_json(): both dependencies and devDependencies counted."""

    def test_basic(self):
        manifest = {
            "dependencies": {"express": "^4.18.0", "lodash": "^4.17.21"},
            "devDependencies": {"jest": "^29.0.0"},
        }
        with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as tmp:
            json.dump(manifest, tmp)
            tmp.flush()
        deps = parse_package_json(tmp.name)
        os.unlink(tmp.name)
        names = [dep.name for dep in deps]
        self.assertIn("express", names)
        self.assertIn("jest", names)
        self.assertEqual(len(deps), 3)
class TestParseGoMod(unittest.TestCase):
    """parse_go_mod(): require-block entries become deps with versions."""

    def test_basic(self):
        content = """module example.com/mymod
go 1.21
require (
github.com/gin-gonic/gin v1.9.1
github.com/stretchr/testify v1.8.4
)
"""
        with tempfile.NamedTemporaryFile(mode="w", suffix=".mod", delete=False) as tmp:
            tmp.write(content)
            tmp.flush()
        deps = parse_go_mod(tmp.name)
        os.unlink(tmp.name)
        self.assertEqual(len(deps), 2)
        self.assertEqual(deps[0].name, "github.com/gin-gonic/gin")
class TestDetectProjectLicense(unittest.TestCase):
    """detect_project_license(): LICENSE-file heuristics."""

    def test_mit_file(self):
        with tempfile.TemporaryDirectory() as project:
            with open(os.path.join(project, "LICENSE"), "w") as fh:
                fh.write("MIT License\n\nCopyright (c) 2024...\n")
            self.assertEqual(detect_project_license(project), "MIT")

    def test_apache_file(self):
        with tempfile.TemporaryDirectory() as project:
            with open(os.path.join(project, "LICENSE"), "w") as fh:
                fh.write("Apache License Version 2.0...")
            self.assertEqual(detect_project_license(project), "Apache-2.0")

    def test_no_license(self):
        with tempfile.TemporaryDirectory() as project:
            self.assertEqual(detect_project_license(project), "UNKNOWN")
class TestScanDeps(unittest.TestCase):
    """scan_dep_files(): deps collected across ecosystems in one directory."""

    def test_multi_ecosystem(self):
        with tempfile.TemporaryDirectory() as project:
            with open(os.path.join(project, "requirements.txt"), "w") as fh:
                fh.write("flask\nrequests\n")
            with open(os.path.join(project, "package.json"), "w") as fh:
                json.dump({"dependencies": {"express": "^4.0.0"}}, fh)
            names = [dep.name for dep in scan_dep_files(project)]
            self.assertIn("flask", names)
            self.assertIn("express", names)
class TestGenerateReport(unittest.TestCase):
    """generate_report(): per-severity counts plus error/warning lists."""

    def test_basic(self):
        deps = [
            DepLicense(name="flask", license="BSD-3-Clause", source="requirements.txt"),
            DepLicense(name="gpl-pkg", license="GPL-3.0", source="requirements.txt"),
            DepLicense(name="unknown-pkg", license="UNKNOWN", source="requirements.txt"),
        ]
        report = generate_report(deps, "MIT")
        self.assertEqual(report.summary["ok"], 1)
        self.assertEqual(report.summary["error"], 1)
        self.assertEqual(report.summary["warning"], 1)
        self.assertEqual(len(report.errors), 1)
        self.assertIn("gpl-pkg", report.errors[0])

    def test_format_text(self):
        report = generate_report(
            [DepLicense(name="flask", license="BSD-3-Clause", source="requirements.txt")],
            "MIT",
        )
        text = format_text(report)
        self.assertIn("LICENSE COMPATIBILITY REPORT", text)
        self.assertIn("flask", text)
# Run the unittest suite when executed directly.
if __name__ == "__main__":
    unittest.main()