Compare commits

..

2 Commits

4 changed files with 692 additions and 405 deletions

View File

@@ -1,216 +0,0 @@
#!/usr/bin/env python3
"""
Diff Analyzer — Parse unified diffs and categorize every change.
Pipeline 6.1 for Compounding Intelligence.
"""
import re
from dataclasses import dataclass, field, asdict
from enum import Enum
from typing import List, Dict, Any, Optional
class ChangeCategory(Enum):
    """How a hunk changes a file, derived from its +/- line mix."""
    ADDED = "added"        # hunk contains only '+' lines
    DELETED = "deleted"    # hunk contains only '-' lines
    MODIFIED = "modified"  # hunk contains both '+' and '-' lines
    MOVED = "moved"        # reserved; never assigned by DiffAnalyzer below
    CONTEXT = "context"    # hunk contains neither '+' nor '-' lines
@dataclass
class Hunk:
    """A single diff hunk: header text, old/new line ranges, raw lines, category."""
    header: str
    old_start: int
    old_count: int
    new_start: int
    new_count: int
    lines: List[str] = field(default_factory=list)
    category: ChangeCategory = ChangeCategory.CONTEXT

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a plain dict, flattening the category enum to its string value."""
        return {**asdict(self), "category": self.category.value}
@dataclass
class FileChange:
    """All changes to one file within a diff."""
    path: str
    old_path: Optional[str] = None  # original path when the file was renamed
    hunks: List[Hunk] = field(default_factory=list)
    added_lines: int = 0
    deleted_lines: int = 0
    is_new: bool = False
    is_deleted: bool = False
    is_renamed: bool = False
    is_binary: bool = False

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a plain dict; hunks are serialized via Hunk.to_dict()."""
        serialized = asdict(self)
        serialized["hunks"] = [hunk.to_dict() for hunk in self.hunks]
        return serialized
@dataclass
class ChangeSummary:
    """Aggregate stats plus the per-file breakdown."""
    files: List[FileChange] = field(default_factory=list)
    total_added: int = 0
    total_deleted: int = 0
    total_files_changed: int = 0
    total_hunks: int = 0
    new_files: int = 0
    deleted_files: int = 0
    renamed_files: int = 0
    binary_files: int = 0

    def to_dict(self) -> Dict[str, Any]:
        """Serialize aggregate counters first, then the per-file breakdown."""
        return dict(
            total_files_changed=self.total_files_changed,
            total_added=self.total_added,
            total_deleted=self.total_deleted,
            total_hunks=self.total_hunks,
            new_files=self.new_files,
            deleted_files=self.deleted_files,
            renamed_files=self.renamed_files,
            binary_files=self.binary_files,
            files=[fc.to_dict() for fc in self.files],
        )
class DiffAnalyzer:
    """Parses unified diff format and produces a structured ChangeSummary."""

    HUNK_HEADER_RE = re.compile(r"^@@\s+-(\d+)(?:,(\d+))?\s+\+(\d+)(?:,(\d+))?\s+@@(.*)$")
    DIFF_FILE_RE = re.compile(r"^diff --git a/(.*) b/(.*)")
    RENAME_RE = re.compile(r"^rename from (.+)$")
    RENAME_TO_RE = re.compile(r"^rename to (.+)$")
    NEW_FILE_RE = re.compile(r"^new file mode")
    DELETED_FILE_RE = re.compile(r"^deleted file mode")
    BINARY_RE = re.compile(r"^Binary files .* differ")
    # Fallback path source for diffs whose "diff --git" header is absent or
    # does not use the standard "a/... b/..." prefixes.
    NEW_PATH_RE = re.compile(r"^\+\+\+ (?:b/)?(.+)$")

    def analyze(self, diff_text: str) -> ChangeSummary:
        """Parse a unified diff and return a ChangeSummary.

        Empty or whitespace-only input yields an empty summary.
        """
        summary = ChangeSummary()
        if not diff_text or not diff_text.strip():
            return summary
        for file_diff in self._split_files(diff_text):
            fc = self._parse_file_diff(file_diff)
            summary.files.append(fc)
            summary.total_added += fc.added_lines
            summary.total_deleted += fc.deleted_lines
            summary.total_hunks += len(fc.hunks)
            # bools count as 0/1 here
            summary.new_files += fc.is_new
            summary.deleted_files += fc.is_deleted
            summary.renamed_files += fc.is_renamed
            summary.binary_files += fc.is_binary
        summary.total_files_changed = len(summary.files)
        return summary

    def _split_files(self, diff_text: str) -> List[str]:
        """Split a multi-file diff into per-file sections on "diff --git" markers."""
        chunks: List[str] = []
        current: List[str] = []
        for line in diff_text.split("\n"):
            if line.startswith("diff --git ") and current:
                chunks.append("\n".join(current))
                current = [line]
            else:
                current.append(line)
        if current:
            chunks.append("\n".join(current))
        return chunks

    def _parse_file_diff(self, diff_text: str) -> FileChange:
        """Parse a single file's diff section into a FileChange."""
        lines = diff_text.split("\n")
        fc = FileChange(path="")
        # File path: prefer the "diff --git a/X b/Y" header; fall back to the
        # "+++" header so non-standard headers still yield a path.
        for line in lines:
            m = self.DIFF_FILE_RE.match(line)
            if m:
                fc.path = m.group(2)
                break
        else:
            for line in lines:
                m = self.NEW_PATH_RE.match(line)
                if m and m.group(1) != "/dev/null":
                    fc.path = m.group(1)
                    break
        # Special states signalled by extended header lines.
        for line in lines:
            if self.NEW_FILE_RE.match(line):
                fc.is_new = True
            elif self.DELETED_FILE_RE.match(line):
                fc.is_deleted = True
            else:
                m = self.RENAME_RE.match(line)
                if m:
                    # Match once and reuse it (previously matched twice).
                    fc.old_path = m.group(1)
                    fc.is_renamed = True
                elif self.BINARY_RE.match(line):
                    fc.is_binary = True
                    return fc  # Binary diffs carry no hunks.
        if fc.is_renamed:
            for line in lines:
                m = self.RENAME_TO_RE.match(line)
                if m:
                    fc.path = m.group(1)
        # Parse hunks: a header opens a hunk; +/-/space lines belong to it.
        current_hunk: Optional[Hunk] = None
        for line in lines:
            m = self.HUNK_HEADER_RE.match(line)
            if m:
                if current_hunk:
                    self._classify_hunk(current_hunk, fc)
                    fc.hunks.append(current_hunk)
                current_hunk = Hunk(
                    header=m.group(5).strip(),
                    old_start=int(m.group(1)),
                    # A missing count means 1 per the unified diff format.
                    old_count=int(m.group(2) or 1),
                    new_start=int(m.group(3)),
                    new_count=int(m.group(4) or 1),
                )
            elif current_hunk and line[:1] in ("+", "-", " "):
                current_hunk.lines.append(line)
        if current_hunk:
            self._classify_hunk(current_hunk, fc)
            fc.hunks.append(current_hunk)
        return fc

    def _classify_hunk(self, hunk: Hunk, fc: FileChange) -> None:
        """Assign a category to the hunk and accumulate line counts onto fc."""
        added = sum(1 for l in hunk.lines if l.startswith("+"))
        deleted = sum(1 for l in hunk.lines if l.startswith("-"))
        fc.added_lines += added
        fc.deleted_lines += deleted
        if added and deleted:
            hunk.category = ChangeCategory.MODIFIED
        elif added:
            hunk.category = ChangeCategory.ADDED
        elif deleted:
            hunk.category = ChangeCategory.DELETED
        else:
            hunk.category = ChangeCategory.CONTEXT

506
scripts/license_checker.py Normal file
View File

@@ -0,0 +1,506 @@
#!/usr/bin/env python3
"""
License Checker — Pipeline 5.4
Scans dependency files for a project, resolves license info, flags incompatibilities.
Acceptance:
[x] Reads license for each dep
[x] Flags: GPL in MIT project, unknown licenses
[x] Output: license compatibility report
Usage:
python3 license_checker.py <project_dir> [--project-license MIT] [--format json|text]
python3 license_checker.py <project_dir> --scan-deps
"""
import argparse
import json
import os
import re
import subprocess
import sys
import urllib.request
import urllib.error
from dataclasses import dataclass, field, asdict
from enum import Enum
from pathlib import Path
from typing import Optional
class Severity(Enum):
    """Severity of a dependency's license-compatibility finding."""
    OK = "ok"            # compatible with the project license
    WARNING = "warning"  # needs manual review (e.g. unknown license)
    ERROR = "error"      # incompatible (e.g. copyleft dep in a permissive project)
    UNKNOWN = "unknown"  # license string not recognized at all
# SPDX license compatibility matrix
# Key: (dependency_license, project_license) -> compatible?
# Copyleft licenses are NOT compatible with permissive projects
COPYLEFT_FAMILIES = {
    "GPL-2.0", "GPL-2.0-only", "GPL-2.0-or-later",
    "GPL-3.0", "GPL-3.0-only", "GPL-3.0-or-later",
    "AGPL-3.0", "AGPL-3.0-only", "AGPL-3.0-or-later",
    "LGPL-2.0", "LGPL-2.1", "LGPL-3.0",
    "LGPL-2.0-only", "LGPL-2.1-only", "LGPL-3.0-only",
    "LGPL-2.0-or-later", "LGPL-2.1-or-later", "LGPL-3.0-or-later",
    "MPL-2.0",  # Weak copyleft — file-level
    "EUPL-1.1", "EUPL-1.2",
    "OSL-3.0",
    "SSPL-1.0",  # NOTE(review): source-available, not OSI-approved; grouped here to be flagged
    "CC-BY-SA-4.0", "CC-BY-SA-3.0",
    # NOTE(review): CC-BY-NC is non-commercial rather than copyleft in the
    # strict sense; grouped here so it is flagged in permissive projects —
    # confirm this is the intended policy.
    "CC-BY-NC-4.0", "CC-BY-NC-3.0",
}
# Licenses considered safe in any project covered by this checker.
PERMISSIVE_LICENSES = {
    "MIT", "BSD-2-Clause", "BSD-3-Clause", "Apache-2.0",
    "ISC", "Unlicense", "CC0-1.0", "0BSD", "BSL-1.0",
    "Zlib", "PSF-2.0", "Python-2.0",
}
# Common aliases: lower-cased free-form license strings -> SPDX id.
# Keys must stay lower-case; normalize_license() lower-cases before lookup.
LICENSE_ALIASES = {
    "mit": "MIT",
    "bsd": "BSD-3-Clause",
    "bsd-2": "BSD-2-Clause",
    "bsd-3": "BSD-3-Clause",
    "bsd license": "BSD-3-Clause",
    "apache": "Apache-2.0",
    "apache 2.0": "Apache-2.0",
    "apache-2.0": "Apache-2.0",
    "apache software license": "Apache-2.0",
    "apache software license 2.0": "Apache-2.0",
    "gpl": "GPL-3.0",
    "gpl-2": "GPL-2.0",
    "gpl-3": "GPL-3.0",
    "gplv2": "GPL-2.0",
    "gplv3": "GPL-3.0",
    "gnu general public license": "GPL-3.0",
    "gnu general public license v3": "GPL-3.0",
    "gnu general public license v2": "GPL-2.0",
    "gnu lesser general public license v2": "LGPL-2.1",
    "gnu lesser general public license v3": "LGPL-3.0",
    "lgpl": "LGPL-3.0",
    "lgpl-2.1": "LGPL-2.1",
    "lgpl-3": "LGPL-3.0",
    "agpl": "AGPL-3.0",
    "agpl-3.0": "AGPL-3.0",
    "agplv3": "AGPL-3.0",
    "isc": "ISC",
    "mpl": "MPL-2.0",
    "mpl-2.0": "MPL-2.0",
    "mozilla public license 2.0": "MPL-2.0",
    "unlicense": "Unlicense",
    "public domain": "Unlicense",
    "cc0": "CC0-1.0",
    "cc0-1.0": "CC0-1.0",
    "psf": "PSF-2.0",
    "python software foundation license": "PSF-2.0",
    "the mit license": "MIT",
    "mit license": "MIT",
}
@dataclass
class DepLicense:
    """One dependency plus its resolved license and compatibility verdict."""
    name: str
    version: str = ""         # version spec as found in the dep file, if any
    license: str = "UNKNOWN"  # normalized SPDX id once resolved
    source: str = ""  # where we found the dep (requirements.txt, package.json, etc.)
    severity: Severity = Severity.UNKNOWN  # set by generate_report()
    message: str = ""                      # human-readable compatibility note
@dataclass
class LicenseReport:
    """Full compatibility report for one project."""
    project_dir: str
    project_license: str = "MIT"  # SPDX id of the project's own license
    dependencies: list = field(default_factory=list)  # asdict(DepLicense) dicts
    summary: dict = field(default_factory=dict)       # per-severity counts + total
    errors: list = field(default_factory=list)        # "name: message" strings
    warnings: list = field(default_factory=list)      # "name: message" strings
def normalize_license(raw: str) -> str:
    """Normalize a free-form license string to an SPDX identifier.

    Returns "UNKNOWN" for empty/unknown input. Falls back to the stripped
    input when the string matches neither a known alias nor a known SPDX id.
    """
    if not raw or raw.strip() in ("UNKNOWN", "UNKNOWN:", ""):
        return "UNKNOWN"
    cleaned = raw.strip().lower()
    # Drop parentheticals like "MIT License (MIT)" and boilerplate words.
    cleaned = re.sub(r"\(.*?\)", "", cleaned).strip()
    cleaned = re.sub(r"\s+license$", "", cleaned).strip()
    cleaned = re.sub(r"^the\s+", "", cleaned).strip()
    if cleaned in LICENSE_ALIASES:
        return LICENSE_ALIASES[cleaned]
    stripped = raw.strip()
    # Already an exact SPDX id?
    if stripped in COPYLEFT_FAMILIES or stripped in PERMISSIVE_LICENSES:
        return stripped
    # Case-insensitive SPDX match (previously "gpl-3.0" or "Bsd-3-Clause"
    # escaped normalization because the check was case-sensitive).
    spdx_by_lower = {spdx.lower(): spdx
                     for spdx in (*COPYLEFT_FAMILIES, *PERMISSIVE_LICENSES)}
    if stripped.lower() in spdx_by_lower:
        return spdx_by_lower[stripped.lower()]
    return stripped
def check_compatibility(dep_license: str, project_license: str) -> tuple[Severity, str]:
    """Return (severity, message) for using dep_license inside project_license."""
    if dep_license == "UNKNOWN":
        return Severity.WARNING, "License unknown — manual review needed"
    if dep_license in PERMISSIVE_LICENSES:
        return Severity.OK, "Compatible (permissive)"
    if dep_license not in COPYLEFT_FAMILIES:
        return Severity.UNKNOWN, f"Unrecognized license: {dep_license}"
    # Copyleft dependency from here on.
    if project_license in PERMISSIVE_LICENSES:
        return Severity.ERROR, f"Copyleft ({dep_license}) in permissive ({project_license}) project"
    # Same copyleft family (e.g. GPL-* project with a GPL-* dep) is fine.
    family_prefix = project_license.split("-")[0]
    if dep_license.startswith(family_prefix):
        return Severity.OK, "Compatible (same copyleft family)"
    return Severity.WARNING, f"Review needed: {dep_license} with {project_license}"
def parse_requirements_txt(path: str) -> list[DepLicense]:
    """Parse requirements.txt: one dep per line, skipping comments and pip flags.

    Unlike the previous strict full-line regex, this also extracts the name
    from lines with extras ("pkg[extra]"), environment markers
    ("pkg; python_version..."), and trailing inline comments.
    """
    deps = []
    with open(path) as f:
        for line in f:
            line = line.strip()
            if not line or line.startswith("#") or line.startswith("-"):
                continue
            # URLs / VCS references carry no usable package name here.
            if "://" in line or line.startswith("git+"):
                continue
            # Drop env markers and inline comments before extracting the name.
            line = line.split(";", 1)[0].split("#", 1)[0].strip()
            match = re.match(r"^([a-zA-Z0-9_.-]+)", line)
            if match:
                deps.append(DepLicense(name=match.group(1), source="requirements.txt"))
    return deps
def parse_pyproject_toml(path: str) -> list[DepLicense]:
    """Parse [project].dependencies from pyproject.toml.

    Uses tomllib (Python 3.11+) when available; otherwise falls back to a
    regex scan for the dependencies array.
    """
    deps: list = []
    try:
        import tomllib
    except ImportError:
        tomllib = None
    if tomllib is not None:
        with open(path, "rb") as f:
            data = tomllib.load(f)
        dep_strings = data.get("project", {}).get("dependencies", [])
    else:
        with open(path) as f:
            content = f.read()
        # Regex fallback. The previous pattern required `dependencies` to be
        # the very first key after [project], which almost never holds (name,
        # version, etc. come first); match the key at line start instead.
        match = re.search(r"^dependencies\s*=\s*\[(.*?)\]",
                          content, re.DOTALL | re.MULTILINE)
        dep_strings = re.findall(r'"([^"]+)"', match.group(1)) if match else []
    for dep_str in dep_strings:
        name = re.match(r"^([a-zA-Z0-9_.-]+)", dep_str)
        if name:
            deps.append(DepLicense(name=name.group(1), source="pyproject.toml"))
    return deps
def parse_package_json(path: str) -> list[DepLicense]:
    """Parse runtime and dev dependencies out of package.json."""
    with open(path) as fh:
        manifest = json.load(fh)
    return [
        DepLicense(name=pkg, version=spec, source="package.json")
        for section in ("dependencies", "devDependencies")
        for pkg, spec in manifest.get(section, {}).items()
    ]
def parse_cargo_toml(path: str) -> list[DepLicense]:
    """Parse Cargo.toml dependencies (basic, line-oriented).

    Tracks TOML sections so that only lines inside [dependencies]-style
    tables are collected — previously keys under [package] such as
    name/version/edition were misreported as dependencies. Recognizes both
    'serde = "1.0"' and the inline-table form 'serde = { version = "1.0" }'.
    """
    deps = []
    in_deps_section = False
    with open(path) as f:
        for raw_line in f:
            line = raw_line.strip()
            if line.startswith("["):
                # Section header: [dependencies], [dev-dependencies],
                # [build-dependencies], [target.'cfg(...)'.dependencies], ...
                in_deps_section = line.rstrip("]").endswith("dependencies")
                continue
            if not in_deps_section:
                continue
            match = re.match(r'^([a-zA-Z0-9_-]+)\s*=\s*(?:"|\{)', line)
            if match:
                deps.append(DepLicense(name=match.group(1), source="Cargo.toml"))
    return deps
def parse_go_mod(path: str) -> list[DepLicense]:
    """Parse go.mod dependencies.

    Handles both the block form ("require ( ... )") and the single-line
    form ("require module v1.2.3"), which the previous version ignored.
    Pure comment lines ("// ...") inside a require block are skipped
    (previously they produced a bogus dep named "//").
    """
    deps = []
    with open(path) as f:
        in_require = False
        for line in f:
            line = line.strip()
            if line.startswith("//"):
                continue  # comment-only line
            if line == "require (":
                in_require = True
                continue
            if line == ")" and in_require:
                in_require = False
                continue
            if line.startswith("require ") and not in_require:
                # Single-line form: require <module> <version>
                parts = line.split()
                if len(parts) >= 3:
                    deps.append(DepLicense(name=parts[1], version=parts[2], source="go.mod"))
                continue
            if in_require:
                parts = line.split()
                if len(parts) >= 2:
                    deps.append(DepLicense(name=parts[0], version=parts[1], source="go.mod"))
    return deps
def scan_dep_files(project_dir: str) -> list[DepLicense]:
    """Find and parse all recognized dependency files in a project.

    Scans the project root plus one level of subdirectories (monorepos).
    Parse failures are reported as warnings, never fatal.
    """
    all_deps = []
    parsers = {
        "requirements.txt": parse_requirements_txt,
        "requirements-dev.txt": parse_requirements_txt,
        "requirements_prod.txt": parse_requirements_txt,
        "pyproject.toml": parse_pyproject_toml,
        "setup.py": None,  # TODO: parse setup.py
        "package.json": parse_package_json,
        "Cargo.toml": parse_cargo_toml,
        "go.mod": parse_go_mod,
    }
    for filename, parser in parsers.items():
        path = os.path.join(project_dir, filename)
        if os.path.exists(path) and parser:
            try:
                all_deps.extend(parser(path))
            except Exception as e:
                # Name the file that failed (message previously contained a
                # literal "(unknown)" placeholder instead of the filename).
                print(f"Warning: Failed to parse {filename}: {e}", file=sys.stderr)
    # Also check subdirectories for monorepos (one level deep); sorted for
    # a deterministic scan order.
    for entry in sorted(os.listdir(project_dir)):
        subdir = os.path.join(project_dir, entry)
        if not os.path.isdir(subdir) or entry.startswith("."):
            continue
        for filename, parser in parsers.items():
            path = os.path.join(subdir, filename)
            if os.path.exists(path) and parser:
                try:
                    deps = parser(path)
                    for dep in deps:
                        # Record the real relative source, e.g. "svc/go.mod"
                        # (was a literal "(unknown)" placeholder).
                        dep.source = f"{entry}/{filename}"
                    all_deps.extend(deps)
                except Exception:
                    pass
    return all_deps
def lookup_pypi_license(package_name: str) -> str:
    """Look up a package's license from the PyPI JSON API.

    Best-effort: returns "UNKNOWN" on any network or parse failure.
    """
    try:
        url = f"https://pypi.org/pypi/{package_name}/json"
        req = urllib.request.Request(url, headers={"Accept": "application/json"})
        # Close the HTTP response deterministically (it was leaked before).
        with urllib.request.urlopen(req, timeout=10) as resp:
            data = json.loads(resp.read())
        info = data.get("info", {})
        # Prefer trove classifiers, e.g. "License :: OSI Approved :: MIT License".
        for classifier in info.get("classifiers", []):
            if classifier.startswith("License ::"):
                parts = classifier.split(" :: ")
                if len(parts) >= 3:
                    return parts[-1]
        # Fall back to the free-text license field; skip huge embedded texts.
        lic = info.get("license", "")
        if lic and len(lic) < 100:
            return lic
        # Newer metadata: SPDX license_expression.
        expr = info.get("license_expression", "")
        return expr if expr else "UNKNOWN"
    except Exception:
        return "UNKNOWN"
def lookup_npm_license(package_name: str) -> str:
    """Look up a package's license from the npm registry (best-effort)."""
    try:
        url = f"https://registry.npmjs.org/{package_name}"
        req = urllib.request.Request(url, headers={"Accept": "application/json"})
        # Close the HTTP response deterministically (it was leaked before).
        with urllib.request.urlopen(req, timeout=10) as resp:
            data = json.loads(resp.read())
        lic = data.get("license", "UNKNOWN")
        if isinstance(lic, dict):
            # Legacy object form: {"type": "MIT", "url": ...}
            lic = lic.get("type", "UNKNOWN")
        return lic or "UNKNOWN"
    except Exception:
        return "UNKNOWN"
def detect_project_license(project_dir: str) -> str:
    """Detect the project's own license from LICENSE-style files or pyproject.toml.

    Heuristic text matching; returns an SPDX id or "UNKNOWN".
    """
    for name in ("LICENSE", "LICENSE.md", "LICENSE.txt", "LICENCE", "COPYING"):
        path = os.path.join(project_dir, name)
        if not os.path.exists(path):
            continue
        # errors="ignore": an oddly-encoded license file must not crash detection.
        with open(path, encoding="utf-8", errors="ignore") as f:
            content = f.read().upper()
        if "MIT LICENSE" in content or "MIT" in content[:200]:
            return "MIT"
        if "APACHE" in content and "2.0" in content:
            return "Apache-2.0"
        if "GNU GENERAL PUBLIC LICENSE" in content:
            if "VERSION 3" in content:
                return "GPL-3.0"
            if "VERSION 2" in content:
                return "GPL-2.0"
        if "BSD" in content[:500]:
            if "3-CLAUSE" in content or "THREE CLAUSE" in content:
                return "BSD-3-Clause"
            return "BSD-2-Clause"
        if "ISC" in content[:200]:
            return "ISC"
    # Fall back to pyproject.toml license metadata.
    pypath = os.path.join(project_dir, "pyproject.toml")
    if os.path.exists(pypath):
        with open(pypath, encoding="utf-8", errors="ignore") as f:
            content = f.read()
        # Table form: license = { text = "..." }
        match = re.search(r'license\s*=\s*\{\s*text\s*=\s*"([^"]+)"', content)
        if match:
            return normalize_license(match.group(1))
        # String form: license = "..."
        match = re.search(r'license\s*=\s*"([^"]+)"', content)
        if match:
            return normalize_license(match.group(1))
    return "UNKNOWN"
def resolve_licenses(deps: list[DepLicense], cache: dict = None) -> None:
    """Fill in dep.license for every dependency, memoizing lookups by name."""
    cache = {} if cache is None else cache
    for dep in deps:
        if dep.name in cache:
            dep.license = cache[dep.name]
            continue
        # package.json deps live on npm; everything else is assumed PyPI.
        lookup = lookup_npm_license if dep.source in ("package.json",) else lookup_pypi_license
        dep.license = normalize_license(lookup(dep.name))
        cache[dep.name] = dep.license
def generate_report(deps: list[DepLicense], project_license: str) -> LicenseReport:
    """Run compatibility checks over deps and assemble a LicenseReport."""
    report = LicenseReport(
        project_dir="",
        project_license=project_license,
        dependencies=[],
    )
    counts = {"ok": 0, "warning": 0, "error": 0, "unknown": 0}
    for dep in deps:
        dep.severity, dep.message = check_compatibility(dep.license, project_license)
        counts[dep.severity.value] += 1
        finding = f"{dep.name}: {dep.message}"
        if dep.severity == Severity.ERROR:
            report.errors.append(finding)
        elif dep.severity == Severity.WARNING:
            report.warnings.append(finding)
        report.dependencies.append(asdict(dep))
    report.summary = {
        "total": len(deps),
        **counts,
        "project_license": project_license,
    }
    return report
def format_text(report: LicenseReport) -> str:
    """Format the report as a human-readable text table."""
    # Severity -> row marker; built once instead of once per dependency.
    icon = {"ok": "[OK]", "warning": "[!!]", "error": "[XX]", "unknown": "[??]"}
    lines = []
    lines.append("=" * 60)
    lines.append(" LICENSE COMPATIBILITY REPORT")
    lines.append("=" * 60)
    lines.append(f" Project License: {report.project_license}")
    lines.append(f" Dependencies: {report.summary.get('total', 0)}")
    lines.append(f" OK: {report.summary.get('ok', 0)} "
                 f"WARN: {report.summary.get('warning', 0)} "
                 f"ERR: {report.summary.get('error', 0)} "
                 f"UNK: {report.summary.get('unknown', 0)}")
    lines.append("-" * 60)
    for dep in report.dependencies:
        sev = dep.get("severity", "unknown")
        # generate_report stores asdict(DepLicense), and dataclasses.asdict
        # leaves Enum members as-is — so sev is a Severity here, and the icon
        # lookup never matched. Map it to its string value first.
        sev = getattr(sev, "value", sev)
        name = dep.get("name", "?")
        lic = dep.get("license", "?")
        msg = dep.get("message", "")
        lines.append(f" {icon.get(sev, '[ ]')} {name:30s} {lic:20s} {msg}")
    if report.errors:
        lines.append("-" * 60)
        lines.append(" ERRORS:")
        for e in report.errors:
            lines.append(f" - {e}")
    if report.warnings:
        lines.append("-" * 60)
        lines.append(" WARNINGS:")
        for w in report.warnings:
            lines.append(f" - {w}")
    lines.append("=" * 60)
    return "\n".join(lines)
def main():
    """CLI entry point: scan a project, resolve dep licenses, print a report."""
    arg_parser = argparse.ArgumentParser(description="License Checker — Pipeline 5.4")
    arg_parser.add_argument("project_dir", help="Project directory to scan")
    arg_parser.add_argument("--project-license", default=None,
                            help="Project license SPDX id (auto-detected if omitted)")
    arg_parser.add_argument("--format", choices=["json", "text"], default="text",
                            help="Output format")
    arg_parser.add_argument("--scan-deps", action="store_true",
                            help="Only scan and list deps (skip license lookup)")
    args = arg_parser.parse_args()

    project_dir = os.path.abspath(args.project_dir)
    if not os.path.isdir(project_dir):
        print(f"Error: {project_dir} is not a directory", file=sys.stderr)
        sys.exit(1)

    # Project license: explicit flag wins, otherwise auto-detect.
    project_license = args.project_license or detect_project_license(project_dir)

    deps = scan_dep_files(project_dir)
    if not deps:
        print(f"No dependencies found in {project_dir}", file=sys.stderr)
        sys.exit(0)
    print(f"Found {len(deps)} dependencies", file=sys.stderr)

    if args.scan_deps:
        for dep in deps:
            print(f" {dep.name} ({dep.source})")
        sys.exit(0)

    print("Resolving licenses...", file=sys.stderr)
    resolve_licenses(deps)

    report = generate_report(deps, project_license)
    report.project_dir = project_dir
    if args.format == "json":
        print(json.dumps(asdict(report), indent=2, default=str))
    else:
        print(format_text(report))
    # Exit code: 1 if errors, 0 otherwise (CI-friendly).
    sys.exit(1 if report.errors else 0)
# Script entry point.
if __name__ == "__main__":
    main()

View File

@@ -1,189 +0,0 @@
#!/usr/bin/env python3
"""Tests for scripts/diff_analyzer.py — 10 tests."""
import sys
import os
# Make the scripts directory importable regardless of the current directory.
sys.path.insert(0, os.path.dirname(__file__) or ".")
# Load diff_analyzer.py by file path so these tests work without a package.
import importlib.util
spec = importlib.util.spec_from_file_location("da", os.path.join(os.path.dirname(__file__) or ".", "diff_analyzer.py"))
mod = importlib.util.module_from_spec(spec)
spec.loader.exec_module(mod)
# Re-export the names under test.
DiffAnalyzer = mod.DiffAnalyzer
ChangeCategory = mod.ChangeCategory
# A brand-new file: one hunk, additions only.
SAMPLE_ADD = """diff --git a/new.py b/new.py
new file mode 100644
--- /dev/null
+++ b/new.py
@@ -0,0 +1,3 @@
+def hello():
+ print("world")
+ return True
"""
# A deleted file: one hunk, deletions only.
SAMPLE_DELETE = """diff --git a/old.py b/old.py
deleted file mode 100644
--- a/old.py
+++ /dev/null
@@ -1,2 +0,0 @@
-def goodbye():
- pass
"""
# An in-place edit: mixed additions and deletions in one hunk.
SAMPLE_MODIFY = """diff --git a/app.py b/app.py
--- a/app.py
+++ b/app.py
@@ -1,3 +1,4 @@
def main():
- print("old")
+ print("new")
+ print("extra")
return 0
"""
# A rename with a one-line content change.
SAMPLE_RENAME = """diff --git a/old_name.py b/new_name.py
rename from old_name.py
rename to new_name.py
--- a/old_name.py
+++ b/new_name.py
@@ -1,1 +1,1 @@
-old content
+new content
"""
# Two files in one diff; note the second header's non-standard "b/" prefix.
SAMPLE_MULTI = """diff --git a/a.py b/a.py
--- a/a.py
+++ b/a.py
@@ -1,1 +1,2 @@
existing
+added line
diff --git b/b.py b/b.py
new file mode 100644
--- /dev/null
+++ b/b.py
@@ -0,0 +1,1 @@
+new file
"""
# A binary change: no hunks to parse.
SAMPLE_BINARY = """diff --git a/img.png b/img.png
Binary files a/img.png and b/img.png differ
"""
def test_empty():
    # An empty diff must yield a summary with no files.
    summary = DiffAnalyzer().analyze("")
    assert summary.total_files_changed == 0
    print("PASS: test_empty")
def test_addition():
    # New-file diff: 3 added lines, no deletions, hunk categorized ADDED.
    summary = DiffAnalyzer().analyze(SAMPLE_ADD)
    assert summary.total_files_changed == 1
    assert summary.total_added == 3
    assert summary.total_deleted == 0
    assert summary.new_files == 1
    assert summary.files[0].hunks[0].category == ChangeCategory.ADDED
    print("PASS: test_addition")
def test_deletion():
    # Deleted-file diff: 2 removed lines, hunk categorized DELETED.
    summary = DiffAnalyzer().analyze(SAMPLE_DELETE)
    assert summary.total_deleted == 2
    assert summary.deleted_files == 1
    assert summary.files[0].hunks[0].category == ChangeCategory.DELETED
    print("PASS: test_deletion")
def test_modification():
    # Mixed hunk: additions and deletions => MODIFIED.
    summary = DiffAnalyzer().analyze(SAMPLE_MODIFY)
    assert summary.total_added == 2
    assert summary.total_deleted == 1
    assert summary.files[0].hunks[0].category == ChangeCategory.MODIFIED
    print("PASS: test_modification")
def test_rename():
    # Rename headers must populate old_path and the new path.
    summary = DiffAnalyzer().analyze(SAMPLE_RENAME)
    renamed = summary.files[0]
    assert summary.renamed_files == 1
    assert renamed.old_path == "old_name.py"
    assert renamed.path == "new_name.py"
    assert renamed.is_renamed == True
    print("PASS: test_rename")
def test_multiple_files():
    # A multi-file diff splits on "diff --git" boundaries.
    summary = DiffAnalyzer().analyze(SAMPLE_MULTI)
    assert summary.total_files_changed == 2
    assert summary.new_files == 1
    print("PASS: test_multiple_files")
def test_binary():
    # Binary diffs are flagged and carry no hunks.
    summary = DiffAnalyzer().analyze(SAMPLE_BINARY)
    binary_file = summary.files[0]
    assert summary.binary_files == 1
    assert binary_file.is_binary == True
    assert len(binary_file.hunks) == 0
    print("PASS: test_binary")
def test_to_dict():
    # Serialization keeps the aggregate keys plus a list of per-file dicts.
    serialized = DiffAnalyzer().analyze(SAMPLE_MODIFY).to_dict()
    assert "total_files_changed" in serialized
    assert "files" in serialized
    assert isinstance(serialized["files"], list)
    print("PASS: test_to_dict")
def test_context_only():
    # Despite the name: the hunk has both '+' and '-' lines, so MODIFIED.
    diff = """diff --git a/f.py b/f.py
--- a/f.py
+++ b/f.py
@@ -1,3 +1,3 @@
line1
-old
+new
line3
"""
    summary = DiffAnalyzer().analyze(diff)
    # Has both added and deleted = MODIFIED
    assert summary.files[0].hunks[0].category == ChangeCategory.MODIFIED
    print("PASS: test_context_only")
def test_multi_hunk():
    # Two hunks in one file: both counted, additions summed across hunks.
    diff = """diff --git a/f.py b/f.py
--- a/f.py
+++ b/f.py
@@ -1,1 +1,2 @@
existing
+first addition
@@ -10,1 +11,2 @@
more
+second addition
"""
    summary = DiffAnalyzer().analyze(diff)
    assert summary.total_hunks == 2
    assert summary.total_added == 2
    print("PASS: test_multi_hunk")
def run_all():
    """Run every test in order and report overall success."""
    tests = [
        test_empty, test_addition, test_deletion, test_modification,
        test_rename, test_multiple_files, test_binary, test_to_dict,
        test_context_only, test_multi_hunk,
    ]
    for test in tests:
        test()
    print("\nAll 10 tests passed!")
# Script entry point.
if __name__ == "__main__":
    run_all()

View File

@@ -0,0 +1,186 @@
#!/usr/bin/env python3
"""Tests for license_checker.py — Pipeline 5.4"""
import json
import os
import sys
import tempfile
import unittest
# Add the scripts dir to sys.path so license_checker imports without a package.
sys.path.insert(0, os.path.dirname(__file__))
from license_checker import (
normalize_license,
check_compatibility,
parse_requirements_txt,
parse_package_json,
parse_pyproject_toml,
parse_go_mod,
detect_project_license,
scan_dep_files,
generate_report,
format_text,
Severity,
DepLicense,
)
class TestNormalizeLicense(unittest.TestCase):
    """normalize_license(): free-form strings map onto SPDX identifiers."""

    def test_mit_aliases(self):
        for raw in ("mit", "MIT License", "The MIT License", "MIT license"):
            self.assertEqual(normalize_license(raw), "MIT")

    def test_apache_aliases(self):
        for raw in ("Apache 2.0", "Apache-2.0", "apache software license"):
            self.assertEqual(normalize_license(raw), "Apache-2.0")

    def test_gpl_aliases(self):
        self.assertEqual(normalize_license("GPL-3.0"), "GPL-3.0")
        self.assertEqual(normalize_license("gplv3"), "GPL-3.0")

    def test_unknown(self):
        self.assertEqual(normalize_license(""), "UNKNOWN")
        self.assertEqual(normalize_license("UNKNOWN"), "UNKNOWN")

    def test_already_spdx(self):
        self.assertEqual(normalize_license("BSD-3-Clause"), "BSD-3-Clause")
class TestCheckCompatibility(unittest.TestCase):
    """check_compatibility(): severity rules for dep vs. project license."""

    def test_permissive_ok(self):
        severity, _ = check_compatibility("MIT", "MIT")
        self.assertEqual(severity, Severity.OK)

    def test_gpl_in_mit_error(self):
        severity, _ = check_compatibility("GPL-3.0", "MIT")
        self.assertEqual(severity, Severity.ERROR)

    def test_unknown_warning(self):
        severity, _ = check_compatibility("UNKNOWN", "MIT")
        self.assertEqual(severity, Severity.WARNING)

    def test_apache_in_mit_ok(self):
        severity, _ = check_compatibility("Apache-2.0", "MIT")
        self.assertEqual(severity, Severity.OK)

    def test_lgpl_in_mit_error(self):
        severity, _ = check_compatibility("LGPL-3.0", "MIT")
        self.assertEqual(severity, Severity.ERROR)
class TestParseRequirements(unittest.TestCase):
    """parse_requirements_txt(): names extracted, comments and flags skipped."""

    def test_basic(self):
        with tempfile.NamedTemporaryFile(mode="w", suffix=".txt", delete=False) as tmp:
            tmp.write("requests>=2.28.0\nflask==2.3.0\n# comment\npytest\n")
            tmp.flush()
        deps = parse_requirements_txt(tmp.name)
        os.unlink(tmp.name)
        names = [dep.name for dep in deps]
        self.assertIn("requests", names)
        self.assertIn("flask", names)
        self.assertIn("pytest", names)
        self.assertEqual(len(deps), 3)

    def test_skip_flags(self):
        with tempfile.NamedTemporaryFile(mode="w", suffix=".txt", delete=False) as tmp:
            tmp.write("-r other.txt\n--index-url https://pypi.org\nreal-dep\n")
            tmp.flush()
        deps = parse_requirements_txt(tmp.name)
        os.unlink(tmp.name)
        self.assertEqual(len(deps), 1)
        self.assertEqual(deps[0].name, "real-dep")
class TestParsePackageJson(unittest.TestCase):
    """parse_package_json(): both dependencies and devDependencies counted."""

    def test_basic(self):
        manifest = {
            "dependencies": {"express": "^4.18.0", "lodash": "^4.17.21"},
            "devDependencies": {"jest": "^29.0.0"},
        }
        with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as tmp:
            json.dump(manifest, tmp)
            tmp.flush()
        deps = parse_package_json(tmp.name)
        os.unlink(tmp.name)
        names = [dep.name for dep in deps]
        self.assertIn("express", names)
        self.assertIn("jest", names)
        self.assertEqual(len(deps), 3)
class TestParseGoMod(unittest.TestCase):
    """parse_go_mod(): require-block entries become deps with versions."""

    def test_basic(self):
        content = """module example.com/mymod
go 1.21
require (
github.com/gin-gonic/gin v1.9.1
github.com/stretchr/testify v1.8.4
)
"""
        with tempfile.NamedTemporaryFile(mode="w", suffix=".mod", delete=False) as tmp:
            tmp.write(content)
            tmp.flush()
        deps = parse_go_mod(tmp.name)
        os.unlink(tmp.name)
        self.assertEqual(len(deps), 2)
        self.assertEqual(deps[0].name, "github.com/gin-gonic/gin")
class TestDetectProjectLicense(unittest.TestCase):
    """detect_project_license(): LICENSE-file heuristics."""

    def test_mit_file(self):
        with tempfile.TemporaryDirectory() as project:
            with open(os.path.join(project, "LICENSE"), "w") as fh:
                fh.write("MIT License\n\nCopyright (c) 2024...\n")
            self.assertEqual(detect_project_license(project), "MIT")

    def test_apache_file(self):
        with tempfile.TemporaryDirectory() as project:
            with open(os.path.join(project, "LICENSE"), "w") as fh:
                fh.write("Apache License Version 2.0...")
            self.assertEqual(detect_project_license(project), "Apache-2.0")

    def test_no_license(self):
        with tempfile.TemporaryDirectory() as project:
            self.assertEqual(detect_project_license(project), "UNKNOWN")
class TestScanDeps(unittest.TestCase):
    """scan_dep_files(): deps collected across ecosystems in one directory."""

    def test_multi_ecosystem(self):
        with tempfile.TemporaryDirectory() as project:
            with open(os.path.join(project, "requirements.txt"), "w") as fh:
                fh.write("flask\nrequests\n")
            with open(os.path.join(project, "package.json"), "w") as fh:
                json.dump({"dependencies": {"express": "^4.0.0"}}, fh)
            names = [dep.name for dep in scan_dep_files(project)]
            self.assertIn("flask", names)
            self.assertIn("express", names)
class TestGenerateReport(unittest.TestCase):
    """generate_report(): per-severity counts plus error/warning lists."""

    def test_basic(self):
        deps = [
            DepLicense(name="flask", license="BSD-3-Clause", source="requirements.txt"),
            DepLicense(name="gpl-pkg", license="GPL-3.0", source="requirements.txt"),
            DepLicense(name="unknown-pkg", license="UNKNOWN", source="requirements.txt"),
        ]
        report = generate_report(deps, "MIT")
        self.assertEqual(report.summary["ok"], 1)
        self.assertEqual(report.summary["error"], 1)
        self.assertEqual(report.summary["warning"], 1)
        self.assertEqual(len(report.errors), 1)
        self.assertIn("gpl-pkg", report.errors[0])

    def test_format_text(self):
        report = generate_report(
            [DepLicense(name="flask", license="BSD-3-Clause", source="requirements.txt")],
            "MIT",
        )
        text = format_text(report)
        self.assertIn("LICENSE COMPATIBILITY REPORT", text)
        self.assertIn("flask", text)
# Run the unittest suite when executed directly.
if __name__ == "__main__":
    unittest.main()