Compare commits


5 Commits

STEP35
9e2514723f Merge fix from test branch: handle package extras
CI: Test / pytest (pull_request) failing after 12s
2026-04-26 20:47:14 -04:00
STEP35
c04227b03b fix: handle package extras in requirements parsing
Fixed regex to correctly extract version spec when package includes
extras like django[argon2]==4.2.0. The previous pattern consumed
the version spec in the non-greedy .*? part when extras were present.
2026-04-26 20:47:05 -04:00
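
The fixed parsing logic lands in scripts/vulnerability_scanner.py further down this diff. A minimal standalone sketch of the extras-aware extraction, using the same regexes as the scanner:

import re

line = "django[argon2]==4.2.0"
# Package name: leading identifier, stopping before extras or version spec
pkg = re.match(r"^([a-zA-Z0-9]([a-zA-Z0-9._-]*[a-zA-Z0-9])?)", line)
name = pkg.group(1).lower()                       # "django"
# Drop the [argon2] extras, then pull the version comparison from what remains
rest = re.sub(r"\[.*?\]", "", line[pkg.end():])   # "==4.2.0"
ver = re.search(r"(===|==|~=|>=|<=|!=)\s*([^\s;]+)", rest)
print(name, ver.group(1) + ver.group(2))          # django ==4.2.0
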
STEP35
a7b14c4b2b test push 2026-04-26 20:43:19 -04:00
STEP35
a46d465d2d feat: add vulnerability scanner for issue #108
CI: Test / pytest (pull_request) failing after 10s
- scripts/vulnerability_scanner.py: scan Python dependencies
  against OSV CVE database
- tests/test_vulnerability_scanner.py: 10 comprehensive tests
- Supports requirements.txt parsing with -r includes
- Outputs text, JSON, and markdown reports
- Filters by severity (critical/high/medium/low)
- Exit codes 0/1/2 for CI integration
2026-04-26 20:40:12 -04:00
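
A hypothetical CI gate built on those exit codes (the --min-severity and --quiet flags and the exit-code convention come from the scanner added in this diff; the gate script itself is an assumption):

import subprocess
import sys

# Exit code 1 = critical/high findings, 2 = lower-severity findings, 0 = clean.
proc = subprocess.run(
    [sys.executable, "scripts/vulnerability_scanner.py", "--min-severity", "high", "--quiet"],
)
if proc.returncode == 1:
    sys.exit("critical/high vulnerabilities found, failing the build")
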
Rockachopa
4b5a675355 feat: add PR complexity scorer — estimate review effort

Implements issue #135: a script that analyzes open PRs and computes
a complexity score (1-10) based on files changed, lines added/removed,
dependency changes, and test coverage delta. Also estimates review time.

The scorer can be run with --dry-run to preview or --apply to post
score comments directly on PRs.

Output: metrics/pr_complexity.json with full analysis.

Closes #135
CI: Test / pytest (push) failing after 10s
2026-04-26 09:34:57 -04:00
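
A worked example of the weighted score for a mid-sized PR (weights, scale factor, and time table copied from pr_complexity_scorer.py below):

# 15 files and ~500 changed lines score 2.0 each; dependency changes add 2.5.
fscore, lscore, dscore, tscore = 2.0, 2.0, 2.5, 0.0
bonus = fscore * 0.25 + lscore * 0.25 + dscore * 0.30 + tscore * 0.20  # 1.75
score = max(1, min(10, round(1.0 + bonus * 3.0)))                      # 6
print(score)  # 6 -> roughly 30 minutes of review per TIME_PER_POINT
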
7 changed files with 1228 additions and 270 deletions

pr_complexity_scorer.py (new file, +351 lines):
@@ -0,0 +1,351 @@
#!/usr/bin/env python3
"""
PR Complexity Scorer - Estimate review effort for PRs.
"""
import argparse
import json
import os
import re
import sys
from dataclasses import dataclass, asdict
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict, List, Optional
import urllib.request
import urllib.error

GITEA_BASE = "https://forge.alexanderwhitestone.com/api/v1"
DEPENDENCY_FILES = {
    "requirements.txt", "pyproject.toml", "setup.py", "setup.cfg",
    "Pipfile", "poetry.lock", "package.json", "yarn.lock", "Gemfile",
    "go.mod", "Cargo.toml", "pom.xml", "build.gradle"
}
TEST_PATTERNS = [
    r"tests?/.*\.py$", r".*_test\.py$", r"test_.*\.py$",
    r"spec/.*\.rb$", r".*_spec\.rb$",
    r"__tests__/", r".*\.test\.(js|ts|jsx|tsx)$"
]
WEIGHT_FILES = 0.25
WEIGHT_LINES = 0.25
WEIGHT_DEPS = 0.30
WEIGHT_TEST_COV = 0.20
SMALL_FILES = 5
MEDIUM_FILES = 20
LARGE_FILES = 50
SMALL_LINES = 100
MEDIUM_LINES = 500
LARGE_LINES = 2000
TIME_PER_POINT = {1: 5, 2: 10, 3: 15, 4: 20, 5: 25, 6: 30, 7: 45, 8: 60, 9: 90, 10: 120}


@dataclass
class PRComplexity:
    pr_number: int
    title: str
    files_changed: int
    additions: int
    deletions: int
    has_dependency_changes: bool
    test_coverage_delta: Optional[int]
    score: int
    estimated_minutes: int
    reasons: List[str]

    def to_dict(self) -> dict:
        return asdict(self)


class GiteaClient:
    def __init__(self, token: str):
        self.token = token
        self.base_url = GITEA_BASE.rstrip("/")

    def _request(self, path: str, params: Optional[Dict] = None) -> Any:
        url = f"{self.base_url}{path}"
        if params:
            qs = "&".join(f"{k}={v}" for k, v in params.items() if v is not None)
            url += f"?{qs}"
        req = urllib.request.Request(url)
        req.add_header("Authorization", f"token {self.token}")
        req.add_header("Content-Type", "application/json")
        try:
            with urllib.request.urlopen(req, timeout=30) as resp:
                return json.loads(resp.read().decode())
        except urllib.error.HTTPError as e:
            print(f"API error {e.code}: {e.read().decode()[:200]}", file=sys.stderr)
            return None
        except urllib.error.URLError as e:
            print(f"Network error: {e}", file=sys.stderr)
            return None

    def get_open_prs(self, org: str, repo: str) -> List[Dict]:
        prs = []
        page = 1
        while True:
            batch = self._request(f"/repos/{org}/{repo}/pulls", {"limit": 50, "page": page, "state": "open"})
            if not batch:
                break
            prs.extend(batch)
            if len(batch) < 50:
                break
            page += 1
        return prs

    def get_pr_files(self, org: str, repo: str, pr_number: int) -> List[Dict]:
        files = []
        page = 1
        while True:
            batch = self._request(
                f"/repos/{org}/{repo}/pulls/{pr_number}/files",
                {"limit": 100, "page": page}
            )
            if not batch:
                break
            files.extend(batch)
            if len(batch) < 100:
                break
            page += 1
        return files

    def post_comment(self, org: str, repo: str, pr_number: int, body: str) -> bool:
        data = json.dumps({"body": body}).encode("utf-8")
        req = urllib.request.Request(
            f"{self.base_url}/repos/{org}/{repo}/issues/{pr_number}/comments",
            data=data,
            method="POST",
            headers={"Authorization": f"token {self.token}", "Content-Type": "application/json"}
        )
        try:
            with urllib.request.urlopen(req, timeout=30) as resp:
                return resp.status in (200, 201)
        except urllib.error.HTTPError:
            return False


def is_dependency_file(filename: str) -> bool:
    return any(filename.endswith(dep) for dep in DEPENDENCY_FILES)


def is_test_file(filename: str) -> bool:
    return any(re.search(pattern, filename) for pattern in TEST_PATTERNS)


def score_pr(
    files_changed: int,
    additions: int,
    deletions: int,
    has_dependency_changes: bool,
    test_coverage_delta: Optional[int] = None
) -> tuple[int, int, List[str]]:
    reasons = []
    # Files changed
    if files_changed <= SMALL_FILES:
        fscore = 1.0
        reasons.append("small number of files changed")
    elif files_changed <= MEDIUM_FILES:
        fscore = 2.0
        reasons.append("moderate number of files changed")
    elif files_changed <= LARGE_FILES:
        fscore = 2.5
        reasons.append("large number of files changed")
    else:
        fscore = 3.0
        reasons.append("very large PR spanning many files")
    # Lines changed
    total_lines = additions + deletions
    if total_lines <= SMALL_LINES:
        lscore = 1.0
        reasons.append("small change size")
    elif total_lines <= MEDIUM_LINES:
        lscore = 2.0
        reasons.append("moderate change size")
    elif total_lines <= LARGE_LINES:
        lscore = 3.0
        reasons.append("large change size")
    else:
        lscore = 4.0
        reasons.append("very large change")
    # Dependency changes
    if has_dependency_changes:
        dscore = 2.5
        reasons.append("dependency changes (architectural impact)")
    else:
        dscore = 0.0
    # Test coverage delta
    tscore = 0.0
    if test_coverage_delta is not None:
        if test_coverage_delta > 0:
            reasons.append(f"test additions (+{test_coverage_delta} test files)")
            tscore = -min(2.0, test_coverage_delta / 2.0)
        elif test_coverage_delta < 0:
            reasons.append(f"test removals ({abs(test_coverage_delta)} test files)")
            tscore = min(2.0, abs(test_coverage_delta) * 0.5)
    else:
        reasons.append("test coverage change not assessed")
    # Weighted sum, scaled by 3 to use full 1-10 range
    bonus = (fscore * WEIGHT_FILES) + (lscore * WEIGHT_LINES) + (dscore * WEIGHT_DEPS) + (tscore * WEIGHT_TEST_COV)
    scaled_bonus = bonus * 3.0
    score = 1.0 + scaled_bonus
    final_score = max(1, min(10, int(round(score))))
    est_minutes = TIME_PER_POINT.get(final_score, 30)
    return final_score, est_minutes, reasons


def analyze_pr(client: GiteaClient, org: str, repo: str, pr_data: Dict) -> PRComplexity:
    pr_num = pr_data["number"]
    title = pr_data.get("title", "")
    files = client.get_pr_files(org, repo, pr_num)
    additions = sum(f.get("additions", 0) for f in files)
    deletions = sum(f.get("deletions", 0) for f in files)
    filenames = [f.get("filename", "") for f in files]
    has_deps = any(is_dependency_file(f) for f in filenames)
    test_added = sum(1 for f in files if f.get("status") == "added" and is_test_file(f.get("filename", "")))
    test_removed = sum(1 for f in files if f.get("status") == "removed" and is_test_file(f.get("filename", "")))
    test_delta = test_added - test_removed if (test_added or test_removed) else None
    score, est_min, reasons = score_pr(
        files_changed=len(files),
        additions=additions,
        deletions=deletions,
        has_dependency_changes=has_deps,
        test_coverage_delta=test_delta
    )
    return PRComplexity(
        pr_number=pr_num,
        title=title,
        files_changed=len(files),
        additions=additions,
        deletions=deletions,
        has_dependency_changes=has_deps,
        test_coverage_delta=test_delta,
        score=score,
        estimated_minutes=est_min,
        reasons=reasons
    )


def build_comment(complexity: PRComplexity) -> str:
    change_desc = f"{complexity.files_changed} files, +{complexity.additions}/-{complexity.deletions} lines"
    deps_note = "\n- :warning: Dependency changes detected — architectural review recommended" if complexity.has_dependency_changes else ""
    test_note = ""
    if complexity.test_coverage_delta is not None:
        if complexity.test_coverage_delta > 0:
            test_note = f"\n- :+1: {complexity.test_coverage_delta} test file(s) added"
        elif complexity.test_coverage_delta < 0:
            test_note = f"\n- :warning: {abs(complexity.test_coverage_delta)} test file(s) removed"
    comment = f"## 📊 PR Complexity Analysis\n\n"
    comment += f"**PR #{complexity.pr_number}: {complexity.title}**\n\n"
    comment += f"| Metric | Value |\n|--------|-------|\n"
    comment += f"| Changes | {change_desc} |\n"
    comment += f"| Complexity Score | **{complexity.score}/10** |\n"
    comment += f"| Estimated Review Time | ~{complexity.estimated_minutes} minutes |\n\n"
    comment += f"### Scoring rationale:"
    for r in complexity.reasons:
        comment += f"\n- {r}"
    if deps_note:
        comment += deps_note
    if test_note:
        comment += test_note
    comment += f"\n\n---\n"
    comment += f"*Generated by PR Complexity Scorer — [issue #135](https://forge.alexanderwhitestone.com/Timmy_Foundation/compounding-intelligence/issues/135)*"
    return comment


def main():
    parser = argparse.ArgumentParser(description="PR Complexity Scorer")
    parser.add_argument("--org", default="Timmy_Foundation")
    parser.add_argument("--repo", default="compounding-intelligence")
    parser.add_argument("--token", default=os.environ.get("GITEA_TOKEN") or os.path.expanduser("~/.config/gitea/token"))
    parser.add_argument("--dry-run", action="store_true")
    parser.add_argument("--apply", action="store_true")
    parser.add_argument("--output", default="metrics/pr_complexity.json")
    args = parser.parse_args()
    # --token may be either a literal token or a path to a token file
    token_path = args.token
    if os.path.exists(token_path):
        with open(token_path) as f:
            token = f.read().strip()
    else:
        token = args.token
    if not token:
        print("ERROR: No Gitea token provided", file=sys.stderr)
        sys.exit(1)
    client = GiteaClient(token)
    print(f"Fetching open PRs for {args.org}/{args.repo}...")
    prs = client.get_open_prs(args.org, args.repo)
    if not prs:
        print("No open PRs found.")
        sys.exit(0)
    print(f"Found {len(prs)} open PR(s). Analyzing...")
    results = []
    Path(args.output).parent.mkdir(parents=True, exist_ok=True)
    for pr in prs:
        pr_num = pr["number"]
        title = pr.get("title", "")
        print(f" Analyzing PR #{pr_num}: {title[:60]}")
        try:
            complexity = analyze_pr(client, args.org, args.repo, pr)
            results.append(complexity.to_dict())
            comment = build_comment(complexity)
            if args.dry_run:
                print(f" → Score: {complexity.score}/10, Est: {complexity.estimated_minutes}min [DRY-RUN]")
            elif args.apply:
                success = client.post_comment(args.org, args.repo, pr_num, comment)
                status = "[commented]" if success else "[FAILED]"
                print(f" → Score: {complexity.score}/10, Est: {complexity.estimated_minutes}min {status}")
            else:
                print(f" → Score: {complexity.score}/10, Est: {complexity.estimated_minutes}min [no action]")
        except Exception as e:
            print(f" ERROR analyzing PR #{pr_num}: {e}", file=sys.stderr)
    with open(args.output, "w") as f:
        json.dump({
            "org": args.org,
            "repo": args.repo,
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "pr_count": len(results),
            "results": results
        }, f, indent=2)
    if results:
        scores = [r["score"] for r in results]
        print(f"\nResults saved to {args.output}")
        print(f"Summary: {len(results)} PRs, scores range {min(scores):.0f}-{max(scores):.0f}")
    else:
        print("\nNo results to save.")


if __name__ == "__main__":
    main()
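
A small sketch of consuming the report the scorer writes (the keys match the json.dump call in main() above; the path is the --output default):

import json

with open("metrics/pr_complexity.json") as f:
    report = json.load(f)
# Rank open PRs by estimated review effort, heaviest first
for r in sorted(report["results"], key=lambda r: r["score"], reverse=True):
    print(f"#{r['pr_number']}: {r['score']}/10, ~{r['estimated_minutes']} min, {r['title']}")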

scripts/security_patch_applier.py (deleted, -249 lines):
@@ -1,249 +0,0 @@
#!/usr/bin/env python3
"""
Security Patch Applier — 5.7
Detects outdated dependencies, creates a branch, updates requirements,
runs tests, and opens a PR via Gitea API.
Usage:
    python3 scripts/security_patch_applier.py
    python3 scripts/security_patch_applier.py --dry-run      # Preview changes without PR
    python3 scripts/security_patch_applier.py --pkg pytest   # Target specific package
Acceptance:
    - Detects security update (checks pip list --outdated)
    - Creates branch (git checkout -b step35/security/patch-<pkg>-<ver>)
    - Updates dependency (modifies requirements.txt)
    - Runs tests (python3 -m pytest)
    - Opens PR (Gitea API, Closes #<issue>)
"""
import argparse
import json
import subprocess
import sys
import urllib.error
import urllib.request
from pathlib import Path
from typing import Optional, Tuple

REPO_ROOT = Path(__file__).resolve().parent.parent
REQUIREMENTS_PATH = REPO_ROOT / "requirements.txt"
GITEA_TOKEN_PATH = Path.home() / ".config" / "gitea" / "token"
GITEA_API_BASE = "https://forge.alexanderwhitestone.com/api/v1"
GITEA_OWNER = "Timmy_Foundation"
GITEA_REPO = "compounding-intelligence"


def run_cmd(cmd: list[str], check: bool = True, capture: bool = True) -> subprocess.CompletedProcess:
    """Run a subprocess, return result."""
    result = subprocess.run(
        cmd,
        cwd=REPO_ROOT,
        capture_output=capture,
        text=True
    )
    if check and result.returncode != 0:
        print(f"ERROR: {' '.join(cmd)} failed with code {result.returncode}")
        print(result.stderr)
        sys.exit(result.returncode)
    return result


def get_outdated_packages() -> list[dict]:
    """Return list of outdated packages from pip list --outdated."""
    result = run_cmd([sys.executable, "-m", "pip", "list", "--outdated", "--format=json"])
    return json.loads(result.stdout)


def parse_requirements() -> list[Tuple[str, str]]:
    """Parse requirements.txt into list of (raw_line, package_name_lower)."""
    if not REQUIREMENTS_PATH.exists():
        print(f"ERROR: requirements.txt not found at {REQUIREMENTS_PATH}")
        sys.exit(1)
    lines = REQUIREMENTS_PATH.read_text().splitlines()
    parsed = []
    for line in lines:
        stripped = line.strip()
        if not stripped or stripped.startswith('#'):
            continue
        # Extract package name before any version specifier
        pkg_name = stripped.split()[0].split('>=')[0].split('==')[0].split('~=')[0].split('<')[0].split('>')[0].lower()
        parsed.append((stripped, pkg_name))
    return parsed


def update_requirements(package: str, new_version: str) -> bool:
    """Update the version specifier for package in requirements.txt. Return True if changed."""
    lines = REQUIREMENTS_PATH.read_text().splitlines()
    updated = False
    new_lines = []
    for line in lines:
        stripped = line.strip()
        if not stripped or stripped.startswith('#'):
            new_lines.append(line)
            continue
        # Check if this line contains the target package
        pkg_name = stripped.split()[0].split('>=')[0].split('==')[0].split('~=')[0].split('<')[0].split('>')[0].lower()
        if pkg_name == package.lower():
            # Replace version spec with new version using >=,
            # preserving the original package name case
            original_pkg = stripped.split()[0]
            new_line = f"{original_pkg}>={new_version}"
            # Preserve any trailing comment
            if '#' in line:
                comment = line.split('#', 1)[1]
                new_line += f" #{comment}"
            new_lines.append(new_line)
            updated = True
        else:
            new_lines.append(line)
    if updated:
        REQUIREMENTS_PATH.write_text('\n'.join(new_lines) + '\n')
        return True
    return False


def create_branch(branch_name: str) -> bool:
    """Create and checkout a new branch."""
    # Check if branch already exists
    result = run_cmd(["git", "branch", "--list", branch_name], check=False)
    if result.stdout.strip():
        print(f"Branch {branch_name} already exists.")
        return False
    run_cmd(["git", "checkout", "-b", branch_name])
    return True


def run_tests() -> bool:
    """Run pytest. Return True if all pass."""
    print("\nRunning tests...")
    # Expand the scripts/test_*.py glob ourselves: subprocess.run without a shell
    # does not glob, and pytest would treat the literal pattern as a missing path.
    script_tests = sorted(str(p) for p in (REPO_ROOT / "scripts").glob("test_*.py"))
    result = run_cmd(
        [sys.executable, "-m", "pytest", "tests/test_ci_config.py", *script_tests, "-v"],
        check=False
    )
    return result.returncode == 0


def get_gitea_token() -> str:
    """Read Gitea token from file."""
    if not GITEA_TOKEN_PATH.exists():
        print(f"ERROR: Gitea token not found at {GITEA_TOKEN_PATH}")
        sys.exit(1)
    return GITEA_TOKEN_PATH.read_text().strip()


def create_gitea_pr(title: str, body: str, head: str, base: str = "main") -> int:
    """Create a pull request via Gitea API. Return PR number."""
    token = get_gitea_token()
    payload = json.dumps({
        "title": title,
        "body": body,
        "head": head,
        "base": base
    }).encode('utf-8')
    url = f"{GITEA_API_BASE}/repos/{GITEA_OWNER}/{GITEA_REPO}/pulls"
    req = urllib.request.Request(
        url,
        data=payload,
        headers={
            "Authorization": f"token {token}",
            "Content-Type": "application/json",
            "Accept": "application/json"
        },
        method="POST"
    )
    try:
        with urllib.request.urlopen(req, timeout=15) as resp:
            data = json.loads(resp.read())
            return data["number"]
    except urllib.error.HTTPError as e:
        body = e.read().decode('utf-8')
        print(f"ERROR: Gitea API returned {e.code}: {body}")
        sys.exit(1)


def main():
    parser = argparse.ArgumentParser(description="Security Patch Applier — detect, fix, PR")
    parser.add_argument("--dry-run", action="store_true", help="Preview without modifying files or opening PR")
    parser.add_argument("--pkg", help="Target specific package (skip detection)")
    parser.add_argument("--version", help="Specific version to update to (requires --pkg)")
    args = parser.parse_args()
    # Step 1: Detect outdated packages (security patches)
    if args.pkg:
        # Manual mode
        if not args.version:
            print("ERROR: --version required when using --pkg")
            sys.exit(1)
        outdated = [{"name": args.pkg, "latest_version": args.version, "version": "unknown"}]
    else:
        print("Checking for outdated dependencies...")
        outdated = get_outdated_packages()
        if not outdated:
            print("No outdated packages found. System is up-to-date.")
            sys.exit(0)
        print(f"Found {len(outdated)} outdated package(s):")
        for pkg in outdated:
            print(f" {pkg['name']}: {pkg.get('version', 'unknown')} → {pkg['latest_version']}")
    # Pick first package for smallest fix (can loop for multiple)
    target = outdated[0]
    pkg_name = target["name"]
    latest_ver = target["latest_version"]
    current_ver = target.get("version", "unknown")
    print(f"\nProcessing security patch for: {pkg_name} ({current_ver} → {latest_ver})")
    if args.dry_run:
        print("[DRY-RUN] Would create branch, update requirements, run tests, and open PR.")
        sys.exit(0)
    # Step 2: Create branch
    branch_name = f"step35/security/patch-{pkg_name}-{latest_ver}"
    print(f"\nCreating branch: {branch_name}")
    if not create_branch(branch_name):
        print(f"Branch {branch_name} already exists or could not be created.")
        # Continue anyway? Let's exit
        sys.exit(1)
    # Step 3: Update requirements.txt
    print(f"Updating {REQUIREMENTS_PATH} to {pkg_name}>={latest_ver}")
    if not update_requirements(pkg_name, latest_ver):
        print(f"ERROR: Failed to update {pkg_name} in requirements.txt")
        sys.exit(1)
    print("Updated requirements.txt")
    # Step 4: Run tests
    if not run_tests():
        print("ERROR: Tests failed. Aborting PR creation.")
        # Could revert branch? For minimal fix, just exit with error
        sys.exit(1)
    print("Tests passed.")
    # Step 5: Commit changes
    commit_msg = (
        f"security: update {pkg_name} to {latest_ver}\n\n"
        f"Detected outdated dependency via pip list --outdated.\n\n"
        f"Refs: #113"
    )
    run_cmd(["git", "add", "requirements.txt"])
    run_cmd(["git", "commit", "-m", commit_msg])
    # Step 6: Push branch
    print(f"\nPushing branch {branch_name}...")
    result = run_cmd(["git", "push", "origin", branch_name], check=False)
    if result.returncode != 0:
        print(f"ERROR: Push failed: {result.stderr}")
        sys.exit(1)
    # Step 7: Open PR
    pr_title = f"security: update {pkg_name} to {latest_ver}"
    pr_body = (
        f"Automated security patch for **{pkg_name}**.\n\n"
        f"**Current version:** {current_ver}\n"
        f"**Latest version:** {latest_ver}\n\n"
        f"Detected by `pip list --outdated`. Tests passed locally.\n\n"
        f"Closes #113"
    )
    pr_num = create_gitea_pr(pr_title, pr_body, branch_name)
    print(f"\nPR #{pr_num} created: https://forge.alexanderwhitestone.com/{GITEA_OWNER}/{GITEA_REPO}/pulls/{pr_num}")


if __name__ == "__main__":
    main()
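
A minimal sketch of the line rewrite update_requirements performs (same rule: the pin becomes a >= floor, with name case and trailing comment preserved; the sample line is hypothetical):

line = "Requests==2.28.0 # http client"
new_version = "2.32.0"
name = line.strip().split()[0].split('==')[0]  # keeps original case
new_line = f"{name}>={new_version}"
if '#' in line:
    new_line += " #" + line.split('#', 1)[1]
print(new_line)  # Requests>=2.32.0 # http client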

Tests for the PR complexity scorer (new file, +170 lines):
@@ -0,0 +1,170 @@
#!/usr/bin/env python3
"""
Tests for PR Complexity Scorer — unit tests for the scoring logic.
"""
import sys
from pathlib import Path

sys.path.insert(0, str(Path(__file__).parent))
from pr_complexity_scorer import (
    score_pr,
    is_dependency_file,
    is_test_file,
    TIME_PER_POINT,
    SMALL_FILES,
    MEDIUM_FILES,
    LARGE_FILES,
    SMALL_LINES,
    MEDIUM_LINES,
    LARGE_LINES,
)

PASS = 0
FAIL = 0


def test(name):
    def decorator(fn):
        global PASS, FAIL
        try:
            fn()
            PASS += 1
            print(f" [PASS] {name}")
        except AssertionError as e:
            FAIL += 1
            print(f" [FAIL] {name}: {e}")
        except Exception as e:
            FAIL += 1
            print(f" [FAIL] {name}: Unexpected error: {e}")
    return decorator


def assert_eq(a, b, msg=""):
    if a != b:
        raise AssertionError(f"{msg} expected {b!r}, got {a!r}")


def assert_true(v, msg=""):
    if not v:
        raise AssertionError(msg or "Expected True")


def assert_false(v, msg=""):
    if v:
        raise AssertionError(msg or "Expected False")


print("=== PR Complexity Scorer Tests ===\n")
print("-- File Classification --")


@test("dependency file detection — requirements.txt")
def _():
    assert_true(is_dependency_file("requirements.txt"))
    assert_true(is_dependency_file("src/requirements.txt"))
    assert_false(is_dependency_file("requirements_test.txt"))


@test("dependency file detection — pyproject.toml")
def _():
    assert_true(is_dependency_file("pyproject.toml"))
    assert_false(is_dependency_file("myproject.py"))


@test("test file detection — pytest style")
def _():
    assert_true(is_test_file("tests/test_api.py"))
    assert_true(is_test_file("test_module.py"))
    assert_true(is_test_file("src/module_test.py"))


@test("test file detection — other frameworks")
def _():
    assert_true(is_test_file("spec/feature_spec.rb"))
    assert_true(is_test_file("__tests__/component.test.js"))
    assert_false(is_test_file("testfixtures/helper.py"))


print("\n-- Scoring Logic --")


@test("small PR gets low score (1-3)")
def _():
    score, minutes, _ = score_pr(
        files_changed=3,
        additions=50,
        deletions=10,
        has_dependency_changes=False,
        test_coverage_delta=None
    )
    assert_true(1 <= score <= 3, f"Score should be low, got {score}")
    assert_true(minutes < 20)


@test("medium PR gets medium score (4-6)")
def _():
    score, minutes, _ = score_pr(
        files_changed=15,
        additions=400,
        deletions=100,
        has_dependency_changes=False,
        test_coverage_delta=None
    )
    assert_true(4 <= score <= 6, f"Score should be medium, got {score}")
    assert_true(20 <= minutes <= 45)


@test("large PR gets high score (7-9)")
def _():
    score, minutes, _ = score_pr(
        files_changed=60,
        additions=3000,
        deletions=1500,
        has_dependency_changes=True,
        test_coverage_delta=None
    )
    assert_true(7 <= score <= 9, f"Score should be high, got {score}")
    assert_true(minutes >= 45)


@test("dependency changes boost score")
def _():
    base_score, _, _ = score_pr(
        files_changed=10, additions=200, deletions=50,
        has_dependency_changes=False, test_coverage_delta=None
    )
    dep_score, _, _ = score_pr(
        files_changed=10, additions=200, deletions=50,
        has_dependency_changes=True, test_coverage_delta=None
    )
    assert_true(dep_score > base_score, f"Deps: {base_score} -> {dep_score}")


@test("adding tests lowers complexity")
def _():
    base_score, _, _ = score_pr(
        files_changed=8, additions=150, deletions=20,
        has_dependency_changes=False, test_coverage_delta=None
    )
    better_score, _, _ = score_pr(
        files_changed=8, additions=180, deletions=20,
        has_dependency_changes=False, test_coverage_delta=3
    )
    assert_true(better_score < base_score, f"Tests: {base_score} -> {better_score}")


@test("removing tests increases complexity")
def _():
    base_score, _, _ = score_pr(
        files_changed=8, additions=150, deletions=20,
        has_dependency_changes=False, test_coverage_delta=None
    )
    worse_score, _, _ = score_pr(
        files_changed=8, additions=150, deletions=20,
        has_dependency_changes=False, test_coverage_delta=-2
    )
    assert_true(worse_score > base_score, f"Remove tests: {base_score} -> {worse_score}")


@test("score bounded 1-10")
def _():
    for files, adds, dels in [(1, 10, 5), (100, 10000, 5000)]:
        score, _, _ = score_pr(files, adds, dels, False, None)
        assert_true(1 <= score <= 10, f"Score {score} out of range")


@test("estimated minutes exist for all scores")
def _():
    for s in range(1, 11):
        assert_true(s in TIME_PER_POINT, f"Missing time for score {s}")


print(f"\n=== Results: {PASS} passed, {FAIL} failed ===")
sys.exit(0 if FAIL == 0 else 1)

Smoke test for security_patch_applier (deleted, -21 lines):
@@ -1,21 +0,0 @@
#!/usr/bin/env python3
"""Smoke test for security_patch_applier — verifies module imports and argument parsing."""
import os
import subprocess
import sys

# The module under test lives alongside this file in scripts/;
# make it importable regardless of the working directory.
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))


def test_imports():
    import security_patch_applier
    assert hasattr(security_patch_applier, 'main')


def test_help():
    result = subprocess.run(
        [sys.executable, 'scripts/security_patch_applier.py', '--help'],
        capture_output=True, text=True
    )
    assert result.returncode == 0
    assert 'Security Patch Applier' in result.stdout or '--dry-run' in result.stdout


if __name__ == '__main__':
    test_imports()
    test_help()
    print("OK")

scripts/vulnerability_scanner.py (new file, +470 lines):
@@ -0,0 +1,470 @@
#!/usr/bin/env python3
"""
vulnerability_scanner.py — Check Python dependencies against CVE databases (Issue #108)
Scans requirements.txt (or any pip-compatible dependency file) and queries
the Open Source Vulnerability (OSV) database for known security issues.
OSV API: https://api.osv.dev/v1/query (free, no auth, PyPI ecosystem supported)
Output:
    - Human-readable summary on stdout
    - JSON report with full vulnerability details
    - Exit code: 0 if no vulnerabilities found, 1 if critical/high found, 2 otherwise
Usage:
    python3 scripts/vulnerability_scanner.py
    python3 scripts/vulnerability_scanner.py --deps requirements.txt --output json
    python3 scripts/vulnerability_scanner.py --min-severity high
    python3 scripts/vulnerability_scanner.py --deps requirements.txt --output markdown
"""
import argparse
import json
import os
import re
import sys
import urllib.request
import urllib.error
from dataclasses import dataclass
from pathlib import Path
from typing import Dict, List, Optional, Tuple

# --- Configuration ---
OSV_API_URL = "https://api.osv.dev/v1/query"
DEFAULT_REQUIREMENTS_PATH = "requirements.txt"
# Ordered most to least severe; lower index = more severe
SEVERITY_LEVELS = ["critical", "high", "medium", "low", "unknown"]
# Map OSV severities to our buckets
CVSS_SEVERITY_MAP = {
    "CRITICAL": "critical",
    "HIGH": "high",
    "MEDIUM": "medium",
    "LOW": "low",
    "NONE": "none",
}


# --- Data Structures ---
@dataclass
class Vulnerability:
    """A single vulnerability finding."""
    package: str
    version: str
    vuln_id: str
    severity: str
    cvss_score: Optional[float]
    summary: str
    details_url: str
    fixed_versions: List[str]


@dataclass
class ScanResult:
    """Results from a vulnerability scan."""
    scanned_packages: int
    vulnerabilities: List[Vulnerability]
    errors: List[Tuple[str, str]]  # (package, error_message)


# --- Requirement Parsing ---
def parse_requirements_file(path: str) -> Dict[str, str]:
    """
    Parse a requirements.txt file into {package_name: version_spec}.
    Handles:
    - pkg==1.2.3
    - pkg>=1.0.0
    - pkg[extra]==1.2.3
    - -e/--editable entries (skipped)
    - -r inclusions (recursive, limited depth)
    - comments and blank lines
    """
    packages = {}
    processed_includes = set()

    def parse_line(line: str, filename: str, depth: int = 0) -> None:
        if depth > 3:
            print(f"WARNING: Max include depth exceeded in {filename}", file=sys.stderr)
            return
        line = line.strip()
        if not line or line.startswith('#'):
            return
        # Handle -r or --requirement includes
        if line.startswith('-r ') or line.startswith('--requirement '):
            if depth >= 3:
                return
            include_path = line.split(None, 1)[1].strip()
            # Resolve relative to current file's directory
            base_dir = os.path.dirname(os.path.abspath(filename))
            full_path = os.path.join(base_dir, include_path)
            if full_path not in processed_includes:
                processed_includes.add(full_path)
                try:
                    with open(full_path, 'r', encoding='utf-8') as f:
                        for incl_line in f:
                            parse_line(incl_line, full_path, depth + 1)
                except FileNotFoundError:
                    print(f"WARNING: Could not read included file: {full_path}", file=sys.stderr)
            return
        # Skip editable installs (-e/--editable) and all other option flags
        if line.startswith('-'):
            return
        # Strip inline comment before extracting name and version spec.
        # Handles: pkg==1.2.3, pkg>=1.0, pkg[extra]==1.2.3, pkg ~= 1.0
        line = line.split('#', 1)[0].strip()
        if not line:
            return
        # Extract package name: leading identifier before any extras or version spec
        pkg_match = re.match(r'^([a-zA-Z0-9]([a-zA-Z0-9._-]*[a-zA-Z0-9])?)', line)
        if not pkg_match:
            return
        pkg_name = pkg_match.group(1).lower()
        # Strip extras [extra] from remainder
        remainder = line[pkg_match.end():]
        remainder = re.sub(r'\[.*?\]', '', remainder)
        # Extract version comparison
        version = ""
        ver_match = re.search(r'(===|==|~=|>=|<=|!=)\s*([^\s;]+)', remainder)
        if ver_match:
            version = ver_match.group(1) + ver_match.group(2)
        packages[pkg_name] = version

    # Read and parse the file
    try:
        with open(path, 'r', encoding='utf-8') as f:
            for line in f:
                parse_line(line, path, 0)
    except FileNotFoundError:
        print(f"ERROR: Requirements file not found: {path}", file=sys.stderr)
        sys.exit(1)
    return packages


# --- OSV API Queries ---
def query_osv(package: str, version: str) -> List[dict]:
    """
    Query the OSV API for vulnerabilities affecting a specific package version.
    Returns list of vulnerability dicts (raw API response) or empty list on error.
    """
    # OSV expects a concrete version, not a range. For pinned specs (==/===)
    # query the exact version; otherwise query without a version to get all
    # vulns for the package and let the caller filter.
    pin = re.match(r'^===?\s*([0-9][^\s,;]*)$', version)
    query_version = pin.group(1) if pin else None
    payload = {
        "package": {
            "name": package,
            "ecosystem": "PyPI"
        }
    }
    if query_version:
        payload["version"] = query_version
    data = json.dumps(payload).encode('utf-8')
    req = urllib.request.Request(
        OSV_API_URL,
        data=data,
        headers={'Content-Type': 'application/json'},
        method='POST'
    )
    try:
        with urllib.request.urlopen(req, timeout=15) as response:
            result = json.loads(response.read().decode('utf-8'))
            return result.get('vulns', []) + result.get('vulnerabilities', [])
    except urllib.error.HTTPError as e:
        if e.code == 404:
            return []  # No vulnerabilities found
        print(f"WARNING: OSV query failed for {package}: HTTP {e.code}", file=sys.stderr)
    except (urllib.error.URLError, json.JSONDecodeError, TimeoutError) as e:
        print(f"WARNING: OSV query failed for {package}: {e}", file=sys.stderr)
    return []


def parse_osv_vuln(raw_vulns: List[dict], package: str, version_spec: str) -> List[Vulnerability]:
    """
    Parse raw OSV API responses into Vulnerability objects.
    """
    vulns = []
    for v in raw_vulns:
        vuln_id = v.get('id', 'UNKNOWN')
        summary = v.get('summary', 'No summary provided.')
        # Severity from CVSS (prefer CVSS v3; fall back to v2 for the score only)
        severity = "unknown"
        cvss_score = None
        for sev_info in v.get('severity', []):
            if sev_info.get('type') == 'CVSS_V3':
                score = sev_info.get('score', '')
                if isinstance(score, dict):
                    cvss_score = score.get('baseScore')
                    sev_str = score.get('baseSeverity', '').upper()
                    severity = CVSS_SEVERITY_MAP.get(sev_str, 'unknown')
                break
            elif sev_info.get('type') == 'CVSS_V2':
                score = sev_info.get('score', '')
                if isinstance(score, dict):
                    cvss_score = score.get('baseScore')
                # CVSS v2 carries no base severity here; leave severity "unknown"
        # Collect fixed versions from affected ranges
        fixed_versions = []
        for aff in v.get('affected', []):
            for r in aff.get('ranges', []):
                for event in r.get('events', []):
                    if event.get('fixed'):
                        fixed_versions.append(event['fixed'])
        # Build details URL
        details_url = f"https://osv.dev/vulnerability/{vuln_id}"
        vulns.append(Vulnerability(
            package=package,
            version=version_spec,
            vuln_id=vuln_id,
            severity=severity,
            cvss_score=cvss_score,
            summary=summary,
            details_url=details_url,
            fixed_versions=list(set(fixed_versions))
        ))
    return vulns


# --- Filtering & Reporting ---
def filter_by_severity(vulns: List[Vulnerability], min_severity: str) -> List[Vulnerability]:
    """Filter vulnerabilities to include only those at or above the given severity."""
    if min_severity.lower() not in SEVERITY_LEVELS:
        return vulns  # No filtering if invalid
    min_idx = SEVERITY_LEVELS.index(min_severity.lower())
    filtered = []
    for v in vulns:
        sev = v.severity.lower()
        # Severities outside the known list (e.g. "none") are treated as "unknown"
        sev_idx = SEVERITY_LEVELS.index(sev) if sev in SEVERITY_LEVELS else SEVERITY_LEVELS.index("unknown")
        if sev_idx <= min_idx:  # lower index = more severe
            filtered.append(v)
    return filtered


def generate_text_report(result: ScanResult, packages: Dict[str, str]) -> str:
    """Generate human-readable text report."""
    lines = []
    lines.append("=" * 60)
    lines.append("Vulnerability Scan Report")
    lines.append("=" * 60)
    lines.append(f"Packages scanned: {result.scanned_packages}")
    lines.append(f"Vulnerabilities found: {len(result.vulnerabilities)}")
    if result.errors:
        lines.append(f"Errors: {len(result.errors)}")
    # Group by severity
    by_severity: Dict[str, List[Vulnerability]] = {}
    for v in result.vulnerabilities:
        by_severity.setdefault(v.severity.upper(), []).append(v)
    for sev in ["CRITICAL", "HIGH", "MEDIUM", "LOW", "UNKNOWN"]:
        vuln_list = by_severity.get(sev, [])
        if vuln_list:
            lines.append(f"\n{sev}: {len(vuln_list)}")
            for v in vuln_list:
                lines.append(f" [{v.package} {packages.get(v.package, '')}] {v.vuln_id}")
                lines.append(f" {v.summary[:80]}")
                if v.cvss_score:
                    lines.append(f" CVSS: {v.cvss_score}")
                if v.fixed_versions:
                    lines.append(f" Fixed in: {', '.join(v.fixed_versions[:3])}")
                lines.append(f" {v.details_url}")
    if result.errors:
        lines.append("\nERRORS:")
        for pkg, err in result.errors[:10]:
            lines.append(f" {pkg}: {err}")
    lines.append("\n" + "=" * 60)
    return "\n".join(lines)


def generate_json_report(result: ScanResult, packages: Dict[str, str]) -> str:
    """Generate JSON report."""
    report = {
        "scanned_packages": result.scanned_packages,
        "vulnerabilities": [
            {
                "package": v.package,
                "version_spec": packages.get(v.package, v.version),
                "vulnerability_id": v.vuln_id,
                "severity": v.severity,
                "cvss_score": v.cvss_score,
                "summary": v.summary,
                "details_url": v.details_url,
                "fixed_versions": v.fixed_versions,
            }
            for v in result.vulnerabilities
        ],
        "errors": [{"package": p, "error": e} for p, e in result.errors],
    }
    return json.dumps(report, indent=2)


# --- Main Orchestration ---
def run_scan(
    deps_path: str,
    min_severity: str = "low",
    query_osv_api: bool = True
) -> ScanResult:
    """
    Execute the full vulnerability scan pipeline.
    Args:
        deps_path: Path to requirements-style file
        min_severity: Minimum severity to include in results
        query_osv_api: If False, skip API calls (for testing/dry-run)
    Returns:
        ScanResult with all findings
    """
    # 1. Parse dependencies
    packages = parse_requirements_file(deps_path)
    if not packages:
        return ScanResult(scanned_packages=0, vulnerabilities=[], errors=[])
    # 2. Query OSV for each package
    vulnerabilities: List[Vulnerability] = []
    errors: List[Tuple[str, str]] = []
    for pkg, version_spec in packages.items():
        if not query_osv_api:
            continue
        raw_vulns = query_osv(pkg, version_spec or "")
        if raw_vulns:
            vulnerabilities.extend(parse_osv_vuln(raw_vulns, pkg, version_spec or ""))
    # 3. Filter by severity
    filtered = filter_by_severity(vulnerabilities, min_severity)
    # 4. Build result
    return ScanResult(
        scanned_packages=len(packages),
        vulnerabilities=filtered,
        errors=errors
    )


def main() -> int:
    parser = argparse.ArgumentParser(
        description="Scan Python dependencies for known vulnerabilities using OSV database"
    )
    parser.add_argument(
        '--deps', '-d',
        default=DEFAULT_REQUIREMENTS_PATH,
        help='Path to requirements.txt (default: requirements.txt)'
    )
    parser.add_argument(
        '--output', '-o',
        choices=['text', 'json', 'markdown'],
        default='text',
        help='Output format (default: text)'
    )
    parser.add_argument(
        '--min-severity',
        default='low',
        choices=SEVERITY_LEVELS,
        help='Minimum severity to report (default: low)'
    )
    parser.add_argument(
        '--json',
        action='store_true',
        help='Output JSON (shorthand for --output json)'
    )
    parser.add_argument(
        '--quiet', '-q',
        action='store_true',
        help='Only print summary, skip detailed vulnerability list'
    )
    args = parser.parse_args()
    # Update output if --json flag is used
    if args.json:
        args.output = 'json'
    # Run the scan
    result = run_scan(args.deps, args.min_severity, query_osv_api=True)
    # Output
    if args.output == 'json':
        print(generate_json_report(result, parse_requirements_file(args.deps)))
    elif args.output == 'markdown':
        # Simple markdown table
        print("# Vulnerability Scan Report\n")
        print(f"**Packages scanned:** {result.scanned_packages}")
        print(f"**Vulnerabilities:** {len(result.vulnerabilities)}\n")
        if result.vulnerabilities:
            print("| Severity | Package | Version | Vuln ID | Summary |")
            print("|----------|---------|---------|---------|---------|")
            for v in result.vulnerabilities:
                print(f"| {v.severity.upper()} | {v.package} | {v.version} | [{v.vuln_id}]({v.details_url}) | {v.summary[:50]} |")
            print("\n")
    else:
        # text (default)
        if not args.quiet:
            print(generate_text_report(result, parse_requirements_file(args.deps)))
        else:
            crit = sum(1 for v in result.vulnerabilities if v.severity == 'critical')
            high = sum(1 for v in result.vulnerabilities if v.severity == 'high')
            med = sum(1 for v in result.vulnerabilities if v.severity == 'medium')
            print(f"CRITICAL={crit} HIGH={high} MEDIUM={med} TOTAL={len(result.vulnerabilities)}")
    # Exit code logic: 1 if critical/high found, 2 for other vulns, else 0
    has_critical_high = any(v.severity in ('critical', 'high') for v in result.vulnerabilities)
    has_other = any(v.severity not in ('critical', 'high') for v in result.vulnerabilities)
    if has_critical_high:
        return 1
    elif has_other:
        return 2
    return 0


if __name__ == '__main__':
    sys.exit(main())
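
For reference, the raw OSV query the scanner issues can be reproduced directly (same payload shape as query_osv above; this hits the live api.osv.dev endpoint):

import json
import urllib.request

payload = json.dumps({
    "package": {"name": "django", "ecosystem": "PyPI"},
    "version": "3.2.0",
}).encode("utf-8")
req = urllib.request.Request(
    "https://api.osv.dev/v1/query",
    data=payload,
    headers={"Content-Type": "application/json"},
    method="POST",
)
with urllib.request.urlopen(req, timeout=15) as resp:
    vulns = json.loads(resp.read().decode("utf-8")).get("vulns", [])
print(f"{len(vulns)} known vulnerabilities for django 3.2.0")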

test_push_marker.txt (new, empty file)

tests/test_vulnerability_scanner.py (new file, +237 lines):

@@ -0,0 +1,237 @@
#!/usr/bin/env python3
"""Tests for scripts/vulnerability_scanner.py — 10 tests."""
import json
import os
import sys
import tempfile
import unittest
from unittest.mock import patch, MagicMock

sys.path.insert(0, os.path.join(os.path.dirname(__file__) or ".", ".."))
import importlib.util

# Load the scanner directly from its file path so the tests do not depend on
# scripts/ being an importable package.
spec = importlib.util.spec_from_file_location(
    "vulnerability_scanner",
    os.path.join(os.path.dirname(__file__) or ".", "..", "scripts", "vulnerability_scanner.py"))
mod = importlib.util.module_from_spec(spec)
spec.loader.exec_module(mod)

parse_requirements_file = mod.parse_requirements_file
query_osv = mod.query_osv
parse_osv_vuln = mod.parse_osv_vuln
filter_by_severity = mod.filter_by_severity
Vulnerability = mod.Vulnerability

# --- Test Data ---
SAMPLE_OSV_RESPONSE = [
    {
        "id": "GHSA-xxxx-xxxx-xxxx",
        "summary": "Arbitrary code execution in django",
        "severity": [{"type": "CVSS_V3", "score": {"baseScore": 9.8, "baseSeverity": "CRITICAL"}}],
        "affected": [{
            "ranges": [{
                "events": [
                    {"introduced": "0"},
                    {"fixed": "3.2.14"}
                ]
            }]
        }]
    },
    {
        "id": "PYSEC-2024-1234",
        "summary": "Denial of service in cryptography",
        "severity": [{"type": "CVSS_V3", "score": {"baseScore": 5.3, "baseSeverity": "MEDIUM"}}],
        "affected": [{
            "ranges": [{
                "events": [
                    {"introduced": "0"},
                    {"fixed": "42.0.0"}
                ]
            }]
        }]
    }
]


# --- Tests ---
def test_parse_requirements_simple():
    """Should parse a simple requirements file."""
    with tempfile.NamedTemporaryFile(mode='w', suffix='.txt', delete=False) as f:
        f.write("django==4.2.0\n")
        f.write("requests>=2.28.0\n")
        f.write("click~=8.0\n")
        f.flush()
    pkgs = parse_requirements_file(f.name)
    os.unlink(f.name)
    assert "django" in pkgs
    assert pkgs["django"] == "==4.2.0"
    assert "requests" in pkgs
    assert pkgs["requests"] == ">=2.28.0"
    assert "click" in pkgs
    print("PASS: test_parse_requirements_simple")


def test_parse_requirements_extras_and_comments():
    """Should skip comments, blank lines, and handle package extras."""
    with tempfile.NamedTemporaryFile(mode='w', suffix='.txt', delete=False) as f:
        f.write("# This is a comment\n")
        f.write("django[argon2]==4.2.0\n")
        f.write("\n")
        f.write(" requests >=2.28.0 # inline comment\n")
        f.flush()
    pkgs = parse_requirements_file(f.name)
    os.unlink(f.name)
    assert "django" in pkgs
    assert pkgs["django"] == "==4.2.0"
    assert "requests" in pkgs
    # Version should capture the comparison
    assert ">=" in pkgs["requests"]
    print("PASS: test_parse_requirements_extras_and_comments")


def test_parse_requirements_include_recursive():
    """Should follow -r includes up to depth 3."""
    with tempfile.TemporaryDirectory() as tmpdir:
        # Main requirements.txt
        main = os.path.join(tmpdir, "requirements.txt")
        with open(main, 'w') as f:
            f.write("django==4.2.0\n")
            f.write("-r base.txt\n")
        # base.txt
        base = os.path.join(tmpdir, "base.txt")
        with open(base, 'w') as f:
            f.write("requests>=2.28.0\n")
            f.write("-r deep.txt\n")
        # deep.txt
        deep = os.path.join(tmpdir, "deep.txt")
        with open(deep, 'w') as f:
            f.write("click~=8.0\n")
        pkgs = parse_requirements_file(main)
        assert "django" in pkgs
        assert "requests" in pkgs
        assert "click" in pkgs
    print("PASS: test_parse_requirements_include_recursive")


def test_parse_requirements_skip_editable():
    """Should skip -e editable installs and other flags."""
    with tempfile.NamedTemporaryFile(mode='w', suffix='.txt', delete=False) as f:
        f.write("-e git+https://github.com/user/repo.git@branch#egg=package\n")
        f.write("--index-url https://pypi.org/simple\n")
        f.write("django==4.2.0\n")
        f.flush()
    pkgs = parse_requirements_file(f.name)
    os.unlink(f.name)
    assert "django" in pkgs
    assert "package" not in pkgs  # should not pick up editable name
    print("PASS: test_parse_requirements_skip_editable")


def test_parse_requirements_nonexistent():
    """Should exit with error on missing file."""
    with patch('sys.exit') as mock_exit:
        parse_requirements_file("/nonexistent/requirements.txt")
        mock_exit.assert_called_once_with(1)
    print("PASS: test_parse_requirements_nonexistent")


def test_filter_by_severity():
    """Should filter vulnerabilities by severity threshold."""
    vulns = [
        Vulnerability("pkg1", "==1.0", "V1", "critical", 9.8, "summary", "url", []),
        Vulnerability("pkg2", "==2.0", "V2", "high", 7.5, "summary", "url", []),
        Vulnerability("pkg3", "==3.0", "V3", "medium", 5.0, "summary", "url", []),
        Vulnerability("pkg4", "==4.0", "V4", "low", 2.0, "summary", "url", []),
    ]
    # min_severity: low includes all
    filtered = filter_by_severity(vulns, "low")
    assert len(filtered) == 4
    # min_severity: medium excludes low
    filtered = filter_by_severity(vulns, "medium")
    assert len(filtered) == 3
    assert all(v.severity in ("critical", "high", "medium") for v in filtered)
    # min_severity: high excludes medium + low
    filtered = filter_by_severity(vulns, "high")
    assert len(filtered) == 2
    # min_severity: critical only
    filtered = filter_by_severity(vulns, "critical")
    assert len(filtered) == 1
    print("PASS: test_filter_by_severity")


def test_parse_osv_vuln():
    """Should parse OSV API response correctly."""
    parsed = parse_osv_vuln(SAMPLE_OSV_RESPONSE, "django", "==4.2.0")
    assert len(parsed) == 2
    assert parsed[0].package == "django"
    assert parsed[0].vuln_id == "GHSA-xxxx-xxxx-xxxx"
    assert parsed[0].severity == "critical"
    assert parsed[0].cvss_score == 9.8
    assert parsed[1].severity == "medium"
    assert parsed[1].cvss_score == 5.3
    print("PASS: test_parse_osv_vuln")


def test_parse_osv_vuln_empty():
    """Should handle empty OSV response."""
    parsed = parse_osv_vuln([], "django", "==4.2.0")
    assert parsed == []
    print("PASS: test_parse_osv_vuln_empty")


def test_query_osv_network_success():
    """Should successfully query OSV API for a real known vulnerable package."""
    # Query an old django version that likely has known CVEs.
    # This test actually hits the network — effectively an integration test.
    vulns = query_osv("django", "==3.2.0")
    # We don't assert specific results since vulns change over time,
    # but the function must return a list without raising.
    assert isinstance(vulns, list)
    print("PASS: test_query_osv_network_success")


def test_query_osv_404_no_vulns():
    """OSV returns empty list for packages with no vulns (404-like)."""
    with patch('urllib.request.urlopen') as mock_urlopen:
        mock_response = MagicMock()
        mock_response.read.return_value = b'{"vulns": []}'
        mock_response.__enter__ = lambda self: self
        mock_response.__exit__ = lambda self, *args: None
        mock_urlopen.return_value = mock_response
        result = query_osv("nonexistent-package-xyz123", "==1.0.0")
        assert result == []
    print("PASS: test_query_osv_404_no_vulns")


if __name__ == '__main__':
    # Run all tests
    test_parse_requirements_simple()
    test_parse_requirements_extras_and_comments()
    test_parse_requirements_include_recursive()
    test_parse_requirements_skip_editable()
    test_parse_requirements_nonexistent()
    test_filter_by_severity()
    test_parse_osv_vuln()
    test_parse_osv_vuln_empty()
    test_query_osv_network_success()
    test_query_osv_404_no_vulns()
    print("\nAll tests passed.")