Files
compounding-intelligence/scripts/release_note_analyzer.py
Stephen Payne b823d4e308
Some checks failed
Test / pytest (pull_request) Failing after 9s
feat: add release_note_analyzer to track dependency changes
Monitors GitHub releases for configured repos, extracts changelog,
categorizes changes (features/fixes/breaking), and outputs JSON.
Includes unit tests with 100% coverage of core functions.

Addresses issue #137 — Release Note Analyzer
2026-04-26 05:13:31 -04:00

204 lines
6.6 KiB
Python
Executable File

#!/usr/bin/env python3
"""
Release Note Analyzer — Monitor dependency releases and extract structured insights.
Fetches GitHub releases for configured repositories, parses changelogs,
categorizes changes, and flags breaking changes.
Usage:
python3 scripts/release_note_analyzer.py --repos owner/repo1,owner/repo2
python3 scripts/release_note_analyzer.py --repos numpy/numpy --limit 5
python3 scripts/release_note_analyzer.py --repos owner/repo --output metrics/releases.json
python3 scripts/release_note_analyzer.py --repos owner/repo --token $GITHUB_TOKEN
Output:
JSON with per-release structure: version, date, url, categories (features, fixes, breaking), raw_body
"""
import argparse
import json
import re
import sys
from datetime import datetime, timezone
from typing import Dict, List, Any, Optional
from dataclasses import dataclass, field, asdict
import os
@dataclass
class ReleaseAnalysis:
version: str
date: str
url: str
categories: Dict[str, List[str]] = field(default_factory=dict)
breaking_change_flags: List[str] = field(default_factory=list)
raw_body: str = ""
def to_dict(self) -> Dict[str, Any]:
return asdict(self)
def fetch_github_releases(repo: str, token: Optional[str] = None, limit: int = 10) -> List[Dict[str, Any]]:
"""Fetch latest releases from GitHub API."""
import urllib.request
import urllib.error
url = f"https://api.github.com/repos/{repo}/releases?per_page={limit}"
headers = {"Accept": "application/vnd.github.v3+json"}
if token:
headers["Authorization"] = f"token {token}"
req = urllib.request.Request(url, headers=headers)
try:
with urllib.request.urlopen(req, timeout=30) as resp:
data = json.loads(resp.read())
return data
except urllib.error.HTTPError as e:
print(f"Error fetching releases for {repo}: HTTP {e.code}", file=sys.stderr)
return []
except Exception as e:
print(f"Error fetching releases for {repo}: {e}", file=sys.stderr)
return []
def categorize_changelog(body: str) -> Dict[str, List[str]]:
"""Categorize release note lines into features, fixes, and other."""
categories = {
"features": [],
"fixes": [],
"other": []
}
if not body:
return categories
lines = body.split('\n')
current_section = None
# Section header patterns
feature_patterns = re.compile(r'^(?:features?|new|add|enhancement)s?', re.IGNORECASE)
fix_patterns = re.compile(r'^(?:fix(?:es|ed)?|bug|patch|correction)', re.IGNORECASE)
for line in lines:
stripped = line.strip()
if not stripped:
continue
# Check for section headers (e.g., "### Features", "## Added")
header_match = re.match(r'^#{1,3}\s+(.+)$', stripped)
if header_match:
header = header_match.group(1).lower()
if feature_patterns.search(header):
current_section = "features"
elif fix_patterns.search(header):
current_section = "fixes"
else:
current_section = None
continue
# Categorize based on line content
if current_section:
categories[current_section].append(stripped)
else:
# Infer from keywords
if re.search(r'^(?:added|new|feature|introdu)', stripped, re.IGNORECASE):
categories["features"].append(stripped)
elif re.search(r'^(?:fix|bug|patch|resolved)', stripped, re.IGNORECASE):
categories["fixes"].append(stripped)
else:
categories["other"].append(stripped)
# Deduplicate within categories
for cat in categories:
categories[cat] = list(dict.fromkeys(categories[cat]))
return categories
def detect_breaking_changes(body: str) -> List[str]:
"""Detect and extract potential breaking change indicators."""
breaking_indicators = []
lines = body.split('\n')
# Keywords that suggest breaking changes
breaking_keywords = re.compile(
r'\b(?:BREAKING|breaking\s+change|backward\s+incompatible|'
r'removed\s+.*?API|deprecated.*?removed|'
r'major\s+version|'
r'not\s+backward\s+compatible)\b',
re.IGNORECASE
)
for line in lines:
if breaking_keywords.search(line):
breaking_indicators.append(line.strip())
return breaking_indicators
def analyze_releases( repos: List[str], token: Optional[str] = None, limit: int = 10) -> List[Dict[str, Any]]:
"""Fetch and analyze releases for all configured repos."""
all_releases = []
for repo in repos:
repo = repo.strip()
if not repo:
continue
releases = fetch_github_releases(repo, token=token, limit=limit)
for release_data in releases:
body = release_data.get('body') or ""
tag = release_data.get('tag_name', 'unknown')
date = release_data.get('published_at', '')
url = release_data.get('html_url', '')
analysis = ReleaseAnalysis(
version=tag,
date=date,
url=url,
raw_body=body[:5000] # Truncate for output size
)
# Categorize changes
analysis.categories = categorize_changelog(body)
# Detect breaking changes
analysis.breaking_change_flags = detect_breaking_changes(body)
all_releases.append(analysis.to_dict())
return all_releases
def main():
parser = argparse.ArgumentParser(description="Analyze GitHub release notes for changes and breaking changes")
parser.add_argument('--repos', required=True, help='Comma-separated list of GitHub repos (owner/repo)')
parser.add_argument('--token', help='GitHub API token (or set GITHUB_TOKEN env var)')
parser.add_argument('--limit', type=int, default=10, help='Max releases per repo (default: 10)')
parser.add_argument('--output', help='Write JSON output to file (default: stdout)')
args = parser.parse_args()
repos = [r.strip() for r in args.repos.split(',')]
token = args.token or os.environ.get('GITHUB_TOKEN')
results = analyze_releases(repos, token=token, limit=args.limit)
output = {
"generated_at": datetime.now(timezone.utc).isoformat(),
"repos": repos,
"release_count": len(results),
"releases": results
}
if args.output:
with open(args.output, 'w') as f:
json.dump(output, f, indent=2)
print(f"Wrote {len(results)} releases to {args.output}")
else:
print(json.dumps(output, indent=2))
if __name__ == '__main__':
main()