#!/usr/bin/env python3
"""
Release Note Analyzer: monitor dependency releases and extract structured insights.

Fetches GitHub releases for configured repositories, parses changelogs,
categorizes changes, and flags breaking changes.

Usage:
    python3 scripts/release_note_analyzer.py --repos owner/repo1,owner/repo2
    python3 scripts/release_note_analyzer.py --repos numpy/numpy --limit 5
    python3 scripts/release_note_analyzer.py --repos owner/repo --output metrics/releases.json
    python3 scripts/release_note_analyzer.py --repos owner/repo --token $GITHUB_TOKEN

Output:
    JSON with per-release structure: version, date, url,
    categories (features, fixes, other), breaking_change_flags, raw_body
"""

import argparse
import json
import os
import re
import sys
from dataclasses import asdict, dataclass, field
from datetime import datetime, timezone
from typing import Any, Dict, List, Optional


@dataclass
class ReleaseAnalysis:
    version: str
    date: str
    url: str
    categories: Dict[str, List[str]] = field(default_factory=dict)
    breaking_change_flags: List[str] = field(default_factory=list)
    raw_body: str = ""

    def to_dict(self) -> Dict[str, Any]:
        return asdict(self)


def fetch_github_releases(repo: str, token: Optional[str] = None,
                          limit: int = 10) -> List[Dict[str, Any]]:
    """Fetch the latest releases for a repo from the GitHub API."""
    import urllib.error
    import urllib.request

    # The API caps per_page at 100, so clamp the requested limit.
    url = f"https://api.github.com/repos/{repo}/releases?per_page={min(limit, 100)}"
    headers = {"Accept": "application/vnd.github.v3+json"}
    if token:
        headers["Authorization"] = f"token {token}"

    req = urllib.request.Request(url, headers=headers)
    try:
        with urllib.request.urlopen(req, timeout=30) as resp:
            return json.loads(resp.read())
    except urllib.error.HTTPError as e:
        print(f"Error fetching releases for {repo}: HTTP {e.code}", file=sys.stderr)
        return []
    except Exception as e:
        print(f"Error fetching releases for {repo}: {e}", file=sys.stderr)
        return []


def categorize_changelog(body: str) -> Dict[str, List[str]]:
    """Categorize release note lines into features, fixes, and other."""
    categories: Dict[str, List[str]] = {
        "features": [],
        "fixes": [],
        "other": [],
    }
    if not body:
        return categories

    current_section = None

    # Section header patterns (e.g., "Features", "Added", "Bug Fixes")
    feature_patterns = re.compile(r'^(?:features?|new|add|enhancement)s?', re.IGNORECASE)
    fix_patterns = re.compile(r'^(?:fix(?:es|ed)?|bug|patch|correction)', re.IGNORECASE)

    for line in body.split('\n'):
        stripped = line.strip()
        if not stripped:
            continue

        # Check for section headers (e.g., "### Features", "## Added")
        header_match = re.match(r'^#{1,3}\s+(.+)$', stripped)
        if header_match:
            header = header_match.group(1).lower()
            if feature_patterns.search(header):
                current_section = "features"
            elif fix_patterns.search(header):
                current_section = "fixes"
            else:
                current_section = None
            continue

        # Lines under a recognized header inherit its category
        if current_section:
            categories[current_section].append(stripped)
        else:
            # Otherwise infer the category from leading keywords
            if re.search(r'^(?:added|new|feature|introdu)', stripped, re.IGNORECASE):
                categories["features"].append(stripped)
            elif re.search(r'^(?:fix|bug|patch|resolved)', stripped, re.IGNORECASE):
                categories["fixes"].append(stripped)
            else:
                categories["other"].append(stripped)

    # Deduplicate within each category, preserving order
    for cat in categories:
        categories[cat] = list(dict.fromkeys(categories[cat]))

    return categories


def detect_breaking_changes(body: str) -> List[str]:
    """Detect and extract potential breaking-change indicators."""
    breaking_indicators = []

    # Keywords that suggest breaking changes
    breaking_keywords = re.compile(
        r'\b(?:BREAKING|breaking\s+change|backward\s+incompatible|'
        r'removed\s+.*?API|deprecated.*?removed|'
        r'major\s+version|'
        r'not\s+backward\s+compatible)\b',
        re.IGNORECASE,
    )

    for line in body.split('\n'):
        if breaking_keywords.search(line):
            breaking_indicators.append(line.strip())

    return breaking_indicators


def analyze_releases(repos: List[str], token: Optional[str] = None,
                     limit: int = 10) -> List[Dict[str, Any]]:
    """Fetch and analyze releases for all configured repos."""
    all_releases = []
    for repo in repos:
        repo = repo.strip()
        if not repo:
            continue

        releases = fetch_github_releases(repo, token=token, limit=limit)
        for release_data in releases:
            # 'body' and 'published_at' can be null (e.g., draft releases),
            # so fall back to empty strings
            body = release_data.get('body') or ""
            tag = release_data.get('tag_name', 'unknown')
            date = release_data.get('published_at') or ''
            url = release_data.get('html_url', '')

            analysis = ReleaseAnalysis(
                version=tag,
                date=date,
                url=url,
                raw_body=body[:5000],  # Truncate to bound output size
            )

            # Categorize changes and flag potential breaking changes
            analysis.categories = categorize_changelog(body)
            analysis.breaking_change_flags = detect_breaking_changes(body)

            all_releases.append(analysis.to_dict())

    return all_releases


def main():
    parser = argparse.ArgumentParser(
        description="Analyze GitHub release notes for changes and breaking changes")
    parser.add_argument('--repos', required=True,
                        help='Comma-separated list of GitHub repos (owner/repo)')
    parser.add_argument('--token', help='GitHub API token (or set GITHUB_TOKEN env var)')
    parser.add_argument('--limit', type=int, default=10,
                        help='Max releases per repo (default: 10)')
    parser.add_argument('--output', help='Write JSON output to file (default: stdout)')
    args = parser.parse_args()

    repos = [r.strip() for r in args.repos.split(',')]
    token = args.token or os.environ.get('GITHUB_TOKEN')

    results = analyze_releases(repos, token=token, limit=args.limit)

    output = {
        "generated_at": datetime.now(timezone.utc).isoformat(),
        "repos": repos,
        "release_count": len(results),
        "releases": results,
    }

    if args.output:
        with open(args.output, 'w') as f:
            json.dump(output, f, indent=2)
        print(f"Wrote {len(results)} releases to {args.output}")
    else:
        print(json.dumps(output, indent=2))


if __name__ == '__main__':
    main()
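
# Illustrative sketch of the emitted JSON. Values below are placeholders, not
# real release data; the field names come from the ReleaseAnalysis dataclass
# and the `output` dict assembled in main():
#
# {
#   "generated_at": "2024-01-01T00:00:00+00:00",
#   "repos": ["owner/repo"],
#   "release_count": 1,
#   "releases": [
#     {
#       "version": "v1.2.3",
#       "date": "2024-01-01T00:00:00Z",
#       "url": "https://github.com/owner/repo/releases/tag/v1.2.3",
#       "categories": {"features": ["Added ..."], "fixes": ["Fix ..."], "other": []},
#       "breaking_change_flags": [],
#       "raw_body": "..."
#     }
#   ]
# }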