#!/usr/bin/env python3
"""
Codebase Genome Diff — Detect structural changes between two versions.

Compares two git refs (commits, branches, tags) and produces a human-readable
report of structural changes:
  • Added/removed/renamed files
  • Changed functions/classes (signature modifications)
  • New dependencies (imports, requirements, etc.)

Usage:
    python3 scripts/genome_diff.py --ref1 <ref1> --ref2 <ref2>
    python3 scripts/genome_diff.py --ref1 main --ref2 feature-branch
    python3 scripts/genome_diff.py --ref1 v1.0 --ref2 v2.0 --output report.txt
"""

import argparse
import json
import os
import re
import subprocess
import sys
from dataclasses import asdict, dataclass, field
from typing import Any, Dict, Iterator, List, Optional, Tuple

SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
# The sibling module below lives next to this script, so the script directory
# has to be importable before the import statement runs.
sys.path.insert(0, SCRIPT_DIR)

from diff_analyzer import DiffAnalyzer, ChangeCategory

# Report marker for each change type; anything else is rendered as '?'.
_CHANGE_MARKERS = {'added': '+', 'removed': '-', 'modified': '~'}

# "diff --git a/<old> b/<new>" header; group 1 is the new-side path.
_FILE_HEADER_RE = re.compile(r'^diff --git a/.*? b/(.*?)$', re.MULTILINE)

# "@@ -<old>[,<count>] +<new>[,<count>] @@" hunk header.
_HUNK_HEADER_RE = re.compile(
    r'^@@\s+-(\d+)(?:,(\d+))?\s+\+(\d+)(?:,(\d+))?\s+@@'
)

# An added/removed line that opens a (possibly async) function or a class.
_DEFINITION_RE = re.compile(r'^([+\-])\s*(?:async\s+)?(def|class)\s+(\w+)')

# An added/removed "import x" or "from x import ..." line.
_IMPORT_RE = re.compile(
    r'^([+\-])\s*(?:import\s+([\w\.]+)|from\s+([\w\.]+)\s+import)'
)

# Paths that identify a dependency manifest.
_MANIFEST_RE = re.compile(
    r'requirements(.*?)\.txt|pyproject\.toml|setup\.py|Pipfile'
)


@dataclass
class FunctionChange:
    """One added or removed function/class definition line in a diff."""

    file: str
    name: str
    kind: str  # the matched keyword: 'def' or 'class'
    change_type: str  # 'added' or 'removed' (simplified)
    old_line: Optional[int] = None  # line in the old file, for removals
    new_line: Optional[int] = None  # line in the new file, for additions


@dataclass
class DependencyChange:
    """One added/removed import, or a modified dependency manifest."""

    file: str
    module: str  # empty string for a manifest-level change
    change_type: str  # 'added' or 'removed' or 'modified'
    line: int = 0  # 0 when the line number is unknown


@dataclass
class GenomeDiffReport:
    """Aggregated structural diff between two git refs."""

    ref1: str
    ref2: str
    file_changes: List[Dict[str, Any]] = field(default_factory=list)
    function_changes: List[FunctionChange] = field(default_factory=list)
    dependency_changes: List[DependencyChange] = field(default_factory=list)
    total_files_changed: int = 0
    total_functions_changed: int = 0
    total_dependencies_changed: int = 0

    def to_dict(self) -> Dict[str, Any]:
        """Return a JSON-serializable representation of the report."""
        return {
            "ref1": self.ref1,
            "ref2": self.ref2,
            "summary": {
                "files": self.total_files_changed,
                "functions": self.total_functions_changed,
                "dependencies": self.total_dependencies_changed,
            },
            "file_changes": self.file_changes,
            # asdict() returns copies, so callers cannot mutate the report
            # through the returned structure.
            "function_changes": [asdict(fc) for fc in self.function_changes],
            "dependency_changes": [
                asdict(dc) for dc in self.dependency_changes
            ],
        }

    def human_report(self) -> str:
        """Render the report as plain text, one change per line."""
        lines = []
        lines.append(f"Codebase Genome Diff: {self.ref1} → {self.ref2}")
        lines.append("=" * 60)
        lines.append(f" Files changed: {self.total_files_changed}")
        lines.append(f" Functions changed: {self.total_functions_changed}")
        lines.append(
            f" Dependencies changed: {self.total_dependencies_changed}"
        )
        lines.append("")

        for fc in self.file_changes:
            kind = []
            if fc.get('is_new'):
                kind.append("NEW")
            if fc.get('is_deleted'):
                kind.append("DELETED")
            if fc.get('is_renamed'):
                kind.append("RENAMED")
            if fc.get('is_binary'):
                kind.append("BINARY")
            kind_str = f" [{', '.join(kind)}]" if kind else ""
            lines.append(
                f" {fc['path']}{kind_str} "
                f"(+{fc['added_lines']}/-{fc['deleted_lines']})"
            )
        lines.append("")

        for fc in self.function_changes:
            op = _CHANGE_MARKERS.get(fc.change_type, '?')
            lines.append(f" [{op}] {fc.file}: {fc.kind} '{fc.name}'")
        lines.append("")

        for dc in self.dependency_changes:
            # 'modified' (manifest) entries get '~' instead of being shown
            # as removals.
            op = _CHANGE_MARKERS.get(dc.change_type, '?')
            lines.append(f" [{op}] {dc.file}: {dc.module}")
        lines.append("")

        return "\n".join(lines)


def run_git_diff(ref1: str, ref2: str) -> str:
    """Return the zero-context ``git diff`` output for ``ref1...ref2``.

    git runs with the script directory as its working directory. If git exits
    with a status other than 0 or 1, its stderr is printed and the process
    exits with status 1.
    """
    result = subprocess.run(
        ['git', 'diff', '--unified=0', f'{ref1}...{ref2}'],
        capture_output=True,
        text=True,
        # Decode explicitly: the locale codec can raise on non-UTF-8 file
        # content, which would abort the whole report.
        encoding='utf-8',
        errors='replace',
        cwd=SCRIPT_DIR,
    )
    if result.returncode not in (0, 1):
        print(f"git diff failed: {result.stderr}", file=sys.stderr)
        sys.exit(1)
    return result.stdout


def _iter_changed_lines(
    diff_text: str,
) -> Iterator[Tuple[Optional[str], str, Optional[int], str]]:
    """Yield ``(file, op, line_number, text)`` for each '+'/'-' diff line.

    ``file`` is None until the first ``diff --git`` header is seen.
    ``line_number`` is the new-file line for '+' and the old-file line for
    '-', or None outside a hunk (this includes the ``---``/``+++`` headers).
    ``text`` is the raw diff line including its prefix character.
    """
    current_file: Optional[str] = None
    old_line: Optional[int] = None
    new_line: Optional[int] = None

    for line in diff_text.split('\n'):
        if line.startswith('diff --git'):
            header = _FILE_HEADER_RE.match(line)
            current_file = header.group(1) if header else "unknown"
            # Counters from the previous file must not leak into this one.
            old_line = new_line = None
            continue

        hunk = _HUNK_HEADER_RE.match(line)
        if hunk:
            old_line = int(hunk.group(1))
            new_line = int(hunk.group(3))
            continue

        if line.startswith('-'):
            yield current_file, '-', old_line, line
            if old_line is not None:
                old_line += 1
        elif line.startswith('+'):
            yield current_file, '+', new_line, line
            if new_line is not None:
                new_line += 1
        elif line.startswith(' '):
            # Context line: present on both sides.
            if old_line is not None:
                old_line += 1
            if new_line is not None:
                new_line += 1
        # Other prefixes (e.g. "\ No newline at end of file") are ignored.


def _scan_function_changes(
    diff_text: str, require_file: bool
) -> List[FunctionChange]:
    """Collect def/class additions and removals with file and line number.

    With ``require_file`` set, lines seen before any ``diff --git`` header
    are skipped; otherwise they are reported with an empty file name.
    """
    changes: List[FunctionChange] = []
    for path, op, line_no, text in _iter_changed_lines(diff_text):
        if path is None and require_file:
            continue
        found = _DEFINITION_RE.match(text)
        if not found:
            continue
        added = op == '+'
        changes.append(FunctionChange(
            file=path if path is not None else "",
            name=found.group(3),
            kind=found.group(2),
            change_type="added" if added else "removed",
            old_line=None if added else line_no,
            new_line=line_no if added else None,
        ))
    return changes


def extract_function_changes(diff_text: str) -> List[FunctionChange]:
    """Return every added/removed ``def``/``class`` line in *diff_text*.

    Each entry carries the hunk-derived line number (``new_line`` for
    additions, ``old_line`` for removals) and, when a ``diff --git`` header
    precedes it, the file path; otherwise ``file`` is an empty string.
    """
    return _scan_function_changes(diff_text, require_file=False)


def extract_dependency_changes(
    diff_text: str, analyzer: DiffAnalyzer
) -> List[DependencyChange]:
    """Return import additions/removals and modified dependency manifests.

    The diff is split per file with ``analyzer._split_files``; sections
    without a ``diff --git`` header are skipped. A file whose header path
    looks like a dependency manifest and that contains at least one hunk
    gets a single ``modified`` entry with an empty module name.
    """
    changes: List[DependencyChange] = []

    for file_diff in analyzer._split_files(diff_text):
        header = _FILE_HEADER_RE.search(file_diff)
        if not header:
            continue
        filepath = header.group(1)

        for _, op, line_no, text in _iter_changed_lines(file_diff):
            found = _IMPORT_RE.match(text)
            if found:
                changes.append(DependencyChange(
                    file=filepath,
                    module=found.group(2) or found.group(3),
                    change_type="added" if op == '+' else "removed",
                    line=line_no or 0,
                ))

        # Decide from the header paths, not from content lines: a source line
        # that merely mentions "requirements.txt" is not a manifest change.
        is_manifest = _MANIFEST_RE.search(header.group(0)) is not None
        has_hunk = any(
            line.startswith('@@') for line in file_diff.split('\n')
        )
        if is_manifest and has_hunk:
            already_recorded = any(
                c.file == filepath and c.module == "" for c in changes
            )
            if not already_recorded:
                changes.append(DependencyChange(
                    file=filepath,
                    module="",
                    change_type="modified",
                    line=0,
                ))

    return changes


def correlate_function_changes_with_files(
    diff_text: str, functions: List[FunctionChange]
) -> List[FunctionChange]:
    """Return function/class changes attributed to their files.

    The entries are derived from *diff_text* itself, so *functions* is not
    consulted; the parameter is kept for interface compatibility. Lines that
    appear before the first ``diff --git`` header are not reported. Each
    entry carries its line number.
    """
    return _scan_function_changes(diff_text, require_file=True)


def main():
    """Parse CLI arguments, build the report, and print or save it."""
    parser = argparse.ArgumentParser(
        description="Codebase Genome Diff — structural changes between versions"
    )
    parser.add_argument(
        "--ref1", required=True, help="First git ref (commit, branch, tag)"
    )
    parser.add_argument("--ref2", required=True, help="Second git ref")
    parser.add_argument("--output", help="Write report to file")
    parser.add_argument(
        "--json", action="store_true",
        help="Output JSON instead of human report",
    )
    args = parser.parse_args()

    try:
        diff_text = run_git_diff(args.ref1, args.ref2)
    except (OSError, subprocess.SubprocessError) as e:
        # Typically: the git executable could not be launched.
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)

    if not diff_text.strip():
        print(f"No differences between {args.ref1} and {args.ref2}.")
        sys.exit(0)

    analyzer = DiffAnalyzer()
    summary = analyzer.analyze(diff_text)
    file_changes = [fc.to_dict() for fc in summary.files]

    func_changes = extract_function_changes(diff_text)
    func_changes = correlate_function_changes_with_files(
        diff_text, func_changes
    )
    dep_changes = extract_dependency_changes(diff_text, analyzer)

    report = GenomeDiffReport(
        ref1=args.ref1,
        ref2=args.ref2,
        file_changes=file_changes,
        function_changes=func_changes,
        dependency_changes=dep_changes,
        total_files_changed=len(file_changes),
        total_functions_changed=len(func_changes),
        total_dependencies_changed=len(dep_changes),
    )

    if args.json:
        output = json.dumps(report.to_dict(), indent=2)
    else:
        output = report.human_report()

    if args.output:
        # The human report contains non-ASCII characters, so do not rely on
        # the platform default encoding.
        with open(args.output, 'w', encoding='utf-8') as f:
            f.write(output + '\n')
        print(f"Report written to {args.output}")
    else:
        print(output)


if __name__ == '__main__':
    main()