# turboquant/scripts/upstream_watch.py

#!/usr/bin/env python3
"""
TurboQuant Upstream Watch Monitor
Monitors llama.cpp and Ollama for TurboQuant/PolarQuant/QJL support.

Issue #15: [P4] Upstream llama.cpp / Ollama TurboQuant watch
"""

import argparse
import json
import os
import subprocess
import sys
import urllib.error
import urllib.parse
import urllib.request
from datetime import datetime, timedelta, timezone
from typing import Any, Dict, List, Optional

# Configuration
GITHUB_API = "https://api.github.com"
# llama.cpp was transferred to the ggml-org organization (the same owner as
# GGML_REPO below); use the canonical name rather than relying on redirects.
LLAMA_CPP_REPO = "ggml-org/llama.cpp"
OLLAMA_REPO = "ollama/ollama"
GGML_REPO = "ggml-org/ggml"

# Keywords to search for. "turboquant" itself must be listed: release notes
# are matched by plain substring, and neither "turborot" nor "turborotquant"
# is a substring of "turboquant".
KEYWORDS = [
    "turboquant", "turborot", "turborotquant", "polarquant", "qjl",
    "kv cache compression", "kv cache quantization",
    "quantized kv", "kv quant", "cache compression",
]

class UpstreamWatch:
    """Scan upstream GitHub repositories for TurboQuant-related activity.

    The monitor searches issues and pull requests of the configured
    repositories for the module-level ``KEYWORDS``, inspects recent Ollama
    release notes, and renders the findings as a text or JSON report.
    """

    # Seconds to wait for a single GitHub API call before giving up.
    REQUEST_TIMEOUT = 30
    # Maximum number of individual findings listed in the text report.
    MAX_TEXT_FINDINGS = 10

    def __init__(self, github_token: Optional[str] = None):
        """Resolve the GitHub token and prepare the request headers.

        The token is taken from, in order: the ``github_token`` argument, the
        ``GITHUB_TOKEN`` environment variable, the file
        ``~/.config/github/token``. Without a token, requests are sent
        unauthenticated.
        """
        self.github_token = github_token or os.environ.get("GITHUB_TOKEN")
        # Fallback: read from ~/.config/github/token file
        if not self.github_token:
            token_path = os.path.expanduser("~/.config/github/token")
            try:
                with open(token_path, encoding="utf-8") as f:
                    self.github_token = f.read().strip() or None
            except (OSError, UnicodeDecodeError):
                # Best effort: a missing or unreadable token file simply
                # means we continue without authentication.
                pass
        self.headers = {"Accept": "application/vnd.github.v3+json"}
        if self.github_token:
            self.headers["Authorization"] = f"token {self.github_token}"

    def _github_request(self, endpoint: str) -> Any:
        """Make a GitHub API request.

        Args:
            endpoint: Path (with optional query string) appended to
                ``GITHUB_API``.

        Returns:
            The decoded JSON payload, or ``None`` when the request fails for
            any reason (HTTP error status, network failure, timeout, or a
            response body that is not valid JSON). Failures are reported on
            stderr so they never mix with report output on stdout.
        """
        url = f"{GITHUB_API}{endpoint}"
        req = urllib.request.Request(url, headers=self.headers)

        try:
            with urllib.request.urlopen(req, timeout=self.REQUEST_TIMEOUT) as resp:
                payload = resp.read()
        except urllib.error.HTTPError as e:
            print(f"GitHub API error: {e.code} - {e.reason}", file=sys.stderr)
            return None
        except OSError as e:
            # URLError (DNS failure, refused connection), socket timeouts and
            # connection resets are all OSError subclasses.
            print(f"GitHub API request failed: {e}", file=sys.stderr)
            return None

        try:
            return json.loads(payload)
        except ValueError as e:
            print(f"GitHub API returned invalid JSON: {e}", file=sys.stderr)
            return None

    def search_repo_issues_prs(self, repo: str, keywords: List[str], days: int = 30) -> List[Dict]:
        """Search for issues and PRs in a repository.

        Uses the ``/search/issues`` endpoint, which covers both issues and
        pull requests; the per-repository list endpoints (``/repos/.../issues``
        and ``/repos/.../pulls``) do not accept a search query.

        Args:
            repo: Repository in ``owner/name`` form.
            keywords: Search terms; one search request is made per keyword.
            days: Only items created within this many days are returned.

        Returns:
            One dict per matching issue or pull request with the keys
            ``type`` ("issue" or "pr"), ``repo``, ``number``, ``title``,
            ``url``, ``created``, ``updated`` and ``keyword``. An item that
            matches several keywords is reported once, under the first
            keyword that found it.
        """
        # GitHub timestamps are UTC, so the cutoff must be computed in UTC.
        cutoff = datetime.now(timezone.utc) - timedelta(days=days)
        since = cutoff.strftime("%Y-%m-%dT%H:%M:%SZ")

        results: List[Dict] = []
        seen_numbers = set()

        for keyword in keywords:
            query = f"{keyword} repo:{repo} created:>{since}"
            # urlencode escapes the whole query, including ':' and '>'.
            params = urllib.parse.urlencode({
                "q": query,
                "sort": "updated",
                "order": "desc",
                "per_page": 100,
            })
            data = self._github_request(f"/search/issues?{params}")
            if not isinstance(data, dict):
                continue

            for item in data.get("items", []):
                number = item["number"]
                if number in seen_numbers:
                    continue
                seen_numbers.add(number)
                results.append({
                    # Pull requests are returned as issues that carry a
                    # "pull_request" key.
                    "type": "pr" if "pull_request" in item else "issue",
                    "repo": repo,
                    "number": number,
                    "title": item["title"],
                    "url": item["html_url"],
                    "created": item["created_at"],
                    "updated": item["updated_at"],
                    "keyword": keyword
                })

        return results

    def check_ollama_releases(self, days: int = 30) -> List[Dict]:
        """Check Ollama releases for TurboQuant mentions.

        Args:
            days: Only releases published within this many days are checked.

        Returns:
            One dict per recent release whose notes contain at least one of
            ``KEYWORDS`` (case-insensitive substring match), with the keys
            ``version``, ``name``, ``url``, ``published`` and ``keywords``.
        """
        data = self._github_request(f"/repos/{OLLAMA_REPO}/releases")
        if not isinstance(data, list):
            return []

        cutoff = datetime.now(timezone.utc) - timedelta(days=days)
        releases = []
        for release in data:
            published_raw = release.get("published_at")
            if not published_raw:
                # Unpublished (draft) releases have no publication date.
                continue
            published = datetime.strptime(
                published_raw, "%Y-%m-%dT%H:%M:%SZ"
            ).replace(tzinfo=timezone.utc)
            if published <= cutoff:
                continue

            # "body" is null for releases without notes, so .get()'s default
            # alone is not enough.
            body = (release.get("body") or "").lower()
            found_keywords = [kw for kw in KEYWORDS if kw.lower() in body]
            if not found_keywords:
                continue

            releases.append({
                "version": release["tag_name"],
                # "name" may be null; fall back to the tag.
                "name": release.get("name") or release["tag_name"],
                "url": release["html_url"],
                "published": published_raw,
                "keywords": found_keywords
            })

        return releases

    def get_fork_status(self) -> Dict[str, Any]:
        """Get status of our TurboQuant fork.

        Returns placeholder data: nothing is actually queried, the versions
        are reported as "unknown" and ``last_updated`` is simply the current
        local time.
        """
        # This would typically check the local fork status
        # For now, return placeholder data
        return {
            "fork_url": "https://github.com/TheTom/llama-cpp-turboquant",
            "status": "active",
            "last_updated": datetime.now().isoformat(),
            "upstream_version": "unknown",
            "fork_version": "unknown"
        }

    def generate_report(self, days: int = 30, format: str = "text") -> str:
        """Generate a monitoring report.

        Args:
            days: Size of the scan window in days.
            format: "json" for a JSON document; any other value yields the
                human-readable text report. (The name shadows the builtin
                but is kept for backward compatibility with keyword callers.)

        Returns:
            The report as a string. Progress output goes to stderr so that
            the returned report is the only thing a caller prints to stdout.
        """
        print(
            f"Scanning upstream for TurboQuant mentions (last {days} days)...",
            file=sys.stderr,
        )

        llama_results = self.search_repo_issues_prs(LLAMA_CPP_REPO, KEYWORDS, days)
        ollama_results = self.search_repo_issues_prs(OLLAMA_REPO, KEYWORDS, days)
        ggml_results = self.search_repo_issues_prs(GGML_REPO, KEYWORDS, days)
        ollama_releases = self.check_ollama_releases(days)
        fork_status = self.get_fork_status()

        # Combine all results
        all_results = llama_results + ollama_results + ggml_results

        if format == "json":
            return json.dumps({
                "scan_date": datetime.now().isoformat(),
                "days_scanned": days,
                "llama_cpp_results": llama_results,
                "ollama_results": ollama_results,
                "ggml_results": ggml_results,
                "ollama_releases": ollama_releases,
                "fork_status": fork_status,
                "total_found": len(all_results)
            }, indent=2)

        # Text format: collect the pieces and join once at the end.
        parts = [
            "TurboQuant Upstream Watch Report\n",
            f"Generated: {datetime.now().isoformat()}\n",
            f"Scanned: Last {days} days\n",
            "=" * 60 + "\n\n",
            "## Summary\n",
            f"- llama.cpp mentions: {len(llama_results)}\n",
            f"- Ollama mentions: {len(ollama_results)}\n",
            f"- ggml mentions: {len(ggml_results)}\n",
            f"- Ollama releases with keywords: {len(ollama_releases)}\n",
            f"- Total findings: {len(all_results)}\n\n",
        ]

        if all_results:
            parts.append("## Findings\n")
            for result in all_results[:self.MAX_TEXT_FINDINGS]:
                parts.append(
                    f"- [{result['type'].upper()}] {result['repo']}#{result['number']}: {result['title']}\n"
                )
                parts.append(f"  URL: {result['url']}\n")
                parts.append(f"  Keyword: {result['keyword']}\n")
                parts.append(f"  Updated: {result['updated']}\n\n")
            hidden = len(all_results) - self.MAX_TEXT_FINDINGS
            if hidden > 0:
                # Make the truncation visible instead of silently dropping items.
                parts.append(
                    f"... and {hidden} more findings not shown "
                    f"(use --format json for the full list)\n\n"
                )

        if ollama_releases:
            parts.append("## Ollama Releases with TurboQuant Mentions\n")
            for release in ollama_releases:
                parts.append(f"- {release['version']}: {release['name']}\n")
                parts.append(f"  URL: {release['url']}\n")
                parts.append(f"  Keywords: {', '.join(release['keywords'])}\n")
                parts.append(f"  Published: {release['published']}\n\n")

        parts.append("## Fork Status\n")
        parts.append(f"- Fork URL: {fork_status['fork_url']}\n")
        parts.append(f"- Status: {fork_status['status']}\n")
        parts.append(f"- Last Updated: {fork_status['last_updated']}\n\n")

        parts.append("## Conclusion\n")
        if not all_results and not ollama_releases:
            parts.append("No TurboQuant/PolarQuant/QJL mentions found in upstream repositories.\n")
            parts.append(f"Recommendation: Continue using fork, re-check in {days} days.\n")
        else:
            parts.append(f"Found {len(all_results)} mentions in upstream repositories.\n")
            parts.append("Evaluate whether to migrate to upstream or continue using fork.\n")

        return "".join(parts)


def main():
    """Main entry point.

    Parses the command-line arguments, generates the report and either
    prints it to stdout or writes it to the file given with ``--output``.
    """
    parser = argparse.ArgumentParser(description="TurboQuant Upstream Watch Monitor")
    parser.add_argument("--days", type=int, default=30, help="Number of days to scan (default: 30)")
    parser.add_argument("--format", choices=["text", "json"], default="text", help="Output format")
    parser.add_argument("--output", help="Output file (default: stdout)")
    parser.add_argument("--github-token", help="GitHub API token (or set GITHUB_TOKEN env var)")

    args = parser.parse_args()

    # A negative window would put the cutoff in the future and silently
    # produce an empty report.
    if args.days < 0:
        parser.error("--days must be non-negative")

    # Initialize monitor
    monitor = UpstreamWatch(args.github_token)

    # Generate report
    report = monitor.generate_report(args.days, args.format)

    # Output report
    if args.output:
        # Issue and release titles may contain non-ASCII text; do not depend
        # on the platform's default encoding.
        with open(args.output, "w", encoding="utf-8") as f:
            f.write(report)
        print(f"Report saved to {args.output}")
    else:
        print(report)


# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()