#!/usr/bin/env python3
"""
upstream_watch.py — Monitor upstream llama.cpp and Ollama for TurboQuant support.

Checks GitHub for:
1. llama.cpp PRs/issues mentioning TurboQuant, PolarQuant, QJL
2. Ollama release notes mentioning KV cache types
3. ggml commits adding new KV cache types

Usage:
    python3 scripts/upstream_watch.py            # generate report
    python3 scripts/upstream_watch.py --json     # machine-readable output
    python3 scripts/upstream_watch.py --since 7d # check last 7 days
"""

import argparse
import json
import os
import sys
import urllib.error
import urllib.parse
import urllib.request
from datetime import datetime, timedelta, timezone
from pathlib import Path

# Terms searched in issues/PRs across each watched repository.
SEARCH_TERMS = ["turboquant", "polarquant", "qjl", "kv cache quant", "kv_type"]

# Display name -> "owner/repo" slug on GitHub.
WATCH_REPOS = {
    "llama.cpp": "ggerganov/llama.cpp",
    "ggml": "ggerganov/ggml",
    "ollama": "ollama/ollama",
}


def github_api(path, token=None):
    """GET a GitHub REST v3 endpoint and return the decoded JSON.

    Args:
        path: API path beginning with "/" (e.g. "/repos/x/y/releases").
        token: optional personal access token for higher rate limits.

    Returns:
        The parsed JSON payload (list or dict) on success, or a dict with
        an "error" key (and "status" for HTTP errors) on failure — callers
        check for "error" rather than catching exceptions.
    """
    url = f"https://api.github.com{path}"
    headers = {"Accept": "application/vnd.github.v3+json", "User-Agent": "turboquant-watch"}
    if token:
        headers["Authorization"] = f"token {token}"
    req = urllib.request.Request(url, headers=headers)
    try:
        # Context manager ensures the HTTP response/socket is closed
        # (the original leaked the response object).
        with urllib.request.urlopen(req, timeout=30) as resp:
            return json.loads(resp.read())
    except urllib.error.HTTPError as e:
        if e.code == 403:
            # 403 from api.github.com almost always means rate limiting
            # for unauthenticated requests.
            return {"error": "rate_limited", "status": 403}
        return {"error": str(e), "status": e.code}
    except Exception as e:
        # Network-level failures (DNS, timeout) reduced to an error dict
        # so the report can degrade gracefully instead of crashing.
        return {"error": str(e)}


def search_repo(repo, terms, since_date, token=None):
    """Search a repo's issues/PRs for each term, created on/after since_date.

    Args:
        repo: "owner/repo" slug.
        terms: iterable of search strings.
        since_date: ISO date string (YYYY-MM-DD) lower bound on creation.
        token: optional GitHub token.

    Returns:
        List of finding dicts; API failures appear as dicts containing
        an "error" key alongside the term/repo that failed.
    """
    findings = []
    for term in terms:
        query = f"repo:{repo} {term} created:>={since_date}"
        encoded_q = urllib.parse.quote(query)
        url = f"/search/issues?q={encoded_q}&sort=created&order=desc&per_page=5"
        result = github_api(url, token)
        if "error" in result:
            findings.append({"error": result["error"], "term": term, "repo": repo})
            continue
        for item in result.get("items", []):
            findings.append({
                "repo": repo,
                "term": term,
                "number": item["number"],
                "title": item["title"],
                "url": item["html_url"],
                "state": item["state"],
                "created": item["created_at"],
                # The issues search endpoint returns PRs too; a
                # "pull_request" key distinguishes them from issues.
                "is_pr": "pull_request" in item,
                "labels": [l["name"] for l in item.get("labels", [])],
            })
    return findings


def check_releases(repo, token=None):
    """Scan a repo's 5 most recent releases for KV-cache-related terms.

    Args:
        repo: "owner/repo" slug.
        token: optional GitHub token.

    Returns:
        List of release-finding dicts (only releases that matched at least
        one term), or a single-element list [{"error": ...}] on API failure.
    """
    url = f"/repos/{repo}/releases?per_page=5"
    releases = github_api(url, token)
    if isinstance(releases, dict) and "error" in releases:
        return [{"error": releases["error"]}]
    findings = []
    for release in releases:
        # Release body/name may be None; normalize to lowercase text.
        body = (release.get("body") or "").lower()
        name = (release.get("name") or "").lower()
        text = body + " " + name
        matched = [t for t in ["turboquant", "polarquant", "qjl", "kv cache", "kv_type"]
                   if t in text]
        if matched:
            findings.append({
                "repo": repo,
                "type": "release",
                "tag": release["tag_name"],
                "name": release.get("name", ""),
                "url": release["html_url"],
                "published": release["published_at"],
                "matched_terms": matched,
                "snippet": body[:300] if body else "",
            })
    return findings


def check_fork_status(token=None):
    """Compare the latest commit of the TurboQuant fork against upstream.

    Returns:
        Dict with upstream/fork SHA, date, first line of commit message,
        and "days_behind" (upstream committer date minus fork committer
        date) when both sides were fetched successfully. Missing keys
        indicate the corresponding API call failed.
    """
    upstream = github_api("/repos/ggerganov/llama.cpp/commits?per_page=1", token)
    fork = github_api("/repos/TheTom/llama-cpp-turboquant/commits?per_page=1", token)
    result = {"fork": "TheTom/llama-cpp-turboquant", "upstream": "ggerganov/llama.cpp"}
    if isinstance(upstream, list) and upstream:
        result["upstream_sha"] = upstream[0]["sha"][:8]
        result["upstream_date"] = upstream[0]["commit"]["committer"]["date"]
        result["upstream_message"] = upstream[0]["commit"]["message"].split("\n")[0][:100]
    if isinstance(fork, list) and fork:
        result["fork_sha"] = fork[0]["sha"][:8]
        result["fork_date"] = fork[0]["commit"]["committer"]["date"]
        result["fork_message"] = fork[0]["commit"]["message"].split("\n")[0][:100]
    if "upstream_date" in result and "fork_date" in result:
        # GitHub emits "Z"-suffixed timestamps; fromisoformat needs an
        # explicit offset on older Pythons.
        u = datetime.fromisoformat(result["upstream_date"].replace("Z", "+00:00"))
        f = datetime.fromisoformat(result["fork_date"].replace("Z", "+00:00"))
        result["days_behind"] = (u - f).days
    return result


def generate_report(findings, releases, fork_status, since_date):
    """Render the Markdown watch report.

    Args:
        findings: list from search_repo() (may include error dicts).
        releases: list from check_releases() (may be a single error dict).
        fork_status: dict from check_fork_status().
        since_date: ISO date string shown in the header.

    Returns:
        The full report as a single Markdown string.
    """
    now = datetime.now(timezone.utc)
    lines = ["# TurboQuant Upstream Watch Report",
             f"\nGenerated: {now.strftime('%Y-%m-%d %H:%M UTC')}",
             f"Monitoring since: {since_date}", ""]

    # Deduplicate findings: the same issue/PR may match several terms.
    seen = set()
    unique = []
    errors = []
    for f in findings:
        if "error" in f:
            errors.append(f)
            continue
        key = (f["repo"], f["number"])
        if key not in seen:
            seen.add(key)
            unique.append(f)

    lines.append("## Upstream Landing Status")
    tq = [f for f in unique
          if any(t in f["term"].lower() for t in ["turboquant", "polarquant", "qjl"])]
    if tq:
        lines.append(f"**{len(tq)} findings** mentioning TurboQuant/PolarQuant/QJL:")
        for f in tq[:10]:
            kind = "PR" if f["is_pr"] else "Issue"
            lines.append(f"- [{kind} #{f['number']}]({f['url']}): {f['title'][:80]} ({f['state']})")
    else:
        lines.append("**No TurboQuant/PolarQuant/QJL mentions found upstream.**")
        lines.append("TurboQuant has NOT landed in upstream llama.cpp yet.")
    lines.append("")

    kv = [f for f in unique
          if any(t in f["term"].lower() for t in ["kv cache", "kv_type", "ggml_type"])]
    if kv:
        lines.append(f"## KV Cache Related ({len(kv)} findings)")
        for f in kv[:10]:
            kind = "PR" if f["is_pr"] else "Issue"
            lines.append(f"- [{kind} #{f['number']}]({f['url']}): {f['title'][:80]}")
        lines.append("")

    lines.append("## Ollama Releases")
    # BUGFIX: the original treated an *empty* findings list as an API
    # error ("if releases and not any(error)") — an empty list simply
    # means no release matched. Distinguish the three cases explicitly.
    if any("error" in r for r in releases):
        lines.append("Could not check Ollama releases (API error).")
    else:
        tq_rel = [r for r in releases if r.get("matched_terms")]
        if tq_rel:
            for r in tq_rel:
                lines.append(f"- [{r['tag']}]({r['url']}): matched {r['matched_terms']}")
        else:
            lines.append("No recent Ollama releases mention TurboQuant/KV cache compression.")
    lines.append("")

    lines.append("## Fork Status")
    if "error" not in fork_status:
        lines.append(f"- **Upstream (llama.cpp):** {fork_status.get('upstream_sha', 'N/A')} — {fork_status.get('upstream_message', 'N/A')}")
        lines.append(f"- **Fork (turboquant):** {fork_status.get('fork_sha', 'N/A')} — {fork_status.get('fork_message', 'N/A')}")
        if "days_behind" in fork_status:
            d = fork_status["days_behind"]
            lines.append(f"- **Fork freshness:** {'CURRENT' if d <= 7 else f'{d} days behind'}")
    lines.append("")

    lines.append("## Recommendation")
    if tq:
        # NOTE: "closed" is used as a proxy for "merged"; the search API
        # does not expose merge state directly.
        merged = [f for f in tq if f["state"] == "closed"]
        if merged:
            lines.append("**ACTION REQUIRED:** TurboQuant PRs merged upstream! Evaluate migration.")
        else:
            lines.append("TurboQuant PRs exist upstream but not yet merged. Continue monitoring.")
    else:
        lines.append("No upstream TurboQuant support detected. Continue using fork. Re-check weekly.")
    return "\n".join(lines)


def main():
    """Entry point: parse args, query GitHub, emit report (stdout + docs/)."""
    parser = argparse.ArgumentParser(description="TurboQuant upstream watch")
    parser.add_argument("--json", action="store_true")
    parser.add_argument("--since", default="30d")
    args = parser.parse_args()

    days = int(args.since.replace("d", ""))
    since_date = (datetime.now(timezone.utc) - timedelta(days=days)).strftime("%Y-%m-%d")

    # Optional token for higher rate limits; absence is fine.
    token = None
    gh_token_path = Path.home() / ".config" / "github" / "token"
    if gh_token_path.exists():
        token = gh_token_path.read_text(encoding="utf-8").strip()

    all_findings = []
    for name, repo in WATCH_REPOS.items():
        all_findings.extend(search_repo(repo, SEARCH_TERMS, since_date, token))
    releases = check_releases(WATCH_REPOS["ollama"], token)
    fork_status = check_fork_status(token)

    if args.json:
        print(json.dumps({
            "generated": datetime.now(timezone.utc).isoformat(),
            "since": since_date,
            "findings": [f for f in all_findings if "error" not in f],
            "errors": [f for f in all_findings if "error" in f],
            "releases": releases,
            "fork_status": fork_status,
        }, indent=2))
    else:
        report = generate_report(all_findings, releases, fork_status, since_date)
        print(report)
        # Persist a copy next to the repo docs for later diffing.
        docs_dir = Path(__file__).resolve().parent.parent / "docs"
        docs_dir.mkdir(exist_ok=True)
        (docs_dir / "upstream-watch-report.md").write_text(report, encoding="utf-8")


if __name__ == "__main__":
    main()