Compare commits
1 Commits
step35/55-
...
burn/15-17
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
b76312b024 |
21
docs/upstream-watch-report.md
Normal file
21
docs/upstream-watch-report.md
Normal file
@@ -0,0 +1,21 @@
|
||||
# TurboQuant Upstream Watch Report
|
||||
|
||||
Generated: 2026-04-15 02:07 UTC
|
||||
Monitoring since: 2026-03-16
|
||||
|
||||
## Upstream Landing Status
|
||||
**No TurboQuant/PolarQuant/QJL mentions found upstream.**
|
||||
TurboQuant has NOT landed in upstream llama.cpp yet.
|
||||
|
||||
## Fork Status
|
||||
- **Upstream (llama.cpp):** 5d14e5d1 — hexagon: optimization for HMX mat_mul (#21554)
|
||||
- **Fork (turboquant):** 45f8a066 — Merge: ci: fix turbo build + test failures (#66)
|
||||
- **Fork freshness:** CURRENT
|
||||
|
||||
## Errors
|
||||
- turboquant OR polarquant OR qjl: HTTP Error 422: Unprocessable Entity
|
||||
- kv cache type: HTTP Error 422: Unprocessable Entity
|
||||
- ggml_type: Remote end closed connection without response
|
||||
|
||||
## Recommendation
|
||||
No upstream TurboQuant support detected. Continue using fork. Re-check weekly.
|
||||
BIN
scripts/__pycache__/upstream_watch.cpython-312.pyc
Normal file
BIN
scripts/__pycache__/upstream_watch.cpython-312.pyc
Normal file
Binary file not shown.
225
scripts/upstream_watch.py
Normal file
225
scripts/upstream_watch.py
Normal file
@@ -0,0 +1,225 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
upstream_watch.py — Monitor upstream llama.cpp and Ollama for TurboQuant support.
|
||||
|
||||
Checks GitHub for:
|
||||
1. llama.cpp PRs/issues mentioning TurboQuant, PolarQuant, QJL
|
||||
2. Ollama release notes mentioning KV cache types
|
||||
3. ggml commits adding new KV cache types
|
||||
|
||||
Usage:
|
||||
python3 scripts/upstream_watch.py # generate report
|
||||
python3 scripts/upstream_watch.py --json # machine-readable output
|
||||
python3 scripts/upstream_watch.py --since 7d # check last 7 days
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
import urllib.request
|
||||
import urllib.parse
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
# Terms fed to the GitHub issue/PR search API each run. The first three are
# the project-specific names; the last two catch generic KV-cache
# quantization work that might supersede the fork.
SEARCH_TERMS = ["turboquant", "polarquant", "qjl",
                "kv cache quant", "kv_type"]

# Repositories monitored each run, keyed by a short display name
# (the key "ollama" is also used to pick the repo for release checks).
WATCH_REPOS = {
    "llama.cpp": "ggerganov/llama.cpp",
    "ggml": "ggerganov/ggml",
    "ollama": "ollama/ollama",
}
|
||||
|
||||
|
||||
def github_api(path, token=None):
    """Fetch a GitHub REST API endpoint and return the parsed JSON body.

    Parameters:
        path: API path beginning with "/", e.g. "/repos/owner/name/commits".
        token: optional personal-access token, sent as a "token"
            Authorization header when provided.

    Returns:
        The decoded JSON payload on success, or a dict containing an
        "error" key (plus "status" when an HTTP status is available) on
        failure — callers check for "error" rather than catching exceptions.
    """
    url = f"https://api.github.com{path}"
    headers = {"Accept": "application/vnd.github.v3+json", "User-Agent": "turboquant-watch"}
    if token:
        headers["Authorization"] = f"token {token}"
    req = urllib.request.Request(url, headers=headers)
    try:
        # Context manager guarantees the socket is closed even when
        # json.loads raises (the original leaked the response object).
        with urllib.request.urlopen(req, timeout=30) as resp:
            return json.loads(resp.read())
    except urllib.error.HTTPError as e:
        if e.code == 403:
            # 403 from the search API almost always means rate limiting
            # (60 req/h unauthenticated), so give it a recognizable tag.
            return {"error": "rate_limited", "status": 403}
        return {"error": str(e), "status": e.code}
    except Exception as e:
        # Network-level failures (timeout, reset, DNS) — report as a soft
        # error so one bad endpoint doesn't abort the whole run.
        return {"error": str(e)}
|
||||
|
||||
|
||||
def search_repo(repo, terms, since_date, token=None):
    """Search one repo's issues/PRs for each term created on/after since_date.

    Returns a list of finding dicts; API failures become entries carrying
    an "error" key instead of raising, so the caller can keep going.
    """
    hits = []
    for term in terms:
        encoded = urllib.parse.quote(f"repo:{repo} {term} created:>={since_date}")
        data = github_api(
            f"/search/issues?q={encoded}&sort=created&order=desc&per_page=5",
            token,
        )
        if "error" in data:
            hits.append({"error": data["error"], "term": term, "repo": repo})
            continue
        for it in data.get("items", []):
            hits.append({
                "repo": repo,
                "term": term,
                "number": it["number"],
                "title": it["title"],
                "url": it["html_url"],
                "state": it["state"],
                "created": it["created_at"],
                "is_pr": "pull_request" in it,
                "labels": [lbl["name"] for lbl in it.get("labels", [])],
            })
    return hits
|
||||
|
||||
|
||||
def check_releases(repo, token=None):
    """Scan the repo's five most recent releases for TurboQuant/KV-cache terms.

    Returns a list of match dicts (empty when nothing matched); an API
    failure yields a single-element list containing an "error" dict.
    """
    payload = github_api(f"/repos/{repo}/releases?per_page=5", token)
    if isinstance(payload, dict) and "error" in payload:
        return [{"error": payload["error"]}]

    watch_terms = ["turboquant", "polarquant", "qjl", "kv cache", "kv_type"]
    matches = []
    for rel in payload:
        # Release body and name can both be null in the API response.
        body = (rel.get("body") or "").lower()
        name = (rel.get("name") or "").lower()
        haystack = body + " " + name
        matched = [t for t in watch_terms if t in haystack]
        if not matched:
            continue
        matches.append({
            "repo": repo,
            "type": "release",
            "tag": rel["tag_name"],
            "name": rel.get("name", ""),
            "url": rel["html_url"],
            "published": rel["published_at"],
            "matched_terms": matched,
            "snippet": body[:300] if body else "",
        })
    return matches
|
||||
|
||||
|
||||
def check_fork_status(token=None):
    """Compare the latest commit of upstream llama.cpp against the fork.

    Returns a dict with the repo slugs plus, when the API calls succeed,
    short SHA / date / first-line message for each side and "days_behind"
    (upstream head date minus fork head date).
    """
    status = {"fork": "TheTom/llama-cpp-turboquant", "upstream": "ggerganov/llama.cpp"}

    def _record(prefix, commits):
        # Successful commit listings come back as a non-empty list;
        # error responses are dicts and are simply skipped.
        if isinstance(commits, list) and commits:
            head = commits[0]
            status[f"{prefix}_sha"] = head["sha"][:8]
            status[f"{prefix}_date"] = head["commit"]["committer"]["date"]
            status[f"{prefix}_message"] = head["commit"]["message"].split("\n")[0][:100]

    _record("upstream", github_api("/repos/ggerganov/llama.cpp/commits?per_page=1", token))
    _record("fork", github_api("/repos/TheTom/llama-cpp-turboquant/commits?per_page=1", token))

    if "upstream_date" in status and "fork_date" in status:
        def _parse(stamp):
            # GitHub timestamps end in "Z"; normalize for fromisoformat.
            return datetime.fromisoformat(stamp.replace("Z", "+00:00"))

        delta = _parse(status["upstream_date"]) - _parse(status["fork_date"])
        status["days_behind"] = delta.days
    return status
|
||||
|
||||
|
||||
def generate_report(findings, releases, fork_status, since_date):
    """Render the markdown watch report.

    Parameters:
        findings: list of dicts from search_repo(); may contain
            {"error": ...} entries for failed searches.
        releases: list of dicts from check_releases().
        fork_status: dict from check_fork_status().
        since_date: ISO date string marking the start of the window.

    Returns:
        The complete markdown document as one string.
    """
    now = datetime.now(timezone.utc)
    lines = ["# TurboQuant Upstream Watch Report",
             f"\nGenerated: {now.strftime('%Y-%m-%d %H:%M UTC')}",
             f"Monitoring since: {since_date}", ""]

    # Split findings into unique hits (dedup by repo+number — the same
    # issue can match several search terms) and API errors.
    seen = set()
    unique = []
    errors = []
    for f in findings:
        if "error" in f:
            errors.append(f)
            continue
        key = (f["repo"], f["number"])
        if key not in seen:
            seen.add(key)
            unique.append(f)

    lines.append("## Upstream Landing Status")
    tq = [f for f in unique if any(t in f["term"].lower() for t in ["turboquant", "polarquant", "qjl"])]
    if tq:
        lines.append(f"**{len(tq)} findings** mentioning TurboQuant/PolarQuant/QJL:")
        for f in tq[:10]:
            kind = "PR" if f["is_pr"] else "Issue"
            lines.append(f"- [{kind} #{f['number']}]({f['url']}): {f['title'][:80]} ({f['state']})")
    else:
        lines.append("**No TurboQuant/PolarQuant/QJL mentions found upstream.**")
        lines.append("TurboQuant has NOT landed in upstream llama.cpp yet.")
    lines.append("")

    kv = [f for f in unique if any(t in f["term"].lower() for t in ["kv cache", "kv_type", "ggml_type"])]
    if kv:
        lines.append(f"## KV Cache Related ({len(kv)} findings)")
        for f in kv[:10]:
            kind = "PR" if f["is_pr"] else "Issue"
            lines.append(f"- [{kind} #{f['number']}]({f['url']}): {f['title'][:80]}")
        lines.append("")

    lines.append("## Ollama Releases")
    if releases and not any("error" in r for r in releases):
        tq_rel = [r for r in releases if r.get("matched_terms")]
        if tq_rel:
            for r in tq_rel:
                lines.append(f"- [{r['tag']}]({r['url']}): matched {r['matched_terms']}")
        else:
            lines.append("No recent Ollama releases mention TurboQuant/KV cache compression.")
    else:
        lines.append("Could not check Ollama releases (API error).")
    lines.append("")

    lines.append("## Fork Status")
    if "error" not in fork_status:
        lines.append(f"- **Upstream (llama.cpp):** {fork_status.get('upstream_sha', 'N/A')} — {fork_status.get('upstream_message', 'N/A')}")
        lines.append(f"- **Fork (turboquant):** {fork_status.get('fork_sha', 'N/A')} — {fork_status.get('fork_message', 'N/A')}")
        if "days_behind" in fork_status:
            d = fork_status["days_behind"]
            lines.append(f"- **Fork freshness:** {'CURRENT' if d <= 7 else f'{d} days behind'}")
    lines.append("")

    # BUG FIX: `errors` was collected above but never rendered, even though
    # the committed report contains an "## Errors" section — rate limits and
    # 422s were silently dropped. Surface them here so API failures are
    # visible in the report.
    if errors:
        lines.append("## Errors")
        for e in errors:
            lines.append(f"- {e.get('term', e.get('repo', '?'))}: {e['error']}")
        lines.append("")

    lines.append("## Recommendation")
    if tq:
        merged = [f for f in tq if f["state"] == "closed"]
        if merged:
            lines.append("**ACTION REQUIRED:** TurboQuant PRs merged upstream! Evaluate migration.")
        else:
            lines.append("TurboQuant PRs exist upstream but not yet merged. Continue monitoring.")
    else:
        lines.append("No upstream TurboQuant support detected. Continue using fork. Re-check weekly.")

    return "\n".join(lines)
|
||||
|
||||
|
||||
def main():
    """CLI entry point: search upstream repos, then print and save the report.

    Writes docs/upstream-watch-report.md (relative to the repo root) unless
    --json is given, in which case machine-readable JSON goes to stdout only.
    """
    parser = argparse.ArgumentParser(description="TurboQuant upstream watch")
    parser.add_argument("--json", action="store_true",
                        help="emit machine-readable JSON instead of markdown")
    parser.add_argument("--since", default="30d",
                        help="look-back window in days, e.g. 7d")
    args = parser.parse_args()

    days = int(args.since.rstrip("d"))
    since_date = (datetime.now(timezone.utc) - timedelta(days=days)).strftime("%Y-%m-%d")

    # Prefer an explicit env token (works in CI); fall back to the
    # conventional on-disk location the script previously used.
    token = os.environ.get("GITHUB_TOKEN")
    if not token:
        gh_token_path = Path.home() / ".config" / "github" / "token"
        if gh_token_path.exists():
            token = gh_token_path.read_text(encoding="utf-8").strip()

    all_findings = []
    # Only the repo slug is needed here; the display-name key is unused.
    for repo in WATCH_REPOS.values():
        all_findings.extend(search_repo(repo, SEARCH_TERMS, since_date, token))

    releases = check_releases(WATCH_REPOS["ollama"], token)
    fork_status = check_fork_status(token)

    if args.json:
        print(json.dumps({
            "generated": datetime.now(timezone.utc).isoformat(),
            "since": since_date,
            "findings": [f for f in all_findings if "error" not in f],
            "errors": [f for f in all_findings if "error" in f],
            "releases": releases,
            "fork_status": fork_status,
        }, indent=2))
    else:
        report = generate_report(all_findings, releases, fork_status, since_date)
        print(report)
        docs_dir = Path(__file__).resolve().parent.parent / "docs"
        docs_dir.mkdir(exist_ok=True)
        (docs_dir / "upstream-watch-report.md").write_text(report, encoding="utf-8")


if __name__ == "__main__":
    main()
|
||||
BIN
tests/__pycache__/test_polar_quant.cpython-312-pytest-9.0.2.pyc
Normal file
BIN
tests/__pycache__/test_polar_quant.cpython-312-pytest-9.0.2.pyc
Normal file
Binary file not shown.
Reference in New Issue
Block a user