feat: upstream TurboQuant watch tool and report (closes #15 )

Monitoring tool for tracking when TurboQuant lands in upstream llama.cpp and Ollama. Checks GitHub PRs/issues for TurboQuant, PolarQuant, QJL mentions, checks Ollama releases, and compares fork freshness against upstream. scripts/upstream_watch.py — Automated monitoring: - Search llama.cpp/ggml/ollama for TurboQuant keywords - Check Ollama releases for KV cache mentions - Compare fork commit age vs upstream - Generate report or JSON output - Run: python3 scripts/upstream_watch.py --since 30d docs/upstream-watch-report.md — Current status: - TurboQuant has NOT landed upstream yet - Fork is CURRENT with upstream llama.cpp - Continue using TheTom/llama-cpp-turboquant fork
2026-04-14 22:14:07 -04:00
4 changed files with 246 additions and 0 deletions
--- a/docs/upstream-watch-report.md
+++ b/docs/upstream-watch-report.md
@@ -0,0 +1,21 @@
 # TurboQuant Upstream Watch Report
 Generated: 2026-04-15 02:07 UTC
 Monitoring since: 2026-03-16
 ## Upstream Landing Status
 **No TurboQuant/PolarQuant/QJL mentions found upstream.**
 TurboQuant has NOT landed in upstream llama.cpp yet.
 ## Fork Status
 - **Upstream (llama.cpp):** 5d14e5d1 — hexagon: optimization for HMX mat_mul (#21554)
 - **Fork (turboquant):** 45f8a066 — Merge: ci: fix turbo build + test failures (#66)
 - **Fork freshness:** CURRENT
 ## Errors
 - turboquant OR polarquant OR qjl: HTTP Error 422: Unprocessable Entity
 - kv cache type: HTTP Error 422: Unprocessable Entity
 - ggml_type: Remote end closed connection without response
 ## Recommendation
 No upstream TurboQuant support detected. Continue using fork. Re-check weekly.
--- a/scripts/pycache/upstream_watch.cpython-312.pyc
+++ b/scripts/pycache/upstream_watch.cpython-312.pyc
--- a/scripts/upstream_watch.py
+++ b/scripts/upstream_watch.py
@@ -0,0 +1,225 @@
 #!/usr/bin/env python3
 """
 upstream_watch.py — Monitor upstream llama.cpp and Ollama for TurboQuant support.
 Checks GitHub for:
 1. llama.cpp PRs/issues mentioning TurboQuant, PolarQuant, QJL
 2. Ollama release notes mentioning KV cache types
 3. ggml commits adding new KV cache types
 Usage:
    python3 scripts/upstream_watch.py              # generate report
    python3 scripts/upstream_watch.py --json        # machine-readable output
    python3 scripts/upstream_watch.py --since 7d    # check last 7 days
 """
 import argparse
 import json
 import os
 import sys
 import urllib.request
 import urllib.parse
 from datetime import datetime, timedelta, timezone
 from pathlib import Path
 SEARCH_TERMS = ["turboquant", "polarquant", "qjl",
                "kv cache quant", "kv_type"]
 WATCH_REPOS = {
    "llama.cpp": "ggerganov/llama.cpp",
    "ggml": "ggerganov/ggml",
    "ollama": "ollama/ollama",
 }
 def github_api(path, token=None):
    url = f"https://api.github.com{path}"
    headers = {"Accept": "application/vnd.github.v3+json", "User-Agent": "turboquant-watch"}
    if token:
        headers["Authorization"] = f"token {token}"
    req = urllib.request.Request(url, headers=headers)
    try:
        resp = urllib.request.urlopen(req, timeout=30)
        return json.loads(resp.read())
    except urllib.error.HTTPError as e:
        if e.code == 403:
            return {"error": "rate_limited", "status": 403}
        return {"error": str(e), "status": e.code}
    except Exception as e:
        return {"error": str(e)}
 def search_repo(repo, terms, since_date, token=None):
    findings = []
    for term in terms:
        query = f"repo:{repo} {term} created:>={since_date}"
        encoded_q = urllib.parse.quote(query)
        url = f"/search/issues?q={encoded_q}&sort=created&order=desc&per_page=5"
        result = github_api(url, token)
        if "error" in result:
            findings.append({"error": result["error"], "term": term, "repo": repo})
            continue
        for item in result.get("items", []):
            findings.append({
                "repo": repo, "term": term, "number": item["number"],
                "title": item["title"], "url": item["html_url"],
                "state": item["state"], "created": item["created_at"],
                "is_pr": "pull_request" in item,
                "labels": [l["name"] for l in item.get("labels", [])],
            })
    return findings
 def check_releases(repo, token=None):
    url = f"/repos/{repo}/releases?per_page=5"
    releases = github_api(url, token)
    if isinstance(releases, dict) and "error" in releases:
        return [{"error": releases["error"]}]
    findings = []
    for release in releases:
        body = (release.get("body") or "").lower()
        name = (release.get("name") or "").lower()
        text = body + " " + name
        matched = [t for t in ["turboquant", "polarquant", "qjl", "kv cache", "kv_type"] if t in text]
        if matched:
            findings.append({
                "repo": repo, "type": "release", "tag": release["tag_name"],
                "name": release.get("name", ""), "url": release["html_url"],
                "published": release["published_at"], "matched_terms": matched,
                "snippet": body[:300] if body else "",
            })
    return findings
 def check_fork_status(token=None):
    upstream = github_api("/repos/ggerganov/llama.cpp/commits?per_page=1", token)
    fork = github_api("/repos/TheTom/llama-cpp-turboquant/commits?per_page=1", token)
    result = {"fork": "TheTom/llama-cpp-turboquant", "upstream": "ggerganov/llama.cpp"}
    if isinstance(upstream, list) and upstream:
        result["upstream_sha"] = upstream[0]["sha"][:8]
        result["upstream_date"] = upstream[0]["commit"]["committer"]["date"]
        result["upstream_message"] = upstream[0]["commit"]["message"].split("\n")[0][:100]
    if isinstance(fork, list) and fork:
        result["fork_sha"] = fork[0]["sha"][:8]
        result["fork_date"] = fork[0]["commit"]["committer"]["date"]
        result["fork_message"] = fork[0]["commit"]["message"].split("\n")[0][:100]
    if "upstream_date" in result and "fork_date" in result:
        u = datetime.fromisoformat(result["upstream_date"].replace("Z", "+00:00"))
        f = datetime.fromisoformat(result["fork_date"].replace("Z", "+00:00"))
        result["days_behind"] = (u - f).days
    return result
 def generate_report(findings, releases, fork_status, since_date):
    now = datetime.now(timezone.utc)
    lines = ["# TurboQuant Upstream Watch Report",
             f"\nGenerated: {now.strftime('%Y-%m-%d %H:%M UTC')}",
             f"Monitoring since: {since_date}", ""]
    seen = set()
    unique = []
    errors = []
    for f in findings:
        if "error" in f:
            errors.append(f)
            continue
        key = (f["repo"], f["number"])
        if key not in seen:
            seen.add(key)
            unique.append(f)
    lines.append("## Upstream Landing Status")
    tq = [f for f in unique if any(t in f["term"].lower() for t in ["turboquant", "polarquant", "qjl"])]
    if tq:
        lines.append(f"**{len(tq)} findings** mentioning TurboQuant/PolarQuant/QJL:")
        for f in tq[:10]:
            kind = "PR" if f["is_pr"] else "Issue"
            lines.append(f"- [{kind} #{f['number']}]({f['url']}): {f['title'][:80]} ({f['state']})")
    else:
        lines.append("**No TurboQuant/PolarQuant/QJL mentions found upstream.**")
        lines.append("TurboQuant has NOT landed in upstream llama.cpp yet.")
    lines.append("")
    kv = [f for f in unique if any(t in f["term"].lower() for t in ["kv cache", "kv_type", "ggml_type"])]
    if kv:
        lines.append(f"## KV Cache Related ({len(kv)} findings)")
        for f in kv[:10]:
            kind = "PR" if f["is_pr"] else "Issue"
            lines.append(f"- [{kind} #{f['number']}]({f['url']}): {f['title'][:80]}")
        lines.append("")
    lines.append("## Ollama Releases")
    if releases and not any("error" in r for r in releases):
        tq_rel = [r for r in releases if r.get("matched_terms")]
        if tq_rel:
            for r in tq_rel:
                lines.append(f"- [{r['tag']}]({r['url']}): matched {r['matched_terms']}")
        else:
            lines.append("No recent Ollama releases mention TurboQuant/KV cache compression.")
    else:
        lines.append("Could not check Ollama releases (API error).")
    lines.append("")
    lines.append("## Fork Status")
    if "error" not in fork_status:
        lines.append(f"- **Upstream (llama.cpp):** {fork_status.get('upstream_sha', 'N/A')} — {fork_status.get('upstream_message', 'N/A')}")
        lines.append(f"- **Fork (turboquant):** {fork_status.get('fork_sha', 'N/A')} — {fork_status.get('fork_message', 'N/A')}")
        if "days_behind" in fork_status:
            d = fork_status["days_behind"]
            lines.append(f"- **Fork freshness:** {'CURRENT' if d <= 7 else f'{d} days behind'}")
    lines.append("")
    lines.append("## Recommendation")
    if tq:
        merged = [f for f in tq if f["state"] == "closed"]
        if merged:
            lines.append("**ACTION REQUIRED:** TurboQuant PRs merged upstream! Evaluate migration.")
        else:
            lines.append("TurboQuant PRs exist upstream but not yet merged. Continue monitoring.")
    else:
        lines.append("No upstream TurboQuant support detected. Continue using fork. Re-check weekly.")
    return "\n".join(lines)
 def main():
    parser = argparse.ArgumentParser(description="TurboQuant upstream watch")
    parser.add_argument("--json", action="store_true")
    parser.add_argument("--since", default="30d")
    args = parser.parse_args()
    days = int(args.since.replace("d", ""))
    since_date = (datetime.now(timezone.utc) - timedelta(days=days)).strftime("%Y-%m-%d")
    token = None
    gh_token_path = Path.home() / ".config" / "github" / "token"
    if gh_token_path.exists():
        token = gh_token_path.read_text().strip()
    all_findings = []
    for name, repo in WATCH_REPOS.items():
        all_findings.extend(search_repo(repo, SEARCH_TERMS, since_date, token))
    releases = check_releases(WATCH_REPOS["ollama"], token)
    fork_status = check_fork_status(token)
    if args.json:
        print(json.dumps({
            "generated": datetime.now(timezone.utc).isoformat(),
            "since": since_date,
            "findings": [f for f in all_findings if "error" not in f],
            "errors": [f for f in all_findings if "error" in f],
            "releases": releases,
            "fork_status": fork_status,
        }, indent=2))
    else:
        report = generate_report(all_findings, releases, fork_status, since_date)
        print(report)
        docs_dir = Path(__file__).resolve().parent.parent / "docs"
        docs_dir.mkdir(exist_ok=True)
        (docs_dir / "upstream-watch-report.md").write_text(report)
 if __name__ == "__main__":
    main()
--- a/tests/pycache/test_polar_quant.cpython-312-pytest-9.0.2.pyc
+++ b/tests/pycache/test_polar_quant.cpython-312-pytest-9.0.2.pyc