#!/usr/bin/env python3
"""
upstream_watch.py — Monitor upstream llama.cpp and Ollama for TurboQuant support.

Checks GitHub for:
1. llama.cpp PRs/issues mentioning TurboQuant, PolarQuant, QJL
2. Ollama release notes mentioning KV cache types
3. ggml PRs/issues mentioning new KV cache types

Usage:
    python3 scripts/upstream_watch.py              # generate report
    python3 scripts/upstream_watch.py --json       # machine-readable output
    python3 scripts/upstream_watch.py --since 7d   # check last 7 days
"""

import argparse
import json
import os
import sys
import urllib.error
import urllib.parse
import urllib.request
from datetime import datetime, timedelta, timezone
from pathlib import Path


SEARCH_TERMS = ["turboquant", "polarquant", "qjl",
                "kv cache quant", "kv_type"]

# NOTE(review): the search API answered 422 for some of these queries in the
# last committed report; confirm the owner/repo names are still current.
WATCH_REPOS = {
    "llama.cpp": "ggerganov/llama.cpp",
    "ggml": "ggerganov/ggml",
    "ollama": "ollama/ollama",
}

FORK_REPO = "TheTom/llama-cpp-turboquant"

# Terms that identify a finding as being about TurboQuant itself.
TURBOQUANT_TERMS = ("turboquant", "polarquant", "qjl")
# Terms that identify a finding as generic KV-cache work.
KV_TERMS = ("kv cache", "kv_type", "ggml_type")
# Terms looked for in release notes.
RELEASE_TERMS = TURBOQUANT_TERMS + ("kv cache", "kv_type")

# HTTP statuses GitHub uses for primary/secondary rate limiting.
RATE_LIMIT_STATUSES = (403, 429)


def github_api(path, token=None):
    """Perform a GET against the GitHub REST API and decode the JSON body.

    Args:
        path: API path including the leading slash and any query string.
        token: Optional personal access token sent in the Authorization header.

    Returns:
        The decoded JSON payload on success. On failure a dict with an
        ``"error"`` key (and ``"status"`` for HTTP errors) is returned instead
        of raising, so callers can keep going with partial data.
    """
    url = f"https://api.github.com{path}"
    headers = {"Accept": "application/vnd.github.v3+json", "User-Agent": "turboquant-watch"}
    if token:
        headers["Authorization"] = f"token {token}"
    req = urllib.request.Request(url, headers=headers)
    try:
        # The context manager closes the connection even if decoding fails.
        with urllib.request.urlopen(req, timeout=30) as resp:
            return json.loads(resp.read())
    except urllib.error.HTTPError as e:
        if e.code in RATE_LIMIT_STATUSES:
            return {"error": "rate_limited", "status": e.code}
        return {"error": str(e), "status": e.code}
    except Exception as e:  # best-effort boundary: report, never crash the run
        return {"error": str(e)}


def search_repo(repo, terms, since_date, token=None):
    """Search one repository's issues and PRs for each term.

    Args:
        repo: ``owner/name`` of the repository to search.
        terms: Iterable of search terms; one API query is issued per term.
        since_date: ``YYYY-MM-DD`` lower bound on the creation date.
        token: Optional GitHub token.

    Returns:
        A list of finding dicts. Failed queries are represented by dicts
        carrying ``"error"``, ``"term"`` and ``"repo"``.
    """
    findings = []
    for term in terms:
        query = f"repo:{repo} {term} created:>={since_date}"
        encoded_q = urllib.parse.quote(query)
        url = f"/search/issues?q={encoded_q}&sort=created&order=desc&per_page=5"
        result = github_api(url, token)
        if "error" in result:
            findings.append({"error": result["error"], "term": term, "repo": repo})
            continue
        for item in result.get("items", []):
            pr_info = item.get("pull_request")
            findings.append({
                "repo": repo, "term": term, "number": item["number"],
                "title": item["title"], "url": item["html_url"],
                "state": item["state"], "created": item["created_at"],
                "is_pr": pr_info is not None,
                # "closed" alone does not mean merged; merged_at does.
                "merged": bool((pr_info or {}).get("merged_at")),
                "labels": [label["name"] for label in item.get("labels", [])],
            })
    return findings


def check_releases(repo, token=None):
    """Scan the five most recent releases of *repo* for relevant terms.

    Returns:
        A list of release findings (possibly empty when nothing matched), or
        a single-element list ``[{"error": ...}]`` when the API call failed.
    """
    releases = github_api(f"/repos/{repo}/releases?per_page=5", token)
    if isinstance(releases, dict) and "error" in releases:
        return [{"error": releases["error"]}]
    findings = []
    for release in releases:
        body = (release.get("body") or "").lower()
        name = (release.get("name") or "").lower()
        text = body + " " + name
        matched = [t for t in RELEASE_TERMS if t in text]
        if matched:
            findings.append({
                "repo": repo, "type": "release", "tag": release["tag_name"],
                "name": release.get("name", ""), "url": release["html_url"],
                "published": release["published_at"], "matched_terms": matched,
                "snippet": body[:300] if body else "",
            })
    return findings


def _latest_commit(repo, token=None):
    """Return (short_sha, committer_date, first_message_line) or None."""
    commits = github_api(f"/repos/{repo}/commits?per_page=1", token)
    if not (isinstance(commits, list) and commits):
        return None
    head = commits[0]
    return (
        head["sha"][:8],
        head["commit"]["committer"]["date"],
        head["commit"]["message"].split("\n")[0][:100],
    )


def check_fork_status(token=None):
    """Compare the newest commit of the fork with the newest upstream commit.

    Returns:
        A dict naming both repos, plus ``*_sha``, ``*_date`` and ``*_message``
        for each side that could be fetched, and ``days_behind`` (upstream
        commit date minus fork commit date, in days) when both are known.
    """
    upstream_repo = WATCH_REPOS["llama.cpp"]
    result = {"fork": FORK_REPO, "upstream": upstream_repo}
    for prefix, repo in (("upstream", upstream_repo), ("fork", FORK_REPO)):
        latest = _latest_commit(repo, token)
        if latest is None:
            continue
        sha, date, message = latest
        result[f"{prefix}_sha"] = sha
        result[f"{prefix}_date"] = date
        result[f"{prefix}_message"] = message
    if "upstream_date" in result and "fork_date" in result:
        # fromisoformat on older Pythons rejects a trailing "Z".
        u = datetime.fromisoformat(result["upstream_date"].replace("Z", "+00:00"))
        f = datetime.fromisoformat(result["fork_date"].replace("Z", "+00:00"))
        result["days_behind"] = (u - f).days
    return result


def _split_findings(findings):
    """Separate error entries from findings and de-duplicate by (repo, number)."""
    seen = set()
    unique = []
    errors = []
    for f in findings:
        if "error" in f:
            errors.append(f)
            continue
        key = (f["repo"], f["number"])
        if key not in seen:
            seen.add(key)
            unique.append(f)
    return unique, errors


def _matches_any(finding, terms):
    """True when the finding's search term contains any of *terms*."""
    term = finding["term"].lower()
    return any(t in term for t in terms)


def _is_merged_pr(finding):
    """True when the finding is a pull request that was actually merged."""
    if not finding.get("is_pr"):
        return False  # a closed issue is not a landed change
    if "merged" in finding:
        return bool(finding["merged"])
    # Findings produced without merge information: fall back to the state.
    return finding["state"] == "closed"


def generate_report(findings, releases, fork_status, since_date):
    """Render the markdown watch report.

    Args:
        findings: Output of :func:`search_repo` (findings and error entries).
        releases: Output of :func:`check_releases`.
        fork_status: Output of :func:`check_fork_status`.
        since_date: ``YYYY-MM-DD`` string shown as the monitoring start.

    Returns:
        The report as a single markdown string.
    """
    now = datetime.now(timezone.utc)
    lines = ["# TurboQuant Upstream Watch Report",
             f"\nGenerated: {now.strftime('%Y-%m-%d %H:%M UTC')}",
             f"Monitoring since: {since_date}", ""]

    unique, errors = _split_findings(findings)
    releases = releases or []

    lines.append("## Upstream Landing Status")
    tq = [f for f in unique if _matches_any(f, TURBOQUANT_TERMS)]
    if tq:
        lines.append(f"**{len(tq)} findings** mentioning TurboQuant/PolarQuant/QJL:")
        for f in tq[:10]:
            kind = "PR" if f["is_pr"] else "Issue"
            lines.append(f"- [{kind} #{f['number']}]({f['url']}): {f['title'][:80]} ({f['state']})")
    else:
        lines.append("**No TurboQuant/PolarQuant/QJL mentions found upstream.**")
        lines.append("TurboQuant has NOT landed in upstream llama.cpp yet.")
    lines.append("")

    kv = [f for f in unique if _matches_any(f, KV_TERMS)]
    if kv:
        lines.append(f"## KV Cache Related ({len(kv)} findings)")
        for f in kv[:10]:
            kind = "PR" if f["is_pr"] else "Issue"
            lines.append(f"- [{kind} #{f['number']}]({f['url']}): {f['title'][:80]}")
        lines.append("")

    lines.append("## Ollama Releases")
    if any("error" in r for r in releases):
        lines.append("Could not check Ollama releases (API error).")
    else:
        # An empty list is a successful check that simply matched nothing.
        matched_releases = [r for r in releases if r.get("matched_terms")]
        if matched_releases:
            for r in matched_releases:
                lines.append(f"- [{r['tag']}]({r['url']}): matched {r['matched_terms']}")
        else:
            lines.append("No recent Ollama releases mention TurboQuant/KV cache compression.")
    lines.append("")

    lines.append("## Fork Status")
    if "error" not in fork_status:
        lines.append(f"- **Upstream (llama.cpp):** {fork_status.get('upstream_sha', 'N/A')} — {fork_status.get('upstream_message', 'N/A')}")
        lines.append(f"- **Fork (turboquant):** {fork_status.get('fork_sha', 'N/A')} — {fork_status.get('fork_message', 'N/A')}")
        if "days_behind" in fork_status:
            d = fork_status["days_behind"]
            lines.append(f"- **Fork freshness:** {'CURRENT' if d <= 7 else f'{d} days behind'}")
    lines.append("")

    if errors:
        # Surface failed queries so "nothing found" is not mistaken for
        # "nothing exists" when the search itself did not run.
        lines.append("## Errors")
        for e in errors:
            lines.append(f"- {e.get('term', 'unknown')} ({e.get('repo', 'unknown')}): {e['error']}")
        lines.append("")

    lines.append("## Recommendation")
    if tq:
        if any(_is_merged_pr(f) for f in tq):
            lines.append("**ACTION REQUIRED:** TurboQuant PRs merged upstream! Evaluate migration.")
        else:
            lines.append("TurboQuant PRs exist upstream but not yet merged. Continue monitoring.")
    else:
        lines.append("No upstream TurboQuant support detected. Continue using fork. Re-check weekly.")

    return "\n".join(lines)


def _parse_since_days(value):
    """Convert a ``--since`` value such as ``"7d"`` or ``"7"`` to whole days.

    Raises:
        ValueError: If the value is not a non-negative integer with an
            optional trailing ``d``.
    """
    text = value.strip().lower()
    if text.endswith("d"):
        text = text[:-1]
    days = int(text)
    if days < 0:
        raise ValueError(f"negative day count: {value!r}")
    return days


def _load_token():
    """Return a GitHub token from ~/.config/github/token or $GITHUB_TOKEN."""
    gh_token_path = Path.home() / ".config" / "github" / "token"
    if gh_token_path.exists():
        token = gh_token_path.read_text().strip()
        if token:
            return token
    return os.environ.get("GITHUB_TOKEN") or None


def main():
    """Command-line entry point: query GitHub and print/write the report."""
    parser = argparse.ArgumentParser(description="TurboQuant upstream watch")
    parser.add_argument("--json", action="store_true")
    parser.add_argument("--since", default="30d")
    args = parser.parse_args()

    try:
        days = _parse_since_days(args.since)
    except ValueError:
        # Exit with a usage message instead of a traceback.
        parser.error(f"--since expects a day count such as '7d', got {args.since!r}")
    since_date = (datetime.now(timezone.utc) - timedelta(days=days)).strftime("%Y-%m-%d")

    token = _load_token()

    all_findings = []
    for repo in WATCH_REPOS.values():
        all_findings.extend(search_repo(repo, SEARCH_TERMS, since_date, token))

    releases = check_releases(WATCH_REPOS["ollama"], token)
    fork_status = check_fork_status(token)

    if args.json:
        print(json.dumps({
            "generated": datetime.now(timezone.utc).isoformat(),
            "since": since_date,
            "findings": [f for f in all_findings if "error" not in f],
            "errors": [f for f in all_findings if "error" in f],
            "releases": releases,
            "fork_status": fork_status,
        }, indent=2))
    else:
        report = generate_report(all_findings, releases, fork_status, since_date)
        print(report)
        docs_dir = Path(__file__).resolve().parent.parent / "docs"
        docs_dir.mkdir(exist_ok=True)
        # The report contains non-ASCII punctuation; do not rely on the
        # platform default encoding.
        (docs_dir / "upstream-watch-report.md").write_text(report, encoding="utf-8")


if __name__ == "__main__":
    main()