Compare commits

...

1 Commits

Author SHA1 Message Date
Alexander Whitestone
b76312b024 feat: upstream TurboQuant watch tool and report (closes #15)
All checks were successful
Smoke Test / smoke (pull_request) Successful in 34s
Monitoring tool for tracking when TurboQuant lands in upstream
llama.cpp and Ollama. Checks GitHub PRs/issues for TurboQuant,
PolarQuant, QJL mentions, checks Ollama releases, and compares
fork freshness against upstream.

scripts/upstream_watch.py — Automated monitoring:
  - Search llama.cpp/ggml/ollama for TurboQuant keywords
  - Check Ollama releases for KV cache mentions
  - Compare fork commit age vs upstream
  - Generate report or JSON output
  - Run: python3 scripts/upstream_watch.py --since 30d

docs/upstream-watch-report.md — Current status:
  - TurboQuant has NOT landed upstream yet
  - Fork is CURRENT with upstream llama.cpp
  - Continue using TheTom/llama-cpp-turboquant fork
2026-04-14 22:14:07 -04:00
4 changed files with 246 additions and 0 deletions

View File

@@ -0,0 +1,21 @@
# TurboQuant Upstream Watch Report
Generated: 2026-04-15 02:07 UTC
Monitoring since: 2026-03-16
## Upstream Landing Status
**No TurboQuant/PolarQuant/QJL mentions found upstream.**
TurboQuant has NOT landed in upstream llama.cpp yet.
## Fork Status
- **Upstream (llama.cpp):** 5d14e5d1 — hexagon: optimization for HMX mat_mul (#21554)
- **Fork (turboquant):** 45f8a066 — Merge: ci: fix turbo build + test failures (#66)
- **Fork freshness:** CURRENT
## Errors
- turboquant OR polarquant OR qjl: HTTP Error 422: Unprocessable Entity
- kv cache type: HTTP Error 422: Unprocessable Entity
- ggml_type: Remote end closed connection without response
## Recommendation
No upstream TurboQuant support detected. Continue using fork. Re-check weekly.

Binary file not shown.

225
scripts/upstream_watch.py Normal file
View File

@@ -0,0 +1,225 @@
#!/usr/bin/env python3
"""
upstream_watch.py — Monitor upstream llama.cpp and Ollama for TurboQuant support.
Checks GitHub for:
1. llama.cpp PRs/issues mentioning TurboQuant, PolarQuant, QJL
2. Ollama release notes mentioning KV cache types
3. ggml commits adding new KV cache types
Usage:
python3 scripts/upstream_watch.py # generate report
python3 scripts/upstream_watch.py --json # machine-readable output
python3 scripts/upstream_watch.py --since 7d # check last 7 days
"""
import argparse
import json
import os
import sys
import urllib.request
import urllib.parse
from datetime import datetime, timedelta, timezone
from pathlib import Path
SEARCH_TERMS = ["turboquant", "polarquant", "qjl",
"kv cache quant", "kv_type"]
WATCH_REPOS = {
"llama.cpp": "ggerganov/llama.cpp",
"ggml": "ggerganov/ggml",
"ollama": "ollama/ollama",
}
def github_api(path, token=None):
url = f"https://api.github.com{path}"
headers = {"Accept": "application/vnd.github.v3+json", "User-Agent": "turboquant-watch"}
if token:
headers["Authorization"] = f"token {token}"
req = urllib.request.Request(url, headers=headers)
try:
resp = urllib.request.urlopen(req, timeout=30)
return json.loads(resp.read())
except urllib.error.HTTPError as e:
if e.code == 403:
return {"error": "rate_limited", "status": 403}
return {"error": str(e), "status": e.code}
except Exception as e:
return {"error": str(e)}
def search_repo(repo, terms, since_date, token=None):
findings = []
for term in terms:
query = f"repo:{repo} {term} created:>={since_date}"
encoded_q = urllib.parse.quote(query)
url = f"/search/issues?q={encoded_q}&sort=created&order=desc&per_page=5"
result = github_api(url, token)
if "error" in result:
findings.append({"error": result["error"], "term": term, "repo": repo})
continue
for item in result.get("items", []):
findings.append({
"repo": repo, "term": term, "number": item["number"],
"title": item["title"], "url": item["html_url"],
"state": item["state"], "created": item["created_at"],
"is_pr": "pull_request" in item,
"labels": [l["name"] for l in item.get("labels", [])],
})
return findings
def check_releases(repo, token=None):
url = f"/repos/{repo}/releases?per_page=5"
releases = github_api(url, token)
if isinstance(releases, dict) and "error" in releases:
return [{"error": releases["error"]}]
findings = []
for release in releases:
body = (release.get("body") or "").lower()
name = (release.get("name") or "").lower()
text = body + " " + name
matched = [t for t in ["turboquant", "polarquant", "qjl", "kv cache", "kv_type"] if t in text]
if matched:
findings.append({
"repo": repo, "type": "release", "tag": release["tag_name"],
"name": release.get("name", ""), "url": release["html_url"],
"published": release["published_at"], "matched_terms": matched,
"snippet": body[:300] if body else "",
})
return findings
def check_fork_status(token=None):
upstream = github_api("/repos/ggerganov/llama.cpp/commits?per_page=1", token)
fork = github_api("/repos/TheTom/llama-cpp-turboquant/commits?per_page=1", token)
result = {"fork": "TheTom/llama-cpp-turboquant", "upstream": "ggerganov/llama.cpp"}
if isinstance(upstream, list) and upstream:
result["upstream_sha"] = upstream[0]["sha"][:8]
result["upstream_date"] = upstream[0]["commit"]["committer"]["date"]
result["upstream_message"] = upstream[0]["commit"]["message"].split("\n")[0][:100]
if isinstance(fork, list) and fork:
result["fork_sha"] = fork[0]["sha"][:8]
result["fork_date"] = fork[0]["commit"]["committer"]["date"]
result["fork_message"] = fork[0]["commit"]["message"].split("\n")[0][:100]
if "upstream_date" in result and "fork_date" in result:
u = datetime.fromisoformat(result["upstream_date"].replace("Z", "+00:00"))
f = datetime.fromisoformat(result["fork_date"].replace("Z", "+00:00"))
result["days_behind"] = (u - f).days
return result
def generate_report(findings, releases, fork_status, since_date):
now = datetime.now(timezone.utc)
lines = ["# TurboQuant Upstream Watch Report",
f"\nGenerated: {now.strftime('%Y-%m-%d %H:%M UTC')}",
f"Monitoring since: {since_date}", ""]
seen = set()
unique = []
errors = []
for f in findings:
if "error" in f:
errors.append(f)
continue
key = (f["repo"], f["number"])
if key not in seen:
seen.add(key)
unique.append(f)
lines.append("## Upstream Landing Status")
tq = [f for f in unique if any(t in f["term"].lower() for t in ["turboquant", "polarquant", "qjl"])]
if tq:
lines.append(f"**{len(tq)} findings** mentioning TurboQuant/PolarQuant/QJL:")
for f in tq[:10]:
kind = "PR" if f["is_pr"] else "Issue"
lines.append(f"- [{kind} #{f['number']}]({f['url']}): {f['title'][:80]} ({f['state']})")
else:
lines.append("**No TurboQuant/PolarQuant/QJL mentions found upstream.**")
lines.append("TurboQuant has NOT landed in upstream llama.cpp yet.")
lines.append("")
kv = [f for f in unique if any(t in f["term"].lower() for t in ["kv cache", "kv_type", "ggml_type"])]
if kv:
lines.append(f"## KV Cache Related ({len(kv)} findings)")
for f in kv[:10]:
kind = "PR" if f["is_pr"] else "Issue"
lines.append(f"- [{kind} #{f['number']}]({f['url']}): {f['title'][:80]}")
lines.append("")
lines.append("## Ollama Releases")
if releases and not any("error" in r for r in releases):
tq_rel = [r for r in releases if r.get("matched_terms")]
if tq_rel:
for r in tq_rel:
lines.append(f"- [{r['tag']}]({r['url']}): matched {r['matched_terms']}")
else:
lines.append("No recent Ollama releases mention TurboQuant/KV cache compression.")
else:
lines.append("Could not check Ollama releases (API error).")
lines.append("")
lines.append("## Fork Status")
if "error" not in fork_status:
lines.append(f"- **Upstream (llama.cpp):** {fork_status.get('upstream_sha', 'N/A')}{fork_status.get('upstream_message', 'N/A')}")
lines.append(f"- **Fork (turboquant):** {fork_status.get('fork_sha', 'N/A')}{fork_status.get('fork_message', 'N/A')}")
if "days_behind" in fork_status:
d = fork_status["days_behind"]
lines.append(f"- **Fork freshness:** {'CURRENT' if d <= 7 else f'{d} days behind'}")
lines.append("")
lines.append("## Recommendation")
if tq:
merged = [f for f in tq if f["state"] == "closed"]
if merged:
lines.append("**ACTION REQUIRED:** TurboQuant PRs merged upstream! Evaluate migration.")
else:
lines.append("TurboQuant PRs exist upstream but not yet merged. Continue monitoring.")
else:
lines.append("No upstream TurboQuant support detected. Continue using fork. Re-check weekly.")
return "\n".join(lines)
def main():
parser = argparse.ArgumentParser(description="TurboQuant upstream watch")
parser.add_argument("--json", action="store_true")
parser.add_argument("--since", default="30d")
args = parser.parse_args()
days = int(args.since.replace("d", ""))
since_date = (datetime.now(timezone.utc) - timedelta(days=days)).strftime("%Y-%m-%d")
token = None
gh_token_path = Path.home() / ".config" / "github" / "token"
if gh_token_path.exists():
token = gh_token_path.read_text().strip()
all_findings = []
for name, repo in WATCH_REPOS.items():
all_findings.extend(search_repo(repo, SEARCH_TERMS, since_date, token))
releases = check_releases(WATCH_REPOS["ollama"], token)
fork_status = check_fork_status(token)
if args.json:
print(json.dumps({
"generated": datetime.now(timezone.utc).isoformat(),
"since": since_date,
"findings": [f for f in all_findings if "error" not in f],
"errors": [f for f in all_findings if "error" in f],
"releases": releases,
"fork_status": fork_status,
}, indent=2))
else:
report = generate_report(all_findings, releases, fork_status, since_date)
print(report)
docs_dir = Path(__file__).resolve().parent.parent / "docs"
docs_dir.mkdir(exist_ok=True)
(docs_dir / "upstream-watch-report.md").write_text(report)
if __name__ == "__main__":
main()