Compare commits
1 Commits
step35/55-
...
burn/15-17
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
b76312b024 |
21
docs/upstream-watch-report.md
Normal file
21
docs/upstream-watch-report.md
Normal file
@@ -0,0 +1,21 @@
|
||||
# TurboQuant Upstream Watch Report
|
||||
|
||||
Generated: 2026-04-15 02:07 UTC
|
||||
Monitoring since: 2026-03-16
|
||||
|
||||
## Upstream Landing Status
|
||||
**No TurboQuant/PolarQuant/QJL mentions found upstream.**
|
||||
TurboQuant has NOT landed in upstream llama.cpp yet.
|
||||
|
||||
## Fork Status
|
||||
- **Upstream (llama.cpp):** 5d14e5d1 — hexagon: optimization for HMX mat_mul (#21554)
|
||||
- **Fork (turboquant):** 45f8a066 — Merge: ci: fix turbo build + test failures (#66)
|
||||
- **Fork freshness:** CURRENT
|
||||
|
||||
## Errors
|
||||
- turboquant OR polarquant OR qjl: HTTP Error 422: Unprocessable Entity
|
||||
- kv cache type: HTTP Error 422: Unprocessable Entity
|
||||
- ggml_type: Remote end closed connection without response
|
||||
|
||||
## Recommendation
|
||||
No upstream TurboQuant support detected. Continue using fork. Re-check weekly.
|
||||
BIN
scripts/__pycache__/upstream_watch.cpython-312.pyc
Normal file
BIN
scripts/__pycache__/upstream_watch.cpython-312.pyc
Normal file
Binary file not shown.
225
scripts/upstream_watch.py
Normal file
225
scripts/upstream_watch.py
Normal file
@@ -0,0 +1,225 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
upstream_watch.py — Monitor upstream llama.cpp and Ollama for TurboQuant support.
|
||||
|
||||
Checks GitHub for:
|
||||
1. llama.cpp PRs/issues mentioning TurboQuant, PolarQuant, QJL
|
||||
2. Ollama release notes mentioning KV cache types
|
||||
3. ggml commits adding new KV cache types
|
||||
|
||||
Usage:
|
||||
python3 scripts/upstream_watch.py # generate report
|
||||
python3 scripts/upstream_watch.py --json # machine-readable output
|
||||
python3 scripts/upstream_watch.py --since 7d # check last 7 days
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
import urllib.request
|
||||
import urllib.parse
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
# Terms fed to the GitHub issue/PR search API each run. The first three are
# the project-specific names; the last two catch generic KV-cache
# quantization work that might supersede the fork.
SEARCH_TERMS = ["turboquant", "polarquant", "qjl",
                "kv cache quant", "kv_type"]

# Repositories monitored each run, keyed by a short display name
# (the key "ollama" is also used to pick the repo for release checks).
WATCH_REPOS = {
    "llama.cpp": "ggerganov/llama.cpp",
    "ggml": "ggerganov/ggml",
    "ollama": "ollama/ollama",
}
|
||||
|
||||
|
||||
def github_api(path, token=None):
    """Fetch a GitHub REST API endpoint and return the parsed JSON body.

    Parameters:
        path: API path beginning with "/", e.g. "/repos/owner/name/commits".
        token: optional personal-access token, sent as a "token"
            Authorization header when provided.

    Returns:
        The decoded JSON payload on success, or a dict containing an
        "error" key (plus "status" when an HTTP status is available) on
        failure — callers check for "error" rather than catching exceptions.
    """
    url = f"https://api.github.com{path}"
    headers = {"Accept": "application/vnd.github.v3+json", "User-Agent": "turboquant-watch"}
    if token:
        headers["Authorization"] = f"token {token}"
    req = urllib.request.Request(url, headers=headers)
    try:
        # Context manager guarantees the socket is closed even when
        # json.loads raises (the original leaked the response object).
        with urllib.request.urlopen(req, timeout=30) as resp:
            return json.loads(resp.read())
    except urllib.error.HTTPError as e:
        if e.code == 403:
            # 403 from the search API almost always means rate limiting
            # (60 req/h unauthenticated), so give it a recognizable tag.
            return {"error": "rate_limited", "status": 403}
        return {"error": str(e), "status": e.code}
    except Exception as e:
        # Network-level failures (timeout, reset, DNS) — report as a soft
        # error so one bad endpoint doesn't abort the whole run.
        return {"error": str(e)}
|
||||
|
||||
|
||||
def search_repo(repo, terms, since_date, token=None):
    """Search one repo's issues/PRs for each term created on/after since_date.

    Returns a list of finding dicts; API failures become entries carrying
    an "error" key instead of raising, so the caller can keep going.
    """
    hits = []
    for term in terms:
        encoded = urllib.parse.quote(f"repo:{repo} {term} created:>={since_date}")
        data = github_api(
            f"/search/issues?q={encoded}&sort=created&order=desc&per_page=5",
            token,
        )
        if "error" in data:
            hits.append({"error": data["error"], "term": term, "repo": repo})
            continue
        for it in data.get("items", []):
            hits.append({
                "repo": repo,
                "term": term,
                "number": it["number"],
                "title": it["title"],
                "url": it["html_url"],
                "state": it["state"],
                "created": it["created_at"],
                "is_pr": "pull_request" in it,
                "labels": [lbl["name"] for lbl in it.get("labels", [])],
            })
    return hits
|
||||
|
||||
|
||||
def check_releases(repo, token=None):
    """Scan the repo's five most recent releases for TurboQuant/KV-cache terms.

    Returns a list of match dicts (empty when nothing matched); an API
    failure yields a single-element list containing an "error" dict.
    """
    payload = github_api(f"/repos/{repo}/releases?per_page=5", token)
    if isinstance(payload, dict) and "error" in payload:
        return [{"error": payload["error"]}]

    watch_terms = ["turboquant", "polarquant", "qjl", "kv cache", "kv_type"]
    matches = []
    for rel in payload:
        # Release body and name can both be null in the API response.
        body = (rel.get("body") or "").lower()
        name = (rel.get("name") or "").lower()
        haystack = body + " " + name
        matched = [t for t in watch_terms if t in haystack]
        if not matched:
            continue
        matches.append({
            "repo": repo,
            "type": "release",
            "tag": rel["tag_name"],
            "name": rel.get("name", ""),
            "url": rel["html_url"],
            "published": rel["published_at"],
            "matched_terms": matched,
            "snippet": body[:300] if body else "",
        })
    return matches
|
||||
|
||||
|
||||
def check_fork_status(token=None):
    """Compare the latest commit of upstream llama.cpp against the fork.

    Returns a dict with the repo slugs plus, when the API calls succeed,
    short SHA / date / first-line message for each side and "days_behind"
    (upstream head date minus fork head date).
    """
    status = {"fork": "TheTom/llama-cpp-turboquant", "upstream": "ggerganov/llama.cpp"}

    def _record(prefix, commits):
        # Successful commit listings come back as a non-empty list;
        # error responses are dicts and are simply skipped.
        if isinstance(commits, list) and commits:
            head = commits[0]
            status[f"{prefix}_sha"] = head["sha"][:8]
            status[f"{prefix}_date"] = head["commit"]["committer"]["date"]
            status[f"{prefix}_message"] = head["commit"]["message"].split("\n")[0][:100]

    _record("upstream", github_api("/repos/ggerganov/llama.cpp/commits?per_page=1", token))
    _record("fork", github_api("/repos/TheTom/llama-cpp-turboquant/commits?per_page=1", token))

    if "upstream_date" in status and "fork_date" in status:
        def _parse(stamp):
            # GitHub timestamps end in "Z"; normalize for fromisoformat.
            return datetime.fromisoformat(stamp.replace("Z", "+00:00"))

        delta = _parse(status["upstream_date"]) - _parse(status["fork_date"])
        status["days_behind"] = delta.days
    return status
|
||||
|
||||
|
||||
def generate_report(findings, releases, fork_status, since_date):
    """Render the markdown watch report.

    Parameters:
        findings: list of dicts from search_repo(); may contain
            {"error": ...} entries for failed searches.
        releases: list of dicts from check_releases().
        fork_status: dict from check_fork_status().
        since_date: ISO date string marking the start of the window.

    Returns:
        The complete markdown document as one string.
    """
    now = datetime.now(timezone.utc)
    lines = ["# TurboQuant Upstream Watch Report",
             f"\nGenerated: {now.strftime('%Y-%m-%d %H:%M UTC')}",
             f"Monitoring since: {since_date}", ""]

    # Split findings into unique hits (dedup by repo+number — the same
    # issue can match several search terms) and API errors.
    seen = set()
    unique = []
    errors = []
    for f in findings:
        if "error" in f:
            errors.append(f)
            continue
        key = (f["repo"], f["number"])
        if key not in seen:
            seen.add(key)
            unique.append(f)

    lines.append("## Upstream Landing Status")
    tq = [f for f in unique if any(t in f["term"].lower() for t in ["turboquant", "polarquant", "qjl"])]
    if tq:
        lines.append(f"**{len(tq)} findings** mentioning TurboQuant/PolarQuant/QJL:")
        for f in tq[:10]:
            kind = "PR" if f["is_pr"] else "Issue"
            lines.append(f"- [{kind} #{f['number']}]({f['url']}): {f['title'][:80]} ({f['state']})")
    else:
        lines.append("**No TurboQuant/PolarQuant/QJL mentions found upstream.**")
        lines.append("TurboQuant has NOT landed in upstream llama.cpp yet.")
    lines.append("")

    kv = [f for f in unique if any(t in f["term"].lower() for t in ["kv cache", "kv_type", "ggml_type"])]
    if kv:
        lines.append(f"## KV Cache Related ({len(kv)} findings)")
        for f in kv[:10]:
            kind = "PR" if f["is_pr"] else "Issue"
            lines.append(f"- [{kind} #{f['number']}]({f['url']}): {f['title'][:80]}")
        lines.append("")

    lines.append("## Ollama Releases")
    if releases and not any("error" in r for r in releases):
        tq_rel = [r for r in releases if r.get("matched_terms")]
        if tq_rel:
            for r in tq_rel:
                lines.append(f"- [{r['tag']}]({r['url']}): matched {r['matched_terms']}")
        else:
            lines.append("No recent Ollama releases mention TurboQuant/KV cache compression.")
    else:
        lines.append("Could not check Ollama releases (API error).")
    lines.append("")

    lines.append("## Fork Status")
    if "error" not in fork_status:
        lines.append(f"- **Upstream (llama.cpp):** {fork_status.get('upstream_sha', 'N/A')} — {fork_status.get('upstream_message', 'N/A')}")
        lines.append(f"- **Fork (turboquant):** {fork_status.get('fork_sha', 'N/A')} — {fork_status.get('fork_message', 'N/A')}")
        if "days_behind" in fork_status:
            d = fork_status["days_behind"]
            lines.append(f"- **Fork freshness:** {'CURRENT' if d <= 7 else f'{d} days behind'}")
    lines.append("")

    # BUG FIX: `errors` was collected above but never rendered, even though
    # the committed report contains an "## Errors" section — rate limits and
    # 422s were silently dropped. Surface them here so API failures are
    # visible in the report.
    if errors:
        lines.append("## Errors")
        for e in errors:
            lines.append(f"- {e.get('term', e.get('repo', '?'))}: {e['error']}")
        lines.append("")

    lines.append("## Recommendation")
    if tq:
        merged = [f for f in tq if f["state"] == "closed"]
        if merged:
            lines.append("**ACTION REQUIRED:** TurboQuant PRs merged upstream! Evaluate migration.")
        else:
            lines.append("TurboQuant PRs exist upstream but not yet merged. Continue monitoring.")
    else:
        lines.append("No upstream TurboQuant support detected. Continue using fork. Re-check weekly.")

    return "\n".join(lines)
|
||||
|
||||
|
||||
def main():
    """CLI entry point: search upstream repos, then print and save the report.

    Writes docs/upstream-watch-report.md (relative to the repo root) unless
    --json is given, in which case machine-readable JSON goes to stdout only.
    """
    parser = argparse.ArgumentParser(description="TurboQuant upstream watch")
    parser.add_argument("--json", action="store_true",
                        help="emit machine-readable JSON instead of markdown")
    parser.add_argument("--since", default="30d",
                        help="look-back window in days, e.g. 7d")
    args = parser.parse_args()

    days = int(args.since.rstrip("d"))
    since_date = (datetime.now(timezone.utc) - timedelta(days=days)).strftime("%Y-%m-%d")

    # Prefer an explicit env token (works in CI); fall back to the
    # conventional on-disk location the script previously used.
    token = os.environ.get("GITHUB_TOKEN")
    if not token:
        gh_token_path = Path.home() / ".config" / "github" / "token"
        if gh_token_path.exists():
            token = gh_token_path.read_text(encoding="utf-8").strip()

    all_findings = []
    # Only the repo slug is needed here; the display-name key is unused.
    for repo in WATCH_REPOS.values():
        all_findings.extend(search_repo(repo, SEARCH_TERMS, since_date, token))

    releases = check_releases(WATCH_REPOS["ollama"], token)
    fork_status = check_fork_status(token)

    if args.json:
        print(json.dumps({
            "generated": datetime.now(timezone.utc).isoformat(),
            "since": since_date,
            "findings": [f for f in all_findings if "error" not in f],
            "errors": [f for f in all_findings if "error" in f],
            "releases": releases,
            "fork_status": fork_status,
        }, indent=2))
    else:
        report = generate_report(all_findings, releases, fork_status, since_date)
        print(report)
        docs_dir = Path(__file__).resolve().parent.parent / "docs"
        docs_dir.mkdir(exist_ok=True)
        (docs_dir / "upstream-watch-report.md").write_text(report, encoding="utf-8")


if __name__ == "__main__":
    main()
|
||||
BIN
tests/__pycache__/test_polar_quant.cpython-312-pytest-9.0.2.pyc
Normal file
BIN
tests/__pycache__/test_polar_quant.cpython-312-pytest-9.0.2.pyc
Normal file
Binary file not shown.
Reference in New Issue
Block a user