Compare commits
1 Commits
step35/67-
...
burn/15-17
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
b76312b024 |
21
docs/upstream-watch-report.md
Normal file
21
docs/upstream-watch-report.md
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
# TurboQuant Upstream Watch Report
|
||||||
|
|
||||||
|
Generated: 2026-04-15 02:07 UTC
|
||||||
|
Monitoring since: 2026-03-16
|
||||||
|
|
||||||
|
## Upstream Landing Status
|
||||||
|
**No TurboQuant/PolarQuant/QJL mentions found upstream.**
|
||||||
|
TurboQuant has NOT landed in upstream llama.cpp yet.
|
||||||
|
|
||||||
|
## Fork Status
|
||||||
|
- **Upstream (llama.cpp):** 5d14e5d1 — hexagon: optimization for HMX mat_mul (#21554)
|
||||||
|
- **Fork (turboquant):** 45f8a066 — Merge: ci: fix turbo build + test failures (#66)
|
||||||
|
- **Fork freshness:** CURRENT
|
||||||
|
|
||||||
|
## Errors
|
||||||
|
- turboquant OR polarquant OR qjl: HTTP Error 422: Unprocessable Entity
|
||||||
|
- kv cache type: HTTP Error 422: Unprocessable Entity
|
||||||
|
- ggml_type: Remote end closed connection without response
|
||||||
|
|
||||||
|
## Recommendation
|
||||||
|
No upstream TurboQuant support detected. Continue using fork. Re-check weekly.
|
||||||
BIN
scripts/__pycache__/upstream_watch.cpython-312.pyc
Normal file
BIN
scripts/__pycache__/upstream_watch.cpython-312.pyc
Normal file
Binary file not shown.
225
scripts/upstream_watch.py
Normal file
225
scripts/upstream_watch.py
Normal file
@@ -0,0 +1,225 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
upstream_watch.py — Monitor upstream llama.cpp and Ollama for TurboQuant support.
|
||||||
|
|
||||||
|
Checks GitHub for:
|
||||||
|
1. llama.cpp PRs/issues mentioning TurboQuant, PolarQuant, QJL
|
||||||
|
2. Ollama release notes mentioning KV cache types
|
||||||
|
3. ggml commits adding new KV cache types
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
python3 scripts/upstream_watch.py # generate report
|
||||||
|
python3 scripts/upstream_watch.py --json # machine-readable output
|
||||||
|
python3 scripts/upstream_watch.py --since 7d # check last 7 days
|
||||||
|
"""
|
||||||
|
|
||||||
|
import argparse
import json
import os
import sys
import urllib.error
import urllib.parse
import urllib.request
from datetime import datetime, timedelta, timezone
from pathlib import Path
|
||||||
|
|
||||||
|
|
||||||
|
# Terms fed one-by-one into GitHub issue/PR search (see search_repo()).
# NOTE(review): generate_report() also filters findings on "ggml_type",
# which is not in this list — confirm whether it should be searched too.
SEARCH_TERMS = ["turboquant", "polarquant", "qjl",
                "kv cache quant", "kv_type"]

# Display name -> GitHub "owner/repo" slug for each monitored repository.
# The "ollama" entry is also used directly for release scanning in main().
WATCH_REPOS = {
    "llama.cpp": "ggerganov/llama.cpp",
    "ggml": "ggerganov/ggml",
    "ollama": "ollama/ollama",
}
|
||||||
|
|
||||||
|
|
||||||
|
def github_api(path, token=None):
    """GET a GitHub REST API path and return the decoded JSON payload.

    Args:
        path: API path starting with "/", e.g. "/repos/owner/name/commits".
        token: optional personal-access token; raises the rate limit when set.

    Returns:
        The parsed JSON (dict or list) on success, or a dict containing an
        "error" key (plus "status" for HTTP errors) on failure — callers
        check for "error" instead of catching exceptions.
    """
    url = f"https://api.github.com{path}"
    headers = {"Accept": "application/vnd.github.v3+json", "User-Agent": "turboquant-watch"}
    if token:
        headers["Authorization"] = f"token {token}"
    req = urllib.request.Request(url, headers=headers)
    try:
        # Use a context manager so the response socket is always closed;
        # the original left it open (resource leak).
        with urllib.request.urlopen(req, timeout=30) as resp:
            return json.loads(resp.read())
    except urllib.error.HTTPError as e:
        if e.code == 403:
            # GitHub reports rate limiting as 403 for unauthenticated calls.
            return {"error": "rate_limited", "status": 403}
        return {"error": str(e), "status": e.code}
    except Exception as e:
        # Network-level failures: timeouts, DNS errors, dropped connections.
        return {"error": str(e)}
|
||||||
|
|
||||||
|
|
||||||
|
def search_repo(repo, terms, since_date, token=None):
    """Search one repo's issues/PRs for each term created on/after since_date.

    Returns a list of finding dicts; API failures become entries with an
    "error" key so the caller can surface them alongside real hits.
    """
    hits = []
    for needle in terms:
        q = urllib.parse.quote(f"repo:{repo} {needle} created:>={since_date}")
        payload = github_api(
            f"/search/issues?q={q}&sort=created&order=desc&per_page=5", token
        )
        if "error" in payload:
            hits.append({"error": payload["error"], "term": needle, "repo": repo})
            continue
        for issue in payload.get("items", []):
            hits.append(
                {
                    "repo": repo,
                    "term": needle,
                    "number": issue["number"],
                    "title": issue["title"],
                    "url": issue["html_url"],
                    "state": issue["state"],
                    "created": issue["created_at"],
                    # Search results mark PRs with a "pull_request" sub-object.
                    "is_pr": "pull_request" in issue,
                    "labels": [lbl["name"] for lbl in issue.get("labels", [])],
                }
            )
    return hits
|
||||||
|
|
||||||
|
|
||||||
|
def check_releases(repo, token=None):
    """Scan a repo's five most recent releases for TurboQuant/KV-cache mentions.

    Returns a list of match dicts (possibly empty), or a one-element list
    holding an "error" dict when the releases API call failed.
    """
    data = github_api(f"/repos/{repo}/releases?per_page=5", token)
    if isinstance(data, dict) and "error" in data:
        return [{"error": data["error"]}]

    watch_words = ["turboquant", "polarquant", "qjl", "kv cache", "kv_type"]
    matches = []
    for rel in data:
        # Match against both the release body and its title, lowercased.
        body = (rel.get("body") or "").lower()
        title = (rel.get("name") or "").lower()
        haystack = body + " " + title
        found = [w for w in watch_words if w in haystack]
        if not found:
            continue
        matches.append(
            {
                "repo": repo,
                "type": "release",
                "tag": rel["tag_name"],
                "name": rel.get("name", ""),
                "url": rel["html_url"],
                "published": rel["published_at"],
                "matched_terms": found,
                "snippet": body[:300] if body else "",
            }
        )
    return matches
|
||||||
|
|
||||||
|
|
||||||
|
def check_fork_status(token=None):
    """Compare the newest commit on upstream llama.cpp vs the TurboQuant fork.

    Returns a dict with sha/date/message for each side (when reachable) and
    "days_behind" when both dates are available.
    """
    result = {"fork": "TheTom/llama-cpp-turboquant", "upstream": "ggerganov/llama.cpp"}
    heads = {
        "upstream": github_api("/repos/ggerganov/llama.cpp/commits?per_page=1", token),
        "fork": github_api("/repos/TheTom/llama-cpp-turboquant/commits?per_page=1", token),
    }
    for side, commits in heads.items():
        # github_api returns an error dict on failure; only a non-empty
        # list means we actually got the head commit.
        if isinstance(commits, list) and commits:
            head = commits[0]
            result[f"{side}_sha"] = head["sha"][:8]
            result[f"{side}_date"] = head["commit"]["committer"]["date"]
            result[f"{side}_message"] = head["commit"]["message"].split("\n")[0][:100]

    if "upstream_date" in result and "fork_date" in result:
        def _parse(stamp):
            # GitHub timestamps end in "Z"; make them fromisoformat-friendly.
            return datetime.fromisoformat(stamp.replace("Z", "+00:00"))

        delta = _parse(result["upstream_date"]) - _parse(result["fork_date"])
        result["days_behind"] = delta.days
    return result
|
||||||
|
|
||||||
|
|
||||||
|
def generate_report(findings, releases, fork_status, since_date):
    """Render the markdown upstream-watch report.

    Args:
        findings: dicts from search_repo(); entries carrying an "error" key
            are collected into an "## Errors" section.
        releases: release-match dicts from check_releases() for Ollama.
        fork_status: dict from check_fork_status().
        since_date: ISO date string where the search window started.

    Returns:
        The complete report as one markdown string.
    """
    now = datetime.now(timezone.utc)
    lines = ["# TurboQuant Upstream Watch Report",
             f"\nGenerated: {now.strftime('%Y-%m-%d %H:%M UTC')}",
             f"Monitoring since: {since_date}", ""]

    # Split findings into real hits (deduped on repo+number, since one issue
    # can match several search terms) and API errors.
    seen = set()
    unique = []
    errors = []
    for f in findings:
        if "error" in f:
            errors.append(f)
            continue
        key = (f["repo"], f["number"])
        if key not in seen:
            seen.add(key)
            unique.append(f)

    lines.append("## Upstream Landing Status")
    tq = [f for f in unique if any(t in f["term"].lower() for t in ["turboquant", "polarquant", "qjl"])]
    if tq:
        lines.append(f"**{len(tq)} findings** mentioning TurboQuant/PolarQuant/QJL:")
        for f in tq[:10]:
            kind = "PR" if f["is_pr"] else "Issue"
            lines.append(f"- [{kind} #{f['number']}]({f['url']}): {f['title'][:80]} ({f['state']})")
    else:
        lines.append("**No TurboQuant/PolarQuant/QJL mentions found upstream.**")
        lines.append("TurboQuant has NOT landed in upstream llama.cpp yet.")
    lines.append("")

    kv = [f for f in unique if any(t in f["term"].lower() for t in ["kv cache", "kv_type", "ggml_type"])]
    if kv:
        lines.append(f"## KV Cache Related ({len(kv)} findings)")
        for f in kv[:10]:
            kind = "PR" if f["is_pr"] else "Issue"
            lines.append(f"- [{kind} #{f['number']}]({f['url']}): {f['title'][:80]}")
        lines.append("")

    lines.append("## Ollama Releases")
    if releases and not any("error" in r for r in releases):
        tq_rel = [r for r in releases if r.get("matched_terms")]
        if tq_rel:
            for r in tq_rel:
                lines.append(f"- [{r['tag']}]({r['url']}): matched {r['matched_terms']}")
        else:
            lines.append("No recent Ollama releases mention TurboQuant/KV cache compression.")
    else:
        lines.append("Could not check Ollama releases (API error).")
    lines.append("")

    lines.append("## Fork Status")
    if "error" not in fork_status:
        lines.append(f"- **Upstream (llama.cpp):** {fork_status.get('upstream_sha', 'N/A')} — {fork_status.get('upstream_message', 'N/A')}")
        lines.append(f"- **Fork (turboquant):** {fork_status.get('fork_sha', 'N/A')} — {fork_status.get('fork_message', 'N/A')}")
        if "days_behind" in fork_status:
            d = fork_status["days_behind"]
            lines.append(f"- **Fork freshness:** {'CURRENT' if d <= 7 else f'{d} days behind'}")
    lines.append("")

    # BUG FIX: `errors` was collected above but never written to the report,
    # silently hiding failed searches. The committed
    # docs/upstream-watch-report.md contains an "## Errors" section, so this
    # output was clearly intended.
    if errors:
        lines.append("## Errors")
        for e in errors:
            lines.append(f"- {e.get('term', 'unknown')}: {e['error']}")
        lines.append("")

    lines.append("## Recommendation")
    if tq:
        merged = [f for f in tq if f["state"] == "closed"]
        if merged:
            lines.append("**ACTION REQUIRED:** TurboQuant PRs merged upstream! Evaluate migration.")
        else:
            lines.append("TurboQuant PRs exist upstream but not yet merged. Continue monitoring.")
    else:
        lines.append("No upstream TurboQuant support detected. Continue using fork. Re-check weekly.")

    return "\n".join(lines)
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """CLI entry point: search upstream repos, then print/save the report.

    With --json, emits machine-readable output to stdout only; otherwise
    prints the markdown report and writes it to docs/upstream-watch-report.md.
    """
    parser = argparse.ArgumentParser(description="TurboQuant upstream watch")
    parser.add_argument("--json", action="store_true",
                        help="emit machine-readable JSON instead of markdown")
    parser.add_argument("--since", default="30d",
                        help="look-back window in days, e.g. '7d'")
    args = parser.parse_args()

    days = int(args.since.replace("d", ""))
    since_date = (datetime.now(timezone.utc) - timedelta(days=days)).strftime("%Y-%m-%d")

    # An optional token raises the GitHub search API rate limit.
    token = None
    gh_token_path = Path.home() / ".config" / "github" / "token"
    if gh_token_path.exists():
        token = gh_token_path.read_text(encoding="utf-8").strip()

    all_findings = []
    # Only the repo slugs are needed here; the original iterated .items()
    # and discarded the display-name key.
    for repo in WATCH_REPOS.values():
        all_findings.extend(search_repo(repo, SEARCH_TERMS, since_date, token))

    releases = check_releases(WATCH_REPOS["ollama"], token)
    fork_status = check_fork_status(token)

    if args.json:
        print(json.dumps({
            "generated": datetime.now(timezone.utc).isoformat(),
            "since": since_date,
            "findings": [f for f in all_findings if "error" not in f],
            "errors": [f for f in all_findings if "error" in f],
            "releases": releases,
            "fork_status": fork_status,
        }, indent=2))
    else:
        report = generate_report(all_findings, releases, fork_status, since_date)
        print(report)
        # Persist alongside the repo docs so the report is versioned.
        docs_dir = Path(__file__).resolve().parent.parent / "docs"
        docs_dir.mkdir(exist_ok=True)
        (docs_dir / "upstream-watch-report.md").write_text(report, encoding="utf-8")


if __name__ == "__main__":
    main()
|
||||||
BIN
tests/__pycache__/test_polar_quant.cpython-312-pytest-9.0.2.pyc
Normal file
BIN
tests/__pycache__/test_polar_quant.cpython-312-pytest-9.0.2.pyc
Normal file
Binary file not shown.
Reference in New Issue
Block a user