251 lines
10 KiB
Python
Executable File
251 lines
10 KiB
Python
Executable File
#!/usr/bin/env python3
"""
TurboQuant Upstream Watch Monitor

Monitors llama.cpp and Ollama for TurboQuant/PolarQuant/QJL support.

Issue #15: [P4] Upstream llama.cpp / Ollama TurboQuant watch
"""
|
|
|
|
import argparse
import json
import os
import subprocess
import sys
import urllib.error
import urllib.parse
import urllib.request
from datetime import datetime, timedelta
from typing import Any, Dict, List, Optional
|
|
|
|
# --- Configuration ----------------------------------------------------------

# Base URL for all GitHub REST API requests.
GITHUB_API = "https://api.github.com"

# Upstream repositories monitored for TurboQuant-related activity.
LLAMA_CPP_REPO = "ggerganov/llama.cpp"
OLLAMA_REPO = "ollama/ollama"
GGML_REPO = "ggml-org/ggml"

# Search phrases that indicate upstream TurboQuant/PolarQuant/QJL work.
KEYWORDS = [
    "turborot",
    "turborotquant",
    "polarquant",
    "qjl",
    "kv cache compression",
    "kv cache quantization",
    "quantized kv",
    "kv quant",
    "cache compression",
]
|
|
|
|
class UpstreamWatch:
    """Scans upstream repositories (llama.cpp, Ollama, ggml) for
    TurboQuant/PolarQuant/QJL-related issues, PRs, and releases.
    """

    def __init__(self, github_token: Optional[str] = None):
        """Resolve a GitHub token and prepare request headers.

        Token resolution order: explicit argument, GITHUB_TOKEN env var,
        then the ~/.config/github/token file.  Unauthenticated requests
        still work but are heavily rate-limited by GitHub.
        """
        self.github_token = github_token or os.environ.get("GITHUB_TOKEN")
        # Fallback: read from ~/.config/github/token file
        if not self.github_token:
            token_path = os.path.expanduser("~/.config/github/token")
            if os.path.isfile(token_path):
                try:
                    with open(token_path) as f:
                        self.github_token = f.read().strip()
                except OSError:
                    # Best effort: an unreadable token file just means we
                    # proceed unauthenticated.
                    pass
        self.headers = {"Accept": "application/vnd.github.v3+json"}
        if self.github_token:
            self.headers["Authorization"] = f"token {self.github_token}"

    def _github_request(self, endpoint: str) -> Any:
        """GET a GitHub API endpoint and return the parsed JSON body.

        Returns None (after printing a diagnostic) on an HTTP error so
        callers can treat failures as "no data".
        """
        url = f"{GITHUB_API}{endpoint}"
        req = urllib.request.Request(url, headers=self.headers)

        try:
            with urllib.request.urlopen(req) as resp:
                return json.loads(resp.read())
        except urllib.error.HTTPError as e:
            print(f"GitHub API error: {e.code} - {e.reason}")
            return None

    def search_repo_issues_prs(self, repo: str, keywords: List[str], days: int = 30) -> List[Dict]:
        """Search a repository's issues and PRs for the given keywords.

        BUG FIX: the previous implementation appended ``?q=...`` to the
        plain ``/repos/{repo}/issues`` and ``/repos/{repo}/pulls`` list
        endpoints.  Those endpoints ignore ``q`` and return a JSON *list*,
        so the ``data and "items" in data`` check never matched and the
        scan silently found nothing.  The Search API (``/search/issues``)
        honors the query, covers both issues and PRs, and returns
        ``{"items": [...]}`` as this code expects.

        Args:
            repo: "owner/name" repository slug.
            keywords: search phrases; multi-word phrases are quoted so
                they match as exact phrases.
            days: only report items created within this many days.

        Returns:
            Dicts with type/repo/number/title/url/created/updated/keyword.
        """
        import urllib.parse

        results: List[Dict] = []
        since = (datetime.now() - timedelta(days=days)).strftime("%Y-%m-%dT%H:%M:%SZ")

        for keyword in keywords:
            # Scope the query to the repo and the time window; quoting the
            # phrase keeps multi-word keywords from matching as loose terms.
            query = urllib.parse.quote(f'repo:{repo} "{keyword}" created:>{since}')
            endpoint = f"/search/issues?q={query}&sort=updated&order=desc"
            data = self._github_request(endpoint)

            if data and "items" in data:
                for item in data["items"]:
                    # The Search API mixes issues and PRs; PRs carry a
                    # "pull_request" key.
                    results.append({
                        "type": "pr" if "pull_request" in item else "issue",
                        "repo": repo,
                        "number": item["number"],
                        "title": item["title"],
                        "url": item["html_url"],
                        "created": item["created_at"],
                        "updated": item["updated_at"],
                        "keyword": keyword
                    })

        return results

    def check_ollama_releases(self, days: int = 30) -> List[Dict]:
        """Check recent Ollama releases for keyword mentions in the notes.

        Args:
            days: only consider releases published within this window.

        Returns:
            Dicts with version/name/url/published/keywords for each
            matching release.
        """
        releases: List[Dict] = []
        endpoint = f"/repos/{OLLAMA_REPO}/releases"
        data = self._github_request(endpoint)

        if data:
            since = datetime.now() - timedelta(days=days)
            for release in data:
                # Draft releases have a null published_at; skip them
                # instead of crashing in strptime.
                published_at = release.get("published_at")
                if not published_at:
                    continue
                published = datetime.strptime(published_at, "%Y-%m-%dT%H:%M:%SZ")
                if published > since:
                    # Check release notes for keywords; body may be null.
                    body = (release.get("body") or "").lower()
                    found_keywords = [kw for kw in KEYWORDS if kw.lower() in body]

                    if found_keywords:
                        releases.append({
                            "version": release["tag_name"],
                            "name": release["name"],
                            "url": release["html_url"],
                            "published": release["published_at"],
                            "keywords": found_keywords
                        })

        return releases

    def get_fork_status(self) -> Dict[str, Any]:
        """Get status of our TurboQuant fork.

        NOTE: placeholder implementation — a real version would inspect
        the local fork checkout (e.g. via git) for version information.
        """
        return {
            "fork_url": "https://github.com/TheTom/llama-cpp-turboquant",
            "status": "active",
            "last_updated": datetime.now().isoformat(),
            "upstream_version": "unknown",
            "fork_version": "unknown"
        }

    def generate_report(self, days: int = 30, format: str = "text") -> str:
        """Generate a monitoring report across all watched repositories.

        Args:
            days: look-back window for issues/PRs/releases.
            format: "json" for machine-readable output, anything else
                yields the human-readable text report.

        Returns:
            The report as a string.
        """
        print(f"Scanning upstream for TurboQuant mentions (last {days} days)...")

        # Scan each upstream repo plus Ollama's release notes.
        llama_results = self.search_repo_issues_prs(LLAMA_CPP_REPO, KEYWORDS, days)
        ollama_results = self.search_repo_issues_prs(OLLAMA_REPO, KEYWORDS, days)
        ggml_results = self.search_repo_issues_prs(GGML_REPO, KEYWORDS, days)
        ollama_releases = self.check_ollama_releases(days)
        fork_status = self.get_fork_status()

        all_results = llama_results + ollama_results + ggml_results

        if format == "json":
            return json.dumps({
                "scan_date": datetime.now().isoformat(),
                "days_scanned": days,
                "llama_cpp_results": llama_results,
                "ollama_results": ollama_results,
                "ggml_results": ggml_results,
                "ollama_releases": ollama_releases,
                "fork_status": fork_status,
                "total_found": len(all_results)
            }, indent=2)

        # Text format
        report = "TurboQuant Upstream Watch Report\n"
        report += f"Generated: {datetime.now().isoformat()}\n"
        report += f"Scanned: Last {days} days\n"
        report += f"{'='*60}\n\n"

        report += "## Summary\n"
        report += f"- llama.cpp mentions: {len(llama_results)}\n"
        report += f"- Ollama mentions: {len(ollama_results)}\n"
        report += f"- ggml mentions: {len(ggml_results)}\n"
        report += f"- Ollama releases with keywords: {len(ollama_releases)}\n"
        report += f"- Total findings: {len(all_results)}\n\n"

        if all_results:
            report += "## Findings\n"
            for result in all_results[:10]:  # Limit to first 10
                report += f"- [{result['type'].upper()}] {result['repo']}#{result['number']}: {result['title']}\n"
                report += f"  URL: {result['url']}\n"
                report += f"  Keyword: {result['keyword']}\n"
                report += f"  Updated: {result['updated']}\n\n"

        if ollama_releases:
            report += "## Ollama Releases with TurboQuant Mentions\n"
            for release in ollama_releases:
                report += f"- {release['version']}: {release['name']}\n"
                report += f"  URL: {release['url']}\n"
                report += f"  Keywords: {', '.join(release['keywords'])}\n"
                report += f"  Published: {release['published']}\n\n"

        report += "## Fork Status\n"
        report += f"- Fork URL: {fork_status['fork_url']}\n"
        report += f"- Status: {fork_status['status']}\n"
        report += f"- Last Updated: {fork_status['last_updated']}\n\n"

        if not all_results and not ollama_releases:
            report += "## Conclusion\n"
            report += "No TurboQuant/PolarQuant/QJL mentions found in upstream repositories.\n"
            report += f"Recommendation: Continue using fork, re-check in {days} days.\n"
        else:
            report += "## Conclusion\n"
            report += f"Found {len(all_results)} mentions in upstream repositories.\n"
            report += "Evaluate whether to migrate to upstream or continue using fork.\n"

        return report
|
|
|
|
|
|
def main() -> None:
    """CLI entry point: parse arguments, run the scan, emit the report."""
    cli = argparse.ArgumentParser(description="TurboQuant Upstream Watch Monitor")
    cli.add_argument("--days", type=int, default=30, help="Number of days to scan (default: 30)")
    cli.add_argument("--format", choices=["text", "json"], default="text", help="Output format")
    cli.add_argument("--output", help="Output file (default: stdout)")
    cli.add_argument("--github-token", help="GitHub API token (or set GITHUB_TOKEN env var)")
    opts = cli.parse_args()

    # Run the scan with the requested window and format.
    watcher = UpstreamWatch(opts.github_token)
    report_text = watcher.generate_report(opts.days, opts.format)

    # Write to the requested file, or fall back to stdout.
    if not opts.output:
        print(report_text)
        return
    with open(opts.output, "w") as out:
        out.write(report_text)
    print(f"Report saved to {opts.output}")


if __name__ == "__main__":
    main()