Add/Update wolf/leaderboard.py by Wolf
This commit is contained in:
77
wolf/leaderboard.py
Normal file
77
wolf/leaderboard.py
Normal file
@@ -0,0 +1,77 @@
|
|||||||
|
import logging
|
||||||
|
import json
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Dict, Any, List, Optional
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
class Leaderboard:
    """Persistent leaderboard for Wolf model-evaluation scores.

    Scores are stored as JSON on disk (``storage_path``) under two top-level
    keys: ``"models"`` (per-model aggregate stats) and ``"history"`` (a
    chronological record of every score submitted).
    """

    # A model becomes "serverless ready" once it has completed at least
    # MIN_TASKS_FOR_READY tasks with an average score of at least
    # READY_SCORE_THRESHOLD (previously inline magic numbers 5 and 80).
    MIN_TASKS_FOR_READY = 5
    READY_SCORE_THRESHOLD = 80

    def __init__(self, storage_path: Optional[str] = None) -> None:
        """Open (or create) the leaderboard store.

        Args:
            storage_path: Location of the JSON store. Defaults to
                ``~/.hermes/wolf/leaderboard.json``.
        """
        self.storage_path = Path(
            storage_path or Path.home() / ".hermes" / "wolf" / "leaderboard.json"
        )
        # Ensure the parent directory exists before the first save.
        self.storage_path.parent.mkdir(parents=True, exist_ok=True)
        self.data: Dict[str, Any] = self._load()

    def _load(self) -> Dict[str, Any]:
        """Read the JSON store, falling back to an empty structure.

        A missing file is normal (first run). A corrupt or unreadable file
        is logged and treated as empty rather than crashing the caller.
        """
        if not self.storage_path.exists():
            return {"models": {}, "history": []}
        try:
            with open(self.storage_path, "r", encoding="utf-8") as f:
                return json.load(f)
        except (json.JSONDecodeError, OSError):
            logging.warning(
                "Could not read leaderboard at %s; starting fresh", self.storage_path
            )
            return {"models": {}, "history": []}

    def _save(self) -> None:
        """Write the in-memory data back to disk as pretty-printed JSON."""
        with open(self.storage_path, "w", encoding="utf-8") as f:
            json.dump(self.data, f, indent=2)

    def record_score(self, model_name: str, provider: str, task_id: str,
                     score_data: Dict[str, Any]) -> None:
        """Record a score for a model on a specific task and persist it.

        Args:
            model_name: Identifier of the scored model.
            provider: Provider hosting the model.
            task_id: Identifier of the evaluated task.
            score_data: Score payload; must contain a numeric
                ``"total_score"`` entry.

        Raises:
            KeyError: If ``score_data`` lacks ``"total_score"``.
        """
        total_score = score_data["total_score"]
        # Lazy %-style args avoid string formatting when INFO is disabled.
        logging.info("Recording score for model %s: %s", model_name, total_score)

        # 1. Update (or initialize) per-model aggregate stats.
        model_stats = self.data["models"].setdefault(model_name, {
            "provider": provider,
            "total_tasks": 0,
            "average_score": 0,
            "scores": [],
            "serverless_ready": False,
        })
        model_stats["total_tasks"] += 1
        model_stats["scores"].append(total_score)
        model_stats["average_score"] = (
            sum(model_stats["scores"]) / len(model_stats["scores"])
        )

        # 2. Check for deployment readiness. The flag is sticky: once set,
        # later low scores never clear it (same as the original logic).
        if (model_stats["total_tasks"] >= self.MIN_TASKS_FOR_READY
                and model_stats["average_score"] >= self.READY_SCORE_THRESHOLD):
            model_stats["serverless_ready"] = True
            logging.info("Model %s is now serverless-ready!", model_name)

        # 3. Record history.
        # NOTE(review): timestamps are naive local time, preserving the
        # original stored format; switching to UTC would change the data.
        self.data["history"].append({
            "timestamp": datetime.now().isoformat(),
            "model_name": model_name,
            "provider": provider,
            "task_id": task_id,
            "score": score_data,
        })

        self._save()

    def get_rankings(self) -> List[Dict[str, Any]]:
        """Return per-model summaries sorted by average score, best first."""
        rankings = [
            {
                "model_name": model_name,
                "provider": stats["provider"],
                "average_score": stats["average_score"],
                "total_tasks": stats["total_tasks"],
                "serverless_ready": stats["serverless_ready"],
            }
            for model_name, stats in self.data["models"].items()
        ]
        return sorted(rankings, key=lambda x: x["average_score"], reverse=True)
|
||||||
Reference in New Issue
Block a user