import logging
import json
from pathlib import Path
from typing import Dict, Any, List, Optional, Union
from datetime import datetime


class Leaderboard:
    """
    Persistent leaderboard of model scores for Wolf.

    State is kept in ``self.data`` and mirrored to a JSON file after every
    recorded score.  The structure is::

        {
            "models": {<model_name>: {"provider", "total_tasks",
                                      "average_score", "scores",
                                      "serverless_ready"}},
            "history": [{"timestamp", "model_name", "provider",
                         "task_id", "score"}, ...]
        }
    """

    # Readiness thresholds: a model is flagged ``serverless_ready`` once it has
    # at least READY_MIN_TASKS recorded tasks AND its average score is at
    # least READY_MIN_AVERAGE (inclusive).
    READY_MIN_TASKS = 5
    READY_MIN_AVERAGE = 80

    def __init__(self, storage_path: Optional[Union[str, Path]] = None) -> None:
        """
        Open (or create) the leaderboard stored at *storage_path*.

        Args:
            storage_path: Location of the JSON file.  When falsy, defaults to
                ``~/.hermes/wolf/leaderboard.json``.  Missing parent
                directories are created.
        """
        self.storage_path = Path(
            storage_path or Path.home() / ".hermes" / "wolf" / "leaderboard.json"
        )
        self.storage_path.parent.mkdir(parents=True, exist_ok=True)
        self.data = self._load()

    def _load(self) -> Dict[str, Any]:
        """
        Read the leaderboard from disk.

        Returns:
            The stored data, or a fresh empty structure when the file does not
            exist.  Missing top-level ``"models"`` / ``"history"`` keys are
            filled in so later code can rely on them.

        Raises:
            json.JSONDecodeError: If the file is not valid JSON.  This is
                deliberately not swallowed: silently starting from an empty
                leaderboard would overwrite the existing file on next save.
            ValueError: If the top-level JSON value is not an object.
        """
        if not self.storage_path.exists():
            return {"models": {}, "history": []}

        with open(self.storage_path, "r", encoding="utf-8") as f:
            loaded = json.load(f)

        if not isinstance(loaded, dict):
            raise ValueError(
                f"Leaderboard file {self.storage_path} must contain a JSON object"
            )
        loaded.setdefault("models", {})
        loaded.setdefault("history", [])
        return loaded

    def _save(self) -> None:
        """
        Write ``self.data`` to disk atomically.

        The JSON is first written to a sibling ``*.tmp`` file and then renamed
        over the real file, so a serialization error or crash mid-write cannot
        leave a truncated leaderboard behind.

        Raises:
            TypeError: If ``self.data`` contains a non-JSON-serializable value.
            OSError: If the file cannot be written or renamed.
        """
        tmp_path = self.storage_path.with_name(self.storage_path.name + ".tmp")
        try:
            with open(tmp_path, "w", encoding="utf-8") as f:
                json.dump(self.data, f, indent=2)
            tmp_path.replace(self.storage_path)
        finally:
            # After a successful replace the temp file no longer exists; this
            # only cleans up after a failed write.
            if tmp_path.exists():
                tmp_path.unlink()

    def record_score(
        self,
        model_name: str,
        provider: str,
        task_id: Any,
        score_data: Dict[str, Any],
    ) -> None:
        """
        Record a score for a model on a specific task and persist it.

        Args:
            model_name: Key under which the model's stats are stored.
            provider: Provider name, stored when the model is first seen and
                in every history entry.
            task_id: Identifier of the task, stored in the history entry.
            score_data: Score payload; must contain ``"total_score"``.  The
                whole payload is stored in the history entry.

        Raises:
            KeyError: If ``score_data`` has no ``"total_score"`` (raised before
                any state is modified).
        """
        total_score = score_data["total_score"]
        logging.info("Recording score for model %s: %s", model_name, total_score)

        # 1. Update model stats
        model_stats = self.data["models"].setdefault(
            model_name,
            {
                "provider": provider,
                "total_tasks": 0,
                "average_score": 0,
                "scores": [],
                "serverless_ready": False,
            },
        )
        model_stats["total_tasks"] += 1
        model_stats["scores"].append(total_score)
        model_stats["average_score"] = (
            sum(model_stats["scores"]) / len(model_stats["scores"])
        )

        # 2. Check for deployment readiness.  The flag is sticky (never reset
        #    here), so only act -- and log -- on the transition to ready.
        if (
            not model_stats["serverless_ready"]
            and model_stats["total_tasks"] >= self.READY_MIN_TASKS
            and model_stats["average_score"] >= self.READY_MIN_AVERAGE
        ):
            model_stats["serverless_ready"] = True
            logging.info("Model %s is now serverless-ready!", model_name)

        # 3. Record history
        # NOTE: timestamp is naive local time, kept for compatibility with
        # existing history entries.
        self.data["history"].append({
            "timestamp": datetime.now().isoformat(),
            "model_name": model_name,
            "provider": provider,
            "task_id": task_id,
            "score": score_data,
        })

        self._save()

    def get_rankings(self) -> List[Dict[str, Any]]:
        """
        Get model rankings sorted by average score, highest first.

        Returns:
            One dict per model with ``model_name``, ``provider``,
            ``average_score``, ``total_tasks`` and ``serverless_ready``.
            Models with equal averages keep their stored order.
        """
        rankings = [
            {
                "model_name": model_name,
                "provider": stats["provider"],
                "average_score": stats["average_score"],
                "total_tasks": stats["total_tasks"],
                "serverless_ready": stats["serverless_ready"],
            }
            for model_name, stats in self.data["models"].items()
        ]
        return sorted(rankings, key=lambda x: x["average_score"], reverse=True)