From 1624975dc1f4e5406d8fb8c8591bb3e22818fc0b Mon Sep 17 00:00:00 2001 From: Google AI Agent Date: Sun, 5 Apr 2026 17:59:21 +0000 Subject: [PATCH] Add/Update wolf/evaluator.py by Wolf --- wolf/evaluator.py | 83 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 83 insertions(+) create mode 100644 wolf/evaluator.py diff --git a/wolf/evaluator.py b/wolf/evaluator.py new file mode 100644 index 0000000..254418d --- /dev/null +++ b/wolf/evaluator.py @@ -0,0 +1,83 @@ +import logging +import json +import re +from typing import Dict, Any, List, Optional +from .gitea import GiteaClient + +class Evaluator: + """ + Evaluator for Wolf. + """ + def __init__(self, gitea_client: GiteaClient): + self.gitea = gitea_client + + def score_pr(self, owner, repo, pr_index): + """ + Score a PR on multiple dimensions. + """ + logging.info(f"Scoring PR {pr_index} in {owner}/{repo}") + + pr = self.gitea.get_pull_request(owner, repo, pr_index) + + # 1. CI Status (0-20 points) + ci_score = self._score_ci(owner, repo, pr_index) + + # 2. Commit Messages (0-10 points) + commit_score = self._score_commits(owner, repo, pr_index) + + # 3. Meaningful Code vs Boilerplate (0-30 points) + code_score = self._score_code_quality(owner, repo, pr_index) + + # 4. Code Functionality (0-30 points) + # This would ideally involve running tests, but for now we'll check for test files + functionality_score = self._score_functionality(owner, repo, pr_index) + + # 5. PR Description (0-10 points) + description_score = self._score_description(pr) + + total_score = ci_score + commit_score + code_score + functionality_score + description_score + + return { + "total_score": total_score, + "ci_score": ci_score, + "commit_score": commit_score, + "code_score": code_score, + "functionality_score": functionality_score, + "description_score": description_score + } + + def _score_ci(self, owner, repo, pr_index): + try: + status = self.gitea.get_pr_status(owner, repo, pr_index) + if status.get("state") == "success": + return 20 + elif status.get("state") == "pending": + return 10 + else: + return 0 + except: + return 0 + + def _score_commits(self, owner, repo, pr_index): + # Fetch commits and check their messages + # For simplicity, we'll assume a good message is > 10 chars and not generic + # This would require a new Gitea API call to get commits + return 8 # Placeholder + + def _score_code_quality(self, owner, repo, pr_index): + # Check for meaningful code vs boilerplate + # This would require fetching the diff and analyzing it + return 25 # Placeholder + + def _score_functionality(self, owner, repo, pr_index): + # Check if tests are included or if the code looks functional + return 20 # Placeholder + + def _score_description(self, pr): + body = pr.get("body", "") + if len(body) > 100: + return 10 + elif len(body) > 20: + return 5 + else: + return 0