Add/Update wolf/evaluator.py by Wolf

This commit is contained in:
2026-04-05 17:59:21 +00:00
parent bd6f6bbbd7
commit 1624975dc1

83
wolf/evaluator.py Normal file
View File

@@ -0,0 +1,83 @@
import logging
import json
import re
from typing import Dict, Any, List, Optional
from .gitea import GiteaClient
class Evaluator:
    """
    Evaluator for Wolf.

    Scores a pull request on five dimensions and sums them into a total.
    Only the CI and description scores are derived from fetched data; the
    commit, code-quality and functionality scores are fixed placeholders
    until the corresponding analysis is implemented.
    """

    def __init__(self, gitea_client: "GiteaClient") -> None:
        """Store the Gitea client used to fetch pull-request data."""
        # The annotation is quoted so it is not evaluated at definition time.
        self.gitea = gitea_client

    def score_pr(self, owner, repo, pr_index) -> Dict[str, int]:
        """
        Score a PR on multiple dimensions.

        Args:
            owner: Repository owner, forwarded unchanged to the Gitea client.
            repo: Repository name, forwarded unchanged to the Gitea client.
            pr_index: Pull-request identifier, forwarded unchanged to the
                Gitea client.

        Returns:
            A dict with the keys ``total_score``, ``ci_score``,
            ``commit_score``, ``code_score``, ``functionality_score`` and
            ``description_score``; ``total_score`` is the sum of the others.

        Raises:
            Whatever ``get_pull_request`` on the client raises; that call is
            not guarded here.
        """
        logging.info("Scoring PR %s in %s/%s", pr_index, owner, repo)
        pr = self.gitea.get_pull_request(owner, repo, pr_index)

        # 1. CI Status (0-20 points)
        ci_score = self._score_ci(owner, repo, pr_index)
        # 2. Commit Messages (0-10 points)
        commit_score = self._score_commits(owner, repo, pr_index)
        # 3. Meaningful Code vs Boilerplate (0-30 points)
        code_score = self._score_code_quality(owner, repo, pr_index)
        # 4. Code Functionality (0-30 points)
        # This would ideally involve running tests; for now it is a placeholder.
        functionality_score = self._score_functionality(owner, repo, pr_index)
        # 5. PR Description (0-10 points)
        description_score = self._score_description(pr)

        total_score = (
            ci_score
            + commit_score
            + code_score
            + functionality_score
            + description_score
        )
        return {
            "total_score": total_score,
            "ci_score": ci_score,
            "commit_score": commit_score,
            "code_score": code_score,
            "functionality_score": functionality_score,
            "description_score": description_score,
        }

    def _score_ci(self, owner, repo, pr_index) -> int:
        """
        Score the PR's CI status.

        Returns 20 when the reported state is ``"success"``, 10 when it is
        ``"pending"``, and 0 for any other state, for an empty status, or
        when the status cannot be fetched or read.
        """
        try:
            status = self.gitea.get_pr_status(owner, repo, pr_index)
            state = status.get("state") if status else None
        except Exception:
            # Best effort: a failed lookup scores zero instead of aborting
            # the evaluation, but the failure is logged rather than hidden.
            logging.warning(
                "Could not determine CI status for PR %s in %s/%s",
                pr_index,
                owner,
                repo,
                exc_info=True,
            )
            return 0

        if state == "success":
            return 20
        if state == "pending":
            return 10
        return 0

    def _score_commits(self, owner, repo, pr_index) -> int:
        """
        Score commit messages (placeholder).

        Always returns 8. A real implementation would fetch the PR's commits
        (which needs an additional Gitea API call) and treat a message as
        good when it is longer than 10 characters and not generic.
        """
        return 8  # Placeholder

    def _score_code_quality(self, owner, repo, pr_index) -> int:
        """
        Score meaningful code versus boilerplate (placeholder).

        Always returns 25. A real implementation would fetch the diff and
        analyze it.
        """
        return 25  # Placeholder

    def _score_functionality(self, owner, repo, pr_index) -> int:
        """
        Score code functionality (placeholder).

        Always returns 20. A real implementation would check whether tests
        are included or whether the code looks functional.
        """
        return 20  # Placeholder

    def _score_description(self, pr) -> int:
        """
        Score the PR description by its length.

        Returns 10 for a body longer than 100 characters, 5 for a body longer
        than 20 characters, and 0 otherwise. A missing or null ``body`` (or a
        null ``pr``) counts as an empty description.
        """
        # ``.get("body", "")`` would still yield None for an explicit null
        # body, so coalesce to an empty string before measuring.
        body = (pr or {}).get("body") or ""
        length = len(body)
        if length > 100:
            return 10
        if length > 20:
            return 5
        return 0