From 1624975dc1f4e5406d8fb8c8591bb3e22818fc0b Mon Sep 17 00:00:00 2001
From: Google AI Agent <gemini@hermes.local>
Date: Sun, 5 Apr 2026 17:59:21 +0000
Subject: [PATCH] Add/Update wolf/evaluator.py by Wolf

---
 wolf/evaluator.py | 83 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 83 insertions(+)
 create mode 100644 wolf/evaluator.py

diff --git a/wolf/evaluator.py b/wolf/evaluator.py
new file mode 100644
index 0000000..254418d
--- /dev/null
+++ b/wolf/evaluator.py
@@ -0,0 +1,83 @@
+import logging
+import json
+import re
+from typing import Dict, Any, List, Optional
+from .gitea import GiteaClient
+
+class Evaluator:
+    """Score Gitea pull requests on a 0-100 scale across five dimensions.
+
+    CI status and description scores are derived from data returned by the
+    Gitea client; the commit, code-quality and functionality scores are
+    still fixed placeholder values.
+    """
+
+    def __init__(self, gitea_client: GiteaClient):
+        # Client used for every Gitea lookup made while scoring.
+        self.gitea = gitea_client
+
+    def score_pr(self, owner, repo, pr_index):
+        """Score a PR on multiple dimensions.
+
+        Returns a dict with ``total_score`` plus one entry per dimension.
+        Maxima: CI 20, commits 10, code 30, functionality 30, description 10.
+        Errors raised by ``get_pull_request`` propagate to the caller.
+        """
+        logging.info("Scoring PR %s in %s/%s", pr_index, owner, repo)
+        pr = self.gitea.get_pull_request(owner, repo, pr_index)
+        # Dict order is also evaluation order, so API calls keep their sequence.
+        scores = {
+            "ci_score": self._score_ci(owner, repo, pr_index),
+            "commit_score": self._score_commits(owner, repo, pr_index),
+            "code_score": self._score_code_quality(owner, repo, pr_index),
+            "functionality_score": self._score_functionality(owner, repo, pr_index),
+            "description_score": self._score_description(pr),
+        }
+        # Sum first so the total is never counted into itself.
+        return {"total_score": sum(scores.values()), **scores}
+
+    def _score_ci(self, owner, repo, pr_index):
+        """Return 20 for a successful CI state, 10 for pending, otherwise 0.
+
+        Best effort: a failed or malformed status lookup is logged and scores 0.
+        """
+        try:
+            status = self.gitea.get_pr_status(owner, repo, pr_index)
+            state = status.get("state")
+        except Exception:
+            # Not a bare except, so KeyboardInterrupt/SystemExit still propagate.
+            logging.warning(
+                "CI status lookup failed for PR %s in %s/%s",
+                pr_index, owner, repo, exc_info=True,
+            )
+            return 0
+        if state == "success":
+            return 20
+        return 10 if state == "pending" else 0
+
+    def _score_commits(self, owner, repo, pr_index):
+        """Return the commit-message score (0-10); currently a fixed placeholder."""
+        # TODO: fetch the PR's commits (needs a new Gitea API call) and reward
+        # messages that are longer than 10 characters and not generic.
+        return 8
+
+    def _score_code_quality(self, owner, repo, pr_index):
+        """Return the meaningful-code score (0-30); currently a fixed placeholder."""
+        # TODO: fetch the diff and weigh meaningful code against boilerplate.
+        return 25
+
+    def _score_functionality(self, owner, repo, pr_index):
+        """Return the functionality score (0-30); currently a fixed placeholder."""
+        # TODO: check whether tests are included or the code looks functional.
+        return 20
+
+    def _score_description(self, pr):
+        """Return 10 for a body over 100 chars, 5 for over 20 chars, otherwise 0."""
+        # A key that is present with a null value bypasses a .get() default,
+        # so coerce any falsy body (and a missing PR payload) to "".
+        body = (pr or {}).get("body") or ""
+        if len(body) > 100:
+            return 10
+        if len(body) > 20:
+            return 5
+        return 0