This repository has been archived on 2026-03-24. You can view files and clone it. You cannot open issues or pull requests or push a commit.
Files
Timmy-time-dashboard/tests/swarm/test_reward_scoring.py
Claude 211c54bc8c feat: add custom weights, model registry, per-agent models, and reward scoring
Inspired by OpenClaw-RL's multi-model orchestration, this adds four
features for custom model management:

1. Custom model registry (infrastructure/models/registry.py) — SQLite-backed
   registry for GGUF, safetensors, HF checkpoint, and Ollama models with
   role-based lookups (general, reward, teacher, judge).

2. Per-agent model assignment — each swarm persona can use a different model
   instead of sharing the global default. Resolved via registry assignment >
   persona default > global default.

3. Runtime model management API (/api/v1/models) — REST endpoints to register,
   list, assign, enable/disable, and remove custom models without restart.
   Includes a dashboard page at /models.

4. Reward model scoring (PRM-style) — majority-vote quality evaluation of
   agent outputs using a configurable reward model. Scores persist in SQLite
   and feed into the swarm learner.

New config settings: custom_weights_dir, reward_model_enabled,
reward_model_name, reward_model_votes.

54 new tests covering registry CRUD, API endpoints, agent assignments,
role lookups, and reward scoring.

https://claude.ai/code/session_01V4iTozMwcE2gjfnCJdCugC
2026-02-27 01:27:53 +00:00

198 lines
6.9 KiB
Python

"""Tests for reward model scoring in the swarm learner."""
from pathlib import Path
from unittest.mock import MagicMock, patch
import pytest
from swarm.learner import (
RewardScore,
get_reward_scores,
score_output,
)
@pytest.fixture(autouse=True)
def _isolate_db(tmp_path):
    """Redirect the learner's SQLite database to a per-test temp file.

    Applied automatically to every test in this module so persisted
    reward scores never touch a real database.
    """
    scratch_db = tmp_path / "learner_test.db"
    with patch("swarm.learner.DB_PATH", scratch_db):
        yield
class TestScoreOutput:
    """Exercise score_output: feature gating, vote aggregation, registry fallback."""

    @staticmethod
    def _vote(verdict):
        """Build a fake HTTP 200 response whose JSON body is {"response": verdict}."""
        reply = MagicMock()
        reply.status_code = 200
        reply.json.return_value = {"response": verdict}
        return reply

    def test_returns_none_when_disabled(self):
        # Scoring is a no-op when the feature flag is off.
        with patch("swarm.learner._settings") as settings:
            settings.reward_model_enabled = False
            assert score_output("task-1", "agent-1", "do X", "done X") is None

    def test_returns_none_when_no_model(self):
        # No configured model name and nothing in the registry → cannot score.
        with patch("swarm.learner._settings") as settings:
            settings.reward_model_enabled = True
            settings.reward_model_name = ""
            with patch(
                "infrastructure.models.registry.model_registry"
            ) as registry:
                registry.get_reward_model.return_value = None
                assert score_output("task-1", "agent-1", "do X", "done X") is None

    def test_positive_scoring(self):
        """Unanimous GOOD votes yield the maximum score of 1.0."""
        with patch("swarm.learner._settings") as settings:
            settings.reward_model_enabled = True
            settings.reward_model_name = "test-model"
            settings.reward_model_votes = 3
            settings.ollama_url = "http://localhost:11434"
            with patch("requests.post", return_value=self._vote("GOOD")):
                result = score_output("task-1", "agent-1", "do X", "done X")
        assert result is not None
        assert result.score == 1.0
        assert result.positive_votes == 3
        assert result.negative_votes == 0
        assert result.total_votes == 3
        assert result.model_used == "test-model"

    def test_negative_scoring(self):
        """Unanimous BAD votes yield the minimum score of -1.0."""
        with patch("swarm.learner._settings") as settings:
            settings.reward_model_enabled = True
            settings.reward_model_name = "test-model"
            settings.reward_model_votes = 3
            settings.ollama_url = "http://localhost:11434"
            with patch("requests.post", return_value=self._vote("BAD")):
                result = score_output("task-1", "agent-1", "do X", "bad output")
        assert result is not None
        assert result.score == -1.0
        assert result.negative_votes == 3

    def test_mixed_scoring(self):
        """Two GOOD and one BAD vote average to (2 - 1) / 3 ≈ 0.33."""
        votes = [self._vote(v) for v in ("GOOD", "GOOD", "BAD")]
        with patch("swarm.learner._settings") as settings:
            settings.reward_model_enabled = True
            settings.reward_model_name = "test-model"
            settings.reward_model_votes = 3
            settings.ollama_url = "http://localhost:11434"
            # side_effect hands out one canned response per vote, in order.
            with patch("requests.post", side_effect=votes):
                result = score_output("task-1", "agent-1", "do X", "ok output")
        assert result is not None
        assert abs(result.score - (1 / 3)) < 0.01
        assert result.positive_votes == 2
        assert result.negative_votes == 1

    def test_uses_registry_reward_model(self):
        """With an empty configured name, the registry's reward model is used."""
        registry_model = MagicMock()
        registry_model.path = "registry-reward-model"
        registry_model.format = MagicMock()
        registry_model.format.value = "ollama"
        with patch("swarm.learner._settings") as settings:
            settings.reward_model_enabled = True
            settings.reward_model_name = ""
            settings.reward_model_votes = 1
            settings.ollama_url = "http://localhost:11434"
            with patch(
                "infrastructure.models.registry.model_registry"
            ) as registry:
                registry.get_reward_model.return_value = registry_model
                with patch("requests.post", return_value=self._vote("GOOD")):
                    result = score_output("task-1", "agent-1", "do X", "ok")
        assert result is not None
        assert result.model_used == "registry-reward-model"
class TestGetRewardScores:
    """Tests for reading persisted reward scores back from the database."""

    @staticmethod
    def _good_vote():
        """Build a fake HTTP 200 response that always votes GOOD."""
        reply = MagicMock()
        reply.status_code = 200
        reply.json.return_value = {"response": "GOOD"}
        return reply

    def test_empty_history(self):
        # A fresh (per-test, fixture-isolated) database holds no rows.
        assert get_reward_scores() == []

    def test_scores_persisted(self):
        """A score produced by score_output can be read back intact."""
        with patch("swarm.learner._settings") as settings:
            settings.reward_model_enabled = True
            settings.reward_model_name = "test-model"
            settings.reward_model_votes = 1
            settings.ollama_url = "http://localhost:11434"
            with patch("requests.post", return_value=self._good_vote()):
                score_output("task-1", "agent-1", "do X", "done X")
        rows = get_reward_scores()
        assert len(rows) == 1
        first = rows[0]
        assert first["task_id"] == "task-1"
        assert first["agent_id"] == "agent-1"
        assert first["score"] == 1.0

    def test_filter_by_agent(self):
        """get_reward_scores(agent_id=...) returns only that agent's rows."""
        with patch("swarm.learner._settings") as settings:
            settings.reward_model_enabled = True
            settings.reward_model_name = "test-model"
            settings.reward_model_votes = 1
            settings.ollama_url = "http://localhost:11434"
            with patch("requests.post", return_value=self._good_vote()):
                score_output("task-1", "agent-1", "task A", "output A")
                score_output("task-2", "agent-2", "task B", "output B")
        filtered = get_reward_scores(agent_id="agent-1")
        assert len(filtered) == 1
        assert filtered[0]["agent_id"] == "agent-1"
class TestRewardScoreDataclass:
    """Sanity check for constructing RewardScore directly."""

    def test_create_score(self):
        # Build from an explicit kwargs mapping to document every field.
        fields = {
            "score": 0.5,
            "positive_votes": 3,
            "negative_votes": 1,
            "total_votes": 4,
            "model_used": "test-model",
        }
        built = RewardScore(**fields)
        assert built.score == 0.5
        assert built.total_votes == 4