feat: adapt token rewards based on system stress signals (#714)

Implements adaptive token rewards that respond to system stress:

- StressDetector module (timmy/stress_detector.py):
  - Monitors 4 stress signals: flaky test rate, P1 backlog growth, CI failure rate, open bug count
  - Calculates a weighted stress score (0-1) and determines the mode: calm (<0.3), elevated (>=0.3 and <0.6), high (>=0.6)
  - Applies quest-specific multipliers based on the current mode
- Configuration (config/stress_modes.yaml):
  - Thresholds for mode transitions
  - Signal weights and thresholds
  - Multipliers per mode (e.g., test_improve: 1.5x in high stress)
- Quest system integration:
  - Reward notifications now include the stress bonus/penalty
  - Quest status API includes adjusted_reward and multiplier
  - Agent can see the current stress mode and why rewards changed
- API endpoints:
  - GET /quests/api/stress - current stress mode and signals
  - POST /quests/api/stress/refresh - force a refresh of stress detection

Fixes #714
2026-03-21 17:26:40 -04:00
parent a95cf806c8
commit 919a011cae
5 changed files with 1081 additions and 3 deletions
--- a/tests/unit/test_stress_detector.py
+++ b/tests/unit/test_stress_detector.py
@@ -0,0 +1,294 @@
+"""Unit tests for the stress detector module.
+
+Tests stress signal calculation, mode detection, multipliers,
+and integration with the quest system.
+"""
+
+from __future__ import annotations
+
+import pytest
+
+from timmy.stress_detector import (
+    StressMode,
+    StressSignal,
+    StressSnapshot,
+    StressThresholds,
+    _calculate_stress_score,
+    _get_multipliers_for_mode,
+    apply_multiplier,
+    get_default_config,
+    reset_stress_state,
+)
+
+
+@pytest.fixture(autouse=True)
+def clean_stress_state():
+    """Reset stress state before and after every test in this module.
+
+    ``autouse=True`` applies the fixture to each test without the test
+    having to request it by name.
+    """
+    # Setup: reset before the test body runs.
+    reset_stress_state()
+    yield
+    # Teardown: reset again once the test has finished.
+    reset_stress_state()
+
+
+# ── Stress Mode Tests ──────────────────────────────────────────────────────
+
+
+class TestStressMode:
+    """Checks the string values carried by the ``StressMode`` enum."""
+
+    def test_stress_mode_values(self):
+        """Every mode exposes its lowercase name as its value."""
+        expected_values = (
+            (StressMode.CALM, "calm"),
+            (StressMode.ELEVATED, "elevated"),
+            (StressMode.HIGH, "high"),
+        )
+        for member, expected in expected_values:
+            assert member.value == expected
+
+
+# ── Stress Signal Tests ────────────────────────────────────────────────────
+
+
+class TestStressSignal:
+    """Checks ``is_triggered`` and ``contribution`` on a single signal.
+
+    Every case uses threshold 10.0 and weight 0.5; only the observed value
+    differs between tests.
+    """
+
+    @staticmethod
+    def _signal_with_value(value):
+        """Build the shared test signal with the given observed *value*."""
+        return StressSignal(
+            name="test_signal",
+            value=value,
+            threshold=10.0,
+            weight=0.5,
+        )
+
+    def test_signal_not_triggered(self):
+        """A value under the threshold does not trigger and adds nothing."""
+        quiet = self._signal_with_value(5.0)
+        assert not quiet.is_triggered
+        assert quiet.contribution == 0.0
+
+    def test_signal_triggered(self):
+        """A value exactly at the threshold triggers with the full weight."""
+        at_limit = self._signal_with_value(10.0)
+        assert at_limit.is_triggered
+        assert at_limit.contribution == 0.5  # equals the weight
+
+    def test_signal_contribution_capped(self):
+        """A value ten times the threshold still contributes only the weight."""
+        far_over = self._signal_with_value(100.0)
+        assert far_over.is_triggered
+        assert far_over.contribution == 0.5  # no more than the weight
+
+    def test_signal_partial_contribution(self):
+        """A value 1.5x the threshold also contributes exactly the weight.
+
+        Despite the test name, the asserted result is the capped value, the
+        same as in ``test_signal_contribution_capped``.
+        """
+        slightly_over = self._signal_with_value(15.0)
+        assert slightly_over.is_triggered
+        assert slightly_over.contribution == 0.5
+
+
+# ── Stress Thresholds Tests ────────────────────────────────────────────────
+
+
+class TestStressThresholds:
+    """Checks score-to-mode mapping with elevated_min=0.3 and high_min=0.6."""
+
+    def test_calm_mode(self):
+        """Scores under elevated_min are classified as CALM."""
+        limits = StressThresholds(elevated_min=0.3, high_min=0.6)
+        for score in (0.0, 0.1, 0.29):
+            assert limits.get_mode_for_score(score) == StressMode.CALM
+
+    def test_elevated_mode(self):
+        """Scores from elevated_min up to just under high_min are ELEVATED."""
+        limits = StressThresholds(elevated_min=0.3, high_min=0.6)
+        for score in (0.3, 0.5, 0.59):
+            assert limits.get_mode_for_score(score) == StressMode.ELEVATED
+
+    def test_high_mode(self):
+        """Scores of high_min or more are classified as HIGH."""
+        limits = StressThresholds(elevated_min=0.3, high_min=0.6)
+        for score in (0.6, 0.8, 1.0):
+            assert limits.get_mode_for_score(score) == StressMode.HIGH
+
+
+# ── Stress Score Calculation Tests ─────────────────────────────────────────
+
+
+class TestStressScoreCalculation:
+    """Checks ``_calculate_stress_score`` over lists of ``StressSignal``."""
+
+    def test_empty_signals(self):
+        """Empty signal list returns zero stress score."""
+        score = _calculate_stress_score([])
+        assert score == 0.0
+
+    def test_no_triggered_signals(self):
+        """No triggered signals means zero stress score."""
+        signals = [
+            StressSignal(name="s1", value=1.0, threshold=10.0, weight=0.5),
+            StressSignal(name="s2", value=2.0, threshold=10.0, weight=0.5),
+        ]
+        score = _calculate_stress_score(signals)
+        assert score == 0.0
+
+    def test_single_triggered_signal(self):
+        """Single triggered signal contributes its weight."""
+        signals = [
+            StressSignal(name="s1", value=10.0, threshold=10.0, weight=0.5),
+        ]
+        score = _calculate_stress_score(signals)
+        # contribution = 0.5, total_weight = 0.5, score = 0.5/0.5 = 1.0
+        assert score == 1.0
+
+    def test_mixed_signals(self):
+        """Mix of triggered and non-triggered signals."""
+        signals = [
+            StressSignal(name="s1", value=10.0, threshold=10.0, weight=0.3),
+            StressSignal(name="s2", value=1.0, threshold=10.0, weight=0.3),
+            StressSignal(name="s3", value=10.0, threshold=10.0, weight=0.4),
+        ]
+        score = _calculate_stress_score(signals)
+        # triggered contributions: 0.3 + 0.4 = 0.7
+        # total_weight: 0.3 + 0.3 + 0.4 = 1.0
+        # score = 0.7 / 1.0 = 0.7
+        # The score is a quotient of float sums, so compare with a tolerance:
+        # exact equality would depend on the implementation's summation
+        # order and rounding rather than on the behavior under test.
+        assert score == pytest.approx(0.7)
+
+    def test_score_capped_at_one(self):
+        """Stress score is capped at 1.0."""
+        signals = [
+            StressSignal(name="s1", value=100.0, threshold=10.0, weight=1.0),
+            StressSignal(name="s2", value=100.0, threshold=10.0, weight=1.0),
+        ]
+        score = _calculate_stress_score(signals)
+        assert score == 1.0  # Capped
+
+
+# ── Multiplier Tests ───────────────────────────────────────────────────────
+
+
+class TestMultipliers:
+    """Checks the default config layout and the per-mode multiplier tables."""
+
+    def test_default_config_structure(self):
+        """The default config exposes its three top-level sections."""
+        defaults = get_default_config()
+        for section in ("thresholds", "signals", "multipliers"):
+            assert section in defaults
+
+    def test_calm_mode_multipliers(self):
+        """CALM mode yields the expected factor for each checked quest type."""
+        table = _get_multipliers_for_mode(StressMode.CALM)
+        expected = {
+            "test_improve": 1.0,
+            "docs_update": 1.2,
+            "exploration": 1.3,
+            "refactor": 1.2,
+        }
+        for quest_type, factor in expected.items():
+            assert table[quest_type] == factor
+
+    def test_elevated_mode_multipliers(self):
+        """ELEVATED mode yields the expected factor for each checked quest type."""
+        table = _get_multipliers_for_mode(StressMode.ELEVATED)
+        expected = {
+            "test_improve": 1.2,
+            "issue_reduce": 1.1,
+            "refactor": 0.9,
+        }
+        for quest_type, factor in expected.items():
+            assert table[quest_type] == factor
+
+    def test_high_mode_multipliers(self):
+        """HIGH mode yields the expected factor for each checked quest type."""
+        table = _get_multipliers_for_mode(StressMode.HIGH)
+        expected = {
+            "test_improve": 1.5,
+            "issue_reduce": 1.4,
+            "exploration": 0.7,
+            "refactor": 0.6,
+        }
+        for quest_type, factor in expected.items():
+            assert table[quest_type] == factor
+
+    def test_multiplier_fallback_for_unknown_type(self):
+        """Looking up an unlisted quest type with a 1.0 default gives 1.0."""
+        table = _get_multipliers_for_mode(StressMode.CALM)
+        # The 1.0 fallback comes from the default passed to dict.get here.
+        fallback = table.get("unknown_type", 1.0)
+        assert fallback == 1.0
+
+
+# ── Apply Multiplier Tests ─────────────────────────────────────────────────
+
+
+class TestApplyMultiplier:
+    """Checks the lower bound of rewards returned by ``apply_multiplier``.
+
+    The active stress mode is not pinned by these tests, so they assert only
+    that the adjusted reward never falls under one token.
+    """
+
+    def test_apply_multiplier_calm(self):
+        """A base reward of 100 for an unlisted quest type stays at least 1."""
+        # No exact value is asserted because the current mode is not mocked.
+        assert apply_multiplier(100, "unknown_type") >= 1
+
+    def test_apply_multiplier_minimum_one(self):
+        """The smallest base reward of 1 still yields at least 1 token."""
+        assert apply_multiplier(1, "any_type") >= 1
+
+
+# ── Stress Snapshot Tests ──────────────────────────────────────────────────
+
+
+class TestStressSnapshot:
+    """Checks dictionary serialisation of ``StressSnapshot``."""
+
+    def test_snapshot_to_dict(self):
+        """``to_dict`` exposes mode, score, signals and multipliers."""
+        snapshot = StressSnapshot(
+            mode=StressMode.ELEVATED,
+            score=0.5,
+            signals=[
+                StressSignal(name="test", value=10.0, threshold=5.0, weight=0.5),
+            ],
+            multipliers={"test_improve": 1.2},
+        )
+
+        payload = snapshot.to_dict()
+
+        # The mode is reported as its string value.
+        assert payload["mode"] == "elevated"
+        assert payload["score"] == 0.5
+        assert len(payload["signals"]) == 1
+        assert payload["multipliers"]["test_improve"] == 1.2
+
+
+# ── Integration Tests ──────────────────────────────────────────────────────
+
+
+class TestStressDetectorIntegration:
+    """Sanity checks on state reset and on the default configuration."""
+
+    def test_reset_stress_state(self):
+        """Calling the reset helper completes without raising."""
+        # Smoke test only: no state is inspected afterwards.
+        reset_stress_state()
+
+    def test_default_config_contains_all_signals(self):
+        """Each expected signal is configured with a threshold and a weight."""
+        signal_config = get_default_config()["signals"]
+        required_names = (
+            "flaky_test_rate",
+            "p1_backlog_growth",
+            "ci_failure_rate",
+            "open_bug_count",
+        )
+
+        for name in required_names:
+            assert name in signal_config
+            entry = signal_config[name]
+            assert "threshold" in entry
+            assert "weight" in entry
+
+    def test_default_config_contains_all_modes(self):
+        """The multiplier section has an entry for each of the three modes."""
+        per_mode = get_default_config()["multipliers"]
+
+        for mode_name in ("calm", "elevated", "high"):
+            assert mode_name in per_mode
+
+    def test_multiplier_weights_sum_approximately_one(self):
+        """Signal weights in the default config add up to roughly 1.0."""
+        # Despite the test name, the summed weights are the signal weights.
+        signal_config = get_default_config()["signals"]
+        weights = [entry["weight"] for entry in signal_config.values()]
+
+        # Accept anything within 0.1 of 1.0.
+        assert 0.9 <= sum(weights) <= 1.1