forked from Rockachopa/Timmy-time-dashboard
feat: adapt token rewards based on system stress signals (#714)
Implements adaptive token rewards that respond to system stress:
- StressDetector module (timmy/stress_detector.py):
  - Monitors 4 stress signals: flaky test rate, P1 backlog growth,
    CI failure rate, open bug count
  - Calculates weighted stress score (0-1) and determines mode:
    calm (<0.3), elevated (0.3 to <0.6), high (>=0.6)
  - Applies quest-specific multipliers based on current mode
- Configuration (config/stress_modes.yaml):
  - Thresholds for mode transitions
  - Signal weights and thresholds
  - Multipliers per mode (e.g., test_improve: 1.5x in high stress)
- Quest system integration:
  - Rewards now include stress bonus/penalty in notification
  - Quest status API includes adjusted_reward and multiplier
  - Agent can see current stress mode and why rewards changed
- API endpoints:
  - GET /quests/api/stress - current stress mode and signals
  - POST /quests/api/stress/refresh - force refresh stress detection
Fixes #714
This commit is contained in:
294
tests/unit/test_stress_detector.py
Normal file
294
tests/unit/test_stress_detector.py
Normal file
@@ -0,0 +1,294 @@
|
||||
"""Unit tests for the stress detector module.
|
||||
|
||||
Tests stress signal calculation, mode detection, multipliers,
|
||||
and integration with the quest system.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
from timmy.stress_detector import (
|
||||
StressMode,
|
||||
StressSignal,
|
||||
StressSnapshot,
|
||||
StressThresholds,
|
||||
_calculate_stress_score,
|
||||
_get_multipliers_for_mode,
|
||||
apply_multiplier,
|
||||
get_default_config,
|
||||
reset_stress_state,
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def clean_stress_state():
    """Wrap every test with a call to ``reset_stress_state`` on both sides.

    The fixture is autouse, so it applies to each test in this module
    without being requested explicitly.
    """
    # Setup: run the reset before the test body executes.
    reset_stress_state()
    yield
    # Teardown: run the reset again once the test has finished.
    reset_stress_state()
|
||||
|
||||
|
||||
# ── Stress Mode Tests ──────────────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestStressMode:
    """Checks the string values carried by the ``StressMode`` members."""

    def test_stress_mode_values(self):
        """Each mode exposes its lowercase name through ``.value``."""
        expected_values = (
            (StressMode.CALM, "calm"),
            (StressMode.ELEVATED, "elevated"),
            (StressMode.HIGH, "high"),
        )
        for mode, expected in expected_values:
            assert mode.value == expected
|
||||
|
||||
|
||||
# ── Stress Signal Tests ────────────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestStressSignal:
    """Trigger and contribution checks for a single ``StressSignal``.

    All cases use ``threshold=10.0`` and ``weight=0.5`` and vary only
    ``value``.
    """

    def test_signal_not_triggered(self):
        """Signal with value below threshold is not triggered."""
        signal = StressSignal(
            name="test_signal",
            value=5.0,
            threshold=10.0,
            weight=0.5,
        )
        assert not signal.is_triggered
        # An untriggered signal is expected to contribute nothing at all.
        assert signal.contribution == 0.0

    def test_signal_triggered(self):
        """Signal with value exactly at threshold is triggered."""
        signal = StressSignal(
            name="test_signal",
            value=10.0,
            threshold=10.0,
            weight=0.5,
        )
        assert signal.is_triggered
        assert signal.contribution == 0.5  # weight * min(1, value/threshold)

    def test_signal_contribution_capped(self):
        """Signal contribution is capped at weight when value >> threshold."""
        signal = StressSignal(
            name="test_signal",
            value=100.0,
            threshold=10.0,
            weight=0.5,
        )
        assert signal.is_triggered
        assert signal.contribution == 0.5  # Capped at weight

    def test_signal_partial_contribution(self):
        """Signal moderately above threshold still contributes its full weight.

        NOTE(review): despite the test name, no partial (sub-weight)
        contribution is exercised here. The value is above the threshold,
        so the expected result is the capped one. Together with
        ``test_signal_not_triggered`` (below threshold -> 0.0) this suggests
        a triggered signal always contributes exactly ``weight`` -- confirm
        against ``StressSignal.contribution``.
        """
        signal = StressSignal(
            name="test_signal",
            value=15.0,
            threshold=10.0,
            weight=0.5,
        )
        assert signal.is_triggered
        # contribution = min(1, 15/10) * 0.5 = 0.5 (capped)
        assert signal.contribution == 0.5
|
||||
|
||||
|
||||
# ── Stress Thresholds Tests ────────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestStressThresholds:
    """Score-to-mode mapping with ``elevated_min=0.3`` and ``high_min=0.6``."""

    def test_calm_mode(self):
        """Scores under ``elevated_min`` resolve to CALM."""
        limits = StressThresholds(elevated_min=0.3, high_min=0.6)
        for score in (0.0, 0.1, 0.29):
            assert limits.get_mode_for_score(score) == StressMode.CALM

    def test_elevated_mode(self):
        """Scores from ``elevated_min`` up to just under ``high_min`` are ELEVATED."""
        limits = StressThresholds(elevated_min=0.3, high_min=0.6)
        for score in (0.3, 0.5, 0.59):
            assert limits.get_mode_for_score(score) == StressMode.ELEVATED

    def test_high_mode(self):
        """Scores equal to or greater than ``high_min`` resolve to HIGH."""
        limits = StressThresholds(elevated_min=0.3, high_min=0.6)
        for score in (0.6, 0.8, 1.0):
            assert limits.get_mode_for_score(score) == StressMode.HIGH
|
||||
|
||||
|
||||
# ── Stress Score Calculation Tests ─────────────────────────────────────────
|
||||
|
||||
|
||||
class TestStressScoreCalculation:
    """Checks ``_calculate_stress_score`` over hand-built signal lists."""

    def test_empty_signals(self):
        """Empty signal list returns zero stress score."""
        score = _calculate_stress_score([])
        assert score == 0.0

    def test_no_triggered_signals(self):
        """No triggered signals means zero stress score."""
        signals = [
            StressSignal(name="s1", value=1.0, threshold=10.0, weight=0.5),
            StressSignal(name="s2", value=2.0, threshold=10.0, weight=0.5),
        ]
        score = _calculate_stress_score(signals)
        assert score == 0.0

    def test_single_triggered_signal(self):
        """Single triggered signal contributes its weight."""
        signals = [
            StressSignal(name="s1", value=10.0, threshold=10.0, weight=0.5),
        ]
        score = _calculate_stress_score(signals)
        # contribution = 0.5, total_weight = 0.5, score = 0.5/0.5 = 1.0
        assert score == 1.0

    def test_mixed_signals(self):
        """Mix of triggered and non-triggered signals."""
        signals = [
            StressSignal(name="s1", value=10.0, threshold=10.0, weight=0.3),
            StressSignal(name="s2", value=1.0, threshold=10.0, weight=0.3),
            StressSignal(name="s3", value=10.0, threshold=10.0, weight=0.4),
        ]
        score = _calculate_stress_score(signals)
        # triggered contributions: 0.3 + 0.4 = 0.7
        # total_weight: 0.3 + 0.3 + 0.4 = 1.0
        # score = 0.7 / 1.0 = 0.7
        # The expected value is a sum/ratio of non-representable binary
        # fractions, so compare with a tolerance instead of exact equality;
        # otherwise the test depends on the implementation's summation order.
        assert score == pytest.approx(0.7)

    def test_score_capped_at_one(self):
        """Stress score is capped at 1.0."""
        signals = [
            StressSignal(name="s1", value=100.0, threshold=10.0, weight=1.0),
            StressSignal(name="s2", value=100.0, threshold=10.0, weight=1.0),
        ]
        score = _calculate_stress_score(signals)
        assert score == 1.0  # Capped
|
||||
|
||||
|
||||
# ── Multiplier Tests ───────────────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestMultipliers:
    """Checks the default config layout and the per-mode multiplier tables."""

    def test_default_config_structure(self):
        """The default config exposes its three top-level sections."""
        config = get_default_config()
        for section in ("thresholds", "signals", "multipliers"):
            assert section in config

    def test_calm_mode_multipliers(self):
        """CALM mode maps the listed quest types to the expected factors."""
        calm = _get_multipliers_for_mode(StressMode.CALM)
        expected = {
            "test_improve": 1.0,
            "docs_update": 1.2,
            "exploration": 1.3,
            "refactor": 1.2,
        }
        for quest_type, factor in expected.items():
            assert calm[quest_type] == factor

    def test_elevated_mode_multipliers(self):
        """ELEVATED mode maps the listed quest types to the expected factors."""
        elevated = _get_multipliers_for_mode(StressMode.ELEVATED)
        expected = {
            "test_improve": 1.2,
            "issue_reduce": 1.1,
            "refactor": 0.9,
        }
        for quest_type, factor in expected.items():
            assert elevated[quest_type] == factor

    def test_high_mode_multipliers(self):
        """HIGH mode maps the listed quest types to the expected factors."""
        high = _get_multipliers_for_mode(StressMode.HIGH)
        expected = {
            "test_improve": 1.5,
            "issue_reduce": 1.4,
            "exploration": 0.7,
            "refactor": 0.6,
        }
        for quest_type, factor in expected.items():
            assert high[quest_type] == factor

    def test_multiplier_fallback_for_unknown_type(self):
        """Looking up an unknown quest type with a 1.0 default yields 1.0.

        The 1.0 fallback is supplied here by the ``.get`` default argument.
        """
        calm = _get_multipliers_for_mode(StressMode.CALM)
        fallback = calm.get("unknown_type", 1.0)
        assert fallback == 1.0
|
||||
|
||||
|
||||
# ── Apply Multiplier Tests ─────────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestApplyMultiplier:
    """Lower-bound checks for ``apply_multiplier``.

    Neither test pins the stress mode explicitly, so both only assert the
    minimum reward, not an exact multiplied value.
    """

    def test_apply_multiplier_calm(self):
        """Reward for a quest type named ``unknown_type`` is at least 1 token.

        NOTE(review): the test name says "calm", but the mode is not set or
        verified here; presumably the state left by the autouse reset
        fixture is calm -- confirm against ``timmy.stress_detector``.
        """
        base = 100
        # "unknown_type" is used rather than a configured quest type, so this
        # does not exercise any specific configured multiplier.
        result = apply_multiplier(base, "unknown_type")
        assert result >= 1  # At least 1 token

    def test_apply_multiplier_minimum_one(self):
        """Applied reward is at least 1 token."""
        # With the smallest base reward, the result should still be >= 1.
        result = apply_multiplier(1, "any_type")
        assert result >= 1
|
||||
|
||||
|
||||
# ── Stress Snapshot Tests ──────────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestStressSnapshot:
    """Checks the dictionary form produced by ``StressSnapshot.to_dict``."""

    def test_snapshot_to_dict(self):
        """``to_dict`` reports the mode value, score, signals and multipliers."""
        only_signal = StressSignal(name="test", value=10.0, threshold=5.0, weight=0.5)
        snapshot = StressSnapshot(
            mode=StressMode.ELEVATED,
            score=0.5,
            signals=[only_signal],
            multipliers={"test_improve": 1.2},
        )

        as_dict = snapshot.to_dict()

        # The mode is compared against the plain string "elevated".
        assert as_dict["mode"] == "elevated"
        assert as_dict["score"] == 0.5
        assert len(as_dict["signals"]) == 1
        assert as_dict["multipliers"]["test_improve"] == 1.2
|
||||
|
||||
|
||||
# ── Integration Tests ──────────────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestStressDetectorIntegration:
    """Sanity checks on ``reset_stress_state`` and the default configuration."""

    def test_reset_stress_state(self):
        """Calling the reset directly completes without raising."""
        # No assertion: the test passes as long as the call does not error.
        reset_stress_state()

    def test_default_config_contains_all_signals(self):
        """Every expected signal is configured with a threshold and a weight."""
        signal_config = get_default_config()["signals"]

        required_signals = (
            "flaky_test_rate",
            "p1_backlog_growth",
            "ci_failure_rate",
            "open_bug_count",
        )

        for name in required_signals:
            assert name in signal_config
            entry = signal_config[name]
            assert "threshold" in entry
            assert "weight" in entry

    def test_default_config_contains_all_modes(self):
        """The multipliers section has an entry for each of the three modes."""
        mode_tables = get_default_config()["multipliers"]

        for mode_name in ("calm", "elevated", "high"):
            assert mode_name in mode_tables

    def test_multiplier_weights_sum_approximately_one(self):
        """The configured signal weights add up to roughly 1.0."""
        signal_config = get_default_config()["signals"]

        total_weight = sum(entry["weight"] for entry in signal_config.values())
        # Accept anything within 0.1 of 1.0 rather than requiring an exact sum.
        assert 0.9 <= total_weight <= 1.1
|
||||
Reference in New Issue
Block a user