forked from Rockachopa/Timmy-time-dashboard
Implements adaptive token rewards that respond to system stress:
- StressDetector module (timmy/stress_detector.py):
- Monitors 4 stress signals: flaky test rate, P1 backlog growth,
CI failure rate, open bug count
- Calculates weighted stress score (0-1) and determines mode:
calm (<0.3), elevated (>=0.3 and <0.6), high (>=0.6)
- Applies quest-specific multipliers based on current mode
- Configuration (config/stress_modes.yaml):
- Thresholds for mode transitions
- Signal weights and thresholds
- Multipliers per mode (e.g., test_improve: 1.5x in high stress)
- Quest system integration:
- Rewards now include stress bonus/penalty in notification
- Quest status API includes adjusted_reward and multiplier
- Agent can see current stress mode and why rewards changed
- API endpoints:
- GET /quests/api/stress - current stress mode and signals
- POST /quests/api/stress/refresh - force refresh stress detection
Fixes #714
295 lines
11 KiB
Python
295 lines
11 KiB
Python
"""Unit tests for the stress detector module.
|
|
|
|
Tests stress signal calculation, mode detection, multipliers,
|
|
and integration with the quest system.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import pytest
|
|
|
|
from timmy.stress_detector import (
|
|
StressMode,
|
|
StressSignal,
|
|
StressSnapshot,
|
|
StressThresholds,
|
|
_calculate_stress_score,
|
|
_get_multipliers_for_mode,
|
|
apply_multiplier,
|
|
get_default_config,
|
|
reset_stress_state,
|
|
)
|
|
|
|
|
|
@pytest.fixture(autouse=True)
def clean_stress_state():
    """Wrap every test in this module with a stress-state reset.

    ``reset_stress_state()`` is called once before the test body runs and
    once after it finishes, so state left behind by one test is not seen
    by the next.
    """
    reset_stress_state()  # setup: start from a clean slate
    yield
    reset_stress_state()  # teardown: drop whatever the test left behind
|
|
|
|
|
|
# ── Stress Mode Tests ──────────────────────────────────────────────────────
|
|
|
|
|
|
class TestStressMode:
    """Checks on the string values carried by the ``StressMode`` enum."""

    def test_stress_mode_values(self):
        """Each StressMode member exposes its lowercase name as ``.value``."""
        expected_values = (
            (StressMode.CALM, "calm"),
            (StressMode.ELEVATED, "elevated"),
            (StressMode.HIGH, "high"),
        )
        for member, text in expected_values:
            assert member.value == text
|
|
|
|
|
|
# ── Stress Signal Tests ────────────────────────────────────────────────────
|
|
|
|
|
|
class TestStressSignal:
    """Trigger and contribution behaviour of a single ``StressSignal``."""

    @staticmethod
    def _signal_with_value(value):
        """Build the signal shared by these tests (threshold 10.0, weight 0.5)."""
        return StressSignal(
            name="test_signal",
            value=value,
            threshold=10.0,
            weight=0.5,
        )

    def test_signal_not_triggered(self):
        """A value under the threshold leaves the signal idle and contributing nothing."""
        idle = self._signal_with_value(5.0)
        assert not idle.is_triggered
        assert idle.contribution == 0.0

    def test_signal_triggered(self):
        """A value exactly at the threshold triggers the signal."""
        at_threshold = self._signal_with_value(10.0)
        assert at_threshold.is_triggered
        # weight * min(1, value/threshold) = 0.5 * 1
        assert at_threshold.contribution == 0.5

    def test_signal_contribution_capped(self):
        """A value far above the threshold still contributes no more than the weight."""
        far_above = self._signal_with_value(100.0)
        assert far_above.is_triggered
        assert far_above.contribution == 0.5  # capped at weight

    def test_signal_partial_contribution(self):
        """A value 1.5x the threshold yields exactly the weight (ratio capped at 1)."""
        above = self._signal_with_value(15.0)
        assert above.is_triggered
        # min(1, 15/10) * 0.5 = 0.5, i.e. the cap applies here as well
        assert above.contribution == 0.5
|
|
|
|
|
|
# ── Stress Thresholds Tests ────────────────────────────────────────────────
|
|
|
|
|
|
class TestStressThresholds:
    """Mode selection by ``StressThresholds.get_mode_for_score``."""

    def test_calm_mode(self):
        """Scores under ``elevated_min`` map to CALM."""
        limits = StressThresholds(elevated_min=0.3, high_min=0.6)
        for score in (0.0, 0.1, 0.29):
            assert limits.get_mode_for_score(score) == StressMode.CALM

    def test_elevated_mode(self):
        """Scores from ``elevated_min`` up to, but excluding, ``high_min`` map to ELEVATED."""
        limits = StressThresholds(elevated_min=0.3, high_min=0.6)
        for score in (0.3, 0.5, 0.59):
            assert limits.get_mode_for_score(score) == StressMode.ELEVATED

    def test_high_mode(self):
        """Scores of ``high_min`` or more map to HIGH."""
        limits = StressThresholds(elevated_min=0.3, high_min=0.6)
        for score in (0.6, 0.8, 1.0):
            assert limits.get_mode_for_score(score) == StressMode.HIGH
|
|
|
|
|
|
# ── Stress Score Calculation Tests ─────────────────────────────────────────
|
|
|
|
|
|
class TestStressScoreCalculation:
    """Tests for ``_calculate_stress_score`` applied to lists of ``StressSignal``."""

    def test_empty_signals(self):
        """Empty signal list returns zero stress score."""
        assert _calculate_stress_score([]) == 0.0

    def test_no_triggered_signals(self):
        """No triggered signals means zero stress score."""
        signals = [
            StressSignal(name="s1", value=1.0, threshold=10.0, weight=0.5),
            StressSignal(name="s2", value=2.0, threshold=10.0, weight=0.5),
        ]
        assert _calculate_stress_score(signals) == 0.0

    def test_single_triggered_signal(self):
        """Single triggered signal contributes its weight."""
        signals = [
            StressSignal(name="s1", value=10.0, threshold=10.0, weight=0.5),
        ]
        score = _calculate_stress_score(signals)
        # contribution = 0.5, total_weight = 0.5, score = 0.5 / 0.5 = 1.0
        assert score == 1.0

    def test_mixed_signals(self):
        """Mix of triggered and non-triggered signals."""
        signals = [
            StressSignal(name="s1", value=10.0, threshold=10.0, weight=0.3),
            StressSignal(name="s2", value=1.0, threshold=10.0, weight=0.3),
            StressSignal(name="s3", value=10.0, threshold=10.0, weight=0.4),
        ]
        score = _calculate_stress_score(signals)
        # triggered contributions: 0.3 + 0.4 = 0.7
        # total_weight: 0.3 + 0.3 + 0.4 = 1.0
        # score = 0.7 / 1.0 = 0.7
        # The expected value comes out of float addition and division, so it
        # is compared with a tolerance; exact equality would depend on the
        # order in which the implementation happens to sum the weights.
        assert score == pytest.approx(0.7)

    def test_score_capped_at_one(self):
        """Stress score is capped at 1.0."""
        signals = [
            StressSignal(name="s1", value=100.0, threshold=10.0, weight=1.0),
            StressSignal(name="s2", value=100.0, threshold=10.0, weight=1.0),
        ]
        score = _calculate_stress_score(signals)
        # Compared exactly on purpose: a tolerance could hide a score that
        # slips slightly above the cap.
        assert score == 1.0
|
|
|
|
|
|
# ── Multiplier Tests ───────────────────────────────────────────────────────
|
|
|
|
|
|
class TestMultipliers:
    """Default config layout and the per-mode quest multipliers."""

    def test_default_config_structure(self):
        """The default config exposes its three top-level sections."""
        config = get_default_config()
        for section in ("thresholds", "signals", "multipliers"):
            assert section in config

    def test_calm_mode_multipliers(self):
        """CALM mode multipliers match the expected values."""
        calm = _get_multipliers_for_mode(StressMode.CALM)
        expected = {
            "test_improve": 1.0,
            "docs_update": 1.2,
            "exploration": 1.3,
            "refactor": 1.2,
        }
        for quest_type, factor in expected.items():
            assert calm[quest_type] == factor

    def test_elevated_mode_multipliers(self):
        """ELEVATED mode multipliers match the expected values."""
        elevated = _get_multipliers_for_mode(StressMode.ELEVATED)
        expected = {
            "test_improve": 1.2,
            "issue_reduce": 1.1,
            "refactor": 0.9,
        }
        for quest_type, factor in expected.items():
            assert elevated[quest_type] == factor

    def test_high_mode_multipliers(self):
        """HIGH mode multipliers match the expected values."""
        high = _get_multipliers_for_mode(StressMode.HIGH)
        expected = {
            "test_improve": 1.5,
            "issue_reduce": 1.4,
            "exploration": 0.7,
            "refactor": 0.6,
        }
        for quest_type, factor in expected.items():
            assert high[quest_type] == factor

    def test_multiplier_fallback_for_unknown_type(self):
        """Looking up an unknown quest type with a 1.0 default yields 1.0."""
        calm = _get_multipliers_for_mode(StressMode.CALM)
        fallback = calm.get("unknown_type", 1.0)
        assert fallback == 1.0
|
|
|
|
|
|
# ── Apply Multiplier Tests ─────────────────────────────────────────────────
|
|
|
|
|
|
class TestApplyMultiplier:
    """Lower-bound checks on the reward returned by ``apply_multiplier``."""

    def test_apply_multiplier_calm(self):
        """A 100-token reward for an unrecognised quest type stays at 1 token or more."""
        # The active stress mode is not mocked here, so the exact adjusted
        # reward is not pinned; only the minimum of one token is checked.
        base = 100
        adjusted = apply_multiplier(base, "unknown_type")
        assert adjusted >= 1  # at least 1 token

    def test_apply_multiplier_minimum_one(self):
        """A 1-token reward never drops below 1 token after adjustment."""
        # Whatever multiplier ends up applied, the result must be >= 1.
        adjusted = apply_multiplier(1, "any_type")
        assert adjusted >= 1
|
|
|
|
|
|
# ── Stress Snapshot Tests ──────────────────────────────────────────────────
|
|
|
|
|
|
class TestStressSnapshot:
    """Serialisation of ``StressSnapshot`` through ``to_dict``."""

    def test_snapshot_to_dict(self):
        """``to_dict`` exposes mode, score, signals and multipliers."""
        snapshot = StressSnapshot(
            mode=StressMode.ELEVATED,
            score=0.5,
            signals=[
                StressSignal(name="test", value=10.0, threshold=5.0, weight=0.5),
            ],
            multipliers={"test_improve": 1.2},
        )

        payload = snapshot.to_dict()

        assert payload["mode"] == "elevated"  # mode appears as its string value
        assert payload["score"] == 0.5
        assert len(payload["signals"]) == 1
        assert payload["multipliers"]["test_improve"] == 1.2
|
|
|
|
|
|
# ── Integration Tests ──────────────────────────────────────────────────────
|
|
|
|
|
|
class TestStressDetectorIntegration:
    """Sanity checks on the reset hook and on the default configuration."""

    def test_reset_stress_state(self):
        """Calling ``reset_stress_state`` completes without raising."""
        # Nothing is asserted; the call itself is the check.
        reset_stress_state()

    def test_default_config_contains_all_signals(self):
        """Every expected signal is configured with a threshold and a weight."""
        signal_config = get_default_config()["signals"]

        expected_names = (
            "flaky_test_rate",
            "p1_backlog_growth",
            "ci_failure_rate",
            "open_bug_count",
        )

        for name in expected_names:
            assert name in signal_config
            entry = signal_config[name]
            assert "threshold" in entry
            assert "weight" in entry

    def test_default_config_contains_all_modes(self):
        """The multipliers section has an entry for each stress mode."""
        per_mode = get_default_config()["multipliers"]

        for mode_name in ("calm", "elevated", "high"):
            assert mode_name in per_mode

    def test_multiplier_weights_sum_approximately_one(self):
        """The configured signal weights add up to roughly 1.0."""
        signal_config = get_default_config()["signals"]

        weight_sum = sum(entry["weight"] for entry in signal_config.values())
        # A little slack is allowed, but the sum should stay close to 1.0.
        assert 0.9 <= weight_sum <= 1.1
|