This repository has been archived on 2026-03-24. You can view files and clone it. You cannot open issues or pull requests or push a commit.
Files
Timmy-time-dashboard/tests/unit/test_stress_detector.py
kimi 919a011cae feat: adapt token rewards based on system stress signals (#714)
Implements adaptive token rewards that respond to system stress:

- StressDetector module (timmy/stress_detector.py):
  - Monitors 4 stress signals: flaky test rate, P1 backlog growth,
    CI failure rate, open bug count
  - Calculates weighted stress score (0-1) and determines mode:
    calm (<0.3), elevated (>=0.3 and <0.6), high (>=0.6)
  - Applies quest-specific multipliers based on current mode

- Configuration (config/stress_modes.yaml):
  - Thresholds for mode transitions
  - Signal weights and thresholds
  - Multipliers per mode (e.g., test_improve: 1.5x in high stress)

- Quest system integration:
  - Rewards now include stress bonus/penalty in notification
  - Quest status API includes adjusted_reward and multiplier
  - Agent can see current stress mode and why rewards changed

- API endpoints:
  - GET /quests/api/stress - current stress mode and signals
  - POST /quests/api/stress/refresh - force refresh stress detection

Fixes #714
2026-03-21 17:26:40 -04:00

295 lines
11 KiB
Python

"""Unit tests for the stress detector module.
Tests stress signal calculation, mode detection, multipliers,
and integration with the quest system.
"""
from __future__ import annotations
import pytest
from timmy.stress_detector import (
StressMode,
StressSignal,
StressSnapshot,
StressThresholds,
_calculate_stress_score,
_get_multipliers_for_mode,
apply_multiplier,
get_default_config,
reset_stress_state,
)
@pytest.fixture(autouse=True)
def clean_stress_state():
    """Give every test a freshly reset stress-detector state.

    ``reset_stress_state()`` runs once before the test body and once more
    after it; ``autouse=True`` means tests do not need to request this
    fixture explicitly.
    """
    reset_stress_state()  # setup: start from a clean slate
    yield
    reset_stress_state()  # teardown: do not leak state into the next test
# ── Stress Mode Tests ──────────────────────────────────────────────────────
class TestStressMode:
    """Sanity checks for the ``StressMode`` enum."""

    def test_stress_mode_values(self):
        """Each mode exposes the expected lowercase string as ``.value``."""
        expected_values = (
            (StressMode.CALM, "calm"),
            (StressMode.ELEVATED, "elevated"),
            (StressMode.HIGH, "high"),
        )
        for mode, text in expected_values:
            assert mode.value == text
# ── Stress Signal Tests ────────────────────────────────────────────────────
class TestStressSignal:
    """Behaviour of a single ``StressSignal``: trigger flag and contribution.

    Contributions are floats computed by the module under test, so they are
    compared with ``pytest.approx`` rather than exact ``==``.
    """

    def test_signal_not_triggered(self):
        """Signal with value below threshold is not triggered and adds nothing."""
        signal = StressSignal(
            name="test_signal",
            value=5.0,
            threshold=10.0,
            weight=0.5,
        )
        assert not signal.is_triggered
        assert signal.contribution == pytest.approx(0.0)

    def test_signal_triggered(self):
        """Signal with value exactly at threshold is triggered."""
        signal = StressSignal(
            name="test_signal",
            value=10.0,
            threshold=10.0,
            weight=0.5,
        )
        assert signal.is_triggered
        # expected: weight * min(1, value / threshold) = 0.5 * 1
        assert signal.contribution == pytest.approx(0.5)

    def test_signal_contribution_capped(self):
        """Signal contribution is capped at weight when value >> threshold."""
        signal = StressSignal(
            name="test_signal",
            value=100.0,
            threshold=10.0,
            weight=0.5,
        )
        assert signal.is_triggered
        assert signal.contribution == pytest.approx(0.5)  # capped at weight

    def test_signal_partial_contribution(self):
        """A value 1.5x the threshold still contributes exactly the weight.

        NOTE(review): despite its name, this test does not exercise a
        partial contribution. With value=15 and threshold=10 the ratio is
        clamped to 1, so it asserts the same capped result as
        ``test_signal_contribution_capped``.
        """
        signal = StressSignal(
            name="test_signal",
            value=15.0,
            threshold=10.0,
            weight=0.5,
        )
        assert signal.is_triggered
        # expected: min(1, 15 / 10) * 0.5 = 0.5 (capped)
        assert signal.contribution == pytest.approx(0.5)
# ── Stress Thresholds Tests ────────────────────────────────────────────────
class TestStressThresholds:
    """Mapping from a stress score to a ``StressMode`` via ``StressThresholds``."""

    def test_calm_mode(self):
        """Scores under ``elevated_min`` resolve to CALM."""
        bands = StressThresholds(elevated_min=0.3, high_min=0.6)
        for score in (0.0, 0.1, 0.29):
            assert bands.get_mode_for_score(score) == StressMode.CALM

    def test_elevated_mode(self):
        """Scores from ``elevated_min`` up to (not including) ``high_min`` resolve to ELEVATED."""
        bands = StressThresholds(elevated_min=0.3, high_min=0.6)
        for score in (0.3, 0.5, 0.59):
            assert bands.get_mode_for_score(score) == StressMode.ELEVATED

    def test_high_mode(self):
        """Scores at or above ``high_min`` resolve to HIGH."""
        bands = StressThresholds(elevated_min=0.3, high_min=0.6)
        for score in (0.6, 0.8, 1.0):
            assert bands.get_mode_for_score(score) == StressMode.HIGH
# ── Stress Score Calculation Tests ─────────────────────────────────────────
class TestStressScoreCalculation:
    """Tests for ``_calculate_stress_score`` over lists of ``StressSignal``.

    The score is a float derived from summed contributions and weights, so
    expectations use ``pytest.approx`` to stay robust against floating-point
    rounding (for example 0.3 + 0.4 divided by 0.3 + 0.3 + 0.4).
    """

    def test_empty_signals(self):
        """Empty signal list returns zero stress score."""
        score = _calculate_stress_score([])
        assert score == pytest.approx(0.0)

    def test_no_triggered_signals(self):
        """No triggered signals means zero stress score."""
        signals = [
            StressSignal(name="s1", value=1.0, threshold=10.0, weight=0.5),
            StressSignal(name="s2", value=2.0, threshold=10.0, weight=0.5),
        ]
        score = _calculate_stress_score(signals)
        assert score == pytest.approx(0.0)

    def test_single_triggered_signal(self):
        """Single triggered signal yields the maximum score."""
        signals = [
            StressSignal(name="s1", value=10.0, threshold=10.0, weight=0.5),
        ]
        score = _calculate_stress_score(signals)
        # expected: contribution = 0.5, total_weight = 0.5, score = 0.5 / 0.5
        assert score == pytest.approx(1.0)

    def test_mixed_signals(self):
        """Mix of triggered and non-triggered signals."""
        signals = [
            StressSignal(name="s1", value=10.0, threshold=10.0, weight=0.3),
            StressSignal(name="s2", value=1.0, threshold=10.0, weight=0.3),
            StressSignal(name="s3", value=10.0, threshold=10.0, weight=0.4),
        ]
        score = _calculate_stress_score(signals)
        # expected:
        #   triggered contributions: 0.3 + 0.4 = 0.7
        #   total_weight:            0.3 + 0.3 + 0.4 = 1.0
        #   score:                   0.7 / 1.0 = 0.7
        assert score == pytest.approx(0.7)

    def test_score_capped_at_one(self):
        """Stress score does not exceed 1.0 even when every signal is far over threshold."""
        signals = [
            StressSignal(name="s1", value=100.0, threshold=10.0, weight=1.0),
            StressSignal(name="s2", value=100.0, threshold=10.0, weight=1.0),
        ]
        score = _calculate_stress_score(signals)
        # NOTE(review): if the score is normalised by total weight (as the
        # comments in the sibling tests describe), this is 2.0 / 2.0 and the
        # cap itself is never exercised. Confirm against the implementation.
        assert score == pytest.approx(1.0)
# ── Multiplier Tests ───────────────────────────────────────────────────────
class TestMultipliers:
    """Default configuration layout and per-mode quest multipliers."""

    def test_default_config_structure(self):
        """The default config exposes its three top-level sections."""
        config = get_default_config()
        for section in ("thresholds", "signals", "multipliers"):
            assert section in config

    def test_calm_mode_multipliers(self):
        """Calm mode carries the expected per-quest multipliers."""
        calm = _get_multipliers_for_mode(StressMode.CALM)
        expected = {
            "test_improve": 1.0,
            "docs_update": 1.2,
            "exploration": 1.3,
            "refactor": 1.2,
        }
        for quest_type, factor in expected.items():
            assert calm[quest_type] == factor

    def test_elevated_mode_multipliers(self):
        """Elevated mode carries the expected per-quest multipliers."""
        elevated = _get_multipliers_for_mode(StressMode.ELEVATED)
        expected = {
            "test_improve": 1.2,
            "issue_reduce": 1.1,
            "refactor": 0.9,
        }
        for quest_type, factor in expected.items():
            assert elevated[quest_type] == factor

    def test_high_mode_multipliers(self):
        """High stress mode carries the expected per-quest multipliers."""
        high = _get_multipliers_for_mode(StressMode.HIGH)
        expected = {
            "test_improve": 1.5,
            "issue_reduce": 1.4,
            "exploration": 0.7,
            "refactor": 0.6,
        }
        for quest_type, factor in expected.items():
            assert high[quest_type] == factor

    def test_multiplier_fallback_for_unknown_type(self):
        """Looking up an unknown quest type with a 1.0 default yields 1.0."""
        calm = _get_multipliers_for_mode(StressMode.CALM)
        fallback = calm.get("unknown_type", 1.0)
        assert fallback == 1.0
# ── Apply Multiplier Tests ─────────────────────────────────────────────────
class TestApplyMultiplier:
    """Smoke tests for ``apply_multiplier`` under whatever stress mode is current."""

    def test_apply_multiplier_calm(self):
        """A 100-token base reward for an unrecognised quest type stays at least 1."""
        # The stress mode is not mocked here, so only the lower bound of the
        # adjusted reward is checked rather than an exact value.
        adjusted = apply_multiplier(100, "unknown_type")
        assert adjusted >= 1  # At least 1 token

    def test_apply_multiplier_minimum_one(self):
        """The smallest base reward (1) never drops below 1 token."""
        # Whatever multiplier ends up being applied, the result must be >= 1.
        adjusted = apply_multiplier(1, "any_type")
        assert adjusted >= 1
# ── Stress Snapshot Tests ──────────────────────────────────────────────────
class TestStressSnapshot:
    """Serialisation of ``StressSnapshot`` through ``to_dict()``."""

    def test_snapshot_to_dict(self):
        """``to_dict()`` exposes mode, score, signals and multipliers."""
        snapshot = StressSnapshot(
            mode=StressMode.ELEVATED,
            score=0.5,
            signals=[
                StressSignal(name="test", value=10.0, threshold=5.0, weight=0.5),
            ],
            multipliers={"test_improve": 1.2},
        )

        payload = snapshot.to_dict()

        assert payload["mode"] == "elevated"  # enum serialised as its string value
        assert payload["score"] == 0.5
        assert len(payload["signals"]) == 1
        assert payload["multipliers"]["test_improve"] == 1.2
# ── Integration Tests ──────────────────────────────────────────────────────
class TestStressDetectorIntegration:
    """Checks on the default configuration and the state-reset entry point."""

    def test_reset_stress_state(self):
        """Calling ``reset_stress_state()`` completes without raising."""
        # No state is inspected; the call merely has to succeed.
        reset_stress_state()

    def test_default_config_contains_all_signals(self):
        """Every expected signal is configured with a threshold and a weight."""
        signal_config = get_default_config()["signals"]
        expected_names = (
            "flaky_test_rate",
            "p1_backlog_growth",
            "ci_failure_rate",
            "open_bug_count",
        )
        for name in expected_names:
            assert name in signal_config
            entry = signal_config[name]
            assert "threshold" in entry
            assert "weight" in entry

    def test_default_config_contains_all_modes(self):
        """The multipliers section has an entry for each stress mode."""
        per_mode = get_default_config()["multipliers"]
        for mode_name in ("calm", "elevated", "high"):
            assert mode_name in per_mode

    def test_multiplier_weights_sum_approximately_one(self):
        """Signal weights should approximately sum to 1.0.

        Despite the test name, the values summed here are the per-signal
        ``weight`` entries, not the reward multipliers.
        """
        weights = [spec["weight"] for spec in get_default_config()["signals"].values()]
        total_weight = sum(weights)
        # Some slack is tolerated, but the total should stay close to 1.0.
        assert 0.9 <= total_weight <= 1.1