Files
timmy-config/tests/deadman_switch/test_config_fallbacks.py
Timmy Agent 69a8bf87d6
Some checks failed
Architecture Lint / Linter Tests (pull_request) Successful in 27s
Smoke Test / smoke (pull_request) Failing after 25s
Validate Matrix Scaffold / validate-scaffold (pull_request) Failing after 22s
Validate Training Data / validate (pull_request) Successful in 21s
PR Checklist / pr-checklist (pull_request) Failing after 4m25s
Architecture Lint / Lint Repository (pull_request) Failing after 23s
feat: Add dead man switch emergency config fallbacks for Bezalel agent
Issue #423 — Dead Man Switch Config Fallbacks - Bezalel Agent
Autonomous Recovery System

Add complete emergency configuration template set for the dead man
switch fallback system. These files are deployed to
/root/wizards/bezalel/home/.hermes/ and enable autonomous recovery
when the primary inference provider (Kimi Coding) fails.

New files:
- wizards/bezalel/home/.hermes/config.emergency.yaml — Complete but
  minimal local-only configuration using Ollama models (gemma3:4b/12b/27b)
- wizards/bezalel/home/.hermes/.env.emergency — Emergency env var
  template (Telegram alerts, local service endpoints)
- wizards/bezalel/home/.hermes/health_status.json — Health status
  schema/example used by deadman-fallback.py
- wizards/bezalel/home/.hermes/deadman_switch.json — Dead man switch
  configuration (thresholds, fallback chain, recovery policy)
- wizards/bezalel/home/.hermes/DEADMAN_SWITCH_README.md — Complete
  documentation for operators (13 sections)

Tests:
- tests/deadman_switch/test_config_fallbacks.py — Validates presence,
  structure, required fields, consistency across files, and that
  emergency config uses local-only providers

Config:
- .gitignore: Allow .env.emergency template to be tracked (exception
  added for this specific file)

Smallest concrete fix: Add the actual config templates so the dead man
switch system can perform autonomous recovery. Infrastructure exists
(bin/deadman-fallback.py, ansible roles); these are the missing
deployable config files.

Closes #423
2026-04-26 14:08:55 -04:00

180 lines
7.1 KiB
Python

"""
Tests for Dead Man Switch emergency config files.
Validates that all required emergency config templates exist and have
syntactically valid YAML/JSON with required fields.
Run: pytest tests/deadman_switch/test_config_fallbacks.py -v
"""
from __future__ import annotations
import pytest
import yaml
import json
from pathlib import Path
# Locations shared by every test in this module.
# parents[2] climbs from this file past tests/deadman_switch/ and tests/
# to the repository root.
REPO_ROOT = Path(__file__).parents[2]
# Directory holding the emergency config templates under test.
EMERGENCY_DIR = REPO_ROOT.joinpath('wizards', 'bezalel', 'home', '.hermes')
class TestEmergencyConfigPresence:
    """Check that each emergency template file is present under EMERGENCY_DIR."""

    def test_config_emergency_yaml_exists(self):
        """config.emergency.yaml is present."""
        config_file = EMERGENCY_DIR.joinpath('config.emergency.yaml')
        found = config_file.exists()
        assert found, f"Missing emergency config: {config_file}"

    def test_env_emergency_exists(self):
        """.env.emergency is present."""
        env_file = EMERGENCY_DIR.joinpath('.env.emergency')
        found = env_file.exists()
        assert found, f"Missing emergency env: {env_file}"

    def test_health_status_json_exists(self):
        """health_status.json is present."""
        health_file = EMERGENCY_DIR.joinpath('health_status.json')
        found = health_file.exists()
        assert found, f"Missing health status template: {health_file}"

    def test_deadman_switch_json_exists(self):
        """deadman_switch.json is present."""
        switch_file = EMERGENCY_DIR.joinpath('deadman_switch.json')
        found = switch_file.exists()
        assert found, f"Missing deadman switch config: {switch_file}"

    def test_readme_exists(self):
        """DEADMAN_SWITCH_README.md is present."""
        readme_file = EMERGENCY_DIR.joinpath('DEADMAN_SWITCH_README.md')
        found = readme_file.exists()
        assert found, f"Missing README: {readme_file}"
class TestEmergencyConfigValidity:
    """Validate structure and required fields of emergency configs.

    Every test reads its template from EMERGENCY_DIR through the private
    helpers below, so files are always decoded as UTF-8 and closed promptly.
    """

    @staticmethod
    def _read_text(name: str) -> str:
        """Return the UTF-8 text of the emergency template called *name*."""
        return (EMERGENCY_DIR / name).read_text(encoding='utf-8')

    @classmethod
    def _load_yaml_config(cls) -> dict:
        """Parse config.emergency.yaml and require a mapping at the top level."""
        cfg = yaml.safe_load(cls._read_text('config.emergency.yaml'))
        assert isinstance(cfg, dict), "Config must be a YAML dict"
        return cfg

    @classmethod
    def _load_json(cls, name: str):
        """Parse the JSON template called *name* and return the decoded value."""
        return json.loads(cls._read_text(name))

    def test_config_emergency_yaml_parses(self) -> None:
        """config.emergency.yaml parses and its top level is a mapping."""
        self._load_yaml_config()

    def test_config_emergency_has_required_sections(self) -> None:
        """All required top-level sections are present in the emergency config."""
        cfg = self._load_yaml_config()
        required_top = ['model', 'agent', 'terminal', 'display', 'platforms']
        for key in required_top:
            assert key in cfg, f"Missing top-level key: {key}"

    def test_config_emergency_uses_local_provider(self) -> None:
        """Emergency config must NOT depend on external APIs."""
        cfg = self._load_yaml_config()
        # `or {}` turns a null `model:` entry into a clean assertion failure
        # below instead of an AttributeError.
        model = cfg.get('model') or {}
        provider = model.get('provider')
        assert provider == 'ollama', \
            f"Emergency config must use ollama provider, got: {provider}"
        # Ensure no API keys embedded. The raw text is re-read from disk:
        # a file handle that was already parsed (or closed) yields nothing
        # useful, which would make these checks pass vacuously or crash.
        raw_upper = self._read_text('config.emergency.yaml').upper()
        for key_name in ('ANTHROPIC_API_KEY', 'KIMI_API_KEY', 'OPENROUTER_API_KEY'):
            assert key_name not in raw_upper, \
                f"Emergency config must not reference {key_name}"

    def test_config_emergency_has_fallback_chain(self) -> None:
        """The emergency config lists at least one fallback, including ollama."""
        cfg = self._load_yaml_config()
        # A present-but-null key is treated the same as a missing one.
        fb = cfg.get('fallback_providers') or []
        assert len(fb) >= 1, "Emergency config needs at least 1 fallback provider"
        providers = [p.get('provider') for p in fb]
        assert 'ollama' in providers, "Ollama must be in fallback chain"

    def test_env_emergency_is_template(self) -> None:
        """The .env.emergency should be a template (commented keys)."""
        content = self._read_text('.env.emergency')
        # Should contain explanatory comments
        assert '#' in content, ".env.emergency should document variables"
        # Should NOT contain actual secret values
        assert 'sk-ant-' not in content  # Anthropic key prefix
        assert 'sk-or-' not in content  # OpenRouter key prefix

    def test_health_status_json_parses(self) -> None:
        """health_status.json parses and has schema_version plus a checks dict."""
        health = self._load_json('health_status.json')
        assert 'schema_version' in health
        assert 'checks' in health
        assert isinstance(health['checks'], dict)

    def test_health_status_has_required_checks(self) -> None:
        """health_status.json declares the kimi-coding, ollama and gitea checks."""
        health = self._load_json('health_status.json')
        required_checks = ['kimi-coding', 'ollama', 'gitea']
        for check in required_checks:
            assert check in health['checks'], f"Missing health check: {check}"

    def test_deadman_switch_json_parses(self) -> None:
        """deadman_switch.json parses and has both top-level sections."""
        dms = self._load_json('deadman_switch.json')
        assert 'deadman_switch' in dms
        assert 'fallback_chain' in dms

    def test_deadman_switch_has_thresholds(self) -> None:
        """The deadman_switch section defines its interval and timeout."""
        ds = self._load_json('deadman_switch.json')['deadman_switch']
        assert 'health_check_interval_seconds' in ds
        assert 'heartbeat_timeout_seconds' in ds
        assert ds['heartbeat_timeout_seconds'] >= 60, "Timeout must be at least 60s"

    def test_deadman_switch_fallback_chain_is_ordered(self) -> None:
        """fallback_chain entries appear in non-decreasing priority order."""
        chain = self._load_json('deadman_switch.json')['fallback_chain']
        priorities = [item['priority'] for item in chain]
        assert priorities == sorted(priorities), "Fallback chain must be ordered by priority"

    def test_readme_is_complete(self) -> None:
        """The README mentions every required section title."""
        content = self._read_text('DEADMAN_SWITCH_README.md')
        required_sections = [
            'Architecture',
            'Deployment',
            'How It Works',
            'Configuration',
            'Logs',
            'Monitoring',
            'Failure Scenarios',
            'Recovery',
            'Troubleshooting',
        ]
        for section in required_sections:
            assert section in content, f"README missing section: {section}"
class TestEmergencyConfigConsistency:
    """Checks that span more than one emergency template file."""

    def test_emergency_config_model_in_fallback_chain(self):
        """The emergency config's default model equals the first fallback's model."""
        yaml_path = EMERGENCY_DIR / 'config.emergency.yaml'
        switch_path = EMERGENCY_DIR / 'deadman_switch.json'
        with open(yaml_path) as yaml_file:
            emergency_cfg = yaml.safe_load(yaml_file)
        with open(switch_path) as switch_file:
            switch_cfg = json.load(switch_file)
        configured = emergency_cfg['model']['default']
        head_of_chain = switch_cfg['fallback_chain'][0]['model']
        # The emergency config is the fallback config, so the two must agree.
        models_agree = configured == head_of_chain
        assert models_agree, \
            f"Emergency default model ({configured}) should match first fallback ({head_of_chain})"

    def test_health_status_schema_matches_deadman_config(self):
        """health_status.json carries an embedded deadman_switch section."""
        health_path = EMERGENCY_DIR / 'health_status.json'
        switch_path = EMERGENCY_DIR / 'deadman_switch.json'
        with open(health_path) as health_file:
            health_doc = json.load(health_file)
        # deadman_switch.json is parsed here but its contents are not compared;
        # the load still fails the test if the file is missing or invalid.
        with open(switch_path) as switch_file:
            switch_doc = json.load(switch_file)
        assert 'deadman_switch' in health_doc
        embedded = health_doc['deadman_switch']
        for field_name in ('enabled', 'heartbeat_timeout_seconds', 'max_restart_attempts'):
            assert field_name in embedded