Some checks failed
Architecture Lint / Linter Tests (pull_request) Successful in 27s
Smoke Test / smoke (pull_request) Failing after 25s
Validate Matrix Scaffold / validate-scaffold (pull_request) Failing after 22s
Validate Training Data / validate (pull_request) Successful in 21s
PR Checklist / pr-checklist (pull_request) Failing after 4m25s
Architecture Lint / Lint Repository (pull_request) Failing after 23s
Issue #423 — Dead Man Switch Config Fallbacks - Bezalel Agent Autonomous Recovery System Add complete emergency configuration template set for the dead man switch fallback system. These files are deployed to /root/wizards/bezalel/home/.hermes/ and enable autonomous recovery when the primary inference provider (Kimi Coding) fails. New files: - wizards/bezalel/home/.hermes/config.emergency.yaml — Full minimal local-only configuration using Ollama models (gemma3:4b/12b/27b) - wizards/bezalel/home/.hermes/.env.emergency — Emergency env var template (Telegram alerts, local service endpoints) - wizards/bezalel/home/.hermes/health_status.json — Health status schema/example used by deadman-fallback.py - wizards/bezalel/home/.hermes/deadman_switch.json — Dead man switch configuration (thresholds, fallback chain, recovery policy) - wizards/bezalel/home/.hermes/DEADMAN_SWITCH_README.md — Complete documentation for operators (13 sections) Tests: - tests/deadman_switch/test_config_fallbacks.py — Validates presence, structure, required fields, consistency across files, and that emergency config uses local-only providers Config: - .gitignore: Allow .env.emergency template to be tracked (exception added for this specific file) Smallest concrete fix: Add the actual config templates so the dead man switch system can perform autonomous recovery. Infrastructure exists (bin/deadman-fallback.py, ansible roles); these are the missing deployable config files. Closes #423
180 lines
7.1 KiB
Python
180 lines
7.1 KiB
Python
"""
|
|
Tests for Dead Man Switch emergency config files.
|
|
|
|
Validates that all required emergency config templates exist and have
|
|
syntactically valid YAML/JSON with required fields.
|
|
|
|
Run: pytest tests/deadman_switch/test_config_fallbacks.py -v
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import pytest
|
|
import yaml
|
|
import json
|
|
from pathlib import Path
|
|
|
|
|
|
# Locations of the emergency config templates inside the repository.
# This file lives at <repo>/tests/deadman_switch/<file>.py, so parents[2]
# walks up: deadman_switch -> tests -> repository root.
REPO_ROOT = Path(__file__).parents[2]
EMERGENCY_DIR = REPO_ROOT.joinpath('wizards', 'bezalel', 'home', '.hermes')
|
|
|
|
|
|
class TestEmergencyConfigPresence:
    """Check that every emergency template file is present under EMERGENCY_DIR."""

    def test_config_emergency_yaml_exists(self):
        """config.emergency.yaml must exist."""
        target = EMERGENCY_DIR.joinpath('config.emergency.yaml')
        found = target.exists()
        assert found, f"Missing emergency config: {target}"

    def test_env_emergency_exists(self):
        """.env.emergency must exist."""
        target = EMERGENCY_DIR.joinpath('.env.emergency')
        found = target.exists()
        assert found, f"Missing emergency env: {target}"

    def test_health_status_json_exists(self):
        """health_status.json must exist."""
        target = EMERGENCY_DIR.joinpath('health_status.json')
        found = target.exists()
        assert found, f"Missing health status template: {target}"

    def test_deadman_switch_json_exists(self):
        """deadman_switch.json must exist."""
        target = EMERGENCY_DIR.joinpath('deadman_switch.json')
        found = target.exists()
        assert found, f"Missing deadman switch config: {target}"

    def test_readme_exists(self):
        """DEADMAN_SWITCH_README.md must exist."""
        target = EMERGENCY_DIR.joinpath('DEADMAN_SWITCH_README.md')
        found = target.exists()
        assert found, f"Missing README: {target}"
|
|
|
|
|
|
class TestEmergencyConfigValidity:
    """Validate structure and required fields of emergency configs."""

    @staticmethod
    def _read_text(name):
        """Return the UTF-8 text of the template called *name* in EMERGENCY_DIR."""
        return (EMERGENCY_DIR / name).read_text(encoding='utf-8')

    @classmethod
    def _load_yaml(cls, name):
        """Parse the template called *name* with ``yaml.safe_load``."""
        return yaml.safe_load(cls._read_text(name))

    @classmethod
    def _load_json(cls, name):
        """Parse the template called *name* as JSON."""
        return json.loads(cls._read_text(name))

    def test_config_emergency_yaml_parses(self):
        """config.emergency.yaml must parse to a mapping."""
        cfg = self._load_yaml('config.emergency.yaml')
        assert isinstance(cfg, dict), "Config must be a YAML dict"

    def test_config_emergency_has_required_sections(self):
        """config.emergency.yaml must define every required top-level key."""
        cfg = self._load_yaml('config.emergency.yaml')
        required_top = ['model', 'agent', 'terminal', 'display', 'platforms']
        for key in required_top:
            assert key in cfg, f"Missing top-level key: {key}"

    def test_config_emergency_uses_local_provider(self):
        """Emergency config must NOT depend on external APIs."""
        # Read the raw text once and parse that same text. Re-reading a file
        # handle after yaml.safe_load has consumed it yields '' (or raises if
        # the handle is already closed), which would make the key scan below
        # either vacuous or an error.
        raw_text = self._read_text('config.emergency.yaml')
        cfg = yaml.safe_load(raw_text)
        model = cfg.get('model', {})
        assert model.get('provider') == 'ollama', \
            "Emergency config must use ollama provider, got: " + str(model.get('provider'))
        # Ensure no API key variable names appear anywhere in the file
        # (case-insensitive, comments included).
        upper_text = raw_text.upper()
        for key_name in ('ANTHROPIC_API_KEY', 'KIMI_API_KEY', 'OPENROUTER_API_KEY'):
            assert key_name not in upper_text, \
                f"Emergency config must not reference {key_name}"

    def test_config_emergency_has_fallback_chain(self):
        """config.emergency.yaml must list ollama among its fallback providers."""
        cfg = self._load_yaml('config.emergency.yaml')
        # ``or []`` turns an explicit ``fallback_providers: null`` into the
        # assertion failure below instead of a TypeError from len(None).
        fb = cfg.get('fallback_providers', []) or []
        assert len(fb) >= 1, "Emergency config needs at least 1 fallback provider"
        providers = [p.get('provider') for p in fb]
        assert 'ollama' in providers, "Ollama must be in fallback chain"

    def test_env_emergency_is_template(self):
        """The .env.emergency should be a template (commented keys)."""
        content = self._read_text('.env.emergency')
        # Should contain explanatory comments
        assert '#' in content, ".env.emergency should document variables"
        # Should NOT contain actual secret values
        assert 'sk-ant-' not in content  # Anthropic key prefix
        assert 'sk-or-' not in content  # OpenRouter key prefix

    def test_health_status_json_parses(self):
        """health_status.json must parse and expose schema_version and a checks dict."""
        health = self._load_json('health_status.json')
        assert 'schema_version' in health
        assert 'checks' in health
        assert isinstance(health['checks'], dict)

    def test_health_status_has_required_checks(self):
        """health_status.json must contain each required entry under 'checks'."""
        health = self._load_json('health_status.json')
        required_checks = ['kimi-coding', 'ollama', 'gitea']
        for check in required_checks:
            assert check in health['checks'], f"Missing health check: {check}"

    def test_deadman_switch_json_parses(self):
        """deadman_switch.json must parse and define both top-level sections."""
        dms = self._load_json('deadman_switch.json')
        assert 'deadman_switch' in dms
        assert 'fallback_chain' in dms

    def test_deadman_switch_has_thresholds(self):
        """deadman_switch.json must define its interval and a timeout of at least 60."""
        dms = self._load_json('deadman_switch.json')
        ds = dms['deadman_switch']
        assert 'health_check_interval_seconds' in ds
        assert 'heartbeat_timeout_seconds' in ds
        assert ds['heartbeat_timeout_seconds'] >= 60, "Timeout must be at least 60s"

    def test_deadman_switch_fallback_chain_is_ordered(self):
        """fallback_chain entries must appear in non-decreasing 'priority' order."""
        dms = self._load_json('deadman_switch.json')
        chain = dms['fallback_chain']
        priorities = [item['priority'] for item in chain]
        assert priorities == sorted(priorities), "Fallback chain must be ordered by priority"

    def test_readme_is_complete(self):
        """The README text must mention every required section title."""
        content = self._read_text('DEADMAN_SWITCH_README.md')
        required_sections = [
            'Architecture',
            'Deployment',
            'How It Works',
            'Configuration',
            'Logs',
            'Monitoring',
            'Failure Scenarios',
            'Recovery',
            'Troubleshooting',
        ]
        for section in required_sections:
            assert section in content, f"README missing section: {section}"
|
|
|
|
|
|
class TestEmergencyConfigConsistency:
    """Cross-file consistency checks."""

    @staticmethod
    def _read_json(name):
        """Parse the UTF-8 JSON template called *name* in EMERGENCY_DIR."""
        return json.loads((EMERGENCY_DIR / name).read_text(encoding='utf-8'))

    def test_emergency_config_model_in_fallback_chain(self):
        """The default model in emergency config should be first fallback."""
        cfg = yaml.safe_load(
            (EMERGENCY_DIR / 'config.emergency.yaml').read_text(encoding='utf-8')
        )
        dms = self._read_json('deadman_switch.json')
        default_model = cfg['model']['default']
        chain = dms['fallback_chain']
        # Fail with a readable message rather than an IndexError on chain[0].
        assert chain, "deadman_switch.json fallback_chain must not be empty"
        first_fallback = chain[0]['model']
        # They should match (emergency config is the fallback config)
        assert default_model == first_fallback, \
            f"Emergency default model ({default_model}) should match first fallback ({first_fallback})"

    def test_health_status_schema_matches_deadman_config(self):
        """health_status.json should include deadman switch config fields."""
        health = self._read_json('health_status.json')
        # Parsed only so that a missing or malformed deadman_switch.json fails
        # this test; none of its values are compared here.
        self._read_json('deadman_switch.json')
        # health_status embeds deadman_switch config
        assert 'deadman_switch' in health, \
            "health_status.json is missing the 'deadman_switch' section"
        for key in ['enabled', 'heartbeat_timeout_seconds', 'max_restart_attempts']:
            assert key in health['deadman_switch'], \
                f"health_status.json deadman_switch section is missing key: {key}"
|