chore(lazarus): update registry after first watchdog run
This commit is contained in:
@@ -1,12 +1,7 @@
|
||||
# Lazarus Pit Registry — Single Source of Truth for Fleet Health and Resurrection
|
||||
# Version: 1.0.0
|
||||
# Owner: Bezalel (deployment), Ezra (compilation), Allegro (validation)
|
||||
|
||||
meta:
|
||||
version: "1.0.0"
|
||||
updated_at: "2026-04-07T02:55:00Z"
|
||||
next_review: "2026-04-14T02:55:00Z"
|
||||
|
||||
version: 1.0.0
|
||||
updated_at: '2026-04-07T15:09:53.386648+00:00'
|
||||
next_review: '2026-04-14T02:55:00Z'
|
||||
fleet:
|
||||
bezalel:
|
||||
role: forge-and-testbed wizard
|
||||
@@ -16,23 +11,22 @@ fleet:
|
||||
provider: kimi-coding
|
||||
model: kimi-k2.5
|
||||
fallback_chain:
|
||||
- provider: kimi-coding
|
||||
model: kimi-k2.5
|
||||
timeout: 120
|
||||
- provider: anthropic
|
||||
model: claude-sonnet-4-20250514
|
||||
timeout: 120
|
||||
- provider: openrouter
|
||||
model: anthropic/claude-sonnet-4-20250514
|
||||
timeout: 120
|
||||
- provider: big_brain
|
||||
model: gemma3:27b-instruct-q8_0
|
||||
timeout: 300
|
||||
- provider: kimi-coding
|
||||
model: kimi-k2.5
|
||||
timeout: 120
|
||||
- provider: anthropic
|
||||
model: claude-sonnet-4-20250514
|
||||
timeout: 120
|
||||
- provider: openrouter
|
||||
model: anthropic/claude-sonnet-4-20250514
|
||||
timeout: 120
|
||||
- provider: big_brain
|
||||
model: gemma3:27b-instruct-q8_0
|
||||
timeout: 300
|
||||
health_endpoints:
|
||||
gateway: "http://127.0.0.1:8646"
|
||||
api_server: "http://127.0.0.1:8656"
|
||||
gateway: http://127.0.0.1:8646
|
||||
api_server: http://127.0.0.1:8656
|
||||
auto_restart: true
|
||||
|
||||
allegro:
|
||||
role: code-craft wizard
|
||||
host: UNKNOWN
|
||||
@@ -41,22 +35,21 @@ fleet:
|
||||
provider: kimi-coding
|
||||
model: kimi-k2.5
|
||||
fallback_chain:
|
||||
- provider: kimi-coding
|
||||
model: kimi-k2.5
|
||||
timeout: 120
|
||||
- provider: anthropic
|
||||
model: claude-sonnet-4-20250514
|
||||
timeout: 120
|
||||
- provider: openrouter
|
||||
model: anthropic/claude-sonnet-4-20250514
|
||||
timeout: 120
|
||||
- provider: kimi-coding
|
||||
model: kimi-k2.5
|
||||
timeout: 120
|
||||
- provider: anthropic
|
||||
model: claude-sonnet-4-20250514
|
||||
timeout: 120
|
||||
- provider: openrouter
|
||||
model: anthropic/claude-sonnet-4-20250514
|
||||
timeout: 120
|
||||
health_endpoints:
|
||||
gateway: "http://127.0.0.1:8645"
|
||||
gateway: http://127.0.0.1:8645
|
||||
auto_restart: true
|
||||
known_issues:
|
||||
- host_and_vps_unknown_to_fleet
|
||||
- config_needs_runtime_refresh
|
||||
|
||||
- host_and_vps_unknown_to_fleet
|
||||
- config_needs_runtime_refresh
|
||||
ezra:
|
||||
role: archivist-and-interpreter wizard
|
||||
host: UNKNOWN
|
||||
@@ -65,16 +58,15 @@ fleet:
|
||||
provider: anthropic
|
||||
model: claude-sonnet-4-20250514
|
||||
fallback_chain:
|
||||
- provider: anthropic
|
||||
model: claude-sonnet-4-20250514
|
||||
timeout: 120
|
||||
- provider: openrouter
|
||||
model: anthropic/claude-sonnet-4-20250514
|
||||
timeout: 120
|
||||
- provider: anthropic
|
||||
model: claude-sonnet-4-20250514
|
||||
timeout: 120
|
||||
- provider: openrouter
|
||||
model: anthropic/claude-sonnet-4-20250514
|
||||
timeout: 120
|
||||
auto_restart: true
|
||||
known_issues:
|
||||
- timeout_choking_on_long_operations
|
||||
|
||||
- timeout_choking_on_long_operations
|
||||
timmy:
|
||||
role: sovereign core
|
||||
host: UNKNOWN
|
||||
@@ -83,69 +75,63 @@ fleet:
|
||||
provider: anthropic
|
||||
model: claude-sonnet-4-20250514
|
||||
fallback_chain:
|
||||
- provider: anthropic
|
||||
model: claude-sonnet-4-20250514
|
||||
timeout: 120
|
||||
- provider: openrouter
|
||||
model: anthropic/claude-sonnet-4-20250514
|
||||
timeout: 120
|
||||
- provider: anthropic
|
||||
model: claude-sonnet-4-20250514
|
||||
timeout: 120
|
||||
- provider: openrouter
|
||||
model: anthropic/claude-sonnet-4-20250514
|
||||
timeout: 120
|
||||
auto_restart: true
|
||||
|
||||
provider_health_matrix:
|
||||
kimi-coding:
|
||||
status: degraded
|
||||
note: "kimi-for-coding returns 403 access-terminated; use kimi-k2.5 model only"
|
||||
last_checked: "2026-04-07T02:55:00Z"
|
||||
status: healthy
|
||||
note: ''
|
||||
last_checked: '2026-04-07T15:09:53.384900+00:00'
|
||||
rate_limited: false
|
||||
dead: false
|
||||
|
||||
anthropic:
|
||||
status: healthy
|
||||
last_checked: "2026-04-07T02:55:00Z"
|
||||
last_checked: '2026-04-07T15:09:53.385047+00:00'
|
||||
rate_limited: false
|
||||
dead: false
|
||||
|
||||
note: ''
|
||||
openrouter:
|
||||
status: healthy
|
||||
last_checked: "2026-04-07T02:55:00Z"
|
||||
last_checked: '2026-04-07T02:55:00Z'
|
||||
rate_limited: false
|
||||
dead: false
|
||||
|
||||
big_brain:
|
||||
status: provisioning
|
||||
note: "RunPod L40S instance big-brain-bezalel deployed; Ollama endpoint propagating"
|
||||
last_checked: "2026-04-07T02:55:00Z"
|
||||
endpoint: "http://yxw29g3excyddq-64411cd0-11434.tcp.runpod.net:11434/v1"
|
||||
note: RunPod L40S instance big-brain-bezalel deployed; Ollama endpoint propagating
|
||||
last_checked: '2026-04-07T02:55:00Z'
|
||||
endpoint: http://yxw29g3excyddq-64411cd0-11434.tcp.runpod.net:11434/v1
|
||||
rate_limited: false
|
||||
dead: false
|
||||
|
||||
timeout_policies:
|
||||
gateway:
|
||||
inactivity_timeout_seconds: 600
|
||||
diagnostic_on_timeout: true
|
||||
cron:
|
||||
inactivity_timeout_seconds: 0 # unlimited while active
|
||||
inactivity_timeout_seconds: 0
|
||||
agent:
|
||||
default_turn_timeout: 120
|
||||
long_operation_heartbeat: true
|
||||
|
||||
watchdog:
|
||||
enabled: true
|
||||
interval_seconds: 60
|
||||
actions:
|
||||
- ping_agent_gateways
|
||||
- probe_providers
|
||||
- parse_agent_logs
|
||||
- update_registry
|
||||
- auto_promote_fallbacks
|
||||
- auto_restart_dead_agents
|
||||
|
||||
- ping_agent_gateways
|
||||
- probe_providers
|
||||
- parse_agent_logs
|
||||
- update_registry
|
||||
- auto_promote_fallbacks
|
||||
- auto_restart_dead_agents
|
||||
resurrection_protocol:
|
||||
soft:
|
||||
- reload_config_from_registry
|
||||
- rewrite_fallback_providers
|
||||
- promote_first_healthy_fallback
|
||||
- reload_config_from_registry
|
||||
- rewrite_fallback_providers
|
||||
- promote_first_healthy_fallback
|
||||
hard:
|
||||
- systemctl_restart_gateway
|
||||
- log_incident
|
||||
- notify_sovereign
|
||||
- systemctl_restart_gateway
|
||||
- log_incident
|
||||
- notify_sovereign
|
||||
|
||||
Reference in New Issue
Block a user