chore(lazarus): update registry after first watchdog run
Some checks failed
Deploy Nexus / deploy (push) Has been cancelled
Staging Verification Gate / verify-staging (push) Has been cancelled

This commit is contained in:
2026-04-07 15:10:34 +00:00
parent b0654bac6c
commit 30fe98d569

View File

@@ -1,12 +1,7 @@
# Lazarus Pit Registry — Single Source of Truth for Fleet Health and Resurrection
# Version: 1.0.0
# Owner: Bezalel (deployment), Ezra (compilation), Allegro (validation)
meta:
version: "1.0.0"
updated_at: "2026-04-07T02:55:00Z"
next_review: "2026-04-14T02:55:00Z"
version: 1.0.0
updated_at: '2026-04-07T15:09:53.386648+00:00'
next_review: '2026-04-14T02:55:00Z'
fleet:
bezalel:
role: forge-and-testbed wizard
@@ -16,23 +11,22 @@ fleet:
provider: kimi-coding
model: kimi-k2.5
fallback_chain:
- provider: kimi-coding
model: kimi-k2.5
timeout: 120
- provider: anthropic
model: claude-sonnet-4-20250514
timeout: 120
- provider: openrouter
model: anthropic/claude-sonnet-4-20250514
timeout: 120
- provider: big_brain
model: gemma3:27b-instruct-q8_0
timeout: 300
- provider: kimi-coding
model: kimi-k2.5
timeout: 120
- provider: anthropic
model: claude-sonnet-4-20250514
timeout: 120
- provider: openrouter
model: anthropic/claude-sonnet-4-20250514
timeout: 120
- provider: big_brain
model: gemma3:27b-instruct-q8_0
timeout: 300
health_endpoints:
gateway: "http://127.0.0.1:8646"
api_server: "http://127.0.0.1:8656"
gateway: http://127.0.0.1:8646
api_server: http://127.0.0.1:8656
auto_restart: true
allegro:
role: code-craft wizard
host: UNKNOWN
@@ -41,22 +35,21 @@ fleet:
provider: kimi-coding
model: kimi-k2.5
fallback_chain:
- provider: kimi-coding
model: kimi-k2.5
timeout: 120
- provider: anthropic
model: claude-sonnet-4-20250514
timeout: 120
- provider: openrouter
model: anthropic/claude-sonnet-4-20250514
timeout: 120
- provider: kimi-coding
model: kimi-k2.5
timeout: 120
- provider: anthropic
model: claude-sonnet-4-20250514
timeout: 120
- provider: openrouter
model: anthropic/claude-sonnet-4-20250514
timeout: 120
health_endpoints:
gateway: "http://127.0.0.1:8645"
gateway: http://127.0.0.1:8645
auto_restart: true
known_issues:
- host_and_vps_unknown_to_fleet
- config_needs_runtime_refresh
- host_and_vps_unknown_to_fleet
- config_needs_runtime_refresh
ezra:
role: archivist-and-interpreter wizard
host: UNKNOWN
@@ -65,16 +58,15 @@ fleet:
provider: anthropic
model: claude-sonnet-4-20250514
fallback_chain:
- provider: anthropic
model: claude-sonnet-4-20250514
timeout: 120
- provider: openrouter
model: anthropic/claude-sonnet-4-20250514
timeout: 120
- provider: anthropic
model: claude-sonnet-4-20250514
timeout: 120
- provider: openrouter
model: anthropic/claude-sonnet-4-20250514
timeout: 120
auto_restart: true
known_issues:
- timeout_choking_on_long_operations
- timeout_choking_on_long_operations
timmy:
role: sovereign core
host: UNKNOWN
@@ -83,69 +75,63 @@ fleet:
provider: anthropic
model: claude-sonnet-4-20250514
fallback_chain:
- provider: anthropic
model: claude-sonnet-4-20250514
timeout: 120
- provider: openrouter
model: anthropic/claude-sonnet-4-20250514
timeout: 120
- provider: anthropic
model: claude-sonnet-4-20250514
timeout: 120
- provider: openrouter
model: anthropic/claude-sonnet-4-20250514
timeout: 120
auto_restart: true
provider_health_matrix:
kimi-coding:
status: degraded
note: "kimi-for-coding returns 403 access-terminated; use kimi-k2.5 model only"
last_checked: "2026-04-07T02:55:00Z"
status: healthy
note: ''
last_checked: '2026-04-07T15:09:53.384900+00:00'
rate_limited: false
dead: false
anthropic:
status: healthy
last_checked: "2026-04-07T02:55:00Z"
last_checked: '2026-04-07T15:09:53.385047+00:00'
rate_limited: false
dead: false
note: ''
openrouter:
status: healthy
last_checked: "2026-04-07T02:55:00Z"
last_checked: '2026-04-07T02:55:00Z'
rate_limited: false
dead: false
big_brain:
status: provisioning
note: "RunPod L40S instance big-brain-bezalel deployed; Ollama endpoint propagating"
last_checked: "2026-04-07T02:55:00Z"
endpoint: "http://yxw29g3excyddq-64411cd0-11434.tcp.runpod.net:11434/v1"
note: RunPod L40S instance big-brain-bezalel deployed; Ollama endpoint propagating
last_checked: '2026-04-07T02:55:00Z'
endpoint: http://yxw29g3excyddq-64411cd0-11434.tcp.runpod.net:11434/v1
rate_limited: false
dead: false
timeout_policies:
gateway:
inactivity_timeout_seconds: 600
diagnostic_on_timeout: true
cron:
inactivity_timeout_seconds: 0 # unlimited while active
inactivity_timeout_seconds: 0
agent:
default_turn_timeout: 120
long_operation_heartbeat: true
watchdog:
enabled: true
interval_seconds: 60
actions:
- ping_agent_gateways
- probe_providers
- parse_agent_logs
- update_registry
- auto_promote_fallbacks
- auto_restart_dead_agents
- ping_agent_gateways
- probe_providers
- parse_agent_logs
- update_registry
- auto_promote_fallbacks
- auto_restart_dead_agents
resurrection_protocol:
soft:
- reload_config_from_registry
- rewrite_fallback_providers
- promote_first_healthy_fallback
- reload_config_from_registry
- rewrite_fallback_providers
- promote_first_healthy_fallback
hard:
- systemctl_restart_gateway
- log_incident
- notify_sovereign
- systemctl_restart_gateway
- log_incident
- notify_sovereign