Some checks failed
PR Checklist / pr-checklist (pull_request) Failing after 1m27s
Implements the Ansible Infrastructure as Code story from KT 2026-04-08. One canonical Ansible playbook defines: - Deadman switch (snapshot good config on health, rollback+restart on death) - Golden state config deployment (Anthropic BANNED, Kimi→Gemini→Ollama) - Cron schedule (source-controlled, no manual crontab edits) - Agent startup sequence (pull→validate→start→verify) - request_log telemetry table (every inference call logged) - Thin config pattern (immutable local pointer to upstream) - Gitea webhook handler (deploy on merge) - Config validator (rejects banned providers) Fleet inventory: Timmy (Mac), Allegro (VPS), Bezalel (VPS), Ezra (VPS) Roles: wizard_base, golden_state, deadman_switch, request_log, cron_manager Addresses: timmy-config #442, #443, #444, #445, #446 References: KT Final 2026-04-08 P2, KT Bezalel 2026-04-08 #1-#5
75 lines
2.9 KiB
YAML
75 lines
2.9 KiB
YAML
---
# =============================================================================
# Wizard Group Variables — Golden State Configuration
# =============================================================================
# These variables are applied to ALL wizards in the fleet.
# This IS the golden state. If a wizard deviates, Ansible corrects it.
# =============================================================================
# --- Deadman Switch ---
# Snapshot known-good config while healthy; rollback + restart on death.
deadman_enabled: true
deadman_check_interval: 300  # Seconds between health checks (5 minutes)
# NOTE(review): "~" is not expanded by every Ansible module — confirm the
# deadman_switch role expands this (e.g. via the file/stat modules' user
# context) before use.
deadman_snapshot_dir: "~/.local/timmy/snapshots"
deadman_max_snapshots: 10  # Rolling window of good configs
deadman_restart_cooldown: 60  # Seconds to wait before restart after failure
deadman_max_restart_attempts: 3
deadman_escalation_channel: telegram  # Alert Alexander after max attempts
# --- Thin Config ---
# Immutable local pointer to the upstream source-of-truth repo.
thin_config_path: "~/.timmy/thin_config.yml"
thin_config_mode: "0444"  # Read-only — agents CANNOT modify
upstream_repo: "https://forge.alexanderwhitestone.com/Timmy_Foundation/timmy-config.git"
upstream_branch: main
config_pull_on_wake: true
config_validation_enabled: true
# --- Agent Settings ---
agent_max_turns: 30
agent_reasoning_effort: high
agent_verbose: false
agent_approval_mode: auto
# --- Hermes Harness ---
# NOTE(review): hermes_home is not defined in this file — confirm it is set
# in host/inventory vars for every wizard, or templating these will fail.
hermes_config_dir: "{{ hermes_home }}"
hermes_bin_dir: "{{ hermes_home }}/bin"
hermes_skins_dir: "{{ hermes_home }}/skins"
hermes_playbooks_dir: "{{ hermes_home }}/playbooks"
hermes_memories_dir: "{{ hermes_home }}/memories"
# --- Request Log (Telemetry) ---
request_log_enabled: true
request_log_path: "~/.local/timmy/request_log.db"
# The rotation cron job DELETES rows older than this — there is no archive
# step yet (the previous "Archive" wording was inaccurate).
request_log_rotation_days: 30
request_log_sync_to_gitea: false  # Future: push telemetry summaries to Gitea
# --- Cron Schedule ---
# All cron jobs are managed here. No manual crontab edits.
cron_jobs:
  - name: "Deadman health check"
    job: "cd {{ wizard_home }}/workspace/timmy-config && python3 fleet/health_check.py"
    minute: "*/5"
    hour: "*"
    enabled: "{{ deadman_enabled }}"

  - name: "Muda audit"
    job: "cd {{ wizard_home }}/workspace/timmy-config && bash fleet/muda-audit.sh >> /tmp/muda-audit.log 2>&1"
    minute: "0"
    hour: "21"
    weekday: "0"  # Sundays
    enabled: true

  # Pull the branch declared in upstream_branch (was hardcoded to "main",
  # which would silently diverge if upstream_branch were ever changed).
  - name: "Config pull from upstream"
    job: "cd {{ wizard_home }}/workspace/timmy-config && git pull --ff-only origin {{ upstream_branch }}"
    minute: "*/15"
    hour: "*"
    enabled: "{{ config_pull_on_wake }}"

  # Deletes rows older than request_log_rotation_days. os.path.expanduser()
  # is required because sqlite3.connect() does NOT expand "~" — the previous
  # command would have opened a literal "~/.local/..." path relative to the
  # cron working directory. The unused "datetime" import is dropped: the
  # datetime(...) in the SQL is SQLite's builtin, not Python's module.
  - name: "Request log rotation"
    job: "python3 -c \"import os,sqlite3; db=sqlite3.connect(os.path.expanduser('{{ request_log_path }}')); db.execute('DELETE FROM request_log WHERE timestamp < datetime(\\\"now\\\", \\\"-{{ request_log_rotation_days }} days\\\")'); db.commit()\""
    minute: "0"
    hour: "3"
    enabled: "{{ request_log_enabled }}"
# --- Provider Enforcement ---
# These are validated on every Ansible run. Any Anthropic reference = failure.
provider_ban_enforcement: strict  # strict = fail playbook, warn = log only