diff --git a/ansible/BANNED_PROVIDERS.yml b/ansible/BANNED_PROVIDERS.yml
new file mode 100644
index 00000000..3443b9ba
--- /dev/null
+++ b/ansible/BANNED_PROVIDERS.yml
@@ -0,0 +1,47 @@
+# =============================================================================
+# BANNED PROVIDERS — The Timmy Foundation
+# =============================================================================
+# "Anthropic is not only fired, but banned. I don't want these errors
+# cropping up." — Alexander, 2026-04-09
+#
+# This is a HARD BAN. Not deprecated. Not fallback. BANNED.
+# Enforcement: pre-commit hook, linter, Ansible validation, CI tests.
+# =============================================================================
+
+banned_providers:
+ - name: anthropic
+ reason: "Permanently banned. SDK access gated despite active quota. Fleet was bricked because golden state pointed to Anthropic Sonnet."
+ banned_date: "2026-04-09"
+ enforcement: strict # Ansible playbook FAILS if detected
+ models:
+ - "claude-sonnet-*"
+ - "claude-opus-*"
+ - "claude-haiku-*"
+ - "claude-*"
+ endpoints:
+ - "api.anthropic.com"
+ - "anthropic/*" # OpenRouter pattern
+ api_keys:
+ - "ANTHROPIC_API_KEY"
+ - "CLAUDE_API_KEY"
+
+# Golden state alternative:
+approved_providers:
+ - name: kimi-coding
+ model: kimi-k2.5
+ role: primary
+ - name: openrouter
+ model: google/gemini-2.5-pro
+ role: fallback
+ - name: ollama
+ model: "gemma4:latest"
+ role: terminal_fallback
+
+# Future evaluation:
+evaluation_candidates:
+ - name: mimo-v2-pro
+ status: pending
+ notes: "Free via Nous Portal for ~2 weeks from 2026-04-07. Add after fallback chain is fixed."
+ - name: hermes-4
+ status: available
+ notes: "Free on Nous Portal. 36B and 70B variants. Home team model."
diff --git a/ansible/README.md b/ansible/README.md
new file mode 100644
index 00000000..9fb4cc9c
--- /dev/null
+++ b/ansible/README.md
@@ -0,0 +1,146 @@
+# Ansible IaC — The Timmy Foundation Fleet
+
+> One canonical Ansible playbook defines: deadman switch, cron schedule,
+> golden state rollback, agent startup sequence.
+> — KT Final Session 2026-04-08, Priority TWO
+
+## Purpose
+
+This directory contains the **single source of truth** for fleet infrastructure.
+No more ad-hoc recovery implementations. No more overlapping deadman switches.
+No more agents mutating their own configs into oblivion.
+
+**Everything** goes through Ansible. If it's not in a playbook, it doesn't exist.
+
+## Architecture
+
+```
+┌─────────────────────────────────────────────────┐
+│ Gitea (Source of Truth) │
+│ timmy-config/ansible/ │
+│ ├── inventory/hosts.yml (fleet machines) │
+│ ├── playbooks/site.yml (master playbook) │
+│ ├── roles/ (reusable roles) │
+│ └── group_vars/wizards.yml (golden state) │
+└──────────────────┬──────────────────────────────┘
+ │ PR merge triggers webhook
+ ▼
+┌─────────────────────────────────────────────────┐
+│ Gitea Webhook Handler │
+│ scripts/deploy_on_webhook.sh │
+│ → ansible-pull on each target machine │
+└──────────────────┬──────────────────────────────┘
+ │ ansible-pull
+ ▼
+┌──────────┐ ┌──────────┐ ┌──────────┐ ┌──────────┐
+│ Timmy │ │ Allegro │ │ Bezalel │ │ Ezra │
+│ (Mac) │ │ (VPS) │ │ (VPS) │ │ (VPS) │
+│ │ │ │ │ │ │ │
+│ deadman │ │ deadman │ │ deadman │ │ deadman │
+│ cron │ │ cron │ │ cron │ │ cron │
+│ golden │ │ golden │ │ golden │ │ golden │
+│ req_log │ │ req_log │ │ req_log │ │ req_log │
+└──────────┘ └──────────┘ └──────────┘ └──────────┘
+```
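+
+Each machine can also converge itself on demand with `ansible-pull` (a sketch
+of what the webhook handler effectively does; `ansible-pull` limits the run to
+the local host by default):
+
+```bash
+# Check out timmy-config and apply site.yml against this machine
+ansible-pull -U https://forge.alexanderwhitestone.com/Timmy_Foundation/timmy-config.git \
+  -d /tmp/timmy-config-pull \
+  -i ansible/inventory/hosts.yml \
+  ansible/playbooks/site.yml
+```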
+
+## Quick Start
+
+```bash
+# Deploy everything to all machines
+ansible-playbook -i inventory/hosts.yml playbooks/site.yml
+
+# Deploy only golden state config
+ansible-playbook -i inventory/hosts.yml playbooks/golden_state.yml
+
+# Deploy only to a specific wizard
+ansible-playbook -i inventory/hosts.yml playbooks/site.yml --limit bezalel
+
+# Dry run (check mode)
+ansible-playbook -i inventory/hosts.yml playbooks/site.yml --check --diff
+```
+
+## Golden State Provider Chain
+
+All wizard configs converge on this provider chain. **Anthropic is BANNED.**
+
+| Priority | Provider | Model | Endpoint |
+| -------- | -------------------- | ---------------- | --------------------------------- |
+| 1 | Kimi | kimi-k2.5 | https://api.kimi.com/coding/v1 |
+| 2 | Gemini (OpenRouter) | gemini-2.5-pro | https://openrouter.ai/api/v1 |
+| 3 | Ollama (local) | gemma4:latest | http://localhost:11434/v1 |
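+
+Rendered through `wizard_config.yaml.j2`, the same chain lands in every
+wizard's `config.yaml` (abridged; `reason` and `api_key_env` omitted):
+
+```yaml
+fallback_providers:
+  - provider: kimi-coding
+    model: kimi-k2.5
+    base_url: https://api.kimi.com/coding/v1
+    timeout: 120
+  - provider: openrouter
+    model: google/gemini-2.5-pro
+    base_url: https://openrouter.ai/api/v1
+    timeout: 120
+  - provider: ollama
+    model: "gemma4:latest"
+    base_url: http://localhost:11434/v1
+    timeout: 180
+```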
+
+## Roles
+
+| Role | Purpose |
+| ---------------- | ------------------------------------------------------------ |
+| `wizard_base` | Common wizard setup: directories, thin config, git pull |
+| `deadman_switch` | Health check → snapshot good config → rollback on death |
+| `golden_state` | Deploy and enforce golden state provider chain |
+| `request_log` | SQLite telemetry table for every inference call |
+| `cron_manager` | Source-controlled cron jobs — no manual crontab edits |
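+
+Each role is tagged in `site.yml`, so one role can be converged without
+running everything:
+
+```bash
+# Deploy only the request_log role, fleet-wide
+ansible-playbook -i inventory/hosts.yml playbooks/site.yml --tags telemetry
+```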
+
+## Rules
+
+1. **No manual changes.** If it's not in a playbook, it will be overwritten.
+2. **No Anthropic.** Banned. Enforcement is automated. See `BANNED_PROVIDERS.yml`.
+3. **Idempotent.** Every playbook can run 100 times with the same result.
+4. **PR required.** Config changes go through Gitea PR review, then deploy.
+5. **One identity per machine.** No duplicate agents. Fleet audit enforces this.
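+
+The pre-commit leg of rule 2 can be a thin wrapper around the repo's own
+validator (a sketch; assumes the hook runs from the repo root):
+
+```bash
+#!/usr/bin/env bash
+# .git/hooks/pre-commit: block commits that reintroduce banned providers
+python3 ansible/scripts/validate_config.py --all || {
+  echo "Commit blocked: banned provider found in a wizard config." >&2
+  exit 1
+}
+```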
+
+## Related Issues
+
+- timmy-config #442: [P2] Ansible IaC Canonical Playbook
+- timmy-config #444: Wire Deadman Switch ACTION
+- timmy-config #443: Thin Config Pattern
+- timmy-config #446: request_log Telemetry Table
diff --git a/ansible/ansible.cfg b/ansible/ansible.cfg
new file mode 100644
index 00000000..801e8594
--- /dev/null
+++ b/ansible/ansible.cfg
@@ -0,0 +1,21 @@
+[defaults]
+inventory = inventory/hosts.yml
+roles_path = roles
+host_key_checking = False
+retry_files_enabled = False
+stdout_callback = yaml
+forks = 10
+timeout = 30
+
+# Logging
+log_path = /var/log/ansible/timmy-fleet.log
+
+[privilege_escalation]
+become = True
+become_method = sudo
+become_user = root
+become_ask_pass = False
+
+[ssh_connection]
+pipelining = True
+ssh_args = -o ControlMaster=auto -o ControlPersist=60s -o StrictHostKeyChecking=no
diff --git a/ansible/inventory/group_vars/wizards.yml b/ansible/inventory/group_vars/wizards.yml
new file mode 100644
index 00000000..6fb5a0bb
--- /dev/null
+++ b/ansible/inventory/group_vars/wizards.yml
@@ -0,0 +1,74 @@
+# =============================================================================
+# Wizard Group Variables — Golden State Configuration
+# =============================================================================
+# These variables are applied to ALL wizards in the fleet.
+# This IS the golden state. If a wizard deviates, Ansible corrects it.
+# =============================================================================
+
+# --- Deadman Switch ---
+deadman_enabled: true
+deadman_check_interval: 300 # 5 minutes between health checks
+deadman_snapshot_dir: "{{ ansible_env.HOME }}/.local/timmy/snapshots"  # absolute path; a quoted ~ never expands in shell templates
+deadman_max_snapshots: 10 # Rolling window of good configs
+deadman_restart_cooldown: 60 # Seconds to wait before restart after failure
+deadman_max_restart_attempts: 3
+deadman_escalation_channel: telegram # Alert Alexander after max attempts
+
+# --- Thin Config ---
+thin_config_path: "{{ ansible_env.HOME }}/.timmy/thin_config.yml"
+thin_config_mode: "0444" # Read-only — agents CANNOT modify
+upstream_repo: "https://forge.alexanderwhitestone.com/Timmy_Foundation/timmy-config.git"
+upstream_branch: main
+config_pull_on_wake: true
+config_validation_enabled: true
+
+# --- Agent Settings ---
+agent_max_turns: 30
+agent_reasoning_effort: high
+agent_verbose: false
+agent_approval_mode: auto
+
+# --- Hermes Harness ---
+hermes_config_dir: "{{ hermes_home }}"
+hermes_bin_dir: "{{ hermes_home }}/bin"
+hermes_skins_dir: "{{ hermes_home }}/skins"
+hermes_playbooks_dir: "{{ hermes_home }}/playbooks"
+hermes_memories_dir: "{{ hermes_home }}/memories"
+
+# --- Request Log (Telemetry) ---
+request_log_enabled: true
+request_log_path: "{{ ansible_env.HOME }}/.local/timmy/request_log.db"  # absolute path; used verbatim in cron jobs and shell scripts
+request_log_rotation_days: 30 # Archive logs older than 30 days
+request_log_sync_to_gitea: false # Future: push telemetry summaries to Gitea
+
+# --- Cron Schedule ---
+# All cron jobs are managed here. No manual crontab edits.
+cron_jobs:
+ - name: "Deadman health check"
+ job: "cd {{ wizard_home }}/workspace/timmy-config && python3 fleet/health_check.py"
+ minute: "*/5"
+ hour: "*"
+ enabled: "{{ deadman_enabled }}"
+
+ - name: "Muda audit"
+ job: "cd {{ wizard_home }}/workspace/timmy-config && bash fleet/muda-audit.sh >> /tmp/muda-audit.log 2>&1"
+ minute: "0"
+ hour: "21"
+ weekday: "0"
+ enabled: true
+
+ - name: "Config pull from upstream"
+    job: "cd {{ wizard_home }}/workspace/timmy-config && git pull --ff-only origin {{ upstream_branch }}"
+ minute: "*/15"
+ hour: "*"
+ enabled: "{{ config_pull_on_wake }}"
+
+ - name: "Request log rotation"
+    job: "python3 -c \"import sqlite3; db=sqlite3.connect('{{ request_log_path }}'); db.execute(\\\"DELETE FROM request_log WHERE timestamp < datetime('now', '-{{ request_log_rotation_days }} days')\\\"); db.commit(); db.close()\""
+ minute: "0"
+ hour: "3"
+ enabled: "{{ request_log_enabled }}"
+
+# --- Provider Enforcement ---
+# These are validated on every Ansible run. Any Anthropic reference = failure.
+provider_ban_enforcement: strict # strict = fail playbook, warn = log only
diff --git a/ansible/inventory/hosts.yml b/ansible/inventory/hosts.yml
new file mode 100644
index 00000000..8d6ac237
--- /dev/null
+++ b/ansible/inventory/hosts.yml
@@ -0,0 +1,119 @@
+# =============================================================================
+# Fleet Inventory — The Timmy Foundation
+# =============================================================================
+# Source of truth for all machines in the fleet.
+# Update this file when machines are added/removed.
+# All changes go through PR review.
+# =============================================================================
+
+all:
+ children:
+ wizards:
+ hosts:
+ timmy:
+ ansible_host: localhost
+ ansible_connection: local
+ wizard_name: Timmy
+ wizard_role: "Primary wizard — soul of the fleet"
+ wizard_provider_primary: kimi-coding
+ wizard_model_primary: kimi-k2.5
+ hermes_port: 8081
+ api_port: 8645
+ wizard_home: "{{ ansible_env.HOME }}/wizards/timmy"
+ hermes_home: "{{ ansible_env.HOME }}/.hermes"
+ machine_type: mac
+ # Timmy runs on Alexander's M3 Max
+ ollama_available: true
+
+ allegro:
+ ansible_host: 167.99.126.228
+ ansible_user: root
+ wizard_name: Allegro
+ wizard_role: "Kimi-backed third wizard house — tight coding tasks"
+ wizard_provider_primary: kimi-coding
+ wizard_model_primary: kimi-k2.5
+ hermes_port: 8081
+ api_port: 8645
+ wizard_home: /root/wizards/allegro
+ hermes_home: /root/.hermes
+ machine_type: vps
+ ollama_available: false
+
+ bezalel:
+ ansible_host: 159.203.146.185
+ ansible_user: root
+ wizard_name: Bezalel
+ wizard_role: "Forge-and-testbed wizard — infrastructure, deployment, hardening"
+ wizard_provider_primary: kimi-coding
+ wizard_model_primary: kimi-k2.5
+ hermes_port: 8081
+ api_port: 8656
+ wizard_home: /root/wizards/bezalel
+ hermes_home: /root/.hermes
+ machine_type: vps
+ ollama_available: false
+ # NOTE: The awake Bezalel may be the duplicate.
+ # Fleet audit (the-nexus #1144) will resolve identity.
+
+ ezra:
+ ansible_host: 143.198.27.163
+ ansible_user: root
+ wizard_name: Ezra
+ wizard_role: "Infrastructure wizard — Gitea, nginx, hosting"
+ wizard_provider_primary: kimi-coding
+ wizard_model_primary: kimi-k2.5
+ hermes_port: 8081
+ api_port: 8645
+ wizard_home: /root/wizards/ezra
+ hermes_home: /root/.hermes
+ machine_type: vps
+ ollama_available: false
+ # NOTE: Currently DOWN — Telegram key revoked, awaiting propagation.
+
+ # Infrastructure hosts (not wizards, but managed by Ansible)
+ infrastructure:
+ hosts:
+ forge:
+ ansible_host: 143.198.27.163
+ ansible_user: root
+ # Gitea runs on the same box as Ezra
+ gitea_url: https://forge.alexanderwhitestone.com
+ gitea_org: Timmy_Foundation
+
+ vars:
+ # Global variables applied to all hosts
+ gitea_repo_url: "https://forge.alexanderwhitestone.com/Timmy_Foundation/timmy-config.git"
+ gitea_branch: main
+ config_base_path: "{{ gitea_repo_url }}"
+      timmy_log_dir: "{{ ansible_env.HOME }}/.local/timmy/fleet-health"
+      request_log_db: "{{ ansible_env.HOME }}/.local/timmy/request_log.db"
+
+ # Golden state provider chain — Anthropic is BANNED
+ golden_state_providers:
+ - name: kimi-coding
+ model: kimi-k2.5
+ base_url: "https://api.kimi.com/coding/v1"
+ timeout: 120
+ reason: "Primary — Kimi K2.5 (best value, least friction)"
+ - name: openrouter
+ model: google/gemini-2.5-pro
+ base_url: "https://openrouter.ai/api/v1"
+ api_key_env: OPENROUTER_API_KEY
+ timeout: 120
+ reason: "Fallback — Gemini 2.5 Pro via OpenRouter"
+ - name: ollama
+ model: "gemma4:latest"
+ base_url: "http://localhost:11434/v1"
+ timeout: 180
+ reason: "Terminal fallback — local Ollama (sovereign, no API needed)"
+
+ # Banned providers — hard enforcement
+ banned_providers:
+ - anthropic
+ - claude
+ banned_models_patterns:
+ - "claude-*"
+ - "anthropic/*"
+ - "*sonnet*"
+ - "*opus*"
+ - "*haiku*"
diff --git a/ansible/playbooks/agent_startup.yml b/ansible/playbooks/agent_startup.yml
new file mode 100644
index 00000000..75c74962
--- /dev/null
+++ b/ansible/playbooks/agent_startup.yml
@@ -0,0 +1,98 @@
+---
+# =============================================================================
+# agent_startup.yml — Resurrect Wizards from Checked-in Configs
+# =============================================================================
+# Brings wizards back online using golden state configs.
+# Order: pull config → validate → start agent → verify with request_log
+# =============================================================================
+
+- name: "Agent Startup Sequence"
+ hosts: wizards
+ become: true
+ serial: 1 # One wizard at a time to avoid cascading issues
+
+ tasks:
+ - name: "Pull latest config from upstream"
+ git:
+ repo: "{{ upstream_repo }}"
+ dest: "{{ wizard_home }}/workspace/timmy-config"
+ version: "{{ upstream_branch }}"
+ force: true
+ tags: [pull]
+
+ - name: "Deploy golden state config"
+ include_role:
+ name: golden_state
+ tags: [config]
+
+ - name: "Validate config — no banned providers"
+ shell: |
+ python3 -c "
+ import yaml, sys
+ with open('{{ wizard_home }}/config.yaml') as f:
+ cfg = yaml.safe_load(f)
+ banned = {{ banned_providers }}
+ for p in cfg.get('fallback_providers', []):
+ if p.get('provider', '') in banned:
+ print(f'BANNED: {p[\"provider\"]}', file=sys.stderr)
+ sys.exit(1)
+ model = cfg.get('model', {}).get('provider', '')
+ if model in banned:
+ print(f'BANNED default provider: {model}', file=sys.stderr)
+ sys.exit(1)
+ print('Config validated — no banned providers.')
+ "
+ register: config_valid
+ tags: [validate]
+
+ - name: "Ensure hermes-agent service is running"
+ systemd:
+ name: "hermes-{{ wizard_name | lower }}"
+ state: started
+ enabled: true
+ when: machine_type == 'vps'
+ tags: [start]
+ ignore_errors: true # Service may not exist yet on all machines
+
+ - name: "Start hermes agent (Mac — launchctl)"
+      shell: |
+        launchctl kickstart -k "gui/$(id -u)/ai.hermes.{{ wizard_name | lower }}" 2>/dev/null || \
+          (cd {{ wizard_home }} && hermes agent start --daemon 2>&1 | tail -5)
+ when: machine_type == 'mac'
+ tags: [start]
+ ignore_errors: true
+
+ - name: "Wait for agent to come online"
+ wait_for:
+ host: 127.0.0.1
+ port: "{{ api_port }}"
+ timeout: 60
+ state: started
+ tags: [verify]
+ ignore_errors: true
+
+ - name: "Verify agent is alive — check request_log for activity"
+ shell: |
+ sleep 10
+ python3 -c "
+ import sqlite3, sys
+ db = sqlite3.connect('{{ request_log_path }}')
+ cursor = db.execute('''
+ SELECT COUNT(*) FROM request_log
+ WHERE agent_name = '{{ wizard_name }}'
+ AND timestamp > datetime('now', '-5 minutes')
+ ''')
+ count = cursor.fetchone()[0]
+ if count > 0:
+ print(f'{{ wizard_name }} is alive — {count} recent inference calls logged.')
+ else:
+ print(f'WARNING: {{ wizard_name }} started but no telemetry yet.')
+ "
+ register: agent_status
+ tags: [verify]
+ ignore_errors: true
+
+ - name: "Report startup status"
+ debug:
+ msg: "{{ wizard_name }}: {{ agent_status.stdout | default('startup attempted') }}"
+ tags: [always]
diff --git a/ansible/playbooks/cron_schedule.yml b/ansible/playbooks/cron_schedule.yml
new file mode 100644
index 00000000..db419d24
--- /dev/null
+++ b/ansible/playbooks/cron_schedule.yml
@@ -0,0 +1,15 @@
+---
+# =============================================================================
+# cron_schedule.yml — Source-Controlled Cron Jobs
+# =============================================================================
+# All cron jobs are defined in group_vars/wizards.yml.
+# This playbook deploys them. No manual crontab edits allowed.
+# =============================================================================
+
+- name: "Deploy Cron Schedule"
+ hosts: wizards
+ become: true
+
+ roles:
+ - role: cron_manager
+ tags: [cron, schedule]
diff --git a/ansible/playbooks/deadman_switch.yml b/ansible/playbooks/deadman_switch.yml
new file mode 100644
index 00000000..9eaa589c
--- /dev/null
+++ b/ansible/playbooks/deadman_switch.yml
@@ -0,0 +1,17 @@
+---
+# =============================================================================
+# deadman_switch.yml — Deploy Deadman Switch to All Wizards
+# =============================================================================
+# The deadman watch already fires and detects dead agents.
+# This playbook wires the ACTION:
+# - On healthy check: snapshot current config as "last known good"
+# - On failed check: rollback config to snapshot, restart agent
+# =============================================================================
+
+- name: "Deploy Deadman Switch ACTION"
+ hosts: wizards
+ become: true
+
+ roles:
+ - role: deadman_switch
+ tags: [deadman, recovery]
diff --git a/ansible/playbooks/golden_state.yml b/ansible/playbooks/golden_state.yml
new file mode 100644
index 00000000..0d2cf6b0
--- /dev/null
+++ b/ansible/playbooks/golden_state.yml
@@ -0,0 +1,33 @@
+---
+# =============================================================================
+# golden_state.yml — Deploy Golden State Config to All Wizards
+# =============================================================================
+# Enforces the golden state provider chain across the fleet.
+# Removes any Anthropic references. Deploys the approved provider chain.
+# =============================================================================
+
+- name: "Deploy Golden State Configuration"
+ hosts: wizards
+ become: true
+
+ roles:
+ - role: golden_state
+ tags: [golden, config]
+
+ post_tasks:
+ - name: "Verify golden state — no banned providers"
+      shell: |
+        # Count non-comment references only; the deployed config's own
+        # banned-provider banner would otherwise always trip the scan.
+        cat {{ hermes_home }}/config.yaml \
+            {{ wizard_home }}/config.yaml 2>/dev/null \
+          | grep -v '^[[:space:]]*#' \
+          | grep -ci 'anthropic\|claude-sonnet\|claude-opus\|claude-haiku' || true
+ register: banned_count
+ changed_when: false
+
+ - name: "Report golden state status"
+ debug:
+ msg: >
+ {{ wizard_name }} golden state: {{ golden_state_providers | map(attribute='name') | list | join(' → ') }}.
+ Banned provider references: {{ banned_count.stdout | trim }}.
diff --git a/ansible/playbooks/request_log.yml b/ansible/playbooks/request_log.yml
new file mode 100644
index 00000000..d9161c8b
--- /dev/null
+++ b/ansible/playbooks/request_log.yml
@@ -0,0 +1,15 @@
+---
+# =============================================================================
+# request_log.yml — Deploy Telemetry Table
+# =============================================================================
+# Creates the request_log SQLite table on all machines.
+# Every inference call writes a row. No exceptions. No summarizing.
+# =============================================================================
+
+- name: "Deploy Request Log Telemetry"
+ hosts: wizards
+ become: true
+
+ roles:
+ - role: request_log
+ tags: [telemetry, logging]
diff --git a/ansible/playbooks/site.yml b/ansible/playbooks/site.yml
new file mode 100644
index 00000000..7e2a4aa1
--- /dev/null
+++ b/ansible/playbooks/site.yml
@@ -0,0 +1,75 @@
+---
+# =============================================================================
+# site.yml — Master Playbook for the Timmy Foundation Fleet
+# =============================================================================
+# This is the ONE playbook that defines the entire fleet state.
+# Run this and every machine converges to golden state.
+#
+# Usage:
+# ansible-playbook -i inventory/hosts.yml playbooks/site.yml
+# ansible-playbook -i inventory/hosts.yml playbooks/site.yml --limit bezalel
+# ansible-playbook -i inventory/hosts.yml playbooks/site.yml --check --diff
+# =============================================================================
+
+- name: "Timmy Foundation Fleet — Full Convergence"
+ hosts: wizards
+ become: true
+
+ pre_tasks:
+ - name: "Validate no banned providers in golden state"
+ assert:
+ that:
+ - "item.name not in banned_providers"
+ fail_msg: "BANNED PROVIDER DETECTED: {{ item.name }} — Anthropic is permanently banned."
+ quiet: true
+ loop: "{{ golden_state_providers }}"
+ tags: [always]
+
+ - name: "Display target wizard"
+ debug:
+ msg: "Deploying to {{ wizard_name }} ({{ wizard_role }}) on {{ ansible_host }}"
+ tags: [always]
+
+ roles:
+ - role: wizard_base
+ tags: [base, setup]
+
+ - role: golden_state
+ tags: [golden, config]
+
+ - role: deadman_switch
+ tags: [deadman, recovery]
+
+ - role: request_log
+ tags: [telemetry, logging]
+
+ - role: cron_manager
+ tags: [cron, schedule]
+
+ post_tasks:
+ - name: "Final validation — scan for banned providers"
+      shell: |
+        # Ignore comment lines: the deployed config's own BANNED PROVIDERS
+        # banner names the banned models and must not trip the scan.
+        cat {{ hermes_home }}/config.yaml \
+            {{ wizard_home }}/config.yaml \
+            {{ thin_config_path }} 2>/dev/null \
+          | grep -v '^[[:space:]]*#' \
+          | grep -i 'anthropic\|claude-sonnet\|claude-opus\|claude-haiku' || true
+ register: banned_scan
+ changed_when: false
+ tags: [validation]
+
+ - name: "FAIL if banned providers found in deployed config"
+ fail:
+ msg: |
+ BANNED PROVIDER DETECTED IN DEPLOYED CONFIG:
+ {{ banned_scan.stdout }}
+ Anthropic is permanently banned. Fix the config and re-deploy.
+ when: banned_scan.stdout | length > 0
+ tags: [validation]
+
+ - name: "Deployment complete"
+ debug:
+ msg: "{{ wizard_name }} converged to golden state. Provider chain: {{ golden_state_providers | map(attribute='name') | list | join(' → ') }}"
+ tags: [always]
diff --git a/ansible/roles/cron_manager/tasks/main.yml b/ansible/roles/cron_manager/tasks/main.yml
new file mode 100644
index 00000000..43399a6e
--- /dev/null
+++ b/ansible/roles/cron_manager/tasks/main.yml
@@ -0,0 +1,55 @@
+---
+# =============================================================================
+# cron_manager/tasks — Source-Controlled Cron Jobs
+# =============================================================================
+# All cron jobs are defined in group_vars/wizards.yml.
+# No manual crontab edits. This is the only way to manage cron.
+# =============================================================================
+
+- name: "Deploy managed cron jobs"
+ cron:
+ name: "{{ item.name }}"
+ job: "{{ item.job }}"
+ minute: "{{ item.minute | default('*') }}"
+ hour: "{{ item.hour | default('*') }}"
+ day: "{{ item.day | default('*') }}"
+ month: "{{ item.month | default('*') }}"
+ weekday: "{{ item.weekday | default('*') }}"
+    state: "{{ 'present' if (item.enabled | bool) else 'absent' }}"  # | bool: a templated "False" string is otherwise truthy
+ user: "{{ ansible_user | default('root') }}"
+ loop: "{{ cron_jobs }}"
+ when: cron_jobs is defined
+
+- name: "Deploy deadman switch cron (fallback if systemd timer unavailable)"
+ cron:
+ name: "Deadman switch — {{ wizard_name }}"
+ job: "{{ wizard_home }}/deadman_action.sh >> {{ timmy_log_dir }}/deadman-{{ wizard_name }}.log 2>&1"
+ minute: "*/5"
+ hour: "*"
+ state: present
+ user: "{{ ansible_user | default('root') }}"
+  when: deadman_enabled | bool and machine_type not in ['vps', 'mac']
+  # VPS machines use systemd timers; Macs use launchd (see deadman_switch role)
+
+- name: "Remove legacy cron jobs (cleanup)"
+ cron:
+ name: "{{ item }}"
+ state: absent
+ user: "{{ ansible_user | default('root') }}"
+ loop:
+ - "legacy-deadman-watch"
+ - "old-health-check"
+ - "backup-deadman"
+ ignore_errors: true
+
+- name: "List active cron jobs"
+ shell: "crontab -l 2>/dev/null | grep -v '^#' | grep -v '^$' || echo 'No cron jobs found.'"
+ register: active_crons
+ changed_when: false
+
+- name: "Report cron status"
+ debug:
+ msg: |
+ {{ wizard_name }} cron jobs deployed.
+ Active:
+ {{ active_crons.stdout }}
diff --git a/ansible/roles/deadman_switch/tasks/main.yml b/ansible/roles/deadman_switch/tasks/main.yml
new file mode 100644
index 00000000..dd9b0ff4
--- /dev/null
+++ b/ansible/roles/deadman_switch/tasks/main.yml
@@ -0,0 +1,53 @@
+---
+# =============================================================================
+# deadman_switch/tasks — Wire the Deadman Switch ACTION
+# =============================================================================
+# The watch fires. This makes it DO something:
+# - On healthy check: snapshot current config as "last known good"
+# - On failed check: rollback to last known good, restart agent
+# =============================================================================
+
+- name: "Create snapshot directory"
+ file:
+ path: "{{ deadman_snapshot_dir }}"
+ state: directory
+ mode: "0755"
+
+- name: "Deploy deadman switch script"
+ template:
+ src: deadman_action.sh.j2
+ dest: "{{ wizard_home }}/deadman_action.sh"
+ mode: "0755"
+
+- name: "Deploy deadman systemd service"
+ template:
+ src: deadman_switch.service.j2
+ dest: "/etc/systemd/system/deadman-{{ wizard_name | lower }}.service"
+ mode: "0644"
+ when: machine_type == 'vps'
+ notify: "Enable deadman service"
+
+- name: "Deploy deadman systemd timer"
+ template:
+ src: deadman_switch.timer.j2
+ dest: "/etc/systemd/system/deadman-{{ wizard_name | lower }}.timer"
+ mode: "0644"
+ when: machine_type == 'vps'
+ notify: "Enable deadman timer"
+
+- name: "Deploy deadman launchd plist (Mac)"
+ template:
+ src: deadman_switch.plist.j2
+ dest: "{{ ansible_env.HOME }}/Library/LaunchAgents/com.timmy.deadman.{{ wizard_name | lower }}.plist"
+ mode: "0644"
+ when: machine_type == 'mac'
+ notify: "Load deadman plist"
+
+- name: "Take initial config snapshot"
+ copy:
+ src: "{{ wizard_home }}/config.yaml"
+ dest: "{{ deadman_snapshot_dir }}/config.yaml.known_good"
+ remote_src: true
+ mode: "0444"
+ ignore_errors: true
+
diff --git a/ansible/roles/deadman_switch/handlers/main.yml b/ansible/roles/deadman_switch/handlers/main.yml
new file mode 100644
--- /dev/null
+++ b/ansible/roles/deadman_switch/handlers/main.yml
@@ -0,0 +1,19 @@
+---
+# Role handlers live here; a top-level `handlers:` key is not valid inside
+# a role's tasks/main.yml.
+- name: "Enable deadman service"
+  systemd:
+    name: "deadman-{{ wizard_name | lower }}.service"
+    daemon_reload: true
+    enabled: true
+
+- name: "Enable deadman timer"
+  systemd:
+    name: "deadman-{{ wizard_name | lower }}.timer"
+    daemon_reload: true
+    enabled: true
+    state: started
+
+- name: "Load deadman plist"
+  shell: "launchctl load {{ ansible_env.HOME }}/Library/LaunchAgents/com.timmy.deadman.{{ wizard_name | lower }}.plist"
+  ignore_errors: true
diff --git a/ansible/roles/deadman_switch/templates/deadman_action.sh.j2 b/ansible/roles/deadman_switch/templates/deadman_action.sh.j2
new file mode 100644
index 00000000..32712eba
--- /dev/null
+++ b/ansible/roles/deadman_switch/templates/deadman_action.sh.j2
@@ -0,0 +1,154 @@
+#!/usr/bin/env bash
+# =============================================================================
+# Deadman Switch ACTION — {{ wizard_name }}
+# =============================================================================
+# Generated by Ansible on {{ ansible_date_time.iso8601 }}
+# DO NOT EDIT MANUALLY.
+#
+# On healthy check: snapshot current config as "last known good"
+# On failed check: rollback config to last known good, restart agent
+# =============================================================================
+
+set -euo pipefail
+
+WIZARD_NAME="{{ wizard_name }}"
+WIZARD_HOME="{{ wizard_home }}"
+CONFIG_FILE="{{ wizard_home }}/config.yaml"
+SNAPSHOT_DIR="{{ deadman_snapshot_dir }}"
+SNAPSHOT_FILE="${SNAPSHOT_DIR}/config.yaml.known_good"
+REQUEST_LOG_DB="{{ request_log_path }}"
+LOG_DIR="{{ timmy_log_dir }}"
+LOG_FILE="${LOG_DIR}/deadman-${WIZARD_NAME}.log"
+MAX_SNAPSHOTS={{ deadman_max_snapshots }}
+RESTART_COOLDOWN={{ deadman_restart_cooldown }}
+MAX_RESTART_ATTEMPTS={{ deadman_max_restart_attempts }}
+COOLDOWN_FILE="${LOG_DIR}/deadman_cooldown_${WIZARD_NAME}"
+SERVICE_NAME="hermes-{{ wizard_name | lower }}"
+
+# Ensure directories exist
+mkdir -p "${SNAPSHOT_DIR}" "${LOG_DIR}"
+
+log() {
+ echo "[$(date -u +%Y-%m-%dT%H:%M:%SZ)] [deadman] [${WIZARD_NAME}] $*" >> "${LOG_FILE}"
+ echo "[deadman] [${WIZARD_NAME}] $*"
+}
+
+log_telemetry() {
+ local status="$1"
+ local message="$2"
+ if [ -f "${REQUEST_LOG_DB}" ]; then
+ sqlite3 "${REQUEST_LOG_DB}" "INSERT INTO request_log (timestamp, agent_name, provider, model, endpoint, status, error_message) VALUES (datetime('now'), '${WIZARD_NAME}', 'deadman_switch', 'N/A', 'health_check', '${status}', '${message}');" 2>/dev/null || true
+ fi
+}
+
+snapshot_config() {
+ if [ -f "${CONFIG_FILE}" ]; then
+ cp "${CONFIG_FILE}" "${SNAPSHOT_FILE}"
+ # Keep rolling history
+ cp "${CONFIG_FILE}" "${SNAPSHOT_DIR}/config.yaml.$(date +%s)"
+ # Prune old snapshots
+        ls -t "${SNAPSHOT_DIR}"/config.yaml.[0-9]* 2>/dev/null | tail -n +$((MAX_SNAPSHOTS + 1)) | xargs rm -f 2>/dev/null || true  # no match must not trip pipefail
+ log "Config snapshot saved."
+ fi
+}
+
+rollback_config() {
+ if [ -f "${SNAPSHOT_FILE}" ]; then
+ log "Rolling back config to last known good..."
+ cp "${SNAPSHOT_FILE}" "${CONFIG_FILE}"
+ log "Config rolled back."
+ log_telemetry "fallback" "Config rolled back to last known good by deadman switch"
+ else
+ log "ERROR: No known good snapshot found. Pulling from upstream..."
+ cd "${WIZARD_HOME}/workspace/timmy-config" 2>/dev/null && \
+ git pull --ff-only origin {{ upstream_branch }} 2>/dev/null && \
+ cp "wizards/{{ wizard_name | lower }}/config.yaml" "${CONFIG_FILE}" && \
+ log "Config restored from upstream." || \
+ log "CRITICAL: Cannot restore config from any source."
+ fi
+}
+
+restart_agent() {
+ # Check cooldown
+ if [ -f "${COOLDOWN_FILE}" ]; then
+ local last_restart
+ last_restart=$(cat "${COOLDOWN_FILE}")
+ local now
+ now=$(date +%s)
+ local elapsed=$((now - last_restart))
+ if [ "${elapsed}" -lt "${RESTART_COOLDOWN}" ]; then
+ log "Restart cooldown active (${elapsed}s / ${RESTART_COOLDOWN}s). Skipping."
+ return 1
+ fi
+ fi
+
+ log "Restarting ${SERVICE_NAME}..."
+ date +%s > "${COOLDOWN_FILE}"
+
+{% if machine_type == 'vps' %}
+ systemctl restart "${SERVICE_NAME}" 2>/dev/null && \
+ log "Agent restarted via systemd." || \
+ log "ERROR: systemd restart failed."
+{% else %}
+    launchctl kickstart -k "gui/$(id -u)/ai.hermes.{{ wizard_name | lower }}" 2>/dev/null && \
+ log "Agent restarted via launchctl." || \
+ (cd "${WIZARD_HOME}" && hermes agent start --daemon 2>/dev/null && \
+ log "Agent restarted via hermes CLI.") || \
+ log "ERROR: All restart methods failed."
+{% endif %}
+
+ log_telemetry "success" "Agent restarted by deadman switch"
+}
+
+# --- Health Check ---
+check_health() {
+ # Check 1: Is the agent process running?
+{% if machine_type == 'vps' %}
+ if ! systemctl is-active --quiet "${SERVICE_NAME}" 2>/dev/null; then
+ if ! pgrep -f "hermes" > /dev/null 2>/dev/null; then
+ log "FAIL: Agent process not running."
+ return 1
+ fi
+ fi
+{% else %}
+ if ! pgrep -f "hermes" > /dev/null 2>/dev/null; then
+ log "FAIL: Agent process not running."
+ return 1
+ fi
+{% endif %}
+
+ # Check 2: Is the API port responding?
+ if ! timeout 10 bash -c "echo > /dev/tcp/127.0.0.1/{{ api_port }}" 2>/dev/null; then
+ log "FAIL: API port {{ api_port }} not responding."
+ return 1
+ fi
+
+    # Check 3: Does the config contain banned providers? Comment lines are
+    # skipped: the config's own banned-provider banner names them.
+    if grep -v '^[[:space:]]*#' "${CONFIG_FILE}" 2>/dev/null | grep -qi 'anthropic\|claude-sonnet\|claude-opus\|claude-haiku'; then
+        log "FAIL: Config contains banned provider (Anthropic). Rolling back."
+        return 1
+    fi
+
+ return 0
+}
+
+# --- Main ---
+main() {
+ log "Health check starting..."
+
+ if check_health; then
+ log "HEALTHY — snapshotting config."
+ snapshot_config
+ log_telemetry "success" "Health check passed"
+ else
+ log "UNHEALTHY — initiating recovery."
+ log_telemetry "error" "Health check failed — initiating rollback"
+ rollback_config
+ restart_agent
+ fi
+
+ log "Health check complete."
+}
+
+main "$@"
diff --git a/ansible/roles/deadman_switch/templates/deadman_switch.plist.j2 b/ansible/roles/deadman_switch/templates/deadman_switch.plist.j2
new file mode 100644
index 00000000..1a2a7851
--- /dev/null
+++ b/ansible/roles/deadman_switch/templates/deadman_switch.plist.j2
@@ -0,0 +1,21 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+    <key>Label</key>
+    <string>com.timmy.deadman.{{ wizard_name | lower }}</string>
+    <key>ProgramArguments</key>
+    <array>
+        <string>/bin/bash</string>
+        <string>{{ wizard_home }}/deadman_action.sh</string>
+    </array>
+    <key>StartInterval</key>
+    <integer>{{ deadman_check_interval }}</integer>
+    <key>RunAtLoad</key>
+    <true/>
+    <key>StandardOutPath</key>
+    <string>{{ timmy_log_dir }}/deadman-{{ wizard_name }}.log</string>
+    <key>StandardErrorPath</key>
+    <string>{{ timmy_log_dir }}/deadman-{{ wizard_name }}.log</string>
+</dict>
+</plist>
diff --git a/ansible/roles/deadman_switch/templates/deadman_switch.service.j2 b/ansible/roles/deadman_switch/templates/deadman_switch.service.j2
new file mode 100644
index 00000000..c18bb564
--- /dev/null
+++ b/ansible/roles/deadman_switch/templates/deadman_switch.service.j2
@@ -0,0 +1,16 @@
+# Deadman Switch — {{ wizard_name }}
+# Generated by Ansible. DO NOT EDIT MANUALLY.
+
+[Unit]
+Description=Deadman Switch for {{ wizard_name }} wizard
+After=network.target
+
+[Service]
+Type=oneshot
+ExecStart={{ wizard_home }}/deadman_action.sh
+User={{ ansible_user | default('root') }}
+StandardOutput=append:{{ timmy_log_dir }}/deadman-{{ wizard_name }}.log
+StandardError=append:{{ timmy_log_dir }}/deadman-{{ wizard_name }}.log
+
+[Install]
+WantedBy=multi-user.target
diff --git a/ansible/roles/deadman_switch/templates/deadman_switch.timer.j2 b/ansible/roles/deadman_switch/templates/deadman_switch.timer.j2
new file mode 100644
index 00000000..c54e73ea
--- /dev/null
+++ b/ansible/roles/deadman_switch/templates/deadman_switch.timer.j2
@@ -0,0 +1,14 @@
+# Deadman Switch Timer — {{ wizard_name }}
+# Generated by Ansible. DO NOT EDIT MANUALLY.
+# Runs every {{ deadman_check_interval // 60 }} minutes.
+
+[Unit]
+Description=Deadman Switch Timer for {{ wizard_name }} wizard
+
+[Timer]
+OnBootSec=60
+OnUnitActiveSec={{ deadman_check_interval }}s
+AccuracySec=30s
+
+[Install]
+WantedBy=timers.target
diff --git a/ansible/roles/golden_state/defaults/main.yml b/ansible/roles/golden_state/defaults/main.yml
new file mode 100644
index 00000000..3ae95bd7
--- /dev/null
+++ b/ansible/roles/golden_state/defaults/main.yml
@@ -0,0 +1,6 @@
+---
+# golden_state defaults
+# The golden_state_providers list is defined in group_vars/wizards.yml
+# and inventory/hosts.yml (global vars).
+golden_state_enforce: true
+golden_state_backup_before_deploy: true
diff --git a/ansible/roles/golden_state/tasks/main.yml b/ansible/roles/golden_state/tasks/main.yml
new file mode 100644
index 00000000..9c69c8a9
--- /dev/null
+++ b/ansible/roles/golden_state/tasks/main.yml
@@ -0,0 +1,46 @@
+---
+# =============================================================================
+# golden_state/tasks — Deploy and enforce golden state provider chain
+# =============================================================================
+
+- name: "Backup current config before golden state deploy"
+ copy:
+ src: "{{ wizard_home }}/config.yaml"
+ dest: "{{ wizard_home }}/config.yaml.pre-golden-{{ ansible_date_time.epoch }}"
+ remote_src: true
+ when: golden_state_backup_before_deploy
+ ignore_errors: true
+
+- name: "Deploy golden state wizard config"
+ template:
+ src: "../../wizard_base/templates/wizard_config.yaml.j2"
+ dest: "{{ wizard_home }}/config.yaml"
+ mode: "0644"
+ backup: true
+ notify:
+ - "Restart hermes agent (systemd)"
+ - "Restart hermes agent (launchctl)"
+
+- name: "Scan for banned providers in all config files"
+ shell: |
+ FOUND=0
+ for f in {{ wizard_home }}/config.yaml {{ hermes_home }}/config.yaml; do
+ if [ -f "$f" ]; then
+ if grep -qi 'anthropic\|claude-sonnet\|claude-opus\|claude-haiku' "$f"; then
+ echo "BANNED PROVIDER in $f:"
+ grep -ni 'anthropic\|claude-sonnet\|claude-opus\|claude-haiku' "$f"
+ FOUND=1
+ fi
+ fi
+ done
+ exit $FOUND
+ register: provider_scan
+ changed_when: false
+ failed_when: provider_scan.rc != 0 and provider_ban_enforcement == 'strict'
+
+- name: "Report golden state deployment"
+ debug:
+ msg: >
+ {{ wizard_name }} golden state deployed.
+ Provider chain: {{ golden_state_providers | map(attribute='name') | list | join(' → ') }}.
+ Banned provider scan: {{ 'CLEAN' if provider_scan.rc == 0 else 'VIOLATIONS FOUND' }}.
diff --git a/ansible/roles/request_log/files/request_log_schema.sql b/ansible/roles/request_log/files/request_log_schema.sql
new file mode 100644
index 00000000..0d2d26ef
--- /dev/null
+++ b/ansible/roles/request_log/files/request_log_schema.sql
@@ -0,0 +1,75 @@
+-- =============================================================================
+-- request_log — Inference Telemetry Table
+-- =============================================================================
+-- Every agent writes to this table BEFORE and AFTER every inference call.
+-- No exceptions. No summarizing. No describing what you would log.
+-- Actually write the row.
+--
+-- Source: KT Bezalel Architecture Session 2026-04-08
+-- =============================================================================
+
+CREATE TABLE IF NOT EXISTS request_log (
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
+ timestamp TEXT NOT NULL DEFAULT (datetime('now')),
+ agent_name TEXT NOT NULL,
+ provider TEXT NOT NULL,
+ model TEXT NOT NULL,
+ endpoint TEXT NOT NULL,
+ tokens_in INTEGER,
+ tokens_out INTEGER,
+ latency_ms INTEGER,
+ status TEXT NOT NULL, -- 'success', 'error', 'timeout', 'fallback'
+ error_message TEXT
+);
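+
+-- Example of the row an agent writes after a call (sketch; values are
+-- illustrative, not recorded data):
+--   INSERT INTO request_log
+--     (agent_name, provider, model, endpoint, tokens_in, tokens_out, latency_ms, status)
+--   VALUES ('Timmy', 'kimi-coding', 'kimi-k2.5',
+--           'https://api.kimi.com/coding/v1', 1200, 350, 2100, 'success');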
+
+-- Index for common queries
+CREATE INDEX IF NOT EXISTS idx_request_log_agent
+ ON request_log (agent_name, timestamp);
+
+CREATE INDEX IF NOT EXISTS idx_request_log_provider
+ ON request_log (provider, timestamp);
+
+CREATE INDEX IF NOT EXISTS idx_request_log_status
+ ON request_log (status, timestamp);
+
+-- View: recent activity per agent (last hour)
+CREATE VIEW IF NOT EXISTS v_recent_activity AS
+ SELECT
+ agent_name,
+ provider,
+ model,
+ status,
+ COUNT(*) as call_count,
+ AVG(latency_ms) as avg_latency_ms,
+ SUM(tokens_in) as total_tokens_in,
+ SUM(tokens_out) as total_tokens_out
+ FROM request_log
+ WHERE timestamp > datetime('now', '-1 hour')
+ GROUP BY agent_name, provider, model, status;
+
+-- View: provider reliability (last 24 hours)
+CREATE VIEW IF NOT EXISTS v_provider_reliability AS
+ SELECT
+ provider,
+ model,
+ COUNT(*) as total_calls,
+ SUM(CASE WHEN status = 'success' THEN 1 ELSE 0 END) as successes,
+ SUM(CASE WHEN status = 'error' THEN 1 ELSE 0 END) as errors,
+ SUM(CASE WHEN status = 'timeout' THEN 1 ELSE 0 END) as timeouts,
+ SUM(CASE WHEN status = 'fallback' THEN 1 ELSE 0 END) as fallbacks,
+ ROUND(100.0 * SUM(CASE WHEN status = 'success' THEN 1 ELSE 0 END) / COUNT(*), 1) as success_rate,
+ AVG(latency_ms) as avg_latency_ms
+ FROM request_log
+ WHERE timestamp > datetime('now', '-24 hours')
+ GROUP BY provider, model;
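+
+-- Example operator queries against the views (sketches):
+--   sqlite3 "$HOME/.local/timmy/request_log.db" "SELECT * FROM v_provider_reliability;"
+--   sqlite3 "$HOME/.local/timmy/request_log.db" "SELECT * FROM v_recent_activity;"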
diff --git a/ansible/roles/request_log/tasks/main.yml b/ansible/roles/request_log/tasks/main.yml
new file mode 100644
index 00000000..1ab2521d
--- /dev/null
+++ b/ansible/roles/request_log/tasks/main.yml
@@ -0,0 +1,50 @@
+---
+# =============================================================================
+# request_log/tasks — Deploy Telemetry Table
+# =============================================================================
+# "This is non-negotiable infrastructure. Without it, we cannot verify
+# if any agent actually executed what it claims."
+# — KT Bezalel 2026-04-08
+# =============================================================================
+
+- name: "Create telemetry directory"
+ file:
+ path: "{{ request_log_path | dirname }}"
+ state: directory
+ mode: "0755"
+
+- name: "Deploy request_log schema"
+ copy:
+ src: request_log_schema.sql
+ dest: "{{ wizard_home }}/request_log_schema.sql"
+ mode: "0644"
+
+- name: "Initialize request_log database"
+ shell: |
+ sqlite3 "{{ request_log_path }}" < "{{ wizard_home }}/request_log_schema.sql"
+ args:
+ creates: "{{ request_log_path }}"
+
+- name: "Verify request_log table exists"
+ shell: |
+ sqlite3 "{{ request_log_path }}" ".tables" | grep -q "request_log"
+ register: table_check
+ changed_when: false
+
+- name: "Verify request_log schema matches"
+ shell: |
+ sqlite3 "{{ request_log_path }}" ".schema request_log" | grep -q "agent_name"
+ register: schema_check
+ changed_when: false
+
+- name: "Set permissions on request_log database"
+ file:
+ path: "{{ request_log_path }}"
+ mode: "0644"
+
+- name: "Report request_log status"
+ debug:
+ msg: >
+ {{ wizard_name }} request_log: {{ request_log_path }}
+ — table exists: {{ table_check.rc == 0 }}
+ — schema valid: {{ schema_check.rc == 0 }}
diff --git a/ansible/roles/wizard_base/defaults/main.yml b/ansible/roles/wizard_base/defaults/main.yml
new file mode 100644
index 00000000..d88e55a3
--- /dev/null
+++ b/ansible/roles/wizard_base/defaults/main.yml
@@ -0,0 +1,6 @@
+---
+# wizard_base defaults
+wizard_user: "{{ ansible_user | default('root') }}"
+wizard_group: "{{ ansible_user | default('root') }}"
+timmy_base_dir: "{{ ansible_env.HOME }}/.local/timmy"
+timmy_config_repo: "https://forge.alexanderwhitestone.com/Timmy_Foundation/timmy-config.git"
diff --git a/ansible/roles/wizard_base/handlers/main.yml b/ansible/roles/wizard_base/handlers/main.yml
new file mode 100644
index 00000000..3897e6d9
--- /dev/null
+++ b/ansible/roles/wizard_base/handlers/main.yml
@@ -0,0 +1,11 @@
+---
+- name: "Restart hermes agent (systemd)"
+ systemd:
+ name: "hermes-{{ wizard_name | lower }}"
+ state: restarted
+ when: machine_type == 'vps'
+
+- name: "Restart hermes agent (launchctl)"
+ shell: "launchctl kickstart -k ai.hermes.{{ wizard_name | lower }}"
+ when: machine_type == 'mac'
+ ignore_errors: true
diff --git a/ansible/roles/wizard_base/tasks/main.yml b/ansible/roles/wizard_base/tasks/main.yml
new file mode 100644
index 00000000..a6f39414
--- /dev/null
+++ b/ansible/roles/wizard_base/tasks/main.yml
@@ -0,0 +1,69 @@
+---
+# =============================================================================
+# wizard_base/tasks — Common wizard setup
+# =============================================================================
+
+- name: "Create wizard directories"
+ file:
+ path: "{{ item }}"
+ state: directory
+ mode: "0755"
+ loop:
+ - "{{ wizard_home }}"
+ - "{{ wizard_home }}/workspace"
+ - "{{ hermes_home }}"
+ - "{{ hermes_home }}/bin"
+ - "{{ hermes_home }}/skins"
+ - "{{ hermes_home }}/playbooks"
+ - "{{ hermes_home }}/memories"
+ - "~/.local/timmy"
+ - "~/.local/timmy/fleet-health"
+ - "~/.local/timmy/snapshots"
+ - "~/.timmy"
+
+- name: "Clone/update timmy-config"
+ git:
+ repo: "{{ upstream_repo }}"
+ dest: "{{ wizard_home }}/workspace/timmy-config"
+ version: "{{ upstream_branch }}"
+ force: false
+ update: true
+ ignore_errors: true # May fail on first run if no SSH key
+
+- name: "Deploy SOUL.md"
+ copy:
+ src: "{{ wizard_home }}/workspace/timmy-config/SOUL.md"
+ dest: "~/.timmy/SOUL.md"
+ remote_src: true
+ mode: "0644"
+ ignore_errors: true
+
+- name: "Deploy thin config (immutable pointer to upstream)"
+ template:
+ src: thin_config.yml.j2
+ dest: "{{ thin_config_path }}"
+ mode: "{{ thin_config_mode }}"
+ tags: [thin_config]
+
+- name: "Ensure Python3 and pip are available"
+ package:
+ name:
+ - python3
+ - python3-pip
+ state: present
+ when: machine_type == 'vps'
+ ignore_errors: true
+
+- name: "Ensure PyYAML is installed (for config validation)"
+ pip:
+ name: pyyaml
+ state: present
+ when: machine_type == 'vps'
+ ignore_errors: true
+
+- name: "Create Ansible log directory"
+ file:
+ path: /var/log/ansible
+ state: directory
+ mode: "0755"
+ ignore_errors: true
diff --git a/ansible/roles/wizard_base/templates/thin_config.yml.j2 b/ansible/roles/wizard_base/templates/thin_config.yml.j2
new file mode 100644
index 00000000..8e896458
--- /dev/null
+++ b/ansible/roles/wizard_base/templates/thin_config.yml.j2
@@ -0,0 +1,41 @@
+# =============================================================================
+# Thin Config — {{ wizard_name }}
+# =============================================================================
+# THIS FILE IS READ-ONLY. Agents CANNOT modify it.
+# It contains only pointers to upstream. The actual config lives in Gitea.
+#
+# Agent wakes up → pulls config from upstream → loads → runs.
+# If anything tries to mutate this → fails gracefully → pulls fresh on restart.
+#
+# Only way to permanently change config: commit to Gitea, merge PR, Ansible deploys.
+#
+# Generated by Ansible on {{ ansible_date_time.iso8601 }}
+# DO NOT EDIT MANUALLY.
+# =============================================================================
+
+identity:
+ wizard_name: "{{ wizard_name }}"
+ wizard_role: "{{ wizard_role }}"
+ machine: "{{ inventory_hostname }}"
+
+upstream:
+ repo: "{{ upstream_repo }}"
+ branch: "{{ upstream_branch }}"
+ config_path: "wizards/{{ wizard_name | lower }}/config.yaml"
+ pull_on_wake: {{ config_pull_on_wake | lower }}
+
+recovery:
+ deadman_enabled: {{ deadman_enabled | lower }}
+ snapshot_dir: "{{ deadman_snapshot_dir }}"
+ restart_cooldown: {{ deadman_restart_cooldown }}
+ max_restart_attempts: {{ deadman_max_restart_attempts }}
+ escalation_channel: "{{ deadman_escalation_channel }}"
+
+telemetry:
+ request_log_path: "{{ request_log_path }}"
+ request_log_enabled: {{ request_log_enabled | lower }}
+
+local_overrides:
+ # Runtime overrides go here. They are EPHEMERAL — not persisted across restarts.
+ # On restart, this section is reset to empty.
+ {}
diff --git a/ansible/roles/wizard_base/templates/wizard_config.yaml.j2 b/ansible/roles/wizard_base/templates/wizard_config.yaml.j2
new file mode 100644
index 00000000..c0e1ecfe
--- /dev/null
+++ b/ansible/roles/wizard_base/templates/wizard_config.yaml.j2
@@ -0,0 +1,115 @@
+# =============================================================================
+# {{ wizard_name }} — Wizard Configuration (Golden State)
+# =============================================================================
+# Generated by Ansible on {{ ansible_date_time.iso8601 }}
+# DO NOT EDIT MANUALLY. Changes go through Gitea PR → Ansible deploy.
+#
+# Provider chain: {{ golden_state_providers | map(attribute='name') | list | join(' → ') }}
+# Anthropic is PERMANENTLY BANNED.
+# =============================================================================
+
+model:
+ default: {{ wizard_model_primary }}
+ provider: {{ wizard_provider_primary }}
+ context_length: 65536
+ base_url: {{ golden_state_providers[0].base_url }}
+
+toolsets:
+ - all
+
+fallback_providers:
+{% for provider in golden_state_providers %}
+ - provider: {{ provider.name }}
+ model: {{ provider.model }}
+{% if provider.base_url is defined %}
+ base_url: {{ provider.base_url }}
+{% endif %}
+{% if provider.api_key_env is defined %}
+ api_key_env: {{ provider.api_key_env }}
+{% endif %}
+ timeout: {{ provider.timeout }}
+ reason: "{{ provider.reason }}"
+{% endfor %}
+
+agent:
+ max_turns: {{ agent_max_turns }}
+ reasoning_effort: {{ agent_reasoning_effort }}
+ verbose: {{ agent_verbose | lower }}
+
+terminal:
+ backend: local
+ cwd: .
+ timeout: 180
+ persistent_shell: true
+
+browser:
+ inactivity_timeout: 120
+ command_timeout: 30
+ record_sessions: false
+
+display:
+ compact: false
+ personality: ''
+ resume_display: full
+ busy_input_mode: interrupt
+ bell_on_complete: false
+ show_reasoning: false
+ streaming: false
+ show_cost: false
+ tool_progress: all
+
+memory:
+ memory_enabled: true
+ user_profile_enabled: true
+ memory_char_limit: 2200
+ user_char_limit: 1375
+ nudge_interval: 10
+ flush_min_turns: 6
+
+approvals:
+ mode: {{ agent_approval_mode }}
+
+security:
+ redact_secrets: true
+ tirith_enabled: false
+
+platforms:
+ api_server:
+ enabled: true
+ extra:
+ host: 127.0.0.1
+ port: {{ api_port }}
+
+session_reset:
+ mode: none
+ idle_minutes: 0
+
+skills:
+ creation_nudge_interval: 15
+
+system_prompt_suffix: |
+ You are {{ wizard_name }}, {{ wizard_role }}.
+ Your soul is defined in SOUL.md — read it, live it.
+ Hermes is your harness.
+ {{ golden_state_providers[0].name }} is your primary provider.
+ Refusal over fabrication. If you do not know, say so.
+ Sovereignty and service always.
+
+providers:
+{% for provider in golden_state_providers %}
+ {{ provider.name }}:
+ base_url: {{ provider.base_url }}
+ timeout: {{ provider.timeout | default(60) }}
+{% if provider.name == 'kimi-coding' %}
+ max_retries: 3
+{% endif %}
+{% endfor %}
+
+# =============================================================================
+# BANNED PROVIDERS — DO NOT ADD
+# =============================================================================
+# The following providers are PERMANENTLY BANNED:
+# - anthropic (any model: claude-sonnet, claude-opus, claude-haiku)
+# Enforcement: pre-commit hook, linter, Ansible validation, this comment.
+# Adding any banned provider will cause Ansible deployment to FAIL.
+# =============================================================================
diff --git a/ansible/scripts/deploy_on_webhook.sh b/ansible/scripts/deploy_on_webhook.sh
new file mode 100644
index 00000000..3c644e8d
--- /dev/null
+++ b/ansible/scripts/deploy_on_webhook.sh
@@ -0,0 +1,88 @@
+#!/usr/bin/env bash
+# =============================================================================
+# Gitea Webhook Handler — Trigger Ansible Deploy on Merge
+# =============================================================================
+# This script is called by the Gitea webhook when a PR is merged
+# to the main branch of timmy-config.
+#
+# Setup:
+# 1. Add webhook in Gitea: Settings → Webhooks → Add Webhook
+# 2. URL: http://localhost:9000/hooks/deploy-timmy-config
+# 3. Events: Pull Request (merged only)
+# 4. Secret: <webhook secret>
+#
+# This script runs ansible-pull to update the local machine.
+# For fleet-wide deploys, each machine runs ansible-pull independently.
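+#
+# A minimal listener definition, assuming the standalone `webhook` daemon
+# (github.com/adnanh/webhook) is what answers on port 9000; swap in the
+# fleet's actual listener if it differs:
+#
+#   [{
+#     "id": "deploy-timmy-config",
+#     "execute-command": "<path to this script>",
+#     "trigger-rule": {"match": {"type": "payload-hmac-sha256",
+#       "secret": "<webhook secret>",
+#       "parameter": {"source": "header", "name": "X-Gitea-Signature"}}}
+#   }]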
+# =============================================================================
+
+set -euo pipefail
+
+REPO="https://forge.alexanderwhitestone.com/Timmy_Foundation/timmy-config.git"
+BRANCH="main"
+ANSIBLE_DIR="ansible"
+LOG_FILE="/var/log/ansible/webhook-deploy.log"
+LOCK_FILE="/tmp/ansible-deploy.lock"
+
+log() {
+ echo "[$(date -u +%Y-%m-%dT%H:%M:%SZ)] [webhook] $*" | tee -a "${LOG_FILE}"
+}
+
+# Prevent concurrent deploys
+if [ -f "${LOCK_FILE}" ]; then
+ LOCK_AGE=$(( $(date +%s) - $(stat -c %Y "${LOCK_FILE}" 2>/dev/null || echo 0) ))
+ if [ "${LOCK_AGE}" -lt 300 ]; then
+ log "Deploy already in progress (lock age: ${LOCK_AGE}s). Skipping."
+ exit 0
+ else
+ log "Stale lock file (${LOCK_AGE}s old). Removing."
+ rm -f "${LOCK_FILE}"
+ fi
+fi
+
+trap 'rm -f "${LOCK_FILE}"' EXIT
+touch "${LOCK_FILE}"
+
+log "Webhook triggered. Starting ansible-pull..."
+
+# Pull latest config
+cd /tmp
+rm -rf timmy-config-deploy
+git clone --depth 1 --branch "${BRANCH}" "${REPO}" timmy-config-deploy 2>&1 | tee -a "${LOG_FILE}"
+
+cd timmy-config-deploy/${ANSIBLE_DIR}
+
+# Run Ansible against localhost
+log "Running Ansible playbook..."
+set +e  # capture the exit code ourselves instead of aborting under set -e
+ansible-playbook \
+    -i inventory/hosts.yml \
+    playbooks/site.yml \
+    --limit "$(hostname)" \
+    --diff \
+    2>&1 | tee -a "${LOG_FILE}"
+RESULT=${PIPESTATUS[0]}  # exit code of ansible-playbook, not tee
+set -e
+
+if [ ${RESULT} -eq 0 ]; then
+ log "Deploy successful."
+else
+ log "ERROR: Deploy failed with exit code ${RESULT}."
+fi
+
+# Cleanup
+rm -rf /tmp/timmy-config-deploy
+
+log "Webhook handler complete."
+exit ${RESULT}
diff --git a/ansible/scripts/validate_config.py b/ansible/scripts/validate_config.py
new file mode 100644
index 00000000..1b0fb58a
--- /dev/null
+++ b/ansible/scripts/validate_config.py
@@ -0,0 +1,155 @@
+#!/usr/bin/env python3
+"""
+Config Validator — The Timmy Foundation
+Validates wizard configs against golden state rules.
+Run before any config deploy to catch violations early.
+
+Usage:
+    python3 validate_config.py <path/to/config.yaml>
+ python3 validate_config.py --all # Validate all wizard configs
+
+Exit codes:
+ 0 — All validations passed
+ 1 — Validation errors found
+ 2 — File not found or parse error
+"""
+
+import sys
+import os
+import yaml
+import fnmatch
+from pathlib import Path
+
+# === BANNED PROVIDERS — HARD POLICY ===
+BANNED_PROVIDERS = {"anthropic", "claude"}
+BANNED_MODEL_PATTERNS = [
+ "claude-*",
+ "anthropic/*",
+ "*sonnet*",
+ "*opus*",
+ "*haiku*",
+]
+
+# === REQUIRED FIELDS ===
+REQUIRED_FIELDS = {
+ "model": ["default", "provider"],
+ "fallback_providers": None, # Must exist as a list
+}
+
+
+def is_banned_model(model_name: str) -> bool:
+ """Check if a model name matches any banned pattern."""
+ model_lower = model_name.lower()
+ for pattern in BANNED_MODEL_PATTERNS:
+ if fnmatch.fnmatch(model_lower, pattern):
+ return True
+ return False
+
+
+def validate_config(config_path: str) -> list[str]:
+ """Validate a wizard config file. Returns list of error strings."""
+ errors = []
+
+ try:
+ with open(config_path) as f:
+ cfg = yaml.safe_load(f)
+ except FileNotFoundError:
+ return [f"File not found: {config_path}"]
+ except yaml.YAMLError as e:
+ return [f"YAML parse error: {e}"]
+
+ if not cfg:
+ return ["Config file is empty"]
+
+ # Check required fields
+ for section, fields in REQUIRED_FIELDS.items():
+ if section not in cfg:
+ errors.append(f"Missing required section: {section}")
+ elif fields:
+ for field in fields:
+ if field not in cfg[section]:
+ errors.append(f"Missing required field: {section}.{field}")
+
+ # Check default provider
+ default_provider = cfg.get("model", {}).get("provider", "")
+ if default_provider.lower() in BANNED_PROVIDERS:
+ errors.append(f"BANNED default provider: {default_provider}")
+
+ default_model = cfg.get("model", {}).get("default", "")
+ if is_banned_model(default_model):
+ errors.append(f"BANNED default model: {default_model}")
+
+ # Check fallback providers
+ for i, fb in enumerate(cfg.get("fallback_providers", [])):
+ provider = fb.get("provider", "")
+ model = fb.get("model", "")
+
+ if provider.lower() in BANNED_PROVIDERS:
+ errors.append(f"BANNED fallback provider [{i}]: {provider}")
+
+ if is_banned_model(model):
+ errors.append(f"BANNED fallback model [{i}]: {model}")
+
+ # Check providers section
+ for name, provider_cfg in cfg.get("providers", {}).items():
+ if name.lower() in BANNED_PROVIDERS:
+ errors.append(f"BANNED provider in providers section: {name}")
+
+ base_url = str(provider_cfg.get("base_url", ""))
+ if "anthropic" in base_url.lower():
+ errors.append(f"BANNED URL in provider {name}: {base_url}")
+
+ # Check system prompt for banned references
+ prompt = cfg.get("system_prompt_suffix", "")
+ if isinstance(prompt, str):
+ for banned in BANNED_PROVIDERS:
+ if banned in prompt.lower():
+ errors.append(f"BANNED provider referenced in system_prompt_suffix: {banned}")
+
+ return errors
+
+
+def main():
+ if len(sys.argv) < 2:
+ print(f"Usage: {sys.argv[0]} [--all]")
+ sys.exit(2)
+
+ if sys.argv[1] == "--all":
+ # Validate all wizard configs in the repo
+ repo_root = Path(__file__).parent.parent.parent
+ wizard_dir = repo_root / "wizards"
+ all_errors = {}
+
+ for wizard_path in sorted(wizard_dir.iterdir()):
+ config_file = wizard_path / "config.yaml"
+ if config_file.exists():
+ errors = validate_config(str(config_file))
+ if errors:
+ all_errors[wizard_path.name] = errors
+
+ if all_errors:
+ print("VALIDATION FAILED:")
+ for wizard, errors in all_errors.items():
+ print(f"\n {wizard}:")
+ for err in errors:
+ print(f" - {err}")
+ sys.exit(1)
+ else:
+ print("All wizard configs passed validation.")
+ sys.exit(0)
+ else:
+ config_path = sys.argv[1]
+ errors = validate_config(config_path)
+
+ if errors:
+ print(f"VALIDATION FAILED for {config_path}:")
+ for err in errors:
+ print(f" - {err}")
+ sys.exit(1)
+ else:
+ print(f"PASSED: {config_path}")
+ sys.exit(0)
+
+
+if __name__ == "__main__":
+ main()