Some checks failed
PR Checklist / pr-checklist (pull_request) Failing after 1m27s
Implements the Ansible Infrastructure as Code story from KT 2026-04-08.

One canonical Ansible playbook defines:
- Deadman switch (snapshot good config on health, rollback+restart on death)
- Golden state config deployment (Anthropic BANNED, Kimi→Gemini→Ollama)
- Cron schedule (source-controlled, no manual crontab edits)
- Agent startup sequence (pull→validate→start→verify)
- request_log telemetry table (every inference call logged)
- Thin config pattern (immutable local pointer to upstream)
- Gitea webhook handler (deploy on merge)
- Config validator (rejects banned providers)

Fleet inventory: Timmy (Mac), Allegro (VPS), Bezalel (VPS), Ezra (VPS)
Roles: wizard_base, golden_state, deadman_switch, request_log, cron_manager
Addresses: timmy-config #442, #443, #444, #445, #446
References: KT Final 2026-04-08 P2, KT Bezalel 2026-04-08 #1-#5
99 lines
3.3 KiB
YAML
99 lines
3.3 KiB
YAML
---
# =============================================================================
# agent_startup.yml — Resurrect Wizards from Checked-in Configs
# =============================================================================
# Brings wizards back online using golden state configs.
# Order: pull config → validate → start agent → verify with request_log
#
# Variables read by this play (none are defined in this file):
#   upstream_repo, upstream_branch, wizard_home, wizard_name, machine_type,
#   banned_providers, api_port, request_log_path
# =============================================================================

- name: "Agent Startup Sequence"
  hosts: wizards
  become: true
  serial: 1  # One wizard at a time to avoid cascading issues

  tasks:
    - name: "Pull latest config from upstream"
      git:
        repo: "{{ upstream_repo }}"
        dest: "{{ wizard_home }}/workspace/timmy-config"
        version: "{{ upstream_branch }}"
        force: true  # Discard local modifications in the checkout
      tags: [pull]

    - name: "Deploy golden state config"
      include_role:
        name: golden_state
        # Tags on a dynamic include cover only the include statement itself.
        # `apply` hands the tag down to the role's tasks so that running with
        # `--tags config` executes them instead of skipping them.
        apply:
          tags: [config]
      tags: [config]

    # Exits non-zero (failing this host) when config.yaml names a banned
    # provider, either in `fallback_providers` or as `model.provider`.
    # `banned_providers` is rendered directly into the Python source, so it
    # must render as a Python literal (e.g. a list of strings).
    - name: "Validate config — no banned providers"
      shell: |
        python3 -c "
        import yaml, sys
        with open('{{ wizard_home }}/config.yaml') as f:
            cfg = yaml.safe_load(f)
        banned = {{ banned_providers }}
        for p in cfg.get('fallback_providers', []):
            if p.get('provider', '') in banned:
                print(f'BANNED: {p[\"provider\"]}', file=sys.stderr)
                sys.exit(1)
        model = cfg.get('model', {}).get('provider', '')
        if model in banned:
            print(f'BANNED default provider: {model}', file=sys.stderr)
            sys.exit(1)
        print('Config validated — no banned providers.')
        "
      register: config_valid
      changed_when: false  # Read-only check; it never modifies the host
      tags: [validate]

    - name: "Ensure hermes-agent service is running"
      systemd:
        name: "hermes-{{ wizard_name | lower }}"
        state: started
        enabled: true
      when: machine_type == 'vps'
      tags: [start]
      ignore_errors: true  # Service may not exist yet on all machines

    # Tries launchctl first and falls back to starting the agent directly.
    # The fallback is grouped in braces: `||` and `&&` have equal precedence
    # in the shell, so without the grouping `hermes agent start` would also
    # run after a *successful* kickstart.
    # NOTE(review): `launchctl kickstart` usually expects a domain-qualified
    # target (system/<label> or gui/<uid>/<label>) — confirm the bare label
    # resolves on the Mac host.
    - name: "Start hermes agent (Mac — launchctl)"
      shell: |
        launchctl kickstart -k "ai.hermes.{{ wizard_name | lower }}" 2>/dev/null || {
          cd "{{ wizard_home }}" && hermes agent start --daemon 2>&1 | tail -5
        }
      when: machine_type == 'mac'
      tags: [start]
      ignore_errors: true  # Best effort; the verify steps below report status

    - name: "Wait for agent to come online"
      wait_for:
        host: 127.0.0.1
        port: "{{ api_port }}"
        timeout: 60
        state: started
      tags: [verify]
      ignore_errors: true  # A timeout is reported but does not abort the play

    # Counts request_log rows for this wizard from the last five minutes.
    # The database is opened read-only (mode=ro) so a missing file produces an
    # error instead of being created as an empty database by this check.
    - name: "Verify agent is alive — check request_log for activity"
      shell: |
        sleep 10
        python3 -c "
        import sqlite3, sys
        db = sqlite3.connect('file:{{ request_log_path }}?mode=ro', uri=True)
        cursor = db.execute('''
            SELECT COUNT(*) FROM request_log
            WHERE agent_name = '{{ wizard_name }}'
            AND timestamp > datetime('now', '-5 minutes')
        ''')
        count = cursor.fetchone()[0]
        db.close()
        if count > 0:
            print(f'{{ wizard_name }} is alive — {count} recent inference calls logged.')
        else:
            print(f'WARNING: {{ wizard_name }} started but no telemetry yet.')
        "
      register: agent_status
      changed_when: false  # Read-only query; it never modifies the host
      tags: [verify]
      ignore_errors: true  # Missing database/table must not abort the play

    # Falls back to a fixed message when the verify task produced no stdout
    # (for example when it was skipped by tag selection).
    - name: "Report startup status"
      debug:
        msg: "{{ wizard_name }}: {{ agent_status.stdout | default('startup attempted') }}"
      tags: [always]