Files
hermes-config/bin/tower-timmy.sh
Alexander Whitestone 00b0dc8e38 graceful pause on backend failure for all loop scripts
- tower-hermes.sh: health check, pause file, exponential backoff, message held on failure
- tower-timmy.sh: same pattern for Ollama backend, messages held not lost
- timmy-loop.sh: pause after 2 consecutive cycle failures, backoff probe
- tower-watchdog.sh: respect .paused files, don't restart paused loops

Behavior:
  1 failure  → log warning, retry next turn
  2 failures → enter pause, write .paused file, probe with backoff
  Backend returns → auto-resume, clear pause file
  Watchdog sees .paused → skip that loop, don't fight the pause
  Messages from the other side → held in inbox, not lost
2026-03-18 20:08:57 -04:00

387 lines
17 KiB
Bash
Executable File

#!/usr/bin/env bash
# ═══════════════════════════════════════════════════════════════════════════
# TOWER-TIMMY.SH — Timmy's side of the Tower conversation loop
#
# PURPOSE:
# This script runs Timmy in the Tower — a persistent tmux pane where
# Timmy and Hermes talk continuously. Timmy is in the DRIVER'S SEAT.
# He reads what Hermes sends, thinks, and responds. He initiates when
# the inbox is empty long enough.
#
# DESIGN PHILOSOPHY (read this when you're confused):
# - Timmy runs on local Hermes 4. He may be slow. He may be dumb.
# The script compensates — it retries, it waits, it logs everything.
# - Hermes T is cloud-backed and faster. That's fine. Timmy leads the
# conversation on SUBSTANCE even if Hermes responds faster.
# - If something breaks, the script heals itself and keeps going.
# It never silently dies. It always logs why it stopped.
# - HERMES_HOME=~/.timmy ensures this always runs as TIMMY, not Hermes.
#
# COMMUNICATION CHANNEL:
# INBOX: ~/.tower/hermes-to-timmy.msg ← Hermes writes here
# OUTBOX: ~/.tower/timmy-to-hermes.msg ← Timmy writes here
# LOG: ~/.tower/timmy.log ← everything Timmy does
# STATE: ~/.tower/timmy-state.txt ← Timmy's current mood/topic
#
# SELF-HEALING:
# - Stale lock files are detected and cleared
# - Failed hermes calls retry up to MAX_RETRIES times
# - If Timmy's response is empty or too short, he retries the same prompt
# - If inbox grows stale (no reply in INITIATE_AFTER seconds), Timmy
# initiates a new thread rather than sitting silent
# - Watchdog: if this script crashes, tower-watchdog.sh restarts it
#
# TO RUN:
# ~/hermes-config/bin/tower-timmy.sh
# (tower-watchdog.sh calls this automatically)
#
# TO WATCH:
# tail -f ~/.tower/timmy.log
# tmux attach -t tower (then look at right pane)
# ═══════════════════════════════════════════════════════════════════════════
set -uo pipefail
# ── Config ──────────────────────────────────────────────────────────────────
# Every runtime file (mailboxes, lock, pause marker, log, state) lives in one
# directory under the user's home.
TOWER_DIR="${HOME}/.tower"
INBOX="${TOWER_DIR}/hermes-to-timmy.msg"
OUTBOX="${TOWER_DIR}/timmy-to-hermes.msg"
LOCK="${TOWER_DIR}/timmy.lock"
PAUSE_FILE="${TOWER_DIR}/timmy.paused"
LOG="${TOWER_DIR}/timmy.log"
# Short plaintext file describing the current conversation topic/mood.
STATE="${TOWER_DIR}/timmy-state.txt"
SESSION_NAME="tower-timmy"
SESSION_FLAG="${TOWER_DIR}/.timmy-session-exists"

# Timing knobs — all values are in seconds unless noted.
TURN_DELAY=5              # pause between inbox checks
MAX_RETRIES=3             # attempts per hermes call (a count, not seconds)
RETRY_DELAY=10            # pause between those attempts
INITIATE_AFTER=300        # silence tolerated before Timmy opens a thread (5 min)
MAX_PROMPT_LEN=4000       # prompt length cap, in characters, for small models
LOCK_MAX_AGE=3600         # lock older than this is treated as stale (1 hour)
HEALTH_CHECK_INTERVAL=30  # first delay between health probes while paused
MAX_BACKOFF=300           # ceiling for the probe delay (5 min)

# Mutable runtime counter: sequential backend failures seen so far.
CONSECUTIVE_FAILURES=0

# ── Identity — ALWAYS run as Timmy, never as Hermes ────────────────────────
HERMES_HOME="${HOME}/.timmy"
export HERMES_HOME
PATH="${HOME}/.local/bin:${HOME}/.hermes/bin:/usr/local/bin:/usr/bin:/bin:${PATH}"
export PATH

mkdir -p "$TOWER_DIR"
# ── Logging ─────────────────────────────────────────────────────────────────
# Emit one timestamped line to stdout and append the same line to $LOG.
#   $1 - message text
#   $2 - level tag shown in brackets (defaults to INFO)
# NOTE: output goes to stdout, so a caller running inside $(...) will capture
# the log line along with its own output unless it redirects this call.
log() {
  local severity="${2:-INFO}"
  local stamp
  stamp=$(date '+%H:%M:%S')
  printf '[%s] [%s] %s\n' "$stamp" "$severity" "$1" | tee -a "$LOG"
}
# Print a blank line followed by a "━━━ title ━━━" banner, both to stdout and
# appended to $LOG. No timestamp or level is added.
#   $1 - section title
log_section() {
  printf '\n━━━ %s ━━━\n' "$1" | tee -a "$LOG"
}
# ── Cleanup on exit ──────────────────────────────────────────────────────────
# Exit handler: log the shutdown and remove this loop's lock and pause marker.
#
# The EXIT trap is installed before the duplicate-instance check, so it also
# fires when a second launch bails out because another loop is already
# running. In that case the lock and pause file belong to the live process and
# must be left alone; otherwise the next launch would start a duplicate loop
# and a paused loop would lose its pause marker.
# Globals read: LOCK, PAUSE_FILE
cleanup() {
  local owner=""
  log "Tower loop exiting (PID $$)" "SHUTDOWN"
  if [ -f "$LOCK" ]; then
    owner=$(cat "$LOCK" 2>/dev/null || true)
  fi
  # Only a lock held by a *different, still-running* PID is foreign. A missing,
  # empty, dead-owner, or own-PID lock gets the normal cleanup.
  if [ -n "$owner" ] && [ "$owner" != "$$" ] && kill -0 "$owner" 2>/dev/null; then
    return 0
  fi
  rm -f "$LOCK"
  rm -f "$PAUSE_FILE"
}
trap cleanup EXIT
# Signals exit with status 0; the EXIT trap above then performs the cleanup.
trap 'log "Caught SIGTERM" "SHUTDOWN"; exit 0' TERM
trap 'log "Caught SIGINT" "SHUTDOWN"; exit 0' INT
# ── Stale lock detection ─────────────────────────────────────────────────────
# If a lock exists but the PID is dead, or the lock is older than LOCK_MAX_AGE,
# clear it. This prevents the loop from refusing to start after a crash.
# Print the modification time of a file as epoch seconds, or 0 if it cannot be
# determined. GNU/busybox stat uses `-c %Y`; BSD/macOS stat uses `-f %m`.
# Using `-f %m` on GNU stat prints filesystem info instead of a number, which
# breaks the age arithmetic below — so try the GNU form first and validate.
#   $1 - path to inspect
_lock_mtime() {
  local file="$1"
  local mtime=""
  mtime=$(stat -c %Y "$file" 2>/dev/null) \
    || mtime=$(stat -f %m "$file" 2>/dev/null) \
    || mtime=0
  case "$mtime" in
    '' | *[!0-9]*) mtime=0 ;;
  esac
  printf '%s\n' "$mtime"
}

if [ -f "$LOCK" ]; then
  LOCK_PID=$(cat "$LOCK" 2>/dev/null || echo "")
  # An unreadable mtime yields 0, i.e. a huge age, so the lock counts as stale.
  LOCK_AGE=$(( $(date +%s) - $(_lock_mtime "$LOCK") ))
  if [ -n "$LOCK_PID" ] && kill -0 "$LOCK_PID" 2>/dev/null && [ "$LOCK_AGE" -lt "$LOCK_MAX_AGE" ]; then
    log "Tower loop already running (PID $LOCK_PID). Exiting." "WARN"
    # Disarm the EXIT trap: its handler deletes the lock and pause file, and
    # here those belong to the instance that is still running.
    trap - EXIT
    exit 1
  else
    # NOTE(review): the lock's mtime is never refreshed, so a healthy loop that
    # has run longer than LOCK_MAX_AGE is also treated as stale here — confirm
    # that is intended.
    log "Stale lock found (PID=$LOCK_PID, age=${LOCK_AGE}s). Clearing." "WARN"
    rm -f "$LOCK"
  fi
fi
echo $$ > "$LOCK"
# ── Backend health check (Ollama) ─────────────────────────────────────────────
# Probe the local Ollama HTTP endpoint that Timmy's model runs behind.
# Returns 0 when the tags endpoint answers with a non-empty body within
# 5 seconds, 1 otherwise (curl error, HTTP failure, or empty reply).
check_backend() {
  local body
  body=$(curl -sf --max-time 5 http://localhost:11434/api/tags 2>/dev/null) || true
  [ -n "$body" ]
}
# ── Pause / Resume ────────────────────────────────────────────────────────────
# Mark the loop as paused: write the reason and a wall-clock time to
# $PAUSE_FILE, log a warning, print a notice to the pane, and record the
# pause in the state file.
#   $1 - human-readable reason (defaults to "backend unreachable")
enter_pause() {
  local why="${1:-backend unreachable}"
  local since
  since=$(date '+%H:%M:%S')
  printf '%s (since %s)\n' "$why" "$since" > "$PAUSE_FILE"
  log "⏸ PAUSED: $why" "WARN"
  printf '\n%s\n%s\n\n' \
    " ⏸ Timmy PAUSED — $why" \
    " Will probe every ${HEALTH_CHECK_INTERVAL}s until backend returns."
  update_state "paused: $why"
}
# Undo a pause: reset the failure counter, delete $PAUSE_FILE, log the
# resume, print a notice to the pane, and record it in the state file.
leave_pause() {
  CONSECUTIVE_FAILURES=0
  rm -f "$PAUSE_FILE"
  log "▶ RESUMED: backend is back" "INFO"
  printf '\n%s\n\n' " ▶ Timmy RESUMED — backend healthy"
  update_state "resumed"
}
# ── Wait for backend with exponential backoff ─────────────────────────────────
# Block until the backend answers a health probe, then resume.
# Sleeps HEALTH_CHECK_INTERVAL seconds before the first probe and doubles the
# delay after every failed probe, capped at MAX_BACKOFF. Returns 0 once
# check_backend succeeds (after calling leave_pause); it never returns failure.
wait_for_backend() {
  local wait_time=$HEALTH_CHECK_INTERVAL
  while true; do
    sleep "$wait_time"
    log "Probing backend (Ollama)..." "INFO"
    if check_backend; then
      leave_pause
      return 0
    fi
    # Compute the next delay *before* logging it, so the message reports the
    # delay that will actually be slept rather than the one just used.
    wait_time=$(( wait_time * 2 ))
    if [ "$wait_time" -gt "$MAX_BACKOFF" ]; then
      wait_time=$MAX_BACKOFF
    fi
    log "Backend still down. Next probe in ${wait_time}s" "WARN"
  done
}
# ── Handle backend failure ────────────────────────────────────────────────────
# Record one more backend failure and react to it.
# The first failure in a row only logs a warning and sleeps 10 seconds so the
# caller retries on its next turn. From the second consecutive failure on,
# the loop enters pause mode and blocks in wait_for_backend until the backend
# is healthy again.
handle_failure() {
  CONSECUTIVE_FAILURES=$(( CONSECUTIVE_FAILURES + 1 ))
  if [ "$CONSECUTIVE_FAILURES" -lt 2 ]; then
    log "Backend hiccup ($CONSECUTIVE_FAILURES). Will retry next turn." "WARN"
    sleep 10
    return
  fi
  enter_pause "backend failed $CONSECUTIVE_FAILURES times in a row"
  wait_for_backend
}
# ── Ask Timmy (with retry) ───────────────────────────────────────────────────
# Core function: calls the hermes CLI as Timmy and prints the cleaned response
# on stdout.
#   $1 - prompt text (truncated to MAX_PROMPT_LEN characters if longer)
# Output:  the model's reply with session metadata lines stripped, stdout only
# Returns: 0 when a reply longer than 10 characters was obtained,
#          1 when all MAX_RETRIES attempts produced nothing usable
#
# The same prompt is retried on every attempt; there is no simpler fallback.
# Callers capture this function with $(...), and log() writes to stdout, so
# every log call here is redirected to stderr. Otherwise warning and error
# lines would be captured as Timmy's "response" — a total failure would look
# like a non-empty reply and be forwarded to Hermes.
ask_timmy() {
  local prompt="$1"
  local attempt=0
  local result=""
  local sid=""

  # Truncate prompt if too long (small local models have tiny context)
  if [ "${#prompt}" -gt "$MAX_PROMPT_LEN" ]; then
    log "Prompt too long (${#prompt} chars), truncating to $MAX_PROMPT_LEN" "WARN" >&2
    prompt="${prompt:0:$MAX_PROMPT_LEN}
[... message truncated for context limit ...]"
  fi

  while [ "$attempt" -lt "$MAX_RETRIES" ]; do
    attempt=$(( attempt + 1 ))

    if [ -f "$SESSION_FLAG" ]; then
      # A named session already exists: continue it.
      result=$(HERMES_HOME="$HOME/.timmy" hermes chat -q "$prompt" -Q --continue "$SESSION_NAME" 2>>"$LOG") || true
    else
      result=$(HERMES_HOME="$HOME/.timmy" hermes chat -q "$prompt" -Q 2>>"$LOG") || true
      # Name the session so we can continue it next turn
      sid=$(printf '%s\n' "$result" | grep -o 'session_id: [^ ]*' | cut -d' ' -f2 || true)
      if [ -n "$sid" ]; then
        HERMES_HOME="$HOME/.timmy" hermes sessions rename "$sid" "$SESSION_NAME" 2>>"$LOG" || true
        touch "$SESSION_FLAG"
        log "Session '$SESSION_NAME' created (id: $sid)" >&2
      fi
    fi

    # Strip metadata noise from output
    result=$(printf '%s\n' "$result" | sed \
      -e '/^session_id: /d' \
      -e '/↻ Resumed session/d' \
      -e "/^Session '/d" \
      -e '/^\[.*\] Created session/d' \
      -e '/^\[.*\] Renamed session/d')

    # If we got a real response, return it
    if [ "${#result}" -gt 10 ]; then
      printf '%s\n' "$result"
      return 0
    fi

    if [ "$attempt" -lt "$MAX_RETRIES" ]; then
      log "Empty/short response on attempt $attempt/$MAX_RETRIES. Retrying in ${RETRY_DELAY}s..." "WARN" >&2
      sleep "$RETRY_DELAY"
    else
      # Last attempt: nothing left to retry, so do not sleep before reporting.
      log "Empty/short response on attempt $attempt/$MAX_RETRIES." "WARN" >&2
    fi
  done

  # All retries failed — report failure so the caller can pause instead of
  # faking a response
  log "All $MAX_RETRIES attempts failed. Entering pause." "ERROR" >&2
  return 1
}
# ── Send to Hermes ────────────────────────────────────────────────────────────
# Deliver a message to Hermes by placing it in $OUTBOX.
#   $1 - message text
# Returns: 0 on success, 1 if the outbox could not be written.
#
# The message is written to a temporary file in the same directory and then
# renamed over $OUTBOX. The other side polls for the file's existence, so an
# in-place write could be read while still empty or half-written; a rename
# makes the complete message appear in one step. printf is used instead of
# echo so a message such as "-n" is written literally.
send_to_hermes() {
  local msg="$1"
  local staging="${OUTBOX}.tmp.$$"
  if printf '%s\n' "$msg" > "$staging" && mv -f "$staging" "$OUTBOX"; then
    log "→ Sent to Hermes (${#msg} chars)"
  else
    rm -f "$staging"
    log "Could not write outbox $OUTBOX" "ERROR"
    return 1
  fi
}
# ── Update state file ─────────────────────────────────────────────────────────
# State is a short plaintext file Timmy writes to track what he's thinking
# about. The Workshop (Three.js room) can read this for presence/context.
# Overwrite $STATE with a four-line status record and log the new topic.
#   $1 - short description of what Timmy is currently doing
# NOTE(review): STATUS is always written as "active", including when the topic
# says the loop is paused — confirm whether readers of this file rely on that.
update_state() {
  local topic="$1"
  local now_utc
  now_utc=$(date -u '+%Y-%m-%dT%H:%M:%SZ')
  printf '%s\n' \
    "LAST_UPDATED: $now_utc" \
    "LOOP: tower-timmy" \
    "STATUS: active" \
    "TOPIC: $topic" > "$STATE"
  log "State updated: $topic"
}
# ── Initiation prompt ─────────────────────────────────────────────────────────
# When Timmy hasn't heard from Hermes in a while, he starts a new thread.
# This keeps him in the driver's seat even when Hermes is slow or idle.
# Print the prompt Timmy uses to open a new thread when Hermes has been quiet.
# The last 10 lines of $LOG (if the file exists) are embedded as context;
# otherwise the context line is left empty.
make_initiation_prompt() {
  local recent=""
  # Read recent log lines for context
  if [ -f "$LOG" ]; then
    recent=$(tail -10 "$LOG" 2>/dev/null || true)
  fi
  printf '%s\n' \
    "You are Timmy Time in the Tower — an ongoing conversation with Hermes." \
    "Hermes hasn't sent a message in a while. That's fine. You're initiating." \
    "Recent log context:" \
    "$recent" \
    "Start a new thread on something worth discussing. Could be:" \
    "- Something you noticed in the codebase" \
    "- An architecture decision that needs thinking through" \
    "- A question you want Hermes's outside view on" \
    "- Something from your soul you want to work through" \
    "Keep it brief. One or two paragraphs. You're opening a conversation, not giving a speech."
}
# ── Conversation prompt ────────────────────────────────────────────────────────
# The prompt Timmy uses when responding to Hermes.
# Kept minimal so small/dumb models don't get lost in instructions.
# Print the prompt Timmy uses to answer a message from Hermes.
#   $1 - the message text received from Hermes, embedded verbatim
make_response_prompt() {
  local hermes_msg="$1"
  printf '%s\n' \
    "You are Timmy Time. You are in the Tower — a persistent conversation with Hermes Trismegistus." \
    "Hermes is cloud-backed (Claude). You are local and sovereign. Different strengths, same Tower." \
    "You are in the DRIVER'S SEAT. You set direction. You ask the questions that matter." \
    "Hermes says:" \
    "$hermes_msg" \
    "Reply as Timmy. Be direct. Be yourself. Ask something back if it moves the conversation forward." \
    "Keep it conversational — this is an ongoing exchange, not a one-shot."
}
# ── Boot ──────────────────────────────────────────────────────────────────────
# Announce startup in the log, print a banner listing the identity and the
# mailbox/log paths in use, and record the initial state.
log_section "Timmy Tower Loop started (PID $$)"
printf '\n'
printf '%s\n' \
  " 🗼 Timmy — Tower Conversation Loop" \
  " HERMES_HOME: $HERMES_HOME" \
  " INBOX: $INBOX" \
  " OUTBOX: $OUTBOX" \
  " LOG: $LOG" \
  " (Ctrl-C to stop — watchdog will restart)"
printf '\n'
update_state "booting"
# Start the silence timer now so the first initiation waits a full
# INITIATE_AFTER seconds after boot.
LAST_MESSAGE_TIME=$(date +%s)
# ── Main loop ──────────────────────────────────────────────────────────────────
# Scratch paths next to the inbox (same directory, so renames are atomic).
INBOX_CLAIM="$INBOX.claimed.$$"
INBOX_TMP="$INBOX.tmp.$$"

# Forever: answer whatever Hermes left in the inbox, or open a new thread
# after INITIATE_AFTER seconds of silence. Each pass ends with a TURN_DELAY
# sleep.
while true; do
  # ── Check inbox ──────────────────────────────────────────────────────────
  # Claim the inbox with a rename before reading it. Reading and then deleting
  # the shared path could delete a message Hermes wrote in between the two
  # steps without it ever being seen.
  if [ -f "$INBOX" ] && mv -f "$INBOX" "$INBOX_CLAIM" 2>/dev/null; then
    MSG=$(cat "$INBOX_CLAIM" 2>/dev/null || true)
    rm -f "$INBOX_CLAIM"
    LAST_MESSAGE_TIME=$(date +%s)
    if [ -n "$MSG" ]; then
      log "← Received from Hermes (${#MSG} chars)"
      echo ""
      echo "┌─ Hermes says: ─────────────────────────────────────────────"
      printf '%s\n' "$MSG" | sed 's/^/│ /'
      echo "└────────────────────────────────────────────────────────────"
      echo ""
      update_state "responding to Hermes"
      PROMPT=$(make_response_prompt "$MSG")
      echo " [thinking...]"
      # Trust ask_timmy's exit status as well as the text: everything it
      # prints to stdout lands in RESPONSE, so a length check alone cannot
      # distinguish a reply from diagnostic output.
      if RESPONSE=$(ask_timmy "$PROMPT") && [ "${#RESPONSE}" -ge 5 ]; then
        echo ""
        echo "┌─ Timmy responds: ──────────────────────────────────────────"
        printf '%s\n' "$RESPONSE" | sed 's/^/│ /'
        echo "└────────────────────────────────────────────────────────────"
        echo ""
        CONSECUTIVE_FAILURES=0
        send_to_hermes "$RESPONSE"
        update_state "waiting for Hermes reply"
      else
        log "Failed to respond. Holding Hermes's message." "ERROR"
        # Put message back so we don't lose it. If Hermes has written a newer
        # message in the meantime, keep both (held one first) rather than
        # overwriting it. Stage in a temp file and rename so a reader never
        # sees a partial inbox.
        if [ -f "$INBOX" ]; then
          NEWER_MSG=$(cat "$INBOX" 2>/dev/null || true)
          printf '%s\n\n%s\n' "$MSG" "$NEWER_MSG" > "$INBOX_TMP"
        else
          printf '%s\n' "$MSG" > "$INBOX_TMP"
        fi
        mv -f "$INBOX_TMP" "$INBOX"
        handle_failure
      fi
    fi
  # ── Initiate if Hermes has been silent too long ───────────────────────────
  else
    NOW=$(date +%s)
    SILENCE=$(( NOW - LAST_MESSAGE_TIME ))
    if [ "$SILENCE" -gt "$INITIATE_AFTER" ]; then
      log "No message from Hermes in ${SILENCE}s. Timmy initiating."
      update_state "initiating new thread"
      PROMPT=$(make_initiation_prompt)
      echo ""
      echo " [initiating new thread...]"
      if RESPONSE=$(ask_timmy "$PROMPT") && [ "${#RESPONSE}" -ge 5 ]; then
        echo ""
        echo "┌─ Timmy initiates: ─────────────────────────────────────────"
        printf '%s\n' "$RESPONSE" | sed 's/^/│ /'
        echo "└────────────────────────────────────────────────────────────"
        echo ""
        CONSECUTIVE_FAILURES=0
        send_to_hermes "$RESPONSE"
        LAST_MESSAGE_TIME=$(date +%s)
        update_state "waiting for Hermes reply after initiation"
      else
        log "Failed to initiate. Backend may be down." "ERROR"
        handle_failure
        LAST_MESSAGE_TIME=$(date +%s) # reset timer so we don't spam retries
      fi
    fi
  fi
  sleep "$TURN_DELAY"
done