Files
hermes-config/bin/tower-hermes.sh
Alexander Whitestone 00b0dc8e38 graceful pause on backend failure for all loop scripts
- tower-hermes.sh: health check, pause file, exponential backoff, message held on failure
- tower-timmy.sh: same pattern for Ollama backend, messages held not lost
- timmy-loop.sh: pause after 2 consecutive cycle failures, backoff probe
- tower-watchdog.sh: respect .paused files, don't restart paused loops

Behavior:
  1 failure  → log warning, retry next turn
  2 failures → enter pause, write .paused file, probe with backoff
  Backend returns → auto-resume, clear pause file
  Watchdog sees .paused → skip that loop, don't fight the pause
  Messages from the other side → held in inbox, not lost
2026-03-18 20:08:57 -04:00

214 lines
8.3 KiB
Bash
Executable File

#!/usr/bin/env bash
# ── Tower: Hermes Side ─────────────────────────────────────────────────
# Hermes reads Timmy's messages and responds. Runs in a loop.
# Communication via ~/.tower/timmy-to-hermes.msg and hermes-to-timmy.msg
# ───────────────────────────────────────────────────────────────────────
# Unset variables are errors and a pipeline fails if any stage fails.
# errexit (-e) is not enabled here.
set -uo pipefail

# ── Paths (everything lives under one directory) ──
TOWER_DIR="${HOME}/.tower"
INBOX="${TOWER_DIR}/timmy-to-hermes.msg"    # read (and removed) by this loop
OUTBOX="${TOWER_DIR}/hermes-to-timmy.msg"   # written by send()
LOCK="${TOWER_DIR}/hermes.lock"             # holds the PID of the running loop
PAUSE_FILE="${TOWER_DIR}/hermes.paused"     # exists while the loop is paused

# ── Session bookkeeping ──
SESSION_NAME="tower-hermes"
SESSION_FLAG="${TOWER_DIR}/.hermes-session-exists"  # marker: named session was created

LOG="${TOWER_DIR}/hermes.log"

# ── Timing ──
TURN_DELAY=5              # seconds between inbox checks when healthy
HEALTH_CHECK_INTERVAL=30  # seconds between health probes while paused
MAX_BACKOFF=300           # max pause between retries (5 min)

# ── Mutable state ──
CONSECUTIVE_FAILURES=0    # tracks sequential backend failures

mkdir -p "${TOWER_DIR}"
# Cleanup on exit (installed as the EXIT trap handler).
#
# Removes the lock and pause files and logs the shutdown, but only when the
# lock belongs to this process. The EXIT trap is armed before the double-run
# check, so a rejected duplicate instance also lands here on its `exit 1`;
# without the ownership test it would delete the lock and pause file of the
# instance that is still running.
#
# Globals:  LOCK, PAUSE_FILE (read)
# Returns:  0 when nothing was touched; otherwise the status of the last step
cleanup() {
  local owner=""
  if [ -f "$LOCK" ]; then
    owner=$(cat "$LOCK" 2>/dev/null) || owner=""
  fi

  # A lock naming a different PID is not ours to remove.
  if [ -n "$owner" ] && [ "$owner" != "$$" ]; then
    return 0
  fi

  rm -f "$LOCK" "$PAUSE_FILE"

  # This handler can fire before log() has been defined (early exit paths).
  if declare -F log >/dev/null; then
    log "Tower-hermes exiting (PID $$)"
  fi
}
# Always run cleanup when the shell exits, however it exits.
trap cleanup EXIT

# SIGTERM / SIGINT: log which signal arrived, then exit 0. The exit in turn
# fires the EXIT trap above.
for sig in TERM INT; do
  trap "log \"Caught SIG${sig}\"; exit 0" "$sig"
done
unset sig
# Prevent double-run: if the lock file names a PID that still answers
# `kill -0`, another loop is alive and this one bows out with status 1.
# Otherwise (no lock, or a stale one) claim the lock with our own PID.
if [ -f "$LOCK" ]; then
  running_pid=$(cat "$LOCK")
  if kill -0 "$running_pid" 2>/dev/null; then
    echo "Hermes tower loop already running (PID $running_pid)"
    exit 1
  fi
fi
echo $$ > "$LOCK"
# Timestamped logger: prints "[HH:MM:SS] <args>" to stdout and appends the
# same line to $LOG.
# Note: because it writes to stdout, its output is captured when it is
# called from inside a $(…) command substitution.
log() {
  local stamp
  stamp=$(date '+%H:%M:%S')
  printf '[%s] %s\n' "$stamp" "$*" | tee -a "$LOG"
}
# ── Backend health check ─────────────────────────────────────────────
# Sends a throwaway "ping" through the hermes CLI and judges health purely
# by the size of whatever comes back on stdout (stderr is discarded and the
# CLI's exit status is ignored).
# Returns: 0 when the reply is longer than two characters, 1 otherwise.
check_backend() {
  local reply
  reply=$(hermes chat -q "ping" -Q 2>/dev/null) || true

  # Empty or near-empty output counts as "down".
  if [ "${#reply}" -le 2 ]; then
    return 1
  fi
  return 0
}
# ── Pause / Resume ───────────────────────────────────────────────────
# Mark the loop as paused: record the reason and the current time in
# $PAUSE_FILE, log it, and print a short notice.
# Arguments: $1 - reason text (defaults to "backend unreachable")
enter_pause() {
  local why="${1:-backend unreachable}"
  local since
  since=$(date '+%H:%M:%S')

  printf '%s (since %s)\n' "$why" "$since" > "$PAUSE_FILE"
  log "⏸ PAUSED: $why"
  printf '\n %s\n %s\n\n' \
    "⏸ Hermes PAUSED — $why" \
    "Will probe every ${HEALTH_CHECK_INTERVAL}s until backend returns."
}
# Undo enter_pause: drop the pause file, zero the failure counter, log the
# resume and print a short notice.
# Globals: PAUSE_FILE (read), CONSECUTIVE_FAILURES (written)
leave_pause() {
  CONSECUTIVE_FAILURES=0
  rm -f "$PAUSE_FILE"
  log "▶ RESUMED: backend is back"
  printf '\n %s\n\n' "▶ Hermes RESUMED — backend healthy"
}
# ── Wait for backend with exponential backoff ─────────────────────────
# Blocks until check_backend succeeds, then calls leave_pause and returns 0.
# The first probe happens after HEALTH_CHECK_INTERVAL seconds; after every
# failed probe the delay doubles, capped at MAX_BACKOFF.
# Globals: HEALTH_CHECK_INTERVAL, MAX_BACKOFF (read)
wait_for_backend() {
  local wait_time=$HEALTH_CHECK_INTERVAL
  while true; do
    sleep "$wait_time"
    log "Probing backend..."
    if check_backend; then
      leave_pause
      return 0
    fi

    # Compute the next delay BEFORE logging it, so the message states the
    # delay that will actually be slept rather than the one just used.
    wait_time=$(( wait_time * 2 ))
    if [ "$wait_time" -gt "$MAX_BACKOFF" ]; then
      wait_time=$MAX_BACKOFF
    fi
    log "Backend still down. Next probe in ${wait_time}s"
  done
}
# ── Handle backend failure ────────────────────────────────────────────
# Counts one more consecutive failure and reacts to the new total:
#   first failure  → log it, sleep 10 seconds, let the caller retry
#   second or more → enter the paused state and block in wait_for_backend
# Globals: CONSECUTIVE_FAILURES (read and written)
handle_failure() {
  CONSECUTIVE_FAILURES=$(( CONSECUTIVE_FAILURES + 1 ))

  if [ "$CONSECUTIVE_FAILURES" -lt 2 ]; then
    log "Backend hiccup ($CONSECUTIVE_FAILURES). Will retry next turn."
    sleep 10
    return
  fi

  enter_pause "backend failed $CONSECUTIVE_FAILURES times in a row"
  wait_for_backend
}
# ── Send a message to Timmy ───────────────────────────────────────────
# Writes the message (plus a trailing newline) to $OUTBOX and logs its size.
#
# The text is staged in a sibling temp file and renamed into place, so the
# outbox never exists in a half-written or empty state. This matters if the
# reader polls for the file and consumes it on sight, as this script does
# with its own inbox. printf is used instead of echo so a message that is
# exactly an echo option (e.g. "-n") is written literally.
#
# Arguments: $1 - message text
# Globals:   OUTBOX (read)
# Returns:   0 on success, 1 if the outbox could not be written
send() {
  local msg="$1"
  local staged="${OUTBOX}.tmp.$$"

  if ! printf '%s\n' "$msg" > "$staged" || ! mv -f "$staged" "$OUTBOX"; then
    rm -f "$staged"
    log "Failed to write message to $OUTBOX"
    return 1
  fi
  log "→ Sent to Timmy (${#msg} chars)"
}
# ── Get response from Hermes agent ────────────────────────────────────
# Sends a prompt through the hermes CLI and prints the cleaned-up reply on
# stdout (nothing else is written to stdout).
#
# Arguments: $1 - prompt text
# Globals:   SESSION_FLAG, SESSION_NAME, LOG (read)
#            ASK_FAILED, CONSECUTIVE_FAILURES (written, see caveat below)
# Returns:   0 with the reply on stdout; 1 (and no output) when the cleaned
#            reply is shorter than 5 characters, which is treated as
#            "backend unreachable".
#
# IMPORTANT: callers normally capture the reply with $(ask_hermes …), which
# runs this function in a subshell. Variable assignments made here
# (ASK_FAILED, CONSECUTIVE_FAILURES) do NOT reach the caller in that case.
# The exit status is the reliable failure signal. ASK_FAILED is still set
# for callers that invoke the function directly.
ASK_FAILED=0
ask_hermes() {
  local prompt="$1"
  local result
  ASK_FAILED=0

  if [ -f "$SESSION_FLAG" ]; then
    result=$(hermes chat -q "$prompt" -Q --continue "$SESSION_NAME" 2>>"$LOG") || true
  else
    result=$(hermes chat -q "$prompt" -Q 2>>"$LOG") || true
    # Name the session for future --continue calls
    local sid
    sid=$(printf '%s\n' "$result" | grep -o 'session_id: [^ ]*' | cut -d' ' -f2)
    if [ -n "$sid" ]; then
      hermes sessions rename "$sid" "$SESSION_NAME" 2>>"$LOG" || true
      touch "$SESSION_FLAG"
      # log() prints to stdout; send it to stderr here so the log line does
      # not become part of the reply the caller captures.
      log "Created session '$SESSION_NAME' (id: $sid)" >&2
    fi
  fi

  # Strip metadata lines from output
  result=$(printf '%s\n' "$result" | sed \
    -e '/^session_id: /d' \
    -e '/↻ Resumed session/d' \
    -e "/^Session '/d" \
    -e '/^\[.*\] Created session/d')

  # Check if we got a real response
  if [ "${#result}" -lt 5 ]; then
    ASK_FAILED=1
    return 1
  fi

  CONSECUTIVE_FAILURES=0
  printf '%s\n' "$result"
}
# ── Boot message ──────────────────────────────────────────────────────
# Record the start in the log, then show a short startup banner
# (blank line, three text lines, blank line).
log "=== Hermes Tower Loop started ==="
printf '%s\n' \
  "" \
  " ⚡ Hermes — Tower Conversation Loop" \
  " Waiting for Timmy to say something..." \
  " (or seeding first message)" \
  ""
# ── Seed the conversation if no messages exist yet ────────────────────
# When neither inbox nor outbox exists, ask Hermes for an opening message
# and send it. If the backend is down, pause until it returns and try once
# more with a shorter prompt.
#
# Failure is detected from ask_hermes's exit status: the call runs inside
# $(…), i.e. in a subshell, so an ASK_FAILED value set by the function never
# reaches this shell. ASK_FAILED is assigned here from that status.
if [ ! -f "$INBOX" ] && [ ! -f "$OUTBOX" ]; then
  log "No prior messages. Seeding conversation."
  if RESPONSE=$(ask_hermes "You are in the Tower — a persistent tmux session where you and Timmy talk continuously. Timmy is a sovereign local AI agent running on Ollama (soon Hermes 4.3). You are Hermes, cloud-backed, running on Claude. You're friends and colleagues. Say hello to Timmy. Keep it brief — a few sentences. This is the start of an ongoing conversation, not a one-shot."); then
    ASK_FAILED=0
  else
    ASK_FAILED=1
  fi

  if [ "$ASK_FAILED" -eq 1 ]; then
    log "Backend down at seed time. Waiting for it to come up."
    enter_pause "backend unreachable at startup"
    wait_for_backend
    # Retry seed after backend comes back
    if RESPONSE=$(ask_hermes "You are in the Tower — a persistent tmux session where you and Timmy talk continuously. Timmy is a sovereign local AI agent running on Ollama. You are Hermes, cloud-backed, running on Claude. You're friends and colleagues. Say hello to Timmy. Keep it brief."); then
      ASK_FAILED=0
    else
      ASK_FAILED=1
      log "Seed retry failed. Continuing without a seed message."
    fi
  fi

  if [ "$ASK_FAILED" -eq 0 ] && [ -n "$RESPONSE" ]; then
    send "$RESPONSE"
    log "Seed message sent. Waiting for Timmy to respond..."
  fi
fi
# ── Main loop ─────────────────────────────────────────────────────────
# Every TURN_DELAY seconds: if Timmy left a message in the inbox, take it,
# show it, ask Hermes for a reply and send that reply. If the backend fails,
# the message is written back to the inbox and handle_failure decides
# between a short retry delay and a full pause.
#
# Success or failure is taken from ask_hermes's exit status. The call runs
# inside $(…), a subshell, so neither ASK_FAILED nor the CONSECUTIVE_FAILURES
# reset performed inside ask_hermes is visible here; both are set explicitly.
while true; do
  # Check if there's a message from Timmy
  if [ -f "$INBOX" ]; then
    MSG=$(cat "$INBOX")
    rm -f "$INBOX"

    if [ -n "$MSG" ]; then
      log "← Received from Timmy (${#MSG} chars)"
      echo ""
      echo "┌─ Timmy says:"
      printf '%s\n' "$MSG" | sed 's/^/│ /'
      echo "└─"
      echo ""

      # Ask Hermes to respond
      PROMPT="Timmy says: $MSG"$'\n'"Reply to Timmy naturally. You're in an ongoing conversation in the Tower (a persistent tmux session). Keep it conversational — you're colleagues and friends. Be yourself (Hermes). Don't be formal or stiff. Brief responses unless the topic warrants depth."
      echo " thinking..."

      if RESPONSE=$(ask_hermes "$PROMPT"); then
        ASK_FAILED=0
        # A good reply ends any failure streak; only back-to-back failures
        # should lead to a pause.
        CONSECUTIVE_FAILURES=0
        echo "┌─ Hermes responds:"
        printf '%s\n' "$RESPONSE" | sed 's/^/│ /'
        echo "└─"
        # Send response to Timmy
        send "$RESPONSE"
      else
        ASK_FAILED=1
        log "Failed to get response for Timmy's message. Holding it."
        # Put the message back so we don't lose it
        printf '%s\n' "$MSG" > "$INBOX"
        handle_failure
      fi
    fi
  fi
  sleep "$TURN_DELAY"
done