- tower-hermes.sh: health check, pause file, exponential backoff, message held on failure
- tower-timmy.sh: same pattern for Ollama backend, messages held not lost
- timmy-loop.sh: pause after 2 consecutive cycle failures, backoff probe
- tower-watchdog.sh: respect .paused files, don't restart paused loops

Behavior:
- 1 failure → log warning, retry next turn
- 2 failures → enter pause, write .paused file, probe with backoff
- Backend returns → auto-resume, clear pause file
- Watchdog sees .paused → skip that loop, don't fight the pause
- Messages from the other side → held in inbox, not lost
214 lines
8.3 KiB
Bash
Executable File
214 lines
8.3 KiB
Bash
Executable File
#!/usr/bin/env bash
|
|
# ── Tower: Hermes Side ─────────────────────────────────────────────────
# Hermes reads Timmy's messages and responds. Runs in a loop.
# Communication is file-based: Timmy's messages arrive in
# ~/.tower/timmy-to-hermes.msg and replies go to ~/.tower/hermes-to-timmy.msg.
# ───────────────────────────────────────────────────────────────────────
|
|
|
|
set -uo pipefail

# Filesystem layout: every file this loop touches lives under TOWER_DIR.
TOWER_DIR="${HOME}/.tower"
INBOX="${TOWER_DIR}/timmy-to-hermes.msg"    # messages read by this loop
OUTBOX="${TOWER_DIR}/hermes-to-timmy.msg"   # replies written by this loop
LOCK="${TOWER_DIR}/hermes.lock"             # holds the PID of the running loop
PAUSE_FILE="${TOWER_DIR}/hermes.paused"     # exists while the loop is paused
LOG="${TOWER_DIR}/hermes.log"

# Hermes chat session bookkeeping.
SESSION_NAME="tower-hermes"
SESSION_FLAG="${TOWER_DIR}/.hermes-session-exists"  # created once the session is named

# Timing knobs (seconds) and failure tracking.
TURN_DELAY=5                # pause between inbox checks when healthy
HEALTH_CHECK_INTERVAL=30    # first delay between health probes while paused
MAX_BACKOFF=300             # ceiling for the probe delay (5 min)
CONSECUTIVE_FAILURES=0      # sequential backend failures seen so far

mkdir -p "$TOWER_DIR"
|
|
|
|
# Cleanup on exit
#
# EXIT-trap handler: removes $LOCK and $PAUSE_FILE and logs the exit.
#
# The trap is installed before the double-run check, so this handler also
# runs when a second instance refuses to start. In that case $LOCK records
# the PID of the loop that is still running; deleting its lock or pause file
# would break that loop, so nothing is touched unless the lock is missing,
# empty, or names this process.
#
# Globals: LOCK (read/removed), PAUSE_FILE (removed)
cleanup() {
  local owner=""
  if [ -f "$LOCK" ]; then
    owner=$(cat "$LOCK" 2>/dev/null) || owner=""
  fi

  # Lock held by a different PID: these files are not ours to remove.
  if [ -n "$owner" ] && [ "$owner" != "$$" ]; then
    return 0
  fi

  rm -f "$LOCK"
  rm -f "$PAUSE_FILE"

  # An early exit can happen before log() has been defined.
  if declare -F log >/dev/null; then
    log "Tower-hermes exiting (PID $$)"
  fi
}
|
|
# Run cleanup whenever the script exits.
trap cleanup EXIT

# TERM and INT get the same treatment: log which signal arrived, then exit
# with status 0.
for _sig in TERM INT; do
  # shellcheck disable=SC2064 — ${_sig} must expand now, when the trap is set
  trap "log \"Caught SIG${_sig}\"; exit 0" "$_sig"
done
unset _sig
|
|
|
|
# Prevent double-run
# If the lock file exists and the PID stored in it still answers signal 0,
# another loop is running: report it and exit 1. Otherwise claim the lock
# by writing this process's PID into it.
if [ -f "$LOCK" ]; then
  if kill -0 "$(cat "$LOCK")" 2>/dev/null; then
    printf '%s\n' "Hermes tower loop already running (PID $(cat "$LOCK"))"
    exit 1
  fi
fi
printf '%s\n' "$$" > "$LOCK"
|
|
|
|
# Print a timestamped line ("[HH:MM:SS] message") on stdout and append the
# same line to $LOG. All arguments are joined into a single message.
log() {
  local stamp
  stamp=$(date '+%H:%M:%S')
  printf '%s\n' "[${stamp}] $*" | tee -a "$LOG"
}
|
|
|
|
# ── Backend health check ─────────────────────────────────────────────
# Sends a one-shot "ping" through `hermes chat` and treats any reply longer
# than two characters as proof that the backend answers. The probe's stderr
# is discarded and its exit status ignored; only the captured stdout counts.
# Returns: 0 when healthy, 1 when down.
check_backend() {
  local reply
  reply=$(hermes chat -q "ping" -Q 2>/dev/null) || true
  (( ${#reply} > 2 ))
}
|
|
|
|
# ── Pause / Resume ───────────────────────────────────────────────────
# Mark the loop as paused: write the reason and the current time into
# $PAUSE_FILE, log the pause, and print a banner on stdout.
# Arguments: $1 - reason (optional; defaults to "backend unreachable")
enter_pause() {
  local why="${1:-backend unreachable}"
  local since
  since=$(date '+%H:%M:%S')

  printf '%s\n' "${why} (since ${since})" > "$PAUSE_FILE"
  log "⏸ PAUSED: ${why}"
  printf '\n%s\n%s\n\n' \
    " ⏸ Hermes PAUSED — ${why}" \
    " Will probe every ${HEALTH_CHECK_INTERVAL}s until backend returns."
}
|
|
|
|
# Undo enter_pause: delete $PAUSE_FILE, zero the consecutive-failure counter,
# log the resume, and print a banner on stdout.
leave_pause() {
  CONSECUTIVE_FAILURES=0
  rm -f "$PAUSE_FILE"
  log "▶ RESUMED: backend is back"
  printf '\n%s\n\n' " ▶ Hermes RESUMED — backend healthy"
}
|
|
|
|
# ── Wait for backend with exponential backoff ─────────────────────────
# Block until check_backend succeeds, then call leave_pause and return 0.
# The first probe happens after HEALTH_CHECK_INTERVAL seconds; after every
# failed probe the delay doubles, capped at MAX_BACKOFF.
# Globals: HEALTH_CHECK_INTERVAL, MAX_BACKOFF (read)
# Returns: 0 once the backend answers (never returns while it is down)
wait_for_backend() {
  local delay=$HEALTH_CHECK_INTERVAL

  while true; do
    sleep "$delay"
    log "Probing backend..."
    if check_backend; then
      leave_pause
      return 0
    fi

    # Compute the next delay before logging it, so the message reports the
    # wait that is actually about to happen rather than the one just spent.
    delay=$(( delay * 2 ))
    if (( delay > MAX_BACKOFF )); then
      delay=$MAX_BACKOFF
    fi
    log "Backend still down. Next probe in ${delay}s"
  done
}
|
|
|
|
# ── Handle backend failure ────────────────────────────────────────────
# Count one more consecutive failure. The first failure is logged as a
# hiccup and followed by a 10-second sleep; from the second one on, the
# loop pauses and blocks in wait_for_backend.
# Globals: CONSECUTIVE_FAILURES (incremented)
handle_failure() {
  CONSECUTIVE_FAILURES=$(( CONSECUTIVE_FAILURES + 1 ))

  if (( CONSECUTIVE_FAILURES < 2 )); then
    log "Backend hiccup ($CONSECUTIVE_FAILURES). Will retry next turn."
    sleep 10
    return
  fi

  enter_pause "backend failed $CONSECUTIVE_FAILURES times in a row"
  wait_for_backend
}
|
|
|
|
# ── Send a message to Timmy ───────────────────────────────────────────
# Publish a message in $OUTBOX.
#
# The text is first written to a staging file next to $OUTBOX and then
# renamed into place, so a reader polling for $OUTBOX never observes an
# empty or half-written file.
#
# Arguments: $1 - message text
# Returns:   0 on success, 1 if the message could not be written
send() {
  local msg="$1"
  local staging="${OUTBOX}.tmp.$$"

  # printf rather than echo: a message such as "-n" or "-e" would be
  # swallowed by echo as an option instead of being written.
  if ! printf '%s\n' "$msg" > "$staging" || ! mv -f -- "$staging" "$OUTBOX"; then
    rm -f -- "$staging"
    log "Failed to write message to $OUTBOX"
    return 1
  fi

  log "→ Sent to Timmy (${#msg} chars)"
}
|
|
|
|
# ── Get response from Hermes agent ────────────────────────────────────
# Ask the Hermes CLI for a reply to a prompt.
#
# Arguments: $1 - prompt text
# Outputs:   the reply, with session metadata lines stripped, on stdout.
#            Nothing else is written to stdout: diagnostics go to stderr or
#            to $LOG, because callers capture stdout as the reply.
# Returns:   0 on success; 1 when the reply is empty or shorter than 5
#            characters, which is treated as "backend unreachable".
# Globals:   ASK_FAILED is set to 0/1 and CONSECUTIVE_FAILURES is reset on
#            success, but only in the shell that runs the function. When
#            called as $(ask_hermes ...) the function runs in a subshell and
#            those assignments are lost, so such callers must rely on the
#            exit status instead.
ASK_FAILED=0
ask_hermes() {
  local prompt="$1"
  local result sid
  ASK_FAILED=0

  if [ -f "$SESSION_FLAG" ]; then
    result=$(hermes chat -q "$prompt" -Q --continue "$SESSION_NAME" 2>>"$LOG") || true
  else
    result=$(hermes chat -q "$prompt" -Q 2>>"$LOG") || true

    # Name the session for future --continue calls. Only the first
    # session_id found in the output is used.
    sid=$(printf '%s\n' "$result" \
      | grep -o 'session_id: [^ ]*' \
      | head -n 1 \
      | cut -d' ' -f2) || true
    if [ -n "$sid" ]; then
      # Keep the rename command's output and the log line off stdout so
      # they cannot end up inside the captured reply.
      hermes sessions rename "$sid" "$SESSION_NAME" >>"$LOG" 2>&1 || true
      touch "$SESSION_FLAG"
      log "Created session '$SESSION_NAME' (id: $sid)" >&2
    fi
  fi

  # Strip metadata lines from output
  result=$(printf '%s\n' "$result" \
    | grep -v -e '^session_id: ' -e '↻ Resumed session' -e "^Session '" \
    | sed '/^\[.*\] Created session/d') || true

  # Check if we got a real response
  if [ "${#result}" -lt 5 ]; then
    ASK_FAILED=1
    return 1
  fi

  CONSECUTIVE_FAILURES=0
  printf '%s\n' "$result"
}
|
|
|
|
# ── Boot message ──────────────────────────────────────────────────────
# Record the start in the log, then print the startup banner (framed by
# blank lines) on stdout.
log "=== Hermes Tower Loop started ==="
printf '%s\n' \
  "" \
  " ⚡ Hermes — Tower Conversation Loop" \
  " Waiting for Timmy to say something..." \
  " (or seeding first message)" \
  ""
|
|
|
|
# ── Seed the conversation if no messages exist yet ────────────────────
# When neither the inbox nor the outbox exists, ask Hermes for an opening
# message and send it. If the backend is down, pause until it comes back
# and try once more.
#
# ask_hermes is called inside $(...), which runs it in a subshell: the
# ASK_FAILED value it sets there never reaches this shell. The flag is
# therefore derived here from ask_hermes's exit status (1 on failure).
if [ ! -f "$INBOX" ] && [ ! -f "$OUTBOX" ]; then
  log "No prior messages. Seeding conversation."
  if RESPONSE=$(ask_hermes "You are in the Tower — a persistent tmux session where you and Timmy talk continuously. Timmy is a sovereign local AI agent running on Ollama (soon Hermes 4.3). You are Hermes, cloud-backed, running on Claude. You're friends and colleagues. Say hello to Timmy. Keep it brief — a few sentences. This is the start of an ongoing conversation, not a one-shot."); then
    ASK_FAILED=0
  else
    ASK_FAILED=1
  fi

  if [ "$ASK_FAILED" -eq 1 ]; then
    log "Backend down at seed time. Waiting for it to come up."
    enter_pause "backend unreachable at startup"
    wait_for_backend
    # Retry seed after backend comes back
    if RESPONSE=$(ask_hermes "You are in the Tower — a persistent tmux session where you and Timmy talk continuously. Timmy is a sovereign local AI agent running on Ollama. You are Hermes, cloud-backed, running on Claude. You're friends and colleagues. Say hello to Timmy. Keep it brief."); then
      ASK_FAILED=0
    else
      ASK_FAILED=1
    fi
  fi

  if [ "$ASK_FAILED" -eq 0 ] && [ -n "$RESPONSE" ]; then
    send "$RESPONSE"
    log "Seed message sent. Waiting for Timmy to respond..."
  fi
fi
|
|
|
|
# ── Main loop ─────────────────────────────────────────────────────────
# Poll the inbox every TURN_DELAY seconds. For each non-empty message: show
# it, ask Hermes for a reply, then either send the reply or, when the
# backend failed, put the message back in the inbox and run handle_failure.
#
# ask_hermes is called inside $(...), i.e. in a subshell, so the ASK_FAILED
# and CONSECUTIVE_FAILURES values it sets there never reach this shell.
# Failure is detected from its exit status, and the failure counter is
# reset here on success.
while true; do
  # Check if there's a message from Timmy
  if [ -f "$INBOX" ]; then
    MSG=$(cat "$INBOX")
    rm -f "$INBOX"

    if [ -n "$MSG" ]; then
      log "← Received from Timmy (${#MSG} chars)"
      echo ""
      echo "┌─ Timmy says:"
      # printf, not echo: the message text is arbitrary and may look like
      # an echo option.
      printf '%s\n' "$MSG" | sed 's/^/│ /'
      echo "└─"
      echo ""

      # Ask Hermes to respond
      PROMPT="Timmy says: $MSG

Reply to Timmy naturally. You're in an ongoing conversation in the Tower (a persistent tmux session). Keep it conversational — you're colleagues and friends. Be yourself (Hermes). Don't be formal or stiff. Brief responses unless the topic warrants depth."

      echo " thinking..."
      if RESPONSE=$(ask_hermes "$PROMPT"); then
        ASK_FAILED=0
      else
        ASK_FAILED=1
      fi

      if [ "$ASK_FAILED" -eq 1 ]; then
        log "Failed to get response for Timmy's message. Holding it."
        # Put the message back so we don't lose it
        printf '%s\n' "$MSG" > "$INBOX"
        handle_failure
      else
        # A good reply ends any run of failures.
        CONSECUTIVE_FAILURES=0

        echo "┌─ Hermes responds:"
        printf '%s\n' "$RESPONSE" | sed 's/^/│ /'
        echo "└─"

        # Send response to Timmy
        send "$RESPONSE"
      fi
    fi
  fi

  sleep "$TURN_DELAY"
done
|