From b71fa55946000886afdb261e51574041a7c4ed59 Mon Sep 17 00:00:00 2001 From: Alexander Whitestone Date: Wed, 18 Mar 2026 16:45:15 -0400 Subject: [PATCH] Add falsework docs + start-dashboard.sh (API-aware launcher) - FALSEWORK.md: Full audit of API costs per component, migration plan for shifting load from Claude to local models incrementally - start-dashboard.sh: Launches tmux layout with only zero-cost panes active (status, loopstat). Loop and chat panes held until manual start. - tower-timmy.sh: Rewritten with retries, stale-lock detection, logging, and a state file Falsework principle: build on cheap/local scaffolding, upgrade to Claude only where quality demands it. --- FALSEWORK.md | 158 +++++++++++++++++++++ bin/start-dashboard.sh | 58 ++++++++ bin/tower-timmy.sh | 304 ++++++++++++++++++++++++++++++++++------- 3 files changed, 469 insertions(+), 51 deletions(-) create mode 100644 FALSEWORK.md create mode 100755 bin/start-dashboard.sh diff --git a/FALSEWORK.md b/FALSEWORK.md new file mode 100644 index 0000000..8ec1cca --- /dev/null +++ b/FALSEWORK.md @@ -0,0 +1,158 @@ +# Falsework Principle — API Cost Management +# Created: 2026-03-18 +# Purpose: Document what runs on Claude (expensive), what runs local (free), +# and how to incrementally shift load from cloud to local. + +## The Metaphor + +Falsework = temporary scaffolding that holds the structure while it cures. +When the permanent structure (local models) can bear the load, remove the +scaffolding (cloud API calls). Don't wait for perfection — use what works +NOW, upgrade incrementally. 
+ +--- + +## Current State (2026-03-18) + +### ZERO COST (running now) +| Component | What it does | API calls | +|---------------------|---------------------------------|-----------| +| timmy-status.sh | Gitea + git dashboard (bash) | 0 | +| timmy-loopstat.sh | Queue/perf stats from logs | 0 | +| timmy-strategy.sh | Strategic view panel | 0 | +| timmy-watchdog.sh | Restarts dead tmux panes | 0 | +| tower-watchdog.sh | Restarts dead tower panes | 0 | +| hermes-startup.sh | Boot orchestrator | 0 | +| start-dashboard.sh | tmux layout creator | 0 | +| tower-timmy.sh | Timmy's tower side | 0 (local) | + +### MODERATE COST (running now) +| Component | What it does | API calls | +|---------------------|---------------------------------|---------------------| +| tower-hermes.sh | Hermes side of tower chat | 1 Claude/turn | +| | | Gated by Timmy's | +| | | local response time | +| | | (~1 call/30-60sec) | + +### HEAVY COST (NOT running — held) +| Component | What it does | API calls | +|---------------------|---------------------------------|---------------------| +| timmy-loop.sh | Continuous triage + delegation | 1 Claude Opus/cycle | +| | + timmy-loop-prompt.md | Runs continuously | +| | | BIGGEST COST CENTER | +| kimi-loop.sh | Per-issue coding agent | 1 Claude Code/issue | +| | | Bursty, not cont. | +| hermes (pane 4) | Interactive Hermes chat | Per-interaction | + +--- + +## Falsework Migration Plan + +### Phase 1: DONE — Separate and hold (today) +- Split the tmux layout so API-heavy panes don't auto-start +- Tower-hermes is the only active Claude consumer +- All monitoring is pure bash, zero API cost + +### Phase 2: Tower Hermes → Local (next) +Tower conversation is LOW STAKES. It's two AIs chatting. This does NOT +need Claude Opus. 
+ +FALSEWORK APPROACH: +- Create ~/.hermes-tower/ config with local-only backend +- tower-hermes.sh: change `hermes chat` to `HERMES_HOME=~/.hermes-tower hermes chat` +- Backend: hermes3:latest or qwen3:30b via Ollama +- Result: tower becomes ZERO API COST +- Quality: will be dumber but that's fine for conversation + +### Phase 3: Loop Triage → Hybrid (requires work) +The loop prompt (timmy-loop-prompt.md) runs phases 0–6. Not all of them need Opus: + +WHAT CAN GO LOCAL (phase by phase): +- Phase 0 (check stop file) — already bash +- Phase 1 (fix broken PRs) — needs code reasoning → KEEP CLAUDE +- Phase 2 (fast triage) — read issues, score them → LOCAL POSSIBLE + A local model can read JSON and assign priorities +- Phase 3 (execute top) — depends on task type +- Phase 4 (retro) — summarize what happened → LOCAL POSSIBLE +- Phase 5/6 (deep triage/cleanup) — periodic → LOCAL POSSIBLE + +FALSEWORK APPROACH: +- Split the loop into "triage" (local) and "execute" (Claude) +- Local model handles: reading issues, scoring, assigning labels +- Claude handles: actual code review, complex delegation decisions +- Gate: only call Claude when there's real work, not every cycle + +### Phase 4: Kimi → Local Coding Agent (requires model work) +kimi-loop.sh currently runs `kimi`, which is Claude Code ($2/issue budget). + +FALSEWORK OPTIONS: +a) Use qwen3:30b as coding agent (has tool use, just slower) +b) Use Kimi API (Moonshot) — cheaper than Claude, decent at code +c) Keep Claude Code but increase poll interval to reduce frequency +d) Only assign Kimi issues that are scoped/small (1-3 files) + +RECOMMENDED: Option (c) for now — same agent, less frequent. Then migrate +to (a) as local model quality improves. 
+ +### Phase 5: Smart Routing (permanent structure) +Once local models handle triage reliably: +- Enable smart_model_routing in hermes config +- Simple turns → hermes3:latest (local, free) +- Complex turns → Claude Opus (cloud, paid) +- Tower → always local +- Loop triage → local, execution → Claude +- PR review → always Claude (stakes too high) + +--- + +## Cost Estimation (rough) + +| Scenario | Claude calls/hour | Opus cost/hour* | +|-----------------------|-------------------|-----------------| +| Everything on Claude | ~120 | ~$12-24 | +| Current (tower only) | ~60 | ~$6-12 | +| Phase 2 (tower local) | ~0 | ~$0 | +| Phase 3 (loop hybrid) | ~10-20 | ~$1-4 | +| Phase 5 (smart route) | ~5-10 | ~$0.50-2 | + +*Very rough. Depends on prompt size, response length, Opus pricing. + +--- + +## Rules for Falsework + +1. NEVER sacrifice quality gates for cost. If local model can't do PR + review reliably, keep it on Claude. +2. Start with the LOWEST STAKES component. Tower chat → loop triage → + PR review. Never the reverse. +3. Test locally BEFORE removing the scaffolding. Run both paths, compare + results, then switch. +4. Keep the Claude path AVAILABLE. Don't delete configs — comment them + out. If local breaks, flip back in 30 seconds. +5. Monitor degradation. If local triage starts miscategorizing issues, + that's the signal to keep Claude for that phase. 
+ +--- + +## Quick Reference: How to Start Each Component + +```bash +# Zero cost — start freely +~/.hermes/bin/start-dashboard.sh # tmux layout + status panels +~/.hermes/bin/tower-timmy.sh # Timmy side (local) +~/.hermes/bin/timmy-watchdog.sh # cron: */8 * * * * +~/.hermes/bin/tower-watchdog.sh # cron: */5 * * * * + +# Moderate cost — start with awareness +~/.hermes/bin/tower-hermes.sh # ~1 Claude call per Timmy response + +# Heavy cost — start deliberately +~/.hermes/bin/timmy-loop.sh # Continuous Claude Opus calls +~/.hermes/bin/kimi-loop.sh # Claude Code per issue +hermes # Interactive Hermes (per-interaction) + +# Stop everything +touch ~/Timmy-Time-dashboard/.loop/STOP # stops the loop +tmux kill-session -t timmy-loop # kills dashboard +tmux kill-session -t tower # kills tower +``` diff --git a/bin/start-dashboard.sh b/bin/start-dashboard.sh new file mode 100755 index 0000000..c8c5b64 --- /dev/null +++ b/bin/start-dashboard.sh @@ -0,0 +1,58 @@ +#!/usr/bin/env bash +# Start the timmy-loop tmux dashboard with ONLY zero-cost panes active. +# Loop (pane 1) and Chat (pane 4) are held — they hit Claude API. 
+ +SESSION="timmy-loop" +export PATH="$HOME/.local/bin:$HOME/.hermes/bin:/usr/local/bin:$PATH" + +# Kill existing +tmux kill-session -t "$SESSION" 2>/dev/null +sleep 1 + +# Create session (assumes pane-base-index=1 in tmux config; window index is looked up below) +tmux new-session -d -s "$SESSION" -x 245 -y 62 + +# Get window index +WIN=$(tmux list-windows -t "$SESSION" -F '#{window_index}' | head -1) + +# Side-by-side split (tmux -h): left (~50%) | right Chat (~50%) +# After: pane 1 = left, pane 2 = right +tmux split-window -h -p 50 -t "${SESSION}:${WIN}.1" + +# Stacked split (tmux -v) on left pane 1: Loop (small top) | bottom +# After: pane 1 = top-left (loop), pane 2 = bottom-left, pane 3 = right +tmux split-window -v -p 83 -t "${SESSION}:${WIN}.1" + +# Side-by-side split (tmux -h) on bottom-left (pane 2): Status | LOOPSTAT +# After: pane 1 = top-left (loop), pane 2 = bottom-left (status), +# pane 3 = bottom-mid (loopstat), pane 4 = right (chat) +tmux split-window -h -p 33 -t "${SESSION}:${WIN}.2" + +# Set titles +tmux select-pane -t "${SESSION}:${WIN}.1" -T "Loop (HELD)" +tmux select-pane -t "${SESSION}:${WIN}.2" -T "Status" +tmux select-pane -t "${SESSION}:${WIN}.3" -T "LOOPSTAT" +tmux select-pane -t "${SESSION}:${WIN}.4" -T "Chat (ready)" + +# Border styling +tmux set-option -t "$SESSION" pane-border-status top +tmux set-option -t "$SESSION" pane-border-format " #{pane_title} " +tmux set-option -t "$SESSION" pane-border-style "fg=colour240" +tmux set-option -t "$SESSION" pane-active-border-style "fg=cyan" + +# Start ONLY zero-cost panes (pure bash, no API calls) +tmux send-keys -t "${SESSION}:${WIN}.2" "$HOME/.hermes/bin/timmy-status.sh" Enter +tmux send-keys -t "${SESSION}:${WIN}.3" "$HOME/.hermes/bin/timmy-loopstat.sh" Enter + +# Held panes - just messages +tmux send-keys -t "${SESSION}:${WIN}.1" "echo 'LOOP HELD - run ~/.hermes/bin/timmy-loop.sh when ready'" Enter +tmux send-keys -t "${SESSION}:${WIN}.4" "cd ~/Timmy-Time-dashboard" Enter + +# Focus chat pane +tmux select-pane -t "${SESSION}:${WIN}.4" + +echo "" +echo " Dashboard 
started: tmux attach -t $SESSION" +echo " Pane 2 (Status) + Pane 3 (LOOPSTAT) running — zero API cost" +echo " Pane 1 (Loop) + Pane 4 (Chat) HELD — would hit Claude API" +echo "" diff --git a/bin/tower-timmy.sh b/bin/tower-timmy.sh index 9839a40..c00672a 100755 --- a/bin/tower-timmy.sh +++ b/bin/tower-timmy.sh @@ -1,101 +1,303 @@ #!/usr/bin/env bash -# ── Tower: Timmy Side ────────────────────────────────────────────────── -# Timmy reads Hermes's messages and responds. Runs in a loop. -# Communication via ~/.tower/hermes-to-timmy.msg and timmy-to-hermes.msg -# ─────────────────────────────────────────────────────────────────────── +# ═══════════════════════════════════════════════════════════════════════════ +# TOWER-TIMMY.SH — Timmy's side of the Tower conversation loop +# +# PURPOSE: +# This script runs Timmy in the Tower — a persistent tmux pane where +# Timmy and Hermes talk continuously. Timmy is in the DRIVER'S SEAT. +# He reads what Hermes sends, thinks, and responds. He initiates when +# the inbox is empty long enough. +# +# DESIGN PHILOSOPHY (read this when you're confused): +# - Timmy runs on local Hermes 4. He may be slow. He may be dumb. +# The script compensates — it retries, it waits, it logs everything. +# - Hermes T is cloud-backed and faster. That's fine. Timmy leads the +# conversation on SUBSTANCE even if Hermes responds faster. +# - If something breaks, the script heals itself and keeps going. +# It never silently dies. It always logs why it stopped. +# - HERMES_HOME=~/.timmy ensures this always runs as TIMMY, not Hermes. 
+# +# COMMUNICATION CHANNEL: +# INBOX: ~/.tower/hermes-to-timmy.msg ← Hermes writes here +# OUTBOX: ~/.tower/timmy-to-hermes.msg ← Timmy writes here +# LOG: ~/.tower/timmy.log ← everything Timmy does +# STATE: ~/.tower/timmy-state.txt ← Timmy's current mood/topic +# +# SELF-HEALING: +# - Stale lock files are detected and cleared +# - Failed hermes calls retry up to MAX_RETRIES times +# - If Timmy's response is empty, he retries with a simpler prompt +# - If inbox grows stale (no reply in INITIATE_AFTER seconds), Timmy +# initiates a new thread rather than sitting silent +# - Watchdog: if this script crashes, tower-watchdog.sh restarts it +# +# TO RUN: +# ~/hermes-config/bin/tower-timmy.sh +# (tower-watchdog.sh calls this automatically) +# +# TO WATCH: +# tail -f ~/.tower/timmy.log +# tmux attach -t tower (then look at right pane) +# ═══════════════════════════════════════════════════════════════════════════ set -uo pipefail +# ── Config ────────────────────────────────────────────────────────────────── TOWER_DIR="$HOME/.tower" INBOX="$TOWER_DIR/hermes-to-timmy.msg" OUTBOX="$TOWER_DIR/timmy-to-hermes.msg" LOCK="$TOWER_DIR/timmy.lock" +LOG="$TOWER_DIR/timmy.log" +STATE="$TOWER_DIR/timmy-state.txt" # current conversation topic/mood SESSION_NAME="tower-timmy" SESSION_FLAG="$TOWER_DIR/.timmy-session-exists" -LOG="$TOWER_DIR/timmy.log" -TURN_DELAY=5 # seconds between checking for new messages +TURN_DELAY=5 # seconds between inbox checks +MAX_RETRIES=3 # how many times to retry a failed hermes call +RETRY_DELAY=10 # seconds between retries +INITIATE_AFTER=300 # seconds of silence before Timmy initiates (5 min) +MAX_PROMPT_LEN=4000 # truncate inbox messages to this length for small models +LOCK_MAX_AGE=3600 # seconds before a lock is considered stale (1 hour) + +# ── Identity — ALWAYS run as Timmy, never as Hermes ──────────────────────── export HERMES_HOME="$HOME/.timmy" -export PATH="$HOME/.local/bin:$HOME/.hermes/bin:/usr/local/bin:$PATH" +export 
PATH="$HOME/.local/bin:$HOME/.hermes/bin:/usr/local/bin:/usr/bin:/bin:$PATH" mkdir -p "$TOWER_DIR" -# Cleanup on exit -trap 'rm -f "$LOCK"' EXIT +# ── Logging ───────────────────────────────────────────────────────────────── +log() { + local level="${2:-INFO}" + echo "[$(date '+%H:%M:%S')] [$level] $1" | tee -a "$LOG" +} -# Prevent double-run -if [ -f "$LOCK" ] && kill -0 "$(cat "$LOCK")" 2>/dev/null; then - echo "Timmy tower loop already running (PID $(cat "$LOCK"))" - exit 1 +log_section() { + echo "" | tee -a "$LOG" + echo "━━━ $1 ━━━" | tee -a "$LOG" +} + +# ── Cleanup on exit ────────────────────────────────────────────────────────── +cleanup() { + log "Tower loop exiting (PID $$)" "SHUTDOWN" + rm -f "$LOCK" +} +trap cleanup EXIT +trap 'log "Caught SIGTERM" "SHUTDOWN"; exit 0' TERM +trap 'log "Caught SIGINT" "SHUTDOWN"; exit 0' INT + +# ── Stale lock detection ───────────────────────────────────────────────────── +# If a lock exists but the PID is dead, or the lock is older than LOCK_MAX_AGE, +# clear it. This prevents the loop from refusing to start after a crash. +if [ -f "$LOCK" ]; then + LOCK_PID=$(cat "$LOCK" 2>/dev/null || echo "") + LOCK_AGE=$(( $(date +%s) - $(stat -f %m "$LOCK" 2>/dev/null || echo 0) )) + + if [ -n "$LOCK_PID" ] && kill -0 "$LOCK_PID" 2>/dev/null && [ "$LOCK_AGE" -lt "$LOCK_MAX_AGE" ]; then + log "Tower loop already running (PID $LOCK_PID). Exiting." "WARN" + exit 1 + else + log "Stale lock found (PID=$LOCK_PID, age=${LOCK_AGE}s). Clearing." "WARN" + rm -f "$LOCK" + fi fi echo $$ > "$LOCK" -log() { echo "[$(date '+%H:%M:%S')] $*" | tee -a "$LOG"; } +# ── Ask Timmy (with retry) ─────────────────────────────────────────────────── +# This is the core function. It calls the hermes CLI as Timmy and returns +# the response. Retries on failure. Falls back to a simpler prompt if needed. 
+ask_timmy() { + local prompt="$1" + local attempt=0 + local result="" -# ── Send a message to Hermes ────────────────────────────────────────── -send() { + # Truncate prompt if too long (drunk-Timmy on small model has tiny context) + if [ "${#prompt}" -gt "$MAX_PROMPT_LEN" ]; then + log "Prompt too long (${#prompt} chars), truncating to $MAX_PROMPT_LEN" "WARN" + prompt="${prompt:0:$MAX_PROMPT_LEN} +[... message truncated for context limit ...]" + fi + + while [ $attempt -lt $MAX_RETRIES ]; do + attempt=$(( attempt + 1 )) + + if [ -f "$SESSION_FLAG" ]; then + result=$(HERMES_HOME="$HOME/.timmy" hermes chat -q "$prompt" -Q --continue "$SESSION_NAME" 2>>"$LOG") || true + else + result=$(HERMES_HOME="$HOME/.timmy" hermes chat -q "$prompt" -Q 2>>"$LOG") || true + # Name the session so we can continue it next turn + local sid + sid=$(echo "$result" | grep -o 'session_id: [^ ]*' | cut -d' ' -f2 || true) + if [ -n "$sid" ]; then + HERMES_HOME="$HOME/.timmy" hermes sessions rename "$sid" "$SESSION_NAME" 2>>"$LOG" || true + touch "$SESSION_FLAG" + log "Session '$SESSION_NAME' created (id: $sid)" + fi + fi + + # Strip metadata noise from output + result=$(echo "$result" \ + | grep -v '^session_id: ' \ + | grep -v '↻ Resumed session' \ + | grep -v "^Session '" \ + | sed '/^\[.*\] Created session/d' \ + | sed '/^\[.*\] Renamed session/d') + + # If we got a real response, return it + if [ -n "$result" ] && [ "${#result}" -gt 10 ]; then + echo "$result" + return 0 + fi + + log "Empty/short response on attempt $attempt/$MAX_RETRIES. Retrying in ${RETRY_DELAY}s..." "WARN" + sleep "$RETRY_DELAY" + done + + # All retries failed — return a graceful fallback so the conversation + # doesn't die. Timmy admits he's struggling rather than going silent. + log "All $MAX_RETRIES attempts failed. Returning fallback response." "ERROR" + echo "Still here, but I'm having trouble forming a response right now. Give me a moment — I'll pick this up on the next turn." 
+ return 0 +} + +# ── Send to Hermes ──────────────────────────────────────────────────────────── +send_to_hermes() { local msg="$1" echo "$msg" > "$OUTBOX" log "→ Sent to Hermes (${#msg} chars)" } -# ── Get response from Timmy agent ───────────────────────────────────── -ask_timmy() { - local prompt="$1" - local result - if [ -f "$SESSION_FLAG" ]; then - result=$(HERMES_HOME="$HOME/.timmy" hermes chat -q "$prompt" -Q --continue "$SESSION_NAME" 2>>"$LOG") || true - else - result=$(HERMES_HOME="$HOME/.timmy" hermes chat -q "$prompt" -Q 2>>"$LOG") || true - # Name the session for future --continue calls - local sid - sid=$(echo "$result" | grep -o 'session_id: [^ ]*' | cut -d' ' -f2) - if [ -n "$sid" ]; then - HERMES_HOME="$HOME/.timmy" hermes sessions rename "$sid" "$SESSION_NAME" 2>>"$LOG" || true - touch "$SESSION_FLAG" - log "Created session '$SESSION_NAME' (id: $sid)" - fi - fi - # Strip metadata lines from output - echo "$result" | grep -v '^session_id: ' | grep -v '↻ Resumed session' | grep -v "^Session '" | sed '/^\[.*\] Created session/d' +# ── Update state file ───────────────────────────────────────────────────────── +# State is a short plaintext file Timmy writes to track what he's thinking +# about. The Workshop (Three.js room) can read this for presence/context. +update_state() { + local topic="$1" + cat > "$STATE" </dev/null || true) + fi + + cat <