feat: workspace isolation + honest success metrics (#186)

## Workspace Isolation No agent touches ~/Timmy-Time-dashboard anymore. Each agent gets a fully isolated clone under /tmp/timmy-agents/ with its own port, data directory, and TIMMY_HOME. - scripts/agent_workspace.sh: init, reset, branch, destroy per agent - Loop prompt updated: workspace paths replace worktree paths - Smoke tests run in isolated /tmp/timmy-agents/smoke/repo ## Honest Success Metrics Cycle success now requires BOTH hermes clean exit AND main green (smoke test passes). Tracks main_green_rate separately from hermes_clean_rate in summary.json. Follows from PR #162 (triage + retro system). Co-authored-by: Kimi Agent <kimi@timmy.local> Reviewed-on: http://localhost:3000/rockachopa/Timmy-time-dashboard/pulls/186 Co-authored-by: hermes <hermes@timmy.local> Co-committed-by: hermes <hermes@timmy.local>
2026-03-15 12:25:27 -04:00
parent f6d74b9f1d
commit 56b08658b7
2 changed files with 272 additions and 3 deletions
--- a/scripts/agent_workspace.sh
+++ b/scripts/agent_workspace.sh
@@ -0,0 +1,245 @@
+#!/usr/bin/env bash
+# ── Agent Workspace Manager ────────────────────────────────────────────
+# Creates and maintains fully isolated environments per agent.
+# ~/Timmy-Time-dashboard is SACRED — never touched by agents.
+#
+# Each agent gets:
+#   - Its own git clone (from Gitea, not the local repo)
+#   - Its own port range (no collisions)
+#   - Its own data/ directory (databases, files)
+#   - Its own TIMMY_HOME (approvals.db, etc.)
+#   - Shared Ollama backend (single GPU, shared inference)
+#   - Shared Gitea (single source of truth for issues/PRs)
+#
+# Layout:
+#   /tmp/timmy-agents/
+#     hermes/           — Hermes loop orchestrator
+#       repo/           — git clone
+#       home/           — TIMMY_HOME (approvals.db, etc.)
+#       env.sh          — source this for agent's env vars
+#     kimi-0/           — Kimi pane 0
+#       repo/
+#       home/
+#       env.sh
+#     ...
+#     smoke/            — dedicated for smoke-testing main
+#       repo/
+#       home/
+#       env.sh
+#
+# Usage:
+#   agent_workspace.sh init <agent>          — create or refresh
+#   agent_workspace.sh reset <agent>         — hard reset to origin/main
+#   agent_workspace.sh branch <agent> <br>   — fresh branch from main
+#   agent_workspace.sh path <agent>          — print repo path
+#   agent_workspace.sh env <agent>           — print env.sh path
+#   agent_workspace.sh init-all              — init all workspaces
+#   agent_workspace.sh destroy <agent>       — remove workspace entirely
+# ───────────────────────────────────────────────────────────────────────
+
+set -o pipefail
+
+CANONICAL="$HOME/Timmy-Time-dashboard"
+AGENTS_DIR="/tmp/timmy-agents"
+GITEA_REMOTE="http://localhost:3000/rockachopa/Timmy-time-dashboard.git"
+TOKEN_FILE="$HOME/.hermes/gitea_token"
+
+# ── Port allocation (each agent gets a unique range) ──────────────────
+# Dashboard ports: 8100, 8101, 8102, ... (avoids real dashboard on 8000)
+# Serve ports:     8200, 8201, 8202, ...
+agent_index() {
+    # Print the port-offset slot for agent name $1.
+    #   hermes -> 0, kimi-0..kimi-3 -> 1..4, smoke -> 9.
+    # Any other name prints 0, i.e. the same slot as hermes.
+    local slot=0
+    case "$1" in
+        kimi-[0-3]) slot=$(( ${1#kimi-} + 1 )) ;;
+        smoke)      slot=9 ;;
+    esac
+    echo "$slot"
+}
+
+get_dashboard_port() { echo $(( 8100 + $(agent_index "$1") )); }
+get_serve_port()     { echo $(( 8200 + $(agent_index "$1") )); }
+
+log() { echo "[workspace] $*"; }
+
+# ── Get authenticated remote URL ──────────────────────────────────────
+get_remote_url() {
+    # Print the clone URL: the token-authenticated form when TOKEN_FILE exists
+    # and yields a non-empty token, otherwise the plain GITEA_REMOTE.
+    local token=""
+    if [ -f "$TOKEN_FILE" ]; then
+        # Read errors are swallowed; an unreadable file behaves like an empty one.
+        token=$(cat "$TOKEN_FILE" 2>/dev/null || true)
+    fi
+
+    if [ -z "$token" ]; then
+        echo "$GITEA_REMOTE"
+        return
+    fi
+
+    echo "http://hermes:${token}@localhost:3000/rockachopa/Timmy-time-dashboard.git"
+}
+
+# ── Create env.sh for an agent ────────────────────────────────────────
+write_env() {
+    # Generate $AGENTS_DIR/<agent>/env.sh for agent $1.
+    # The file exports the agent's workspace paths, its two ports, the shared
+    # Ollama/Gitea URLs and test-mode flags, then changes into the clone.
+    local agent="$1"
+    local ws="$AGENTS_DIR/$agent"
+    local repo="$ws/repo"
+    local home="$ws/home"
+    local dash_port serve_port
+    dash_port=$(get_dashboard_port "$agent")
+    serve_port=$(get_serve_port "$agent")
+
+    # The heredoc delimiter is unquoted on purpose: $repo, $home, $agent and
+    # the ports are expanded now, so env.sh contains literal values.
+    cat > "$ws/env.sh" << EOF
+# Auto-generated agent environment — source this before running Timmy
+# Agent: $agent
+
+export TIMMY_WORKSPACE="$repo"
+export TIMMY_HOME="$home"
+export TIMMY_AGENT_NAME="$agent"
+
+# Ports (isolated per agent)
+export PORT=$dash_port
+export TIMMY_SERVE_PORT=$serve_port
+
+# Ollama (shared — single GPU)
+export OLLAMA_URL="http://localhost:11434"
+
+# Gitea (shared — single source of truth)
+export GITEA_URL="http://localhost:3000"
+
+# Test mode defaults
+export TIMMY_TEST_MODE=1
+export TIMMY_DISABLE_CSRF=1
+export TIMMY_SKIP_EMBEDDINGS=1
+
+# Override data paths to stay inside the clone
+export TIMMY_DATA_DIR="$repo/data"
+export TIMMY_BRAIN_DB="$repo/data/brain.db"
+
+# Working directory
+cd "$repo"
+EOF
+
+    chmod +x "$ws/env.sh"
+}
+
+# ── Init ──────────────────────────────────────────────────────────────
+init_workspace() {
+    # Create or refresh the workspace for agent $1.
+    #
+    # Existing clone: repoint origin, fetch, force-checkout main, hard-reset it
+    # to origin/main and clean untracked files (data/ is preserved).
+    # No clone yet:   clone from Gitea.
+    # Afterwards the data/ directory and env.sh are (re)created.
+    #
+    # Returns 0 when the workspace is ready, 1 when the clone or the reset to
+    # origin/main failed (an error is logged to stderr in that case).
+    local agent="$1"
+    local ws="$AGENTS_DIR/$agent"
+    local repo="$ws/repo"
+    local home="$ws/home"
+    local remote
+    remote=$(get_remote_url)
+
+    if ! mkdir -p "$ws" "$home"; then
+        log "$agent: ERROR: cannot create $ws" >&2
+        return 1
+    fi
+
+    # All git calls use -C "$repo" instead of `cd`: if the directory cannot be
+    # entered, git fails instead of running reset/clean in the caller's cwd.
+    # git's stderr stays suppressed because $remote may embed the Gitea token.
+    if [ -d "$repo/.git" ]; then
+        log "$agent: refreshing existing clone..."
+        git -C "$repo" remote set-url origin "$remote" 2>/dev/null
+        if ! git -C "$repo" fetch origin --prune --quiet 2>/dev/null; then
+            # Best effort: continue with the last-fetched origin/main.
+            log "$agent: WARNING: fetch failed; using last-fetched origin/main" >&2
+        fi
+        # --force so a dirty tree cannot block the switch; otherwise the
+        # reset below would rewrite whatever branch happened to be checked out.
+        if ! git -C "$repo" checkout --force --quiet main 2>/dev/null \
+            || ! git -C "$repo" reset --hard --quiet origin/main 2>/dev/null; then
+            log "$agent: ERROR: could not reset $repo to origin/main" >&2
+            return 1
+        fi
+        git -C "$repo" clean -fdx -e data/ --quiet 2>/dev/null
+    else
+        log "$agent: cloning from Gitea..."
+        # Stop here on failure: creating repo/data after a failed clone would
+        # leave a non-empty repo/ and make every later clone attempt fail.
+        if ! git clone "$remote" "$repo" --quiet 2>/dev/null; then
+            log "$agent: ERROR: clone into $repo failed" >&2
+            return 1
+        fi
+        git -C "$repo" fetch origin --prune --quiet 2>/dev/null
+    fi
+
+    # Ensure data directory exists
+    mkdir -p "$repo/data"
+
+    # Write env file
+    write_env "$agent"
+
+    log "$agent: ready at $repo (port $(get_dashboard_port "$agent"))"
+}
+
+# ── Reset ─────────────────────────────────────────────────────────────
+reset_workspace() {
+    # Hard-reset the clone of agent $1 to origin/main.
+    #
+    # If no clone exists yet this delegates to init_workspace. Otherwise any
+    # in-progress merge/rebase/cherry-pick is aborted, origin is fetched, main
+    # is force-checked-out and reset to origin/main, and untracked files are
+    # removed (data/ is preserved).
+    #
+    # Returns 1 (with an error on stderr) when main could not be reset;
+    # success is only logged when the reset actually happened.
+    local agent="$1"
+    local repo="$AGENTS_DIR/$agent/repo"
+
+    if [ ! -d "$repo/.git" ]; then
+        init_workspace "$agent"
+        return
+    fi
+
+    # git -C instead of `cd`: a failed directory change must never let the
+    # destructive reset/clean below run in the caller's working directory.
+    git -C "$repo" merge --abort 2>/dev/null || true
+    git -C "$repo" rebase --abort 2>/dev/null || true
+    git -C "$repo" cherry-pick --abort 2>/dev/null || true
+
+    if ! git -C "$repo" fetch origin --prune --quiet 2>/dev/null; then
+        # Best effort: continue with the last-fetched origin/main.
+        log "$agent: WARNING: fetch failed; using last-fetched origin/main" >&2
+    fi
+
+    # --force so local modifications cannot block the switch to main; without
+    # it a failed checkout would make the reset rewrite the current branch.
+    if ! git -C "$repo" checkout --force --quiet main 2>/dev/null \
+        || ! git -C "$repo" reset --hard --quiet origin/main 2>/dev/null; then
+        log "$agent: ERROR: could not reset $repo to origin/main" >&2
+        return 1
+    fi
+    git -C "$repo" clean -fdx -e data/ --quiet 2>/dev/null
+
+    log "$agent: reset to origin/main"
+}
+
+# ── Branch ────────────────────────────────────────────────────────────
+branch_workspace() {
+    # Put the clone of agent $1 on a fresh branch $2 created from origin/main.
+    #
+    # The workspace is initialised first if it has no clone. An existing
+    # branch of the same name is discarded and recreated.
+    #
+    # Returns 1 (with an error on stderr) when there is no clone to work in
+    # or the branch could not be created.
+    local agent="$1"
+    local branch="$2"
+    local repo="$AGENTS_DIR/$agent/repo"
+
+    if [ ! -d "$repo/.git" ]; then
+        init_workspace "$agent"
+    fi
+    if [ ! -d "$repo/.git" ]; then
+        log "$agent: ERROR: no clone at $repo; cannot create branch $branch" >&2
+        return 1
+    fi
+
+    # git -C instead of `cd`: a failed directory change must never let branch
+    # manipulation happen in the caller's working directory.
+    if ! git -C "$repo" fetch origin --prune --quiet 2>/dev/null; then
+        # Best effort: branch from the last-fetched origin/main.
+        log "$agent: WARNING: fetch failed; using last-fetched origin/main" >&2
+    fi
+
+    # checkout -B creates the branch or resets it if it already exists. Unlike
+    # `branch -D` + `checkout -b`, this also works when $branch is the branch
+    # currently checked out (which `branch -D` refuses to delete).
+    if ! git -C "$repo" checkout --quiet -B "$branch" origin/main 2>/dev/null; then
+        log "$agent: ERROR: could not create branch $branch from origin/main" >&2
+        return 1
+    fi
+
+    log "$agent: on branch $branch (from origin/main)"
+}
+
+# ── Path ──────────────────────────────────────────────────────────────
+print_path() {
+    # Print the repo directory of agent $1 (the path is not checked for existence).
+    printf '%s\n' "$AGENTS_DIR/$1/repo"
+}
+
+print_env() {
+    # Print the env.sh path of agent $1 (the path is not checked for existence).
+    printf '%s\n' "$AGENTS_DIR/$1/env.sh"
+}
+
+# ── Init all ──────────────────────────────────────────────────────────
+init_all() {
+    for agent in hermes kimi-0 kimi-1 kimi-2 kimi-3 smoke; do
+        init_workspace "$agent"
+    done
+    log "All workspaces initialized."
+    echo ""
+    echo "  Agent     Port   Path"
+    echo "  ──────    ────   ────"
+    for agent in hermes kimi-0 kimi-1 kimi-2 kimi-3 smoke; do
+        printf "  %-9s %d   %s\n" "$agent" "$(get_dashboard_port "$agent")" "$AGENTS_DIR/$agent/repo"
+    done
+}
+
+# ── Destroy ───────────────────────────────────────────────────────────
+destroy_workspace() {
+    local agent="$1"
+    local ws="$AGENTS_DIR/$agent"
+    if [ -d "$ws" ]; then
+        rm -rf "$ws"
+        log "$agent: destroyed"
+    else
+        log "$agent: nothing to destroy"
+    fi
+}
+
+# ── CLI dispatch ──────────────────────────────────────────────────────
+case "${1:-help}" in
+    init)     init_workspace "${2:?Usage: $0 init <agent>}" ;;
+    reset)    reset_workspace "${2:?Usage: $0 reset <agent>}" ;;
+    branch)   branch_workspace "${2:?Usage: $0 branch <agent> <branch>}" \
+                               "${3:?Usage: $0 branch <agent> <branch>}" ;;
+    path)     print_path "${2:?Usage: $0 path <agent>}" ;;
+    env)      print_env "${2:?Usage: $0 env <agent>}" ;;
+    init-all) init_all ;;
+    destroy)  destroy_workspace "${2:?Usage: $0 destroy <agent>}" ;;
+    *)
+        echo "Usage: $0 {init|reset|branch|path|env|init-all|destroy} [agent] [branch]"
+        echo ""
+        echo "Agents: hermes, kimi-0, kimi-1, kimi-2, kimi-3, smoke"
+        exit 1
+        ;;
+esac
--- a/scripts/cycle_retro.py
+++ b/scripts/cycle_retro.py
@@ -4,14 +4,25 @@
 Called after each cycle completes (success or failure).
 Appends a structured entry to .loop/retro/cycles.jsonl.

+SUCCESS DEFINITION:
+  A cycle is only "success" if BOTH conditions are met:
+    1. The hermes process exited cleanly (exit code 0)
+    2. Main is green (smoke test passes on main after merge)
+  
+  A cycle that merges a PR but leaves main red is a FAILURE.
+  The --main-green flag records the smoke test result.
+
 Usage:
-  python3 scripts/cycle_retro.py --cycle 42 --success --issue 85 \
+  python3 scripts/cycle_retro.py --cycle 42 --success --main-green --issue 85 \
      --type bug --duration 480 --tests-passed 1450 --tests-added 3 \
      --files-changed 2 --lines-added 45 --lines-removed 12 \
      --kimi-panes 2 --pr 155

  python3 scripts/cycle_retro.py --cycle 43 --failure --issue 90 \
      --type feature --duration 1200 --reason "tox failed: 3 errors"
+
+  python3 scripts/cycle_retro.py --cycle 44 --success --no-main-green \
+      --reason "PR merged but tests fail on main"
 """

 from __future__ import annotations
@@ -51,6 +62,10 @@ def parse_args() -> argparse.Namespace:
    p.add_argument("--pr", type=int, default=None, help="PR number if merged")
    p.add_argument("--reason", type=str, default="", help="Failure reason")
    p.add_argument("--notes", type=str, default="", help="Free-form observations")
+    p.add_argument("--main-green", action="store_true", default=False,
+                   help="Smoke test passed on main after this cycle")
+    p.add_argument("--no-main-green", dest="main_green", action="store_false",
+                   help="Smoke test failed or was not run")

    return p.parse_args()

@@ -73,6 +88,8 @@ def update_summary() -> None:

    successes = [e for e in recent if e.get("success")]
    failures = [e for e in recent if not e.get("success")]
+    main_green_count = sum(1 for e in recent if e.get("main_green"))
+    hermes_clean_count = sum(1 for e in recent if e.get("hermes_clean", e.get("success")))
    durations = [e["duration"] for e in recent if e.get("duration", 0) > 0]

    # Per-type stats
@@ -103,6 +120,8 @@ def update_summary() -> None:
        "window": len(recent),
        "total_cycles": len(entries),
        "success_rate": round(len(successes) / len(recent), 2) if recent else 0,
+        "main_green_rate": round(main_green_count / len(recent), 2) if recent else 0,
+        "hermes_clean_rate": round(hermes_clean_count / len(recent), 2) if recent else 0,
        "avg_duration_seconds": round(sum(durations) / len(durations)) if durations else 0,
        "total_lines_added": sum(e.get("lines_added", 0) for e in recent),
        "total_lines_removed": sum(e.get("lines_removed", 0) for e in recent),
@@ -121,12 +140,17 @@ def update_summary() -> None:
 def main() -> None:
    args = parse_args()

+    # A cycle is only truly successful if hermes exited clean AND main is green
+    truly_success = args.success and args.main_green
+
    entry = {
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "cycle": args.cycle,
        "issue": args.issue,
        "type": args.type,
-        "success": args.success,
+        "success": truly_success,
+        "hermes_clean": args.success,
+        "main_green": args.main_green,
        "duration": args.duration,
        "tests_passed": args.tests_passed,
        "tests_added": args.tests_added,
@@ -135,7 +159,7 @@ def main() -> None:
        "lines_removed": args.lines_removed,
        "kimi_panes": args.kimi_panes,
        "pr": args.pr,
-        "reason": args.reason if args.failure else "",
+        "reason": args.reason if (args.failure or not args.main_green) else "",
        "notes": args.notes,
    }