feat: workspace isolation + honest success metrics (#186)
Some checks failed
Tests / lint (push) Successful in 3s
Tests / test (push) Has been cancelled

## Workspace Isolation

No agent touches ~/Timmy-Time-dashboard anymore. Each agent gets a fully isolated clone under /tmp/timmy-agents/ with its own port, data directory, and TIMMY_HOME.

- scripts/agent_workspace.sh: init, reset, branch, destroy per agent
- Loop prompt updated: workspace paths replace worktree paths
- Smoke tests run in isolated /tmp/timmy-agents/smoke/repo

## Honest Success Metrics

Cycle success now requires BOTH hermes clean exit AND main green (smoke test passes). Tracks main_green_rate separately from hermes_clean_rate in summary.json.

Follows from PR #162 (triage + retro system).

Co-authored-by: Kimi Agent <kimi@timmy.local>
Reviewed-on: http://localhost:3000/rockachopa/Timmy-time-dashboard/pulls/186
Co-authored-by: hermes <hermes@timmy.local>
Co-committed-by: hermes <hermes@timmy.local>
This commit was merged in pull request #186.
This commit is contained in:
2026-03-15 12:25:27 -04:00
committed by rockachopa
parent f6d74b9f1d
commit 56b08658b7
2 changed files with 272 additions and 3 deletions

245
scripts/agent_workspace.sh Normal file
View File

@@ -0,0 +1,245 @@
#!/usr/bin/env bash
# ── Agent Workspace Manager ────────────────────────────────────────────
# Creates and maintains fully isolated environments per agent.
# ~/Timmy-Time-dashboard is SACRED — never touched by agents.
#
# Each agent gets:
# - Its own git clone (from Gitea, not the local repo)
# - Its own port range (no collisions)
# - Its own data/ directory (databases, files)
# - Its own TIMMY_HOME (approvals.db, etc.)
# - Shared Ollama backend (single GPU, shared inference)
# - Shared Gitea (single source of truth for issues/PRs)
#
# Layout:
# /tmp/timmy-agents/
# hermes/ — Hermes loop orchestrator
# repo/ — git clone
# home/ — TIMMY_HOME (approvals.db, etc.)
# env.sh — source this for agent's env vars
# kimi-0/ — Kimi pane 0
# repo/
# home/
# env.sh
# ...
# smoke/ — dedicated for smoke-testing main
# repo/
# home/
# env.sh
#
# Usage:
# agent_workspace.sh init <agent> — create or refresh
# agent_workspace.sh reset <agent> — hard reset to origin/main
# agent_workspace.sh branch <agent> <br> — fresh branch from main
# agent_workspace.sh path <agent> — print repo path
# agent_workspace.sh env <agent> — print env.sh path
# agent_workspace.sh init-all — init all workspaces
# agent_workspace.sh destroy <agent> — remove workspace entirely
# ───────────────────────────────────────────────────────────────────────
# Make a pipeline's exit status reflect a failure in any of its stages.
set -o pipefail
# The human-owned checkout. Not referenced anywhere else in the visible part
# of this script; per the header above it must never be touched by agents.
CANONICAL="$HOME/Timmy-Time-dashboard"
# Root directory that holds one sub-directory (repo/, home/, env.sh) per agent.
AGENTS_DIR="/tmp/timmy-agents"
# Unauthenticated clone URL; get_remote_url falls back to it when no token is found.
GITEA_REMOTE="http://localhost:3000/rockachopa/Timmy-time-dashboard.git"
# File whose contents get_remote_url reads as the Gitea access token.
TOKEN_FILE="$HOME/.hermes/gitea_token"
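# ── Port allocation (each known agent gets a unique port pair) ────────
# Dashboard ports: 8100 + index → 8100, 8101, 8102, ... (avoids real dashboard on 8000)
# Serve ports:     8200 + index → 8200, 8201, 8202, ...
# NOTE: an unrecognized agent name maps to index 0 and so shares hermes' ports.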
# Print the fixed slot number for an agent name on stdout.
#   hermes -> 0, kimi-N -> N+1 (N in 0..3), smoke -> 9
# Any other name (including an empty one) also prints 0.
agent_index() {
  local name="$1"
  case "$name" in
    hermes)
      printf '%d\n' 0
      ;;
    kimi-[0-3])
      # The numeric suffix of kimi-N, shifted by one past hermes' slot.
      printf '%d\n' "$(( ${name#kimi-} + 1 ))"
      ;;
    smoke)
      printf '%d\n' 9
      ;;
    *)
      printf '%d\n' 0
      ;;
  esac
}
# Print the dashboard port for an agent: 8100 plus the agent's index.
get_dashboard_port() {
  local base=8100
  printf '%d\n' "$(( base + $(agent_index "$1") ))"
}
# Print the serve port for an agent: 8200 plus the agent's index.
get_serve_port() {
  local base=8200
  printf '%d\n' "$(( base + $(agent_index "$1") ))"
}
# Write a status line to stdout, prefixed with the "[workspace]" tag.
# All arguments are joined with single spaces.
log() {
  printf '%s\n' "[workspace] $*"
}
# ── Get authenticated remote URL ──────────────────────────────────────
# Print the clone URL for the Gitea repository on stdout.
#
# Globals:  TOKEN_FILE (read)   - file containing the access token
#           GITEA_REMOTE (read) - unauthenticated clone URL
# Outputs:  GITEA_REMOTE with "hermes:<token>@" inserted after the scheme
#           when a non-empty token is available; GITEA_REMOTE unchanged
#           otherwise.
#
# The authenticated URL is derived from GITEA_REMOTE instead of being a
# second hard-coded copy, so the two can never drift apart.
get_remote_url() {
  local token=""
  if [ -f "$TOKEN_FILE" ]; then
    # An unreadable token file is treated the same as a missing one.
    token=$(cat "$TOKEN_FILE" 2>/dev/null || true)
    # Drop stray whitespace (CRLF endings, blank lines) that would
    # otherwise corrupt the URL.
    token="${token//[[:space:]]/}"
  fi

  # No usable token, or a remote without a scheme to splice into:
  # fall back to the plain URL.
  if [ -z "$token" ] || [[ "$GITEA_REMOTE" != *://* ]]; then
    echo "$GITEA_REMOTE"
    return
  fi

  echo "${GITEA_REMOTE%%://*}://hermes:${token}@${GITEA_REMOTE#*://}"
}
# ── Create env.sh for an agent ────────────────────────────────────────
# Generate <AGENTS_DIR>/<agent>/env.sh, the file an agent sources to get
# its isolated environment (paths, ports, shared service URLs) and to
# change into its clone.
#
# Globals:   AGENTS_DIR (read)
# Arguments: $1 - agent name
# Returns:   exit status of the final chmod
write_env() {
  local agent="$1"
  local workspace="$AGENTS_DIR/$agent"
  local checkout="$workspace/repo"
  local timmy_home="$workspace/home"
  local env_file="$workspace/env.sh"
  local dashboard_port serve_port
  dashboard_port=$(get_dashboard_port "$agent")
  serve_port=$(get_serve_port "$agent")

  # Unquoted delimiter: the variables below are expanded now, so the
  # generated file contains literal paths and port numbers.
  cat > "$env_file" << EOF
# Auto-generated agent environment — source this before running Timmy
# Agent: $agent
export TIMMY_WORKSPACE="$checkout"
export TIMMY_HOME="$timmy_home"
export TIMMY_AGENT_NAME="$agent"
# Ports (isolated per agent)
export PORT=$dashboard_port
export TIMMY_SERVE_PORT=$serve_port
# Ollama (shared — single GPU)
export OLLAMA_URL="http://localhost:11434"
# Gitea (shared — single source of truth)
export GITEA_URL="http://localhost:3000"
# Test mode defaults
export TIMMY_TEST_MODE=1
export TIMMY_DISABLE_CSRF=1
export TIMMY_SKIP_EMBEDDINGS=1
# Override data paths to stay inside the clone
export TIMMY_DATA_DIR="$checkout/data"
export TIMMY_BRAIN_DB="$checkout/data/brain.db"
# Working directory
cd "$checkout"
EOF

  chmod +x "$env_file"
}
# ── Init ──────────────────────────────────────────────────────────────
# Create an agent's workspace, or refresh it if a clone already exists.
#
# Globals:   AGENTS_DIR (read)
# Arguments: $1 - agent name
# Outputs:   progress lines on stdout; error lines on stderr
# Returns:   0 when the clone is on origin/main and env.sh was written,
#            non-zero as soon as a required step fails
# Side effect: on success the current directory is the agent's clone.
#
# git's own stderr stays suppressed because the remote URL may embed the
# access token; a failure is reported through log instead. Required steps
# are checked so that a failed clone or cd can no longer be followed by
# git commands running in the caller's directory, and so that "ready" is
# only logged for a workspace that really is ready.
init_workspace() {
  local agent="$1"
  local ws="$AGENTS_DIR/$agent"
  local repo="$ws/repo"
  local home="$ws/home"
  local remote
  remote=$(get_remote_url)

  if ! mkdir -p "$ws" "$home"; then
    log "$agent: ERROR: cannot create $ws" >&2
    return 1
  fi

  if [ -d "$repo/.git" ]; then
    log "$agent: refreshing existing clone..."
    if ! cd "$repo"; then
      log "$agent: ERROR: cannot enter $repo" >&2
      return 1
    fi
    # Token may have changed since the clone was made.
    git remote set-url origin "$remote" 2>/dev/null
    if ! git fetch origin --prune --quiet 2>/dev/null; then
      log "$agent: ERROR: git fetch failed" >&2
      return 1
    fi
    # --force: local edits are about to be discarded by the hard reset
    # anyway, and must not be allowed to block the switch to main.
    if ! git checkout --force --quiet main 2>/dev/null; then
      log "$agent: ERROR: cannot check out main" >&2
      return 1
    fi
    if ! git reset --hard --quiet origin/main 2>/dev/null; then
      log "$agent: ERROR: cannot reset to origin/main" >&2
      return 1
    fi
    # Best effort: remove untracked files but keep the agent's data/ directory.
    git clean -fdx -e data/ --quiet 2>/dev/null
  else
    log "$agent: cloning from Gitea..."
    if ! git clone --quiet "$remote" "$repo" 2>/dev/null; then
      log "$agent: ERROR: git clone failed" >&2
      return 1
    fi
    if ! cd "$repo"; then
      log "$agent: ERROR: cannot enter $repo" >&2
      return 1
    fi
    # Best effort: the clone has just fetched everything.
    git fetch origin --prune --quiet 2>/dev/null
  fi

  # Ensure data directory exists
  if ! mkdir -p "$repo/data"; then
    log "$agent: ERROR: cannot create $repo/data" >&2
    return 1
  fi

  # Write env file
  if ! write_env "$agent"; then
    log "$agent: ERROR: cannot write env.sh" >&2
    return 1
  fi

  log "$agent: ready at $repo (port $(get_dashboard_port "$agent"))"
}
# ── Reset ─────────────────────────────────────────────────────────────
# Bring an agent's clone back to a pristine copy of origin/main.
#
# Globals:   AGENTS_DIR (read)
# Arguments: $1 - agent name
# Outputs:   a confirmation line on stdout; error lines on stderr
# Returns:   0 when the clone is on main at origin/main, non-zero otherwise
#            (when no clone exists yet, the status of init_workspace)
# Side effect: the current directory becomes the agent's clone.
#
# The checkout is forced and checked: if switching to main failed (for
# example because of uncommitted edits), an unchecked `git reset --hard
# origin/main` would rewrite whatever branch happened to be checked out.
reset_workspace() {
  local agent="$1"
  local repo="$AGENTS_DIR/$agent/repo"

  if [ ! -d "$repo/.git" ]; then
    init_workspace "$agent"
    return
  fi

  if ! cd "$repo"; then
    log "$agent: ERROR: cannot enter $repo" >&2
    return 1
  fi

  # Abandon any half-finished operation; each of these fails harmlessly
  # when there is nothing to abort.
  git merge --abort 2>/dev/null || true
  git rebase --abort 2>/dev/null || true
  git cherry-pick --abort 2>/dev/null || true

  if ! git fetch origin --prune --quiet 2>/dev/null; then
    log "$agent: ERROR: git fetch failed" >&2
    return 1
  fi
  if ! git checkout --force --quiet main 2>/dev/null; then
    log "$agent: ERROR: cannot check out main" >&2
    return 1
  fi
  if ! git reset --hard --quiet origin/main 2>/dev/null; then
    log "$agent: ERROR: cannot reset to origin/main" >&2
    return 1
  fi
  # Best effort: remove untracked files but keep the agent's data/ directory.
  git clean -fdx -e data/ --quiet 2>/dev/null

  log "$agent: reset to origin/main"
}
# ── Branch ────────────────────────────────────────────────────────────
# Put an agent's clone on a fresh branch that starts at origin/main.
#
# Globals:   AGENTS_DIR (read)
# Arguments: $1 - agent name
#            $2 - branch name to create (an existing local branch of that
#                 name is reset to origin/main)
# Outputs:   a confirmation line on stdout; error lines on stderr
# Returns:   0 when the branch is checked out, non-zero otherwise
# Side effect: the current directory becomes the agent's clone.
#
# `git checkout -B` replaces the delete-then-create pair: `git branch -D`
# refuses to delete the branch that is currently checked out, after which
# `git checkout -b` fails because the name is taken, leaving the agent on
# the stale branch while success was still reported.
branch_workspace() {
  local agent="$1"
  local branch="$2"
  local repo="$AGENTS_DIR/$agent/repo"

  if [ ! -d "$repo/.git" ]; then
    init_workspace "$agent" || return
  fi

  if ! cd "$repo"; then
    log "$agent: ERROR: cannot enter $repo" >&2
    return 1
  fi
  if ! git fetch origin --prune --quiet 2>/dev/null; then
    log "$agent: ERROR: git fetch failed" >&2
    return 1
  fi
  if ! git checkout --quiet -B "$branch" origin/main 2>/dev/null; then
    log "$agent: ERROR: cannot create branch $branch from origin/main" >&2
    return 1
  fi

  log "$agent: on branch $branch (from origin/main)"
}
# ── Path ──────────────────────────────────────────────────────────────
# Print the path of the given agent's git clone (no existence check).
print_path() {
  printf '%s\n' "$AGENTS_DIR/$1/repo"
}
# Print the path of the given agent's env.sh (no existence check).
print_env() {
  printf '%s\n' "$AGENTS_DIR/$1/env.sh"
}
# ── Init all ──────────────────────────────────────────────────────────
# Initialize every known workspace, then print an agent/port/path table.
#
# Globals:  AGENTS_DIR (read)
# Outputs:  per-agent progress, a summary line, and the table on stdout;
#           a failure summary on stderr when any agent could not be set up
# Returns:  0 when every init_workspace call succeeded, 1 otherwise
#
# The roster is defined once and the loop variable is local, so the two
# loops cannot drift apart and nothing leaks into the caller's scope.
# A failing agent does not stop the remaining agents from being set up.
init_all() {
  local -a roster=(hermes kimi-0 kimi-1 kimi-2 kimi-3 smoke)
  local agent
  local failed=0

  for agent in "${roster[@]}"; do
    init_workspace "$agent" || failed=1
  done

  if [ "$failed" -eq 0 ]; then
    log "All workspaces initialized."
  else
    log "ERROR: one or more workspaces failed to initialize." >&2
  fi

  echo ""
  echo " Agent Port Path"
  echo " ────── ──── ────"
  for agent in "${roster[@]}"; do
    printf " %-9s %d %s\n" "$agent" "$(get_dashboard_port "$agent")" "$AGENTS_DIR/$agent/repo"
  done

  return "$failed"
}
# ── Destroy ───────────────────────────────────────────────────────────
# Remove an agent's workspace directory and everything in it.
#
# Globals:   AGENTS_DIR (read)
# Arguments: $1 - agent name; must be a single path component
# Outputs:   a status line on stdout; error lines on stderr
# Returns:   0 when the workspace was removed or did not exist,
#            1 when the name is rejected or the removal fails
#
# The name is validated before it reaches rm -rf: an empty name would
# resolve to AGENTS_DIR itself (wiping every workspace), and a name
# containing "/" or equal to "." / ".." could point outside AGENTS_DIR.
destroy_workspace() {
  local agent="$1"

  case "$agent" in
    "" | . | .. | */*)
      log "ERROR: refusing to destroy invalid agent name '$agent'" >&2
      return 1
      ;;
  esac

  local ws="$AGENTS_DIR/$agent"
  if [ ! -d "$ws" ]; then
    log "$agent: nothing to destroy"
    return 0
  fi

  # ${ws:?} aborts instead of expanding to an empty path.
  if ! rm -rf -- "${ws:?}"; then
    log "$agent: ERROR: could not remove $ws" >&2
    return 1
  fi
  log "$agent: destroyed"
}
# ── CLI dispatch ──────────────────────────────────────────────────────
# Entry point: route the first command-line argument to the matching
# workspace action. A missing required argument aborts with that
# action's usage string; an unknown or missing action prints the general
# usage text and exits with status 1.
main() {
  local action="${1:-help}"

  case "$action" in
    init)
      init_workspace "${2:?Usage: $0 init <agent>}"
      ;;
    reset)
      reset_workspace "${2:?Usage: $0 reset <agent>}"
      ;;
    branch)
      branch_workspace "${2:?Usage: $0 branch <agent> <branch>}" \
        "${3:?Usage: $0 branch <agent> <branch>}"
      ;;
    path)
      print_path "${2:?Usage: $0 path <agent>}"
      ;;
    env)
      print_env "${2:?Usage: $0 env <agent>}"
      ;;
    init-all)
      init_all
      ;;
    destroy)
      destroy_workspace "${2:?Usage: $0 destroy <agent>}"
      ;;
    *)
      echo "Usage: $0 {init|reset|branch|path|env|init-all|destroy} [agent] [branch]"
      echo ""
      echo "Agents: hermes, kimi-0, kimi-1, kimi-2, kimi-3, smoke"
      exit 1
      ;;
  esac
}

main "$@"

View File

@@ -4,14 +4,25 @@
Called after each cycle completes (success or failure).
Appends a structured entry to .loop/retro/cycles.jsonl.
SUCCESS DEFINITION:
A cycle is only "success" if BOTH conditions are met:
1. The hermes process exited cleanly (exit code 0)
2. Main is green (smoke test passes on main after merge)
A cycle that merges a PR but leaves main red is a FAILURE.
The --main-green flag records the smoke test result.
Usage:
python3 scripts/cycle_retro.py --cycle 42 --success --issue 85 \
python3 scripts/cycle_retro.py --cycle 42 --success --main-green --issue 85 \
--type bug --duration 480 --tests-passed 1450 --tests-added 3 \
--files-changed 2 --lines-added 45 --lines-removed 12 \
--kimi-panes 2 --pr 155
python3 scripts/cycle_retro.py --cycle 43 --failure --issue 90 \
--type feature --duration 1200 --reason "tox failed: 3 errors"
python3 scripts/cycle_retro.py --cycle 44 --success --no-main-green \
--reason "PR merged but tests fail on main"
"""
from __future__ import annotations
@@ -51,6 +62,10 @@ def parse_args() -> argparse.Namespace:
p.add_argument("--pr", type=int, default=None, help="PR number if merged")
p.add_argument("--reason", type=str, default="", help="Failure reason")
p.add_argument("--notes", type=str, default="", help="Free-form observations")
p.add_argument("--main-green", action="store_true", default=False,
help="Smoke test passed on main after this cycle")
p.add_argument("--no-main-green", dest="main_green", action="store_false",
help="Smoke test failed or was not run")
return p.parse_args()
@@ -73,6 +88,8 @@ def update_summary() -> None:
successes = [e for e in recent if e.get("success")]
failures = [e for e in recent if not e.get("success")]
main_green_count = sum(1 for e in recent if e.get("main_green"))
hermes_clean_count = sum(1 for e in recent if e.get("hermes_clean", e.get("success")))
durations = [e["duration"] for e in recent if e.get("duration", 0) > 0]
# Per-type stats
@@ -103,6 +120,8 @@ def update_summary() -> None:
"window": len(recent),
"total_cycles": len(entries),
"success_rate": round(len(successes) / len(recent), 2) if recent else 0,
"main_green_rate": round(main_green_count / len(recent), 2) if recent else 0,
"hermes_clean_rate": round(hermes_clean_count / len(recent), 2) if recent else 0,
"avg_duration_seconds": round(sum(durations) / len(durations)) if durations else 0,
"total_lines_added": sum(e.get("lines_added", 0) for e in recent),
"total_lines_removed": sum(e.get("lines_removed", 0) for e in recent),
@@ -121,12 +140,17 @@ def update_summary() -> None:
def main() -> None:
args = parse_args()
# A cycle is only truly successful if hermes exited clean AND main is green
truly_success = args.success and args.main_green
entry = {
"timestamp": datetime.now(timezone.utc).isoformat(),
"cycle": args.cycle,
"issue": args.issue,
"type": args.type,
"success": args.success,
"success": truly_success,
"hermes_clean": args.success,
"main_green": args.main_green,
"duration": args.duration,
"tests_passed": args.tests_passed,
"tests_added": args.tests_added,
@@ -135,7 +159,7 @@ def main() -> None:
"lines_removed": args.lines_removed,
"kimi_panes": args.kimi_panes,
"pr": args.pr,
"reason": args.reason if args.failure else "",
"reason": args.reason if (args.failure or not args.main_green) else "",
"notes": args.notes,
}