Files
timmy-config/bin/gemini-loop.sh

525 lines
18 KiB
Bash
Raw Normal View History

#!/usr/bin/env bash
# gemini-loop.sh — Parallel Gemini Code agent dispatch loop
# Runs N workers concurrently against the Gitea backlog.
# Dynamic scaling: starts at N, scales up to MAX, drops on rate limits.
#
# Usage: gemini-loop.sh [NUM_WORKERS] (default: 2)
set -euo pipefail
export GEMINI_API_KEY="AIzaSyAmGgS516K4PwlODFEnghL535yzoLnofKM"
# === CONFIG ===
NUM_WORKERS="${1:-2}"
MAX_WORKERS=5
WORKTREE_BASE="$HOME/worktrees"
GITEA_URL="http://143.198.27.163:3000"
GITEA_TOKEN=$(cat "$HOME/.hermes/gemini_token")
GEMINI_TIMEOUT=600 # 10 min per issue
COOLDOWN=15 # seconds between issues — stagger clones
RATE_LIMIT_SLEEP=30
MAX_RATE_SLEEP=120
LOG_DIR="$HOME/.hermes/logs"
SKIP_FILE="$LOG_DIR/gemini-skip-list.json"
LOCK_DIR="$LOG_DIR/gemini-locks"
ACTIVE_FILE="$LOG_DIR/gemini-active.json"
ALLOW_SELF_ASSIGN="${ALLOW_SELF_ASSIGN:-0}" # 0 = only explicitly-assigned Gemini work
mkdir -p "$LOG_DIR" "$WORKTREE_BASE" "$LOCK_DIR"
[ -f "$SKIP_FILE" ] || echo '{}' > "$SKIP_FILE"
echo '{}' > "$ACTIVE_FILE"
# === SHARED FUNCTIONS ===
log() {
echo "[$(date '+%Y-%m-%d %H:%M:%S')] $*" >> "$LOG_DIR/gemini-loop.log"
}
lock_issue() {
local issue_key="$1"
local lockfile="$LOCK_DIR/$issue_key.lock"
if mkdir "$lockfile" 2>/dev/null; then
echo $$ > "$lockfile/pid"
return 0
fi
return 1
}
unlock_issue() {
rm -rf "$LOCK_DIR/$1.lock" 2>/dev/null
}
mark_skip() {
local issue_num="$1" reason="$2" skip_hours="${3:-1}"
python3 -c "
import json, time, fcntl
with open('$SKIP_FILE', 'r+') as f:
fcntl.flock(f, fcntl.LOCK_EX)
try: skips = json.load(f)
except: skips = {}
skips[str($issue_num)] = {
'until': time.time() + ($skip_hours * 3600),
'reason': '$reason',
'failures': skips.get(str($issue_num), {}).get('failures', 0) + 1
}
if skips[str($issue_num)]['failures'] >= 3:
skips[str($issue_num)]['until'] = time.time() + (6 * 3600)
f.seek(0)
f.truncate()
json.dump(skips, f, indent=2)
" 2>/dev/null
log "SKIP: #${issue_num}${reason}"
}
update_active() {
local worker="$1" issue="$2" repo="$3" status="$4"
python3 -c "
import json, fcntl
with open('$ACTIVE_FILE', 'r+') as f:
fcntl.flock(f, fcntl.LOCK_EX)
try: active = json.load(f)
except: active = {}
if '$status' == 'done':
active.pop('$worker', None)
else:
active['$worker'] = {'issue': '$issue', 'repo': '$repo', 'status': '$status'}
f.seek(0)
f.truncate()
json.dump(active, f, indent=2)
" 2>/dev/null
}
cleanup_workdir() {
local wt="$1"
rm -rf "$wt" 2>/dev/null || true
}
get_next_issue() {
python3 -c "
import json, sys, time, urllib.request, os
token = '${GITEA_TOKEN}'
base = '${GITEA_URL}'
repos = [
'Timmy_Foundation/the-nexus',
'Timmy_Foundation/timmy-home',
'Timmy_Foundation/timmy-config',
'Timmy_Foundation/hermes-agent',
]
allow_self_assign = int('${ALLOW_SELF_ASSIGN}')
try:
with open('${SKIP_FILE}') as f: skips = json.load(f)
except: skips = {}
try:
with open('${ACTIVE_FILE}') as f:
active = json.load(f)
active_issues = {v['issue'] for v in active.values()}
except:
active_issues = set()
all_issues = []
for repo in repos:
url = f'{base}/api/v1/repos/{repo}/issues?state=open&type=issues&limit=50&sort=created'
req = urllib.request.Request(url, headers={'Authorization': f'token {token}'})
try:
resp = urllib.request.urlopen(req, timeout=10)
issues = json.loads(resp.read())
for i in issues:
i['_repo'] = repo
all_issues.extend(issues)
except:
continue
def priority(i):
t = i['title'].lower()
if '[urgent]' in t or 'urgent:' in t: return 0
if '[p0]' in t: return 1
if '[p1]' in t: return 2
if '[bug]' in t: return 3
if 'lhf:' in t or 'lhf ' in t: return 4
if '[p2]' in t: return 5
return 6
all_issues.sort(key=priority)
for i in all_issues:
assignees = [a['login'] for a in (i.get('assignees') or [])]
# Default-safe behavior: only take explicitly assigned Gemini work.
# Self-assignment is opt-in via ALLOW_SELF_ASSIGN=1.
if assignees:
if 'gemini' not in assignees:
continue
elif not allow_self_assign:
continue
title = i['title'].lower()
if '[philosophy]' in title: continue
if '[epic]' in title or 'epic:' in title: continue
if '[showcase]' in title: continue
if '[do not close' in title: continue
if '[meta]' in title: continue
if '[governing]' in title: continue
if '[permanent]' in title: continue
if '[morning report]' in title: continue
if '[retro]' in title: continue
if '[intel]' in title: continue
if 'master escalation' in title: continue
if any(a['login'] == 'Rockachopa' for a in (i.get('assignees') or [])): continue
num_str = str(i['number'])
if num_str in active_issues: continue
entry = skips.get(num_str, {})
if entry and entry.get('until', 0) > time.time(): continue
lock = '${LOCK_DIR}/' + i['_repo'].replace('/', '-') + '-' + num_str + '.lock'
if os.path.isdir(lock): continue
repo = i['_repo']
owner, name = repo.split('/')
# Self-assign only when explicitly enabled.
if not assignees and allow_self_assign:
try:
data = json.dumps({'assignees': ['gemini']}).encode()
req2 = urllib.request.Request(
f'{base}/api/v1/repos/{repo}/issues/{i["number"]}',
data=data, method='PATCH',
headers={'Authorization': f'token {token}', 'Content-Type': 'application/json'})
urllib.request.urlopen(req2, timeout=5)
except: pass
print(json.dumps({
'number': i['number'],
'title': i['title'],
'repo_owner': owner,
'repo_name': name,
'repo': repo,
}))
sys.exit(0)
print('null')
" 2>/dev/null
}
build_prompt() {
local issue_num="$1" issue_title="$2" worktree="$3" repo_owner="$4" repo_name="$5"
cat <<PROMPT
You are Gemini, an autonomous code agent on the ${repo_name} project.
YOUR ISSUE: #${issue_num} — "${issue_title}"
GITEA API: ${GITEA_URL}/api/v1
GITEA TOKEN: ${GITEA_TOKEN}
REPO: ${repo_owner}/${repo_name}
WORKING DIRECTORY: ${worktree}
== YOUR POWERS ==
You can do ANYTHING a developer can do.
1. READ the issue and any comments for context:
curl -s -H "Authorization: token ${GITEA_TOKEN}" "${GITEA_URL}/api/v1/repos/${repo_owner}/${repo_name}/issues/${issue_num}"
curl -s -H "Authorization: token ${GITEA_TOKEN}" "${GITEA_URL}/api/v1/repos/${repo_owner}/${repo_name}/issues/${issue_num}/comments"
2. DO THE WORK. Code, test, fix, refactor — whatever the issue needs.
- Check for tox.ini / Makefile / package.json for test/lint commands
- Run tests if the project has them
- Follow existing code conventions
3. COMMIT with conventional commits: fix: / feat: / refactor: / test: / chore:
Include "Fixes #${issue_num}" or "Refs #${issue_num}" in the message.
4. PUSH to your branch (gemini/issue-${issue_num}) and CREATE A PR:
git push origin gemini/issue-${issue_num}
curl -s -X POST "${GITEA_URL}/api/v1/repos/${repo_owner}/${repo_name}/pulls" \\
-H "Authorization: token ${GITEA_TOKEN}" \\
-H "Content-Type: application/json" \\
-d '{"title": "[gemini] <description> (#${issue_num})", "body": "Fixes #${issue_num}\n\n<describe what you did>", "head": "gemini/issue-${issue_num}", "base": "main"}'
5. COMMENT on the issue when done:
curl -s -X POST "${GITEA_URL}/api/v1/repos/${repo_owner}/${repo_name}/issues/${issue_num}/comments" \\
-H "Authorization: token ${GITEA_TOKEN}" \\
-H "Content-Type: application/json" \\
-d '{"body": "PR created. <summary of changes>"}'
== RULES ==
- Read CLAUDE.md or project README first for conventions
- If the project has tox, use tox. If npm, use npm. Follow the project.
- Never use --no-verify on git commands.
- If tests fail after 2 attempts, STOP and comment on the issue explaining why.
- Be thorough but focused. Fix the issue, don't refactor the world.
== CRITICAL: ALWAYS COMMIT AND PUSH ==
- NEVER exit without committing your work. Even partial progress MUST be committed.
- Before you finish, ALWAYS: git add -A && git commit && git push origin gemini/issue-${issue_num}
- ALWAYS create a PR before exiting. No exceptions.
- If a branch already exists with prior work, check it out and CONTINUE from where it left off.
- Check: git ls-remote origin gemini/issue-${issue_num}if it exists, pull it first.
- Your work is WASTED if it's not pushed. Push early, push often.
PROMPT
}
# === WORKER FUNCTION ===
run_worker() {
local worker_id="$1"
local consecutive_failures=0
log "WORKER-${worker_id}: Started"
while true; do
if [ "$consecutive_failures" -ge 5 ]; then
local backoff=$((RATE_LIMIT_SLEEP * (consecutive_failures / 5)))
[ "$backoff" -gt "$MAX_RATE_SLEEP" ] && backoff=$MAX_RATE_SLEEP
log "WORKER-${worker_id}: BACKOFF ${backoff}s (${consecutive_failures} failures)"
sleep "$backoff"
consecutive_failures=0
fi
issue_json=$(get_next_issue)
if [ "$issue_json" = "null" ] || [ -z "$issue_json" ]; then
update_active "$worker_id" "" "" "idle"
sleep 10
continue
fi
issue_num=$(echo "$issue_json" | python3 -c "import sys,json; print(json.load(sys.stdin)['number'])")
issue_title=$(echo "$issue_json" | python3 -c "import sys,json; print(json.load(sys.stdin)['title'])")
repo_owner=$(echo "$issue_json" | python3 -c "import sys,json; print(json.load(sys.stdin)['repo_owner'])")
repo_name=$(echo "$issue_json" | python3 -c "import sys,json; print(json.load(sys.stdin)['repo_name'])")
issue_key="${repo_owner}-${repo_name}-${issue_num}"
branch="gemini/issue-${issue_num}"
worktree="${WORKTREE_BASE}/gemini-w${worker_id}-${issue_num}"
if ! lock_issue "$issue_key"; then
sleep 5
continue
fi
log "WORKER-${worker_id}: === ISSUE #${issue_num}: ${issue_title} (${repo_owner}/${repo_name}) ==="
update_active "$worker_id" "$issue_num" "${repo_owner}/${repo_name}" "working"
# Clone and pick up prior work if it exists
rm -rf "$worktree" 2>/dev/null
CLONE_URL="http://gemini:${GITEA_TOKEN}@143.198.27.163:3000/${repo_owner}/${repo_name}.git"
if git ls-remote --heads "$CLONE_URL" "$branch" 2>/dev/null | grep -q "$branch"; then
log "WORKER-${worker_id}: Found existing branch $branch — continuing prior work"
if ! git clone --depth=50 -b "$branch" "$CLONE_URL" "$worktree" >/dev/null 2>&1; then
log "WORKER-${worker_id}: ERROR cloning branch $branch for #${issue_num}"
unlock_issue "$issue_key"
consecutive_failures=$((consecutive_failures + 1))
sleep "$COOLDOWN"
continue
fi
else
if ! git clone --depth=1 -b main "$CLONE_URL" "$worktree" >/dev/null 2>&1; then
log "WORKER-${worker_id}: ERROR cloning for #${issue_num}"
unlock_issue "$issue_key"
consecutive_failures=$((consecutive_failures + 1))
sleep "$COOLDOWN"
continue
fi
cd "$worktree"
git checkout -b "$branch" >/dev/null 2>&1
fi
cd "$worktree"
prompt=$(build_prompt "$issue_num" "$issue_title" "$worktree" "$repo_owner" "$repo_name")
log "WORKER-${worker_id}: Launching Gemini Code for #${issue_num}..."
CYCLE_START=$(date +%s)
set +e
cd "$worktree"
gtimeout "$GEMINI_TIMEOUT" gemini \
-p "$prompt" \
--yolo \
</dev/null >> "$LOG_DIR/gemini-${issue_num}.log" 2>&1
exit_code=$?
set -e
CYCLE_END=$(date +%s)
CYCLE_DURATION=$(( CYCLE_END - CYCLE_START ))
# ── SALVAGE: Never waste work. Commit+push whatever exists. ──
cd "$worktree" 2>/dev/null || true
DIRTY=$(git status --porcelain 2>/dev/null | wc -l | tr -d ' ')
if [ "${DIRTY:-0}" -gt 0 ]; then
log "WORKER-${worker_id}: SALVAGING $DIRTY dirty files for #${issue_num}"
git add -A 2>/dev/null
git commit -m "WIP: Gemini Code progress on #${issue_num}
Automated salvage commit — agent session ended (exit $exit_code).
Work in progress, may need continuation." 2>/dev/null || true
fi
UNPUSHED=$(git log --oneline "origin/main..HEAD" 2>/dev/null | wc -l | tr -d ' ')
if [ "${UNPUSHED:-0}" -gt 0 ]; then
git push -u origin "$branch" 2>/dev/null && \
log "WORKER-${worker_id}: Pushed $UNPUSHED commit(s) on $branch" || \
log "WORKER-${worker_id}: Push failed for $branch"
fi
# ── Create PR if needed ──
pr_num=$(curl -sf "${GITEA_URL}/api/v1/repos/${repo_owner}/${repo_name}/pulls?state=open&head=${repo_owner}:${branch}&limit=1" \
-H "Authorization: token ${GITEA_TOKEN}" | python3 -c "
import sys,json
prs = json.load(sys.stdin)
if prs: print(prs[0]['number'])
else: print('')
" 2>/dev/null)
if [ -z "$pr_num" ] && [ "${UNPUSHED:-0}" -gt 0 ]; then
pr_num=$(curl -sf -X POST "${GITEA_URL}/api/v1/repos/${repo_owner}/${repo_name}/pulls" \
-H "Authorization: token ${GITEA_TOKEN}" \
-H "Content-Type: application/json" \
-d "$(python3 -c "
import json
print(json.dumps({
'title': 'Gemini: Issue #${issue_num}',
'head': '${branch}',
'base': 'main',
'body': 'Automated PR for issue #${issue_num}.\nExit code: ${exit_code}'
}))
")" | python3 -c "import sys,json; print(json.load(sys.stdin).get('number',''))" 2>/dev/null)
[ -n "$pr_num" ] && log "WORKER-${worker_id}: Created PR #${pr_num} for issue #${issue_num}"
fi
# ── Merge + close on success ──
if [ "$exit_code" -eq 0 ]; then
log "WORKER-${worker_id}: SUCCESS #${issue_num}"
if [ -n "$pr_num" ]; then
curl -sf -X POST "${GITEA_URL}/api/v1/repos/${repo_owner}/${repo_name}/pulls/${pr_num}/merge" \
-H "Authorization: token ${GITEA_TOKEN}" \
-H "Content-Type: application/json" \
-d '{"Do": "squash"}' >/dev/null 2>&1 || true
curl -sf -X PATCH "${GITEA_URL}/api/v1/repos/${repo_owner}/${repo_name}/issues/${issue_num}" \
-H "Authorization: token ${GITEA_TOKEN}" \
-H "Content-Type: application/json" \
-d '{"state": "closed"}' >/dev/null 2>&1 || true
log "WORKER-${worker_id}: PR #${pr_num} merged, issue #${issue_num} closed"
fi
consecutive_failures=0
elif [ "$exit_code" -eq 124 ]; then
log "WORKER-${worker_id}: TIMEOUT #${issue_num} (work saved in PR)"
consecutive_failures=$((consecutive_failures + 1))
else
if grep -q "rate_limit\|rate limit\|429\|overloaded\|quota" "$LOG_DIR/gemini-${issue_num}.log" 2>/dev/null; then
log "WORKER-${worker_id}: RATE LIMITED on #${issue_num} (work saved)"
consecutive_failures=$((consecutive_failures + 3))
else
log "WORKER-${worker_id}: FAILED #${issue_num} exit ${exit_code} (work saved in PR)"
consecutive_failures=$((consecutive_failures + 1))
fi
fi
# ── METRICS ──
LINES_ADDED=$(cd "$worktree" 2>/dev/null && git diff --stat origin/main..HEAD 2>/dev/null | tail -1 | grep -oE '[0-9]+ insertion' | grep -oE '[0-9]+' || echo 0)
LINES_REMOVED=$(cd "$worktree" 2>/dev/null && git diff --stat origin/main..HEAD 2>/dev/null | tail -1 | grep -oE '[0-9]+ deletion' | grep -oE '[0-9]+' || echo 0)
FILES_CHANGED=$(cd "$worktree" 2>/dev/null && git diff --name-only origin/main..HEAD 2>/dev/null | wc -l | tr -d ' ' || echo 0)
if [ "$exit_code" -eq 0 ]; then OUTCOME="success"
elif [ "$exit_code" -eq 124 ]; then OUTCOME="timeout"
elif grep -q "rate_limit\|429" "$LOG_DIR/gemini-${issue_num}.log" 2>/dev/null; then OUTCOME="rate_limited"
else OUTCOME="failed"; fi
python3 -c "
import json, datetime
print(json.dumps({
'ts': datetime.datetime.utcnow().isoformat() + 'Z',
'agent': 'gemini',
'worker': $worker_id,
'issue': $issue_num,
'repo': '${repo_owner}/${repo_name}',
'outcome': '$OUTCOME',
'exit_code': $exit_code,
'duration_s': $CYCLE_DURATION,
'files_changed': ${FILES_CHANGED:-0},
'lines_added': ${LINES_ADDED:-0},
'lines_removed': ${LINES_REMOVED:-0},
'salvaged': ${DIRTY:-0},
'pr': '${pr_num:-}',
'merged': $( [ '$OUTCOME' = 'success' ] && [ -n '${pr_num:-}' ] && echo 'true' || echo 'false' )
}))
" >> "$LOG_DIR/claude-metrics.jsonl" 2>/dev/null
cleanup_workdir "$worktree"
unlock_issue "$issue_key"
update_active "$worker_id" "" "" "done"
sleep "$COOLDOWN"
done
}
# === MAIN ===
log "=== Gemini Loop Started — ${NUM_WORKERS} workers (max ${MAX_WORKERS}) ==="
log "Worktrees: ${WORKTREE_BASE}"
rm -rf "$LOCK_DIR"/*.lock 2>/dev/null
# PID tracking via files (bash 3.2 compatible)
PID_DIR="$LOG_DIR/gemini-pids"
mkdir -p "$PID_DIR"
rm -f "$PID_DIR"/*.pid 2>/dev/null
launch_worker() {
local wid="$1"
run_worker "$wid" &
echo $! > "$PID_DIR/${wid}.pid"
log "Launched worker $wid (PID $!)"
}
for i in $(seq 1 "$NUM_WORKERS"); do
launch_worker "$i"
sleep 3
done
# Dynamic scaler — every 3 minutes
CURRENT_WORKERS="$NUM_WORKERS"
while true; do
sleep 90
# Reap dead workers
for pidfile in "$PID_DIR"/*.pid; do
[ -f "$pidfile" ] || continue
wid=$(basename "$pidfile" .pid)
wpid=$(cat "$pidfile")
if ! kill -0 "$wpid" 2>/dev/null; then
log "SCALER: Worker $wid died — relaunching"
launch_worker "$wid"
sleep 2
fi
done
recent_rate_limits=$(tail -100 "$LOG_DIR/gemini-loop.log" 2>/dev/null | grep -c "RATE LIMITED" || true)
recent_successes=$(tail -100 "$LOG_DIR/gemini-loop.log" 2>/dev/null | grep -c "SUCCESS" || true)
if [ "$recent_rate_limits" -gt 0 ]; then
if [ "$CURRENT_WORKERS" -gt 2 ]; then
drop_to=$(( CURRENT_WORKERS / 2 ))
[ "$drop_to" -lt 2 ] && drop_to=2
log "SCALER: Rate limited — scaling ${CURRENT_WORKERS}${drop_to}"
for wid in $(seq $((drop_to + 1)) "$CURRENT_WORKERS"); do
if [ -f "$PID_DIR/${wid}.pid" ]; then
kill "$(cat "$PID_DIR/${wid}.pid")" 2>/dev/null || true
rm -f "$PID_DIR/${wid}.pid"
update_active "$wid" "" "" "done"
fi
done
CURRENT_WORKERS=$drop_to
fi
elif [ "$recent_successes" -ge 2 ] && [ "$CURRENT_WORKERS" -lt "$MAX_WORKERS" ]; then
new_count=$(( CURRENT_WORKERS + 2 ))
[ "$new_count" -gt "$MAX_WORKERS" ] && new_count=$MAX_WORKERS
log "SCALER: Healthy — scaling ${CURRENT_WORKERS}${new_count}"
for wid in $(seq $((CURRENT_WORKERS + 1)) "$new_count"); do
launch_worker "$wid"
sleep 2
done
CURRENT_WORKERS=$new_count
fi
done