#!/usr/bin/env bash
# claude-loop.sh — Parallel Claude Code agent dispatch loop
# Runs N workers concurrently against the Gitea backlog.
# Gracefully handles rate limits with backoff.
#
# Usage: claude-loop.sh [NUM_WORKERS]   (default: 2)

set -euo pipefail

# === CONFIG ===
NUM_WORKERS="${1:-2}"
MAX_WORKERS=10                     # absolute ceiling for the dynamic scaler
WORKTREE_BASE="$HOME/worktrees"
GITEA_URL="http://143.198.27.163:3000"
GITEA_TOKEN=$(cat "$HOME/.hermes/claude_token")
CLAUDE_TIMEOUT=900                 # 15 min per issue
COOLDOWN=15                        # seconds between issues — stagger clones
RATE_LIMIT_SLEEP=30                # initial sleep on rate limit
MAX_RATE_SLEEP=120                 # max backoff on rate limit
LOG_DIR="$HOME/.hermes/logs"
SKIP_FILE="$LOG_DIR/claude-skip-list.json"
LOCK_DIR="$LOG_DIR/claude-locks"
ACTIVE_FILE="$LOG_DIR/claude-active.json"

mkdir -p "$LOG_DIR" "$WORKTREE_BASE" "$LOCK_DIR"

# Initialize files (active file is reset on every start)
[ -f "$SKIP_FILE" ] || echo '{}' > "$SKIP_FILE"
echo '{}' > "$ACTIVE_FILE"

# === SHARED FUNCTIONS ===

# Append a timestamped message to the shared loop log.
log() {
  local msg="[$(date '+%Y-%m-%d %H:%M:%S')] $*"
  echo "$msg" >> "$LOG_DIR/claude-loop.log"
}

# Acquire an exclusive per-issue lock. mkdir is atomic, so only one worker
# wins; the winner records its PID inside the lock dir. Returns 1 if held.
lock_issue() {
  local issue_key="$1"
  local lockfile="$LOCK_DIR/$issue_key.lock"
  if mkdir "$lockfile" 2>/dev/null; then
    echo $$ > "$lockfile/pid"
    return 0
  fi
  return 1
}

# Release a per-issue lock (no-op if it does not exist).
unlock_issue() {
  local issue_key="$1"
  rm -rf "$LOCK_DIR/$issue_key.lock" 2>/dev/null
}

# Put an issue on the skip list for skip_hours (default 1 h), under flock.
# Three accumulated failures escalate the skip window to 6 hours.
# Values are passed via the environment so quotes/backslashes in $reason
# cannot break the embedded Python (previously they were interpolated).
mark_skip() {
  local issue_num="$1"
  local reason="$2"
  local skip_hours="${3:-1}"
  SKIP_FILE="$SKIP_FILE" ISSUE_NUM="$issue_num" REASON="$reason" SKIP_HOURS="$skip_hours" \
    python3 - 2>/dev/null <<'PY'
import json, os, time, fcntl
issue = os.environ['ISSUE_NUM']
with open(os.environ['SKIP_FILE'], 'r+') as f:
    fcntl.flock(f, fcntl.LOCK_EX)
    try:
        skips = json.load(f)
    except Exception:
        skips = {}
    skips[issue] = {
        'until': time.time() + (float(os.environ['SKIP_HOURS']) * 3600),
        'reason': os.environ['REASON'],
        'failures': skips.get(issue, {}).get('failures', 0) + 1,
    }
    if skips[issue]['failures'] >= 3:
        skips[issue]['until'] = time.time() + (6 * 3600)
    f.seek(0)
    f.truncate()
    json.dump(skips, f, indent=2)
PY
  log "SKIP: #${issue_num} — ${reason}"
}

# Upsert (or, on status "done", remove) this worker's entry in the shared
# active-issues JSON, under flock. Values passed via env (see mark_skip).
update_active() {
  local worker="$1" issue="$2" repo="$3" status="$4"
  ACTIVE_FILE="$ACTIVE_FILE" W="$worker" I="$issue" R="$repo" S="$status" \
    python3 - 2>/dev/null <<'PY'
import json, os, fcntl
with open(os.environ['ACTIVE_FILE'], 'r+') as f:
    fcntl.flock(f, fcntl.LOCK_EX)
    try:
        active = json.load(f)
    except Exception:
        active = {}
    if os.environ['S'] == 'done':
        active.pop(os.environ['W'], None)
    else:
        active[os.environ['W']] = {
            'issue': os.environ['I'],
            'repo': os.environ['R'],
            'status': os.environ['S'],
        }
    f.seek(0)
    f.truncate()
    json.dump(active, f, indent=2)
PY
}

# Remove a worktree. ${wt:?} aborts rather than run `rm -rf` on an empty path.
cleanup_workdir() {
  local wt="$1"
  rm -rf -- "${wt:?}" 2>/dev/null || true
}

# Print a JSON description of the next eligible backlog issue, or "null".
# Priority: URGENT > P0 > P1 > bugs > LHF > P2 > rest. Skips issues that are
# locked, active, on the skip list, assigned to someone else, or carry an
# excluded tag. Self-assigns unassigned issues to "claude".
get_next_issue() {
  python3 - 2>/dev/null <<PY
import json, sys, time, urllib.request, os

token = '${GITEA_TOKEN}'
base = '${GITEA_URL}'
repos = [
    'Timmy_Foundation/the-nexus',
    'Timmy_Foundation/autolora',
]

# Load skip list
try:
    with open('${SKIP_FILE}') as f:
        skips = json.load(f)
except Exception:
    skips = {}

# Load active issues (to avoid double-picking)
try:
    with open('${ACTIVE_FILE}') as f:
        active = json.load(f)
    active_issues = {v['issue'] for v in active.values()}
except Exception:
    active_issues = set()

all_issues = []
for repo in repos:
    url = f'{base}/api/v1/repos/{repo}/issues?state=open&type=issues&limit=50&sort=created'
    req = urllib.request.Request(url, headers={'Authorization': f'token {token}'})
    try:
        resp = urllib.request.urlopen(req, timeout=10)
        issues = json.loads(resp.read())
        for i in issues:
            i['_repo'] = repo
        all_issues.extend(issues)
    except Exception:
        continue

# Sort by priority: URGENT > P0 > P1 > bugs > LHF > rest
def priority(i):
    t = i['title'].lower()   # already lowercase below; compare against t only
    if '[urgent]' in t or 'urgent:' in t: return 0
    if '[p0]' in t: return 1
    if '[p1]' in t: return 2
    if '[bug]' in t: return 3
    if 'lhf:' in t or 'lhf ' in t: return 4
    if '[p2]' in t: return 5
    return 6

all_issues.sort(key=priority)

for i in all_issues:
    assignees = [a['login'] for a in (i.get('assignees') or [])]
    # Take issues assigned to claude OR unassigned (self-assign)
    if assignees and 'claude' not in assignees:
        continue
    title = i['title'].lower()
    if '[philosophy]' in title: continue
    if '[epic]' in title or 'epic:' in title: continue
    if '[showcase]' in title: continue
    if '[do not close' in title: continue
    if '[meta]' in title: continue
    if '[governing]' in title: continue
    if '[permanent]' in title: continue
    if '[morning report]' in title: continue
    if '[retro]' in title: continue
    if '[intel]' in title: continue
    if 'master escalation' in title: continue
    if any(a['login'] == 'Rockachopa' for a in (i.get('assignees') or [])):
        continue
    num_str = str(i['number'])
    if num_str in active_issues:
        continue
    entry = skips.get(num_str, {})
    if entry and entry.get('until', 0) > time.time():
        continue
    # Lock-dir name must match issue_key used by lock_issue(): owner-name-num
    lock = '${LOCK_DIR}/' + i['_repo'].replace('/', '-') + '-' + num_str + '.lock'
    if os.path.isdir(lock):
        continue
    repo = i['_repo']
    owner, name = repo.split('/')
    # Self-assign if unassigned
    if not assignees:
        try:
            data = json.dumps({'assignees': ['claude']}).encode()
            req2 = urllib.request.Request(
                f'{base}/api/v1/repos/{repo}/issues/{i["number"]}',
                data=data, method='PATCH',
                headers={'Authorization': f'token {token}',
                         'Content-Type': 'application/json'})
            urllib.request.urlopen(req2, timeout=5)
        except Exception:
            pass
    print(json.dumps({
        'number': i['number'],
        'title': i['title'],
        'repo_owner': owner,
        'repo_name': name,
        'repo': repo,
    }))
    sys.exit(0)

print('null')
PY
}

# Emit the instruction prompt fed to the Claude Code CLI for one issue.
# NOTE(review): the opening of this heredoc (through most of step 4) was
# garbled in the source copy; the header and steps 1-3 below are a
# reconstruction — verify against the canonical version of this script.
build_prompt() {
  local issue_num="$1"
  local issue_title="$2"
  local worktree="$3"
  local repo_owner="$4"
  local repo_name="$5"
  cat <<PROMPT
You are working on issue #${issue_num}: ${issue_title}
Repository: ${repo_owner}/${repo_name}
Working directory: ${worktree}

1. READ the issue body:
   curl -s "${GITEA_URL}/api/v1/repos/${repo_owner}/${repo_name}/issues/${issue_num}" \\
     -H "Authorization: token ${GITEA_TOKEN}"
2. FIX the issue on branch claude/issue-${issue_num}. Run the project's tests.
3. COMMIT and PUSH:
   git add -A && git commit && git push -u origin claude/issue-${issue_num}
4. CREATE a PR:
   curl -s -X POST "${GITEA_URL}/api/v1/repos/${repo_owner}/${repo_name}/pulls" \\
     -H "Authorization: token ${GITEA_TOKEN}" \\
     -H "Content-Type: application/json" \\
     -d '{"title": "Fix (#${issue_num})", "body": "Fixes #${issue_num}\n\n", "head": "claude/issue-${issue_num}", "base": "main"}'
5. COMMENT on the issue when done:
   curl -s -X POST "${GITEA_URL}/api/v1/repos/${repo_owner}/${repo_name}/issues/${issue_num}/comments" \\
     -H "Authorization: token ${GITEA_TOKEN}" \\
     -H "Content-Type: application/json" \\
     -d '{"body": "PR created. "}'

== RULES ==
- Read CLAUDE.md or project README first for conventions
- If the project has tox, use tox. If npm, use npm. Follow the project.
- Never use --no-verify on git commands.
- If tests fail after 2 attempts, STOP and comment on the issue explaining why.
- Be thorough but focused. Fix the issue, don't refactor the world.

== CRITICAL: ALWAYS COMMIT AND PUSH ==
- NEVER exit without committing your work. Even partial progress MUST be committed.
- Before you finish, ALWAYS: git add -A && git commit && git push origin claude/issue-${issue_num}
- ALWAYS create a PR before exiting. No exceptions.
- If a branch already exists with prior work, check it out and CONTINUE from where it left off.
- Check: git ls-remote origin claude/issue-${issue_num} — if it exists, pull it first.
- Your work is WASTED if it's not pushed. Push early, push often.
PROMPT
}

# === WORKER FUNCTION ===
# One worker's infinite loop: merge/rebase our existing PRs first, then pick
# the next issue, run Claude Code against it, salvage + push whatever work
# exists, open/merge a PR, record metrics, clean up, repeat.
run_worker() {
  local worker_id="$1"
  local consecutive_failures=0
  log "WORKER-${worker_id}: Started"

  while true; do
    # Backoff on repeated failures
    if [ "$consecutive_failures" -ge 5 ]; then
      local backoff=$((RATE_LIMIT_SLEEP * (consecutive_failures / 5)))
      [ "$backoff" -gt "$MAX_RATE_SLEEP" ] && backoff=$MAX_RATE_SLEEP
      log "WORKER-${worker_id}: BACKOFF ${backoff}s (${consecutive_failures} failures)"
      sleep "$backoff"
      consecutive_failures=0
    fi

    # RULE: Merge existing PRs BEFORE creating new work.
    # Check for open PRs from claude, rebase + merge them first.
    local our_prs
    our_prs=$(curl -sf -H "Authorization: token ${GITEA_TOKEN}" \
      "${GITEA_URL}/api/v1/repos/Timmy_Foundation/the-nexus/pulls?state=open&limit=5" 2>/dev/null | \
      python3 -c "
import sys, json
prs = json.loads(sys.stdin.buffer.read())
ours = [p for p in prs if p['user']['login'] == 'claude'][:3]
for p in ours:
    print(f'{p[\"number\"]}|{p[\"head\"][\"ref\"]}|{p.get(\"mergeable\",False)}')
" 2>/dev/null)

    if [ -n "$our_prs" ]; then
      local pr_clone_url="http://claude:${GITEA_TOKEN}@143.198.27.163:3000/Timmy_Foundation/the-nexus.git"
      # Here-string (not `echo | while`): the loop runs in this shell, and
      # read -r keeps backslashes intact. git steps are guarded with || true
      # so one failing PR cannot kill the worker under `set -e`.
      while IFS='|' read -r pr_num branch mergeable; do
        [ -z "$pr_num" ] && continue
        if [ "$mergeable" = "True" ]; then
          curl -sf -X POST -H "Authorization: token ${GITEA_TOKEN}" \
            -H "Content-Type: application/json" \
            -d '{"Do":"squash","delete_branch_after_merge":true}' \
            "${GITEA_URL}/api/v1/repos/Timmy_Foundation/the-nexus/pulls/${pr_num}/merge" >/dev/null 2>&1 || true
          log "WORKER-${worker_id}: merged own PR #${pr_num}"
          sleep 3
        else
          # Not mergeable: rebase on main and push, or close if unrebaseable.
          local tmpdir="/tmp/claude-rebase-${pr_num}"
          cd "$HOME"
          rm -rf "$tmpdir" 2>/dev/null
          git clone -q --depth=50 -b "$branch" "$pr_clone_url" "$tmpdir" 2>/dev/null || true
          if [ -d "$tmpdir/.git" ]; then
            cd "$tmpdir"
            git fetch origin main 2>/dev/null || true
            if git rebase origin/main 2>/dev/null; then
              git push -f origin "$branch" 2>/dev/null || true
              sleep 3
              curl -sf -X POST -H "Authorization: token ${GITEA_TOKEN}" \
                -H "Content-Type: application/json" \
                -d '{"Do":"squash","delete_branch_after_merge":true}' \
                "${GITEA_URL}/api/v1/repos/Timmy_Foundation/the-nexus/pulls/${pr_num}/merge" >/dev/null 2>&1 || true
              log "WORKER-${worker_id}: rebased+merged PR #${pr_num}"
            else
              git rebase --abort 2>/dev/null || true
              curl -sf -X PATCH -H "Authorization: token ${GITEA_TOKEN}" \
                -H "Content-Type: application/json" -d '{"state":"closed"}' \
                "${GITEA_URL}/api/v1/repos/Timmy_Foundation/the-nexus/pulls/${pr_num}" >/dev/null 2>&1 || true
              log "WORKER-${worker_id}: closed unrebaseable PR #${pr_num}"
            fi
            cd "$HOME"
            rm -rf "$tmpdir"
          fi
        fi
      done <<< "$our_prs"
    fi

    # Get next issue
    local issue_json
    issue_json=$(get_next_issue)
    if [ "$issue_json" = "null" ] || [ -z "$issue_json" ]; then
      update_active "$worker_id" "" "" "idle"
      sleep 10
      continue
    fi
    local issue_num issue_title repo_owner repo_name
    issue_num=$(echo "$issue_json" | python3 -c "import sys,json; print(json.load(sys.stdin)['number'])")
    issue_title=$(echo "$issue_json" | python3 -c "import sys,json; print(json.load(sys.stdin)['title'])")
    repo_owner=$(echo "$issue_json" | python3 -c "import sys,json; print(json.load(sys.stdin)['repo_owner'])")
    repo_name=$(echo "$issue_json" | python3 -c "import sys,json; print(json.load(sys.stdin)['repo_name'])")

    local issue_key="${repo_owner}-${repo_name}-${issue_num}"
    local branch="claude/issue-${issue_num}"
    # Use UUID for worktree dir to prevent collisions under high concurrency
    local wt_uuid worktree
    wt_uuid=$(/usr/bin/uuidgen 2>/dev/null || python3 -c "import uuid; print(uuid.uuid4())")
    worktree="${WORKTREE_BASE}/claude-${issue_num}-${wt_uuid}"

    # Try to lock; another worker may have raced us to the same issue
    if ! lock_issue "$issue_key"; then
      sleep 5
      continue
    fi

    log "WORKER-${worker_id}: === ISSUE #${issue_num}: ${issue_title} (${repo_owner}/${repo_name}) ==="
    update_active "$worker_id" "$issue_num" "${repo_owner}/${repo_name}" "working"

    # Clone and pick up prior work if it exists
    rm -rf "$worktree" 2>/dev/null
    CLONE_URL="http://claude:${GITEA_TOKEN}@143.198.27.163:3000/${repo_owner}/${repo_name}.git"

    # Check if branch already exists on remote (prior work to continue)
    if git ls-remote --heads "$CLONE_URL" "$branch" 2>/dev/null | grep -q "$branch"; then
      log "WORKER-${worker_id}: Found existing branch $branch — continuing prior work"
      if ! git clone --depth=50 -b "$branch" "$CLONE_URL" "$worktree" >/dev/null 2>&1; then
        log "WORKER-${worker_id}: ERROR cloning branch $branch for #${issue_num}"
        unlock_issue "$issue_key"
        consecutive_failures=$((consecutive_failures + 1))
        sleep "$COOLDOWN"
        continue
      fi
      # Rebase on main to resolve stale conflicts from closed PRs
      cd "$worktree"
      git fetch origin main >/dev/null 2>&1 || true
      if ! git rebase origin/main >/dev/null 2>&1; then
        # Rebase failed — start fresh from main
        log "WORKER-${worker_id}: Rebase failed for $branch, starting fresh"
        cd "$HOME"
        rm -rf "$worktree"
        git clone --depth=1 -b main "$CLONE_URL" "$worktree" >/dev/null 2>&1
        cd "$worktree"
        git checkout -b "$branch" >/dev/null 2>&1
      fi
    else
      if ! git clone --depth=1 -b main "$CLONE_URL" "$worktree" >/dev/null 2>&1; then
        log "WORKER-${worker_id}: ERROR cloning for #${issue_num}"
        unlock_issue "$issue_key"
        consecutive_failures=$((consecutive_failures + 1))
        sleep "$COOLDOWN"
        continue
      fi
      cd "$worktree"
      git checkout -b "$branch" >/dev/null 2>&1
    fi
    cd "$worktree"

    # Build prompt and run
    local prompt
    prompt=$(build_prompt "$issue_num" "$issue_title" "$worktree" "$repo_owner" "$repo_name")
    log "WORKER-${worker_id}: Launching Claude Code for #${issue_num}..."
    CYCLE_START=$(date +%s)

    set +e
    cd "$worktree"
    env -u CLAUDECODE gtimeout "$CLAUDE_TIMEOUT" claude \
      --print \
      --model sonnet \
      --dangerously-skip-permissions \
      -p "$prompt" \
      > "$LOG_DIR/claude-${issue_num}.log" 2>&1
    exit_code=$?
    set -e

    CYCLE_END=$(date +%s)
    CYCLE_DURATION=$(( CYCLE_END - CYCLE_START ))

    # ── SALVAGE: Never waste work. Commit+push whatever exists. ──
    cd "$worktree" 2>/dev/null || true
    # `|| true` inside the substitutions: a git failure must not kill the
    # worker under `set -e -o pipefail`.
    DIRTY=$(git status --porcelain 2>/dev/null | wc -l | tr -d ' ' || true)
    UNPUSHED=$(git log --oneline "origin/main..HEAD" 2>/dev/null | wc -l | tr -d ' ' || true)

    if [ "${DIRTY:-0}" -gt 0 ]; then
      log "WORKER-${worker_id}: SALVAGING $DIRTY dirty files for #${issue_num}"
      git add -A 2>/dev/null
      git commit -m "WIP: Claude Code progress on #${issue_num}

Automated salvage commit — agent session ended (exit $exit_code).
Work in progress, may need continuation." 2>/dev/null || true
    fi

    # Push if we have any commits (including salvaged ones)
    UNPUSHED=$(git log --oneline "origin/main..HEAD" 2>/dev/null | wc -l | tr -d ' ' || true)
    if [ "${UNPUSHED:-0}" -gt 0 ]; then
      git push -u origin "$branch" 2>/dev/null && \
        log "WORKER-${worker_id}: Pushed $UNPUSHED commit(s) on $branch" || \
        log "WORKER-${worker_id}: Push failed for $branch"
    fi

    # ── Create PR if branch was pushed and no PR exists yet ──
    pr_num=$(curl -sf "${GITEA_URL}/api/v1/repos/${repo_owner}/${repo_name}/pulls?state=open&head=${repo_owner}:${branch}&limit=1" \
      -H "Authorization: token ${GITEA_TOKEN}" | python3 -c "
import sys,json
prs = json.load(sys.stdin)
print(prs[0]['number'] if prs else '')
" 2>/dev/null)

    if [ -z "$pr_num" ] && [ "${UNPUSHED:-0}" -gt 0 ]; then
      pr_num=$(curl -sf -X POST "${GITEA_URL}/api/v1/repos/${repo_owner}/${repo_name}/pulls" \
        -H "Authorization: token ${GITEA_TOKEN}" \
        -H "Content-Type: application/json" \
        -d "$(python3 -c "
import json
print(json.dumps({
    'title': 'Claude: Issue #${issue_num}',
    'head': '${branch}',
    'base': 'main',
    'body': 'Automated PR for issue #${issue_num}.\nExit code: ${exit_code}'
}))
")" | python3 -c "import sys,json; print(json.load(sys.stdin).get('number',''))" 2>/dev/null)
      [ -n "$pr_num" ] && log "WORKER-${worker_id}: Created PR #${pr_num} for issue #${issue_num}"
    fi

    # ── Merge + close on success ──
    if [ "$exit_code" -eq 0 ]; then
      log "WORKER-${worker_id}: SUCCESS #${issue_num}"
      if [ -n "$pr_num" ]; then
        curl -sf -X POST "${GITEA_URL}/api/v1/repos/${repo_owner}/${repo_name}/pulls/${pr_num}/merge" \
          -H "Authorization: token ${GITEA_TOKEN}" \
          -H "Content-Type: application/json" \
          -d '{"Do": "squash"}' >/dev/null 2>&1 || true
        curl -sf -X PATCH "${GITEA_URL}/api/v1/repos/${repo_owner}/${repo_name}/issues/${issue_num}" \
          -H "Authorization: token ${GITEA_TOKEN}" \
          -H "Content-Type: application/json" \
          -d '{"state": "closed"}' >/dev/null 2>&1 || true
        log "WORKER-${worker_id}: PR #${pr_num} merged, issue #${issue_num} closed"
      fi
      consecutive_failures=0
    elif [ "$exit_code" -eq 124 ]; then
      log "WORKER-${worker_id}: TIMEOUT #${issue_num} (work saved in PR)"
      consecutive_failures=$((consecutive_failures + 1))
    else
      # Check for rate limit
      if grep -q "rate_limit\|rate limit\|429\|overloaded" "$LOG_DIR/claude-${issue_num}.log" 2>/dev/null; then
        log "WORKER-${worker_id}: RATE LIMITED on #${issue_num} — backing off (work saved)"
        consecutive_failures=$((consecutive_failures + 3))
      else
        log "WORKER-${worker_id}: FAILED #${issue_num} exit ${exit_code} (work saved in PR)"
        consecutive_failures=$((consecutive_failures + 1))
      fi
    fi

    # ── METRICS: structured JSONL for reporting ──
    LINES_ADDED=$(cd "$worktree" 2>/dev/null && git diff --stat origin/main..HEAD 2>/dev/null | tail -1 | grep -oE '[0-9]+ insertion' | grep -oE '[0-9]+' || echo 0)
    LINES_REMOVED=$(cd "$worktree" 2>/dev/null && git diff --stat origin/main..HEAD 2>/dev/null | tail -1 | grep -oE '[0-9]+ deletion' | grep -oE '[0-9]+' || echo 0)
    FILES_CHANGED=$(cd "$worktree" 2>/dev/null && git diff --name-only origin/main..HEAD 2>/dev/null | wc -l | tr -d ' ' || echo 0)

    # Determine outcome
    if [ "$exit_code" -eq 0 ]; then
      OUTCOME="success"
    elif [ "$exit_code" -eq 124 ]; then
      OUTCOME="timeout"
    elif grep -q "rate_limit\|rate limit\|429" "$LOG_DIR/claude-${issue_num}.log" 2>/dev/null; then
      OUTCOME="rate_limited"
    else
      OUTCOME="failed"
    fi

    # Precompute the merged flag here. (The previous inline
    # $( [ '$OUTCOME' = 'success' ] ... ) never expanded $OUTCOME — the
    # single quotes inside the command substitution made it a literal —
    # so 'merged' was always false.)
    local merged_flag='false'
    if [ "$OUTCOME" = "success" ] && [ -n "${pr_num:-}" ]; then
      merged_flag='true'
    fi

    METRICS_FILE="$LOG_DIR/claude-metrics.jsonl"
    # Title goes in via the environment: quotes in issue titles used to break
    # the interpolated '''...''' Python literal.
    ISSUE_TITLE="$issue_title" python3 -c "
import json, os, datetime
print(json.dumps({
    'ts': datetime.datetime.utcnow().isoformat() + 'Z',
    'worker': $worker_id,
    'issue': $issue_num,
    'repo': '${repo_owner}/${repo_name}',
    'title': os.environ['ISSUE_TITLE'][:80],
    'outcome': '$OUTCOME',
    'exit_code': $exit_code,
    'duration_s': $CYCLE_DURATION,
    'files_changed': ${FILES_CHANGED:-0},
    'lines_added': ${LINES_ADDED:-0},
    'lines_removed': ${LINES_REMOVED:-0},
    'salvaged': ${DIRTY:-0},
    'pr': '${pr_num:-}',
    'merged': $merged_flag
}))
" >> "$METRICS_FILE" 2>/dev/null

    # Cleanup
    cleanup_workdir "$worktree"
    unlock_issue "$issue_key"
    update_active "$worker_id" "" "" "done"
    sleep "$COOLDOWN"
  done
}

# === MAIN ===
log "=== Claude Loop Started — ${NUM_WORKERS} workers (max ${MAX_WORKERS}) ==="
log "Worktrees: ${WORKTREE_BASE}"

# Clean stale locks (-f: exit 0 even when the glob matches nothing)
rm -rf "$LOCK_DIR"/*.lock 2>/dev/null

# PID tracking via files (bash 3.2 compatible — no associative arrays)
PID_DIR="$LOG_DIR/claude-pids"
mkdir -p "$PID_DIR"
rm -f "$PID_DIR"/*.pid 2>/dev/null

# Start worker $1 in the background and record its PID.
launch_worker() {
  local wid="$1"
  run_worker "$wid" &
  echo $! > "$PID_DIR/${wid}.pid"
  log "Launched worker $wid (PID $!)"
}

# Initial launch
for i in $(seq 1 "$NUM_WORKERS"); do
  launch_worker "$i"
  sleep 3
done

# === DYNAMIC SCALER ===
# Every 90 seconds: reap dead workers, scale down when rate limits appear
# in the recent log, scale up (by 2, capped at MAX_WORKERS) when healthy.
CURRENT_WORKERS="$NUM_WORKERS"
while true; do
  sleep 90

  # Reap dead workers and relaunch
  for pidfile in "$PID_DIR"/*.pid; do
    [ -f "$pidfile" ] || continue
    wid=$(basename "$pidfile" .pid)
    wpid=$(cat "$pidfile")
    if ! kill -0 "$wpid" 2>/dev/null; then
      log "SCALER: Worker $wid died — relaunching"
      launch_worker "$wid"
      sleep 2
    fi
  done

  recent_rate_limits=$(tail -100 "$LOG_DIR/claude-loop.log" 2>/dev/null | grep -c "RATE LIMITED" || true)
  recent_successes=$(tail -100 "$LOG_DIR/claude-loop.log" 2>/dev/null | grep -c "SUCCESS" || true)

  if [ "$recent_rate_limits" -gt 0 ]; then
    if [ "$CURRENT_WORKERS" -gt 2 ]; then
      drop_to=$(( CURRENT_WORKERS / 2 ))
      [ "$drop_to" -lt 2 ] && drop_to=2
      log "SCALER: Rate limited — scaling ${CURRENT_WORKERS} → ${drop_to} workers"
      for wid in $(seq $((drop_to + 1)) "$CURRENT_WORKERS"); do
        if [ -f "$PID_DIR/${wid}.pid" ]; then
          # NOTE(review): this kills the worker loop only; an in-flight
          # `claude` child keeps running until its timeout — confirm intended.
          kill "$(cat "$PID_DIR/${wid}.pid")" 2>/dev/null || true
          rm -f "$PID_DIR/${wid}.pid"
          update_active "$wid" "" "" "done"
        fi
      done
      CURRENT_WORKERS=$drop_to
    fi
  elif [ "$recent_successes" -ge 2 ] && [ "$CURRENT_WORKERS" -lt "$MAX_WORKERS" ]; then
    new_count=$(( CURRENT_WORKERS + 2 ))
    [ "$new_count" -gt "$MAX_WORKERS" ] && new_count=$MAX_WORKERS
    log "SCALER: Healthy — scaling ${CURRENT_WORKERS} → ${new_count} workers"
    for wid in $(seq $((CURRENT_WORKERS + 1)) "$new_count"); do
      launch_worker "$wid"
      sleep 2
    done
    CURRENT_WORKERS=$new_count
  fi
done