#!/usr/bin/env bash
# claude-loop.sh — Parallel Claude Code agent dispatch loop
#
# Runs N workers concurrently against the Gitea backlog.
# Gracefully handles rate limits with backoff.
#
# Usage: claude-loop.sh [NUM_WORKERS]  (default: 10)

set -euo pipefail
# === CONFIG ===
NUM_WORKERS="${1:-10}"
MAX_WORKERS=21                       # absolute ceiling for the dynamic scaler
WORKTREE_BASE="$HOME/worktrees"
GITEA_URL="http://143.198.27.163:3000"
# API token for the 'claude' Gitea user; under set -e a missing file aborts here.
GITEA_TOKEN=$(cat "$HOME/.hermes/claude_token")
CLAUDE_TIMEOUT=900                   # 15 min per issue
COOLDOWN=2                           # seconds between issues — max speed
RATE_LIMIT_SLEEP=30                  # initial sleep on rate limit
MAX_RATE_SLEEP=120                   # max backoff on rate limit

LOG_DIR="$HOME/.hermes/logs"
SKIP_FILE="$LOG_DIR/claude-skip-list.json"
LOCK_DIR="$LOG_DIR/claude-locks"
ACTIVE_FILE="$LOG_DIR/claude-active.json"

mkdir -p "$LOG_DIR" "$WORKTREE_BASE" "$LOCK_DIR"

# Initialize state files: the skip list persists across runs, the active
# worker map is reset on every start.
[ -f "$SKIP_FILE" ] || echo '{}' > "$SKIP_FILE"
echo '{}' > "$ACTIVE_FILE"
# === SHARED FUNCTIONS ===

# log MESSAGE... — append a timestamped line to the shared loop log.
log() {
  local msg="[$(date '+%Y-%m-%d %H:%M:%S')] $*"
  echo "$msg" >> "$LOG_DIR/claude-loop.log"
}
# lock_issue KEY — atomically claim an issue across workers.
# mkdir is atomic on POSIX filesystems, so exactly one caller wins.
# Returns 0 (and records our PID inside the lock) on success, 1 if held.
lock_issue() {
  local issue_key="$1"
  local lockfile="$LOCK_DIR/$issue_key.lock"
  if mkdir "$lockfile" 2>/dev/null; then
    echo $$ > "$lockfile/pid"
    return 0
  fi
  return 1
}
# unlock_issue KEY — release a claim taken by lock_issue (best effort).
unlock_issue() {
  local issue_key="$1"
  rm -rf "$LOCK_DIR/$issue_key.lock" 2>/dev/null
}
# mark_skip ISSUE_NUM REASON [HOURS] — put an issue on the skip list for
# HOURS (default 1). Tracks a per-issue failure count; 3+ failures extends
# the skip window to 6 hours. Values are handed to Python through the
# environment (not string interpolation) so a REASON containing quotes
# cannot break or inject into the embedded script.
mark_skip() {
  local issue_num="$1"
  local reason="$2"
  local skip_hours="${3:-1}"
  SKIP_FILE="$SKIP_FILE" ISSUE_NUM="$issue_num" REASON="$reason" SKIP_HOURS="$skip_hours" \
  python3 - <<'PY' 2>/dev/null
import json, os, time, fcntl
num = os.environ['ISSUE_NUM']
with open(os.environ['SKIP_FILE'], 'r+') as f:
    fcntl.flock(f, fcntl.LOCK_EX)
    try:
        skips = json.load(f)
    except Exception:
        skips = {}
    skips[num] = {
        'until': time.time() + float(os.environ['SKIP_HOURS']) * 3600,
        'reason': os.environ['REASON'],
        'failures': skips.get(num, {}).get('failures', 0) + 1,
    }
    if skips[num]['failures'] >= 3:
        skips[num]['until'] = time.time() + 6 * 3600
    f.seek(0)
    f.truncate()
    json.dump(skips, f, indent=2)
PY
  log "SKIP: #${issue_num} — ${reason}"
}
# update_active WORKER ISSUE REPO STATUS — maintain the shared worker→task
# map in ACTIVE_FILE. STATUS 'done' removes the worker's entry; any other
# status upserts it. Values go through the environment so arbitrary issue
# titles / repo names cannot break the embedded Python.
update_active() {
  local worker="$1" issue="$2" repo="$3" status="$4"
  ACTIVE_FILE="$ACTIVE_FILE" W="$worker" I="$issue" R="$repo" S="$status" \
  python3 - <<'PY' 2>/dev/null
import json, os, fcntl
with open(os.environ['ACTIVE_FILE'], 'r+') as f:
    fcntl.flock(f, fcntl.LOCK_EX)
    try:
        active = json.load(f)
    except Exception:
        active = {}
    if os.environ['S'] == 'done':
        active.pop(os.environ['W'], None)
    else:
        active[os.environ['W']] = {
            'issue': os.environ['I'],
            'repo': os.environ['R'],
            'status': os.environ['S'],
        }
    f.seek(0)
    f.truncate()
    json.dump(active, f, indent=2)
PY
}
# cleanup_workdir PATH — best-effort removal of a per-issue clone.
# Never fails (rm errors are swallowed) so workers keep looping.
cleanup_workdir() {
  local wt="$1"
  rm -rf "$wt" 2>/dev/null || true
}
# get_next_issue — scan the configured repos for the highest-priority open
# issue that is (a) assigned to 'claude' or unassigned (we self-assign),
# (b) not skipped, (c) not already active, and (d) not locked by another
# worker. Prints a one-line JSON object on stdout on success, or the
# literal string 'null' if nothing is available. Config is handed to the
# embedded Python via the environment.
get_next_issue() {
  GITEA_TOKEN="$GITEA_TOKEN" GITEA_URL="$GITEA_URL" SKIP_FILE="$SKIP_FILE" \
  ACTIVE_FILE="$ACTIVE_FILE" LOCK_DIR="$LOCK_DIR" \
  python3 - <<'PY' 2>/dev/null
import json, os, sys, time, urllib.request

token = os.environ['GITEA_TOKEN']
base = os.environ['GITEA_URL']
lock_dir = os.environ['LOCK_DIR']
repos = [
    'rockachopa/Timmy-time-dashboard',
    'rockachopa/alexanderwhitestone.com',
    'rockachopa/hermes-agent',
    'replit/timmy-tower',
    'replit/token-gated-economy',
]

# Skip list: issues we recently failed on (written by mark_skip).
try:
    with open(os.environ['SKIP_FILE']) as f:
        skips = json.load(f)
except Exception:
    skips = {}

# Active map: issues other workers hold right now (avoid double-picking).
try:
    with open(os.environ['ACTIVE_FILE']) as f:
        active = json.load(f)
    active_issues = {v['issue'] for v in active.values()}
except Exception:
    active_issues = set()

all_issues = []
for repo in repos:
    url = f'{base}/api/v1/repos/{repo}/issues?state=open&type=issues&limit=50&sort=created'
    req = urllib.request.Request(url, headers={'Authorization': f'token {token}'})
    try:
        resp = urllib.request.urlopen(req, timeout=10)
        issues = json.loads(resp.read())
        for i in issues:
            i['_repo'] = repo
        all_issues.extend(issues)
    except Exception:
        continue  # one unreachable repo must not block the rest

# Sort by priority: URGENT > P0 > P1 > bugs > LHF > P2 > rest.
def priority(i):
    t = i['title'].lower()
    if '[urgent]' in t or 'urgent:' in t: return 0
    if '[p0]' in t: return 1
    if '[p1]' in t: return 2
    if '[bug]' in t: return 3
    if 'lhf:' in t or 'lhf ' in t: return 4
    if '[p2]' in t: return 5
    return 6

all_issues.sort(key=priority)

for i in all_issues:
    assignees = [a['login'] for a in (i.get('assignees') or [])]
    # Take issues assigned to claude OR unassigned (self-assigned below).
    if assignees and 'claude' not in assignees:
        continue

    title = i['title'].lower()
    if '[philosophy]' in title: continue
    if '[epic]' in title or 'epic:' in title: continue
    if '[showcase]' in title: continue

    num_str = str(i['number'])
    if num_str in active_issues: continue

    entry = skips.get(num_str, {})
    if entry and entry.get('until', 0) > time.time(): continue

    lock = os.path.join(lock_dir, i['_repo'].replace('/', '-') + '-' + num_str + '.lock')
    if os.path.isdir(lock): continue

    repo = i['_repo']
    owner, name = repo.split('/')

    # Self-assign if unassigned (best effort; failure here is non-fatal).
    if not assignees:
        try:
            data = json.dumps({'assignees': ['claude']}).encode()
            req2 = urllib.request.Request(
                f'{base}/api/v1/repos/{repo}/issues/{i["number"]}',
                data=data, method='PATCH',
                headers={'Authorization': f'token {token}',
                         'Content-Type': 'application/json'})
            urllib.request.urlopen(req2, timeout=5)
        except Exception:
            pass

    print(json.dumps({
        'number': i['number'],
        'title': i['title'],
        'repo_owner': owner,
        'repo_name': name,
        'repo': repo,
    }))
    sys.exit(0)

print('null')
PY
}
# build_prompt ISSUE_NUM ISSUE_TITLE WORKTREE REPO_OWNER REPO_NAME
# Emits the full instruction prompt for one Claude Code run on stdout.
# NOTE: the prompt deliberately embeds GITEA_TOKEN so the agent can call
# the API — keep these prompt logs out of anything public.
build_prompt() {
  local issue_num="$1"
  local issue_title="$2"
  local worktree="$3"
  local repo_owner="$4"
  local repo_name="$5"

  cat <<PROMPT
You are Claude, an autonomous code agent on the ${repo_name} project.

YOUR ISSUE: #${issue_num} — "${issue_title}"

GITEA API: ${GITEA_URL}/api/v1
GITEA TOKEN: ${GITEA_TOKEN}
REPO: ${repo_owner}/${repo_name}
WORKING DIRECTORY: ${worktree}

== YOUR POWERS ==
You can do ANYTHING a developer can do.

1. READ the issue and any comments for context:
   curl -s -H "Authorization: token ${GITEA_TOKEN}" "${GITEA_URL}/api/v1/repos/${repo_owner}/${repo_name}/issues/${issue_num}"
   curl -s -H "Authorization: token ${GITEA_TOKEN}" "${GITEA_URL}/api/v1/repos/${repo_owner}/${repo_name}/issues/${issue_num}/comments"

2. DO THE WORK. Code, test, fix, refactor — whatever the issue needs.
   - Check for tox.ini / Makefile / package.json for test/lint commands
   - Run tests if the project has them
   - Follow existing code conventions

3. COMMIT with conventional commits: fix: / feat: / refactor: / test: / chore:
   Include "Fixes #${issue_num}" or "Refs #${issue_num}" in the message.

4. PUSH to your branch (claude/issue-${issue_num}) and CREATE A PR:
   git push origin claude/issue-${issue_num}
   curl -s -X POST "${GITEA_URL}/api/v1/repos/${repo_owner}/${repo_name}/pulls" \\
     -H "Authorization: token ${GITEA_TOKEN}" \\
     -H "Content-Type: application/json" \\
     -d '{"title": "[claude] <description> (#${issue_num})", "body": "Fixes #${issue_num}\n\n<describe what you did>", "head": "claude/issue-${issue_num}", "base": "main"}'

5. COMMENT on the issue when done:
   curl -s -X POST "${GITEA_URL}/api/v1/repos/${repo_owner}/${repo_name}/issues/${issue_num}/comments" \\
     -H "Authorization: token ${GITEA_TOKEN}" \\
     -H "Content-Type: application/json" \\
     -d '{"body": "PR created. <summary of changes>"}'

== RULES ==
- Read CLAUDE.md or project README first for conventions
- If the project has tox, use tox. If npm, use npm. Follow the project.
- Never use --no-verify on git commands.
- If tests fail after 2 attempts, STOP and comment on the issue explaining why.
- Be thorough but focused. Fix the issue, don't refactor the world.
PROMPT
}
# === WORKER FUNCTION ===

# run_worker WORKER_ID — infinite loop: claim an issue, clone it fresh,
# run Claude Code against it, auto-merge the resulting PR on success,
# then clean up. Backs off progressively after repeated failures and
# rate limits.
run_worker() {
  local worker_id="$1"
  local consecutive_failures=0

  log "WORKER-${worker_id}: Started"

  while true; do
    # Progressive backoff once failures pile up (5+ in a row).
    if [ "$consecutive_failures" -ge 5 ]; then
      local backoff=$((RATE_LIMIT_SLEEP * (consecutive_failures / 5)))
      [ "$backoff" -gt "$MAX_RATE_SLEEP" ] && backoff=$MAX_RATE_SLEEP
      log "WORKER-${worker_id}: BACKOFF ${backoff}s (${consecutive_failures} failures)"
      sleep "$backoff"
      consecutive_failures=0
    fi

    issue_json=$(get_next_issue)

    if [ "$issue_json" = "null" ] || [ -z "$issue_json" ]; then
      update_active "$worker_id" "" "" "idle"
      sleep 10
      continue
    fi

    # Parse all four fields in a single python invocation (tab-separated;
    # tabs inside the title are flattened to spaces so the split is safe).
    IFS=$'\t' read -r issue_num issue_title repo_owner repo_name < <(
      echo "$issue_json" | python3 -c "
import sys, json
d = json.load(sys.stdin)
print(d['number'], d['title'].replace('\t', ' '), d['repo_owner'], d['repo_name'], sep='\t')
")
    issue_key="${repo_owner}-${repo_name}-${issue_num}"
    branch="claude/issue-${issue_num}"
    worktree="${WORKTREE_BASE}/claude-w${worker_id}-${issue_num}"

    # Another worker may have grabbed the issue between selection and here.
    if ! lock_issue "$issue_key"; then
      sleep 5
      continue
    fi

    log "WORKER-${worker_id}: === ISSUE #${issue_num}: ${issue_title} (${repo_owner}/${repo_name}) ==="
    update_active "$worker_id" "$issue_num" "${repo_owner}/${repo_name}" "working"

    # Fresh shallow clone per issue — no shared state, no contention.
    # Clone host is derived from GITEA_URL so the server is configured in
    # exactly one place.
    rm -rf "$worktree" 2>/dev/null
    if ! git clone --depth=1 -b main \
        "http://claude:${GITEA_TOKEN}@${GITEA_URL#http://}/${repo_owner}/${repo_name}.git" \
        "$worktree" >/dev/null 2>&1; then
      log "WORKER-${worker_id}: ERROR cloning for #${issue_num}"
      unlock_issue "$issue_key"
      consecutive_failures=$((consecutive_failures + 1))
      sleep "$COOLDOWN"
      continue
    fi
    cd "$worktree"
    git checkout -b "$branch" >/dev/null 2>&1

    # Build prompt and run the agent.
    prompt=$(build_prompt "$issue_num" "$issue_title" "$worktree" "$repo_owner" "$repo_name")

    log "WORKER-${worker_id}: Launching Claude Code for #${issue_num}..."

    set +e
    env -u CLAUDECODE gtimeout "$CLAUDE_TIMEOUT" claude \
      --print \
      --dangerously-skip-permissions \
      -p "$prompt" \
      </dev/null >> "$LOG_DIR/claude-${issue_num}.log" 2>&1
    exit_code=$?
    set -e

    if [ "$exit_code" -eq 0 ]; then
      log "WORKER-${worker_id}: SUCCESS #${issue_num}"

      # Auto-merge the PR the agent opened, then close the issue.
      pr_num=$(curl -sf "${GITEA_URL}/api/v1/repos/${repo_owner}/${repo_name}/pulls?state=open&head=${repo_owner}:${branch}&limit=1" \
        -H "Authorization: token ${GITEA_TOKEN}" | python3 -c "
import sys, json
prs = json.load(sys.stdin)
print(prs[0]['number'] if prs else '')
" 2>/dev/null)

      if [ -n "$pr_num" ]; then
        curl -sf -X POST "${GITEA_URL}/api/v1/repos/${repo_owner}/${repo_name}/pulls/${pr_num}/merge" \
          -H "Authorization: token ${GITEA_TOKEN}" \
          -H "Content-Type: application/json" \
          -d '{"Do": "squash"}' >/dev/null 2>&1 || true
        curl -sf -X PATCH "${GITEA_URL}/api/v1/repos/${repo_owner}/${repo_name}/issues/${issue_num}" \
          -H "Authorization: token ${GITEA_TOKEN}" \
          -H "Content-Type: application/json" \
          -d '{"state": "closed"}' >/dev/null 2>&1 || true
        log "WORKER-${worker_id}: PR #${pr_num} merged, issue #${issue_num} closed"
      fi

      consecutive_failures=0

    elif [ "$exit_code" -eq 124 ]; then
      # gtimeout exits 124 when the command hit CLAUDE_TIMEOUT.
      log "WORKER-${worker_id}: TIMEOUT #${issue_num}"
      mark_skip "$issue_num" "timeout" 1
      consecutive_failures=$((consecutive_failures + 1))

    else
      # Rate-limit detection is heuristic: scan the per-issue log.
      if grep -q "rate_limit\|rate limit\|429\|overloaded" "$LOG_DIR/claude-${issue_num}.log" 2>/dev/null; then
        log "WORKER-${worker_id}: RATE LIMITED on #${issue_num} — backing off"
        mark_skip "$issue_num" "rate_limit" 0.25   # 15 min skip
        consecutive_failures=$((consecutive_failures + 3))   # faster backoff
      else
        log "WORKER-${worker_id}: FAILED #${issue_num} (exit ${exit_code})"
        mark_skip "$issue_num" "exit_code_${exit_code}" 1
        consecutive_failures=$((consecutive_failures + 1))
      fi
    fi

    # Leave the clone before deleting it — rm -rf of the cwd is unreliable.
    cd "$WORKTREE_BASE"
    cleanup_workdir "$worktree"
    unlock_issue "$issue_key"
    update_active "$worker_id" "" "" "done"

    sleep "$COOLDOWN"
  done
}
# === MAIN ===
log "=== Claude Loop Started — ${NUM_WORKERS} workers (max ${MAX_WORKERS}) ==="
log "Worktrees: ${WORKTREE_BASE}"

# Clean stale locks left by a previous run.
rm -rf "$LOCK_DIR"/*.lock 2>/dev/null

# PID tracking via files (bash 3.2 compatible — no associative arrays).
PID_DIR="$LOG_DIR/claude-pids"
mkdir -p "$PID_DIR"
rm -f "$PID_DIR"/*.pid 2>/dev/null
# launch_worker WID — start worker WID in the background and record its
# PID for the scaler/watchdog to track.
launch_worker() {
  local wid="$1"
  run_worker "$wid" &
  echo $! > "$PID_DIR/${wid}.pid"
  log "Launched worker $wid (PID $!)"
}
# Initial launch — stagger worker starts so they don't all hit Gitea at once.
for i in $(seq 1 "$NUM_WORKERS"); do
  launch_worker "$i"
  sleep 3
done
# === DYNAMIC SCALER ===
# Every 90s: relaunch dead workers, then scale DOWN (halve, floor 2) if the
# recent log shows rate limiting, or scale UP (+2, capped at MAX_WORKERS)
# when recent runs are succeeding.
CURRENT_WORKERS="$NUM_WORKERS"
while true; do
  sleep 90

  # Reap dead workers and relaunch them under the same ID.
  for pidfile in "$PID_DIR"/*.pid; do
    [ -f "$pidfile" ] || continue
    wid=$(basename "$pidfile" .pid)
    wpid=$(cat "$pidfile")
    if ! kill -0 "$wpid" 2>/dev/null; then
      log "SCALER: Worker $wid died — relaunching"
      launch_worker "$wid"
      sleep 2
    fi
  done

  # Health signals from the last 100 log lines (grep -c prints 0 on no
  # match but exits 1, hence the || true under set -e).
  recent_rate_limits=$(tail -100 "$LOG_DIR/claude-loop.log" 2>/dev/null | grep -c "RATE LIMITED" || true)
  recent_successes=$(tail -100 "$LOG_DIR/claude-loop.log" 2>/dev/null | grep -c "SUCCESS" || true)

  if [ "$recent_rate_limits" -gt 0 ]; then
    if [ "$CURRENT_WORKERS" -gt 2 ]; then
      drop_to=$(( CURRENT_WORKERS / 2 ))
      [ "$drop_to" -lt 2 ] && drop_to=2
      log "SCALER: Rate limited — scaling ${CURRENT_WORKERS} → ${drop_to} workers"
      # Kill the highest-numbered workers first.
      for wid in $(seq $((drop_to + 1)) "$CURRENT_WORKERS"); do
        if [ -f "$PID_DIR/${wid}.pid" ]; then
          kill "$(cat "$PID_DIR/${wid}.pid")" 2>/dev/null || true
          rm -f "$PID_DIR/${wid}.pid"
          update_active "$wid" "" "" "done"
        fi
      done
      CURRENT_WORKERS=$drop_to
    fi
  elif [ "$recent_successes" -ge 2 ] && [ "$CURRENT_WORKERS" -lt "$MAX_WORKERS" ]; then
    new_count=$(( CURRENT_WORKERS + 2 ))
    [ "$new_count" -gt "$MAX_WORKERS" ] && new_count=$MAX_WORKERS
    log "SCALER: Healthy — scaling ${CURRENT_WORKERS} → ${new_count} workers"
    for wid in $(seq $((CURRENT_WORKERS + 1)) "$new_count"); do
      launch_worker "$wid"
      sleep 2
    done
    CURRENT_WORKERS=$new_count
  fi
done