# Review notes. Text before the first "diff --git" header is ignored by
# `git apply` and `patch`.
#
# Purpose of the patch:
#   * claude-loop.sh / gemini-loop.sh: replace the shared base clone plus
#     `git worktree` setup with a fresh shallow clone per issue, and reduce
#     cleanup to removing that directory.
#   * loop-watchdog.sh: build the issue JSON from argv instead of interpolating
#     shell variables into Python source, and only reap git/pytest processes
#     that have been running for more than 5 minutes.
#
# Amendments made during review (hunk headers recomputed accordingly):
#   1. Worker loops: the branch is created with `git -C "$worktree"`. The patch
#      dropped `cd "$worktree"`, so a bare `git checkout -b` acted on whatever
#      directory the loop was started from, and the failure was hidden by
#      `>/dev/null 2>&1`. A failed branch creation is now handled like a failed
#      clone (log, clean up, unlock, back off, continue).
#   2. Watchdog issue filing: the payload file comes from `mktemp` rather than
#      the predictable /tmp/watchdog-issue-$$.json, and the POST is gated on
#      python3's exit status (a mktemp file always exists, so `[ -f ]` would no
#      longer indicate success). A failed payload build now logs the WARN.
#   3. check_zombies: elapsed-time parsing handles the DD-HH:MM:SS form, uses
#      the return value of split() instead of length(array), is factored into
#      one helper, and reports with `if` so the function does not return 1
#      when nothing was killed. `pid` is now local.
#
# Caveats:
#   * Indentation of context lines was reconstructed from a whitespace-collapsed
#     copy of the patch; apply with `git apply --ignore-whitespace` or regenerate
#     against the real tree.
#   * The `index` hashes are those of the original patch and do not describe the
#     amended post-image.
#   * Not addressed here: the Gitea token is still embedded in the clone URL, so
#     it appears in the process list and in the clone's .git/config.
#
diff --git a/bin/claude-loop.sh b/bin/claude-loop.sh
index 2e7ca9e..a2c882e 100755
--- a/bin/claude-loop.sh
+++ b/bin/claude-loop.sh
@@ -91,19 +91,9 @@ with open('$ACTIVE_FILE', 'r+') as f:
 " 2>/dev/null
 }
 
-cleanup_worktree() {
+cleanup_workdir() {
     local wt="$1"
-    local branch="$2"
-    if [ -d "$wt" ]; then
-        local parent
-        parent=$(git -C "$wt" rev-parse --git-common-dir 2>/dev/null | sed 's|/.git$||' || true)
-        if [ -n "$parent" ] && [ -d "$parent" ]; then
-            cd "$parent"
-        fi
-        git worktree remove --force "$wt" 2>/dev/null || rm -rf "$wt"
-        git worktree prune 2>/dev/null
-        git branch -D "$branch" 2>/dev/null || true
-    fi
+    rm -rf "$wt" 2>/dev/null || true
 }
 
 get_next_issue() {
@@ -303,41 +293,28 @@ run_worker() {
         log "WORKER-${worker_id}: === ISSUE #${issue_num}: ${issue_title} (${repo_owner}/${repo_name}) ==="
         update_active "$worker_id" "$issue_num" "${repo_owner}/${repo_name}" "working"
 
-        # Ensure local clone
-        local_repo="${WORKTREE_BASE}/claude-base-${repo_owner}-${repo_name}"
-        if [ ! -d "$local_repo" ]; then
-            log "WORKER-${worker_id}: Cloning ${repo_owner}/${repo_name}..."
-            git clone --depth=1 "http://claude:${GITEA_TOKEN}@143.198.27.163:3000/${repo_owner}/${repo_name}.git" "$local_repo" 2>&1 || {
-                log "WORKER-${worker_id}: ERROR cloning"
-                unlock_issue "$issue_key"
-                consecutive_failures=$((consecutive_failures + 1))
-                sleep "$COOLDOWN"
-                continue
-            }
-            cd "$local_repo"
-            git fetch --unshallow origin main 2>/dev/null || true
-        fi
-
-        # Fetch latest
-        cd "$local_repo"
-        timeout 60 git fetch origin main 2>/dev/null || true
-        git checkout main 2>/dev/null || true
-        git reset --hard origin/main 2>/dev/null || true
-
-        # Create worktree
-        [ -d "$worktree" ] && cleanup_worktree "$worktree" "$branch"
-        cd "$local_repo"
-
-        if ! git worktree add "$worktree" -b "$branch" origin/main 2>&1; then
-            log "WORKER-${worker_id}: ERROR creating worktree"
+        # Fresh clone per issue — no shared state, no contention
+        rm -rf "$worktree" 2>/dev/null
+        if ! git clone --depth=1 -b main \
+            "http://claude:${GITEA_TOKEN}@143.198.27.163:3000/${repo_owner}/${repo_name}.git" \
+            "$worktree" >/dev/null 2>&1; then
+            log "WORKER-${worker_id}: ERROR cloning for #${issue_num}"
             unlock_issue "$issue_key"
             consecutive_failures=$((consecutive_failures + 1))
             sleep "$COOLDOWN"
             continue
         fi
 
-        cd "$worktree"
-        git remote set-url origin "http://claude:${GITEA_TOKEN}@143.198.27.163:3000/${repo_owner}/${repo_name}.git"
+        # Create the work branch inside the clone (-C): the loop no longer cd's
+        # into $worktree, so a bare `git checkout` would act on the caller's cwd.
+        if ! git -C "$worktree" checkout -b "$branch" >/dev/null 2>&1; then
+            log "WORKER-${worker_id}: ERROR creating branch ${branch} for #${issue_num}"
+            cleanup_workdir "$worktree"
+            unlock_issue "$issue_key"
+            consecutive_failures=$((consecutive_failures + 1))
+            sleep "$COOLDOWN"
+            continue
+        fi
 
         # Build prompt and run
         prompt=$(build_prompt "$issue_num" "$issue_title" "$worktree" "$repo_owner" "$repo_name")
@@ -399,7 +376,7 @@ else: print('')
         fi
 
         # Cleanup
-        cleanup_worktree "$worktree" "$branch"
+        cleanup_workdir "$worktree"
         unlock_issue "$issue_key"
         update_active "$worker_id" "" "" "done"
 
diff --git a/bin/gemini-loop.sh b/bin/gemini-loop.sh
index a07ff69..2954942 100755
--- a/bin/gemini-loop.sh
+++ b/bin/gemini-loop.sh
@@ -85,16 +85,9 @@ with open('$ACTIVE_FILE', 'r+') as f:
 " 2>/dev/null
 }
 
-cleanup_worktree() {
-    local wt="$1" branch="$2"
-    if [ -d "$wt" ]; then
-        local parent
-        parent=$(git -C "$wt" rev-parse --git-common-dir 2>/dev/null | sed 's|/.git$||' || true)
-        [ -n "$parent" ] && [ -d "$parent" ] && cd "$parent"
-        git worktree remove --force "$wt" 2>/dev/null || rm -rf "$wt"
-        git worktree prune 2>/dev/null
-        git branch -D "$branch" 2>/dev/null || true
-    fi
+cleanup_workdir() {
+    local wt="$1"
+    rm -rf "$wt" 2>/dev/null || true
 }
 
 get_next_issue() {
@@ -283,39 +276,28 @@ run_worker() {
         log "WORKER-${worker_id}: === ISSUE #${issue_num}: ${issue_title} (${repo_owner}/${repo_name}) ==="
         update_active "$worker_id" "$issue_num" "${repo_owner}/${repo_name}" "working"
 
-        # Ensure local clone
-        local_repo="${WORKTREE_BASE}/gemini-base-${repo_owner}-${repo_name}"
-        if [ ! -d "$local_repo" ]; then
-            log "WORKER-${worker_id}: Cloning ${repo_owner}/${repo_name}..."
-            git clone --depth=1 "http://gemini:${GITEA_TOKEN}@143.198.27.163:3000/${repo_owner}/${repo_name}.git" "$local_repo" 2>&1 || {
-                log "WORKER-${worker_id}: ERROR cloning"
-                unlock_issue "$issue_key"
-                consecutive_failures=$((consecutive_failures + 1))
-                sleep "$COOLDOWN"
-                continue
-            }
-            cd "$local_repo"
-            git fetch --unshallow origin main 2>/dev/null || true
-        fi
-
-        cd "$local_repo"
-        timeout 60 git fetch origin main 2>/dev/null || true
-        git checkout main 2>/dev/null || true
-        git reset --hard origin/main 2>/dev/null || true
-
-        [ -d "$worktree" ] && cleanup_worktree "$worktree" "$branch"
-        cd "$local_repo"
-
-        if ! git worktree add "$worktree" -b "$branch" origin/main 2>&1; then
-            log "WORKER-${worker_id}: ERROR creating worktree"
+        # Fresh clone per issue — no shared state, no contention
+        rm -rf "$worktree" 2>/dev/null
+        if ! git clone --depth=1 -b main \
+            "http://gemini:${GITEA_TOKEN}@143.198.27.163:3000/${repo_owner}/${repo_name}.git" \
+            "$worktree" >/dev/null 2>&1; then
+            log "WORKER-${worker_id}: ERROR cloning for #${issue_num}"
             unlock_issue "$issue_key"
             consecutive_failures=$((consecutive_failures + 1))
             sleep "$COOLDOWN"
             continue
         fi
 
-        cd "$worktree"
-        git remote set-url origin "http://gemini:${GITEA_TOKEN}@143.198.27.163:3000/${repo_owner}/${repo_name}.git"
+        # Create the work branch inside the clone (-C): the loop no longer cd's
+        # into $worktree, so a bare `git checkout` would act on the caller's cwd.
+        if ! git -C "$worktree" checkout -b "$branch" >/dev/null 2>&1; then
+            log "WORKER-${worker_id}: ERROR creating branch ${branch} for #${issue_num}"
+            cleanup_workdir "$worktree"
+            unlock_issue "$issue_key"
+            consecutive_failures=$((consecutive_failures + 1))
+            sleep "$COOLDOWN"
+            continue
+        fi
 
         prompt=$(build_prompt "$issue_num" "$issue_title" "$worktree" "$repo_owner" "$repo_name")
 
@@ -372,7 +354,7 @@ else: print('')
             fi
         fi
 
-        cleanup_worktree "$worktree" "$branch"
+        cleanup_workdir "$worktree"
         unlock_issue "$issue_key"
         update_active "$worker_id" "" "" "done"
 
diff --git a/bin/loop-watchdog.sh b/bin/loop-watchdog.sh
index c56274e..d8cf3bf 100644
--- a/bin/loop-watchdog.sh
+++ b/bin/loop-watchdog.sh
@@ -51,19 +51,31 @@ else:
     fi
 
     log "FILING ISSUE: $title"
-    curl -sf -X POST "${GITEA_URL}/api/v1/repos/${ISSUE_REPO}/issues" \
-        -H "Authorization: token ${ADMIN_TOKEN}" \
-        -H "Content-Type: application/json" \
-        -d "$(python3 -c "
-import json
-print(json.dumps({
-    'title': '[watchdog] $title',
-    'body': '''$body
-
----
-*Auto-filed by loop-watchdog at $(date '+%Y-%m-%d %H:%M:%S')*''',
-    'assignees': ['$assignee'],
-}))" 2>/dev/null)" >/dev/null 2>&1 || log "WARN: Failed to file issue: $title"
+    # mktemp gives an unpredictable, private file; a fixed /tmp name can be pre-created or symlinked
+    local tmpfile
+    if ! tmpfile=$(mktemp "${TMPDIR:-/tmp}/watchdog-issue.XXXXXX"); then
+        log "WARN: Failed to file issue: $title"
+        return 0
+    fi
+    # title/body/assignee travel via argv so their content cannot alter the Python source
+    if python3 -c "
+import json, sys
+title, body, assignee, path = sys.argv[1:5]
+with open(path, 'w') as f:
+    json.dump({
+        'title': '[watchdog] ' + title,
+        'body': body + '\n\n---\n*Auto-filed by loop-watchdog*',
+        'assignees': [assignee],
+    }, f)
+" "$title" "$body" "$assignee" "$tmpfile" 2>/dev/null; then
+        curl -sf -X POST "${GITEA_URL}/api/v1/repos/${ISSUE_REPO}/issues" \
+            -H "Authorization: token ${ADMIN_TOKEN}" \
+            -H "Content-Type: application/json" \
+            -d @"$tmpfile" >/dev/null 2>&1 || log "WARN: Failed to file issue: $title"
+    else
+        log "WARN: Failed to file issue: $title"
+    fi
+    rm -f "$tmpfile"
 }
 
 # === HEALTH CHECKS ===
@@ -164,20 +176,35 @@ check_gitea() {
 }
 
+# Print PIDs of processes whose ps line matches ERE $1 and whose elapsed
+# time exceeds $2 seconds. ps etime is [[DD-]HH:]MM:SS, so 1-4 fields.
+_pids_older_than() {
+    ps -eo pid,etime,command | grep -E "$1" | grep -v grep | awk -v max="$2" '{
+        n = split($2, t, /[:-]/);
+        if (n == 4) secs = t[1]*86400 + t[2]*3600 + t[3]*60 + t[4];
+        else if (n == 3) secs = t[1]*3600 + t[2]*60 + t[3];
+        else if (n == 2) secs = t[1]*60 + t[2];
+        else secs = t[1];
+        if (secs > max) print $1
+    }'
+}
+
 check_zombies() {
-    local stuck_git
-    stuck_git=$(ps aux | grep "git.*push\|git-remote-http" | grep -v grep | wc -l | tr -d ' ')
-    local orphan_py
-    orphan_py=$(ps aux | grep "pytest tests/" | grep -v grep | wc -l | tr -d ' ')
-
-    if [ "$stuck_git" -gt 3 ]; then
-        log "Killing $stuck_git stuck git processes"
-        pkill -f "git.*push\|git-remote-http" 2>/dev/null || true
-    fi
-
-    if [ "$orphan_py" -gt 3 ]; then
-        log "Killing $orphan_py orphaned pytest processes"
-        pkill -f "pytest tests/" 2>/dev/null || true
-    fi
+    # Only kill git/pytest processes older than 5 minutes (300 seconds)
+    # Normal pushes from workers should complete in under a minute
+    local pid killed=0 killed_py=0
+    for pid in $(_pids_older_than "git.*push|git-remote-http" 300); do
+        kill "$pid" 2>/dev/null && killed=$((killed + 1))
+    done
+    if [ "$killed" -gt 0 ]; then
+        log "Killed $killed stuck git processes (>5min old)"
+    fi
+
+    for pid in $(_pids_older_than "pytest tests/" 300); do
+        kill "$pid" 2>/dev/null && killed_py=$((killed_py + 1))
+    done
+    if [ "$killed_py" -gt 0 ]; then
+        log "Killed $killed_py orphaned pytest processes (>5min old)"
+    fi
 }
 
 check_disk() {