fix: replace worktrees with fresh clones, fix watchdog issue filing
- Workers now git clone --depth=1 per issue instead of sharing base repos with worktrees. Eliminates all contention and branch collision errors.
- Watchdog file_issue uses temp file + sys.argv for safe JSON escaping
- Watchdog zombie detection only kills processes >5min old (was killing legitimate git pushes from active workers)
- Simplified cleanup to plain rm -rf (no worktree bookkeeping needed)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -51,19 +51,27 @@ else:
|
||||
fi
|
||||
|
||||
log "FILING ISSUE: $title"
|
||||
curl -sf -X POST "${GITEA_URL}/api/v1/repos/${ISSUE_REPO}/issues" \
|
||||
-H "Authorization: token ${ADMIN_TOKEN}" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "$(python3 -c "
|
||||
import json
|
||||
print(json.dumps({
|
||||
'title': '[watchdog] $title',
|
||||
'body': '''$body
|
||||
local tmpfile="/tmp/watchdog-issue-$$.json"
|
||||
python3 -c "
|
||||
import json, sys
|
||||
title = sys.argv[1]
|
||||
body = sys.argv[2]
|
||||
assignee = sys.argv[3]
|
||||
with open('$tmpfile', 'w') as f:
|
||||
json.dump({
|
||||
'title': '[watchdog] ' + title,
|
||||
'body': body + '\n\n---\n*Auto-filed by loop-watchdog*',
|
||||
'assignees': [assignee],
|
||||
}, f)
|
||||
" "$title" "$body" "$assignee" 2>/dev/null
|
||||
|
||||
---
|
||||
*Auto-filed by loop-watchdog at $(date '+%Y-%m-%d %H:%M:%S')*''',
|
||||
'assignees': ['$assignee'],
|
||||
}))" 2>/dev/null)" >/dev/null 2>&1 || log "WARN: Failed to file issue: $title"
|
||||
if [ -f "$tmpfile" ]; then
|
||||
curl -sf -X POST "${GITEA_URL}/api/v1/repos/${ISSUE_REPO}/issues" \
|
||||
-H "Authorization: token ${ADMIN_TOKEN}" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d @"$tmpfile" >/dev/null 2>&1 || log "WARN: Failed to file issue: $title"
|
||||
rm -f "$tmpfile"
|
||||
fi
|
||||
}
|
||||
|
||||
# === HEALTH CHECKS ===
|
||||
@@ -164,20 +172,31 @@ check_gitea() {
|
||||
}
|
||||
|
||||
# Print the PIDs of processes whose `ps` line matches an extended regex and
# whose elapsed run time exceeds a threshold.
# Arguments: $1 - extended regex matched against each `ps` output line
#            $2 - maximum allowed age in seconds
# Outputs:   one PID per line on stdout
_zombie_pids() {
    ps -eo pid,etime,command | awk -v pat="$1" -v max_age="$2" '
        # Convert a ps etime value ([[DD-]HH:]MM:SS) to seconds.
        function etime_secs(e,    days, dash, n, t) {
            days = 0
            dash = index(e, "-")
            if (dash > 0) {
                # Day prefix present: peel it off before splitting on ":".
                days = substr(e, 1, dash - 1)
                e = substr(e, dash + 1)
            }
            n = split(e, t, ":")
            if (n == 3) return days * 86400 + t[1] * 3600 + t[2] * 60 + t[3]
            if (n == 2) return days * 86400 + t[1] * 60 + t[2]
            return days * 86400 + t[1]
        }
        $0 ~ pat && etime_secs($2) > max_age { print $1 }
    '
}

# Kill git push / git-remote-http and pytest processes that have been running
# for more than 5 minutes (300 seconds). The check is age-based rather than
# count-based so that normal, short-lived pushes from active workers are left
# alone.
# Outputs: one log line per category when at least one process was killed.
# Returns: does not fail merely because nothing needed killing.
check_zombies() {
    local max_age=300
    local killed=0
    local killed_py=0
    local pid

    for pid in $(_zombie_pids 'git.*push|git-remote-http' "$max_age"); do
        # kill may fail if the process already exited; only count successes.
        kill "$pid" 2>/dev/null && killed=$((killed + 1))
    done
    if [ "$killed" -gt 0 ]; then
        log "Killed $killed stuck git processes (>5min old)"
    fi

    for pid in $(_zombie_pids 'pytest tests/' "$max_age"); do
        kill "$pid" 2>/dev/null && killed_py=$((killed_py + 1))
    done
    if [ "$killed_py" -gt 0 ]; then
        log "Killed $killed_py orphaned pytest processes (>5min old)"
    fi
}
|
||||
|
||||
check_disk() {
|
||||
|
||||
Reference in New Issue
Block a user