#!/usr/bin/env bash
# bin/kimi-loop.sh
# kimi-loop.sh — Dropout-proof Kimi code agent dispatch loop
# Picks an open issue from Gitea, creates a worktree, runs Kimi Code CLI,
# handles failures gracefully, and loops forever.
#
# Dropout-proof means:
#   - If Kimi Code crashes/hangs, we kill it and move on
#   - If worktree creation fails, skip and retry
#   - If push fails, log and continue
#   - Fixed long sleep (LONG_SLEEP) after MAX_FAILURES consecutive failures
#   - Clean up the worktree after each agent run, successful or not

set -euo pipefail

# === CONFIG ===
# -- Gitea endpoint, credentials and target repository --
GITEA_URL="http://143.198.27.163:3000"
GITEA_TOKEN=$(cat "$HOME/.hermes/kimi_token")
REPO_OWNER="rockachopa"
REPO_NAME="Timmy-time-dashboard"

# -- Local paths --
WORKTREE_BASE="$HOME/worktrees"
REPO_DIR="$HOME/worktrees/kimi-repo"
LOG_DIR="$HOME/.hermes/logs"
SKIP_FILE="$LOG_DIR/kimi-skip-list.json"  # issues to skip temporarily

# -- Timing / retry policy (all values in seconds unless noted) --
KIMI_TIMEOUT=600     # 10 min per issue
COOLDOWN=30          # pause between issues
MAX_FAILURES=5       # consecutive failures (a count) before the long sleep
LONG_SLEEP=300       # 5 min backoff on repeated failures

mkdir -p "$LOG_DIR" "$WORKTREE_BASE"

# Seed the skip list with an empty JSON object on first run.
if [[ ! -f "$SKIP_FILE" ]]; then
  printf '%s\n' '{}' > "$SKIP_FILE"
fi

# === STATE ===
failure_count=0
issues_completed=0

# === SKIP LIST FUNCTIONS ===
#######################################
# Report whether an issue is currently on the temporary skip list.
# Globals:   SKIP_FILE (read; rewritten when an expired entry is dropped)
# Arguments: $1 - issue number
# Outputs:   "skip" when the entry's 'until' timestamp is still in the
#            future, otherwise "ok" (expired entries are removed first)
#######################################
is_skipped() {
  local issue_num="$1"
  # The path and issue number are handed over as argv instead of being
  # spliced into the Python source, so quotes or other special characters
  # in either value cannot break (or inject into) the program text.
  python3 -c "
import json, sys, time

skip_file, issue_key = sys.argv[1], sys.argv[2]
try:
    with open(skip_file) as f:
        skips = json.load(f)
except Exception:
    skips = {}
if not isinstance(skips, dict):
    skips = {}

entry = skips.get(issue_key) or {}
if entry.get('until', 0) > time.time():
    print('skip')
    sys.exit(0)
# Expired or not found: clean up and allow
if issue_key in skips:
    del skips[issue_key]
    with open(skip_file, 'w') as f:
        json.dump(skips, f)
print('ok')
" "$SKIP_FILE" "$issue_num" 2>/dev/null
}

#######################################
# Put an issue on the temporary skip list.
# Globals:   SKIP_FILE (read and rewritten)
# Arguments: $1 - issue number
#            $2 - free-form reason, stored verbatim
#            $3 - hours to skip (default 1); ignored once the issue has
#                 failed 3+ times, which forces a 6 hour skip instead
# Outputs:   one line in the loop log via log()
# Returns:   0 even when the skip file could not be written (a warning is
#            logged) so the dispatch loop keeps running
#######################################
mark_skip() {
  local issue_num="$1"
  local reason="$2"
  local skip_hours="${3:-1}"  # default 1 hour
  # All values are passed as argv: a reason such as "can't push" or a path
  # containing a quote must not be able to corrupt the Python source.
  if python3 -c "
import json, sys, time

skip_file, issue_key, reason, hours = sys.argv[1:5]
try:
    with open(skip_file) as f:
        skips = json.load(f)
except Exception:
    skips = {}
if not isinstance(skips, dict):
    skips = {}

previous = skips.get(issue_key)
failures = (previous.get('failures', 0) if isinstance(previous, dict) else 0) + 1
until = time.time() + float(hours) * 3600
# If 3+ failures, skip for 6 hours instead
if failures >= 3:
    until = time.time() + 6 * 3600
skips[issue_key] = {'until': until, 'reason': reason, 'failures': failures}
with open(skip_file, 'w') as f:
    json.dump(skips, f, indent=2)
" "$SKIP_FILE" "$issue_num" "$reason" "$skip_hours" 2>/dev/null; then
    log "SKIP: #${issue_num} added to skip list — ${reason}"
  else
    log "WARN: could not update skip list for #${issue_num} (${reason})"
  fi
}

# Append one timestamped line to the loop log.
# Globals:   LOG_DIR (read)
# Arguments: the message words, joined with spaces
log() {
  local stamp
  stamp=$(date '+%Y-%m-%d %H:%M:%S')
  printf '[%s] %s\n' "$stamp" "$*" >> "$LOG_DIR/kimi-loop.log"
}

# Post a comment on an issue (best effort: HTTP failures are ignored).
# Globals:   GITEA_URL, GITEA_TOKEN, REPO_OWNER, REPO_NAME (read)
# Arguments: $1 - issue number
#            $2 - comment text (JSON-encoded by Python before sending)
# Returns:   0 regardless of whether the request succeeded
post_issue_comment() {
  local issue_num="$1"
  local body="$2"
  local endpoint="${GITEA_URL}/api/v1/repos/${REPO_OWNER}/${REPO_NAME}/issues/${issue_num}/comments"
  local json_body
  # json.dumps takes care of quoting/escaping the free-form comment text.
  json_body=$(python3 -c 'import json, sys; print(json.dumps({"body": sys.argv[1]}))' "$body")
  if ! curl -sf -X POST "$endpoint" \
    -H "Authorization: token ${GITEA_TOKEN}" \
    -H "Content-Type: application/json" \
    -d "$json_body" >/dev/null 2>&1; then
    :  # deliberately ignored: commenting must never stop the loop
  fi
}


# Check whether a branch exists on the `origin` remote of the current repo.
# Arguments: $1 - branch name without the refs/heads/ prefix
# Returns:   0 if origin has exactly that branch, non-zero otherwise
#            (including when the remote cannot be reached)
remote_branch_exists() {
  local branch="$1"
  local refs
  [[ -n "$branch" ]] || return 1
  # Ask for the full ref name: a bare pattern is matched against the tail of
  # every ref, so "kimi/issue-5" would also match "backup/kimi/issue-5".
  refs=$(git ls-remote --heads origin "refs/heads/${branch}" 2>/dev/null) || return 1
  [[ -n "$refs" ]]
}

#######################################
# Look up the pull request whose head branch is the given branch.
# Globals:   GITEA_URL, GITEA_TOKEN, REPO_OWNER, REPO_NAME (read)
# Arguments: $1 - head branch name
# Outputs:   the PR number, or an empty line when no PR matches or the
#            API could not be queried
# Returns:   0 in all of those cases, so `var=$(get_pr_num ...)` cannot
#            abort a `set -e` caller on a transient API error
#######################################
get_pr_num() {
  local branch="$1"
  local response
  response=$(curl -sf "${GITEA_URL}/api/v1/repos/${REPO_OWNER}/${REPO_NAME}/pulls?state=all&head=${REPO_OWNER}:${branch}&limit=50" \
    -H "Authorization: token ${GITEA_TOKEN}") || response='[]'
  # Do not rely on the server honouring the head= query parameter: check each
  # PR's head ref here and take the first match in the order the API returned.
  # NOTE(review): assumes the PR of interest is within the first 50 results.
  python3 -c "
import json, sys

branch = sys.argv[1]
try:
    prs = json.load(sys.stdin)
except Exception:
    prs = []
if not isinstance(prs, list):
    prs = []
for pr in prs:
    head = pr.get('head') if isinstance(pr, dict) else None
    if isinstance(head, dict) and head.get('ref') == branch:
        print(pr['number'])
        break
else:
    print('')
" "$branch" <<<"$response" 2>/dev/null
}

# Print how many entries the pulls/<n>/files endpoint returns for a PR.
# Globals:   GITEA_URL, GITEA_TOKEN, REPO_OWNER, REPO_NAME (read)
# Arguments: $1 - pull request number
# Outputs:   the length of the returned JSON list; 0 when the response is
#            not a list or cannot be parsed
get_pr_file_count() {
  local pr_num="$1"
  local endpoint="${GITEA_URL}/api/v1/repos/${REPO_OWNER}/${REPO_NAME}/pulls/${pr_num}/files"
  curl -sf "$endpoint" -H "Authorization: token ${GITEA_TOKEN}" \
    | python3 -c "
import json, sys

count = 0
try:
    parsed = json.load(sys.stdin)
    if isinstance(parsed, list):
        count = len(parsed)
except BaseException:
    # Unreadable response counts as zero files.
    pass
print(count)
" 2>/dev/null
}

# Print the state of a pull request.
# Globals:   GITEA_URL, GITEA_TOKEN, REPO_OWNER, REPO_NAME (read)
# Arguments: $1 - pull request number
# Outputs:   "merged" when the PR's 'merged' field is truthy, otherwise its
#            'state' field; "unknown" when that is absent or the response
#            cannot be parsed
get_pr_state() {
  local pr_num="$1"
  local endpoint="${GITEA_URL}/api/v1/repos/${REPO_OWNER}/${REPO_NAME}/pulls/${pr_num}"
  curl -sf "$endpoint" -H "Authorization: token ${GITEA_TOKEN}" \
    | python3 -c "
import json, sys

try:
    data = json.load(sys.stdin)
    result = 'merged' if data.get('merged') else data.get('state', 'unknown')
except BaseException:
    result = 'unknown'
print(result)
" 2>/dev/null
}

# Print the 'state' field of an issue.
# Globals:   GITEA_URL, GITEA_TOKEN, REPO_OWNER, REPO_NAME (read)
# Arguments: $1 - issue number
# Outputs:   the issue's state, or "unknown" when the field is absent or
#            the response cannot be parsed
get_issue_state() {
  local issue_num="$1"
  local endpoint="${GITEA_URL}/api/v1/repos/${REPO_OWNER}/${REPO_NAME}/issues/${issue_num}"
  curl -sf "$endpoint" -H "Authorization: token ${GITEA_TOKEN}" \
    | python3 -c "
import json, sys

try:
    state = json.load(sys.stdin).get('state', 'unknown')
except BaseException:
    state = 'unknown'
print(state)
" 2>/dev/null
}

#######################################
# Inspect the issue's comments for Kimi's proof block.
# Scans comments from last to first and judges the first one that was
# written by user 'kimi' and mentions 'proof:' or 'verification:'.
# Globals:   GITEA_URL, GITEA_TOKEN, REPO_OWNER, REPO_NAME (read)
# Arguments: $1 - issue number
#            $2 - branch name the proof must mention
# Outputs:   "<status>|<comment html_url>" where status is
#              ok          - branch, PR, push and verification markers found
#              incomplete  - a proof comment exists but lacks a marker
#              missing     - no proof comment (the URL part is empty)
#######################################
proof_comment_status() {
  local issue_num="$1"
  local branch="$2"
  local endpoint="${GITEA_URL}/api/v1/repos/${REPO_OWNER}/${REPO_NAME}/issues/${issue_num}/comments"
  curl -sf "$endpoint" -H "Authorization: token ${GITEA_TOKEN}" \
    | BRANCH="$branch" python3 -c "
import json, os, sys

wanted_branch = os.environ.get('BRANCH', '').lower()

def contains_any(text, needles):
    return any(n in text for n in needles)

try:
    comments = json.load(sys.stdin)
except Exception:
    print('missing|')
    sys.exit(0)

for comment in reversed(comments):
    author = ((comment.get('user') or {}).get('login') or '').lower()
    text = (comment.get('body') or '').lower()
    if author != 'kimi' or not contains_any(text, ('proof:', 'verification:')):
        continue
    checks = (
        wanted_branch in text,
        contains_any(text, ('pr:', 'pull request:', '/pulls/')),
        contains_any(text, ('push:', 'pushed')),
        contains_any(text, ('tox -e unit', 'pytest', 'verification:')),
    )
    verdict = 'ok' if all(checks) else 'incomplete'
    print(verdict + '|' + (comment.get('html_url') or ''))
    sys.exit(0)

print('missing|')
" 2>/dev/null
}

#######################################
# Remove an issue's worktree and its local branch.
# Also handles the half-cleaned case where the directory is already gone
# but the worktree registration or the branch is still around; left alone,
# that state makes the next `git worktree add -b <branch>` fail.
# Globals:   REPO_DIR (read); the working directory is changed to it
# Arguments: $1 - worktree path
#            $2 - local branch name
# Outputs:   one log line when something was actually removed
#######################################
cleanup_worktree() {
  local wt="$1"
  local branch="$2"
  local cleaned=0

  cd "$REPO_DIR" || return 1

  if [ -d "$wt" ]; then
    # Fall back to a plain delete when git refuses to remove the worktree.
    git worktree remove --force "$wt" 2>/dev/null || rm -rf -- "$wt"
    cleaned=1
  fi

  # Drop registrations whose directory no longer exists; best effort.
  git worktree prune 2>/dev/null || true

  if [ -n "$branch" ] && git branch -D "$branch" 2>/dev/null; then
    cleaned=1
  fi

  if [ "$cleaned" -eq 1 ]; then
    log "Cleaned up worktree: $wt"
  fi
}

#######################################
# Pick the next issue for Kimi to work on.
# Globals:   GITEA_URL, GITEA_TOKEN, REPO_OWNER, REPO_NAME, SKIP_FILE (read)
# Outputs:   {"number": N, "title": "..."} for the first eligible issue in
#            reversed API order, or "null" when none qualifies
# Returns:   non-zero when the API call or the parsing fails
#######################################
get_next_issue() {
  # Get open issues ASSIGNED TO KIMI only — Kimi works its own queue
  # NOTE: Gitea's assignee filter is unreliable — we validate in Python
  local skip_file="$SKIP_FILE"
  # The skip-file path is passed as argv rather than spliced into the Python
  # source, so a quote in the path cannot break the program text.
  curl -sf "${GITEA_URL}/api/v1/repos/${REPO_OWNER}/${REPO_NAME}/issues?state=open&type=issues&limit=50&sort=created" \
    -H "Authorization: token ${GITEA_TOKEN}" | python3 -c "
import sys, json, time

skip_path = sys.argv[1]
issues = json.load(sys.stdin)
# Reverse to oldest-first (Gitea returns newest-first) — respects dependency order
issues.reverse()

# Load skip list
try:
    with open(skip_path) as f: skips = json.load(f)
except Exception: skips = {}
if not isinstance(skips, dict): skips = {}

for i in issues:
    # MUST be assigned to kimi (Gitea filter is broken, validate here)
    assignees = [a['login'] for a in (i.get('assignees') or [])]
    if 'kimi' not in assignees:
        continue

    title = (i.get('title') or '').lower()
    # Skip philosophy, epics, showcases, features (not 10-min code work)
    if '[philosophy]' in title: continue
    if '[epic]' in title or 'epic:' in title: continue
    if '[showcase]' in title: continue
    if '[feature]' in title: continue

    # Check skip list
    num_str = str(i['number'])
    entry = skips.get(num_str) or {}
    if entry.get('until', 0) > time.time():
        continue

    print(json.dumps({'number': i['number'], 'title': i.get('title') or ''}))
    sys.exit(0)
print('null')
" "$skip_file" 2>/dev/null
}

#######################################
# Render the instruction prompt handed to the Kimi agent for one issue.
# Globals:   GITEA_URL, GITEA_TOKEN, REPO_OWNER, REPO_NAME (read)
# Arguments: $1 - issue number
#            $2 - issue title
#            $3 - worktree path the agent works in
# Outputs:   the prompt text on stdout
# NOTE: the here-document is unquoted so the ${...} values expand. Literal
# backticks therefore MUST be written as \` — an unescaped backtick pair is
# a command substitution and would run the quoted command while the prompt
# is being built.
#######################################
build_prompt() {
  local issue_num="$1"
  local issue_title="$2"
  local worktree="$3"

  cat <<PROMPT
You are Kimi, an autonomous code agent on the Timmy-time-dashboard project.

YOUR ISSUE: #${issue_num} — "${issue_title}"

GITEA API: ${GITEA_URL}/api/v1
GITEA TOKEN: ${GITEA_TOKEN}
REPO: ${REPO_OWNER}/${REPO_NAME}
WORKING DIRECTORY: ${worktree}

== YOUR POWERS ==
You can do ANYTHING a developer can do. You are not limited to the narrow task.

1. READ the issue. Read any comments — they may have instructions.
   curl -s -H "Authorization: token ${GITEA_TOKEN}" "${GITEA_URL}/api/v1/repos/${REPO_OWNER}/${REPO_NAME}/issues/${issue_num}" 
   curl -s -H "Authorization: token ${GITEA_TOKEN}" "${GITEA_URL}/api/v1/repos/${REPO_OWNER}/${REPO_NAME}/issues/${issue_num}/comments"

2. DO THE WORK. Code, test, fix, refactor — whatever the issue needs.
   - tox -e format (auto-format first)
   - tox -e unit (all tests must pass)
   - tox -e lint (must be clean)

3. COMMIT with conventional commits: fix: / feat: / refactor: / test: / chore:
   ALWAYS stage files first — NEVER commit without git add:
     git add .
     git diff --cached --stat   # verify non-empty — abort if nothing staged
     git commit -m "fix: ... Fixes #${issue_num}"
   Include "Fixes #${issue_num}" or "Refs #${issue_num}" in the message.
   If git diff --cached --stat shows nothing, DO NOT commit or create a PR —
   comment on the issue explaining what you found instead.

4. PUSH to your branch (kimi/issue-${issue_num}) and CREATE A PR.
   Capture the PR URL in your output and in the issue comment.
   curl -s -X POST "${GITEA_URL}/api/v1/repos/${REPO_OWNER}/${REPO_NAME}/pulls"      -H "Authorization: token ${GITEA_TOKEN}"      -H "Content-Type: application/json"      -d '{"title": "[kimi] <description> (#${issue_num})", "body": "Fixes #${issue_num}

<describe what you did>

Verification:
- tox -e format
- tox -e unit
- tox -e lint", "head": "kimi/issue-${issue_num}", "base": "main"}'

5. COMMENT on the issue with a PROOF BLOCK before you exit. Use this exact shape:
   Proof:
   - branch: kimi/issue-${issue_num}
   - commit: <full sha>
   - push: ok
   - pr: <full PR url>
   - verification:
     - tox -e format: <pass/fail>
     - tox -e unit: <pass/fail>
     - tox -e lint: <pass/fail>
   - summary: <what changed>

6. FILE NEW ISSUES if you find bugs, missing tests, or improvements while working:
   curl -s -X POST "${GITEA_URL}/api/v1/repos/${REPO_OWNER}/${REPO_NAME}/issues" \
     -H "Authorization: token ${GITEA_TOKEN}" \
     -H "Content-Type: application/json" \
     -d '{"title": "[kimi-generated] <title>", "body": "<description>"}'

== RULES ==
- Read CLAUDE.md or project README first for conventions
- tox is the ONLY way to run tests/lint/format. Never run pytest/ruff directly.
- Never use --no-verify on git commands.
- If tests fail after 2 attempts, STOP and comment on the issue explaining why.
- Be thorough. If you see something broken nearby, file an issue for it.
- ALWAYS run \`git add .\` before \`git commit\`. NEVER create an empty commit.
- ALWAYS check \`git diff --cached --stat\` before committing — if empty, do NOT commit or create a PR.
- DO NOT claim success unless the branch exists on Gitea, the PR exists, and the Proof block is posted on the issue.

== CRITICAL: FINISH = PUSHED + PR'D + PROVED ==
- NEVER exit without committing your work. Even partial progress MUST be committed.
- Before you finish, ALWAYS: git add -A && git commit && git push origin kimi/issue-${issue_num}
- ALWAYS create a PR before exiting. No exceptions.
- ALWAYS post the Proof block before exiting. No proof comment = not done.
- Your work is WASTED if it's not pushed. Push early, push often.
PROMPT
}

# === MAIN LOOP ===
# Forever: refresh main, pick the next issue, run Kimi in a fresh worktree,
# verify push + PR + proof, merge and close, then clean up. Every Gitea
# lookup is guarded so a transient API error counts as a failed attempt
# instead of terminating the script through `set -e`.

# GNU timeout may be installed as `gtimeout` or as `timeout`; resolve it once
# so the fetch guard and the Kimi run use the same binary.
if command -v gtimeout >/dev/null 2>&1; then
  TIMEOUT_BIN="gtimeout"
elif command -v timeout >/dev/null 2>&1; then
  TIMEOUT_BIN="timeout"
else
  log "FATAL: neither gtimeout nor timeout found in PATH"
  exit 1
fi

# Put the issue on the skip list and count the attempt as a failure.
# `|| true` keeps a skip-list write error from tripping `set -e`.
_record_failure() {
  local issue="$1"
  local reason="$2"
  local hours="${3:-1}"
  mark_skip "$issue" "$reason" "$hours" || true
  failure_count=$((failure_count + 1))
}

log "=== Kimi Loop Started ==="
log "Repo: ${REPO_DIR}"
log "Worktrees: ${WORKTREE_BASE}"

while true; do
  # Check for too many consecutive failures
  if [ "$failure_count" -ge "$MAX_FAILURES" ]; then
    log "BACKOFF: ${failure_count} consecutive failures. Sleeping ${LONG_SLEEP}s..."
    sleep "$LONG_SLEEP"
    failure_count=0
  fi

  # Fetch latest main (resilient — never die on git errors)
  cd "$REPO_DIR"
  "$TIMEOUT_BIN" 60 git fetch origin main 2>/dev/null || { log "WARN: git fetch failed, continuing anyway"; }
  git checkout main 2>/dev/null || true
  git reset --hard origin/main 2>/dev/null || true

  # Get next issue; an API/parse failure is a failed attempt, not a crash
  if ! issue_json=$(get_next_issue); then
    log "WARN: could not fetch the issue queue from Gitea"
    failure_count=$((failure_count + 1))
    sleep "$COOLDOWN"
    continue
  fi

  if [ "$issue_json" = "null" ] || [ -z "$issue_json" ]; then
    # Only log idle ONCE, then go quiet until work appears
    if [ "${LAST_STATE:-}" != "idle" ]; then
      log "Queue empty. Waiting for assignments..."
      LAST_STATE="idle"
    fi
    sleep "$LONG_SLEEP"
    continue
  fi
  LAST_STATE="working"

  if ! issue_num=$(printf '%s\n' "$issue_json" | python3 -c "import sys,json; print(json.load(sys.stdin)['number'])") \
    || ! issue_title=$(printf '%s\n' "$issue_json" | python3 -c "import sys,json; print(json.load(sys.stdin)['title'])"); then
    log "WARN: could not parse issue JSON: ${issue_json}"
    failure_count=$((failure_count + 1))
    sleep "$COOLDOWN"
    continue
  fi
  branch="kimi/issue-${issue_num}"
  worktree="${WORKTREE_BASE}/kimi-${issue_num}"

  log "=== ISSUE #${issue_num}: ${issue_title} ==="

  # Create worktree
  if [ -d "$worktree" ]; then
    log "Worktree already exists, cleaning..."
    cleanup_worktree "$worktree" "$branch"
  fi

  cd "$REPO_DIR"
  if ! git worktree add "$worktree" -b "$branch" origin/main 2>&1; then
    log "ERROR: Failed to create worktree for #${issue_num}"
    failure_count=$((failure_count + 1))
    sleep "$COOLDOWN"
    continue
  fi

  # Configure git remote with kimi's token so it can push.
  # The URL is derived from GITEA_URL (scheme://kimi:token@host...) so the
  # host is configured in exactly one place.
  cd "$worktree"
  git remote set-url origin "${GITEA_URL%%://*}://kimi:${GITEA_TOKEN}@${GITEA_URL#*://}/${REPO_OWNER}/${REPO_NAME}.git"
  cd "$REPO_DIR"

  # Build prompt
  prompt=$(build_prompt "$issue_num" "$issue_title" "$worktree")

  # Run Kimi Code CLI with timeout
  log "Launching Kimi Code for #${issue_num} (timeout: ${KIMI_TIMEOUT}s)..."

  # errexit is suspended so a failing agent run is classified by exit code
  # below rather than ending the script.
  set +e
  cd "$worktree"
  "$TIMEOUT_BIN" "$KIMI_TIMEOUT" kimi \
    --print \
    --quiet \
    -w "$worktree" \
    -p "$prompt" \
    </dev/null 2>&1 | tee "$LOG_DIR/kimi-${issue_num}.log"
  exit_code=${PIPESTATUS[0]}
  cd "$REPO_DIR"
  set -e

  if [ "$exit_code" -eq 0 ]; then
    log "SUCCESS: #${issue_num} exited 0 — verifying push + PR + proof..."

    cd "$worktree"
    if ! remote_branch_exists "$branch"; then
      log "  BLOCKED: remote branch ${branch} not found — work was not pushed"
      post_issue_comment "$issue_num" "Loop gate blocked completion: remote branch \`${branch}\` was not found on origin after Kimi exited. Issue remains open for retry."
      _record_failure "$issue_num" "missing_remote_branch" 1
      cd "$REPO_DIR"
    else
      cd "$REPO_DIR"
      pr_num=$(get_pr_num "$branch") || pr_num=""

      if [ -z "$pr_num" ]; then
        log "  BLOCKED: no PR found for branch ${branch}"
        post_issue_comment "$issue_num" "Loop gate blocked completion: branch \`${branch}\` exists remotely, but no pull request was found. Issue remains open for retry."
        _record_failure "$issue_num" "missing_pr" 1
      else
        # A failed lookup must not be mistaken for "0 files": that branch
        # closes the PR, so only act on a count that was actually read.
        if ! pr_files=$(get_pr_file_count "$pr_num") || ! [[ "$pr_files" =~ ^[0-9]+$ ]]; then
          log "  BLOCKED: could not read changed-file count for PR #${pr_num}"
          _record_failure "$issue_num" "pr_files_unknown" 1
        elif [ "$pr_files" -eq 0 ]; then
          log "  BLOCKED: PR #${pr_num} has 0 changed files — empty commit detected, closing PR without merge"
          curl -sf -X PATCH "${GITEA_URL}/api/v1/repos/${REPO_OWNER}/${REPO_NAME}/pulls/${pr_num}" \
            -H "Authorization: token ${GITEA_TOKEN}" \
            -H "Content-Type: application/json" \
            -d '{"state": "closed"}' >/dev/null 2>&1 || true
          post_issue_comment "$issue_num" "PR #${pr_num} was closed automatically: it had 0 changed files (empty commit). Kimi must stage files with \`git add\` before committing. Issue remains open for retry."
          _record_failure "$issue_num" "empty_commit" 2
        else
          proof_status=$(proof_comment_status "$issue_num" "$branch") || proof_status="missing|"
          proof_state="${proof_status%%|*}"
          proof_url="${proof_status#*|}"

          if [ "$proof_state" != "ok" ]; then
            log "  BLOCKED: proof comment missing or incomplete (${proof_state})"
            post_issue_comment "$issue_num" "Loop gate blocked completion: PR #${pr_num} exists and has ${pr_files} changed file(s), but the required Proof block from Kimi is missing or incomplete. Issue remains open for retry."
            _record_failure "$issue_num" "missing_proof" 1
          else
            log "  PROOF: verified issue comment ${proof_url}"
            pr_state=$(get_pr_state "$pr_num") || pr_state="unknown"

            if [ "$pr_state" = "open" ]; then
              # Best-effort merge; the state is re-read to verify the outcome.
              curl -sf -X POST "${GITEA_URL}/api/v1/repos/${REPO_OWNER}/${REPO_NAME}/pulls/${pr_num}/merge" \
                -H "Authorization: token ${GITEA_TOKEN}" \
                -H "Content-Type: application/json" \
                -d '{"Do": "squash"}' >/dev/null 2>&1 || true
              pr_state=$(get_pr_state "$pr_num") || pr_state="unknown"
            fi

            if [ "$pr_state" = "merged" ]; then
              curl -sf -X PATCH "${GITEA_URL}/api/v1/repos/${REPO_OWNER}/${REPO_NAME}/issues/${issue_num}" \
                -H "Authorization: token ${GITEA_TOKEN}" \
                -H "Content-Type: application/json" \
                -d '{"state": "closed"}' >/dev/null 2>&1 || true
              issue_state=$(get_issue_state "$issue_num") || issue_state="unknown"
              if [ "$issue_state" = "closed" ]; then
                log "  VERIFIED: branch pushed, PR #${pr_num} merged, proof present, issue closed"
                failure_count=0
                issues_completed=$((issues_completed + 1))
                log "Stats: ${issues_completed} issues completed this session"
              else
                log "  BLOCKED: PR #${pr_num} merged but issue #${issue_num} did not close"
                post_issue_comment "$issue_num" "Loop gate published and merged the work in PR #${pr_num}, but could not verify the issue closed cleanly. Please inspect state manually."
                _record_failure "$issue_num" "issue_close_unverified" 1
              fi
            else
              log "  BLOCKED: PR #${pr_num} state is ${pr_state} — merge not verified"
              post_issue_comment "$issue_num" "Loop gate published the work and verified proof, but could not verify merge for PR #${pr_num}. Issue remains open for retry/review."
              _record_failure "$issue_num" "merge_unverified" 1
            fi
          fi
        fi
      fi
    fi

  elif [ "$exit_code" -eq 124 ]; then
    # 124 is the status GNU timeout reports when the time limit was hit.
    log "TIMEOUT: #${issue_num} exceeded ${KIMI_TIMEOUT}s"
    _record_failure "$issue_num" "timeout" 1
  else
    log "FAILED: #${issue_num} exited with code ${exit_code}"
    _record_failure "$issue_num" "exit_code_${exit_code}" 1
  fi

  # Clean up worktree
  cleanup_worktree "$worktree" "$branch"

  # Cooldown
  log "Cooling down ${COOLDOWN}s before next issue..."
  sleep "$COOLDOWN"
done