Files
hermes-config/bin/efficiency-audit.sh
Alexander Whitestone 539969c45d feat: add all automation scripts to source control
New:
  kimi-loop.sh — Kimi Code CLI dispatch loop (fixed: no double logging,
    idle state logs once then goes quiet, uses kimi not claude)
  consolidated-cycle.sh — sonnet dev cycle (watchdog + dev + philosophy)
  efficiency-audit.sh — zombie cleanup, plateau detection, recommendations

Fixes in kimi-loop.sh:
  - log() writes to file only (no more double lines)
  - idle queue logs once then goes silent until work appears
  - all claude references removed, uses kimi CLI
2026-03-21 12:11:30 -04:00

202 lines
8.1 KiB
Bash
Executable File
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env bash
# ── Hermes Efficiency Audit ────────────────────────────────────────────
# Runs every 12h initially, backs off to 24h+ when plateau detected.
#
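# WHAT IT CHECKS:
# 1. Zombie processes (kills stuck pytest, git and vi; counts hermes sessions)
# 2. Cron job status (active vs. paused jobs and their intervals; a token
#    spend estimate is not computed yet)
# 3. Stale resources (old worktrees, log and tmp disk usage)
# 4. Plateau detection (are loops producing value?)
# 5. Gitea health (reachability, open issues and open PRs)
# 6. Recommendations (summary of the findings above)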
#
# PLATEAU-AWARE SCHEDULING:
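# This script tracks its own diminishing returns. Every audit that finds
# nothing actionable increments a counter in the "backoff-level" marker file
# (any finding resets it to 0). Once 3 consecutive audits are clean, the
# report recommends a longer interval; the cron is expected to check this
# marker and extend its interval.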
# ───────────────────────────────────────────────────────────────────────
# Shell options: unset variables are errors and a pipeline reports the
# failure of any stage. errexit (-e) is not enabled, so a failing check
# does not abort the rest of the audit.
set -u -o pipefail

# Put the Hermes tool directories ahead of the inherited PATH.
export PATH="${HOME}/.local/bin:${HOME}/.hermes/bin:/usr/local/bin:${PATH}"

# Directory holding the audit reports and the back-off counter.
AUDIT_DIR="${HOME}/.hermes/audits"
mkdir -p "$AUDIT_DIR"
AUDIT_LOG="${AUDIT_DIR}/audit-$(date +%Y%m%d_%H%M%S).md"
BACKOFF_MARKER="${AUDIT_DIR}/backoff-level"

# Gitea endpoint and API token. The VPS token file is tried first, then the
# generic one; the token is left empty when neither file can be read.
GITEA_URL="http://143.198.27.163:3000"
GITEA_TOKEN=$({ cat ~/.hermes/gitea_token_vps || cat ~/.hermes/gitea_token; } 2>/dev/null)

# Number of sections that reported something actionable.
findings=0

# The report is accumulated in this buffer and emitted at the end of the run.
report=""

# r LINE — append LINE followed by a newline to the report buffer.
r() {
  report="${report}${1}"$'\n'
}

r "# Hermes Efficiency Audit — $(date '+%Y-%m-%d %H:%M')"
r ""
# ── 1. Zombie Processes ───────────────────────────────────────────────
# Finds long-running pytest / git processes and commit-message editors,
# signals each with `kill` (default signal), and records what was signalled
# in the report. Process ages come from `ps -o etime=`, whose format is
# [[dd-]hh:]mm:ss.
r "## 1. Zombie Processes"
zombies=0

# Age thresholds in seconds (overridable from the environment). They are
# compared numerically: a glob on the etime string can only tell whether an
# hours field is present, which cannot express the 20-minute pytest limit.
PYTEST_MAX_AGE_SECS="${PYTEST_MAX_AGE_SECS:-1200}"
GIT_MAX_AGE_SECS="${GIT_MAX_AGE_SECS:-3600}"

# _etime_seconds ETIME — convert a ps elapsed-time string ([[dd-]hh:]mm:ss)
# to whole seconds on stdout. Returns 1 and prints nothing when ETIME is
# empty or malformed (for example because the process has already exited).
_etime_seconds() {
  local etime_re='^(([0-9]+)-)?(([0-9]+):)?([0-9]+):([0-9]+)$'
  [[ "${1:-}" =~ $etime_re ]] || return 1
  local days=${BASH_REMATCH[2]:-0} hours=${BASH_REMATCH[4]:-0}
  local mins=${BASH_REMATCH[5]} secs=${BASH_REMATCH[6]}
  # 10# forces base 10 so zero-padded fields such as "08" are not read as octal.
  printf '%d\n' "$((10#$days * 86400 + 10#$hours * 3600 + 10#$mins * 60 + 10#$secs))"
}

# _reap_stale LABEL PATTERN MIN_AGE_SECS — signal every process whose
# `ps aux` line matches the grep PATTERN and whose age is at least
# MIN_AGE_SECS. Logs one KILL line per process and increments $zombies;
# the count reflects kill attempts, the kill result itself is not checked.
_reap_stale() {
  local label=$1 pattern=$2 min_age=$3
  local pid age
  while IFS= read -r pid; do
    [ -n "$pid" ] || continue
    age=$(_etime_seconds "$(ps -o etime= -p "$pid" 2>/dev/null | tr -d ' ')") || continue
    [ "$age" -ge "$min_age" ] || continue
    r " KILL: $label PID $pid"
    kill "$pid" 2>/dev/null
    zombies=$((zombies + 1))
  done < <(ps aux | grep -- "$pattern" | grep -v grep | awk '{print $2}')
}

# Stuck pytest (>= 20 min by default)
_reap_stale "stale pytest" "pytest tests/" "$PYTEST_MAX_AGE_SECS"

# Stuck git (>= 1 hour by default)
_reap_stale "stuck git" "git.*push\|git-remote-http" "$GIT_MAX_AGE_SECS"

# Orphaned vi: processes whose ps line matches vi.*COMMIT_EDITMSG. The PID
# list is read from a single ps snapshot so the reported count is exactly
# the number of processes that were signalled.
vi_count=0
while IFS= read -r pid; do
  [ -n "$pid" ] || continue
  kill "$pid" 2>/dev/null
  vi_count=$((vi_count + 1))
done < <(ps aux | grep "vi.*COMMIT_EDITMSG" | grep -v grep | awk '{print $2}')
if [ "$vi_count" -gt 0 ]; then
  r " KILL: $vi_count orphaned vi editors"
  zombies=$((zombies + vi_count))
fi

# Count active hermes sessions (reported only; nothing is signalled here).
hermes_count=$(ps aux | grep "hermes" | grep python | grep -v grep | wc -l | tr -d ' ')
r " Active hermes sessions: $hermes_count"
r " Zombies killed: $zombies"
if [ "$zombies" -gt 0 ]; then
  findings=$((findings + 1))
fi
r ""
# ── 2. Cron Job Status ───────────────────────────────────────────────
# Lists every job in the Hermes cron file with its state and interval.

# _cron_job_lines FILE — print one " STATE NAME every Nm" line per job in the
# JSON file FILE, then a blank line and the total of active jobs. FILE may be
# either a list of jobs or an object with a "jobs" key. Prints nothing and
# returns non-zero when FILE is missing or not valid JSON. The path is passed
# as an argument rather than pasted into the Python source, so quotes in the
# path cannot break the program.
_cron_job_lines() {
  python3 -c '
import json
import sys

with open(sys.argv[1]) as fh:
    data = json.load(fh)
jobs = data.get("jobs", data) if isinstance(data, dict) else data
active = 0
for j in jobs:
    if not isinstance(j, dict):
        continue
    # str() so a numeric id used as the name still accepts the :40s format.
    name = str(j.get("name", j.get("id", "?")))
    enabled = j.get("enabled", True)
    paused = j.get("paused", False)
    state = "PAUSED" if (paused or not enabled) else "ACTIVE"
    sched = j.get("schedule", {})
    mins = sched.get("minutes", "?") if isinstance(sched, dict) else "?"
    if state == "ACTIVE":
        active += 1
    print(f" {state:7s} {name:40s} every {mins}m")
print(f"\n Total active: {active}")
' "$1" 2>/dev/null
}

# _report_cron_jobs FILE — append the job listing for FILE to the report.
# The loop reads from a process substitution, not a pipeline: a pipeline
# would run the loop in a subshell and every line added via r would be lost.
_report_cron_jobs() {
  local jobs_file=$1 line emitted=0
  while IFS= read -r line; do
    r "$line"
    emitted=1
  done < <(_cron_job_lines "$jobs_file")
  if [ "$emitted" -eq 0 ]; then
    r " Cron job list unavailable (could not read $jobs_file)"
  fi
}

r "## 2. Cron Jobs"
_report_cron_jobs "$HOME/.hermes/cron/jobs.json"
r ""
# ── 3. Stale Resources ───────────────────────────────────────────────
# Reports worktree directories untouched for more than 3 days and the disk
# usage of the log and tmp directories.

# _report_stale_worktrees ROOT — list the immediate subdirectories of ROOT
# whose mtime is more than 3 days old. Adds the count and one line per
# directory name to the report and bumps $findings when any are found.
# -mindepth 1 keeps ROOT itself out of the result, and the loop reads from a
# process substitution so the names added via r are not lost in a subshell.
_report_stale_worktrees() {
  local root=$1 path count=0
  local -a stale
  stale=()
  while IFS= read -r path; do
    [ -n "$path" ] || continue
    stale+=("${path##*/}")
    count=$((count + 1))
  done < <(find "$root" -mindepth 1 -maxdepth 1 -type d -mtime +3 2>/dev/null)

  if [ "$count" -gt 0 ]; then
    r " Stale worktrees (>3 days): $count"
    for path in "${stale[@]}"; do
      r " $path"
    done
    findings=$((findings + 1))
  else
    r " Worktrees: clean"
  fi
}

r "## 3. Stale Resources"
# Old worktrees
_report_stale_worktrees "$HOME/worktrees"
# Log disk usage (falls back to 0 when the directory does not exist)
log_size=$(du -sh "$HOME/.hermes/logs" 2>/dev/null | awk '{print $1}')
r " Hermes logs: ${log_size:-0}"
# Tmp files
tmp_size=$(du -sh /tmp/timmy-agents 2>/dev/null | awk '{print $1}')
r " /tmp/timmy-agents: ${tmp_size:-0}"
r ""
# ── 4. Plateau Detection ─────────────────────────────────────────────
# Looks at the last 5 entries of the cycle metrics log and reports whether
# the loop is still merging PRs or filing issues.

# _count_zero_cycles — read JSONL on stdin and print how many non-blank
# lines have both prs_merged and issues_filed equal to 0 (a missing key
# counts as 0). Prints "?" when python3 fails or any line cannot be parsed.
_count_zero_cycles() {
  python3 -c '
import json
import sys

zero = 0
for raw in sys.stdin:
    if not raw.strip():
        continue
    rec = json.loads(raw)
    if rec.get("prs_merged", 0) == 0 and rec.get("issues_filed", 0) == 0:
        zero += 1
print(zero)
' 2>/dev/null || echo "?"
}

# _plateau_state COUNT — map the zero-output count to a state keyword:
# PLATEAUED (5 or more), APPROACHING (3-4), PRODUCTIVE (0-2), or UNKNOWN when
# COUNT is not a plain number. UNKNOWN is its own state so that a parse
# failure is never reported as a productive loop.
_plateau_state() {
  case "${1:-}" in
    '' | *[!0-9]*)
      echo "UNKNOWN"
      return
      ;;
  esac
  if [ "$1" -ge 5 ]; then
    echo "PLATEAUED"
  elif [ "$1" -ge 3 ]; then
    echo "APPROACHING"
  else
    echo "PRODUCTIVE"
  fi
}

r "## 4. Loop Plateau Analysis"
METRICS="$HOME/Timmy-Time-dashboard/.loop/cycle-metrics.jsonl"
if [ -f "$METRICS" ]; then
  total_cycles=$(wc -l < "$METRICS" | tr -d ' ')
  recent_zero=$(tail -5 "$METRICS" | _count_zero_cycles)
  r " Total cycles logged: $total_cycles"
  r " Last 5 zero-output: $recent_zero/5"
  case "$(_plateau_state "$recent_zero")" in
    PLATEAUED)
      r " STATUS: PLATEAUED — loop is spinning without producing value"
      r " RECOMMENDATION: Increase interval or pause until new issues arrive"
      findings=$((findings + 1))
      ;;
    APPROACHING)
      r " STATUS: APPROACHING PLATEAU"
      ;;
    PRODUCTIVE)
      r " STATUS: PRODUCTIVE"
      ;;
    *)
      r " STATUS: UNKNOWN — cycle metrics could not be parsed"
      ;;
  esac
else
  r " No cycle metrics yet."
fi
r ""
# ── 5. Gitea Health ──────────────────────────────────────────────────
# Checks that the Gitea API answers and reports open issue / PR counts.

# _json_list_len — read JSON on stdin and print its length when it is an
# array. Prints nothing and returns non-zero for anything else, so an API
# error body (a JSON object) is never mistaken for a list of results.
_json_list_len() {
  python3 -c '
import json
import sys

payload = json.load(sys.stdin)
if not isinstance(payload, list):
    sys.exit(1)
print(len(payload))
' 2>/dev/null
}

# _gitea_count PATH_AND_QUERY — GET the given path under the dashboard
# repository and print the number of items returned, or "?" when the request
# fails, times out, or does not return a JSON array.
# NOTE(review): the token is passed on curl's command line, so it is visible
# in process listings while the request runs.
_gitea_count() {
  curl -s --max-time 10 -H "Authorization: token $GITEA_TOKEN" \
    "$GITEA_URL/api/v1/repos/rockachopa/Timmy-time-dashboard/$1" 2>/dev/null \
    | _json_list_len || echo "?"
}

r "## 5. Gitea Status"
if curl -s --max-time 5 "$GITEA_URL/api/v1/version" >/dev/null 2>&1; then
  # limit=1 makes this a cheap "is there at least one?" probe, hence the
  # trailing "+" in the report line.
  open_issues=$(_gitea_count "issues?state=open&limit=1&type=issues")
  open_prs=$(_gitea_count "pulls?state=open&limit=50")
  r " Gitea: ONLINE ($GITEA_URL)"
  r " Open issues: $open_issues+"
  r " Open PRs: $open_prs"
else
  r " Gitea: OFFLINE"
  findings=$((findings + 1))
fi
r ""
# ── 6. Recommendations ───────────────────────────────────────────────
# Closing summary: an all-clear when no section flagged anything, otherwise
# the number of findings recorded so far.
r "## 6. Recommendations"
case "$findings" in
  0)
    r " All clear. No action needed."
    ;;
  *)
    r " $findings issue(s) found — see above."
    ;;
esac
r ""
# ── Self-scheduling: plateau backoff ──────────────────────────────────
# Keeps a counter of consecutive audits without findings in the marker file.
# A clean run increments it, any finding resets it to 0, and from 3 upwards
# the report recommends a longer audit interval.

# _read_backoff_level FILE — print the counter stored on the first line of
# FILE as a base-10 integer. Prints 0 when FILE is missing, unreadable, empty
# or holds anything other than digits, so a damaged marker can neither abort
# the script under `set -u` nor be evaluated as an arithmetic expression.
_read_backoff_level() {
  local level=""
  if [ -r "$1" ]; then
    # read returns non-zero on a missing trailing newline but still fills
    # the variable, so its status is deliberately ignored.
    read -r level < "$1" || :
  fi
  case "$level" in
    '' | *[!0-9]*) level=0 ;;
  esac
  # 10# avoids treating a zero-padded value such as "08" as invalid octal.
  printf '%d\n' "$((10#$level))"
}

current_backoff=$(_read_backoff_level "$BACKOFF_MARKER")
if [ "$findings" -eq 0 ]; then
  new_backoff=$((current_backoff + 1))
  echo "$new_backoff" > "$BACKOFF_MARKER"
  if [ "$new_backoff" -ge 3 ]; then
    r "## Self-Schedule"
    r " Audit backoff level: $new_backoff (no findings in $new_backoff consecutive runs)"
    r " RECOMMENDATION: Extend audit interval to 24h or 48h"
  fi
else
  echo 0 > "$BACKOFF_MARKER"
fi
# ── Output ────────────────────────────────────────────────────────────
# Print the accumulated report and write the same text to the audit log,
# then print a blank line and the path of the saved copy.
printf '%s\n' "$report" | tee "$AUDIT_LOG"
printf '\n%s\n' "Audit saved to: $AUDIT_LOG"