Files
hermes-config/bin/efficiency-audit.sh

202 lines
8.1 KiB
Bash
Raw Normal View History

#!/usr/bin/env bash
# ── Hermes Efficiency Audit ────────────────────────────────────────────
# Runs every 12h initially, backs off to 24h+ when plateau detected.
#
# WHAT IT CHECKS:
# 1. Zombie processes (stale pytest, stuck git, orphaned vi; counts hermes sessions)
# 2. Cron job status (active vs. paused jobs and their intervals)
# 3. Stale resources (old worktrees, log and tmp disk usage)
# 4. Plateau detection (are loops producing value?)
# 5. Gitea health (reachability, open issues and PRs)
# 6. Recommendations (summary of the findings above)
#
# PLATEAU-AWARE SCHEDULING:
# This script tracks its own diminishing returns. Every audit with no
# findings increments the counter stored in the "back-off" marker file (any
# finding resets it to 0); from 3 consecutive clean audits onward the report
# recommends extending the interval. The cron should check this marker.
# ───────────────────────────────────────────────────────────────────────
# Unset variables are errors and a pipeline fails when any stage fails.
# -e is not enabled, so a failing command does not abort the script.
set -u
set -o pipefail

# Put user-local and hermes tooling ahead of the inherited PATH.
PATH="${HOME}/.local/bin:${HOME}/.hermes/bin:/usr/local/bin:${PATH}"
export PATH

# Audit output locations: one timestamped report per run, plus the marker
# file that carries the back-off level from one run to the next.
AUDIT_DIR="${HOME}/.hermes/audits"
BACKOFF_MARKER="${AUDIT_DIR}/backoff-level"
audit_stamp=$(date +%Y%m%d_%H%M%S)
AUDIT_LOG="${AUDIT_DIR}/audit-${audit_stamp}.md"
mkdir -p "${AUDIT_DIR}"

# Gitea endpoint and API token. The VPS token file is tried first and the
# generic token file second; the token stays empty when neither can be read.
GITEA_URL="http://143.198.27.163:3000"
GITEA_TOKEN=$(cat "${HOME}/.hermes/gitea_token_vps" 2>/dev/null \
  || cat "${HOME}/.hermes/gitea_token" 2>/dev/null)

# Running state: the report text built so far and the number of findings.
report=""
findings=0
# Append one line to the report: $1 followed by a newline is added to the
# global $report string.
r() {
  local text=$1
  report+="${text}"$'\n'
}

# Report title carrying the time of this audit run, then a blank line.
title_time=$(date '+%Y-%m-%d %H:%M')
r "# Hermes Efficiency Audit — ${title_time}"
r ""
# ── 1. Zombie Processes ───────────────────────────────────────────────
r "## 1. Zombie Processes"
zombies=0

# Print the `ps aux` rows that match grep pattern $1 and whose elapsed time
# contains two colons. `ps -o etime` prints [[dd-]hh:]mm:ss, so that shape
# only appears once a process has been running for at least one hour.
# NOTE(review): an earlier comment described the pytest threshold as
# "> 20 min"; the check itself has always been the one-hour shape test.
_long_running_rows() {
  local row row_pid elapsed
  ps aux | grep "$1" | grep -v grep | while read -r row; do
    row_pid=$(echo "$row" | awk '{print $2}')
    elapsed=$(ps -o etime= -p "$row_pid" 2>/dev/null | tr -d ' ')
    [[ "$elapsed" == *:*:* ]] && echo "$row"
  done
}

# Signal (default TERM) every long-running process matching pattern $1, log
# it under label $2 and count it in $zombies whether or not the kill
# succeeded. The loop reads from a process substitution so the counter and
# the report updates happen in this shell rather than in a subshell.
_reap_long_running() {
  local row victim
  while IFS= read -r row; do
    [ -n "$row" ] || continue
    victim=$(echo "$row" | awk '{print $2}')
    r " KILL: $2 PID $victim"
    kill "$victim" 2>/dev/null
    zombies=$((zombies + 1))
  done < <(_long_running_rows "$1")
}

# Stale pytest runs and stuck git pushes / HTTP remote helpers.
_reap_long_running "pytest tests/" "stale pytest"
_reap_long_running "git.*push\|git-remote-http" "stuck git"

# Orphaned vi: any ps row matching vi.*COMMIT_EDITMSG, regardless of age.
vi_count=$(ps aux | grep "vi.*COMMIT_EDITMSG" | grep -v grep | wc -l | tr -d ' ')
if (( vi_count > 0 )); then
  ps aux | grep "vi.*COMMIT_EDITMSG" | grep -v grep | awk '{print $2}' | xargs kill 2>/dev/null
  r " KILL: $vi_count orphaned vi editors"
  zombies=$((zombies + vi_count))
fi

# Count active hermes sessions: ps rows mentioning both "hermes" and "python".
hermes_count=$(ps aux | grep "hermes" | grep python | grep -v grep | wc -l | tr -d ' ')
r " Active hermes sessions: $hermes_count"
r " Zombies killed: $zombies"

# Any killed process makes this section count as one finding.
if (( zombies > 0 )); then
  findings=$((findings + 1))
fi
r ""
# ── 2. Cron Job Status ───────────────────────────────────────────────
r "## 2. Cron Jobs"

# Print one status line per job found in the cron jobs file $1, then a blank
# line and the number of active jobs. A job is PAUSED when its "paused" flag
# is true or its "enabled" flag is false; otherwise it is ACTIVE. The path is
# passed as an argument rather than spliced into the Python source, so
# quotes in $HOME cannot break the script. Errors are silenced; a missing or
# invalid file simply produces no output.
_cron_job_lines() {
  python3 -c '
import json, sys

with open(sys.argv[1]) as fh:
    data = json.load(fh)
jobs = data.get("jobs", data) if isinstance(data, dict) else data
active = 0
for j in jobs:
    if not isinstance(j, dict):
        continue
    # str() keeps the fixed-width format from failing on non-string names/ids.
    name = str(j.get("name", j.get("id", "?")))
    enabled = j.get("enabled", True)
    paused = j.get("paused", False)
    state = "PAUSED" if (paused or not enabled) else "ACTIVE"
    sched = j.get("schedule", {})
    mins = sched.get("minutes", "?") if isinstance(sched, dict) else "?"
    if state == "ACTIVE":
        active += 1
    print(f" {state:7s} {name:40s} every {mins}m")
print(f"\n Total active: {active}")
' "$1" 2>/dev/null
}

# Append the cron summary for jobs file $1 to the report. The loop is fed by
# process substitution instead of a pipe: a `cmd | while …` loop runs in a
# subshell, where every append to $report would be thrown away.
_report_cron_jobs() {
  local line seen=0
  while IFS= read -r line; do
    r "$line"
    seen=1
  done < <(_cron_job_lines "$1")
  # Make an unreadable or unparsable jobs file visible instead of leaving
  # the section silently empty.
  if [ "$seen" -eq 0 ]; then
    r " (cron job data unavailable)"
  fi
}

_report_cron_jobs "$HOME/.hermes/cron/jobs.json"
r ""
# ── 3. Stale Resources ───────────────────────────────────────────────
r "## 3. Stale Resources"

# Report the directories directly inside $1 that match `find -mtime +3`.
# Sets the global worktree_count and returns 0 when at least one stale
# directory was found, 1 otherwise.
#   -mindepth 1 keeps $1 itself out of the result; without it an old root
#   directory is counted as a stale worktree even when it holds none.
#   The loop reads from process substitution so the names reach $report
#   (a `find | while` loop would append to it inside a subshell).
_report_stale_worktrees() {
  local path name count=0
  local -a names=()
  while IFS= read -r path; do
    [ -n "$path" ] || continue
    names+=("${path##*/}")
    count=$((count + 1))
  done < <(find "$1" -mindepth 1 -maxdepth 1 -type d -mtime +3 2>/dev/null)
  worktree_count=$count
  if [ "$count" -eq 0 ]; then
    r " Worktrees: clean"
    return 1
  fi
  r " Stale worktrees (>3 days): $count"
  for name in "${names[@]}"; do
    r " $name"
  done
  return 0
}

# Old worktrees
if _report_stale_worktrees "$HOME/worktrees"; then
  findings=$((findings + 1))
fi

# Log disk usage (0 when the directory is missing, matching the tmp line)
log_size=$(du -sh "$HOME/.hermes/logs" 2>/dev/null | awk '{print $1}')
r " Hermes logs: ${log_size:-0}"

# Tmp files
tmp_size=$(du -sh /tmp/timmy-agents 2>/dev/null | awk '{print $1}')
r " /tmp/timmy-agents: ${tmp_size:-0}"
r ""
# ── 4. Plateau Detection ─────────────────────────────────────────────
r "## 4. Loop Plateau Analysis"
METRICS="$HOME/Timmy-Time-dashboard/.loop/cycle-metrics.jsonl"

# Read JSON-lines cycle records on stdin and print how many of them have
# both prs_merged and issues_filed equal to 0 (missing keys count as 0).
# Blank lines are skipped. Prints "?" when python3 fails, e.g. because a
# record is not valid JSON.
_count_zero_cycles() {
  python3 -c '
import json, sys

zero = 0
for raw in sys.stdin:
    if not raw.strip():
        continue
    rec = json.loads(raw)
    if rec.get("prs_merged", 0) == 0 and rec.get("issues_filed", 0) == 0:
        zero += 1
print(zero)
' 2>/dev/null || echo "?"
}

# Map a zero-output count ($1) to a state keyword:
#   unknown     - $1 is empty or not a plain non-negative integer
#   plateaued   - exactly 5
#   approaching - 3 or more (and not 5)
#   productive  - anything lower
# Classifying "unknown" explicitly keeps an unparsable metrics file from
# being reported as PRODUCTIVE.
_plateau_state() {
  case "$1" in
    '' | *[!0-9]*)
      echo unknown
      ;;
    *)
      if [ "$1" -eq 5 ]; then
        echo plateaued
      elif [ "$1" -ge 3 ]; then
        echo approaching
      else
        echo productive
      fi
      ;;
  esac
}

if [ -f "$METRICS" ]; then
  total_cycles=$(wc -l < "$METRICS" | tr -d ' ')
  # Only the five most recent cycles are considered.
  recent_zero=$(tail -5 "$METRICS" | _count_zero_cycles)
  r " Total cycles logged: $total_cycles"
  r " Last 5 zero-output: $recent_zero/5"
  case "$(_plateau_state "$recent_zero")" in
    plateaued)
      r " STATUS: PLATEAUED — loop is spinning without producing value"
      r " RECOMMENDATION: Increase interval or pause until new issues arrive"
      findings=$((findings + 1))
      ;;
    approaching)
      r " STATUS: APPROACHING PLATEAU"
      ;;
    productive)
      r " STATUS: PRODUCTIVE"
      ;;
    *)
      r " STATUS: UNKNOWN — cycle metrics could not be parsed"
      ;;
  esac
else
  r " No cycle metrics yet."
fi
r ""
# ── 5. Gitea Health ──────────────────────────────────────────────────
r "## 5. Gitea Status"

# Read a JSON document on stdin and print its length when it is a list.
# Anything else prints "?": empty or invalid input, or a JSON value that is
# not a list (for example an error object returned instead of results), so
# that the key count of such an object is never reported as an item count.
_json_list_len() {
  python3 -c '
import json, sys

doc = json.load(sys.stdin)
if not isinstance(doc, list):
    raise SystemExit(1)
print(len(doc))
' 2>/dev/null || echo "?"
}

# GET the API path $1 on $GITEA_URL with the configured token and print the
# number of items in the returned list, or "?" on any failure.
#   -f          HTTP errors produce no body, which then counts as "?"
#   --max-time  bounds the request so a stalled server cannot hang the audit
_gitea_count() {
  curl -sf --max-time 10 -H "Authorization: token $GITEA_TOKEN" \
    "$GITEA_URL$1" 2>/dev/null | _json_list_len
}

# The unauthenticated version endpoint doubles as the reachability probe.
if curl -s --max-time 5 "$GITEA_URL/api/v1/version" >/dev/null 2>&1; then
  # limit=1 means this count is 0 or 1; the report prints it with a
  # trailing "+" to mark it as a lower bound.
  open_issues=$(_gitea_count "/api/v1/repos/rockachopa/Timmy-time-dashboard/issues?state=open&limit=1&type=issues")
  open_prs=$(_gitea_count "/api/v1/repos/rockachopa/Timmy-time-dashboard/pulls?state=open&limit=50")
  r " Gitea: ONLINE ($GITEA_URL)"
  r " Open issues: $open_issues+"
  r " Open PRs: $open_prs"
else
  r " Gitea: OFFLINE"
  findings=$((findings + 1))
fi
r ""
# ── 6. Recommendations ───────────────────────────────────────────────
r "## 6. Recommendations"
# One-line verdict derived from the number of findings collected so far.
if (( findings > 0 )); then
  r " $findings issue(s) found — see above."
else
  r " All clear. No action needed."
fi
r ""
# ── Self-scheduling: plateau backoff ──────────────────────────────────
# Print $1 as a decimal integer when it consists only of digits, else 0.
# The marker file content is fed into arithmetic below; unvalidated, a
# non-numeric value would abort the script under `set -u` before the report
# is written, and a value with a leading zero such as "08" would be rejected
# as invalid octal. 10# forces base-10 interpretation.
_backoff_level() {
  case "$1" in
    '' | *[!0-9]*) echo 0 ;;
    *) echo "$((10#$1))" ;;
  esac
}

# A missing or unreadable marker counts as level 0.
current_backoff=$(_backoff_level "$(cat "$BACKOFF_MARKER" 2>/dev/null)")

if [ "$findings" -eq 0 ]; then
  # Another clean audit: raise the level and persist it for the next run.
  new_backoff=$((current_backoff + 1))
  echo "$new_backoff" > "$BACKOFF_MARKER"
  if [ "$new_backoff" -ge 3 ]; then
    r "## Self-Schedule"
    r " Audit backoff level: $new_backoff (no findings in $new_backoff consecutive runs)"
    r " RECOMMENDATION: Extend audit interval to 24h or 48h"
  fi
else
  # Any finding resets the streak.
  echo 0 > "$BACKOFF_MARKER"
fi
# ── Output ────────────────────────────────────────────────────────────
# Show the report on stdout while saving a copy to the audit log, then say
# where the copy was written.
printf '%s\n' "$report" | tee "$AUDIT_LOG"
printf '\n'
printf '%s\n' "Audit saved to: $AUDIT_LOG"