#!/usr/bin/env bash
# ── Hermes Efficiency Audit ────────────────────────────────────────────
# Meant to run from cron every 12h at first; the interval should back off
# to 24h+ once a plateau is detected (see PLATEAU-AWARE SCHEDULING below).
#
# WHAT IT CHECKS:
#   1. Zombie processes (stuck pytest, git, vi; counts hermes sessions)
#   2. Cron job status (active/paused jobs and their intervals; the token
#      spend estimate, cron run count × estimated tokens, is not implemented yet)
#   3. Stale resources (old worktrees, tmp files, logs)
#   4. Plateau detection (are loops producing value?)
#   5. Gitea health (reachability, open issues and PRs)
#   6. Recommendations (summary of the findings above)
#
# PLATEAU-AWARE SCHEDULING:
#   This script tracks its own diminishing returns. Every audit that finds
#   nothing actionable increments the counter stored in the "back-off"
#   marker (~/.hermes/audits/backoff-level); any audit with findings resets
#   it to 0. Once 3 consecutive audits found nothing, the report recommends
#   a longer interval. The cron is expected to check this marker and extend
#   its interval accordingly.
# ───────────────────────────────────────────────────────────────────────

# Unset variables are errors and a pipeline fails if any stage fails.
# `-e` is not enabled, so a failing check does not stop the audit.
set -u
set -o pipefail

# Make user-local and Hermes tools resolvable when started from cron.
PATH="$HOME/.local/bin:$HOME/.hermes/bin:/usr/local/bin:$PATH"
export PATH

# Report directory, this run's timestamped report, and the back-off counter.
AUDIT_DIR="$HOME/.hermes/audits"
AUDIT_LOG="$AUDIT_DIR/audit-$(date +%Y%m%d_%H%M%S).md"
BACKOFF_MARKER="$AUDIT_DIR/backoff-level"

# Gitea endpoint and API token. The VPS token file is preferred; the generic
# one is the fallback. The token is left empty when neither file is readable.
GITEA_URL="http://143.198.27.163:3000"
GITEA_TOKEN=$(
  cat "$HOME/.hermes/gitea_token_vps" 2>/dev/null \
    || cat "$HOME/.hermes/gitea_token" 2>/dev/null
)

mkdir -p "$AUDIT_DIR"

# Accumulators: the report text built by r(), and the number of sections
# that found something actionable.
report=""
findings=0

# r [LINE] — append LINE plus a trailing newline to the global $report buffer.
# The audit builds its whole report through this function and prints it once
# at the end.
# LINE defaults to the empty string, so a bare `r` adds a blank line instead
# of aborting the script with "unbound variable" under `set -u`.
r() { report+="${1-}"$'\n'; }

# Report heading, stamped with the local time to the minute, then a spacer.
audit_started=$(date '+%Y-%m-%d %H:%M')
r "# Hermes Efficiency Audit — ${audit_started}"
r ""

# ── 1. Zombie Processes ───────────────────────────────────────────────
# Sends SIGTERM to long-running pytest / git push processes and to editors
# left open on a COMMIT_EDITMSG, counts every process it signalled, and
# records one finding if that count is non-zero.
r "## 1. Zombie Processes"
zombies=0

# _pids_matching PATTERN — print the PID (column 2 of `ps aux`) of every
# process whose `ps aux` line matches the basic regex PATTERN. Lines that
# contain "grep" are dropped so the pipeline does not match itself.
_pids_matching() {
  ps aux | grep -- "$1" | grep -v grep | awk '{print $2}'
}

# _running_an_hour_or_more PID — succeed when `ps -o etime=` prints an hours
# field ([dd-]hh:mm:ss). Younger processes print only mm:ss and do not match.
# Fails as well when the process has already exited (empty etime).
_running_an_hour_or_more() {
  local elapsed
  elapsed=$(ps -o etime= -p "$1" 2>/dev/null | tr -d ' ')
  [[ "$elapsed" == *:*:* ]]
}

# _kill_stale LABEL PATTERN — SIGTERM every process matching PATTERN that has
# been running for an hour or more, reporting each as "KILL: LABEL PID n" and
# adding it to the global $zombies counter.
_kill_stale() {
  local label=$1 pattern=$2 pid
  while IFS= read -r pid; do
    [ -n "$pid" ] || continue
    _running_an_hour_or_more "$pid" || continue
    r " KILL: $label PID $pid"
    kill "$pid" 2>/dev/null
    zombies=$((zombies + 1))
  done < <(_pids_matching "$pattern")
}

# Stale pytest runs.
# NOTE(review): this check used to be labelled "> 20 min", but the elapsed-time
# test has only ever matched processes that are at least one hour old. The
# one-hour behavior is kept; confirm which threshold is actually wanted.
_kill_stale "stale pytest" "pytest tests/"

# Stuck git pushes and HTTP remote helpers (same one-hour threshold).
_kill_stale "stuck git" "git.*push\|git-remote-http"

# Orphaned vi editors waiting on a commit message (no age threshold).
# The PID list is taken once, so the number reported is exactly the number of
# processes signalled; running `ps` separately for the count and for the kill
# could give two different snapshots.
vi_count=0
while IFS= read -r pid; do
  [ -n "$pid" ] || continue
  kill "$pid" 2>/dev/null
  vi_count=$((vi_count + 1))
done < <(_pids_matching "vi.*COMMIT_EDITMSG")
if [ "$vi_count" -gt 0 ]; then
  r " KILL: $vi_count orphaned vi editors"
  zombies=$((zombies + vi_count))
fi

# Count active hermes sessions (informational only; nothing is killed here).
hermes_count=$(ps aux | grep "hermes" | grep python | grep -v grep | wc -l | tr -d ' ')
r " Active hermes sessions: $hermes_count"
r " Zombies killed: $zombies"
if [ "$zombies" -gt 0 ]; then
  findings=$((findings + 1))
fi
r ""

# ── 2. Cron Job Status ───────────────────────────────────────────────
# Lists every job in the Hermes cron file with its state and interval.
#
# The Python output is read through process substitution rather than a pipe:
# with `python3 … | while read …; do r …; done` the loop runs in a subshell
# and every line appended to $report is lost when that subshell exits.
# The file path is passed as an argument instead of being spliced into the
# Python source, so unusual characters in $HOME cannot break the snippet.
r "## 2. Cron Jobs"
cron_jobs_file="$HOME/.hermes/cron/jobs.json"
cron_lines=0
while IFS= read -r line; do
  r "$line"
  cron_lines=$((cron_lines + 1))
done < <(python3 -c '
import json
import sys

with open(sys.argv[1]) as fh:
    data = json.load(fh)
# Accept either {"jobs": [...]} or a bare list of job objects.
jobs = data.get("jobs", data) if isinstance(data, dict) else data
active = 0
for j in jobs:
    if not isinstance(j, dict):
        continue
    # str() keeps the :40s format spec from raising on numeric or null names.
    name = str(j.get("name", j.get("id", "?")))
    enabled = j.get("enabled", True)
    paused = j.get("paused", False)
    state = "PAUSED" if (paused or not enabled) else "ACTIVE"
    sched = j.get("schedule", {})
    mins = sched.get("minutes", "?") if isinstance(sched, dict) else "?"
    if state == "ACTIVE":
        active += 1
    print(f" {state:7s} {name:40s} every {mins}m")
print(f"\n Total active: {active}")
' "$cron_jobs_file" 2>/dev/null)
# Python prints nothing when the file is missing or is not valid JSON; say so
# instead of leaving the section silently empty.
if [ "$cron_lines" -eq 0 ]; then
  r " Cron jobs: unavailable (could not read $cron_jobs_file)"
fi
r ""

# ── 3. Stale Resources ───────────────────────────────────────────────
# Reports worktrees untouched for more than 3 days and the disk usage of the
# Hermes log directory and the agents' tmp directory. Stale worktrees count
# as one finding; the sizes are informational.
r "## 3. Stale Resources"

# Old worktrees. One `find` pass feeds both the count and the listing.
#  * -mindepth 1 excludes ~/worktrees itself, which would otherwise be counted
#    as a stale worktree whenever the parent directory is older than 3 days.
#  * The loop reads from process substitution, not a pipe, so the names
#    appended with r() are not lost in a subshell.
stale_worktrees=()
while IFS= read -r wt_path; do
  stale_worktrees+=("${wt_path##*/}")
done < <(find "$HOME/worktrees" -mindepth 1 -maxdepth 1 -type d -mtime +3 2>/dev/null)
worktree_count=${#stale_worktrees[@]}

if [ "$worktree_count" -gt 0 ]; then
  r " Stale worktrees (>3 days): $worktree_count"
  for w in "${stale_worktrees[@]}"; do
    r " $w"
  done
  findings=$((findings + 1))
else
  r " Worktrees: clean"
fi

# Log disk usage (reported as 0 when the directory does not exist, matching
# the tmp-directory line below).
log_size=$(du -sh "$HOME/.hermes/logs" 2>/dev/null | awk '{print $1}')
r " Hermes logs: ${log_size:-0}"

# Tmp files
tmp_size=$(du -sh /tmp/timmy-agents 2>/dev/null | awk '{print $1}')
r " /tmp/timmy-agents: ${tmp_size:-0}"
r ""

# ── 4. Plateau Detection ─────────────────────────────────────────────
# Looks at the last 5 records of the loop's cycle-metrics file and counts the
# ones with no merged PRs and no filed issues. 5 of 5 is a plateau (one
# finding), 3 or 4 is a warning, fewer is productive.
r "## 4. Loop Plateau Analysis"
METRICS="$HOME/Timmy-Time-dashboard/.loop/cycle-metrics.jsonl"
if [ -f "$METRICS" ]; then
  total_cycles=$(wc -l < "$METRICS" | tr -d ' ')
  # Prints the number of zero-output cycles, or "?" if any record fails to
  # parse. Each line is decoded once.
  recent_zero=$(tail -5 "$METRICS" | python3 -c '
import json
import sys

zero = 0
for raw in sys.stdin:
    if not raw.strip():
        continue
    rec = json.loads(raw)
    if rec.get("prs_merged", 0) == 0 and rec.get("issues_filed", 0) == 0:
        zero += 1
print(zero)
' 2>/dev/null || echo "?")
  r " Total cycles logged: $total_cycles"
  r " Last 5 zero-output: $recent_zero/5"
  case "$recent_zero" in
    5)
      r " STATUS: PLATEAUED — loop is spinning without producing value"
      r " RECOMMENDATION: Increase interval or pause until new issues arrive"
      findings=$((findings + 1))
      ;;
    3 | 4)
      r " STATUS: APPROACHING PLATEAU"
      ;;
    [0-2])
      r " STATUS: PRODUCTIVE"
      ;;
    *)
      # "?" (unparseable metrics) must not be reported as PRODUCTIVE, which is
      # what a silenced numeric comparison falling through to `else` would do.
      r " STATUS: UNKNOWN — cycle metrics could not be parsed"
      ;;
  esac
else
  r " No cycle metrics yet."
fi
r ""

# ── 5. Gitea Health ──────────────────────────────────────────────────
# Probes the Gitea server and, when it answers, reports open issue and PR
# counts for the dashboard repository. An unreachable server is one finding.
r "## 5. Gitea Status"

# _gitea_count ENDPOINT — GET the repository endpoint (path plus query string
# relative to the repo's API root) and print the number of items in the
# returned JSON array, or "?" when the request fails or the response is not
# an array.
#  * --max-time keeps a stalled connection from hanging the audit; only the
#    reachability probe had a timeout before.
#  * Error responses are JSON objects; counting their keys with len() would
#    report a bogus number, so anything that is not a list yields "?".
# NOTE(review): the token is passed on curl's command line and is therefore
# visible in `ps` while the request runs.
_gitea_count() {
  local body
  body=$(curl -s --max-time 10 -H "Authorization: token $GITEA_TOKEN" \
    "$GITEA_URL/api/v1/repos/rockachopa/Timmy-time-dashboard/$1" 2>/dev/null) || {
    echo "?"
    return 0
  }
  python3 -c '
import json
import sys

payload = json.loads(sys.stdin.read())
if not isinstance(payload, list):
    sys.exit(1)
print(len(payload))
' <<<"$body" 2>/dev/null || echo "?"
}

if curl -s --max-time 5 "$GITEA_URL/api/v1/version" >/dev/null 2>&1; then
  # limit=1 returns at most one issue, hence the "+" suffix in the report.
  open_issues=$(_gitea_count "issues?state=open&limit=1&type=issues")
  open_prs=$(_gitea_count "pulls?state=open&limit=50")
  r " Gitea: ONLINE ($GITEA_URL)"
  r " Open issues: $open_issues+"
  r " Open PRs: $open_prs"
else
  r " Gitea: OFFLINE"
  findings=$((findings + 1))
fi
r ""

# ── 6. Recommendations ───────────────────────────────────────────────
# One-line verdict derived from the number of sections that raised a finding.
r "## 6. Recommendations"
if (( findings > 0 )); then
  r " $findings issue(s) found — see above."
else
  r " All clear. No action needed."
fi
r ""

# ── Self-scheduling: plateau backoff ──────────────────────────────────
# $BACKOFF_MARKER holds the number of consecutive audits with no findings.
# A quiet audit increments it; an audit with findings resets it to 0. From
# level 3 upward the report recommends a longer audit interval.
current_backoff=$(cat "$BACKOFF_MARKER" 2>/dev/null || echo 0)
# Treat an empty or corrupted marker as 0. A value such as "abc" would be
# looked up as a variable name by the arithmetic below and abort the script
# under `set -u`, before the report has been written.
if ! [[ "$current_backoff" =~ ^[0-9]+$ ]]; then
  current_backoff=0
fi
if [ "$findings" -eq 0 ]; then
  # 10# forces base 10 so a value with a leading zero ("08") is not parsed as
  # (invalid) octal.
  new_backoff=$((10#$current_backoff + 1))
  echo "$new_backoff" > "$BACKOFF_MARKER"
  if [ "$new_backoff" -ge 3 ]; then
    r "## Self-Schedule"
    r " Audit backoff level: $new_backoff (no findings in $new_backoff consecutive runs)"
    r " RECOMMENDATION: Extend audit interval to 24h or 48h"
  fi
else
  echo 0 > "$BACKOFF_MARKER"
fi

# ── Output ────────────────────────────────────────────────────────────
# Print the finished report to stdout and keep a copy in this run's audit
# log, then say where the copy was written.
printf '%s\n' "$report" | tee "$AUDIT_LOG"
printf '\n'
printf 'Audit saved to: %s\n' "$AUDIT_LOG"