Compare commits
13 Commits
ezra/issue
...
groq/issue
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
6e1decd29b | ||
| 458dabfaed | |||
|
|
f8dabae8eb | ||
| 0c950f991c | |||
| c1c3aaa681 | |||
| d023512858 | |||
| e5e01e36c9 | |||
|
|
e5055d269b | ||
|
|
277d21aef6 | ||
|
|
228e46a330 | ||
|
|
2e64b160b5 | ||
|
|
67c2927c1a | ||
|
|
f18955ea90 |
12
.gitignore
vendored
12
.gitignore
vendored
@@ -1,9 +1,8 @@
|
|||||||
# Secrets
|
*.pyc
|
||||||
*.token
|
*.pyo
|
||||||
*.key
|
*.egg-info/
|
||||||
*.secret
|
dist/
|
||||||
|
build/
|
||||||
# Local state
|
|
||||||
*.db
|
*.db
|
||||||
*.db-wal
|
*.db-wal
|
||||||
*.db-shm
|
*.db-shm
|
||||||
@@ -11,3 +10,4 @@ __pycache__/
|
|||||||
|
|
||||||
# Generated audit reports
|
# Generated audit reports
|
||||||
reports/
|
reports/
|
||||||
|
.aider*
|
||||||
|
|||||||
273
bin/agent-loop.sh
Executable file
273
bin/agent-loop.sh
Executable file
@@ -0,0 +1,273 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
# agent-loop.sh — Universal agent dev loop with Genchi Genbutsu verification
|
||||||
|
#
|
||||||
|
# Usage: agent-loop.sh <agent-name> [num-workers]
|
||||||
|
# agent-loop.sh claude 2
|
||||||
|
# agent-loop.sh gemini 1
|
||||||
|
#
|
||||||
|
# Dispatches via agent-dispatch.sh, then verifies with genchi-genbutsu.sh.
|
||||||
|
|
||||||
|
set -uo pipefail
|
||||||
|
|
||||||
|
AGENT="${1:?Usage: agent-loop.sh <agent-name> [num-workers]}"
|
||||||
|
NUM_WORKERS="${2:-1}"
|
||||||
|
|
||||||
|
# Resolve agent tool and model from config or fallback
|
||||||
|
case "$AGENT" in
|
||||||
|
claude) TOOL="claude"; MODEL="sonnet" ;;
|
||||||
|
gemini) TOOL="gemini"; MODEL="gemini-2.5-pro-preview-05-06" ;;
|
||||||
|
grok) TOOL="opencode"; MODEL="grok-3-fast" ;;
|
||||||
|
*) TOOL="$AGENT"; MODEL="" ;;
|
||||||
|
esac
|
||||||
|
|
||||||
|
# === CONFIG ===
|
||||||
|
GITEA_URL="${GITEA_URL:-https://forge.alexanderwhitestone.com}"
|
||||||
|
GITEA_TOKEN="${GITEA_TOKEN:-}"
|
||||||
|
WORKTREE_BASE="$HOME/worktrees"
|
||||||
|
LOG_DIR="$HOME/.hermes/logs"
|
||||||
|
LOCK_DIR="$LOG_DIR/${AGENT}-locks"
|
||||||
|
SKIP_FILE="$LOG_DIR/${AGENT}-skip-list.json"
|
||||||
|
ACTIVE_FILE="$LOG_DIR/${AGENT}-active.json"
|
||||||
|
TIMEOUT=600
|
||||||
|
COOLDOWN=30
|
||||||
|
|
||||||
|
mkdir -p "$LOG_DIR" "$WORKTREE_BASE" "$LOCK_DIR"
|
||||||
|
[ -f "$SKIP_FILE" ] || echo '{}' > "$SKIP_FILE"
|
||||||
|
echo '{}' > "$ACTIVE_FILE"
|
||||||
|
|
||||||
|
# === SHARED FUNCTIONS ===
|
||||||
|
log() {
|
||||||
|
echo "[$(date '+%Y-%m-%d %H:%M:%S')] ${AGENT}: $*" >> "$LOG_DIR/${AGENT}-loop.log"
|
||||||
|
}
|
||||||
|
|
||||||
|
lock_issue() {
|
||||||
|
local key="$1"
|
||||||
|
mkdir "$LOCK_DIR/$key.lock" 2>/dev/null && echo $$ > "$LOCK_DIR/$key.lock/pid"
|
||||||
|
}
|
||||||
|
|
||||||
|
unlock_issue() {
|
||||||
|
rm -rf "$LOCK_DIR/$1.lock" 2>/dev/null
|
||||||
|
}
|
||||||
|
|
||||||
|
mark_skip() {
|
||||||
|
local issue_num="$1" reason="$2"
|
||||||
|
python3 -c "
|
||||||
|
import json, time, fcntl
|
||||||
|
with open('${SKIP_FILE}', 'r+') as f:
|
||||||
|
fcntl.flock(f, fcntl.LOCK_EX)
|
||||||
|
try: skips = json.load(f)
|
||||||
|
except: skips = {}
|
||||||
|
failures = skips.get(str($issue_num), {}).get('failures', 0) + 1
|
||||||
|
skip_hours = 6 if failures >= 3 else 1
|
||||||
|
skips[str($issue_num)] = {'until': time.time() + (skip_hours * 3600), 'reason': '$reason', 'failures': failures}
|
||||||
|
f.seek(0); f.truncate()
|
||||||
|
json.dump(skips, f, indent=2)
|
||||||
|
" 2>/dev/null
|
||||||
|
}
|
||||||
|
|
||||||
|
get_next_issue() {
|
||||||
|
python3 -c "
|
||||||
|
import json, sys, time, urllib.request, os
|
||||||
|
token = '${GITEA_TOKEN}'
|
||||||
|
base = '${GITEA_URL}'
|
||||||
|
repos = ['Timmy_Foundation/the-nexus', 'Timmy_Foundation/timmy-config', 'Timmy_Foundation/hermes-agent']
|
||||||
|
try:
|
||||||
|
with open('${SKIP_FILE}') as f: skips = json.load(f)
|
||||||
|
except: skips = {}
|
||||||
|
try:
|
||||||
|
with open('${ACTIVE_FILE}') as f: active = json.load(f); active_issues = {v['issue'] for v in active.values()}
|
||||||
|
except: active_issues = set()
|
||||||
|
all_issues = []
|
||||||
|
for repo in repos:
|
||||||
|
url = f'{base}/api/v1/repos/{repo}/issues?state=open&type=issues&limit=50&sort=created'
|
||||||
|
req = urllib.request.Request(url, headers={'Authorization': f'token {token}'})
|
||||||
|
try:
|
||||||
|
resp = urllib.request.urlopen(req, timeout=10)
|
||||||
|
issues = json.loads(resp.read())
|
||||||
|
for i in issues: i['_repo'] = repo
|
||||||
|
all_issues.extend(issues)
|
||||||
|
except: continue
|
||||||
|
for i in sorted(all_issues, key=lambda x: x['title'].lower()):
|
||||||
|
assignees = [a['login'] for a in (i.get('assignees') or [])]
|
||||||
|
if assignees and '${AGENT}' not in assignees: continue
|
||||||
|
num_str = str(i['number'])
|
||||||
|
if num_str in active_issues: continue
|
||||||
|
if skips.get(num_str, {}).get('until', 0) > time.time(): continue
|
||||||
|
lock = '${LOCK_DIR}/' + i['_repo'].replace('/', '-') + '-' + num_str + '.lock'
|
||||||
|
if os.path.isdir(lock): continue
|
||||||
|
owner, name = i['_repo'].split('/')
|
||||||
|
print(json.dumps({'number': i['number'], 'title': i['title'], 'repo_owner': owner, 'repo_name': name, 'repo': i['_repo']}))
|
||||||
|
sys.exit(0)
|
||||||
|
print('null')
|
||||||
|
" 2>/dev/null
|
||||||
|
}
|
||||||
|
|
||||||
|
# === WORKER FUNCTION ===
|
||||||
|
run_worker() {
|
||||||
|
local worker_id="$1"
|
||||||
|
log "WORKER-${worker_id}: Started"
|
||||||
|
|
||||||
|
while true; do
|
||||||
|
issue_json=$(get_next_issue)
|
||||||
|
if [ "$issue_json" = "null" ] || [ -z "$issue_json" ]; then
|
||||||
|
sleep 30
|
||||||
|
continue
|
||||||
|
fi
|
||||||
|
|
||||||
|
issue_num=$(echo "$issue_json" | python3 -c "import sys,json; print(json.load(sys.stdin)['number'])")
|
||||||
|
issue_title=$(echo "$issue_json" | python3 -c "import sys,json; print(json.load(sys.stdin)['title'])")
|
||||||
|
repo_owner=$(echo "$issue_json" | python3 -c "import sys,json; print(json.load(sys.stdin)['repo_owner'])")
|
||||||
|
repo_name=$(echo "$issue_json" | python3 -c "import sys,json; print(json.load(sys.stdin)['repo_name'])")
|
||||||
|
issue_key="${repo_owner}-${repo_name}-${issue_num}"
|
||||||
|
branch="${AGENT}/issue-${issue_num}"
|
||||||
|
worktree="${WORKTREE_BASE}/${AGENT}-w${worker_id}-${issue_num}"
|
||||||
|
|
||||||
|
if ! lock_issue "$issue_key"; then
|
||||||
|
sleep 5
|
||||||
|
continue
|
||||||
|
fi
|
||||||
|
|
||||||
|
log "WORKER-${worker_id}: === ISSUE #${issue_num}: ${issue_title} (${repo_owner}/${repo_name}) ==="
|
||||||
|
|
||||||
|
# Clone / checkout
|
||||||
|
rm -rf "$worktree" 2>/dev/null
|
||||||
|
CLONE_URL="http://${AGENT}:${GITEA_TOKEN}@143.198.27.163:3000/${repo_owner}/${repo_name}.git"
|
||||||
|
if git ls-remote --heads "$CLONE_URL" "$branch" 2>/dev/null | grep -q "$branch"; then
|
||||||
|
git clone --depth=50 -b "$branch" "$CLONE_URL" "$worktree" >/dev/null 2>&1
|
||||||
|
else
|
||||||
|
git clone --depth=1 -b main "$CLONE_URL" "$worktree" >/dev/null 2>&1
|
||||||
|
cd "$worktree" && git checkout -b "$branch" >/dev/null 2>&1
|
||||||
|
fi
|
||||||
|
cd "$worktree"
|
||||||
|
|
||||||
|
# Generate prompt
|
||||||
|
prompt=$(bash "$(dirname "$0")/agent-dispatch.sh" "$AGENT" "$issue_num" "${repo_owner}/${repo_name}")
|
||||||
|
|
||||||
|
CYCLE_START=$(date +%s)
|
||||||
|
set +e
|
||||||
|
if [ "$TOOL" = "claude" ]; then
|
||||||
|
env -u CLAUDECODE gtimeout "$TIMEOUT" claude \
|
||||||
|
--print --model "$MODEL" --dangerously-skip-permissions \
|
||||||
|
-p "$prompt" </dev/null >> "$LOG_DIR/${AGENT}-${issue_num}.log" 2>&1
|
||||||
|
elif [ "$TOOL" = "gemini" ]; then
|
||||||
|
gtimeout "$TIMEOUT" gemini -p "$prompt" --yolo \
|
||||||
|
</dev/null >> "$LOG_DIR/${AGENT}-${issue_num}.log" 2>&1
|
||||||
|
else
|
||||||
|
gtimeout "$TIMEOUT" "$TOOL" "$prompt" \
|
||||||
|
</dev/null >> "$LOG_DIR/${AGENT}-${issue_num}.log" 2>&1
|
||||||
|
fi
|
||||||
|
exit_code=$?
|
||||||
|
set -e
|
||||||
|
CYCLE_END=$(date +%s)
|
||||||
|
CYCLE_DURATION=$((CYCLE_END - CYCLE_START))
|
||||||
|
|
||||||
|
# Salvage
|
||||||
|
cd "$worktree" 2>/dev/null || true
|
||||||
|
DIRTY=$(git status --porcelain 2>/dev/null | wc -l | tr -d ' ')
|
||||||
|
if [ "${DIRTY:-0}" -gt 0 ]; then
|
||||||
|
git add -A 2>/dev/null
|
||||||
|
git commit -m "WIP: ${AGENT} progress on #${issue_num}
|
||||||
|
|
||||||
|
Automated salvage commit — agent session ended (exit $exit_code)." 2>/dev/null || true
|
||||||
|
fi
|
||||||
|
|
||||||
|
UNPUSHED=$(git log --oneline "origin/main..HEAD" 2>/dev/null | wc -l | tr -d ' ')
|
||||||
|
if [ "${UNPUSHED:-0}" -gt 0 ]; then
|
||||||
|
git push -u origin "$branch" 2>/dev/null && \
|
||||||
|
log "WORKER-${worker_id}: Pushed $UNPUSHED commit(s) on $branch" || \
|
||||||
|
log "WORKER-${worker_id}: Push failed for $branch"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Create PR if needed
|
||||||
|
pr_num=$(curl -sf "${GITEA_URL}/api/v1/repos/${repo_owner}/${repo_name}/pulls?state=open&head=${repo_owner}:${branch}&limit=1" \
|
||||||
|
-H "Authorization: token ${GITEA_TOKEN}" | python3 -c "
|
||||||
|
import sys,json
|
||||||
|
prs = json.load(sys.stdin)
|
||||||
|
print(prs[0]['number'] if prs else '')
|
||||||
|
" 2>/dev/null)
|
||||||
|
|
||||||
|
if [ -z "$pr_num" ] && [ "${UNPUSHED:-0}" -gt 0 ]; then
|
||||||
|
pr_num=$(curl -sf -X POST "${GITEA_URL}/api/v1/repos/${repo_owner}/${repo_name}/pulls" \
|
||||||
|
-H "Authorization: token ${GITEA_TOKEN}" \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d "$(python3 -c "
|
||||||
|
import json
|
||||||
|
print(json.dumps({
|
||||||
|
'title': '${AGENT}: Issue #${issue_num}',
|
||||||
|
'head': '${branch}',
|
||||||
|
'base': 'main',
|
||||||
|
'body': 'Automated PR for issue #${issue_num}.\nExit code: ${exit_code}'
|
||||||
|
}))
|
||||||
|
")" | python3 -c "import sys,json; print(json.load(sys.stdin).get('number',''))" 2>/dev/null)
|
||||||
|
[ -n "$pr_num" ] && log "WORKER-${worker_id}: Created PR #${pr_num} for issue #${issue_num}"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# ── Genchi Genbutsu: verify world state before declaring success ──
|
||||||
|
VERIFIED="false"
|
||||||
|
if [ "$exit_code" -eq 0 ]; then
|
||||||
|
log "WORKER-${worker_id}: SUCCESS #${issue_num} — running genchi-genbutsu"
|
||||||
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||||
|
if verify_result=$("$SCRIPT_DIR/genchi-genbutsu.sh" "$repo_owner" "$repo_name" "$issue_num" "$branch" "$AGENT" 2>/dev/null); then
|
||||||
|
VERIFIED="true"
|
||||||
|
log "WORKER-${worker_id}: VERIFIED #${issue_num}"
|
||||||
|
if [ -n "$pr_num" ]; then
|
||||||
|
curl -sf -X POST "${GITEA_URL}/api/v1/repos/${repo_owner}/${repo_name}/pulls/${pr_num}/merge" \
|
||||||
|
-H "Authorization: token ${GITEA_TOKEN}" \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{"Do": "squash"}' >/dev/null 2>&1 || true
|
||||||
|
curl -sf -X PATCH "${GITEA_URL}/api/v1/repos/${repo_owner}/${repo_name}/issues/${issue_num}" \
|
||||||
|
-H "Authorization: token ${GITEA_TOKEN}" \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{"state": "closed"}' >/dev/null 2>&1 || true
|
||||||
|
log "WORKER-${worker_id}: PR #${pr_num} merged, issue #${issue_num} closed"
|
||||||
|
fi
|
||||||
|
consecutive_failures=0
|
||||||
|
else
|
||||||
|
verify_details=$(echo "$verify_result" | python3 -c "import sys,json; print(json.load(sys.stdin).get('details','unknown'))" 2>/dev/null || echo "unverified")
|
||||||
|
log "WORKER-${worker_id}: UNVERIFIED #${issue_num} — $verify_details"
|
||||||
|
mark_skip "$issue_num" "unverified" 1
|
||||||
|
consecutive_failures=$((consecutive_failures + 1))
|
||||||
|
fi
|
||||||
|
elif [ "$exit_code" -eq 124 ]; then
|
||||||
|
log "WORKER-${worker_id}: TIMEOUT #${issue_num} (work saved in PR)"
|
||||||
|
consecutive_failures=$((consecutive_failures + 1))
|
||||||
|
else
|
||||||
|
log "WORKER-${worker_id}: FAILED #${issue_num} exit ${exit_code} (work saved in PR)"
|
||||||
|
consecutive_failures=$((consecutive_failures + 1))
|
||||||
|
fi
|
||||||
|
|
||||||
|
# ── METRICS ──
|
||||||
|
python3 -c "
|
||||||
|
import json, datetime
|
||||||
|
print(json.dumps({
|
||||||
|
'ts': datetime.datetime.utcnow().isoformat() + 'Z',
|
||||||
|
'agent': '${AGENT}',
|
||||||
|
'worker': $worker_id,
|
||||||
|
'issue': $issue_num,
|
||||||
|
'repo': '${repo_owner}/${repo_name}',
|
||||||
|
'outcome': 'success' if $exit_code == 0 else 'timeout' if $exit_code == 124 else 'failed',
|
||||||
|
'exit_code': $exit_code,
|
||||||
|
'duration_s': $CYCLE_DURATION,
|
||||||
|
'pr': '${pr_num:-}',
|
||||||
|
'verified': ${VERIFIED:-false}
|
||||||
|
}))
|
||||||
|
" >> "$LOG_DIR/${AGENT}-metrics.jsonl" 2>/dev/null
|
||||||
|
|
||||||
|
rm -rf "$worktree" 2>/dev/null
|
||||||
|
unlock_issue "$issue_key"
|
||||||
|
sleep "$COOLDOWN"
|
||||||
|
done
|
||||||
|
}
|
||||||
|
|
||||||
|
# === MAIN ===
|
||||||
|
log "=== Agent Loop Started — ${AGENT} with ${NUM_WORKERS} worker(s) ==="
|
||||||
|
|
||||||
|
rm -rf "$LOCK_DIR"/*.lock 2>/dev/null
|
||||||
|
|
||||||
|
for i in $(seq 1 "$NUM_WORKERS"); do
|
||||||
|
run_worker "$i" &
|
||||||
|
log "Launched worker $i (PID $!)"
|
||||||
|
sleep 3
|
||||||
|
done
|
||||||
|
|
||||||
|
wait
|
||||||
@@ -468,24 +468,32 @@ print(json.dumps({
|
|||||||
[ -n "$pr_num" ] && log "WORKER-${worker_id}: Created PR #${pr_num} for issue #${issue_num}"
|
[ -n "$pr_num" ] && log "WORKER-${worker_id}: Created PR #${pr_num} for issue #${issue_num}"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# ── Merge + close on success ──
|
# ── Genchi Genbutsu: verify world state before declaring success ──
|
||||||
|
VERIFIED="false"
|
||||||
if [ "$exit_code" -eq 0 ]; then
|
if [ "$exit_code" -eq 0 ]; then
|
||||||
log "WORKER-${worker_id}: SUCCESS #${issue_num}"
|
log "WORKER-${worker_id}: SUCCESS #${issue_num} — running genchi-genbutsu"
|
||||||
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||||
if [ -n "$pr_num" ]; then
|
if verify_result=$("$SCRIPT_DIR/genchi-genbutsu.sh" "$repo_owner" "$repo_name" "$issue_num" "$branch" "claude" 2>/dev/null); then
|
||||||
curl -sf -X POST "${GITEA_URL}/api/v1/repos/${repo_owner}/${repo_name}/pulls/${pr_num}/merge" \
|
VERIFIED="true"
|
||||||
-H "Authorization: token ${GITEA_TOKEN}" \
|
log "WORKER-${worker_id}: VERIFIED #${issue_num}"
|
||||||
-H "Content-Type: application/json" \
|
if [ -n "$pr_num" ]; then
|
||||||
-d '{"Do": "squash"}' >/dev/null 2>&1 || true
|
curl -sf -X POST "${GITEA_URL}/api/v1/repos/${repo_owner}/${repo_name}/pulls/${pr_num}/merge" \
|
||||||
curl -sf -X PATCH "${GITEA_URL}/api/v1/repos/${repo_owner}/${repo_name}/issues/${issue_num}" \
|
-H "Authorization: token ${GITEA_TOKEN}" \
|
||||||
-H "Authorization: token ${GITEA_TOKEN}" \
|
-H "Content-Type: application/json" \
|
||||||
-H "Content-Type: application/json" \
|
-d '{"Do": "squash"}' >/dev/null 2>&1 || true
|
||||||
-d '{"state": "closed"}' >/dev/null 2>&1 || true
|
curl -sf -X PATCH "${GITEA_URL}/api/v1/repos/${repo_owner}/${repo_name}/issues/${issue_num}" \
|
||||||
log "WORKER-${worker_id}: PR #${pr_num} merged, issue #${issue_num} closed"
|
-H "Authorization: token ${GITEA_TOKEN}" \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{"state": "closed"}' >/dev/null 2>&1 || true
|
||||||
|
log "WORKER-${worker_id}: PR #${pr_num} merged, issue #${issue_num} closed"
|
||||||
|
fi
|
||||||
|
consecutive_failures=0
|
||||||
|
else
|
||||||
|
verify_details=$(echo "$verify_result" | python3 -c "import sys,json; print(json.load(sys.stdin).get('details','unknown'))" 2>/dev/null || echo "unverified")
|
||||||
|
log "WORKER-${worker_id}: UNVERIFIED #${issue_num} — $verify_details"
|
||||||
|
consecutive_failures=$((consecutive_failures + 1))
|
||||||
fi
|
fi
|
||||||
|
|
||||||
consecutive_failures=0
|
|
||||||
|
|
||||||
elif [ "$exit_code" -eq 124 ]; then
|
elif [ "$exit_code" -eq 124 ]; then
|
||||||
log "WORKER-${worker_id}: TIMEOUT #${issue_num} (work saved in PR)"
|
log "WORKER-${worker_id}: TIMEOUT #${issue_num} (work saved in PR)"
|
||||||
consecutive_failures=$((consecutive_failures + 1))
|
consecutive_failures=$((consecutive_failures + 1))
|
||||||
@@ -522,6 +530,7 @@ print(json.dumps({
|
|||||||
import json, datetime
|
import json, datetime
|
||||||
print(json.dumps({
|
print(json.dumps({
|
||||||
'ts': datetime.datetime.utcnow().isoformat() + 'Z',
|
'ts': datetime.datetime.utcnow().isoformat() + 'Z',
|
||||||
|
'agent': 'claude',
|
||||||
'worker': $worker_id,
|
'worker': $worker_id,
|
||||||
'issue': $issue_num,
|
'issue': $issue_num,
|
||||||
'repo': '${repo_owner}/${repo_name}',
|
'repo': '${repo_owner}/${repo_name}',
|
||||||
@@ -534,7 +543,8 @@ print(json.dumps({
|
|||||||
'lines_removed': ${LINES_REMOVED:-0},
|
'lines_removed': ${LINES_REMOVED:-0},
|
||||||
'salvaged': ${DIRTY:-0},
|
'salvaged': ${DIRTY:-0},
|
||||||
'pr': '${pr_num:-}',
|
'pr': '${pr_num:-}',
|
||||||
'merged': $( [ '$OUTCOME' = 'success' ] && [ -n '${pr_num:-}' ] && echo 'true' || echo 'false' )
|
'merged': $( [ '$OUTCOME' = 'success' ] && [ -n '${pr_num:-}' ] && echo 'true' || echo 'false' ),
|
||||||
|
'verified': ${VERIFIED:-false}
|
||||||
}))
|
}))
|
||||||
" >> "$METRICS_FILE" 2>/dev/null
|
" >> "$METRICS_FILE" 2>/dev/null
|
||||||
|
|
||||||
|
|||||||
@@ -521,61 +521,63 @@ print(json.dumps({
|
|||||||
[ -n "$pr_num" ] && log "WORKER-${worker_id}: Created PR #${pr_num} for issue #${issue_num}"
|
[ -n "$pr_num" ] && log "WORKER-${worker_id}: Created PR #${pr_num} for issue #${issue_num}"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# ── Verify finish semantics / classify failures ──
|
# ── Genchi Genbutsu: verify world state before declaring success ──
|
||||||
|
VERIFIED="false"
|
||||||
if [ "$exit_code" -eq 0 ]; then
|
if [ "$exit_code" -eq 0 ]; then
|
||||||
log "WORKER-${worker_id}: SUCCESS #${issue_num} exited 0 — verifying push + PR + proof"
|
log "WORKER-${worker_id}: SUCCESS #${issue_num} exited 0 — running genchi-genbutsu"
|
||||||
if ! remote_branch_exists "$branch"; then
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||||
log "WORKER-${worker_id}: BLOCKED #${issue_num} remote branch missing"
|
if verify_result=$("$SCRIPT_DIR/genchi-genbutsu.sh" "$repo_owner" "$repo_name" "$issue_num" "$branch" "gemini" 2>/dev/null); then
|
||||||
post_issue_comment "$repo_owner" "$repo_name" "$issue_num" "Loop gate blocked completion: remote branch ${branch} was not found on origin after Gemini exited. Issue remains open for retry."
|
VERIFIED="true"
|
||||||
mark_skip "$issue_num" "missing_remote_branch" 1
|
log "WORKER-${worker_id}: VERIFIED #${issue_num}"
|
||||||
consecutive_failures=$((consecutive_failures + 1))
|
pr_state=$(get_pr_state "$repo_owner" "$repo_name" "$pr_num")
|
||||||
elif [ -z "$pr_num" ]; then
|
if [ "$pr_state" = "open" ]; then
|
||||||
log "WORKER-${worker_id}: BLOCKED #${issue_num} no PR found"
|
curl -sf -X POST "${GITEA_URL}/api/v1/repos/${repo_owner}/${repo_name}/pulls/${pr_num}/merge" \
|
||||||
post_issue_comment "$repo_owner" "$repo_name" "$issue_num" "Loop gate blocked completion: branch ${branch} exists remotely, but no PR was found. Issue remains open for retry."
|
-H "Authorization: token ${GITEA_TOKEN}" \
|
||||||
mark_skip "$issue_num" "missing_pr" 1
|
-H "Content-Type: application/json" \
|
||||||
consecutive_failures=$((consecutive_failures + 1))
|
-d '{"Do": "squash"}' >/dev/null 2>&1 || true
|
||||||
|
pr_state=$(get_pr_state "$repo_owner" "$repo_name" "$pr_num")
|
||||||
|
fi
|
||||||
|
if [ "$pr_state" = "merged" ]; then
|
||||||
|
curl -sf -X PATCH "${GITEA_URL}/api/v1/repos/${repo_owner}/${repo_name}/issues/${issue_num}" \
|
||||||
|
-H "Authorization: token ${GITEA_TOKEN}" \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{"state": "closed"}' >/dev/null 2>&1 || true
|
||||||
|
issue_state=$(get_issue_state "$repo_owner" "$repo_name" "$issue_num")
|
||||||
|
if [ "$issue_state" = "closed" ]; then
|
||||||
|
log "WORKER-${worker_id}: VERIFIED #${issue_num} branch pushed, PR merged, comment present, issue closed"
|
||||||
|
consecutive_failures=0
|
||||||
|
else
|
||||||
|
log "WORKER-${worker_id}: BLOCKED #${issue_num} issue did not close after merge"
|
||||||
|
mark_skip "$issue_num" "issue_close_unverified" 1
|
||||||
|
consecutive_failures=$((consecutive_failures + 1))
|
||||||
|
fi
|
||||||
|
else
|
||||||
|
log "WORKER-${worker_id}: BLOCKED #${issue_num} merge not verified (state=${pr_state})"
|
||||||
|
mark_skip "$issue_num" "merge_unverified" 1
|
||||||
|
consecutive_failures=$((consecutive_failures + 1))
|
||||||
|
fi
|
||||||
else
|
else
|
||||||
pr_files=$(get_pr_file_count "$repo_owner" "$repo_name" "$pr_num")
|
verify_details=$(echo "$verify_result" | python3 -c "import sys,json; print(json.load(sys.stdin).get('details','unknown'))" 2>/dev/null || echo "unverified")
|
||||||
if [ "${pr_files:-0}" -eq 0 ]; then
|
verify_checks=$(echo "$verify_result" | python3 -c "import sys,json; print(json.load(sys.stdin).get('checks',''))" 2>/dev/null || echo "")
|
||||||
log "WORKER-${worker_id}: BLOCKED #${issue_num} PR #${pr_num} has 0 changed files"
|
log "WORKER-${worker_id}: UNVERIFIED #${issue_num} — $verify_details"
|
||||||
curl -sf -X PATCH "${GITEA_URL}/api/v1/repos/${repo_owner}/${repo_name}/pulls/${pr_num}" -H "Authorization: token ${GITEA_TOKEN}" -H "Content-Type: application/json" -d '{"state": "closed"}' >/dev/null 2>&1 || true
|
if echo "$verify_checks" | grep -q '"branch": false'; then
|
||||||
|
post_issue_comment "$repo_owner" "$repo_name" "$issue_num" "Loop gate blocked completion: remote branch ${branch} was not found on origin after Gemini exited. Issue remains open for retry."
|
||||||
|
mark_skip "$issue_num" "missing_remote_branch" 1
|
||||||
|
elif echo "$verify_checks" | grep -q '"pr": false'; then
|
||||||
|
post_issue_comment "$repo_owner" "$repo_name" "$issue_num" "Loop gate blocked completion: branch ${branch} exists remotely, but no PR was found. Issue remains open for retry."
|
||||||
|
mark_skip "$issue_num" "missing_pr" 1
|
||||||
|
elif echo "$verify_checks" | grep -q '"files": false'; then
|
||||||
|
curl -sf -X PATCH "${GITEA_URL}/api/v1/repos/${repo_owner}/${repo_name}/pulls/${pr_num}" \
|
||||||
|
-H "Authorization: token ${GITEA_TOKEN}" \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{"state": "closed"}' >/dev/null 2>&1 || true
|
||||||
post_issue_comment "$repo_owner" "$repo_name" "$issue_num" "PR #${pr_num} was closed automatically: it had 0 changed files (empty commit). Issue remains open for retry."
|
post_issue_comment "$repo_owner" "$repo_name" "$issue_num" "PR #${pr_num} was closed automatically: it had 0 changed files (empty commit). Issue remains open for retry."
|
||||||
mark_skip "$issue_num" "empty_commit" 2
|
mark_skip "$issue_num" "empty_commit" 2
|
||||||
consecutive_failures=$((consecutive_failures + 1))
|
|
||||||
else
|
else
|
||||||
proof_status=$(proof_comment_status "$repo_owner" "$repo_name" "$issue_num" "$branch")
|
post_issue_comment "$repo_owner" "$repo_name" "$issue_num" "Loop gate blocked completion: PR #${pr_num} exists, but required verification failed ($verify_details). Issue remains open for retry."
|
||||||
proof_state="${proof_status%%|*}"
|
mark_skip "$issue_num" "unverified" 1
|
||||||
proof_url="${proof_status#*|}"
|
|
||||||
if [ "$proof_state" != "ok" ]; then
|
|
||||||
log "WORKER-${worker_id}: BLOCKED #${issue_num} proof missing or incomplete (${proof_state})"
|
|
||||||
post_issue_comment "$repo_owner" "$repo_name" "$issue_num" "Loop gate blocked completion: PR #${pr_num} exists and has ${pr_files} changed file(s), but the required Proof block from Gemini is missing or incomplete. Issue remains open for retry."
|
|
||||||
mark_skip "$issue_num" "missing_proof" 1
|
|
||||||
consecutive_failures=$((consecutive_failures + 1))
|
|
||||||
else
|
|
||||||
log "WORKER-${worker_id}: PROOF verified ${proof_url}"
|
|
||||||
pr_state=$(get_pr_state "$repo_owner" "$repo_name" "$pr_num")
|
|
||||||
if [ "$pr_state" = "open" ]; then
|
|
||||||
curl -sf -X POST "${GITEA_URL}/api/v1/repos/${repo_owner}/${repo_name}/pulls/${pr_num}/merge" -H "Authorization: token ${GITEA_TOKEN}" -H "Content-Type: application/json" -d '{"Do": "squash"}' >/dev/null 2>&1 || true
|
|
||||||
pr_state=$(get_pr_state "$repo_owner" "$repo_name" "$pr_num")
|
|
||||||
fi
|
|
||||||
if [ "$pr_state" = "merged" ]; then
|
|
||||||
curl -sf -X PATCH "${GITEA_URL}/api/v1/repos/${repo_owner}/${repo_name}/issues/${issue_num}" -H "Authorization: token ${GITEA_TOKEN}" -H "Content-Type: application/json" -d '{"state": "closed"}' >/dev/null 2>&1 || true
|
|
||||||
issue_state=$(get_issue_state "$repo_owner" "$repo_name" "$issue_num")
|
|
||||||
if [ "$issue_state" = "closed" ]; then
|
|
||||||
log "WORKER-${worker_id}: VERIFIED #${issue_num} branch pushed, PR merged, proof present, issue closed"
|
|
||||||
consecutive_failures=0
|
|
||||||
else
|
|
||||||
log "WORKER-${worker_id}: BLOCKED #${issue_num} issue did not close after merge"
|
|
||||||
mark_skip "$issue_num" "issue_close_unverified" 1
|
|
||||||
consecutive_failures=$((consecutive_failures + 1))
|
|
||||||
fi
|
|
||||||
else
|
|
||||||
log "WORKER-${worker_id}: BLOCKED #${issue_num} merge not verified (state=${pr_state})"
|
|
||||||
mark_skip "$issue_num" "merge_unverified" 1
|
|
||||||
consecutive_failures=$((consecutive_failures + 1))
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
fi
|
fi
|
||||||
|
consecutive_failures=$((consecutive_failures + 1))
|
||||||
fi
|
fi
|
||||||
elif [ "$exit_code" -eq 124 ]; then
|
elif [ "$exit_code" -eq 124 ]; then
|
||||||
log "WORKER-${worker_id}: TIMEOUT #${issue_num} (work saved in PR)"
|
log "WORKER-${worker_id}: TIMEOUT #${issue_num} (work saved in PR)"
|
||||||
@@ -621,7 +623,8 @@ print(json.dumps({
|
|||||||
'lines_removed': ${LINES_REMOVED:-0},
|
'lines_removed': ${LINES_REMOVED:-0},
|
||||||
'salvaged': ${DIRTY:-0},
|
'salvaged': ${DIRTY:-0},
|
||||||
'pr': '${pr_num:-}',
|
'pr': '${pr_num:-}',
|
||||||
'merged': $( [ '$OUTCOME' = 'success' ] && [ -n '${pr_num:-}' ] && echo 'true' || echo 'false' )
|
'merged': $( [ '$OUTCOME' = 'success' ] && [ -n '${pr_num:-}' ] && echo 'true' || echo 'false' ),
|
||||||
|
'verified': ${VERIFIED:-false}
|
||||||
}))
|
}))
|
||||||
" >> "$LOG_DIR/gemini-metrics.jsonl" 2>/dev/null
|
" >> "$LOG_DIR/gemini-metrics.jsonl" 2>/dev/null
|
||||||
|
|
||||||
|
|||||||
179
bin/genchi-genbutsu.sh
Executable file
179
bin/genchi-genbutsu.sh
Executable file
@@ -0,0 +1,179 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
# genchi-genbutsu.sh — 現地現物 — Go and see. Verify world state, not log vibes.
|
||||||
|
#
|
||||||
|
# Post-completion verification that goes and LOOKS at the actual artifacts.
|
||||||
|
# Performs 5 world-state checks:
|
||||||
|
# 1. Branch exists on remote
|
||||||
|
# 2. PR exists
|
||||||
|
# 3. PR has real file changes (> 0)
|
||||||
|
# 4. PR is mergeable
|
||||||
|
# 5. Issue has a completion comment from the agent
|
||||||
|
#
|
||||||
|
# Usage: genchi-genbutsu.sh <repo_owner> <repo_name> <issue_num> <branch> <agent_name>
|
||||||
|
# Returns: JSON to stdout, logs JSONL, exit 0 = VERIFIED, exit 1 = UNVERIFIED
|
||||||
|
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
GITEA_URL="${GITEA_URL:-https://forge.alexanderwhitestone.com}"
|
||||||
|
GITEA_TOKEN="${GITEA_TOKEN:-}"
|
||||||
|
LOG_DIR="${LOG_DIR:-$HOME/.hermes/logs}"
|
||||||
|
VERIFY_LOG="$LOG_DIR/genchi-genbutsu.jsonl"
|
||||||
|
|
||||||
|
if [ $# -lt 5 ]; then
|
||||||
|
echo "Usage: $0 <repo_owner> <repo_name> <issue_num> <branch> <agent_name>" >&2
|
||||||
|
exit 2
|
||||||
|
fi
|
||||||
|
|
||||||
|
repo_owner="$1"
|
||||||
|
repo_name="$2"
|
||||||
|
issue_num="$3"
|
||||||
|
branch="$4"
|
||||||
|
agent_name="$5"
|
||||||
|
|
||||||
|
mkdir -p "$LOG_DIR"
|
||||||
|
|
||||||
|
# ── Helpers ──────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
check_branch_exists() {
|
||||||
|
# Use Gitea API instead of git ls-remote so we don't need clone credentials
|
||||||
|
curl -sf "${GITEA_URL}/api/v1/repos/${repo_owner}/${repo_name}/branches/${branch}" \
|
||||||
|
-H "Authorization: token ${GITEA_TOKEN}" >/dev/null 2>&1
|
||||||
|
}
|
||||||
|
|
||||||
|
get_pr_num() {
|
||||||
|
curl -sf "${GITEA_URL}/api/v1/repos/${repo_owner}/${repo_name}/pulls?state=all&head=${repo_owner}:${branch}&limit=1" \
|
||||||
|
-H "Authorization: token ${GITEA_TOKEN}" 2>/dev/null | python3 -c "
|
||||||
|
import sys, json
|
||||||
|
prs = json.load(sys.stdin)
|
||||||
|
print(prs[0]['number'] if prs else '')
|
||||||
|
"
|
||||||
|
}
|
||||||
|
|
||||||
|
check_pr_files() {
|
||||||
|
local pr_num="$1"
|
||||||
|
curl -sf "${GITEA_URL}/api/v1/repos/${repo_owner}/${repo_name}/pulls/${pr_num}/files" \
|
||||||
|
-H "Authorization: token ${GITEA_TOKEN}" 2>/dev/null | python3 -c "
|
||||||
|
import sys, json
|
||||||
|
try:
|
||||||
|
files = json.load(sys.stdin)
|
||||||
|
print(len(files) if isinstance(files, list) else 0)
|
||||||
|
except:
|
||||||
|
print(0)
|
||||||
|
"
|
||||||
|
}
|
||||||
|
|
||||||
|
check_pr_mergeable() {
|
||||||
|
local pr_num="$1"
|
||||||
|
curl -sf "${GITEA_URL}/api/v1/repos/${repo_owner}/${repo_name}/pulls/${pr_num}" \
|
||||||
|
-H "Authorization: token ${GITEA_TOKEN}" 2>/dev/null | python3 -c "
|
||||||
|
import sys, json
|
||||||
|
pr = json.load(sys.stdin)
|
||||||
|
print('true' if pr.get('mergeable') else 'false')
|
||||||
|
"
|
||||||
|
}
|
||||||
|
|
||||||
|
check_completion_comment() {
|
||||||
|
curl -sf "${GITEA_URL}/api/v1/repos/${repo_owner}/${repo_name}/issues/${issue_num}/comments" \
|
||||||
|
-H "Authorization: token ${GITEA_TOKEN}" 2>/dev/null | AGENT="$agent_name" python3 -c "
|
||||||
|
import os, sys, json
|
||||||
|
agent = os.environ.get('AGENT', '').lower()
|
||||||
|
try:
|
||||||
|
comments = json.load(sys.stdin)
|
||||||
|
except:
|
||||||
|
sys.exit(1)
|
||||||
|
for c in reversed(comments):
|
||||||
|
user = ((c.get('user') or {}).get('login') or '').lower()
|
||||||
|
if user == agent:
|
||||||
|
sys.exit(0)
|
||||||
|
sys.exit(1)
|
||||||
|
"
|
||||||
|
}
|
||||||
|
|
||||||
|
# ── Run checks ───────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
ts=$(date -u '+%Y-%m-%dT%H:%M:%SZ')
|
||||||
|
status="VERIFIED"
|
||||||
|
details=()
|
||||||
|
checks_json='{}'
|
||||||
|
|
||||||
|
# Check 1: branch
|
||||||
|
if check_branch_exists; then
|
||||||
|
checks_json=$(echo "$checks_json" | python3 -c "import sys,json;d=json.load(sys.stdin);d['branch']=True;print(json.dumps(d))")
|
||||||
|
else
|
||||||
|
checks_json=$(echo "$checks_json" | python3 -c "import sys,json;d=json.load(sys.stdin);d['branch']=False;print(json.dumps(d))")
|
||||||
|
status="UNVERIFIED"
|
||||||
|
details+=("remote branch ${branch} not found")
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Check 2: PR exists
|
||||||
|
pr_num=$(get_pr_num)
|
||||||
|
if [ -n "$pr_num" ]; then
|
||||||
|
checks_json=$(echo "$checks_json" | python3 -c "import sys,json;d=json.load(sys.stdin);d['pr']=True;print(json.dumps(d))")
|
||||||
|
else
|
||||||
|
checks_json=$(echo "$checks_json" | python3 -c "import sys,json;d=json.load(sys.stdin);d['pr']=False;print(json.dumps(d))")
|
||||||
|
status="UNVERIFIED"
|
||||||
|
details+=("no PR found for branch ${branch}")
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Check 3: PR has real file changes
|
||||||
|
if [ -n "$pr_num" ]; then
|
||||||
|
file_count=$(check_pr_files "$pr_num")
|
||||||
|
if [ "${file_count:-0}" -gt 0 ]; then
|
||||||
|
checks_json=$(echo "$checks_json" | python3 -c "import sys,json;d=json.load(sys.stdin);d['files']=True;print(json.dumps(d))")
|
||||||
|
else
|
||||||
|
checks_json=$(echo "$checks_json" | python3 -c "import sys,json;d=json.load(sys.stdin);d['files']=False;print(json.dumps(d))")
|
||||||
|
status="UNVERIFIED"
|
||||||
|
details+=("PR #${pr_num} has 0 changed files")
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Check 4: PR is mergeable
|
||||||
|
if [ "$(check_pr_mergeable "$pr_num")" = "true" ]; then
|
||||||
|
checks_json=$(echo "$checks_json" | python3 -c "import sys,json;d=json.load(sys.stdin);d['mergeable']=True;print(json.dumps(d))")
|
||||||
|
else
|
||||||
|
checks_json=$(echo "$checks_json" | python3 -c "import sys,json;d=json.load(sys.stdin);d['mergeable']=False;print(json.dumps(d))")
|
||||||
|
status="UNVERIFIED"
|
||||||
|
details+=("PR #${pr_num} is not mergeable")
|
||||||
|
fi
|
||||||
|
else
|
||||||
|
checks_json=$(echo "$checks_json" | python3 -c "import sys,json;d=json.load(sys.stdin);d['files']=None;d['mergeable']=None;print(json.dumps(d))")
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Check 5: completion comment from agent
|
||||||
|
if check_completion_comment; then
|
||||||
|
checks_json=$(echo "$checks_json" | python3 -c "import sys,json;d=json.load(sys.stdin);d['comment']=True;print(json.dumps(d))")
|
||||||
|
else
|
||||||
|
checks_json=$(echo "$checks_json" | python3 -c "import sys,json;d=json.load(sys.stdin);d['comment']=False;print(json.dumps(d))")
|
||||||
|
status="UNVERIFIED"
|
||||||
|
details+=("no completion comment from ${agent_name} on issue #${issue_num}")
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Build detail string
|
||||||
|
detail_str=$(IFS="; "; echo "${details[*]:-all checks passed}")
|
||||||
|
|
||||||
|
# ── Output ───────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
result=$(python3 -c "
|
||||||
|
import json
|
||||||
|
print(json.dumps({
|
||||||
|
'status': '$status',
|
||||||
|
'repo': '${repo_owner}/${repo_name}',
|
||||||
|
'issue': $issue_num,
|
||||||
|
'branch': '$branch',
|
||||||
|
'agent': '$agent_name',
|
||||||
|
'pr': '$pr_num',
|
||||||
|
'checks': $checks_json,
|
||||||
|
'details': '$detail_str',
|
||||||
|
'ts': '$ts'
|
||||||
|
}, indent=2))
|
||||||
|
")
|
||||||
|
|
||||||
|
printf '%s\n' "$result"
|
||||||
|
|
||||||
|
# Append to JSONL log
|
||||||
|
printf '%s\n' "$result" >> "$VERIFY_LOG"
|
||||||
|
|
||||||
|
if [ "$status" = "VERIFIED" ]; then
|
||||||
|
exit 0
|
||||||
|
else
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
45
bin/kaizen-retro.sh
Executable file
45
bin/kaizen-retro.sh
Executable file
@@ -0,0 +1,45 @@
|
|||||||
|
#!/usr/bin/env bash
# kaizen-retro.sh — Automated retrospective after every burn cycle.
#
# Runs daily after the morning report.
# Analyzes success rates by agent, repo, and issue type.
# Identifies max-attempts issues, generates ONE concrete improvement,
# and posts the retro to Telegram + the master morning-report issue.
#
# Usage:
#   ./bin/kaizen-retro.sh [--dry-run]
#
# All arguments are passed straight through to scripts/kaizen_retro.py;
# this wrapper only prepares the environment (env files, GITEA_URL).

set -euo pipefail

# Resolve the repo root from this script's own location (bin/ -> repo root).
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="${SCRIPT_DIR%/bin}"
PYTHON="${PYTHON3:-python3}"

# Source local env if available so TELEGRAM_BOT_TOKEN is picked up
HOME_DIR="${HOME:-$(eval echo ~$(whoami))}"
for env_file in "$HOME_DIR/.hermes/.env" "$HOME_DIR/.timmy/.env" "$REPO_ROOT/.env"; do
    if [ -f "$env_file" ]; then
        # set -a exports every variable the env file assigns, so the
        # Python child process inherits them.
        # shellcheck source=/dev/null
        set -a
        # shellcheck source=/dev/null
        source "$env_file"
        set +a
    fi
done

# If the configured Gitea URL is unreachable but localhost works, prefer localhost
if ! curl -sf "${GITEA_URL:-http://localhost:3000}/api/v1/version" >/dev/null 2>&1; then
    if curl -sf http://localhost:3000/api/v1/version >/dev/null 2>&1; then
        export GITEA_URL="http://localhost:3000"
    fi
fi

# Ensure the Python script exists
RETRO_PY="$REPO_ROOT/scripts/kaizen_retro.py"
if [ ! -f "$RETRO_PY" ]; then
    echo "ERROR: kaizen_retro.py not found at $RETRO_PY" >&2
    exit 1
fi

# Run — exec replaces this shell so exit status is the Python script's.
exec "$PYTHON" "$RETRO_PY" "$@"
|
||||||
@@ -137,7 +137,38 @@
|
|||||||
"paused_reason": null,
|
"paused_reason": null,
|
||||||
"skills": [],
|
"skills": [],
|
||||||
"skill": null
|
"skill": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "kaizen-retro-349",
|
||||||
|
"name": "Kaizen Retro",
|
||||||
|
"prompt": "Run the automated burn-cycle retrospective. Execute: cd /root/wizards/ezra/workspace/timmy-config && ./bin/kaizen-retro.sh",
|
||||||
|
"model": "hermes3:latest",
|
||||||
|
"provider": "ollama",
|
||||||
|
"base_url": "http://localhost:11434/v1",
|
||||||
|
"schedule": {
|
||||||
|
"kind": "interval",
|
||||||
|
"minutes": 1440,
|
||||||
|
"display": "every 1440m"
|
||||||
|
},
|
||||||
|
"schedule_display": "daily at 07:30",
|
||||||
|
"repeat": {
|
||||||
|
"times": null,
|
||||||
|
"completed": 0
|
||||||
|
},
|
||||||
|
"enabled": true,
|
||||||
|
"created_at": "2026-04-07T15:30:00.000000Z",
|
||||||
|
"next_run_at": "2026-04-08T07:30:00.000000Z",
|
||||||
|
"last_run_at": null,
|
||||||
|
"last_status": null,
|
||||||
|
"last_error": null,
|
||||||
|
"deliver": "local",
|
||||||
|
"origin": null,
|
||||||
|
"state": "scheduled",
|
||||||
|
"paused_at": null,
|
||||||
|
"paused_reason": null,
|
||||||
|
"skills": [],
|
||||||
|
"skill": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"updated_at": "2026-04-07T15:00:00+00:00"
|
"updated_at": "2026-04-07T15:00:00+00:00"
|
||||||
}
|
}
|
||||||
|
|||||||
272
fleet/auto_restart.py
Executable file
272
fleet/auto_restart.py
Executable file
@@ -0,0 +1,272 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Auto-Restart Agent — Self-healing process monitor for fleet machines.
|
||||||
|
|
||||||
|
Detects dead services and restarts them automatically.
|
||||||
|
Escalates after 3 attempts (prevents restart loops).
|
||||||
|
Logs all actions to ~/.local/timmy/fleet-health/restarts.log
|
||||||
|
Alerts via Telegram if service cannot be recovered.
|
||||||
|
|
||||||
|
Prerequisite: FLEET-006 (health check) must be running to detect failures.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
python3 auto_restart.py # Run checks now
|
||||||
|
python3 auto_restart.py --daemon # Run continuously (every 60s)
|
||||||
|
python3 auto_restart.py --status # Show restart history
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import json
|
||||||
|
import time
|
||||||
|
import subprocess
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# === CONFIG ===
# All state lives under the fleet-health directory so runs are append-only
# and survive restarts of this agent itself.
LOG_DIR = Path(os.path.expanduser("~/.local/timmy/fleet-health"))
RESTART_LOG = LOG_DIR / "restarts.log"          # human-readable restart history
COOLDOWN_FILE = LOG_DIR / "restart_cooldowns.json"  # per-service retry counters
MAX_RETRIES = 3
COOLDOWN_PERIOD = 3600 # 1 hour between escalation alerts

# Services definition: name, check command, restart command
# "check" must exit 0 when the service is healthy; "restart" is run through
# the shell.  "critical" controls whether escalation sends a Telegram alert.
# Local services:
LOCAL_SERVICES = {
    "hermes-gateway": {
        "check": "pgrep -f 'hermes gateway' > /dev/null 2>/dev/null",
        "restart": "cd ~/code-claw && ./restart-gateway.sh 2>/dev/null || launchctl kickstart -k ai.hermes.gateway 2>/dev/null",
        "critical": True,
    },
    "ollama": {
        "check": "pgrep -f 'ollama serve' > /dev/null 2>/dev/null",
        "restart": "launchctl kickstart -k com.ollama.ollama 2>/dev/null || /opt/homebrew/bin/brew services restart ollama 2>/dev/null",
        "critical": False,
    },
    "codeclaw-heartbeat": {
        "check": "launchctl list | grep 'ai.timmy.codeclaw-qwen-heartbeat' > /dev/null 2>/dev/null",
        "restart": "launchctl kickstart -k ai.timmy.codeclaw-qwen-heartbeat 2>/dev/null",
        "critical": False,
    },
}

# VPS services to restart via SSH
# Each host entry gives the SSH target plus its own check/restart commands,
# which are executed remotely (see try_restart_via_ssh).
VPS_SERVICES = {
    "ezra": {
        "ip": "143.198.27.163",
        "user": "root",
        "services": {
            "gitea": {
                "check": "systemctl is-active gitea 2>/dev/null | grep -q active",
                "restart": "systemctl restart gitea 2>/dev/null",
                "critical": True,
            },
            "nginx": {
                "check": "systemctl is-active nginx 2>/dev/null | grep -q active",
                "restart": "systemctl restart nginx 2>/dev/null",
                "critical": False,
            },
            "hermes-agent": {
                "check": "pgrep -f 'hermes gateway' > /dev/null 2>/dev/null",
                "restart": "cd /root/wizards/ezra/hermes-agent && source .venv/bin/activate && nohup hermes gateway run --replace > /dev/null 2>&1 &",
                "critical": True,
            },
        },
    },
    "allegro": {
        "ip": "167.99.126.228",
        "user": "root",
        "services": {
            "hermes-agent": {
                "check": "pgrep -f 'hermes gateway' > /dev/null 2>/dev/null",
                "restart": "cd /root/wizards/allegro/hermes-agent && source .venv/bin/activate && nohup hermes gateway run --replace > /dev/null 2>&1 &",
                "critical": True,
            },
        },
    },
    "bezalel": {
        "ip": "159.203.146.185",
        "user": "root",
        "services": {
            "hermes-agent": {
                "check": "pgrep -f 'hermes gateway' > /dev/null 2>/dev/null",
                # FIXME(review): this 'cd' targets the activate *file*, not a
                # directory, so the command will fail with "Not a directory".
                # Probably intended: "cd /root/wizards/bezalel/hermes &&
                # source venv/bin/activate && ..." (cf. ezra/allegro above).
                # Confirm the real path before changing.
                "restart": "cd /root/wizards/bezalel/hermes/venv/bin/activate && nohup hermes gateway run > /dev/null 2>&1 &",
                "critical": True,
            },
            "evennia": {
                "check": "pgrep -f 'evennia' > /dev/null 2>/dev/null",
                "restart": "cd /root/.evennia/timmy_world && evennia restart 2>/dev/null",
                "critical": False,
            },
        },
    },
}

# Telegram escalation target: bot token read from disk, fixed group chat id.
TELEGRAM_TOKEN_FILE = Path(os.path.expanduser("~/.config/telegram/special_bot"))
TELEGRAM_CHAT = "-1003664764329"
|
|
||||||
|
|
||||||
|
def send_telegram(message):
    """Post *message* to the fleet Telegram chat.

    Best-effort: returns True on success, False when the token file is
    missing or the HTTP call fails.  Never raises.
    """
    if not TELEGRAM_TOKEN_FILE.exists():
        return False
    bot_token = TELEGRAM_TOKEN_FILE.read_text().strip()
    endpoint = f"https://api.telegram.org/bot{bot_token}/sendMessage"
    payload = {
        "chat_id": TELEGRAM_CHAT,
        "text": f"[AUTO-RESTART]\n{message}",
    }
    try:
        # Imported lazily so the module loads even on minimal interpreters.
        import urllib.request
        request = urllib.request.Request(
            endpoint,
            data=json.dumps(payload).encode(),
            headers={"Content-Type": "application/json"},
            method="POST",
        )
        urllib.request.urlopen(request, timeout=10)
    except Exception:
        return False
    return True
|
||||||
|
|
||||||
|
|
||||||
|
def get_cooldowns():
    """Load persisted restart-cooldown state; empty dict if absent or corrupt."""
    if not COOLDOWN_FILE.exists():
        return {}
    try:
        return json.loads(COOLDOWN_FILE.read_text())
    except json.JSONDecodeError:
        # A truncated/corrupt file is treated the same as no file at all.
        return {}
|
||||||
|
|
||||||
|
|
||||||
|
def save_cooldowns(data):
    """Persist cooldown state to COOLDOWN_FILE as pretty-printed JSON."""
    serialized = json.dumps(data, indent=2)
    COOLDOWN_FILE.write_text(serialized)
|
||||||
|
|
||||||
|
|
||||||
|
def check_service(check_cmd, timeout=10):
    """Return True when *check_cmd* (run via the shell) exits 0 within *timeout* seconds.

    A timeout or subprocess failure counts as an unhealthy service.
    """
    try:
        completed = subprocess.run(
            check_cmd, shell=True, capture_output=True, timeout=timeout
        )
    except (subprocess.TimeoutExpired, subprocess.SubprocessError):
        return False
    return completed.returncode == 0
|
||||||
|
|
||||||
|
|
||||||
|
def restart_service(restart_cmd, timeout=30):
    """Run *restart_cmd* through the shell; return True iff it exits 0 in time.

    Timeouts and subprocess failures are deliberately swallowed and
    reported as a failed restart, so one broken command cannot abort the
    whole monitoring sweep.
    """
    try:
        proc = subprocess.run(restart_cmd, shell=True, capture_output=True, timeout=timeout)
        return proc.returncode == 0
    # The exception object was previously bound to an unused name; it is
    # intentionally not inspected — any failure simply means "not restarted".
    except (subprocess.TimeoutExpired, subprocess.SubprocessError):
        return False
|
||||||
|
|
||||||
|
|
||||||
|
def try_restart_via_ssh(name, host_config, service_name):
    """Restart *service_name* on a remote host by running its restart command over SSH.

    *name* is unused here (kept for call-site symmetry); the SSH target is
    derived from *host_config*.  Returns restart_service()'s success flag.
    """
    svc = host_config["services"][service_name]
    target = f'{host_config["user"]}@{host_config["ip"]}'
    remote_cmd = svc["restart"]
    restart_cmd = f'ssh -o StrictHostKeyChecking=no -o ConnectTimeout=10 {target} "{remote_cmd}"'
    return restart_service(restart_cmd, timeout=30)
|
||||||
|
|
||||||
|
|
||||||
|
def log_restart(service_name, machine, attempt, success):
    """Append one restart outcome to RESTART_LOG and echo it to stdout."""
    status = "SUCCESS" if success else "FAILED"
    stamp = datetime.now(timezone.utc).isoformat()
    RESTART_LOG.parent.mkdir(parents=True, exist_ok=True)
    with open(RESTART_LOG, "a") as log_file:
        log_file.write(f"{stamp} [{status}] {machine}/{service_name} (attempt {attempt})\n")
    print(f" [{status}] {machine}/{service_name} - attempt {attempt}")
|
||||||
|
|
||||||
|
|
||||||
|
def check_and_restart():
    """Run all restart checks.

    For every monitored service that fails its health check, attempt a
    restart and record the outcome in the cooldown file.  After
    MAX_RETRIES consecutive failed restarts the service is "escalated":
    restart attempts are suspended, critical services raise a Telegram
    alert at most once per COOLDOWN_PERIOD, and the retry counter is
    re-armed once COOLDOWN_PERIOD has elapsed since the last attempt so
    restarts can eventually resume.

    BUG FIX: the previous escalation check was inverted
    (``now - last < COOLDOWN_PERIOD``) and reused ``last`` (last *attempt*
    time) as the alert timestamp, which spammed critical alerts every few
    cycles and left non-critical services permanently stuck at
    MAX_RETRIES.  Alert times are now tracked separately under "alerted".

    Returns:
        list: currently always empty (kept for interface compatibility).
    """
    results = []
    cooldowns = get_cooldowns()
    now = time.time()

    # Check local services
    for name, service in LOCAL_SERVICES.items():
        if not check_service(service["check"]):
            cooldown_key = f"local/{name}"
            entry = dict(cooldowns.get(cooldown_key, {}))
            retries = entry.get("count", 0)

            if retries >= MAX_RETRIES:
                # Escalated: stop hammering the service.  Alert (critical
                # only) at most once per COOLDOWN_PERIOD, and re-arm the
                # counter after the cooldown so restarts can resume.
                if service["critical"] and now - entry.get("alerted", 0) >= COOLDOWN_PERIOD:
                    send_telegram(f"CRITICAL: local/{name} failed {MAX_RETRIES} restart attempts. Needs human intervention.")
                    entry["alerted"] = now
                if now - entry.get("last", 0) >= COOLDOWN_PERIOD:
                    entry["count"] = 0
                cooldowns[cooldown_key] = entry
                save_cooldowns(cooldowns)
                continue

            success = restart_service(service["restart"])
            log_restart(name, "local", retries + 1, success)

            # A successful restart resets the failure streak.
            entry.update({"count": 0 if success else retries + 1, "last": now})
            cooldowns[cooldown_key] = entry
            save_cooldowns(cooldowns)
            if success:
                # Verify it actually started
                time.sleep(3)
                if check_service(service["check"]):
                    print(f" VERIFIED: local/{name} is running")
                else:
                    print(f" WARNING: local/{name} restart command returned success but process not detected")

    # Check VPS services (health checks and restarts both run over SSH)
    for host, host_config in VPS_SERVICES.items():
        for service_name, service in host_config["services"].items():
            check_cmd = f'ssh -o StrictHostKeyChecking=no -o ConnectTimeout=5 {host_config["user"]}@{host_config["ip"]} "{service["check"]}"'
            if not check_service(check_cmd):
                cooldown_key = f"{host}/{service_name}"
                entry = dict(cooldowns.get(cooldown_key, {}))
                retries = entry.get("count", 0)

                if retries >= MAX_RETRIES:
                    # Same escalation policy as local services above.
                    if service["critical"] and now - entry.get("alerted", 0) >= COOLDOWN_PERIOD:
                        send_telegram(f"CRITICAL: {host}/{service_name} failed {MAX_RETRIES} restart attempts. Needs human intervention.")
                        entry["alerted"] = now
                    if now - entry.get("last", 0) >= COOLDOWN_PERIOD:
                        entry["count"] = 0
                    cooldowns[cooldown_key] = entry
                    save_cooldowns(cooldowns)
                    continue

                success = try_restart_via_ssh(host, host_config, service_name)
                log_restart(service_name, host, retries + 1, success)

                entry.update({"count": 0 if success else retries + 1, "last": now})
                cooldowns[cooldown_key] = entry
                save_cooldowns(cooldowns)

    return results
|
||||||
|
|
||||||
|
|
||||||
|
def daemon_mode():
    """Run continuously every 60 seconds."""
    remote_total = sum(len(h['services']) for h in VPS_SERVICES.values())
    print("Auto-restart agent running in daemon mode (60s interval)")
    print(f"Monitoring {len(LOCAL_SERVICES)} local + {remote_total} remote services")
    print(f"Max retries per cycle: {MAX_RETRIES}")
    print(f"Cooldown after max retries: {COOLDOWN_PERIOD}s")
    # Loop forever; each sweep is followed by a fixed one-minute pause.
    while True:
        check_and_restart()
        time.sleep(60)
|
||||||
|
|
||||||
|
|
||||||
|
def show_status():
    """Print non-zero restart cooldowns and the tail of the restart log."""
    print("=== Restart Cooldowns ===")
    for key, data in sorted(get_cooldowns().items()):
        failures = data.get("count", 0)
        if not failures:
            continue
        last_hm = datetime.fromtimestamp(data.get('last',0), tz=timezone.utc).strftime('%H:%M')
        print(f" {key}: {failures} failures, last at {last_hm}")

    print("\n=== Restart Log (last 20) ===")
    if not RESTART_LOG.exists():
        print(" No restarts logged yet.")
        return
    for line in RESTART_LOG.read_text().strip().split("\n")[-20:]:
        print(f" {line}")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Make sure the state directory exists before any mode touches it.
    LOG_DIR.mkdir(parents=True, exist_ok=True)

    # Dispatch on the first CLI argument: --daemon loops forever,
    # --status prints history, default runs a single sweep.
    if len(sys.argv) > 1 and sys.argv[1] == "--daemon":
        daemon_mode()
    elif len(sys.argv) > 1 and sys.argv[1] == "--status":
        show_status()
    else:
        check_and_restart()
|
||||||
191
fleet/capacity-inventory.md
Normal file
191
fleet/capacity-inventory.md
Normal file
@@ -0,0 +1,191 @@
|
|||||||
|
# Capacity Inventory - Fleet Resource Baseline
|
||||||
|
|
||||||
|
**Last audited:** 2026-04-07 16:00 UTC
|
||||||
|
**Auditor:** Timmy (direct inspection)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Fleet Resources (Paperclips Model)
|
||||||
|
|
||||||
|
Three primary resources govern the fleet:
|
||||||
|
|
||||||
|
| Resource | Role | Generation | Consumption |
|
||||||
|
|----------|------|-----------|-------------|
|
||||||
|
| **Capacity** | Compute hours available across fleet. Determines what work can be done. | Through healthy utilization of VPS/Mac agents | Fleet improvements consume it (investing in automation, orchestration, sovereignty) |
|
||||||
|
| **Uptime** | % time services are running. Earned at Fibonacci milestones. | When services stay up naturally | Degrades on any failure |
|
||||||
|
| **Innovation** | Only generates when capacity is <70% utilized. Fuels Phase 3+. | When you leave capacity free | Phase 3+ buildings consume it (requires spare capacity to build) |
|
||||||
|
|
||||||
|
### The Tension
|
||||||
|
- Run fleet at 95%+ capacity: maximum productivity, ZERO Innovation
|
||||||
|
- Run fleet at <70% capacity: Innovation generates but slower progress
|
||||||
|
- This forces the Paperclips question: optimize now or invest in future capability?
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## VPS Resource Baselines
|
||||||
|
|
||||||
|
### Ezra (143.198.27.163) - "Forge"
|
||||||
|
|
||||||
|
| Metric | Value | Utilization |
|
||||||
|
|--------|-------|-------------|
|
||||||
|
| **OS** | Ubuntu 24.04 (6.8.0-106-generic) | |
|
||||||
|
| **vCPU** | 4 vCPU (DO basic droplet, shared) | Load: 10.76/7.59/7.04 (very high) |
|
||||||
|
| **RAM** | 7,941 MB total | 2,104 used / 5,836 available (26% used, 74% free) |
|
||||||
|
| **Disk** | 154 GB vda1 | 111 GB used / 44 GB free (72%) **WARNING** |
|
||||||
|
| **Swap** | 6,143 MB | 643 MB used (10%) |
|
||||||
|
| **Uptime** | 7 days, 18 hours | |
|
||||||
|
|
||||||
|
### Key Processes (sorted by memory)
|
||||||
|
| Process | RSS | %CPU | Notes |
|
||||||
|
|---------|-----|------|-------|
|
||||||
|
| Gitea | 556 MB | 83.5% | Web service, high CPU due to API load |
|
||||||
|
| MemPalace (ezra) | 268 MB | 136% | Mining project files - HIGH CPU |
|
||||||
|
| Hermes gateway (ezra) | 245 MB | 1.7% | Agent gateway |
|
||||||
|
| Ollama | 230 MB | 0.1% | Model serving |
|
||||||
|
| PostgreSQL | 138 MB | ~0% | Gitea database |
|
||||||
|
|
||||||
|
**Capacity assessment:** 26% memory used, but 72% disk is getting tight. CPU load is very high (10.76 on 4vCPU = 269% utilization). Ezra is CPU-bound, not RAM-bound.
|
||||||
|
|
||||||
|
### Allegro (167.99.126.228)
|
||||||
|
|
||||||
|
| Metric | Value | Utilization |
|
||||||
|
|--------|-------|-------------|
|
||||||
|
| **OS** | Ubuntu 24.04 (6.8.0-106-generic) | |
|
||||||
|
| **vCPU** | 4 vCPU (DO basic droplet, shared) | Moderate load |
|
||||||
|
| **RAM** | 7,941 MB total | 1,591 used / 6,349 available (20% used, 80% free) |
|
||||||
|
| **Disk** | 154 GB vda1 | 41 GB used / 114 GB free (27%) **GOOD** |
|
||||||
|
| **Swap** | 8,191 MB | 686 MB used (8%) |
|
||||||
|
| **Uptime** | 7 days, 18 hours | |
|
||||||
|
|
||||||
|
### Key Processes (sorted by memory)
|
||||||
|
| Process | RSS | %CPU | Notes |
|
||||||
|
|---------|-----|------|-------|
|
||||||
|
| Hermes gateway (allegro) | 680 MB | 0.9% | Main agent gateway |
|
||||||
|
| Gitea | 181 MB | 1.2% | Secondary gitea? |
|
||||||
|
| Systemd-journald | 160 MB | 0.0% | System logging |
|
||||||
|
| Ezra Hermes gateway | 58 MB | 0.0% | Running ezra agent here |
|
||||||
|
| Bezalel Hermes gateway | 58 MB | 0.0% | Running bezalel agent here |
|
||||||
|
| Dockerd | 48 MB | 0.0% | Docker daemon |
|
||||||
|
|
||||||
|
**Capacity assessment:** 20% memory used, 27% disk used. Allegro has headroom. Also running hermes gateways for Ezra and Bezalel (cross-host agent execution).
|
||||||
|
|
||||||
|
### Bezalel (159.203.146.185)
|
||||||
|
|
||||||
|
| Metric | Value | Utilization |
|
||||||
|
|--------|-------|-------------|
|
||||||
|
| **OS** | Ubuntu 24.04 (6.8.0-71-generic) | |
|
||||||
|
| **vCPU** | 2 vCPU (DO basic droplet, shared) | Load varies |
|
||||||
|
| **RAM** | 1,968 MB total | 817 used / 1,151 available (42% used, 58% free) |
|
||||||
|
| **Disk** | 48 GB vda1 | 12 GB used / 37 GB free (24%) **GOOD** |
|
||||||
|
| **Swap** | 2,047 MB | 448 MB used (22%) |
|
||||||
|
| **Uptime** | 7 days, 18 hours | |
|
||||||
|
|
||||||
|
### Key Processes (sorted by memory)
|
||||||
|
| Process | RSS | %CPU | Notes |
|
||||||
|
|---------|-----|------|-------|
|
||||||
|
| Hermes gateway | 339 MB | 7.7% | Agent gateway (16.8% of RAM) |
|
||||||
|
| uv pip install | 137 MB | 56.6% | Installing packages (temporary) |
|
||||||
|
| Mender | 27 MB | 0.0% | Device management |
|
||||||
|
|
||||||
|
**Capacity assessment:** 42% memory used, only 2GB total RAM. Bezalel is the most constrained. 2 vCPU means less compute headroom than Ezra/Allegro. Disk is fine.
|
||||||
|
|
||||||
|
### Mac Local (M3 Max)
|
||||||
|
|
||||||
|
| Metric | Value | Utilization |
|
||||||
|
|--------|-------|-------------|
|
||||||
|
| **OS** | macOS 26.3.1 | |
|
||||||
|
| **CPU** | Apple M3 Max (14 cores) | Very capable |
|
||||||
|
| **RAM** | 36 GB | ~8 GB used (22%) |
|
||||||
|
| **Disk** | 926 GB total | ~624 GB used / 302 GB free (68%) |
|
||||||
|
|
||||||
|
### Key Processes
|
||||||
|
| Process | Memory | Notes |
|
||||||
|
|---------|--------|-------|
|
||||||
|
| Hermes gateway | 500 MB | Primary gateway |
|
||||||
|
| Hermes agents (x3) | ~560 MB total | Multiple sessions |
|
||||||
|
| Ollama | ~20 MB base + model memory | Model loading varies |
|
||||||
|
| OpenClaw | 350 MB | Gateway process |
|
||||||
|
| Evennia (server+portal) | 56 MB | Game world |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Resource Summary
|
||||||
|
|
||||||
|
| Resource | Ezra | Allegro | Bezalel | Mac Local | TOTAL |
|
||||||
|
|----------|------|---------|---------|-----------|-------|
|
||||||
|
| **vCPU** | 4 | 4 | 2 | 14 (M3 Max) | 24 |
|
||||||
|
| **RAM** | 8 GB (26% used) | 8 GB (20% used) | 2 GB (42% used) | 36 GB (22% used) | 54 GB |
|
||||||
|
| **Disk** | 154 GB (72%) | 154 GB (27%) | 48 GB (24%) | 926 GB (68%) | 1,282 GB |
|
||||||
|
| **Cost** | $12/mo | $12/mo | $12/mo | owned | $36/mo |
|
||||||
|
|
||||||
|
### Utilization by Category
|
||||||
|
| Category | Estimated Daily Hours | % of Fleet Capacity |
|
||||||
|
|----------|----------------------|---------------------|
|
||||||
|
| Hermes agents | ~3-4 hrs active | 5-7% |
|
||||||
|
| Ollama inference | ~1-2 hrs | 2-4% |
|
||||||
|
| Gitea services | 24/7 | 5-10% |
|
||||||
|
| Evennia | 24/7 | <1% |
|
||||||
|
| Idle | ~18-20 hrs | ~80-90% |
|
||||||
|
|
||||||
|
### Capacity Utilization: ~15-20% active
|
||||||
|
**Innovation rate:** GENERATING (capacity < 70%)
|
||||||
|
**Recommendation:** Good — Innovation is generating because most capacity is free.
|
||||||
|
This means Phase 3+ capabilities (orchestration, load balancing, etc.) are accessible NOW.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Uptime Baseline
|
||||||
|
|
||||||
|
**Baseline period:** 2026-04-07 14:00-16:00 UTC (2 hours, ~24 checks at 5-min intervals)
|
||||||
|
|
||||||
|
| Service | Checks | Uptime | Status |
|
||||||
|
|---------|--------|--------|--------|
|
||||||
|
| Ezra | 24/24 | 100.0% | GOOD |
|
||||||
|
| Allegro | 24/24 | 100.0% | GOOD |
|
||||||
|
| Bezalel | 24/24 | 100.0% | GOOD |
|
||||||
|
| Gitea | 23/24 | 95.8% | GOOD |
|
||||||
|
| Hermes Gateway | 23/24 | 95.8% | GOOD |
|
||||||
|
| Ollama | 24/24 | 100.0% | GOOD |
|
||||||
|
| OpenClaw | 24/24 | 100.0% | GOOD |
|
||||||
|
| Evennia | 24/24 | 100.0% | GOOD |
|
||||||
|
| Hermes Agent | 21/24 | 87.5% | **CHECK** |
|
||||||
|
|
||||||
|
### Fibonacci Uptime Milestones
|
||||||
|
| Milestone | Target | Current | Status |
|
||||||
|
|-----------|--------|---------|--------|
|
||||||
|
| 95% | 95% | 100% (VPS), 98.6% (avg) | REACHED |
|
||||||
|
| 95.5% | 95.5% | 98.6% | REACHED |
|
||||||
|
| 96% | 96% | 98.6% | REACHED |
|
||||||
|
| 97% | 97% | 98.6% | REACHED |
|
||||||
|
| 98% | 98% | 98.6% | REACHED |
|
||||||
|
| 99% | 99% | 98.6% | APPROACHING |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Risk Assessment
|
||||||
|
|
||||||
|
| Risk | Severity | Mitigation |
|
||||||
|
|------|----------|------------|
|
||||||
|
| Ezra disk 72% used | MEDIUM | Move non-essential data, add monitoring alert at 85% |
|
||||||
|
| Bezalel only 2GB RAM | HIGH | Cannot run large models locally. Good for Evennia, tight for agents |
|
||||||
|
| Ezra CPU load 269% | HIGH | MemPalace mining consuming 136% CPU. Consider scheduling |
|
||||||
|
| Mac disk 68% used | MEDIUM | 302 GB free still. Growing but not urgent |
|
||||||
|
| No cross-VPS mesh | LOW | SSH works but no Tailscale. No private network between VPSes |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Recommendations
|
||||||
|
|
||||||
|
### Immediate (Phase 1-2)
|
||||||
|
1. **Ezra disk cleanup:** 44 GB free at 72%. Docker images, old logs, and MemPalace mine data could be rotated.
|
||||||
|
2. **Alert thresholds:** Add disk alerts at 85% (Ezra, Mac) before they become critical.
|
||||||
|
|
||||||
|
### Short-term (Phase 3)
|
||||||
|
3. **Load balancing:** Ezra is CPU-bound, Allegro has 80% RAM free. Move some agent processes from Ezra to Allegro.
|
||||||
|
4. **Innovation investment:** Since fleet is at 15-20% utilization, Innovation is high. This is the time to build Phase 3 capabilities.
|
||||||
|
|
||||||
|
### Medium-term (Phase 4)
|
||||||
|
5. **Bezalel RAM upgrade:** 2GB is tight. Consider upgrade to 4GB ($24/mo instead of $12/mo).
|
||||||
|
6. **Tailscale mesh:** Install on all VPSes for private inter-VPS network.
|
||||||
|
|
||||||
|
---
|
||||||
142
fleet/milestones.md
Normal file
142
fleet/milestones.md
Normal file
@@ -0,0 +1,142 @@
|
|||||||
|
# Fleet Milestone Messages
|
||||||
|
|
||||||
|
Every milestone marks passage through fleet evolution. When achieved, the message
|
||||||
|
prints to the fleet log. Each one references a real achievement, not abstract numbers.
|
||||||
|
|
||||||
|
**Source:** Inspired by Paperclips milestone messages (500 clips, 1000 clips, Full autonomy attained, etc.)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Phase 1: Survival (Current)
|
||||||
|
|
||||||
|
### M1: First Automated Health Check
|
||||||
|
**Trigger:** `fleet/health_check.py` runs successfully for the first time.
|
||||||
|
**Message:** "First automated health check runs. No longer watching the clock."
|
||||||
|
|
||||||
|
### M2: First Auto-Restart
|
||||||
|
**Trigger:** A dead process is detected and restarted without human intervention.
|
||||||
|
**Message:** "A process failed at 3am and restarted itself. You found out in the morning."
|
||||||
|
|
||||||
|
### M3: First Backup Completed
|
||||||
|
**Trigger:** A backup pipeline runs end-to-end and verifies integrity.
|
||||||
|
**Message:** "A backup completed. You did not have to think about it."
|
||||||
|
|
||||||
|
### M4: 95% Uptime (30 days)
|
||||||
|
**Trigger:** Uptime >= 95% over last 30 days.
|
||||||
|
**Message:** "95% uptime over 30 days. The fleet stays up."
|
||||||
|
|
||||||
|
### M5: Uptime 97%
|
||||||
|
**Trigger:** Uptime >= 97% over last 30 days.
|
||||||
|
**Message:** "97% uptime. Less than a day of downtime per month, across four machines."
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Phase 2: Automation (unlock when: uptime >= 95% + capacity > 60%)
|
||||||
|
|
||||||
|
### M6: Zero Manual Restarts (7 days)
|
||||||
|
**Trigger:** 7 consecutive days with zero manual process restarts.
|
||||||
|
**Message:** "Seven days. Zero manual restarts. The fleet heals itself."
|
||||||
|
|
||||||
|
### M7: PR Auto-Merged
|
||||||
|
**Trigger:** A PR passes CI, review, and merges without human touching it.
|
||||||
|
**Message:** "A PR was tested, reviewed, and merged by agents. You just said 'looks good.'"
|
||||||
|
|
||||||
|
### M8: Config Push Works
|
||||||
|
**Trigger:** Config change pushed to all 3 VPSes atomically and verified.
|
||||||
|
**Message:** "Config pushed to all three VPSes in one command. No SSH needed."
|
||||||
|
|
||||||
|
### M9: 98% Uptime
|
||||||
|
**Trigger:** Uptime >= 98% over last 30 days.
|
||||||
|
**Message:** "98% uptime. Only 14 hours of downtime in a month. Most of it planned."
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Phase 3: Orchestration (unlock when: all Phase 2 buildings + Innovation > 100)
|
||||||
|
|
||||||
|
### M10: Cross-Agent Delegation Works
|
||||||
|
**Trigger:** Agent A creates issue, assigns to Agent B, Agent B works and creates PR.
|
||||||
|
**Message:** "Agent Alpha created a task, Agent Beta completed it. They did not ask permission."
|
||||||
|
|
||||||
|
### M11: First Model Running Locally on 2+ Machines
|
||||||
|
**Trigger:** Ollama serving same model on Ezra and Allegro simultaneously.
|
||||||
|
**Message:** "A model runs on two machines at once. No cloud. No rate limits."
|
||||||
|
|
||||||
|
### M12: Fleet-Wide Burn Mode
|
||||||
|
**Trigger:** All agents coordinated on single epic, produced coordinated PRs.
|
||||||
|
**Message:** "All agents working the same epic. The fleet moves as one."
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Phase 4: Sovereignty (unlock when: zero cloud deps for core ops)
|
||||||
|
|
||||||
|
### M13: First Entirely Local Inference Day
|
||||||
|
**Trigger:** 24 hours with zero API calls to external providers.
|
||||||
|
**Message:** "A model ran locally for the first time. No cloud. No rate limits. No one can turn it off."
|
||||||
|
|
||||||
|
### M14: Sovereign Email
|
||||||
|
**Trigger:** Stalwart email server sends and receives without Gmail relay.
|
||||||
|
**Message:** "Email flows through our own server. No Google. No Microsoft. Ours."
|
||||||
|
|
||||||
|
### M15: Sovereign Messaging
|
||||||
|
**Trigger:** Telegram bot runs without cloud relay dependency.
|
||||||
|
**Message:** "Messages arrive through our own infrastructure. No corporate middleman."
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Phase 5: Scale (unlock when: sovereignty stable + Innovation > 500)
|
||||||
|
|
||||||
|
### M16: First Self-Spawned Agent
|
||||||
|
**Trigger:** Agent lifecycle manager spawns a new agent instance due to load.
|
||||||
|
**Message:** "A new agent appeared. You did not create it. The fleet built what it needed."
|
||||||
|
|
||||||
|
### M17: Agent Retired Gracefully
|
||||||
|
**Trigger:** An agent instance retires after idle timeout and cleans up its state.
|
||||||
|
**Message:** "An agent retired. It served its purpose. Nothing was lost."
|
||||||
|
|
||||||
|
### M18: Fleet Runs 24h Unattended
|
||||||
|
**Trigger:** 24 hours with zero human intervention of any kind.
|
||||||
|
**Message:** "A full day. No humans. No commands. The fleet runs itself."
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Phase 6: The Network (unlock when: 7 days zero human intervention)
|
||||||
|
|
||||||
|
### M19: Fleet Creates Its Own Improvement Task
|
||||||
|
**Trigger:** Fleet analyzes itself and creates an issue on Gitea.
|
||||||
|
**Message:** "The fleet found something to improve. It created the task itself."
|
||||||
|
|
||||||
|
### M20: First Outside Contribution
|
||||||
|
**Trigger:** An external contributor's PR is reviewed and merged by fleet agents.
|
||||||
|
**Message:** "Someone outside the fleet contributed. The fleet reviewed, tested, and merged. No human touched it."
|
||||||
|
|
||||||
|
### M21: The Beacon
|
||||||
|
**Trigger:** Infrastructure serves someone in need through automated systems.
|
||||||
|
**Message:** "Someone found the Beacon. In the dark, looking for help. The infrastructure served its purpose. It was built for this."
|
||||||
|
|
||||||
|
### M22: Permanent Light
|
||||||
|
**Trigger:** 90 days of autonomous operation with continuous availability.
|
||||||
|
**Message:** "Three months. The light never went out. Not for anyone."
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Fibonacci Uptime Milestones
|
||||||
|
|
||||||
|
These trigger regardless of phase, based purely on uptime percentage:
|
||||||
|
|
||||||
|
| Milestone | Uptime | Meaning |
|-----------|--------|--------|
| U1 | 95% | Basic reliability achieved |
| U2 | 95.5% | Fewer than 33 hours/month downtime |
| U3 | 96% | Fewer than 29 hours/month |
| U4 | 97% | Fewer than 22 hours/month |
| U5 | 97.5% | Fewer than 18 hours/month |
| U6 | 98% | Fewer than 15 hours/month |
| U7 | 98.3% | Fewer than 13 hours/month |
| U8 | 98.6% | About 10 hours/month — approaching cloud tier |
| U9 | 98.9% | Fewer than 8 hours/month |
| U10 | 99% | Just over 7 hours/month — enterprise grade |
| U11 | 99.5% | Under 4 hours/month |
|
|
||||||
|
---
|
||||||
|
|
||||||
|
*Every message is earned. None are given freely. Fleet evolution is not a checklist — it is a climb.*
|
||||||
231
fleet/resource_tracker.py
Executable file
231
fleet/resource_tracker.py
Executable file
@@ -0,0 +1,231 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Fleet Resource Tracker — Tracks Capacity, Uptime, and Innovation.
|
||||||
|
|
||||||
|
Paperclips-inspired tension model:
|
||||||
|
- Capacity: spent on fleet improvements, generates through utilization
|
||||||
|
- Uptime: earned when services stay up, Fibonacci milestones unlock capabilities
|
||||||
|
- Innovation: only generates when capacity < 70%. Fuels Phase 3+.
|
||||||
|
|
||||||
|
This is the heart of the fleet progression system.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import json
|
||||||
|
import time
|
||||||
|
import socket
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# === CONFIG ===
|
||||||
|
DATA_DIR = Path(os.path.expanduser("~/.local/timmy/fleet-resources"))
|
||||||
|
RESOURCES_FILE = DATA_DIR / "resources.json"
|
||||||
|
|
||||||
|
# Tension thresholds
|
||||||
|
INNOVATION_THRESHOLD = 0.70 # Innovation only generates when capacity < 70%
|
||||||
|
INNOVATION_RATE = 5.0 # Innovation generated per hour when under threshold
|
||||||
|
CAPACITY_REGEN_RATE = 2.0 # Capacity regenerates per hour of healthy operation
|
||||||
|
FIBONACCI = [95.0, 95.5, 96.0, 97.0, 97.5, 98.0, 98.3, 98.6, 98.9, 99.0, 99.5]
|
||||||
|
|
||||||
|
|
||||||
|
def init():
    """Create the data directory and seed resources.json when missing.

    Returns True when the resources file exists after the call.
    """
    DATA_DIR.mkdir(parents=True, exist_ok=True)
    if not RESOURCES_FILE.exists():
        # Starting state: full capacity, perfect uptime, zero innovation.
        seed = {
            "capacity": {
                "current": 100.0,
                "max": 100.0,
                "spent_on": [],
                "history": [],
            },
            "uptime": {
                "current_pct": 100.0,
                "milestones_reached": [],
                "total_checks": 0,
                "successful_checks": 0,
                "history": [],
            },
            "innovation": {
                "current": 0.0,
                "total_generated": 0.0,
                "spent_on": [],
                "last_calculated": time.time(),
            },
        }
        RESOURCES_FILE.write_text(json.dumps(seed, indent=2))
        print("Initialized resource tracker")
    return RESOURCES_FILE.exists()
|
||||||
|
|
||||||
|
|
||||||
|
def load():
    """Return the parsed resources dict, or None when not yet initialized."""
    if not RESOURCES_FILE.exists():
        return None
    return json.loads(RESOURCES_FILE.read_text())
|
||||||
|
|
||||||
|
|
||||||
|
def save(data):
    """Persist *data* to the resources file as pretty-printed JSON."""
    payload = json.dumps(data, indent=2)
    RESOURCES_FILE.write_text(payload)
|
||||||
|
|
||||||
|
|
||||||
|
def update_uptime(checks: dict):
    """Update uptime stats from health check results.

    Args:
        checks: Mapping of service name -> bool health result, e.g.
            ``{'ezra': True, 'allegro': True, 'bezalel': True, 'gitea': True}``.

    Returns:
        The updated ``uptime`` sub-dict, or None when the tracker has not
        been initialized yet.

    Side effects: appends this round to the uptime history (capped at 1000
    entries), recomputes the rolling uptime percentage over the last 100
    rounds, records newly crossed Fibonacci milestones, and saves to disk.
    """
    data = load()
    if not data:
        return

    data["uptime"]["total_checks"] += 1
    successes = sum(1 for v in checks.values() if v)
    total = len(checks)

    # Uptime percentage for this single round of checks.
    overall = successes / max(total, 1) * 100.0
    data["uptime"]["successful_checks"] += successes

    # Rolling history (created lazily for data files written before it existed).
    if "history" not in data["uptime"]:
        data["uptime"]["history"] = []
    data["uptime"]["history"].append({
        "ts": datetime.now(timezone.utc).isoformat(),
        "checks": checks,
        "overall": round(overall, 2)
    })

    # Keep last 1000 checks
    if len(data["uptime"]["history"]) > 1000:
        data["uptime"]["history"] = data["uptime"]["history"][-1000:]

    # Current uptime % = mean of the last 100 per-round percentages.
    recent = data["uptime"]["history"][-100:]
    recent_ok = sum(c["overall"] for c in recent) / max(len(recent), 1)
    data["uptime"]["current_pct"] = round(recent_ok, 2)

    # Record each Fibonacci milestone at most once.
    new_milestones = []
    for fib in FIBONACCI:
        if fib not in data["uptime"]["milestones_reached"] and recent_ok >= fib:
            data["uptime"]["milestones_reached"].append(fib)
            new_milestones.append(fib)

    save(data)

    if new_milestones:
        # BUG FIX: the join() call previously sat *inside* an unparenthesized
        # generator expression inside the f-string, which is a SyntaxError.
        # Join the formatted milestone strings instead.
        print(f" UPTIME MILESTONE: {', '.join(str(m) + '%' for m in new_milestones)}")
        print(f" Current uptime: {recent_ok:.1f}%")

    return data["uptime"]
|
||||||
|
|
||||||
|
|
||||||
|
def spend_capacity(amount: float, purpose: str):
    """Deduct *amount* capacity for *purpose* and log the expenditure.

    Returns True on success, False when the tracker is uninitialized or
    the remaining capacity is insufficient.
    """
    data = load()
    if not data:
        return False

    available = data["capacity"]["current"]
    if available < amount:
        print(f" INSUFFICIENT CAPACITY: Need {amount}, have {available:.1f}")
        return False

    data["capacity"]["current"] = available - amount
    expenditure = {
        "purpose": purpose,
        "amount": amount,
        "ts": datetime.now(timezone.utc).isoformat()
    }
    data["capacity"]["spent_on"].append(expenditure)
    save(data)
    print(f" Spent {amount} capacity on: {purpose}")
    return True
|
||||||
|
|
||||||
|
|
||||||
|
def regenerate_resources():
    """Regenerate capacity over elapsed wall time and accrue innovation.

    Innovation only accrues while capacity utilization is below
    INNOVATION_THRESHOLD, scaled by how far below the threshold it sits.
    Returns the updated data dict, or None when uninitialized or when
    called again within ~6 minutes of the last update.
    """
    data = load()
    if not data:
        return

    now = time.time()
    elapsed_hours = (now - data["innovation"]["last_calculated"]) / 3600.0
    if elapsed_hours < 0.1:  # Only update every ~6 minutes
        return

    cap = data["capacity"]
    cap["current"] = min(cap["max"], cap["current"] + CAPACITY_REGEN_RATE * elapsed_hours)

    # Utilization is measured *after* regeneration.
    utilization = 1.0 - cap["current"] / cap["max"]

    # Innovation generates only under the tension threshold.
    gained = 0.0
    if utilization < INNOVATION_THRESHOLD:
        gained = INNOVATION_RATE * elapsed_hours * (1.0 - utilization / INNOVATION_THRESHOLD)
        data["innovation"]["current"] += gained
        data["innovation"]["total_generated"] += gained

    # Record a history point (list created lazily for older data files).
    cap.setdefault("history", []).append({
        "ts": datetime.now(timezone.utc).isoformat(),
        "capacity": round(cap["current"], 1),
        "utilization": round(utilization * 100, 1),
        "innovation": round(data["innovation"]["current"], 1),
        "innovation_gain": round(gained, 1)
    })
    # Keep last 500 capacity records
    if len(cap["history"]) > 500:
        cap["history"] = cap["history"][-500:]

    data["innovation"]["last_calculated"] = now

    save(data)
    print(f" Capacity: {cap['current']:.1f}/{cap['max']:.1f}")
    print(f" Utilization: {utilization*100:.1f}%")
    print(f" Innovation: {data['innovation']['current']:.1f} (+{gained:.1f} this period)")

    return data
|
||||||
|
|
||||||
|
|
||||||
|
def status():
    """Print the current capacity / innovation / uptime report and phase gates."""
    data = load()
    if not data:
        print("Resource tracker not initialized. Run --init first.")
        return

    cap = data["capacity"]
    inno = data["innovation"]
    up = data["uptime"]

    print("\n=== Fleet Resources ===")
    print(f" Capacity: {cap['current']:.1f}/{cap['max']:.1f}")

    utilization = 1.0 - cap["current"] / cap["max"]
    print(f" Utilization: {utilization*100:.1f}%")

    # Innovation is blocked whenever utilization sits at/above the threshold.
    innovation_status = "GENERATING" if utilization < INNOVATION_THRESHOLD else "BLOCKED"
    print(f" Innovation: {inno['current']:.1f} [{innovation_status}]")

    print(f" Uptime: {up['current_pct']:.1f}%")
    print(f" Milestones: {', '.join(str(m)+'%' for m in up['milestones_reached']) or 'None yet'}")

    # Phase gates: later phases also require the 95% uptime floor.
    phase_2_ok = up["current_pct"] >= 95.0
    phase_3_ok = phase_2_ok and inno["current"] > 100
    phase_5_ok = phase_2_ok and inno["current"] > 500

    print("\n Phase Gates:")
    print(f" Phase 2 (Automation): {'UNLOCKED' if phase_2_ok else 'LOCKED (need 95% uptime)'}")
    print(f" Phase 3 (Orchestration): {'UNLOCKED' if phase_3_ok else 'LOCKED (need 95% uptime + 100 innovation)'}")
    print(f" Phase 5 (Scale): {'UNLOCKED' if phase_5_ok else 'LOCKED (need 95% uptime + 500 innovation)'}")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # CLI: `status` prints the report only, `regen` regenerates only,
    # and no argument does both (regenerate, then report).
    import sys

    init()
    command = sys.argv[1] if len(sys.argv) > 1 else ""
    if command == "status":
        status()
    elif command == "regen":
        regenerate_resources()
    else:
        regenerate_resources()
        status()
|
||||||
@@ -146,6 +146,7 @@ class PullRequest:
|
|||||||
additions: int = 0
|
additions: int = 0
|
||||||
deletions: int = 0
|
deletions: int = 0
|
||||||
created_at: str = ""
|
created_at: str = ""
|
||||||
|
updated_at: str = ""
|
||||||
closed_at: str = ""
|
closed_at: str = ""
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
@@ -166,6 +167,7 @@ class PullRequest:
|
|||||||
additions=d.get("additions", 0),
|
additions=d.get("additions", 0),
|
||||||
deletions=d.get("deletions", 0),
|
deletions=d.get("deletions", 0),
|
||||||
created_at=d.get("created_at", ""),
|
created_at=d.get("created_at", ""),
|
||||||
|
updated_at=d.get("updated_at", ""),
|
||||||
closed_at=d.get("closed_at", ""),
|
closed_at=d.get("closed_at", ""),
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -314,6 +316,7 @@ class GiteaClient:
|
|||||||
direction: str = "desc",
|
direction: str = "desc",
|
||||||
limit: int = 30,
|
limit: int = 30,
|
||||||
page: int = 1,
|
page: int = 1,
|
||||||
|
since: Optional[str] = None,
|
||||||
) -> list[Issue]:
|
) -> list[Issue]:
|
||||||
"""List issues for a repo."""
|
"""List issues for a repo."""
|
||||||
raw = self._get(
|
raw = self._get(
|
||||||
@@ -326,6 +329,7 @@ class GiteaClient:
|
|||||||
direction=direction,
|
direction=direction,
|
||||||
limit=limit,
|
limit=limit,
|
||||||
page=page,
|
page=page,
|
||||||
|
since=since,
|
||||||
)
|
)
|
||||||
return [Issue.from_dict(i) for i in raw]
|
return [Issue.from_dict(i) for i in raw]
|
||||||
|
|
||||||
|
|||||||
14
hermes-sovereign/mempalace/__init__.py
Normal file
14
hermes-sovereign/mempalace/__init__.py
Normal file
@@ -0,0 +1,14 @@
|
|||||||
|
"""MemPalace integration for Hermes sovereign agent.
|
||||||
|
|
||||||
|
Provides:
|
||||||
|
- mempalace.py: PalaceRoom + Mempalace classes for analytical workflows
|
||||||
|
- retrieval_enforcer.py: L0-L5 retrieval order enforcement
|
||||||
|
- wakeup.py: Session wake-up protocol (~300-900 tokens)
|
||||||
|
- scratchpad.py: JSON-based session scratchpad with palace promotion
|
||||||
|
|
||||||
|
Epic: #367
|
||||||
|
"""
|
||||||
|
|
||||||
|
from .mempalace import Mempalace, PalaceRoom, analyse_issues
|
||||||
|
|
||||||
|
__all__ = ["Mempalace", "PalaceRoom", "analyse_issues"]
|
||||||
225
hermes-sovereign/mempalace/mempalace.py
Normal file
225
hermes-sovereign/mempalace/mempalace.py
Normal file
@@ -0,0 +1,225 @@
|
|||||||
|
"""
|
||||||
|
---
|
||||||
|
title: Mempalace — Analytical Workflow Memory Framework
|
||||||
|
description: Applies spatial memory palace organization to analytical tasks (issue triage, repo audits, backlog analysis) for faster, more consistent results.
|
||||||
|
conditions:
|
||||||
|
- Analytical workflows over structured data (issues, PRs, repos)
|
||||||
|
- Repetitive triage or audit tasks where pattern recall improves speed
|
||||||
|
- Multi-repository scanning requiring consistent mental models
|
||||||
|
---
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
import time
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class PalaceRoom:
    """One 'room' of the palace: a labelled bag of facts for a single analytical dimension."""

    name: str                                            # machine key for the room
    label: str                                           # human-readable heading
    contents: dict[str, Any] = field(default_factory=dict)
    entered_at: float = field(default_factory=time.time)  # creation timestamp

    def store(self, key: str, value: Any) -> None:
        """Record one fact in this room (overwrites an existing key)."""
        self.contents[key] = value

    def retrieve(self, key: str, default: Any = None) -> Any:
        """Look up a fact, falling back to *default* when absent."""
        return self.contents.get(key, default)

    def summary(self) -> str:
        """Render the room as a markdown section: heading plus one line per fact."""
        body = [f" {k}: {v}" for k, v in self.contents.items()]
        return "\n".join([f"## {self.label}", *body])
|
||||||
|
|
||||||
|
|
||||||
|
class Mempalace:
    """Spatial memory palace for analytical workflows.

    Organises multi-dimensional data about a domain (e.g. Gitea issues)
    into named rooms — one per analytical dimension — so observations are
    always traversed in a consistent order.

    Standard rooms for issue-analysis workflows:
        repo_architecture    Repository structure and inter-repo relationships
        assignment_status    Assigned vs unassigned issue distribution
        triage_priority      Priority / urgency levels (the "lighting system")
        resolution_patterns  Historical resolution trends and velocity

    Usage:
        >>> palace = Mempalace.for_issue_analysis()
        >>> palace.enter("repo_architecture")
        >>> palace.store("total_repos", 11)
        >>> print(palace.render())
    """

    def __init__(self, domain: str = "general") -> None:
        self.domain = domain
        self._rooms: dict[str, PalaceRoom] = {}
        self._current_room: str | None = None   # key of the active room, if any
        self._created_at: float = time.time()   # for traversal-time reporting

    # ------------------------------------------------------------------
    # Factory constructors for common analytical domains
    # ------------------------------------------------------------------

    @classmethod
    def _prewired(cls, domain: str, rooms: list[tuple[str, str]]) -> "Mempalace":
        """Build a palace for *domain* with a fixed set of (key, label) rooms."""
        palace = cls(domain=domain)
        for key, label in rooms:
            palace.add_room(key, label)
        return palace

    @classmethod
    def for_issue_analysis(cls) -> "Mempalace":
        """Pre-wired palace for Gitea / forge issue-analysis workflows."""
        return cls._prewired("issue_analysis", [
            ("repo_architecture", "Repository Architecture Room"),
            ("assignment_status", "Issue Assignment Status Room"),
            ("triage_priority", "Triage Priority Room"),
            ("resolution_patterns", "Resolution Patterns Room"),
        ])

    @classmethod
    def for_health_check(cls) -> "Mempalace":
        """Pre-wired palace for CI / deployment health-check workflows."""
        return cls._prewired("health_check", [
            ("service_topology", "Service Topology Room"),
            ("failure_signals", "Failure Signals Room"),
            ("recovery_history", "Recovery History Room"),
        ])

    @classmethod
    def for_code_review(cls) -> "Mempalace":
        """Pre-wired palace for code-review / PR triage workflows."""
        return cls._prewired("code_review", [
            ("change_scope", "Change Scope Room"),
            ("risk_surface", "Risk Surface Room"),
            ("test_coverage", "Test Coverage Room"),
            ("reviewer_context", "Reviewer Context Room"),
        ])

    # ------------------------------------------------------------------
    # Room management
    # ------------------------------------------------------------------

    def add_room(self, key: str, label: str) -> PalaceRoom:
        """Create (or replace) a room under *key* and return it."""
        self._rooms[key] = PalaceRoom(name=key, label=label)
        return self._rooms[key]

    def enter(self, room_key: str) -> PalaceRoom:
        """Make *room_key* the active room; raises KeyError when unknown."""
        known = self._rooms
        if room_key not in known:
            raise KeyError(f"No room '{room_key}' in palace. Available: {list(known)}")
        self._current_room = room_key
        return known[room_key]

    def store(self, key: str, value: Any) -> None:
        """Store a value in the currently active room."""
        active = self._current_room
        if active is None:
            raise RuntimeError("Enter a room before storing values.")
        self._rooms[active].store(key, value)

    def retrieve(self, room_key: str, key: str, default: Any = None) -> Any:
        """Fetch a value from a named room; *default* when room or key is absent."""
        room = self._rooms.get(room_key)
        if room is None:
            return default
        return room.retrieve(key, default)

    # ------------------------------------------------------------------
    # Rendering
    # ------------------------------------------------------------------

    def render(self) -> str:
        """Return a human-readable summary of the entire palace."""
        elapsed = time.time() - self._created_at
        parts = [
            f"# Mempalace — {self.domain}",
            f"_traversal time: {elapsed:.2f}s | rooms: {len(self._rooms)}_",
            "",
        ]
        for room in self._rooms.values():
            parts.extend((room.summary(), ""))
        return "\n".join(parts)

    def to_dict(self) -> dict:
        """JSON-safe snapshot: domain, elapsed seconds, and raw room contents."""
        return {
            "domain": self.domain,
            "elapsed_seconds": round(time.time() - self._created_at, 3),
            "rooms": {name: room.contents for name, room in self._rooms.items()},
        }

    def to_json(self) -> str:
        """Serialize :meth:`to_dict` as pretty-printed JSON."""
        return json.dumps(self.to_dict(), indent=2)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Skill entry-point
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def analyse_issues(
    repos_data: list[dict],
    target_assignee_rate: float = 0.80,
) -> str:
    """Apply the mempalace technique to a list of repo issue summaries.

    Args:
        repos_data: Dicts with keys ``repo``, ``open_issues``, ``assigned``,
            ``unassigned``.
        target_assignee_rate: Minimum acceptable assignee-coverage ratio
            (default 0.80).

    Returns:
        Rendered palace summary with coverage assessment.
    """
    palace = Mempalace.for_issue_analysis()

    issue_counts = [r.get("open_issues", 0) for r in repos_data]
    total_issues = sum(issue_counts)

    # Room 1: repository architecture.
    room = palace.enter("repo_architecture")
    room.store("repos_sampled", len(repos_data))
    room.store("repos_with_issues", sum(1 for n in issue_counts if n > 0))
    room.store("total_open_issues", total_issues)
    room.store(
        "avg_issues_per_repo",
        round(total_issues / len(repos_data), 1) if repos_data else 0,
    )

    # Room 2: assignment status and coverage verdict.
    room = palace.enter("assignment_status")
    total_assigned = sum(r.get("assigned", 0) for r in repos_data)
    total_unassigned = sum(r.get("unassigned", 0) for r in repos_data)
    coverage = total_assigned / total_issues if total_issues else 0
    room.store("assigned", total_assigned)
    room.store("unassigned", total_unassigned)
    room.store("coverage_rate", round(coverage, 3))
    room.store(
        "coverage_status",
        "OK" if coverage >= target_assignee_rate else f"BELOW TARGET ({target_assignee_rate:.0%})",
    )

    # Room 3: triage priority — which repos still have unassigned issues.
    room = palace.enter("triage_priority")
    room.store(
        "repos_needing_triage",
        [r["repo"] for r in repos_data if r.get("unassigned", 0) > 0],
    )
    room.store("triage_count", total_unassigned)

    # Room 4: resolution patterns (metadata about this analysis).
    room = palace.enter("resolution_patterns")
    room.store("technique", "mempalace")
    room.store("target_assignee_rate", target_assignee_rate)

    return palace.render()
|
||||||
277
hermes-sovereign/mempalace/retrieval_enforcer.py
Normal file
277
hermes-sovereign/mempalace/retrieval_enforcer.py
Normal file
@@ -0,0 +1,277 @@
|
|||||||
|
"""Retrieval Order Enforcer — L0 through L5 memory hierarchy.
|
||||||
|
|
||||||
|
Ensures the agent checks durable memory before falling back to free generation.
|
||||||
|
Gracefully degrades if any layer is unavailable (ONNX issues, missing files, etc).
|
||||||
|
|
||||||
|
Layer order:
|
||||||
|
L0: Identity (~/.mempalace/identity.txt)
|
||||||
|
L1: Palace rooms (mempalace CLI search)
|
||||||
|
L2: Session scratch (~/.hermes/scratchpad/{session_id}.json)
|
||||||
|
L3: Gitea artifacts (API search for issues/PRs)
|
||||||
|
L4: Procedures (skills directory search)
|
||||||
|
L5: Free generation (only if L0-L4 produced nothing)
|
||||||
|
|
||||||
|
Refs: Epic #367, Sub-issue #369
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import subprocess
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Constants
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
IDENTITY_PATH = Path.home() / ".mempalace" / "identity.txt"
|
||||||
|
SCRATCHPAD_DIR = Path.home() / ".hermes" / "scratchpad"
|
||||||
|
SKILLS_DIR = Path.home() / ".hermes" / "skills"
|
||||||
|
MEMPALACE_BIN = "/Library/Frameworks/Python.framework/Versions/3.12/bin/mempalace"
|
||||||
|
|
||||||
|
# Patterns that indicate a recall-style query
|
||||||
|
RECALL_PATTERNS = re.compile(
|
||||||
|
r"(?i)\b("
|
||||||
|
r"what did|status of|remember|last time|yesterday|previously|"
|
||||||
|
r"we discussed|we talked|we worked|you said|you mentioned|"
|
||||||
|
r"remind me|what was|what were|how did|when did|"
|
||||||
|
r"earlier today|last session|before this"
|
||||||
|
r")\b"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# L0: Identity
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def load_identity() -> str:
    """Read the agent identity file (L0). Returns empty string on failure.

    Output is capped at ~200 words to keep the wake-up context lean.
    """
    try:
        if not IDENTITY_PATH.exists():
            return ""
        text = IDENTITY_PATH.read_text(encoding="utf-8").strip()
        words = text.split()
        # Cap at ~200 tokens to keep wake-up lean.
        if len(words) > 200:
            return " ".join(words[:200]) + "..."
        return text
    except (OSError, PermissionError):
        return ""
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# L1: Palace search
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def search_palace(query: str) -> str:
    """Search the mempalace CLI (L1) for relevant memories.

    Degrades gracefully (returns "") when the binary is missing, times
    out, or errors — e.g. the ONNX issues tracked in #373.
    """
    binary = MEMPALACE_BIN if os.path.exists(MEMPALACE_BIN) else "mempalace"
    try:
        proc = subprocess.run(
            [binary, "search", query],
            capture_output=True,
            text=True,
            timeout=10,
        )
    except (FileNotFoundError, subprocess.TimeoutExpired, OSError):
        return ""
    output = proc.stdout.strip()
    if proc.returncode == 0 and output:
        return output
    return ""
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# L2: Session scratchpad
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def load_scratchpad(session_id: str) -> str:
    """Load the session scratchpad (L2) as ' key: value' lines, or ''.

    Returns "" when the file is missing, unreadable, invalid JSON, or not
    a non-empty dict.
    """
    scratch_file = SCRATCHPAD_DIR / f"{session_id}.json"
    try:
        if not scratch_file.exists():
            return ""
        data = json.loads(scratch_file.read_text(encoding="utf-8"))
    except (OSError, json.JSONDecodeError):
        return ""
    if isinstance(data, dict) and data:
        return "\n".join(f" {k}: {v}" for k, v in data.items())
    return ""
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# L3: Gitea artifact search
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def _load_gitea_token() -> str:
    """Read the Gitea API token from ~/.hermes/gitea_token_vps, or ''."""
    token_path = Path.home() / ".hermes" / "gitea_token_vps"
    try:
        if token_path.exists():
            return token_path.read_text(encoding="utf-8").strip()
    except OSError:
        pass
    return ""
|
||||||
|
|
||||||
|
|
||||||
|
def search_gitea(query: str) -> str:
    """Search Gitea repositories (L3) for context. Returns text or ''.

    Requires a token on disk; any network, auth, or parse failure
    degrades gracefully to the empty string.
    """
    token = _load_gitea_token()
    if not token:
        return ""

    api_base = "https://forge.alexanderwhitestone.com/api/v1"
    # Use up to three significant words (>3 chars) as the search query.
    terms = [w for w in query.split() if len(w) > 3][:3]
    search_q = " ".join(terms) if terms else query[:50]

    try:
        import urllib.request
        import urllib.parse

        url = f"{api_base}/repos/search?q={urllib.parse.quote(search_q)}&limit=3"
        headers = {
            "Authorization": f"token {token}",
            "Accept": "application/json",
        }
        req = urllib.request.Request(url, headers=headers)
        with urllib.request.urlopen(req, timeout=8) as resp:
            payload = json.loads(resp.read().decode())
        repos = payload.get("data")
        if repos:
            return "\n".join(
                f" {repo['full_name']}: {repo.get('description', 'no desc')}"
                for repo in repos[:3]
            )
    except Exception:
        # Best-effort layer: swallow everything and fall through to "".
        pass
    return ""
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# L4: Procedures (skills search)
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def search_skills(query: str) -> str:
    """Search the skills directory (L4) for SKILL.md files matching *query*.

    A skill matches when any significant (>3 char) query word appears
    anywhere in its SKILL.md (case-insensitive). Returns up to five
    ' skill: <name>' lines, or '' when nothing matches or on I/O errors.
    """
    try:
        if not SKILLS_DIR.exists():
            return ""

        terms = [w for w in query.lower().split() if len(w) > 3]
        if not terms:
            return ""

        hits = []
        for entry in SKILLS_DIR.iterdir():
            if not entry.is_dir():
                continue
            skill_md = entry / "SKILL.md"
            if not skill_md.exists():
                continue
            try:
                text = skill_md.read_text(encoding="utf-8").lower()
            except OSError:
                continue
            if any(term in text for term in terms):
                # The directory name doubles as the skill title.
                hits.append(f" skill: {entry.name}")

        if hits:
            return "\n".join(hits[:5])
    except OSError:
        pass
    return ""
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Main enforcer
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def is_recall_query(query: str) -> bool:
    """True when *query* looks like a request for recalled/historical info."""
    return RECALL_PATTERNS.search(query) is not None
|
||||||
|
|
||||||
|
|
||||||
|
def enforce_retrieval_order(
    query: str,
    session_id: Optional[str] = None,
    skip_if_not_recall: bool = True,
) -> dict:
    """Consult memory layers L0-L5 in order before allowing free generation.

    Args:
        query: The user's query text.
        session_id: Current session ID for scratchpad access.
        skip_if_not_recall: If True (default), non-recall queries skip
            enforcement entirely and an empty result is returned.

    Returns:
        dict with keys:
            retrieved_from: highest-priority layer that produced results
                ('L1'..'L4'), 'L5' when none did, or None when skipped.
            context: aggregated context string (identity + layer sections).
            tokens: approximate word count of the context.
            layers_checked: layers that contributed or were reached.
    """
    result = {
        "retrieved_from": None,
        "context": "",
        "tokens": 0,
        "layers_checked": [],
    }

    # Gate: only recall-style queries get enforcement (when configured).
    if skip_if_not_recall and not is_recall_query(query):
        return result

    sections = []

    # L0: identity is always prepended but never counts as a retrieval hit.
    identity = load_identity()
    if identity:
        sections.append(f"## Identity\n{identity}\n\n")
        result["layers_checked"].append("L0")

    # L1: durable palace memory.
    palace_hit = search_palace(query)
    if palace_hit:
        sections.append(f"## Palace Memory\n{palace_hit}\n\n")
        result["retrieved_from"] = "L1"
        result["layers_checked"].append("L1")

    # L2: session scratchpad — consulted even after an L1 hit for extra context.
    if session_id:
        scratch = load_scratchpad(session_id)
        if scratch:
            sections.append(f"## Session Notes\n{scratch}\n\n")
            if result["retrieved_from"] is None:
                result["retrieved_from"] = "L2"
            result["layers_checked"].append("L2")

    # L3: Gitea artifacts — only when L1/L2 found nothing.
    if result["retrieved_from"] is None:
        artifacts = search_gitea(query)
        if artifacts:
            sections.append(f"## Gitea Context\n{artifacts}\n\n")
            result["retrieved_from"] = "L3"
            result["layers_checked"].append("L3")

    # L4: skills/procedures — last structured fallback.
    if result["retrieved_from"] is None:
        procedures = search_skills(query)
        if procedures:
            sections.append(f"## Related Skills\n{procedures}\n\n")
            result["retrieved_from"] = "L4"
            result["layers_checked"].append("L4")

    # L5: free generation — nothing durable was found, just mark it.
    if result["retrieved_from"] is None:
        result["retrieved_from"] = "L5"
        result["layers_checked"].append("L5")

    result["context"] = "".join(sections)
    result["tokens"] = len(result["context"].split())
    return result
|
||||||
184
hermes-sovereign/mempalace/scratchpad.py
Normal file
184
hermes-sovereign/mempalace/scratchpad.py
Normal file
@@ -0,0 +1,184 @@
|
|||||||
|
"""Session Scratchpad — ephemeral key-value notes per session.
|
||||||
|
|
||||||
|
Provides fast, JSON-backed scratch storage that lives for a session
|
||||||
|
and can be promoted to durable palace memory.
|
||||||
|
|
||||||
|
Storage: ~/.hermes/scratchpad/{session_id}.json
|
||||||
|
|
||||||
|
Refs: Epic #367, Sub-issue #372
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import subprocess
|
||||||
|
import time
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any, Optional
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Constants
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
SCRATCHPAD_DIR = Path.home() / ".hermes" / "scratchpad"
|
||||||
|
MEMPALACE_BIN = "/Library/Frameworks/Python.framework/Versions/3.12/bin/mempalace"
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Internal helpers
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def _scratch_path(session_id: str) -> Path:
    """Map *session_id* to its on-disk JSON file.

    Characters outside [A-Za-z0-9-_] are dropped to block path
    traversal; an id that sanitizes to nothing becomes "unnamed".
    """
    kept = [ch for ch in session_id if ch.isalnum() or ch in "-_"]
    safe_id = "".join(kept) or "unnamed"
    return SCRATCHPAD_DIR / (safe_id + ".json")
|
||||||
|
|
||||||
|
|
||||||
|
def _load(session_id: str) -> dict:
    """Read the session's scratchpad JSON; any failure yields {}."""
    target = _scratch_path(session_id)
    try:
        if not target.exists():
            return {}
        raw = target.read_text(encoding="utf-8")
        return json.loads(raw)
    except (OSError, json.JSONDecodeError):
        # Unreadable or corrupt file — treat as an empty scratchpad.
        return {}
|
||||||
|
|
||||||
|
|
||||||
|
def _save(session_id: str, data: dict) -> None:
    """Persist scratchpad *data* to disk, creating the directory as needed."""
    SCRATCHPAD_DIR.mkdir(parents=True, exist_ok=True)
    # default=str keeps non-JSON-native values (e.g. datetimes) serializable.
    serialized = json.dumps(data, indent=2, default=str)
    _scratch_path(session_id).write_text(serialized, encoding="utf-8")
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Public API
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def write_scratch(session_id: str, key: str, value: Any) -> None:
    """Store a note under *key* in the session scratchpad.

    Args:
        session_id: Current session identifier.
        key: Note key (string).
        value: Note value (any JSON-serializable type).
    """
    timestamp = time.strftime("%Y-%m-%d %H:%M:%S")
    notes = _load(session_id)
    notes[key] = {"value": value, "written_at": timestamp}
    _save(session_id, notes)
|
||||||
|
|
||||||
|
|
||||||
|
def read_scratch(session_id: str, key: Optional[str] = None) -> dict:
    """Read session scratchpad (all keys or one).

    Args:
        session_id: Current session identifier.
        key: Optional specific key. If None, returns all entries.

    Returns:
        dict — either {key: {value, written_at}} or the full scratchpad.
    """
    data = _load(session_id)
    if key is None:
        return data
    # Membership test (not truthiness) so a present-but-falsy entry —
    # e.g. a hand-edited null in the JSON file — is still returned
    # instead of being silently reported as missing.
    if key in data:
        return {key: data[key]}
    return {}
|
||||||
|
|
||||||
|
|
||||||
|
def delete_scratch(session_id: str, key: str) -> bool:
    """Remove a single key from the scratchpad.

    Returns True if the key existed and was removed.
    """
    notes = _load(session_id)
    if key not in notes:
        return False
    notes.pop(key)
    _save(session_id, notes)
    return True
|
||||||
|
|
||||||
|
|
||||||
|
def list_sessions() -> list[str]:
    """Return session IDs that currently have scratchpad files."""
    sessions: list[str] = []
    try:
        if SCRATCHPAD_DIR.exists():
            for entry in SCRATCHPAD_DIR.iterdir():
                if entry.is_file() and entry.suffix == ".json":
                    sessions.append(entry.stem)
    except OSError:
        # Directory vanished or is unreadable — report no sessions.
        return []
    return sessions
|
||||||
|
|
||||||
|
|
||||||
|
def promote_to_palace(
    session_id: str,
    key: str,
    room: str = "general",
    drawer: Optional[str] = None,
) -> bool:
    """Move a scratchpad note to durable palace memory.

    Shells out to the mempalace CLI to store the note in *room*, then
    removes the note from the scratchpad once storage is confirmed.

    Args:
        session_id: Session containing the note.
        key: Scratchpad key to promote.
        room: Palace room name (default: 'general').
        drawer: Optional drawer name within the room. Defaults to key.

    Returns:
        True if promotion succeeded, False otherwise.
    """
    notes = _load(session_id)
    entry = notes.get(key)
    if not entry:
        return False

    # Entries written by write_scratch wrap the payload in {"value": ...};
    # accept raw entries too for robustness.
    if isinstance(entry, dict):
        payload = entry.get("value", entry)
    else:
        payload = entry
    if isinstance(payload, str):
        content = payload
    else:
        content = json.dumps(payload, default=str)

    try:
        cli = MEMPALACE_BIN if os.path.exists(MEMPALACE_BIN) else "mempalace"
        proc = subprocess.run(
            [cli, "store", room, drawer or key, content],
            capture_output=True,
            text=True,
            timeout=10,
        )
        if proc.returncode == 0:
            # Only drop the note once durable storage is confirmed.
            del notes[key]
            _save(session_id, notes)
            return True
    except (FileNotFoundError, subprocess.TimeoutExpired, OSError):
        # mempalace CLI not available — degrade gracefully.
        pass

    return False
|
||||||
|
|
||||||
|
|
||||||
|
def clear_session(session_id: str) -> bool:
    """Delete the entire scratchpad for a session.

    Returns True if the file existed and was removed.
    """
    target = _scratch_path(session_id)
    try:
        if not target.exists():
            return False
        target.unlink()
    except OSError:
        # Could not stat or remove — treat as "nothing cleared".
        return False
    return True
|
||||||
0
hermes-sovereign/mempalace/tests/__init__.py
Normal file
0
hermes-sovereign/mempalace/tests/__init__.py
Normal file
180
hermes-sovereign/mempalace/tests/test_mempalace.py
Normal file
180
hermes-sovereign/mempalace/tests/test_mempalace.py
Normal file
@@ -0,0 +1,180 @@
|
|||||||
|
"""Tests for the mempalace skill.
|
||||||
|
|
||||||
|
Validates PalaceRoom, Mempalace class, factory constructors,
|
||||||
|
and the analyse_issues entry-point.
|
||||||
|
|
||||||
|
Refs: Epic #367, Sub-issue #368
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
import sys
|
||||||
|
import os
|
||||||
|
import time
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
# Ensure the package is importable from the repo layout
|
||||||
|
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", ".."))
|
||||||
|
|
||||||
|
from mempalace.mempalace import Mempalace, PalaceRoom, analyse_issues
|
||||||
|
|
||||||
|
|
||||||
|
# ── PalaceRoom unit tests ─────────────────────────────────────────────────
|
||||||
|
|
||||||
|
class TestPalaceRoom:
    """Unit tests for the PalaceRoom key-value container."""

    def test_store_and_retrieve(self):
        room = PalaceRoom(name="test", label="Test Room")
        room.store("key1", 42)
        assert room.retrieve("key1") == 42

    def test_retrieve_default(self):
        room = PalaceRoom(name="test", label="Test Room")
        assert room.retrieve("missing") is None
        assert room.retrieve("missing", "fallback") == "fallback"

    def test_summary_format(self):
        room = PalaceRoom(name="test", label="Test Room")
        room.store("repos", 5)
        summary = room.summary()
        assert "## Test Room" in summary
        assert "repos: 5" in summary

    def test_contents_default_factory_isolation(self):
        """Each room gets its own dict — no shared mutable default."""
        r1 = PalaceRoom(name="a", label="A")
        r2 = PalaceRoom(name="b", label="B")
        r1.store("x", 1)
        assert r2.retrieve("x") is None

    def test_entered_at_is_recent(self):
        before = time.time()
        room = PalaceRoom(name="t", label="T")
        after = time.time()
        assert before <= room.entered_at <= after
|
||||||
|
|
||||||
|
|
||||||
|
# ── Mempalace core tests ──────────────────────────────────────────────────
|
||||||
|
|
||||||
|
class TestMempalace:
    """Core behaviors of the Mempalace container: rooms, storage, rendering."""

    def test_add_and_enter_room(self):
        p = Mempalace(domain="test")
        p.add_room("r1", "Room 1")
        room = p.enter("r1")
        assert room.name == "r1"

    def test_enter_nonexistent_room_raises(self):
        p = Mempalace()
        with pytest.raises(KeyError, match="No room"):
            p.enter("ghost")

    def test_store_without_enter_raises(self):
        # store() requires an active room selected via enter().
        p = Mempalace()
        p.add_room("r", "R")
        with pytest.raises(RuntimeError, match="Enter a room"):
            p.store("k", "v")

    def test_store_and_retrieve_via_palace(self):
        p = Mempalace()
        p.add_room("r", "R")
        p.enter("r")
        p.store("count", 10)
        assert p.retrieve("r", "count") == 10

    def test_retrieve_missing_room_returns_default(self):
        p = Mempalace()
        assert p.retrieve("nope", "key") is None
        assert p.retrieve("nope", "key", 99) == 99

    def test_render_includes_domain(self):
        p = Mempalace(domain="audit")
        p.add_room("r", "Room")
        p.enter("r")
        p.store("item", "value")
        output = p.render()
        assert "audit" in output
        assert "Room" in output

    def test_to_dict_structure(self):
        p = Mempalace(domain="test")
        p.add_room("r", "R")
        p.enter("r")
        p.store("a", 1)
        d = p.to_dict()
        assert d["domain"] == "test"
        assert "elapsed_seconds" in d
        assert d["rooms"]["r"] == {"a": 1}

    def test_to_json_is_valid(self):
        p = Mempalace(domain="j")
        p.add_room("x", "X")
        p.enter("x")
        p.store("v", [1, 2, 3])
        parsed = json.loads(p.to_json())
        assert parsed["rooms"]["x"]["v"] == [1, 2, 3]
|
||||||
|
|
||||||
|
|
||||||
|
# ── Factory constructor tests ─────────────────────────────────────────────
|
||||||
|
|
||||||
|
class TestFactories:
    """Factory constructors pre-create the expected domain rooms."""

    def test_for_issue_analysis_rooms(self):
        p = Mempalace.for_issue_analysis()
        assert p.domain == "issue_analysis"
        for key in ("repo_architecture", "assignment_status",
                    "triage_priority", "resolution_patterns"):
            p.enter(key)  # should not raise

    def test_for_health_check_rooms(self):
        p = Mempalace.for_health_check()
        assert p.domain == "health_check"
        for key in ("service_topology", "failure_signals", "recovery_history"):
            p.enter(key)

    def test_for_code_review_rooms(self):
        p = Mempalace.for_code_review()
        assert p.domain == "code_review"
        for key in ("change_scope", "risk_surface",
                    "test_coverage", "reviewer_context"):
            p.enter(key)
|
||||||
|
|
||||||
|
|
||||||
|
# ── analyse_issues entry-point tests ──────────────────────────────────────
|
||||||
|
|
||||||
|
class TestAnalyseIssues:
    """End-to-end checks for the analyse_issues entry point."""

    # Mixed assigned/unassigned counts plus one empty repo.
    SAMPLE_DATA = [
        {"repo": "the-nexus", "open_issues": 40, "assigned": 30, "unassigned": 10},
        {"repo": "timmy-home", "open_issues": 30, "assigned": 25, "unassigned": 5},
        {"repo": "hermes-agent", "open_issues": 20, "assigned": 15, "unassigned": 5},
        {"repo": "empty-repo", "open_issues": 0, "assigned": 0, "unassigned": 0},
    ]

    def test_returns_string(self):
        result = analyse_issues(self.SAMPLE_DATA)
        assert isinstance(result, str)
        assert len(result) > 0

    def test_contains_room_headers(self):
        result = analyse_issues(self.SAMPLE_DATA)
        assert "Repository Architecture" in result
        assert "Assignment Status" in result

    def test_coverage_below_target(self):
        # SAMPLE_DATA assignment rate is under 90%.
        result = analyse_issues(self.SAMPLE_DATA, target_assignee_rate=0.90)
        assert "BELOW TARGET" in result

    def test_coverage_meets_target(self):
        good_data = [
            {"repo": "a", "open_issues": 10, "assigned": 10, "unassigned": 0},
        ]
        result = analyse_issues(good_data, target_assignee_rate=0.80)
        assert "OK" in result

    def test_empty_repos_list(self):
        result = analyse_issues([])
        assert isinstance(result, str)

    def test_single_repo(self):
        data = [{"repo": "solo", "open_issues": 5, "assigned": 3, "unassigned": 2}]
        result = analyse_issues(data)
        assert "solo" in result or "issue_analysis" in result
|
||||||
143
hermes-sovereign/mempalace/tests/test_retrieval_enforcer.py
Normal file
143
hermes-sovereign/mempalace/tests/test_retrieval_enforcer.py
Normal file
@@ -0,0 +1,143 @@
|
|||||||
|
"""Tests for retrieval_enforcer.py.
|
||||||
|
|
||||||
|
Refs: Epic #367, Sub-issue #369
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import tempfile
|
||||||
|
from pathlib import Path
|
||||||
|
from unittest.mock import patch, MagicMock
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", ".."))
|
||||||
|
|
||||||
|
from mempalace.retrieval_enforcer import (
|
||||||
|
is_recall_query,
|
||||||
|
load_identity,
|
||||||
|
load_scratchpad,
|
||||||
|
enforce_retrieval_order,
|
||||||
|
search_skills,
|
||||||
|
RECALL_PATTERNS,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class TestRecallDetection:
    """Test the recall-query pattern matcher."""

    # Queries that reference past work/conversations must trigger retrieval.
    @pytest.mark.parametrize("query", [
        "what did we work on yesterday",
        "status of the mempalace integration",
        "remember the fleet audit results",
        "last time we deployed the nexus",
        "previously you mentioned a CI fix",
        "we discussed the sovereign deployment",
    ])
    def test_recall_queries_detected(self, query):
        assert is_recall_query(query) is True

    # Purely forward-looking/action queries must NOT trigger retrieval.
    @pytest.mark.parametrize("query", [
        "create a new file called test.py",
        "run the test suite",
        "deploy to production",
        "write a function that sums numbers",
        "install the package",
    ])
    def test_non_recall_queries_skipped(self, query):
        assert is_recall_query(query) is False
|
||||||
|
|
||||||
|
|
||||||
|
class TestLoadIdentity:
    """load_identity reads, truncates, and degrades gracefully."""

    def test_loads_existing_identity(self, tmp_path):
        identity_file = tmp_path / "identity.txt"
        identity_file.write_text("I am Timmy. A sovereign AI.")
        with patch("mempalace.retrieval_enforcer.IDENTITY_PATH", identity_file):
            result = load_identity()
            assert "Timmy" in result

    def test_returns_empty_on_missing_file(self, tmp_path):
        identity_file = tmp_path / "nonexistent.txt"
        with patch("mempalace.retrieval_enforcer.IDENTITY_PATH", identity_file):
            result = load_identity()
            assert result == ""

    def test_truncates_long_identity(self, tmp_path):
        # 300 words exceeds the cap, so the result must be truncated.
        identity_file = tmp_path / "identity.txt"
        identity_file.write_text(" ".join(["word"] * 300))
        with patch("mempalace.retrieval_enforcer.IDENTITY_PATH", identity_file):
            result = load_identity()
            assert result.endswith("...")
            assert len(result.split()) <= 201  # 200 words + "..."
|
||||||
|
|
||||||
|
|
||||||
|
class TestLoadScratchpad:
    """load_scratchpad formats entries and degrades gracefully on bad files."""

    def test_loads_valid_scratchpad(self, tmp_path):
        scratch_file = tmp_path / "session123.json"
        scratch_file.write_text(json.dumps({"note": "test value", "key2": 42}))
        with patch("mempalace.retrieval_enforcer.SCRATCHPAD_DIR", tmp_path):
            result = load_scratchpad("session123")
            assert "note: test value" in result
            assert "key2: 42" in result

    def test_returns_empty_on_missing_file(self, tmp_path):
        with patch("mempalace.retrieval_enforcer.SCRATCHPAD_DIR", tmp_path):
            result = load_scratchpad("nonexistent")
            assert result == ""

    def test_returns_empty_on_invalid_json(self, tmp_path):
        scratch_file = tmp_path / "bad.json"
        scratch_file.write_text("not valid json{{{")
        with patch("mempalace.retrieval_enforcer.SCRATCHPAD_DIR", tmp_path):
            result = load_scratchpad("bad")
            assert result == ""
|
||||||
|
|
||||||
|
|
||||||
|
class TestEnforceRetrievalOrder:
    """Layer ordering of enforce_retrieval_order (L0 identity → L5 free)."""

    def test_skips_non_recall_query(self):
        # Non-recall queries bypass retrieval entirely.
        result = enforce_retrieval_order("create a new file")
        assert result["retrieved_from"] is None
        assert result["tokens"] == 0

    def test_runs_for_recall_query(self, tmp_path):
        identity_file = tmp_path / "identity.txt"
        identity_file.write_text("I am Timmy.")
        with patch("mempalace.retrieval_enforcer.IDENTITY_PATH", identity_file), \
             patch("mempalace.retrieval_enforcer.search_palace", return_value=""), \
             patch("mempalace.retrieval_enforcer.search_gitea", return_value=""), \
             patch("mempalace.retrieval_enforcer.search_skills", return_value=""):
            result = enforce_retrieval_order("what did we work on yesterday")
            assert "Identity" in result["context"]
            assert "L0" in result["layers_checked"]

    def test_palace_hit_sets_l1(self, tmp_path):
        # A palace hit should short-circuit before the Gitea layer.
        identity_file = tmp_path / "identity.txt"
        identity_file.write_text("I am Timmy.")
        with patch("mempalace.retrieval_enforcer.IDENTITY_PATH", identity_file), \
             patch("mempalace.retrieval_enforcer.search_palace", return_value="Found: fleet audit results"), \
             patch("mempalace.retrieval_enforcer.search_gitea", return_value=""):
            result = enforce_retrieval_order("what did we discuss yesterday")
            assert result["retrieved_from"] == "L1"
            assert "Palace Memory" in result["context"]

    def test_falls_through_to_l5(self, tmp_path):
        # All layers empty → free generation (L5) is recorded.
        identity_file = tmp_path / "nonexistent.txt"
        with patch("mempalace.retrieval_enforcer.IDENTITY_PATH", identity_file), \
             patch("mempalace.retrieval_enforcer.search_palace", return_value=""), \
             patch("mempalace.retrieval_enforcer.search_gitea", return_value=""), \
             patch("mempalace.retrieval_enforcer.search_skills", return_value=""):
            result = enforce_retrieval_order("remember the old deployment", skip_if_not_recall=True)
            assert result["retrieved_from"] == "L5"

    def test_force_mode_skips_recall_check(self, tmp_path):
        identity_file = tmp_path / "identity.txt"
        identity_file.write_text("I am Timmy.")
        with patch("mempalace.retrieval_enforcer.IDENTITY_PATH", identity_file), \
             patch("mempalace.retrieval_enforcer.search_palace", return_value=""), \
             patch("mempalace.retrieval_enforcer.search_gitea", return_value=""), \
             patch("mempalace.retrieval_enforcer.search_skills", return_value=""):
            result = enforce_retrieval_order("deploy now", skip_if_not_recall=False)
            assert "Identity" in result["context"]
|
||||||
108
hermes-sovereign/mempalace/tests/test_scratchpad.py
Normal file
108
hermes-sovereign/mempalace/tests/test_scratchpad.py
Normal file
@@ -0,0 +1,108 @@
|
|||||||
|
"""Tests for scratchpad.py.
|
||||||
|
|
||||||
|
Refs: Epic #367, Sub-issue #372
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
from unittest.mock import patch
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", ".."))
|
||||||
|
|
||||||
|
from mempalace.scratchpad import (
|
||||||
|
write_scratch,
|
||||||
|
read_scratch,
|
||||||
|
delete_scratch,
|
||||||
|
list_sessions,
|
||||||
|
clear_session,
|
||||||
|
_scratch_path,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def scratch_dir(tmp_path):
    """Provide a temporary scratchpad directory."""
    # Patch the module-level dir so tests never touch ~/.hermes.
    with patch("mempalace.scratchpad.SCRATCHPAD_DIR", tmp_path):
        yield tmp_path
|
||||||
|
|
||||||
|
|
||||||
|
class TestScratchPath:
    """Sanitization behavior of _scratch_path."""

    def test_sanitizes_session_id(self):
        path = _scratch_path("safe-id_123")
        assert "safe-id_123.json" in str(path)

    def test_strips_dangerous_chars(self):
        path = _scratch_path("../../etc/passwd")
        assert ".." not in path.name
        assert "/" not in path.name
        # Dots are stripped, so only alphanumeric chars remain
        assert path.name == "etcpasswd.json"
|
||||||
|
|
||||||
|
|
||||||
|
class TestWriteAndRead:
    """Round-trip behavior of write_scratch / read_scratch."""

    def test_write_then_read(self, scratch_dir):
        write_scratch("sess1", "note", "hello world")
        result = read_scratch("sess1", "note")
        assert "note" in result
        assert result["note"]["value"] == "hello world"

    def test_read_all_keys(self, scratch_dir):
        write_scratch("sess1", "a", 1)
        write_scratch("sess1", "b", 2)
        result = read_scratch("sess1")
        assert "a" in result
        assert "b" in result

    def test_read_missing_key(self, scratch_dir):
        write_scratch("sess1", "exists", "yes")
        result = read_scratch("sess1", "missing")
        assert result == {}

    def test_read_missing_session(self, scratch_dir):
        result = read_scratch("nonexistent")
        assert result == {}

    def test_overwrite_key(self, scratch_dir):
        # Later writes to the same key replace the earlier value.
        write_scratch("sess1", "key", "v1")
        write_scratch("sess1", "key", "v2")
        result = read_scratch("sess1", "key")
        assert result["key"]["value"] == "v2"
|
||||||
|
|
||||||
|
|
||||||
|
class TestDelete:
    """delete_scratch removes keys and reports whether they existed."""

    def test_delete_existing_key(self, scratch_dir):
        write_scratch("sess1", "key", "val")
        assert delete_scratch("sess1", "key") is True
        assert read_scratch("sess1", "key") == {}

    def test_delete_missing_key(self, scratch_dir):
        write_scratch("sess1", "other", "val")
        assert delete_scratch("sess1", "missing") is False
|
||||||
|
|
||||||
|
|
||||||
|
class TestListSessions:
    """list_sessions enumerates sessions with scratchpad files."""

    def test_lists_sessions(self, scratch_dir):
        write_scratch("alpha", "k", "v")
        write_scratch("beta", "k", "v")
        sessions = list_sessions()
        assert "alpha" in sessions
        assert "beta" in sessions

    def test_empty_directory(self, scratch_dir):
        assert list_sessions() == []
|
||||||
|
|
||||||
|
|
||||||
|
class TestClearSession:
    """clear_session deletes the whole session file."""

    def test_clears_existing(self, scratch_dir):
        write_scratch("sess1", "k", "v")
        assert clear_session("sess1") is True
        assert read_scratch("sess1") == {}

    def test_clear_nonexistent(self, scratch_dir):
        assert clear_session("ghost") is False
|
||||||
100
hermes-sovereign/mempalace/tests/test_wakeup.py
Normal file
100
hermes-sovereign/mempalace/tests/test_wakeup.py
Normal file
@@ -0,0 +1,100 @@
|
|||||||
|
"""Tests for wakeup.py.
|
||||||
|
|
||||||
|
Refs: Epic #367, Sub-issue #372
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
from pathlib import Path
|
||||||
|
from unittest.mock import patch, MagicMock
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", ".."))
|
||||||
|
|
||||||
|
from mempalace.wakeup import (
|
||||||
|
palace_wakeup,
|
||||||
|
fleet_status_summary,
|
||||||
|
_load_identity,
|
||||||
|
_palace_context,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class TestLoadIdentity:
    """wakeup._load_identity reads the identity file or returns ""."""

    def test_loads_identity(self, tmp_path):
        f = tmp_path / "identity.txt"
        f.write_text("I am Timmy. A sovereign AI.")
        with patch("mempalace.wakeup.IDENTITY_PATH", f):
            result = _load_identity()
            assert "Timmy" in result

    def test_missing_identity(self, tmp_path):
        f = tmp_path / "nope.txt"
        with patch("mempalace.wakeup.IDENTITY_PATH", f):
            assert _load_identity() == ""
|
||||||
|
|
||||||
|
|
||||||
|
class TestFleetStatus:
    """fleet_status_summary parsing and failure modes."""

    def test_reads_fleet_json(self, tmp_path):
        f = tmp_path / "fleet_status.json"
        f.write_text(json.dumps({
            "Groq": {"state": "active", "last_seen": "2026-04-07"},
            "Ezra": {"state": "idle", "last_seen": "2026-04-06"},
        }))
        with patch("mempalace.wakeup.FLEET_STATUS_PATH", f):
            result = fleet_status_summary()
            assert "Fleet Status" in result
            assert "Groq" in result
            assert "active" in result

    def test_missing_fleet_file(self, tmp_path):
        f = tmp_path / "nope.json"
        with patch("mempalace.wakeup.FLEET_STATUS_PATH", f):
            assert fleet_status_summary() == ""

    def test_invalid_json(self, tmp_path):
        f = tmp_path / "bad.json"
        f.write_text("not json")
        with patch("mempalace.wakeup.FLEET_STATUS_PATH", f):
            assert fleet_status_summary() == ""
|
||||||
|
|
||||||
|
|
||||||
|
class TestPalaceWakeup:
    """palace_wakeup context assembly and cache behavior."""

    def test_generates_context_with_identity(self, tmp_path):
        identity = tmp_path / "identity.txt"
        identity.write_text("I am Timmy.")
        cache = tmp_path / "cache.txt"
        with patch("mempalace.wakeup.IDENTITY_PATH", identity), \
             patch("mempalace.wakeup.WAKEUP_CACHE_PATH", cache), \
             patch("mempalace.wakeup._palace_context", return_value=""), \
             patch("mempalace.wakeup.fleet_status_summary", return_value=""):
            result = palace_wakeup(force=True)
            assert "Identity" in result
            assert "Timmy" in result
            assert "Session" in result

    def test_uses_cache_when_fresh(self, tmp_path):
        cache = tmp_path / "cache.txt"
        cache.write_text("cached wake-up content")
        # Touch the file so it's fresh
        with patch("mempalace.wakeup.WAKEUP_CACHE_PATH", cache), \
             patch("mempalace.wakeup.WAKEUP_CACHE_TTL", 9999):
            result = palace_wakeup(force=False)
            assert result == "cached wake-up content"

    def test_force_bypasses_cache(self, tmp_path):
        cache = tmp_path / "cache.txt"
        cache.write_text("stale content")
        identity = tmp_path / "identity.txt"
        identity.write_text("I am Timmy.")
        with patch("mempalace.wakeup.WAKEUP_CACHE_PATH", cache), \
             patch("mempalace.wakeup.IDENTITY_PATH", identity), \
             patch("mempalace.wakeup._palace_context", return_value=""), \
             patch("mempalace.wakeup.fleet_status_summary", return_value=""):
            result = palace_wakeup(force=True)
            assert "Identity" in result
            assert "stale content" not in result
|
||||||
161
hermes-sovereign/mempalace/wakeup.py
Normal file
161
hermes-sovereign/mempalace/wakeup.py
Normal file
@@ -0,0 +1,161 @@
|
|||||||
|
"""Wake-up Protocol — session start context injection.
|
||||||
|
|
||||||
|
Generates 300-900 tokens of context when a new Hermes session starts.
|
||||||
|
Loads identity, recent palace context, and fleet status.
|
||||||
|
|
||||||
|
Refs: Epic #367, Sub-issue #372
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import subprocess
|
||||||
|
import time
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Constants
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
IDENTITY_PATH = Path.home() / ".mempalace" / "identity.txt"
|
||||||
|
MEMPALACE_BIN = "/Library/Frameworks/Python.framework/Versions/3.12/bin/mempalace"
|
||||||
|
FLEET_STATUS_PATH = Path.home() / ".hermes" / "fleet_status.json"
|
||||||
|
WAKEUP_CACHE_PATH = Path.home() / ".hermes" / "last_wakeup.txt"
|
||||||
|
WAKEUP_CACHE_TTL = 300 # 5 minutes — don't regenerate if recent
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Helpers
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def _load_identity() -> str:
    """Read the agent identity file, capped for wake-up brevity."""
    try:
        if not IDENTITY_PATH.exists():
            return ""
        text = IDENTITY_PATH.read_text(encoding="utf-8").strip()
    except (OSError, PermissionError):
        return ""
    tokens = text.split()
    if len(tokens) > 150:
        # Cap at ~150 tokens so the wake-up prompt stays short.
        text = " ".join(tokens[:150]) + "..."
    return text
|
||||||
|
|
||||||
|
|
||||||
|
def _palace_context() -> str:
    """Ask the mempalace CLI for recent context; empty string on any failure."""
    cli = MEMPALACE_BIN if os.path.exists(MEMPALACE_BIN) else "mempalace"
    try:
        proc = subprocess.run(
            [cli, "wake-up"],
            capture_output=True,
            text=True,
            timeout=10,
        )
    except (FileNotFoundError, subprocess.TimeoutExpired, OSError):
        # ONNX issues (#373) or CLI not available — degrade gracefully
        return ""
    output = proc.stdout.strip()
    if proc.returncode == 0 and output:
        return output
    return ""
|
||||||
|
|
||||||
|
|
||||||
|
def fleet_status_summary() -> str:
    """Summarize the cached fleet status file as a small Markdown section.

    Returns "" when the cache is absent, unreadable, not a dict, or empty.
    """
    try:
        if not FLEET_STATUS_PATH.exists():
            return ""
        data = json.loads(FLEET_STATUS_PATH.read_text(encoding="utf-8"))
    except (OSError, json.JSONDecodeError):
        return ""

    if not isinstance(data, dict):
        return ""

    entries = []
    for agent, status in data.items():
        if isinstance(status, dict):
            state = status.get("state", "unknown")
            seen = status.get("last_seen", "?")
            entries.append(f" {agent}: {state} (last: {seen})")
        else:
            entries.append(f" {agent}: {status}")

    if not entries:
        return ""
    return "\n".join(["## Fleet Status", *entries])
|
||||||
|
|
||||||
|
|
||||||
|
def _check_cache() -> str:
    """Return the cached wake-up text when it is younger than the TTL.

    Returns "" when the cache file is missing, stale, or unreadable.
    """
    try:
        # stat() raises FileNotFoundError (an OSError) when absent, so no
        # separate existence check is needed.
        age_seconds = time.time() - WAKEUP_CACHE_PATH.stat().st_mtime
        if age_seconds < WAKEUP_CACHE_TTL:
            return WAKEUP_CACHE_PATH.read_text(encoding="utf-8").strip()
    except OSError:
        pass
    return ""
|
||||||
|
|
||||||
|
|
||||||
|
def _write_cache(content: str) -> None:
    """Persist *content* as the wake-up cache. Best-effort: errors ignored."""
    target = WAKEUP_CACHE_PATH
    try:
        target.parent.mkdir(parents=True, exist_ok=True)
        target.write_text(content, encoding="utf-8")
    except OSError:
        pass
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Main entry point
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def palace_wakeup(force: bool = False) -> str:
    """Generate wake-up context for a new session. ~300-900 tokens.

    Args:
        force: If True, bypass the 5-minute cache and regenerate.

    Returns:
        Formatted context string suitable for prepending to the system prompt.
    """
    # Serve a fresh-enough cached copy to avoid redundant work on rapid
    # session restarts.
    if not force:
        cached = _check_cache()
        if cached:
            return cached

    identity = _load_identity()          # L0: who the agent is
    palace = _palace_context()           # L1: recent palace context
    fleet = fleet_status_summary()       # lightweight fleet snapshot
    stamp = time.strftime('%Y-%m-%d %H:%M:%S')

    sections = [
        f"## Identity\n{identity}" if identity else "",
        palace,
        fleet,
        f"## Session\nWake-up generated: {stamp}",
    ]
    content = "\n\n".join(section for section in sections if section)

    # Cache for TTL
    _write_cache(content)
    return content
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# CLI entry point for testing
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
print(palace_wakeup(force=True))
|
||||||
526
scripts/kaizen_retro.py
Normal file
526
scripts/kaizen_retro.py
Normal file
@@ -0,0 +1,526 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Kaizen Retro — Automated retrospective after every burn cycle.
|
||||||
|
|
||||||
|
Reads overnight Gitea activity, fleet state, and loop logs.
|
||||||
|
Generates ONE concrete improvement suggestion and posts it.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
python3 scripts/kaizen_retro.py [--dry-run]
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import urllib.error
|
||||||
|
import urllib.request
|
||||||
|
from datetime import datetime, timedelta, timezone
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any, Optional
|
||||||
|
|
||||||
|
# Ensure repo root is on path so we can import gitea_client
|
||||||
|
REPO_ROOT = Path(__file__).resolve().parent.parent
|
||||||
|
sys.path.insert(0, str(REPO_ROOT))
|
||||||
|
|
||||||
|
from gitea_client import GiteaClient, GiteaError
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Configuration
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
# Repositories scanned by the retro, "owner/name" form.
REPOS = [
    "Timmy_Foundation/the-nexus",
    "Timmy_Foundation/timmy-config",
    "Timmy_Foundation/timmy-home",
    "Timmy_Foundation/the-door",
    "Timmy_Foundation/turboquant",
    "Timmy_Foundation/hermes-agent",
    "Timmy_Foundation/.profile",
]

# Local state directories and file locations.
HERMES_HOME = Path.home() / ".hermes"
TIMMY_HOME = Path.home() / ".timmy"
WORKFORCE_STATE_PATH = HERMES_HOME / "workforce-state.json"
FLEET_ROUTING_PATH = HERMES_HOME / "fleet-routing.json"
CHANNEL_DIR_PATH = REPO_ROOT / "channel_directory.json"
REPORTS_DIR = REPO_ROOT / "reports"
# Repo whose "morning report" issue receives the retro as a comment.
MORNING_REPORT_REPO = "Timmy_Foundation/timmy-config"

# Telegram delivery settings; posting is skipped when the token is unset.
TELEGRAM_BOT_TOKEN = os.environ.get("TELEGRAM_BOT_TOKEN")
TELEGRAM_CHAT_ID = os.environ.get("TELEGRAM_HOME_CHANNEL", "-1003664764329")
TELEGRAM_MAX_LEN = 4000  # leave headroom below the 4096 hard limit

# Open issues older than this are treated as stale failures.
STALE_DAYS = 7
# Comment count at/above which an open issue counts as max-attempts.
MAX_ATTEMPT_COMMENT_THRESHOLD = 5

# Keyword buckets for classify_issue_type(); first strictly-best score wins.
ISSUE_TYPE_KEYWORDS = {
    "bug": ["bug", "fix", "crash", "error", "regression", "broken"],
    "feature": ["feature", "implement", "add", "support", "enable"],
    "docs": ["doc", "readme", "wiki", "guide", "documentation"],
    "kaizen": ["kaizen", "retro", "improvement", "continuous"],
    "devops": ["deploy", "ci", "cd", "docker", "server", "infra"],
}

# Label names that immediately mark an issue as a max-attempts candidate.
BLOCKER_LABELS = {"blocked", "timeout", "stale", "help wanted", "wontfix", "duplicate"}
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Helpers
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def load_json(path: Path) -> Any:
    """Parse *path* as JSON, returning None when the file does not exist.

    Args:
        path: File to read.

    Returns:
        The decoded JSON value, or None for a missing file.

    Raises:
        json.JSONDecodeError: If the file exists but is not valid JSON.
    """
    try:
        # EAFP: read directly — avoids the exists()/open race and an
        # extra stat() — and pin the encoding instead of relying on the
        # platform default.
        with open(path, encoding="utf-8") as f:
            return json.load(f)
    except FileNotFoundError:
        return None
|
||||||
|
|
||||||
|
|
||||||
|
def iso_day_ago(days: int = 1) -> str:
    """Return the ISO-8601 timestamp *days* days before now, in UTC."""
    cutoff = datetime.now(timezone.utc) - timedelta(days=days)
    return cutoff.isoformat()
|
||||||
|
|
||||||
|
|
||||||
|
def classify_issue_type(issue: dict) -> str:
    """Bucket an issue dict into one of ISSUE_TYPE_KEYWORDS' kinds.

    Scores keyword hits in the combined title/body/labels text; a keyword
    appearing inside a label name scores extra. Returns "other" when no
    kind scores above zero. Short keywords (<=3 chars) must match a whole
    word to avoid false positives like "ci" inside "cleanup".
    """
    title = (issue.get("title", "") or "").lower()
    body = (issue.get("body", "") or "").lower()
    labels = [l.get("name", "").lower() for l in issue.get("labels", []) or []]
    text = f"{title} {body} {' '.join(labels)}"
    words = set(text.split())

    def matches(kw: str, haystack: str, haystack_words: set) -> bool:
        # Whole-word match for short keywords, substring otherwise.
        if len(kw) <= 3:
            return kw in haystack_words
        return kw in haystack

    best_kind = "other"
    best_score = 0
    for kind, keywords in ISSUE_TYPE_KEYWORDS.items():
        score = sum(1 for kw in keywords if matches(kw, text, words))
        # A keyword inside a label name is a stronger signal: +3 per label.
        for label in labels:
            label_words = set(label.split())
            if any(matches(kw, label, label_words) for kw in keywords):
                score += 3
        if score > best_score:
            best_kind = kind
            best_score = score
    return best_kind
|
||||||
|
|
||||||
|
|
||||||
|
def is_max_attempts_candidate(issue: dict) -> bool:
    """Heuristic for issues that consumed excessive attempts.

    True when the issue carries a blocker label, has accumulated at least
    MAX_ATTEMPT_COMMENT_THRESHOLD comments, or has been open longer than
    STALE_DAYS. Unparseable timestamps are ignored.
    """
    names = {l.get("name", "").lower() for l in issue.get("labels", []) or []}
    if not names.isdisjoint(BLOCKER_LABELS):
        return True
    if issue.get("comments", 0) >= MAX_ATTEMPT_COMMENT_THRESHOLD:
        return True
    opened_at = issue.get("created_at")
    if opened_at:
        try:
            # Broad except also covers naive timestamps, where the
            # aware-minus-naive subtraction raises TypeError.
            opened = datetime.fromisoformat(opened_at.replace("Z", "+00:00"))
            if datetime.now(timezone.utc) - opened > timedelta(days=STALE_DAYS):
                return True
        except Exception:
            pass
    return False
|
||||||
|
|
||||||
|
|
||||||
|
def telegram_send(text: str, bot_token: str, chat_id: str) -> list[dict]:
    """Post text to Telegram, chunking if it exceeds the message limit.

    Splits on newline boundaries to keep chunks readable. Fixes two
    over-limit cases the previous version missed: a single line longer
    than the limit is now hard-split (previously it became one oversized
    chunk Telegram rejects with HTTP 400), and headroom is reserved so the
    "(part i/n)" prefix cannot push a chunk over the limit.

    Args:
        text: Markdown-formatted message body.
        bot_token: Telegram bot API token.
        chat_id: Destination chat / channel ID.

    Returns:
        One decoded sendMessage API response dict per chunk posted.

    Raises:
        urllib.error.URLError / HTTPError: On network or API failure.
    """
    url = f"https://api.telegram.org/bot{bot_token}/sendMessage"
    # Reserve room for the multi-part prefix added below.
    limit = TELEGRAM_MAX_LEN - 32
    chunks: list[str] = []
    if len(text) <= TELEGRAM_MAX_LEN:
        chunks = [text]
    else:
        # Split on newlines to preserve readability
        current = ""
        for line in text.splitlines(keepends=True):
            # Hard-split pathological single lines that exceed the limit.
            while len(line) > limit:
                if current:
                    chunks.append(current)
                    current = ""
                chunks.append(line[:limit])
                line = line[limit:]
            if len(current) + len(line) > limit:
                if current:
                    chunks.append(current)
                current = line
            else:
                current += line
        if current:
            chunks.append(current)
    results = []
    for i, chunk in enumerate(chunks):
        prefix = f"*(part {i + 1}/{len(chunks)})*\n" if len(chunks) > 1 else ""
        payload = {"chat_id": chat_id, "text": prefix + chunk, "parse_mode": "Markdown"}
        data = json.dumps(payload).encode()
        req = urllib.request.Request(url, data=data, headers={"Content-Type": "application/json"})
        with urllib.request.urlopen(req, timeout=30) as resp:
            results.append(json.loads(resp.read().decode()))
    return results
|
||||||
|
|
||||||
|
|
||||||
|
def find_latest_morning_report_issue(client: GiteaClient) -> Optional[int]:
    """Locate the newest "morning report" issue in MORNING_REPORT_REPO.

    Searches open issues first, then falls back to closed ones. Returns
    the issue number, or None when nothing matches or the API call fails.
    """
    try:
        for state in ("open", "closed"):
            issues = client.list_issues(
                MORNING_REPORT_REPO,
                state=state,
                sort="created",
                direction="desc",
                limit=20,
            )
            for issue in issues:
                # "good morning report" contains "morning report", so one
                # substring test covers both title variants.
                if "morning report" in issue.title.lower():
                    return issue.number
    except Exception:
        pass
    return None
|
||||||
|
|
||||||
|
|
||||||
|
def fmt_pct(num: float, den: float) -> str:
    """Format num/den as a whole-number percentage; "N/A" for a zero denominator."""
    return "N/A" if den == 0 else f"{num/den:.0%}"
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Analysis
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def gather_metrics(client: GiteaClient, since: str) -> dict:
    """Collect overnight metrics from Gitea.

    Walks every repo in REPOS and accumulates closed issues, stale/open
    issues, and merged/closed PRs into one dict with per-repo, per-agent,
    and per-issue-type rollups. Each repo's three API calls are wrapped in
    their own try/except so one failing repo or endpoint only skips that
    slice (with a stderr warning) instead of aborting the whole retro.

    Args:
        client: Gitea API client (project-local wrapper).
        since: ISO timestamp lower bound for the lookback window.

    Returns:
        Dict with keys "closed_issues", "merged_prs", "closed_prs",
        "open_issues" (collected but never populated here), "max_attempts_issues",
        and rollups "by_agent", "by_repo", "by_type". JSON-serializable
        (agent repo sets are converted to sorted lists at the end).
    """
    metrics = {
        "closed_issues": [],
        "merged_prs": [],
        "closed_prs": [],
        "open_issues": [],
        "max_attempts_issues": [],
        "by_agent": {},
        "by_repo": {},
        "by_type": {},
    }

    for repo in REPOS:
        # "owner/name" -> "name"; all rollups key on the short name.
        repo_short = repo.split("/")[1]
        metrics["by_repo"][repo_short] = {
            "closed": 0,
            "merged_prs": 0,
            "closed_prs": 0,
            "open": 0,
            "max_attempts": 0,
            "successes": 0,
            "failures": 0,
        }

        # Closed issues since window
        try:
            closed = client.list_issues(repo, state="closed", since=since, sort="updated", direction="desc", limit=100)
            for issue in closed:
                issue_dict = {
                    "number": issue.number,
                    "title": issue.title,
                    "repo": repo_short,
                    "type": classify_issue_type({"title": issue.title, "body": issue.body, "labels": [{"name": lb.name} for lb in issue.labels]}),
                    "assignee": issue.assignees[0].login if issue.assignees else "unassigned",
                }
                metrics["closed_issues"].append(issue_dict)
                metrics["by_repo"][repo_short]["closed"] += 1
                # A closed issue counts as a success for the repo rollup.
                metrics["by_repo"][repo_short]["successes"] += 1

                # Per-agent rollup keyed on the first assignee (or "unassigned").
                agent = issue_dict["assignee"]
                if agent not in metrics["by_agent"]:
                    metrics["by_agent"][agent] = {"successes": 0, "failures": 0, "closed": 0, "repos": set()}
                metrics["by_agent"][agent]["successes"] += 1
                metrics["by_agent"][agent]["closed"] += 1
                metrics["by_agent"][agent]["repos"].add(repo_short)

                # Per-type rollup.
                t = issue_dict["type"]
                if t not in metrics["by_type"]:
                    metrics["by_type"][t] = {"successes": 0, "failures": 0, "total": 0}
                metrics["by_type"][t]["successes"] += 1
                metrics["by_type"][t]["total"] += 1
        except Exception as exc:
            print(f"Warning: could not load closed issues for {repo}: {exc}", file=sys.stderr)

        # Open issues (for stale / max-attempts detection)
        try:
            open_issues = client.list_issues(repo, state="open", sort="created", direction="desc", limit=100)
            metrics["by_repo"][repo_short]["open"] = len(open_issues)
            for issue in open_issues:
                # Plain-dict view matching is_max_attempts_candidate's schema.
                issue_raw = {
                    "number": issue.number,
                    "title": issue.title,
                    "labels": [{"name": lb.name} for lb in issue.labels],
                    "comments": issue.comments,
                    "created_at": issue.created_at,
                }
                if is_max_attempts_candidate(issue_raw):
                    metrics["max_attempts_issues"].append({
                        "number": issue.number,
                        "title": issue.title,
                        "repo": repo_short,
                        "type": classify_issue_type({"title": issue.title, "body": issue.body, "labels": issue_raw["labels"]}),
                        "assignee": issue.assignees[0].login if issue.assignees else "unassigned",
                    })
                    metrics["by_repo"][repo_short]["max_attempts"] += 1
                    # Stale / max-attempts issues count as failures.
                    metrics["by_repo"][repo_short]["failures"] += 1

                    agent = issue.assignees[0].login if issue.assignees else "unassigned"
                    if agent not in metrics["by_agent"]:
                        metrics["by_agent"][agent] = {"successes": 0, "failures": 0, "closed": 0, "repos": set()}
                    metrics["by_agent"][agent]["failures"] += 1
                    metrics["by_agent"][agent]["repos"].add(repo_short)

                    t = classify_issue_type({"title": issue.title, "body": issue.body, "labels": issue_raw["labels"]})
                    if t not in metrics["by_type"]:
                        metrics["by_type"][t] = {"successes": 0, "failures": 0, "total": 0}
                    metrics["by_type"][t]["failures"] += 1
                    metrics["by_type"][t]["total"] += 1
        except Exception as exc:
            print(f"Warning: could not load open issues for {repo}: {exc}", file=sys.stderr)

        # PRs merged / closed since window (filter client-side; Gitea PR API ignores since)
        try:
            prs = client.list_pulls(repo, state="closed", sort="updated", limit=100)
            since_dt = datetime.fromisoformat(since.replace("Z", "+00:00"))
            for pr in prs:
                updated = pr.updated_at or pr.created_at or ""
                try:
                    updated_dt = datetime.fromisoformat(updated.replace("Z", "+00:00"))
                    if updated_dt < since_dt:
                        continue
                except Exception:
                    # Unparseable timestamp: keep the PR rather than drop it.
                    pass
                if pr.merged:
                    metrics["merged_prs"].append({
                        "number": pr.number,
                        "title": pr.title,
                        "repo": repo_short,
                        "user": pr.user.login if pr.user else "unknown",
                    })
                    metrics["by_repo"][repo_short]["merged_prs"] += 1
                else:
                    metrics["closed_prs"].append({
                        "number": pr.number,
                        "title": pr.title,
                        "repo": repo_short,
                        "user": pr.user.login if pr.user else "unknown",
                    })
                    metrics["by_repo"][repo_short]["closed_prs"] += 1
        except Exception as exc:
            print(f"Warning: could not load PRs for {repo}: {exc}", file=sys.stderr)

    # Convert sets to lists for JSON serialization
    for agent in metrics["by_agent"].values():
        agent["repos"] = sorted(agent["repos"])

    return metrics
|
||||||
|
|
||||||
|
|
||||||
|
def load_workforce_state() -> dict:
    """Load the workforce state file, defaulting to {} when absent or empty."""
    state = load_json(WORKFORCE_STATE_PATH)
    return state if state else {}
|
||||||
|
|
||||||
|
|
||||||
|
def load_fleet_routing() -> list[dict]:
    """Return the configured agent list from fleet routing, or [] if unavailable."""
    routing = load_json(FLEET_ROUTING_PATH)
    if not routing or "agents" not in routing:
        return []
    return routing["agents"]
|
||||||
|
|
||||||
|
|
||||||
|
def generate_suggestion(metrics: dict, fleet: list[dict]) -> str:
    """Generate ONE concrete improvement suggestion based on the data.

    Evaluates five heuristics in fixed priority order (failing agent,
    high-friction repo, max-attempts cluster, failing issue type, idle
    fleet agents) and returns the first that fires. When none fires,
    returns a celebration message on a strong cycle, else a low-activity
    nudge.

    Args:
        metrics: Rollup dict produced by gather_metrics().
        fleet: Agent list from load_fleet_routing().

    Returns:
        A single Markdown-formatted suggestion string.
    """
    by_agent = metrics["by_agent"]
    by_repo = metrics["by_repo"]
    by_type = metrics["by_type"]
    max_attempts = metrics["max_attempts_issues"]

    suggestions: list[str] = []

    # 1. Agent with poor repo performance
    for agent, stats in by_agent.items():
        total = stats["successes"] + stats["failures"]
        # Only flag with enough samples (>= 3) and zero successes.
        if total >= 3 and stats["successes"] == 0:
            repos = ", ".join(stats["repos"])
            suggestions.append(
                f"🎯 **{agent}** has a 0% verify rate over the last cycle (0/{total}) on repos: {repos}. "
                f"Consider removing these repos from {agent}'s routing or providing targeted onboarding."
            )

    # 2. Repo with highest failure concentration
    repo_failures = [(r, s) for r, s in by_repo.items() if s["failures"] > 0]
    if repo_failures:
        repo_failures.sort(key=lambda x: x[1]["failures"], reverse=True)
        worst_repo, worst_stats = repo_failures[0]
        total_repo = worst_stats["successes"] + worst_stats["failures"]
        if worst_stats["failures"] >= 2:
            suggestions.append(
                f"🎯 **{worst_repo}** has the most friction ({worst_stats['failures']} blocked/stale issues, "
                f"{fmt_pct(worst_stats['successes'], total_repo)} success). "
                f"Consider splitting issues in {worst_repo} into smaller chunks or assigning a stronger agent."
            )

    # 3. Max-attempts pattern
    if len(max_attempts) >= 3:
        # Tally stuck issues by type and call out the dominant one.
        type_counts: dict[str, int] = {}
        for issue in max_attempts:
            type_counts[issue["type"]] = type_counts.get(issue["type"], 0) + 1
        top_type = max(type_counts, key=type_counts.get) if type_counts else "unknown"
        suggestions.append(
            f"🎯 **{len(max_attempts)} issues** hit max-attempts or went stale. "
            f"The dominant type is **{top_type}**. "
            f"Consider adding acceptance criteria templates or pre-flight checklists for {top_type} issues."
        )

    # 4. Issue type disparity
    for t, stats in by_type.items():
        total = stats["total"]
        if total >= 3 and stats["successes"] == 0:
            suggestions.append(
                f"🎯 **{t}** issues have a 0% closure rate ({stats['failures']} stale). "
                f"Consider routing all {t} issues to a specialist agent or creating a dedicated playbook."
            )

    # 5. Fleet routing gap (if fleet data exists)
    active_agents = {a["name"] for a in fleet if a.get("active")}
    assigned_agents = set(by_agent.keys())
    idle_agents = active_agents - assigned_agents - {"unassigned"}
    if len(idle_agents) >= 2:
        suggestions.append(
            f"🎯 **{len(idle_agents)} active agents** have no assignments this cycle: {', '.join(idle_agents)}. "
            f"Consider expanding their repo lists or investigating why they aren't receiving work."
        )

    # Return the highest-priority suggestion only (one per retro by design).
    if suggestions:
        return suggestions[0]

    # Fallback: celebrate or nudge
    total_closed = len(metrics["closed_issues"])
    total_merged = len(metrics["merged_prs"])
    if total_closed >= 5 or total_merged >= 3:
        return (
            f"🎯 Strong cycle: {total_closed} issues closed, {total_merged} PRs merged. "
            f"Next improvement: write down the top 3 patterns that made this cycle successful so we can replicate them."
        )

    return (
        "🎯 Low activity this cycle. Next improvement: ensure at least one agent loop is actively polling "
        "for unassigned issues so work doesn't sit idle."
    )
|
||||||
|
|
||||||
|
|
||||||
|
def build_report(metrics: dict, suggestion: str, since: str) -> str:
    """Render the retro as a Markdown document.

    Sections: headline numbers, per-agent / per-repo / per-type tables,
    up to 10 max-attempts issues, and the single improvement suggestion.

    Args:
        metrics: Rollup dict produced by gather_metrics().
        suggestion: Output of generate_suggestion().
        since: ISO timestamp of the lookback window start (date part is
            shown as the period).

    Returns:
        The full report as one Markdown string.
    """
    now = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M UTC")
    # First 10 chars of an ISO timestamp are the YYYY-MM-DD date.
    period = since[:10]

    lines = [
        f"# 🌀 Kaizen Retro — {now}",
        f"*Period: {period} → now*\n",
        "## Numbers",
        f"- **Issues closed:** {len(metrics['closed_issues'])}",
        f"- **PRs merged:** {len(metrics['merged_prs'])}",
        f"- **PRs closed without merge:** {len(metrics['closed_prs'])}",
        f"- **Max-attempts / stale issues:** {len(metrics['max_attempts_issues'])}",
        "",
        "## By Agent",
    ]

    # Agents sorted by total activity (successes + failures), busiest first.
    for agent, stats in sorted(metrics["by_agent"].items(), key=lambda x: x[1]["successes"] + x[1]["failures"], reverse=True):
        total = stats["successes"] + stats["failures"]
        rate = fmt_pct(stats["successes"], total)
        lines.append(f"- **{agent}**: {stats['successes']} closed, {stats['failures']} stale / max-attempts — verify rate {rate}")

    lines.extend(["", "## By Repo"])
    for repo, stats in sorted(metrics["by_repo"].items(), key=lambda x: x[1]["successes"] + x[1]["failures"], reverse=True):
        total = stats["successes"] + stats["failures"]
        # Skip repos with no activity and nothing open.
        if total == 0 and stats["open"] == 0:
            continue
        rate = fmt_pct(stats["successes"], total)
        lines.append(
            f"- **{repo}**: {stats['successes']} closed, {stats['failures']} stale, {stats['open']} open — verify rate {rate}"
        )

    lines.extend(["", "## By Issue Type"])
    for t, stats in sorted(metrics["by_type"].items(), key=lambda x: x[1]["total"], reverse=True):
        total = stats["total"]
        rate = fmt_pct(stats["successes"], total)
        lines.append(f"- **{t}**: {stats['successes']} closed, {stats['failures']} stale — verify rate {rate}")

    if metrics["max_attempts_issues"]:
        lines.extend(["", "## Max-Attempts / Stale Issues"])
        # Cap the list at 10 entries to keep the report scannable.
        for issue in metrics["max_attempts_issues"][:10]:
            lines.append(f"- {issue['repo']}#{issue['number']} ({issue['type']}, assignee: {issue['assignee']}) — {issue['title']}")
        if len(metrics["max_attempts_issues"]) > 10:
            lines.append(f"- … and {len(metrics['max_attempts_issues']) - 10} more")

    lines.extend(["", "## One Concrete Improvement", suggestion, ""])

    return "\n".join(lines)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Main
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def main() -> int:
    """CLI entry point: gather metrics, build the retro, and deliver it.

    Pipeline: parse args -> gather Gitea metrics -> generate suggestion ->
    build report -> save a JSON snapshot -> (unless --dry-run) post to
    Telegram and comment on the latest morning-report issue.

    Returns:
        Process exit code (always 0; delivery failures are logged to
        stderr rather than failing the run).
    """
    parser = argparse.ArgumentParser(description="Kaizen Retro — automated burn-cycle retrospective")
    parser.add_argument("--dry-run", action="store_true", help="Print report but do not post")
    parser.add_argument("--since", type=str, help="ISO timestamp for lookback window (default: 24h ago)")
    parser.add_argument("--post-to", type=str, help="Override Telegram chat ID")
    args = parser.parse_args()

    since = args.since or iso_day_ago(1)
    client = GiteaClient()

    print("Gathering metrics since", since)
    metrics = gather_metrics(client, since)
    fleet = load_fleet_routing()
    suggestion = generate_suggestion(metrics, fleet)
    report = build_report(metrics, suggestion, since)

    print(report)

    # Save JSON snapshot
    REPORTS_DIR.mkdir(parents=True, exist_ok=True)
    # One snapshot per day; a rerun on the same day overwrites it.
    snapshot_path = REPORTS_DIR / f"kaizen-retro-{datetime.now(timezone.utc).strftime('%Y%m%d')}.json"
    snapshot = {
        "generated_at": datetime.now(timezone.utc).isoformat(),
        "since": since,
        "metrics": metrics,
        "suggestion": suggestion,
        "report_markdown": report,
    }
    with open(snapshot_path, "w") as f:
        json.dump(snapshot, f, indent=2)
    print(f"\nSnapshot saved to {snapshot_path}")

    if args.dry_run:
        return 0

    # Post to Telegram
    chat_id = args.post_to or TELEGRAM_CHAT_ID
    bot_token = TELEGRAM_BOT_TOKEN
    if bot_token and chat_id:
        try:
            telegram_send(report, bot_token, chat_id)
            print("Posted to Telegram.")
        except Exception as exc:
            # Best-effort delivery: log and continue to the Gitea comment.
            print(f"Failed to post to Telegram: {exc}", file=sys.stderr)
    else:
        print("Telegram not configured (set TELEGRAM_BOT_TOKEN and TELEGRAM_HOME_CHANNEL).", file=sys.stderr)

    # Comment on latest morning report issue
    morning_issue = find_latest_morning_report_issue(client)
    if morning_issue:
        try:
            client.create_comment(MORNING_REPORT_REPO, morning_issue, report)
            print(f"Commented on morning report issue #{morning_issue}.")
        except Exception as exc:
            print(f"Failed to comment on morning report issue: {exc}", file=sys.stderr)
    else:
        print("No morning report issue found to comment on.", file=sys.stderr)

    return 0
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
sys.exit(main())
|
||||||
112
tasks.py
112
tasks.py
@@ -1860,22 +1860,56 @@ def good_morning_report():
|
|||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
# Genchi Genbutsu: count verified completions from the last 24h
|
||||||
|
verified_completions = 0
|
||||||
|
raw_completions = 0
|
||||||
|
metrics_dir = Path.home() / ".hermes" / "logs"
|
||||||
|
for metrics_file in metrics_dir.glob("*-metrics.jsonl"):
|
||||||
|
try:
|
||||||
|
with open(metrics_file) as mf:
|
||||||
|
for line in mf:
|
||||||
|
line = line.strip()
|
||||||
|
if not line:
|
||||||
|
continue
|
||||||
|
try:
|
||||||
|
row = json.loads(line)
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
continue
|
||||||
|
ts = row.get("ts", "")
|
||||||
|
if not ts:
|
||||||
|
continue
|
||||||
|
try:
|
||||||
|
from datetime import datetime as _dt, timezone as _tz, timedelta as _td
|
||||||
|
row_time = _dt.fromisoformat(ts.replace("Z", "+00:00"))
|
||||||
|
if (now - row_time) > _td(hours=24):
|
||||||
|
continue
|
||||||
|
except Exception:
|
||||||
|
continue
|
||||||
|
if row.get("outcome") != "success":
|
||||||
|
continue
|
||||||
|
raw_completions += 1
|
||||||
|
if row.get("verified") is True:
|
||||||
|
verified_completions += 1
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
# --- BUILD THE REPORT ---
|
# --- BUILD THE REPORT ---
|
||||||
|
|
||||||
body = f"""Good morning, Alexander. It's {day_name}.
|
body = f"""Good morning, Alexander. It's {day_name}.
|
||||||
|
|
||||||
## Overnight Debrief
|
## Overnight Debrief
|
||||||
|
|
||||||
**Heartbeat:** {tick_count} ticks logged overnight.
|
|**Heartbeat:** {tick_count} ticks logged overnight.
|
||||||
**Gitea:** {"up all night" if gitea_up else "⚠️ had downtime"}
|
|**Gitea:** {"up all night" if gitea_up else "⚠️ had downtime"}
|
||||||
**Local inference:** {"running steady" if local_inference_up else "⚠️ had downtime"}
|
|**Local inference:** {"running steady" if local_inference_up else "⚠️ had downtime"}
|
||||||
**Model status:** {model_status}
|
|**Model status:** {model_status}
|
||||||
**Models on disk:** {len(models_loaded)} ({', '.join(m for m in models_loaded if 'timmy' in m.lower() or 'hermes' in m.lower()) or 'none with our name'})
|
|**Models on disk:** {len(models_loaded)} ({', '.join(m for m in models_loaded if 'timmy' in m.lower() or 'hermes' in m.lower()) or 'none with our name'})
|
||||||
**Alerts:** {len(alerts)} {'— ' + '; '.join(alerts[-3:]) if alerts else '(clean night)'}
|
|**Alerts:** {len(alerts)} {'— ' + '; '.join(alerts[-3:]) if alerts else '(clean night)'}
|
||||||
{briefing_summary}
|
{briefing_summary}
|
||||||
|
|
||||||
**DPO training pairs staged:** {dpo_count} session files exported
|
**DPO training pairs staged:** {dpo_count} session files exported
|
||||||
**Local model smoke test:** {smoke_result}
|
**Local model smoke test:** {smoke_result}
|
||||||
|
**Verified completions (24h):** {verified_completions} {'(Genchi Genbutsu clean)' if verified_completions == raw_completions else f'({raw_completions - verified_completions} raw completions failed verification)'}
|
||||||
|
|
||||||
## Gitea Pulse
|
## Gitea Pulse
|
||||||
|
|
||||||
@@ -1915,6 +1949,29 @@ That's all. Have a good morning.
|
|||||||
return {"filed": False, "error": str(e)}
|
return {"filed": False, "error": str(e)}
|
||||||
|
|
||||||
|
|
||||||
|
# ── NEW 6b: Kaizen Retro ─────────────────────────────────────────────
|
||||||
|
|
||||||
|
@huey.periodic_task(crontab(hour="7", minute="15"))  # 7:15 AM daily, after morning report
def kaizen_retro():
    """Run the automated burn-cycle retrospective via bin/kaizen-retro.sh.

    Returns:
        Dict with "ran" plus either the script's exit code and truncated
        output, or an "error" describing why it didn't run. A hung script
        (previously an uncaught TimeoutExpired that would surface as a
        task failure in the scheduler) now returns an error dict instead.
    """
    retro_script = Path(__file__).resolve().parent / "bin" / "kaizen-retro.sh"
    if not retro_script.exists():
        return {"ran": False, "error": "kaizen-retro.sh not found"}

    try:
        result = subprocess.run(
            ["bash", str(retro_script)],
            capture_output=True,
            text=True,
            timeout=300,
        )
    except subprocess.TimeoutExpired:
        return {"ran": False, "error": "kaizen-retro.sh timed out after 300s"}
    except OSError as exc:
        return {"ran": False, "error": f"could not launch kaizen-retro.sh: {exc}"}
    return {
        "ran": True,
        "exit_code": result.returncode,
        # Truncate captured output so the task result stays small.
        "stdout": result.stdout[-2000:] if result.stdout else "",
        "stderr": result.stderr[-1000:] if result.stderr else "",
    }
|
||||||
|
|
||||||
|
|
||||||
# ── NEW 7: Repo Watchdog ─────────────────────────────────────────────
|
# ── NEW 7: Repo Watchdog ─────────────────────────────────────────────
|
||||||
|
|
||||||
@huey.periodic_task(crontab(minute="*/20")) # every 20 minutes
|
@huey.periodic_task(crontab(minute="*/20")) # every 20 minutes
|
||||||
@@ -2323,7 +2380,38 @@ def velocity_tracking():
|
|||||||
total_open += open_n
|
total_open += open_n
|
||||||
total_closed += closed_n
|
total_closed += closed_n
|
||||||
results.append({"repo": repo, "open": open_n, "closed": closed_n, "date": today})
|
results.append({"repo": repo, "open": open_n, "closed": closed_n, "date": today})
|
||||||
data = {"date": today, "repos": results, "total_open": total_open, "total_closed": total_closed}
|
|
||||||
|
# Genchi Genbutsu: count verified completions from agent metrics
|
||||||
|
verified_completions = 0
|
||||||
|
raw_completions = 0
|
||||||
|
metrics_dir = Path.home() / ".hermes" / "logs"
|
||||||
|
for metrics_file in metrics_dir.glob("*-metrics.jsonl"):
|
||||||
|
try:
|
||||||
|
with open(metrics_file) as mf:
|
||||||
|
for line in mf:
|
||||||
|
line = line.strip()
|
||||||
|
if not line:
|
||||||
|
continue
|
||||||
|
try:
|
||||||
|
row = json.loads(line)
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
continue
|
||||||
|
if row.get("outcome") != "success":
|
||||||
|
continue
|
||||||
|
raw_completions += 1
|
||||||
|
if row.get("verified") is True:
|
||||||
|
verified_completions += 1
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
data = {
|
||||||
|
"date": today,
|
||||||
|
"repos": results,
|
||||||
|
"total_open": total_open,
|
||||||
|
"total_closed": total_closed,
|
||||||
|
"raw_completions": raw_completions,
|
||||||
|
"verified_completions": verified_completions,
|
||||||
|
}
|
||||||
with open(report_file, "w") as f:
|
with open(report_file, "w") as f:
|
||||||
json.dump(data, f, indent=2)
|
json.dump(data, f, indent=2)
|
||||||
# Dashboard
|
# Dashboard
|
||||||
@@ -2333,14 +2421,16 @@ def velocity_tracking():
|
|||||||
for r in results:
|
for r in results:
|
||||||
f.write(f"| {r['repo'].split('/')[-1]} | {r['open']} | {r['closed']} |\n")
|
f.write(f"| {r['repo'].split('/')[-1]} | {r['open']} | {r['closed']} |\n")
|
||||||
f.write(f"| **TOTAL** | **{total_open}** | **{total_closed}** |\n\n")
|
f.write(f"| **TOTAL** | **{total_open}** | **{total_closed}** |\n\n")
|
||||||
|
f.write(f"**Verified completions (Genchi Genbutsu):** {verified_completions}\n")
|
||||||
|
f.write(f"**Raw completions:** {raw_completions}\n\n")
|
||||||
# Trend
|
# Trend
|
||||||
prior = sorted(glob.glob(os.path.join(report_dir, "velocity-*.json")))
|
prior = sorted(glob.glob(os.path.join(report_dir, "velocity-*.json")))
|
||||||
if len(prior) > 1:
|
if len(prior) > 1:
|
||||||
f.write("## Recent Trend\n\n| Date | Total Open | Total Closed |\n|---|---|---|\n")
|
f.write("## Recent Trend\n\n| Date | Total Open | Total Closed | Verified |\n|---|---|---|---|\n")
|
||||||
for pf in prior[-10:]:
|
for pf in prior[-10:]:
|
||||||
pd = json.load(open(pf))
|
pd = json.load(open(pf))
|
||||||
f.write(f"| {pd['date']} | {pd['total_open']} | {pd['total_closed']} |\n")
|
f.write(f"| {pd['date']} | {pd['total_open']} | {pd['total_closed']} | {pd.get('verified_completions', '-')} |\n")
|
||||||
msg = f"Velocity: {total_open} open, {total_closed} closed ({today})"
|
msg = f"Velocity: {total_open} open, {total_closed} closed, {verified_completions} verified ({today})"
|
||||||
if len(prior) > 1:
|
if len(prior) > 1:
|
||||||
prev = json.load(open(prior[-2]))
|
prev = json.load(open(prior[-2]))
|
||||||
if total_open > prev["total_open"]:
|
if total_open > prev["total_open"]:
|
||||||
|
|||||||
283
tests/test_kaizen_retro.py
Normal file
283
tests/test_kaizen_retro.py
Normal file
@@ -0,0 +1,283 @@
|
|||||||
|
"""Tests for the Kaizen Retro burn-cycle retrospective script."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import importlib.util
|
||||||
|
import json
|
||||||
|
import sys
|
||||||
|
from datetime import datetime, timedelta, timezone
|
||||||
|
from pathlib import Path
|
||||||
|
from unittest.mock import MagicMock, patch
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
REPO_ROOT = Path(__file__).parent.parent
|
||||||
|
|
||||||
|
# Load kaizen_retro.py as a module (it lives in scripts/, not a package)
|
||||||
|
spec = importlib.util.spec_from_file_location("kaizen_retro", REPO_ROOT / "scripts" / "kaizen_retro.py")
|
||||||
|
kr = importlib.util.module_from_spec(spec)
|
||||||
|
spec.loader.exec_module(kr)
|
||||||
|
|
||||||
|
|
||||||
|
# ── classify_issue_type ───────────────────────────────────────────────────
|
||||||
|
|
||||||
|
class TestClassifyIssueType:
|
||||||
|
def test_classifies_bug_from_title(self):
|
||||||
|
issue = {"title": "Fix crash on startup", "body": "", "labels": []}
|
||||||
|
assert kr.classify_issue_type(issue) == "bug"
|
||||||
|
|
||||||
|
def test_classifies_feature_from_label(self):
|
||||||
|
issue = {"title": "Add dark mode", "body": "", "labels": [{"name": "enhancement"}]}
|
||||||
|
# label "enhancement" doesn't match any keyword directly, but "feature" and "add" are in title
|
||||||
|
assert kr.classify_issue_type(issue) == "feature"
|
||||||
|
|
||||||
|
def test_classifies_docs_from_label(self):
|
||||||
|
issue = {"title": "Update guide", "body": "", "labels": [{"name": "documentation"}]}
|
||||||
|
assert kr.classify_issue_type(issue) == "docs"
|
||||||
|
|
||||||
|
def test_label_match_stronger_than_title(self):
|
||||||
|
issue = {"title": "Something random", "body": "", "labels": [{"name": "bug"}]}
|
||||||
|
assert kr.classify_issue_type(issue) == "bug"
|
||||||
|
|
||||||
|
def test_kaizen_takes_precedence_with_both_labels(self):
|
||||||
|
issue = {"title": "Process improvement", "body": "", "labels": [{"name": "kaizen"}, {"name": "bug"}]}
|
||||||
|
# kaizen label gives +3, bug gives +3, tie goes to first seen? kaizen appears first in dict
|
||||||
|
assert kr.classify_issue_type(issue) == "kaizen"
|
||||||
|
|
||||||
|
def test_defaults_to_other(self):
|
||||||
|
issue = {"title": "Tidy up naming", "body": "No user-facing change", "labels": [{"name": "cleanup"}]}
|
||||||
|
assert kr.classify_issue_type(issue) == "other"
|
||||||
|
|
||||||
|
|
||||||
|
# ── is_max_attempts_candidate ─────────────────────────────────────────────
|
||||||
|
|
||||||
|
class TestIsMaxAttemptsCandidate:
|
||||||
|
def test_blocker_label_returns_true(self):
|
||||||
|
issue = {"labels": [{"name": "blocked"}], "comments": 0, "created_at": "2026-04-07T00:00:00Z"}
|
||||||
|
assert kr.is_max_attempts_candidate(issue) is True
|
||||||
|
|
||||||
|
def test_timeout_label_returns_true(self):
|
||||||
|
issue = {"labels": [{"name": "timeout"}], "comments": 0, "created_at": "2026-04-07T00:00:00Z"}
|
||||||
|
assert kr.is_max_attempts_candidate(issue) is True
|
||||||
|
|
||||||
|
def test_high_comment_count_returns_true(self):
|
||||||
|
issue = {"labels": [], "comments": 5, "created_at": "2026-04-07T00:00:00Z"}
|
||||||
|
assert kr.is_max_attempts_candidate(issue) is True
|
||||||
|
|
||||||
|
def test_fresh_issue_with_low_comments_returns_false(self):
|
||||||
|
now = datetime.now(timezone.utc)
|
||||||
|
issue = {"labels": [], "comments": 2, "created_at": now.isoformat()}
|
||||||
|
assert kr.is_max_attempts_candidate(issue) is False
|
||||||
|
|
||||||
|
def test_stale_age_returns_true(self):
|
||||||
|
old = datetime.now(timezone.utc) - timedelta(days=10)
|
||||||
|
issue = {"labels": [], "comments": 0, "created_at": old.isoformat()}
|
||||||
|
assert kr.is_max_attempts_candidate(issue) is True
|
||||||
|
|
||||||
|
|
||||||
|
# ── fmt_pct ───────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
class TestFmtPct:
|
||||||
|
def test_basic_percentage(self):
|
||||||
|
assert kr.fmt_pct(3, 4) == "75%"
|
||||||
|
|
||||||
|
def test_zero_denominator(self):
|
||||||
|
assert kr.fmt_pct(0, 0) == "N/A"
|
||||||
|
|
||||||
|
def test_perfect_rate(self):
|
||||||
|
assert kr.fmt_pct(10, 10) == "100%"
|
||||||
|
|
||||||
|
|
||||||
|
# ── generate_suggestion ───────────────────────────────────────────────────
|
||||||
|
|
||||||
|
class TestGenerateSuggestion:
|
||||||
|
def test_agent_zero_success_rate(self):
|
||||||
|
metrics = {
|
||||||
|
"by_agent": {
|
||||||
|
"groq": {"successes": 0, "failures": 5, "closed": 0, "repos": ["timmy-home"]},
|
||||||
|
},
|
||||||
|
"by_repo": {},
|
||||||
|
"by_type": {},
|
||||||
|
"max_attempts_issues": [],
|
||||||
|
"closed_issues": [],
|
||||||
|
"merged_prs": [],
|
||||||
|
"closed_prs": [],
|
||||||
|
}
|
||||||
|
suggestion = kr.generate_suggestion(metrics, [])
|
||||||
|
assert "groq" in suggestion
|
||||||
|
assert "0%" in suggestion or "verify rate" in suggestion
|
||||||
|
|
||||||
|
def test_repo_with_most_failures(self):
|
||||||
|
metrics = {
|
||||||
|
"by_agent": {},
|
||||||
|
"by_repo": {
|
||||||
|
"the-nexus": {"successes": 2, "failures": 5, "closed": 2, "open": 3},
|
||||||
|
},
|
||||||
|
"by_type": {},
|
||||||
|
"max_attempts_issues": [],
|
||||||
|
"closed_issues": [],
|
||||||
|
"merged_prs": [],
|
||||||
|
"closed_prs": [],
|
||||||
|
}
|
||||||
|
suggestion = kr.generate_suggestion(metrics, [])
|
||||||
|
assert "the-nexus" in suggestion
|
||||||
|
assert "friction" in suggestion
|
||||||
|
|
||||||
|
def test_max_attempts_pattern(self):
|
||||||
|
metrics = {
|
||||||
|
"by_agent": {},
|
||||||
|
"by_repo": {},
|
||||||
|
"by_type": {},
|
||||||
|
"max_attempts_issues": [
|
||||||
|
{"type": "devops"}, {"type": "devops"}, {"type": "feature"}
|
||||||
|
],
|
||||||
|
"closed_issues": [],
|
||||||
|
"merged_prs": [],
|
||||||
|
"closed_prs": [],
|
||||||
|
}
|
||||||
|
suggestion = kr.generate_suggestion(metrics, [])
|
||||||
|
assert "devops" in suggestion
|
||||||
|
assert "max-attempts" in suggestion.lower() or "stale" in suggestion.lower()
|
||||||
|
|
||||||
|
def test_idle_agents(self):
|
||||||
|
metrics = {
|
||||||
|
"by_agent": {},
|
||||||
|
"by_repo": {},
|
||||||
|
"by_type": {},
|
||||||
|
"max_attempts_issues": [],
|
||||||
|
"closed_issues": [],
|
||||||
|
"merged_prs": [],
|
||||||
|
"closed_prs": [],
|
||||||
|
}
|
||||||
|
fleet = [{"name": "allegro", "active": True}, {"name": "ezra", "active": True}]
|
||||||
|
suggestion = kr.generate_suggestion(metrics, fleet)
|
||||||
|
assert "idle" in suggestion.lower() or "no assignments" in suggestion.lower()
|
||||||
|
|
||||||
|
def test_fallback_celebration(self):
|
||||||
|
metrics = {
|
||||||
|
"by_agent": {},
|
||||||
|
"by_repo": {},
|
||||||
|
"by_type": {},
|
||||||
|
"max_attempts_issues": [],
|
||||||
|
"closed_issues": [{}, {}, {}, {}, {}],
|
||||||
|
"merged_prs": [{}, {}, {}],
|
||||||
|
"closed_prs": [],
|
||||||
|
}
|
||||||
|
suggestion = kr.generate_suggestion(metrics, [])
|
||||||
|
assert "Strong cycle" in suggestion
|
||||||
|
|
||||||
|
def test_fallback_low_activity(self):
|
||||||
|
metrics = {
|
||||||
|
"by_agent": {},
|
||||||
|
"by_repo": {},
|
||||||
|
"by_type": {},
|
||||||
|
"max_attempts_issues": [],
|
||||||
|
"closed_issues": [],
|
||||||
|
"merged_prs": [],
|
||||||
|
"closed_prs": [],
|
||||||
|
}
|
||||||
|
suggestion = kr.generate_suggestion(metrics, [])
|
||||||
|
assert "Low activity" in suggestion or "idle" in suggestion.lower()
|
||||||
|
|
||||||
|
|
||||||
|
# ── build_report ──────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
class TestBuildReport:
|
||||||
|
def test_report_contains_numbers_section(self):
|
||||||
|
metrics = {
|
||||||
|
"closed_issues": [{}, {}],
|
||||||
|
"merged_prs": [{}],
|
||||||
|
"closed_prs": [],
|
||||||
|
"max_attempts_issues": [],
|
||||||
|
"by_agent": {"ezra": {"successes": 2, "failures": 0, "repos": ["timmy-config"]}},
|
||||||
|
"by_repo": {"timmy-config": {"successes": 2, "failures": 0, "open": 1}},
|
||||||
|
"by_type": {"feature": {"successes": 2, "failures": 0, "total": 2}},
|
||||||
|
}
|
||||||
|
report = kr.build_report(metrics, "Do better.", "2026-04-06T00:00:00+00:00")
|
||||||
|
assert "## Numbers" in report
|
||||||
|
assert "Issues closed:** 2" in report
|
||||||
|
assert "PRs merged:** 1" in report
|
||||||
|
assert "## By Agent" in report
|
||||||
|
assert "## By Repo" in report
|
||||||
|
assert "## By Issue Type" in report
|
||||||
|
assert "Do better." in report
|
||||||
|
|
||||||
|
def test_report_skips_empty_repos(self):
|
||||||
|
metrics = {
|
||||||
|
"closed_issues": [],
|
||||||
|
"merged_prs": [],
|
||||||
|
"closed_prs": [],
|
||||||
|
"max_attempts_issues": [],
|
||||||
|
"by_agent": {},
|
||||||
|
"by_repo": {"unused-repo": {"successes": 0, "failures": 0, "open": 0}},
|
||||||
|
"by_type": {},
|
||||||
|
}
|
||||||
|
report = kr.build_report(metrics, "Nudge.", "2026-04-06T00:00:00+00:00")
|
||||||
|
assert "unused-repo" not in report
|
||||||
|
|
||||||
|
def test_report_truncates_max_attempts(self):
|
||||||
|
metrics = {
|
||||||
|
"closed_issues": [],
|
||||||
|
"merged_prs": [],
|
||||||
|
"closed_prs": [],
|
||||||
|
"max_attempts_issues": [{"repo": "r", "number": i, "type": "bug", "assignee": "a", "title": f"T{i}"} for i in range(15)],
|
||||||
|
"by_agent": {},
|
||||||
|
"by_repo": {},
|
||||||
|
"by_type": {},
|
||||||
|
}
|
||||||
|
report = kr.build_report(metrics, "Fix it.", "2026-04-06T00:00:00+00:00")
|
||||||
|
assert "and 5 more" in report
|
||||||
|
|
||||||
|
|
||||||
|
# ── telegram_send ─────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
class TestTelegramSend:
|
||||||
|
def test_short_message_sent_in_one_piece(self):
|
||||||
|
with patch("urllib.request.urlopen") as mock_urlopen:
|
||||||
|
mock_resp = MagicMock()
|
||||||
|
mock_resp.read.return_value = b'{"ok": true}'
|
||||||
|
mock_urlopen.return_value.__enter__.return_value = mock_resp
|
||||||
|
|
||||||
|
results = kr.telegram_send("Hello", "fake-token", "123")
|
||||||
|
assert len(results) == 1
|
||||||
|
assert results[0]["ok"] is True
|
||||||
|
# Verify payload
|
||||||
|
call_args = mock_urlopen.call_args
|
||||||
|
req = call_args[0][0]
|
||||||
|
payload = json.loads(req.data.decode())
|
||||||
|
assert payload["text"] == "Hello"
|
||||||
|
assert payload["chat_id"] == "123"
|
||||||
|
|
||||||
|
def test_long_message_chunked(self):
|
||||||
|
big_text = "Line\n" * 2000 # ~10k chars
|
||||||
|
with patch("urllib.request.urlopen") as mock_urlopen:
|
||||||
|
mock_resp = MagicMock()
|
||||||
|
mock_resp.read.return_value = b'{"ok": true}'
|
||||||
|
mock_urlopen.return_value.__enter__.return_value = mock_resp
|
||||||
|
|
||||||
|
results = kr.telegram_send(big_text, "fake-token", "123")
|
||||||
|
assert len(results) >= 2
|
||||||
|
# First chunk should have a part prefix
|
||||||
|
req = mock_urlopen.call_args_list[0][0][0]
|
||||||
|
payload = json.loads(req.data.decode())
|
||||||
|
assert "(part 1" in payload["text"]
|
||||||
|
|
||||||
|
|
||||||
|
# ── load helpers ──────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
class TestLoadHelpers:
|
||||||
|
def test_load_json_missing_returns_none(self, tmp_path):
|
||||||
|
missing = tmp_path / "does_not_exist.json"
|
||||||
|
assert kr.load_json(missing) is None
|
||||||
|
|
||||||
|
def test_load_json_valid(self, tmp_path):
|
||||||
|
p = tmp_path / "data.json"
|
||||||
|
p.write_text('{"a": 1}')
|
||||||
|
assert kr.load_json(p) == {"a": 1}
|
||||||
|
|
||||||
|
def test_iso_day_ago_format(self):
|
||||||
|
s = kr.iso_day_ago(1)
|
||||||
|
# Should be a valid ISO timestamp string
|
||||||
|
dt = datetime.fromisoformat(s)
|
||||||
|
now = datetime.now(timezone.utc)
|
||||||
|
assert now - dt < timedelta(days=2)
|
||||||
Reference in New Issue
Block a user