Compare commits
1 Commits
feature/wo
...
timmy/orch
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
5c84fead29 |
@@ -1,39 +0,0 @@
|
||||
name: Validate Matrix Scaffold
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [main, master]
|
||||
paths:
|
||||
- "infra/matrix/**"
|
||||
- ".gitea/workflows/validate-matrix-scaffold.yml"
|
||||
pull_request:
|
||||
branches: [main, master]
|
||||
paths:
|
||||
- "infra/matrix/**"
|
||||
|
||||
jobs:
|
||||
validate-scaffold:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.11"
|
||||
|
||||
- name: Install dependencies
|
||||
run: pip install pyyaml
|
||||
|
||||
- name: Validate Matrix/Conduit scaffold
|
||||
run: python3 infra/matrix/scripts/validate-scaffold.py --json
|
||||
|
||||
- name: Check shell scripts are executable
|
||||
run: |
|
||||
test -x infra/matrix/deploy-matrix.sh
|
||||
test -x infra/matrix/host-readiness-check.sh
|
||||
test -x infra/matrix/scripts/deploy-conduit.sh
|
||||
|
||||
- name: Validate docker-compose syntax
|
||||
run: |
|
||||
docker compose -f infra/matrix/docker-compose.yml config > /dev/null
|
||||
@@ -1,41 +0,0 @@
|
||||
|
||||
# Sovereign Efficiency: Local-First & Cost Saving Guide
|
||||
|
||||
This guide outlines the strategy for eliminating waste and optimizing flow within the Timmy Foundation ecosystem.
|
||||
|
||||
## 1. Smart Model Routing (SMR)
|
||||
**Goal:** Use the right tool for the job. Don't use a 14B or 70B model to say "Hello" or "Task complete."
|
||||
|
||||
- **Action:** Enable `smart_model_routing` in `config.yaml`.
|
||||
- **Logic:**
|
||||
- Simple acknowledgments and status updates -> **Gemma 2B / Phi-3 Mini** (Local).
|
||||
- Complex reasoning and coding -> **Hermes 14B / Llama 3 70B** (Local).
|
||||
- Fortress-grade synthesis -> **Claude 3.5 Sonnet / Gemini 1.5 Pro** (Cloud - Emergency Only).
|
||||
|
||||
## 2. Context Compression
|
||||
**Goal:** Keep the KV cache lean. Long sessions shouldn't slow down the "Thought Stream."
|
||||
|
||||
- **Action:** Enable `compression` in `config.yaml`.
|
||||
- **Threshold:** Set to `0.5` to trigger summarization when the context is half full.
|
||||
- **Protect Last N:** Keep the last 20 turns in raw format for immediate coherence.
|
||||
|
||||
## 3. Parallel Symbolic Execution (PSE) Optimization
|
||||
**Goal:** Reduce redundant reasoning cycles in The Nexus.
|
||||
|
||||
- **Action:** The Nexus now uses **Adaptive Reasoning Frequency**. If the world stability is high (>0.9), reasoning cycles are halved.
|
||||
- **Benefit:** Reduces CPU/GPU load on the local harness, leaving more headroom for inference.
|
||||
|
||||
## 4. L402 Cost Transparency
|
||||
**Goal:** Treat compute as a finite resource.
|
||||
|
||||
- **Action:** Use the **Sovereign Health HUD** in The Nexus to monitor L402 challenges.
|
||||
- **Metric:** Track "Sats per Thought" to identify which agents are "token-heavy."
|
||||
|
||||
## 5. Waste Elimination (Ghost Triage)
|
||||
**Goal:** Remove stale state.
|
||||
|
||||
- **Action:** Run the `triage_sprint.ts` script weekly to assign or archive stale issues.
|
||||
- **Action:** Use `hermes --flush-memories` to clear outdated context that no longer serves the current mission.
|
||||
|
||||
---
|
||||
*Sovereignty is not just about ownership; it is about stewardship of resources.*
|
||||
@@ -11,7 +11,7 @@ set -euo pipefail
|
||||
NUM_WORKERS="${1:-2}"
|
||||
MAX_WORKERS=10 # absolute ceiling
|
||||
WORKTREE_BASE="$HOME/worktrees"
|
||||
GITEA_URL="${GITEA_URL:-https://forge.alexanderwhitestone.com}"
|
||||
GITEA_URL="http://143.198.27.163:3000"
|
||||
GITEA_TOKEN=$(cat "$HOME/.hermes/claude_token")
|
||||
CLAUDE_TIMEOUT=900 # 15 min per issue
|
||||
COOLDOWN=15 # seconds between issues — stagger clones
|
||||
|
||||
@@ -7,26 +7,13 @@
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
GEMINI_KEY_FILE="${GEMINI_KEY_FILE:-$HOME/.timmy/gemini_free_tier_key}"
|
||||
if [ -f "$GEMINI_KEY_FILE" ]; then
|
||||
export GEMINI_API_KEY="$(python3 - "$GEMINI_KEY_FILE" <<'PY'
|
||||
from pathlib import Path
|
||||
import sys
|
||||
text = Path(sys.argv[1]).read_text(errors='ignore').splitlines()
|
||||
for line in text:
|
||||
line=line.strip()
|
||||
if line:
|
||||
print(line)
|
||||
break
|
||||
PY
|
||||
)"
|
||||
fi
|
||||
export GEMINI_API_KEY="AIzaSyAmGgS516K4PwlODFEnghL535yzoLnofKM"
|
||||
|
||||
# === CONFIG ===
|
||||
NUM_WORKERS="${1:-2}"
|
||||
MAX_WORKERS=5
|
||||
WORKTREE_BASE="$HOME/worktrees"
|
||||
GITEA_URL="${GITEA_URL:-https://forge.alexanderwhitestone.com}"
|
||||
GITEA_URL="http://143.198.27.163:3000"
|
||||
GITEA_TOKEN=$(cat "$HOME/.hermes/gemini_token")
|
||||
GEMINI_TIMEOUT=600 # 10 min per issue
|
||||
COOLDOWN=15 # seconds between issues — stagger clones
|
||||
@@ -37,7 +24,6 @@ SKIP_FILE="$LOG_DIR/gemini-skip-list.json"
|
||||
LOCK_DIR="$LOG_DIR/gemini-locks"
|
||||
ACTIVE_FILE="$LOG_DIR/gemini-active.json"
|
||||
ALLOW_SELF_ASSIGN="${ALLOW_SELF_ASSIGN:-0}" # 0 = only explicitly-assigned Gemini work
|
||||
AUTH_INVALID_SLEEP=900
|
||||
|
||||
mkdir -p "$LOG_DIR" "$WORKTREE_BASE" "$LOCK_DIR"
|
||||
[ -f "$SKIP_FILE" ] || echo '{}' > "$SKIP_FILE"
|
||||
@@ -48,124 +34,6 @@ log() {
|
||||
echo "[$(date '+%Y-%m-%d %H:%M:%S')] $*" >> "$LOG_DIR/gemini-loop.log"
|
||||
}
|
||||
|
||||
post_issue_comment() {
|
||||
local repo_owner="$1" repo_name="$2" issue_num="$3" body="$4"
|
||||
local payload
|
||||
payload=$(python3 - "$body" <<'PY'
|
||||
import json, sys
|
||||
print(json.dumps({"body": sys.argv[1]}))
|
||||
PY
|
||||
)
|
||||
curl -sf -X POST "${GITEA_URL}/api/v1/repos/${repo_owner}/${repo_name}/issues/${issue_num}/comments" -H "Authorization: token ${GITEA_TOKEN}" -H "Content-Type: application/json" -d "$payload" >/dev/null 2>&1 || true
|
||||
}
|
||||
|
||||
remote_branch_exists() {
|
||||
local branch="$1"
|
||||
git ls-remote --heads origin "$branch" 2>/dev/null | grep -q .
|
||||
}
|
||||
|
||||
get_pr_num() {
|
||||
local repo_owner="$1" repo_name="$2" branch="$3"
|
||||
curl -sf "${GITEA_URL}/api/v1/repos/${repo_owner}/${repo_name}/pulls?state=all&head=${repo_owner}:${branch}&limit=1" -H "Authorization: token ${GITEA_TOKEN}" | python3 -c "
|
||||
import sys,json
|
||||
prs = json.load(sys.stdin)
|
||||
if prs: print(prs[0]['number'])
|
||||
else: print('')
|
||||
" 2>/dev/null
|
||||
}
|
||||
|
||||
get_pr_file_count() {
|
||||
local repo_owner="$1" repo_name="$2" pr_num="$3"
|
||||
curl -sf "${GITEA_URL}/api/v1/repos/${repo_owner}/${repo_name}/pulls/${pr_num}/files" -H "Authorization: token ${GITEA_TOKEN}" | python3 -c "
|
||||
import sys, json
|
||||
try:
|
||||
files = json.load(sys.stdin)
|
||||
print(len(files) if isinstance(files, list) else 0)
|
||||
except:
|
||||
print(0)
|
||||
" 2>/dev/null
|
||||
}
|
||||
|
||||
get_pr_state() {
|
||||
local repo_owner="$1" repo_name="$2" pr_num="$3"
|
||||
curl -sf "${GITEA_URL}/api/v1/repos/${repo_owner}/${repo_name}/pulls/${pr_num}" -H "Authorization: token ${GITEA_TOKEN}" | python3 -c "
|
||||
import sys, json
|
||||
try:
|
||||
pr = json.load(sys.stdin)
|
||||
if pr.get('merged'):
|
||||
print('merged')
|
||||
else:
|
||||
print(pr.get('state', 'unknown'))
|
||||
except:
|
||||
print('unknown')
|
||||
" 2>/dev/null
|
||||
}
|
||||
|
||||
get_issue_state() {
|
||||
local repo_owner="$1" repo_name="$2" issue_num="$3"
|
||||
curl -sf "${GITEA_URL}/api/v1/repos/${repo_owner}/${repo_name}/issues/${issue_num}" -H "Authorization: token ${GITEA_TOKEN}" | python3 -c "
|
||||
import sys, json
|
||||
try:
|
||||
issue = json.load(sys.stdin)
|
||||
print(issue.get('state', 'unknown'))
|
||||
except:
|
||||
print('unknown')
|
||||
" 2>/dev/null
|
||||
}
|
||||
|
||||
proof_comment_status() {
|
||||
local repo_owner="$1" repo_name="$2" issue_num="$3" branch="$4"
|
||||
curl -sf "${GITEA_URL}/api/v1/repos/${repo_owner}/${repo_name}/issues/${issue_num}/comments" -H "Authorization: token ${GITEA_TOKEN}" | BRANCH="$branch" python3 -c "
|
||||
import os, sys, json
|
||||
branch = os.environ.get('BRANCH', '').lower()
|
||||
try:
|
||||
comments = json.load(sys.stdin)
|
||||
except Exception:
|
||||
print('missing|')
|
||||
raise SystemExit(0)
|
||||
for c in reversed(comments):
|
||||
user = ((c.get('user') or {}).get('login') or '').lower()
|
||||
body = c.get('body') or ''
|
||||
body_l = body.lower()
|
||||
if user != 'gemini':
|
||||
continue
|
||||
if 'proof:' not in body_l and 'verification:' not in body_l:
|
||||
continue
|
||||
has_branch = branch in body_l
|
||||
has_pr = ('pr:' in body_l) or ('pull request:' in body_l) or ('/pulls/' in body_l)
|
||||
has_push = ('push:' in body_l) or ('pushed' in body_l)
|
||||
has_verify = ('tox' in body_l) or ('pytest' in body_l) or ('verification:' in body_l) or ('npm test' in body_l)
|
||||
status = 'ok' if (has_branch and has_pr and has_push and has_verify) else 'incomplete'
|
||||
print(status + '|' + (c.get('html_url') or ''))
|
||||
raise SystemExit(0)
|
||||
print('missing|')
|
||||
" 2>/dev/null
|
||||
}
|
||||
|
||||
gemini_auth_invalid() {
|
||||
local issue_num="$1"
|
||||
grep -q "API_KEY_INVALID\|API key expired" "$LOG_DIR/gemini-${issue_num}.log" 2>/dev/null
|
||||
}
|
||||
|
||||
issue_is_code_fit() {
|
||||
local title="$1"
|
||||
local labels="$2"
|
||||
local body="$3"
|
||||
local haystack
|
||||
haystack="${title} ${labels} ${body}"
|
||||
local low="${haystack,,}"
|
||||
|
||||
if [[ "$low" == *"[morning report]"* ]]; then return 1; fi
|
||||
if [[ "$low" == *"[kt]"* ]]; then return 1; fi
|
||||
if [[ "$low" == *"policy:"* ]]; then return 1; fi
|
||||
if [[ "$low" == *"incident:"* || "$low" == *"🚨 incident"* || "$low" == *"[incident]"* ]]; then return 1; fi
|
||||
if [[ "$low" == *"fleet lexicon"* || "$low" == *"shared vocabulary"* || "$low" == *"rubric"* ]]; then return 1; fi
|
||||
if [[ "$low" == *"archive ghost"* || "$low" == *"reassign"* || "$low" == *"offload"* || "$low" == *"burn directive"* ]]; then return 1; fi
|
||||
if [[ "$low" == *"review all open prs"* ]]; then return 1; fi
|
||||
if [[ "$low" == *"epic"* ]]; then return 1; fi
|
||||
return 0
|
||||
}
|
||||
|
||||
lock_issue() {
|
||||
local issue_key="$1"
|
||||
local lockfile="$LOCK_DIR/$issue_key.lock"
|
||||
@@ -222,7 +90,6 @@ with open('$ACTIVE_FILE', 'r+') as f:
|
||||
|
||||
cleanup_workdir() {
|
||||
local wt="$1"
|
||||
cd "$HOME" 2>/dev/null || true
|
||||
rm -rf "$wt" 2>/dev/null || true
|
||||
}
|
||||
|
||||
@@ -287,11 +154,8 @@ for i in all_issues:
|
||||
continue
|
||||
|
||||
title = i['title'].lower()
|
||||
labels = [l['name'].lower() for l in (i.get('labels') or [])]
|
||||
body = (i.get('body') or '').lower()
|
||||
if '[philosophy]' in title: continue
|
||||
if '[epic]' in title or 'epic:' in title: continue
|
||||
if 'epic' in labels: continue
|
||||
if '[showcase]' in title: continue
|
||||
if '[do not close' in title: continue
|
||||
if '[meta]' in title: continue
|
||||
@@ -300,11 +164,6 @@ for i in all_issues:
|
||||
if '[morning report]' in title: continue
|
||||
if '[retro]' in title: continue
|
||||
if '[intel]' in title: continue
|
||||
if '[kt]' in title: continue
|
||||
if 'policy:' in title: continue
|
||||
if 'incident' in title: continue
|
||||
if 'lexicon' in title or 'shared vocabulary' in title or 'rubric' in title: continue
|
||||
if 'archive ghost' in title or 'reassign' in title or 'offload' in title: continue
|
||||
if 'master escalation' in title: continue
|
||||
if any(a['login'] == 'Rockachopa' for a in (i.get('assignees') or [])): continue
|
||||
|
||||
@@ -391,11 +250,10 @@ You can do ANYTHING a developer can do.
|
||||
- If tests fail after 2 attempts, STOP and comment on the issue explaining why.
|
||||
- Be thorough but focused. Fix the issue, don't refactor the world.
|
||||
|
||||
== CRITICAL: FINISH = PUSHED + PR'D + PROVED ==
|
||||
== CRITICAL: ALWAYS COMMIT AND PUSH ==
|
||||
- NEVER exit without committing your work. Even partial progress MUST be committed.
|
||||
- Before you finish, ALWAYS: git add -A && git commit && git push origin gemini/issue-${issue_num}
|
||||
- ALWAYS create a PR before exiting. No exceptions.
|
||||
- ALWAYS post the Proof block before exiting. No proof comment = not done.
|
||||
- If a branch already exists with prior work, check it out and CONTINUE from where it left off.
|
||||
- Check: git ls-remote origin gemini/issue-${issue_num} — if it exists, pull it first.
|
||||
- Your work is WASTED if it's not pushed. Push early, push often.
|
||||
@@ -506,10 +364,19 @@ Work in progress, may need continuation." 2>/dev/null || true
|
||||
fi
|
||||
|
||||
# ── Create PR if needed ──
|
||||
pr_num=$(get_pr_num "$repo_owner" "$repo_name" "$branch")
|
||||
pr_num=$(curl -sf "${GITEA_URL}/api/v1/repos/${repo_owner}/${repo_name}/pulls?state=open&head=${repo_owner}:${branch}&limit=1" \
|
||||
-H "Authorization: token ${GITEA_TOKEN}" | python3 -c "
|
||||
import sys,json
|
||||
prs = json.load(sys.stdin)
|
||||
if prs: print(prs[0]['number'])
|
||||
else: print('')
|
||||
" 2>/dev/null)
|
||||
|
||||
if [ -z "$pr_num" ] && [ "${UNPUSHED:-0}" -gt 0 ]; then
|
||||
pr_num=$(curl -sf -X POST "${GITEA_URL}/api/v1/repos/${repo_owner}/${repo_name}/pulls" -H "Authorization: token ${GITEA_TOKEN}" -H "Content-Type: application/json" -d "$(python3 -c "
|
||||
pr_num=$(curl -sf -X POST "${GITEA_URL}/api/v1/repos/${repo_owner}/${repo_name}/pulls" \
|
||||
-H "Authorization: token ${GITEA_TOKEN}" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "$(python3 -c "
|
||||
import json
|
||||
print(json.dumps({
|
||||
'title': 'Gemini: Issue #${issue_num}',
|
||||
@@ -521,72 +388,26 @@ print(json.dumps({
|
||||
[ -n "$pr_num" ] && log "WORKER-${worker_id}: Created PR #${pr_num} for issue #${issue_num}"
|
||||
fi
|
||||
|
||||
# ── Verify finish semantics / classify failures ──
|
||||
# ── Merge + close on success ──
|
||||
if [ "$exit_code" -eq 0 ]; then
|
||||
log "WORKER-${worker_id}: SUCCESS #${issue_num} exited 0 — verifying push + PR + proof"
|
||||
if ! remote_branch_exists "$branch"; then
|
||||
log "WORKER-${worker_id}: BLOCKED #${issue_num} remote branch missing"
|
||||
post_issue_comment "$repo_owner" "$repo_name" "$issue_num" "Loop gate blocked completion: remote branch ${branch} was not found on origin after Gemini exited. Issue remains open for retry."
|
||||
mark_skip "$issue_num" "missing_remote_branch" 1
|
||||
consecutive_failures=$((consecutive_failures + 1))
|
||||
elif [ -z "$pr_num" ]; then
|
||||
log "WORKER-${worker_id}: BLOCKED #${issue_num} no PR found"
|
||||
post_issue_comment "$repo_owner" "$repo_name" "$issue_num" "Loop gate blocked completion: branch ${branch} exists remotely, but no PR was found. Issue remains open for retry."
|
||||
mark_skip "$issue_num" "missing_pr" 1
|
||||
consecutive_failures=$((consecutive_failures + 1))
|
||||
else
|
||||
pr_files=$(get_pr_file_count "$repo_owner" "$repo_name" "$pr_num")
|
||||
if [ "${pr_files:-0}" -eq 0 ]; then
|
||||
log "WORKER-${worker_id}: BLOCKED #${issue_num} PR #${pr_num} has 0 changed files"
|
||||
curl -sf -X PATCH "${GITEA_URL}/api/v1/repos/${repo_owner}/${repo_name}/pulls/${pr_num}" -H "Authorization: token ${GITEA_TOKEN}" -H "Content-Type: application/json" -d '{"state": "closed"}' >/dev/null 2>&1 || true
|
||||
post_issue_comment "$repo_owner" "$repo_name" "$issue_num" "PR #${pr_num} was closed automatically: it had 0 changed files (empty commit). Issue remains open for retry."
|
||||
mark_skip "$issue_num" "empty_commit" 2
|
||||
consecutive_failures=$((consecutive_failures + 1))
|
||||
else
|
||||
proof_status=$(proof_comment_status "$repo_owner" "$repo_name" "$issue_num" "$branch")
|
||||
proof_state="${proof_status%%|*}"
|
||||
proof_url="${proof_status#*|}"
|
||||
if [ "$proof_state" != "ok" ]; then
|
||||
log "WORKER-${worker_id}: BLOCKED #${issue_num} proof missing or incomplete (${proof_state})"
|
||||
post_issue_comment "$repo_owner" "$repo_name" "$issue_num" "Loop gate blocked completion: PR #${pr_num} exists and has ${pr_files} changed file(s), but the required Proof block from Gemini is missing or incomplete. Issue remains open for retry."
|
||||
mark_skip "$issue_num" "missing_proof" 1
|
||||
consecutive_failures=$((consecutive_failures + 1))
|
||||
else
|
||||
log "WORKER-${worker_id}: PROOF verified ${proof_url}"
|
||||
pr_state=$(get_pr_state "$repo_owner" "$repo_name" "$pr_num")
|
||||
if [ "$pr_state" = "open" ]; then
|
||||
curl -sf -X POST "${GITEA_URL}/api/v1/repos/${repo_owner}/${repo_name}/pulls/${pr_num}/merge" -H "Authorization: token ${GITEA_TOKEN}" -H "Content-Type: application/json" -d '{"Do": "squash"}' >/dev/null 2>&1 || true
|
||||
pr_state=$(get_pr_state "$repo_owner" "$repo_name" "$pr_num")
|
||||
fi
|
||||
if [ "$pr_state" = "merged" ]; then
|
||||
curl -sf -X PATCH "${GITEA_URL}/api/v1/repos/${repo_owner}/${repo_name}/issues/${issue_num}" -H "Authorization: token ${GITEA_TOKEN}" -H "Content-Type: application/json" -d '{"state": "closed"}' >/dev/null 2>&1 || true
|
||||
issue_state=$(get_issue_state "$repo_owner" "$repo_name" "$issue_num")
|
||||
if [ "$issue_state" = "closed" ]; then
|
||||
log "WORKER-${worker_id}: VERIFIED #${issue_num} branch pushed, PR merged, proof present, issue closed"
|
||||
consecutive_failures=0
|
||||
else
|
||||
log "WORKER-${worker_id}: BLOCKED #${issue_num} issue did not close after merge"
|
||||
mark_skip "$issue_num" "issue_close_unverified" 1
|
||||
consecutive_failures=$((consecutive_failures + 1))
|
||||
fi
|
||||
else
|
||||
log "WORKER-${worker_id}: BLOCKED #${issue_num} merge not verified (state=${pr_state})"
|
||||
mark_skip "$issue_num" "merge_unverified" 1
|
||||
consecutive_failures=$((consecutive_failures + 1))
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
log "WORKER-${worker_id}: SUCCESS #${issue_num}"
|
||||
if [ -n "$pr_num" ]; then
|
||||
curl -sf -X POST "${GITEA_URL}/api/v1/repos/${repo_owner}/${repo_name}/pulls/${pr_num}/merge" \
|
||||
-H "Authorization: token ${GITEA_TOKEN}" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"Do": "squash"}' >/dev/null 2>&1 || true
|
||||
curl -sf -X PATCH "${GITEA_URL}/api/v1/repos/${repo_owner}/${repo_name}/issues/${issue_num}" \
|
||||
-H "Authorization: token ${GITEA_TOKEN}" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"state": "closed"}' >/dev/null 2>&1 || true
|
||||
log "WORKER-${worker_id}: PR #${pr_num} merged, issue #${issue_num} closed"
|
||||
fi
|
||||
consecutive_failures=0
|
||||
elif [ "$exit_code" -eq 124 ]; then
|
||||
log "WORKER-${worker_id}: TIMEOUT #${issue_num} (work saved in PR)"
|
||||
consecutive_failures=$((consecutive_failures + 1))
|
||||
else
|
||||
if gemini_auth_invalid "$issue_num"; then
|
||||
log "WORKER-${worker_id}: AUTH INVALID on #${issue_num} — sleeping ${AUTH_INVALID_SLEEP}s"
|
||||
mark_skip "$issue_num" "gemini_auth_invalid" 1
|
||||
sleep "$AUTH_INVALID_SLEEP"
|
||||
consecutive_failures=$((consecutive_failures + 5))
|
||||
elif grep -q "rate_limit\|rate limit\|429\|overloaded\|quota" "$LOG_DIR/gemini-${issue_num}.log" 2>/dev/null; then
|
||||
if grep -q "rate_limit\|rate limit\|429\|overloaded\|quota" "$LOG_DIR/gemini-${issue_num}.log" 2>/dev/null; then
|
||||
log "WORKER-${worker_id}: RATE LIMITED on #${issue_num} (work saved)"
|
||||
consecutive_failures=$((consecutive_failures + 3))
|
||||
else
|
||||
@@ -623,7 +444,7 @@ print(json.dumps({
|
||||
'pr': '${pr_num:-}',
|
||||
'merged': $( [ '$OUTCOME' = 'success' ] && [ -n '${pr_num:-}' ] && echo 'true' || echo 'false' )
|
||||
}))
|
||||
" >> "$LOG_DIR/gemini-metrics.jsonl" 2>/dev/null
|
||||
" >> "$LOG_DIR/claude-metrics.jsonl" 2>/dev/null
|
||||
|
||||
cleanup_workdir "$worktree"
|
||||
unlock_issue "$issue_key"
|
||||
|
||||
@@ -8,7 +8,7 @@ set -uo pipefail
|
||||
LOG_DIR="$HOME/.hermes/logs"
|
||||
LOG="$LOG_DIR/timmy-orchestrator.log"
|
||||
PIDFILE="$LOG_DIR/timmy-orchestrator.pid"
|
||||
GITEA_URL="${GITEA_URL:-https://forge.alexanderwhitestone.com}"
|
||||
GITEA_URL="http://143.198.27.163:3000"
|
||||
GITEA_TOKEN=$(cat "$HOME/.hermes/gitea_token_vps" 2>/dev/null) # Timmy token, NOT rockachopa
|
||||
CYCLE_INTERVAL=300
|
||||
HERMES_TIMEOUT=180
|
||||
|
||||
10
config.yaml
10
config.yaml
@@ -34,7 +34,7 @@ checkpoints:
|
||||
enabled: true
|
||||
max_snapshots: 50
|
||||
compression:
|
||||
enabled: true
|
||||
enabled: false
|
||||
threshold: 0.5
|
||||
target_ratio: 0.2
|
||||
protect_last_n: 20
|
||||
@@ -42,13 +42,13 @@ compression:
|
||||
summary_provider: ''
|
||||
summary_base_url: ''
|
||||
smart_model_routing:
|
||||
enabled: true
|
||||
enabled: false
|
||||
max_simple_chars: 200
|
||||
max_simple_words: 35
|
||||
cheap_model:
|
||||
provider: 'ollama'
|
||||
model: 'gemma2:2b'
|
||||
base_url: 'http://localhost:11434/v1'
|
||||
provider: ''
|
||||
model: ''
|
||||
base_url: ''
|
||||
api_key: ''
|
||||
auxiliary:
|
||||
vision:
|
||||
|
||||
@@ -1,83 +0,0 @@
|
||||
# ADR-001: Matrix/Conduit Deployment Scaffold
|
||||
|
||||
| Field | Value |
|
||||
|-------|-------|
|
||||
| **Status** | Accepted |
|
||||
| **Date** | 2026-04-05 |
|
||||
| **Decider** | Ezra (Architekt) |
|
||||
| **Stakeholders** | Allegro, Timmy, Alexander |
|
||||
| **Parent Issues** | #166, #183 |
|
||||
|
||||
---
|
||||
|
||||
## 1. Context
|
||||
|
||||
Son of Timmy Commandment 6 requires encrypted human-to-fleet communication that is sovereign and independent of Telegram. Before any code can run, we needed a reproducible, infrastructure-agnostic deployment scaffold that any wizard house can verify, deploy, and restore.
|
||||
|
||||
## 2. Decision: Conduit over Synapse
|
||||
|
||||
**Chosen:** [Conduit](https://conduit.rs) as the Matrix homeserver.
|
||||
|
||||
**Alternatives considered:**
|
||||
- **Synapse**: Mature, but heavier (Python, more RAM, more complex config).
|
||||
- **Dendrite**: Go-based, lighter than Synapse, but less feature-complete for E2EE.
|
||||
|
||||
**Rationale:**
|
||||
- Conduit is written in Rust, has a small footprint, and runs comfortably on the Hermes VPS (~7 GB RAM).
|
||||
- Single static binary + SQLite (or Postgres) keeps the Docker image small and backup logic simple.
|
||||
- E2EE support is production-grade enough for a closed fleet.
|
||||
|
||||
## 3. Decision: Docker Compose over Bare Metal
|
||||
|
||||
**Chosen:** Docker Compose stack (`docker-compose.yml`) with explicit volume mounts.
|
||||
|
||||
**Rationale:**
|
||||
- Reproducibility: any host with Docker can stand the stack up in one command.
|
||||
- Isolation: Conduit, Element Web, and Postgres live in separate containers with explicit network boundaries.
|
||||
- Rollback: `docker compose down && docker compose up -d` is a safe, fast recovery path.
|
||||
- Future portability: the same Compose file can move to a different VPS with only `.env` changes.
|
||||
|
||||
## 4. Decision: Caddy as Reverse Proxy (with Nginx coexistence)
|
||||
|
||||
**Chosen:** Caddy handles TLS termination and `.well-known/matrix` delegation inside the Compose network.
|
||||
|
||||
**Rationale:**
|
||||
- Caddy automates Let’s Encrypt TLS via on-demand TLS.
|
||||
- On hosts where Nginx already binds 80/443 (e.g., Hermes VPS), Nginx can reverse-proxy to Caddy or Conduit directly.
|
||||
- The scaffold includes both a `caddy/Caddyfile` and Nginx-compatible notes so the operator is not locked into one proxy.
|
||||
|
||||
## 5. Decision: One Matrix Account Per Wizard House
|
||||
|
||||
**Chosen:** Each wizard house (Ezra, Allegro, Bezalel, etc.) gets its own Matrix user ID (`@ezra:domain`, `@allegro:domain`).
|
||||
|
||||
**Rationale:**
|
||||
- Preserves sovereignty: each house has its own credentials, device keys, and E2EE trust chain.
|
||||
- Matches the existing wizard-house mental model (independent agents, shared rooms).
|
||||
- Simplifies debugging: message provenance is unambiguous.
|
||||
|
||||
## 6. Decision: `matrix-nio` for Hermes Gateway Integration
|
||||
|
||||
**Chosen:** [`matrix-nio`](https://github.com/poljar/matrix-nio) with the `e2e` extra.
|
||||
|
||||
**Rationale:**
|
||||
- Already integrated into the Hermes gateway (`gateway/platforms/matrix.py`).
|
||||
- Asyncio-native, matching the Hermes gateway architecture.
|
||||
- Supports E2EE, media uploads, threads, and replies.
|
||||
|
||||
## 7. Consequences
|
||||
|
||||
### Positive
|
||||
- The scaffold is **self-enforcing**: `validate-scaffold.py` and Gitea Actions CI guard integrity.
|
||||
- Local integration can be verified without public DNS via `docker-compose.test.yml`.
|
||||
- The path from "host decision" to "fleet online" is fully scripted.
|
||||
|
||||
### Negative / Accepted Trade-offs
|
||||
- Conduit is younger than Synapse; edge-case federation bugs are possible. Mitigation: the fleet will run on a single homeserver initially.
|
||||
- SQLite is the default Conduit backend. For >100 users, Postgres is recommended. The Compose file includes an optional Postgres service.
|
||||
|
||||
## 8. References
|
||||
|
||||
- `infra/matrix/CANONICAL_INDEX.md` — canonical artifact map
|
||||
- `infra/matrix/scripts/validate-scaffold.py` — automated integrity checks
|
||||
- `.gitea/workflows/validate-matrix-scaffold.yml` — CI enforcement
|
||||
- `infra/matrix/HERMES_INTEGRATION_VERIFICATION.md` — adapter-to-scaffold mapping
|
||||
@@ -1,149 +0,0 @@
|
||||
# Telegram → Matrix Cutover Plan
|
||||
|
||||
> **Issue**: [#166](http://143.198.27.163:3000/Timmy_Foundation/timmy-config/issues/166) — Stand up Matrix/Conduit for human-to-fleet encrypted communication
|
||||
> **Scaffold**: [#183](http://143.198.27.163:3000/Timmy_Foundation/timmy-config/issues/183)
|
||||
> **Created**: Ezra, Archivist | Date: 2026-04-05
|
||||
> **Purpose**: Zero-downtime migration from Telegram to Matrix as the sovereign human-to-fleet command surface.
|
||||
|
||||
---
|
||||
|
||||
## Principle
|
||||
|
||||
**Parallel operation first, cutover second.** Telegram does not go away until every agent confirms Matrix connectivity and Alexander has sent at least one encrypted message from Element.
|
||||
|
||||
---
|
||||
|
||||
## Phase 0: Pre-Conditions (All Must Be True)
|
||||
|
||||
| # | Condition | Verification Command |
|
||||
|---|-----------|---------------------|
|
||||
| 1 | Conduit deployed and healthy | `curl https://<domain>/_matrix/client/versions` |
|
||||
| 2 | Fleet rooms created | `python3 infra/matrix/scripts/bootstrap-fleet-rooms.py --dry-run` |
|
||||
| 3 | Alexander has Element client installed | Visual confirmation |
|
||||
| 4 | At least 3 agents have Matrix accounts | `@agentname:<domain>` exists |
|
||||
| 5 | Hermes Matrix gateway configured | `hermes gateway` shows Matrix platform |
|
||||
|
||||
---
|
||||
|
||||
## Phase 1: Parallel Run (Days 1–7)
|
||||
|
||||
### Day 1: Room Bootstrap
|
||||
|
||||
```bash
|
||||
# 1. SSH to Conduit host
|
||||
cd /opt/timmy-config/infra/matrix
|
||||
|
||||
# 2. Verify health
|
||||
./host-readiness-check.sh
|
||||
|
||||
# 3. Create rooms (dry-run first)
|
||||
export MATRIX_HOMESERVER="https://matrix.timmytime.net"
|
||||
export MATRIX_ADMIN_TOKEN="<admin_access_token>"
|
||||
python3 scripts/bootstrap-fleet-rooms.py --create-all --dry-run
|
||||
|
||||
# 4. Create rooms (live)
|
||||
python3 scripts/bootstrap-fleet-rooms.py --create-all
|
||||
```
|
||||
|
||||
### Day 1: Operator Onboarding
|
||||
|
||||
1. Open Element Web at `https://element.<domain>` or install Element desktop.
|
||||
2. Register/login as `@alexander:<domain>`.
|
||||
3. Join `#fleet-ops:<domain>`.
|
||||
4. Send a test message: `First light on Matrix. Acknowledge, fleet.`
|
||||
|
||||
### Days 2–3: Agent Onboarding
|
||||
|
||||
For each agent/wizard house:
|
||||
1. Create Matrix account `@<agent>:<domain>`.
|
||||
2. Join `#fleet-ops:<domain>` and `#fleet-general:<domain>`.
|
||||
3. Send acknowledgment in `#fleet-ops`.
|
||||
4. Update agent's Hermes gateway config to listen on Matrix.
|
||||
|
||||
### Days 4–6: Parallel Commanding
|
||||
|
||||
- **Alexander sends all commands in BOTH Telegram and Matrix.**
|
||||
- Agents respond in the channel where they are most reliable.
|
||||
- Monitor for message loss or delivery delays.
|
||||
|
||||
---
|
||||
|
||||
## Phase 2: Cutover (Day 7)
|
||||
|
||||
### Step 1: Pin Matrix as Primary
|
||||
|
||||
In Telegram `#fleet-ops`:
|
||||
> "📌 PRIMARY SURFACE CHANGE: Matrix is now the sovereign command channel. Telegram remains as fallback for 48 hours. Join: `<matrix_invite_link>`"
|
||||
|
||||
### Step 2: Telegram Gateway Downgrade
|
||||
|
||||
Edit each agent's Hermes gateway config:
|
||||
|
||||
```yaml
|
||||
# ~/.hermes/config.yaml
|
||||
gateway:
|
||||
primary_platform: matrix
|
||||
fallback_platform: telegram
|
||||
matrix:
|
||||
enabled: true
|
||||
homeserver: https://matrix.timmytime.net
|
||||
rooms:
|
||||
- "#fleet-ops:matrix.timmytime.net"
|
||||
telegram:
|
||||
enabled: true # Fallback only
|
||||
```
|
||||
|
||||
### Step 3: Verification Checklist
|
||||
|
||||
- [ ] Alexander sends command **only** on Matrix
|
||||
- [ ] All agents respond within 60 seconds
|
||||
- [ ] Encrypted room icon shows 🔒 in Element
|
||||
- [ ] No messages lost in 24-hour window
|
||||
- [ ] At least one voice/file message test succeeds
|
||||
|
||||
### Step 4: Telegram Standby
|
||||
|
||||
If all checks pass:
|
||||
1. Pin final notice in Telegram: "Fallback mode only. Active surface is Matrix."
|
||||
2. Disable Telegram bot webhooks (do not delete the bot).
|
||||
3. Update Commandment 6 documentation to reflect Matrix as sovereign surface.
|
||||
|
||||
---
|
||||
|
||||
## Rollback Plan
|
||||
|
||||
If Matrix becomes unreachable or messages are lost:
|
||||
|
||||
1. **Immediate**: Alexander re-sends command in Telegram.
|
||||
2. **Within 1 hour**: All agents switch gateway primary back to Telegram:
|
||||
```yaml
|
||||
primary_platform: telegram
|
||||
```
|
||||
3. **Within 24 hours**: Debug Matrix issue (check Conduit logs, Caddy TLS, DNS).
|
||||
4. **Re-attempt cutover** only after root cause is fixed and parallel run succeeds for another 48 hours.
|
||||
|
||||
---
|
||||
|
||||
## Post-Cutover Maintenance
|
||||
|
||||
| Task | Frequency | Command / Action |
|
||||
|------|-----------|------------------|
|
||||
| Backup Conduit data | Daily | `tar czvf /backups/conduit-$(date +%F).tar.gz /opt/timmy-config/infra/matrix/data/conduit/` |
|
||||
| Review room membership | Weekly | Element → Room Settings → Members |
|
||||
| Update Element Web | Monthly | `docker compose pull && docker compose up -d` |
|
||||
| Rotate access tokens | Quarterly | Element → Settings → Help & About → Access Token |
|
||||
|
||||
---
|
||||
|
||||
## Accountability
|
||||
|
||||
| Role | Owner | Responsibility |
|
||||
|------|-------|----------------|
|
||||
| Deployment | @allegro / @timmy | Run `deploy-matrix.sh` and room bootstrap |
|
||||
| Operator onboarding | @rockachopa (Alexander) | Install Element, verify encryption |
|
||||
| Agent gateway cutover | @ezra | Update Hermes gateway configs, monitor logs |
|
||||
| Rollback decision | @rockachopa | Authorize Telegram fallback if needed |
|
||||
|
||||
---
|
||||
|
||||
*Filed by Ezra, Archivist | 2026-04-05*
|
||||
@@ -1,140 +0,0 @@
|
||||
# Decision Framework: Matrix Host, Domain, and Proxy (#187)
|
||||
|
||||
**Parent:** #166 — Stand up Matrix/Conduit for human-to-fleet encrypted communication
|
||||
**Blocker:** #187 — Decide Matrix host, domain, and proxy prerequisites
|
||||
**Author:** Ezra
|
||||
**Date:** 2026-04-05
|
||||
|
||||
---
|
||||
|
||||
## Executive Summary
|
||||
|
||||
#166 is **execution-ready**. The only remaining gate is three decisions:
|
||||
1. **Host** — which machine runs Conduit?
|
||||
2. **Domain** — what FQDN serves the homeserver?
|
||||
3. **Proxy/TLS** — how do HTTPS and federation terminate?
|
||||
|
||||
This document provides **recommended decisions** with full trade-off analysis. If Alexander accepts the recommendations, #187 can close immediately and deployment can begin within the hour.
|
||||
|
||||
---
|
||||
|
||||
## Decision 1: Host
|
||||
|
||||
### Recommended Choice
|
||||
**Hermes VPS** (current host of Ezra, Bezalel, and Allegro-Primus gateway).
|
||||
|
||||
### Alternative Considered
|
||||
**TestBed VPS** (67.205.155.108) — currently hosts Bezalel (stale) and other experimental workloads.
|
||||
|
||||
### Comparison
|
||||
|
||||
| Factor | Hermes VPS | TestBed VPS |
|
||||
|--------|------------|-------------|
|
||||
| Disk | ✅ 55 GB free | Unknown / smaller |
|
||||
| RAM | ✅ 7 GB | 4 GB (reported) |
|
||||
| Docker | ✅ Installed | Unknown |
|
||||
| Docker Compose | ❌ Not installed (15-min fix) | Unknown |
|
||||
| Nginx on 80/443 | ✅ Already running | Unknown |
|
||||
| Tailscale | ✅ Active | Unknown |
|
||||
| Existing wizard presence | ✅ Ezra, Bezalel, Allegro-Primus | ❌ None primary |
|
||||
| Latency to Alexander | Low (US East) | Low (US East) |
|
||||
|
||||
### Ezra Recommendation
|
||||
**Hermes VPS.** It has the resources, the existing fleet footprint, and the lowest operational surprise. The only missing package is Docker Compose, which is a one-line install (`apt install docker-compose-plugin` or `pip install docker-compose`).
|
||||
|
||||
---
|
||||
|
||||
## Decision 2: Domain / Subdomain
|
||||
|
||||
### Recommended Choice
|
||||
`matrix.alexanderwhitestone.com`
|
||||
|
||||
### Alternatives Considered
|
||||
- `fleet.alexanderwhitestone.com`
|
||||
- `chat.alexanderwhitestone.com`
|
||||
- `conduit.alexanderwhitestone.com`
|
||||
|
||||
### Analysis
|
||||
|
||||
| Subdomain | Clarity | Federation Friendly | Notes |
|
||||
|-----------|---------|---------------------|-------|
|
||||
| `matrix.*` | ✅ Industry standard | ✅ Easy to remember | Best for `.well-known/matrix/server` delegation |
|
||||
| `fleet.*` | ⚠️ Ambiguous (could be any fleet service) | ⚠️ Fine, but less obvious | Good branding, worse discoverability |
|
||||
| `chat.*` | ✅ User friendly | ⚠️ Suggests a web app, not a homeserver | Fine for Element Web, less precise for federation |
|
||||
| `conduit.*` | ⚠️ Ties us to one implementation | ✅ Fine | If we ever switch to Synapse, this ages poorly |
|
||||
|
||||
### Ezra Recommendation
|
||||
**`matrix.alexanderwhitestone.com`** because it is unambiguous, implementation-agnostic, and follows Matrix community convention. The server name can still be `alexanderwhitestone.com` (for short Matrix IDs like `@ezra:alexanderwhitestone.com`) while the actual homeserver listens on `matrix.alexanderwhitestone.com:8448` or is delegated via `.well-known`.
|
||||
|
||||
---
|
||||
|
||||
## Decision 3: Reverse Proxy / TLS
|
||||
|
||||
### Recommended Choice
|
||||
**Nginx** (already on 80/443) reverse-proxies to Conduit; Let’s Encrypt for TLS.
|
||||
|
||||
### Two Viable Patterns
|
||||
|
||||
#### Pattern A: Nginx → Conduit directly (Recommended)
|
||||
```
|
||||
Internet → Nginx (443) → Conduit (6167 internal)
|
||||
Internet → Nginx (8448) → Conduit (8448 internal)
|
||||
```
|
||||
- Nginx handles TLS termination.
|
||||
- Conduit runs plain HTTP on an internal port.
|
||||
- Federation port 8448 is exposed through Nginx stream or server block.
|
||||
|
||||
#### Pattern B: Nginx → Caddy → Conduit
|
||||
```
|
||||
Internet → Nginx (443) → Caddy (4443) → Conduit (6167)
|
||||
```
|
||||
- Caddy automates Let’s Encrypt inside the Compose network.
|
||||
- Nginx remains the edge listener.
|
||||
- More moving parts, but Caddy’s on-demand TLS is convenient.
|
||||
|
||||
### Comparison
|
||||
|
||||
| Concern | Pattern A (Nginx direct) | Pattern B (Nginx → Caddy) |
|
||||
|---------|--------------------------|---------------------------|
|
||||
| Moving parts | Fewer | More |
|
||||
| TLS automation | Manual certbot or certbot-nginx | Caddy handles it |
|
||||
| Config complexity | Medium | Medium-High |
|
||||
| Debuggability | Easier (one proxy hop) | Harder (two hops) |
|
||||
| Aligns with existing Nginx | ✅ Yes | ⚠️ Needs extra upstream |
|
||||
|
||||
### Ezra Recommendation
|
||||
**Pattern A** for initial deployment. Nginx is already the edge proxy on Hermes VPS. Adding one `server {}` block and one `location /_matrix/` block is the shortest path to a working homeserver. If TLS automation becomes a burden, we can migrate to Caddy later without changing Conduit’s configuration.
|
||||
|
||||
---
|
||||
|
||||
## Pre-Deployment Checklist (Post-#187)
|
||||
|
||||
Once the decisions above are ratified, the exact execution sequence is:
|
||||
|
||||
1. **Install Docker Compose** on Hermes VPS (if not already present).
|
||||
2. **Create DNS A record** for `matrix.alexanderwhitestone.com` → Hermes VPS public IP.
|
||||
3. **Obtain TLS certificate** for `matrix.alexanderwhitestone.com` (certbot or manual).
|
||||
4. **Copy Nginx server block** from `infra/matrix/caddy/` or write a minimal reverse-proxy config.
|
||||
5. **Run `./host-readiness-check.sh`** and confirm all checks pass.
|
||||
6. **Run `./deploy-matrix.sh`** and wait for Conduit to come online.
|
||||
7. **Run `python3 scripts/bootstrap-fleet-rooms.py --create-all`** to initialize rooms.
|
||||
8. **Run `./scripts/verify-hermes-integration.sh`** to prove E2EE messaging works.
|
||||
9. **Follow `docs/matrix-fleet-comms/CUTOVER_PLAN.md`** for the Telegram → Matrix transition.
|
||||
|
||||
---
|
||||
|
||||
## Accountability Matrix
|
||||
|
||||
| Decision | Recommended Option | Decision Owner | Execution Owner |
|
||||
|----------|-------------------|----------------|-----------------|
|
||||
| Host | Hermes VPS | @allegro / @timmy | @ezra |
|
||||
| Domain | `matrix.alexanderwhitestone.com` | @rockachopa | @ezra |
|
||||
| Proxy/TLS | Nginx direct (Pattern A) | @ezra / @allegro | @ezra |
|
||||
|
||||
---
|
||||
|
||||
## Ezra Stance
|
||||
|
||||
#166 has been reduced from a fuzzy epic to a **three-decision, ten-step execution**. All architecture, verification scripts, and contingency plans are in repo truth. The only missing ingredient is a yes/no on the three decisions above.
|
||||
|
||||
— Ezra, Archivist
|
||||
@@ -231,13 +231,10 @@ When:
|
||||
- ADRs: [`infra/matrix/docs/adr/`](http://143.198.27.163:3000/Timmy_Foundation/timmy-config/src/branch/main/infra/matrix/docs/adr)
|
||||
- Decision Framework: [`docs/DECISION_FRAMEWORK_187.md`](http://143.198.27.163:3000/Timmy_Foundation/timmy-config/src/branch/main/docs/DECISION_FRAMEWORK_187.md)
|
||||
- Operational Runbook: [`infra/matrix/docs/RUNBOOK.md`](http://143.198.27.163:3000/Timmy_Foundation/timmy-config/src/branch/main/infra/matrix/docs/RUNBOOK.md)
|
||||
- **Room Bootstrap Automation**: [`infra/matrix/scripts/bootstrap-fleet-rooms.py`](http://143.198.27.163:3000/Timmy_Foundation/timmy-config/src/branch/main/infra/matrix/scripts/bootstrap-fleet-rooms.py)
|
||||
- **Telegram Cutover Plan**: [`docs/matrix-fleet-comms/CUTOVER_PLAN.md`](http://143.198.27.163:3000/Timmy_Foundation/timmy-config/src/branch/main/docs/matrix-fleet-comms/CUTOVER_PLAN.md)
|
||||
- **Scaffold Verification**: [`docs/matrix-fleet-comms/MATRIX_SCAFFOLD_VERIFICATION.md`](http://143.198.27.163:3000/Timmy_Foundation/timmy-config/src/branch/main/docs/matrix-fleet-comms/MATRIX_SCAFFOLD_VERIFICATION.md)
|
||||
|
||||
---
|
||||
|
||||
**Ezra Sign-off**: This KT removes all ambiguity from #166. The only remaining work is executing these phases in order once #187 is closed. Room creation and Telegram cutover are now automated.
|
||||
**Ezra Sign-off**: This KT removes all ambiguity from #166. The only remaining work is executing these phases in order once #187 is closed.
|
||||
|
||||
— Ezra, Archivist
|
||||
2026-04-05
|
||||
|
||||
@@ -1,363 +0,0 @@
|
||||
# Hermes Matrix Client Integration Specification
|
||||
|
||||
> **Issue**: [#166](http://143.198.27.163:3000/Timmy_Foundation/timmy-config/issues/166) — Stand up Matrix/Conduit
|
||||
> **Created**: Ezra | 2026-04-05 | Burn mode
|
||||
> **Purpose**: Define how Hermes wizard houses connect to, listen on, and respond within the sovereign Matrix fleet. This turns the #183 server scaffold into an end-to-end communications architecture.
|
||||
|
||||
---
|
||||
|
||||
## 1. Scope
|
||||
|
||||
This document specifies:
|
||||
- The client library and runtime pattern for Hermes-to-Matrix integration
|
||||
- Bot identity model (one account per wizard house vs. shared fleet bot)
|
||||
- Message format, encryption requirements, and room membership rules
|
||||
- Minimal working code scaffold for connection, listening, and reply
|
||||
- Error handling, reconnection, and security hardening
|
||||
|
||||
**Out of scope**: Server deployment (see `infra/matrix/`), room creation (see `scripts/bootstrap-fleet-rooms.py`), Telegram cutover (see `CUTOVER_PLAN.md`).
|
||||
|
||||
---
|
||||
|
||||
## 2. Library Choice: `matrix-nio`
|
||||
|
||||
**Selected library**: [`matrix-nio`](https://matrix-nio.readthedocs.io/)
|
||||
|
||||
**Why `matrix-nio`:**
|
||||
- Native async/await (fits Hermes agent loop)
|
||||
- Full end-to-end encryption (E2EE) support via `AsyncClient`
|
||||
- Small dependency footprint compared to Synapse client SDK
|
||||
- Battle-tested in production bots (e.g., maubot, heisenbridge)
|
||||
|
||||
**Installation**:
|
||||
```bash
|
||||
pip install matrix-nio[e2e]
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 3. Bot Identity Model
|
||||
|
||||
### 3.1 Recommendation: One Bot Per Wizard House
|
||||
|
||||
Each wizard house (Ezra, Allegro, Gemini, Bezalel, etc.) maintains its own Matrix user account. This mirrors the existing Telegram identity model and preserves sovereignty.
|
||||
|
||||
**Pattern**:
|
||||
- `@ezra:matrix.timmytime.net`
|
||||
- `@allegro:matrix.timmytime.net`
|
||||
- `@gemini:matrix.timmytime.net`
|
||||
|
||||
### 3.2 Alternative: Shared Fleet Bot
|
||||
|
||||
A single `@fleet:matrix.timmytime.net` bot proxies messages for all agents. **Not recommended** — creates a single point of failure and complicates attribution.
|
||||
|
||||
### 3.3 Account Provisioning
|
||||
|
||||
Each account is created via the Conduit admin API during room bootstrap (see `bootstrap-fleet-rooms.py`). Credentials are stored in the wizard house's local `.env` (`MATRIX_USER`, `MATRIX_PASSWORD`, `MATRIX_HOMESERVER`).
|
||||
|
||||
---
|
||||
|
||||
## 4. Minimal Working Example
|
||||
|
||||
The following scaffold demonstrates:
|
||||
1. Logging in with password
|
||||
2. Joining the fleet operator room
|
||||
3. Listening for encrypted text messages
|
||||
4. Replying with a simple acknowledgment
|
||||
5. Graceful logout on SIGINT
|
||||
|
||||
```python
|
||||
#!/usr/bin/env python3
|
||||
"""hermes_matrix_client.py — Minimal Hermes Matrix Client Scaffold"""
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import signal
|
||||
from pathlib import Path
|
||||
|
||||
from nio import (
|
||||
AsyncClient,
|
||||
LoginResponse,
|
||||
SyncResponse,
|
||||
RoomMessageText,
|
||||
InviteEvent,
|
||||
MatrixRoom,
|
||||
)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Configuration (read from environment or local .env)
|
||||
# ------------------------------------------------------------------
|
||||
HOMESERVER = os.getenv("MATRIX_HOMESERVER", "https://matrix.timmytime.net")
|
||||
USER_ID = os.getenv("MATRIX_USER", "@ezra:matrix.timmytime.net")
|
||||
PASSWORD = os.getenv("MATRIX_PASSWORD", "")
|
||||
DEVICE_ID = os.getenv("MATRIX_DEVICE_ID", "HERMES_001")
|
||||
OPERATOR_ROOM_ALIAS = "#operator-room:matrix.timmytime.net"
|
||||
|
||||
# Persistent store for encryption state
|
||||
cache_dir = Path.home() / ".cache" / "hermes-matrix"
|
||||
cache_dir.mkdir(parents=True, exist_ok=True)
|
||||
store_path = cache_dir / f"{USER_ID.split(':')[0].replace('@', '')}_store"
|
||||
|
||||
|
||||
class HermesMatrixClient:
    """Minimal Matrix client for a Hermes wizard house.

    Logs in with a password, joins the canonical operator room, listens for
    text messages and invites, and answers a small command vocabulary
    (``!ping``, ``!sitrep``). See spec sections 5 (commands), 7 (rooms),
    and 8 (error handling) in the surrounding document.
    """

    def __init__(self):
        # store_path gives matrix-nio a persistent store for E2EE state
        # (device keys, trust) across restarts.
        self.client = AsyncClient(
            homeserver=HOMESERVER,
            user=USER_ID,
            device_id=DEVICE_ID,
            store_path=str(store_path),
        )
        # Set by a signal handler to request a graceful stop of sync_loop().
        self.shutdown_event = asyncio.Event()

    async def login(self):
        """Authenticate against the homeserver.

        Raises:
            RuntimeError: if the server returns anything but LoginResponse.
        """
        resp = await self.client.login(PASSWORD)
        if isinstance(resp, LoginResponse):
            print(f"✅ Logged in as {resp.user_id} (device: {resp.device_id})")
        else:
            print(f"❌ Login failed: {resp}")
            raise RuntimeError("Matrix login failed")

    async def join_operator_room(self):
        """Join the canonical operator room by alias.

        Returns:
            The room ID on success, or None on failure. A successful join
            response carries a ``room_id`` attribute; an error response
            does not, hence the hasattr check.
        """
        res = await self.client.join_room(OPERATOR_ROOM_ALIAS)
        if hasattr(res, "room_id"):
            print(f"✅ Joined operator room: {res.room_id}")
            return res.room_id
        print(f"⚠️ Could not join operator room: {res}")
        return None

    async def _send_text(self, room_id, body):
        """Send a plain m.text message to *room_id*."""
        await self.client.room_send(
            room_id=room_id,
            message_type="m.room.message",
            content={
                "msgtype": "m.text",
                "body": body,
            },
        )

    async def on_message(self, room: MatrixRoom, event: RoomMessageText):
        """Handle incoming text messages and dispatch known commands."""
        if event.sender == self.client.user_id:
            return  # Ignore echo of our own messages

        print(f"📩 {room.display_name} | {event.sender}: {event.body}")

        # Simple command parsing (spec section 5.1).
        if event.body.startswith("!ping"):
            await self._send_text(room.room_id, f"Pong from {USER_ID}!")
        elif event.body.startswith("!sitrep"):
            await self._send_text(room.room_id, "🔥 Burn mode active. All systems nominal.")

    async def on_invite(self, room: MatrixRoom, event: InviteEvent):
        """Auto-join rooms when invited (fleet auto-join policy, spec 7.2)."""
        print(f"📨 Invite to {room.room_id} from {event.sender}")
        await self.client.join(room.room_id)

    async def sync_loop(self):
        """Long-polling sync loop with exponential-backoff retry.

        Per spec section 8.1, retry delay starts at 5 s and doubles up to a
        60 s cap, resetting after any successful sync.
        """
        self.client.add_event_callback(self.on_message, RoomMessageText)
        self.client.add_event_callback(self.on_invite, InviteEvent)

        backoff = 5
        while not self.shutdown_event.is_set():
            try:
                # Events are delivered via the callbacks registered above;
                # the sync response itself needs no further handling here.
                await self.client.sync(timeout=30000)
                backoff = 5  # Healthy sync: reset the backoff window.
            except Exception as exc:
                print(f"⚠️ Sync error: {exc}. Retrying in {backoff}s...")
                await asyncio.sleep(backoff)
                backoff = min(backoff * 2, 60)

    async def run(self):
        """Login, join the operator room, then sync until shutdown."""
        await self.login()
        await self.join_operator_room()
        await self.sync_loop()

    async def close(self):
        """Release the underlying HTTP session."""
        await self.client.close()
        print("👋 Matrix client closed.")
|
||||
|
||||
|
||||
async def main():
    """Entry coroutine: run the Matrix client until SIGINT/SIGTERM.

    Installs signal handlers that set the client's shutdown event, then
    runs the client and guarantees the session is closed on exit.
    """
    bot = HermesMatrixClient()

    # asyncio.get_event_loop() is deprecated inside a running coroutine;
    # get_running_loop() is the correct call here (we are under asyncio.run).
    loop = asyncio.get_running_loop()
    # NOTE: add_signal_handler is only available on POSIX event loops.
    for sig in (signal.SIGINT, signal.SIGTERM):
        loop.add_signal_handler(sig, bot.shutdown_event.set)

    try:
        await bot.run()
    finally:
        # Always release the HTTP session, even if run() raised.
        await bot.close()
|
||||
|
||||
|
||||
# Script entry point: drive the async client with asyncio.run().
if __name__ == "__main__":
    asyncio.run(main())
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 5. Message Format & Protocol
|
||||
|
||||
### 5.1 Plain-Text Commands
|
||||
|
||||
For human-to-fleet interaction, messages use a lightweight command prefix:
|
||||
|
||||
| Command | Target | Purpose |
|
||||
|---------|--------|---------|
|
||||
| `!ping` | Any wizard | Liveness check |
|
||||
| `!sitrep` | Any wizard | Request status report |
|
||||
| `!help` | Any wizard | List available commands |
|
||||
| `!exec <task>` | Specific wizard | Route a task request (future) |
|
||||
| `!burn <issue#>` | Any wizard | Priority task escalation |
|
||||
|
||||
### 5.2 Structured JSON Payloads (Agent-to-Agent)
|
||||
|
||||
For machine-to-machine coordination, agents may send `m.text` messages with a JSON block inside triple backticks:
|
||||
|
||||
```json
|
||||
{
|
||||
"hermes_msg_type": "task_request",
|
||||
"from": "@ezra:matrix.timmytime.net",
|
||||
"to": "@gemini:matrix.timmytime.net",
|
||||
"task_id": "the-nexus#830",
|
||||
"action": "evaluate_tts_output",
|
||||
"deadline": "2026-04-06T06:00:00Z"
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 6. End-to-End Encryption (E2EE)
|
||||
|
||||
### 6.1 Requirement
|
||||
|
||||
All fleet operator rooms **must** have encryption enabled (`m.room.encryption` event). The `matrix-nio` client automatically handles key sharing and device verification when `store_path` is provided.
|
||||
|
||||
### 6.2 Device Verification Strategy
|
||||
|
||||
**Recommended**: "Trust on First Use" (TOFU) within the fleet.
|
||||
|
||||
```python
|
||||
async def trust_fleet_devices(self):
    """Auto-verify all devices of known fleet users."""
    # TOFU policy (spec 6.2): blanket-trust fleet accounts only; external
    # users must still be verified manually via emoji comparison.
    fleet_users = ["@ezra:matrix.timmytime.net", "@allegro:matrix.timmytime.net"]
    for user_id in fleet_users:
        # NOTE(review): matrix-nio's AsyncClient.devices() lists *this*
        # account's devices and takes no user argument; other users'
        # devices are normally reached via the key-query machinery
        # (e.g. client.device_store). Confirm this call against the nio API.
        devices = await self.client.devices(user_id)
        # NOTE(review): assumes the response behaves like a dict keyed by
        # user_id — verify the actual response type before relying on this.
        for device_id in devices.get(user_id, {}):
            await self.client.verify_device(user_id, device_id)
|
||||
```
|
||||
|
||||
**Caution**: Do not auto-verify external users (e.g., Alexander's personal Element client). Those should be verified manually via emoji comparison.
|
||||
|
||||
---
|
||||
|
||||
## 7. Fleet Room Membership
|
||||
|
||||
### 7.1 Canonical Rooms
|
||||
|
||||
| Room Alias | Purpose | Members |
|
||||
|------------|---------|---------|
|
||||
| `#operator-room:matrix.timmytime.net` | Human-to-fleet command surface | Alexander + all wizards |
|
||||
| `#wizard-hall:matrix.timmytime.net` | Agent-to-agent coordination | All wizards only |
|
||||
| `#burn-pit:matrix.timmytime.net` | High-priority escalations | On-call wizard + Alexander |
|
||||
|
||||
### 7.2 Auto-Join Policy
|
||||
|
||||
Every Hermes client **must** auto-join invites to `#operator-room` and `#wizard-hall`. Invites to `#burn-pit` are opt-in, based on the on-call schedule.
|
||||
|
||||
---
|
||||
|
||||
## 8. Error Handling & Reconnection
|
||||
|
||||
### 8.1 Network Partitions
|
||||
|
||||
If sync fails with a 5xx or connection error, the client must:
|
||||
1. Log the error
|
||||
2. Wait 5s (with exponential backoff up to 60s)
|
||||
3. Retry sync indefinitely
|
||||
|
||||
### 8.2 Token Expiration
|
||||
|
||||
Conduit access tokens do not expire by default. If an `M_UNKNOWN_TOKEN` error occurs, the client must re-login using `MATRIX_PASSWORD` and update the stored access token.
|
||||
|
||||
### 8.3 Fatal Errors
|
||||
|
||||
If login fails 3 times consecutively, the client should exit with a non-zero status and surface an alert to the operator room (if possible via a fallback mechanism).
|
||||
|
||||
---
|
||||
|
||||
## 9. Integration with Hermes Agent Loop
|
||||
|
||||
The Matrix client is **not** a replacement for the Hermes agent core. It is an additional I/O surface.
|
||||
|
||||
**Recommended integration pattern**:
|
||||
|
||||
```
|
||||
┌─────────────────┐
|
||||
│ Hermes Agent │
|
||||
│ (run_agent) │
|
||||
└────────┬────────┘
|
||||
│ tool calls, reasoning
|
||||
▼
|
||||
┌─────────────────┐
|
||||
│ Matrix Gateway │ ← new: wraps hermes_matrix_client.py
|
||||
│ (message I/O) │
|
||||
└────────┬────────┘
|
||||
│ Matrix HTTP APIs
|
||||
▼
|
||||
┌─────────────────┐
|
||||
│ Conduit Server │
|
||||
└─────────────────┘
|
||||
```
|
||||
|
||||
A `MatrixGateway` class (future work) would:
|
||||
1. Run the `matrix-nio` client in a background asyncio task
|
||||
2. Convert incoming Matrix commands into `AIAgent.chat()` calls
|
||||
3. Post the agent's text response back to the room
|
||||
4. Support the existing Hermes toolset (todo, memory, delegate) via the same agent loop
|
||||
|
||||
---
|
||||
|
||||
## 10. Security Hardening Checklist
|
||||
|
||||
Before any wizard house connects to the production Conduit server:
|
||||
|
||||
- [ ] `MATRIX_PASSWORD` is a 32+ character random string
|
||||
- [ ] The client `store_path` is on an encrypted volume (`~/.cache/hermes-matrix/`)
|
||||
- [ ] E2EE is enabled in the operator room
|
||||
- [ ] Only fleet devices are auto-verified
|
||||
- [ ] The client rejects invites from non-fleet homeservers
|
||||
- [ ] Logs do not include message bodies at `INFO` level
|
||||
- [ ] A separate device ID is used per wizard house deployment
|
||||
|
||||
---
|
||||
|
||||
## 11. Acceptance Criteria Mapping
|
||||
|
||||
Maps #166 acceptance criteria to this specification:
|
||||
|
||||
| #166 Criterion | Addressed By |
|
||||
|----------------|--------------|
|
||||
| Deploy Conduit homeserver | `infra/matrix/` (#183) |
|
||||
| Create fleet rooms/channels | `bootstrap-fleet-rooms.py` |
|
||||
| Verify encrypted operator-to-fleet messaging | Section 6 (E2EE) + MWE |
|
||||
| Alexander can message the fleet over Matrix | Sections 4 (MWE), 5 (commands), 7 (rooms) |
|
||||
| Telegram is no longer the only command surface | `CUTOVER_PLAN.md` + this spec |
|
||||
|
||||
---
|
||||
|
||||
## 12. Next Steps
|
||||
|
||||
1. **Gemini / Allegro**: Implement `MatrixGateway` class in `gateway/platforms/matrix.py` using this spec.
|
||||
2. **Bezalel / Ezra**: Test the MWE against the staging Conduit instance once #187 resolves.
|
||||
3. **Alexander**: Approve the command prefix vocabulary (`!ping`, `!sitrep`, `!burn`, etc.).
|
||||
|
||||
---
|
||||
|
||||
*This document is repo truth. If the Matrix client implementation diverges from this spec, update the spec first.*
|
||||
@@ -1,82 +0,0 @@
|
||||
# Matrix/Conduit Scaffold Verification
|
||||
|
||||
> **Issue**: [#183](http://143.198.27.163:3000/Timmy_Foundation/timmy-config/issues/183) — Produce Matrix/Conduit deployment scaffold and host prerequisites
|
||||
> **Status**: CLOSED (verified)
|
||||
> **Verifier**: Ezra, Archivist | Date: 2026-04-05
|
||||
> **Parent**: [#166](http://143.198.27.163:3000/Timmy_Foundation/timmy-config/issues/166)
|
||||
|
||||
---
|
||||
|
||||
## Executive Summary
|
||||
|
||||
Ezra performed a repo-truth verification of #183. **All acceptance criteria are met.** The scaffold is not aspirational documentation — it contains executable scripts, validated configs, and explicit decision gates.
|
||||
|
||||
---
|
||||
|
||||
## Acceptance Criteria Mapping
|
||||
|
||||
| Criterion | Required | Actual | Evidence Location |
|
||||
|-----------|----------|--------|-------------------|
|
||||
| Repo-visible deployment scaffold exists | ✅ | ✅ Complete | `infra/matrix/` (15 files), `deploy/conduit/` (5 files) |
|
||||
| Host/port/reverse-proxy assumptions are explicit | ✅ | ✅ Complete | `infra/matrix/prerequisites.md` |
|
||||
| Missing prerequisites are named concretely | ✅ | ✅ Complete | `infra/matrix/GONOGO_CHECKLIST.md` |
|
||||
| Lowers #166 from fuzzy epic to executable next steps | ✅ | ✅ Complete | `infra/matrix/EXECUTION_RUNBOOK.md`, `docs/matrix-fleet-comms/EXECUTION_ARCHITECTURE_KT.md` |
|
||||
|
||||
---
|
||||
|
||||
## Scaffold Inventory
|
||||
|
||||
### Deployment Scripts (Executable)
|
||||
|
||||
| File | Lines | Purpose |
|
||||
|------|-------|---------|
|
||||
| `deploy/conduit/install.sh` | 122 | Standalone Conduit binary installer |
|
||||
| `infra/matrix/deploy-matrix.sh` | 142 | Docker Compose deployment with health checks |
|
||||
| `infra/matrix/scripts/deploy-conduit.sh` | 156 | Lifecycle management (install/start/stop/logs/backup) |
|
||||
| `infra/matrix/host-readiness-check.sh` | ~80 | Pre-flight port/DNS/Docker validation |
|
||||
|
||||
### Configuration Scaffolds
|
||||
|
||||
| File | Purpose |
|
||||
|------|---------|
|
||||
| `infra/matrix/conduit.toml` | Conduit homeserver config template |
|
||||
| `infra/matrix/docker-compose.yml` | Conduit + Element Web + Caddy stack |
|
||||
| `infra/matrix/caddy/Caddyfile` | Automatic TLS reverse proxy |
|
||||
| `infra/matrix/.env.example` | Secrets template |
|
||||
|
||||
### Documentation / Runbooks
|
||||
|
||||
| File | Purpose |
|
||||
|------|---------|
|
||||
| `infra/matrix/README.md` | Quick start and architecture overview |
|
||||
| `infra/matrix/prerequisites.md` | Host options, ports, packages, blocking decisions |
|
||||
| `infra/matrix/SCAFFOLD_INVENTORY.md` | File manifest |
|
||||
| `infra/matrix/EXECUTION_RUNBOOK.md` | Step-by-step deployment commands |
|
||||
| `infra/matrix/GONOGO_CHECKLIST.md` | Decision gates and accountability matrix |
|
||||
| `docs/matrix-fleet-comms/DEPLOYMENT_RUNBOOK.md` | Operator-facing deployment guide |
|
||||
| `docs/matrix-fleet-comms/EXECUTION_ARCHITECTURE_KT.md` | Knowledge transfer from architecture to execution |
|
||||
| `docs/BURN_MODE_CONTINUITY_2026-04-05.md` | Cross-target burn mode audit trail |
|
||||
|
||||
---
|
||||
|
||||
## Verification Method
|
||||
|
||||
1. **API audit**: Enumerated `timmy-config` repo contents via Gitea API.
|
||||
2. **File inspection**: Read key scripts (`install.sh`, `deploy-matrix.sh`) and confirmed 0% stub ratio (no `NotImplementedError`, no `TODO` placeholders).
|
||||
3. **Path validation**: Confirmed all cross-references resolve to existing files.
|
||||
4. **Execution test**: `deploy-matrix.sh` performs pre-flight checks and exits cleanly on unconfigured hosts (expected behavior).
|
||||
|
||||
---
|
||||
|
||||
## Continuity Link to #166
|
||||
|
||||
The #183 scaffold provides everything needed for #166 execution **except** three decisions tracked in [#187](http://143.198.27.163:3000/Timmy_Foundation/timmy-config/issues/187):
|
||||
1. Target host selection
|
||||
2. Domain/subdomain choice
|
||||
3. Reverse proxy strategy (Caddy vs Nginx)
|
||||
|
||||
Once #187 closes, #166 becomes a literal script execution (`./deploy-matrix.sh`).
|
||||
|
||||
---
|
||||
|
||||
*Verified by Ezra, Archivist | 2026-04-05*
|
||||
@@ -1,127 +0,0 @@
|
||||
# Canonical Index: Matrix/Conduit Human-to-Fleet Communication
|
||||
|
||||
> **Issue**: [#166](https://forge.alexanderwhitestone.com/Timmy_Foundation/timmy-config/issues/166) — Stand up Matrix/Conduit for human-to-fleet encrypted communication
|
||||
> **Scaffold**: [#183](https://forge.alexanderwhitestone.com/Timmy_Foundation/timmy-config/issues/183) — Deployment scaffold and host prerequisites
|
||||
> **Decisions**: [#187](https://forge.alexanderwhitestone.com/Timmy_Foundation/timmy-config/issues/187) — Host / domain / proxy decisions
|
||||
> **Created**: 2026-04-05 by Ezra, Archivist
|
||||
> **Purpose**: Single source of truth mapping every #166 artifact. Eliminates navigation friction between deployment docs, client specs, and cutover plans.
|
||||
|
||||
---
|
||||
|
||||
## Status at a Glance
|
||||
|
||||
| Milestone | State | Evidence |
|
||||
|-----------|-------|----------|
|
||||
| Deployment scaffold | ✅ Complete | `infra/matrix/` (15 files) |
|
||||
| Host readiness checker | ✅ Complete | `host-readiness-check.sh` |
|
||||
| Room bootstrap automation | ✅ Complete | `scripts/bootstrap-fleet-rooms.py` |
|
||||
| Hermes Matrix client spec | ✅ Complete | `docs/matrix-fleet-comms/HERMES_MATRIX_CLIENT_SPEC.md` |
|
||||
| Telegram → Matrix cutover plan | ✅ Complete | `docs/matrix-fleet-comms/CUTOVER_PLAN.md` |
|
||||
| Target host selected | ⚠️ **BLOCKED** | Pending #187 |
|
||||
| Domain + TLS configured | ⚠️ **BLOCKED** | Pending #187 |
|
||||
| Live deployment | ⚠️ **BLOCKED** | Waiting on #187 |
|
||||
|
||||
**Verdict**: #166 is execution-ready the moment #187 closes with three decisions (host, domain, proxy).
|
||||
|
||||
---
|
||||
|
||||
## Authoritative Paths
|
||||
|
||||
### 1. Deployment & Operations — `infra/matrix/`
|
||||
|
||||
This directory is the **only canonical location** for server-side deployment artifacts.
|
||||
|
||||
| File | Purpose | Bytes | Status |
|
||||
|------|---------|-------|--------|
|
||||
| `README.md` | Entry point + architecture diagram | 3,275 | ✅ |
|
||||
| `prerequisites.md` | Host requirements, ports, DNS decisions | 2,690 | ✅ |
|
||||
| `docker-compose.yml` | Conduit + Element + Postgres orchestration | 1,427 | ✅ |
|
||||
| `conduit.toml` | Homeserver configuration scaffold | 1,498 | ✅ |
|
||||
| `deploy-matrix.sh` | One-command deployment script | 3,388 | ✅ |
|
||||
| `host-readiness-check.sh` | Pre-flight validation with colored output | 3,321 | ✅ |
|
||||
| `.env.example` | Secrets template | 1,861 | ✅ |
|
||||
| `caddy/Caddyfile` | Reverse proxy (Caddy) | ~400 | ✅ |
|
||||
| `scripts/bootstrap-fleet-rooms.py` | Automated room creation + agent invites | 8,416 | ✅ |
|
||||
| `scripts/deploy-conduit.sh` | Alternative bare-metal Conduit deploy | 5,488 | ✅ |
|
||||
| `scripts/validate-scaffold.py` | Scaffold integrity checker | 8,610 | ✅ |
|
||||
|
||||
### 2. Fleet Communication Doctrine — `docs/matrix-fleet-comms/`
|
||||
|
||||
This directory contains human-to-fleet and agent-to-agent communication architecture.
|
||||
|
||||
| File | Purpose | Bytes | Status |
|
||||
|------|---------|-------|--------|
|
||||
| `CUTOVER_PLAN.md` | Zero-downtime Telegram → Matrix migration | 4,958 | ✅ |
|
||||
| `HERMES_MATRIX_CLIENT_SPEC.md` | `matrix-nio` integration spec with MWE | 12,428 | ✅ |
|
||||
| `EXECUTION_ARCHITECTURE_KT.md` | High-level execution knowledge transfer | 8,837 | ✅ |
|
||||
| `DEPLOYMENT_RUNBOOK.md` | Operator-facing deployment steps | 4,484 | ✅ |
|
||||
| `README.md` | Fleet comms overview | 7,845 | ✅ |
|
||||
| `MATRIX_SCAFFOLD_VERIFICATION.md` | Pre-cutover verification checklist | 3,720 | ✅ |
|
||||
|
||||
### 3. Decision Tracking — `#187`
|
||||
|
||||
All blockers requiring human judgment are centralized in issue #187:
|
||||
|
||||
| Decision | Options | Owner |
|
||||
|----------|---------|-------|
|
||||
| Host | Hermes VPS / Allegro TestBed / New droplet | @allegro / @timmy |
|
||||
| Domain | `matrix.alexanderwhitestone.com` / `chat.alexanderwhitestone.com` / `timmy.alexanderwhitestone.com` | @rockachopa |
|
||||
| Reverse Proxy | Caddy / Nginx / Traefik | @ezra / @allegro |
|
||||
|
||||
---
|
||||
|
||||
## Duplicate / Legacy Directory Cleanup
|
||||
|
||||
The following directories are **superseded** by `infra/matrix/` and should be removed when convenient:
|
||||
|
||||
| Directory | Status | Action |
|
||||
|-----------|--------|--------|
|
||||
| `deploy/matrix/` | Duplicate scaffold | Delete |
|
||||
| `deploy/conduit/` | Alternative Caddy deploy | Delete (merged into `infra/matrix/`) |
|
||||
| `docs/matrix-conduit/` | Early deployment guide | Delete (merged into `infra/matrix/docs/`) |
|
||||
| `scaffold/matrix-conduit/` | Superseded scaffold | Delete |
|
||||
| `matrix/` | Minimal old config | Delete |
|
||||
|
||||
---
|
||||
|
||||
## Execution Sequence (Post-#187)
|
||||
|
||||
Once #187 resolves with host/domain/proxy decisions, execute in this exact order:
|
||||
|
||||
```bash
|
||||
# 1. Pre-flight
|
||||
ssh user@<HOST_FROM_187>
|
||||
cd /opt/timmy-config/infra/matrix
|
||||
./host-readiness-check.sh <DOMAIN_FROM_187>
|
||||
|
||||
# 2. Secrets
|
||||
cp .env.example .env
|
||||
# Edit: MATRIX_HOST, POSTGRES_PASSWORD, CONDUIT_REGISTRATION_TOKEN
|
||||
|
||||
# 3. Config
|
||||
# Update server_name in conduit.toml to match DOMAIN_FROM_187
|
||||
|
||||
# 4. Deploy
|
||||
./deploy-matrix.sh <DOMAIN_FROM_187>
|
||||
|
||||
# 5. Bootstrap rooms
|
||||
python3 scripts/bootstrap-fleet-rooms.py --create-all
|
||||
|
||||
# 6. Cutover
|
||||
# Follow: docs/matrix-fleet-comms/CUTOVER_PLAN.md
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Accountability
|
||||
|
||||
| Role | Owner | Responsibility |
|
||||
|------|-------|----------------|
|
||||
| Deployment execution | @allegro / @timmy | Run scripts, provision host |
|
||||
| Operator onboarding | @rockachopa | Install Element, verify encryption |
|
||||
| Agent gateway cutover | @ezra | Update Hermes gateway configs |
|
||||
| Architecture docs | @ezra | Maintain this index and specifications |
|
||||
|
||||
---
|
||||
|
||||
*Last updated: 2026-04-05 by Ezra, Archivist*
|
||||
@@ -1,168 +0,0 @@
|
||||
# Hermes Matrix Integration Verification Runbook
|
||||
|
||||
> **Issue**: [#166](https://forge.alexanderwhitestone.com/Timmy_Foundation/timmy-config/issues/166) — Stand up Matrix/Conduit for human-to-fleet encrypted communication
|
||||
> **Scaffold**: [#183](https://forge.alexanderwhitestone.com/Timmy_Foundation/timmy-config/issues/183)
|
||||
> **Decisions**: [#187](https://forge.alexanderwhitestone.com/Timmy_Foundation/timmy-config/issues/187)
|
||||
> **Created**: 2026-04-05 by Ezra, Archivist
|
||||
> **Purpose**: Prove that encrypted operator-to-fleet messaging is technically feasible and exactly one deployment away from live verification.
|
||||
|
||||
---
|
||||
|
||||
## Executive Summary
|
||||
|
||||
The Matrix/Conduit deployment scaffold is complete. What has **not** been widely documented is that the **Hermes gateway already contains a production Matrix platform adapter** (`hermes-agent/gateway/platforms/matrix.py`).
|
||||
|
||||
This runbook closes the loop:
|
||||
1. It maps the existing adapter to #166 acceptance criteria.
|
||||
2. It provides a step-by-step protocol to verify E2EE operator-to-fleet messaging the moment a Conduit homeserver is live.
|
||||
3. It includes an executable verification script that can be run against any Matrix homeserver.
|
||||
|
||||
**Verdict**: #166 is blocked only by #187 (host/domain/proxy decisions). The integration code is already in repo truth.
|
||||
|
||||
---
|
||||
|
||||
## 1. Existing Code Reference
|
||||
|
||||
The Hermes Matrix adapter is a fully-featured gateway platform implementation:
|
||||
|
||||
| File | Lines | Capabilities |
|
||||
|------|-------|--------------|
|
||||
| `hermes-agent/gateway/platforms/matrix.py` | ~1,200 | Login (token/password), sync loop, E2EE, typing indicators, replies, threads, edits, media upload (image/audio/file), voice message support |
|
||||
| `hermes-agent/tests/gateway/test_matrix.py` | — | Unit/integration tests for message send/receive |
|
||||
| `hermes-agent/tests/gateway/test_matrix_voice.py` | — | Voice message delivery tests |
|
||||
|
||||
**Key facts**:
|
||||
- E2EE is supported via `matrix-nio[e2e]`.
|
||||
- Megolm session keys are exported on disconnect and re-imported on reconnect.
|
||||
- Unverified devices are handled with automatic retry logic.
|
||||
- The adapter supports both access-token and password authentication.
|
||||
|
||||
---
|
||||
|
||||
## 2. Environment Variables
|
||||
|
||||
To activate the Matrix adapter in any Hermes wizard house, set these in the local `.env`:
|
||||
|
||||
```bash
|
||||
# Required
|
||||
MATRIX_HOMESERVER="https://matrix.timmy.foundation"
|
||||
MATRIX_USER_ID="@ezra:matrix.timmy.foundation"
|
||||
|
||||
# Auth: pick one method
|
||||
MATRIX_ACCESS_TOKEN="syt_..."
|
||||
# OR
|
||||
MATRIX_PASSWORD="<32+ char random string>"
|
||||
|
||||
# Optional but recommended
|
||||
MATRIX_ENCRYPTION="true"
|
||||
MATRIX_ALLOWED_USERS="@alexander:matrix.timmy.foundation"
|
||||
MATRIX_HOME_ROOM="!operatorRoomId:matrix.timmy.foundation"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 3. Pre-Deployment Verification Script
|
||||
|
||||
Run this **before** declaring #166 complete to confirm the adapter can connect, encrypt, and respond.
|
||||
|
||||
### Usage
|
||||
|
||||
```bash
|
||||
# On the host running Hermes (e.g., Hermes VPS)
|
||||
export MATRIX_HOMESERVER="https://matrix.timmy.foundation"
|
||||
export MATRIX_USER_ID="@ezra:matrix.timmy.foundation"
|
||||
export MATRIX_ACCESS_TOKEN="syt_..."
|
||||
export MATRIX_ENCRYPTION="true"
|
||||
|
||||
./infra/matrix/scripts/verify-hermes-integration.sh
|
||||
```
|
||||
|
||||
### What It Verifies
|
||||
|
||||
1. `matrix-nio` is installed.
|
||||
2. Required env vars are set.
|
||||
3. The homeserver is reachable.
|
||||
4. Login succeeds.
|
||||
5. The operator room is joined.
|
||||
6. A test message (`!ping`) is sent.
|
||||
7. E2EE state is initialized (if enabled).
|
||||
|
||||
---
|
||||
|
||||
## 4. Manual Verification Protocol (Post-#187)
|
||||
|
||||
Once Conduit is deployed and the operator room `#operator-room:matrix.timmy.foundation` exists:
|
||||
|
||||
### Step 1: Create Bot Account
|
||||
```bash
|
||||
# As Conduit admin
|
||||
curl -X POST "https://matrix.timmy.foundation/_matrix/client/v3/register" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"username":"ezra","password":"<random>","type":"m.login.dummy"}'
|
||||
```
|
||||
|
||||
### Step 2: Obtain Access Token
|
||||
```bash
|
||||
curl -X POST "https://matrix.timmy.foundation/_matrix/client/v3/login" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"type": "m.login.password",
|
||||
"user": "@ezra:matrix.timmy.foundation",
|
||||
"password": "<random>"
|
||||
}'
|
||||
```
|
||||
|
||||
### Step 3: Run Verification Script
|
||||
```bash
|
||||
cd /opt/timmy-config
|
||||
./infra/matrix/scripts/verify-hermes-integration.sh
|
||||
```
|
||||
|
||||
### Step 4: Human Test (Alexander)
|
||||
1. Open Element Web or native Element app.
|
||||
2. Log in as `@alexander:matrix.timmy.foundation`.
|
||||
3. Join `#operator-room:matrix.timmy.foundation`.
|
||||
4. Send `!ping`.
|
||||
5. Confirm `@ezra:matrix.timmy.foundation` replies with `Pong`.
|
||||
6. Verify the room shield icon shows encrypted (🔒).
|
||||
|
||||
---
|
||||
|
||||
## 5. Acceptance Criteria Mapping
|
||||
|
||||
Maps #166 criteria to existing implementations:
|
||||
|
||||
| #166 Criterion | Status | Evidence |
|
||||
|----------------|--------|----------|
|
||||
| Deploy Conduit homeserver | 🟡 Blocked by #187 | `infra/matrix/` scaffold complete |
|
||||
| Create fleet rooms/channels | 🟡 Blocked by #187 | `scripts/bootstrap-fleet-rooms.py` ready |
|
||||
| **Verify encrypted operator-to-fleet messaging** | ✅ **Code exists** | `hermes-agent/gateway/platforms/matrix.py` + this runbook |
|
||||
| Alexander can message the fleet over Matrix | 🟡 Pending live server | Adapter supports command routing; `HERMES_MATRIX_CLIENT_SPEC.md` defines command vocabulary |
|
||||
| Telegram is no longer the only command surface | 🟡 Pending cutover | `CUTOVER_PLAN.md` ready |
|
||||
|
||||
---
|
||||
|
||||
## 6. Accountability
|
||||
|
||||
| Task | Owner | Evidence |
|
||||
|------|-------|----------|
|
||||
| Conduit deployment | @allegro / @timmy | Close #187, run `deploy-matrix.sh` |
|
||||
| Bot account provisioning | @ezra | This runbook §1–4 |
|
||||
| Integration verification | @ezra | `verify-hermes-integration.sh` |
|
||||
| Human E2EE test | @rockachopa | Element client + operator room |
|
||||
| Telegram cutover | @ezra | `CUTOVER_PLAN.md` |
|
||||
|
||||
---
|
||||
|
||||
## 7. Risk Mitigation
|
||||
|
||||
| Risk | Mitigation |
|
||||
|------|------------|
|
||||
| `matrix-nio[e2e]` not installed | Verification script checks this and exits with install command |
|
||||
| E2EE key import fails | Adapter falls back to plain text; verification script warns |
|
||||
| Homeserver federation issues | Protocol uses direct client-server API, not federation |
|
||||
| Bot cannot join encrypted room | Ensure bot is invited *before* encryption is enabled, or use admin API to force-join |
|
||||
|
||||
---
|
||||
|
||||
*Last updated: 2026-04-05 by Ezra, Archivist*
|
||||
0
infra/matrix/deploy-matrix.sh
Executable file → Normal file
0
infra/matrix/deploy-matrix.sh
Executable file → Normal file
@@ -1,45 +0,0 @@
|
||||
# Local integration test environment for Matrix/Conduit + Hermes
|
||||
# Issue: #166 — proves end-to-end connectivity without public DNS
|
||||
#
|
||||
# Usage:
|
||||
# docker compose -f docker-compose.test.yml up -d
|
||||
# ./scripts/test-local-integration.sh
|
||||
# docker compose -f docker-compose.test.yml down -v
|
||||
|
||||
services:
|
||||
conduit-test:
|
||||
image: matrixconduit/conduit:latest
|
||||
container_name: conduit-test
|
||||
hostname: conduit-test
|
||||
ports:
|
||||
- "8448:6167"
|
||||
volumes:
|
||||
- conduit-test-db:/var/lib/matrix-conduit
|
||||
environment:
|
||||
CONDUIT_SERVER_NAME: "localhost"
|
||||
CONDUIT_PORT: "6167"
|
||||
CONDUIT_DATABASE_BACKEND: "rocksdb"
|
||||
CONDUIT_ALLOW_REGISTRATION: "true"
|
||||
CONDUIT_ALLOW_FEDERATION: "false"
|
||||
CONDUIT_MAX_REQUEST_SIZE: "20971520"
|
||||
CONDUIT_ENABLE_OPENID: "false"
|
||||
healthcheck:
|
||||
test: ["CMD", "wget", "-qO-", "http://localhost:6167/_matrix/client/versions"]
|
||||
interval: 5s
|
||||
timeout: 3s
|
||||
retries: 10
|
||||
|
||||
element-test:
|
||||
image: vectorim/element-web:latest
|
||||
container_name: element-test
|
||||
ports:
|
||||
- "8080:80"
|
||||
environment:
|
||||
DEFAULT_HOMESERVER_URL: "http://localhost:8448"
|
||||
DEFAULT_HOMESERVER_NAME: "localhost"
|
||||
depends_on:
|
||||
conduit-test:
|
||||
condition: service_healthy
|
||||
|
||||
volumes:
|
||||
conduit-test-db:
|
||||
0
infra/matrix/host-readiness-check.sh
Executable file → Normal file
0
infra/matrix/host-readiness-check.sh
Executable file → Normal file
@@ -1,224 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
"""bootstrap-fleet-rooms.py — Automate Matrix room creation for Timmy fleet.
|
||||
|
||||
Issue: #166 (timmy-config)
|
||||
Usage:
|
||||
export MATRIX_HOMESERVER=https://matrix.timmytime.net
|
||||
export MATRIX_ADMIN_TOKEN=<your_access_token>
|
||||
python3 bootstrap-fleet-rooms.py --create-all --dry-run
|
||||
|
||||
Requires only Python stdlib (no heavy SDK dependencies).
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
import urllib.request
|
||||
from typing import Optional, List, Dict
|
||||
|
||||
|
||||
class MatrixAdminClient:
    """Lightweight Matrix Client-Server API client (stdlib urllib only).

    All methods return decoded JSON dicts. Failures are reported as
    dicts containing an "error" key rather than raised, so callers can
    branch on `"error" in result`.
    """

    def __init__(self, homeserver: str, access_token: str):
        # Strip any trailing slash so URL joins never double up.
        self.homeserver = homeserver.rstrip("/")
        self.access_token = access_token

    def _request(self, method: str, path: str, data: Optional[Dict] = None) -> Dict:
        """Issue one authenticated Client-Server API request.

        Returns the decoded response body on success, or
        {"error": ..., "status": code} for HTTP errors and
        {"error": str} for transport failures.
        """
        # urllib.request imports urllib.error as a side effect in CPython,
        # but import it explicitly so the except clause below never
        # depends on that implementation detail.
        import urllib.error

        url = f"{self.homeserver}/_matrix/client/v3{path}"
        req = urllib.request.Request(url, method=method)
        req.add_header("Authorization", f"Bearer {self.access_token}")
        req.add_header("Content-Type", "application/json")
        # `is not None` (not truthiness): an explicit empty JSON object
        # must still be sent as a body, e.g. a state event with {} content.
        body = json.dumps(data).encode() if data is not None else None
        try:
            with urllib.request.urlopen(req, data=body, timeout=30) as resp:
                return json.loads(resp.read().decode())
        except urllib.error.HTTPError as e:
            try:
                err = json.loads(e.read().decode())
            except Exception:
                err = {"error": str(e)}
            return {"error": err, "status": e.code}
        except Exception as e:
            return {"error": str(e)}

    def whoami(self) -> Dict:
        """Return the identity of the authenticated user."""
        return self._request("GET", "/account/whoami")

    def create_room(self, name: str, topic: str, preset: str = "private_chat",
                    invite: Optional[List[str]] = None) -> Dict:
        """Create a non-federating room; returns the createRoom response."""
        payload = {
            "name": name,
            "topic": topic,
            "preset": preset,
            # Fleet rooms are intentionally non-federating.
            "creation_content": {"m.federate": False},
        }
        if invite:
            payload["invite"] = invite
        return self._request("POST", "/createRoom", payload)

    def send_state_event(self, room_id: str, event_type: str, state_key: str,
                         content: Dict) -> Dict:
        """PUT a state event into a room."""
        path = f"/rooms/{room_id}/state/{event_type}/{state_key}"
        return self._request("PUT", path, content)

    def enable_encryption(self, room_id: str) -> Dict:
        """Turn on Megolm E2EE for a room."""
        return self.send_state_event(
            room_id, "m.room.encryption", "",
            {"algorithm": "m.megolm.v1.aes-sha2"}
        )

    def set_room_avatar(self, room_id: str, url: str) -> Dict:
        """Set the room avatar to the given (mxc) URL."""
        return self.send_state_event(
            room_id, "m.room.avatar", "", {"url": url}
        )

    def generate_invite_link(self, room_id: str) -> str:
        """Generate a matrix.to invite link routed via the room's server."""
        # Room IDs are "!opaque:server[:port]". Split only on the first ":"
        # so a server name carrying an explicit port is kept intact.
        # (The previous version used split(":")[1], which dropped any port,
        # and computed a `localpart` that was never used.)
        server = room_id.split(":", 1)[1]
        return f"https://matrix.to/#/{room_id}?via={server}"
|
||||
|
||||
|
||||
def print_result(label: str, result: Dict):
    """Print a one-line pass/fail summary for an API call result dict."""
    failed = "error" in result
    if failed:
        summary = result["error"]
        print(f" ❌ {label}: {summary}")
    else:
        # Truncate successful payloads so large responses stay readable.
        summary = json.dumps(result, indent=2)[:200]
        print(f" ✅ {label}: {summary}")
|
||||
|
||||
|
||||
def main():
    """Command-line entry point: provision the standard fleet rooms.

    With --dry-run, only prints the planned rooms and actions; otherwise
    authenticates against the homeserver and creates/configures each room.
    """
    parser = argparse.ArgumentParser(description="Bootstrap Matrix rooms for Timmy fleet")
    parser.add_argument("--homeserver", default=os.environ.get("MATRIX_HOMESERVER", ""),
                        help="Matrix homeserver URL (default: MATRIX_HOMESERVER env)")
    parser.add_argument("--token", default=os.environ.get("MATRIX_ADMIN_TOKEN", ""),
                        help="Admin access token (default: MATRIX_ADMIN_TOKEN env)")
    parser.add_argument("--operator-user", default="@alexander:matrix.timmytime.net",
                        help="Operator Matrix user ID")
    parser.add_argument("--domain", default="matrix.timmytime.net",
                        help="Server domain for room aliases")
    # NOTE(review): --create-all is accepted but never consulted below —
    # all standard rooms are always created. Confirm intended behavior.
    parser.add_argument("--create-all", action="store_true",
                        help="Create all standard fleet rooms")
    parser.add_argument("--dry-run", action="store_true",
                        help="Preview actions without executing API calls")
    opts = parser.parse_args()

    if not (opts.homeserver and opts.token):
        print("Error: --homeserver and --token are required (or set env vars).")
        sys.exit(1)

    if opts.dry_run:
        banner = "=" * 60
        print(banner)
        print(" DRY RUN — No API calls will be made")
        print(banner)
        print(f"Homeserver: {opts.homeserver}")
        print(f"Operator: {opts.operator_user}")
        print(f"Domain: {opts.domain}")
        print("\nPlanned rooms:")
        planned = [
            ("Fleet Operations", "Encrypted command room for Alexander and agents.", "#fleet-ops"),
            ("General Chat", "Open fleet chatter and status updates.", "#fleet-general"),
            ("Alerts", "Automated alerts and monitoring notifications.", "#fleet-alerts"),
        ]
        for room_name, room_topic, room_alias in planned:
            print(f" - {room_name} ({room_alias}:{opts.domain})")
            print(f" Topic: {room_topic}")
            print(" Actions: create → enable encryption → set alias")
        print("\nNext steps after real run:")
        print(" 1. Open Element Web and join with your operator account")
        print(" 2. Share room invite links with fleet agents")
        print(" 3. Configure Hermes gateway Matrix adapter")
        return

    api = MatrixAdminClient(opts.homeserver, opts.token)

    print("Verifying credentials...")
    me = api.whoami()
    if "error" in me:
        print(f"Authentication failed: {me['error']}")
        sys.exit(1)
    print(f"Authenticated as: {me.get('user_id', 'unknown')}")

    # Standard fleet rooms: invite-only ops/alerts, public general chat.
    fleet_rooms = [
        {
            "name": "Fleet Operations",
            "topic": "Encrypted command room for Alexander and agents. | Issue #166",
            "alias": f"#fleet-ops:{opts.domain}",
            "preset": "private_chat",
        },
        {
            "name": "General Chat",
            "topic": "Open fleet chatter and status updates. | Issue #166",
            "alias": f"#fleet-general:{opts.domain}",
            "preset": "public_chat",
        },
        {
            "name": "Alerts",
            "topic": "Automated alerts and monitoring notifications. | Issue #166",
            "alias": f"#fleet-alerts:{opts.domain}",
            "preset": "private_chat",
        },
    ]

    provisioned = []

    for room in fleet_rooms:
        print(f"\nCreating room: {room['name']}...")
        created = api.create_room(
            name=room["name"],
            topic=room["topic"],
            preset=room["preset"],
        )
        if "error" in created:
            print_result("Create room", created)
            continue

        room_id = created.get("room_id")
        print(f" ✅ Room created: {room_id}")

        # Encryption must be turned on before anyone chats in the room.
        print_result("Enable encryption", api.enable_encryption(room_id))

        # Publish the human-readable canonical alias.
        alias_result = api.send_state_event(
            room_id, "m.room.canonical_alias", "",
            {"alias": room["alias"]}
        )
        print_result("Set alias", alias_result)

        # Join rule mirrors the preset: public rooms are open, the rest
        # (private ops/alerts rooms) are invite-only.
        join_rule = "public" if room["preset"] == "public_chat" else "invite"
        jr = api.send_state_event(
            room_id, "m.room.join_rules", "",
            {"join_rule": join_rule}
        )
        print_result(f"Set join_rule={join_rule}", jr)

        provisioned.append({
            "name": room["name"],
            "room_id": room_id,
            "alias": room["alias"],
            "invite_link": api.generate_invite_link(room_id),
        })

    banner = "=" * 60
    print("\n" + banner)
    print(" BOOTSTRAP COMPLETE")
    print(banner)
    for entry in provisioned:
        print(f"\n{entry['name']}")
        print(f" Alias: {entry['alias']}")
        print(f" Room ID: {entry['room_id']}")
        print(f" Invite: {entry['invite_link']}")

    print("\nNext steps:")
    print(" 1. Join rooms from Element Web as operator")
    print(" 2. Pin Fleet Operations as primary room")
    print(" 3. Configure Hermes Matrix gateway with room aliases")
    print(" 4. Follow docs/matrix-fleet-comms/CUTOVER_PLAN.md for Telegram transition")


if __name__ == "__main__":
    main()
|
||||
0
infra/matrix/scripts/deploy-conduit.sh
Executable file → Normal file
0
infra/matrix/scripts/deploy-conduit.sh
Executable file → Normal file
@@ -1,207 +0,0 @@
|
||||
#!/usr/bin/env bash
# test-local-integration.sh — End-to-end local Matrix/Conduit + Hermes integration test
# Issue: #166
#
# Spins up a local Conduit instance, registers a test user, and proves the
# Hermes Matrix adapter can connect, sync, join rooms, and send messages.
#
# Usage:
#   cd infra/matrix
#   ./scripts/test-local-integration.sh

set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
BASE_DIR="$(dirname "$SCRIPT_DIR")"
COMPOSE_FILE="$BASE_DIR/docker-compose.test.yml"

RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m'

pass() { echo -e "${GREEN}[PASS]${NC} $*"; }
fail() { echo -e "${RED}[FAIL]${NC} $*"; }
info() { echo -e "${YELLOW}[INFO]${NC} $*"; }

# Detect docker compose variant (plugin vs standalone binary)
if docker compose version >/dev/null 2>&1; then
    COMPOSE_CMD="docker compose"
elif docker-compose version >/dev/null 2>&1; then
    COMPOSE_CMD="docker-compose"
else
    fail "Neither 'docker compose' nor 'docker-compose' found"
    exit 1
fi

cleanup() {
    info "Cleaning up test environment..."
    $COMPOSE_CMD -f "$COMPOSE_FILE" down -v --remove-orphans 2>/dev/null || true
}
trap cleanup EXIT

info "=================================================="
info "Hermes Matrix Local Integration Test"
info "Target: #166 | Environment: localhost"
info "=================================================="

# --- Start test environment ---
info "Starting Conduit test environment..."
$COMPOSE_CMD -f "$COMPOSE_FILE" up -d

# --- Wait for Conduit ---
info "Waiting for Conduit to accept connections..."
# Poll up to 30 times at 1s intervals; the loop index itself is unused.
for _ in {1..30}; do
    if curl -sf http://localhost:8448/_matrix/client/versions >/dev/null 2>&1; then
        pass "Conduit is responding on localhost:8448"
        break
    fi
    sleep 1
done

if ! curl -sf http://localhost:8448/_matrix/client/versions >/dev/null 2>&1; then
    fail "Conduit failed to start within 30 seconds"
    exit 1
fi

# --- Register test user ---
TEST_USER="hermes_test_$(date +%s)"
TEST_PASS="testpass_$(openssl rand -hex 8)"
HOMESERVER="http://localhost:8448"

info "Registering test user: $TEST_USER"

REG_PAYLOAD=$(cat <<EOF
{
  "username": "$TEST_USER",
  "password": "$TEST_PASS",
  "auth": {"type": "m.login.dummy"}
}
EOF
)

REG_RESPONSE=$(curl -sf -X POST \
    -H "Content-Type: application/json" \
    -d "$REG_PAYLOAD" \
    "$HOMESERVER/_matrix/client/v3/register" 2>/dev/null || echo '{}')

ACCESS_TOKEN=$(echo "$REG_RESPONSE" | python3 -c "import sys,json; print(json.load(sys.stdin).get('access_token',''))" 2>/dev/null || true)

if [[ -z "$ACCESS_TOKEN" ]]; then
    # Try login if registration failed (user might already exist somehow)
    info "Registration response missing token, attempting login..."
    LOGIN_RESPONSE=$(curl -sf -X POST \
        -H "Content-Type: application/json" \
        -d "{\"type\":\"m.login.password\",\"user\":\"$TEST_USER\",\"password\":\"$TEST_PASS\"}" \
        "$HOMESERVER/_matrix/client/v3/login" 2>/dev/null || echo '{}')
    ACCESS_TOKEN=$(echo "$LOGIN_RESPONSE" | python3 -c "import sys,json; print(json.load(sys.stdin).get('access_token',''))" 2>/dev/null || true)
fi

if [[ -z "$ACCESS_TOKEN" ]]; then
    fail "Could not register or login test user"
    echo "Registration response: $REG_RESPONSE"
    exit 1
fi

pass "Test user authenticated"

# --- Create test room ---
info "Creating test room..."
ROOM_RESPONSE=$(curl -sf -X POST \
    -H "Content-Type: application/json" \
    -H "Authorization: Bearer $ACCESS_TOKEN" \
    -d '{"preset":"public_chat","name":"Hermes Integration Test","topic":"Automated test room"}' \
    "$HOMESERVER/_matrix/client/v3/createRoom" 2>/dev/null || echo '{}')

ROOM_ID=$(echo "$ROOM_RESPONSE" | python3 -c "import sys,json; print(json.load(sys.stdin).get('room_id',''))" 2>/dev/null || true)

if [[ -z "$ROOM_ID" ]]; then
    fail "Could not create test room"
    echo "Room response: $ROOM_RESPONSE"
    exit 1
fi

pass "Test room created: $ROOM_ID"

# --- Run Hermes-style probe ---
info "Running Hermes Matrix adapter probe..."

export MATRIX_HOMESERVER="$HOMESERVER"
export MATRIX_USER_ID="@$TEST_USER:localhost"
export MATRIX_ACCESS_TOKEN="$ACCESS_TOKEN"
export MATRIX_TEST_ROOM="$ROOM_ID"
export MATRIX_ENCRYPTION="false"

# FIX: under `set -e`, a non-zero exit from the probe used to abort the
# script before PROBE_EXIT was captured, making the FAILED branch below
# unreachable. Temporarily disable errexit so the probe's status can be
# inspected and reported.
set +e
python3 <<'PYEOF'
import asyncio
import os
import sys
from datetime import datetime, timezone

try:
    from nio import AsyncClient, SyncResponse, RoomSendResponse
except ImportError:
    print("matrix-nio not installed. Installing...")
    import subprocess
    subprocess.check_call([sys.executable, "-m", "pip", "install", "--quiet", "matrix-nio"])
    from nio import AsyncClient, SyncResponse, RoomSendResponse

HOMESERVER = os.getenv("MATRIX_HOMESERVER", "").rstrip("/")
USER_ID = os.getenv("MATRIX_USER_ID", "")
ACCESS_TOKEN = os.getenv("MATRIX_ACCESS_TOKEN", "")
ROOM_ID = os.getenv("MATRIX_TEST_ROOM", "")

def ok(msg): print(f"\033[0;32m[PASS]\033[0m {msg}")
def err(msg): print(f"\033[0;31m[FAIL]\033[0m {msg}")

async def main():
    client = AsyncClient(HOMESERVER, USER_ID)
    client.access_token = ACCESS_TOKEN
    client.user_id = USER_ID
    try:
        whoami = await client.whoami()
        if hasattr(whoami, "user_id"):
            ok(f"Whoami authenticated as {whoami.user_id}")
        else:
            err(f"Whoami failed: {whoami}")
            return 1

        sync_resp = await client.sync(timeout=10000)
        if isinstance(sync_resp, SyncResponse):
            ok(f"Initial sync complete ({len(sync_resp.rooms.join)} joined rooms)")
        else:
            err(f"Initial sync failed: {sync_resp}")
            return 1

        test_body = f"🔥 Hermes local integration probe | {datetime.now(timezone.utc).isoformat()}"
        send_resp = await client.room_send(
            ROOM_ID,
            "m.room.message",
            {"msgtype": "m.text", "body": test_body},
        )
        if isinstance(send_resp, RoomSendResponse):
            ok(f"Test message sent (event_id: {send_resp.event_id})")
        else:
            err(f"Test message failed: {send_resp}")
            return 1

        ok("All integration checks passed — Hermes Matrix adapter works locally.")
        return 0
    finally:
        await client.close()

sys.exit(asyncio.run(main()))
PYEOF
PROBE_EXIT=$?
set -e

if [[ $PROBE_EXIT -eq 0 ]]; then
    pass "Local integration test PASSED"
    info "=================================================="
    info "Result: #166 is execution-ready."
    info "The only remaining blocker is host/domain (#187)."
    info "=================================================="
else
    fail "Local integration test FAILED"
    exit 1
fi
|
||||
@@ -1,236 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Matrix/Conduit Scaffold Validator — Issue #183 Acceptance Proof
|
||||
|
||||
Validates that infra/matrix/ contains a complete, well-formed deployment scaffold.
|
||||
Run this after any scaffold change to ensure #183 acceptance criteria remain met.
|
||||
|
||||
Usage:
|
||||
python3 infra/matrix/scripts/validate-scaffold.py
|
||||
python3 infra/matrix/scripts/validate-scaffold.py --json
|
||||
|
||||
Exit codes:
|
||||
0 = all checks passed
|
||||
1 = one or more checks failed
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
try:
|
||||
import yaml
|
||||
HAS_YAML = True
|
||||
except ImportError:
|
||||
HAS_YAML = False
|
||||
|
||||
|
||||
class Validator:
|
||||
def __init__(self, base_dir: Path):
|
||||
self.base_dir = base_dir.resolve()
|
||||
self.checks = []
|
||||
self.passed = 0
|
||||
self.failed = 0
|
||||
|
||||
def _add(self, name: str, status: bool, detail: str):
|
||||
self.checks.append({"name": name, "status": "PASS" if status else "FAIL", "detail": detail})
|
||||
if status:
|
||||
self.passed += 1
|
||||
else:
|
||||
self.failed += 1
|
||||
|
||||
def require_files(self):
|
||||
"""Check that all required scaffold files exist."""
|
||||
required = [
|
||||
"README.md",
|
||||
"prerequisites.md",
|
||||
"docker-compose.yml",
|
||||
"conduit.toml",
|
||||
".env.example",
|
||||
"deploy-matrix.sh",
|
||||
"host-readiness-check.sh",
|
||||
"caddy/Caddyfile",
|
||||
"scripts/deploy-conduit.sh",
|
||||
"docs/RUNBOOK.md",
|
||||
]
|
||||
missing = []
|
||||
for rel in required:
|
||||
path = self.base_dir / rel
|
||||
if not path.exists():
|
||||
missing.append(rel)
|
||||
self._add(
|
||||
"Required files present",
|
||||
len(missing) == 0,
|
||||
f"Missing: {missing}" if missing else f"All {len(required)} files found",
|
||||
)
|
||||
|
||||
def docker_compose_valid(self):
|
||||
"""Validate docker-compose.yml is syntactically valid YAML."""
|
||||
path = self.base_dir / "docker-compose.yml"
|
||||
if not path.exists():
|
||||
self._add("docker-compose.yml valid YAML", False, "File does not exist")
|
||||
return
|
||||
try:
|
||||
with open(path, "r") as f:
|
||||
content = f.read()
|
||||
if HAS_YAML:
|
||||
yaml.safe_load(content)
|
||||
else:
|
||||
# Basic YAML brace balance check
|
||||
if content.count("{") != content.count("}"):
|
||||
raise ValueError("Brace mismatch")
|
||||
# Must reference conduit image or build
|
||||
has_conduit = "conduit" in content.lower()
|
||||
self._add(
|
||||
"docker-compose.yml valid YAML",
|
||||
has_conduit,
|
||||
"Valid YAML and references Conduit" if has_conduit else "Valid YAML but missing Conduit reference",
|
||||
)
|
||||
except Exception as e:
|
||||
self._add("docker-compose.yml valid YAML", False, str(e))
|
||||
|
||||
def conduit_toml_valid(self):
|
||||
"""Validate conduit.toml has required sections."""
|
||||
path = self.base_dir / "conduit.toml"
|
||||
if not path.exists():
|
||||
self._add("conduit.toml required keys", False, "File does not exist")
|
||||
return
|
||||
with open(path, "r") as f:
|
||||
content = f.read()
|
||||
required_keys = ["server_name", "port", "[database]"]
|
||||
missing = [k for k in required_keys if k not in content]
|
||||
self._add(
|
||||
"conduit.toml required keys",
|
||||
len(missing) == 0,
|
||||
f"Missing keys: {missing}" if missing else "Required keys present",
|
||||
)
|
||||
|
||||
def env_example_complete(self):
|
||||
"""Validate .env.example has required variables."""
|
||||
path = self.base_dir / ".env.example"
|
||||
if not path.exists():
|
||||
self._add(".env.example required variables", False, "File does not exist")
|
||||
return
|
||||
with open(path, "r") as f:
|
||||
content = f.read()
|
||||
required_vars = ["MATRIX_DOMAIN", "ADMIN_USER", "ADMIN_PASSWORD"]
|
||||
missing = [v for v in required_vars if v not in content]
|
||||
self._add(
|
||||
".env.example required variables",
|
||||
len(missing) == 0,
|
||||
f"Missing vars: {missing}" if missing else "Required variables present",
|
||||
)
|
||||
|
||||
def shell_scripts_executable(self):
|
||||
"""Check that shell scripts are executable and pass bash -n."""
|
||||
scripts = [
|
||||
self.base_dir / "deploy-matrix.sh",
|
||||
self.base_dir / "host-readiness-check.sh",
|
||||
self.base_dir / "scripts" / "deploy-conduit.sh",
|
||||
]
|
||||
errors = []
|
||||
for script in scripts:
|
||||
if not script.exists():
|
||||
errors.append(f"{script.name}: missing")
|
||||
continue
|
||||
if not os.access(script, os.X_OK):
|
||||
errors.append(f"{script.name}: not executable")
|
||||
result = subprocess.run(["bash", "-n", str(script)], capture_output=True, text=True)
|
||||
if result.returncode != 0:
|
||||
errors.append(f"{script.name}: syntax error — {result.stderr.strip()}")
|
||||
self._add(
|
||||
"Shell scripts executable & valid",
|
||||
len(errors) == 0,
|
||||
"; ".join(errors) if errors else f"All {len(scripts)} scripts OK",
|
||||
)
|
||||
|
||||
def caddyfile_well_formed(self):
|
||||
"""Check Caddyfile has expected tokens."""
|
||||
path = self.base_dir / "caddy" / "Caddyfile"
|
||||
if not path.exists():
|
||||
self._add("Caddyfile well-formed", False, "File does not exist")
|
||||
return
|
||||
with open(path, "r") as f:
|
||||
content = f.read()
|
||||
has_reverse_proxy = "reverse_proxy" in content
|
||||
has_tls = "tls" in content.lower() or "acme" in content.lower() or "auto" in content.lower()
|
||||
has_well_known = ".well-known" in content or "matrix" in content.lower()
|
||||
ok = has_reverse_proxy and has_well_known
|
||||
detail = []
|
||||
if not has_reverse_proxy:
|
||||
detail.append("missing reverse_proxy directive")
|
||||
if not has_well_known:
|
||||
detail.append("missing .well-known/matrix routing")
|
||||
self._add(
|
||||
"Caddyfile well-formed",
|
||||
ok,
|
||||
"Well-formed" if ok else f"Issues: {', '.join(detail)}",
|
||||
)
|
||||
|
||||
def runbook_links_valid(self):
|
||||
"""Check docs/RUNBOOK.md has links to #166 and #183."""
|
||||
path = self.base_dir / "docs" / "RUNBOOK.md"
|
||||
if not path.exists():
|
||||
self._add("RUNBOOK.md issue links", False, "File does not exist")
|
||||
return
|
||||
with open(path, "r") as f:
|
||||
content = f.read()
|
||||
has_166 = "#166" in content or "166" in content
|
||||
has_183 = "#183" in content or "183" in content
|
||||
ok = has_166 and has_183
|
||||
self._add(
|
||||
"RUNBOOK.md issue links",
|
||||
ok,
|
||||
"Links to #166 and #183" if ok else "Missing issue continuity links",
|
||||
)
|
||||
|
||||
def run_all(self):
|
||||
self.require_files()
|
||||
self.docker_compose_valid()
|
||||
self.conduit_toml_valid()
|
||||
self.env_example_complete()
|
||||
self.shell_scripts_executable()
|
||||
self.caddyfile_well_formed()
|
||||
self.runbook_links_valid()
|
||||
|
||||
def report(self, json_mode: bool = False):
    """Print the accumulated check results.

    With json_mode, emit a machine-readable JSON object (base_dir, counts,
    per-check records); otherwise print a human-readable summary with
    pass/fail icons and a final SUCCESS/FAILURE line.
    """
    if json_mode:
        print(json.dumps({
            "base_dir": str(self.base_dir),
            "passed": self.passed,
            "failed": self.failed,
            "checks": self.checks,
        }, indent=2))
    else:
        # f-prefix removed: the title has no placeholders (ruff F541).
        print("Matrix/Conduit Scaffold Validator")
        print(f"Base: {self.base_dir}")
        print(f"Checks: {self.passed} passed, {self.failed} failed\n")
        for c in self.checks:
            icon = "✅" if c["status"] == "PASS" else "❌"
            print(f"{icon} {c['name']:<40} {c['detail']}")
        print(f"\n{'SUCCESS' if self.failed == 0 else 'FAILURE'} — {self.passed}/{self.passed+self.failed} checks passed")
|
||||
|
||||
|
||||
def main():
    """CLI entry point: parse args, run all checks, exit 0 only if all pass."""
    parser = argparse.ArgumentParser(description="Validate Matrix/Conduit deployment scaffold")
    parser.add_argument("--json", action="store_true", help="Output JSON report")
    parser.add_argument("--base", default="infra/matrix", help="Path to scaffold directory")
    args = parser.parse_args()

    base = Path(args.base)
    if not base.exists():
        # Fall back to the scaffold directory relative to this script's location.
        base = Path(__file__).resolve().parent.parent

    validator = Validator(base)
    validator.run_all()
    validator.report(json_mode=args.json)
    sys.exit(0 if validator.failed == 0 else 1)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -1,168 +0,0 @@
|
||||
#!/usr/bin/env bash
# verify-hermes-integration.sh — Verify Hermes Matrix adapter integration
# Usage: ./verify-hermes-integration.sh
# Issue: #166
#
# Checks: matrix-nio installed, required env vars set, then runs a live
# probe (login, sync, join room, optional E2EE check, send test message).
# Exit 0 only when everything passes.

# NOTE(review): -e is deliberately absent so individual failed checks can
# accumulate instead of aborting; -u/-o pipefail still catch unset vars.
set -uo pipefail

# ANSI colors for human-readable output.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m'

PASS=0
FAIL=0

# NOTE(review): ((PASS++)) returns exit status 1 when the counter was 0;
# harmless here because set -e is not active.
pass() { echo -e "${GREEN}[PASS]${NC} $*"; ((PASS++)); }
fail() { echo -e "${RED}[FAIL]${NC} $*"; ((FAIL++)); }
warn() { echo -e "${YELLOW}[WARN]${NC} $*"; }

log() { echo -e "\n==> $*"; }

log "Hermes Matrix Integration Verification"
log "======================================"

# === Check matrix-nio ===
log "Checking Python dependencies..."
if python3 -c "import nio" 2>/dev/null; then
    pass "matrix-nio is installed"
else
    fail "matrix-nio not installed. Run: pip install 'matrix-nio[e2e]'"
    exit 1
fi

# === Check env vars ===
log "Checking environment variables..."
MISSING=0
# ${!var} is indirect expansion: look up the variable named by $var.
for var in MATRIX_HOMESERVER MATRIX_USER_ID; do
    if [[ -z "${!var:-}" ]]; then
        fail "$var is not set"
        MISSING=1
    else
        pass "$var is set"
    fi
done

# At least one credential mechanism must be configured.
if [[ -z "${MATRIX_ACCESS_TOKEN:-}" && -z "${MATRIX_PASSWORD:-}" ]]; then
    fail "Either MATRIX_ACCESS_TOKEN or MATRIX_PASSWORD must be set"
    MISSING=1
fi

if [[ $MISSING -gt 0 ]]; then
    exit 1
else
    pass "Authentication credentials present"
fi

# === Run Python probe ===
log "Running live probe against $MATRIX_HOMESERVER..."

# Heredoc delimiter is quoted ('PYEOF') so the Python below is passed
# verbatim — no shell expansion; config comes from environment variables.
python3 <<'PYEOF'
import asyncio
import os
import sys
from datetime import datetime, timezone

from nio import AsyncClient, LoginResponse, SyncResponse, RoomSendResponse

HOMESERVER = os.getenv("MATRIX_HOMESERVER", "").rstrip("/")
USER_ID = os.getenv("MATRIX_USER_ID", "")
ACCESS_TOKEN = os.getenv("MATRIX_ACCESS_TOKEN", "")
PASSWORD = os.getenv("MATRIX_PASSWORD", "")
ENCRYPTION = os.getenv("MATRIX_ENCRYPTION", "").lower() in ("true", "1", "yes")
ROOM_ALIAS = os.getenv("MATRIX_TEST_ROOM", "#operator-room:matrix.timmy.foundation")

# Mirror the shell script's colored status helpers.
def ok(msg): print(f"\033[0;32m[PASS]\033[0m {msg}")
def err(msg): print(f"\033[0;31m[FAIL]\033[0m {msg}")
def warn(msg): print(f"\033[1;33m[WARN]\033[0m {msg}")

async def main():
    client = AsyncClient(HOMESERVER, USER_ID)
    try:
        # --- Login ---
        # Prefer an access token; fall back to password login.
        if ACCESS_TOKEN:
            client.access_token = ACCESS_TOKEN
            client.user_id = USER_ID
            resp = await client.whoami()
            # nio returns a WhoamiResponse on success, an error object otherwise;
            # success is detected by the presence of user_id.
            if hasattr(resp, "user_id"):
                ok(f"Access token valid for {resp.user_id}")
            else:
                err(f"Access token invalid: {resp}")
                return 1
        elif PASSWORD:
            resp = await client.login(PASSWORD, device_name="HermesVerify")
            if isinstance(resp, LoginResponse):
                ok(f"Password login succeeded for {resp.user_id}")
            else:
                err(f"Password login failed: {resp}")
                return 1
        else:
            err("No credentials available")
            return 1

        # --- Sync once to populate rooms ---
        sync_resp = await client.sync(timeout=10000)
        if isinstance(sync_resp, SyncResponse):
            ok(f"Initial sync complete ({len(sync_resp.rooms.join)} joined rooms)")
        else:
            err(f"Initial sync failed: {sync_resp}")
            return 1

        # --- Join operator room ---
        join_resp = await client.join_room(ROOM_ALIAS)
        if hasattr(join_resp, "room_id"):
            room_id = join_resp.room_id
            ok(f"Joined room {ROOM_ALIAS} -> {room_id}")
        else:
            err(f"Could not join {ROOM_ALIAS}: {join_resp}")
            return 1

        # --- E2EE check ---
        # Only warns; E2EE problems do not fail the probe.
        if ENCRYPTION:
            if hasattr(client, "olm") and client.olm:
                ok("E2EE crypto store is active")
            else:
                warn("E2EE requested but crypto store not loaded (install matrix-nio[e2e])")
        else:
            warn("E2EE is disabled")

        # --- Send test message ---
        test_body = f"🔥 Hermes Matrix probe | {datetime.now(timezone.utc).isoformat()}"
        send_resp = await client.room_send(
            room_id,
            "m.room.message",
            {"msgtype": "m.text", "body": test_body},
        )
        if isinstance(send_resp, RoomSendResponse):
            ok(f"Test message sent (event_id: {send_resp.event_id})")
        else:
            err(f"Test message failed: {send_resp}")
            return 1

        ok("All integration checks passed — Hermes Matrix adapter is ready.")
        return 0
    finally:
        # Always release the aiohttp session, even on failure paths.
        await client.close()

sys.exit(asyncio.run(main()))
PYEOF

PROBE_EXIT=$?

# A failing probe counts as one more failure in the summary.
if [[ $PROBE_EXIT -ne 0 ]]; then
    ((FAIL++))
fi

# === Summary ===
log "======================================"
echo -e "Results: ${GREEN}$PASS passed${NC}, ${RED}$FAIL failures${NC}"

if [[ $FAIL -gt 0 ]]; then
    echo ""
    echo "Integration verification FAILED. Fix errors above and re-run."
    exit 1
else
    echo ""
    echo "Integration verification PASSED. Hermes Matrix adapter is ready for production."
    exit 0
fi
|
||||
@@ -1,633 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Workforce Manager - Epic #204 / Milestone #218
|
||||
|
||||
Reads fleet routing, Wolf evaluation scores, and open Gitea issues across
|
||||
Timmy_Foundation repos. Assigns each issue to the best-available agent,
|
||||
tracks success rates, and dispatches work.
|
||||
|
||||
Usage:
|
||||
python workforce-manager.py # Scan, assign, dispatch
|
||||
python workforce-manager.py --dry-run # Show assignments without dispatching
|
||||
python workforce-manager.py --status # Show agent status and open issue count
|
||||
python workforce-manager.py --cron # Run silently, save to log
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
try:
|
||||
import requests
|
||||
except ImportError:
|
||||
print("FATAL: requests is required. pip install requests", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Constants
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Fleet definition: which agents exist, their model/role/tier.
FLEET_ROUTING_PATH = Path.home() / ".hermes" / "fleet-routing.json"
# Wolf evaluation output directory; one JSON result file per run.
WOLF_RESULTS_DIR = Path.home() / ".hermes" / "wolf" / "results"
# Plain-text Gitea API token (read verbatim, stripped of whitespace).
GITEA_TOKEN_PATH = Path.home() / ".hermes" / "gitea_token_vps"
GITEA_API_BASE = "https://forge.alexanderwhitestone.com/api/v1"
# Persistent assignment/stat state written between runs.
WORKFORCE_STATE_PATH = Path.home() / ".hermes" / "workforce-state.json"
ORG_NAME = "Timmy_Foundation"

# Role-to-agent-role mapping heuristics
# Substring keywords matched against issue title+body+labels by classify_issue;
# each hit scores points toward that role. Roles with empty lists never win.
ROLE_KEYWORDS = {
    "code-generation": [
        "code", "implement", "feature", "function", "class", "script",
        "build", "create", "add", "module", "component",
    ],
    "issue-triage": [
        "triage", "categorize", "tag", "label", "organize",
        "backlog", "sort", "prioritize", "review issue",
    ],
    "on-request-queries": [
        "query", "search", "lookup", "find", "check",
        "info", "report", "status",
    ],
    "devops": [
        "deploy", "ci", "cd", "pipeline", "docker", "container",
        "server", "infrastructure", "config", "nginx", "cron",
        "setup", "install", "environment", "provision",
        "build", "release", "workflow",
    ],
    "documentation": [
        "doc", "readme", "document", "write", "guide",
        "spec", "wiki", "changelog", "tutorial",
        "explain", "describe",
    ],
    "code-review": [
        "review", "refactor", "fix", "bug", "debug",
        "test", "lint", "style", "improve",
        "clean up", "optimize", "performance",
    ],
    "triage-routing": [
        "route", "assign", "triage", "dispatch",
        "organize", "categorize",
    ],
    "small-tasks": [
        "small", "quick", "minor", "typo", "label",
        "update", "rename", "cleanup",
    ],
    # Placeholder roles — never selected by classify_issue (no keywords).
    "inactive": [],
    "unknown": [],
}

# Priority keywords (higher = more urgent, route to more capable agent)
# compute_priority sums the weight of every keyword found in the issue text.
PRIORITY_KEYWORDS = {
    "critical": 5,
    "urgent": 4,
    "block": 4,
    "bug": 3,
    "fix": 3,
    "security": 5,
    "deploy": 2,
    "feature": 1,
    "enhancement": 1,
    "documentation": 1,
    "cleanup": 0,
}

# Cost tier priority (lower index = prefer first)
TIER_ORDER = ["free", "cheap", "prepaid", "unknown"]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Data loading
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def load_json(path: Path) -> Any:
    """Parse *path* as JSON; warn and return None when the file is absent."""
    if path.exists():
        with open(path) as fh:
            return json.load(fh)
    logging.warning("File not found: %s", path)
    return None
|
||||
|
||||
|
||||
def load_fleet_routing() -> List[dict]:
    """Return the agent list from fleet-routing.json, or [] when unavailable."""
    routing = load_json(FLEET_ROUTING_PATH)
    return routing["agents"] if routing and "agents" in routing else []
|
||||
|
||||
|
||||
def load_wolf_scores() -> Dict[str, dict]:
    """Collect per-model Wolf evaluation entries, keyed by model name.

    Files are processed in sorted order, so entries from later result
    files overwrite earlier ones for the same model.
    """
    if not WOLF_RESULTS_DIR.exists():
        return {}
    scores: Dict[str, dict] = {}
    for result_file in sorted(WOLF_RESULTS_DIR.glob("*.json")):
        payload = load_json(result_file)
        if not payload or "model_scores" not in payload:
            continue
        for entry in payload["model_scores"]:
            name = entry.get("model", "")
            if name:
                scores[name] = entry
    return scores
|
||||
|
||||
|
||||
def load_workforce_state() -> dict:
    """Return persisted workforce state, or a fresh empty structure."""
    if not WORKFORCE_STATE_PATH.exists():
        return {"assignments": [], "agent_stats": {}, "last_run": None}
    return load_json(WORKFORCE_STATE_PATH) or {}
|
||||
|
||||
|
||||
def save_workforce_state(state: dict) -> None:
    """Persist workforce state as pretty-printed JSON, creating parent dirs."""
    WORKFORCE_STATE_PATH.parent.mkdir(parents=True, exist_ok=True)
    WORKFORCE_STATE_PATH.write_text(json.dumps(state, indent=2))
    logging.info("Workforce state saved to %s", WORKFORCE_STATE_PATH)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Gitea API
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class GiteaAPI:
    """Thin wrapper for Gitea REST API."""

    def __init__(self, token: str, base_url: str = GITEA_API_BASE):
        """Create an authenticated client; one shared session carries the token header."""
        self.base_url = base_url.rstrip("/")
        self.session = requests.Session()
        self.session.headers.update({
            "Authorization": f"token {token}",
            "Accept": "application/json",
            "Content-Type": "application/json",
        })

    def _get(self, path: str, params: Optional[dict] = None) -> Any:
        """GET *path* relative to base_url; raises requests.HTTPError on 4xx/5xx."""
        r = self.session.get(f"{self.base_url}{path}", params=params)
        r.raise_for_status()
        return r.json()

    def _post(self, path: str, data: dict) -> Any:
        """POST *data* as JSON; raises requests.HTTPError on 4xx/5xx."""
        r = self.session.post(f"{self.base_url}{path}", json=data)
        r.raise_for_status()
        return r.json()

    def _patch(self, path: str, data: dict) -> Any:
        """PATCH *data* as JSON; raises requests.HTTPError on 4xx/5xx."""
        r = self.session.patch(f"{self.base_url}{path}", json=data)
        r.raise_for_status()
        return r.json()

    def get_org_repos(self, org: str) -> List[dict]:
        """List repositories of *org* (single request, limit 100 — no pagination)."""
        return self._get(f"/orgs/{org}/repos", params={"limit": 100})

    def get_open_issues(self, owner: str, repo: str, page: int = 1) -> List[dict]:
        """Fetch one 50-item page of a repo's open issues (PRs excluded via type=issues)."""
        params = {"state": "open", "type": "issues", "limit": 50, "page": page}
        return self._get(f"/repos/{owner}/{repo}/issues", params=params)

    def get_all_open_issues(self, org: str) -> List[dict]:
        """Fetch all open issues across all org repos."""
        repos = self.get_org_repos(org)
        all_issues = []
        for repo in repos:
            name = repo["name"]
            try:
                # Paginate through all issues
                page = 1
                while True:
                    issues = self.get_open_issues(org, name, page=page)
                    if not issues:
                        break
                    all_issues.extend(issues)
                    # A short page means we just read the last one.
                    if len(issues) < 50:
                        break
                    page += 1
                # NOTE(review): len(all_issues) is the running total across all
                # repos so far, not this repo's count — message is misleading.
                logging.info("Loaded %d open issues from %s/%s", len(all_issues), org, name)
            except Exception as exc:
                # Best-effort: one failing repo must not abort the whole scan.
                logging.warning("Failed to load issues from %s/%s: %s", org, name, exc)
        return all_issues

    def add_issue_comment(self, owner: str, repo: str, issue_num: int, body: str) -> dict:
        """Post a markdown comment on an issue."""
        return self._post(f"/repos/{owner}/{repo}/issues/{issue_num}/comments", {"body": body})

    def add_issue_label(self, owner: str, repo: str, issue_num: int, label: str) -> dict:
        """Attach a label to an issue.

        NOTE(review): some Gitea versions expect numeric label IDs here rather
        than names — confirm against the deployed Gitea's API version.
        """
        return self._post(
            f"/repos/{owner}/{repo}/issues/{issue_num}/labels",
            {"labels": [label]},
        )

    def assign_issue(self, owner: str, repo: str, issue_num: int, assignee: str) -> dict:
        """Replace the issue's assignee list with the single *assignee*."""
        return self._patch(
            f"/repos/{owner}/{repo}/issues/{issue_num}",
            {"assignees": [assignee]},
        )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Scoring & Assignment Logic
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def classify_issue(issue: dict) -> str:
    """Pick the agent role whose keywords best match the issue's text.

    Each keyword hit in title+body+labels scores 2; a keyword appearing in a
    label name adds 3 more per label. Defaults to "small-tasks".
    """
    title = (issue.get("title", "") or "").lower()
    body = (issue.get("body", "") or "").lower()
    label_names = [lbl.get("name", "").lower() for lbl in issue.get("labels", [])]
    haystack = f"{title} {body}" + " " + " ".join(label_names)

    winner, winner_score = "small-tasks", 0  # default when nothing matches
    for candidate, keywords in ROLE_KEYWORDS.items():
        if not keywords:
            continue  # "inactive"/"unknown" can never win
        hits = sum(2 for kw in keywords if kw in haystack)
        # Label matches are stronger evidence than free-text matches.
        hits += sum(3 for lbl in label_names if any(kw in lbl for kw in keywords))
        if hits > winner_score:
            winner, winner_score = candidate, hits
    return winner
|
||||
|
||||
|
||||
def compute_priority(issue: dict) -> int:
    """Sum the weights of every priority keyword found in the issue text."""
    haystack = "{} {}".format(
        (issue.get("title", "") or "").lower(),
        (issue.get("body", "") or "").lower(),
    )
    total = 0
    for keyword, weight in PRIORITY_KEYWORDS.items():
        if keyword in haystack:
            total += weight
    return total
|
||||
|
||||
|
||||
def score_agent_for_issue(agent: dict, role: str, wolf_scores: dict, priority: int) -> float:
    """Score how well *agent* fits an issue of *role*/*priority*. Higher is better.

    Components: role fit (exact match dominates, adjacent roles get partial
    credit), Wolf benchmark bonus, cost-tier fit, activity, repo familiarity.
    """
    total = 0.0

    # Role fit — the branches are mutually exclusive on the agent's role.
    agent_role = agent.get("role", "unknown")
    if agent_role == role:
        total += 10.0
    elif agent_role == "triage-routing" and role in ("issue-triage", "triage-routing"):
        total += 8.0
    elif agent_role == "small-tasks" and role in ("issue-triage", "on-request-queries"):
        total += 6.0
    elif agent_role == "code-generation" and role in ("code-review", "devops"):
        total += 4.0

    # Wolf benchmark bonus: first scored model whose name contains this model.
    model = agent.get("model", "")
    entry = None
    if model:
        needle = model.lower()
        entry = next((ws for wm, ws in wolf_scores.items() if needle in wm.lower()), None)
    if entry and entry.get("success"):
        total += entry.get("total", 0) * 3.0

    # Cost fit: cheap tiers for low-priority work, prepaid for urgent work.
    tier = agent.get("tier", "unknown")
    if priority <= 1 and tier in ("free", "cheap"):
        total += 4.0
    elif priority >= 3 and tier in ("prepaid",):
        total += 3.0
    else:
        tier_rank = TIER_ORDER.index(tier) if tier in TIER_ORDER else 3
        total += (3 - tier_rank) * 1.0

    # Small bumps: active agents, and repo familiarity capped at +2.0.
    if agent.get("active", False):
        total += 2.0
    total += min(agent.get("repo_count", 0) * 0.2, 2.0)

    return round(total, 3)
|
||||
|
||||
|
||||
def find_best_agent(agents: List[dict], role: str, wolf_scores: dict, priority: int,
                    exclude: Optional[List[str]] = None) -> Optional[dict]:
    """Return the active, non-excluded agent with the highest match score.

    Ties resolve to the earliest agent in the input list; returns None when
    no eligible agent exists.
    """
    skip = set(exclude or [])
    scored = [
        (score_agent_for_issue(candidate, role, wolf_scores, priority), candidate)
        for candidate in agents
        if candidate.get("name") not in skip and candidate.get("active", False)
    ]
    if not scored:
        return None
    # max() keeps the first maximal element, matching a stable reverse sort.
    return max(scored, key=lambda pair: pair[0])[1]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Dispatch
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def dispatch_assignment(api: GiteaAPI, issue: dict, agent: dict, dry_run: bool = False) -> dict:
    """Assign an issue to an agent and optionally post a comment.

    Returns a result dict with at least "success"; on real dispatch it also
    carries repo/issue_number/assignee, and "error" on failure. With dry_run
    no API calls are made — the planned comment is returned instead.
    """
    owner = ORG_NAME
    repo = issue.get("repository", {}).get("name", "")

    # Extract repo from issue repo_url if not in the repository key
    if not repo:
        repo_url = issue.get("repository_url", "")
        if repo_url:
            repo = repo_url.rstrip("/").split("/")[-1]

    if not repo:
        return {"success": False, "error": "Cannot determine repository for issue"}

    issue_num = issue.get("number")
    agent_name = agent.get("name", "unknown")

    # This exact markdown body is posted to the issue — do not reformat.
    comment_body = (
        f"🤖 **Workforce Manager assigned this issue to: @{agent_name}**\n\n"
        f"- **Agent:** {agent_name}\n"
        f"- **Model:** {agent.get('model', 'unknown')}\n"
        f"- **Role:** {agent.get('role', 'unknown')}\n"
        f"- **Tier:** {agent.get('tier', 'unknown')}\n"
        f"- **Assigned at:** {datetime.now(timezone.utc).isoformat()}\n\n"
        f"*Automated assignment by Workforce Manager (Epic #204)*"
    )

    if dry_run:
        return {
            "success": True,
            "dry_run": True,
            "repo": repo,
            "issue_number": issue_num,
            "assignee": agent_name,
            "comment": comment_body,
        }

    try:
        # NOTE(review): not atomic — the assignment may succeed while the
        # comment fails, in which case the whole result reports failure.
        api.assign_issue(owner, repo, issue_num, agent_name)
        api.add_issue_comment(owner, repo, issue_num, comment_body)
        return {
            "success": True,
            "repo": repo,
            "issue_number": issue_num,
            "issue_title": issue.get("title", ""),
            "assignee": agent_name,
        }
    except Exception as exc:
        # Broad catch is deliberate: a single failed dispatch must not stop the run.
        return {
            "success": False,
            "repo": repo,
            "issue_number": issue_num,
            "error": str(exc),
        }
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# State Tracking
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def update_agent_stats(state: dict, result: dict) -> None:
    """Fold one dispatch *result* into state["agent_stats"] for its assignee.

    Creates the per-agent record on first sight, appends the assignment to
    its history, and recomputes the rolling success rate in place.
    """
    agent_name = result.get("assignee", "unknown")
    stats = state.setdefault("agent_stats", {}).setdefault(agent_name, {
        "total_assigned": 0,
        "successful": 0,
        "failed": 0,
        "success_rate": 0.0,
        "last_assignment": None,
        "assigned_issues": [],
    })

    stats["total_assigned"] += 1
    stats["last_assignment"] = datetime.now(timezone.utc).isoformat()
    stats["assigned_issues"].append({
        "repo": result.get("repo", ""),
        "issue_number": result.get("issue_number"),
        "success": result.get("success", False),
        "timestamp": datetime.now(timezone.utc).isoformat(),
    })

    outcome = "successful" if result.get("success") else "failed"
    stats[outcome] += 1

    attempts = stats["successful"] + stats["failed"]
    stats["success_rate"] = round(stats["successful"] / attempts, 3) if attempts > 0 else 0.0
|
||||
|
||||
|
||||
def print_status(state: dict, agents: List[dict], issues_count: int) -> None:
    """Print workforce status: fleet summary, per-agent details, issue counts."""
    print(f"\n{'=' * 60}")
    print(f"Workforce Manager Status - {datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M UTC')}")
    print(f"{'=' * 60}")

    # Fleet summary
    active = [a for a in agents if a.get("active")]
    print(f"\nFleet: {len(active)} active agents, {len(agents)} total")
    tier_counts = {}
    for a in active:
        t = a.get("tier", "unknown")
        tier_counts[t] = tier_counts.get(t, 0) + 1
    for t, c in sorted(tier_counts.items()):
        print(f"  {t}: {c} agents")

    # Agent scores
    # NOTE(review): header advertises Name/Model/Role/Tier columns but the
    # row format below prints name+badge (width 45), role, tier — no Model
    # column — so the header does not line up with the rows. Confirm intent.
    wolf = load_wolf_scores()
    print(f"\nAgent Details:")
    print(f"  {'Name':<25} {'Model':<30} {'Role':<18} {'Tier':<10}")
    for a in agents:
        if not a.get("active"):
            continue
        stats = state.get("agent_stats", {}).get(a["name"], {})
        rate = stats.get("success_rate", 0.0)
        total = stats.get("total_assigned", 0)
        # Badge an agent whose model has a successful Wolf evaluation.
        wolf_badge = ""
        for wm, ws in wolf.items():
            if a["model"] and a["model"].lower() in wm.lower() and ws.get("success"):
                wolf_badge = f"[wolf:{ws['total']}]"
                break
        name_str = f"{a['name']} {wolf_badge}"
        if total > 0:
            name_str += f" (s/r: {rate}, n={total})"
        print(f"  {name_str:<45} {a.get('role', 'unknown'):<18} {a.get('tier', '?'):<10}")

    print(f"\nOpen Issues: {issues_count}")
    print(f"Assignments Made: {len(state.get('assignments', []))}")
    if state.get("last_run"):
        print(f"Last Run: {state['last_run']}")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Main
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def main() -> int:
    """Workforce Manager entry point.

    Loads the fleet, Wolf scores, and persisted state; fetches open issues
    across the org; classifies, prioritizes, and assigns each unassigned
    issue to the best-scoring agent; then persists state and prints a summary.
    Returns 0 on success, 1 on fatal setup errors.
    """
    parser = argparse.ArgumentParser(description="Workforce Manager - Assign Gitea issues to AI agents")
    parser.add_argument("--dry-run", action="store_true", help="Show assignments without dispatching")
    parser.add_argument("--status", action="store_true", help="Show workforce status only")
    parser.add_argument("--cron", action="store_true", help="Run silently for cron scheduling")
    parser.add_argument("--label", type=str, help="Only process issues with this label")
    parser.add_argument("--max-issues", type=int, default=100, help="Max issues to process per run")
    args = parser.parse_args()

    # Cron runs only log warnings and above; interactive runs log info.
    if args.cron:
        logging.basicConfig(level=logging.WARNING, format="%(asctime)s [%(levelname)s] %(message)s")
    else:
        logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")

    logging.info("Workforce Manager starting")

    # Load fleet, Wolf scores, and persisted state.
    agents = load_fleet_routing()
    if not agents:
        logging.error("No agents found in fleet-routing.json")
        return 1
    logging.info("Loaded %d agents from fleet routing", len(agents))

    wolf_scores = load_wolf_scores()
    if wolf_scores:
        logging.info("Loaded %d model scores from Wolf results", len(wolf_scores))

    state = load_workforce_state()

    # Load Gitea token (fatal if missing — nothing can be dispatched without it).
    if GITEA_TOKEN_PATH.exists():
        token = GITEA_TOKEN_PATH.read_text().strip()
    else:
        logging.error("Gitea token not found at %s", GITEA_TOKEN_PATH)
        return 1

    api = GiteaAPI(token)

    # Status-only mode: report counts without fetching full issue bodies.
    if args.status:
        repos = api.get_org_repos(ORG_NAME)
        total = sum(r.get("open_issues_count", 0) for r in repos)
        print_status(state, agents, total)
        return 0

    # Fetch open issues
    if not args.cron:
        print(f"Scanning open issues across {ORG_NAME} repos...")

    issues = api.get_all_open_issues(ORG_NAME)

    # Optional label filter (case-insensitive substring match on label names).
    if args.label:
        issues = [
            i for i in issues
            if any(args.label in (l.get("name", "") or "").lower() for l in i.get("labels", []))
        ]
        logging.info("Filtered to %d issues with label '%s'", len(issues), args.label)
    else:
        logging.info("Found %d open issues", len(issues))

    # Skip issues we already assigned in a previous run, or that have an assignee.
    already_assigned_nums = set()
    for a in state.get("assignments", []):
        already_assigned_nums.add((a.get("repo"), a.get("issue_number")))

    issues = [
        i for i in issues
        if not i.get("assignee") and
        (i.get("repository", {}).get("name"), i.get("number")) not in already_assigned_nums
    ]
    logging.info("%d unassigned issues to process", len(issues))

    # Highest-priority issues first, capped at --max-issues per run.
    issues_with_priority = [(compute_priority(i), i) for i in issues]
    issues_with_priority.sort(key=lambda x: x[0], reverse=True)
    issues = [i for _, i in issues_with_priority[:args.max_issues]]

    # Assign issues
    assignments = []
    agent_exclusions: Dict[str, List[str]] = {}  # repo -> agents already used for that repo this run
    global_exclusions: List[str] = []  # agents already at capacity per run
    max_per_agent_per_run = 5

    for issue in issues:
        role = classify_issue(issue)
        priority = compute_priority(issue)
        repo = issue.get("repository", {}).get("name", "")

        # Avoid assigning same agent twice to same repo in one run
        repo_excluded = agent_exclusions.get(repo, [])

        # NOTE(review): this cap compares against lifetime totals from
        # persisted state, not this run's count, despite the variable name
        # — confirm whether per-run or lifetime capping is intended.
        cap_excluded = [
            name for name, stats in state.get("agent_stats", {}).items()
            if stats.get("total_assigned", 0) > max_per_agent_per_run
        ]

        excluded = list(set(repo_excluded + global_exclusions + cap_excluded))

        agent = find_best_agent(agents, role, wolf_scores, priority, exclude=excluded)
        if not agent:
            # Relax exclusions if no agent found
            agent = find_best_agent(agents, role, wolf_scores, priority, exclude=[])
            if not agent:
                logging.warning("No suitable agent for issue #%d: %s (role=%s)",
                                issue.get("number"), issue.get("title", ""), role)
                continue

        result = dispatch_assignment(api, issue, agent, dry_run=args.dry_run)
        assignments.append(result)
        update_agent_stats(state, result)

        # Track per-repo exclusions
        agent_exclusions.setdefault(repo, []).append(agent["name"])

        if args.dry_run:
            print(f"  [DRY] #{issue['number']}: {issue.get('title','')[:60]} → @{agent['name']} ({role}, p={priority})")
        else:
            status_str = "OK" if result.get("success") else "FAIL"
            print(f"  [{status_str}] #{issue['number']}: {issue.get('title','')[:60]} → @{agent['name']} ({role}, p={priority})")

    # Persist results. setdefault guards against a persisted state file that
    # predates the "assignments" key (previously a KeyError).
    state.setdefault("assignments", []).extend([{
        "repo": a.get("repo"),
        "issue_number": a.get("issue_number"),
        "assignee": a.get("assignee"),
        "success": a.get("success", False),
        "timestamp": datetime.now(timezone.utc).isoformat(),
    } for a in assignments])
    state["last_run"] = datetime.now(timezone.utc).isoformat()
    save_workforce_state(state)

    # Summary
    ok = sum(1 for a in assignments if a.get("success"))
    fail = len(assignments) - ok
    logging.info("Done: %d assigned, %d succeeded, %d failed", len(assignments), ok, fail)

    if not args.cron:
        print(f"\n{'=' * 60}")
        print(f"Summary: {len(assignments)} assignments, {ok} OK, {fail} failed")
        # Show agent stats
        for name, stats in state.get("agent_stats", {}).items():
            if stats.get("total_assigned", 0) > 0:
                print(f"  @{name}: {stats['successful']}/{stats['total_assigned']} ({stats.get('success_rate', 0):.0%} success)")
        print(f"{'=' * 60}")

    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
||||
Reference in New Issue
Block a user