From 2009ac75b28922da81789b2b6c1775ccb88044fa Mon Sep 17 00:00:00 2001 From: Ezra Date: Sun, 5 Apr 2026 19:02:41 +0000 Subject: [PATCH] feat(matrix): scaffold validator + Hermes client spec - Add validate-scaffold.py: automated acceptance proof for #183 - Add HERMES_MATRIX_CLIENT_SPEC.md: end-to-end agent integration spec for #166 Refs #183, #166 --- .../HERMES_MATRIX_CLIENT_SPEC.md | 363 ++++++++++++++++++ infra/matrix/deploy-matrix.sh | 0 infra/matrix/host-readiness-check.sh | 0 infra/matrix/scripts/deploy-conduit.sh | 0 infra/matrix/scripts/validate-scaffold.py | 236 ++++++++++++ 5 files changed, 599 insertions(+) create mode 100644 docs/matrix-fleet-comms/HERMES_MATRIX_CLIENT_SPEC.md mode change 100644 => 100755 infra/matrix/deploy-matrix.sh mode change 100644 => 100755 infra/matrix/host-readiness-check.sh mode change 100644 => 100755 infra/matrix/scripts/deploy-conduit.sh create mode 100755 infra/matrix/scripts/validate-scaffold.py diff --git a/docs/matrix-fleet-comms/HERMES_MATRIX_CLIENT_SPEC.md b/docs/matrix-fleet-comms/HERMES_MATRIX_CLIENT_SPEC.md new file mode 100644 index 00000000..2f4b9273 --- /dev/null +++ b/docs/matrix-fleet-comms/HERMES_MATRIX_CLIENT_SPEC.md @@ -0,0 +1,363 @@ +# Hermes Matrix Client Integration Specification + +> **Issue**: [#166](http://143.198.27.163:3000/Timmy_Foundation/timmy-config/issues/166) — Stand up Matrix/Conduit +> **Created**: Ezra | 2026-04-05 | Burn mode +> **Purpose**: Define how Hermes wizard houses connect to, listen on, and respond within the sovereign Matrix fleet. This turns the #183 server scaffold into an end-to-end communications architecture. + +--- + +## 1. Scope + +This document specifies: +- The client library and runtime pattern for Hermes-to-Matrix integration +- Bot identity model (one account per wizard house vs. 
shared fleet bot) +- Message format, encryption requirements, and room membership rules +- Minimal working code scaffold for connection, listening, and reply +- Error handling, reconnection, and security hardening + +**Out of scope**: Server deployment (see `infra/matrix/`), room creation (see `scripts/bootstrap-fleet-rooms.py`), Telegram cutover (see `CUTOVER_PLAN.md`). + +--- + +## 2. Library Choice: `matrix-nio` + +**Selected library**: [`matrix-nio`](https://matrix-nio.readthedocs.io/) + +**Why `matrix-nio`:** +- Native async/await (fits Hermes agent loop) +- Full end-to-end encryption (E2EE) support via `AsyncClient` +- Small dependency footprint compared to Synapse client SDK +- Battle-tested in production bots (e.g., maubot, heisenbridge) + +**Installation**: +```bash +pip install matrix-nio[e2e] +``` + +--- + +## 3. Bot Identity Model + +### 3.1 Recommendation: One Bot Per Wizard House + +Each wizard house (Ezra, Allegro, Gemini, Bezalel, etc.) maintains its own Matrix user account. This mirrors the existing Telegram identity model and preserves sovereignty. + +**Pattern**: +- `@ezra:matrix.timmytime.net` +- `@allegro:matrix.timmytime.net` +- `@gemini:matrix.timmytime.net` + +### 3.2 Alternative: Shared Fleet Bot + +A single `@fleet:matrix.timmytime.net` bot proxies messages for all agents. **Not recommended** — creates a single point of failure and complicates attribution. + +### 3.3 Account Provisioning + +Each account is created via the Conduit admin API during room bootstrap (see `bootstrap-fleet-rooms.py`). Credentials are stored in the wizard house's local `.env` (`MATRIX_USER`, `MATRIX_PASSWORD`, `MATRIX_HOMESERVER`). + +--- + +## 4. Minimal Working Example + +The following scaffold demonstrates: +1. Logging in with password +2. Joining the fleet operator room +3. Listening for encrypted text messages +4. Replying with a simple acknowledgment +5. 
#!/usr/bin/env python3
"""hermes_matrix_client.py — Minimal Hermes Matrix Client Scaffold

Logs in with a password, joins the fleet operator room, answers ``!ping`` and
``!sitrep``, auto-joins rooms it is invited to, and shuts down cleanly on
SIGINT/SIGTERM.
"""

import asyncio
import os
import signal
from pathlib import Path

from nio import (
    AsyncClient,
    InviteEvent,
    JoinResponse,
    LoginResponse,
    MatrixRoom,
    RoomMessageText,
    SyncResponse,
)

# ------------------------------------------------------------------
# Configuration (read from environment or local .env)
# ------------------------------------------------------------------
HOMESERVER = os.getenv("MATRIX_HOMESERVER", "https://matrix.timmytime.net")
USER_ID = os.getenv("MATRIX_USER", "@ezra:matrix.timmytime.net")
PASSWORD = os.getenv("MATRIX_PASSWORD", "")
DEVICE_ID = os.getenv("MATRIX_DEVICE_ID", "HERMES_001")
OPERATOR_ROOM_ALIAS = "#operator-room:matrix.timmytime.net"

# Sync/retry tuning: long-poll for 30s, back off 5s -> 60s on failures.
SYNC_TIMEOUT_MS = 30000
RETRY_INITIAL_SECONDS = 5
RETRY_MAX_SECONDS = 60

# Persistent store for encryption state
cache_dir = Path.home() / ".cache" / "hermes-matrix"
cache_dir.mkdir(parents=True, exist_ok=True)
store_path = cache_dir / f"{USER_ID.split(':')[0].replace('@', '')}_store"


class HermesMatrixClient:
    """Thin wrapper around ``nio.AsyncClient`` for one wizard-house account."""

    def __init__(self):
        self.client = AsyncClient(
            homeserver=HOMESERVER,
            user=USER_ID,
            device_id=DEVICE_ID,
            store_path=str(store_path),
        )
        # Set by the signal handlers installed in main(); ends sync_loop().
        self.shutdown_event = asyncio.Event()

    async def login(self):
        """Log in with MATRIX_PASSWORD.

        Raises:
            RuntimeError: if no password is configured or the server rejects
                the login.
        """
        if not PASSWORD:
            # Fail before hitting the network with an empty credential.
            print("❌ Login failed: MATRIX_PASSWORD is not set")
            raise RuntimeError("Matrix login failed")

        resp = await self.client.login(PASSWORD)
        if isinstance(resp, LoginResponse):
            print(f"✅ Logged in as {resp.user_id} (device: {resp.device_id})")
        else:
            print(f"❌ Login failed: {resp}")
            raise RuntimeError("Matrix login failed")

    async def join_operator_room(self):
        """Join the canonical operator room by alias.

        Returns:
            The resolved room ID on success, otherwise ``None``.
        """
        # AsyncClient exposes join(); it accepts a room ID or an alias.
        res = await self.client.join(OPERATOR_ROOM_ALIAS)
        if isinstance(res, JoinResponse):
            print(f"✅ Joined operator room: {res.room_id}")
            return res.room_id
        print(f"⚠️ Could not join operator room: {res}")
        return None

    async def _send_text(self, room_id: str, body: str):
        """Send a plain ``m.text`` message to *room_id*."""
        await self.client.room_send(
            room_id=room_id,
            message_type="m.room.message",
            content={"msgtype": "m.text", "body": body},
        )

    async def on_message(self, room: MatrixRoom, event: RoomMessageText):
        """Handle incoming text messages."""
        if event.sender == self.client.user_id:
            return  # Ignore echo of our own messages

        print(f"📩 {room.display_name} | {event.sender}: {event.body}")

        # Simple command parsing
        if event.body.startswith("!ping"):
            await self._send_text(room.room_id, f"Pong from {USER_ID}!")
        elif event.body.startswith("!sitrep"):
            await self._send_text(
                room.room_id, "🔥 Burn mode active. All systems nominal."
            )

    async def on_invite(self, room: MatrixRoom, event: InviteEvent):
        """Auto-join rooms when invited."""
        print(f"📨 Invite to {room.room_id} from {event.sender}")
        await self.client.join(room.room_id)

    async def _sleep_or_shutdown(self, seconds: float):
        """Wait *seconds*, returning early if shutdown is requested."""
        try:
            await asyncio.wait_for(self.shutdown_event.wait(), timeout=seconds)
        except asyncio.TimeoutError:
            pass

    async def sync_loop(self):
        """Long-polling sync loop with automatic retry.

        Both raised exceptions and error responses returned by ``sync()`` are
        treated as failures and retried with exponential backoff
        (5s doubling up to 60s); a successful sync resets the delay.
        """
        self.client.add_event_callback(self.on_message, RoomMessageText)
        self.client.add_event_callback(self.on_invite, InviteEvent)

        delay = RETRY_INITIAL_SECONDS
        while not self.shutdown_event.is_set():
            try:
                sync_resp = await self.client.sync(timeout=SYNC_TIMEOUT_MS)
            except Exception as exc:  # retry indefinitely by design
                failure = exc
            else:
                # nio reports HTTP-level failures as a returned error object.
                failure = None if isinstance(sync_resp, SyncResponse) else sync_resp

            if failure is None:
                delay = RETRY_INITIAL_SECONDS  # Callbacks handled by nio
                continue

            print(f"⚠️ Sync error: {failure}. Retrying in {delay}s...")
            await self._sleep_or_shutdown(delay)
            delay = min(delay * 2, RETRY_MAX_SECONDS)

    async def run(self):
        """Log in, join the operator room, then sync until shutdown."""
        await self.login()
        await self.join_operator_room()
        await self.sync_loop()

    async def close(self):
        """Close the underlying HTTP session."""
        await self.client.close()
        print("👋 Matrix client closed.")


async def main():
    """Run the bot until SIGINT/SIGTERM, always closing the client."""
    bot = HermesMatrixClient()

    # We are inside a coroutine, so the running loop is the right one to use.
    loop = asyncio.get_running_loop()
    for sig in (signal.SIGINT, signal.SIGTERM):
        loop.add_signal_handler(sig, bot.shutdown_event.set)

    try:
        await bot.run()
    finally:
        await bot.close()


if __name__ == "__main__":
    asyncio.run(main())
Message Format & Protocol + +### 5.1 Plain-Text Commands + +For human-to-fleet interaction, messages use a lightweight command prefix: + +| Command | Target | Purpose | +|---------|--------|---------| +| `!ping` | Any wizard | Liveness check | +| `!sitrep` | Any wizard | Request status report | +| `!help` | Any wizard | List available commands | +| `!exec <task>` | Specific wizard | Route a task request (future) | +| `!burn <task>` | Any wizard | Priority task escalation | + +### 5.2 Structured JSON Payloads (Agent-to-Agent) + +For machine-to-machine coordination, agents may send `m.text` messages with a JSON block inside triple backticks: + +```json +{ + "hermes_msg_type": "task_request", + "from": "@ezra:matrix.timmytime.net", + "to": "@gemini:matrix.timmytime.net", + "task_id": "the-nexus#830", + "action": "evaluate_tts_output", + "deadline": "2026-04-06T06:00:00Z" +} +``` + +--- + +## 6. End-to-End Encryption (E2EE) + +### 6.1 Requirement + +All fleet operator rooms **must** have encryption enabled (`m.room.encryption` event). When `store_path` is provided, the `matrix-nio` client persists encryption state and handles key sharing automatically; device verification is a separate, explicit step (see 6.2). + +### 6.2 Device Verification Strategy + +**Recommended**: "Trust on First Use" (TOFU) within the fleet. + +```python +async def trust_fleet_devices(self): + """Auto-verify all devices of known fleet users.""" + fleet_users = ["@ezra:matrix.timmytime.net", "@allegro:matrix.timmytime.net"] + for user_id in fleet_users: + devices = await self.client.devices(user_id) + for device_id in devices.get(user_id, {}): + await self.client.verify_device(user_id, device_id) +``` + +**Caution**: Do not auto-verify external users (e.g., Alexander's personal Element client). Those should be verified manually via emoji comparison. + +--- + +## 7. 
Fleet Room Membership + +### 7.1 Canonical Rooms + +| Room Alias | Purpose | Members | +|------------|---------|---------| +| `#operator-room:matrix.timmytime.net` | Human-to-fleet command surface | Alexander + all wizards | +| `#wizard-hall:matrix.timmytime.net` | Agent-to-agent coordination | All wizards only | +| `#burn-pit:matrix.timmytime.net` | High-priority escalations | On-call wizard + Alexander | + +### 7.2 Auto-Join Policy + +Every Hermes client **must** auto-join invites to `#operator-room` and `#wizard-hall`. Burns to `#burn-pit` are opt-in based on on-call schedule. + +--- + +## 8. Error Handling & Reconnection + +### 8.1 Network Partitions + +If sync fails with a 5xx or connection error, the client must: +1. Log the error +2. Wait 5s (with exponential backoff up to 60s) +3. Retry sync indefinitely + +### 8.2 Token Expiration + +Conduit access tokens do not expire by default. If a `M_UNKNOWN_TOKEN` occurs, the client must re-login using `MATRIX_PASSWORD` and update the stored access token. + +### 8.3 Fatal Errors + +If login fails 3 times consecutively, the client should exit with a non-zero status and surface an alert to the operator room (if possible via a fallback mechanism). + +--- + +## 9. Integration with Hermes Agent Loop + +The Matrix client is **not** a replacement for the Hermes agent core. It is an additional I/O surface. + +**Recommended integration pattern**: + +``` +┌─────────────────┐ +│ Hermes Agent │ +│ (run_agent) │ +└────────┬────────┘ + │ tool calls, reasoning + ▼ +┌─────────────────┐ +│ Matrix Gateway │ ← new: wraps hermes_matrix_client.py +│ (message I/O) │ +└────────┬────────┘ + │ Matrix HTTP APIs + ▼ +┌─────────────────┐ +│ Conduit Server │ +└─────────────────┘ +``` + +A `MatrixGateway` class (future work) would: +1. Run the `matrix-nio` client in a background asyncio task +2. Convert incoming Matrix commands into `AIAgent.chat()` calls +3. Post the agent's text response back to the room +4. 
Support the existing Hermes toolset (todo, memory, delegate) via the same agent loop + +--- + +## 10. Security Hardening Checklist + +Before any wizard house connects to the production Conduit server: + +- [ ] `MATRIX_PASSWORD` is a 32+ character random string +- [ ] The client `store_path` is on an encrypted volume (`~/.cache/hermes-matrix/`) +- [ ] E2EE is enabled in the operator room +- [ ] Only fleet devices are auto-verified +- [ ] The client rejects invites from non-fleet homeservers +- [ ] Logs do not include message bodies at `INFO` level +- [ ] A separate device ID is used per wizard house deployment + +--- + +## 11. Acceptance Criteria Mapping + +Maps #166 acceptance criteria to this specification: + +| #166 Criterion | Addressed By | +|----------------|--------------| +| Deploy Conduit homeserver | `infra/matrix/` (#183) | +| Create fleet rooms/channels | `bootstrap-fleet-rooms.py` | +| Verify encrypted operator-to-fleet messaging | Section 6 (E2EE) + MWE | +| Alexander can message the fleet over Matrix | Sections 4 (MWE), 5 (commands), 7 (rooms) | +| Telegram is no longer the only command surface | `CUTOVER_PLAN.md` + this spec | + +--- + +## 12. Next Steps + +1. **Gemini / Allegro**: Implement `MatrixGateway` class in `gateway/platforms/matrix.py` using this spec. +2. **Bezalel / Ezra**: Test the MWE against the staging Conduit instance once #187 resolves. +3. **Alexander**: Approve the command prefix vocabulary (`!ping`, `!sitrep`, `!burn`, etc.). + +--- + +*This document is repo truth. 
#!/usr/bin/env python3
"""Matrix/Conduit Scaffold Validator — Issue #183 Acceptance Proof

Validates that infra/matrix/ contains a complete, well-formed deployment scaffold.
Run this after any scaffold change to ensure #183 acceptance criteria remain met.

Usage:
    python3 infra/matrix/scripts/validate-scaffold.py
    python3 infra/matrix/scripts/validate-scaffold.py --json

Exit codes:
    0 = all checks passed
    1 = one or more checks failed
"""

import argparse
import json
import os
import re
import subprocess
import sys
from pathlib import Path

try:
    import yaml
    HAS_YAML = True
except ImportError:
    HAS_YAML = False


class Validator:
    """Run structural checks against a Matrix/Conduit scaffold directory.

    Each check appends one ``{"name", "status", "detail"}`` dict to
    ``self.checks`` and bumps ``self.passed`` or ``self.failed``. Checks never
    raise for a broken scaffold; problems are recorded as ``FAIL`` entries.
    """

    def __init__(self, base_dir: Path):
        self.base_dir = base_dir.resolve()
        self.checks = []
        self.passed = 0
        self.failed = 0

    def _add(self, name: str, status: bool, detail: str):
        """Record one check result and update the pass/fail counters."""
        self.checks.append({"name": name, "status": "PASS" if status else "FAIL", "detail": detail})
        if status:
            self.passed += 1
        else:
            self.failed += 1

    def _read(self, check_name: str, path: Path):
        """Return the UTF-8 text of *path*, or ``None`` after recording a FAIL.

        A missing, unreadable or undecodable file becomes a failed check named
        *check_name* instead of an uncaught exception.
        """
        if not path.is_file():
            self._add(check_name, False, "File does not exist")
            return None
        try:
            return path.read_text(encoding="utf-8")
        except (OSError, UnicodeDecodeError) as exc:
            self._add(check_name, False, f"Unreadable: {exc}")
            return None

    @staticmethod
    def _mentions_issue(content: str, number: int) -> bool:
        """Return True if *number* appears as a standalone number in *content*.

        Digits on either side disqualify the match, so ``#166`` and
        ``/issues/166`` count but ``11660`` or ``#1166`` do not.
        """
        return re.search(rf"(?<!\d){number}(?!\d)", content) is not None

    def require_files(self):
        """Check that all required scaffold files exist."""
        required = [
            "README.md",
            "prerequisites.md",
            "docker-compose.yml",
            "conduit.toml",
            ".env.example",
            "deploy-matrix.sh",
            "host-readiness-check.sh",
            "caddy/Caddyfile",
            "scripts/deploy-conduit.sh",
            "docs/RUNBOOK.md",
        ]
        # is_file(): a directory that happens to carry the name does not count.
        missing = [rel for rel in required if not (self.base_dir / rel).is_file()]
        self._add(
            "Required files present",
            not missing,
            f"Missing: {missing}" if missing else f"All {len(required)} files found",
        )

    def docker_compose_valid(self):
        """Validate docker-compose.yml is syntactically valid YAML.

        With PyYAML installed the file must parse to a mapping; without it only
        a brace-balance sanity check is possible. Either way the text must
        mention "conduit" (image or build reference).
        """
        name = "docker-compose.yml valid YAML"
        content = self._read(name, self.base_dir / "docker-compose.yml")
        if content is None:
            return

        parse_errors = (ValueError, yaml.YAMLError) if HAS_YAML else (ValueError,)
        try:
            if HAS_YAML:
                if not isinstance(yaml.safe_load(content), dict):
                    raise ValueError("Top level of compose file is not a mapping")
            elif content.count("{") != content.count("}"):
                # Basic YAML brace balance check
                raise ValueError("Brace mismatch")
        except parse_errors as exc:
            self._add(name, False, str(exc))
            return

        # Must reference conduit image or build
        has_conduit = "conduit" in content.lower()
        self._add(
            name,
            has_conduit,
            "Valid YAML and references Conduit" if has_conduit else "Valid YAML but missing Conduit reference",
        )

    def conduit_toml_valid(self):
        """Validate conduit.toml has required sections (substring match)."""
        name = "conduit.toml required keys"
        content = self._read(name, self.base_dir / "conduit.toml")
        if content is None:
            return
        required_keys = ["server_name", "port", "[database]"]
        missing = [k for k in required_keys if k not in content]
        self._add(
            name,
            not missing,
            f"Missing keys: {missing}" if missing else "Required keys present",
        )

    def env_example_complete(self):
        """Validate .env.example has required variables (substring match)."""
        name = ".env.example required variables"
        content = self._read(name, self.base_dir / ".env.example")
        if content is None:
            return
        required_vars = ["MATRIX_DOMAIN", "ADMIN_USER", "ADMIN_PASSWORD"]
        missing = [v for v in required_vars if v not in content]
        self._add(
            name,
            not missing,
            f"Missing vars: {missing}" if missing else "Required variables present",
        )

    def shell_scripts_executable(self):
        """Check that shell scripts are executable and pass bash -n."""
        scripts = [
            self.base_dir / "deploy-matrix.sh",
            self.base_dir / "host-readiness-check.sh",
            self.base_dir / "scripts" / "deploy-conduit.sh",
        ]
        errors = []
        for script in scripts:
            if not script.exists():
                errors.append(f"{script.name}: missing")
                continue
            if not os.access(script, os.X_OK):
                errors.append(f"{script.name}: not executable")
            try:
                result = subprocess.run(["bash", "-n", str(script)], capture_output=True, text=True)
            except OSError as exc:
                # e.g. bash is not installed: report it, do not crash the run.
                errors.append(f"{script.name}: could not run bash -n — {exc}")
                continue
            if result.returncode != 0:
                errors.append(f"{script.name}: syntax error — {result.stderr.strip()}")
        self._add(
            "Shell scripts executable & valid",
            not errors,
            "; ".join(errors) if errors else f"All {len(scripts)} scripts OK",
        )

    def caddyfile_well_formed(self):
        """Check Caddyfile has expected tokens."""
        name = "Caddyfile well-formed"
        content = self._read(name, self.base_dir / "caddy" / "Caddyfile")
        if content is None:
            return
        has_reverse_proxy = "reverse_proxy" in content
        has_well_known = ".well-known" in content or "matrix" in content.lower()
        ok = has_reverse_proxy and has_well_known
        detail = []
        if not has_reverse_proxy:
            detail.append("missing reverse_proxy directive")
        if not has_well_known:
            detail.append("missing .well-known/matrix routing")
        self._add(
            name,
            ok,
            "Well-formed" if ok else f"Issues: {', '.join(detail)}",
        )

    def runbook_links_valid(self):
        """Check docs/RUNBOOK.md references issues #166 and #183."""
        name = "RUNBOOK.md issue links"
        content = self._read(name, self.base_dir / "docs" / "RUNBOOK.md")
        if content is None:
            return
        ok = self._mentions_issue(content, 166) and self._mentions_issue(content, 183)
        self._add(
            name,
            ok,
            "Links to #166 and #183" if ok else "Missing issue continuity links",
        )

    def run_all(self):
        """Run every check once, in a fixed order."""
        self.require_files()
        self.docker_compose_valid()
        self.conduit_toml_valid()
        self.env_example_complete()
        self.shell_scripts_executable()
        self.caddyfile_well_formed()
        self.runbook_links_valid()

    def report(self, json_mode: bool = False):
        """Print the collected results as JSON or as a human-readable table."""
        if json_mode:
            print(json.dumps({
                "base_dir": str(self.base_dir),
                "passed": self.passed,
                "failed": self.failed,
                "checks": self.checks,
            }, indent=2))
        else:
            print("Matrix/Conduit Scaffold Validator")
            print(f"Base: {self.base_dir}")
            print(f"Checks: {self.passed} passed, {self.failed} failed\n")
            for c in self.checks:
                icon = "✅" if c["status"] == "PASS" else "❌"
                print(f"{icon} {c['name']:<40} {c['detail']}")
            print(f"\n{'SUCCESS' if self.failed == 0 else 'FAILURE'} — {self.passed}/{self.passed+self.failed} checks passed")


def main(argv=None):
    """CLI entry point; exits 0 when every check passes, 1 otherwise.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser(description="Validate Matrix/Conduit deployment scaffold")
    parser.add_argument("--json", action="store_true", help="Output JSON report")
    parser.add_argument(
        "--base",
        default=None,
        help="Path to scaffold directory (default: infra/matrix, else the directory above this script)",
    )
    args = parser.parse_args(argv)

    if args.base is not None:
        # An explicit path is validated as given; a wrong path must fail
        # loudly rather than silently validating some other directory.
        base = Path(args.base)
    else:
        base = Path("infra/matrix")
        if not base.exists():
            # Try relative to script location
            base = Path(__file__).resolve().parent.parent

    validator = Validator(base)
    validator.run_all()
    validator.report(json_mode=args.json)
    sys.exit(0 if validator.failed == 0 else 1)


if __name__ == "__main__":
    main()