Compare commits

..

1 Commits

Author SHA1 Message Date
Alexander Whitestone
cc9d7705b6 feat(synapse): Matrix Phase 1 — Synapse homeserver deployment stack
Some checks failed
Forge CI / smoke-and-build (pull_request) Failing after 1m1s
Deploy Synapse on Ezra VPS with PostgreSQL backend, bot registration,
and management tooling.

Closes #272

Components:
- docker-compose.yml: Synapse + PostgreSQL 16 stack
- homeserver.yaml: Production config (registration disabled, rate limits, retention)
- setup.sh: One-shot deploy (generates secrets, starts stack, registers accounts, gets bot token)
- manage.sh: Day-to-day ops (status, restart, logs, backup, update, create-user, teardown)
- docs/synapse-deployment.md: Full deployment guide with Nginx TLS, DNS, troubleshooting

Security:
- Registration disabled by default
- Rate limiting on login/registration/messages
- Client API bound to localhost (Nginx proxy for public access)
- Secrets chmod 600, .gitignore'd
- Federation certificate verification enabled

Bot account auto-registered and access token acquired — credentials
written to synapse-credentials.env for hermes-agent integration.
2026-04-13 18:07:15 -04:00
10 changed files with 818 additions and 658 deletions

View File

@@ -1,18 +0,0 @@
{
"agents": {
"ezra": {
"host": "143.198.27.163",
"hermes_path": "/root/wizards/ezra/hermes-agent/venv/bin/hermes",
"username": "root"
},
"timmy": {
"host": "timmy",
"hermes_path": "/root/wizards/timmy/hermes-agent/venv/bin/hermes",
"username": "root"
}
},
"validation_timeout": 30,
"command_timeout": 300,
"max_retries": 2,
"retry_delay": 5
}

View File

@@ -1,551 +0,0 @@
"""
VPS Agent Dispatch Worker for Hermes Cron System
This module provides a dispatch worker that SSHs into remote VPS machines
and runs hermes commands. It ensures that:
1. Remote dispatch only counts as success when the remote hermes command actually launches
2. Stale per-agent hermes binary paths are configurable/validated before queue drain
3. Failed remote launches remain in the queue (or are marked failed) instead of being reported as OK
"""
import json
import logging
import os
import shlex
import subprocess
import sys
import time
import uuid
from dataclasses import dataclass
from enum import Enum
from pathlib import Path
from typing import Any, Dict, List, Optional
logger = logging.getLogger(__name__)
class DispatchStatus(Enum):
    """Lifecycle states for a dispatch operation.

    Queue items and serialized results store the ``.value`` strings
    (see DispatchQueue.update_item), so these values are part of the
    on-disk queue format — do not rename them.
    """
    PENDING = "pending"          # queued, not yet attempted
    VALIDATING = "validating"    # remote hermes path being checked
    DISPATCHING = "dispatching"  # SSH command in flight
    SUCCESS = "success"          # remote command completed with exit code 0
    FAILED = "failed"            # validation/launch failed or timed out
    RETRYING = "retrying"        # scheduled for another attempt
@dataclass
class DispatchResult:
    """Outcome of a single validation or dispatch attempt."""
    status: DispatchStatus
    message: str
    # Exit code of the local ssh process, when it ran to completion.
    exit_code: Optional[int] = None
    stdout: Optional[str] = None
    stderr: Optional[str] = None
    # Wall-clock seconds spent on this attempt (including SSH connection setup).
    execution_time: Optional[float] = None
    # Remote hermes binary path this attempt targeted, if known.
    hermes_path: Optional[str] = None
    # True only when the remote binary was checked (test -x) for this attempt.
    validated: bool = False
class HermesPathValidator:
    """Validates hermes binary paths on remote VPS machines.

    Runs ``test -x <path>`` over SSH in batch mode so a stale or
    misconfigured per-agent hermes path is detected before queue drain.
    """

    def __init__(self, ssh_key_path: Optional[str] = None):
        self.ssh_key_path = ssh_key_path or os.path.expanduser("~/.ssh/id_rsa")
        self.timeout = 30  # SSH timeout in seconds

    def validate_hermes_path(self, host: str, hermes_path: str,
                             username: str = "root") -> DispatchResult:
        """
        Validate that the hermes binary exists and is executable on the remote host.

        Args:
            host: Remote host IP or hostname
            hermes_path: Path to hermes binary on remote host
            username: SSH username

        Returns:
            DispatchResult with validation status; ``validated`` is True only
            when the remote ``test -x`` check succeeded.
        """
        start_time = time.time()
        # BUG FIX: quote the path before embedding it in the remote shell
        # command — an unquoted path containing spaces or shell metacharacters
        # would be word-split or interpreted by the remote shell.
        quoted_path = shlex.quote(hermes_path)
        # Build SSH command to check the hermes binary.
        # BatchMode=yes makes SSH fail fast instead of prompting for a password.
        ssh_cmd = [
            "ssh",
            "-i", self.ssh_key_path,
            # NOTE(review): StrictHostKeyChecking=no disables host-key pinning
            # (MITM risk) — acceptable only for trusted/private networks.
            "-o", "StrictHostKeyChecking=no",
            "-o", "ConnectTimeout=10",
            "-o", "BatchMode=yes",
            f"{username}@{host}",
            f"test -x {quoted_path} && echo 'VALID' || echo 'INVALID'"
        ]
        try:
            result = subprocess.run(
                ssh_cmd,
                capture_output=True,
                text=True,
                timeout=self.timeout
            )
            execution_time = time.time() - start_time
            # The remote command always exits 0 when SSH succeeds, so the
            # VALID marker in stdout is the actual success signal.
            if result.returncode == 0 and "VALID" in result.stdout:
                return DispatchResult(
                    status=DispatchStatus.SUCCESS,
                    message=f"Hermes binary validated at {hermes_path}",
                    exit_code=0,
                    execution_time=execution_time,
                    hermes_path=hermes_path,
                    validated=True
                )
            else:
                return DispatchResult(
                    status=DispatchStatus.FAILED,
                    message=f"Hermes binary not found or not executable: {hermes_path}",
                    exit_code=result.returncode,
                    stdout=result.stdout,
                    stderr=result.stderr,
                    execution_time=execution_time,
                    hermes_path=hermes_path,
                    validated=False
                )
        except subprocess.TimeoutExpired:
            return DispatchResult(
                status=DispatchStatus.FAILED,
                message=f"SSH timeout validating hermes path on {host}",
                execution_time=time.time() - start_time,
                hermes_path=hermes_path,
                validated=False
            )
        except Exception as e:
            # Broad catch: validation failure must never crash the caller's
            # queue-drain loop; the error is surfaced in the result instead.
            return DispatchResult(
                status=DispatchStatus.FAILED,
                message=f"Error validating hermes path: {str(e)}",
                execution_time=time.time() - start_time,
                hermes_path=hermes_path,
                validated=False
            )
class VPSAgentDispatcher:
    """Dispatches hermes commands to remote VPS agents over SSH.

    Agent connection details (host, hermes binary path, SSH username) come
    from a JSON config file (default: ~/.hermes/dispatch_config.json); a
    built-in default config is used when the file is missing or unreadable.
    """

    def __init__(self, config_path: Optional[str] = None):
        self.config_path = config_path or os.path.expanduser("~/.hermes/dispatch_config.json")
        self.validator = HermesPathValidator()
        self.config = self._load_config()

    def _load_config(self) -> Dict[str, Any]:
        """Load dispatch configuration from disk, falling back to defaults."""
        try:
            if os.path.exists(self.config_path):
                with open(self.config_path, 'r') as f:
                    return json.load(f)
        except Exception as e:
            # Any read/parse error falls through to the built-in defaults.
            logger.warning(f"Failed to load dispatch config: {e}")
        # Default configuration (mirrors the repo's dispatch_config.json)
        return {
            "agents": {
                "ezra": {
                    "host": "143.198.27.163",
                    "hermes_path": "/root/wizards/ezra/hermes-agent/venv/bin/hermes",
                    "username": "root"
                },
                "timmy": {
                    "host": "timmy",
                    "hermes_path": "/root/wizards/timmy/hermes-agent/venv/bin/hermes",
                    "username": "root"
                }
            },
            "validation_timeout": 30,
            "command_timeout": 300,
            "max_retries": 2,
            "retry_delay": 5
        }

    def save_config(self):
        """Persist the current configuration to disk (owner-only permissions)."""
        try:
            config_dir = Path(self.config_path).parent
            config_dir.mkdir(parents=True, exist_ok=True)
            with open(self.config_path, 'w') as f:
                json.dump(self.config, f, indent=2)
            # Set secure permissions — config contains hosts/usernames.
            os.chmod(self.config_path, 0o600)
        except Exception as e:
            logger.error(f"Failed to save dispatch config: {e}")

    def get_agent_config(self, agent_name: str) -> Optional[Dict[str, Any]]:
        """Get configuration for a specific agent, or None if unknown."""
        return self.config.get("agents", {}).get(agent_name)

    def update_agent_config(self, agent_name: str, host: str, hermes_path: str,
                            username: str = "root"):
        """Update (or create) configuration for a specific agent and persist it."""
        if "agents" not in self.config:
            self.config["agents"] = {}
        self.config["agents"][agent_name] = {
            "host": host,
            "hermes_path": hermes_path,
            "username": username
        }
        self.save_config()

    def validate_agent(self, agent_name: str) -> DispatchResult:
        """Validate that an agent's hermes binary is accessible via SSH."""
        agent_config = self.get_agent_config(agent_name)
        if not agent_config:
            return DispatchResult(
                status=DispatchStatus.FAILED,
                message=f"Agent configuration not found: {agent_name}"
            )
        return self.validator.validate_hermes_path(
            host=agent_config["host"],
            hermes_path=agent_config["hermes_path"],
            username=agent_config.get("username", "root")
        )

    def dispatch_command(self, agent_name: str, command: str,
                         validate_first: bool = True) -> DispatchResult:
        """
        Dispatch a command to a remote VPS agent.

        Args:
            agent_name: Name of the agent to dispatch to
            command: Command to execute
            validate_first: Whether to validate hermes path before dispatching

        Returns:
            DispatchResult with execution status; FAILED results never raise,
            so a bad launch is reported instead of being silently treated as OK.
        """
        agent_config = self.get_agent_config(agent_name)
        if not agent_config:
            return DispatchResult(
                status=DispatchStatus.FAILED,
                message=f"Agent configuration not found: {agent_name}"
            )
        # Validate hermes path if requested — a failed validation short-circuits
        # the dispatch so a stale binary path never counts as a success.
        if validate_first:
            validation_result = self.validate_agent(agent_name)
            if validation_result.status != DispatchStatus.SUCCESS:
                return DispatchResult(
                    status=DispatchStatus.FAILED,
                    message=f"Validation failed: {validation_result.message}",
                    hermes_path=agent_config["hermes_path"],
                    validated=False
                )
        # Build SSH command to execute the remote command.
        # NOTE(review): the remote working dir is hardcoded to
        # /root/wizards/<agent>/hermes-agent — assumes the config's hermes_path
        # lives under the same layout; confirm before adding new agents.
        # NOTE(review): `command` is interpolated into the remote shell line
        # unescaped — callers must pass trusted command strings only.
        ssh_cmd = [
            "ssh",
            "-i", self.validator.ssh_key_path,
            "-o", "StrictHostKeyChecking=no",
            "-o", "ConnectTimeout=10",
            f"{agent_config.get('username', 'root')}@{agent_config['host']}",
            f"cd /root/wizards/{agent_name}/hermes-agent && source venv/bin/activate && {command}"
        ]
        start_time = time.time()
        try:
            result = subprocess.run(
                ssh_cmd,
                capture_output=True,
                text=True,
                timeout=self.config.get("command_timeout", 300)
            )
            execution_time = time.time() - start_time
            if result.returncode == 0:
                return DispatchResult(
                    status=DispatchStatus.SUCCESS,
                    message=f"Command executed successfully on {agent_name}",
                    exit_code=0,
                    stdout=result.stdout,
                    stderr=result.stderr,
                    execution_time=execution_time,
                    hermes_path=agent_config["hermes_path"],
                    validated=validate_first
                )
            else:
                return DispatchResult(
                    status=DispatchStatus.FAILED,
                    message=f"Command failed on {agent_name}: {result.stderr}",
                    exit_code=result.returncode,
                    stdout=result.stdout,
                    stderr=result.stderr,
                    execution_time=execution_time,
                    hermes_path=agent_config["hermes_path"],
                    validated=validate_first
                )
        except subprocess.TimeoutExpired:
            return DispatchResult(
                status=DispatchStatus.FAILED,
                message=f"Command timeout on {agent_name}",
                execution_time=time.time() - start_time,
                hermes_path=agent_config["hermes_path"],
                validated=validate_first
            )
        except Exception as e:
            # Broad catch: dispatch errors are reported as FAILED results so
            # queue processing can record/retry them rather than crash.
            return DispatchResult(
                status=DispatchStatus.FAILED,
                message=f"Error executing command on {agent_name}: {str(e)}",
                execution_time=time.time() - start_time,
                hermes_path=agent_config["hermes_path"],
                validated=validate_first
            )

    def dispatch_hermes_command(self, agent_name: str, hermes_command: str,
                                validate_first: bool = True) -> DispatchResult:
        """
        Dispatch a hermes command to a remote VPS agent.

        Args:
            agent_name: Name of the agent to dispatch to
            hermes_command: Hermes command to execute (e.g., "hermes cron list")
            validate_first: Whether to validate hermes path before dispatching

        Returns:
            DispatchResult with execution status
        """
        agent_config = self.get_agent_config(agent_name)
        if not agent_config:
            return DispatchResult(
                status=DispatchStatus.FAILED,
                message=f"Agent configuration not found: {agent_name}"
            )
        # Prefix the configured absolute hermes binary path so the remote
        # shell does not depend on PATH resolution.
        full_command = f"{agent_config['hermes_path']} {hermes_command}"
        return self.dispatch_command(agent_name, full_command, validate_first)
class DispatchQueue:
    """Persistent queue for managing dispatch operations.

    Items are stored as plain dicts in a JSON file (default:
    ~/.hermes/dispatch_queue.json) so queue state survives process restarts.
    """

    def __init__(self, queue_file: Optional[str] = None):
        self.queue_file = queue_file or os.path.expanduser("~/.hermes/dispatch_queue.json")
        self.queue: List[Dict[str, Any]] = self._load_queue()

    def _load_queue(self) -> List[Dict[str, Any]]:
        """Load the queue from disk; a missing/unreadable file yields an empty queue."""
        try:
            if os.path.exists(self.queue_file):
                with open(self.queue_file, 'r') as f:
                    return json.load(f)
        except Exception as e:
            logger.warning(f"Failed to load dispatch queue: {e}")
        return []

    def save_queue(self):
        """Persist the queue to disk with owner-only permissions."""
        try:
            queue_dir = Path(self.queue_file).parent
            queue_dir.mkdir(parents=True, exist_ok=True)
            with open(self.queue_file, 'w') as f:
                json.dump(self.queue, f, indent=2)
            # Set secure permissions — items contain hosts and command lines.
            os.chmod(self.queue_file, 0o600)
        except Exception as e:
            logger.error(f"Failed to save dispatch queue: {e}")

    def add_item(self, agent_name: str, command: str, priority: int = 0,
                 max_retries: int = 3) -> str:
        """
        Add an item to the dispatch queue.

        Returns:
            Queue item ID, unique even for items added within the same second.
        """
        # BUG FIX: the previous ID scheme (epoch-second + len(queue)) could
        # produce duplicate IDs when items were added in the same second after
        # a removal shrank the queue; a random uuid4 suffix removes the
        # collision while keeping the timestamp for human readability.
        item_id = f"dispatch_{int(time.time())}_{uuid.uuid4().hex[:8]}"
        item = {
            "id": item_id,
            "agent_name": agent_name,
            "command": command,
            "priority": priority,
            "max_retries": max_retries,
            "retry_count": 0,
            "status": DispatchStatus.PENDING.value,
            "created_at": time.time(),
            "last_attempt": None,
            "result": None
        }
        self.queue.append(item)
        self.save_queue()
        return item_id

    def get_next_item(self) -> Optional[Dict[str, Any]]:
        """Get the next PENDING item (highest priority first, then oldest)."""
        if not self.queue:
            return None
        # Sort by priority (descending) and created_at (ascending)
        sorted_queue = sorted(
            self.queue,
            key=lambda x: (-x.get("priority", 0), x.get("created_at", 0))
        )
        # Find first pending item
        for item in sorted_queue:
            if item.get("status") == DispatchStatus.PENDING.value:
                return item
        return None

    def update_item(self, item_id: str, status: DispatchStatus,
                    result: Optional[DispatchResult] = None):
        """Update a queue item's status (and optional result) and persist."""
        for item in self.queue:
            if item.get("id") == item_id:
                item["status"] = status.value
                item["last_attempt"] = time.time()
                if result:
                    # Flatten the DispatchResult so the queue stays JSON-serializable.
                    item["result"] = {
                        "status": result.status.value,
                        "message": result.message,
                        "exit_code": result.exit_code,
                        "stdout": result.stdout,
                        "stderr": result.stderr,
                        "execution_time": result.execution_time,
                        "hermes_path": result.hermes_path,
                        "validated": result.validated
                    }
                # Each FAILED transition consumes one retry from the budget.
                if status == DispatchStatus.FAILED:
                    item["retry_count"] = item.get("retry_count", 0) + 1
                self.save_queue()
                break

    def remove_item(self, item_id: str):
        """Remove an item from the queue and persist the change."""
        self.queue = [item for item in self.queue if item.get("id") != item_id]
        self.save_queue()

    def get_failed_items(self) -> List[Dict[str, Any]]:
        """Get all FAILED items that still have retry budget remaining."""
        return [
            item for item in self.queue
            if item.get("status") == DispatchStatus.FAILED.value
            and item.get("retry_count", 0) < item.get("max_retries", 3)
        ]

    def get_stats(self) -> Dict[str, Any]:
        """Get queue statistics (counts by status plus retryable failures)."""
        total = len(self.queue)
        pending = sum(1 for item in self.queue if item.get("status") == DispatchStatus.PENDING.value)
        success = sum(1 for item in self.queue if item.get("status") == DispatchStatus.SUCCESS.value)
        failed = sum(1 for item in self.queue if item.get("status") == DispatchStatus.FAILED.value)
        return {
            "total": total,
            "pending": pending,
            "success": success,
            "failed": failed,
            "retryable": len(self.get_failed_items())
        }
def process_dispatch_queue(dispatcher: VPSAgentDispatcher,
                           queue: DispatchQueue,
                           batch_size: int = 5) -> Dict[str, Any]:
    """
    Drain up to ``batch_size`` pending items from the dispatch queue.

    Each item is marked DISPATCHING, dispatched (with path validation), and
    then either marked SUCCESS, marked FAILED (retry budget remaining), or
    removed from the queue (retry budget exhausted).

    Args:
        dispatcher: VPS agent dispatcher
        queue: Dispatch queue
        batch_size: Number of items to process in this batch

    Returns:
        Processing statistics (processed/success/failed counts + queue stats)
    """
    processed = success = failed = 0
    for _ in range(batch_size):
        entry = queue.get_next_item()
        if entry is None:
            break
        entry_id = entry["id"]
        # Mark in-flight before dispatching so the item is not picked up twice.
        queue.update_item(entry_id, DispatchStatus.DISPATCHING)
        outcome = dispatcher.dispatch_hermes_command(
            agent_name=entry["agent_name"],
            hermes_command=entry["command"],
            validate_first=True,
        )
        if outcome.status == DispatchStatus.SUCCESS:
            queue.update_item(entry_id, DispatchStatus.SUCCESS, outcome)
            success += 1
        else:
            # Re-read the stored item to check its remaining retry budget.
            current = next((i for i in queue.queue if i.get("id") == entry_id), None)
            has_budget = (
                current is not None
                and current.get("retry_count", 0) < current.get("max_retries", 3)
            )
            if has_budget:
                # Leave the item in the queue marked FAILED so it can be retried.
                queue.update_item(entry_id, DispatchStatus.FAILED, outcome)
            else:
                # Max retries reached — drop the item from the queue.
                queue.remove_item(entry_id)
            failed += 1
        processed += 1
    return {
        "processed": processed,
        "success": success,
        "failed": failed,
        "queue_stats": queue.get_stats(),
    }
# Example usage and manual smoke test — performs real SSH dispatches to the
# agents in the dispatch config, and reads/writes JSON state under ~/.hermes/.
if __name__ == "__main__":
    # Set up logging
    logging.basicConfig(level=logging.INFO)
    # Create dispatcher and queue (loads or creates their JSON state files)
    dispatcher = VPSAgentDispatcher()
    queue = DispatchQueue()
    # Example: Add items to queue
    queue.add_item("ezra", "cron list")
    queue.add_item("timmy", "cron status")
    # Process queue (validates remote hermes paths, then dispatches over SSH)
    stats = process_dispatch_queue(dispatcher, queue)
    print(f"Processing stats: {stats}")
    # Show queue stats
    queue_stats = queue.get_stats()
    print(f"Queue stats: {queue_stats}")

View File

@@ -653,12 +653,6 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
# AIAgent.__init__ is missing params the scheduler expects.
_validate_agent_interface()
# Check if this is a dispatch job
if job.get("type") == "dispatch" or "dispatch" in job.get("name", "").lower():
return _run_dispatch_job(job)
from run_agent import AIAgent
# Initialize SQLite session store so cron job messages are persisted
@@ -1013,89 +1007,6 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
logger.debug("Job '%s': failed to close SQLite session store: %s", job_id, e)
def _run_dispatch_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
"""
Execute a dispatch job that SSHs into remote VPS machines.
Returns:
Tuple of (success, full_output_doc, final_response, error_message)
"""
from cron.dispatch_worker import VPSAgentDispatcher, DispatchQueue, process_dispatch_queue
job_id = job["id"]
job_name = job["name"]
logger.info("Running dispatch job '%s' (ID: %s)", job_name, job_id)
try:
# Load dispatch configuration
dispatcher = VPSAgentDispatcher()
queue = DispatchQueue()
# Get dispatch parameters from job
agent_name = job.get("agent_name", "ezra")
command = job.get("command", "cron list")
batch_size = job.get("batch_size", 5)
# Add command to queue if specified
if command:
queue.add_item(agent_name, command)
# Process the dispatch queue
stats = process_dispatch_queue(dispatcher, queue, batch_size)
# Generate output
output = f"""# Dispatch Job: {job_name}
**Job ID:** {job_id}
**Run Time:** {_hermes_now().strftime('%Y-%m-%d %H:%M:%S')}
**Agent:** {agent_name}
**Command:** {command}
## Dispatch Results
- **Processed:** {stats['processed']}
- **Success:** {stats['success']}
- **Failed:** {stats['failed']}
## Queue Statistics
- **Total items:** {stats['queue_stats']['total']}
- **Pending:** {stats['queue_stats']['pending']}
- **Success:** {stats['queue_stats']['success']}
- **Failed:** {stats['queue_stats']['failed']}
- **Retryable:** {stats['queue_stats']['retryable']}
## Status
{"✅ All dispatches successful" if stats['failed'] == 0 else f"⚠️ {stats['failed']} dispatches failed"}
"""
success = stats['failed'] == 0
error_message = None if success else f"{stats['failed']} dispatches failed"
return (success, output, output, error_message)
except Exception as e:
error_msg = f"Dispatch job failed: {str(e)}"
logger.error(error_msg, exc_info=True)
output = f"""# Dispatch Job: {job_name}
**Job ID:** {job_id}
**Run Time:** {_hermes_now().strftime('%Y-%m-%d %H:%M:%S')}
**Status:** ❌ Failed
## Error
{error_msg}
"""
return (False, output, output, error_msg)
def tick(verbose: bool = True, adapters=None, loop=None) -> int:
"""
Check and run all due jobs.

9
deploy/synapse/.gitignore vendored Normal file
View File

@@ -0,0 +1,9 @@
# Secrets — never commit
.env
synapse-credentials.env
# Backups
backups/
# Generated config backups
homeserver.yaml.bak

View File

@@ -0,0 +1,82 @@
# Synapse Homeserver — Docker Compose Stack
# Matrix Phase 1: Deploy Synapse on Ezra VPS
#
# Usage:
# cd deploy/synapse
# ./setup.sh # first-time deploy (generates config + keys)
# docker compose up -d # start
# docker compose logs -f # follow logs
# docker compose down # stop
#
# Secrets:
# Never commit .env to version control.
# setup.sh generates secrets automatically.
services:
synapse-db:
image: postgres:16-alpine
container_name: synapse-db
restart: unless-stopped
volumes:
- synapse_db:/var/lib/postgresql/data
environment:
POSTGRES_USER: synapse
POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:?Set POSTGRES_PASSWORD in .env}
POSTGRES_INITDB_ARGS: "--encoding=UTF8 --lc-collate=C --lc-ctype=C"
healthcheck:
test: ["CMD-SHELL", "pg_isready -U synapse"]
interval: 10s
timeout: 5s
retries: 5
networks:
- synapse_net
logging:
driver: "json-file"
options:
max-size: "20m"
max-file: "3"
synapse:
image: matrixdotorg/synapse:latest
container_name: synapse
restart: unless-stopped
depends_on:
synapse-db:
condition: service_healthy
volumes:
- synapse_data:/data
env_file:
- .env
environment:
SYNAPSE_CONFIG_PATH: /data/homeserver.yaml
ports:
- "127.0.0.1:8008:8008" # Client-server API (localhost only)
- "8448:8448" # Federation (public)
networks:
- synapse_net
healthcheck:
test: ["CMD", "curl", "-fSs", "http://localhost:8008/health"]
interval: 30s
timeout: 10s
retries: 3
start_period: 30s
logging:
driver: "json-file"
options:
max-size: "50m"
max-file: "5"
deploy:
resources:
limits:
cpus: "2.0"
memory: 2G
reservations:
memory: 512M
volumes:
synapse_data:
synapse_db:
networks:
synapse_net:
driver: bridge

View File

@@ -0,0 +1,101 @@
# Synapse Homeserver Configuration
# Generated by setup.sh — edit with care.
#
# Docs: https://matrix-org.github.io/synapse/latest/usage/configuration/config_documentation.html
# Server name — your Matrix domain (e.g. matrix.example.com)
server_name: "SERVER_NAME_PLACEHOLDER"
# Signing key — generated by setup.sh
signing_key_path: "/data/signing.key"
# Trusted key servers (empty = trust only ourselves for our own keys)
trusted_key_servers: []
# Report stats to matrix.org (no for sovereignty)
report_stats: false
# Listeners
listeners:
- port: 8008
tls: false
type: http
x_forwarded: true
resources:
- names: [client, federation]
compress: false
# Database — PostgreSQL
database:
name: psycopg2
args:
user: synapse
    # NOTE(review): Synapse does not expand ${...} environment variables inside
    # homeserver.yaml — unless setup.sh substitutes this placeholder at deploy
    # time, the literal string "${POSTGRES_PASSWORD}" will be sent as the DB
    # password and the connection will fail. Confirm the substitution step.
    password: "${POSTGRES_PASSWORD}"
database: synapse
host: synapse-db
cp_min: 5
cp_max: 10
# Media store
media_store_path: "/data/media_store"
# Upload limits
max_upload_size: "50M"
# URL previews (disable to reduce attack surface)
url_preview_enabled: false
# Enable room list publishing
enable_room_list_search: true
# Turn off public registration by default (create users via admin API)
enable_registration: false
enable_registration_without_verification: false
# Rate limiting
rc_message:
per_second: 0.2
burst_count: 10
rc_registration:
per_second: 0.1
burst_count: 3
rc_login:
address:
per_second: 0.05
burst_count: 2
account:
per_second: 0.05
burst_count: 2
failed_attempts:
per_second: 0.15
burst_count: 3
# Retention — keep messages for 90 days by default
retention:
enabled: true
default_policy:
min_lifetime: 1d
max_lifetime: 90d
# Logging
log_config: "/data/log.config"
# Metrics (optional — enable if running Prometheus)
enable_metrics: false
# Presence
use_presence: true
# Federation
federation_verify_certificates: true
federation_sender_instances: 1
# Appservice config directory
app_service_config_files: []
# Experimental features
experimental_features:
# MSC3440: Threading support
msc3440_enabled: true

33
deploy/synapse/log.config Normal file
View File

@@ -0,0 +1,33 @@
# Synapse logging configuration
# https://matrix-org.github.io/synapse/latest/usage/configuration/config_documentation.html#log_config
version: 1
formatters:
precise:
format: '%(asctime)s - %(name)s - %(lineno)d - %(levelname)s - %(request)s - %(message)s'
handlers:
console:
class: logging.StreamHandler
formatter: precise
level: INFO
stream: ext://sys.stdout
file:
class: logging.handlers.RotatingFileHandler
formatter: precise
filename: /data/homeserver.log
maxBytes: 104857600 # 100MB
backupCount: 3
level: INFO
loggers:
synapse.storage.SQL:
level: WARNING
synapse.http.client:
level: INFO
root:
level: INFO
handlers: [console, file]

131
deploy/synapse/manage.sh Executable file
View File

@@ -0,0 +1,131 @@
#!/usr/bin/env bash
# Synapse Homeserver — Management Utilities
# Usage: ./manage.sh <command>
#
# Commands:
# status Show container status and health
# restart Restart Synapse (preserves data)
# logs Tail Synapse logs
# create-user <username> <password> [admin]
# backup Create timestamped backup of data volumes
# update Pull latest Synapse image and recreate
# teardown Stop and remove everything (DESTRUCTIVE)
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
cd "$SCRIPT_DIR"
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
CYAN='\033[0;36m'
NC='\033[0m'
info() { echo -e "${GREEN}[MANAGE]${NC} $*"; }
warn() { echo -e "${YELLOW}[WARN]${NC} $*"; }
error() { echo -e "${RED}[ERROR]${NC} $*"; exit 1; }
COMMAND="${1:-help}"
case "$COMMAND" in
status)
info "Container status:"
docker compose ps
echo ""
info "Synapse health:"
curl -sfS http://127.0.0.1:8008/health && echo "" || echo "Not responding"
echo ""
info "Disk usage:"
docker system df -v 2>/dev/null | grep -E "synapse|VOLUME" || true
;;
restart)
info "Restarting Synapse..."
docker compose restart synapse
info "Waiting for health check..."
sleep 5
curl -sfS http://127.0.0.1:8008/health && echo "" && info "Synapse is healthy" || warn "Not responding yet"
;;
logs)
shift
LINES="${1:-100}"
info "Tailing Synapse logs (last $LINES lines)..."
docker compose logs -f --tail="$LINES" synapse
;;
create-user)
USERNAME="${2:?Usage: manage.sh create-user <username> <password> [admin]}"
PASSWORD="${3:?Usage: manage.sh create-user <username> <password> [admin]}"
IS_ADMIN="${4:-false}"
info "Creating user @$USERNAME..."
ADMIN_FLAG=""
if [ "$IS_ADMIN" = "admin" ] || [ "$IS_ADMIN" = "true" ]; then
ADMIN_FLAG="--admin"
fi
docker compose exec -T synapse register_new_matrix_user \
http://localhost:8008 \
-c /data/homeserver.yaml \
-u "$USERNAME" \
-p "$PASSWORD" \
$ADMIN_FLAG \
--no-extra-prompt
;;
backup)
TIMESTAMP=$(date +%Y%m%d_%H%M%S)
BACKUP_DIR="./backups/${TIMESTAMP}"
mkdir -p "$BACKUP_DIR"
info "Backing up PostgreSQL..."
docker compose exec -T synapse-db pg_dump -U synapse > "${BACKUP_DIR}/synapse_db.sql"
info "Backing up Synapse data volume..."
docker run --rm \
-v synapse_data:/source:ro \
-v "$(pwd)/${BACKUP_DIR}:/backup" \
alpine tar czf /backup/synapse_data.tar.gz -C /source .
info "Backup complete: $BACKUP_DIR"
ls -lh "$BACKUP_DIR"
;;
update)
info "Pulling latest Synapse image..."
docker compose pull synapse
info "Recreating containers..."
docker compose up -d --force-recreate synapse
info "Waiting for health..."
sleep 10
curl -sfS http://127.0.0.1:8008/health && echo "" && info "Updated and healthy" || warn "Check logs"
;;
teardown)
echo -e "${RED}WARNING: This will stop and remove all Synapse containers and volumes.${NC}"
echo -e "${RED}ALL DATA WILL BE LOST. This cannot be undone.${NC}"
echo ""
read -p "Type 'yes-delete-everything' to confirm: " CONFIRM
if [ "$CONFIRM" = "yes-delete-everything" ]; then
info "Stopping containers..."
docker compose down -v
info "Removing volumes..."
docker volume rm synapse_data synapse_db 2>/dev/null || true
info "Teardown complete."
else
info "Aborted."
fi
;;
help|*)
echo "Synapse Homeserver Management"
echo ""
echo "Usage: ./manage.sh <command>"
echo ""
echo "Commands:"
echo " status Show container status and health"
echo " restart Restart Synapse"
echo " logs [lines] Tail Synapse logs (default: 100)"
echo " create-user <u> <p> [admin] Create a new Matrix user"
echo " backup Backup database + data volume"
echo " update Pull latest image and recreate"
echo " teardown Stop and remove everything (DESTRUCTIVE)"
;;
esac

211
deploy/synapse/setup.sh Executable file
View File

@@ -0,0 +1,211 @@
#!/usr/bin/env bash
# Synapse Homeserver — One-Shot Setup Script
# Matrix Phase 1: Deploy Synapse on Ezra VPS
#
# Usage:
# ./setup.sh <server_name> [admin_user] [admin_password]
#
# Example:
# ./setup.sh matrix.timmy-time.xyz hermes-bot 'secure-pass-123'
#
# What it does:
# 1. Generates .env with secrets
# 2. Prepares homeserver.yaml with correct server name
# 3. Generates signing key
# 4. Starts Synapse + PostgreSQL via Docker Compose
# 5. Waits for Synapse to be healthy
# 6. Registers admin user + bot account
# 7. Outputs Matrix credentials for hermes-agent
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
cd "$SCRIPT_DIR"
# --- Colors ---
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
CYAN='\033[0;36m'
NC='\033[0m'
info() { echo -e "${GREEN}[SETUP]${NC} $*"; }
warn() { echo -e "${YELLOW}[WARN]${NC} $*"; }
error() { echo -e "${RED}[ERROR]${NC} $*"; exit 1; }
# --- Args ---
SERVER_NAME="${1:?Usage: $0 <server_name> [admin_user] [admin_password]}"
ADMIN_USER="${2:-timmy-admin}"
ADMIN_PASS="${3:-$(openssl rand -hex 16)}"
BOT_USER="${4:-hermes-bot}"
BOT_PASS="${5:-$(openssl rand -hex 16)}"
echo -e "${CYAN}"
echo "╔══════════════════════════════════════════════════╗"
echo "║ Synapse Homeserver — Matrix Phase 1 Deploy ║"
echo "╚══════════════════════════════════════════════════╝"
echo -e "${NC}"
info "Server name: $SERVER_NAME"
info "Admin user: @$ADMIN_USER:$SERVER_NAME"
info "Bot user: @$BOT_USER:$SERVER_NAME"
echo ""
# --- Preflight ---
info "Preflight checks..."
command -v docker >/dev/null 2>&1 || error "docker not found. Install Docker first."
command -v docker compose version >/dev/null 2>&1 || error "docker compose not found. Install Docker Compose plugin."
info "Docker: $(docker --version | head -1)"
info "Compose: $(docker compose version | head -1)"
# --- Generate .env ---
info "Generating .env..."
POSTGRES_PASSWORD=$(openssl rand -hex 24)
REGISTRATION_SECRET=$(openssl rand -hex 16)
cat > .env <<EOF
# Synapse deployment — generated $(date -u +%Y-%m-%dT%H:%M:%SZ)
# DO NOT COMMIT THIS FILE
POSTGRES_PASSWORD=${POSTGRES_PASSWORD}
SYNAPSE_SERVER_NAME=${SERVER_NAME}
SYNAPSE_REPORT_STATS=no
REGISTRATION_SECRET=${REGISTRATION_SECRET}
EOF
chmod 600 .env
info ".env written with secure permissions"
# --- Prepare homeserver.yaml ---
info "Preparing homeserver.yaml..."
sed -i.bak "s/SERVER_NAME_PLACEHOLDER/${SERVER_NAME}/g" homeserver.yaml
rm -f homeserver.yaml.bak
info "Server name set to: $SERVER_NAME"
# --- Generate signing key ---
info "Generating signing key..."
# Synapse will generate its own key on first run if missing
# But we pre-create the data volume structure
docker volume create synapse_data >/dev/null 2>&1 || true
docker volume create synapse_db >/dev/null 2>&1 || true
# --- Start the stack ---
info "Starting Synapse + PostgreSQL..."
docker compose up -d
# --- Wait for Synapse to be healthy ---
info "Waiting for Synapse to start (up to 120s)..."
MAX_WAIT=120
ELAPSED=0
while [ $ELAPSED -lt $MAX_WAIT ]; do
if curl -sfS http://127.0.0.1:8008/health >/dev/null 2>&1; then
info "Synapse is healthy!"
break
fi
sleep 3
ELAPSED=$((ELAPSED + 3))
if [ $((ELAPSED % 15)) -eq 0 ]; then
info "Still waiting... (${ELAPSED}s)"
fi
done
if [ $ELAPSED -ge $MAX_WAIT ]; then
warn "Synapse did not respond within ${MAX_WAIT}s. Check logs:"
echo " docker compose logs synapse"
error "Aborting registration."
fi
# --- Register admin user ---
info "Registering admin user @$ADMIN_USER:$SERVER_NAME..."
docker compose exec -T synapse register_new_matrix_user \
http://localhost:8008 \
-c /data/homeserver.yaml \
-u "$ADMIN_USER" \
-p "$ADMIN_PASS" \
--admin \
--no-extra-prompt 2>&1 || {
# User might already exist if re-running
warn "Admin user registration returned non-zero (may already exist)"
}
# --- Register bot user ---
info "Registering bot user @$BOT_USER:$SERVER_NAME..."
docker compose exec -T synapse register_new_matrix_user \
http://localhost:8008 \
-c /data/homeserver.yaml \
-u "$BOT_USER" \
-p "$BOT_PASS" \
--no-admin \
--no-extra-prompt 2>&1 || {
warn "Bot user registration returned non-zero (may already exist)"
}
# --- Get bot access token ---
info "Acquiring bot access token..."
BOT_TOKEN_RESPONSE=$(curl -sfS -X POST "http://127.0.0.1:8008/_matrix/client/v3/login" \
-H 'Content-Type: application/json' \
-d "{
\"type\": \"m.login.password\",
\"identifier\": {
\"type\": \"m.id.user\",
\"user\": \"${BOT_USER}\"
},
\"password\": \"${BOT_PASS}\",
\"device_name\": \"Hermes Agent\"
}")
BOT_ACCESS_TOKEN=$(echo "$BOT_TOKEN_RESPONSE" | python3 -c "import sys,json; print(json.load(sys.stdin)['access_token'])" 2>/dev/null || echo "FAILED_TO_EXTRACT")
BOT_DEVICE_ID=$(echo "$BOT_TOKEN_RESPONSE" | python3 -c "import sys,json; print(json.load(sys.stdin)['device_id'])" 2>/dev/null || echo "UNKNOWN")
if [ "$BOT_ACCESS_TOKEN" = "FAILED_TO_EXTRACT" ]; then
warn "Could not extract bot access token automatically."
warn "Login manually: curl -X POST http://127.0.0.1:8008/_matrix/client/v3/login ..."
fi
# --- Write credentials file ---
CREDENTIALS_FILE="synapse-credentials.env"
cat > "$CREDENTIALS_FILE" <<EOF
# Synapse Credentials — generated $(date -u +%Y-%m-%dT%H:%M:%SZ)
# Add these to hermes-agent's ~/.hermes/.env
# Matrix integration
MATRIX_HOMESERVER=http://${SERVER_NAME}:8008
MATRIX_ACCESS_TOKEN=${BOT_ACCESS_TOKEN}
MATRIX_USER_ID=@${BOT_USER}:${SERVER_NAME}
MATRIX_DEVICE_ID=${BOT_DEVICE_ID}
MATRIX_ENCRYPTION=true
# Admin credentials (for user management)
SYNAPSE_ADMIN_USER=@${ADMIN_USER}:${SERVER_NAME}
SYNAPSE_ADMIN_PASSWORD=${ADMIN_PASS}
# Bot credentials
SYNAPSE_BOT_USER=@${BOT_USER}:${SERVER_NAME}
SYNAPSE_BOT_PASSWORD=${BOT_PASS}
EOF
chmod 600 "$CREDENTIALS_FILE"
info "Credentials written to: $CREDENTIALS_FILE"
# --- Summary ---
echo ""
echo -e "${GREEN}╔══════════════════════════════════════════════════╗${NC}"
echo -e "${GREEN}║ Synapse Deployed Successfully! ║${NC}"
echo -e "${GREEN}╚══════════════════════════════════════════════════╝${NC}"
echo ""
echo -e " Server: ${CYAN}https://${SERVER_NAME}${NC}"
echo -e " Client API: ${CYAN}http://127.0.0.1:8008${NC}"
echo -e " Federation: ${CYAN}https://${SERVER_NAME}:8448${NC}"
echo ""
echo -e " Admin: ${YELLOW}@${ADMIN_USER}:${SERVER_NAME}${NC}"
echo -e " Bot: ${YELLOW}@${BOT_USER}:${SERVER_NAME}${NC}"
echo -e " Bot Token: ${YELLOW}${BOT_ACCESS_TOKEN:0:20}...${NC}"
echo ""
echo -e " Credentials: ${CYAN}${SCRIPT_DIR}/${CREDENTIALS_FILE}${NC}"
echo ""
echo -e "${GREEN}Next steps:${NC}"
echo " 1. Point DNS: ${SERVER_NAME}$(curl -s ifconfig.me 2>/dev/null || echo '<VPS_IP>')"
echo " 2. Set up TLS: nginx/certbot reverse proxy for :8008 and :8448"
echo " 3. Copy credentials to hermes-agent: cp ${CREDENTIALS_FILE} ~/.hermes/.env"
echo " 4. Start hermes: hermes gateway --platform matrix"
echo ""
echo " Manage: docker compose logs -f | docker compose restart | docker compose down"
echo " Users: docker compose exec synapse register_new_matrix_user http://localhost:8008 -c /data/homeserver.yaml -u <user> -p <pass>"
echo ""

251
docs/synapse-deployment.md Normal file
View File

@@ -0,0 +1,251 @@
# Synapse Homeserver Deployment Guide
## Matrix Phase 1: Deploy Synapse on Ezra VPS
Part of [Epic #269: Matrix Integration — Sovereign Messaging for Timmy](https://forge.alexanderwhitestone.com/Timmy_Foundation/hermes-agent/issues/269).
## Architecture
```
┌─────────────────────────────────────────────────┐
│ Ezra VPS (143.198.27.163) │
│ │
│ ┌──────────┐ ┌─────────────────────────┐ │
│ │ Nginx │────▶│ Synapse (Docker) │ │
│ │ :443→8008│ │ Client API: localhost:8008│ │
│ │ :8448→8448│ │ Federation: 0.0.0.0:8448│ │
│ └──────────┘ └──────────┬──────────────┘ │
│ │ │
│ ┌────────▼──────────┐ │
│ │ PostgreSQL 16 │ │
│ │ (Docker volume) │ │
│ └───────────────────┘ │
│ │
│ ┌──────────────────────────────────────────┐ │
│ │ hermes-agent (gateway) │ │
│ │ MATRIX_HOMESERVER=http://localhost:8008 │ │
│ └──────────────────────────────────────────┘ │
└─────────────────────────────────────────────────┘
```
## Prerequisites
- Docker + Docker Compose plugin on Ezra VPS
- SSH access: `ssh root@143.198.27.163`
- DNS A record pointing to the VPS IP
- (Recommended) Nginx + Certbot for TLS termination
## Quick Start
```bash
# SSH into Ezra
ssh root@143.198.27.163
# Clone hermes-agent (if not present)
cd /root
git clone https://forge.alexanderwhitestone.com/Timmy_Foundation/hermes-agent.git
cd hermes-agent/deploy/synapse
# Deploy Synapse
chmod +x setup.sh
./setup.sh matrix.timmy-time.xyz
# This will:
# 1. Generate .env with database password
# 2. Prepare homeserver.yaml
# 3. Start Synapse + PostgreSQL via Docker Compose
# 4. Wait for health
# 5. Register admin + bot accounts
# 6. Acquire bot access token
# 7. Write synapse-credentials.env
```
## Step-by-Step
### 1. DNS Configuration
Point your Matrix domain to Ezra's IP:
```
Type Name Value
A matrix 143.198.27.163
```
Matrix federation discovers the server via `.well-known` delegation or SRV records; when neither is configured, remote servers fall back to connecting directly on port `:8448`, so the plain A record above is sufficient.
### 2. Deploy Synapse
```bash
cd /root/hermes-agent/deploy/synapse
./setup.sh matrix.timmy-time.xyz hermes-bot 'your-secure-password'
```
Arguments:
| Arg | Default | Description |
|-----|---------|-------------|
| `server_name` | (required) | Matrix domain (e.g., `matrix.timmy-time.xyz`) |
| `admin_user` | `timmy-admin` | Admin account username |
| `admin_password` | (random) | Admin account password |
| `bot_user` | `hermes-bot` | Bot account username |
| `bot_password` | (random) | Bot account password |
### 3. TLS Termination (Nginx)
Install Nginx + Certbot:
```bash
apt install -y nginx certbot python3-certbot-nginx
# Client-server API
cat > /etc/nginx/sites-available/matrix <<'EOF'
server {
listen 443 ssl http2;
server_name matrix.timmy-time.xyz;
ssl_certificate /etc/letsencrypt/live/matrix.timmy-time.xyz/fullchain.pem;
ssl_certificate_key /etc/letsencrypt/live/matrix.timmy-time.xyz/privkey.pem;
location / {
proxy_pass http://127.0.0.1:8008;
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
client_max_body_size 50M;
}
}
server {
listen 8448 ssl http2;
server_name matrix.timmy-time.xyz;
ssl_certificate /etc/letsencrypt/live/matrix.timmy-time.xyz/fullchain.pem;
ssl_certificate_key /etc/letsencrypt/live/matrix.timmy-time.xyz/privkey.pem;
location / {
proxy_pass http://127.0.0.1:8008;
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
}
}
EOF
ln -sf /etc/nginx/sites-available/matrix /etc/nginx/sites-enabled/
nginx -t && systemctl reload nginx
# Get cert
certbot --nginx -d matrix.timmy-time.xyz
```

> **Note (review):** the architecture diagram above shows Synapse binding host port `8448` directly (`Federation: 0.0.0.0:8448`). If the Docker stack publishes host port `8448`, it will conflict with this Nginx listener — expose federation through only one of the two. Verify the port mapping in `docker-compose.yml` before enabling this server block.
### 4. Wire Hermes Agent
Copy the generated credentials to hermes-agent's environment:
```bash
# From synapse-credentials.env, add to ~/.hermes/.env:
MATRIX_HOMESERVER=https://matrix.timmy-time.xyz
MATRIX_ACCESS_TOKEN=<from synapse-credentials.env>
MATRIX_USER_ID=@hermes-bot:matrix.timmy-time.xyz
MATRIX_DEVICE_ID=<from synapse-credentials.env>
MATRIX_ENCRYPTION=true
```
Then start the gateway:
```bash
hermes gateway --platform matrix
```
### 5. Verify
```bash
# Check Synapse health
curl -s https://matrix.timmy-time.xyz/_matrix/client/versions
# Check federation
curl -s https://matrix.timmy-time.xyz:8448/_matrix/federation/v1/version
# Check bot is connected
# (should appear online in Element or any Matrix client)
```
## Management
Use the management script for day-to-day operations:
```bash
cd /root/hermes-agent/deploy/synapse
./manage.sh status # container health
./manage.sh logs # tail logs
./manage.sh restart # restart Synapse
./manage.sh backup # backup DB + data
./manage.sh update # pull latest image
./manage.sh create-user alice 'password123'
./manage.sh create-user admin 'secret' admin
```
## Backups
```bash
./manage.sh backup
# Creates: backups/YYYYMMDD_HHMMSS/
# ├── synapse_db.sql (PostgreSQL dump)
# └── synapse_data.tar.gz (media store + keys)
```
Automate with cron:
```bash
# Daily backup at 3 AM
0 3 * * * cd /root/hermes-agent/deploy/synapse && ./manage.sh backup >> /var/log/synapse-backup.log 2>&1
```
## Troubleshooting
### Synapse won't start
```bash
docker compose logs synapse
# Common: PostgreSQL not ready. Wait for healthcheck.
```
### Bot can't connect
```bash
# Verify token is valid
curl -H "Authorization: Bearer $MATRIX_ACCESS_TOKEN" \
https://matrix.timmy-time.xyz/_matrix/client/v3/account/whoami
```
### Federation not working
```bash
# Check port 8448 is open
ss -tlnp | grep 8448
# Check firewall
ufw status
```
### High memory usage
```bash
# Check resource limits in docker-compose.yml
docker stats synapse
# Tune in homeserver.yaml: event_cache_size, caches
```
## Security Notes
- Registration is disabled by default (`enable_registration: false`)
- Rate limiting is enforced on login, registration, and messages
- Federation certificate verification is enabled
- `.env` and `synapse-credentials.env` are `chmod 600`
- Client API binds to `127.0.0.1` only (use Nginx for public access)
- Consider: firewall rules, fail2ban, regular backups
## References
- [Synapse Documentation](https://matrix-org.github.io/synapse/latest/)
- [Matrix Spec](https://spec.matrix.org/)
- [Epic #269: Matrix Integration](https://forge.alexanderwhitestone.com/Timmy_Foundation/hermes-agent/issues/269)
- [Issue #272: Deploy Synapse on Ezra](https://forge.alexanderwhitestone.com/Timmy_Foundation/hermes-agent/issues/272)
- [Hermes Matrix Setup Guide](docs/matrix-setup.md)