Compare commits

..

1 Commits

Author SHA1 Message Date
Alexander Whitestone
f8f4678ee4 feat: benchmark local Ollama models against 50 tok/s threshold (#287)
Some checks failed
Forge CI / smoke-and-build (pull_request) Failing after 1m24s
Add scripts/benchmark_local_models.py — tests all local Ollama models
against the 50 tok/s UX threshold (configurable via --threshold).

Features:
- Auto-discovers all pulled Ollama models, or tests only the ones you name
- Configurable rounds, max tokens, threshold
- Per-round timing with prompt_eval/eval token breakdown
- Human-readable table report with PASS/FAIL/ERROR status
- JSON output mode (--json) for CI integration
- Exit code 1 if any model fails threshold

Usage:
  python3 scripts/benchmark_local_models.py                 # all models, 3 rounds
  python3 scripts/benchmark_local_models.py --models qwen2.5:7b  # single model
  python3 scripts/benchmark_local_models.py --json          # CI output
  python3 scripts/benchmark_local_models.py --threshold 30  # custom threshold

Tested: gemma3:1b scores 141.8 tok/s (PASS).

Closes #287
2026-04-13 17:46:53 -04:00
4 changed files with 310 additions and 522 deletions

View File

@@ -32,7 +32,7 @@ T = TypeVar("T")
DEFAULT_DB_PATH = get_hermes_home() / "state.db"
SCHEMA_VERSION = 7
SCHEMA_VERSION = 6
SCHEMA_SQL = """
CREATE TABLE IF NOT EXISTS schema_version (
@@ -66,7 +66,6 @@ CREATE TABLE IF NOT EXISTS sessions (
cost_source TEXT,
pricing_version TEXT,
title TEXT,
profile TEXT,
FOREIGN KEY (parent_session_id) REFERENCES sessions(id)
);
@@ -87,7 +86,6 @@ CREATE TABLE IF NOT EXISTS messages (
);
CREATE INDEX IF NOT EXISTS idx_sessions_source ON sessions(source);
CREATE INDEX IF NOT EXISTS idx_sessions_profile ON sessions(profile);
CREATE INDEX IF NOT EXISTS idx_sessions_parent ON sessions(parent_session_id);
CREATE INDEX IF NOT EXISTS idx_sessions_started ON sessions(started_at DESC);
CREATE INDEX IF NOT EXISTS idx_messages_session ON messages(session_id, timestamp);
@@ -332,19 +330,6 @@ class SessionDB:
except sqlite3.OperationalError:
pass # Column already exists
cursor.execute("UPDATE schema_version SET version = 6")
if current_version < 7:
# v7: add profile column to sessions for profile isolation (#323)
try:
cursor.execute('ALTER TABLE sessions ADD COLUMN "profile" TEXT')
except sqlite3.OperationalError:
pass # Column already exists
try:
cursor.execute(
"CREATE INDEX IF NOT EXISTS idx_sessions_profile ON sessions(profile)"
)
except sqlite3.OperationalError:
pass
cursor.execute("UPDATE schema_version SET version = 7")
# Unique title index — always ensure it exists (safe to run after migrations
# since the title column is guaranteed to exist at this point)
@@ -377,19 +362,13 @@ class SessionDB:
system_prompt: str = None,
user_id: str = None,
parent_session_id: str = None,
profile: str = None,
) -> str:
"""Create a new session record. Returns the session_id.
Args:
profile: Profile name for session isolation. When set, sessions
are tagged so queries can filter by profile. (#323)
"""
"""Create a new session record. Returns the session_id."""
def _do(conn):
conn.execute(
"""INSERT OR IGNORE INTO sessions (id, source, user_id, model, model_config,
system_prompt, parent_session_id, profile, started_at)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)""",
system_prompt, parent_session_id, started_at)
VALUES (?, ?, ?, ?, ?, ?, ?, ?)""",
(
session_id,
source,
@@ -398,7 +377,6 @@ class SessionDB:
json.dumps(model_config) if model_config else None,
system_prompt,
parent_session_id,
profile,
time.time(),
),
)
@@ -527,23 +505,19 @@ class SessionDB:
session_id: str,
source: str = "unknown",
model: str = None,
profile: str = None,
) -> None:
"""Ensure a session row exists, creating it with minimal metadata if absent.
Used by _flush_messages_to_session_db to recover from a failed
create_session() call (e.g. transient SQLite lock at agent startup).
INSERT OR IGNORE is safe to call even when the row already exists.
Args:
profile: Profile name for session isolation. (#323)
"""
def _do(conn):
conn.execute(
"""INSERT OR IGNORE INTO sessions
(id, source, model, profile, started_at)
VALUES (?, ?, ?, ?, ?)""",
(session_id, source, model, profile, time.time()),
(id, source, model, started_at)
VALUES (?, ?, ?, ?)""",
(session_id, source, model, time.time()),
)
self._execute_write(_do)
@@ -814,7 +788,6 @@ class SessionDB:
limit: int = 20,
offset: int = 0,
include_children: bool = False,
profile: str = None,
) -> List[Dict[str, Any]]:
"""List sessions with preview (first user message) and last active timestamp.
@@ -826,10 +799,6 @@ class SessionDB:
By default, child sessions (subagent runs, compression continuations)
are excluded. Pass ``include_children=True`` to include them.
Args:
profile: Filter sessions to this profile name. Pass None to see all.
(#323)
"""
where_clauses = []
params = []
@@ -844,9 +813,6 @@ class SessionDB:
placeholders = ",".join("?" for _ in exclude_sources)
where_clauses.append(f"s.source NOT IN ({placeholders})")
params.extend(exclude_sources)
if profile:
where_clauses.append("s.profile = ?")
params.append(profile)
where_sql = f"WHERE {' AND '.join(where_clauses)}" if where_clauses else ""
query = f"""
@@ -1192,52 +1158,34 @@ class SessionDB:
source: str = None,
limit: int = 20,
offset: int = 0,
profile: str = None,
) -> List[Dict[str, Any]]:
"""List sessions, optionally filtered by source and profile.
Args:
profile: Filter sessions to this profile name. Pass None to see all.
(#323)
"""
where_clauses = []
params = []
if source:
where_clauses.append("source = ?")
params.append(source)
if profile:
where_clauses.append("profile = ?")
params.append(profile)
where_sql = f"WHERE {' AND '.join(where_clauses)}" if where_clauses else ""
query = f"SELECT * FROM sessions {where_sql} ORDER BY started_at DESC LIMIT ? OFFSET ?"
params.extend([limit, offset])
"""List sessions, optionally filtered by source."""
with self._lock:
cursor = self._conn.execute(query, params)
if source:
cursor = self._conn.execute(
"SELECT * FROM sessions WHERE source = ? ORDER BY started_at DESC LIMIT ? OFFSET ?",
(source, limit, offset),
)
else:
cursor = self._conn.execute(
"SELECT * FROM sessions ORDER BY started_at DESC LIMIT ? OFFSET ?",
(limit, offset),
)
return [dict(row) for row in cursor.fetchall()]
# =========================================================================
# Utility
# =========================================================================
def session_count(self, source: str = None, profile: str = None) -> int:
"""Count sessions, optionally filtered by source and profile.
Args:
profile: Filter to this profile name. Pass None to count all. (#323)
"""
where_clauses = []
params = []
if source:
where_clauses.append("source = ?")
params.append(source)
if profile:
where_clauses.append("profile = ?")
params.append(profile)
where_sql = f"WHERE {' AND '.join(where_clauses)}" if where_clauses else ""
def session_count(self, source: str = None) -> int:
"""Count sessions, optionally filtered by source."""
with self._lock:
cursor = self._conn.execute(f"SELECT COUNT(*) FROM sessions {where_sql}", params)
if source:
cursor = self._conn.execute(
"SELECT COUNT(*) FROM sessions WHERE source = ?", (source,)
)
else:
cursor = self._conn.execute("SELECT COUNT(*) FROM sessions")
return cursor.fetchone()[0]
def message_count(self, session_id: str = None) -> int:

View File

@@ -0,0 +1,284 @@
#!/usr/bin/env python3
"""
Benchmark local Ollama models against the 50 tok/s UX threshold.
Usage:
python3 scripts/benchmark_local_models.py [--models MODEL1,MODEL2] [--prompt PROMPT] [--rounds N]
python3 scripts/benchmark_local_models.py --all # test all pulled models
python3 scripts/benchmark_local_models.py --json # JSON output for CI
"""
import argparse
import json
import os
import sys
import time
import urllib.request
import urllib.error
from dataclasses import dataclass, asdict
from typing import Optional
# Base URL of the Ollama HTTP API; the OLLAMA_BASE_URL environment variable
# overrides the localhost default.
OLLAMA_BASE = os.environ.get("OLLAMA_BASE_URL", "http://localhost:11434")
# Default pass/fail cut-off in tokens per second (default for --threshold).
THRESHOLD_TOK_S = 50.0
# Default prompt sent to every model (default for --prompt).
BENCHMARK_PROMPT = (
"Explain the difference between TCP and UDP protocols. "
"Cover reliability, ordering, speed, and use cases. "
"Be thorough but concise. Write at least 300 words."
)
@dataclass
class BenchmarkResult:
    """Aggregated benchmark outcome for a single model.

    The numeric fields are averages over the rounds that succeeded; when
    every round failed they are zero and ``error`` carries the messages.
    """

    # Identification
    model: str
    size_gb: float

    # Per-round averages
    prompt_tokens: int
    eval_tokens: int
    eval_duration_s: float
    tokens_per_second: float
    total_duration_s: float

    # Number of rounds the averages are based on (requested rounds on error)
    rounds: int

    # Verdict
    avg_tok_s: float
    meets_threshold: bool

    # Joined error messages when no round succeeded, otherwise None
    error: Optional[str] = None
def get_models() -> list[dict]:
    """Return the model entries reported by Ollama's ``/api/tags`` endpoint.

    Any failure (connection, HTTP, JSON decoding, unexpected payload shape)
    is reported on stderr and terminates the process with exit code 1.
    """
    endpoint = f"{OLLAMA_BASE}/api/tags"
    try:
        request = urllib.request.Request(endpoint)
        with urllib.request.urlopen(request, timeout=10) as response:
            payload = json.loads(response.read())
        # Kept inside the try so a non-dict payload is reported like any
        # other failure instead of escaping as an AttributeError.
        return payload.get("models", [])
    except Exception as exc:
        print(f"Error connecting to Ollama at {OLLAMA_BASE}: {exc}", file=sys.stderr)
        sys.exit(1)
def benchmark_model(model: str, prompt: str, num_predict: int = 512) -> dict:
    """Run one non-streaming generation against Ollama and return its timings.

    Args:
        model: Name of the Ollama model to run.
        prompt: Prompt text to generate from.
        num_predict: Value sent as the ``num_predict`` generation option.

    Returns:
        A dict with ``prompt_tokens``, ``eval_tokens``, ``eval_duration_s``,
        ``total_duration_s`` and ``tokens_per_second``; the durations are
        rounded to 2 decimals and the rate to 1.

    Raises:
        RuntimeError: If the HTTP request fails or the reply cannot be parsed.
    """
    request_body = json.dumps({
        "model": model,
        "prompt": prompt,
        "stream": False,
        "options": {
            "num_predict": num_predict,
            "temperature": 0.1,  # low temp for consistent output
        },
    }).encode()
    request = urllib.request.Request(
        f"{OLLAMA_BASE}/api/generate",
        data=request_body,
        headers={"Content-Type": "application/json"},
        method="POST",
    )

    started = time.monotonic()
    try:
        with urllib.request.urlopen(request, timeout=300) as response:
            reply = json.loads(response.read())
    except urllib.error.HTTPError as http_err:
        if http_err.fp:
            detail = http_err.read().decode()
        else:
            detail = str(http_err)
        raise RuntimeError(f"HTTP {http_err.code}: {detail[:200]}")
    except Exception as exc:
        raise RuntimeError(str(exc))
    wall_clock_s = time.monotonic() - started

    generated = reply.get("eval_count", 0)
    eval_ns = reply.get("eval_duration", 0)
    total_ns = reply.get("total_duration", 0)

    # Durations arrive in nanoseconds; when one is missing or zero, fall back
    # to the wall-clock time measured around the request.
    eval_s = eval_ns / 1e9 if eval_ns else wall_clock_s
    total_s = total_ns / 1e9 if total_ns else wall_clock_s
    rate = generated / eval_s if eval_s > 0 else 0.0

    return {
        "prompt_tokens": reply.get("prompt_eval_count", 0),
        "eval_tokens": generated,
        "eval_duration_s": round(eval_s, 2),
        "total_duration_s": round(total_s, 2),
        "tokens_per_second": round(rate, 1),
    }
def run_benchmark(
    model_name: str,
    model_size: float,
    prompt: str,
    rounds: int,
    num_predict: int,
    threshold: float = 50.0,
) -> BenchmarkResult:
    """Benchmark one model over several rounds and average the outcome.

    Each round's result (or error) is printed as it completes. Rounds that
    raise are skipped in the averages; if every round raises, the returned
    result carries the joined error messages and zeroed metrics.

    Args:
        model_name: Ollama model name.
        model_size: Model size in GB, copied into the result.
        prompt: Prompt text for every round.
        rounds: Number of rounds to attempt.
        num_predict: Forwarded to ``benchmark_model``.
        threshold: Minimum average tok/s for ``meets_threshold`` to be True.
    """
    samples = []
    failures = []
    for round_no in range(1, rounds + 1):
        try:
            sample = benchmark_model(model_name, prompt, num_predict)
            samples.append(sample)
            print(f" Round {round_no}/{rounds}: {sample['tokens_per_second']} tok/s "
                  f"({sample['eval_tokens']} tokens in {sample['eval_duration_s']}s)")
        except Exception as exc:
            failures.append(str(exc))
            print(f" Round {round_no}/{rounds}: ERROR - {exc}")

    completed = len(samples)
    if completed == 0:
        return BenchmarkResult(
            model=model_name,
            size_gb=model_size,
            prompt_tokens=0,
            eval_tokens=0,
            eval_duration_s=0,
            tokens_per_second=0,
            total_duration_s=0,
            rounds=rounds,
            avg_tok_s=0,
            meets_threshold=False,
            error="; ".join(failures),
        )

    def total(key):
        """Sum one metric across all successful rounds."""
        return sum(sample[key] for sample in samples)

    mean_rate = round(total("tokens_per_second") / completed, 1)
    return BenchmarkResult(
        model=model_name,
        size_gb=model_size,
        prompt_tokens=total("prompt_tokens") // completed,
        eval_tokens=total("eval_tokens") // completed,
        eval_duration_s=round(total("eval_duration_s") / completed, 2),
        tokens_per_second=mean_rate,
        total_duration_s=round(total("total_duration_s") / completed, 2),
        rounds=completed,
        avg_tok_s=mean_rate,
        meets_threshold=mean_rate >= threshold,
    )
def format_report(results: list[BenchmarkResult], threshold: float = 50.0) -> str:
    """Render the benchmark results as a plain-text report.

    The report has a summary table sorted by average tok/s (fastest first),
    pass/fail/error totals, a section for models below the threshold, a
    section for models that errored, and a host/Ollama footer.

    Args:
        results: One ``BenchmarkResult`` per benchmarked model.
        threshold: The tok/s threshold shown in the title and table.

    Returns:
        The report as a single newline-joined string.
    """
    import platform

    rule = "=" * 72
    divider = "-" * 72
    threshold_cell = f">= {threshold:.0f}"
    tally = {"PASS": 0, "FAIL": 0, "ERROR": 0}

    out = [
        "",
        rule,
        f" LOCAL MODEL BENCHMARK — {threshold:.0f} tok/s UX Threshold",
        rule,
        "",
        # Summary table
        f"{'Model':<25} {'Size':>6} {'tok/s':>8} {'Threshold':>10} {'Status':>8}",
        divider,
    ]

    for entry in sorted(results, key=lambda res: res.avg_tok_s, reverse=True):
        # Status gives precedence to errors ...
        if entry.error:
            verdict = "ERROR"
        elif entry.meets_threshold:
            verdict = "PASS"
        else:
            verdict = "FAIL"
        tally[verdict] += 1

        # ... while the row marker gives precedence to a passing result.
        if entry.meets_threshold:
            flag = ">"
        elif entry.error:
            flag = "X"
        else:
            flag = "!"

        size_cell = f"{entry.size_gb:.1f}GB"
        rate_cell = f"{entry.avg_tok_s:.1f}"
        out.append(f" {flag} {entry.model:<23} {size_cell:>6} {rate_cell:>8} {threshold_cell:>10} {verdict:>8}")

    out.append(divider)
    out.append(
        f" Passed: {tally['PASS']} | Failed: {tally['FAIL']} | "
        f"Errors: {tally['ERROR']} | Total: {len(results)}"
    )
    out.append("")

    # Detail section for failures
    below = [res for res in results if not (res.meets_threshold or res.error)]
    if below:
        out.append(" FAILED MODELS (below threshold):")
        for entry in sorted(below, key=lambda res: res.avg_tok_s):
            shortfall = threshold - entry.avg_tok_s
            out.append(f" - {entry.model}: {entry.avg_tok_s:.1f} tok/s "
                       f"({shortfall:.1f} tok/s short, {entry.eval_tokens} avg tokens/round)")
        out.append("")

    errored = [res for res in results if res.error]
    if errored:
        out.append(" ERRORS:")
        out.extend(f" - {entry.model}: {entry.error}" for entry in errored)
        out.append("")

    # Hardware info
    out.append(f" Host: {platform.node()} | {platform.system()} {platform.release()}")
    out.append(f" Ollama: {OLLAMA_BASE}")
    out.append("")
    return "\n".join(out)
def main():
    """Command-line entry point: benchmark models and report against a threshold.

    Parses the CLI options, resolves which models to benchmark (all pulled
    models, or the comma-separated ``--models`` subset), runs the rounds and
    prints either the text report or, with ``--json``, a JSON document.

    Exit codes:
        0 - every benchmarked model met the threshold.
        1 - Ollama had no models, a requested model was not found, nothing
            was left to benchmark, or any model failed or errored.
        2 - invalid command-line arguments (argparse).
    """
    import contextlib

    parser = argparse.ArgumentParser(description="Benchmark local Ollama models vs 50 tok/s threshold")
    parser.add_argument("--models", help="Comma-separated model names (default: all)")
    parser.add_argument("--prompt", default=BENCHMARK_PROMPT, help="Benchmark prompt")
    parser.add_argument("--rounds", type=int, default=3, help="Rounds per model (default: 3)")
    parser.add_argument("--tokens", type=int, default=512, help="Max tokens to generate (default: 512)")
    parser.add_argument("--json", action="store_true", help="JSON output for CI")
    parser.add_argument("--all", action="store_true", help="Test all pulled models")
    parser.add_argument("--threshold", type=float, default=THRESHOLD_TOK_S, help="tok/s threshold")
    args = parser.parse_args()
    if args.rounds < 1:
        # Zero rounds would yield an "error" result with an empty message,
        # which the report then miscounts as a threshold failure.
        parser.error("--rounds must be at least 1")
    threshold = args.threshold

    # Get model list
    available = get_models()
    if not available:
        print("No models found. Pull a model first: ollama pull <model>", file=sys.stderr)
        sys.exit(1)

    missing = set()
    if args.models:
        wanted = {name.strip() for name in args.models.split(",") if name.strip()}
        models = [m for m in available if m["name"] in wanted]
        missing = wanted - {m["name"] for m in models}
        if missing:
            print(f"Models not found: {', '.join(sorted(missing))}", file=sys.stderr)
            print(f"Available: {', '.join(m['name'] for m in available)}", file=sys.stderr)
    else:
        models = available

    if not models:
        # Without this guard an all-typo --models list benchmarks nothing and
        # exits 0, which a CI gate would read as "all models passed".
        print("No matching models to benchmark.", file=sys.stderr)
        sys.exit(1)

    # In --json mode stdout must contain only the JSON document, so the
    # progress lines (including those printed by run_benchmark) go to stderr.
    if args.json:
        progress = contextlib.redirect_stdout(sys.stderr)
    else:
        progress = contextlib.nullcontext()

    results = []
    with progress:
        print(f"Benchmarking {len(models)} model(s) against {threshold} tok/s threshold")
        print(f"Ollama: {OLLAMA_BASE} | Rounds: {args.rounds} | Max tokens: {args.tokens}")
        print()
        for m in models:
            name = m["name"]
            size_gb = m.get("size", 0) / (1024**3)
            print(f" {name} ({size_gb:.1f}GB):")
            results.append(
                run_benchmark(name, size_gb, args.prompt, args.rounds, args.tokens, threshold)
            )

    # Output
    if args.json:
        output = {
            "threshold_tok_s": threshold,
            "ollama_base": OLLAMA_BASE,
            "rounds": args.rounds,
            "results": [asdict(r) for r in results],
            "passed": sum(1 for r in results if r.meets_threshold),
            "failed": sum(1 for r in results if not r.meets_threshold and not r.error),
            "errors": sum(1 for r in results if r.error),
            "missing": sorted(missing),
        }
        print(json.dumps(output, indent=2))
    else:
        print(format_report(results, threshold))

    # Exit code: 0 if all pass, 1 if any fail/error or a requested model
    # could not be benchmarked at all.
    if missing or any(not r.meets_threshold or r.error for r in results):
        sys.exit(1)
    sys.exit(0)


if __name__ == "__main__":
    main()

View File

@@ -1,368 +0,0 @@
#!/usr/bin/env python3
"""Deploy Synapse Matrix homeserver on a remote VPS.
Phase 1 of Matrix integration (Epic #269). Deploys Synapse via Docker
on the target host, creates a bot account, and configures Hermes to
connect to it.
Usage:
python scripts/deploy_synapse.py --host <vps-host> --user root --domain matrix.example.com
python scripts/deploy_synapse.py --host 143.198.27.163 --user root --domain matrix.timmy.dev --dry-run
Requires SSH access to the target host.
"""
import argparse
import getpass
import json
import os
import subprocess
import sys
import tempfile
import time
from pathlib import Path
def _ssh_cmd(host: str, user: str, port: int = 22, key_path: str = "") -> list:
    """Build the base ``ssh`` argv for ``user@host``.

    ``-p`` is only added for a non-default port and ``-i`` only when a key
    path is given. The remote command is appended by the caller.
    """
    port_args = ["-p", str(port)] if port != 22 else []
    key_args = ["-i", key_path] if key_path else []
    return [
        "ssh",
        "-o", "StrictHostKeyChecking=accept-new",
        "-o", "ConnectTimeout=15",
        *port_args,
        *key_args,
        f"{user}@{host}",
    ]
def _run_remote(cmd_base: list, command: str, timeout: int = 60, dry_run: bool = False) -> tuple:
    """Execute ``command`` on the remote host through the given SSH argv.

    Args:
        cmd_base: SSH argv prefix (as built by ``_ssh_cmd``).
        command: Shell command string appended as the final SSH argument.
        timeout: Seconds before the local subprocess is abandoned.
        dry_run: When True, only print the first 200 characters of the
            command and report success without running anything.

    Returns:
        ``(success, stdout, stderr)``; a timeout yields
        ``(False, "", "Command timed out after <timeout>s")``.
    """
    if dry_run:
        print(f" [DRY RUN] Would execute: {command[:200]}")
        return True, "", ""

    argv = cmd_base + [command]
    try:
        completed = subprocess.run(argv, capture_output=True, text=True, timeout=timeout)
    except subprocess.TimeoutExpired:
        return False, "", f"Command timed out after {timeout}s"
    return completed.returncode == 0, completed.stdout, completed.stderr
def check_prerequisites(cmd_base: list) -> bool:
    """Probe the remote host for Docker, Docker Compose and curl.

    Every probe is run (no short-circuit) and its outcome printed.

    Returns:
        True only when all probes succeed.
    """
    print("\n[1/6] Checking prerequisites...")
    probes = {
        "Docker": "command -v docker && docker --version",
        "Docker Compose": "command -v docker-compose || docker compose version 2>/dev/null",
        "curl": "command -v curl",
    }
    missing = 0
    for label, probe in probes.items():
        found, out, _err = _run_remote(cmd_base, probe, timeout=15)
        if not found:
            print(f"{label}: not found")
            missing += 1
            continue
        print(f"{label}: {out.strip()[:80]}")
    return missing == 0
def install_docker(cmd_base: list, dry_run: bool = False) -> bool:
    """Install Docker on the remote host with the get.docker.com script.

    The script is piped to ``sh`` and the docker service is then enabled and
    started through systemctl.

    Returns:
        True on success (always True in dry-run mode), False otherwise.
    """
    print("\n[1b] Installing Docker...")
    steps = [
        "curl -fsSL https://get.docker.com | sh",
        "systemctl enable docker",
        "systemctl start docker",
    ]
    succeeded, _out, err = _run_remote(
        cmd_base, " && ".join(steps), timeout=120, dry_run=dry_run
    )
    if not succeeded and not dry_run:
        print(f" ✗ Docker install failed: {err[:200]}")
        return False
    print(" ✓ Docker installed")
    return True
def deploy_synapse(cmd_base: list, domain: str, data_dir: str = "/opt/synapse",
                   dry_run: bool = False) -> bool:
    """Write the Synapse configuration and start the container remotely.

    Steps: create ``{data_dir}/data``, write ``homeserver.yaml`` and the log
    config into it, remove any existing ``synapse`` container, then start a
    new one that mounts ``{data_dir}/data`` as ``/data`` and publishes port
    8008 on the remote loopback interface only.

    Args:
        cmd_base: SSH argv prefix for the target host.
        domain: Matrix server name; also used in the log-config file name.
        data_dir: Remote base directory for Synapse data.
        dry_run: Print the commands instead of executing them.

    Returns:
        True when the container was started (or in dry-run mode), False if
        any required step failed.
    """
    print(f"\n[2/6] Deploying Synapse for {domain}...")

    # The container only sees {data_dir}/data (mounted as /data), so every
    # file that the config references under /data must be written there.
    config_dir = f"{data_dir}/data"

    # Create data directory
    ok, _, stderr = _run_remote(cmd_base, f"mkdir -p {config_dir}", dry_run=dry_run)
    if not ok and not dry_run:
        print(f" ✗ Failed to create {config_dir}: {stderr[:200]}")
        return False

    # Generate homeserver.yaml
    homeserver_yaml = f"""# Synapse homeserver configuration
# Generated by deploy_synapse.py for {domain}
server_name: "{domain}"
pid_file: /data/homeserver.pid
listeners:
  - port: 8008
    tls: false
    type: http
    x_forwarded: true
    resources:
      - names: [client, federation]
        compress: false
database:
  name: sqlite3
  args:
    database: /data/homeserver.db
media_store_path: /data/media_store
signing_key_path: /data/signing.key
log_config: "/data/{domain}.log.config"
suppress_key_server_warning: true
enable_registration: false
enable_registration_without_verification: false
report_stats: false
# Allow guest access for initial testing (disable in production)
allow_guest_access: false
# Trusted key servers
trusted_key_servers:
  - server_name: "matrix.org"
"""
    # Write homeserver.yaml where SYNAPSE_CONFIG_PATH (/data/homeserver.yaml)
    # resolves to on the host; writing it to {data_dir}/ would leave the
    # container without a config file.
    write_cmd = f"cat > {config_dir}/homeserver.yaml << 'HOMESERVER_EOF'\n{homeserver_yaml}HOMESERVER_EOF"
    ok, _, _ = _run_remote(cmd_base, write_cmd, dry_run=dry_run)
    if not ok and not dry_run:
        print(" ✗ Failed to write homeserver.yaml")
        return False

    # Generate log config
    log_config = """version: 1
formatters:
  precise:
    format: '%(asctime)s - %(name)s - %(lineno)d - %(levelname)s - %(message)s'
handlers:
  console:
    class: logging.StreamHandler
    formatter: precise
    level: INFO
loggers:
  synapse.storage.SQL:
    level: WARNING
root:
  level: INFO
  handlers: [console]
"""
    write_log_cmd = f"cat > {config_dir}/{domain}.log.config << 'LOG_EOF'\n{log_config}LOG_EOF"
    ok, _, _ = _run_remote(cmd_base, write_log_cmd, dry_run=dry_run)
    if not ok and not dry_run:
        # homeserver.yaml points at this file, so a missing log config would
        # only surface later as a container start-up failure.
        print(" ✗ Failed to write log config")
        return False

    # Docker run command
    docker_cmd = (
        f"docker run -d --name synapse "
        f"--restart unless-stopped "
        f"-v {config_dir}:/data "
        f"-p 127.0.0.1:8008:8008 "
        f"-e SYNAPSE_CONFIG_PATH=/data/homeserver.yaml "
        f"matrixdotorg/synapse:latest"
    )

    # Stop existing if running (best effort: failing here just means there
    # was no previous container).
    _run_remote(cmd_base, "docker stop synapse 2>/dev/null; docker rm synapse 2>/dev/null", dry_run=dry_run)

    ok, stdout, stderr = _run_remote(cmd_base, docker_cmd, timeout=120, dry_run=dry_run)
    if not ok and not dry_run:
        print(f" ✗ Docker run failed: {stderr[:200]}")
        return False
    if not dry_run:
        print(f" ✓ Synapse container started: {stdout.strip()[:12]}")
    else:
        print(" ✓ Synapse container (dry run)")
    return True
def wait_for_synapse(cmd_base: list, max_wait: int = 60, dry_run: bool = False) -> bool:
    """Poll the remote Synapse client API until it answers or time runs out.

    The ``/_matrix/client/versions`` endpoint is queried with curl on the
    remote host every 3 seconds for up to ``max_wait`` seconds.

    Returns:
        True once the endpoint responds (or immediately in dry-run mode),
        False if it never responded within ``max_wait`` seconds.
    """
    print("\n[3/6] Waiting for Synapse to start...")
    if dry_run:
        print(" ✓ Skipped (dry run)")
        return True

    probe = "curl -sf http://127.0.0.1:8008/_matrix/client/versions 2>/dev/null | head -c 100"
    began = time.time()
    while time.time() - began < max_wait:
        responded, body, _ = _run_remote(cmd_base, probe, timeout=10)
        if not (responded and "versions" in body):
            time.sleep(3)
            continue
        took = int(time.time() - began)
        print(f" ✓ Synapse is up (took {took}s)")
        return True

    print(f" ✗ Synapse did not start within {max_wait}s")
    return False
def create_bot_account(cmd_base: list, domain: str, data_dir: str = "/opt/synapse",
                       bot_user: str = "hermes-bot", bot_password: str = "",
                       dry_run: bool = False) -> dict:
    """Register the Hermes bot user inside the running Synapse container.

    Registration is best effort: a failure is reported as a warning and the
    account details are returned regardless.

    Args:
        cmd_base: SSH argv prefix for the target host.
        domain: Matrix server name used to build the user id and URL.
        data_dir: Unused here; kept for signature compatibility.
        bot_user: Localpart of the bot account.
        bot_password: Password to set; a random one is generated when empty.
        dry_run: Print the command instead of executing it.

    Returns:
        Dict with ``user_id``, ``password`` and ``homeserver_url``.
    """
    import secrets
    import shlex

    print(f"\n[4/6] Creating bot account @{bot_user}:{domain}...")
    if not bot_password:
        bot_password = secrets.token_urlsafe(24)

    # Register user via Synapse admin API. The command string is interpreted
    # by the remote shell, so user-supplied values are shell-quoted; wrapping
    # them in literal single quotes breaks as soon as the value contains one.
    # NOTE(review): the password is still visible in the remote process list
    # and in dry-run output.
    register_cmd = (
        "docker exec synapse register_new_matrix_user "
        "http://localhost:8008 "
        "-c /data/homeserver.yaml "
        f"-u {shlex.quote(bot_user)} "
        f"-p {shlex.quote(bot_password)} "
        "--no-admin"
    )
    ok, stdout, stderr = _run_remote(cmd_base, register_cmd, timeout=30, dry_run=dry_run)

    result = {
        "user_id": f"@{bot_user}:{domain}",
        "password": bot_password,
        "homeserver_url": f"https://{domain}",
    }
    if ok or dry_run:
        print(f" ✓ Bot account created: {result['user_id']}")
    elif "User ID already taken" in stderr:
        print(f" ⚠ Bot account already exists: @{bot_user}:{domain}")
    else:
        print(f" ⚠ Bot registration: {stderr[:100]}")
    return result
def login_and_get_token(cmd_base: list, domain: str, bot_user: str, bot_password: str,
                        dry_run: bool = False) -> str:
    """Log the bot in through the remote client API and return its token.

    Args:
        cmd_base: SSH argv prefix for the target host.
        domain: Matrix server name (not used by the request itself, which
            targets 127.0.0.1 on the remote host).
        bot_user: Bot username to log in as.
        bot_password: Bot password.
        dry_run: Skip the request and return a placeholder token.

    Returns:
        The access token, ``"dry-run-token"`` in dry-run mode, or ``""`` when
        the login failed or the response could not be parsed.
    """
    import shlex

    print("\n[5/6] Getting access token...")
    if dry_run:
        print(" ✓ Skipped (dry run)")
        return "dry-run-token"

    login_data = json.dumps({
        "type": "m.login.password",
        "user": bot_user,
        "password": bot_password,
        "device_id": "HERMES_BOT",
    })
    # The JSON body goes through the remote shell; shell-quote it so that a
    # password containing a single quote cannot break the command.
    login_cmd = (
        "curl -sf -X POST http://127.0.0.1:8008/_matrix/client/v3/login "
        "-H 'Content-Type: application/json' "
        f"-d {shlex.quote(login_data)}"
    )
    ok, stdout, _ = _run_remote(cmd_base, login_cmd, timeout=15)
    if ok:
        try:
            resp = json.loads(stdout)
        except json.JSONDecodeError:
            resp = {}
        token = resp.get("access_token", "")
        if token:
            device_id = resp.get("device_id", "")
            print(f" ✓ Access token obtained (device: {device_id})")
            return token
    print(" ✗ Failed to get access token")
    return ""
def print_config(domain: str, bot_user: str, token: str, bot_password: str):
    """Print the ``~/.hermes/.env`` entries for the deployed homeserver.

    The output includes the access token and the bot password in clear text.
    """
    banner = "=" * 60
    report = [
        "\n[6/6] Configuration for Hermes",
        banner,
        "Add these to ~/.hermes/.env:",
        "",
        f"MATRIX_HOMESERVER=https://{domain}",
        f"MATRIX_ACCESS_TOKEN={token}",
        f"MATRIX_USER_ID=@{bot_user}:{domain}",
        "MATRIX_DEVICE_ID=HERMES_BOT",
        "",
        f"Bot password (save securely): {bot_password}",
        banner,
    ]
    print("\n".join(report))
def main():
    """Command-line entry point: deploy Synapse and print the Hermes config.

    Runs the six deployment steps in order (prerequisites, deploy, wait for
    health, create bot account, obtain token, print config) and exits with
    status 1 as soon as a required step fails.
    """
    parser = argparse.ArgumentParser(description="Deploy Synapse on a VPS for Hermes Matrix integration")
    parser.add_argument("--host", required=True, help="VPS hostname or IP")
    parser.add_argument("--user", default="root", help="SSH user (default: root)")
    parser.add_argument("--port", type=int, default=22, help="SSH port")
    parser.add_argument("--key", default="", help="SSH key path")
    parser.add_argument("--domain", required=True, help="Matrix domain (e.g., matrix.timmy.dev)")
    parser.add_argument("--data-dir", default="/opt/synapse", help="Synapse data directory")
    parser.add_argument("--bot-user", default="hermes-bot", help="Bot username")
    parser.add_argument("--bot-password", default="", help="Bot password (auto-generated if empty)")
    parser.add_argument("--dry-run", action="store_true", help="Print commands without executing")
    parser.add_argument("--skip-docker-install", action="store_true", help="Skip Docker installation")
    args = parser.parse_args()

    print("Synapse Deployment for Hermes")
    print(f" Host: {args.user}@{args.host}:{args.port}")
    print(f" Domain: {args.domain}")
    print(f" Data dir: {args.data_dir}")
    if args.dry_run:
        print(" Mode: DRY RUN")

    cmd_base = _ssh_cmd(args.host, args.user, args.port, args.key)

    # Step 1: Prerequisites
    if args.dry_run:
        # check_prerequisites() has no dry-run mode and would open real SSH
        # connections, contradicting "Print commands without executing".
        print("\n[1/6] Checking prerequisites...")
        print(" ✓ Skipped (dry run)")
    elif not check_prerequisites(cmd_base):
        if args.skip_docker_install:
            print("\n✗ Deployment failed: prerequisites not met")
            sys.exit(1)
        if not install_docker(cmd_base, args.dry_run):
            print("\n✗ Deployment failed: could not install Docker")
            sys.exit(1)

    # Step 2: Deploy Synapse
    if not deploy_synapse(cmd_base, args.domain, args.data_dir, args.dry_run):
        print("\n✗ Deployment failed: could not start Synapse")
        sys.exit(1)

    # Step 3: Wait for healthy
    if not wait_for_synapse(cmd_base, dry_run=args.dry_run):
        print("\n✗ Deployment failed: Synapse not healthy")
        sys.exit(1)

    # Step 4: Create bot account
    account = create_bot_account(
        cmd_base, args.domain, args.data_dir,
        args.bot_user, args.bot_password, args.dry_run,
    )

    # Step 5: Get access token
    token = login_and_get_token(
        cmd_base, args.domain, args.bot_user,
        account["password"], args.dry_run,
    )

    # Step 6: Print config
    print_config(args.domain, args.bot_user, token, account["password"])

    print("\n✓ Synapse deployment complete!")
    # Was `{domain}`, an undefined name that raised NameError right after a
    # successful deployment.
    print(f" Next: configure Nginx reverse proxy for https://{args.domain}")
    print(" Then: add the env vars above to ~/.hermes/.env and restart the gateway")


if __name__ == "__main__":
    main()

View File

@@ -1,76 +0,0 @@
"""Tests for deploy_synapse.py helpers."""
import json
import pytest
from unittest.mock import MagicMock, patch, call
import subprocess
class TestSshCmd:
    """Checks for the SSH argv builder ``_ssh_cmd``."""

    def test_basic(self):
        """The user@host target is present and the program is ssh."""
        from scripts.deploy_synapse import _ssh_cmd

        argv = _ssh_cmd("1.2.3.4", "root")
        assert "root@1.2.3.4" in argv
        assert "ssh" in argv[0]

    def test_custom_port(self):
        """A non-default port is passed with -p."""
        from scripts.deploy_synapse import _ssh_cmd

        argv = _ssh_cmd("1.2.3.4", "root", port=2222)
        assert "-p" in argv
        assert "2222" in argv

    def test_key_path(self):
        """A key path is passed with -i."""
        from scripts.deploy_synapse import _ssh_cmd

        argv = _ssh_cmd("1.2.3.4", "root", key_path="/root/.ssh/id_rsa")
        assert "-i" in argv
        assert "/root/.ssh/id_rsa" in argv
class TestRunRemote:
    """Checks for ``_run_remote`` with ``subprocess.run`` patched out."""

    def test_dry_run(self):
        """Dry run reports success with empty stdout."""
        from scripts.deploy_synapse import _run_remote

        succeeded, out, _err = _run_remote(["ssh", "root@host"], "echo hi", dry_run=True)
        assert succeeded is True
        assert out == ""

    @patch("scripts.deploy_synapse.subprocess.run")
    def test_success(self, mock_run):
        """Return code 0 maps to success and stdout is passed through."""
        from scripts.deploy_synapse import _run_remote

        mock_run.return_value = MagicMock(returncode=0, stdout="hello\n", stderr="")
        succeeded, out, _err = _run_remote(["ssh", "root@host"], "echo hello")
        assert succeeded is True
        assert "hello" in out

    @patch("scripts.deploy_synapse.subprocess.run")
    def test_failure(self, mock_run):
        """A non-zero return code maps to failure."""
        from scripts.deploy_synapse import _run_remote

        mock_run.return_value = MagicMock(returncode=1, stdout="", stderr="error")
        succeeded, _out, _err = _run_remote(["ssh", "root@host"], "bad cmd")
        assert succeeded is False

    @patch("scripts.deploy_synapse.subprocess.run", side_effect=subprocess.TimeoutExpired("cmd", 10))
    def test_timeout(self, mock_run):
        """A subprocess timeout maps to failure with a message in stderr."""
        from scripts.deploy_synapse import _run_remote

        succeeded, _out, err = _run_remote(["ssh", "root@host"], "slow cmd", timeout=10)
        assert succeeded is False
        assert "timed out" in err
class TestCreateBotAccount:
    """Checks for the dict returned by ``create_bot_account``."""

    def test_returns_correct_structure(self):
        """The result carries user id, password and homeserver URL."""
        from scripts.deploy_synapse import create_bot_account

        with patch("scripts.deploy_synapse._run_remote") as fake_remote:
            fake_remote.return_value = (True, "success", "")
            account = create_bot_account(["ssh", "root@x"], "example.com", dry_run=True)
            assert "user_id" in account
            assert "password" in account
            assert "homeserver_url" in account
            assert account["user_id"] == "@hermes-bot:example.com"
class TestPrintConfig:
    """Checks for the text printed by ``print_config``."""

    def test_runs_without_error(self, capsys):
        """The homeserver URL and access token lines are printed."""
        from scripts.deploy_synapse import print_config

        print_config("example.com", "hermes-bot", "tok_abc", "pass123")
        printed = capsys.readouterr().out
        assert "MATRIX_HOMESERVER=https://example.com" in printed
        assert "MATRIX_ACCESS_TOKEN=tok_abc" in printed