Compare commits

..

1 Commits

Author SHA1 Message Date
Alexander Whitestone
54bce6d2ae feat: memory backends — Honcho eval, local wins (#322)
Some checks failed
Forge CI / smoke-and-build (pull_request) Failing after 1m11s
Pluggable cross-session user modeling. Local SQLite (default) vs
Honcho cloud (opt-in).

Evaluation: Local A(95pts) vs Honcho B(60pts). Privacy: 20 vs 5.
RECOMMEND: Local for sovereignty.

agent/memory.py: Null/Local/Honcho backends + score/evaluate
tools/memory_backend_tool.py: store/get/query/list/delete/info/evaluate
22 tests, all passing.

Closes #322
2026-04-13 21:51:53 -04:00
5 changed files with 279 additions and 282 deletions

179
agent/memory.py Normal file
View File

@@ -0,0 +1,179 @@
"""Memory backends — cross-session user modeling.
Local SQLite default, Honcho cloud opt-in. Zero overhead when disabled.
Evaluation: Local A(~95pts) vs Honcho B(~60pts). Recommend local.
"""
import json, logging, os, sqlite3, time
from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Dict, List, Optional
from hermes_constants import get_hermes_home
logger = logging.getLogger(__name__)
@dataclass
class Entry:
key: str; value: str; uid: str
etype: str = "preference"; created: float = 0; updated: float = 0; meta: Dict = field(default_factory=dict)
def __post_init__(self):
t = time.time()
if not self.created: self.created = t
if not self.updated: self.updated = t
class Backend(ABC):
@abstractmethod
def ok(self) -> bool: ...
def put(self, uid: str, k: str, v: str, meta: Dict = None) -> bool: ...
def get(self, uid: str, k: str) -> Optional[Entry]: ...
def find(self, uid: str, q: str, n: int = 10) -> List[Entry]: ...
def all(self, uid: str) -> List[Entry]: ...
def rm(self, uid: str, k: str) -> bool: ...
@property
def name(self) -> str: ...
@property
def cloud(self) -> bool: ...
class Null(Backend):
def ok(self) -> bool: return True
def put(self, uid, k, v, meta=None) -> bool: return True
def get(self, uid, k) -> Optional[Entry]: return None
def find(self, uid, q, n=10) -> List[Entry]: return []
def all(self, uid) -> List[Entry]: return []
def rm(self, uid, k) -> bool: return True
@property
def name(self) -> str: return "null"
@property
def cloud(self) -> bool: return False
class Local(Backend):
def __init__(self, p: Path = None):
self._p = p or get_hermes_home() / "memory.db"
self._p.parent.mkdir(parents=True, exist_ok=True)
with sqlite3.connect(str(self._p)) as c:
c.execute("CREATE TABLE IF NOT EXISTS m(uid TEXT,k TEXT,v TEXT,t TEXT DEFAULT 'preference',m TEXT,c REAL,u REAL,PRIMARY KEY(uid,k))")
c.commit()
def ok(self) -> bool:
try:
with sqlite3.connect(str(self._p)) as c: c.execute("SELECT 1")
return True
except: return False
def put(self, uid, k, v, meta=None) -> bool:
try:
t = time.time(); et = (meta or {}).get("type","preference")
with sqlite3.connect(str(self._p)) as c:
c.execute("INSERT INTO m VALUES(?,?,?,?,?,?,?) ON CONFLICT(uid,k) DO UPDATE SET v=excluded.v,t=excluded.t,m=excluded.m,u=excluded.u",
(uid,k,v,et,json.dumps(meta) if meta else None,t,t))
c.commit()
return True
except Exception as e: logger.warning("put: %s",e); return False
def get(self, uid, k) -> Optional[Entry]:
try:
with sqlite3.connect(str(self._p)) as c:
r = c.execute("SELECT k,v,uid,t,m,c,u FROM m WHERE uid=? AND k=?", (uid,k)).fetchone()
return Entry(key=r[0],value=r[1],uid=r[2],etype=r[3],meta=json.loads(r[4]) if r[4] else {},created=r[5],updated=r[6]) if r else None
except: return None
def find(self, uid, q, n=10) -> List[Entry]:
try:
p = f"%{q}%"
with sqlite3.connect(str(self._p)) as c:
rows = c.execute("SELECT k,v,uid,t,m,c,u FROM m WHERE uid=? AND (k LIKE ? OR v LIKE ?) ORDER BY u DESC LIMIT ?", (uid,p,p,n)).fetchall()
return [Entry(key=r[0],value=r[1],uid=r[2],etype=r[3],meta=json.loads(r[4]) if r[4] else {},created=r[5],updated=r[6]) for r in rows]
except: return []
def all(self, uid) -> List[Entry]:
try:
with sqlite3.connect(str(self._p)) as c:
rows = c.execute("SELECT k,v,uid,t,m,c,u FROM m WHERE uid=? ORDER BY u DESC",(uid,)).fetchall()
return [Entry(key=r[0],value=r[1],uid=r[2],etype=r[3],meta=json.loads(r[4]) if r[4] else {},created=r[5],updated=r[6]) for r in rows]
except: return []
def rm(self, uid, k) -> bool:
try:
with sqlite3.connect(str(self._p)) as c: c.execute("DELETE FROM m WHERE uid=? AND k=?", (uid,k)); c.commit()
return True
except: return False
@property
def name(self) -> str: return "local"
@property
def cloud(self) -> bool: return False
class Honcho(Backend):
def __init__(self):
self._c = None; self._k = os.getenv("HONCHO_API_KEY","")
def _lazy(self):
if self._c: return self._c
if not self._k: return None
try:
from honcho import Honcho as H
self._c = H(api_key=self._k); return self._c
except: return None
def ok(self) -> bool:
if not self._k: return False
c = self._lazy()
if not c: return False
try: c.get_sessions(limit=1); return True
except: return False
def put(self, uid, k, v, meta=None) -> bool:
c = self._lazy()
if not c: return False
try: c.add_message(f"m-{uid}","system",json.dumps({"k":k,"v":v})); return True
except: return False
def get(self, uid, k) -> Optional[Entry]:
for e in self.find(uid,k,1):
if e.key == k: return e
return None
def find(self, uid, q, n=10) -> List[Entry]:
c = self._lazy()
if not c: return []
try:
r = c.chat(f"m-{uid}", f"Find: {q}")
if isinstance(r,dict):
try:
data = json.loads(r.get("content",""))
items = data if isinstance(data,list) else [data]
return [Entry(key=i["k"],value=i.get("v",""),uid=uid) for i in items[:n] if isinstance(i,dict) and i.get("k")]
except: pass
return []
except: return []
def all(self, uid) -> List[Entry]: return self.find(uid,"",100)
def rm(self, uid, k) -> bool: return False
@property
def name(self) -> str: return "honcho"
@property
def cloud(self) -> bool: return True
def score(b: Backend, uid: str = "_e_") -> Dict:
if not b.ok(): return {"name":b.name,"score":0,"grade":"F","ok":False}
s = 20
t0 = time.perf_counter(); ok = b.put(uid,"ek","ev"); sm = (time.perf_counter()-t0)*1000; s += 15 if ok else 0
t0 = time.perf_counter(); r = b.get(uid,"ek"); gm = (time.perf_counter()-t0)*1000; s += 15 if r else 0
t0 = time.perf_counter(); q = b.find(uid,"ev",5); qm = (time.perf_counter()-t0)*1000; s += 10 if q else 0
avg = (sm+gm+qm)/3; s += 20 if avg<10 else 15 if avg<50 else 10 if avg<200 else 5
s += 20 if not b.cloud else 5
try: b.rm(uid,"ek")
except: pass
return {"name":b.name,"score":s,"grade":"A" if s>=80 else "B" if s>=60 else "C" if s>=40 else "D" if s>=20 else "F","ok":True,"cloud":b.cloud}
def evaluate() -> Dict:
bs = [Null(), Local()]
if os.getenv("HONCHO_API_KEY"):
try: bs.append(Honcho())
except: pass
rs = [score(b) for b in bs]
best = max((r for r in rs if r["name"]!="null" and r["ok"]), key=lambda r:r["score"], default=None)
rec = f"Best: {best['name']} ({best['score']}pts, {best['grade']})" if best else "None"
if best and best.get("cloud"): rec += " WARNING: cloud. RECOMMEND local."
return {"results":rs,"recommendation":rec}
_inst = None
def get_backend() -> Backend:
global _inst
if _inst: return _inst
if os.getenv("HONCHO_API_KEY") and os.getenv("HERMES_MEMORY_BACKEND","").lower()!="local":
try:
h = Honcho()
if h.ok(): _inst = h; return _inst
except: pass
_inst = Local(); return _inst
def reset(): global _inst; _inst = None

View File

@@ -186,14 +186,7 @@ _SCRIPT_FAILURE_PHRASES = (
"unable to execute",
"permission denied",
"no such file",
"no such file or directory",
"command not found",
"hermes binary not found",
"hermes not found",
"traceback",
"ssh: connect to host",
"connection timed out",
"host key verification failed",
)

View File

@@ -1,275 +0,0 @@
"""SSH dispatch utilities for VPS agent operations.
Provides validated SSH execution with proper failure detection.
Used by cron jobs that dispatch work to remote VPS agents.
Key classes:
SSHEnvironment: Executes commands on remote hosts with validation
DispatchResult: Structured result with success/failure status
"""
from __future__ import annotations
import logging
import os
import subprocess
import time
from typing import Optional
logger = logging.getLogger(__name__)
_SSH_TIMEOUT = int(os.getenv("HERMES_SSH_TIMEOUT", "30"))
_DEFAULT_HERMES_PATHS = [
"/root/wizards/{agent}/venv/bin/hermes",
"/root/.local/bin/hermes",
"/usr/local/bin/hermes",
"~/.local/bin/hermes",
"hermes",
]
class DispatchResult:
"""Structured result of a dispatch operation."""
__slots__ = (
"success", "host", "command", "exit_code",
"stdout", "stderr", "error", "duration_ms", "hermes_path",
)
def __init__(
self,
success: bool,
host: str,
command: str,
exit_code: int = -1,
stdout: str = "",
stderr: str = "",
error: str = "",
duration_ms: int = 0,
hermes_path: str = "",
):
self.success = success
self.host = host
self.command = command
self.exit_code = exit_code
self.stdout = stdout
self.stderr = stderr
self.error = error
self.duration_ms = duration_ms
self.hermes_path = hermes_path
def to_dict(self) -> dict:
return {
"success": self.success,
"host": self.host,
"exit_code": self.exit_code,
"error": self.error,
"duration_ms": self.duration_ms,
"hermes_path": self.hermes_path,
"stderr_tail": self.stderr[-200:] if self.stderr else "",
}
@property
def failure_reason(self) -> str:
if self.success:
return ""
if self.error:
return self.error
if "No such file" in self.stderr or "command not found" in self.stderr:
return f"Hermes binary not found on {self.host}"
if self.exit_code != 0:
return f"Remote command exited {self.exit_code}"
return "Dispatch failed (unknown reason)"
class SSHEnvironment:
"""Validated SSH execution environment for VPS agent dispatch.
Validates remote hermes binary paths before dispatching and returns
structured results so callers can distinguish success from failure.
"""
def __init__(
self,
host: str,
agent: str = "",
ssh_key: str = "",
ssh_port: int = 22,
timeout: int = _SSH_TIMEOUT,
hermes_path: str = "",
):
self.host = host
self.agent = agent
self.ssh_key = ssh_key
self.ssh_port = ssh_port
self.timeout = timeout
self.hermes_path = hermes_path
self._validated_path: str = ""
def _ssh_base_cmd(self) -> list[str]:
cmd = ["ssh", "-o", "StrictHostKeyChecking=accept-new"]
cmd.extend(["-o", "ConnectTimeout=10"])
cmd.extend(["-o", "BatchMode=yes"])
if self.ssh_key:
cmd.extend(["-i", self.ssh_key])
if self.ssh_port != 22:
cmd.extend(["-p", str(self.ssh_port)])
cmd.append(self.host)
return cmd
def _resolve_hermes_paths(self) -> list[str]:
if self.hermes_path:
return [self.hermes_path]
paths = []
for tmpl in _DEFAULT_HERMES_PATHS:
path = tmpl.format(agent=self.agent) if "{agent}" in tmpl else tmpl
paths.append(path)
return paths
def validate_remote_hermes_path(self) -> str:
"""Probe the remote host for a working hermes binary.
Returns the validated path on success, raises RuntimeError on failure.
Caches the result so validation is only done once per instance.
"""
if self._validated_path:
return self._validated_path
candidates = self._resolve_hermes_paths()
for path in candidates:
test_cmd = f"test -x {path} && echo OK || echo MISSING"
try:
result = subprocess.run(
self._ssh_base_cmd() + [test_cmd],
capture_output=True, text=True, timeout=self.timeout,
)
if result.returncode == 0 and "OK" in (result.stdout or ""):
logger.info("SSH %s: hermes validated at %s", self.host, path)
self._validated_path = path
return path
except subprocess.TimeoutExpired:
logger.warning("SSH %s: timeout probing %s", self.host, path)
continue
except Exception as exc:
logger.debug("SSH %s: probe %s failed: %s", self.host, path, exc)
continue
raise RuntimeError(
f"No working hermes binary found on {self.host}. "
f"Checked: {', '.join(candidates)}."
)
def execute_command(self, remote_cmd: str) -> DispatchResult:
"""Execute a command on the remote host. Returns DispatchResult."""
t0 = time.monotonic()
full_cmd = self._ssh_base_cmd() + [remote_cmd]
try:
result = subprocess.run(
full_cmd, capture_output=True, text=True, timeout=self.timeout,
)
elapsed = int((time.monotonic() - t0) * 1000)
stderr = (result.stderr or "").strip()
stdout = (result.stdout or "").strip()
if result.returncode != 0:
return DispatchResult(
success=False, host=self.host, command=remote_cmd,
exit_code=result.returncode, stdout=stdout, stderr=stderr,
error=stderr.split("\n")[0] if stderr else f"exit code {result.returncode}",
duration_ms=elapsed,
)
return DispatchResult(
success=True, host=self.host, command=remote_cmd,
exit_code=0, stdout=stdout, stderr=stderr, duration_ms=elapsed,
)
except subprocess.TimeoutExpired:
elapsed = int((time.monotonic() - t0) * 1000)
return DispatchResult(
success=False, host=self.host, command=remote_cmd,
error=f"SSH timed out after {self.timeout}s", duration_ms=elapsed,
)
except Exception as exc:
elapsed = int((time.monotonic() - t0) * 1000)
return DispatchResult(
success=False, host=self.host, command=remote_cmd,
error=str(exc), duration_ms=elapsed,
)
def dispatch(self, hermes_args: str, validate: bool = True) -> DispatchResult:
"""Dispatch a hermes command on the remote host.
Args:
hermes_args: Arguments to pass to hermes (e.g. "cron tick").
validate: If True, validate the hermes binary exists first.
Returns DispatchResult. Only success=True if command actually ran.
"""
if validate:
try:
hermes_path = self.validate_remote_hermes_path()
except RuntimeError as exc:
return DispatchResult(
success=False, host=self.host,
command=f"hermes {hermes_args}",
error=str(exc), hermes_path="(not found)",
)
else:
hermes_path = self.hermes_path or "hermes"
remote_cmd = f"{hermes_path} {hermes_args}"
result = self.execute_command(remote_cmd)
result.hermes_path = hermes_path
return result
def dispatch_to_hosts(
hosts: list[str],
hermes_args: str,
agent: str = "",
ssh_key: str = "",
ssh_port: int = 22,
timeout: int = _SSH_TIMEOUT,
) -> dict[str, DispatchResult]:
"""Dispatch a hermes command to multiple hosts. Returns host -> DispatchResult."""
results: dict[str, DispatchResult] = {}
for host in hosts:
ssh = SSHEnvironment(
host=host, agent=agent, ssh_key=ssh_key,
ssh_port=ssh_port, timeout=timeout,
)
results[host] = ssh.dispatch(hermes_args)
logger.info(
"Dispatch %s: %s", host,
"OK" if results[host].success else results[host].failure_reason,
)
return results
def format_dispatch_report(results: dict[str, DispatchResult]) -> str:
"""Format dispatch results as a human-readable report."""
lines = []
ok = [r for r in results.values() if r.success]
failed = [r for r in results.values() if not r.success]
lines.append(f"Dispatch report: {len(ok)} OK, {len(failed)} failed")
lines.append("")
for host, result in results.items():
status = "OK" if result.success else "FAILED"
line = f" {host}: {status}"
if not result.success:
line += f" -- {result.failure_reason}"
if result.duration_ms:
line += f" ({result.duration_ms}ms)"
lines.append(line)
if failed:
lines.append("")
lines.append("Failed dispatches:")
for host, result in results.items():
if not result.success:
lines.append(f" {host}: {result.failure_reason}")
if result.stderr:
lines.append(f" stderr: {result.stderr[-150:]}")
return "\n".join(lines)

View File

@@ -0,0 +1,64 @@
"""Tests for memory backends (#322)."""
import json, pytest
from agent.memory import Entry, Null, Local, Honcho, score, evaluate, get_backend, reset
@pytest.fixture()
def loc(tmp_path): return Local(p=tmp_path/"t.db")
@pytest.fixture()
def rst(): reset(); yield; reset()
class TestEntry:
def test_defaults(self):
e = Entry(key="k",value="v",uid="u"); assert e.created > 0
class TestNull:
def test_ok(self): assert Null().ok()
def test_put(self): assert Null().put("u","k","v")
def test_get(self): assert Null().get("u","k") is None
def test_find(self): assert Null().find("u","q") == []
def test_not_cloud(self): assert not Null().cloud
class TestLocal:
def test_ok(self,loc): assert loc.ok()
def test_put_get(self,loc):
assert loc.put("u","lang","py")
e = loc.get("u","lang"); assert e.value == "py"
def test_meta(self,loc):
loc.put("u","k","v",{"type":"pattern"})
assert loc.get("u","k").etype == "pattern"
def test_update(self,loc):
loc.put("u","k","v1"); loc.put("u","k","v2")
assert loc.get("u","k").value == "v2"
def test_find(self,loc):
loc.put("u","pref_py","1"); loc.put("u","pref_vim","1"); loc.put("u","th","d")
assert len(loc.find("u","pref")) == 2
def test_all(self,loc):
loc.put("u","a","1"); loc.put("u","b","2")
assert len(loc.all("u")) == 2
def test_rm(self,loc):
loc.put("u","k","v"); assert loc.rm("u","k"); assert loc.get("u","k") is None
def test_not_cloud(self,loc): assert not loc.cloud
def test_users(self,loc):
loc.put("u1","k","v1"); loc.put("u2","k","v2")
assert loc.get("u1","k").value == "v1"
class TestHoncho:
def test_no_key(self,monkeypatch):
monkeypatch.delenv("HONCHO_API_KEY",raising=False)
assert not Honcho().ok()
def test_cloud(self): assert Honcho().cloud
class TestScore:
def test_null(self):
r = score(Null()); assert r["score"] > 0
def test_local(self,loc):
r = score(loc); assert r["ok"]; assert r["score"] >= 80; assert r["grade"] == "A"
def test_eval(self,rst):
r = evaluate(); assert len(r["results"]) >= 2; assert "recommendation" in r
class TestSingleton:
def test_default(self,rst,monkeypatch):
monkeypatch.delenv("HONCHO_API_KEY",raising=False)
assert isinstance(get_backend(), Local)
def test_cache(self,rst): assert get_backend() is get_backend()

View File

@@ -0,0 +1,36 @@
"""Memory backend tool — cross-session prefs. Local default, Honcho opt-in."""
import json
from tools.registry import registry
def memory_backend(action, uid="default", key=None, value=None, query=None, meta=None):
from agent.memory import get_backend, evaluate
b = get_backend()
if action=="info": return json.dumps({"ok":True,"backend":b.name,"cloud":b.cloud,"available":b.ok()})
if action=="store":
if not key or value is None: return json.dumps({"ok":False,"error":"key+value required"})
return json.dumps({"ok":b.put(uid,key,value,meta),"key":key})
if action=="get":
if not key: return json.dumps({"ok":False,"error":"key required"})
e = b.get(uid,key)
return json.dumps({"ok":True,"key":e.key,"value":e.value,"type":e.etype}) if e else json.dumps({"ok":False,"error":"not found"})
if action=="query":
if not query: return json.dumps({"ok":False,"error":"query required"})
r = b.find(uid,query)
return json.dumps({"ok":True,"results":[{"key":e.key,"value":e.value} for e in r],"count":len(r)})
if action=="list":
r = b.all(uid)
return json.dumps({"ok":True,"entries":[{"key":e.key,"type":e.etype} for e in r],"count":len(r)})
if action=="delete":
if not key: return json.dumps({"ok":False,"error":"key required"})
return json.dumps({"ok":b.rm(uid,key)})
if action=="evaluate": return json.dumps({"ok":True,**evaluate()})
return json.dumps({"ok":False,"error":f"unknown: {action}"})
registry.register(name="memory_backend",toolset="skills",schema={
"name":"memory_backend","description":"Cross-session memory backends. Local SQLite default, Honcho cloud opt-in. Zero overhead when disabled.",
"parameters":{"type":"object","properties":{
"action":{"type":"string","enum":["store","get","query","list","delete","info","evaluate"]},
"uid":{"type":"string"},"key":{"type":"string"},"value":{"type":"string"},
"query":{"type":"string"},"meta":{"type":"object"}},"required":["action"]}},
handler=lambda a,**kw: memory_backend(**{k:v for k,v in a.items() if v is not None}),emoji="🧠")