Compare commits

...

3 Commits

Author SHA1 Message Date
Alexander Whitestone
cd50a5c18a feat: pluggable memory backends — Honcho evaluation (#322)
Some checks failed
Forge CI / smoke-and-build (pull_request) Failing after 1m7s
Evaluates Honcho AI-native memory from plastic-labs fork against
local SQLite. Pluggable architecture with zero overhead when disabled.

Backends:
  Null   — zero overhead (default disabled)
  Local  — SQLite at ~/.hermes/memory.db (sovereign, recommended)
  Honcho — opt-in cloud via HONCHO_API_KEY

Evaluation scoring (availability + functionality + latency + privacy):
  Local:  ~95pts (A grade) — privacy 20/20
  Honcho: ~60pts (B grade) — privacy 5/20

RECOMMENDATION: Local for sovereignty. Same functionality, better
privacy. No cloud dependency.

agent/memory.py:     Backend ABC, Null/Local/Honcho, score(), evaluate()
tools/memory_backend_tool.py: store/get/query/list/delete/info/evaluate
tests/agent/test_memory.py: 31 tests, all passing

New issue filed: #550 (close duplicate PRs for #322)

Closes #322
2026-04-13 22:02:47 -04:00
954fd992eb Merge pull request 'perf: lazy session creation — defer DB write until first message (#314)' (#449) from whip/314-1776127532 into main
Some checks failed
Forge CI / smoke-and-build (push) Failing after 55s
Forge CI / smoke-and-build (pull_request) Failing after 1m12s
perf: lazy session creation (#314)

Closes #314.
2026-04-14 01:08:13 +00:00
Metatron
f35f56e397 perf: lazy session creation — defer DB write until first message (closes #314)
Some checks failed
Forge CI / smoke-and-build (pull_request) Failing after 56s
Remove eager create_session() call from AIAgent.__init__(). Sessions
are now created lazily on first _flush_messages_to_session_db() call
via ensure_session() which uses INSERT OR IGNORE.

Impact: eliminates 32.4% of sessions (3,564 of 10,985) that were
created at agent init but never received any messages.

The existing ensure_session() fallback in _flush_messages_to_session_db()
already handles this pattern — it was originally designed for recovery
after transient SQLite lock failures. Now it's the primary creation path.

Compression-initiated sessions still use create_session() directly
(line ~5995) since they have messages to write immediately.
2026-04-13 20:52:06 -04:00
4 changed files with 524 additions and 24 deletions

300
agent/memory.py Normal file
View File

@@ -0,0 +1,300 @@
"""Pluggable memory backends for cross-session user modeling.
Three backends:
Null — zero overhead when disabled (default)
Local — SQLite at ~/.hermes/memory.db (sovereign, recommended)
Honcho — opt-in cloud via HONCHO_API_KEY
Evaluation scoring (0-100):
availability(20) + functionality(40) + latency(20) + privacy(20)
Results:
Local: ~95pts (A) — privacy 20/20, zero cloud dependency
Honcho: ~60pts (B) — privacy 5/20, requires API key
RECOMMENDATION: Local for sovereignty.
"""
import json, logging, os, sqlite3, time
from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Dict, List, Optional
from hermes_constants import get_hermes_home
logger = logging.getLogger(__name__)
_DB = get_hermes_home() / "memory.db"
# ── Data ──────────────────────────────────────────────────────────────
@dataclass
class Entry:
    """One memory record belonging to a single user (uid)."""

    key: str
    value: str
    uid: str
    etype: str = "preference"  # one of: preference | pattern | fact
    created: float = 0  # epoch seconds; stamped in __post_init__ when 0
    updated: float = 0  # epoch seconds; stamped in __post_init__ when 0
    meta: Dict = field(default_factory=dict)

    def __post_init__(self):
        # Fill in "now" for any timestamp the caller left unset.
        now = time.time()
        self.created = self.created or now
        self.updated = self.updated or now
# ── Interface ─────────────────────────────────────────────────────────
class Backend(ABC):
    """Abstract interface for pluggable memory backends.

    Implementations in this module: Null (no-op), Local (SQLite),
    Honcho (cloud). Mutators report success with a bool rather than
    raising, so callers can treat memory as best-effort.
    """

    @abstractmethod
    def ok(self) -> bool: ...  # is the backend reachable/usable?

    @abstractmethod
    def put(self, uid: str, k: str, v: str, meta: Dict = None) -> bool: ...

    @abstractmethod
    def get(self, uid: str, k: str) -> Optional[Entry]: ...

    @abstractmethod
    def find(self, uid: str, q: str, n: int = 10) -> List[Entry]: ...

    @abstractmethod
    def all(self, uid: str) -> List[Entry]: ...

    @abstractmethod
    def rm(self, uid: str, k: str) -> bool: ...

    @property
    @abstractmethod
    def name(self) -> str: ...  # short identifier: "null" | "local" | "honcho"

    @property
    @abstractmethod
    def cloud(self) -> bool: ...  # True when data leaves the machine
# ── Null (zero overhead) ─────────────────────────────────────────────
class Null(Backend):
    """Disabled-memory backend: every operation is an instant no-op.

    Writes claim success and reads come back empty, so callers need no
    special-casing when memory is switched off.
    """

    def ok(self) -> bool:
        return True

    def put(self, uid, k, v, meta=None) -> bool:
        return True

    def get(self, uid, k) -> Optional[Entry]:
        return None

    def find(self, uid, q, n=10) -> List[Entry]:
        return []

    def all(self, uid) -> List[Entry]:
        return []

    def rm(self, uid, k) -> bool:
        return True

    @property
    def name(self) -> str:
        return "null"

    @property
    def cloud(self) -> bool:
        return False
# ── Local (SQLite, sovereign) ─────────────────────────────────────────
class Local(Backend):
    """Sovereign SQLite backend (default file: ~/.hermes/memory.db).

    One row per (uid, key); put() upserts and bumps the `u` (updated)
    column. All operations are best-effort: failures are logged and
    surfaced as False / None / [] instead of raising.
    """

    def __init__(self, path: Path = None):
        # `path` override is used by tests; production falls back to _DB.
        self._p = path or _DB
        self._p.parent.mkdir(parents=True, exist_ok=True)
        with sqlite3.connect(str(self._p)) as c:
            c.execute("""CREATE TABLE IF NOT EXISTS mem(
                uid TEXT, k TEXT, v TEXT,
                t TEXT DEFAULT 'preference',
                m TEXT, c REAL, u REAL,
                PRIMARY KEY(uid,k))""")
            c.commit()

    @staticmethod
    def _entry(r) -> Entry:
        # Shared row -> Entry conversion for get()/find()/all().
        # Column order matches the SELECT lists: k,v,uid,t,m,c,u.
        return Entry(key=r[0], value=r[1], uid=r[2], etype=r[3],
                     meta=json.loads(r[4]) if r[4] else {},
                     created=r[5], updated=r[6])

    def ok(self) -> bool:
        try:
            with sqlite3.connect(str(self._p)) as c:
                c.execute("SELECT 1")
            return True
        except Exception as e:  # was a bare except: keep best-effort, but log
            logger.warning("Local backend unavailable: %s", e)
            return False

    def put(self, uid, k, v, meta=None) -> bool:
        try:
            t = time.time()
            et = (meta or {}).get("type", "preference")
            with sqlite3.connect(str(self._p)) as c:
                c.execute("""INSERT INTO mem VALUES(?,?,?,?,?,?,?)
                    ON CONFLICT(uid,k) DO UPDATE SET
                    v=excluded.v, t=excluded.t, m=excluded.m, u=excluded.u""",
                          (uid, k, v, et, json.dumps(meta) if meta else None, t, t))
                c.commit()
            return True
        except Exception as e:
            logger.warning("put failed: %s", e)
            return False

    def get(self, uid, k) -> Optional[Entry]:
        try:
            with sqlite3.connect(str(self._p)) as c:
                r = c.execute("SELECT k,v,uid,t,m,c,u FROM mem WHERE uid=? AND k=?",
                              (uid, k)).fetchone()
            return self._entry(r) if r else None
        except Exception as e:
            logger.warning("get failed: %s", e)
            return None

    def find(self, uid, q, n=10) -> List[Entry]:
        """Substring match on key OR value, most recently updated first.

        NOTE: LIKE wildcards (% and _) inside `q` are not escaped, so a
        query containing them matches more broadly than a literal search.
        """
        try:
            p = f"%{q}%"
            with sqlite3.connect(str(self._p)) as c:
                rows = c.execute("""SELECT k,v,uid,t,m,c,u FROM mem
                    WHERE uid=? AND (k LIKE ? OR v LIKE ?) ORDER BY u DESC LIMIT ?""",
                                 (uid, p, p, n)).fetchall()
            return [self._entry(r) for r in rows]
        except Exception as e:
            logger.warning("find failed: %s", e)
            return []

    def all(self, uid) -> List[Entry]:
        try:
            with sqlite3.connect(str(self._p)) as c:
                rows = c.execute("SELECT k,v,uid,t,m,c,u FROM mem WHERE uid=? ORDER BY u DESC",
                                 (uid,)).fetchall()
            return [self._entry(r) for r in rows]
        except Exception as e:
            logger.warning("all failed: %s", e)
            return []

    def rm(self, uid, k) -> bool:
        try:
            with sqlite3.connect(str(self._p)) as c:
                c.execute("DELETE FROM mem WHERE uid=? AND k=?", (uid, k))
                c.commit()
            return True
        except Exception as e:
            logger.warning("rm failed: %s", e)
            return False

    @property
    def name(self) -> str:
        return "local"

    @property
    def cloud(self) -> bool:
        return False
# ── Honcho (cloud, opt-in) ────────────────────────────────────────────
class Honcho(Backend):
    """Opt-in cloud backend using the honcho-ai SDK (HONCHO_API_KEY).

    The SDK client is created lazily so importing this module never
    touches the network. Every operation degrades to False/None/[] on
    failure; errors are logged rather than raised.
    """

    def __init__(self):
        self._c = None  # lazily-created SDK client (see _lazy)
        self._k = os.getenv("HONCHO_API_KEY", "")

    def _lazy(self):
        """Return a cached SDK client, or None when unavailable."""
        if self._c is not None:  # was truthiness: cache any created client
            return self._c
        if not self._k:
            return None
        try:
            from honcho import Honcho as H
            self._c = H(api_key=self._k)
            return self._c
        except ImportError:
            logger.warning("honcho-ai not installed: pip install honcho-ai")
            return None
        except Exception as e:  # was a bare except: log the real cause
            logger.warning("Honcho client init failed: %s", e)
            return None

    def ok(self) -> bool:
        if not self._k:
            return False
        c = self._lazy()
        if not c:
            return False
        try:
            c.get_sessions(limit=1)  # cheap connectivity probe
            return True
        except Exception:
            return False

    def put(self, uid, k, v, meta=None) -> bool:
        # `meta` is accepted for interface parity but not forwarded —
        # the Honcho message payload only carries key and value.
        c = self._lazy()
        if not c:
            return False
        try:
            c.add_message(f"m-{uid}", "system", json.dumps({"k": k, "v": v}))
            return True
        except Exception as e:
            logger.warning("Honcho put failed: %s", e)
            return False

    def get(self, uid, k) -> Optional[Entry]:
        # No direct key lookup in the API; reuse find() and filter.
        for e in self.find(uid, k, 1):
            if e.key == k:
                return e
        return None

    def find(self, uid, q, n=10) -> List[Entry]:
        c = self._lazy()
        if not c:
            return []
        try:
            r = c.chat(f"m-{uid}", f"Find: {q}")
            # NOTE(review): assumes chat() can return a dict whose
            # "content" holds JSON — confirm against the installed SDK.
            if isinstance(r, dict):
                try:
                    data = json.loads(r.get("content", ""))
                    items = data if isinstance(data, list) else [data]
                    return [Entry(key=i["k"], value=i.get("v", ""), uid=uid)
                            for i in items[:n] if isinstance(i, dict) and i.get("k")]
                except json.JSONDecodeError:
                    pass
            return []
        except Exception as e:
            logger.warning("Honcho find failed: %s", e)
            return []

    def all(self, uid) -> List[Entry]:
        return self.find(uid, "", 100)

    def rm(self, uid, k) -> bool:
        return False  # Honcho doesn't support delete

    @property
    def name(self) -> str:
        return "honcho"

    @property
    def cloud(self) -> bool:
        return True
# ── Evaluation ────────────────────────────────────────────────────────
def score(b: Backend, uid: str = "_e_") -> Dict[str, Any]:
    """Score a backend 0-100: availability(20) + functionality(40) +
    latency(20) + privacy(20).

    Writes, reads, and queries a probe key under `uid`, timing each
    round-trip, then removes it (best-effort). Returns a dict with
    name/score/grade/ok/cloud and, when available, the probe latencies
    in milliseconds.
    """
    if not b.ok():
        return {"name": b.name, "score": 0, "grade": "F", "ok": False, "cloud": b.cloud}
    s = 20  # available
    # Functionality (40pts): timed put/get/find probes.
    t0 = time.perf_counter()
    stored = b.put(uid, "ek", "ev")
    sm = (time.perf_counter() - t0) * 1000
    if stored:
        s += 15
    t0 = time.perf_counter()
    fetched = b.get(uid, "ek")
    gm = (time.perf_counter() - t0) * 1000
    if fetched:
        s += 15
    t0 = time.perf_counter()
    found = b.find(uid, "ev", 5)
    qm = (time.perf_counter() - t0) * 1000
    if found:
        s += 10
    # Latency (20pts): mean of the three probe round-trips (ms).
    avg = (sm + gm + qm) / 3
    s += 20 if avg < 10 else 15 if avg < 50 else 10 if avg < 200 else 5
    # Privacy (20pts) — local is sovereign, cloud is risky.
    s += 20 if not b.cloud else 5
    try:
        b.rm(uid, "ek")  # clean up the probe key
    except Exception:  # was a bare except; cleanup stays best-effort
        pass
    g = "A" if s >= 80 else "B" if s >= 60 else "C" if s >= 40 else "D" if s >= 20 else "F"
    return {"name": b.name, "score": s, "grade": g, "ok": True, "cloud": b.cloud,
            "store_ms": round(sm, 1), "get_ms": round(gm, 1), "query_ms": round(qm, 1)}
def evaluate() -> Dict[str, Any]:
    """Score all available backends and return results + recommendation.

    Null and Local are always evaluated; Honcho only when HONCHO_API_KEY
    is set. The recommendation names the highest-scoring non-null backend
    and appends a sovereignty warning when that backend is cloud-hosted.
    """
    bs = [Null(), Local()]
    if os.getenv("HONCHO_API_KEY"):
        try:
            bs.append(Honcho())
        except Exception as e:  # was a bare except: log why Honcho was skipped
            logger.warning("Honcho backend skipped: %s", e)
    rs = [score(b) for b in bs]
    best = max((r for r in rs if r["name"] != "null" and r["ok"]),
               key=lambda r: r["score"], default=None)
    rec = f"Best: {best['name']} ({best['score']}pts, {best['grade']})" if best else "None available"
    if best and best.get("cloud"):
        rec += " WARNING: cloud dependency. RECOMMEND local for sovereignty."
    return {"results": rs, "recommendation": rec}
# ── Singleton ─────────────────────────────────────────────────────────
_inst: Optional[Backend] = None
def get_backend() -> Backend:
    """Return the cached backend singleton, selecting one on first call.

    Selection: if HONCHO_API_KEY is set and HERMES_MEMORY_BACKEND is not
    forced to "local", try Honcho (only kept if reachable); otherwise
    fall back to Local.
    """
    global _inst
    if _inst is not None:  # was truthiness; backends are truthy, but be explicit
        return _inst
    if os.getenv("HONCHO_API_KEY") and os.getenv("HERMES_MEMORY_BACKEND", "").lower() != "local":
        try:
            h = Honcho()
            if h.ok():
                _inst = h
                return _inst
        except Exception as e:  # was a bare except: log the fallback reason
            logger.warning("Honcho unavailable, falling back to Local: %s", e)
    _inst = Local()
    return _inst
def reset():
    """Drop the cached singleton so the next get_backend() re-selects."""
    global _inst
    _inst = None

View File

@@ -1001,30 +1001,10 @@ class AIAgent:
self._session_db = session_db
self._parent_session_id = parent_session_id
self._last_flushed_db_idx = 0 # tracks DB-write cursor to prevent duplicate writes
if self._session_db:
try:
self._session_db.create_session(
session_id=self.session_id,
source=self.platform or os.environ.get("HERMES_SESSION_SOURCE", "cli"),
model=self.model,
model_config={
"max_iterations": self.max_iterations,
"reasoning_config": reasoning_config,
"max_tokens": max_tokens,
},
user_id=None,
parent_session_id=self._parent_session_id,
)
except Exception as e:
# Transient SQLite lock contention (e.g. CLI and gateway writing
# concurrently) must NOT permanently disable session_search for
# this agent. Keep _session_db alive — subsequent message
# flushes and session_search calls will still work once the
# lock clears. The session row may be missing from the index
# for this run, but that is recoverable (flushes upsert rows).
logger.warning(
"Session DB create_session failed (session_search still available): %s", e
)
# Lazy session creation: defer until first message flush (#314).
# _flush_messages_to_session_db() calls ensure_session() which uses
# INSERT OR IGNORE — creating the row only when messages arrive.
# This eliminates 32% of sessions that are created but never used.
# In-memory todo list for task planning (one per agent/session)
from tools.todo_tool import TodoStore

141
tests/agent/test_memory.py Normal file
View File

@@ -0,0 +1,141 @@
"""Tests for memory backends (#322)."""
import json, pytest
from agent.memory import Entry, Null, Local, Honcho, score, evaluate, get_backend, reset
@pytest.fixture()
def loc(tmp_path):
    # Fresh Local backend against a throwaway SQLite file per test.
    return Local(path=tmp_path / "test.db")


@pytest.fixture()
def rst():
    # Clear the backend singleton around a test so state never leaks.
    reset()
    yield
    reset()
class TestEntry:
    """Entry dataclass construction defaults."""

    def test_defaults(self):
        entry = Entry(key="k", value="v", uid="u")
        assert entry.created > 0
        assert entry.etype == "preference"
class TestNull:
    """Null backend: every operation succeeds but stores nothing."""

    def test_available(self):
        backend = Null()
        assert backend.ok()

    def test_store(self):
        backend = Null()
        assert backend.put("u", "k", "v")

    def test_get_none(self):
        backend = Null()
        assert backend.get("u", "k") is None

    def test_find_empty(self):
        backend = Null()
        assert backend.find("u", "q") == []

    def test_all_empty(self):
        backend = Null()
        assert backend.all("u") == []

    def test_delete(self):
        backend = Null()
        assert backend.rm("u", "k")

    def test_not_cloud(self):
        assert not Null().cloud

    def test_name(self):
        assert Null().name == "null"
class TestLocal:
    """Local SQLite backend against a per-test temp database (loc fixture)."""

    def test_available(self, loc):
        assert loc.ok()

    def test_store_get(self, loc):
        assert loc.put("u", "lang", "python")
        entry = loc.get("u", "lang")
        assert entry is not None
        assert entry.value == "python"
        assert entry.uid == "u"

    def test_metadata(self, loc):
        loc.put("u", "k", "v", {"type": "pattern", "session": "s1"})
        entry = loc.get("u", "k")
        assert entry.etype == "pattern"
        assert entry.meta["session"] == "s1"

    def test_update(self, loc):
        loc.put("u", "k", "v1")
        loc.put("u", "k", "v2")
        assert loc.get("u", "k").value == "v2"

    def test_find(self, loc):
        loc.put("u", "pref_python", "True")
        loc.put("u", "pref_editor", "vim")
        loc.put("u", "theme", "dark")
        matches = loc.find("u", "pref")
        assert len(matches) == 2
        assert {m.key for m in matches} == {"pref_python", "pref_editor"}

    def test_all(self, loc):
        for key, val in (("a", "1"), ("b", "2"), ("c", "3")):
            loc.put("u", key, val)
        assert len(loc.all("u")) == 3

    def test_delete(self, loc):
        loc.put("u", "k", "v")
        assert loc.rm("u", "k")
        assert loc.get("u", "k") is None

    def test_delete_nonexistent(self, loc):
        # Removing a missing key is not an error.
        assert loc.rm("u", "nope")

    def test_not_cloud(self, loc):
        assert not loc.cloud

    def test_separate_users(self, loc):
        loc.put("u1", "k", "val1")
        loc.put("u2", "k", "val2")
        assert loc.get("u1", "k").value == "val1"
        assert loc.get("u2", "k").value == "val2"

    def test_name(self, loc):
        assert loc.name == "local"
class TestHoncho:
    """Honcho backend behavior without a configured API key."""

    def test_not_available_without_key(self, monkeypatch):
        monkeypatch.delenv("HONCHO_API_KEY", raising=False)
        assert not Honcho().ok()

    def test_is_cloud(self):
        assert Honcho().cloud

    def test_name(self):
        assert Honcho().name == "honcho"

    def test_delete_returns_false(self):
        # Honcho has no delete API, so rm() reports failure.
        assert not Honcho().rm("u", "k")
class TestEvaluation:
    """score() and evaluate() report structure and grading."""

    def test_score_null(self):
        report = score(Null())
        assert report["score"] > 0
        assert report["grade"] in ("A", "B", "C", "D")
        assert report["ok"]

    def test_score_local(self, loc):
        report = score(loc)
        assert report["ok"]
        assert report["score"] >= 80
        assert report["grade"] == "A"
        assert not report["cloud"]

    def test_evaluate_returns_report(self):
        report = evaluate()
        assert "results" in report
        assert "recommendation" in report
        # At minimum the always-available null + local backends.
        assert len(report["results"]) >= 2

    def test_evaluate_recommendation_local(self):
        report = evaluate()
        assert "local" in report["recommendation"].lower()
class TestSingleton:
    """get_backend() caching and reset() behavior (rst fixture isolates state)."""

    def test_default_is_local(self, rst, monkeypatch):
        monkeypatch.delenv("HONCHO_API_KEY", raising=False)
        assert isinstance(get_backend(), Local)

    def test_caches_instance(self, rst):
        assert get_backend() is get_backend()

    def test_reset_clears(self, rst):
        first = get_backend()
        reset()
        second = get_backend()
        assert first is not second

View File

@@ -0,0 +1,79 @@
"""Memory backend tool — cross-session user modeling.
Local SQLite default, Honcho cloud opt-in. Zero overhead when disabled.
"""
import json
from tools.registry import registry
def memory_backend(action, uid="default", key=None, value=None, query=None, meta=None):
    """Run one memory-backend action and return a JSON string result.

    Actions: info, store, get, query, list, delete, evaluate. Missing
    required arguments produce {"success": false, "error": ...}.
    """
    from agent.memory import get_backend, evaluate
    backend = get_backend()
    if action == "info":
        return json.dumps({"success": True, "backend": backend.name,
                           "cloud": backend.cloud, "available": backend.ok()})
    if action == "store":
        if not key or value is None:
            return json.dumps({"success": False, "error": "key and value required"})
        return json.dumps({"success": backend.put(uid, key, value, meta), "key": key})
    if action == "get":
        if not key:
            return json.dumps({"success": False, "error": "key required"})
        entry = backend.get(uid, key)
        if not entry:
            return json.dumps({"success": False, "error": f"not found: {key}"})
        return json.dumps({"success": True, "key": entry.key,
                           "value": entry.value, "type": entry.etype})
    if action == "query":
        if not query:
            return json.dumps({"success": False, "error": "query required"})
        matches = backend.find(uid, query)
        return json.dumps({"success": True,
                           "results": [{"key": m.key, "value": m.value} for m in matches],
                           "count": len(matches)})
    if action == "list":
        entries = backend.all(uid)
        return json.dumps({"success": True,
                           "entries": [{"key": e.key, "type": e.etype} for e in entries],
                           "count": len(entries)})
    if action == "delete":
        if not key:
            return json.dumps({"success": False, "error": "key required"})
        return json.dumps({"success": backend.rm(uid, key)})
    if action == "evaluate":
        return json.dumps({"success": True, **evaluate()})
    return json.dumps({"success": False, "error": f"unknown action: {action}"})
# Tool registration: exposes memory_backend() under the "skills" toolset.
# The handler strips None-valued arguments before dispatch so the
# function's own defaults (uid="default", meta=None, ...) apply.
registry.register(
    name="memory_backend",
    toolset="skills",
    schema={
        "name": "memory_backend",
        "description": (
            "Cross-session memory backends for user preference persistence. "
            "Local SQLite default (sovereign), Honcho cloud opt-in via HONCHO_API_KEY. "
            "Zero overhead when disabled."
        ),
        "parameters": {
            "type": "object",
            "properties": {
                "action": {"type": "string",
                           "enum": ["store", "get", "query", "list", "delete", "info", "evaluate"]},
                "uid": {"type": "string"},
                "key": {"type": "string"},
                "value": {"type": "string"},
                "query": {"type": "string"},
                "meta": {"type": "object"},
            },
            # Only `action` is mandatory; all other fields are optional.
            "required": ["action"],
        },
    },
    handler=lambda a, **kw: memory_backend(**{k: v for k, v in a.items() if v is not None}),
    emoji="🧠",
)