Files
hermes-agent/plugins/memory/mem0_local/__init__.py

382 lines
13 KiB
Python
Raw Normal View History

"""Mem0 Local memory provider - ChromaDB-backed, no API key required.
Sovereign deployment: all data stays on the user's machine. Uses ChromaDB
for vector storage and simple heuristic fact extraction (no server-side LLM).
Compatible tool schemas with the cloud Mem0 provider:
mem0_profile - retrieve all stored memories
mem0_search - semantic search by meaning
mem0_conclude - store a fact verbatim
Config via $HERMES_HOME/mem0-local.json or environment variables:
MEM0_LOCAL_PATH - storage directory (default: $HERMES_HOME/mem0-local/)
"""
from __future__ import annotations
import hashlib
import json
import logging
import os
import re
import threading
import time
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict, List, Optional
from agent.memory_provider import MemoryProvider
from tools.registry import tool_error
logger = logging.getLogger(__name__)
# Circuit breaker
_BREAKER_THRESHOLD = 5
_BREAKER_COOLDOWN_SECS = 120
def _load_config() -> dict:
    """Load local config from env vars, with $HERMES_HOME/mem0-local.json overrides.

    Returns:
        dict with at least ``storage_path``, ``collection_prefix`` and
        ``max_memories``. ``storage_path`` is guaranteed non-empty: when neither
        MEM0_LOCAL_PATH nor the JSON file sets it, it defaults to
        $HERMES_HOME/mem0-local.
    """
    from hermes_constants import get_hermes_home

    config = {
        "storage_path": os.environ.get("MEM0_LOCAL_PATH", ""),
        "collection_prefix": "mem0",
        "max_memories": 10000,
    }
    config_path = get_hermes_home() / "mem0-local.json"
    if config_path.exists():
        try:
            file_cfg = json.loads(config_path.read_text(encoding="utf-8"))
            # None/empty values in the file must not clobber the defaults above.
            config.update({k: v for k, v in file_cfg.items()
                           if v is not None and v != ""})
        except Exception as e:
            # Best-effort: a malformed config file falls back to defaults, but
            # leave a trace instead of swallowing the error silently.
            logger.debug("Mem0 local: ignoring unreadable config %s: %s", config_path, e)
    if not config["storage_path"]:
        config["storage_path"] = str(get_hermes_home() / "mem0-local")
    return config
# Simple fact extraction patterns (no LLM required)
_FACT_PATTERNS = [
(r"(?:my|the user'?s?)\s+(?:name|username)\s+(?:is|=)\s+(.+?)(?:\.|$)", "user.name"),
(r"(?:i|user)\s+(?:prefer|like|use|want|need)s?\s+(.+?)(?:\.|$)", "preference"),
(r"(?:i|user)\s+(?:work|am)\s+(?:at|as|on|with)\s+(.+?)(?:\.|$)", "context"),
(r"(?:remember|note|save|store)[:\s]+(.+?)(?:\.|$)", "explicit"),
(r"(?:my|the)\s+(?:timezone|tz)\s+(?:is|=)\s+(.+?)(?:\.|$)", "user.timezone"),
(r"(?:my|the)\s+(?:project|repo|codebase)\s+(?:is|=|called)\s+(.+?)(?:\.|$)", "project"),
(r"(?:actually|correction|instead)[:\s]+(.+?)(?:\.|$)", "correction"),
]
def _extract_facts(text: str) -> List[Dict[str, str]]:
"""Extract structured facts from conversation text using pattern matching."""
facts = []
if not text or len(text) < 10:
return facts
text_lower = text.lower().strip()
for pattern, category in _FACT_PATTERNS:
matches = re.findall(pattern, text_lower, re.IGNORECASE)
for match in matches:
fact_text = match.strip() if isinstance(match, str) else match[0].strip()
if len(fact_text) > 3 and len(fact_text) < 500:
facts.append({
"content": fact_text,
"category": category,
"source_text": text[:200],
})
return facts
# Tool schemas (compatible with cloud Mem0)
# These are OpenAI-style function schemas returned from
# Mem0LocalProvider.get_tool_schemas(); the names must stay in sync with the
# dispatch in handle_tool_call().

# mem0_profile: dump of all stored memories; takes no parameters.
PROFILE_SCHEMA = {
    "name": "mem0_profile",
    "description": (
        "Retrieve all stored memories about the user - preferences, facts, "
        "project context. Fast, no reranking. Use at conversation start."
    ),
    "parameters": {"type": "object", "properties": {}, "required": []},
}

# mem0_search: semantic (vector) search; only "query" is required.
SEARCH_SCHEMA = {
    "name": "mem0_search",
    "description": (
        "Search memories by meaning. Returns relevant facts ranked by similarity. "
        "Local-only - no API calls."
    ),
    "parameters": {
        "type": "object",
        "properties": {
            "query": {"type": "string", "description": "What to search for."},
            "top_k": {"type": "integer", "description": "Max results (default: 10, max: 50)."},
        },
        "required": ["query"],
    },
}

# mem0_conclude: store a single fact verbatim (no extraction applied).
CONCLUDE_SCHEMA = {
    "name": "mem0_conclude",
    "description": (
        "Store a durable fact about the user. Stored verbatim (no LLM extraction). "
        "Use for explicit preferences, corrections, or decisions. Local-only."
    ),
    "parameters": {
        "type": "object",
        "properties": {
            "conclusion": {"type": "string", "description": "The fact to store."},
        },
        "required": ["conclusion"],
    },
}
class Mem0LocalProvider(MemoryProvider):
    """Local ChromaDB-backed memory provider. No API key required.

    All data stays on disk under ``storage_path`` via ChromaDB's persistent
    client. A small circuit breaker disables memory operations for
    _BREAKER_COOLDOWN_SECS after _BREAKER_THRESHOLD consecutive failures so a
    broken local store cannot stall every conversation turn.
    """

    def __init__(self):
        self._config = None                   # dict from _load_config(); set in initialize()
        self._client = None                   # chromadb.PersistentClient, created lazily
        self._collection = None               # ChromaDB collection, created lazily
        self._client_lock = threading.Lock()  # guards lazy client/collection init
        self._user_id = "hermes-user"
        self._storage_path = ""
        # NOTE(review): max_memories is loaded from config but not yet enforced
        # anywhere in this provider — no pruning happens at the cap.
        self._max_memories = 10000
        self._consecutive_failures = 0        # circuit-breaker failure counter
        self._breaker_open_until = 0.0        # monotonic deadline while breaker is open

    @property
    def name(self) -> str:
        return "mem0-local"

    def is_available(self) -> bool:
        """Return True iff the chromadb package can be imported."""
        try:
            import chromadb  # noqa: F401
            return True
        except ImportError:
            return False

    def save_config(self, values, hermes_home):
        """Merge *values* into $hermes_home/mem0-local.json, creating it if absent."""
        config_path = Path(hermes_home) / "mem0-local.json"
        existing = {}
        if config_path.exists():
            try:
                existing = json.loads(config_path.read_text())
            except Exception:
                # A corrupt config file is replaced by the merged new values.
                pass
        existing.update(values)
        config_path.write_text(json.dumps(existing, indent=2))

    def get_config_schema(self):
        """Describe the user-editable configuration keys for the settings UI."""
        return [
            {"key": "storage_path", "description": "Storage directory for ChromaDB", "default": "~/.hermes/mem0-local/"},
            {"key": "collection_prefix", "description": "Collection name prefix", "default": "mem0"},
            {"key": "max_memories", "description": "Maximum stored memories", "default": "10000"},
        ]

    def _get_collection(self):
        """Thread-safe ChromaDB collection accessor with lazy init.

        Returns:
            The (cached) ChromaDB collection.

        Raises:
            RuntimeError: if chromadb is not installed.
        """
        with self._client_lock:
            if self._collection is not None:
                return self._collection
            try:
                import chromadb
                from chromadb.config import Settings
            except ImportError:
                raise RuntimeError("chromadb package not installed. Run: pip install chromadb")
            Path(self._storage_path).mkdir(parents=True, exist_ok=True)
            self._client = chromadb.PersistentClient(
                path=self._storage_path,
                settings=Settings(anonymized_telemetry=False),
            )
            collection_name = f"{self._config.get('collection_prefix', 'mem0')}_memories"
            # Cosine space: query distances fall in [0, 2], which the search
            # handler maps onto a [0, 1] similarity score.
            self._collection = self._client.get_or_create_collection(
                name=collection_name,
                metadata={"hnsw:space": "cosine"},
            )
            logger.info(
                "Mem0 local: ChromaDB collection '%s' at %s (%d docs)",
                collection_name, self._storage_path, self._collection.count(),
            )
            return self._collection

    def _doc_id(self, content: str) -> str:
        """Deterministic ID from content hash (identical facts dedup via upsert)."""
        return hashlib.sha256(content.encode("utf-8")).hexdigest()[:16]

    def _is_breaker_open(self) -> bool:
        """Return True while the breaker is tripped (threshold hit, cooldown pending)."""
        if self._consecutive_failures < _BREAKER_THRESHOLD:
            return False
        if time.monotonic() >= self._breaker_open_until:
            # Cooldown elapsed: reset and allow another attempt.
            self._consecutive_failures = 0
            return False
        return True

    def _record_success(self):
        self._consecutive_failures = 0

    def _record_failure(self):
        self._consecutive_failures += 1
        if self._consecutive_failures >= _BREAKER_THRESHOLD:
            self._breaker_open_until = time.monotonic() + _BREAKER_COOLDOWN_SECS

    def initialize(self, session_id: str, **kwargs) -> None:
        """Load config and resolve the user id; ChromaDB init stays lazy."""
        self._config = _load_config()
        self._storage_path = self._config.get("storage_path", "")
        try:
            self._max_memories = int(self._config.get("max_memories", 10000))
        except (TypeError, ValueError):
            # A bad value in the config file must not crash provider init.
            self._max_memories = 10000
        self._user_id = kwargs.get("user_id") or self._config.get("user_id", "hermes-user")

    def system_prompt_block(self) -> str:
        """Return the memory status block injected into the system prompt."""
        count = 0
        try:
            col = self._get_collection()
            count = col.count()
        except Exception:
            # Best-effort: report 0 memories rather than fail prompt assembly.
            pass
        return (
            "# Mem0 Local Memory\n"
            f"Active. {count} memories stored locally. "
            "Use mem0_search to find memories, mem0_conclude to store facts, "
            "mem0_profile for a full overview."
        )

    def prefetch(self, query: str, *, session_id: str = "") -> str:
        # No synchronous prefetch: the model queries explicitly via tools.
        return ""

    def queue_prefetch(self, query: str, *, session_id: str = "") -> None:
        # No background prefetch either; intentionally a no-op.
        pass

    def sync_turn(self, user_content: str, assistant_content: str, *, session_id: str = "") -> None:
        """Extract and store facts from the conversation turn (best-effort)."""
        if self._is_breaker_open():
            return
        try:
            col = self._get_collection()
        except Exception:
            # No store available; silently skip this turn.
            return
        for content in [user_content, assistant_content]:
            if not content or len(content) < 10:
                continue
            for fact in _extract_facts(content):
                # Content-hash ID makes repeated facts an idempotent upsert.
                doc_id = self._doc_id(fact["content"])
                try:
                    col.upsert(
                        ids=[doc_id],
                        documents=[fact["content"]],
                        metadatas=[{
                            "category": fact["category"],
                            "user_id": self._user_id,
                            "timestamp": datetime.now(timezone.utc).isoformat(),
                            "source": "extracted",
                        }],
                    )
                    self._record_success()
                except Exception as e:
                    self._record_failure()
                    logger.debug("Mem0 local: failed to upsert fact: %s", e)

    def get_tool_schemas(self) -> List[Dict[str, Any]]:
        return [PROFILE_SCHEMA, SEARCH_SCHEMA, CONCLUDE_SCHEMA]

    def handle_tool_call(self, tool_name: str, args: dict, **kwargs) -> str:
        """Dispatch mem0_profile / mem0_search / mem0_conclude.

        Returns a JSON string (or a tool_error payload) in every case; never
        raises to the caller.
        """
        if self._is_breaker_open():
            return json.dumps({"error": "Local memory temporarily unavailable. Will retry automatically."})
        try:
            col = self._get_collection()
        except Exception as e:
            return tool_error(f"ChromaDB not available: {e}")

        if tool_name == "mem0_profile":
            try:
                results = col.get(
                    where={"user_id": self._user_id} if self._user_id else None,
                    limit=500,
                )
                documents = results.get("documents", [])
                if not documents:
                    return json.dumps({"result": "No memories stored yet."})
                lines = [d for d in documents if d]
                self._record_success()
                return json.dumps({"result": "\n".join(f"- {l}" for l in lines), "count": len(lines)})
            except Exception as e:
                self._record_failure()
                return tool_error(f"Failed to fetch profile: {e}")

        elif tool_name == "mem0_search":
            query = args.get("query", "")
            if not query:
                return tool_error("Missing required parameter: query")
            # Parse top_k defensively: the old code called int() outside any
            # try block, so a malformed value raised out of handle_tool_call,
            # and 0/negative values were passed straight to ChromaDB.
            try:
                top_k = int(args.get("top_k", 10))
            except (TypeError, ValueError):
                top_k = 10
            top_k = max(1, min(top_k, 50))
            try:
                results = col.query(
                    query_texts=[query],
                    n_results=top_k,
                    where={"user_id": self._user_id} if self._user_id else None,
                )
                documents = results.get("documents", [[]])[0]
                distances = results.get("distances", [[]])[0]
                if not documents:
                    return json.dumps({"result": "No relevant memories found."})
                items = []
                for doc, dist in zip(documents, distances):
                    # Cosine distance in [0, 2] -> similarity score in [0, 1].
                    score = max(0, 1 - (dist / 2))
                    items.append({"memory": doc, "score": round(score, 3)})
                self._record_success()
                return json.dumps({"results": items, "count": len(items)})
            except Exception as e:
                self._record_failure()
                return tool_error(f"Search failed: {e}")

        elif tool_name == "mem0_conclude":
            conclusion = args.get("conclusion", "")
            if not conclusion:
                return tool_error("Missing required parameter: conclusion")
            try:
                doc_id = self._doc_id(conclusion)
                col.upsert(
                    ids=[doc_id],
                    documents=[conclusion],
                    metadatas=[{
                        "category": "explicit",
                        "user_id": self._user_id,
                        "timestamp": datetime.now(timezone.utc).isoformat(),
                        "source": "conclude",
                    }],
                )
                self._record_success()
                return json.dumps({"result": "Fact stored locally.", "id": doc_id})
            except Exception as e:
                self._record_failure()
                return tool_error(f"Failed to store: {e}")

        return tool_error(f"Unknown tool: {tool_name}")

    def shutdown(self) -> None:
        """Drop the cached client/collection; PersistentClient writes to disk as it goes."""
        with self._client_lock:
            self._collection = None
            self._client = None
def register(ctx) -> None:
    """Plugin entry point: register Mem0 Local as a memory provider."""
    provider = Mem0LocalProvider()
    ctx.register_memory_provider(provider)