"""Mem0 Local memory provider - ChromaDB-backed, no API key required. Sovereign deployment: all data stays on the user's machine. Uses ChromaDB for vector storage and simple heuristic fact extraction (no server-side LLM). Compatible tool schemas with the cloud Mem0 provider: mem0_profile - retrieve all stored memories mem0_search - semantic search by meaning mem0_conclude - store a fact verbatim Config via $HERMES_HOME/mem0-local.json or environment variables: MEM0_LOCAL_PATH - storage directory (default: $HERMES_HOME/mem0-local/) """ from __future__ import annotations import hashlib import json import logging import os import re import threading import time from datetime import datetime, timezone from pathlib import Path from typing import Any, Dict, List, Optional from agent.memory_provider import MemoryProvider from tools.registry import tool_error logger = logging.getLogger(__name__) # Circuit breaker _BREAKER_THRESHOLD = 5 _BREAKER_COOLDOWN_SECS = 120 def _load_config() -> dict: """Load local config from env vars, with $HERMES_HOME/mem0-local.json overrides.""" from hermes_constants import get_hermes_home config = { "storage_path": os.environ.get("MEM0_LOCAL_PATH", ""), "collection_prefix": "mem0", "max_memories": 10000, } config_path = get_hermes_home() / "mem0-local.json" if config_path.exists(): try: file_cfg = json.loads(config_path.read_text(encoding="utf-8")) config.update({k: v for k, v in file_cfg.items() if v is not None and v != ""}) except Exception: pass if not config["storage_path"]: config["storage_path"] = str(get_hermes_home() / "mem0-local") return config # Simple fact extraction patterns (no LLM required) _FACT_PATTERNS = [ (r"(?:my|the user'?s?)\s+(?:name|username)\s+(?:is|=)\s+(.+?)(?:\.|$)", "user.name"), (r"(?:i|user)\s+(?:prefer|like|use|want|need)s?\s+(.+?)(?:\.|$)", "preference"), (r"(?:i|user)\s+(?:work|am)\s+(?:at|as|on|with)\s+(.+?)(?:\.|$)", "context"), (r"(?:remember|note|save|store)[:\s]+(.+?)(?:\.|$)", "explicit"), (r"(?:my|the)\s+(?:timezone|tz)\s+(?:is|=)\s+(.+?)(?:\.|$)", "user.timezone"), (r"(?:my|the)\s+(?:project|repo|codebase)\s+(?:is|=|called)\s+(.+?)(?:\.|$)", "project"), (r"(?:actually|correction|instead)[:\s]+(.+?)(?:\.|$)", "correction"), ] def _extract_facts(text: str) -> List[Dict[str, str]]: """Extract structured facts from conversation text using pattern matching.""" facts = [] if not text or len(text) < 10: return facts text_lower = text.lower().strip() for pattern, category in _FACT_PATTERNS: matches = re.findall(pattern, text_lower, re.IGNORECASE) for match in matches: fact_text = match.strip() if isinstance(match, str) else match[0].strip() if len(fact_text) > 3 and len(fact_text) < 500: facts.append({ "content": fact_text, "category": category, "source_text": text[:200], }) return facts # Tool schemas (compatible with cloud Mem0) PROFILE_SCHEMA = { "name": "mem0_profile", "description": ( "Retrieve all stored memories about the user - preferences, facts, " "project context. Fast, no reranking. Use at conversation start." ), "parameters": {"type": "object", "properties": {}, "required": []}, } SEARCH_SCHEMA = { "name": "mem0_search", "description": ( "Search memories by meaning. Returns relevant facts ranked by similarity. " "Local-only - no API calls." ), "parameters": { "type": "object", "properties": { "query": {"type": "string", "description": "What to search for."}, "top_k": {"type": "integer", "description": "Max results (default: 10, max: 50)."}, }, "required": ["query"], }, } CONCLUDE_SCHEMA = { "name": "mem0_conclude", "description": ( "Store a durable fact about the user. Stored verbatim (no LLM extraction). " "Use for explicit preferences, corrections, or decisions. Local-only." ), "parameters": { "type": "object", "properties": { "conclusion": {"type": "string", "description": "The fact to store."}, }, "required": ["conclusion"], }, } class Mem0LocalProvider(MemoryProvider): """Local ChromaDB-backed memory provider. No API key required.""" def __init__(self): self._config = None self._client = None self._collection = None self._client_lock = threading.Lock() self._user_id = "hermes-user" self._storage_path = "" self._max_memories = 10000 self._consecutive_failures = 0 self._breaker_open_until = 0.0 @property def name(self) -> str: return "mem0-local" def is_available(self) -> bool: try: import chromadb return True except ImportError: return False def save_config(self, values, hermes_home): config_path = Path(hermes_home) / "mem0-local.json" existing = {} if config_path.exists(): try: existing = json.loads(config_path.read_text()) except Exception: pass existing.update(values) config_path.write_text(json.dumps(existing, indent=2)) def get_config_schema(self): return [ {"key": "storage_path", "description": "Storage directory for ChromaDB", "default": "~/.hermes/mem0-local/"}, {"key": "collection_prefix", "description": "Collection name prefix", "default": "mem0"}, {"key": "max_memories", "description": "Maximum stored memories", "default": "10000"}, ] def _get_collection(self): """Thread-safe ChromaDB collection accessor with lazy init.""" with self._client_lock: if self._collection is not None: return self._collection try: import chromadb from chromadb.config import Settings except ImportError: raise RuntimeError("chromadb package not installed. Run: pip install chromadb") Path(self._storage_path).mkdir(parents=True, exist_ok=True) self._client = chromadb.PersistentClient( path=self._storage_path, settings=Settings(anonymized_telemetry=False), ) collection_name = f"{self._config.get('collection_prefix', 'mem0')}_memories" self._collection = self._client.get_or_create_collection( name=collection_name, metadata={"hnsw:space": "cosine"}, ) logger.info( "Mem0 local: ChromaDB collection '%s' at %s (%d docs)", collection_name, self._storage_path, self._collection.count(), ) return self._collection def _doc_id(self, content: str) -> str: """Deterministic ID from content hash (for dedup).""" return hashlib.sha256(content.encode("utf-8")).hexdigest()[:16] def _is_breaker_open(self) -> bool: if self._consecutive_failures < _BREAKER_THRESHOLD: return False if time.monotonic() >= self._breaker_open_until: self._consecutive_failures = 0 return False return True def _record_success(self): self._consecutive_failures = 0 def _record_failure(self): self._consecutive_failures += 1 if self._consecutive_failures >= _BREAKER_THRESHOLD: self._breaker_open_until = time.monotonic() + _BREAKER_COOLDOWN_SECS def initialize(self, session_id: str, **kwargs) -> None: self._config = _load_config() self._storage_path = self._config.get("storage_path", "") self._max_memories = int(self._config.get("max_memories", 10000)) self._user_id = kwargs.get("user_id") or self._config.get("user_id", "hermes-user") def system_prompt_block(self) -> str: count = 0 try: col = self._get_collection() count = col.count() except Exception: pass return ( "# Mem0 Local Memory\n" f"Active. {count} memories stored locally. " "Use mem0_search to find memories, mem0_conclude to store facts, " "mem0_profile for a full overview." ) def prefetch(self, query: str, *, session_id: str = "") -> str: return "" def queue_prefetch(self, query: str, *, session_id: str = "") -> None: pass def sync_turn(self, user_content: str, assistant_content: str, *, session_id: str = "") -> None: """Extract and store facts from the conversation turn.""" if self._is_breaker_open(): return try: col = self._get_collection() except Exception: return for content in [user_content, assistant_content]: if not content or len(content) < 10: continue facts = _extract_facts(content) for fact in facts: doc_id = self._doc_id(fact["content"]) try: col.upsert( ids=[doc_id], documents=[fact["content"]], metadatas=[{ "category": fact["category"], "user_id": self._user_id, "timestamp": datetime.now(timezone.utc).isoformat(), "source": "extracted", }], ) self._record_success() except Exception as e: self._record_failure() logger.debug("Mem0 local: failed to upsert fact: %s", e) def get_tool_schemas(self) -> List[Dict[str, Any]]: return [PROFILE_SCHEMA, SEARCH_SCHEMA, CONCLUDE_SCHEMA] def handle_tool_call(self, tool_name: str, args: dict, **kwargs) -> str: if self._is_breaker_open(): return json.dumps({"error": "Local memory temporarily unavailable. Will retry automatically."}) try: col = self._get_collection() except Exception as e: return tool_error(f"ChromaDB not available: {e}") if tool_name == "mem0_profile": try: results = col.get( where={"user_id": self._user_id} if self._user_id else None, limit=500, ) documents = results.get("documents", []) if not documents: return json.dumps({"result": "No memories stored yet."}) lines = [d for d in documents if d] self._record_success() return json.dumps({"result": "\n".join(f"- {l}" for l in lines), "count": len(lines)}) except Exception as e: self._record_failure() return tool_error(f"Failed to fetch profile: {e}") elif tool_name == "mem0_search": query = args.get("query", "") if not query: return tool_error("Missing required parameter: query") top_k = min(int(args.get("top_k", 10)), 50) try: results = col.query( query_texts=[query], n_results=top_k, where={"user_id": self._user_id} if self._user_id else None, ) documents = results.get("documents", [[]])[0] distances = results.get("distances", [[]])[0] if not documents: return json.dumps({"result": "No relevant memories found."}) items = [] for doc, dist in zip(documents, distances): score = max(0, 1 - (dist / 2)) items.append({"memory": doc, "score": round(score, 3)}) self._record_success() return json.dumps({"results": items, "count": len(items)}) except Exception as e: self._record_failure() return tool_error(f"Search failed: {e}") elif tool_name == "mem0_conclude": conclusion = args.get("conclusion", "") if not conclusion: return tool_error("Missing required parameter: conclusion") try: doc_id = self._doc_id(conclusion) col.upsert( ids=[doc_id], documents=[conclusion], metadatas=[{ "category": "explicit", "user_id": self._user_id, "timestamp": datetime.now(timezone.utc).isoformat(), "source": "conclude", }], ) self._record_success() return json.dumps({"result": "Fact stored locally.", "id": doc_id}) except Exception as e: self._record_failure() return tool_error(f"Failed to store: {e}") return tool_error(f"Unknown tool: {tool_name}") def shutdown(self) -> None: with self._client_lock: self._collection = None self._client = None def register(ctx) -> None: """Register Mem0 Local as a memory provider plugin.""" ctx.register_memory_provider(Mem0LocalProvider())