Some checks failed
Contributor Attribution Check / check-attribution (pull_request) Successful in 38s
Docker Build and Publish / build-and-push (pull_request) Has been skipped
Supply Chain Audit / Scan PR for supply chain risks (pull_request) Successful in 32s
Tests / test (pull_request) Failing after 43m54s
Tests / e2e (pull_request) Successful in 2m5s
## Investigation Report - docs/tool-investigation-2026-04-15.md: Full report analyzing 414 tools from awesome-ai-tools. Top 5 recommendations with integration paths. - docs/plans/awesome-ai-tools-integration.md: Implementation tracking plan. ## Mem0 Local Provider (P1) - plugins/memory/mem0_local/: New ChromaDB-backed memory provider. No API key required - fully sovereign. Compatible tool schemas with cloud Mem0 (mem0_profile, mem0_search, mem0_conclude). - Pattern-based fact extraction from conversations. - Deterministic dedup via content hashing. - Circuit breaker for resilience. - tests/plugins/memory/test_mem0_local.py: Full test coverage. ## Issues Filed - #857: LightRAG integration (P2) - #858: n8n workflow orchestration (P3) - #859: RAGFlow document understanding (P4) - #860: tensorzero LLMOps evaluation (P3) Closes #842
382 lines
13 KiB
Python
382 lines
13 KiB
Python
"""Mem0 Local memory provider - ChromaDB-backed, no API key required.

Sovereign deployment: all data stays on the user's machine. Uses ChromaDB
for vector storage and simple heuristic fact extraction (no server-side LLM).

Compatible tool schemas with the cloud Mem0 provider:
    mem0_profile - retrieve all stored memories
    mem0_search - semantic search by meaning
    mem0_conclude - store a fact verbatim

Config via $HERMES_HOME/mem0-local.json or environment variables:
    MEM0_LOCAL_PATH - storage directory (default: $HERMES_HOME/mem0-local/)
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import hashlib
|
|
import json
|
|
import logging
|
|
import os
|
|
import re
|
|
import threading
|
|
import time
|
|
from datetime import datetime, timezone
|
|
from pathlib import Path
|
|
from typing import Any, Dict, List, Optional
|
|
|
|
from agent.memory_provider import MemoryProvider
|
|
from tools.registry import tool_error
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
# Circuit breaker: after _BREAKER_THRESHOLD consecutive ChromaDB failures,
# memory operations are skipped for _BREAKER_COOLDOWN_SECS seconds, then
# retried automatically (see Mem0LocalProvider._is_breaker_open).
_BREAKER_THRESHOLD = 5
_BREAKER_COOLDOWN_SECS = 120
|
|
|
|
|
|
def _load_config() -> dict:
    """Load local config from env vars, with $HERMES_HOME/mem0-local.json overrides.

    Precedence (lowest to highest): built-in defaults, the MEM0_LOCAL_PATH
    environment variable, then non-empty values from the JSON config file.
    The storage path falls back to $HERMES_HOME/mem0-local/ when nothing
    else sets it.

    Returns:
        dict with keys "storage_path", "collection_prefix", "max_memories"
        (plus any extra keys present in the config file).
    """
    from hermes_constants import get_hermes_home

    config = {
        "storage_path": os.environ.get("MEM0_LOCAL_PATH", ""),
        "collection_prefix": "mem0",
        "max_memories": 10000,
    }

    config_path = get_hermes_home() / "mem0-local.json"
    if config_path.exists():
        try:
            file_cfg = json.loads(config_path.read_text(encoding="utf-8"))
            # Drop null/empty entries so the file cannot blank out defaults.
            config.update({k: v for k, v in file_cfg.items()
                           if v is not None and v != ""})
        except Exception:
            # A corrupt config file must not disable the provider, but the
            # user deserves a breadcrumb (this previously failed silently).
            logger.warning("Mem0 local: ignoring unreadable config at %s", config_path)

    if not config["storage_path"]:
        config["storage_path"] = str(get_hermes_home() / "mem0-local")

    return config
|
|
|
|
|
|
# Simple fact extraction patterns (no LLM required)
|
|
_FACT_PATTERNS = [
|
|
(r"(?:my|the user'?s?)\s+(?:name|username)\s+(?:is|=)\s+(.+?)(?:\.|$)", "user.name"),
|
|
(r"(?:i|user)\s+(?:prefer|like|use|want|need)s?\s+(.+?)(?:\.|$)", "preference"),
|
|
(r"(?:i|user)\s+(?:work|am)\s+(?:at|as|on|with)\s+(.+?)(?:\.|$)", "context"),
|
|
(r"(?:remember|note|save|store)[:\s]+(.+?)(?:\.|$)", "explicit"),
|
|
(r"(?:my|the)\s+(?:timezone|tz)\s+(?:is|=)\s+(.+?)(?:\.|$)", "user.timezone"),
|
|
(r"(?:my|the)\s+(?:project|repo|codebase)\s+(?:is|=|called)\s+(.+?)(?:\.|$)", "project"),
|
|
(r"(?:actually|correction|instead)[:\s]+(.+?)(?:\.|$)", "correction"),
|
|
]
|
|
|
|
|
|
def _extract_facts(text: str) -> List[Dict[str, str]]:
|
|
"""Extract structured facts from conversation text using pattern matching."""
|
|
facts = []
|
|
if not text or len(text) < 10:
|
|
return facts
|
|
text_lower = text.lower().strip()
|
|
|
|
for pattern, category in _FACT_PATTERNS:
|
|
matches = re.findall(pattern, text_lower, re.IGNORECASE)
|
|
for match in matches:
|
|
fact_text = match.strip() if isinstance(match, str) else match[0].strip()
|
|
if len(fact_text) > 3 and len(fact_text) < 500:
|
|
facts.append({
|
|
"content": fact_text,
|
|
"category": category,
|
|
"source_text": text[:200],
|
|
})
|
|
|
|
return facts
|
|
|
|
|
|
# Tool schemas (compatible with cloud Mem0).  These mirror the cloud
# provider's function-calling contracts (same names, same parameters) so the
# agent can swap providers without re-learning tool definitions.

# Zero-argument dump of all stored memories.
PROFILE_SCHEMA = {
    "name": "mem0_profile",
    "description": (
        "Retrieve all stored memories about the user - preferences, facts, "
        "project context. Fast, no reranking. Use at conversation start."
    ),
    "parameters": {"type": "object", "properties": {}, "required": []},
}

# Semantic similarity search over stored memories.
SEARCH_SCHEMA = {
    "name": "mem0_search",
    "description": (
        "Search memories by meaning. Returns relevant facts ranked by similarity. "
        "Local-only - no API calls."
    ),
    "parameters": {
        "type": "object",
        "properties": {
            "query": {"type": "string", "description": "What to search for."},
            "top_k": {"type": "integer", "description": "Max results (default: 10, max: 50)."},
        },
        "required": ["query"],
    },
}

# Verbatim storage of a single fact (no extraction heuristics applied).
CONCLUDE_SCHEMA = {
    "name": "mem0_conclude",
    "description": (
        "Store a durable fact about the user. Stored verbatim (no LLM extraction). "
        "Use for explicit preferences, corrections, or decisions. Local-only."
    ),
    "parameters": {
        "type": "object",
        "properties": {
            "conclusion": {"type": "string", "description": "The fact to store."},
        },
        "required": ["conclusion"],
    },
}
|
|
|
|
|
|
class Mem0LocalProvider(MemoryProvider):
    """Local ChromaDB-backed memory provider. No API key required.

    All data persists under the configured storage path, so deployments stay
    fully sovereign.  A small circuit breaker (thresholds at module top)
    skips memory operations after repeated ChromaDB failures so a broken
    store cannot stall every conversation turn.
    """

    def __init__(self):
        # Real settings arrive in initialize(); these defaults keep the
        # provider safe to call even before/without configuration.
        self._config = None
        self._client = None
        self._collection = None
        self._client_lock = threading.Lock()
        self._user_id = "hermes-user"
        self._storage_path = ""
        self._max_memories = 10000
        # Circuit-breaker counters.  Deliberately not lock-protected: a race
        # only lets a few extra attempts through, which is harmless.
        self._consecutive_failures = 0
        self._breaker_open_until = 0.0

    @property
    def name(self) -> str:
        return "mem0-local"

    def is_available(self) -> bool:
        """True when the optional chromadb package is installed.

        Probes via find_spec so availability checks do not pay the cost of
        actually importing chromadb.
        """
        from importlib.util import find_spec
        return find_spec("chromadb") is not None

    def save_config(self, values: dict, hermes_home) -> None:
        """Merge *values* into $hermes_home/mem0-local.json, creating it if absent."""
        config_path = Path(hermes_home) / "mem0-local.json"
        existing = {}
        if config_path.exists():
            try:
                # Explicit UTF-8, matching every other read/write in this file.
                existing = json.loads(config_path.read_text(encoding="utf-8"))
            except Exception:
                # Corrupt file: fall through and rewrite it from `values`.
                pass
        existing.update(values)
        config_path.write_text(json.dumps(existing, indent=2), encoding="utf-8")

    def get_config_schema(self):
        """Describe the user-tunable settings for the config UI."""
        return [
            {"key": "storage_path", "description": "Storage directory for ChromaDB", "default": "~/.hermes/mem0-local/"},
            {"key": "collection_prefix", "description": "Collection name prefix", "default": "mem0"},
            {"key": "max_memories", "description": "Maximum stored memories", "default": "10000"},
        ]

    def _get_collection(self):
        """Thread-safe ChromaDB collection accessor with lazy init.

        Raises:
            RuntimeError: if the chromadb package is not installed.
        """
        with self._client_lock:
            if self._collection is not None:
                return self._collection

            try:
                import chromadb
                from chromadb.config import Settings
            except ImportError:
                raise RuntimeError("chromadb package not installed. Run: pip install chromadb")

            Path(self._storage_path).mkdir(parents=True, exist_ok=True)

            self._client = chromadb.PersistentClient(
                path=self._storage_path,
                settings=Settings(anonymized_telemetry=False),
            )

            # Guard against initialize() never having run (config still None).
            prefix = (self._config or {}).get("collection_prefix", "mem0")
            collection_name = f"{prefix}_memories"
            self._collection = self._client.get_or_create_collection(
                name=collection_name,
                metadata={"hnsw:space": "cosine"},
            )

            logger.info(
                "Mem0 local: ChromaDB collection '%s' at %s (%d docs)",
                collection_name, self._storage_path, self._collection.count(),
            )

            return self._collection

    def _doc_id(self, content: str) -> str:
        """Deterministic ID from content hash (identical facts dedup via upsert)."""
        return hashlib.sha256(content.encode("utf-8")).hexdigest()[:16]

    def _is_breaker_open(self) -> bool:
        """True while the breaker is tripped; auto-resets after the cooldown."""
        if self._consecutive_failures < _BREAKER_THRESHOLD:
            return False
        if time.monotonic() >= self._breaker_open_until:
            self._consecutive_failures = 0
            return False
        return True

    def _record_success(self) -> None:
        self._consecutive_failures = 0

    def _record_failure(self) -> None:
        self._consecutive_failures += 1
        if self._consecutive_failures >= _BREAKER_THRESHOLD:
            self._breaker_open_until = time.monotonic() + _BREAKER_COOLDOWN_SECS

    def initialize(self, session_id: str, **kwargs) -> None:
        """Load config and resolve storage/user settings for this session."""
        self._config = _load_config()
        self._storage_path = self._config.get("storage_path", "")
        try:
            self._max_memories = int(self._config.get("max_memories", 10000))
        except (TypeError, ValueError):
            # A malformed config value must not break provider startup.
            logger.warning("Mem0 local: invalid max_memories in config; using 10000")
            self._max_memories = 10000
        self._user_id = kwargs.get("user_id") or self._config.get("user_id", "hermes-user")

    def system_prompt_block(self) -> str:
        """Short status block injected into the system prompt."""
        count = 0
        try:
            col = self._get_collection()
            count = col.count()
        except Exception:
            # Best-effort: show 0 rather than fail prompt assembly.
            pass
        return (
            "# Mem0 Local Memory\n"
            f"Active. {count} memories stored locally. "
            "Use mem0_search to find memories, mem0_conclude to store facts, "
            "mem0_profile for a full overview."
        )

    def prefetch(self, query: str, *, session_id: str = "") -> str:
        # No proactive retrieval in the local provider.
        return ""

    def queue_prefetch(self, query: str, *, session_id: str = "") -> None:
        # No async prefetch pipeline in the local provider.
        pass

    def sync_turn(self, user_content: str, assistant_content: str, *, session_id: str = "") -> None:
        """Extract and store facts from the conversation turn (best-effort)."""
        if self._is_breaker_open():
            return
        try:
            col = self._get_collection()
        except Exception:
            return

        for content in [user_content, assistant_content]:
            if not content or len(content) < 10:
                continue
            facts = _extract_facts(content)
            for fact in facts:
                # Content-hash ID: re-storing the same fact updates in place.
                doc_id = self._doc_id(fact["content"])
                try:
                    col.upsert(
                        ids=[doc_id],
                        documents=[fact["content"]],
                        metadatas=[{
                            "category": fact["category"],
                            "user_id": self._user_id,
                            "timestamp": datetime.now(timezone.utc).isoformat(),
                            "source": "extracted",
                        }],
                    )
                    self._record_success()
                except Exception as e:
                    self._record_failure()
                    logger.debug("Mem0 local: failed to upsert fact: %s", e)

    def get_tool_schemas(self) -> List[Dict[str, Any]]:
        return [PROFILE_SCHEMA, SEARCH_SCHEMA, CONCLUDE_SCHEMA]

    def handle_tool_call(self, tool_name: str, args: dict, **kwargs) -> str:
        """Dispatch mem0_profile / mem0_search / mem0_conclude tool calls.

        Returns a JSON string result, or a tool_error() string on failure.
        """
        if self._is_breaker_open():
            return json.dumps({"error": "Local memory temporarily unavailable. Will retry automatically."})

        try:
            col = self._get_collection()
        except Exception as e:
            return tool_error(f"ChromaDB not available: {e}")

        if tool_name == "mem0_profile":
            try:
                results = col.get(
                    where={"user_id": self._user_id} if self._user_id else None,
                    limit=500,
                )
                documents = results.get("documents", [])
                if not documents:
                    return json.dumps({"result": "No memories stored yet."})
                lines = [d for d in documents if d]
                self._record_success()
                return json.dumps({"result": "\n".join(f"- {l}" for l in lines), "count": len(lines)})
            except Exception as e:
                self._record_failure()
                return tool_error(f"Failed to fetch profile: {e}")

        elif tool_name == "mem0_search":
            query = args.get("query", "")
            if not query:
                return tool_error("Missing required parameter: query")
            # Previously a non-numeric top_k raised an uncaught ValueError and
            # a zero/negative value was passed straight to ChromaDB; validate
            # and clamp to the documented 1..50 range instead.
            try:
                top_k = int(args.get("top_k", 10))
            except (TypeError, ValueError):
                return tool_error("Parameter top_k must be an integer.")
            top_k = max(1, min(top_k, 50))

            try:
                results = col.query(
                    query_texts=[query],
                    n_results=top_k,
                    where={"user_id": self._user_id} if self._user_id else None,
                )

                documents = results.get("documents", [[]])[0]
                distances = results.get("distances", [[]])[0]

                if not documents:
                    return json.dumps({"result": "No relevant memories found."})

                items = []
                for doc, dist in zip(documents, distances):
                    # Cosine distance is in [0, 2]; map to a [0, 1] score.
                    score = max(0, 1 - (dist / 2))
                    items.append({"memory": doc, "score": round(score, 3)})

                self._record_success()
                return json.dumps({"results": items, "count": len(items)})
            except Exception as e:
                self._record_failure()
                return tool_error(f"Search failed: {e}")

        elif tool_name == "mem0_conclude":
            conclusion = args.get("conclusion", "")
            if not conclusion:
                return tool_error("Missing required parameter: conclusion")

            try:
                doc_id = self._doc_id(conclusion)
                col.upsert(
                    ids=[doc_id],
                    documents=[conclusion],
                    metadatas=[{
                        "category": "explicit",
                        "user_id": self._user_id,
                        "timestamp": datetime.now(timezone.utc).isoformat(),
                        "source": "conclude",
                    }],
                )
                self._record_success()
                return json.dumps({"result": "Fact stored locally.", "id": doc_id})
            except Exception as e:
                self._record_failure()
                return tool_error(f"Failed to store: {e}")

        return tool_error(f"Unknown tool: {tool_name}")

    def shutdown(self) -> None:
        """Drop client/collection references; ChromaDB persists on its own."""
        with self._client_lock:
            self._collection = None
            self._client = None
|
|
|
|
|
|
def register(ctx) -> None:
    """Plugin entry point: register Mem0 Local as a memory provider."""
    provider = Mem0LocalProvider()
    ctx.register_memory_provider(provider)
|