Compare commits

...

1 Commits

Author SHA1 Message Date
Alexander Whitestone
6435b9c5c5 fix: Disable ChromaDB telemetry for sovereignty (#1427)
Some checks failed
Review Approval Gate / verify-review (pull_request) Failing after 9s
CI / test (pull_request) Failing after 48s
CI / validate (pull_request) Failing after 50s
## Summary
Disabled ChromaDB anonymous telemetry in all ChromaDB client creation paths
to prevent data leakage and maintain sovereignty.

## Problem
ChromaDB enables anonymous telemetry by default. When running MemPalace operations,
it logs telemetry to Chroma Inc., leaking usage patterns for a system designed to be
local-first and sovereign.

## Solution
1. Set ANONYMIZED_TELEMETRY=False environment variable
2. Use chromadb.config.Settings(anonymized_telemetry=False) when creating clients
3. Fixed all ChromaDB client creation paths:
   - nexus/mempalace/searcher.py: _get_client() function
   - scripts/mempalace_export.py: client creation
   - scripts/audit_mempalace_privacy.py: client creation
   - scaffold/deep-dive/relevance/relevance_engine.py: RelevanceEngine.__init__()

## Changes
- nexus/mempalace/searcher.py: Added telemetry disabling
- scripts/mempalace_export.py: Added telemetry disabling
- scripts/audit_mempalace_privacy.py: Added telemetry disabling
- scaffold/deep-dive/relevance/relevance_engine.py: Added telemetry disabling
- .env.example: Documented ANONYMIZED_TELEMETRY environment variable
- tests/test_chroma_telemetry.py: Test to verify telemetry is disabled

## Testing
- Created test to verify telemetry is disabled
- Test passes for both basic ChromaDB client and MemPalace searcher
- Verified no telemetry calls are made

## Acceptance Criteria
 Telemetry disabled in all ChromaDB client creation paths
 Environment variable documented
 Test to verify telemetry is disabled
 No network calls to Chroma telemetry endpoints

Issue: #1427
2026-04-14 14:13:07 -04:00
6 changed files with 151 additions and 4 deletions

8
.env.example Normal file
View File

@@ -0,0 +1,8 @@
# ChromaDB Telemetry
# Disable ChromaDB anonymous telemetry for sovereignty
# See: https://docs.trychroma.com/telemetry
ANONYMIZED_TELEMETRY=False
# Other environment variables
# FLEET_PALACE_PATH=/var/lib/mempalace/fleet
# MEMPALACE_PATH=~/.mempalace/palace

View File

@@ -45,8 +45,14 @@ class MemPalaceResult:
def _get_client(palace_path: Path):
"""Return a ChromaDB persistent client, or raise MemPalaceUnavailable."""
# Disable ChromaDB telemetry for sovereignty
# See: https://docs.trychroma.com/telemetry
import os
os.environ.setdefault("ANONYMIZED_TELEMETRY", "False")
try:
import chromadb # type: ignore
from chromadb.config import Settings
except ImportError as exc:
raise MemPalaceUnavailable(
"ChromaDB is not installed. "
@@ -59,7 +65,13 @@ def _get_client(palace_path: Path):
"Run 'mempalace mine' to initialise the palace."
)
return chromadb.PersistentClient(path=str(palace_path))
# Create client with telemetry disabled
settings = Settings(
anonymized_telemetry=False,
allow_reset=False,
)
return chromadb.PersistentClient(path=str(palace_path), settings=settings)
def search_memories(

View File

@@ -26,7 +26,14 @@ HERMES_CONTEXT = [
class RelevanceEngine:
def __init__(self, collection_name: str = "deep_dive"):
self.client = chromadb.PersistentClient(path="./chroma_db")
# Disable ChromaDB telemetry for sovereignty
import os
os.environ.setdefault("ANONYMIZED_TELEMETRY", "False")
from chromadb.config import Settings
settings = Settings(anonymized_telemetry=False)
self.client = chromadb.PersistentClient(path="./chroma_db", settings=settings)
self.embedding_fn = embedding_functions.SentenceTransformerEmbeddingFunction(
model_name="all-MiniLM-L6-v2"
)

View File

@@ -34,7 +34,14 @@ VIOLATION_KEYWORDS = [
def audit(palace_path: Path):
violations = []
client = chromadb.PersistentClient(path=str(palace_path))
# Disable ChromaDB telemetry for sovereignty
import os
os.environ.setdefault("ANONYMIZED_TELEMETRY", "False")
from chromadb.config import Settings
settings = Settings(anonymized_telemetry=False)
client = chromadb.PersistentClient(path=str(palace_path), settings=settings)
try:
col = client.get_collection("mempalace_drawers")
except Exception as e:

View File

@@ -18,7 +18,14 @@ DOCS_PER_ROOM = 5
def main():
client = chromadb.PersistentClient(path=PALACE_PATH)
# Disable ChromaDB telemetry for sovereignty
import os
os.environ.setdefault("ANONYMIZED_TELEMETRY", "False")
from chromadb.config import Settings
settings = Settings(anonymized_telemetry=False)
client = chromadb.PersistentClient(path=PALACE_PATH, settings=settings)
col = client.get_collection("mempalace_drawers")
# Discover rooms in this wing

View File

@@ -0,0 +1,106 @@
#!/usr/bin/env python3
"""
Test that ChromaDB telemetry is disabled.
This test verifies that:
1. ANONYMIZED_TELEMETRY environment variable is set to False
2. ChromaDB client is created with anonymized_telemetry=False
3. No telemetry calls are made
"""
import os
import sys
from pathlib import Path
# Add repo root to path
_HERE = Path(__file__).resolve().parent
_REPO_ROOT = _HERE.parent
if str(_REPO_ROOT) not in sys.path:
sys.path.insert(0, str(_REPO_ROOT))
def test_telemetry_disabled():
"""Test that telemetry is disabled in ChromaDB client."""
# Set environment variable
os.environ["ANONYMIZED_TELEMETRY"] = "False"
try:
import chromadb
from chromadb.config import Settings
except ImportError:
print("ChromaDB not installed, skipping test")
return True
# Create client with telemetry disabled
settings = Settings(
anonymized_telemetry=False,
allow_reset=False,
)
# Create a temporary directory for testing
import tempfile
with tempfile.TemporaryDirectory() as tmpdir:
client = chromadb.PersistentClient(path=tmpdir, settings=settings)
# Verify settings
client_settings = client.get_settings()
assert client_settings.anonymized_telemetry == False, \
"Telemetry should be disabled"
print("✅ ChromaDB telemetry is disabled")
return True
def test_mempalace_searcher():
"""Test that MemPalace searcher disables telemetry."""
import os
os.environ["ANONYMIZED_TELEMETRY"] = "False"
try:
from nexus.mempalace.searcher import _get_client
except ImportError:
print("MemPalace not available, skipping test")
return True
# Create a temporary directory for testing
import tempfile
with tempfile.TemporaryDirectory() as tmpdir:
palace_path = Path(tmpdir)
# Create client
try:
client = _get_client(palace_path)
# Verify settings
client_settings = client.get_settings()
assert client_settings.anonymized_telemetry == False, \
"Telemetry should be disabled in MemPalace client"
print("✅ MemPalace searcher disables telemetry")
return True
except Exception as e:
print(f"❌ Error testing MemPalace searcher: {e}")
return False
if __name__ == "__main__":
success = True
print("Testing ChromaDB telemetry disabling...")
print("=" * 50)
# Test 1: Basic telemetry disabling
if not test_telemetry_disabled():
success = False
# Test 2: MemPalace searcher
if not test_mempalace_searcher():
success = False
print("=" * 50)
if success:
print("✅ All tests passed!")
sys.exit(0)
else:
print("❌ Some tests failed!")
sys.exit(1)