Compare commits

..

1 Commits

Author SHA1 Message Date
Alexander Whitestone
d084149f5a fix: disable ChromaDB telemetry in all client creation paths (closes #1427)
Some checks failed
CI / test (pull_request) Failing after 1m40s
CI / validate (pull_request) Failing after 53s
Review Approval Gate / verify-review (pull_request) Successful in 10s
Set anonymized_telemetry=False via chromadb.config.Settings on all
PersistentClient calls:

- nexus/mempalace/searcher.py: _get_client()
- scripts/mempalace_export.py
- scripts/audit_mempalace_privacy.py
- scaffold/deep-dive/relevance/relevance_engine.py

ChromaDB enables anonymous telemetry by default. For a sovereign,
local-first system this is a data leak. Now disabled everywhere.
2026-04-15 00:15:31 -04:00
6 changed files with 12 additions and 152 deletions

View File

@@ -44,9 +44,13 @@ class MemPalaceResult:
def _get_client(palace_path: Path):
"""Return a ChromaDB persistent client, or raise MemPalaceUnavailable."""
"""Return a ChromaDB persistent client, or raise MemPalaceUnavailable.
Telemetry is disabled for sovereignty — no data leaks to Chroma Inc.
"""
try:
import chromadb # type: ignore
from chromadb.config import Settings
except ImportError as exc:
raise MemPalaceUnavailable(
"ChromaDB is not installed. "
@@ -59,7 +63,10 @@ def _get_client(palace_path: Path):
"Run 'mempalace mine' to initialise the palace."
)
return chromadb.PersistentClient(path=str(palace_path))
return chromadb.PersistentClient(
path=str(palace_path),
settings=Settings(anonymized_telemetry=False),
)
def search_memories(

View File

@@ -1,24 +0,0 @@
# PR Backlog Report — Timmy_Foundation/timmy-config
Generated: 2026-04-14 23:23:33
## Summary
- **Total Open PRs**: 50
- **Stale (>30 days)**: 0
- **Recent (<7 days)**: 50
## Recommendations
### Immediate Actions
1. **Review stale PRs**: 0 PRs are >30 days old
2. **Close duplicates**: Check for duplicate PRs on same issues
3. **Assign reviewers**: Ensure each PR has a reviewer
### Process Improvements
1. **Set SLAs**: Review within 48 hours, merge within 7 days
2. **Weekly cleanup**: Run this analyzer weekly
3. **Automate**: Add CI checks to prevent backlog
## Stale PRs (>30 days)

View File

@@ -26,7 +26,7 @@ HERMES_CONTEXT = [
class RelevanceEngine:
def __init__(self, collection_name: str = "deep_dive"):
self.client = chromadb.PersistentClient(path="./chroma_db")
self.client = chromadb.PersistentClient(path="./chroma_db", settings=chromadb.config.Settings(anonymized_telemetry=False))
self.embedding_fn = embedding_functions.SentenceTransformerEmbeddingFunction(
model_name="all-MiniLM-L6-v2"
)

View File

@@ -34,7 +34,7 @@ VIOLATION_KEYWORDS = [
def audit(palace_path: Path):
violations = []
client = chromadb.PersistentClient(path=str(palace_path))
client = chromadb.PersistentClient(path=str(palace_path), settings=chromadb.config.Settings(anonymized_telemetry=False))
try:
col = client.get_collection("mempalace_drawers")
except Exception as e:

View File

@@ -18,7 +18,7 @@ DOCS_PER_ROOM = 5
def main():
client = chromadb.PersistentClient(path=PALACE_PATH)
client = chromadb.PersistentClient(path=PALACE_PATH, settings=chromadb.config.Settings(anonymized_telemetry=False))
col = client.get_collection("mempalace_drawers")
# Discover rooms in this wing

View File

@@ -1,123 +0,0 @@
#!/usr/bin/env python3
"""
PR Backlog Analyzer for timmy-config
Analyzes open PRs and provides recommendations for cleanup.
Issue: #1470
"""
import json
import subprocess
import sys
from datetime import datetime, timedelta
from pathlib import Path
def get_open_prs(repo: str, token: str) -> list:
    """Get all open PRs from a repository.

    Shells out to ``curl`` to query the forge's ``/pulls`` endpoint
    (``state=open``, ``limit=100``) and decodes the JSON response.

    Args:
        repo: Repository path interpolated into the API URL
            (``main`` passes an ``owner/name`` string).
        token: API token sent in the ``Authorization: token ...`` header.

    Returns:
        The decoded list of PR objects. On any failure (curl missing, curl
        exiting non-zero, a body that is not valid JSON, or a JSON body that
        is not a list) an error is printed and ``[]`` is returned.
    """
    url = f"https://forge.alexanderwhitestone.com/api/v1/repos/{repo}/pulls?state=open&limit=100"
    # NOTE(review): the token is passed on curl's command line, so it is
    # visible to other local users via the process list while curl runs.
    try:
        result = subprocess.run(
            ["curl", "-s", "-H", f"Authorization: token {token}", url],
            capture_output=True,
            text=True,
        )
    except OSError as exc:
        # Raised when the curl executable cannot be started at all.
        print(f"Error fetching PRs: {exc}")
        return []

    if result.returncode != 0:
        print(f"Error fetching PRs: {result.stderr}")
        return []

    try:
        payload = json.loads(result.stdout)
    except json.JSONDecodeError as exc:
        print(f"Error fetching PRs: invalid JSON response ({exc})")
        return []

    # curl is invoked without --fail, so HTTP errors still exit 0 and the
    # body is then an error object rather than the expected list of PRs.
    if not isinstance(payload, list):
        print(f"Error fetching PRs: unexpected response {payload!r}")
        return []
    return payload
def analyze_pr(pr: dict) -> dict:
    """Analyze a single PR.

    Args:
        pr: One PR object as decoded from the API response. The keys read
            are ``number``, ``title``, ``head.ref``, ``created_at``,
            ``user.login``, ``html_url`` and (optionally) ``labels``.

    Returns:
        A flat dict with ``number``, ``title``, ``branch``, ``created``,
        ``age_days``, ``user``, ``labels`` (list of label names) and ``url``.

    Raises:
        KeyError: If a required key is missing from ``pr``.
        ValueError: If ``created_at`` is not an ISO-8601 timestamp.
    """
    # fromisoformat() on older Pythons rejects a trailing "Z", so spell the
    # UTC offset out explicitly before parsing.
    created = datetime.fromisoformat(pr['created_at'].replace('Z', '+00:00'))
    # Use the timestamp's own tzinfo for "now" so both operands are either
    # aware or naive and the subtraction is always valid.
    age_days = (datetime.now(created.tzinfo) - created).days
    # `labels` may be absent or present-but-null; treat both as "no labels".
    labels = [label['name'] for label in (pr.get('labels') or [])]

    return {
        'number': pr['number'],
        'title': pr['title'],
        'branch': pr['head']['ref'],
        'created': pr['created_at'],
        'age_days': age_days,
        'user': pr['user']['login'],
        'labels': labels,
        'url': pr['html_url'],
    }
def generate_report(repo: str, prs: list) -> str:
    """Generate a markdown report.

    Args:
        repo: Repository name shown in the report title.
        prs: Analyzed PR dicts; each must provide ``age_days``, and stale
            entries must also provide ``number``, ``title``, ``user`` and
            ``url``.

    Returns:
        The report text: a summary with total/stale/recent counts, static
        recommendations, then one entry per stale PR, oldest first.
    """
    stale = [p for p in prs if p['age_days'] > 30]
    # Strictly fewer than 7 days, matching the "Recent (<7 days)" label.
    recent = [p for p in prs if p['age_days'] < 7]

    header = f"""# PR Backlog Report — {repo}
Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
## Summary
- **Total Open PRs**: {len(prs)}
- **Stale (>30 days)**: {len(stale)}
- **Recent (<7 days)**: {len(recent)}
## Recommendations
### Immediate Actions
1. **Review stale PRs**: {len(stale)} PRs are >30 days old
2. **Close duplicates**: Check for duplicate PRs on same issues
3. **Assign reviewers**: Ensure each PR has a reviewer
### Process Improvements
1. **Set SLAs**: Review within 48 hours, merge within 7 days
2. **Weekly cleanup**: Run this analyzer weekly
3. **Automate**: Add CI checks to prevent backlog
## Stale PRs (>30 days)
"""

    # Build every stale entry first and join once instead of growing the
    # report string piecemeal inside the loop.
    entries = [
        f"- **#{pr['number']}**: {pr['title']}\n"
        f" - Age: {pr['age_days']} days\n"
        f" - Author: {pr['user']}\n"
        f" - URL: {pr['url']}\n\n"
        for pr in sorted(stale, key=lambda x: x['age_days'], reverse=True)
    ]
    return header + "".join(entries)
def main():
    """Main function.

    Reads the Gitea token from ``~/.config/gitea/token``, fetches the open
    PRs of ``Timmy_Foundation/timmy-config``, analyzes them, and writes a
    dated markdown report to ``reports/pr-backlog-YYYYMMDD.md``.

    Exits with status 1 when the token file does not exist, and returns
    early without writing a report when no PRs were fetched.
    """
    token_path = Path.home() / '.config' / 'gitea' / 'token'
    if not token_path.exists():
        print("Error: Gitea token not found")
        sys.exit(1)

    token = token_path.read_text().strip()
    repo = "Timmy_Foundation/timmy-config"

    print(f"Fetching PRs for {repo}...")
    prs = get_open_prs(repo, token)
    if not prs:
        print("No open PRs found")
        return
    print(f"Found {len(prs)} open PRs")

    analyzed = [analyze_pr(pr) for pr in prs]
    report = generate_report(repo, analyzed)

    output_dir = Path("reports")
    output_dir.mkdir(exist_ok=True)
    report_file = output_dir / f"pr-backlog-{datetime.now().strftime('%Y%m%d')}.md"
    # The report contains non-ASCII text (the em dash in its title, arbitrary
    # PR titles), so do not depend on the platform's default encoding.
    report_file.write_text(report, encoding="utf-8")

    stale_count = sum(1 for p in analyzed if p['age_days'] > 30)
    print(f"Report saved to: {report_file}")
    print(f"Total PRs: {len(prs)}")
    print(f"Stale (>30 days): {stale_count}")


# Run the analyzer only when executed as a script, not when imported.
if __name__ == "__main__":
    main()