Compare commits

..

1 Commits

Author SHA1 Message Date
Alexander Whitestone
d084149f5a fix: disable ChromaDB telemetry in all client creation paths (closes #1427)
Some checks failed
CI / test (pull_request) Failing after 1m40s
CI / validate (pull_request) Failing after 53s
Review Approval Gate / verify-review (pull_request) Successful in 10s
Set anonymized_telemetry=False via chromadb.config.Settings on all
PersistentClient calls:

- nexus/mempalace/searcher.py: _get_client()
- scripts/mempalace_export.py
- scripts/audit_mempalace_privacy.py
- scaffold/deep-dive/relevance/relevance_engine.py

ChromaDB enables anonymous telemetry by default. For a sovereign,
local-first system this is a data leak. Now disabled everywhere.
2026-04-15 00:15:31 -04:00
8 changed files with 12 additions and 241 deletions

View File

@@ -1,23 +0,0 @@
## Description
<!-- What does this PR do? -->
## Changes
- [ ]
## Testing
- [ ]
## Reviewer Checklist
**IMPORTANT: Do not rubber-stamp. Verify each item below.**
- [ ] **PR has actual changes** — check additions, deletions, and changed files are > 0
- [ ] **Changes match description** — the code changes match what the PR claims to do
- [ ] **Code quality** — no obvious bugs, follows conventions, readable
- [ ] **Tests are adequate** — new code has tests, existing tests pass
- [ ] **Documentation updated** — if applicable
**By approving, I confirm I have actually reviewed the code changes in this PR.**

View File

@@ -1,40 +0,0 @@
name: Check PR Changes
on:
pull_request:
types: [opened, synchronize, reopened]
jobs:
check-changes:
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Check for actual changes
run: |
BASE="${{ github.event.pull_request.base.sha }}"
HEAD="${{ github.event.pull_request.head.sha }}"
ADDITIONS=${{ github.event.pull_request.additions }}
DELETIONS=${{ github.event.pull_request.deletions }}
CHANGED_FILES=${{ github.event.pull_request.changed_files }}
echo "PR Stats: +${ADDITIONS} -${DELETIONS} files:${CHANGED_FILES}"
if [ "$ADDITIONS" -eq 0 ] && [ "$DELETIONS" -eq 0 ] && [ "$CHANGED_FILES" -eq 0 ]; then
echo "::error::ZOMBIE PR detected — zero changes between base and head."
echo "This PR has no additions, deletions, or changed files."
echo "Please add actual changes or close this PR."
exit 1
fi
# Check for empty commits
COMMITS=$(git rev-list --count "$BASE".."$HEAD" 2>/dev/null || echo "0")
if [ "$COMMITS" -eq 0 ]; then
echo "::warning::PR has no commits between base and head."
fi
echo "PR has valid changes (+${ADDITIONS} -${DELETIONS})."

View File

@@ -1,121 +0,0 @@
#!/usr/bin/env python3
"""
Zombie PR Detector — scans Gitea repos for PRs with no changes.
Usage:
python bin/check_zombie_prs.py
python bin/check_zombie_prs.py --repos the-nexus timmy-home
python bin/check_zombie_prs.py --report
"""
import argparse
import json
import os
import urllib.request
from typing import Optional
def get_token() -> str:
"""Read Gitea API token."""
for path in ["~/.config/gitea/token", "~/.config/forge.token"]:
expanded = os.path.expanduser(path)
if os.path.exists(expanded):
return open(expanded).read().strip()
raise RuntimeError("No Gitea token found")
def get_open_prs(token: str, repo: str, base_url: str) -> list:
"""Get all open PRs for a repo."""
url = f"{base_url}/repos/{repo}/pulls?state=open&limit=100"
req = urllib.request.Request(url, headers={"Authorization": f"token {token}"})
return json.loads(urllib.request.urlopen(req, timeout=30).read())
def check_pr_zombie(pr: dict) -> Optional[dict]:
"""Check if a PR is a zombie (no changes)."""
additions = pr.get("additions", 0)
deletions = pr.get("deletions", 0)
changed_files = pr.get("changed_files", 0)
if additions == 0 and deletions == 0 and changed_files == 0:
return {
"number": pr["number"],
"title": pr["title"],
"author": pr.get("user", {}).get("login", "unknown"),
"url": pr.get("html_url", ""),
"created": pr.get("created_at", ""),
"additions": additions,
"deletions": deletions,
"changed_files": changed_files,
}
return None
def scan_repos(token: str, repos: list, base_url: str) -> list:
"""Scan repos for zombie PRs."""
zombies = []
for repo in repos:
try:
prs = get_open_prs(token, repo, base_url)
for pr in prs:
zombie = check_pr_zombie(pr)
if zombie:
zombie["repo"] = repo
zombies.append(zombie)
except Exception as e:
print(f" Error scanning {repo}: {e}")
return zombies
def list_org_repos(token: str, org: str, base_url: str) -> list:
"""List all repos in an org."""
url = f"{base_url}/orgs/{org}/repos?limit=100"
req = urllib.request.Request(url, headers={"Authorization": f"token {token}"})
repos = json.loads(urllib.request.urlopen(req, timeout=30).read())
return [r["full_name"] for r in repos]
def main():
parser = argparse.ArgumentParser(description="Detect zombie PRs with no changes")
parser.add_argument("--repos", nargs="+", help="Specific repos to scan")
parser.add_argument("--org", default="Timmy_Foundation", help="Organization name")
parser.add_argument("--base-url", default="https://forge.alexanderwhitestone.com/api/v1")
parser.add_argument("--report", action="store_true", help="Generate detailed report")
args = parser.parse_args()
token = get_token()
if args.repos:
repos = [f"{args.org}/{r}" if "/" not in r else r for r in args.repos]
else:
repos = list_org_repos(token, args.org, args.base_url)
print(f"Scanning {len(repos)} repos...")
zombies = scan_repos(token, repos, args.base_url)
if zombies:
print(f"\nFOUND {len(zombies)} ZOMBIE PR(s):\n")
for z in zombies:
print(f" [{z['repo']}] #{z['number']}: {z['title']}")
print(f" Author: {z['author']} Created: {z['created']}")
print(f" Stats: +{z['additions']} -{z['deletions']} files:{z['changed_files']}")
print(f" URL: {z['url']}")
print()
else:
print("\nNo zombie PRs found. All clear.")
if args.report:
report = {
"scanned_repos": len(repos),
"zombie_prs": len(zombies),
"zombies": zombies,
}
report_path = os.path.expanduser("~/.hermes/reports/zombie_prs.json")
os.makedirs(os.path.dirname(report_path), exist_ok=True)
with open(report_path, "w") as f:
json.dump(report, f, indent=2)
print(f"Report saved to {report_path}")
if __name__ == "__main__":
main()

View File

@@ -1,52 +0,0 @@
# Rubber-Stamping Prevention
## What is Rubber-Stamping?
Rubber-stamping is approving a PR without actually reviewing the code. This was observed in PR #359 which received 3 APPROVED reviews despite having zero changes.
## Why It's Bad
1. Wastes reviewer time
2. Creates false sense of review quality
3. Allows zombie PRs to appear reviewed
## Prevention Measures
### 1. CI Check (`.gitea/workflows/check-pr-changes.yml`)
Automated check that runs on every PR:
- Detects PRs with no changes (0 additions, 0 deletions, 0 files changed)
- Blocks merge if PR is a zombie
- Provides clear error messages
### 2. PR Template
Enhanced reviewer checklist:
- Verify PR has actual changes
- Changes match description
- Code quality review
- Tests are adequate
- Documentation is updated
### 3. Zombie PR Detection
```bash
# Scan all repos
python bin/check_zombie_prs.py
# Scan specific repos
python bin/check_zombie_prs.py --repos the-nexus timmy-home
# Generate report
python bin/check_zombie_prs.py --report
```
## Testing
```bash
# Create a test PR with no changes
git checkout -b test/zombie-pr
git commit --allow-empty -m "test: empty commit"
git push origin test/zombie-pr
# Create PR — CI should fail
```

View File

@@ -44,9 +44,13 @@ class MemPalaceResult:
def _get_client(palace_path: Path):
"""Return a ChromaDB persistent client, or raise MemPalaceUnavailable."""
"""Return a ChromaDB persistent client, or raise MemPalaceUnavailable.
Telemetry is disabled for sovereignty — no data leaks to Chroma Inc.
"""
try:
import chromadb # type: ignore
from chromadb.config import Settings
except ImportError as exc:
raise MemPalaceUnavailable(
"ChromaDB is not installed. "
@@ -59,7 +63,10 @@ def _get_client(palace_path: Path):
"Run 'mempalace mine' to initialise the palace."
)
return chromadb.PersistentClient(path=str(palace_path))
return chromadb.PersistentClient(
path=str(palace_path),
settings=Settings(anonymized_telemetry=False),
)
def search_memories(

View File

@@ -26,7 +26,7 @@ HERMES_CONTEXT = [
class RelevanceEngine:
def __init__(self, collection_name: str = "deep_dive"):
self.client = chromadb.PersistentClient(path="./chroma_db")
self.client = chromadb.PersistentClient(path="./chroma_db", settings=chromadb.config.Settings(anonymized_telemetry=False))
self.embedding_fn = embedding_functions.SentenceTransformerEmbeddingFunction(
model_name="all-MiniLM-L6-v2"
)

View File

@@ -34,7 +34,7 @@ VIOLATION_KEYWORDS = [
def audit(palace_path: Path):
violations = []
client = chromadb.PersistentClient(path=str(palace_path))
client = chromadb.PersistentClient(path=str(palace_path), settings=chromadb.config.Settings(anonymized_telemetry=False))
try:
col = client.get_collection("mempalace_drawers")
except Exception as e:

View File

@@ -18,7 +18,7 @@ DOCS_PER_ROOM = 5
def main():
client = chromadb.PersistentClient(path=PALACE_PATH)
client = chromadb.PersistentClient(path=PALACE_PATH, settings=chromadb.config.Settings(anonymized_telemetry=False))
col = client.get_collection("mempalace_drawers")
# Discover rooms in this wing