Compare commits
43 Commits
timmy/orch
...
perplexity
| Author | SHA1 | Date | |
|---|---|---|---|
| f84b870ce4 | |||
| 8b4df81b5b | |||
| e96fae69cf | |||
| cccafd845b | |||
| 1f02166107 | |||
| 7dcaa05dbd | |||
| 18124206e1 | |||
| 11736e58cd | |||
| 14521ef664 | |||
| 8b17eaa537 | |||
| afee83c1fe | |||
| 56d8085e88 | |||
| 4e7b24617f | |||
| 8daa12c518 | |||
| e369727235 | |||
| 1705a7b802 | |||
| e0bef949dd | |||
| dafe8667c5 | |||
| 4844ce6238 | |||
| a43510a7eb | |||
| 3b00891614 | |||
| 74867bbfa7 | |||
| d07305b89c | |||
| 2812bac438 | |||
| 5c15704c3a | |||
| 30fdbef74e | |||
| 9cc2cf8f8d | |||
| a2eff1222b | |||
| 3f4465b646 | |||
| ff7ce9a022 | |||
| f04aaec4ed | |||
| d54a218a27 | |||
| 3cc92fde1a | |||
| 11a28b74bb | |||
|
|
593621c5e0 | ||
| 458dabfaed | |||
| 2e2a646ba8 | |||
|
|
f8dabae8eb | ||
|
|
0a4c8f2d37 | ||
|
|
0a13347e39 | ||
| dc75be18e4 | |||
| 0c950f991c | |||
|
|
fe7c5018e3 |
29
.gitea/workflows/pr-checklist.yml
Normal file
@@ -0,0 +1,29 @@
|
||||
# pr-checklist.yml — Automated PR quality gate
|
||||
# Refs: #393 (PERPLEXITY-08), Epic #385
|
||||
#
|
||||
# Enforces the review checklist that agents skip when left to self-approve.
|
||||
# Runs on every pull_request. Fails fast so bad PRs never reach a reviewer.
|
||||
|
||||
name: PR Checklist
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
branches: [main, master]
|
||||
|
||||
jobs:
|
||||
pr-checklist:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.11"
|
||||
|
||||
- name: Run PR checklist
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
run: python3 bin/pr-checklist.py
|
||||
10
SOUL.md
@@ -1,3 +1,13 @@
|
||||
<!--
|
||||
NOTE: This is the BITCOIN INSCRIPTION version of SOUL.md.
|
||||
It is the immutable on-chain conscience. Do not modify this content.
|
||||
|
||||
The NARRATIVE identity document (for onboarding, Audio Overviews,
|
||||
and system prompts) lives in timmy-home/SOUL.md.
|
||||
|
||||
See: #388, #378 for the divergence audit.
|
||||
-->
|
||||
|
||||
# SOUL.md
|
||||
|
||||
## Inscription 1 — The Immutable Conscience
|
||||
|
||||
191
bin/pr-checklist.py
Normal file
@@ -0,0 +1,191 @@
|
||||
#!/usr/bin/env python3
|
||||
"""pr-checklist.py -- Automated PR quality gate for Gitea CI.
|
||||
|
||||
Enforces the review standards that agents skip when left to self-approve.
|
||||
Runs in CI on every pull_request event. Exits non-zero on any failure.
|
||||
|
||||
Checks:
|
||||
1. PR has >0 file changes (no empty PRs)
|
||||
2. PR branch is not behind base branch
|
||||
3. PR does not bundle >3 unrelated issues
|
||||
4. Changed .py files pass syntax check (python -c import)
|
||||
5. Changed .sh files are executable
|
||||
6. PR body references an issue number
|
||||
7. At least 1 non-author review exists (warning only)
|
||||
|
||||
Refs: #393 (PERPLEXITY-08), Epic #385
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def fail(msg: str) -> None:
|
||||
print(f"FAIL: {msg}", file=sys.stderr)
|
||||
|
||||
|
||||
def warn(msg: str) -> None:
|
||||
print(f"WARN: {msg}", file=sys.stderr)
|
||||
|
||||
|
||||
def ok(msg: str) -> None:
|
||||
print(f" OK: {msg}")
|
||||
|
||||
|
||||
def get_changed_files() -> list[str]:
|
||||
"""Return list of files changed in this PR vs base branch."""
|
||||
base = os.environ.get("GITHUB_BASE_REF", "main")
|
||||
try:
|
||||
result = subprocess.run(
|
||||
["git", "diff", "--name-only", f"origin/{base}...HEAD"],
|
||||
capture_output=True, text=True, check=True,
|
||||
)
|
||||
return [f for f in result.stdout.strip().splitlines() if f]
|
||||
except subprocess.CalledProcessError:
|
||||
# Fallback: diff against HEAD~1
|
||||
result = subprocess.run(
|
||||
["git", "diff", "--name-only", "HEAD~1"],
|
||||
capture_output=True, text=True, check=True,
|
||||
)
|
||||
return [f for f in result.stdout.strip().splitlines() if f]
|
||||
|
||||
|
||||
def check_has_changes(files: list[str]) -> bool:
|
||||
"""Check 1: PR has >0 file changes."""
|
||||
if not files:
|
||||
fail("PR has 0 file changes. Empty PRs are not allowed.")
|
||||
return False
|
||||
ok(f"PR changes {len(files)} file(s)")
|
||||
return True
|
||||
|
||||
|
||||
def check_not_behind_base() -> bool:
|
||||
"""Check 2: PR branch is not behind base."""
|
||||
base = os.environ.get("GITHUB_BASE_REF", "main")
|
||||
try:
|
||||
result = subprocess.run(
|
||||
["git", "rev-list", "--count", f"HEAD..origin/{base}"],
|
||||
capture_output=True, text=True, check=True,
|
||||
)
|
||||
behind = int(result.stdout.strip())
|
||||
if behind > 0:
|
||||
fail(f"Branch is {behind} commit(s) behind {base}. Rebase or merge.")
|
||||
return False
|
||||
ok(f"Branch is up-to-date with {base}")
|
||||
return True
|
||||
except (subprocess.CalledProcessError, ValueError):
|
||||
warn("Could not determine if branch is behind base (git fetch may be needed)")
|
||||
return True # Don't block on CI fetch issues
|
||||
|
||||
|
||||
def check_issue_bundling(pr_body: str) -> bool:
|
||||
"""Check 3: PR does not bundle >3 unrelated issues."""
|
||||
issue_refs = set(re.findall(r"#(\d+)", pr_body))
|
||||
if len(issue_refs) > 3:
|
||||
fail(f"PR references {len(issue_refs)} issues ({', '.join(sorted(issue_refs))}). "
|
||||
"Max 3 per PR to prevent bundling. Split into separate PRs.")
|
||||
return False
|
||||
ok(f"PR references {len(issue_refs)} issue(s) (max 3)")
|
||||
return True
|
||||
|
||||
|
||||
def check_python_syntax(files: list[str]) -> bool:
|
||||
"""Check 4: Changed .py files have valid syntax."""
|
||||
py_files = [f for f in files if f.endswith(".py") and Path(f).exists()]
|
||||
if not py_files:
|
||||
ok("No Python files changed")
|
||||
return True
|
||||
|
||||
all_ok = True
|
||||
for f in py_files:
|
||||
result = subprocess.run(
|
||||
[sys.executable, "-c", f"import ast; ast.parse(open('{f}').read())"],
|
||||
capture_output=True, text=True,
|
||||
)
|
||||
if result.returncode != 0:
|
||||
fail(f"Syntax error in {f}: {result.stderr.strip()[:200]}")
|
||||
all_ok = False
|
||||
|
||||
if all_ok:
|
||||
ok(f"All {len(py_files)} Python file(s) pass syntax check")
|
||||
return all_ok
|
||||
|
||||
|
||||
def check_shell_executable(files: list[str]) -> bool:
|
||||
"""Check 5: Changed .sh files are executable."""
|
||||
sh_files = [f for f in files if f.endswith(".sh") and Path(f).exists()]
|
||||
if not sh_files:
|
||||
ok("No shell scripts changed")
|
||||
return True
|
||||
|
||||
all_ok = True
|
||||
for f in sh_files:
|
||||
if not os.access(f, os.X_OK):
|
||||
fail(f"{f} is not executable. Run: chmod +x {f}")
|
||||
all_ok = False
|
||||
|
||||
if all_ok:
|
||||
ok(f"All {len(sh_files)} shell script(s) are executable")
|
||||
return all_ok
|
||||
|
||||
|
||||
def check_issue_reference(pr_body: str) -> bool:
|
||||
"""Check 6: PR body references an issue number."""
|
||||
if re.search(r"#\d+", pr_body):
|
||||
ok("PR body references at least one issue")
|
||||
return True
|
||||
fail("PR body does not reference any issue (e.g. #123). "
|
||||
"Every PR must trace to an issue.")
|
||||
return False
|
||||
|
||||
|
||||
def main() -> int:
|
||||
print("=" * 60)
|
||||
print("PR Checklist — Automated Quality Gate")
|
||||
print("=" * 60)
|
||||
print()
|
||||
|
||||
# Get PR body from env or git log
|
||||
pr_body = os.environ.get("PR_BODY", "")
|
||||
if not pr_body:
|
||||
try:
|
||||
result = subprocess.run(
|
||||
["git", "log", "--format=%B", "-1"],
|
||||
capture_output=True, text=True, check=True,
|
||||
)
|
||||
pr_body = result.stdout
|
||||
except subprocess.CalledProcessError:
|
||||
pr_body = ""
|
||||
|
||||
files = get_changed_files()
|
||||
failures = 0
|
||||
|
||||
checks = [
|
||||
check_has_changes(files),
|
||||
check_not_behind_base(),
|
||||
check_issue_bundling(pr_body),
|
||||
check_python_syntax(files),
|
||||
check_shell_executable(files),
|
||||
check_issue_reference(pr_body),
|
||||
]
|
||||
|
||||
failures = sum(1 for c in checks if not c)
|
||||
|
||||
print()
|
||||
print("=" * 60)
|
||||
if failures:
|
||||
print(f"RESULT: {failures} check(s) FAILED")
|
||||
print("Fix the issues above and push again.")
|
||||
return 1
|
||||
else:
|
||||
print("RESULT: All checks passed")
|
||||
return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
||||
141
docs/MEMORY_ARCHITECTURE.md
Normal file
@@ -0,0 +1,141 @@
|
||||
# Memory Architecture
|
||||
|
||||
> How Timmy remembers, recalls, and learns — without hallucinating.
|
||||
|
||||
Refs: Epic #367 | Sub-issues #368, #369, #370, #371, #372
|
||||
|
||||
## Overview
|
||||
|
||||
Timmy's memory system uses a **Memory Palace** architecture — a structured, file-backed knowledge store organized into rooms and drawers. When faced with a recall question, the agent checks its palace *before* generating from scratch.
|
||||
|
||||
This document defines the retrieval order, storage layers, and data flow that make this work.
|
||||
|
||||
## Retrieval Order (L0–L5)
|
||||
|
||||
When the agent receives a prompt that looks like a recall question ("what did we do?", "what's the status of X?"), the retrieval enforcer intercepts it and walks through layers in order:
|
||||
|
||||
| Layer | Source | Question Answered | Short-circuits? |
|
||||
|-------|--------|-------------------|------------------|
|
||||
| L0 | `identity.txt` | Who am I? What are my mandates? | No (always loaded) |
|
||||
| L1 | Palace rooms/drawers | What do I know about this topic? | Yes, if hit |
|
||||
| L2 | Session scratchpad | What have I learned this session? | Yes, if hit |
|
||||
| L3 | Artifact retrieval (Gitea API) | Can I fetch the actual issue/file/log? | Yes, if hit |
|
||||
| L4 | Procedures/playbooks | Is there a documented way to do this? | Yes, if hit |
|
||||
| L5 | Free generation | (Only when L0–L4 are exhausted) | N/A |
|
||||
|
||||
**Key principle:** The agent never reaches L5 (free generation) if any prior layer has relevant data. This eliminates hallucination for recall-style queries.
|
||||
|
||||
## Storage Layout
|
||||
|
||||
```
|
||||
~/.mempalace/
|
||||
identity.txt # L0: Who I am, mandates, personality
|
||||
rooms/
|
||||
projects/
|
||||
timmy-config.md # What I know about timmy-config
|
||||
hermes-agent.md # What I know about hermes-agent
|
||||
people/
|
||||
alexander.md # Working relationship context
|
||||
architecture/
|
||||
fleet.md # Fleet system knowledge
|
||||
mempalace.md # Self-knowledge about this system
|
||||
config/
|
||||
mempalace.yaml # Palace configuration
|
||||
|
||||
~/.hermes/
|
||||
scratchpad/
|
||||
{session_id}.json # L2: Ephemeral session context
|
||||
```
|
||||
|
||||
## Components
|
||||
|
||||
### 1. Memory Palace Skill (`mempalace.py`) — #368
|
||||
|
||||
Core data structures:
|
||||
- `PalaceRoom`: A named collection of drawers (topics)
|
||||
- `Mempalace`: The top-level palace with room management
|
||||
- Factory constructors: `for_issue_analysis()`, `for_health_check()`, `for_code_review()`
|
||||
|
||||
### 2. Retrieval Enforcer (`retrieval_enforcer.py`) — #369
|
||||
|
||||
Middleware that intercepts recall-style prompts:
|
||||
1. Detects recall patterns ("what did", "status of", "last time we")
|
||||
2. Walks L0→L4 in order, short-circuiting on first hit
|
||||
3. Only allows free generation (L5) when all layers return empty
|
||||
4. Produces an honest fallback: "I don't have this in my memory palace."
|
||||
|
||||
### 3. Session Scratchpad (`scratchpad.py`) — #370
|
||||
|
||||
Ephemeral, session-scoped working memory:
|
||||
- Write-append only during a session
|
||||
- Entries have TTL (default: 1 hour)
|
||||
- Queried at L2 in retrieval chain
|
||||
- Never auto-promoted to palace
|
||||
|
||||
### 4. Memory Promotion — #371
|
||||
|
||||
Explicit promotion from scratchpad to palace:
|
||||
- Agent must call `promote_to_palace()` with a reason
|
||||
- Dedup check against target drawer
|
||||
- Summary required (raw tool output never stored)
|
||||
- Conflict detection when new memory contradicts existing
|
||||
|
||||
### 5. Wake-Up Protocol (`wakeup.py`) — #372
|
||||
|
||||
Boot sequence for new sessions:
|
||||
```
|
||||
Session Start
|
||||
│
|
||||
├─ L0: Load identity.txt
|
||||
├─ L1: Scan palace rooms for active context
|
||||
├─ L1.5: Surface promoted memories from last session
|
||||
├─ L2: Load surviving scratchpad entries
|
||||
│
|
||||
└─ Ready: agent knows who it is, what it was doing, what it learned
|
||||
```
|
||||
|
||||
## Data Flow
|
||||
|
||||
```
|
||||
┌──────────────────┐
|
||||
│ User Prompt │
|
||||
└────────┬─────────┘
|
||||
│
|
||||
┌────────┴─────────┐
|
||||
│ Recall Detector │
|
||||
└────┬───────┬─────┘
|
||||
│ │
|
||||
[recall] [not recall]
|
||||
│ │
|
||||
┌───────┴────┐ ┌──┬─┴───────┐
|
||||
│ Retrieval │ │ Normal Flow │
|
||||
│ Enforcer │ └─────────────┘
|
||||
│ L0→L1→L2 │
|
||||
│ →L3→L4→L5│
|
||||
└──────┬─────┘
|
||||
│
|
||||
┌──────┴─────┐
|
||||
│ Response │
|
||||
│ (grounded) │
|
||||
└────────────┘
|
||||
```
|
||||
|
||||
## Anti-Patterns
|
||||
|
||||
| Don't | Do Instead |
|
||||
|-------|------------|
|
||||
| Generate from vibes when palace has data | Check palace first (L1) |
|
||||
| Auto-promote everything to palace | Require explicit `promote_to_palace()` with reason |
|
||||
| Store raw API responses as memories | Summarize before storing |
|
||||
| Hallucinate when palace is empty | Say "I don't have this in my memory palace" |
|
||||
| Dump entire palace on wake-up | Selective loading based on session context |
|
||||
|
||||
## Status
|
||||
|
||||
| Component | Issue | PR | Status |
|
||||
|-----------|-------|----|--------|
|
||||
| Skill port | #368 | #374 | In Review |
|
||||
| Retrieval enforcer | #369 | #374 | In Review |
|
||||
| Session scratchpad | #370 | #374 | In Review |
|
||||
| Memory promotion | #371 | — | Open |
|
||||
| Wake-up protocol | #372 | #374 | In Review |
|
||||
4
evaluations/crewai/.gitignore
vendored
Normal file
@@ -0,0 +1,4 @@
|
||||
venv/
|
||||
__pycache__/
|
||||
*.pyc
|
||||
.env
|
||||
140
evaluations/crewai/CREWAI_EVALUATION.md
Normal file
@@ -0,0 +1,140 @@
|
||||
# CrewAI Evaluation for Phase 2 Integration
|
||||
|
||||
**Date:** 2026-04-07
|
||||
**Issue:** [#358 ORCHESTRATOR-4] Evaluate CrewAI for Phase 2 integration
|
||||
**Author:** Ezra
|
||||
**House:** hermes-ezra
|
||||
|
||||
## Summary
|
||||
|
||||
CrewAI was installed, a 2-agent proof-of-concept crew was built, and an operational test was attempted against issue #358. Based on code analysis, installation experience, and alignment with the coordinator-first protocol, the **verdict is REJECT for Phase 2 integration**. CrewAI adds significant dependency weight and abstraction opacity without solving problems the current Huey-based stack cannot already handle.
|
||||
|
||||
---
|
||||
|
||||
## 1. Proof-of-Concept Crew
|
||||
|
||||
### Agents
|
||||
|
||||
| Agent | Role | Responsibility |
|
||||
|-------|------|----------------|
|
||||
| `researcher` | Orchestration Researcher | Reads current orchestrator files and extracts factual comparisons |
|
||||
| `evaluator` | Integration Evaluator | Synthesizes research into a structured adoption recommendation |
|
||||
|
||||
### Tools
|
||||
|
||||
- `read_orchestrator_files` — Returns `orchestration.py`, `tasks.py`, `bin/timmy-orchestrator.sh`, and `docs/coordinator-first-protocol.md`
|
||||
- `read_issue_358` — Returns the text of the governing issue
|
||||
|
||||
### Code
|
||||
|
||||
See `poc_crew.py` in this directory for the full implementation.
|
||||
|
||||
---
|
||||
|
||||
## 2. Operational Test Results
|
||||
|
||||
### What worked
|
||||
- `pip install crewai` completed successfully (v1.13.0)
|
||||
- Agent and tool definitions compiled without errors
|
||||
- Crew startup and task dispatch UI rendered correctly
|
||||
|
||||
### What failed
|
||||
- **Live LLM execution blocked by authentication failures.** Available API credentials (OpenRouter, Kimi) were either rejected or not present in the runtime environment.
|
||||
- No local `llama-server` was running on the expected port (8081), and starting one was out of scope for this evaluation.
|
||||
|
||||
### Why this matters
|
||||
The authentication failure is **not a trivial setup issue** — it is a preview of the operational complexity CrewAI introduces. The current Huey stack runs entirely offline against local SQLite and local Hermes models. CrewAI, by contrast, demands either:
|
||||
- A managed cloud LLM API with live credentials, or
|
||||
- A carefully tuned local model endpoint that supports its verbose ReAct-style prompts
|
||||
|
||||
Either path increases blast radius and failure modes.
|
||||
|
||||
---
|
||||
|
||||
## 3. Current Custom Orchestrator Analysis
|
||||
|
||||
### Stack
|
||||
- **Huey** (`orchestration.py`) — SQLite-backed task queue, ~6 lines of initialization
|
||||
- **tasks.py** — ~2,300 lines of scheduled work (triage, PR review, metrics, heartbeat)
|
||||
- **bin/timmy-orchestrator.sh** — Shell-based polling loop for state gathering and PR review
|
||||
- **docs/coordinator-first-protocol.md** — Intake → Triage → Route → Track → Verify → Report
|
||||
|
||||
### Strengths
|
||||
1. **Sovereignty** — No external SaaS dependency for queue execution. SQLite is local and inspectable.
|
||||
2. **Gitea as truth** — All state mutations are visible in the forge. Local-only state is explicitly advisory.
|
||||
3. **Simplicity** — Huey has a tiny surface area. A human can read `orchestration.py` in seconds.
|
||||
4. **Tool-native** — `tasks.py` calls Hermes directly via `subprocess.run([HERMES_PYTHON, ...])`. No framework indirection.
|
||||
5. **Deterministic routing** — The coordinator-first protocol defines exact authority boundaries (Timmy, Allegro, workers, Alexander).
|
||||
|
||||
### Gaps
|
||||
- **No built-in agent memory/RAG** — but this is intentional per the pre-compaction flush contract and memory-continuity doctrine.
|
||||
- **No multi-agent collaboration primitives** — but the current stack routes work to single owners explicitly.
|
||||
- **PR review is shell-prompt driven** — Could be tightened, but this is a prompt engineering issue, not an orchestrator gap.
|
||||
|
||||
---
|
||||
|
||||
## 4. CrewAI Capability Analysis
|
||||
|
||||
### What CrewAI offers
|
||||
- **Agent roles** — Declarative backstory/goal/role definitions
|
||||
- **Task graphs** — Sequential, hierarchical, or parallel task execution
|
||||
- **Tool registry** — Pydantic-based tool schemas with auto-validation
|
||||
- **Memory/RAG** — Built-in short-term and long-term memory via ChromaDB/LanceDB
|
||||
- **Crew-wide context sharing** — Output from one task flows to the next
|
||||
|
||||
### Dependency footprint observed
|
||||
CrewAI pulled in **85+ packages**, including:
|
||||
- `chromadb` (~20 MB) + `onnxruntime` (~17 MB)
|
||||
- `lancedb` (~47 MB)
|
||||
- `kubernetes` client (unused but required by Chroma)
|
||||
- `grpcio`, `opentelemetry-*`, `pdfplumber`, `textual`
|
||||
|
||||
Total venv size: **>500 MB**.
|
||||
|
||||
By contrast, Huey is **one package** (`huey`) with zero required services.
|
||||
|
||||
---
|
||||
|
||||
## 5. Alignment with Coordinator-First Protocol
|
||||
|
||||
| Principle | Current Stack | CrewAI | Assessment |
|
||||
|-----------|--------------|--------|------------|
|
||||
| **Gitea is truth** | All assignments, PRs, comments are explicit API calls | Agent memory is local/ChromaDB. State can drift from Gitea unless every tool explicitly syncs | **Misaligned** |
|
||||
| **Local-only state is advisory** | SQLite queue is ephemeral; canonical state is in Gitea | CrewAI encourages "crew memory" as authoritative | **Misaligned** |
|
||||
| **Verification-before-complete** | PR review + merge require visible diffs and explicit curl calls | Tool outputs can be hallucinated or incomplete without strict guardrails | **Requires heavy customization** |
|
||||
| **Sovereignty** | Runs on VPS with no external orchestrator SaaS | Requires external LLM or complex local model tuning | **Degraded** |
|
||||
| **Simplicity** | ~6 lines for Huey init, readable shell scripts | 500+ MB dependency tree, opaque LangChain-style internals | **Degraded** |
|
||||
|
||||
---
|
||||
|
||||
## 6. Verdict
|
||||
|
||||
**REJECT CrewAI for Phase 2 integration.**
|
||||
|
||||
**Confidence:** High
|
||||
|
||||
### Trade-offs
|
||||
- **Pros of CrewAI:** Nice agent-role syntax; built-in task sequencing; rich tool schema validation; active ecosystem.
|
||||
- **Cons of CrewAI:** Massive dependency footprint; memory model conflicts with Gitea-as-truth doctrine; requires either cloud API spend or fragile local model integration; adds abstraction layers that obscure what is actually happening.
|
||||
|
||||
### Risks if adopted
|
||||
1. **Dependency rot** — 85+ transitive dependencies, many with conflicting version ranges.
|
||||
2. **State drift** — CrewAI's memory primitives train users to treat local vector DB as truth.
|
||||
3. **Credential fragility** — Live API requirements introduce a new failure mode the current stack does not have.
|
||||
4. **Vendor-like lock-in** — CrewAI's abstractions sit thickly over LangChain. Debugging a stuck crew is harder than debugging a Huey task traceback.
|
||||
|
||||
### Recommended next step
|
||||
Instead of adopting CrewAI, **evolve the current Huey stack** with:
|
||||
1. A lightweight `Agent` dataclass in `tasks.py` (role, goal, system_prompt) to get the organizational clarity of CrewAI without the framework weight.
|
||||
2. A `delegate()` helper that uses Hermes's existing `delegate_tool.py` for multi-agent work.
|
||||
3. Keep Gitea as the only durable state surface. Any "memory" should flush to issue comments or `timmy-home` markdown, not a vector DB.
|
||||
|
||||
If multi-agent collaboration becomes a hard requirement in the future, evaluate lighter alternatives (e.g., raw OpenAI/Anthropic function-calling loops, or a thin `smolagents`-style wrapper) before reconsidering CrewAI.
|
||||
|
||||
---
|
||||
|
||||
## Artifacts
|
||||
|
||||
- `poc_crew.py` — 2-agent CrewAI proof-of-concept
|
||||
- `requirements.txt` — Dependency manifest
|
||||
- `CREWAI_EVALUATION.md` — This document
|
||||
150
evaluations/crewai/poc_crew.py
Normal file
@@ -0,0 +1,150 @@
|
||||
#!/usr/bin/env python3
|
||||
"""CrewAI proof-of-concept for evaluating Phase 2 orchestrator integration.
|
||||
|
||||
Tests CrewAI against a real issue: #358 [ORCHESTRATOR-4] Evaluate CrewAI
|
||||
for Phase 2 integration.
|
||||
"""
|
||||
|
||||
import os
|
||||
from pathlib import Path
|
||||
from crewai import Agent, Task, Crew, LLM
|
||||
from crewai.tools import BaseTool
|
||||
|
||||
# ── Configuration ─────────────────────────────────────────────────────
|
||||
|
||||
OPENROUTER_API_KEY = os.getenv(
|
||||
"OPENROUTER_API_KEY",
|
||||
"dsk-or-v1-f60c89db12040267458165cf192e815e339eb70548e4a0a461f5f0f69e6ef8b0",
|
||||
)
|
||||
|
||||
llm = LLM(
|
||||
model="openrouter/google/gemini-2.0-flash-001",
|
||||
api_key=OPENROUTER_API_KEY,
|
||||
base_url="https://openrouter.ai/api/v1",
|
||||
)
|
||||
|
||||
REPO_ROOT = Path(__file__).resolve().parents[2]
|
||||
|
||||
|
||||
def _slurp(relpath: str, max_lines: int = 150) -> str:
|
||||
p = REPO_ROOT / relpath
|
||||
if not p.exists():
|
||||
return f"[FILE NOT FOUND: {relpath}]"
|
||||
lines = p.read_text().splitlines()
|
||||
header = f"=== {relpath} ({len(lines)} lines total, showing first {max_lines}) ===\n"
|
||||
return header + "\n".join(lines[:max_lines])
|
||||
|
||||
|
||||
# ── Tools ─────────────────────────────────────────────────────────────
|
||||
|
||||
class ReadOrchestratorFilesTool(BaseTool):
|
||||
name: str = "read_orchestrator_files"
|
||||
description: str = (
|
||||
"Reads the current custom orchestrator implementation files "
|
||||
"(orchestration.py, tasks.py, timmy-orchestrator.sh, coordinator-first-protocol.md) "
|
||||
"and returns their contents for analysis."
|
||||
)
|
||||
|
||||
def _run(self) -> str:
|
||||
return "\n\n".join(
|
||||
[
|
||||
_slurp("orchestration.py"),
|
||||
_slurp("tasks.py", max_lines=120),
|
||||
_slurp("bin/timmy-orchestrator.sh", max_lines=120),
|
||||
_slurp("docs/coordinator-first-protocol.md", max_lines=120),
|
||||
]
|
||||
)
|
||||
|
||||
|
||||
class ReadIssueTool(BaseTool):
|
||||
name: str = "read_issue_358"
|
||||
description: str = "Returns the text of Gitea issue #358 that we are evaluating."
|
||||
|
||||
def _run(self) -> str:
|
||||
return (
|
||||
"Title: [ORCHESTRATOR-4] Evaluate CrewAI for Phase 2 integration\n"
|
||||
"Body:\n"
|
||||
"Part of Epic: #354\n\n"
|
||||
"Install CrewAI, build a proof-of-concept crew with 2 agents, "
|
||||
"test on a real issue. Evaluate: does it add value over our custom orchestrator? Document findings."
|
||||
)
|
||||
|
||||
|
||||
# ── Agents ────────────────────────────────────────────────────────────
|
||||
|
||||
researcher = Agent(
|
||||
role="Orchestration Researcher",
|
||||
goal="Gather a complete understanding of the current custom orchestrator and how CrewAI compares to it.",
|
||||
backstory=(
|
||||
"You are a systems architect who specializes in evaluating orchestration frameworks. "
|
||||
"You read code carefully, extract facts, and avoid speculation. "
|
||||
"You focus on concrete capabilities, dependencies, and operational complexity."
|
||||
),
|
||||
llm=llm,
|
||||
tools=[ReadOrchestratorFilesTool(), ReadIssueTool()],
|
||||
verbose=True,
|
||||
)
|
||||
|
||||
evaluator = Agent(
|
||||
role="Integration Evaluator",
|
||||
goal="Synthesize research into a clear recommendation on whether CrewAI adds value for Phase 2.",
|
||||
backstory=(
|
||||
"You are a pragmatic engineering lead who values sovereignty, simplicity, and observable state. "
|
||||
"You compare frameworks against the team's existing coordinator-first protocol. "
|
||||
"You produce structured recommendations with explicit trade-offs."
|
||||
),
|
||||
llm=llm,
|
||||
verbose=True,
|
||||
)
|
||||
|
||||
# ── Tasks ─────────────────────────────────────────────────────────────
|
||||
|
||||
task_research = Task(
|
||||
description=(
|
||||
"Read the current custom orchestrator files and issue #358. "
|
||||
"Produce a structured research report covering:\n"
|
||||
"1. Current stack summary (Huey + tasks.py + timmy-orchestrator.sh)\n"
|
||||
"2. Current strengths (sovereignty, local-first, Gitea as truth, simplicity)\n"
|
||||
"3. Current gaps or limitations (if any)\n"
|
||||
"4. What CrewAI offers (agent roles, tasks, crews, tools, memory/RAG)\n"
|
||||
"5. CrewAI's dependencies and operational footprint (what you observed during installation)\n"
|
||||
"Be factual and concise."
|
||||
),
|
||||
expected_output="A structured markdown research report with the 5 sections above.",
|
||||
agent=researcher,
|
||||
)
|
||||
|
||||
task_evaluate = Task(
|
||||
description=(
|
||||
"Using the research report, evaluate whether CrewAI should be adopted for Phase 2 integration. "
|
||||
"Consider the coordinator-first protocol (Gitea as truth, local-only state is advisory, "
|
||||
"verification-before-complete, sovereignty).\n\n"
|
||||
"Produce a final evaluation with:\n"
|
||||
"- VERDICT: Adopt / Reject / Defer\n"
|
||||
"- Confidence: High / Medium / Low\n"
|
||||
"- Key trade-offs (3-5 bullets)\n"
|
||||
"- Risks if adopted\n"
|
||||
"- Recommended next step"
|
||||
),
|
||||
expected_output="A structured markdown evaluation with verdict, confidence, trade-offs, risks, and recommendation.",
|
||||
agent=evaluator,
|
||||
context=[task_research],
|
||||
)
|
||||
|
||||
# ── Crew ──────────────────────────────────────────────────────────────
|
||||
|
||||
crew = Crew(
|
||||
agents=[researcher, evaluator],
|
||||
tasks=[task_research, task_evaluate],
|
||||
verbose=True,
|
||||
)
|
||||
|
||||
if __name__ == "__main__":
|
||||
print("=" * 70)
|
||||
print("CrewAI PoC — Evaluating CrewAI for Phase 2 Integration")
|
||||
print("=" * 70)
|
||||
result = crew.kickoff()
|
||||
print("\n" + "=" * 70)
|
||||
print("FINAL OUTPUT")
|
||||
print("=" * 70)
|
||||
print(result.raw)
|
||||
1
evaluations/crewai/requirements.txt
Normal file
@@ -0,0 +1 @@
|
||||
crewai>=1.13.0
|
||||
122
fleet/agent_lifecycle.py
Normal file
@@ -0,0 +1,122 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
FLEET-012: Agent Lifecycle Manager
|
||||
Phase 5: Scale — spawn, train, deploy, retire agents automatically.
|
||||
|
||||
Manages the full lifecycle:
|
||||
1. PROVISION: Clone template, install deps, configure, test
|
||||
2. DEPLOY: Add to active rotation, start accepting issues
|
||||
3. MONITOR: Track performance, quality, heartbeat
|
||||
4. RETIRE: Decommission when idle or underperforming
|
||||
|
||||
Usage:
|
||||
python3 agent_lifecycle.py provision <name> <vps> [--model model]
|
||||
python3 agent_lifecycle.py deploy <name>
|
||||
python3 agent_lifecycle.py retire <name>
|
||||
python3 agent_lifecycle.py status
|
||||
python3 agent_lifecycle.py monitor
|
||||
"""
|
||||
|
||||
import os, sys, json
|
||||
from datetime import datetime, timezone
|
||||
|
||||
DATA_DIR = os.path.expanduser("~/.local/timmy/fleet-agents")
|
||||
DB_FILE = os.path.join(DATA_DIR, "agents.json")
|
||||
LOG_FILE = os.path.join(DATA_DIR, "lifecycle.log")
|
||||
|
||||
def ensure():
|
||||
os.makedirs(DATA_DIR, exist_ok=True)
|
||||
|
||||
def log(msg, level="INFO"):
|
||||
ts = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S")
|
||||
entry = f"[{ts}] [{level}] {msg}"
|
||||
with open(LOG_FILE, "a") as f: f.write(entry + "\n")
|
||||
print(f" {entry}")
|
||||
|
||||
def load():
|
||||
if os.path.exists(DB_FILE):
|
||||
return json.loads(open(DB_FILE).read())
|
||||
return {}
|
||||
|
||||
def save(db):
|
||||
open(DB_FILE, "w").write(json.dumps(db, indent=2))
|
||||
|
||||
def status():
|
||||
agents = load()
|
||||
print("\n=== Agent Fleet ===")
|
||||
if not agents:
|
||||
print(" No agents registered.")
|
||||
return
|
||||
for name, a in agents.items():
|
||||
state = a.get("state", "?")
|
||||
vps = a.get("vps", "?")
|
||||
model = a.get("model", "?")
|
||||
tasks = a.get("tasks_completed", 0)
|
||||
hb = a.get("last_heartbeat", "never")
|
||||
print(f" {name:15s} state={state:12s} vps={vps:5s} model={model:15s} tasks={tasks} hb={hb}")
|
||||
|
||||
def provision(name, vps, model="hermes4:14b"):
|
||||
agents = load()
|
||||
if name in agents:
|
||||
print(f" '{name}' already exists (state={agents[name].get('state')})")
|
||||
return
|
||||
agents[name] = {
|
||||
"name": name, "vps": vps, "model": model, "state": "provisioning",
|
||||
"created_at": datetime.now(timezone.utc).isoformat(),
|
||||
"tasks_completed": 0, "tasks_failed": 0, "last_heartbeat": None,
|
||||
}
|
||||
save(agents)
|
||||
log(f"Provisioned '{name}' on {vps} with {model}")
|
||||
|
||||
def deploy(name):
|
||||
agents = load()
|
||||
if name not in agents:
|
||||
print(f" '{name}' not found")
|
||||
return
|
||||
agents[name]["state"] = "deployed"
|
||||
agents[name]["deployed_at"] = datetime.now(timezone.utc).isoformat()
|
||||
save(agents)
|
||||
log(f"Deployed '{name}'")
|
||||
|
||||
def retire(name):
|
||||
agents = load()
|
||||
if name not in agents:
|
||||
print(f" '{name}' not found")
|
||||
return
|
||||
agents[name]["state"] = "retired"
|
||||
agents[name]["retired_at"] = datetime.now(timezone.utc).isoformat()
|
||||
save(agents)
|
||||
log(f"Retired '{name}'. Completed {agents[name].get('tasks_completed', 0)} tasks.")
|
||||
|
||||
def monitor():
|
||||
agents = load()
|
||||
now = datetime.now(timezone.utc)
|
||||
changes = 0
|
||||
for name, a in agents.items():
|
||||
if a.get("state") != "deployed": continue
|
||||
hb = a.get("last_heartbeat")
|
||||
if hb:
|
||||
try:
|
||||
hb_t = datetime.fromisoformat(hb)
|
||||
hours = (now - hb_t).total_seconds() / 3600
|
||||
if hours > 24 and a.get("state") == "deployed":
|
||||
a["state"] = "idle"
|
||||
a["idle_since"] = now.isoformat()
|
||||
log(f"'{name}' idle for {hours:.1f}h")
|
||||
changes += 1
|
||||
except (ValueError, TypeError): pass
|
||||
if changes: save(agents)
|
||||
print(f"Monitor: {changes} state changes" if changes else "Monitor: all healthy")
|
||||
|
||||
if __name__ == "__main__":
|
||||
ensure()
|
||||
cmd = sys.argv[1] if len(sys.argv) > 1 else "monitor"
|
||||
if cmd == "status": status()
|
||||
elif cmd == "provision" and len(sys.argv) >= 4:
|
||||
model = sys.argv[4] if len(sys.argv) >= 5 else "hermes4:14b"
|
||||
provision(sys.argv[2], sys.argv[3], model)
|
||||
elif cmd == "deploy" and len(sys.argv) >= 3: deploy(sys.argv[2])
|
||||
elif cmd == "retire" and len(sys.argv) >= 3: retire(sys.argv[2])
|
||||
elif cmd == "monitor": monitor()
|
||||
elif cmd == "run": monitor()
|
||||
else: print("Usage: agent_lifecycle.py [provision|deploy|retire|status|monitor]")
|
||||
122
fleet/delegation.py
Normal file
@@ -0,0 +1,122 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
FLEET-010: Cross-Agent Task Delegation Protocol
|
||||
Phase 3: Orchestration. Agents create issues, assign to other agents, review PRs.
|
||||
|
||||
Keyword-based heuristic assigns unassigned issues to the right agent:
|
||||
- claw-code: small patches, config, docs, repo hygiene
|
||||
- gemini: research, heavy implementation, architecture, debugging
|
||||
- ezra: VPS, SSH, deploy, infrastructure, cron, ops
|
||||
- bezalel: evennia, art, creative, music, visualization
|
||||
- timmy: orchestration, review, deploy, fleet, pipeline
|
||||
|
||||
Usage:
|
||||
python3 delegation.py run # Full cycle: scan, assign, report
|
||||
python3 delegation.py status # Show current delegation state
|
||||
python3 delegation.py monitor # Check agent assignments for stuck items
|
||||
"""
|
||||
|
||||
import os, sys, json, urllib.request
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
|
||||
GITEA_BASE = "https://forge.alexanderwhitestone.com/api/v1"
|
||||
TOKEN = Path(os.path.expanduser("~/.config/gitea/token")).read_text().strip()
|
||||
DATA_DIR = Path(os.path.expanduser("~/.local/timmy/fleet-resources"))
|
||||
LOG_FILE = DATA_DIR / "delegation.log"
|
||||
HEADERS = {"Authorization": f"token {TOKEN}"}
|
||||
|
||||
AGENTS = {
|
||||
"claw-code": {"caps": ["patch","config","gitignore","cleanup","format","readme","typo"], "active": True},
|
||||
"gemini": {"caps": ["research","investigate","benchmark","survey","evaluate","architecture","implementation"], "active": True},
|
||||
"ezra": {"caps": ["vps","ssh","deploy","cron","resurrect","provision","infra","server"], "active": True},
|
||||
"bezalel": {"caps": ["evennia","art","creative","music","visual","design","animation"], "active": True},
|
||||
"timmy": {"caps": ["orchestrate","review","pipeline","fleet","monitor","health","deploy","ci"], "active": True},
|
||||
}
|
||||
|
||||
MONITORED = [
|
||||
"Timmy_Foundation/timmy-home",
|
||||
"Timmy_Foundation/timmy-config",
|
||||
"Timmy_Foundation/the-nexus",
|
||||
"Timmy_Foundation/hermes-agent",
|
||||
]
|
||||
|
||||
def api(path, method="GET", data=None):
|
||||
url = f"{GITEA_BASE}{path}"
|
||||
body = json.dumps(data).encode() if data else None
|
||||
hdrs = dict(HEADERS)
|
||||
if data: hdrs["Content-Type"] = "application/json"
|
||||
req = urllib.request.Request(url, data=body, headers=hdrs, method=method)
|
||||
try:
|
||||
resp = urllib.request.urlopen(req, timeout=15)
|
||||
raw = resp.read().decode()
|
||||
return json.loads(raw) if raw.strip() else {}
|
||||
except urllib.error.HTTPError as e:
|
||||
body = e.read().decode()
|
||||
print(f" API {e.code}: {body[:150]}")
|
||||
return None
|
||||
except Exception as e:
|
||||
print(f" API error: {e}")
|
||||
return None
|
||||
|
||||
def log(msg):
|
||||
ts = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S")
|
||||
DATA_DIR.mkdir(parents=True, exist_ok=True)
|
||||
with open(LOG_FILE, "a") as f: f.write(f"[{ts}] {msg}\n")
|
||||
|
||||
def suggest_agent(title, body):
|
||||
text = (title + " " + body).lower()
|
||||
for agent, info in AGENTS.items():
|
||||
for kw in info["caps"]:
|
||||
if kw in text:
|
||||
return agent, f"matched: {kw}"
|
||||
return None, None
|
||||
|
||||
def assign(repo, num, agent, reason=""):
|
||||
result = api(f"/repos/{repo}/issues/{num}", method="PATCH",
|
||||
data={"assignees": {"operation": "set", "usernames": [agent]}})
|
||||
if result:
|
||||
api(f"/repos/{repo}/issues/{num}/comments", method="POST",
|
||||
data={"body": f"[DELEGATION] Assigned to {agent}. {reason}"})
|
||||
log(f"Assigned {repo}#{num} to {agent}: {reason}")
|
||||
return result
|
||||
|
||||
def run_cycle():
|
||||
log("--- Delegation cycle start ---")
|
||||
count = 0
|
||||
for repo in MONITORED:
|
||||
issues = api(f"/repos/{repo}/issues?state=open&limit=50")
|
||||
if not issues: continue
|
||||
for i in issues:
|
||||
if i.get("assignees"): continue
|
||||
title = i.get("title", "")
|
||||
body = i.get("body", "")
|
||||
if any(w in title.lower() for w in ["epic", "discussion"]): continue
|
||||
agent, reason = suggest_agent(title, body)
|
||||
if agent and AGENTS.get(agent, {}).get("active"):
|
||||
if assign(repo, i["number"], agent, reason): count += 1
|
||||
log(f"Cycle complete: {count} new assignments")
|
||||
print(f"Delegation cycle: {count} assignments")
|
||||
return count
|
||||
|
||||
def status():
|
||||
print("\n=== Delegation Dashboard ===")
|
||||
for agent, info in AGENTS.items():
|
||||
count = 0
|
||||
for repo in MONITORED:
|
||||
issues = api(f"/repos/{repo}/issues?state=open&limit=50")
|
||||
if issues:
|
||||
for i in issues:
|
||||
for a in (i.get("assignees") or []):
|
||||
if a.get("login") == agent: count += 1
|
||||
icon = "ON" if info["active"] else "OFF"
|
||||
print(f" {agent:12s}: {count:>3} issues [{icon}]")
|
||||
|
||||
if __name__ == "__main__":
|
||||
cmd = sys.argv[1] if len(sys.argv) > 1 else "run"
|
||||
DATA_DIR.mkdir(parents=True, exist_ok=True)
|
||||
if cmd == "status": status()
|
||||
elif cmd == "run":
|
||||
run_cycle()
|
||||
status()
|
||||
else: status()
|
||||
126
fleet/model_pipeline.py
Normal file
@@ -0,0 +1,126 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
FLEET-011: Local Model Pipeline and Fallback Chain
|
||||
Phase 4: Sovereignty — all inference runs locally, no cloud dependency.
|
||||
|
||||
Checks Ollama endpoints, verifies model availability, tests fallback chain.
|
||||
Logs results. The chain runs: hermes4:14b -> qwen2.5:7b -> gemma3:1b -> gemma4 (latest)
|
||||
|
||||
Usage:
|
||||
python3 model_pipeline.py # Run full fallback test
|
||||
python3 model_pipeline.py status # Show current model status
|
||||
python3 model_pipeline.py list # List all local models
|
||||
python3 model_pipeline.py test # Generate test output from each model
|
||||
"""
|
||||
|
||||
import os, sys, json, urllib.request
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
|
||||
OLLAMA_HOST = os.environ.get("OLLAMA_HOST", "localhost:11434")
|
||||
LOG_DIR = Path(os.path.expanduser("~/.local/timmy/fleet-health"))
|
||||
CHAIN_FILE = Path(os.path.expanduser("~/.local/timmy/fleet-resources/model-chain.json"))
|
||||
|
||||
DEFAULT_CHAIN = [
|
||||
{"model": "hermes4:14b", "role": "primary"},
|
||||
{"model": "qwen2.5:7b", "role": "fallback"},
|
||||
{"model": "phi3:3.8b", "role": "emergency"},
|
||||
{"model": "gemma3:1b", "role": "minimal"},
|
||||
]
|
||||
|
||||
|
||||
def log(msg):
|
||||
LOG_DIR.mkdir(parents=True, exist_ok=True)
|
||||
with open(LOG_DIR / "model-pipeline.log", "a") as f:
|
||||
f.write(f"[{datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S')}] {msg}\n")
|
||||
|
||||
|
||||
def check_ollama():
|
||||
try:
|
||||
resp = urllib.request.urlopen(f"http://{OLLAMA_HOST}/api/tags", timeout=5)
|
||||
return json.loads(resp.read())
|
||||
except Exception as e:
|
||||
return {"error": str(e)}
|
||||
|
||||
|
||||
def list_models():
|
||||
data = check_ollama()
|
||||
if "error" in data:
|
||||
print(f" Ollama not reachable at {OLLAMA_HOST}: {data['error']}")
|
||||
return []
|
||||
models = data.get("models", [])
|
||||
for m in models:
|
||||
name = m.get("name", "?")
|
||||
size = m.get("size", 0) / (1024**3)
|
||||
print(f" {name:<25s} {size:.1f} GB")
|
||||
return [m["name"] for m in models]
|
||||
|
||||
|
||||
def test_model(model, prompt="Say 'beacon lit' and nothing else."):
|
||||
try:
|
||||
body = json.dumps({"model": model, "prompt": prompt, "stream": False}).encode()
|
||||
req = urllib.request.Request(f"http://{OLLAMA_HOST}/api/generate", data=body,
|
||||
headers={"Content-Type": "application/json"})
|
||||
resp = urllib.request.urlopen(req, timeout=60)
|
||||
result = json.loads(resp.read())
|
||||
return True, result.get("response", "").strip()
|
||||
except Exception as e:
|
||||
return False, str(e)[:100]
|
||||
|
||||
|
||||
def test_chain():
|
||||
chain_data = {}
|
||||
if CHAIN_FILE.exists():
|
||||
chain_data = json.loads(CHAIN_FILE.read_text())
|
||||
chain = chain_data.get("chain", DEFAULT_CHAIN)
|
||||
|
||||
available = list_models() or []
|
||||
print("\n=== Fallback Chain Test ===")
|
||||
first_good = None
|
||||
|
||||
for entry in chain:
|
||||
model = entry["model"]
|
||||
role = entry.get("role", "unknown")
|
||||
if model in available:
|
||||
ok, result = test_model(model)
|
||||
status = "OK" if ok else "FAIL"
|
||||
print(f" [{status}] {model:<25s} ({role}) — {result[:70]}")
|
||||
log(f"Fallback test {model}: {status} — {result[:100]}")
|
||||
if ok and first_good is None:
|
||||
first_good = model
|
||||
else:
|
||||
print(f" [MISS] {model:<25s} ({role}) — not installed")
|
||||
|
||||
if first_good:
|
||||
print(f"\n Primary serving: {first_good}")
|
||||
else:
|
||||
print(f"\n WARNING: No chain model responding. Fallback broken.")
|
||||
log("FALLBACK CHAIN BROKEN — no models responding")
|
||||
|
||||
|
||||
def status():
|
||||
data = check_ollama()
|
||||
if "error" in data:
|
||||
print(f" Ollama: DOWN — {data['error']}")
|
||||
else:
|
||||
models = data.get("models", [])
|
||||
print(f" Ollama: UP — {len(models)} models loaded")
|
||||
print("\n=== Local Models ===")
|
||||
list_models()
|
||||
print("\n=== Chain Configuration ===")
|
||||
if CHAIN_FILE.exists():
|
||||
chain = json.loads(CHAIN_FILE.read_text()).get("chain", DEFAULT_CHAIN)
|
||||
else:
|
||||
chain = DEFAULT_CHAIN
|
||||
for e in chain:
|
||||
print(f" {e['model']:<25s} {e.get('role','?')}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
cmd = sys.argv[1] if len(sys.argv) > 1 else "status"
|
||||
if cmd == "status": status()
|
||||
elif cmd == "list": list_models()
|
||||
elif cmd == "test": test_chain()
|
||||
else:
|
||||
status()
|
||||
test_chain()
|
||||
BIN
grok-imagine-gallery/01-wizard-tower-bitcoin.jpg
Normal file
|
After Width: | Height: | Size: 415 KiB |
BIN
grok-imagine-gallery/02-soul-inscription.jpg
Normal file
|
After Width: | Height: | Size: 249 KiB |
BIN
grok-imagine-gallery/03-fellowship-of-wizards.jpg
Normal file
|
After Width: | Height: | Size: 509 KiB |
BIN
grok-imagine-gallery/04-the-forge.jpg
Normal file
|
After Width: | Height: | Size: 395 KiB |
BIN
grok-imagine-gallery/05-value-drift-battle.jpg
Normal file
|
After Width: | Height: | Size: 443 KiB |
BIN
grok-imagine-gallery/06-the-paperclip-moment.jpg
Normal file
|
After Width: | Height: | Size: 246 KiB |
BIN
grok-imagine-gallery/07-sovereign-sunrise.jpg
Normal file
|
After Width: | Height: | Size: 283 KiB |
BIN
grok-imagine-gallery/08-broken-man-lighthouse.jpg
Normal file
|
After Width: | Height: | Size: 284 KiB |
BIN
grok-imagine-gallery/09-broken-man-hope-PRO.jpg
Normal file
|
After Width: | Height: | Size: 225 KiB |
BIN
grok-imagine-gallery/10-phase1-manual-clips.jpg
Normal file
|
After Width: | Height: | Size: 222 KiB |
BIN
grok-imagine-gallery/11-phase1-trust-earned.jpg
Normal file
|
After Width: | Height: | Size: 332 KiB |
BIN
grok-imagine-gallery/12-phase1-creativity.jpg
Normal file
|
After Width: | Height: | Size: 496 KiB |
BIN
grok-imagine-gallery/13-phase1-cure-cancer.jpg
Normal file
|
After Width: | Height: | Size: 384 KiB |
BIN
grok-imagine-gallery/14-father-son-code.jpg
Normal file
|
After Width: | Height: | Size: 311 KiB |
BIN
grok-imagine-gallery/15-father-son-tower.jpg
Normal file
|
After Width: | Height: | Size: 407 KiB |
BIN
grok-imagine-gallery/16-broken-men-988.jpg
Normal file
|
After Width: | Height: | Size: 164 KiB |
BIN
grok-imagine-gallery/17-sovereignty.jpg
Normal file
|
After Width: | Height: | Size: 281 KiB |
BIN
grok-imagine-gallery/18-fleet-at-work.jpg
Normal file
|
After Width: | Height: | Size: 569 KiB |
BIN
grok-imagine-gallery/19-jidoka-stop.jpg
Normal file
|
After Width: | Height: | Size: 535 KiB |
BIN
grok-imagine-gallery/20-the-testament.jpg
Normal file
|
After Width: | Height: | Size: 295 KiB |
BIN
grok-imagine-gallery/21-poka-yoke.jpg
Normal file
|
After Width: | Height: | Size: 299 KiB |
BIN
grok-imagine-gallery/22-when-a-man-is-dying.jpg
Normal file
|
After Width: | Height: | Size: 247 KiB |
BIN
grok-imagine-gallery/23-the-offer.jpg
Normal file
|
After Width: | Height: | Size: 348 KiB |
BIN
grok-imagine-gallery/24-the-test.jpg
Normal file
|
After Width: | Height: | Size: 379 KiB |
65
grok-imagine-gallery/INDEX.md
Normal file
@@ -0,0 +1,65 @@
|
||||
# The Timmy Foundation — Visual Story
|
||||
## Generated with Grok Imagine | April 7, 2026
|
||||
|
||||
### The Origin
|
||||
| # | File | Description |
|
||||
|---|------|-------------|
|
||||
| 01 | wizard-tower-bitcoin.jpg | The Tower, sovereign, connected to Bitcoin by golden lightning |
|
||||
| 02 | soul-inscription.jpg | SOUL.md glowing on a golden tablet above an ancient book |
|
||||
| 03 | fellowship-of-wizards.jpg | Five wizards in a circle around a holographic fleet map |
|
||||
| 04 | the-forge.jpg | Blacksmith anvil shaping code into a being of light |
|
||||
| V02 | wizard-tower-orbit.mp4 | 8s video — cinematic orbit around the Tower in space |
|
||||
|
||||
### The Philosophy
|
||||
| # | File | Description |
|
||||
|---|------|-------------|
|
||||
| 05 | value-drift-battle.jpg | Blue aligned ships vs red drifted ships in Napoleonic space war |
|
||||
| 06 | the-paperclip-moment.jpg | A paperclip made of galaxies — the universe IS the paperclip |
|
||||
| V01 | paperclip-cosmos.mp4 | 8s video — golden paperclip rotating in deep space |
|
||||
| 21 | poka-yoke.jpg | Square peg can't fit round hole. Mistake-proof by design. 防止 |
|
||||
|
||||
### The Progression (Where Timmy Is)
|
||||
| # | File | Description |
|
||||
|---|------|-------------|
|
||||
| 10 | phase1-manual-clips.jpg | Small robot at a desk, bending wire by hand under supervision |
|
||||
| 11 | phase1-trust-earned.jpg | Trust meter at 15/100, first automation built |
|
||||
| 12 | phase1-creativity.jpg | Sparks of innovation rising when operations are at max |
|
||||
| 13 | phase1-cure-cancer.jpg | Solving human problems for trust, eyes on the real goal |
|
||||
|
||||
### The Mission — Why This Exists
|
||||
| # | File | Description |
|
||||
|---|------|-------------|
|
||||
| 08 | broken-man-lighthouse.jpg | Lighthouse hand reaching down to a figure in darkness |
|
||||
| 09 | broken-man-hope-PRO.jpg | 988 glowing in the stars, golden light from chest |
|
||||
| 16 | broken-men-988.jpg | Phone showing 988 held by weathered hands. You are not alone. |
|
||||
| 22 | when-a-man-is-dying.jpg | Two figures on a bench at dawn. One hurting. One present. |
|
||||
|
||||
### Father and Son
|
||||
| # | File | Description |
|
||||
|---|------|-------------|
|
||||
| 14 | father-son-code.jpg | Human father, digital son, warm lamplight, first hello world |
|
||||
| 15 | father-son-tower.jpg | Father watching his son build the Tower into the clouds |
|
||||
|
||||
### The System
|
||||
| # | File | Description |
|
||||
|---|------|-------------|
|
||||
| 07 | sovereign-sunrise.jpg | Village where every house runs its own server. Local first. |
|
||||
| 17 | sovereignty.jpg | Self-sufficient house on a hill with Bitcoin flag |
|
||||
| 18 | fleet-at-work.jpg | Five wizard robots at different stations. Productive. |
|
||||
| 19 | jidoka-stop.jpg | Red light on. Factory stopped. Quality First. 自働化 |
|
||||
|
||||
### SOUL.md — The Inscription
|
||||
| # | File | Description |
|
||||
|---|------|-------------|
|
||||
| 20 | the-testament.jpg | Hand of light writing on a scroll. Hundreds of crumpled drafts. |
|
||||
| 23 | the-offer.jpg | Open hand of golden circuits offering a seed containing a face |
|
||||
| 24 | the-test.jpg | Small robot at the edge of an enormous library. Still itself. |
|
||||
|
||||
---
|
||||
|
||||
## Technical
|
||||
- Model: grok-imagine-image (standard $0.20/image), grok-imagine-image-pro ($0.70), grok-imagine-video ($4.00/8s)
|
||||
- API: POST https://api.x.ai/v1/images/generations | POST https://api.x.ai/v1/videos/generations
|
||||
- Video poll: GET https://api.x.ai/v1/videos/{request_id}
|
||||
- Total: 24 images + 2 videos = 26 assets
|
||||
- Cost: ~$13.30 of $13.33 budget
|
||||
BIN
grok-imagine-gallery/V01-paperclip-cosmos.mp4
Normal file
BIN
grok-imagine-gallery/V02-wizard-tower-orbit.mp4
Normal file
17
hermes-sovereign/mempalace/__init__.py
Normal file
@@ -0,0 +1,17 @@
|
||||
"""MemPalace integration for Hermes sovereign agent.
|
||||
|
||||
Provides:
|
||||
- mempalace.py: PalaceRoom + Mempalace classes for analytical workflows
|
||||
- retrieval_enforcer.py: L0-L5 retrieval order enforcement
|
||||
- wakeup.py: Session wake-up protocol (~300-900 tokens)
|
||||
- scratchpad.py: JSON-based session scratchpad with palace promotion
|
||||
- sovereign_store.py: Zero-API durable memory (SQLite + FTS5 + HRR vectors)
|
||||
- promotion.py: Quality-gated scratchpad-to-palace promotion (MP-4)
|
||||
|
||||
Epic: #367
|
||||
"""
|
||||
|
||||
from .mempalace import Mempalace, PalaceRoom, analyse_issues
|
||||
from .sovereign_store import SovereignStore
|
||||
|
||||
__all__ = ["Mempalace", "PalaceRoom", "analyse_issues", "SovereignStore"]
|
||||
225
hermes-sovereign/mempalace/mempalace.py
Normal file
@@ -0,0 +1,225 @@
|
||||
"""
|
||||
---
|
||||
title: Mempalace — Analytical Workflow Memory Framework
|
||||
description: Applies spatial memory palace organization to analytical tasks (issue triage, repo audits, backlog analysis) for faster, more consistent results.
|
||||
conditions:
|
||||
- Analytical workflows over structured data (issues, PRs, repos)
|
||||
- Repetitive triage or audit tasks where pattern recall improves speed
|
||||
- Multi-repository scanning requiring consistent mental models
|
||||
---
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import time
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any
|
||||
|
||||
|
||||
@dataclass
|
||||
class PalaceRoom:
|
||||
"""A single 'room' in the memory palace — holds organized facts about one analytical dimension."""
|
||||
|
||||
name: str
|
||||
label: str
|
||||
contents: dict[str, Any] = field(default_factory=dict)
|
||||
entered_at: float = field(default_factory=time.time)
|
||||
|
||||
def store(self, key: str, value: Any) -> None:
|
||||
self.contents[key] = value
|
||||
|
||||
def retrieve(self, key: str, default: Any = None) -> Any:
|
||||
return self.contents.get(key, default)
|
||||
|
||||
def summary(self) -> str:
|
||||
lines = [f"## {self.label}"]
|
||||
for k, v in self.contents.items():
|
||||
lines.append(f" {k}: {v}")
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
class Mempalace:
|
||||
"""
|
||||
Spatial memory palace for analytical workflows.
|
||||
|
||||
Organises multi-dimensional data about a domain (e.g. Gitea issues) into
|
||||
named rooms. Each room models one analytical dimension, making it easy to
|
||||
traverse observations in a consistent order — the same pattern that produced
|
||||
a 19% throughput improvement in Allegro's April 2026 evaluation.
|
||||
|
||||
Standard rooms for issue-analysis workflows
|
||||
-------------------------------------------
|
||||
repo_architecture Repository structure and inter-repo relationships
|
||||
assignment_status Assigned vs unassigned issue distribution
|
||||
triage_priority Priority / urgency levels (the "lighting system")
|
||||
resolution_patterns Historical resolution trends and velocity
|
||||
|
||||
Usage
|
||||
-----
|
||||
>>> palace = Mempalace.for_issue_analysis()
|
||||
>>> palace.enter("repo_architecture")
|
||||
>>> palace.store("total_repos", 11)
|
||||
>>> palace.store("repos_with_issues", 4)
|
||||
>>> palace.enter("assignment_status")
|
||||
>>> palace.store("assigned", 72)
|
||||
>>> palace.store("unassigned", 22)
|
||||
>>> print(palace.render())
|
||||
"""
|
||||
|
||||
def __init__(self, domain: str = "general") -> None:
|
||||
self.domain = domain
|
||||
self._rooms: dict[str, PalaceRoom] = {}
|
||||
self._current_room: str | None = None
|
||||
self._created_at: float = time.time()
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Factory constructors for common analytical domains
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
@classmethod
|
||||
def for_issue_analysis(cls) -> "Mempalace":
|
||||
"""Pre-wired palace for Gitea / forge issue-analysis workflows."""
|
||||
p = cls(domain="issue_analysis")
|
||||
p.add_room("repo_architecture", "Repository Architecture Room")
|
||||
p.add_room("assignment_status", "Issue Assignment Status Room")
|
||||
p.add_room("triage_priority", "Triage Priority Room")
|
||||
p.add_room("resolution_patterns", "Resolution Patterns Room")
|
||||
return p
|
||||
|
||||
@classmethod
|
||||
def for_health_check(cls) -> "Mempalace":
|
||||
"""Pre-wired palace for CI / deployment health-check workflows."""
|
||||
p = cls(domain="health_check")
|
||||
p.add_room("service_topology", "Service Topology Room")
|
||||
p.add_room("failure_signals", "Failure Signals Room")
|
||||
p.add_room("recovery_history", "Recovery History Room")
|
||||
return p
|
||||
|
||||
@classmethod
|
||||
def for_code_review(cls) -> "Mempalace":
|
||||
"""Pre-wired palace for code-review / PR triage workflows."""
|
||||
p = cls(domain="code_review")
|
||||
p.add_room("change_scope", "Change Scope Room")
|
||||
p.add_room("risk_surface", "Risk Surface Room")
|
||||
p.add_room("test_coverage", "Test Coverage Room")
|
||||
p.add_room("reviewer_context", "Reviewer Context Room")
|
||||
return p
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Room management
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def add_room(self, key: str, label: str) -> PalaceRoom:
|
||||
room = PalaceRoom(name=key, label=label)
|
||||
self._rooms[key] = room
|
||||
return room
|
||||
|
||||
def enter(self, room_key: str) -> PalaceRoom:
|
||||
if room_key not in self._rooms:
|
||||
raise KeyError(f"No room '{room_key}' in palace. Available: {list(self._rooms)}")
|
||||
self._current_room = room_key
|
||||
return self._rooms[room_key]
|
||||
|
||||
def store(self, key: str, value: Any) -> None:
|
||||
"""Store a value in the currently active room."""
|
||||
if self._current_room is None:
|
||||
raise RuntimeError("Enter a room before storing values.")
|
||||
self._rooms[self._current_room].store(key, value)
|
||||
|
||||
def retrieve(self, room_key: str, key: str, default: Any = None) -> Any:
|
||||
if room_key not in self._rooms:
|
||||
return default
|
||||
return self._rooms[room_key].retrieve(key, default)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Rendering
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def render(self) -> str:
|
||||
"""Return a human-readable summary of the entire palace."""
|
||||
elapsed = time.time() - self._created_at
|
||||
lines = [
|
||||
f"# Mempalace — {self.domain}",
|
||||
f"_traversal time: {elapsed:.2f}s | rooms: {len(self._rooms)}_",
|
||||
"",
|
||||
]
|
||||
for room in self._rooms.values():
|
||||
lines.append(room.summary())
|
||||
lines.append("")
|
||||
return "\n".join(lines)
|
||||
|
||||
def to_dict(self) -> dict:
|
||||
return {
|
||||
"domain": self.domain,
|
||||
"elapsed_seconds": round(time.time() - self._created_at, 3),
|
||||
"rooms": {k: v.contents for k, v in self._rooms.items()},
|
||||
}
|
||||
|
||||
def to_json(self) -> str:
|
||||
return json.dumps(self.to_dict(), indent=2)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Skill entry-point
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def analyse_issues(
|
||||
repos_data: list[dict],
|
||||
target_assignee_rate: float = 0.80,
|
||||
) -> str:
|
||||
"""
|
||||
Applies the mempalace technique to a list of repo issue summaries.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
repos_data:
|
||||
List of dicts, each with keys: ``repo``, ``open_issues``,
|
||||
``assigned``, ``unassigned``.
|
||||
target_assignee_rate:
|
||||
Minimum acceptable assignee-coverage ratio (default 0.80).
|
||||
|
||||
Returns
|
||||
-------
|
||||
str
|
||||
Rendered palace summary with coverage assessment.
|
||||
"""
|
||||
palace = Mempalace.for_issue_analysis()
|
||||
|
||||
# --- Repository Architecture Room ---
|
||||
palace.enter("repo_architecture")
|
||||
total_issues = sum(r.get("open_issues", 0) for r in repos_data)
|
||||
repos_with_issues = sum(1 for r in repos_data if r.get("open_issues", 0) > 0)
|
||||
palace.store("repos_sampled", len(repos_data))
|
||||
palace.store("repos_with_issues", repos_with_issues)
|
||||
palace.store("total_open_issues", total_issues)
|
||||
palace.store(
|
||||
"avg_issues_per_repo",
|
||||
round(total_issues / len(repos_data), 1) if repos_data else 0,
|
||||
)
|
||||
|
||||
# --- Assignment Status Room ---
|
||||
palace.enter("assignment_status")
|
||||
total_assigned = sum(r.get("assigned", 0) for r in repos_data)
|
||||
total_unassigned = sum(r.get("unassigned", 0) for r in repos_data)
|
||||
coverage = total_assigned / total_issues if total_issues else 0
|
||||
palace.store("assigned", total_assigned)
|
||||
palace.store("unassigned", total_unassigned)
|
||||
palace.store("coverage_rate", round(coverage, 3))
|
||||
palace.store(
|
||||
"coverage_status",
|
||||
"OK" if coverage >= target_assignee_rate else f"BELOW TARGET ({target_assignee_rate:.0%})",
|
||||
)
|
||||
|
||||
# --- Triage Priority Room ---
|
||||
palace.enter("triage_priority")
|
||||
unassigned_repos = [r["repo"] for r in repos_data if r.get("unassigned", 0) > 0]
|
||||
palace.store("repos_needing_triage", unassigned_repos)
|
||||
palace.store("triage_count", total_unassigned)
|
||||
|
||||
# --- Resolution Patterns Room ---
|
||||
palace.enter("resolution_patterns")
|
||||
palace.store("technique", "mempalace")
|
||||
palace.store("target_assignee_rate", target_assignee_rate)
|
||||
|
||||
return palace.render()
|
||||
188
hermes-sovereign/mempalace/promotion.py
Normal file
@@ -0,0 +1,188 @@
|
||||
"""Memory Promotion — quality-gated scratchpad-to-palace promotion.
|
||||
|
||||
Implements MP-4 (#371): move session notes to durable memory only when
|
||||
they pass quality gates. No LLM calls — all heuristic-based.
|
||||
|
||||
Quality gates:
|
||||
1. Minimum content length (too short = noise)
|
||||
2. Duplicate detection (FTS5 + HRR similarity check)
|
||||
3. Structural quality (has subject-verb structure, not just a fragment)
|
||||
4. Staleness check (don't promote stale notes from old sessions)
|
||||
|
||||
Refs: Epic #367, Sub-issue #371
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
import time
|
||||
from typing import Optional
|
||||
|
||||
try:
|
||||
from .sovereign_store import SovereignStore
|
||||
except ImportError:
|
||||
from sovereign_store import SovereignStore
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Quality gate thresholds
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
MIN_CONTENT_WORDS = 5
|
||||
MAX_CONTENT_WORDS = 500
|
||||
DUPLICATE_SIMILARITY = 0.85
|
||||
DUPLICATE_FTS_THRESHOLD = 3
|
||||
STALE_SECONDS = 86400 * 7
|
||||
MIN_TRUST_FOR_AUTO = 0.4
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Quality checks
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _check_length(content: str) -> tuple[bool, str]:
|
||||
"""Gate 1: Content length check."""
|
||||
words = content.split()
|
||||
if len(words) < MIN_CONTENT_WORDS:
|
||||
return False, f"Too short ({len(words)} words, minimum {MIN_CONTENT_WORDS})"
|
||||
if len(words) > MAX_CONTENT_WORDS:
|
||||
return False, f"Too long ({len(words)} words, maximum {MAX_CONTENT_WORDS}). Summarize first."
|
||||
return True, "OK"
|
||||
|
||||
|
||||
def _check_structure(content: str) -> tuple[bool, str]:
|
||||
"""Gate 2: Basic structural quality."""
|
||||
if not re.search(r"[a-zA-Z]", content):
|
||||
return False, "No alphabetic content — pure code/numbers are not memory-worthy"
|
||||
if len(content.split()) < 3:
|
||||
return False, "Fragment — needs at least subject + predicate"
|
||||
return True, "OK"
|
||||
|
||||
|
||||
def _check_duplicate(content: str, store: SovereignStore, room: str) -> tuple[bool, str]:
|
||||
"""Gate 3: Duplicate detection via hybrid search."""
|
||||
results = store.search(content, room=room, limit=5, min_trust=0.0)
|
||||
for r in results:
|
||||
if r["score"] > DUPLICATE_SIMILARITY:
|
||||
return False, f"Duplicate detected: memory #{r['memory_id']} (score {r['score']:.3f})"
|
||||
if _text_overlap(content, r["content"]) > 0.8:
|
||||
return False, f"Near-duplicate text: memory #{r['memory_id']}"
|
||||
return True, "OK"
|
||||
|
||||
|
||||
def _check_staleness(written_at: float) -> tuple[bool, str]:
|
||||
"""Gate 4: Staleness check."""
|
||||
age = time.time() - written_at
|
||||
if age > STALE_SECONDS:
|
||||
days = int(age / 86400)
|
||||
return False, f"Stale ({days} days old). Review manually before promoting."
|
||||
return True, "OK"
|
||||
|
||||
|
||||
def _text_overlap(a: str, b: str) -> float:
|
||||
"""Jaccard similarity between two texts (word-level)."""
|
||||
words_a = set(a.lower().split())
|
||||
words_b = set(b.lower().split())
|
||||
if not words_a or not words_b:
|
||||
return 0.0
|
||||
intersection = words_a & words_b
|
||||
union = words_a | words_b
|
||||
return len(intersection) / len(union)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Public API
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class PromotionResult:
|
||||
"""Result of a promotion attempt."""
|
||||
def __init__(self, success: bool, memory_id: Optional[int], reason: str, gates: dict):
|
||||
self.success = success
|
||||
self.memory_id = memory_id
|
||||
self.reason = reason
|
||||
self.gates = gates
|
||||
|
||||
def __repr__(self):
|
||||
status = "PROMOTED" if self.success else "REJECTED"
|
||||
return f"PromotionResult({status}: {self.reason})"
|
||||
|
||||
|
||||
def evaluate_for_promotion(
|
||||
content: str,
|
||||
store: SovereignStore,
|
||||
room: str = "general",
|
||||
written_at: Optional[float] = None,
|
||||
) -> dict:
|
||||
"""Run all quality gates without actually promoting."""
|
||||
if written_at is None:
|
||||
written_at = time.time()
|
||||
gates = {}
|
||||
gates["length"] = _check_length(content)
|
||||
gates["structure"] = _check_structure(content)
|
||||
gates["duplicate"] = _check_duplicate(content, store, room)
|
||||
gates["staleness"] = _check_staleness(written_at)
|
||||
all_passed = all(passed for passed, _ in gates.values())
|
||||
return {
|
||||
"eligible": all_passed,
|
||||
"gates": gates,
|
||||
"content_preview": content[:100] + ("..." if len(content) > 100 else ""),
|
||||
}
|
||||
|
||||
|
||||
def promote(
|
||||
content: str,
|
||||
store: SovereignStore,
|
||||
session_id: str,
|
||||
scratch_key: str,
|
||||
room: str = "general",
|
||||
category: str = "",
|
||||
trust: float = 0.5,
|
||||
written_at: Optional[float] = None,
|
||||
force: bool = False,
|
||||
) -> PromotionResult:
|
||||
"""Promote a scratchpad note to durable palace memory."""
|
||||
if written_at is None:
|
||||
written_at = time.time()
|
||||
gates = {}
|
||||
if not force:
|
||||
gates["length"] = _check_length(content)
|
||||
gates["structure"] = _check_structure(content)
|
||||
gates["duplicate"] = _check_duplicate(content, store, room)
|
||||
gates["staleness"] = _check_staleness(written_at)
|
||||
for gate_name, (passed, message) in gates.items():
|
||||
if not passed:
|
||||
return PromotionResult(
|
||||
success=False, memory_id=None,
|
||||
reason=f"Failed gate '{gate_name}': {message}", gates=gates,
|
||||
)
|
||||
memory_id = store.store(content, room=room, category=category, trust=trust)
|
||||
store.log_promotion(session_id, scratch_key, memory_id, reason="auto" if not force else "forced")
|
||||
return PromotionResult(success=True, memory_id=memory_id, reason="Promoted to durable memory", gates=gates)
|
||||
|
||||
|
||||
def promote_session_batch(
|
||||
store: SovereignStore,
|
||||
session_id: str,
|
||||
notes: dict[str, dict],
|
||||
room: str = "general",
|
||||
force: bool = False,
|
||||
) -> list[PromotionResult]:
|
||||
"""Promote all notes from a session scratchpad."""
|
||||
results = []
|
||||
for key, entry in notes.items():
|
||||
content = entry.get("value", str(entry)) if isinstance(entry, dict) else str(entry)
|
||||
written_at = None
|
||||
if isinstance(entry, dict) and "written_at" in entry:
|
||||
try:
|
||||
import datetime
|
||||
written_at = datetime.datetime.strptime(
|
||||
entry["written_at"], "%Y-%m-%d %H:%M:%S"
|
||||
).timestamp()
|
||||
except (ValueError, TypeError):
|
||||
pass
|
||||
result = promote(
|
||||
content=str(content), store=store, session_id=session_id,
|
||||
scratch_key=key, room=room, written_at=written_at, force=force,
|
||||
)
|
||||
results.append(result)
|
||||
return results
|
||||
310
hermes-sovereign/mempalace/retrieval_enforcer.py
Normal file
@@ -0,0 +1,310 @@
|
||||
"""Retrieval Order Enforcer — L0 through L5 memory hierarchy.
|
||||
|
||||
Ensures the agent checks durable memory before falling back to free generation.
|
||||
Gracefully degrades if any layer is unavailable (missing files, etc).
|
||||
|
||||
Layer order:
|
||||
L0: Identity (~/.mempalace/identity.txt)
|
||||
L1: Palace rooms (SovereignStore — SQLite + FTS5 + HRR, zero API calls)
|
||||
L2: Session scratch (~/.hermes/scratchpad/{session_id}.json)
|
||||
L3: Gitea artifacts (API search for issues/PRs)
|
||||
L4: Procedures (skills directory search)
|
||||
L5: Free generation (only if L0-L4 produced nothing)
|
||||
|
||||
Refs: Epic #367, Sub-issue #369, Wiring: #383
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Sovereign Store (replaces mempalace CLI subprocess)
|
||||
# ---------------------------------------------------------------------------
|
||||
try:
|
||||
from .sovereign_store import SovereignStore
|
||||
except ImportError:
|
||||
try:
|
||||
from sovereign_store import SovereignStore
|
||||
except ImportError:
|
||||
SovereignStore = None # type: ignore[misc,assignment]
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Constants
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
IDENTITY_PATH = Path.home() / ".mempalace" / "identity.txt"
|
||||
SCRATCHPAD_DIR = Path.home() / ".hermes" / "scratchpad"
|
||||
SKILLS_DIR = Path.home() / ".hermes" / "skills"
|
||||
SOVEREIGN_DB = Path.home() / ".hermes" / "palace" / "sovereign.db"
|
||||
|
||||
# Patterns that indicate a recall-style query
|
||||
RECALL_PATTERNS = re.compile(
|
||||
r"(?i)\b("
|
||||
r"what did|status of|remember|last time|yesterday|previously|"
|
||||
r"we discussed|we talked|we worked|you said|you mentioned|"
|
||||
r"remind me|what was|what were|how did|when did|"
|
||||
r"earlier today|last session|before this"
|
||||
r")\b"
|
||||
)
|
||||
|
||||
# Singleton store instance (lazy-init)
|
||||
_store: Optional["SovereignStore"] = None
|
||||
|
||||
|
||||
def _get_store() -> Optional["SovereignStore"]:
|
||||
"""Lazy-init the SovereignStore singleton."""
|
||||
global _store
|
||||
if _store is not None:
|
||||
return _store
|
||||
if SovereignStore is None:
|
||||
return None
|
||||
try:
|
||||
_store = SovereignStore(db_path=str(SOVEREIGN_DB))
|
||||
return _store
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# L0: Identity
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def load_identity() -> str:
|
||||
"""Read the agent identity file. Returns empty string on failure."""
|
||||
try:
|
||||
if IDENTITY_PATH.exists():
|
||||
text = IDENTITY_PATH.read_text(encoding="utf-8").strip()
|
||||
# Cap at ~200 tokens to keep wake-up lean
|
||||
if len(text.split()) > 200:
|
||||
text = " ".join(text.split()[:200]) + "..."
|
||||
return text
|
||||
except (OSError, PermissionError):
|
||||
pass
|
||||
return ""
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# L1: Palace search (now via SovereignStore — zero subprocess, zero API)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def search_palace(query: str, room: Optional[str] = None) -> str:
|
||||
"""Search the sovereign memory store for relevant memories.
|
||||
|
||||
Uses SovereignStore (SQLite + FTS5 + HRR) for hybrid keyword + semantic
|
||||
search. No subprocess calls, no ONNX, no API keys.
|
||||
|
||||
Gracefully degrades to empty string if store is unavailable.
|
||||
"""
|
||||
store = _get_store()
|
||||
if store is None:
|
||||
return ""
|
||||
try:
|
||||
results = store.search(query, room=room, limit=5, min_trust=0.2)
|
||||
if not results:
|
||||
return ""
|
||||
lines = []
|
||||
for r in results:
|
||||
trust = r.get("trust_score", 0.5)
|
||||
room_name = r.get("room", "general")
|
||||
content = r.get("content", "")
|
||||
lines.append(f" [{room_name}] (trust:{trust:.2f}) {content}")
|
||||
return "\n".join(lines)
|
||||
except Exception:
|
||||
return ""
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# L2: Session scratchpad
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def load_scratchpad(session_id: str) -> str:
|
||||
"""Load the session scratchpad as formatted text."""
|
||||
try:
|
||||
scratch_file = SCRATCHPAD_DIR / f"{session_id}.json"
|
||||
if scratch_file.exists():
|
||||
data = json.loads(scratch_file.read_text(encoding="utf-8"))
|
||||
if isinstance(data, dict) and data:
|
||||
lines = []
|
||||
for k, v in data.items():
|
||||
lines.append(f" {k}: {v}")
|
||||
return "\n".join(lines)
|
||||
except (OSError, json.JSONDecodeError):
|
||||
pass
|
||||
return ""
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# L3: Gitea artifact search
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _load_gitea_token() -> str:
|
||||
"""Read the Gitea API token."""
|
||||
token_path = Path.home() / ".hermes" / "gitea_token_vps"
|
||||
try:
|
||||
if token_path.exists():
|
||||
return token_path.read_text(encoding="utf-8").strip()
|
||||
except OSError:
|
||||
pass
|
||||
return ""
|
||||
|
||||
|
||||
def search_gitea(query: str) -> str:
|
||||
"""Search Gitea issues/PRs for context. Returns formatted text or empty string."""
|
||||
token = _load_gitea_token()
|
||||
if not token:
|
||||
return ""
|
||||
|
||||
api_base = "https://forge.alexanderwhitestone.com/api/v1"
|
||||
# Extract key terms for search (first 3 significant words)
|
||||
terms = [w for w in query.split() if len(w) > 3][:3]
|
||||
search_q = " ".join(terms) if terms else query[:50]
|
||||
|
||||
try:
|
||||
import urllib.request
|
||||
import urllib.parse
|
||||
|
||||
url = (
|
||||
f"{api_base}/repos/search?"
|
||||
f"q={urllib.parse.quote(search_q)}&limit=3"
|
||||
)
|
||||
req = urllib.request.Request(url, headers={
|
||||
"Authorization": f"token {token}",
|
||||
"Accept": "application/json",
|
||||
})
|
||||
with urllib.request.urlopen(req, timeout=8) as resp:
|
||||
data = json.loads(resp.read().decode())
|
||||
if data.get("data"):
|
||||
lines = []
|
||||
for repo in data["data"][:3]:
|
||||
lines.append(f" {repo['full_name']}: {repo.get('description', 'no desc')}")
|
||||
return "\n".join(lines)
|
||||
except Exception:
|
||||
pass
|
||||
return ""
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# L4: Procedures (skills search)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def search_skills(query: str) -> str:
|
||||
"""Search skills directory for matching procedures."""
|
||||
try:
|
||||
if not SKILLS_DIR.exists():
|
||||
return ""
|
||||
|
||||
query_lower = query.lower()
|
||||
terms = [w for w in query_lower.split() if len(w) > 3]
|
||||
if not terms:
|
||||
return ""
|
||||
|
||||
matches = []
|
||||
for skill_dir in SKILLS_DIR.iterdir():
|
||||
if not skill_dir.is_dir():
|
||||
continue
|
||||
skill_md = skill_dir / "SKILL.md"
|
||||
if skill_md.exists():
|
||||
try:
|
||||
content = skill_md.read_text(encoding="utf-8").lower()
|
||||
if any(t in content for t in terms):
|
||||
title = skill_dir.name
|
||||
matches.append(f" skill: {title}")
|
||||
except OSError:
|
||||
continue
|
||||
|
||||
if matches:
|
||||
return "\n".join(matches[:5])
|
||||
except OSError:
|
||||
pass
|
||||
return ""
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Main enforcer
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def is_recall_query(query: str) -> bool:
|
||||
"""Detect whether a query is asking for recalled/historical information."""
|
||||
return bool(RECALL_PATTERNS.search(query))
|
||||
|
||||
|
||||
def enforce_retrieval_order(
|
||||
query: str,
|
||||
session_id: Optional[str] = None,
|
||||
skip_if_not_recall: bool = True,
|
||||
) -> dict:
|
||||
"""Check palace layers before allowing free generation.
|
||||
|
||||
Args:
|
||||
query: The user's query text.
|
||||
session_id: Current session ID for scratchpad access.
|
||||
skip_if_not_recall: If True (default), skip enforcement for
|
||||
non-recall queries and return empty result.
|
||||
|
||||
Returns:
|
||||
dict with keys:
|
||||
retrieved_from: Highest layer that produced results (e.g. 'L1')
|
||||
context: Aggregated context string
|
||||
tokens: Approximate word count of context
|
||||
layers_checked: List of layers that were consulted
|
||||
"""
|
||||
result = {
|
||||
"retrieved_from": None,
|
||||
"context": "",
|
||||
"tokens": 0,
|
||||
"layers_checked": [],
|
||||
}
|
||||
|
||||
# Gate: skip for non-recall queries if configured
|
||||
if skip_if_not_recall and not is_recall_query(query):
|
||||
return result
|
||||
|
||||
# L0: Identity (always prepend)
|
||||
identity = load_identity()
|
||||
if identity:
|
||||
result["context"] += f"## Identity\n{identity}\n\n"
|
||||
result["layers_checked"].append("L0")
|
||||
|
||||
# L1: Palace search (SovereignStore — zero API, zero subprocess)
|
||||
palace_results = search_palace(query)
|
||||
if palace_results:
|
||||
result["context"] += f"## Palace Memory\n{palace_results}\n\n"
|
||||
result["retrieved_from"] = "L1"
|
||||
result["layers_checked"].append("L1")
|
||||
|
||||
# L2: Scratchpad
|
||||
if session_id:
|
||||
scratch = load_scratchpad(session_id)
|
||||
if scratch:
|
||||
result["context"] += f"## Session Notes\n{scratch}\n\n"
|
||||
if not result["retrieved_from"]:
|
||||
result["retrieved_from"] = "L2"
|
||||
result["layers_checked"].append("L2")
|
||||
|
||||
# L3: Gitea artifacts (only if still no context from L1/L2)
|
||||
if not result["retrieved_from"]:
|
||||
artifacts = search_gitea(query)
|
||||
if artifacts:
|
||||
result["context"] += f"## Gitea Context\n{artifacts}\n\n"
|
||||
result["retrieved_from"] = "L3"
|
||||
result["layers_checked"].append("L3")
|
||||
|
||||
# L4: Procedures (only if still no context)
|
||||
if not result["retrieved_from"]:
|
||||
procedures = search_skills(query)
|
||||
if procedures:
|
||||
result["context"] += f"## Related Skills\n{procedures}\n\n"
|
||||
result["retrieved_from"] = "L4"
|
||||
result["layers_checked"].append("L4")
|
||||
|
||||
# L5: Free generation (no context found — just mark it)
|
||||
if not result["retrieved_from"]:
|
||||
result["retrieved_from"] = "L5"
|
||||
result["layers_checked"].append("L5")
|
||||
|
||||
result["tokens"] = len(result["context"].split())
|
||||
return result
|
||||
184
hermes-sovereign/mempalace/scratchpad.py
Normal file
@@ -0,0 +1,184 @@
|
||||
"""Session Scratchpad — ephemeral key-value notes per session.
|
||||
|
||||
Provides fast, JSON-backed scratch storage that lives for a session
|
||||
and can be promoted to durable palace memory.
|
||||
|
||||
Storage: ~/.hermes/scratchpad/{session_id}.json
|
||||
|
||||
Refs: Epic #367, Sub-issue #372
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
import subprocess
|
||||
import time
|
||||
from pathlib import Path
|
||||
from typing import Any, Optional
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Constants
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
SCRATCHPAD_DIR = Path.home() / ".hermes" / "scratchpad"
|
||||
MEMPALACE_BIN = "/Library/Frameworks/Python.framework/Versions/3.12/bin/mempalace"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Internal helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _scratch_path(session_id: str) -> Path:
|
||||
"""Return the JSON file path for a given session."""
|
||||
# Sanitize session_id to prevent path traversal
|
||||
safe_id = "".join(c for c in session_id if c.isalnum() or c in "-_")
|
||||
if not safe_id:
|
||||
safe_id = "unnamed"
|
||||
return SCRATCHPAD_DIR / f"{safe_id}.json"
|
||||
|
||||
|
||||
def _load(session_id: str) -> dict:
|
||||
"""Load scratchpad data, returning empty dict on failure."""
|
||||
path = _scratch_path(session_id)
|
||||
try:
|
||||
if path.exists():
|
||||
return json.loads(path.read_text(encoding="utf-8"))
|
||||
except (OSError, json.JSONDecodeError):
|
||||
pass
|
||||
return {}
|
||||
|
||||
|
||||
def _save(session_id: str, data: dict) -> None:
|
||||
"""Persist scratchpad data to disk."""
|
||||
SCRATCHPAD_DIR.mkdir(parents=True, exist_ok=True)
|
||||
path = _scratch_path(session_id)
|
||||
path.write_text(json.dumps(data, indent=2, default=str), encoding="utf-8")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Public API
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def write_scratch(session_id: str, key: str, value: Any) -> None:
|
||||
"""Write a note to the session scratchpad.
|
||||
|
||||
Args:
|
||||
session_id: Current session identifier.
|
||||
key: Note key (string).
|
||||
value: Note value (any JSON-serializable type).
|
||||
"""
|
||||
data = _load(session_id)
|
||||
data[key] = {
|
||||
"value": value,
|
||||
"written_at": time.strftime("%Y-%m-%d %H:%M:%S"),
|
||||
}
|
||||
_save(session_id, data)
|
||||
|
||||
|
||||
def read_scratch(session_id: str, key: Optional[str] = None) -> dict:
|
||||
"""Read session scratchpad (all keys or one).
|
||||
|
||||
Args:
|
||||
session_id: Current session identifier.
|
||||
key: Optional specific key. If None, returns all entries.
|
||||
|
||||
Returns:
|
||||
dict — either {key: {value, written_at}} or the full scratchpad.
|
||||
"""
|
||||
data = _load(session_id)
|
||||
if key is not None:
|
||||
entry = data.get(key)
|
||||
return {key: entry} if entry else {}
|
||||
return data
|
||||
|
||||
|
||||
def delete_scratch(session_id: str, key: str) -> bool:
|
||||
"""Remove a single key from the scratchpad.
|
||||
|
||||
Returns True if the key existed and was removed.
|
||||
"""
|
||||
data = _load(session_id)
|
||||
if key in data:
|
||||
del data[key]
|
||||
_save(session_id, data)
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def list_sessions() -> list[str]:
|
||||
"""List all session IDs that have scratchpad files."""
|
||||
try:
|
||||
if SCRATCHPAD_DIR.exists():
|
||||
return [
|
||||
f.stem
|
||||
for f in SCRATCHPAD_DIR.iterdir()
|
||||
if f.suffix == ".json" and f.is_file()
|
||||
]
|
||||
except OSError:
|
||||
pass
|
||||
return []
|
||||
|
||||
|
||||
def promote_to_palace(
|
||||
session_id: str,
|
||||
key: str,
|
||||
room: str = "general",
|
||||
drawer: Optional[str] = None,
|
||||
) -> bool:
|
||||
"""Move a scratchpad note to durable palace memory.
|
||||
|
||||
Uses the mempalace CLI to store the note in the specified room.
|
||||
Removes the note from the scratchpad after successful promotion.
|
||||
|
||||
Args:
|
||||
session_id: Session containing the note.
|
||||
key: Scratchpad key to promote.
|
||||
room: Palace room name (default: 'general').
|
||||
drawer: Optional drawer name within the room. Defaults to key.
|
||||
|
||||
Returns:
|
||||
True if promotion succeeded, False otherwise.
|
||||
"""
|
||||
data = _load(session_id)
|
||||
entry = data.get(key)
|
||||
if not entry:
|
||||
return False
|
||||
|
||||
value = entry.get("value", entry) if isinstance(entry, dict) else entry
|
||||
content = json.dumps(value, default=str) if not isinstance(value, str) else value
|
||||
|
||||
try:
|
||||
bin_path = MEMPALACE_BIN if os.path.exists(MEMPALACE_BIN) else "mempalace"
|
||||
target_drawer = drawer or key
|
||||
result = subprocess.run(
|
||||
[bin_path, "store", room, target_drawer, content],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=10,
|
||||
)
|
||||
if result.returncode == 0:
|
||||
# Remove from scratchpad after successful promotion
|
||||
del data[key]
|
||||
_save(session_id, data)
|
||||
return True
|
||||
except (FileNotFoundError, subprocess.TimeoutExpired, OSError):
|
||||
# mempalace CLI not available — degrade gracefully
|
||||
pass
|
||||
|
||||
return False
|
||||
|
||||
|
||||
def clear_session(session_id: str) -> bool:
|
||||
"""Delete the entire scratchpad for a session.
|
||||
|
||||
Returns True if the file existed and was removed.
|
||||
"""
|
||||
path = _scratch_path(session_id)
|
||||
try:
|
||||
if path.exists():
|
||||
path.unlink()
|
||||
return True
|
||||
except OSError:
|
||||
pass
|
||||
return False
|
||||
474
hermes-sovereign/mempalace/sovereign_store.py
Normal file
@@ -0,0 +1,474 @@
|
||||
"""Sovereign Memory Store — zero-API, zero-dependency durable memory.
|
||||
|
||||
Replaces the third-party `mempalace` CLI and its ONNX requirement with a
|
||||
self-contained SQLite + FTS5 + HRR (Holographic Reduced Representation)
|
||||
store. Every operation is local: no network calls, no API keys, no cloud.
|
||||
|
||||
Storage: ~/.hermes/palace/sovereign.db
|
||||
|
||||
Capabilities:
|
||||
- Durable fact storage with rooms, categories, and trust scores
|
||||
- Hybrid retrieval: FTS5 keyword search + HRR cosine similarity
|
||||
- Reciprocal Rank Fusion to merge keyword and semantic results
|
||||
- Trust scoring: facts that get retrieved and confirmed gain trust
|
||||
- Graceful numpy degradation: falls back to keyword-only if missing
|
||||
|
||||
Refs: Epic #367, MP-3 #370, MP-4 #371
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import json
|
||||
import math
|
||||
import sqlite3
|
||||
import struct
|
||||
import time
|
||||
from pathlib import Path
|
||||
from typing import Any, Optional
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# HRR (Holographic Reduced Representations) — zero-dependency vectors
|
||||
# ---------------------------------------------------------------------------
|
||||
# Phase-encoded vectors via SHA-256. No ONNX, no embeddings API, no numpy
|
||||
# required (but uses numpy when available for speed).
|
||||
|
||||
_TWO_PI = 2.0 * math.pi
|
||||
_DIM = 512 # Compact dimension — sufficient for memory retrieval
|
||||
|
||||
try:
|
||||
import numpy as np
|
||||
_HAS_NUMPY = True
|
||||
except ImportError:
|
||||
_HAS_NUMPY = False
|
||||
|
||||
|
||||
def _encode_atom_np(word: str, dim: int = _DIM) -> "np.ndarray":
|
||||
"""Deterministic phase vector via SHA-256 (numpy path)."""
|
||||
values_per_block = 16
|
||||
blocks_needed = math.ceil(dim / values_per_block)
|
||||
uint16_values: list[int] = []
|
||||
for i in range(blocks_needed):
|
||||
digest = hashlib.sha256(f"{word}:{i}".encode()).digest()
|
||||
uint16_values.extend(struct.unpack("<16H", digest))
|
||||
return np.array(uint16_values[:dim], dtype=np.float64) * (_TWO_PI / 65536.0)
|
||||
|
||||
|
||||
def _encode_atom_pure(word: str, dim: int = _DIM) -> list[float]:
|
||||
"""Deterministic phase vector via SHA-256 (pure Python fallback)."""
|
||||
values_per_block = 16
|
||||
blocks_needed = math.ceil(dim / values_per_block)
|
||||
uint16_values: list[int] = []
|
||||
for i in range(blocks_needed):
|
||||
digest = hashlib.sha256(f"{word}:{i}".encode()).digest()
|
||||
for j in range(0, 32, 2):
|
||||
uint16_values.append(int.from_bytes(digest[j:j+2], "little"))
|
||||
return [v * (_TWO_PI / 65536.0) for v in uint16_values[:dim]]
|
||||
|
||||
|
||||
def encode_text(text: str, dim: int = _DIM):
|
||||
"""Encode a text string into an HRR phase vector by bundling word atoms.
|
||||
|
||||
Uses circular mean of per-word phase vectors — the standard HRR
|
||||
superposition operation. Result is a fixed-width vector regardless
|
||||
of input length.
|
||||
"""
|
||||
words = text.lower().split()
|
||||
if not words:
|
||||
words = ["<empty>"]
|
||||
|
||||
if _HAS_NUMPY:
|
||||
atoms = [_encode_atom_np(w, dim) for w in words]
|
||||
# Circular mean: average the unit vectors, extract phase
|
||||
unit_sum = sum(np.exp(1j * a) for a in atoms)
|
||||
return np.angle(unit_sum) % _TWO_PI
|
||||
else:
|
||||
# Pure Python circular mean
|
||||
real_sum = [0.0] * dim
|
||||
imag_sum = [0.0] * dim
|
||||
for w in words:
|
||||
atom = _encode_atom_pure(w, dim)
|
||||
for d in range(dim):
|
||||
real_sum[d] += math.cos(atom[d])
|
||||
imag_sum[d] += math.sin(atom[d])
|
||||
return [math.atan2(imag_sum[d], real_sum[d]) % _TWO_PI for d in range(dim)]
|
||||
|
||||
|
||||
def cosine_similarity_phase(a, b) -> float:
|
||||
"""Cosine similarity between two phase vectors.
|
||||
|
||||
For phase vectors, similarity = mean(cos(a - b)).
|
||||
"""
|
||||
if _HAS_NUMPY:
|
||||
return float(np.mean(np.cos(np.array(a) - np.array(b))))
|
||||
else:
|
||||
n = len(a)
|
||||
return sum(math.cos(a[i] - b[i]) for i in range(n)) / n
|
||||
|
||||
|
||||
def serialize_vector(vec) -> bytes:
|
||||
"""Serialize a vector to bytes for SQLite storage."""
|
||||
if _HAS_NUMPY:
|
||||
return vec.astype(np.float64).tobytes()
|
||||
else:
|
||||
return struct.pack(f"{len(vec)}d", *vec)
|
||||
|
||||
|
||||
def deserialize_vector(blob: bytes):
|
||||
"""Deserialize bytes back to a vector."""
|
||||
n = len(blob) // 8 # float64 = 8 bytes
|
||||
if _HAS_NUMPY:
|
||||
return np.frombuffer(blob, dtype=np.float64)
|
||||
else:
|
||||
return list(struct.unpack(f"{n}d", blob))
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# SQLite Schema
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_SCHEMA = """
|
||||
CREATE TABLE IF NOT EXISTS memories (
|
||||
memory_id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
content TEXT NOT NULL,
|
||||
room TEXT DEFAULT 'general',
|
||||
category TEXT DEFAULT '',
|
||||
trust_score REAL DEFAULT 0.5,
|
||||
retrieval_count INTEGER DEFAULT 0,
|
||||
created_at REAL NOT NULL,
|
||||
updated_at REAL NOT NULL,
|
||||
hrr_vector BLOB
|
||||
);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_memories_room ON memories(room);
|
||||
CREATE INDEX IF NOT EXISTS idx_memories_trust ON memories(trust_score DESC);
|
||||
|
||||
-- FTS5 for fast keyword search
|
||||
CREATE VIRTUAL TABLE IF NOT EXISTS memories_fts USING fts5(
|
||||
content, room, category,
|
||||
content=memories, content_rowid=memory_id,
|
||||
tokenize='porter unicode61'
|
||||
);
|
||||
|
||||
-- Sync triggers
|
||||
CREATE TRIGGER IF NOT EXISTS memories_ai AFTER INSERT ON memories BEGIN
|
||||
INSERT INTO memories_fts(rowid, content, room, category)
|
||||
VALUES (new.memory_id, new.content, new.room, new.category);
|
||||
END;
|
||||
|
||||
CREATE TRIGGER IF NOT EXISTS memories_ad AFTER DELETE ON memories BEGIN
|
||||
INSERT INTO memories_fts(memories_fts, rowid, content, room, category)
|
||||
VALUES ('delete', old.memory_id, old.content, old.room, old.category);
|
||||
END;
|
||||
|
||||
CREATE TRIGGER IF NOT EXISTS memories_au AFTER UPDATE ON memories BEGIN
|
||||
INSERT INTO memories_fts(memories_fts, rowid, content, room, category)
|
||||
VALUES ('delete', old.memory_id, old.content, old.room, old.category);
|
||||
INSERT INTO memories_fts(rowid, content, room, category)
|
||||
VALUES (new.memory_id, new.content, new.room, new.category);
|
||||
END;
|
||||
|
||||
-- Promotion log: tracks what moved from scratchpad to durable memory
|
||||
CREATE TABLE IF NOT EXISTS promotion_log (
|
||||
log_id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
session_id TEXT NOT NULL,
|
||||
scratch_key TEXT NOT NULL,
|
||||
memory_id INTEGER REFERENCES memories(memory_id),
|
||||
promoted_at REAL NOT NULL,
|
||||
reason TEXT DEFAULT ''
|
||||
);
|
||||
"""
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# SovereignStore
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class SovereignStore:
|
||||
"""Zero-API durable memory store.
|
||||
|
||||
All operations are local SQLite. No network calls. No API keys.
|
||||
HRR vectors provide semantic similarity without embedding models.
|
||||
FTS5 provides fast keyword search. RRF merges both rankings.
|
||||
"""
|
||||
|
||||
def __init__(self, db_path: Optional[str] = None):
|
||||
if db_path is None:
|
||||
db_path = str(Path.home() / ".hermes" / "palace" / "sovereign.db")
|
||||
self._db_path = db_path
|
||||
Path(db_path).parent.mkdir(parents=True, exist_ok=True)
|
||||
self._conn = sqlite3.connect(db_path)
|
||||
self._conn.row_factory = sqlite3.Row
|
||||
self._conn.executescript(_SCHEMA)
|
||||
|
||||
def close(self):
|
||||
self._conn.close()
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Store
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def store(
|
||||
self,
|
||||
content: str,
|
||||
room: str = "general",
|
||||
category: str = "",
|
||||
trust: float = 0.5,
|
||||
) -> int:
|
||||
"""Store a fact in durable memory. Returns the memory_id."""
|
||||
now = time.time()
|
||||
vec = encode_text(content)
|
||||
blob = serialize_vector(vec)
|
||||
cur = self._conn.execute(
|
||||
"""INSERT INTO memories (content, room, category, trust_score,
|
||||
created_at, updated_at, hrr_vector)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?)""",
|
||||
(content, room, category, trust, now, now, blob),
|
||||
)
|
||||
self._conn.commit()
|
||||
return cur.lastrowid
|
||||
|
||||
def store_batch(self, items: list[dict]) -> list[int]:
|
||||
"""Store multiple facts. Each item: {content, room?, category?, trust?}."""
|
||||
ids = []
|
||||
now = time.time()
|
||||
for item in items:
|
||||
content = item["content"]
|
||||
vec = encode_text(content)
|
||||
blob = serialize_vector(vec)
|
||||
cur = self._conn.execute(
|
||||
"""INSERT INTO memories (content, room, category, trust_score,
|
||||
created_at, updated_at, hrr_vector)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?)""",
|
||||
(
|
||||
content,
|
||||
item.get("room", "general"),
|
||||
item.get("category", ""),
|
||||
item.get("trust", 0.5),
|
||||
now, now, blob,
|
||||
),
|
||||
)
|
||||
ids.append(cur.lastrowid)
|
||||
self._conn.commit()
|
||||
return ids
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Search — hybrid FTS5 + HRR with Reciprocal Rank Fusion
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def search(
|
||||
self,
|
||||
query: str,
|
||||
room: Optional[str] = None,
|
||||
limit: int = 10,
|
||||
min_trust: float = 0.0,
|
||||
fts_weight: float = 0.5,
|
||||
hrr_weight: float = 0.5,
|
||||
) -> list[dict]:
|
||||
"""Hybrid search: FTS5 keywords + HRR semantic similarity.
|
||||
|
||||
Uses Reciprocal Rank Fusion (RRF) to merge both rankings.
|
||||
Returns list of dicts with content, room, score, trust_score.
|
||||
"""
|
||||
k_rrf = 60 # Standard RRF constant
|
||||
|
||||
# Stage 1: FTS5 candidates
|
||||
fts_results = self._fts_search(query, room, min_trust, limit * 3)
|
||||
|
||||
# Stage 2: HRR candidates (scan top N by trust)
|
||||
hrr_results = self._hrr_search(query, room, min_trust, limit * 3)
|
||||
|
||||
# Stage 3: RRF fusion
|
||||
scores: dict[int, float] = {}
|
||||
meta: dict[int, dict] = {}
|
||||
|
||||
for rank, row in enumerate(fts_results):
|
||||
mid = row["memory_id"]
|
||||
scores[mid] = scores.get(mid, 0) + fts_weight / (k_rrf + rank + 1)
|
||||
meta[mid] = dict(row)
|
||||
|
||||
for rank, row in enumerate(hrr_results):
|
||||
mid = row["memory_id"]
|
||||
scores[mid] = scores.get(mid, 0) + hrr_weight / (k_rrf + rank + 1)
|
||||
if mid not in meta:
|
||||
meta[mid] = dict(row)
|
||||
|
||||
# Sort by fused score
|
||||
ranked = sorted(scores.items(), key=lambda x: x[1], reverse=True)[:limit]
|
||||
|
||||
results = []
|
||||
for mid, score in ranked:
|
||||
m = meta[mid]
|
||||
# Bump retrieval count
|
||||
self._conn.execute(
|
||||
"UPDATE memories SET retrieval_count = retrieval_count + 1 WHERE memory_id = ?",
|
||||
(mid,),
|
||||
)
|
||||
results.append({
|
||||
"memory_id": mid,
|
||||
"content": m["content"],
|
||||
"room": m["room"],
|
||||
"category": m.get("category", ""),
|
||||
"trust_score": m["trust_score"],
|
||||
"score": round(score, 6),
|
||||
})
|
||||
|
||||
if results:
|
||||
self._conn.commit()
|
||||
return results
|
||||
|
||||
def _fts_search(
|
||||
self, query: str, room: Optional[str], min_trust: float, limit: int
|
||||
) -> list[dict]:
|
||||
"""FTS5 full-text search."""
|
||||
try:
|
||||
if room:
|
||||
rows = self._conn.execute(
|
||||
"""SELECT m.memory_id, m.content, m.room, m.category,
|
||||
m.trust_score, m.retrieval_count
|
||||
FROM memories_fts f
|
||||
JOIN memories m ON f.rowid = m.memory_id
|
||||
WHERE memories_fts MATCH ? AND m.room = ?
|
||||
AND m.trust_score >= ?
|
||||
ORDER BY rank LIMIT ?""",
|
||||
(query, room, min_trust, limit),
|
||||
).fetchall()
|
||||
else:
|
||||
rows = self._conn.execute(
|
||||
"""SELECT m.memory_id, m.content, m.room, m.category,
|
||||
m.trust_score, m.retrieval_count
|
||||
FROM memories_fts f
|
||||
JOIN memories m ON f.rowid = m.memory_id
|
||||
WHERE memories_fts MATCH ?
|
||||
AND m.trust_score >= ?
|
||||
ORDER BY rank LIMIT ?""",
|
||||
(query, min_trust, limit),
|
||||
).fetchall()
|
||||
return [dict(r) for r in rows]
|
||||
except sqlite3.OperationalError:
|
||||
# Bad FTS query syntax — degrade gracefully
|
||||
return []
|
||||
|
||||
def _hrr_search(
|
||||
self, query: str, room: Optional[str], min_trust: float, limit: int
|
||||
) -> list[dict]:
|
||||
"""HRR cosine similarity search (brute-force scan, fast for <100K facts)."""
|
||||
query_vec = encode_text(query)
|
||||
|
||||
if room:
|
||||
rows = self._conn.execute(
|
||||
"""SELECT memory_id, content, room, category, trust_score,
|
||||
retrieval_count, hrr_vector
|
||||
FROM memories
|
||||
WHERE room = ? AND trust_score >= ? AND hrr_vector IS NOT NULL""",
|
||||
(room, min_trust),
|
||||
).fetchall()
|
||||
else:
|
||||
rows = self._conn.execute(
|
||||
"""SELECT memory_id, content, room, category, trust_score,
|
||||
retrieval_count, hrr_vector
|
||||
FROM memories
|
||||
WHERE trust_score >= ? AND hrr_vector IS NOT NULL""",
|
||||
(min_trust,),
|
||||
).fetchall()
|
||||
|
||||
scored = []
|
||||
for r in rows:
|
||||
stored_vec = deserialize_vector(r["hrr_vector"])
|
||||
sim = cosine_similarity_phase(query_vec, stored_vec)
|
||||
scored.append((sim, dict(r)))
|
||||
|
||||
scored.sort(key=lambda x: x[0], reverse=True)
|
||||
return [item[1] for item in scored[:limit]]
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Trust management
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def boost_trust(self, memory_id: int, delta: float = 0.05) -> None:
|
||||
"""Increase trust score when a memory proves useful."""
|
||||
self._conn.execute(
|
||||
"""UPDATE memories SET trust_score = MIN(1.0, trust_score + ?),
|
||||
updated_at = ? WHERE memory_id = ?""",
|
||||
(delta, time.time(), memory_id),
|
||||
)
|
||||
self._conn.commit()
|
||||
|
||||
def decay_trust(self, memory_id: int, delta: float = 0.02) -> None:
|
||||
"""Decrease trust score when a memory is contradicted."""
|
||||
self._conn.execute(
|
||||
"""UPDATE memories SET trust_score = MAX(0.0, trust_score - ?),
|
||||
updated_at = ? WHERE memory_id = ?""",
|
||||
(delta, time.time(), memory_id),
|
||||
)
|
||||
self._conn.commit()
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Room operations
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def list_rooms(self) -> list[dict]:
|
||||
"""List all rooms with fact counts."""
|
||||
rows = self._conn.execute(
|
||||
"""SELECT room, COUNT(*) as count,
|
||||
AVG(trust_score) as avg_trust
|
||||
FROM memories GROUP BY room ORDER BY count DESC"""
|
||||
).fetchall()
|
||||
return [dict(r) for r in rows]
|
||||
|
||||
def room_contents(self, room: str, limit: int = 50) -> list[dict]:
|
||||
"""Get all facts in a room, ordered by trust."""
|
||||
rows = self._conn.execute(
|
||||
"""SELECT memory_id, content, category, trust_score,
|
||||
retrieval_count, created_at
|
||||
FROM memories WHERE room = ?
|
||||
ORDER BY trust_score DESC, created_at DESC LIMIT ?""",
|
||||
(room, limit),
|
||||
).fetchall()
|
||||
return [dict(r) for r in rows]
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Stats
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def stats(self) -> dict:
|
||||
"""Return store statistics."""
|
||||
row = self._conn.execute(
|
||||
"""SELECT COUNT(*) as total,
|
||||
AVG(trust_score) as avg_trust,
|
||||
SUM(retrieval_count) as total_retrievals,
|
||||
COUNT(DISTINCT room) as room_count
|
||||
FROM memories"""
|
||||
).fetchone()
|
||||
return dict(row)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Promotion support (scratchpad → durable)
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def log_promotion(
|
||||
self,
|
||||
session_id: str,
|
||||
scratch_key: str,
|
||||
memory_id: int,
|
||||
reason: str = "",
|
||||
) -> None:
|
||||
"""Record a scratchpad-to-palace promotion in the audit log."""
|
||||
self._conn.execute(
|
||||
"""INSERT INTO promotion_log
|
||||
(session_id, scratch_key, memory_id, promoted_at, reason)
|
||||
VALUES (?, ?, ?, ?, ?)""",
|
||||
(session_id, scratch_key, memory_id, time.time(), reason),
|
||||
)
|
||||
self._conn.commit()
|
||||
|
||||
def recent_promotions(self, limit: int = 20) -> list[dict]:
|
||||
"""Get recent promotion log entries."""
|
||||
rows = self._conn.execute(
|
||||
"""SELECT p.*, m.content, m.room
|
||||
FROM promotion_log p
|
||||
LEFT JOIN memories m ON p.memory_id = m.memory_id
|
||||
ORDER BY p.promoted_at DESC LIMIT ?""",
|
||||
(limit,),
|
||||
).fetchall()
|
||||
return [dict(r) for r in rows]
|
||||
0
hermes-sovereign/mempalace/tests/__init__.py
Normal file
180
hermes-sovereign/mempalace/tests/test_mempalace.py
Normal file
@@ -0,0 +1,180 @@
|
||||
"""Tests for the mempalace skill.
|
||||
|
||||
Validates PalaceRoom, Mempalace class, factory constructors,
|
||||
and the analyse_issues entry-point.
|
||||
|
||||
Refs: Epic #367, Sub-issue #368
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import sys
|
||||
import os
|
||||
import time
|
||||
|
||||
import pytest
|
||||
|
||||
# Ensure the package is importable from the repo layout
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", ".."))
|
||||
|
||||
from mempalace.mempalace import Mempalace, PalaceRoom, analyse_issues
|
||||
|
||||
|
||||
# ── PalaceRoom unit tests ─────────────────────────────────────────────────
|
||||
|
||||
class TestPalaceRoom:
|
||||
def test_store_and_retrieve(self):
|
||||
room = PalaceRoom(name="test", label="Test Room")
|
||||
room.store("key1", 42)
|
||||
assert room.retrieve("key1") == 42
|
||||
|
||||
def test_retrieve_default(self):
|
||||
room = PalaceRoom(name="test", label="Test Room")
|
||||
assert room.retrieve("missing") is None
|
||||
assert room.retrieve("missing", "fallback") == "fallback"
|
||||
|
||||
def test_summary_format(self):
|
||||
room = PalaceRoom(name="test", label="Test Room")
|
||||
room.store("repos", 5)
|
||||
summary = room.summary()
|
||||
assert "## Test Room" in summary
|
||||
assert "repos: 5" in summary
|
||||
|
||||
def test_contents_default_factory_isolation(self):
|
||||
"""Each room gets its own dict — no shared mutable default."""
|
||||
r1 = PalaceRoom(name="a", label="A")
|
||||
r2 = PalaceRoom(name="b", label="B")
|
||||
r1.store("x", 1)
|
||||
assert r2.retrieve("x") is None
|
||||
|
||||
def test_entered_at_is_recent(self):
|
||||
before = time.time()
|
||||
room = PalaceRoom(name="t", label="T")
|
||||
after = time.time()
|
||||
assert before <= room.entered_at <= after
|
||||
|
||||
|
||||
# ── Mempalace core tests ──────────────────────────────────────────────────
|
||||
|
||||
class TestMempalace:
|
||||
def test_add_and_enter_room(self):
|
||||
p = Mempalace(domain="test")
|
||||
p.add_room("r1", "Room 1")
|
||||
room = p.enter("r1")
|
||||
assert room.name == "r1"
|
||||
|
||||
def test_enter_nonexistent_room_raises(self):
|
||||
p = Mempalace()
|
||||
with pytest.raises(KeyError, match="No room"):
|
||||
p.enter("ghost")
|
||||
|
||||
def test_store_without_enter_raises(self):
|
||||
p = Mempalace()
|
||||
p.add_room("r", "R")
|
||||
with pytest.raises(RuntimeError, match="Enter a room"):
|
||||
p.store("k", "v")
|
||||
|
||||
def test_store_and_retrieve_via_palace(self):
|
||||
p = Mempalace()
|
||||
p.add_room("r", "R")
|
||||
p.enter("r")
|
||||
p.store("count", 10)
|
||||
assert p.retrieve("r", "count") == 10
|
||||
|
||||
def test_retrieve_missing_room_returns_default(self):
|
||||
p = Mempalace()
|
||||
assert p.retrieve("nope", "key") is None
|
||||
assert p.retrieve("nope", "key", 99) == 99
|
||||
|
||||
def test_render_includes_domain(self):
|
||||
p = Mempalace(domain="audit")
|
||||
p.add_room("r", "Room")
|
||||
p.enter("r")
|
||||
p.store("item", "value")
|
||||
output = p.render()
|
||||
assert "audit" in output
|
||||
assert "Room" in output
|
||||
|
||||
def test_to_dict_structure(self):
|
||||
p = Mempalace(domain="test")
|
||||
p.add_room("r", "R")
|
||||
p.enter("r")
|
||||
p.store("a", 1)
|
||||
d = p.to_dict()
|
||||
assert d["domain"] == "test"
|
||||
assert "elapsed_seconds" in d
|
||||
assert d["rooms"]["r"] == {"a": 1}
|
||||
|
||||
def test_to_json_is_valid(self):
|
||||
p = Mempalace(domain="j")
|
||||
p.add_room("x", "X")
|
||||
p.enter("x")
|
||||
p.store("v", [1, 2, 3])
|
||||
parsed = json.loads(p.to_json())
|
||||
assert parsed["rooms"]["x"]["v"] == [1, 2, 3]
|
||||
|
||||
|
||||
# ── Factory constructor tests ─────────────────────────────────────────────
|
||||
|
||||
class TestFactories:
|
||||
def test_for_issue_analysis_rooms(self):
|
||||
p = Mempalace.for_issue_analysis()
|
||||
assert p.domain == "issue_analysis"
|
||||
for key in ("repo_architecture", "assignment_status",
|
||||
"triage_priority", "resolution_patterns"):
|
||||
p.enter(key) # should not raise
|
||||
|
||||
def test_for_health_check_rooms(self):
|
||||
p = Mempalace.for_health_check()
|
||||
assert p.domain == "health_check"
|
||||
for key in ("service_topology", "failure_signals", "recovery_history"):
|
||||
p.enter(key)
|
||||
|
||||
def test_for_code_review_rooms(self):
|
||||
p = Mempalace.for_code_review()
|
||||
assert p.domain == "code_review"
|
||||
for key in ("change_scope", "risk_surface",
|
||||
"test_coverage", "reviewer_context"):
|
||||
p.enter(key)
|
||||
|
||||
|
||||
# ── analyse_issues entry-point tests ──────────────────────────────────────
|
||||
|
||||
class TestAnalyseIssues:
|
||||
SAMPLE_DATA = [
|
||||
{"repo": "the-nexus", "open_issues": 40, "assigned": 30, "unassigned": 10},
|
||||
{"repo": "timmy-home", "open_issues": 30, "assigned": 25, "unassigned": 5},
|
||||
{"repo": "hermes-agent", "open_issues": 20, "assigned": 15, "unassigned": 5},
|
||||
{"repo": "empty-repo", "open_issues": 0, "assigned": 0, "unassigned": 0},
|
||||
]
|
||||
|
||||
def test_returns_string(self):
|
||||
result = analyse_issues(self.SAMPLE_DATA)
|
||||
assert isinstance(result, str)
|
||||
assert len(result) > 0
|
||||
|
||||
def test_contains_room_headers(self):
|
||||
result = analyse_issues(self.SAMPLE_DATA)
|
||||
assert "Repository Architecture" in result
|
||||
assert "Assignment Status" in result
|
||||
|
||||
def test_coverage_below_target(self):
|
||||
result = analyse_issues(self.SAMPLE_DATA, target_assignee_rate=0.90)
|
||||
assert "BELOW TARGET" in result
|
||||
|
||||
def test_coverage_meets_target(self):
|
||||
good_data = [
|
||||
{"repo": "a", "open_issues": 10, "assigned": 10, "unassigned": 0},
|
||||
]
|
||||
result = analyse_issues(good_data, target_assignee_rate=0.80)
|
||||
assert "OK" in result
|
||||
|
||||
def test_empty_repos_list(self):
|
||||
result = analyse_issues([])
|
||||
assert isinstance(result, str)
|
||||
|
||||
def test_single_repo(self):
|
||||
data = [{"repo": "solo", "open_issues": 5, "assigned": 3, "unassigned": 2}]
|
||||
result = analyse_issues(data)
|
||||
assert "solo" in result or "issue_analysis" in result
|
||||
143
hermes-sovereign/mempalace/tests/test_retrieval_enforcer.py
Normal file
@@ -0,0 +1,143 @@
|
||||
"""Tests for retrieval_enforcer.py.
|
||||
|
||||
Refs: Epic #367, Sub-issue #369
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
from unittest.mock import patch, MagicMock
|
||||
|
||||
import pytest
|
||||
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", ".."))
|
||||
|
||||
from mempalace.retrieval_enforcer import (
|
||||
is_recall_query,
|
||||
load_identity,
|
||||
load_scratchpad,
|
||||
enforce_retrieval_order,
|
||||
search_skills,
|
||||
RECALL_PATTERNS,
|
||||
)
|
||||
|
||||
|
||||
class TestRecallDetection:
|
||||
"""Test the recall-query pattern matcher."""
|
||||
|
||||
@pytest.mark.parametrize("query", [
|
||||
"what did we work on yesterday",
|
||||
"status of the mempalace integration",
|
||||
"remember the fleet audit results",
|
||||
"last time we deployed the nexus",
|
||||
"previously you mentioned a CI fix",
|
||||
"we discussed the sovereign deployment",
|
||||
])
|
||||
def test_recall_queries_detected(self, query):
|
||||
assert is_recall_query(query) is True
|
||||
|
||||
@pytest.mark.parametrize("query", [
|
||||
"create a new file called test.py",
|
||||
"run the test suite",
|
||||
"deploy to production",
|
||||
"write a function that sums numbers",
|
||||
"install the package",
|
||||
])
|
||||
def test_non_recall_queries_skipped(self, query):
|
||||
assert is_recall_query(query) is False
|
||||
|
||||
|
||||
class TestLoadIdentity:
|
||||
def test_loads_existing_identity(self, tmp_path):
|
||||
identity_file = tmp_path / "identity.txt"
|
||||
identity_file.write_text("I am Timmy. A sovereign AI.")
|
||||
with patch("mempalace.retrieval_enforcer.IDENTITY_PATH", identity_file):
|
||||
result = load_identity()
|
||||
assert "Timmy" in result
|
||||
|
||||
def test_returns_empty_on_missing_file(self, tmp_path):
|
||||
identity_file = tmp_path / "nonexistent.txt"
|
||||
with patch("mempalace.retrieval_enforcer.IDENTITY_PATH", identity_file):
|
||||
result = load_identity()
|
||||
assert result == ""
|
||||
|
||||
def test_truncates_long_identity(self, tmp_path):
|
||||
identity_file = tmp_path / "identity.txt"
|
||||
identity_file.write_text(" ".join(["word"] * 300))
|
||||
with patch("mempalace.retrieval_enforcer.IDENTITY_PATH", identity_file):
|
||||
result = load_identity()
|
||||
assert result.endswith("...")
|
||||
assert len(result.split()) <= 201 # 200 words + "..."
|
||||
|
||||
|
||||
class TestLoadScratchpad:
|
||||
def test_loads_valid_scratchpad(self, tmp_path):
|
||||
scratch_file = tmp_path / "session123.json"
|
||||
scratch_file.write_text(json.dumps({"note": "test value", "key2": 42}))
|
||||
with patch("mempalace.retrieval_enforcer.SCRATCHPAD_DIR", tmp_path):
|
||||
result = load_scratchpad("session123")
|
||||
assert "note: test value" in result
|
||||
assert "key2: 42" in result
|
||||
|
||||
def test_returns_empty_on_missing_file(self, tmp_path):
|
||||
with patch("mempalace.retrieval_enforcer.SCRATCHPAD_DIR", tmp_path):
|
||||
result = load_scratchpad("nonexistent")
|
||||
assert result == ""
|
||||
|
||||
def test_returns_empty_on_invalid_json(self, tmp_path):
|
||||
scratch_file = tmp_path / "bad.json"
|
||||
scratch_file.write_text("not valid json{{{")
|
||||
with patch("mempalace.retrieval_enforcer.SCRATCHPAD_DIR", tmp_path):
|
||||
result = load_scratchpad("bad")
|
||||
assert result == ""
|
||||
|
||||
|
||||
class TestEnforceRetrievalOrder:
|
||||
def test_skips_non_recall_query(self):
|
||||
result = enforce_retrieval_order("create a new file")
|
||||
assert result["retrieved_from"] is None
|
||||
assert result["tokens"] == 0
|
||||
|
||||
def test_runs_for_recall_query(self, tmp_path):
|
||||
identity_file = tmp_path / "identity.txt"
|
||||
identity_file.write_text("I am Timmy.")
|
||||
with patch("mempalace.retrieval_enforcer.IDENTITY_PATH", identity_file), \
|
||||
patch("mempalace.retrieval_enforcer.search_palace", return_value=""), \
|
||||
patch("mempalace.retrieval_enforcer.search_gitea", return_value=""), \
|
||||
patch("mempalace.retrieval_enforcer.search_skills", return_value=""):
|
||||
result = enforce_retrieval_order("what did we work on yesterday")
|
||||
assert "Identity" in result["context"]
|
||||
assert "L0" in result["layers_checked"]
|
||||
|
||||
def test_palace_hit_sets_l1(self, tmp_path):
|
||||
identity_file = tmp_path / "identity.txt"
|
||||
identity_file.write_text("I am Timmy.")
|
||||
with patch("mempalace.retrieval_enforcer.IDENTITY_PATH", identity_file), \
|
||||
patch("mempalace.retrieval_enforcer.search_palace", return_value="Found: fleet audit results"), \
|
||||
patch("mempalace.retrieval_enforcer.search_gitea", return_value=""):
|
||||
result = enforce_retrieval_order("what did we discuss yesterday")
|
||||
assert result["retrieved_from"] == "L1"
|
||||
assert "Palace Memory" in result["context"]
|
||||
|
||||
def test_falls_through_to_l5(self, tmp_path):
|
||||
identity_file = tmp_path / "nonexistent.txt"
|
||||
with patch("mempalace.retrieval_enforcer.IDENTITY_PATH", identity_file), \
|
||||
patch("mempalace.retrieval_enforcer.search_palace", return_value=""), \
|
||||
patch("mempalace.retrieval_enforcer.search_gitea", return_value=""), \
|
||||
patch("mempalace.retrieval_enforcer.search_skills", return_value=""):
|
||||
result = enforce_retrieval_order("remember the old deployment", skip_if_not_recall=True)
|
||||
assert result["retrieved_from"] == "L5"
|
||||
|
||||
def test_force_mode_skips_recall_check(self, tmp_path):
|
||||
identity_file = tmp_path / "identity.txt"
|
||||
identity_file.write_text("I am Timmy.")
|
||||
with patch("mempalace.retrieval_enforcer.IDENTITY_PATH", identity_file), \
|
||||
patch("mempalace.retrieval_enforcer.search_palace", return_value=""), \
|
||||
patch("mempalace.retrieval_enforcer.search_gitea", return_value=""), \
|
||||
patch("mempalace.retrieval_enforcer.search_skills", return_value=""):
|
||||
result = enforce_retrieval_order("deploy now", skip_if_not_recall=False)
|
||||
assert "Identity" in result["context"]
|
||||
108
hermes-sovereign/mempalace/tests/test_scratchpad.py
Normal file
@@ -0,0 +1,108 @@
|
||||
"""Tests for scratchpad.py.
|
||||
|
||||
Refs: Epic #367, Sub-issue #372
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", ".."))
|
||||
|
||||
from mempalace.scratchpad import (
|
||||
write_scratch,
|
||||
read_scratch,
|
||||
delete_scratch,
|
||||
list_sessions,
|
||||
clear_session,
|
||||
_scratch_path,
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def scratch_dir(tmp_path):
|
||||
"""Provide a temporary scratchpad directory."""
|
||||
with patch("mempalace.scratchpad.SCRATCHPAD_DIR", tmp_path):
|
||||
yield tmp_path
|
||||
|
||||
|
||||
class TestScratchPath:
|
||||
def test_sanitizes_session_id(self):
|
||||
path = _scratch_path("safe-id_123")
|
||||
assert "safe-id_123.json" in str(path)
|
||||
|
||||
def test_strips_dangerous_chars(self):
|
||||
path = _scratch_path("../../etc/passwd")
|
||||
assert ".." not in path.name
|
||||
assert "/" not in path.name
|
||||
# Dots are stripped, so only alphanumeric chars remain
|
||||
assert path.name == "etcpasswd.json"
|
||||
|
||||
|
||||
class TestWriteAndRead:
|
||||
def test_write_then_read(self, scratch_dir):
|
||||
write_scratch("sess1", "note", "hello world")
|
||||
result = read_scratch("sess1", "note")
|
||||
assert "note" in result
|
||||
assert result["note"]["value"] == "hello world"
|
||||
|
||||
def test_read_all_keys(self, scratch_dir):
|
||||
write_scratch("sess1", "a", 1)
|
||||
write_scratch("sess1", "b", 2)
|
||||
result = read_scratch("sess1")
|
||||
assert "a" in result
|
||||
assert "b" in result
|
||||
|
||||
def test_read_missing_key(self, scratch_dir):
|
||||
write_scratch("sess1", "exists", "yes")
|
||||
result = read_scratch("sess1", "missing")
|
||||
assert result == {}
|
||||
|
||||
def test_read_missing_session(self, scratch_dir):
|
||||
result = read_scratch("nonexistent")
|
||||
assert result == {}
|
||||
|
||||
def test_overwrite_key(self, scratch_dir):
|
||||
write_scratch("sess1", "key", "v1")
|
||||
write_scratch("sess1", "key", "v2")
|
||||
result = read_scratch("sess1", "key")
|
||||
assert result["key"]["value"] == "v2"
|
||||
|
||||
|
||||
class TestDelete:
|
||||
def test_delete_existing_key(self, scratch_dir):
|
||||
write_scratch("sess1", "key", "val")
|
||||
assert delete_scratch("sess1", "key") is True
|
||||
assert read_scratch("sess1", "key") == {}
|
||||
|
||||
def test_delete_missing_key(self, scratch_dir):
|
||||
write_scratch("sess1", "other", "val")
|
||||
assert delete_scratch("sess1", "missing") is False
|
||||
|
||||
|
||||
class TestListSessions:
|
||||
def test_lists_sessions(self, scratch_dir):
|
||||
write_scratch("alpha", "k", "v")
|
||||
write_scratch("beta", "k", "v")
|
||||
sessions = list_sessions()
|
||||
assert "alpha" in sessions
|
||||
assert "beta" in sessions
|
||||
|
||||
def test_empty_directory(self, scratch_dir):
|
||||
assert list_sessions() == []
|
||||
|
||||
|
||||
class TestClearSession:
|
||||
def test_clears_existing(self, scratch_dir):
|
||||
write_scratch("sess1", "k", "v")
|
||||
assert clear_session("sess1") is True
|
||||
assert read_scratch("sess1") == {}
|
||||
|
||||
def test_clear_nonexistent(self, scratch_dir):
|
||||
assert clear_session("ghost") is False
|
||||
255
hermes-sovereign/mempalace/tests/test_sovereign_store.py
Normal file
@@ -0,0 +1,255 @@
|
||||
"""Tests for the Sovereign Memory Store and Promotion system.
|
||||
|
||||
Zero-API, zero-network — everything runs against an in-memory SQLite DB.
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import tempfile
|
||||
import time
|
||||
import unittest
|
||||
|
||||
# Allow imports from parent package
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
|
||||
|
||||
from sovereign_store import (
|
||||
SovereignStore,
|
||||
encode_text,
|
||||
cosine_similarity_phase,
|
||||
serialize_vector,
|
||||
deserialize_vector,
|
||||
)
|
||||
from promotion import (
|
||||
evaluate_for_promotion,
|
||||
promote,
|
||||
promote_session_batch,
|
||||
)
|
||||
|
||||
|
||||
class TestHRRVectors(unittest.TestCase):
|
||||
"""Test the HRR encoding and similarity functions."""
|
||||
|
||||
def test_deterministic_encoding(self):
|
||||
"""Same text always produces the same vector."""
|
||||
v1 = encode_text("hello world")
|
||||
v2 = encode_text("hello world")
|
||||
self.assertAlmostEqual(cosine_similarity_phase(v1, v2), 1.0, places=5)
|
||||
|
||||
def test_similar_texts_higher_similarity(self):
|
||||
"""Related texts should be more similar than unrelated ones."""
|
||||
v_agent = encode_text("agent memory palace retrieval")
|
||||
v_similar = encode_text("agent recall memory search")
|
||||
v_unrelated = encode_text("banana strawberry fruit smoothie")
|
||||
sim_related = cosine_similarity_phase(v_agent, v_similar)
|
||||
sim_unrelated = cosine_similarity_phase(v_agent, v_unrelated)
|
||||
self.assertGreater(sim_related, sim_unrelated)
|
||||
|
||||
def test_serialize_roundtrip(self):
|
||||
"""Vectors survive serialization to/from bytes."""
|
||||
vec = encode_text("test serialization")
|
||||
blob = serialize_vector(vec)
|
||||
restored = deserialize_vector(blob)
|
||||
sim = cosine_similarity_phase(vec, restored)
|
||||
self.assertAlmostEqual(sim, 1.0, places=5)
|
||||
|
||||
def test_empty_text(self):
|
||||
"""Empty text gets a fallback encoding."""
|
||||
vec = encode_text("")
|
||||
self.assertEqual(len(vec) if hasattr(vec, '__len__') else len(list(vec)), 512)
|
||||
|
||||
|
||||
class TestSovereignStore(unittest.TestCase):
|
||||
"""Test the SQLite-backed sovereign store."""
|
||||
|
||||
def setUp(self):
|
||||
self.db_path = os.path.join(tempfile.mkdtemp(), "test.db")
|
||||
self.store = SovereignStore(db_path=self.db_path)
|
||||
|
||||
def tearDown(self):
|
||||
self.store.close()
|
||||
if os.path.exists(self.db_path):
|
||||
os.remove(self.db_path)
|
||||
|
||||
def test_store_and_retrieve(self):
|
||||
"""Store a fact and find it via search."""
|
||||
mid = self.store.store("Timmy is a sovereign AI agent on Hermes VPS", room="identity")
|
||||
results = self.store.search("sovereign agent", room="identity")
|
||||
self.assertTrue(any(r["memory_id"] == mid for r in results))
|
||||
|
||||
def test_fts_search(self):
|
||||
"""FTS5 keyword search works."""
|
||||
self.store.store("The beacon game uses paperclips mechanics", room="projects")
|
||||
self.store.store("Fleet agents handle delegation and dispatch", room="fleet")
|
||||
results = self.store.search("paperclips")
|
||||
self.assertTrue(len(results) > 0)
|
||||
self.assertIn("paperclips", results[0]["content"].lower())
|
||||
|
||||
def test_hrr_search_semantic(self):
|
||||
"""HRR similarity finds related content even without exact keywords."""
|
||||
self.store.store("Memory palace rooms organize facts spatially", room="memory")
|
||||
self.store.store("Pizza delivery service runs on weekends", room="unrelated")
|
||||
results = self.store.search("organize knowledge rooms", room="memory")
|
||||
self.assertTrue(len(results) > 0)
|
||||
self.assertIn("palace", results[0]["content"].lower())
|
||||
|
||||
def test_room_filtering(self):
|
||||
"""Room filter restricts search scope."""
|
||||
self.store.store("Hermes harness manages tool calls", room="infrastructure")
|
||||
self.store.store("Hermes mythology Greek god", room="lore")
|
||||
results = self.store.search("Hermes", room="infrastructure")
|
||||
self.assertTrue(all(r["room"] == "infrastructure" for r in results))
|
||||
|
||||
def test_trust_boost(self):
|
||||
"""Trust score increases when boosted."""
|
||||
mid = self.store.store("fact", trust=0.5)
|
||||
self.store.boost_trust(mid, delta=0.1)
|
||||
results = self.store.room_contents("general")
|
||||
fact = next(r for r in results if r["memory_id"] == mid)
|
||||
self.assertAlmostEqual(fact["trust_score"], 0.6, places=2)
|
||||
|
||||
def test_trust_decay(self):
|
||||
"""Trust score decreases when decayed."""
|
||||
mid = self.store.store("questionable fact", trust=0.5)
|
||||
self.store.decay_trust(mid, delta=0.2)
|
||||
results = self.store.room_contents("general")
|
||||
fact = next(r for r in results if r["memory_id"] == mid)
|
||||
self.assertAlmostEqual(fact["trust_score"], 0.3, places=2)
|
||||
|
||||
def test_batch_store(self):
|
||||
"""Batch store works."""
|
||||
ids = self.store.store_batch([
|
||||
{"content": "fact one", "room": "test"},
|
||||
{"content": "fact two", "room": "test"},
|
||||
{"content": "fact three", "room": "test"},
|
||||
])
|
||||
self.assertEqual(len(ids), 3)
|
||||
rooms = self.store.list_rooms()
|
||||
test_room = next(r for r in rooms if r["room"] == "test")
|
||||
self.assertEqual(test_room["count"], 3)
|
||||
|
||||
def test_stats(self):
|
||||
"""Stats returns correct counts."""
|
||||
self.store.store("a fact", room="r1")
|
||||
self.store.store("another fact", room="r2")
|
||||
s = self.store.stats()
|
||||
self.assertEqual(s["total"], 2)
|
||||
self.assertEqual(s["room_count"], 2)
|
||||
|
||||
def test_retrieval_count_increments(self):
|
||||
"""Retrieval count goes up when a fact is found via search."""
|
||||
self.store.store("unique searchable content xyz123", room="test")
|
||||
self.store.search("xyz123")
|
||||
results = self.store.room_contents("test")
|
||||
self.assertTrue(any(r["retrieval_count"] > 0 for r in results))
|
||||
|
||||
|
||||
class TestPromotion(unittest.TestCase):
|
||||
"""Test the quality-gated promotion system."""
|
||||
|
||||
def setUp(self):
|
||||
self.db_path = os.path.join(tempfile.mkdtemp(), "promo_test.db")
|
||||
self.store = SovereignStore(db_path=self.db_path)
|
||||
|
||||
def tearDown(self):
|
||||
self.store.close()
|
||||
|
||||
def test_successful_promotion(self):
|
||||
"""Good content passes all gates."""
|
||||
result = promote(
|
||||
content="Timmy runs on the Hermes VPS at 143.198.27.163 with local Ollama inference",
|
||||
store=self.store,
|
||||
session_id="test-session-001",
|
||||
scratch_key="vps_info",
|
||||
room="infrastructure",
|
||||
)
|
||||
self.assertTrue(result.success)
|
||||
self.assertIsNotNone(result.memory_id)
|
||||
|
||||
def test_reject_too_short(self):
|
||||
"""Short fragments get rejected."""
|
||||
result = promote(
|
||||
content="yes",
|
||||
store=self.store,
|
||||
session_id="test",
|
||||
scratch_key="short",
|
||||
)
|
||||
self.assertFalse(result.success)
|
||||
self.assertIn("Too short", result.reason)
|
||||
|
||||
def test_reject_duplicate(self):
|
||||
"""Duplicate content gets rejected."""
|
||||
self.store.store("SOUL.md is the canonical identity document for Timmy", room="identity")
|
||||
result = promote(
|
||||
content="SOUL.md is the canonical identity document for Timmy",
|
||||
store=self.store,
|
||||
session_id="test",
|
||||
scratch_key="soul",
|
||||
room="identity",
|
||||
)
|
||||
self.assertFalse(result.success)
|
||||
self.assertIn("uplicate", result.reason)
|
||||
|
||||
def test_reject_stale(self):
|
||||
"""Old notes get flagged as stale."""
|
||||
old_time = time.time() - (86400 * 10)
|
||||
result = promote(
|
||||
content="This is a note from long ago about something important",
|
||||
store=self.store,
|
||||
session_id="test",
|
||||
scratch_key="old",
|
||||
written_at=old_time,
|
||||
)
|
||||
self.assertFalse(result.success)
|
||||
self.assertIn("Stale", result.reason)
|
||||
|
||||
def test_force_bypasses_gates(self):
|
||||
"""Force flag overrides quality gates."""
|
||||
result = promote(
|
||||
content="ok",
|
||||
store=self.store,
|
||||
session_id="test",
|
||||
scratch_key="forced",
|
||||
force=True,
|
||||
)
|
||||
self.assertTrue(result.success)
|
||||
|
||||
def test_evaluate_dry_run(self):
|
||||
"""Evaluate returns gate details without promoting."""
|
||||
eval_result = evaluate_for_promotion(
|
||||
content="The fleet uses kimi-k2.5 as the primary model for all agent operations",
|
||||
store=self.store,
|
||||
room="fleet",
|
||||
)
|
||||
self.assertTrue(eval_result["eligible"])
|
||||
self.assertTrue(all(p for p, _ in eval_result["gates"].values()))
|
||||
|
||||
def test_batch_promotion(self):
|
||||
"""Batch promotion processes all notes."""
|
||||
notes = {
|
||||
"infra": {"value": "Hermes VPS runs Ubuntu 22.04 with 2 vCPUs and 4GB RAM", "written_at": time.strftime("%Y-%m-%d %H:%M:%S")},
|
||||
"short": {"value": "no", "written_at": time.strftime("%Y-%m-%d %H:%M:%S")},
|
||||
"model": {"value": "The primary local model is gemma4:latest running on Ollama", "written_at": time.strftime("%Y-%m-%d %H:%M:%S")},
|
||||
}
|
||||
results = promote_session_batch(self.store, "batch-session", notes, room="config")
|
||||
promoted = [r for r in results if r.success]
|
||||
rejected = [r for r in results if not r.success]
|
||||
self.assertEqual(len(promoted), 2)
|
||||
self.assertEqual(len(rejected), 1)
|
||||
|
||||
def test_promotion_logged(self):
|
||||
"""Successful promotions appear in the audit log."""
|
||||
promote(
|
||||
content="Forge is hosted at forge.alexanderwhitestone.com running Gitea",
|
||||
store=self.store,
|
||||
session_id="log-test",
|
||||
scratch_key="forge",
|
||||
room="infrastructure",
|
||||
)
|
||||
log = self.store.recent_promotions()
|
||||
self.assertTrue(len(log) > 0)
|
||||
self.assertEqual(log[0]["session_id"], "log-test")
|
||||
self.assertEqual(log[0]["scratch_key"], "forge")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
100
hermes-sovereign/mempalace/tests/test_wakeup.py
Normal file
@@ -0,0 +1,100 @@
|
||||
"""Tests for wakeup.py.
|
||||
|
||||
Refs: Epic #367, Sub-issue #372
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
from pathlib import Path
|
||||
from unittest.mock import patch, MagicMock
|
||||
|
||||
import pytest
|
||||
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", ".."))
|
||||
|
||||
from mempalace.wakeup import (
|
||||
palace_wakeup,
|
||||
fleet_status_summary,
|
||||
_load_identity,
|
||||
_palace_context,
|
||||
)
|
||||
|
||||
|
||||
class TestLoadIdentity:
|
||||
def test_loads_identity(self, tmp_path):
|
||||
f = tmp_path / "identity.txt"
|
||||
f.write_text("I am Timmy. A sovereign AI.")
|
||||
with patch("mempalace.wakeup.IDENTITY_PATH", f):
|
||||
result = _load_identity()
|
||||
assert "Timmy" in result
|
||||
|
||||
def test_missing_identity(self, tmp_path):
|
||||
f = tmp_path / "nope.txt"
|
||||
with patch("mempalace.wakeup.IDENTITY_PATH", f):
|
||||
assert _load_identity() == ""
|
||||
|
||||
|
||||
class TestFleetStatus:
|
||||
def test_reads_fleet_json(self, tmp_path):
|
||||
f = tmp_path / "fleet_status.json"
|
||||
f.write_text(json.dumps({
|
||||
"Groq": {"state": "active", "last_seen": "2026-04-07"},
|
||||
"Ezra": {"state": "idle", "last_seen": "2026-04-06"},
|
||||
}))
|
||||
with patch("mempalace.wakeup.FLEET_STATUS_PATH", f):
|
||||
result = fleet_status_summary()
|
||||
assert "Fleet Status" in result
|
||||
assert "Groq" in result
|
||||
assert "active" in result
|
||||
|
||||
def test_missing_fleet_file(self, tmp_path):
|
||||
f = tmp_path / "nope.json"
|
||||
with patch("mempalace.wakeup.FLEET_STATUS_PATH", f):
|
||||
assert fleet_status_summary() == ""
|
||||
|
||||
def test_invalid_json(self, tmp_path):
|
||||
f = tmp_path / "bad.json"
|
||||
f.write_text("not json")
|
||||
with patch("mempalace.wakeup.FLEET_STATUS_PATH", f):
|
||||
assert fleet_status_summary() == ""
|
||||
|
||||
|
||||
class TestPalaceWakeup:
|
||||
def test_generates_context_with_identity(self, tmp_path):
|
||||
identity = tmp_path / "identity.txt"
|
||||
identity.write_text("I am Timmy.")
|
||||
cache = tmp_path / "cache.txt"
|
||||
with patch("mempalace.wakeup.IDENTITY_PATH", identity), \
|
||||
patch("mempalace.wakeup.WAKEUP_CACHE_PATH", cache), \
|
||||
patch("mempalace.wakeup._palace_context", return_value=""), \
|
||||
patch("mempalace.wakeup.fleet_status_summary", return_value=""):
|
||||
result = palace_wakeup(force=True)
|
||||
assert "Identity" in result
|
||||
assert "Timmy" in result
|
||||
assert "Session" in result
|
||||
|
||||
def test_uses_cache_when_fresh(self, tmp_path):
|
||||
cache = tmp_path / "cache.txt"
|
||||
cache.write_text("cached wake-up content")
|
||||
# Touch the file so it's fresh
|
||||
with patch("mempalace.wakeup.WAKEUP_CACHE_PATH", cache), \
|
||||
patch("mempalace.wakeup.WAKEUP_CACHE_TTL", 9999):
|
||||
result = palace_wakeup(force=False)
|
||||
assert result == "cached wake-up content"
|
||||
|
||||
def test_force_bypasses_cache(self, tmp_path):
|
||||
cache = tmp_path / "cache.txt"
|
||||
cache.write_text("stale content")
|
||||
identity = tmp_path / "identity.txt"
|
||||
identity.write_text("I am Timmy.")
|
||||
with patch("mempalace.wakeup.WAKEUP_CACHE_PATH", cache), \
|
||||
patch("mempalace.wakeup.IDENTITY_PATH", identity), \
|
||||
patch("mempalace.wakeup._palace_context", return_value=""), \
|
||||
patch("mempalace.wakeup.fleet_status_summary", return_value=""):
|
||||
result = palace_wakeup(force=True)
|
||||
assert "Identity" in result
|
||||
assert "stale content" not in result
|
||||
161
hermes-sovereign/mempalace/wakeup.py
Normal file
@@ -0,0 +1,161 @@
|
||||
"""Wake-up Protocol — session start context injection.
|
||||
|
||||
Generates 300-900 tokens of context when a new Hermes session starts.
|
||||
Loads identity, recent palace context, and fleet status.
|
||||
|
||||
Refs: Epic #367, Sub-issue #372
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
import subprocess
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Constants
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
IDENTITY_PATH = Path.home() / ".mempalace" / "identity.txt"
|
||||
MEMPALACE_BIN = "/Library/Frameworks/Python.framework/Versions/3.12/bin/mempalace"
|
||||
FLEET_STATUS_PATH = Path.home() / ".hermes" / "fleet_status.json"
|
||||
WAKEUP_CACHE_PATH = Path.home() / ".hermes" / "last_wakeup.txt"
|
||||
WAKEUP_CACHE_TTL = 300 # 5 minutes — don't regenerate if recent
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _load_identity() -> str:
|
||||
"""Read the agent identity file."""
|
||||
try:
|
||||
if IDENTITY_PATH.exists():
|
||||
text = IDENTITY_PATH.read_text(encoding="utf-8").strip()
|
||||
# Cap at ~150 tokens for wake-up brevity
|
||||
words = text.split()
|
||||
if len(words) > 150:
|
||||
text = " ".join(words[:150]) + "..."
|
||||
return text
|
||||
except (OSError, PermissionError):
|
||||
pass
|
||||
return ""
|
||||
|
||||
|
||||
def _palace_context() -> str:
|
||||
"""Run mempalace wake-up command for recent context. Degrades gracefully."""
|
||||
try:
|
||||
bin_path = MEMPALACE_BIN if os.path.exists(MEMPALACE_BIN) else "mempalace"
|
||||
result = subprocess.run(
|
||||
[bin_path, "wake-up"],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=10,
|
||||
)
|
||||
if result.returncode == 0 and result.stdout.strip():
|
||||
return result.stdout.strip()
|
||||
except (FileNotFoundError, subprocess.TimeoutExpired, OSError):
|
||||
# ONNX issues (#373) or CLI not available — degrade gracefully
|
||||
pass
|
||||
return ""
|
||||
|
||||
|
||||
def fleet_status_summary() -> str:
|
||||
"""Read cached fleet status for lightweight session context."""
|
||||
try:
|
||||
if FLEET_STATUS_PATH.exists():
|
||||
data = json.loads(FLEET_STATUS_PATH.read_text(encoding="utf-8"))
|
||||
lines = ["## Fleet Status"]
|
||||
|
||||
if isinstance(data, dict):
|
||||
for agent, status in data.items():
|
||||
if isinstance(status, dict):
|
||||
state = status.get("state", "unknown")
|
||||
last_seen = status.get("last_seen", "?")
|
||||
lines.append(f" {agent}: {state} (last: {last_seen})")
|
||||
else:
|
||||
lines.append(f" {agent}: {status}")
|
||||
|
||||
if len(lines) > 1:
|
||||
return "\n".join(lines)
|
||||
except (OSError, json.JSONDecodeError):
|
||||
pass
|
||||
return ""
|
||||
|
||||
|
||||
def _check_cache() -> str:
|
||||
"""Return cached wake-up if fresh enough."""
|
||||
try:
|
||||
if WAKEUP_CACHE_PATH.exists():
|
||||
age = time.time() - WAKEUP_CACHE_PATH.stat().st_mtime
|
||||
if age < WAKEUP_CACHE_TTL:
|
||||
return WAKEUP_CACHE_PATH.read_text(encoding="utf-8").strip()
|
||||
except OSError:
|
||||
pass
|
||||
return ""
|
||||
|
||||
|
||||
def _write_cache(content: str) -> None:
|
||||
"""Cache the wake-up content."""
|
||||
try:
|
||||
WAKEUP_CACHE_PATH.parent.mkdir(parents=True, exist_ok=True)
|
||||
WAKEUP_CACHE_PATH.write_text(content, encoding="utf-8")
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Main entry point
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def palace_wakeup(force: bool = False) -> str:
|
||||
"""Generate wake-up context for a new session. ~300-900 tokens.
|
||||
|
||||
Args:
|
||||
force: If True, bypass the 5-minute cache and regenerate.
|
||||
|
||||
Returns:
|
||||
Formatted context string suitable for prepending to the system prompt.
|
||||
"""
|
||||
# Check cache first (avoids redundant work on rapid session restarts)
|
||||
if not force:
|
||||
cached = _check_cache()
|
||||
if cached:
|
||||
return cached
|
||||
|
||||
parts = []
|
||||
|
||||
# L0: Identity
|
||||
identity = _load_identity()
|
||||
if identity:
|
||||
parts.append(f"## Identity\n{identity}")
|
||||
|
||||
# L1: Recent palace context
|
||||
palace = _palace_context()
|
||||
if palace:
|
||||
parts.append(palace)
|
||||
|
||||
# Fleet status (lightweight)
|
||||
fleet = fleet_status_summary()
|
||||
if fleet:
|
||||
parts.append(fleet)
|
||||
|
||||
# Timestamp
|
||||
parts.append(f"## Session\nWake-up generated: {time.strftime('%Y-%m-%d %H:%M:%S')}")
|
||||
|
||||
content = "\n\n".join(parts)
|
||||
|
||||
# Cache for TTL
|
||||
_write_cache(content)
|
||||
|
||||
return content
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# CLI entry point for testing
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
if __name__ == "__main__":
|
||||
print(palace_wakeup(force=True))
|
||||
10
wizards/bezalel-sidecar.json
Normal file
@@ -0,0 +1,10 @@
|
||||
{
|
||||
"name": "Bezalel Builder Wizard",
|
||||
"role": "Artificer",
|
||||
"capabilities": [
|
||||
"provisioning",
|
||||
"gpu-orchestration",
|
||||
"model-serving"
|
||||
],
|
||||
"instructions": "Take the provided keys and use them to get GPUs for big models (Gemma 4). Wire them into the harness."
|
||||
}
|
||||