Compare commits


3 Commits

Alexander Whitestone
77be46d9c0 fix: tune rerank scoring for temporal recall (#1011)
- use temporal lane support during rerank
- prefer explicit correction facts when fused scores are close
- preserve top-1 improvements on the holographic prompt matrix benchmark
2026-04-22 11:10:41 -04:00
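
The tie-break described above amounts to a small post-fusion pass. As a rough illustration only (field names like fused_score, matched_lanes, and is_correction are assumed for the sketch, not taken from this diff):

def rerank(candidates, epsilon=0.02):
    """Sketch: reward temporal-lane agreement, then prefer an explicit
    correction fact when fused scores are nearly tied."""
    for c in candidates:
        if "temporal" in c.get("matched_lanes", []):
            c["fused_score"] += 0.05  # small temporal-support bonus
    candidates.sort(key=lambda c: c["fused_score"], reverse=True)
    if len(candidates) > 1:
        top, runner_up = candidates[0], candidates[1]
        if (top["fused_score"] - runner_up["fused_score"] < epsilon
                and runner_up.get("is_correction") and not top.get("is_correction")):
            candidates[0], candidates[1] = runner_up, top
    return candidates
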
Alexander Whitestone
9de2e87aaa wip: implement multi-path holographic recall pipeline (#1011)
- add lexical, semantic, graph, and temporal retrieval lanes with RRF fusion
- store retrieval traces on fused searches and expose them through the provider
- add benchmark helpers for prompt-matrix before/after evaluation
2026-04-22 11:07:33 -04:00
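
Reciprocal rank fusion merges per-lane rankings without cross-lane score calibration: each lane contributes 1 / (k + rank) for every result it returns. A self-contained sketch with the conventional k = 60 (the function shape is illustrative, not this repo's API):

from collections import defaultdict

def rrf_fuse(lane_rankings, k=60):
    """Fuse per-lane ranked lists of fact ids: score(id) = sum over lanes of 1 / (k + rank)."""
    scores = defaultdict(float)
    for ranking in lane_rankings.values():
        for rank, fact_id in enumerate(ranking, start=1):
            scores[fact_id] += 1.0 / (k + rank)
    return sorted(scores, key=scores.get, reverse=True)

# A fact ranked well in several lanes outscores one that only a single lane surfaced:
print(rrf_fuse({"lexical": [3, 1, 2], "semantic": [1, 2, 3], "graph": [2, 1]}))  # [1, 2, 3]
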
Alexander Whitestone
3273f469b7 wip: add failing holographic multi-path recall tests (#1011)
- add prompt matrix fixture for semantic, graph, and temporal recall
- add failing tests for lane traces, benchmark report, and provider trace access
2026-04-22 10:57:49 -04:00
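
The benchmark these tests exercise reduces to a top-1 comparison over the fixture's prompt matrix. A hedged sketch of that shape, where baseline_search and fused_search stand in for the two retrievers:

def benchmark_prompt_matrix(queries, baseline_search, fused_search):
    """Count top-1 hits per retriever; a hit means the expected substring
    appears in the first result's content."""
    report = {"baseline_top1_hits": 0, "fused_top1_hits": 0}
    searchers = {"baseline_top1_hits": baseline_search, "fused_top1_hits": fused_search}
    for q in queries:
        for key, search in searchers.items():
            results = search(q["query"], limit=q.get("top_k", 1))
            if results and q["expected_substring"] in results[0]["content"]:
                report[key] += 1
    report["improvement"] = report["fused_top1_hits"] - report["baseline_top1_hits"]
    return report
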
10 changed files with 917 additions and 615 deletions


@@ -57,7 +57,7 @@ CONFIGURABLE_TOOLSETS = [
("moa", "🧠 Mixture of Agents", "mixture_of_agents"),
("tts", "🔊 Text-to-Speech", "text_to_speech"),
("skills", "📚 Skills", "list, view, manage"),
("todo", "📋 Task Planning", "todo, ultraplan"),
("todo", "📋 Task Planning", "todo"),
("memory", "💾 Memory", "persistent memory across sessions"),
("session_search", "🔎 Session Search", "search past conversations"),
("clarify", "❓ Clarifying Questions", "clarify"),


@@ -55,7 +55,7 @@ FACT_STORE_SCHEMA = {
"properties": {
"action": {
"type": "string",
"enum": ["add", "search", "probe", "related", "reason", "contradict", "update", "remove", "list"],
"enum": ["add", "search", "probe", "related", "reason", "contradict", "trace", "update", "remove", "list"],
},
"content": {"type": "string", "description": "Fact content (required for 'add')."},
"query": {"type": "string", "description": "Search query (required for 'search')."},
@@ -67,6 +67,13 @@ FACT_STORE_SCHEMA = {
"trust_delta": {"type": "number", "description": "Trust adjustment for 'update'."},
"min_trust": {"type": "number", "description": "Minimum trust filter (default: 0.3)."},
"limit": {"type": "integer", "description": "Max results (default: 10)."},
"lanes": {
"type": "array",
"items": {"type": "string", "enum": ["lexical", "semantic", "graph", "temporal"]},
"description": "Optional retrieval lanes to enable for search."
},
"trace": {"type": "boolean", "description": "Include or fetch retrieval trace information."},
"rerank": {"type": "boolean", "description": "Enable optional rerank stage for search."},
},
"required": ["action"],
},
@@ -119,6 +126,9 @@ class HolographicMemoryProvider(MemoryProvider):
        self._store = None
        self._retriever = None
        self._min_trust = float(self._config.get("min_trust_threshold", 0.3))
+       self._retrieval_lanes = self._parse_retrieval_lanes(self._config.get("retrieval_lanes"))
+       self._enable_rerank = str(self._config.get("enable_rerank", "true")).lower() != "false"
+       self._last_retrieval_trace: dict | None = None

    @property
    def name(self) -> str:
@@ -144,6 +154,14 @@ class HolographicMemoryProvider(MemoryProvider):
        except Exception:
            pass

+   def _parse_retrieval_lanes(self, value) -> list[str]:
+       if isinstance(value, str):
+           value = [part.strip() for part in value.split(",") if part.strip()]
+       lanes = list(value or ["lexical", "semantic", "graph", "temporal"])
+       allowed = {"lexical", "semantic", "graph", "temporal"}
+       parsed = [lane for lane in lanes if lane in allowed]
+       return parsed or ["lexical", "semantic", "graph", "temporal"]

    def get_config_schema(self):
        from hermes_constants import display_hermes_home
        _default_db = f"{display_hermes_home()}/memory_store.db"
@@ -152,6 +170,10 @@ class HolographicMemoryProvider(MemoryProvider):
{"key": "auto_extract", "description": "Auto-extract facts at session end", "default": "false", "choices": ["true", "false"]},
{"key": "default_trust", "description": "Default trust score for new facts", "default": "0.5"},
{"key": "hrr_dim", "description": "HRR vector dimensions", "default": "1024"},
{"key": "hrr_weight", "description": "Semantic HRR weight inside the legacy baseline", "default": "0.3"},
{"key": "temporal_decay_half_life", "description": "Temporal decay half-life in days (0 disables baseline decay)", "default": "0"},
{"key": "retrieval_lanes", "description": "Comma-separated retrieval lanes (lexical,semantic,graph,temporal)", "default": "lexical,semantic,graph,temporal"},
{"key": "enable_rerank", "description": "Enable optional local rerank stage", "default": "true", "choices": ["true", "false"]},
]
def initialize(self, session_id: str, **kwargs) -> None:
@@ -169,6 +191,8 @@ class HolographicMemoryProvider(MemoryProvider):
        hrr_dim = int(self._config.get("hrr_dim", 1024))
        hrr_weight = float(self._config.get("hrr_weight", 0.3))
        temporal_decay = int(self._config.get("temporal_decay_half_life", 0))
+       self._retrieval_lanes = self._parse_retrieval_lanes(self._config.get("retrieval_lanes", self._retrieval_lanes))
+       self._enable_rerank = str(self._config.get("enable_rerank", self._enable_rerank)).lower() != "false"

        self._store = MemoryStore(db_path=db_path, default_trust=default_trust, hrr_dim=hrr_dim)
        self._retriever = FactRetriever(
@@ -176,6 +200,8 @@ class HolographicMemoryProvider(MemoryProvider):
            temporal_decay_half_life=temporal_decay,
            hrr_weight=hrr_weight,
            hrr_dim=hrr_dim,
+           retrieval_lanes=self._retrieval_lanes,
+           enable_rerank=self._enable_rerank,
        )

        self._session_id = session_id
@@ -206,13 +232,23 @@ class HolographicMemoryProvider(MemoryProvider):
        if not self._retriever or not query:
            return ""
        try:
-           results = self._retriever.search(query, min_trust=self._min_trust, limit=5)
+           payload = self._retriever.search_with_trace(
+               query,
+               min_trust=self._min_trust,
+               limit=5,
+               lanes=self._retrieval_lanes,
+               rerank=self._enable_rerank,
+           )
+           self._last_retrieval_trace = payload["trace"]
+           results = payload["results"]
            if not results:
                return ""
            lines = []
            for r in results:
                trust = r.get("trust_score", r.get("trust", 0))
-               lines.append(f"- [{trust:.1f}] {r.get('content', '')}")
+               lanes = ",".join(r.get("matched_lanes", []))
+               lane_suffix = f" [{lanes}]" if lanes else ""
+               lines.append(f"- [{trust:.1f}] {r.get('content', '')}{lane_suffix}")
            return "## Holographic Memory\n" + "\n".join(lines)
        except Exception as e:
            logger.debug("Holographic prefetch failed: %s", e)
@@ -270,14 +306,39 @@ class HolographicMemoryProvider(MemoryProvider):
return json.dumps({"fact_id": fact_id, "status": "added"})
elif action == "search":
lanes = args.get("lanes")
rerank = args.get("rerank")
with_trace = bool(args.get("trace", False))
if with_trace:
payload = retriever.search_with_trace(
args["query"],
category=args.get("category"),
min_trust=float(args.get("min_trust", self._min_trust)),
limit=int(args.get("limit", 10)),
lanes=lanes,
rerank=rerank,
)
self._last_retrieval_trace = payload["trace"]
return json.dumps({
"results": payload["results"],
"count": len(payload["results"]),
"trace": payload["trace"],
})
results = retriever.search(
args["query"],
category=args.get("category"),
min_trust=float(args.get("min_trust", self._min_trust)),
limit=int(args.get("limit", 10)),
lanes=lanes,
rerank=rerank,
)
self._last_retrieval_trace = retriever.last_trace
return json.dumps({"results": results, "count": len(results)})
elif action == "trace":
return json.dumps({"trace": self._last_retrieval_trace or retriever.last_trace or {}})
elif action == "probe":
results = retriever.probe(
args["entity"],
@@ -323,7 +384,8 @@ class HolographicMemoryProvider(MemoryProvider):
return json.dumps({"updated": updated})
elif action == "remove":
removed = store.remove_fact(int(args["fact_id"]))
removed = store.remove_fact(int(args["fact_id"])
)
return json.dumps({"removed": removed})
elif action == "list":

File diff suppressed because it is too large.


@@ -83,6 +83,7 @@ _TRUST_MAX = 1.0
# Entity extraction patterns
_RE_CAPITALIZED = re.compile(r'\b([A-Z][a-z]+(?:\s+[A-Z][a-z]+)+)\b')
+_RE_SINGLE_PROPER = re.compile(r'\b([A-Z][A-Za-z0-9_-]{2,})\b')
_RE_DOUBLE_QUOTE = re.compile(r'"([^"]+)"')
_RE_SINGLE_QUOTE = re.compile(r"'([^']+)'")
_RE_AKA = re.compile(
@@ -414,6 +415,13 @@ class MemoryStore:
        for m in _RE_CAPITALIZED.finditer(text):
            _add(m.group(1))

+       skip_singletons = {"The", "This", "That", "These", "Those", "And", "But", "For", "With"}
+       for m in _RE_SINGLE_PROPER.finditer(text):
+           candidate = m.group(1)
+           if candidate in skip_singletons:
+               continue
+           _add(candidate)

        for m in _RE_DOUBLE_QUOTE.finditer(text):
            _add(m.group(1))
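
To illustrate what _RE_SINGLE_PROPER adds over the existing patterns (the expected output follows from the regex itself):

import re

_RE_SINGLE_PROPER = re.compile(r'\b([A-Z][A-Za-z0-9_-]{2,})\b')

print(_RE_SINGLE_PROPER.findall("Ezra operates the BURN2 lane for forge work."))
# ['Ezra', 'BURN2'] -- sentence-starters like "The" would also match,
# which is why extraction filters candidates through skip_singletons.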


@@ -0,0 +1,56 @@
{
  "facts": [
    {
      "content": "Alexander Whitestone aka Rockachopa.",
      "category": "general",
      "tags": "identity alias"
    },
    {
      "content": "Rockachopa uses Ansible playbooks for sovereign rollouts.",
      "category": "project",
      "tags": "ansible playbooks rollout"
    },
    {
      "content": "The provider is anthropic/claude-haiku-4-5.",
      "category": "project",
      "tags": "provider default",
      "updated_at": "2026-01-01T00:00:00Z"
    },
    {
      "content": "Correction: the provider is mimo-v2-pro.",
      "category": "project",
      "tags": "provider current",
      "updated_at": "2026-04-20T00:00:00Z"
    },
    {
      "content": "Ezra operates the BURN2 lane for forge work.",
      "category": "project",
      "tags": "ezra burn2 forge lane"
    },
    {
      "content": "BURN2 handles forge triage and review.",
      "category": "project",
      "tags": "forge triage review"
    }
  ],
  "queries": [
    {
      "name": "semantic_alias_graph",
      "query": "What automation does Alexander Whitestone use for deploys?",
      "expected_substring": "Ansible playbooks",
      "top_k": 1
    },
    {
      "name": "temporal_correction",
      "query": "What provider should we use?",
      "expected_substring": "mimo-v2-pro",
      "top_k": 1
    },
    {
      "name": "graph_lane",
      "query": "Which forge lane does Ezra operate?",
      "expected_substring": "BURN2 lane",
      "top_k": 1
    }
  ]
}


@@ -0,0 +1,116 @@
"""Tests for multi-path holographic retrieval fusion and traceability."""
from __future__ import annotations
import json
import sys
from pathlib import Path
sys.path.insert(0, str(Path(__file__).resolve().parents[3]))
from plugins.memory.holographic import HolographicMemoryProvider
from plugins.memory.holographic.retrieval import FactRetriever, format_benchmark_report
from plugins.memory.holographic.store import MemoryStore
_FIXTURE_PATH = Path(__file__).resolve().parents[2] / "fixtures" / "holographic_recall_matrix.json"
def _fixture() -> dict:
return json.loads(_FIXTURE_PATH.read_text())
def _seed_store(tmp_path) -> MemoryStore:
store = MemoryStore(db_path=tmp_path / "memory_store.db")
for fact in _fixture()["facts"]:
fact_id = store.add_fact(fact["content"], category=fact["category"], tags=fact.get("tags", ""))
if fact.get("updated_at"):
store._conn.execute(
"UPDATE facts SET created_at = ?, updated_at = ? WHERE fact_id = ?",
(fact["updated_at"], fact["updated_at"], fact_id),
)
store._conn.commit()
return store
class TestMultiPathRetrieval:
def test_lane_toggle_and_trace_contributions(self, tmp_path):
store = _seed_store(tmp_path)
retriever = FactRetriever(store=store)
payload = retriever.search_with_trace(
"Which forge lane does Ezra operate?",
limit=3,
lanes=["lexical", "graph"],
)
assert payload["trace"]["lanes_run"] == ["lexical", "graph"]
assert payload["results"]
top = payload["results"][0]
assert "BURN2 lane" in top["content"]
assert "graph" in top["lane_contributions"]
assert set(top["lane_contributions"]).issubset({"lexical", "graph"})
def test_trace_available_for_failed_recall(self, tmp_path):
store = _seed_store(tmp_path)
retriever = FactRetriever(store=store)
payload = retriever.search_with_trace(
"nonexistent memory topic xyz123",
limit=3,
lanes=["lexical", "semantic", "graph", "temporal"],
)
assert payload["results"] == []
assert payload["trace"]["fused_count"] == 0
assert payload["trace"]["lane_hits"]["lexical"] == 0
assert payload["trace"]["lane_hits"]["semantic"] == 0
def test_benchmark_prompt_matrix_shows_gain_over_baseline(self, tmp_path):
store = _seed_store(tmp_path)
retriever = FactRetriever(store=store)
report = retriever.benchmark_prompt_matrix(_fixture()["queries"], limit=3)
assert report["fused_top1_hits"] > report["baseline_top1_hits"]
assert report["improvement"] > 0
rendered = format_benchmark_report(report)
assert "Prompt matrix benchmark" in rendered
assert "semantic_alias_graph" in rendered
assert "improvement" in rendered.lower()
class TestHolographicProviderTrace:
def test_prefetch_records_trace_and_trace_action_returns_it(self, tmp_path):
provider = HolographicMemoryProvider(
config={
"db_path": str(tmp_path / "provider.db"),
"retrieval_lanes": ["lexical", "semantic", "graph", "temporal"],
"enable_rerank": True,
}
)
provider.initialize("test-session")
seed_store = _seed_store(tmp_path / "seed")
rows = seed_store.list_facts(min_trust=0.0, limit=20)
for row in rows:
provider._store.add_fact(row["content"], category=row["category"], tags=row.get("tags", ""))
if row["content"].startswith("The provider is anthropic"):
provider._store._conn.execute(
"UPDATE facts SET created_at = ?, updated_at = ? WHERE content = ?",
("2026-01-01T00:00:00Z", "2026-01-01T00:00:00Z", row["content"]),
)
elif row["content"].startswith("Correction: the provider is mimo"):
provider._store._conn.execute(
"UPDATE facts SET created_at = ?, updated_at = ? WHERE content = ?",
("2026-04-20T00:00:00Z", "2026-04-20T00:00:00Z", row["content"]),
)
provider._store._conn.commit()
block = provider.prefetch("What provider should we use?")
assert "Holographic Memory" in block
assert "mimo-v2-pro" in block
trace_payload = json.loads(provider.handle_tool_call("fact_store", {"action": "trace"}))
assert trace_payload["trace"]["query"] == "What provider should we use?"
assert trace_payload["trace"]["rerank_applied"] in {True, False}
assert trace_payload["trace"]["lane_hits"]["temporal"] >= 1


@@ -294,32 +294,22 @@ class TestBuiltinDiscovery:
"tools.browser_tool",
"tools.clarify_tool",
"tools.code_execution_tool",
"tools.crisis_tool",
"tools.cronjob_tools",
"tools.delegate_tool",
"tools.file_tools",
"tools.homeassistant_tool",
"tools.image_generation_tool",
"tools.local_inference_tool",
"tools.memory_tool",
"tools.mixture_of_agents_tool",
"tools.process_registry",
"tools.rl_training_tool",
"tools.scavenger_fixer",
"tools.send_message_tool",
"tools.session_search_tool",
"tools.skill_manager_tool",
"tools.skills_tool",
"tools.sovereign_router",
"tools.sovereign_scavenger",
"tools.sovereign_teleport",
"tools.static_analyzer",
"tools.symbolic_verify",
"tools.terminal_tool",
"tools.todo_tool",
"tools.tts_tool",
"tools.ultraplan",
"tools.verify_tool",
"tools.vision_tools",
"tools.web_tools",
}


@@ -1,81 +0,0 @@
import json
from pathlib import Path

from toolsets import resolve_toolset
from tools.registry import registry


def test_create_action_saves_markdown_and_json(tmp_path):
    from tools.ultraplan import ultraplan_tool

    result = json.loads(
        ultraplan_tool(
            action="create",
            mission="Daily autonomous planning",
            streams=[
                {
                    "id": "A",
                    "name": "Backlog burn",
                    "phases": [
                        {"id": "A1", "name": "Triage", "artifact": "issue list"},
                        {"id": "A2", "name": "Ship", "dependencies": ["A1"], "artifact": "PR"},
                    ],
                }
            ],
            base_dir=str(tmp_path),
        )
    )
    assert result["success"] is True
    assert Path(result["file_path"]).exists()
    assert Path(result["json_path"]).exists()
    assert "Work Streams" in Path(result["file_path"]).read_text(encoding="utf-8")


def test_load_action_returns_saved_plan(tmp_path):
    from tools.ultraplan import ultraplan_tool

    created = json.loads(
        ultraplan_tool(
            action="create",
            date="20260422",
            mission="Mission from saved plan",
            base_dir=str(tmp_path),
        )
    )
    loaded = json.loads(
        ultraplan_tool(
            action="load",
            date="20260422",
            base_dir=str(tmp_path),
        )
    )
    assert created["success"] is True
    assert loaded["success"] is True
    assert loaded["plan"]["mission"] == "Mission from saved plan"
    assert loaded["file_path"].endswith("ultraplan_20260422.md")


def test_cron_spec_returns_daily_schedule_and_prompt():
    from tools.ultraplan import ultraplan_tool

    result = json.loads(ultraplan_tool(action="cron_spec"))
    assert result["success"] is True
    assert result["schedule"] == "0 6 * * *"
    assert "Ultraplan" in result["prompt"]
    assert "ultraplan_YYYYMMDD.md" in result["prompt"]


def test_registry_registers_ultraplan_tool():
    import tools.ultraplan  # noqa: F401

    entry = registry.get_entry("ultraplan")
    assert entry is not None
    assert entry.toolset == "todo"


def test_default_toolsets_include_ultraplan():
    assert "ultraplan" in resolve_toolset("todo")
    assert "ultraplan" in resolve_toolset("hermes-cli")


@@ -290,9 +290,6 @@ def load_ultraplan(date: str, base_dir: Path = None) -> Optional[Ultraplan]:
        return None

-DEFAULT_ULTRAPLAN_SCHEDULE = "0 6 * * *"

def generate_daily_cron_prompt() -> str:
    """Generate the prompt for the daily ultraplan cron job."""
    return """Generate today's Ultraplan.
@@ -301,9 +298,9 @@ Steps:
1. Check open Gitea issues assigned to you
2. Check open PRs needing review
3. Check fleet health status
-4. Decompose work into parallel streams with concrete phases and artifacts
-5. Use the ultraplan tool to save ~/.timmy/cron/ultraplan_YYYYMMDD.md and the matching JSON sidecar
-6. Optionally file a Gitea issue with the plan summary
+4. Decompose work into parallel streams
+5. Generate ultraplan_YYYYMMDD.md
+6. File Gitea issue with the plan

Output format:
- Mission statement
@@ -311,176 +308,3 @@ Output format:
- Dependency map
- Success metrics
"""


def generate_daily_cron_job_spec(schedule: str = DEFAULT_ULTRAPLAN_SCHEDULE) -> Dict[str, str]:
    """Return a reusable cron job spec for daily Ultraplan generation."""
    return {
        "name": "Daily Ultraplan",
        "schedule": schedule,
        "prompt": generate_daily_cron_prompt(),
        "path_pattern": "~/.timmy/cron/ultraplan_YYYYMMDD.md",
    }


def _resolve_base_dir(base_dir: Optional[str | Path]) -> Path:
    """Normalize the requested Ultraplan base directory."""
    if base_dir is None:
        return Path.home() / ".timmy" / "cron"
    return Path(base_dir).expanduser()


def ultraplan_tool(
    action: str,
    date: Optional[str] = None,
    mission: str = "",
    streams: Optional[List[Dict[str, Any]]] = None,
    metrics: Optional[Dict[str, Any]] = None,
    notes: str = "",
    base_dir: Optional[str] = None,
) -> str:
    """Create/load Ultraplan artifacts and expose a daily cron spec."""
    from tools.registry import tool_error, tool_result

    action = (action or "").strip().lower()
    resolved_base_dir = _resolve_base_dir(base_dir)
    try:
        if action == "create":
            plan = create_ultraplan(date=date, mission=mission, streams=streams or [])
            if metrics:
                plan.metrics = metrics
            if notes:
                plan.notes = notes
            md_path = save_ultraplan(plan, base_dir=resolved_base_dir)
            json_path = resolved_base_dir / f"ultraplan_{plan.date}.json"
            return tool_result(
                success=True,
                action="create",
                date=plan.date,
                file_path=str(md_path),
                json_path=str(json_path),
                plan=plan.to_dict(),
            )
        if action == "load":
            plan_date = date or datetime.now().strftime("%Y%m%d")
            plan = load_ultraplan(plan_date, base_dir=resolved_base_dir)
            if plan is None:
                return tool_error(
                    f"No Ultraplan found for {plan_date}",
                    success=False,
                    action="load",
                    date=plan_date,
                )
            return tool_result(
                success=True,
                action="load",
                date=plan.date,
                file_path=str(resolved_base_dir / f"ultraplan_{plan.date}.md"),
                json_path=str(resolved_base_dir / f"ultraplan_{plan.date}.json"),
                plan=plan.to_dict(),
                markdown=plan.to_markdown(),
            )
        if action == "cron_spec":
            spec = generate_daily_cron_job_spec()
            return tool_result(success=True, action="cron_spec", **spec)
        return tool_error(
            f"Unknown Ultraplan action: {action}",
            success=False,
            action=action,
        )
    except Exception as e:
        return tool_error(f"Ultraplan {action or 'tool'} failed: {e}", success=False, action=action)


ULTRAPLAN_SCHEMA = {
    "name": "ultraplan",
    "description": (
        "Create or load daily Ultraplan planning artifacts under ~/.timmy/cron/ and "
        "return a reusable cron spec for autonomous planning. Use this when you want "
        "a concrete markdown/json plan file with streams, phases, dependencies, and metrics."
    ),
    "parameters": {
        "type": "object",
        "properties": {
            "action": {
                "type": "string",
                "enum": ["create", "load", "cron_spec"],
                "description": "Operation to perform",
            },
            "date": {
                "type": "string",
                "description": "Plan date as YYYYMMDD. Defaults to today for create/load.",
            },
            "mission": {
                "type": "string",
                "description": "High-level mission statement for today's plan.",
            },
            "streams": {
                "type": "array",
                "description": "Optional work streams with phases/artifacts/dependencies for create.",
                "items": {
                    "type": "object",
                    "properties": {
                        "id": {"type": "string"},
                        "name": {"type": "string"},
                        "phases": {
                            "type": "array",
                            "items": {
                                "type": "object",
                                "properties": {
                                    "id": {"type": "string"},
                                    "name": {"type": "string"},
                                    "description": {"type": "string"},
                                    "artifact": {"type": "string"},
                                    "dependencies": {
                                        "type": "array",
                                        "items": {"type": "string"},
                                    },
                                },
                                "required": ["name"],
                            },
                        },
                    },
                    "required": ["name"],
                },
            },
            "metrics": {
                "type": "object",
                "description": "Optional success metrics to store on the plan.",
                "additionalProperties": True,
            },
            "notes": {
                "type": "string",
                "description": "Optional free-form notes appended to the saved plan.",
            },
            "base_dir": {
                "type": "string",
                "description": "Optional override for the Ultraplan storage directory.",
            },
        },
        "required": ["action"],
    },
}


from tools.registry import registry

registry.register(
    name="ultraplan",
    toolset="todo",
    schema=ULTRAPLAN_SCHEMA,
    handler=lambda args, **_kw: ultraplan_tool(
        action=args.get("action", ""),
        date=args.get("date"),
        mission=args.get("mission", ""),
        streams=args.get("streams"),
        metrics=args.get("metrics"),
        notes=args.get("notes", ""),
        base_dir=args.get("base_dir"),
    ),
    emoji="🗺️",
)


@@ -47,7 +47,7 @@ _HERMES_CORE_TOOLS = [
    # Text-to-speech
    "text_to_speech",
    # Planning & memory
-   "todo", "ultraplan", "memory",
+   "todo", "memory",
    # Session history search
    "session_search",
    # Clarifying questions
@@ -157,8 +157,8 @@ TOOLSETS = {
    },
    "todo": {
-       "description": "Task planning and tracking for multi-step work, including daily Ultraplan artifacts",
-       "tools": ["todo", "ultraplan"],
+       "description": "Task planning and tracking for multi-step work",
+       "tools": ["todo"],
        "includes": []
    },