fix(hindsight): overhaul hindsight memory plugin and memory setup wizard

- Dedicated asyncio event loop for Hindsight async calls (fixes aiohttp session leaks)
- Client caching (reuse instead of creating per-call)
- Local mode daemon management with config change detection and auto-restart
- Memory mode support (hybrid/context/tools) and prefetch method (recall/reflect)
- Proper shutdown with event loop and client cleanup
- Disable HindsightEmbedded.__del__ to avoid GC loop errors
- Update API URLs (app -> ui.hindsight.vectorize.io, api_url -> base_url)
- Setup wizard: conditional fields (when clause), dynamic defaults (default_from)
- Switch dependency install from pip to uv (correct for uv-based venvs)
- Add hindsight-all to plugin.yaml and import mapping
- 12 new tests for dispatch routing and setup field filtering

Original PR #5044 by cdbartholomew.
This commit is contained in:
Chris Bartholomew
2026-04-04 12:06:08 -07:00
committed by Teknium
parent 93aa01c71c
commit 28e1e210ee
5 changed files with 574 additions and 83 deletions

View File

@@ -151,6 +151,7 @@ def _install_dependencies(provider_name: str) -> None:
"honcho-ai": "honcho",
"mem0ai": "mem0",
"hindsight-client": "hindsight_client",
"hindsight-all": "hindsight",
}
# Check which packages are missing
@@ -166,9 +167,18 @@ def _install_dependencies(provider_name: str) -> None:
return
print(f"\n Installing dependencies: {', '.join(missing)}")
import shutil
uv_path = shutil.which("uv")
if not uv_path:
print(f" ⚠ uv not found — cannot install dependencies")
print(f" Install uv: curl -LsSf https://astral.sh/uv/install.sh | sh")
print(f" Then re-run: hermes memory setup")
return
try:
subprocess.run(
[sys.executable, "-m", "pip", "install", "--quiet"] + missing,
[uv_path, "pip", "install", "--python", sys.executable, "--quiet"] + missing,
check=True, timeout=120,
capture_output=True,
)
@@ -178,10 +188,10 @@ def _install_dependencies(provider_name: str) -> None:
stderr = (e.stderr or b"").decode()[:200]
if stderr:
print(f" {stderr}")
print(f" Run manually: pip install {' '.join(missing)}")
print(f" Run manually: uv pip install --python {sys.executable} {' '.join(missing)}")
except Exception as e:
print(f" ⚠ Install failed: {e}")
print(f" Run manually: pip install {' '.join(missing)}")
print(f" Run manually: uv pip install --python {sys.executable} {' '.join(missing)}")
# Also show external dependencies (non-pip) if any
ext_deps = meta.get("external_dependencies", [])
@@ -275,7 +285,6 @@ def cmd_setup(args) -> None:
schema = provider.get_config_schema() if hasattr(provider, "get_config_schema") else []
# Provider config section
provider_config = config["memory"].get(name, {})
if not isinstance(provider_config, dict):
provider_config = {}
@@ -290,11 +299,25 @@ def cmd_setup(args) -> None:
key = field["key"]
desc = field.get("description", key)
default = field.get("default")
# Dynamic default: look up default from another field's value
default_from = field.get("default_from")
if default_from and isinstance(default_from, dict):
ref_field = default_from.get("field", "")
ref_map = default_from.get("map", {})
ref_value = provider_config.get(ref_field, "")
if ref_value and ref_value in ref_map:
default = ref_map[ref_value]
is_secret = field.get("secret", False)
choices = field.get("choices")
env_var = field.get("env_var")
url = field.get("url")
# Skip fields whose "when" condition doesn't match
when = field.get("when")
if when and isinstance(when, dict):
if not all(provider_config.get(k) == v for k, v in when.items()):
continue
if choices and not is_secret:
# Use curses picker for choice fields
choice_items = [(c, "") for c in choices]

View File

@@ -1,11 +1,11 @@
# Hindsight Memory Provider
Long-term memory with knowledge graph, entity resolution, and multi-strategy retrieval. Supports cloud and local modes.
Long-term memory with knowledge graph, entity resolution, and multi-strategy retrieval. Supports cloud and local (embedded) modes.
## Requirements
- Cloud: `pip install hindsight-client` + API key from [app.hindsight.vectorize.io](https://app.hindsight.vectorize.io)
- Local: `pip install hindsight` + LLM API key for embeddings
- **Cloud:** API key from [ui.hindsight.vectorize.io](https://ui.hindsight.vectorize.io)
- **Local:** API key for a supported LLM provider (OpenAI, Anthropic, Gemini, Groq, MiniMax, or Ollama). Embeddings and reranking run locally — no additional API keys needed.
## Setup
@@ -13,26 +13,86 @@ Long-term memory with knowledge graph, entity resolution, and multi-strategy ret
hermes memory setup # select "hindsight"
```
Or manually:
The setup wizard will install dependencies automatically via `uv` and walk you through configuration.
Or manually (cloud mode with defaults):
```bash
hermes config set memory.provider hindsight
echo "HINDSIGHT_API_KEY=your-key" >> ~/.hermes/.env
```
### Cloud Mode
Connects to the Hindsight Cloud API. Requires an API key from [ui.hindsight.vectorize.io](https://ui.hindsight.vectorize.io).
### Local Mode
Runs an embedded Hindsight server with built-in PostgreSQL. Requires an LLM API key (e.g. Groq, OpenAI, Anthropic) for memory extraction and synthesis. The daemon starts automatically in the background on first use and stops after 5 minutes of inactivity.
Daemon startup logs: `~/.hermes/logs/hindsight-embed.log`
Daemon runtime logs: `~/.hindsight/profiles/<profile>.log`
## Config
Config file: `$HERMES_HOME/hindsight/config.json` (or `~/.hindsight/config.json` legacy)
Config file: `~/.hermes/hindsight/config.json`
### Connection
| Key | Default | Description |
|-----|---------|-------------|
| `mode` | `cloud` | `cloud` or `local` |
| `bank_id` | `hermes` | Memory bank identifier |
| `budget` | `mid` | Recall thoroughness: `low`/`mid`/`high` |
| `api_url` | `https://api.hindsight.vectorize.io` | API URL (cloud mode) |
| `api_url` | `http://localhost:8888` | API URL (local mode, unused — daemon manages its own port) |
### Memory
| Key | Default | Description |
|-----|---------|-------------|
| `bank_id` | `hermes` | Memory bank name |
| `budget` | `mid` | Recall thoroughness: `low` / `mid` / `high` |
### Integration
| Key | Default | Description |
|-----|---------|-------------|
| `memory_mode` | `hybrid` | How memories are integrated into the agent |
| `prefetch_method` | `recall` | Method for automatic context injection |
**memory_mode:**
- `hybrid` — automatic context injection + tools available to the LLM
- `context` — automatic injection only, no tools exposed
- `tools` — tools only, no automatic injection
**prefetch_method:**
- `recall` — injects raw memory facts (fast)
- `reflect` — injects LLM-synthesized summary (slower, more coherent)
### Local Mode LLM
| Key | Default | Description |
|-----|---------|-------------|
| `llm_provider` | `openai` | LLM provider: `openai`, `anthropic`, `gemini`, `groq`, `minimax`, `ollama` |
| `llm_model` | per-provider | Model name (e.g. `gpt-4o-mini`, `openai/gpt-oss-120b`) |
The LLM API key is stored in `~/.hermes/.env` as `HINDSIGHT_LLM_API_KEY`.
## Tools
Available in `hybrid` and `tools` memory modes:
| Tool | Description |
|------|-------------|
| `hindsight_retain` | Store information with auto entity extraction |
| `hindsight_recall` | Multi-strategy search (semantic + entity graph) |
| `hindsight_reflect` | Cross-memory synthesis (LLM-powered) |
## Environment Variables
| Variable | Description |
|----------|-------------|
| `HINDSIGHT_API_KEY` | API key for Hindsight Cloud |
| `HINDSIGHT_LLM_API_KEY` | LLM API key for local mode |
| `HINDSIGHT_API_URL` | Override API endpoint |
| `HINDSIGHT_BANK_ID` | Override bank name |
| `HINDSIGHT_BUDGET` | Override recall budget |
| `HINDSIGHT_MODE` | Override mode (`cloud` / `local`) |

View File

@@ -1,7 +1,7 @@
"""Hindsight memory plugin — MemoryProvider interface.
Long-term memory with knowledge graph, entity resolution, and multi-strategy
retrieval. Supports cloud (API key) and local (embedded PostgreSQL) modes.
retrieval. Supports cloud (API key) and local modes.
Original PR #1811 by benfrank241, adapted to MemoryProvider ABC.
@@ -18,10 +18,10 @@ Or via $HERMES_HOME/hindsight/config.json (profile-scoped), falling back to
from __future__ import annotations
import asyncio
import json
import logging
import os
import queue
import threading
from typing import Any, Dict, List
@@ -30,30 +30,51 @@ from agent.memory_provider import MemoryProvider
logger = logging.getLogger(__name__)
_DEFAULT_API_URL = "https://api.hindsight.vectorize.io"
_DEFAULT_LOCAL_URL = "http://localhost:8888"
_VALID_BUDGETS = {"low", "mid", "high"}
_PROVIDER_DEFAULT_MODELS = {
"openai": "gpt-4o-mini",
"anthropic": "claude-haiku-4-5",
"gemini": "gemini-2.5-flash",
"groq": "openai/gpt-oss-120b",
"minimax": "MiniMax-M2.7",
"ollama": "gemma3:12b",
"lmstudio": "local-model",
}
# ---------------------------------------------------------------------------
# Thread helper (from original PR — avoids aiohttp event loop conflicts)
# Dedicated event loop for Hindsight async calls (one per process, reused).
# Avoids creating ephemeral loops that leak aiohttp sessions.
# ---------------------------------------------------------------------------
def _run_in_thread(fn, timeout: float = 30.0):
result_q: queue.Queue = queue.Queue(maxsize=1)
_loop: asyncio.AbstractEventLoop | None = None
_loop_thread: threading.Thread | None = None
_loop_lock = threading.Lock()
def _run():
import asyncio
asyncio.set_event_loop(None)
try:
result_q.put(("ok", fn()))
except Exception as exc:
result_q.put(("err", exc))
t = threading.Thread(target=_run, daemon=True, name="hindsight-call")
t.start()
kind, value = result_q.get(timeout=timeout)
if kind == "err":
raise value
return value
def _get_loop() -> asyncio.AbstractEventLoop:
"""Return a long-lived event loop running on a background thread."""
global _loop, _loop_thread
with _loop_lock:
if _loop is not None and _loop.is_running():
return _loop
_loop = asyncio.new_event_loop()
def _run():
asyncio.set_event_loop(_loop)
_loop.run_forever()
_loop_thread = threading.Thread(target=_run, daemon=True, name="hindsight-loop")
_loop_thread.start()
return _loop
def _run_sync(coro, timeout: float = 120.0):
"""Schedule *coro* on the shared loop and block until done."""
loop = _get_loop()
future = asyncio.run_coroutine_threadsafe(coro, loop)
return future.result(timeout=timeout)
# ---------------------------------------------------------------------------
@@ -161,9 +182,13 @@ class HindsightMemoryProvider(MemoryProvider):
def __init__(self):
self._config = None
self._api_key = None
self._api_url = _DEFAULT_API_URL
self._bank_id = "hermes"
self._budget = "mid"
self._mode = "cloud"
self._memory_mode = "hybrid" # "context", "tools", or "hybrid"
self._prefetch_method = "recall" # "recall" or "reflect"
self._client = None
self._prefetch_result = ""
self._prefetch_lock = threading.Lock()
self._prefetch_thread = None
@@ -178,10 +203,10 @@ class HindsightMemoryProvider(MemoryProvider):
cfg = _load_config()
mode = cfg.get("mode", "cloud")
if mode == "local":
embed = cfg.get("embed", {})
return bool(embed.get("llmApiKey") or os.environ.get("HINDSIGHT_LLM_API_KEY"))
api_key = cfg.get("apiKey") or os.environ.get("HINDSIGHT_API_KEY", "")
return bool(api_key)
return True
has_key = bool(cfg.get("apiKey") or os.environ.get("HINDSIGHT_API_KEY", ""))
has_url = bool(cfg.get("api_url") or os.environ.get("HINDSIGHT_API_URL", ""))
return has_key or has_url
except Exception:
return False
@@ -204,49 +229,148 @@ class HindsightMemoryProvider(MemoryProvider):
def get_config_schema(self):
return [
{"key": "mode", "description": "Cloud API or local embedded mode", "default": "cloud", "choices": ["cloud", "local"]},
{"key": "api_key", "description": "Hindsight Cloud API key", "secret": True, "env_var": "HINDSIGHT_API_KEY", "url": "https://app.hindsight.vectorize.io"},
{"key": "bank_id", "description": "Memory bank identifier", "default": "hermes"},
{"key": "api_url", "description": "Hindsight API URL", "default": _DEFAULT_API_URL, "when": {"mode": "cloud"}},
{"key": "api_key", "description": "Hindsight Cloud API key", "secret": True, "env_var": "HINDSIGHT_API_KEY", "url": "https://ui.hindsight.vectorize.io", "when": {"mode": "cloud"}},
{"key": "llm_provider", "description": "LLM provider for local mode", "default": "openai", "choices": ["openai", "anthropic", "gemini", "groq", "minimax", "ollama"], "when": {"mode": "local"}},
{"key": "llm_api_key", "description": "LLM API key for local Hindsight", "secret": True, "env_var": "HINDSIGHT_LLM_API_KEY", "when": {"mode": "local"}},
{"key": "llm_model", "description": "LLM model for local mode", "default": "gpt-4o-mini", "default_from": {"field": "llm_provider", "map": _PROVIDER_DEFAULT_MODELS}, "when": {"mode": "local"}},
{"key": "bank_id", "description": "Memory bank name", "default": "hermes"},
{"key": "budget", "description": "Recall thoroughness", "default": "mid", "choices": ["low", "mid", "high"]},
{"key": "llm_provider", "description": "LLM provider for local mode", "default": "anthropic", "choices": ["anthropic", "openai", "groq", "ollama"]},
{"key": "llm_api_key", "description": "LLM API key for local mode", "secret": True, "env_var": "HINDSIGHT_LLM_API_KEY"},
{"key": "llm_model", "description": "LLM model for local mode", "default": "claude-haiku-4-5-20251001"},
{"key": "memory_mode", "description": "Memory integration mode", "default": "hybrid", "choices": ["hybrid", "context", "tools"]},
{"key": "prefetch_method", "description": "Auto-recall method", "default": "recall", "choices": ["recall", "reflect"]},
]
def _make_client(self):
"""Create a fresh Hindsight client (thread-safe)."""
if self._mode == "local":
from hindsight import HindsightEmbedded
embed = self._config.get("embed", {})
return HindsightEmbedded(
profile=embed.get("profile", "hermes"),
llm_provider=embed.get("llmProvider", ""),
llm_api_key=embed.get("llmApiKey", ""),
llm_model=embed.get("llmModel", ""),
)
from hindsight_client import Hindsight
return Hindsight(api_key=self._api_key, timeout=30.0)
def _get_client(self):
    """Return the shared Hindsight client, creating it lazily on first use.

    Local mode builds an embedded client from the JSON config (with the
    LLM key falling back to HINDSIGHT_LLM_API_KEY); cloud mode builds an
    HTTP client against the configured base URL, attaching the API key
    only when one is present.
    """
    if self._client is not None:
        return self._client
    if self._mode == "local":
        from hindsight import HindsightEmbedded
        # Neutralize __del__ on the class so garbage collection never
        # triggers "attached to a different loop" errors — cleanup is
        # handled explicitly in shutdown() instead.
        HindsightEmbedded.__del__ = lambda self: None
        cfg = self._config
        self._client = HindsightEmbedded(
            profile=cfg.get("profile", "hermes"),
            llm_provider=cfg.get("llm_provider", ""),
            llm_api_key=cfg.get("llmApiKey") or os.environ.get("HINDSIGHT_LLM_API_KEY", ""),
            llm_model=cfg.get("llm_model", ""),
        )
        return self._client
    from hindsight_client import Hindsight
    client_kwargs = {"base_url": self._api_url, "timeout": 30.0}
    if self._api_key:
        client_kwargs["api_key"] = self._api_key
    self._client = Hindsight(**client_kwargs)
    return self._client
def initialize(self, session_id: str, **kwargs) -> None:
self._config = _load_config()
self._mode = self._config.get("mode", "cloud")
self._api_key = self._config.get("apiKey") or os.environ.get("HINDSIGHT_API_KEY", "")
default_url = _DEFAULT_LOCAL_URL if self._mode == "local" else _DEFAULT_API_URL
self._api_url = self._config.get("api_url") or os.environ.get("HINDSIGHT_API_URL", default_url)
banks = self._config.get("banks", {}).get("hermes", {})
self._bank_id = banks.get("bankId", "hermes")
budget = banks.get("budget", "mid")
self._bank_id = self._config.get("bank_id") or banks.get("bankId", "hermes")
budget = self._config.get("budget") or banks.get("budget", "mid")
self._budget = budget if budget in _VALID_BUDGETS else "mid"
# Ensure bank exists
try:
client = _run_in_thread(self._make_client)
_run_in_thread(lambda: client.create_bank(bank_id=self._bank_id, name=self._bank_id))
except Exception:
pass # Already exists
memory_mode = self._config.get("memory_mode", "hybrid")
self._memory_mode = memory_mode if memory_mode in ("context", "tools", "hybrid") else "hybrid"
prefetch_method = self._config.get("prefetch_method", "recall")
self._prefetch_method = prefetch_method if prefetch_method in ("recall", "reflect") else "recall"
logger.info("Hindsight initialized: mode=%s, api_url=%s, bank=%s, budget=%s, memory_mode=%s, prefetch_method=%s",
self._mode, self._api_url, self._bank_id, self._budget, self._memory_mode, self._prefetch_method)
# For local mode, start the embedded daemon in the background so it
# doesn't block the chat. Redirect stdout/stderr to a log file to
# prevent rich startup output from spamming the terminal.
if self._mode == "local":
def _start_daemon():
import traceback
from pathlib import Path
log_dir = Path(os.environ.get("HERMES_HOME", os.path.expanduser("~/.hermes"))) / "logs"
log_dir.mkdir(parents=True, exist_ok=True)
log_path = log_dir / "hindsight-embed.log"
try:
# Redirect the daemon manager's Rich console to our log file
# instead of stderr. This avoids global fd redirects that
# would capture output from other threads.
import hindsight_embed.daemon_embed_manager as dem
from rich.console import Console
dem.console = Console(file=open(log_path, "a"), force_terminal=False)
client = self._get_client()
profile = self._config.get("profile", "hermes")
# Update the profile .env to match our current config so
# the daemon always starts with the right settings.
# If the config changed and the daemon is running, stop it.
from pathlib import Path as _Path
profile_env = _Path.home() / ".hindsight" / "profiles" / f"{profile}.env"
current_key = self._config.get("llmApiKey") or os.environ.get("HINDSIGHT_LLM_API_KEY", "")
current_provider = self._config.get("llm_provider", "")
current_model = self._config.get("llm_model", "")
# Read saved profile config
saved = {}
if profile_env.exists():
for line in profile_env.read_text().splitlines():
if "=" in line and not line.startswith("#"):
k, v = line.split("=", 1)
saved[k.strip()] = v.strip()
config_changed = (
saved.get("HINDSIGHT_API_LLM_PROVIDER") != current_provider or
saved.get("HINDSIGHT_API_LLM_MODEL") != current_model or
saved.get("HINDSIGHT_API_LLM_API_KEY") != current_key
)
if config_changed:
# Write updated profile .env
profile_env.parent.mkdir(parents=True, exist_ok=True)
profile_env.write_text(
f"HINDSIGHT_API_LLM_PROVIDER={current_provider}\n"
f"HINDSIGHT_API_LLM_API_KEY={current_key}\n"
f"HINDSIGHT_API_LLM_MODEL={current_model}\n"
f"HINDSIGHT_API_LOG_LEVEL=info\n"
)
if client._manager.is_running(profile):
with open(log_path, "a") as f:
f.write("\n=== Config changed, restarting daemon ===\n")
client._manager.stop(profile)
client._ensure_started()
with open(log_path, "a") as f:
f.write("\n=== Daemon started successfully ===\n")
except Exception as e:
with open(log_path, "a") as f:
f.write(f"\n=== Daemon startup failed: {e} ===\n")
traceback.print_exc(file=f)
t = threading.Thread(target=_start_daemon, daemon=True, name="hindsight-daemon-start")
t.start()
def system_prompt_block(self) -> str:
    """Describe the active Hindsight memory setup for the system prompt.

    The wording depends on self._memory_mode: "context" advertises only
    automatic injection, "tools" advertises only the tool surface, and
    any other value (the hybrid default) advertises both.
    """
    header = "# Hindsight Memory\n"
    tools_hint = (
        "Use hindsight_recall to search, hindsight_reflect for synthesis, "
        "hindsight_retain to store facts."
    )
    if self._memory_mode == "context":
        return (
            header
            + f"Active (context mode). Bank: {self._bank_id}, budget: {self._budget}.\n"
            + "Relevant memories are automatically injected into context."
        )
    if self._memory_mode == "tools":
        return (
            header
            + f"Active (tools mode). Bank: {self._bank_id}, budget: {self._budget}.\n"
            + tools_hint
        )
    return (
        header
        + f"Active. Bank: {self._bank_id}, budget: {self._budget}.\n"
        + "Relevant memories are automatically injected into context. "
        + tools_hint
    )
@@ -262,12 +386,18 @@ class HindsightMemoryProvider(MemoryProvider):
return f"## Hindsight Memory\n{result}"
def queue_prefetch(self, query: str, *, session_id: str = "") -> None:
if self._memory_mode == "tools":
return
def _run():
try:
client = self._make_client()
resp = client.recall(bank_id=self._bank_id, query=query, budget=self._budget)
if resp.results:
text = "\n".join(r.text for r in resp.results if r.text)
client = self._get_client()
if self._prefetch_method == "reflect":
resp = _run_sync(client.areflect(bank_id=self._bank_id, query=query, budget=self._budget))
text = resp.text or ""
else:
resp = _run_sync(client.arecall(bank_id=self._bank_id, query=query, budget=self._budget))
text = "\n".join(r.text for r in resp.results if r.text) if resp.results else ""
if text:
with self._prefetch_lock:
self._prefetch_result = text
except Exception as e:
@@ -282,11 +412,10 @@ class HindsightMemoryProvider(MemoryProvider):
def _sync():
try:
_run_in_thread(
lambda: self._make_client().retain(
bank_id=self._bank_id, content=combined, context="conversation"
)
)
client = self._get_client()
_run_sync(client.aretain(
bank_id=self._bank_id, content=combined, context="conversation"
))
except Exception as e:
logger.warning("Hindsight sync failed: %s", e)
@@ -296,22 +425,29 @@ class HindsightMemoryProvider(MemoryProvider):
self._sync_thread.start()
def get_tool_schemas(self) -> List[Dict[str, Any]]:
    """Expose the Hindsight tool schemas, or none in context-only mode."""
    suppress_tools = self._memory_mode == "context"
    return [] if suppress_tools else [RETAIN_SCHEMA, RECALL_SCHEMA, REFLECT_SCHEMA]
def handle_tool_call(self, tool_name: str, args: dict, **kwargs) -> str:
try:
client = self._get_client()
except Exception as e:
logger.warning("Hindsight client init failed: %s", e)
return json.dumps({"error": f"Hindsight client unavailable: {e}"})
if tool_name == "hindsight_retain":
content = args.get("content", "")
if not content:
return json.dumps({"error": "Missing required parameter: content"})
context = args.get("context")
try:
_run_in_thread(
lambda: self._make_client().retain(
bank_id=self._bank_id, content=content, context=context
)
)
_run_sync(client.aretain(
bank_id=self._bank_id, content=content, context=context
))
return json.dumps({"result": "Memory stored successfully."})
except Exception as e:
logger.warning("hindsight_retain failed: %s", e)
return json.dumps({"error": f"Failed to store memory: {e}"})
elif tool_name == "hindsight_recall":
@@ -319,16 +455,15 @@ class HindsightMemoryProvider(MemoryProvider):
if not query:
return json.dumps({"error": "Missing required parameter: query"})
try:
resp = _run_in_thread(
lambda: self._make_client().recall(
bank_id=self._bank_id, query=query, budget=self._budget
)
)
resp = _run_sync(client.arecall(
bank_id=self._bank_id, query=query, budget=self._budget
))
if not resp.results:
return json.dumps({"result": "No relevant memories found."})
lines = [f"{i}. {r.text}" for i, r in enumerate(resp.results, 1)]
return json.dumps({"result": "\n".join(lines)})
except Exception as e:
logger.warning("hindsight_recall failed: %s", e)
return json.dumps({"error": f"Failed to search memory: {e}"})
elif tool_name == "hindsight_reflect":
@@ -336,21 +471,43 @@ class HindsightMemoryProvider(MemoryProvider):
if not query:
return json.dumps({"error": "Missing required parameter: query"})
try:
resp = _run_in_thread(
lambda: self._make_client().reflect(
bank_id=self._bank_id, query=query, budget=self._budget
)
)
resp = _run_sync(client.areflect(
bank_id=self._bank_id, query=query, budget=self._budget
))
return json.dumps({"result": resp.text or "No relevant memories found."})
except Exception as e:
logger.warning("hindsight_reflect failed: %s", e)
return json.dumps({"error": f"Failed to reflect: {e}"})
return json.dumps({"error": f"Unknown tool: {tool_name}"})
def shutdown(self) -> None:
    """Flush background work, release the client, and stop the shared loop."""
    global _loop, _loop_thread
    # Give in-flight prefetch/sync workers a bounded chance to finish.
    for worker in (self._prefetch_thread, self._sync_thread):
        if worker and worker.is_alive():
            worker.join(timeout=5.0)
    client, self._client = self._client, None
    if client is not None:
        try:
            if self._mode == "local":
                # Use the public close() API. aiohttp's RuntimeError
                # ("attached to a different loop") is expected and
                # harmless — the daemon keeps running independently.
                try:
                    client.close()
                except RuntimeError:
                    pass
            else:
                _run_sync(client.aclose())
        except Exception:
            pass
    # Tear down the background event loop so no tasks are pending at exit.
    if _loop is not None and _loop.is_running():
        _loop.call_soon_threadsafe(_loop.stop)
    if _loop_thread is not None:
        _loop_thread.join(timeout=5.0)
    _loop = None
    _loop_thread = None
def register(ctx) -> None:

View File

@@ -3,6 +3,7 @@ version: 1.0.0
description: "Hindsight — long-term memory with knowledge graph, entity resolution, and multi-strategy retrieval."
pip_dependencies:
- hindsight-client
- hindsight-all
requires_env:
- HINDSIGHT_API_KEY
hooks:

View File

@@ -547,3 +547,253 @@ class TestPluginMemoryDiscovery:
"""load_memory_provider returns None for unknown names."""
from plugins.memory import load_memory_provider
assert load_memory_provider("nonexistent_provider") is None
# ---------------------------------------------------------------------------
# Sequential dispatch routing tests
# ---------------------------------------------------------------------------
class TestSequentialDispatchRouting:
    """Verify that memory provider tools are correctly routed through
    memory_manager.has_tool() and handle_tool_call().

    Regression coverage for a bug where _execute_tool_calls_sequential in
    run_agent.py used its own inline dispatch chain that bypassed
    memory_manager.has_tool(), so every memory provider tool fell through
    to the registry and came back as "Unknown tool". The fix routed the
    sequential path through has_tool() + handle_tool_call().

    These tests pin the memory_manager contract that both dispatch paths
    rely on: has_tool() returns True for registered provider tools, and
    handle_tool_call() reaches the correct provider.
    """

    def test_has_tool_returns_true_for_provider_tools(self):
        """has_tool returns True for tools registered by memory providers."""
        manager = MemoryManager()
        fake = FakeMemoryProvider("ext", tools=[
            {"name": "ext_recall", "description": "Ext recall", "parameters": {}},
            {"name": "ext_retain", "description": "Ext retain", "parameters": {}},
        ])
        manager.add_provider(fake)
        assert manager.has_tool("ext_recall")
        assert manager.has_tool("ext_retain")

    def test_has_tool_returns_false_for_builtin_tools(self):
        """has_tool returns False for agent-level tools (terminal, memory, etc.)."""
        manager = MemoryManager()
        manager.add_provider(FakeMemoryProvider("ext", tools=[
            {"name": "ext_recall", "description": "Ext", "parameters": {}},
        ]))
        for agent_tool in ("terminal", "memory", "todo", "session_search", "nonexistent"):
            assert not manager.has_tool(agent_tool)

    def test_handle_tool_call_routes_to_provider(self):
        """handle_tool_call dispatches to the correct provider's handler."""
        manager = MemoryManager()
        manager.add_provider(FakeMemoryProvider("hindsight", tools=[
            {"name": "hindsight_recall", "description": "Recall", "parameters": {}},
            {"name": "hindsight_retain", "description": "Retain", "parameters": {}},
        ]))
        payload = json.loads(manager.handle_tool_call("hindsight_recall", {"query": "alice"}))
        assert payload["handled"] == "hindsight_recall"
        assert payload["args"] == {"query": "alice"}

    def test_handle_tool_call_unknown_returns_error(self):
        """handle_tool_call returns error for tools not in any provider."""
        manager = MemoryManager()
        manager.add_provider(FakeMemoryProvider("ext", tools=[
            {"name": "ext_recall", "description": "Ext", "parameters": {}},
        ]))
        payload = json.loads(manager.handle_tool_call("terminal", {"command": "ls"}))
        assert "error" in payload

    def test_multiple_providers_route_to_correct_one(self):
        """Tools from different providers route to the right handler."""
        manager = MemoryManager()
        first = FakeMemoryProvider("builtin", tools=[
            {"name": "builtin_tool", "description": "Builtin", "parameters": {}},
        ])
        second = FakeMemoryProvider("hindsight", tools=[
            {"name": "hindsight_recall", "description": "Recall", "parameters": {}},
        ])
        manager.add_provider(first)
        manager.add_provider(second)
        routed = json.loads(manager.handle_tool_call("builtin_tool", {}))
        assert routed["handled"] == "builtin_tool"
        routed = json.loads(manager.handle_tool_call("hindsight_recall", {"query": "test"}))
        assert routed["handled"] == "hindsight_recall"

    def test_tool_names_include_all_providers(self):
        """get_all_tool_names returns tools from all registered providers."""
        manager = MemoryManager()
        manager.add_provider(FakeMemoryProvider("builtin", tools=[
            {"name": "builtin_tool", "description": "B", "parameters": {}},
        ]))
        manager.add_provider(FakeMemoryProvider("ext", tools=[
            {"name": "ext_recall", "description": "E1", "parameters": {}},
            {"name": "ext_retain", "description": "E2", "parameters": {}},
        ]))
        assert manager.get_all_tool_names() == {"builtin_tool", "ext_recall", "ext_retain"}
# ---------------------------------------------------------------------------
# Setup wizard field filtering tests (when clause and default_from)
# ---------------------------------------------------------------------------
class TestSetupFieldFiltering:
    """Test the 'when' clause and 'default_from' logic used by the
    memory setup wizard in hermes_cli/memory_setup.py.

    These features are generic — any memory plugin can use them in
    get_config_schema(). Currently used by the hindsight plugin.
    """

    def _filter_fields(self, schema, provider_config):
        """Simulate the setup wizard's field filtering logic.

        Returns a list of (key, effective_default) tuples for every field
        that passes its 'when' condition, with any 'default_from' lookup
        already applied.
        """
        kept = []
        for spec in schema:
            effective = spec.get("default")
            # Dynamic default: resolve from another field's current value.
            dyn = spec.get("default_from")
            if dyn and isinstance(dyn, dict):
                ref_value = provider_config.get(dyn.get("field", ""), "")
                mapping = dyn.get("map", {})
                if ref_value and ref_value in mapping:
                    effective = mapping[ref_value]
            # Conditional visibility: skip when any condition key mismatches.
            cond = spec.get("when")
            if cond and isinstance(cond, dict):
                if any(provider_config.get(k) != v for k, v in cond.items()):
                    continue
            kept.append((spec["key"], effective))
        return kept

    def test_when_clause_filters_fields(self):
        """Fields with 'when' are skipped if the condition doesn't match."""
        schema = [
            {"key": "mode", "default": "cloud"},
            {"key": "api_url", "default": "https://api.example.com", "when": {"mode": "cloud"}},
            {"key": "api_key", "default": None, "when": {"mode": "cloud"}},
            {"key": "llm_provider", "default": "openai", "when": {"mode": "local"}},
            {"key": "llm_model", "default": "gpt-4o-mini", "when": {"mode": "local"}},
            {"key": "budget", "default": "mid"},
        ]
        # Cloud mode exposes the cloud-only fields plus unconditional ones.
        cloud_keys = [k for k, _ in self._filter_fields(schema, {"mode": "cloud"})]
        assert cloud_keys == ["mode", "api_url", "api_key", "budget"]
        # Local mode swaps in the LLM fields instead.
        local_keys = [k for k, _ in self._filter_fields(schema, {"mode": "local"})]
        assert local_keys == ["mode", "llm_provider", "llm_model", "budget"]

    def test_when_clause_no_condition_always_shown(self):
        """Fields without 'when' are always included."""
        schema = [
            {"key": "bank_id", "default": "hermes"},
            {"key": "budget", "default": "mid"},
        ]
        shown = self._filter_fields(schema, {"mode": "cloud"})
        assert [k for k, _ in shown] == ["bank_id", "budget"]

    def test_default_from_resolves_dynamic_default(self):
        """default_from looks up the default from another field's value."""
        provider_models = {
            "openai": "gpt-4o-mini",
            "groq": "openai/gpt-oss-120b",
            "anthropic": "claude-haiku-4-5",
        }
        schema = [
            {"key": "llm_provider", "default": "openai"},
            {"key": "llm_model", "default": "gpt-4o-mini",
             "default_from": {"field": "llm_provider", "map": provider_models}},
        ]
        # Groq selected: model should default to groq's entry in the map.
        resolved = dict(self._filter_fields(schema, {"llm_provider": "groq"}))
        assert resolved["llm_model"] == "openai/gpt-oss-120b"
        # Anthropic selected.
        resolved = dict(self._filter_fields(schema, {"llm_provider": "anthropic"}))
        assert resolved["llm_model"] == "claude-haiku-4-5"

    def test_default_from_falls_back_to_static_default(self):
        """default_from falls back to static default if provider not in map."""
        schema = [
            {"key": "llm_model", "default": "gpt-4o-mini",
             "default_from": {"field": "llm_provider", "map": {"groq": "openai/gpt-oss-120b"}}},
        ]
        # Unknown provider: the static default wins.
        resolved = dict(self._filter_fields(schema, {"llm_provider": "unknown_provider"}))
        assert resolved["llm_model"] == "gpt-4o-mini"

    def test_default_from_with_no_ref_value(self):
        """default_from keeps static default if referenced field is not set."""
        schema = [
            {"key": "llm_model", "default": "gpt-4o-mini",
             "default_from": {"field": "llm_provider", "map": {"groq": "openai/gpt-oss-120b"}}},
        ]
        # No provider configured at all.
        resolved = dict(self._filter_fields(schema, {}))
        assert resolved["llm_model"] == "gpt-4o-mini"

    def test_when_and_default_from_combined(self):
        """when clause and default_from work together correctly."""
        provider_models = {"groq": "openai/gpt-oss-120b", "openai": "gpt-4o-mini"}
        schema = [
            {"key": "mode", "default": "local"},
            {"key": "llm_provider", "default": "openai", "when": {"mode": "local"}},
            {"key": "llm_model", "default": "gpt-4o-mini",
             "default_from": {"field": "llm_provider", "map": provider_models},
             "when": {"mode": "local"}},
            {"key": "api_url", "default": "https://api.example.com", "when": {"mode": "cloud"}},
        ]
        # Local + groq: llm_model shown with groq's default, api_url hidden.
        local = self._filter_fields(schema, {"mode": "local", "llm_provider": "groq"})
        local_keys = [k for k, _ in local]
        assert "llm_model" in local_keys
        assert "api_url" not in local_keys
        assert dict(local)["llm_model"] == "openai/gpt-oss-120b"
        # Cloud: api_url shown, llm_model hidden.
        cloud = self._filter_fields(schema, {"mode": "cloud"})
        cloud_keys = [k for k, _ in cloud]
        assert "api_url" in cloud_keys
        assert "llm_model" not in cloud_keys