Compare commits

..

1 Commits

Author SHA1 Message Date
Alexander Whitestone
eeed075974 feat: add profile-scoped local Hindsight eval path
All checks were successful
Lint / lint (pull_request) Successful in 28s
Closes #1010
2026-04-22 11:12:08 -04:00
12 changed files with 486 additions and 621 deletions

View File

@@ -0,0 +1,69 @@
"""First-class context snapshot artifacts for live runtime memory evaluation."""
from __future__ import annotations
import json
import re
from pathlib import Path
from typing import Any
from hermes_constants import get_hermes_home
_SAFE_SEGMENT_RE = re.compile(r"[^A-Za-z0-9_.-]+")
class ContextSnapshotRecorder:
"""Write per-call prompt-composition artifacts for a Hermes session."""
def __init__(self, session_id: str, *, enabled: bool = False, base_dir: str | Path | None = None):
self.session_id = session_id or "session"
self.enabled = bool(enabled)
self.base_dir = Path(base_dir) if base_dir else get_hermes_home() / "reports" / "context_snapshots"
@property
def session_dir(self) -> Path:
safe_session = _SAFE_SEGMENT_RE.sub("_", self.session_id).strip("._") or "session"
return self.base_dir / safe_session
def record_call(
self,
api_call_count: int,
*,
system_prompt: str,
memory_provider_system_prompt: str = "",
memory_prefetch_raw: str = "",
memory_context_block: str = "",
api_user_message: str = "",
api_messages: list[dict[str, Any]] | None = None,
metadata: dict[str, Any] | None = None,
) -> Path | None:
if not self.enabled:
return None
call_dir = self.session_dir / f"call_{api_call_count:03d}"
call_dir.mkdir(parents=True, exist_ok=True)
self._write_text(call_dir / "system_prompt.txt", system_prompt or "")
self._write_text(call_dir / "memory_provider_system_prompt.txt", memory_provider_system_prompt or "")
self._write_text(call_dir / "memory_prefetch_raw.txt", memory_prefetch_raw or "")
self._write_text(call_dir / "memory_context_block.txt", memory_context_block or "")
self._write_text(call_dir / "api_user_message.txt", api_user_message or "")
self._write_json(call_dir / "api_messages.json", api_messages or [])
self._write_json(
call_dir / "metadata.json",
{
"session_id": self.session_id,
"api_call_count": api_call_count,
**(metadata or {}),
},
)
return call_dir
@staticmethod
def _write_text(path: Path, content: str) -> None:
path.write_text(content, encoding="utf-8")
@staticmethod
def _write_json(path: Path, payload: Any) -> None:
path.write_text(json.dumps(payload, indent=2, ensure_ascii=False), encoding="utf-8")

View File

@@ -0,0 +1,132 @@
# Hindsight local eval homes for live Hermes runtime testing
Issue: #1010
Parent: #985
This document defines a reproducible, profile-scoped evaluation layout for baseline / MemPalace / Hindsight comparisons without requiring Hindsight Cloud.
## Eval home layout
Use three separate `HERMES_HOME` directories so each run has isolated config, memory, sessions, and artifacts.
```text
~/.hermes/profiles/atlas-baseline/
config.yaml
.env
MEMORY.md
USER.md
reports/context_snapshots/
~/.hermes/profiles/atlas-mempalace/
config.yaml
.env
MEMORY.md
USER.md
reports/context_snapshots/
plugins/ # if a local MemPalace plugin is installed for this eval lane
~/.hermes/profiles/atlas-hindsight/
config.yaml
.env
MEMORY.md
USER.md
hindsight/config.json
reports/context_snapshots/
```
## Hindsight local config
The Hindsight provider already loads config from `$HERMES_HOME/hindsight/config.json` first. For the local eval lane, prefer `local_embedded` so Hermes can bring up a local Hindsight daemon without cloud signup.
Example `~/.hermes/profiles/atlas-hindsight/hindsight/config.json`:
```json
{
"mode": "local_embedded",
"memory_mode": "context",
"recall_prefetch_method": "recall",
"llm_provider": "ollama",
"llm_model": "gemma3:12b",
"api_url": "http://localhost:8888"
}
```
Notes:
- `local_embedded` avoids any Hindsight Cloud dependency.
- If `profile` is omitted, Hermes now derives a stable local Hindsight profile name from the active profile identity / `HERMES_HOME` instead of collapsing all local runs into the shared legacy `hermes` profile.
- `local_external` remains valid if you already run a local Hindsight server yourself.
## Runtime switching procedure
Switch by exporting `HERMES_HOME` before launching Hermes.
### 1. Baseline
```bash
export HERMES_HOME="$HOME/.hermes/profiles/atlas-baseline"
unset HERMES_CONTEXT_SNAPSHOTS
hermes chat
```
### 2. MemPalace lane
```bash
export HERMES_HOME="$HOME/.hermes/profiles/atlas-mempalace"
export HERMES_CONTEXT_SNAPSHOTS=1
hermes chat
```
### 3. Hindsight lane
```bash
export HERMES_HOME="$HOME/.hermes/profiles/atlas-hindsight"
export HERMES_CONTEXT_SNAPSHOTS=1
hermes chat
```
## Raw artifact capture
When `HERMES_CONTEXT_SNAPSHOTS=1` is enabled, Hermes writes first-class prompt-composition artifacts under the active home by default.
Artifact tree:
```text
$HERMES_HOME/reports/context_snapshots/<session-id>/call_001/
system_prompt.txt
memory_provider_system_prompt.txt
memory_prefetch_raw.txt
memory_context_block.txt
api_user_message.txt
api_messages.json
metadata.json
```
Minimum files a benchmark should inspect:
- `system_prompt.txt`
- `memory_prefetch_raw.txt`
- `memory_context_block.txt`
- `api_user_message.txt`
- `api_messages.json`
These prove:
- what the system prompt was
- what the provider prefetched
- what entered `<memory-context>`
- what the final API user message looked like
- what full payload reached the model
## Follow-on benchmark workflow
A benchmark issue can now consume this path without redoing integration work:
1. pick one eval home (`atlas-baseline`, `atlas-mempalace`, `atlas-hindsight`)
2. export the corresponding `HERMES_HOME`
3. run Hermes on the same prompt set
4. compare the snapshot artifacts in `reports/context_snapshots/`
5. score recall quality and answer quality separately
## Why this is sovereign
- no hosted Hindsight Cloud dependency is required
- the Hindsight config is profile-scoped under `hindsight/config.json`
- the runtime artifacts stay under the active `HERMES_HOME`
- switching between baseline / MemPalace / Hindsight is just a `HERMES_HOME` swap

View File

@@ -1,66 +0,0 @@
# Morning Review Packet Status — #949
Generated: 2026-04-22T14:57:44.332419+00:00
Epic: [EPIC: Morning review packet — Hermes harness features landed 2026-04-21](https://forge.alexanderwhitestone.com/Timmy_Foundation/hermes-agent/issues/949)
## Summary
- Child QA issues tracked: 13
- Open child issues: 11
- Closed child issues: 2
- Open child issues already backed by PRs: 7
- Open child issues still unowned on forge: 4
## Child QA Matrix
| Issue | State | Open PRs | Title |
|------:|-------|----------|-------|
| #950 | open | — | [QA] Verify AI Gateway provider UX + attribution headers |
| #951 | open | — | [QA] Verify transport abstraction + AnthropicTransport wiring |
| #952 | open | — | [QA] Verify CLI voice beep toggle |
| #953 | open | [#1020](https://forge.alexanderwhitestone.com/Timmy_Foundation/hermes-agent/pulls/1020) | [QA] Verify bundled skill scripts run out of the box |
| #954 | open | [#1021](https://forge.alexanderwhitestone.com/Timmy_Foundation/hermes-agent/pulls/1021) | [QA] Verify maps skill guest_house / camp_site / bakery expansion |
| #955 | open | — | [QA] Verify KittenTTS local provider end-to-end |
| #956 | open | [#1018](https://forge.alexanderwhitestone.com/Timmy_Foundation/hermes-agent/pulls/1018) | [QA] Verify numbered keyboard shortcuts for approval + clarify prompts |
| #957 | open | [#1015](https://forge.alexanderwhitestone.com/Timmy_Foundation/hermes-agent/pulls/1015) | [QA] Verify optional adversarial-ux-test skill catalog flow |
| #958 | open | [#1016](https://forge.alexanderwhitestone.com/Timmy_Foundation/hermes-agent/pulls/1016) | [QA] Verify /usage account limits in CLI + gateway |
| #959 | open | [#1014](https://forge.alexanderwhitestone.com/Timmy_Foundation/hermes-agent/pulls/1014) | [QA] Verify OpenCode-Go curated catalog additions |
| #960 | open | [#1017](https://forge.alexanderwhitestone.com/Timmy_Foundation/hermes-agent/pulls/1017) | [QA] Verify patch 'did you mean?' suggestions |
| #961 | closed | — | [QA] Verify web dashboard update/restart action buttons |
| #962 | closed | — | [QA] Verify hardcoded-home path guard on burn/921 branch |
## Drift Signals
forge/main is still catching up to the upstream packet.
Active PR-backed child lanes:
- #953 -> #1020 ([QA] Verify bundled skill scripts run out of the box)
- #954 -> #1021 ([QA] Verify maps skill guest_house / camp_site / bakery expansion)
- #956 -> #1018 ([QA] Verify numbered keyboard shortcuts for approval + clarify prompts)
- #957 -> #1015 ([QA] Verify optional adversarial-ux-test skill catalog flow)
- #958 -> #1016 ([QA] Verify /usage account limits in CLI + gateway)
- #959 -> #1014 ([QA] Verify OpenCode-Go curated catalog additions)
- #960 -> #1017 ([QA] Verify patch 'did you mean?' suggestions)
## Unowned Open QA Issues
- #950 [QA] Verify AI Gateway provider UX + attribution headers
- #951 [QA] Verify transport abstraction + AnthropicTransport wiring
- #952 [QA] Verify CLI voice beep toggle
- #955 [QA] Verify KittenTTS local provider end-to-end
## Decomposition Follow-Ups
- #965 [open] [EPIC: Morning review packet — Hermes harness features landed 2026-04-21] Phase 1: Landscape Analysis & Scaffolding
- #966 [open] [EPIC: Morning review packet — Hermes harness features landed 2026-04-21] Phase 2: Core Logic Implementation
- #967 [closed] [EPIC: Morning review packet — Hermes harness features landed 2026-04-21] Phase 3: Poka-yoke Integration & Fleet Verification
## Conclusion
Refs #949 only. This epic remains open until every child QA issue has a truthful PASS/FAIL outcome, attached evidence, and any upstream/main versus forge/main drift is resolved or explicitly documented.
## Regeneration
```bash
python3 scripts/morning_review_packet_status.py --fetch-live --json-out docs/morning-review-packet-2026-04-21.snapshot.json --markdown-out docs/morning-review-packet-2026-04-21-status.md
```

View File

@@ -1,172 +0,0 @@
{
"generated_at": "2026-04-22T14:57:44.332419+00:00",
"repo": "Timmy_Foundation/hermes-agent",
"epic": {
"number": 949,
"title": "EPIC: Morning review packet \u2014 Hermes harness features landed 2026-04-21",
"state": "open",
"html_url": "https://forge.alexanderwhitestone.com/Timmy_Foundation/hermes-agent/issues/949"
},
"children": [
{
"number": 950,
"title": "[QA] Verify AI Gateway provider UX + attribution headers",
"state": "open",
"html_url": "https://forge.alexanderwhitestone.com/Timmy_Foundation/hermes-agent/issues/950",
"open_prs": []
},
{
"number": 951,
"title": "[QA] Verify transport abstraction + AnthropicTransport wiring",
"state": "open",
"html_url": "https://forge.alexanderwhitestone.com/Timmy_Foundation/hermes-agent/issues/951",
"open_prs": []
},
{
"number": 952,
"title": "[QA] Verify CLI voice beep toggle",
"state": "open",
"html_url": "https://forge.alexanderwhitestone.com/Timmy_Foundation/hermes-agent/issues/952",
"open_prs": []
},
{
"number": 953,
"title": "[QA] Verify bundled skill scripts run out of the box",
"state": "open",
"html_url": "https://forge.alexanderwhitestone.com/Timmy_Foundation/hermes-agent/issues/953",
"open_prs": [
{
"number": 1020,
"title": "fix: ship bundled skill scripts executable",
"head": "fix/953",
"url": "https://forge.alexanderwhitestone.com/Timmy_Foundation/hermes-agent/pulls/1020"
}
]
},
{
"number": 954,
"title": "[QA] Verify maps skill guest_house / camp_site / bakery expansion",
"state": "open",
"html_url": "https://forge.alexanderwhitestone.com/Timmy_Foundation/hermes-agent/issues/954",
"open_prs": [
{
"number": 1021,
"title": "feat: sync maps skill and verify guest_house/camp_site/bakery (#954)",
"head": "fix/954",
"url": "https://forge.alexanderwhitestone.com/Timmy_Foundation/hermes-agent/pulls/1021"
}
]
},
{
"number": 955,
"title": "[QA] Verify KittenTTS local provider end-to-end",
"state": "open",
"html_url": "https://forge.alexanderwhitestone.com/Timmy_Foundation/hermes-agent/issues/955",
"open_prs": []
},
{
"number": 956,
"title": "[QA] Verify numbered keyboard shortcuts for approval + clarify prompts",
"state": "open",
"html_url": "https://forge.alexanderwhitestone.com/Timmy_Foundation/hermes-agent/issues/956",
"open_prs": [
{
"number": 1018,
"title": "fix: add numbered approval and clarify shortcuts (#956)",
"head": "fix/956",
"url": "https://forge.alexanderwhitestone.com/Timmy_Foundation/hermes-agent/pulls/1018"
}
]
},
{
"number": 957,
"title": "[QA] Verify optional adversarial-ux-test skill catalog flow",
"state": "open",
"html_url": "https://forge.alexanderwhitestone.com/Timmy_Foundation/hermes-agent/issues/957",
"open_prs": [
{
"number": 1015,
"title": "feat(skills): backport adversarial-ux-test optional skill",
"head": "fix/957",
"url": "https://forge.alexanderwhitestone.com/Timmy_Foundation/hermes-agent/pulls/1015"
}
]
},
{
"number": 958,
"title": "[QA] Verify /usage account limits in CLI + gateway",
"state": "open",
"html_url": "https://forge.alexanderwhitestone.com/Timmy_Foundation/hermes-agent/issues/958",
"open_prs": [
{
"number": 1016,
"title": "fix: restore /usage account limits in CLI + gateway (#958)",
"head": "fix/958",
"url": "https://forge.alexanderwhitestone.com/Timmy_Foundation/hermes-agent/pulls/1016"
}
]
},
{
"number": 959,
"title": "[QA] Verify OpenCode-Go curated catalog additions",
"state": "open",
"html_url": "https://forge.alexanderwhitestone.com/Timmy_Foundation/hermes-agent/issues/959",
"open_prs": [
{
"number": 1014,
"title": "fix(opencode-go): restore curated catalog additions",
"head": "fix/959",
"url": "https://forge.alexanderwhitestone.com/Timmy_Foundation/hermes-agent/pulls/1014"
}
]
},
{
"number": 960,
"title": "[QA] Verify patch 'did you mean?' suggestions",
"state": "open",
"html_url": "https://forge.alexanderwhitestone.com/Timmy_Foundation/hermes-agent/issues/960",
"open_prs": [
{
"number": 1017,
"title": "fix(patch): port and verify did-you-mean suggestions (#960)",
"head": "fix/960",
"url": "https://forge.alexanderwhitestone.com/Timmy_Foundation/hermes-agent/pulls/1017"
}
]
},
{
"number": 961,
"title": "[QA] Verify web dashboard update/restart action buttons",
"state": "closed",
"html_url": "https://forge.alexanderwhitestone.com/Timmy_Foundation/hermes-agent/issues/961",
"open_prs": []
},
{
"number": 962,
"title": "[QA] Verify hardcoded-home path guard on burn/921 branch",
"state": "closed",
"html_url": "https://forge.alexanderwhitestone.com/Timmy_Foundation/hermes-agent/issues/962",
"open_prs": []
}
],
"decomposition_issues": [
{
"number": 965,
"title": "[EPIC: Morning review packet \u2014 Hermes harness features landed 2026-04-21] Phase 1: Landscape Analysis & Scaffolding",
"state": "open",
"html_url": "https://forge.alexanderwhitestone.com/Timmy_Foundation/hermes-agent/issues/965"
},
{
"number": 966,
"title": "[EPIC: Morning review packet \u2014 Hermes harness features landed 2026-04-21] Phase 2: Core Logic Implementation",
"state": "open",
"html_url": "https://forge.alexanderwhitestone.com/Timmy_Foundation/hermes-agent/issues/966"
},
{
"number": 967,
"title": "[EPIC: Morning review packet \u2014 Hermes harness features landed 2026-04-21] Phase 3: Poka-yoke Integration & Fleet Verification",
"state": "closed",
"html_url": "https://forge.alexanderwhitestone.com/Timmy_Foundation/hermes-agent/issues/967"
}
]
}

View File

@@ -178,6 +178,25 @@ def _load_config() -> dict:
}
def _derive_local_profile_name(agent_identity: str = "", hermes_home: str = "") -> str:
"""Return a stable profile name for local embedded Hindsight storage.
Prefer the active Hermes profile identity when available, otherwise fall back
to the basename of the active HERMES_HOME path. This prevents all local
Hindsight eval homes from sharing the legacy default profile name "hermes".
"""
from pathlib import Path
import re
raw = (agent_identity or "").strip()
if not raw and hermes_home:
raw = Path(hermes_home).name.strip()
if not raw:
raw = "hermes"
safe = re.sub(r"[^A-Za-z0-9_.-]+", "-", raw).strip(".-_")
return safe or "hermes"
# ---------------------------------------------------------------------------
# MemoryProvider implementation
# ---------------------------------------------------------------------------
@@ -468,6 +487,8 @@ class HindsightMemoryProvider(MemoryProvider):
def initialize(self, session_id: str, **kwargs) -> None:
self._session_id = session_id
hermes_home = str(kwargs.get("hermes_home") or "")
agent_identity = str(kwargs.get("agent_identity") or "")
# Check client version and auto-upgrade if needed
try:
@@ -500,6 +521,11 @@ class HindsightMemoryProvider(MemoryProvider):
# "local" is a legacy alias for "local_embedded"
if self._mode == "local":
self._mode = "local_embedded"
if self._mode == "local_embedded" and not self._config.get("profile"):
self._config["profile"] = _derive_local_profile_name(
agent_identity=agent_identity,
hermes_home=hermes_home,
)
self._api_key = self._config.get("apiKey") or self._config.get("api_key") or os.environ.get("HINDSIGHT_API_KEY", "")
default_url = _DEFAULT_LOCAL_URL if self._mode in ("local_embedded", "local_external") else _DEFAULT_API_URL
self._api_url = self._config.get("api_url") or os.environ.get("HINDSIGHT_API_URL", default_url)

View File

@@ -604,6 +604,8 @@ class AIAgent:
checkpoint_max_snapshots: int = 50,
pass_session_id: bool = False,
persist_session: bool = True,
context_snapshots_enabled: bool | None = None,
context_snapshots_dir: str | None = None,
):
"""
Initialize the AI Agent.
@@ -1129,6 +1131,43 @@ class AIAgent:
except Exception:
_agent_cfg = {}
def _is_enabled(value):
if isinstance(value, bool):
return value
return str(value).strip().lower() in {"1", "true", "yes", "on"}
_debug_cfg = _agent_cfg.get("debug", {}) if isinstance(_agent_cfg, dict) else {}
if not isinstance(_debug_cfg, dict):
_debug_cfg = {}
_snapshot_cfg = _debug_cfg.get("context_snapshots", {})
if not isinstance(_snapshot_cfg, dict):
_snapshot_cfg = {}
_snapshots_env = os.getenv("HERMES_CONTEXT_SNAPSHOTS")
_snapshots_dir_env = os.getenv("HERMES_CONTEXT_SNAPSHOTS_DIR")
if context_snapshots_enabled is None:
if _snapshots_env is not None:
self._context_snapshots_enabled = _is_enabled(_snapshots_env)
else:
self._context_snapshots_enabled = _is_enabled(_snapshot_cfg.get("enabled", False))
else:
self._context_snapshots_enabled = bool(context_snapshots_enabled)
self._context_snapshots_dir = (
context_snapshots_dir
or _snapshots_dir_env
or _snapshot_cfg.get("dir")
or None
)
try:
from agent.context_snapshots import ContextSnapshotRecorder
self._context_snapshot_recorder = ContextSnapshotRecorder(
session_id=self.session_id,
enabled=self._context_snapshots_enabled,
base_dir=self._context_snapshots_dir,
)
except Exception as _snapshot_err:
logger.debug("Context snapshot recorder init failed: %s", _snapshot_err)
self._context_snapshot_recorder = None
# Persistent memory (MEMORY.md + USER.md) -- loaded from disk
self._memory_store = None
self._memory_enabled = False
@@ -8144,12 +8183,17 @@ class AIAgent:
# Use original_user_message (clean input) — user_message may contain
# injected skill content that bloats / breaks provider queries.
_ext_prefetch_cache = ""
_memory_provider_prompt_cache = ""
if self._memory_manager:
try:
_query = original_user_message if isinstance(original_user_message, str) else ""
_ext_prefetch_cache = self._memory_manager.prefetch_all(_query) or ""
except Exception:
pass
try:
_memory_provider_prompt_cache = self._memory_manager.build_system_prompt() or ""
except Exception:
pass
while (api_call_count < self.max_iterations and self.iteration_budget.remaining > 0) or self._budget_grace_call:
# Reset per-turn checkpoint dedup so each iteration can take one snapshot
@@ -8217,6 +8261,8 @@ class AIAgent:
# However, providers like Moonshot AI require a separate 'reasoning_content' field
# on assistant messages with tool_calls. We handle both cases here.
api_messages = []
_current_api_user_message = ""
_current_memory_context_block = ""
for idx, msg in enumerate(messages):
api_msg = msg.copy()
@@ -8231,12 +8277,15 @@ class AIAgent:
_fenced = build_memory_context_block(_ext_prefetch_cache)
if _fenced:
_injections.append(_fenced)
_current_memory_context_block = _fenced
if _plugin_user_context:
_injections.append(_plugin_user_context)
if _injections:
_base = api_msg.get("content", "")
if isinstance(_base, str):
api_msg["content"] = _base + "\n\n" + "\n\n".join(_injections)
if isinstance(api_msg.get("content"), str):
_current_api_user_message = api_msg["content"]
# For ALL assistant messages, pass reasoning back to the API
# This ensures multi-turn reasoning context is preserved
@@ -8271,7 +8320,13 @@ class AIAgent:
from agent.privacy_filter import PrivacyFilter
pf = PrivacyFilter()
# Sanitize messages before they reach the provider
api_messages = pf.sanitize_messages(api_messages)
_pf_result = pf.sanitize_messages(api_messages)
if isinstance(_pf_result, tuple):
api_messages, _pf_report = _pf_result
if getattr(pf, "last_report", None) is None:
pf.last_report = _pf_report
else:
api_messages = _pf_result
if pf.last_report and pf.last_report.had_redactions:
logger.info(f"Privacy Filter: Redacted sensitive data from turn payload. Details: {pf.last_report.summary()}")
except Exception as e:
@@ -8342,6 +8397,27 @@ class AIAgent:
new_tcs.append(tc)
am["tool_calls"] = new_tcs
if self._context_snapshot_recorder:
try:
self._context_snapshot_recorder.record_call(
api_call_count,
system_prompt=effective_system,
memory_provider_system_prompt=_memory_provider_prompt_cache,
memory_prefetch_raw=_ext_prefetch_cache,
memory_context_block=_current_memory_context_block,
api_user_message=_current_api_user_message,
api_messages=api_messages,
metadata={
"model": self.model,
"provider": self.provider,
"platform": self.platform or "",
"api_mode": self.api_mode,
"memory_providers": [p.name for p in getattr(self._memory_manager, "providers", [])],
},
)
except Exception as _snapshot_err:
logger.debug("Context snapshot capture failed: %s", _snapshot_err)
# Calculate approximate request size for logging
total_chars = sum(len(str(msg)) for msg in api_messages)
approx_tokens = estimate_messages_tokens_rough(api_messages)

View File

@@ -1,288 +0,0 @@
#!/usr/bin/env python3
"""Generate a grounded status report for hermes-agent morning review packet epic #949."""
from __future__ import annotations
import argparse
import base64
import json
import os
import re
import ssl
import urllib.request
from datetime import datetime, timezone
from pathlib import Path
from typing import Any
BASE_API = "https://forge.alexanderwhitestone.com/api/v1"
REPO = "Timmy_Foundation/hermes-agent"
TOKEN_PATH = Path("~/.config/gitea/token").expanduser()
DEFAULT_JSON_OUT = Path("docs/morning-review-packet-2026-04-21.snapshot.json")
DEFAULT_MARKDOWN_OUT = Path("docs/morning-review-packet-2026-04-21-status.md")
def extract_issue_numbers(text: str) -> list[int]:
seen: set[int] = set()
numbers: list[int] = []
for match in re.finditer(r"#(\d+)", text or ""):
num = int(match.group(1))
if num not in seen:
seen.add(num)
numbers.append(num)
return numbers
def _auth_headers(token: str) -> list[dict[str, str]]:
basic = base64.b64encode(f"{token}:".encode()).decode()
return [
{"Authorization": f"token {token}", "Accept": "application/json"},
{"Authorization": f"Basic {basic}", "Accept": "application/json"},
]
def api_get(path: str, *, headers_options: list[dict[str, str]] | None = None) -> Any:
token = TOKEN_PATH.read_text(encoding="utf-8").strip()
headers_options = headers_options or _auth_headers(token)
ctx = ssl.create_default_context()
url = f"{BASE_API}{path}"
last_error: Exception | None = None
for headers in headers_options:
try:
req = urllib.request.Request(url, headers=headers)
with urllib.request.urlopen(req, context=ctx, timeout=30) as resp:
return json.loads(resp.read().decode())
except Exception as exc: # pragma: no cover - exercised via live CLI use
last_error = exc
raise RuntimeError(f"GET {url} failed: {last_error}")
def issue_pr_matches(pr: dict[str, Any], issue_num: int) -> bool:
title = pr.get("title") or ""
body = pr.get("body") or ""
head = (pr.get("head") or {}).get("ref") or ""
exact_ref = re.compile(rf"(?<!\d)#{issue_num}(?!\d)")
body_ref = re.compile(rf"(?i)(closes|close|fixes|fix|resolves|resolve|refs|ref)\s+#?{issue_num}(?!\d)")
branch_variants = {
f"fix/{issue_num}",
f"issue-{issue_num}",
f"burn/{issue_num}",
f"fix/issue-{issue_num}",
}
return bool(
exact_ref.search(title)
or exact_ref.search(body)
or body_ref.search(body)
or head in branch_variants
)
def fetch_open_prs(*, headers_options: list[dict[str, str]]) -> list[dict[str, Any]]:
prs: list[dict[str, Any]] = []
page = 1
while True:
batch = api_get(
f"/repos/{REPO}/pulls?state=open&limit=100&page={page}",
headers_options=headers_options,
)
if not batch:
break
prs.extend(batch)
if len(batch) < 100:
break
page += 1
return prs
def fetch_live_snapshot(epic_issue_num: int = 949) -> dict[str, Any]:
token = TOKEN_PATH.read_text(encoding="utf-8").strip()
headers_options = _auth_headers(token)
epic = api_get(f"/repos/{REPO}/issues/{epic_issue_num}", headers_options=headers_options)
comments = api_get(f"/repos/{REPO}/issues/{epic_issue_num}/comments", headers_options=headers_options)
child_numbers = [n for n in extract_issue_numbers(epic.get("body") or "") if n != epic_issue_num]
decomposition_numbers = [
n
for comment in comments
for n in extract_issue_numbers(comment.get("body") or "")
if n not in child_numbers and n != epic_issue_num
]
open_prs = fetch_open_prs(headers_options=headers_options)
children = []
for number in child_numbers:
issue = api_get(f"/repos/{REPO}/issues/{number}", headers_options=headers_options)
matching_prs = [
{
"number": pr["number"],
"title": pr["title"],
"head": pr.get("head", {}).get("ref", ""),
"url": pr["html_url"],
}
for pr in open_prs
if issue_pr_matches(pr, number)
]
children.append(
{
"number": issue["number"],
"title": issue["title"],
"state": issue["state"],
"html_url": issue["html_url"],
"open_prs": matching_prs,
}
)
decomposition_issues = []
for number in decomposition_numbers:
issue = api_get(f"/repos/{REPO}/issues/{number}", headers_options=headers_options)
decomposition_issues.append(
{
"number": issue["number"],
"title": issue["title"],
"state": issue["state"],
"html_url": issue["html_url"],
}
)
return {
"generated_at": datetime.now(timezone.utc).isoformat(),
"repo": REPO,
"epic": {
"number": epic["number"],
"title": epic["title"],
"state": epic["state"],
"html_url": epic["html_url"],
},
"children": children,
"decomposition_issues": decomposition_issues,
}
def summarize_snapshot(snapshot: dict[str, Any]) -> dict[str, int]:
children = snapshot.get("children", [])
open_children = [issue for issue in children if issue.get("state") == "open"]
closed_children = [issue for issue in children if issue.get("state") == "closed"]
open_with_pr = [issue for issue in open_children if issue.get("open_prs")]
open_without_pr = [issue for issue in open_children if not issue.get("open_prs")]
return {
"total_children": len(children),
"open_children": len(open_children),
"closed_children": len(closed_children),
"open_with_pr": len(open_with_pr),
"open_without_pr": len(open_without_pr),
}
def render_markdown(snapshot: dict[str, Any]) -> str:
epic = snapshot["epic"]
children = snapshot.get("children", [])
summary = summarize_snapshot(snapshot)
open_with_pr = [issue for issue in children if issue.get("state") == "open" and issue.get("open_prs")]
open_without_pr = [issue for issue in children if issue.get("state") == "open" and not issue.get("open_prs")]
decomposition = snapshot.get("decomposition_issues", [])
lines = [
f"# Morning Review Packet Status — #{epic['number']}",
"",
f"Generated: {snapshot.get('generated_at', '')}",
f"Epic: [{epic['title']}]({epic.get('html_url', '')})",
"",
"## Summary",
"",
f"- Child QA issues tracked: {summary['total_children']}",
f"- Open child issues: {summary['open_children']}",
f"- Closed child issues: {summary['closed_children']}",
f"- Open child issues already backed by PRs: {summary['open_with_pr']}",
f"- Open child issues still unowned on forge: {summary['open_without_pr']}",
"",
"## Child QA Matrix",
"",
"| Issue | State | Open PRs | Title |",
"|------:|-------|----------|-------|",
]
for issue in children:
rendered_prs = []
for pr in issue.get("open_prs", []):
pr_num = pr.get("number", "?")
pr_url = pr.get("url") or pr.get("html_url") or ""
rendered_prs.append(f"[#{pr_num}]({pr_url})" if pr_url else f"#{pr_num}")
pr_text = ", ".join(rendered_prs) or ""
lines.append(
f"| #{issue['number']} | {issue['state']} | {pr_text} | {issue['title']} |"
)
lines.extend([
"",
"## Drift Signals",
"",
"forge/main is still catching up to the upstream packet.",
])
if open_with_pr:
lines.append("")
lines.append("Active PR-backed child lanes:")
for issue in open_with_pr:
pr_numbers = ", ".join(f"#{pr['number']}" for pr in issue.get("open_prs", []))
lines.append(f"- #{issue['number']} -> {pr_numbers} ({issue['title']})")
if open_without_pr:
lines.extend([
"",
"## Unowned Open QA Issues",
"",
])
for issue in open_without_pr:
lines.append(f"- #{issue['number']} {issue['title']}")
if decomposition:
lines.extend([
"",
"## Decomposition Follow-Ups",
"",
])
for issue in decomposition:
lines.append(f"- #{issue['number']} [{issue['state']}] {issue['title']}")
lines.extend([
"",
"## Conclusion",
"",
"Refs #949 only. This epic remains open until every child QA issue has a truthful PASS/FAIL outcome, attached evidence, and any upstream/main versus forge/main drift is resolved or explicitly documented.",
"",
"## Regeneration",
"",
"```bash",
"python3 scripts/morning_review_packet_status.py --fetch-live --json-out docs/morning-review-packet-2026-04-21.snapshot.json --markdown-out docs/morning-review-packet-2026-04-21-status.md",
"```",
])
return "\n".join(lines) + "\n"
def write_json(path: Path, data: dict[str, Any]) -> None:
path.parent.mkdir(parents=True, exist_ok=True)
path.write_text(json.dumps(data, indent=2) + "\n", encoding="utf-8")
def main() -> None:
parser = argparse.ArgumentParser(description="Generate grounded status docs for epic #949")
parser.add_argument("--fetch-live", action="store_true", help="Fetch the current packet state from Forge")
parser.add_argument("--snapshot", type=Path, help="Read a local JSON snapshot instead of hitting the API")
parser.add_argument("--json-out", type=Path, default=DEFAULT_JSON_OUT, help="Path to write JSON snapshot")
parser.add_argument("--markdown-out", type=Path, default=DEFAULT_MARKDOWN_OUT, help="Path to write markdown report")
args = parser.parse_args()
if args.fetch_live or not args.snapshot:
snapshot = fetch_live_snapshot()
else:
snapshot = json.loads(args.snapshot.read_text(encoding="utf-8"))
write_json(args.json_out, snapshot)
args.markdown_out.parent.mkdir(parents=True, exist_ok=True)
args.markdown_out.write_text(render_markdown(snapshot), encoding="utf-8")
print(args.markdown_out)
if __name__ == "__main__":
main()

View File

@@ -0,0 +1,43 @@
from pathlib import Path
from agent.context_snapshots import ContextSnapshotRecorder
def test_disabled_recorder_writes_nothing(tmp_path):
recorder = ContextSnapshotRecorder(session_id="session-1", enabled=False, base_dir=tmp_path)
out = recorder.record_call(
1,
system_prompt="system",
api_messages=[{"role": "user", "content": "hello"}],
)
assert out is None
assert not (tmp_path / "session-1").exists()
def test_enabled_recorder_writes_expected_artifacts(tmp_path):
recorder = ContextSnapshotRecorder(session_id="session-1", enabled=True, base_dir=tmp_path)
out = recorder.record_call(
1,
system_prompt="system prompt",
memory_provider_system_prompt="# Hindsight Memory\nActive.",
memory_prefetch_raw="- remembered fact",
memory_context_block="<memory-context>\nremembered\n</memory-context>",
api_user_message="What do I prefer?\n\n<memory-context>\nremembered\n</memory-context>",
api_messages=[
{"role": "system", "content": "system prompt"},
{"role": "user", "content": "What do I prefer?"},
],
metadata={"provider": "openai", "memory_providers": ["builtin", "hindsight"]},
)
assert out == tmp_path / "session-1" / "call_001"
assert (out / "system_prompt.txt").read_text(encoding="utf-8") == "system prompt"
assert (out / "memory_provider_system_prompt.txt").read_text(encoding="utf-8").startswith("# Hindsight Memory")
assert (out / "memory_prefetch_raw.txt").read_text(encoding="utf-8") == "- remembered fact"
assert "<memory-context>" in (out / "memory_context_block.txt").read_text(encoding="utf-8")
assert "What do I prefer?" in (out / "api_user_message.txt").read_text(encoding="utf-8")
assert (out / "api_messages.json").read_text(encoding="utf-8").startswith("[")
assert '"hindsight"' in (out / "metadata.json").read_text(encoding="utf-8")

View File

@@ -596,3 +596,26 @@ class TestAvailability:
monkeypatch.setenv("HINDSIGHT_MODE", "local")
p = HindsightMemoryProvider()
assert p.is_available()
def test_local_embedded_profile_defaults_to_agent_identity(self, tmp_path, monkeypatch):
config_path = tmp_path / "hindsight" / "config.json"
config_path.parent.mkdir(parents=True, exist_ok=True)
config_path.write_text(json.dumps({
"mode": "local_embedded",
"llm_provider": "ollama",
"llm_model": "gemma3:12b",
}))
monkeypatch.setattr(
"plugins.memory.hindsight.get_hermes_home",
lambda: tmp_path,
)
p = HindsightMemoryProvider()
p.initialize(
session_id="test-session",
hermes_home=str(tmp_path / "profiles" / "atlas-hindsight"),
platform="cli",
agent_identity="atlas-hindsight",
)
assert p._config["profile"] == "atlas-hindsight"

View File

@@ -0,0 +1,94 @@
from pathlib import Path
from types import SimpleNamespace
from unittest.mock import MagicMock, patch
import importlib
import sys
import types
def _make_tool_defs(*names: str) -> list:
return [
{
"type": "function",
"function": {
"name": n,
"description": f"{n} tool",
"parameters": {"type": "object", "properties": {}},
},
}
for n in names
]
def _mock_response(content="Done", finish_reason="stop"):
msg = SimpleNamespace(content=content, tool_calls=None)
choice = SimpleNamespace(message=msg, finish_reason=finish_reason)
return SimpleNamespace(choices=[choice], usage=SimpleNamespace(prompt_tokens=1, completion_tokens=1, total_tokens=2))
def _load_ai_agent():
sys.modules.setdefault("agent.auxiliary_client", types.SimpleNamespace(call_llm=lambda *a, **k: ""))
run_agent = importlib.import_module("run_agent")
return run_agent.AIAgent
def test_run_conversation_writes_context_snapshot_artifacts(tmp_path):
AIAgent = _load_ai_agent()
class _FakePrivacyFilter:
def __init__(self):
self.last_report = None
def sanitize_messages(self, messages):
return list(messages)
with (
patch("run_agent.get_tool_definitions", return_value=_make_tool_defs("web_search")),
patch("run_agent.check_toolset_requirements", return_value={}),
patch("run_agent.OpenAI"),
patch("hermes_cli.plugins.invoke_hook", return_value=[]),
patch.dict(sys.modules, {"agent.privacy_filter": types.SimpleNamespace(PrivacyFilter=_FakePrivacyFilter)}),
):
agent = AIAgent(
api_key="test-key-1234567890",
base_url="https://example.com/v1",
quiet_mode=True,
skip_context_files=True,
skip_memory=True,
context_snapshots_enabled=True,
context_snapshots_dir=str(tmp_path),
)
agent.client = MagicMock()
agent.client.chat.completions.create.return_value = _mock_response(content="Done")
agent._build_system_prompt = MagicMock(return_value="Core system prompt")
agent._memory_manager = MagicMock()
agent._memory_manager.prefetch_all.return_value = "- remembered preference"
agent._memory_manager.build_system_prompt.return_value = "# Hindsight Memory\nActive."
agent._memory_manager.providers = [
SimpleNamespace(name="builtin"),
SimpleNamespace(name="hindsight"),
]
result = agent.run_conversation("What do I prefer?")
assert result["final_response"] == "Done"
call_dir = tmp_path / agent.session_id / "call_001"
assert call_dir.exists()
assert (call_dir / "system_prompt.txt").read_text(encoding="utf-8") == "Core system prompt"
assert (call_dir / "memory_provider_system_prompt.txt").read_text(encoding="utf-8").startswith("# Hindsight Memory")
assert (call_dir / "memory_prefetch_raw.txt").read_text(encoding="utf-8") == "- remembered preference"
assert "<memory-context>" in (call_dir / "memory_context_block.txt").read_text(encoding="utf-8")
api_user_message = (call_dir / "api_user_message.txt").read_text(encoding="utf-8")
assert "What do I prefer?" in api_user_message
assert "remembered preference" in api_user_message
api_messages = (call_dir / "api_messages.json").read_text(encoding="utf-8")
assert '"role": "system"' in api_messages
assert '"role": "user"' in api_messages
metadata = (call_dir / "metadata.json").read_text(encoding="utf-8")
assert '"hindsight"' in metadata

View File

@@ -0,0 +1,22 @@
from pathlib import Path
ROOT = Path(__file__).resolve().parents[1]
DOC = ROOT / "docs" / "hindsight-local-eval.md"
def test_hindsight_local_eval_doc_exists_and_covers_switching():
assert DOC.exists(), "missing Hindsight local eval doc"
text = DOC.read_text(encoding="utf-8")
for snippet in (
"atlas-baseline",
"atlas-mempalace",
"atlas-hindsight",
"HERMES_HOME",
"HERMES_CONTEXT_SNAPSHOTS",
"memory_prefetch_raw.txt",
"api_user_message.txt",
"local_embedded",
"hindsight/config.json",
):
assert snippet in text

View File

@@ -1,94 +0,0 @@
"""Tests for the morning review packet status report generator."""
from __future__ import annotations
import importlib.util
from pathlib import Path
SCRIPT_PATH = Path(__file__).resolve().parents[1] / "scripts" / "morning_review_packet_status.py"
DOC_PATH = Path(__file__).resolve().parents[1] / "docs" / "morning-review-packet-2026-04-21-status.md"
def load_module():
assert SCRIPT_PATH.exists(), f"missing status script: {SCRIPT_PATH}"
spec = importlib.util.spec_from_file_location("morning_review_packet_status_test", SCRIPT_PATH)
module = importlib.util.module_from_spec(spec)
assert spec.loader is not None
spec.loader.exec_module(module)
return module
def sample_snapshot():
return {
"epic": {"number": 949, "title": "Morning review packet", "state": "open"},
"children": [
{
"number": 950,
"title": "Verify AI Gateway provider UX + attribution headers",
"state": "open",
"open_prs": [],
},
{
"number": 954,
"title": "Verify maps skill guest_house / camp_site / bakery expansion",
"state": "open",
"open_prs": [
{"number": 1021, "head": "fix/954", "title": "feat: sync maps skill and verify guest_house/camp_site/bakery (#954)"}
],
},
{
"number": 961,
"title": "Verify web dashboard update/restart action buttons",
"state": "closed",
"open_prs": [],
},
],
"decomposition_issues": [
{"number": 965, "title": "Phase 1: Landscape Analysis & Scaffolding", "state": "open"},
{"number": 967, "title": "Phase 3: Poka-yoke Integration & Fleet Verification", "state": "closed"},
],
}
def test_extract_child_issue_numbers_from_epic_body():
module = load_module()
body = """
- [ ] #950 one
- [ ] #951 two
- [ ] #962 three
"""
assert module.extract_issue_numbers(body) == [950, 951, 962]
def test_summarize_snapshot_counts_open_closed_and_pr_backing():
module = load_module()
summary = module.summarize_snapshot(sample_snapshot())
assert summary["total_children"] == 3
assert summary["open_children"] == 2
assert summary["closed_children"] == 1
assert summary["open_with_pr"] == 1
assert summary["open_without_pr"] == 1
def test_render_markdown_includes_issue_matrix_and_drift_sections():
module = load_module()
md = module.render_markdown(sample_snapshot())
assert "# Morning Review Packet Status — #949" in md
assert "## Child QA Matrix" in md
assert "#950" in md
assert "#954" in md
assert "#1021" in md
assert "## Unowned Open QA Issues" in md
assert "## Drift Signals" in md
assert "forge/main is still catching up to the upstream packet" in md
def test_committed_status_doc_exists_and_mentions_live_examples():
assert DOC_PATH.exists(), f"missing generated status doc: {DOC_PATH}"
text = DOC_PATH.read_text(encoding="utf-8")
assert "# Morning Review Packet Status — #949" in text
assert "#954" in text
assert "#1021" in text
assert "#950" in text