Compare commits
1 Commits
mimo/code/
...
mimo/creat
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
c7dfb8a5e6 |
Binary file not shown.
@@ -60,6 +60,23 @@ If the heartbeat is older than --stale-threshold seconds, the
|
|||||||
mind is considered dead even if the process is still running
|
mind is considered dead even if the process is still running
|
||||||
(e.g., hung on a blocking call).
|
(e.g., hung on a blocking call).
|
||||||
|
|
||||||
|
KIMI HEARTBEAT
|
||||||
|
==============
|
||||||
|
The Kimi triage pipeline writes a cron heartbeat file after each run:
|
||||||
|
|
||||||
|
/var/run/bezalel/heartbeats/kimi-heartbeat.last
|
||||||
|
(fallback: ~/.bezalel/heartbeats/kimi-heartbeat.last)
|
||||||
|
{
|
||||||
|
"job": "kimi-heartbeat",
|
||||||
|
"timestamp": 1711843200.0,
|
||||||
|
"interval_seconds": 900,
|
||||||
|
"pid": 12345,
|
||||||
|
"status": "ok"
|
||||||
|
}
|
||||||
|
|
||||||
|
If the heartbeat is stale (>2x declared interval), the watchdog reports
|
||||||
|
a Kimi Heartbeat failure alongside the other checks.
|
||||||
|
|
||||||
ZERO DEPENDENCIES
|
ZERO DEPENDENCIES
|
||||||
=================
|
=================
|
||||||
Pure stdlib. No pip installs. Same machine as the nexus.
|
Pure stdlib. No pip installs. Same machine as the nexus.
|
||||||
@@ -104,6 +121,10 @@ DEFAULT_HEARTBEAT_PATH = Path.home() / ".nexus" / "heartbeat.json"
|
|||||||
DEFAULT_STALE_THRESHOLD = 300 # 5 minutes without a heartbeat = dead
|
DEFAULT_STALE_THRESHOLD = 300 # 5 minutes without a heartbeat = dead
|
||||||
DEFAULT_INTERVAL = 60 # seconds between checks in watch mode
|
DEFAULT_INTERVAL = 60 # seconds between checks in watch mode
|
||||||
|
|
||||||
|
# Kimi Heartbeat — cron job heartbeat file written by the triage pipeline
|
||||||
|
KIMI_HEARTBEAT_JOB = "kimi-heartbeat"
|
||||||
|
KIMI_HEARTBEAT_STALE_MULTIPLIER = 2.0 # stale at 2x declared interval
|
||||||
|
|
||||||
GITEA_URL = os.environ.get("GITEA_URL", "https://forge.alexanderwhitestone.com")
|
GITEA_URL = os.environ.get("GITEA_URL", "https://forge.alexanderwhitestone.com")
|
||||||
GITEA_TOKEN = os.environ.get("GITEA_TOKEN", "")
|
GITEA_TOKEN = os.environ.get("GITEA_TOKEN", "")
|
||||||
GITEA_REPO = os.environ.get("NEXUS_REPO", "Timmy_Foundation/the-nexus")
|
GITEA_REPO = os.environ.get("NEXUS_REPO", "Timmy_Foundation/the-nexus")
|
||||||
@@ -345,6 +366,93 @@ def check_syntax_health() -> CheckResult:
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _format_heartbeat_duration(seconds: float) -> str:
    """Render a duration as ``"<n>s"`` below one hour, else ``"<h>h <m>m"``."""
    if seconds < 3600:
        return f"{int(seconds)}s"
    return f"{int(seconds // 3600)}h {int((seconds % 3600) // 60)}m"


def check_kimi_heartbeat(
    job: str = KIMI_HEARTBEAT_JOB,
    stale_multiplier: float = KIMI_HEARTBEAT_STALE_MULTIPLIER,
) -> CheckResult:
    """Check whether the Kimi Heartbeat cron job has reported recently.

    Reads ``<job>.last`` from the heartbeat directory, resolved in this order:

    1. ``$BEZALEL_HEARTBEAT_DIR`` if set (used even if it does not exist, in
       which case the "no heartbeat file" result is returned),
    2. ``/var/run/bezalel/heartbeats`` if it exists,
    3. ``~/.bezalel/heartbeats`` if it exists.

    The file is expected to be a JSON object with ``timestamp`` (epoch
    seconds), ``interval_seconds`` and ``status`` keys.  The job is stale when
    ``time.time() - timestamp > stale_multiplier * interval_seconds``.  A
    missing or non-positive interval falls back to 1800 seconds.

    Args:
        job: Heartbeat job name; the file read is ``<job>.last``.
        stale_multiplier: How many declared intervals may elapse before the
            heartbeat counts as stale.

    Returns:
        A ``CheckResult`` named ``"Kimi Heartbeat"``.  It is unhealthy when the
        directory or file is missing, when the file cannot be read or does not
        hold usable values, or when the heartbeat is stale.  This function
        never raises for malformed file content; it reports it instead.
    """
    primary = Path("/var/run/bezalel/heartbeats")
    fallback = Path.home() / ".bezalel" / "heartbeats"
    env_dir = os.environ.get("BEZALEL_HEARTBEAT_DIR")
    if env_dir:
        hb_dir = Path(env_dir)
    elif primary.exists():
        hb_dir = primary
    elif fallback.exists():
        hb_dir = fallback
    else:
        return CheckResult(
            name="Kimi Heartbeat",
            healthy=False,
            message="Heartbeat directory not found — no triage pipeline deployed yet",
            details={"searched": [str(primary), str(fallback)]},
        )

    hb_file = hb_dir / f"{job}.last"
    if not hb_file.exists():
        return CheckResult(
            name="Kimi Heartbeat",
            healthy=False,
            message=f"No heartbeat file at {hb_file} — Kimi triage pipeline has never reported",
            details={"path": str(hb_file)},
        )

    def corrupt(reason: object) -> CheckResult:
        # Single shape for every "file exists but is unusable" outcome.
        return CheckResult(
            name="Kimi Heartbeat",
            healthy=False,
            message=f"Heartbeat file corrupt: {reason}",
            details={"path": str(hb_file), "error": str(reason)},
        )

    try:
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        data = json.loads(hb_file.read_text(encoding="utf-8"))
    except (ValueError, OSError) as e:
        return corrupt(e)

    # Valid JSON is not necessarily a JSON *object*; a list or scalar here
    # would otherwise crash the watchdog on ``.get``.
    if not isinstance(data, dict):
        return corrupt(f"expected a JSON object, got {type(data).__name__}")

    try:
        timestamp = float(data.get("timestamp", 0))
        interval = int(data.get("interval_seconds", 0))
    except (TypeError, ValueError, OverflowError) as e:
        # null, non-numeric strings, nested values, NaN/Infinity intervals.
        return corrupt(e)

    # Rejects NaN and +/-Infinity: a NaN age compares False against every
    # threshold and would be reported as alive.
    if not (float("-inf") < timestamp < float("inf")):
        return corrupt("timestamp is not a finite number")

    raw_status = data.get("status", "unknown")
    age = time.time() - timestamp

    if interval <= 0:
        # No declared interval — fall back to a 30 minute interval.
        interval = 1800

    threshold = stale_multiplier * interval
    age_str = _format_heartbeat_duration(age)
    interval_str = _format_heartbeat_duration(interval)

    if age > threshold:
        return CheckResult(
            name="Kimi Heartbeat",
            healthy=False,
            message=(
                f"Silent for {age_str} "
                f"(threshold: {stale_multiplier}x {interval_str} = {int(threshold)}s). "
                f"Status: {raw_status}"
            ),
            details=data,
        )

    return CheckResult(
        name="Kimi Heartbeat",
        healthy=True,
        message=f"Alive — last beat {age_str} ago (interval {interval_str}, status={raw_status})",
        details=data,
    )
|
||||||
|
|
||||||
|
|
||||||
# ── Gitea alerting ───────────────────────────────────────────────────
|
# ── Gitea alerting ───────────────────────────────────────────────────
|
||||||
|
|
||||||
def _gitea_request(method: str, path: str, data: Optional[dict] = None) -> Any:
|
def _gitea_request(method: str, path: str, data: Optional[dict] = None) -> Any:
|
||||||
@@ -446,6 +554,7 @@ def run_health_checks(
|
|||||||
check_mind_process(),
|
check_mind_process(),
|
||||||
check_heartbeat(heartbeat_path, stale_threshold),
|
check_heartbeat(heartbeat_path, stale_threshold),
|
||||||
check_syntax_health(),
|
check_syntax_health(),
|
||||||
|
check_kimi_heartbeat(),
|
||||||
]
|
]
|
||||||
return HealthReport(timestamp=time.time(), checks=checks)
|
return HealthReport(timestamp=time.time(), checks=checks)
|
||||||
|
|
||||||
@@ -545,6 +654,14 @@ def main():
|
|||||||
"--json", action="store_true", dest="output_json",
|
"--json", action="store_true", dest="output_json",
|
||||||
help="Output results as JSON (for integration with other tools)",
|
help="Output results as JSON (for integration with other tools)",
|
||||||
)
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--kimi-job", default=KIMI_HEARTBEAT_JOB,
|
||||||
|
help=f"Kimi heartbeat job name (default: {KIMI_HEARTBEAT_JOB})",
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--kimi-stale-multiplier", type=float, default=KIMI_HEARTBEAT_STALE_MULTIPLIER,
|
||||||
|
help=f"Kimi heartbeat staleness multiplier (default: {KIMI_HEARTBEAT_STALE_MULTIPLIER})",
|
||||||
|
)
|
||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
|||||||
@@ -29,8 +29,6 @@ from typing import Any, Callable, Optional
|
|||||||
|
|
||||||
import websockets
|
import websockets
|
||||||
|
|
||||||
from bannerlord_trace import BannerlordTraceLogger
|
|
||||||
|
|
||||||
# ═══════════════════════════════════════════════════════════════════════════
|
# ═══════════════════════════════════════════════════════════════════════════
|
||||||
# CONFIGURATION
|
# CONFIGURATION
|
||||||
# ═══════════════════════════════════════════════════════════════════════════
|
# ═══════════════════════════════════════════════════════════════════════════
|
||||||
@@ -267,13 +265,11 @@ class BannerlordHarness:
|
|||||||
desktop_command: Optional[list[str]] = None,
|
desktop_command: Optional[list[str]] = None,
|
||||||
steam_command: Optional[list[str]] = None,
|
steam_command: Optional[list[str]] = None,
|
||||||
enable_mock: bool = False,
|
enable_mock: bool = False,
|
||||||
enable_trace: bool = False,
|
|
||||||
):
|
):
|
||||||
self.hermes_ws_url = hermes_ws_url
|
self.hermes_ws_url = hermes_ws_url
|
||||||
self.desktop_command = desktop_command or DEFAULT_MCP_DESKTOP_COMMAND
|
self.desktop_command = desktop_command or DEFAULT_MCP_DESKTOP_COMMAND
|
||||||
self.steam_command = steam_command or DEFAULT_MCP_STEAM_COMMAND
|
self.steam_command = steam_command or DEFAULT_MCP_STEAM_COMMAND
|
||||||
self.enable_mock = enable_mock
|
self.enable_mock = enable_mock
|
||||||
self.enable_trace = enable_trace
|
|
||||||
|
|
||||||
# MCP clients
|
# MCP clients
|
||||||
self.desktop_mcp: Optional[MCPClient] = None
|
self.desktop_mcp: Optional[MCPClient] = None
|
||||||
@@ -288,9 +284,6 @@ class BannerlordHarness:
|
|||||||
self.cycle_count = 0
|
self.cycle_count = 0
|
||||||
self.running = False
|
self.running = False
|
||||||
|
|
||||||
# Session trace logger
|
|
||||||
self.trace_logger: Optional[BannerlordTraceLogger] = None
|
|
||||||
|
|
||||||
# ═══ LIFECYCLE ═══
|
# ═══ LIFECYCLE ═══
|
||||||
|
|
||||||
async def start(self) -> bool:
|
async def start(self) -> bool:
|
||||||
@@ -321,15 +314,6 @@ class BannerlordHarness:
|
|||||||
# Connect to Hermes WebSocket
|
# Connect to Hermes WebSocket
|
||||||
await self._connect_hermes()
|
await self._connect_hermes()
|
||||||
|
|
||||||
# Initialize trace logger if enabled
|
|
||||||
if self.enable_trace:
|
|
||||||
self.trace_logger = BannerlordTraceLogger(
|
|
||||||
harness_session_id=self.session_id,
|
|
||||||
hermes_session_id=self.session_id,
|
|
||||||
)
|
|
||||||
self.trace_logger.start_session()
|
|
||||||
log.info(f"Trace logger started: {self.trace_logger.trace_id}")
|
|
||||||
|
|
||||||
log.info("Harness initialized successfully")
|
log.info("Harness initialized successfully")
|
||||||
return True
|
return True
|
||||||
|
|
||||||
@@ -338,12 +322,6 @@ class BannerlordHarness:
|
|||||||
self.running = False
|
self.running = False
|
||||||
log.info("Shutting down harness...")
|
log.info("Shutting down harness...")
|
||||||
|
|
||||||
# Finalize trace logger
|
|
||||||
if self.trace_logger:
|
|
||||||
manifest = self.trace_logger.finish_session()
|
|
||||||
log.info(f"Trace saved: {manifest.trace_file}")
|
|
||||||
log.info(f"Manifest: {self.trace_logger.manifest_file}")
|
|
||||||
|
|
||||||
if self.desktop_mcp:
|
if self.desktop_mcp:
|
||||||
self.desktop_mcp.stop()
|
self.desktop_mcp.stop()
|
||||||
if self.steam_mcp:
|
if self.steam_mcp:
|
||||||
@@ -729,11 +707,6 @@ class BannerlordHarness:
|
|||||||
self.cycle_count = iteration
|
self.cycle_count = iteration
|
||||||
log.info(f"\n--- ODA Cycle {iteration + 1}/{max_iterations} ---")
|
log.info(f"\n--- ODA Cycle {iteration + 1}/{max_iterations} ---")
|
||||||
|
|
||||||
# Start trace cycle
|
|
||||||
trace_cycle = None
|
|
||||||
if self.trace_logger:
|
|
||||||
trace_cycle = self.trace_logger.begin_cycle(iteration)
|
|
||||||
|
|
||||||
# 1. OBSERVE: Capture state
|
# 1. OBSERVE: Capture state
|
||||||
log.info("[OBSERVE] Capturing game state...")
|
log.info("[OBSERVE] Capturing game state...")
|
||||||
state = await self.capture_state()
|
state = await self.capture_state()
|
||||||
@@ -742,24 +715,11 @@ class BannerlordHarness:
|
|||||||
log.info(f" Screen: {state.visual.screen_size}")
|
log.info(f" Screen: {state.visual.screen_size}")
|
||||||
log.info(f" Players online: {state.game_context.current_players_online}")
|
log.info(f" Players online: {state.game_context.current_players_online}")
|
||||||
|
|
||||||
# Populate trace with observation data
|
|
||||||
if trace_cycle:
|
|
||||||
trace_cycle.screenshot_path = state.visual.screenshot_path or ""
|
|
||||||
trace_cycle.window_found = state.visual.window_found
|
|
||||||
trace_cycle.screen_size = list(state.visual.screen_size)
|
|
||||||
trace_cycle.mouse_position = list(state.visual.mouse_position)
|
|
||||||
trace_cycle.playtime_hours = state.game_context.playtime_hours
|
|
||||||
trace_cycle.players_online = state.game_context.current_players_online
|
|
||||||
trace_cycle.is_running = state.game_context.is_running
|
|
||||||
|
|
||||||
# 2. DECIDE: Get actions from decision function
|
# 2. DECIDE: Get actions from decision function
|
||||||
log.info("[DECIDE] Getting actions...")
|
log.info("[DECIDE] Getting actions...")
|
||||||
actions = decision_fn(state)
|
actions = decision_fn(state)
|
||||||
log.info(f" Decision returned {len(actions)} actions")
|
log.info(f" Decision returned {len(actions)} actions")
|
||||||
|
|
||||||
if trace_cycle:
|
|
||||||
trace_cycle.actions_planned = actions
|
|
||||||
|
|
||||||
# 3. ACT: Execute actions
|
# 3. ACT: Execute actions
|
||||||
log.info("[ACT] Executing actions...")
|
log.info("[ACT] Executing actions...")
|
||||||
results = []
|
results = []
|
||||||
@@ -771,13 +731,6 @@ class BannerlordHarness:
|
|||||||
if result.error:
|
if result.error:
|
||||||
log.info(f" Error: {result.error}")
|
log.info(f" Error: {result.error}")
|
||||||
|
|
||||||
if trace_cycle:
|
|
||||||
trace_cycle.actions_executed.append(result.to_dict())
|
|
||||||
|
|
||||||
# Finalize trace cycle
|
|
||||||
if trace_cycle:
|
|
||||||
self.trace_logger.finish_cycle(trace_cycle)
|
|
||||||
|
|
||||||
# Send cycle summary telemetry
|
# Send cycle summary telemetry
|
||||||
await self._send_telemetry({
|
await self._send_telemetry({
|
||||||
"type": "oda_cycle_complete",
|
"type": "oda_cycle_complete",
|
||||||
@@ -883,18 +836,12 @@ async def main():
|
|||||||
default=1.0,
|
default=1.0,
|
||||||
help="Delay between iterations in seconds (default: 1.0)",
|
help="Delay between iterations in seconds (default: 1.0)",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
|
||||||
"--trace",
|
|
||||||
action="store_true",
|
|
||||||
help="Enable session trace logging to ~/.timmy/traces/bannerlord/",
|
|
||||||
)
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
# Create harness
|
# Create harness
|
||||||
harness = BannerlordHarness(
|
harness = BannerlordHarness(
|
||||||
hermes_ws_url=args.hermes_ws,
|
hermes_ws_url=args.hermes_ws,
|
||||||
enable_mock=args.mock,
|
enable_mock=args.mock,
|
||||||
enable_trace=args.trace,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
|||||||
@@ -1,234 +0,0 @@
|
|||||||
#!/usr/bin/env python3
|
|
||||||
"""
|
|
||||||
Bannerlord Session Trace Logger — First-Replayable Training Material
|
|
||||||
|
|
||||||
Captures one Bannerlord session as a replayable trace:
|
|
||||||
- Timestamps on every cycle
|
|
||||||
- Actions executed with success/failure
|
|
||||||
- World-state evidence (screenshots, Steam stats)
|
|
||||||
- Hermes session/log ID mapping
|
|
||||||
|
|
||||||
Storage: ~/.timmy/traces/bannerlord/trace_<session_id>.jsonl
|
|
||||||
Manifest: ~/.timmy/traces/bannerlord/manifest_<session_id>.json
|
|
||||||
|
|
||||||
Each JSONL line is one ODA cycle with full context.
|
|
||||||
The manifest bundles metadata for replay/eval.
|
|
||||||
"""
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import json
|
|
||||||
import time
|
|
||||||
import uuid
|
|
||||||
from dataclasses import dataclass, field, asdict
|
|
||||||
from datetime import datetime, timezone
|
|
||||||
from pathlib import Path
|
|
||||||
from typing import Optional
|
|
||||||
|
|
||||||
# Storage root — local-first under ~/.timmy/
|
|
||||||
DEFAULT_TRACE_DIR = Path.home() / ".timmy" / "traces" / "bannerlord"
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
class CycleTrace:
    """One observe-decide-act (ODA) cycle, stored as one JSONL line."""

    cycle_index: int
    timestamp_start: str
    timestamp_end: str = ""
    duration_ms: int = 0

    # Observe
    screenshot_path: str = ""
    window_found: bool = False
    screen_size: list[int] = field(default_factory=lambda: [1920, 1080])
    mouse_position: list[int] = field(default_factory=lambda: [0, 0])
    playtime_hours: float = 0.0
    players_online: int = 0
    is_running: bool = False

    # Decide
    actions_planned: list[dict] = field(default_factory=list)
    decision_note: str = ""

    # Act
    actions_executed: list[dict] = field(default_factory=list)
    actions_succeeded: int = 0
    actions_failed: int = 0

    # Metadata
    hermes_session_id: str = ""
    hermes_log_id: str = ""
    harness_session_id: str = ""

    def to_dict(self) -> dict:
        """Return the cycle as a plain dict suitable for ``json.dumps``."""
        return asdict(self)


@dataclass
class SessionManifest:
    """Top-level metadata for a captured session trace."""

    trace_id: str
    harness_session_id: str
    hermes_session_id: str
    hermes_log_id: str
    game: str = "Mount & Blade II: Bannerlord"
    app_id: int = 261550
    started_at: str = ""
    finished_at: str = ""
    total_cycles: int = 0
    total_actions: int = 0
    total_succeeded: int = 0
    total_failed: int = 0
    trace_file: str = ""
    trace_dir: str = ""
    replay_command: str = ""
    eval_note: str = ""

    def to_dict(self) -> dict:
        """Return the manifest as a plain dict suitable for ``json.dump``."""
        return asdict(self)


class BannerlordTraceLogger:
    """
    Captures a single Bannerlord session as a replayable trace.

    Each finished cycle is appended to ``trace_<trace_id>.jsonl`` immediately;
    ``manifest_<trace_id>.json`` is written once by ``finish_session``.

    Usage:
        logger = BannerlordTraceLogger(hermes_session_id="abc123")
        logger.start_session()
        cycle = logger.begin_cycle(0)
        # ... populate cycle fields ...
        logger.finish_cycle(cycle)
        manifest = logger.finish_session()
    """

    def __init__(
        self,
        trace_dir: Optional[Path] = None,
        harness_session_id: str = "",
        hermes_session_id: str = "",
        hermes_log_id: str = "",
    ):
        """Create the trace directory and pick the output file names.

        Args:
            trace_dir: Directory for trace and manifest files.  A plain string
                path is accepted as well as a ``Path``.  ``None`` selects
                ``DEFAULT_TRACE_DIR``.
            harness_session_id: Harness session identifier; a random 8-char
                id is generated when empty.
            hermes_session_id: Hermes session identifier copied onto every
                cycle and the manifest.
            hermes_log_id: Hermes log identifier copied onto every cycle and
                the manifest.
        """
        # Normalise to Path so string arguments do not fail on .mkdir / "/".
        self.trace_dir = Path(trace_dir) if trace_dir is not None else DEFAULT_TRACE_DIR
        self.trace_dir.mkdir(parents=True, exist_ok=True)

        stamp = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S")
        self.trace_id = f"bl_{stamp}_{uuid.uuid4().hex[:6]}"
        self.harness_session_id = harness_session_id or str(uuid.uuid4())[:8]
        self.hermes_session_id = hermes_session_id
        self.hermes_log_id = hermes_log_id

        self.trace_file = self.trace_dir / f"trace_{self.trace_id}.jsonl"
        self.manifest_file = self.trace_dir / f"manifest_{self.trace_id}.json"

        self.cycles: list[CycleTrace] = []
        self.started_at: str = ""
        self.finished_at: str = ""

    def start_session(self) -> str:
        """Begin a trace session. Returns trace_id."""
        self.started_at = datetime.now(timezone.utc).isoformat()
        return self.trace_id

    def begin_cycle(self, cycle_index: int) -> CycleTrace:
        """Start recording one ODA cycle, stamped with the current UTC time."""
        return CycleTrace(
            cycle_index=cycle_index,
            timestamp_start=datetime.now(timezone.utc).isoformat(),
            harness_session_id=self.harness_session_id,
            hermes_session_id=self.hermes_session_id,
            hermes_log_id=self.hermes_log_id,
        )

    def finish_cycle(self, cycle: CycleTrace) -> None:
        """Finalize one cycle and append it to the trace file.

        Sets ``timestamp_end``, ``duration_ms``, ``actions_succeeded`` and
        ``actions_failed`` on *cycle* in place.
        """
        cycle.timestamp_end = datetime.now(timezone.utc).isoformat()

        # timestamp_start is a caller-visible string, so it may have been
        # overwritten with something unparseable; fall back to 0 in that case.
        try:
            t0 = datetime.fromisoformat(cycle.timestamp_start)
            t1 = datetime.fromisoformat(cycle.timestamp_end)
            cycle.duration_ms = int((t1 - t0).total_seconds() * 1000)
        except (ValueError, TypeError):
            cycle.duration_ms = 0

        # An entry without a "success" key counts as neither succeeded nor
        # failed, so the two counters need not add up to len(actions_executed).
        cycle.actions_succeeded = sum(
            1 for a in cycle.actions_executed if a.get("success", False)
        )
        cycle.actions_failed = sum(
            1 for a in cycle.actions_executed if not a.get("success", True)
        )

        self.cycles.append(cycle)

        # Append to JSONL; explicit encoding keeps files portable across hosts.
        with open(self.trace_file, "a", encoding="utf-8") as f:
            f.write(json.dumps(cycle.to_dict()) + "\n")

    def finish_session(self) -> SessionManifest:
        """Finalize the session, write the manifest file and return it."""
        self.finished_at = datetime.now(timezone.utc).isoformat()

        manifest = SessionManifest(
            trace_id=self.trace_id,
            harness_session_id=self.harness_session_id,
            hermes_session_id=self.hermes_session_id,
            hermes_log_id=self.hermes_log_id,
            started_at=self.started_at,
            finished_at=self.finished_at,
            total_cycles=len(self.cycles),
            total_actions=sum(len(c.actions_executed) for c in self.cycles),
            total_succeeded=sum(c.actions_succeeded for c in self.cycles),
            total_failed=sum(c.actions_failed for c in self.cycles),
            trace_file=str(self.trace_file),
            trace_dir=str(self.trace_dir),
            replay_command=(
                f"python -m nexus.bannerlord_harness --mock --replay {self.trace_file}"
            ),
            eval_note=(
                "To replay: load this trace, re-execute each cycle's actions_planned "
                "against a fresh harness in mock mode, compare actions_executed outcomes. "
                "Success metric: >=90% action parity between original and replay runs."
            ),
        )

        with open(self.manifest_file, "w", encoding="utf-8") as f:
            json.dump(manifest.to_dict(), f, indent=2)

        return manifest

    @classmethod
    def load_trace(cls, trace_file: Path) -> list[dict]:
        """Load a trace JSONL file; blank lines are skipped.

        Raises:
            OSError: If the file cannot be opened.
            json.JSONDecodeError: If a non-blank line is not valid JSON.
        """
        with open(trace_file, encoding="utf-8") as f:
            return [json.loads(line) for line in map(str.strip, f) if line]

    @classmethod
    def load_manifest(cls, manifest_file: Path) -> dict:
        """Load a session manifest and return its parsed JSON content."""
        with open(manifest_file, encoding="utf-8") as f:
            return json.load(f)

    @classmethod
    def list_traces(cls, trace_dir: Optional[Path] = None) -> list[dict]:
        """List the manifests of all trace sessions in *trace_dir*.

        Manifests are returned in sorted file-name order.  Files that cannot
        be read or parsed are skipped.  A missing directory yields ``[]``.
        """
        d = Path(trace_dir) if trace_dir is not None else DEFAULT_TRACE_DIR
        if not d.exists():
            return []

        traces = []
        for mf in sorted(d.glob("manifest_*.json")):
            try:
                traces.append(cls.load_manifest(mf))
            except (ValueError, OSError):
                # ValueError covers JSONDecodeError and UnicodeDecodeError;
                # one damaged manifest must not hide the readable ones.
                continue
        return traces
|
|
||||||
@@ -1,97 +0,0 @@
|
|||||||
# Bannerlord Session Trace — Replay & Eval Guide
|
|
||||||
|
|
||||||
## Storage Layout
|
|
||||||
|
|
||||||
All traces live under `~/.timmy/traces/bannerlord/`:
|
|
||||||
|
|
||||||
```
|
|
||||||
~/.timmy/traces/bannerlord/
|
|
||||||
trace_<trace_id>.jsonl # One line per ODA cycle (full state + actions)
|
|
||||||
manifest_<trace_id>.json # Session metadata, counts, replay command
|
|
||||||
```
|
|
||||||
|
|
||||||
## Trace Format (JSONL)
|
|
||||||
|
|
||||||
Each line is one ODA cycle:
|
|
||||||
|
|
||||||
```json
|
|
||||||
{
|
|
||||||
"cycle_index": 0,
|
|
||||||
"timestamp_start": "2026-04-10T20:15:00+00:00",
|
|
||||||
"timestamp_end": "2026-04-10T20:15:45+00:00",
|
|
||||||
"duration_ms": 45000,
|
|
||||||
|
|
||||||
"screenshot_path": "/tmp/bannerlord_capture_1744320900.png",
|
|
||||||
"window_found": true,
|
|
||||||
"screen_size": [1920, 1080],
|
|
||||||
"mouse_position": [960, 540],
|
|
||||||
"playtime_hours": 142.5,
|
|
||||||
"players_online": 8421,
|
|
||||||
"is_running": true,
|
|
||||||
|
|
||||||
"actions_planned": [{"type": "move_to", "x": 960, "y": 540}],
|
|
||||||
"actions_executed": [{"success": true, "action": "move_to", ...}],
|
|
||||||
"actions_succeeded": 1,
|
|
||||||
"actions_failed": 0,
|
|
||||||
|
|
||||||
"hermes_session_id": "f47ac10b",
|
|
||||||
"hermes_log_id": "",
|
|
||||||
"harness_session_id": "f47ac10b"
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
## Capturing a Trace
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# Run harness with trace logging enabled
|
|
||||||
cd /path/to/the-nexus
|
|
||||||
python -m nexus.bannerlord_harness --mock --trace --iterations 3
|
|
||||||
```
|
|
||||||
|
|
||||||
Each cycle is appended to the trace file as soon as it finishes; the manifest is written on harness shutdown. Both live under `~/.timmy/traces/bannerlord/`.
|
|
||||||
|
|
||||||
## Replay Protocol
|
|
||||||
|
|
||||||
1. Load a trace: `BannerlordTraceLogger.load_trace(trace_file)`
|
|
||||||
2. Create a fresh harness in mock mode
|
|
||||||
3. For each cycle in the trace:
|
|
||||||
- Re-execute the `actions_planned` list
|
|
||||||
- Compare actual `actions_executed` outcomes against the recorded ones
|
|
||||||
4. Score: `(matching_actions / total_actions) * 100`
|
|
||||||
|
|
||||||
### Eval Criteria
|
|
||||||
|
|
||||||
| Score | Grade | Meaning |
|
|
||||||
|---------|----------|--------------------------------------------|
|
|
||||||
| >= 90% | PASS | Replay matches original closely |
|
|
||||||
| 70-89% | PARTIAL | Some divergence, investigate differences |
|
|
||||||
| < 70% | FAIL | Significant drift, review action semantics |
|
|
||||||
|
|
||||||
## Replay Script (sketch)
|
|
||||||
|
|
||||||
```python
|
|
||||||
from pathlib import Path

from nexus.bannerlord_trace import BannerlordTraceLogger
|
|
||||||
from nexus.bannerlord_harness import BannerlordHarness
|
|
||||||
|
|
||||||
# Load trace
|
|
||||||
cycles = BannerlordTraceLogger.load_trace(
|
|
||||||
Path.home() / ".timmy" / "traces" / "bannerlord" / "trace_bl_xxx.jsonl"
|
|
||||||
)
|
|
||||||
|
|
||||||
# Replay
|
|
||||||
harness = BannerlordHarness(enable_mock=True, enable_trace=False)
|
|
||||||
await harness.start()
|
|
||||||
|
|
||||||
for cycle in cycles:
|
|
||||||
for action in cycle["actions_planned"]:
|
|
||||||
result = await harness.execute_action(action)
|
|
||||||
# Compare result against cycle["actions_executed"]
|
|
||||||
|
|
||||||
await harness.stop()
|
|
||||||
```
|
|
||||||
|
|
||||||
## Hermes Session Mapping
|
|
||||||
|
|
||||||
The `hermes_session_id` and `hermes_log_id` fields link traces to Hermes session logs.
|
|
||||||
When a trace is captured during a live Hermes session, populate these fields so
|
|
||||||
the trace can be correlated with the broader agent conversation context.
|
|
||||||
@@ -1,18 +0,0 @@
|
|||||||
{
|
|
||||||
"trace_id": "bl_20260410_201500_a1b2c3",
|
|
||||||
"harness_session_id": "f47ac10b",
|
|
||||||
"hermes_session_id": "f47ac10b",
|
|
||||||
"hermes_log_id": "",
|
|
||||||
"game": "Mount & Blade II: Bannerlord",
|
|
||||||
"app_id": 261550,
|
|
||||||
"started_at": "2026-04-10T20:15:00+00:00",
|
|
||||||
"finished_at": "2026-04-10T20:17:30+00:00",
|
|
||||||
"total_cycles": 3,
|
|
||||||
"total_actions": 6,
|
|
||||||
"total_succeeded": 6,
|
|
||||||
"total_failed": 0,
|
|
||||||
"trace_file": "~/.timmy/traces/bannerlord/trace_bl_20260410_201500_a1b2c3.jsonl",
|
|
||||||
"trace_dir": "~/.timmy/traces/bannerlord",
|
|
||||||
"replay_command": "python -m nexus.bannerlord_harness --mock --replay ~/.timmy/traces/bannerlord/trace_bl_20260410_201500_a1b2c3.jsonl",
|
|
||||||
"eval_note": "To replay: load trace, re-execute each cycle's actions_planned against a fresh harness in mock mode, compare actions_executed outcomes. Success metric: >=90% action parity between original and replay runs."
|
|
||||||
}
|
|
||||||
@@ -1,3 +0,0 @@
|
|||||||
{"cycle_index": 0, "timestamp_start": "2026-04-10T20:15:00+00:00", "timestamp_end": "2026-04-10T20:15:45+00:00", "duration_ms": 45000, "screenshot_path": "/tmp/bannerlord_capture_1744320900.png", "window_found": true, "screen_size": [1920, 1080], "mouse_position": [960, 540], "playtime_hours": 142.5, "players_online": 8421, "is_running": true, "actions_planned": [{"type": "move_to", "x": 960, "y": 540}, {"type": "press_key", "key": "space"}], "decision_note": "Initial state capture. Move to screen center and press space to advance.", "actions_executed": [{"success": true, "action": "move_to", "params": {"type": "move_to", "x": 960, "y": 540}, "timestamp": "2026-04-10T20:15:30+00:00", "error": null}, {"success": true, "action": "press_key", "params": {"type": "press_key", "key": "space"}, "timestamp": "2026-04-10T20:15:45+00:00", "error": null}], "actions_succeeded": 2, "actions_failed": 0, "hermes_session_id": "f47ac10b", "hermes_log_id": "", "harness_session_id": "f47ac10b"}
|
|
||||||
{"cycle_index": 1, "timestamp_start": "2026-04-10T20:15:45+00:00", "timestamp_end": "2026-04-10T20:16:30+00:00", "duration_ms": 45000, "screenshot_path": "/tmp/bannerlord_capture_1744320945.png", "window_found": true, "screen_size": [1920, 1080], "mouse_position": [960, 540], "playtime_hours": 142.5, "players_online": 8421, "is_running": true, "actions_planned": [{"type": "press_key", "key": "p"}], "decision_note": "Open party screen to inspect troops.", "actions_executed": [{"success": true, "action": "press_key", "params": {"type": "press_key", "key": "p"}, "timestamp": "2026-04-10T20:16:00+00:00", "error": null}], "actions_succeeded": 1, "actions_failed": 0, "hermes_session_id": "f47ac10b", "hermes_log_id": "", "harness_session_id": "f47ac10b"}
|
|
||||||
{"cycle_index": 2, "timestamp_start": "2026-04-10T20:16:30+00:00", "timestamp_end": "2026-04-10T20:17:30+00:00", "duration_ms": 60000, "screenshot_path": "/tmp/bannerlord_capture_1744321020.png", "window_found": true, "screen_size": [1920, 1080], "mouse_position": [960, 540], "playtime_hours": 142.5, "players_online": 8421, "is_running": true, "actions_planned": [{"type": "press_key", "key": "escape"}, {"type": "move_to", "x": 500, "y": 300}, {"type": "click", "x": 500, "y": 300}], "decision_note": "Close party screen, click on campaign map settlement.", "actions_executed": [{"success": true, "action": "press_key", "params": {"type": "press_key", "key": "escape"}, "timestamp": "2026-04-10T20:16:45+00:00", "error": null}, {"success": true, "action": "move_to", "params": {"type": "move_to", "x": 500, "y": 300}, "timestamp": "2026-04-10T20:17:00+00:00", "error": null}, {"success": true, "action": "click", "params": {"type": "click", "x": 500, "y": 300}, "timestamp": "2026-04-10T20:17:30+00:00", "error": null}], "actions_succeeded": 3, "actions_failed": 0, "hermes_session_id": "f47ac10b", "hermes_log_id": "", "harness_session_id": "f47ac10b"}
|
|
||||||
Reference in New Issue
Block a user