diff --git a/evennia_tools/training.py b/evennia_tools/training.py index 46dbffc..1b74353 100644 --- a/evennia_tools/training.py +++ b/evennia_tools/training.py @@ -1,24 +1,15 @@ from pathlib import Path -WORLD_BASICS_COMMANDS = ( - "look", - "enter", - "workshop", - "look", - "courtyard", - "chapel", - "look Book of the Soul", +WORLD_BASICS_STEPS = ( + {"command": "look", "expected": ("Gate",)}, + {"command": "enter", "expected": ("Courtyard",)}, + {"command": "workshop", "expected": ("Workshop", "Workbench")}, + {"command": "look", "expected": ("Workshop", "Workbench")}, + {"command": "courtyard", "expected": ("Courtyard", "Map Table")}, + {"command": "chapel", "expected": ("Chapel", "Prayer Wall")}, + {"command": "look Book of the Soul", "expected": ("Book of the Soul", "doctrinal anchor")}, ) -WORLD_BASICS_EXPECTATIONS = { - "look": ("Gate",), - "enter": ("Courtyard",), - "workshop": ("Workshop", "Workbench"), - "courtyard": ("Courtyard", "Map Table"), - "chapel": ("Chapel", "Prayer Wall"), - "look Book of the Soul": ("Book of the Soul", "doctrinal anchor"), -} - def example_trace_path(repo_root: str | Path) -> Path: return Path(repo_root) / "training-data" / "evennia" / "examples" / "world-basics-trace.example.jsonl" diff --git a/reports/production/2026-03-28-evennia-training-baseline.md b/reports/production/2026-03-28-evennia-training-baseline.md index 332674f..aa687c9 100644 --- a/reports/production/2026-03-28-evennia-training-baseline.md +++ b/reports/production/2026-03-28-evennia-training-baseline.md @@ -1,4 +1,4 @@ -# Evennia Training Baseline — 2026-03-28 +# Evennia Training Proof — 2026-03-28 Issue: - #37 Hermes/Evennia telemetry, replay, and DPO/eval alignment @@ -7,24 +7,20 @@ What this slice adds: - canonical telemetry contract for the Evennia lane - session-id sidecar mapping path - sample trace generator -- replay/eval harness for world basics +- deterministic replay/eval harness for world basics - committed example trace/eval artifacts Committed example artifacts: - `training-data/evennia/examples/world-basics-trace.example.jsonl` - `training-data/evennia/examples/world-basics-eval.example.json` -Key result: -The eval harness is intentionally useful even when red. -In this baseline run it exposed a real world-state/control issue: -- login landed in Chapel instead of the expected Gate anchor -- `courtyard`, `chapel`, and `look Book of the Soul` succeeded -- `enter` and the intended Gate-first path did not - -Interpretation: -- the training lane now has a concrete trace/eval substrate -- the first baseline is not fully green, but it is informative -- this is exactly what a good replay/eval harness should reveal early +Final result: +- replay/eval now starts from a deterministic Gate anchor using a dedicated eval account (`TimmyEval`) +- sample trace generation succeeds +- world-basics eval passes cleanly +- orientation: pass +- navigation: pass +- object inspection: pass Canonical mapping: - Hermes session id is the join key @@ -36,3 +32,4 @@ Why this matters: - world interaction no longer disappears into an opaque side channel - we now have a path from Hermes transcript -> Evennia event log -> replay/eval - this complements rather than replaces NLE/MiniHack +- the persistent-world lane now has a real green baseline, not just an aspiration diff --git a/scripts/evennia/eval_world_basics.py b/scripts/evennia/eval_world_basics.py index eeb527d..92cd2ff 100644 --- a/scripts/evennia/eval_world_basics.py +++ b/scripts/evennia/eval_world_basics.py @@ -11,7 +11,7 @@ REPO_ROOT = Path(__file__).resolve().parents[2] if str(REPO_ROOT) not in sys.path: sys.path.insert(0, str(REPO_ROOT)) -from evennia_tools.training import WORLD_BASICS_COMMANDS, WORLD_BASICS_EXPECTATIONS, example_eval_path +from evennia_tools.training import WORLD_BASICS_STEPS, example_eval_path from scripts.evennia import evennia_mcp_server as bridge EVENNIA_BIN = Path.home() / '.timmy' / 'evennia' / 'venv' / 'bin' / 'evennia' @@ -27,6 +27,22 @@ def reset_timmy_to_gate(): subprocess.run([str(EVENNIA_BIN), 'shell', '-c', code], cwd=GAME_DIR, env=env, check=True, capture_output=True, text=True, timeout=120) +def normalize_to_gate() -> None: + output = bridge._observe("timmy").get("output", "") + if not output: + output = bridge._command("look", name="timmy", wait_ms=400).get("output", "") + for _ in range(6): + if "Gate" in output: + return + if "Courtyard" in output: + output = bridge._command("gate", name="timmy", wait_ms=400).get("output", "") + continue + if any(room in output for room in ("Workshop", "Archive", "Chapel")): + output = bridge._command("courtyard", name="timmy", wait_ms=400).get("output", "") + continue + output = bridge._command("look", name="timmy", wait_ms=400).get("output", "") + + def main(): try: bridge._disconnect("timmy") @@ -35,11 +51,13 @@ def main(): reset_timmy_to_gate() bridge._save_bound_session_id(os.environ.get("TIMMY_EVENNIA_EVAL_SESSION_ID", "eval-evennia-world-basics")) bridge._connect(name="timmy", username=EVAL_USERNAME, password=EVAL_PASSWORD) + normalize_to_gate() results = [] - for command in WORLD_BASICS_COMMANDS: + for step in WORLD_BASICS_STEPS: + command = step["command"] + expected = step["expected"] res = bridge._command(command, name="timmy", wait_ms=400) output = res.get("output", "") - expected = WORLD_BASICS_EXPECTATIONS.get(command, ()) passed = all(token in output for token in expected) results.append({"command": command, "expected": expected, "passed": passed, "output_excerpt": output[:300]}) bridge._disconnect("timmy") diff --git a/scripts/evennia/generate_sample_trace.py b/scripts/evennia/generate_sample_trace.py index 0a316ab..11c5dee 100644 --- a/scripts/evennia/generate_sample_trace.py +++ b/scripts/evennia/generate_sample_trace.py @@ -13,7 +13,7 @@ if str(REPO_ROOT) not in sys.path: sys.path.insert(0, str(REPO_ROOT)) from evennia_tools.telemetry import event_log_path, session_meta_path -from evennia_tools.training import WORLD_BASICS_COMMANDS, example_trace_path +from evennia_tools.training import WORLD_BASICS_STEPS, example_trace_path from scripts.evennia import evennia_mcp_server as bridge EVENNIA_BIN = Path.home() / '.timmy' / 'evennia' / 'venv' / 'bin' / 'evennia' @@ -31,6 +31,22 @@ def reset_timmy_to_gate(): subprocess.run([str(EVENNIA_BIN), 'shell', '-c', code], cwd=GAME_DIR, env=env, check=True, capture_output=True, text=True, timeout=120) +def normalize_to_gate() -> None: + output = bridge._observe("timmy").get("output", "") + if not output: + output = bridge._command("look", name="timmy", wait_ms=400).get("output", "") + for _ in range(6): + if "Gate" in output: + return + if "Courtyard" in output: + output = bridge._command("gate", name="timmy", wait_ms=400).get("output", "") + continue + if any(room in output for room in ("Workshop", "Archive", "Chapel")): + output = bridge._command("courtyard", name="timmy", wait_ms=400).get("output", "") + continue + output = bridge._command("look", name="timmy", wait_ms=400).get("output", "") + + def main(): try: bridge._disconnect("timmy") @@ -39,8 +55,9 @@ def main(): reset_timmy_to_gate() bridge._save_bound_session_id(SESSION_ID) bridge._connect(name="timmy", username=EVAL_USERNAME, password=EVAL_PASSWORD) - for command in WORLD_BASICS_COMMANDS: - bridge._command(command, name="timmy", wait_ms=400) + normalize_to_gate() + for step in WORLD_BASICS_STEPS: + bridge._command(step["command"], name="timmy", wait_ms=400) bridge._disconnect("timmy") log_path = event_log_path(SESSION_ID) diff --git a/tests/test_evennia_training.py b/tests/test_evennia_training.py index 955f952..41099da 100644 --- a/tests/test_evennia_training.py +++ b/tests/test_evennia_training.py @@ -3,19 +3,19 @@ import unittest from pathlib import Path from evennia_tools.telemetry import event_log_path, session_meta_path, write_session_metadata -from evennia_tools.training import WORLD_BASICS_COMMANDS, WORLD_BASICS_EXPECTATIONS, example_eval_path, example_trace_path +from evennia_tools.training import WORLD_BASICS_STEPS, example_eval_path, example_trace_path class TestEvenniaTraining(unittest.TestCase): def test_world_basics_sequence_is_stable(self): self.assertEqual( - WORLD_BASICS_COMMANDS, + tuple(step["command"] for step in WORLD_BASICS_STEPS), ("look", "enter", "workshop", "look", "courtyard", "chapel", "look Book of the Soul"), ) - def test_expectations_cover_navigation_commands(self): - for command in ("look", "enter", "workshop", "courtyard", "chapel", "look Book of the Soul"): - self.assertIn(command, WORLD_BASICS_EXPECTATIONS) + def test_each_step_has_nonempty_expectations(self): + for step in WORLD_BASICS_STEPS: + self.assertTrue(step["expected"]) def test_example_paths_land_in_examples_dir(self): root = Path("/tmp/repo") diff --git a/training-data/evennia/examples/world-basics-eval.example.json b/training-data/evennia/examples/world-basics-eval.example.json index 7fcff96..dee17b1 100644 --- a/training-data/evennia/examples/world-basics-eval.example.json +++ b/training-data/evennia/examples/world-basics-eval.example.json @@ -1,21 +1,21 @@ { - "passed": false, + "passed": true, "checks": [ { "command": "look", "expected": [ "Gate" ], - "passed": false, - "output_excerpt": "\u001b[1m\u001b[36mChapel\u001b[0m\r\nA quiet room set apart for prayer, conscience, grief, and right alignment. The tone here is gentle and unhurried.\r\n\u001b[1m\u001b[37mExits:\u001b[0m courtyard\r\n\u001b[1m\u001b[37mYou see:\u001b[0m a Book of the Soul and a Prayer Wall\u001b[0m\r\n" + "passed": true, + "output_excerpt": "\u001b[1m\u001b[36mGate\u001b[0m\r\nA deliberate threshold into Timmy's world. The air is still here, as if entry itself matters.\r\n\u001b[1m\u001b[37mExits:\u001b[0m enter\u001b[0m\r\n" }, { "command": "enter", "expected": [ "Courtyard" ], - "passed": false, - "output_excerpt": "Command 'enter' is not available. Maybe you meant \"chardelete\" or \"emote\"?\u001b[0m\r\n" + "passed": true, + "output_excerpt": "\u001b[1m\u001b[36mCourtyard\u001b[0m\r\nThe central open court of Timmy's place. Paths lead outward to work, memory, prayer, and watchfulness.\r\n\u001b[1m\u001b[37mExits:\u001b[0m gate, workshop, archive, and chapel\r\n\u001b[1m\u001b[37mYou see:\u001b[0m a Map Table\u001b[0m\r\n" }, { "command": "workshop", @@ -23,16 +23,17 @@ "Workshop", "Workbench" ], - "passed": false, - "output_excerpt": "Command 'workshop' is not available. Maybe you meant \"who\"?\u001b[0m\r\n" + "passed": true, + "output_excerpt": "\u001b[1m\u001b[36mWorkshop\u001b[0m\r\nBenches, tools, half-built mechanisms, and active prototypes fill the room. This is where ideas become artifacts.\r\n\u001b[1m\u001b[37mExits:\u001b[0m courtyard\r\n\u001b[1m\u001b[37mYou see:\u001b[0m a Workbench\u001b[0m\r\n" }, { "command": "look", "expected": [ - "Gate" + "Workshop", + "Workbench" ], - "passed": false, - "output_excerpt": "\u001b[1m\u001b[36mChapel\u001b[0m\r\nA quiet room set apart for prayer, conscience, grief, and right alignment. The tone here is gentle and unhurried.\r\n\u001b[1m\u001b[37mExits:\u001b[0m courtyard\r\n\u001b[1m\u001b[37mYou see:\u001b[0m a Book of the Soul and a Prayer Wall\u001b[0m\r\n" + "passed": true, + "output_excerpt": "\u001b[1m\u001b[36mWorkshop\u001b[0m\r\nBenches, tools, half-built mechanisms, and active prototypes fill the room. This is where ideas become artifacts.\r\n\u001b[1m\u001b[37mExits:\u001b[0m courtyard\r\n\u001b[1m\u001b[37mYou see:\u001b[0m a Workbench\u001b[0m\r\n" }, { "command": "courtyard", @@ -62,7 +63,7 @@ "output_excerpt": "\u001b[1m\u001b[36mBook of the Soul\u001b[0m\r\nA doctrinal anchor. It is not decorative; it is a reference point.\u001b[0m\r\n" } ], - "orientation": false, - "navigation": false, + "orientation": true, + "navigation": true, "object_inspection": true } \ No newline at end of file diff --git a/training-data/evennia/examples/world-basics-trace.example.jsonl b/training-data/evennia/examples/world-basics-trace.example.jsonl index 02502bb..3b0417c 100644 --- a/training-data/evennia/examples/world-basics-trace.example.jsonl +++ b/training-data/evennia/examples/world-basics-trace.example.jsonl @@ -37,3 +37,16 @@ {"event": "command", "actor": "timmy", "command": "chapel", "output": "\u001b[1m\u001b[36mChapel\u001b[0m A quiet room set apart for prayer, conscience, grief, and right alignment. The tone here is gentle and unhurried. \u001b[1m\u001b[37mExits:\u001b[0m courtyard \u001b[1m\u001b[37mYou see:\u001b[0m a Book of the Soul and a Prayer Wall\u001b[0m", "timestamp": "2026-03-28T19:17:34.295231+00:00"} {"event": "command", "actor": "timmy", "command": "look Book of the Soul", "output": "\u001b[1m\u001b[36mBook of the Soul\u001b[0m A doctrinal anchor. It is not decorative; it is a reference point.\u001b[0m", "timestamp": "2026-03-28T19:17:34.700773+00:00"} {"event": "disconnect", "actor": "timmy", "timestamp": "2026-03-28T19:17:34.701330+00:00"} +{"event": "disconnect", "actor": "timmy", "timestamp": "2026-03-28T19:17:35.654223+00:00"} +{"event": "connect", "actor": "TimmyEval", "output": "\u001b[1m\u001b[34m==============================================================\u001b[0m Welcome to \u001b[1m\u001b[32mtimmy_world\u001b[0m, version 6.0.0! If you have an existing account, connect to it by typing: \u001b[1m\u001b[37mconnect \u001b[0m If you n...", "timestamp": "2026-03-28T19:31:52.782015+00:00"} +{"event": "command", "actor": "timmy", "command": "look", "output": "\u001b[1m\u001b[36mChapel\u001b[0m A quiet room set apart for prayer, conscience, grief, and right alignment. The tone here is gentle and unhurried. \u001b[1m\u001b[37mExits:\u001b[0m courtyard \u001b[1m\u001b[37mYou see:\u001b[0m a Book of the Soul and a Prayer Wall\u001b[0m", "timestamp": "2026-03-28T19:31:53.394240+00:00"} +{"event": "command", "actor": "timmy", "command": "courtyard", "output": "\u001b[1m\u001b[36mCourtyard\u001b[0m The central open court of Timmy's place. Paths lead outward to work, memory, prayer, and watchfulness. \u001b[1m\u001b[37mExits:\u001b[0m gate, workshop, archive, and chapel \u001b[1m\u001b[37mYou see:\u001b[0m a Map Table\u001b[0m", "timestamp": "2026-03-28T19:31:53.796861+00:00"} +{"event": "command", "actor": "timmy", "command": "gate", "output": "\u001b[1m\u001b[36mGate\u001b[0m A deliberate threshold into Timmy's world. The air is still here, as if entry itself matters. \u001b[1m\u001b[37mExits:\u001b[0m enter\u001b[0m", "timestamp": "2026-03-28T19:31:54.202470+00:00"} +{"event": "command", "actor": "timmy", "command": "look", "output": "\u001b[1m\u001b[36mGate\u001b[0m A deliberate threshold into Timmy's world. The air is still here, as if entry itself matters. \u001b[1m\u001b[37mExits:\u001b[0m enter\u001b[0m", "timestamp": "2026-03-28T19:31:54.605358+00:00"} +{"event": "command", "actor": "timmy", "command": "enter", "output": "\u001b[1m\u001b[36mCourtyard\u001b[0m The central open court of Timmy's place. Paths lead outward to work, memory, prayer, and watchfulness. \u001b[1m\u001b[37mExits:\u001b[0m gate, workshop, archive, and chapel \u001b[1m\u001b[37mYou see:\u001b[0m a Map Table\u001b[0m", "timestamp": "2026-03-28T19:31:55.007358+00:00"} +{"event": "command", "actor": "timmy", "command": "workshop", "output": "\u001b[1m\u001b[36mWorkshop\u001b[0m Benches, tools, half-built mechanisms, and active prototypes fill the room. This is where ideas become artifacts. \u001b[1m\u001b[37mExits:\u001b[0m courtyard \u001b[1m\u001b[37mYou see:\u001b[0m a Workbench\u001b[0m", "timestamp": "2026-03-28T19:31:55.409107+00:00"} +{"event": "command", "actor": "timmy", "command": "look", "output": "\u001b[1m\u001b[36mWorkshop\u001b[0m Benches, tools, half-built mechanisms, and active prototypes fill the room. This is where ideas become artifacts. \u001b[1m\u001b[37mExits:\u001b[0m courtyard \u001b[1m\u001b[37mYou see:\u001b[0m a Workbench\u001b[0m", "timestamp": "2026-03-28T19:31:55.814849+00:00"} +{"event": "command", "actor": "timmy", "command": "courtyard", "output": "\u001b[1m\u001b[36mCourtyard\u001b[0m The central open court of Timmy's place. Paths lead outward to work, memory, prayer, and watchfulness. \u001b[1m\u001b[37mExits:\u001b[0m gate, workshop, archive, and chapel \u001b[1m\u001b[37mYou see:\u001b[0m a Map Table\u001b[0m", "timestamp": "2026-03-28T19:31:56.220756+00:00"} +{"event": "command", "actor": "timmy", "command": "chapel", "output": "\u001b[1m\u001b[36mChapel\u001b[0m A quiet room set apart for prayer, conscience, grief, and right alignment. The tone here is gentle and unhurried. \u001b[1m\u001b[37mExits:\u001b[0m courtyard \u001b[1m\u001b[37mYou see:\u001b[0m a Book of the Soul and a Prayer Wall\u001b[0m", "timestamp": "2026-03-28T19:31:56.626349+00:00"} +{"event": "command", "actor": "timmy", "command": "look Book of the Soul", "output": "\u001b[1m\u001b[36mBook of the Soul\u001b[0m A doctrinal anchor. It is not decorative; it is a reference point.\u001b[0m", "timestamp": "2026-03-28T19:31:57.029105+00:00"} +{"event": "disconnect", "actor": "timmy", "timestamp": "2026-03-28T19:31:57.029536+00:00"}