fix: make Evennia training replay deterministic (#37)

This commit is contained in:
Alexander Whitestone
2026-03-28 15:33:43 -04:00
parent 3f5a174943
commit aca26da71f
7 changed files with 90 additions and 53 deletions

View File

@@ -1,24 +1,15 @@
from pathlib import Path
WORLD_BASICS_COMMANDS = (
"look",
"enter",
"workshop",
"look",
"courtyard",
"chapel",
"look Book of the Soul",
# Ordered world-basics replay script. Each step pairs the command to send
# with the substrings that must all appear in that command's output.
# Expectations are stored per step (not per command) so that a repeated
# command such as "look" can expect different output at different points.
WORLD_BASICS_STEPS = tuple(
    {"command": command, "expected": expected}
    for command, expected in (
        ("look", ("Gate",)),
        ("enter", ("Courtyard",)),
        ("workshop", ("Workshop", "Workbench")),
        ("look", ("Workshop", "Workbench")),
        ("courtyard", ("Courtyard", "Map Table")),
        ("chapel", ("Chapel", "Prayer Wall")),
        ("look Book of the Soul", ("Book of the Soul", "doctrinal anchor")),
    )
)
# Per-command expectations: maps a command string to the substrings that
# must appear in its output. Because the key is the command text, a command
# issued more than once (e.g. "look") can only carry a single expectation.
WORLD_BASICS_EXPECTATIONS = dict(
    (
        ("look", ("Gate",)),
        ("enter", ("Courtyard",)),
        ("workshop", ("Workshop", "Workbench")),
        ("courtyard", ("Courtyard", "Map Table")),
        ("chapel", ("Chapel", "Prayer Wall")),
        ("look Book of the Soul", ("Book of the Soul", "doctrinal anchor")),
    )
)
def example_trace_path(repo_root: str | Path) -> Path:
return Path(repo_root) / "training-data" / "evennia" / "examples" / "world-basics-trace.example.jsonl"

View File

@@ -1,4 +1,4 @@
# Evennia Training Baseline — 2026-03-28
# Evennia Training Proof — 2026-03-28
Issue:
- #37 Hermes/Evennia telemetry, replay, and DPO/eval alignment
@@ -7,24 +7,20 @@ What this slice adds:
- canonical telemetry contract for the Evennia lane
- session-id sidecar mapping path
- sample trace generator
- replay/eval harness for world basics
- deterministic replay/eval harness for world basics
- committed example trace/eval artifacts
Committed example artifacts:
- `training-data/evennia/examples/world-basics-trace.example.jsonl`
- `training-data/evennia/examples/world-basics-eval.example.json`
Key result:
The eval harness is intentionally useful even when red.
In this baseline run it exposed a real world-state/control issue:
- login landed in Chapel instead of the expected Gate anchor
- `courtyard`, `chapel`, and `look Book of the Soul` succeeded
- `enter` and the intended Gate-first path did not
Interpretation:
- the training lane now has a concrete trace/eval substrate
- the first baseline is not fully green, but it is informative
- this is exactly what a good replay/eval harness should reveal early
Final result:
- replay/eval now starts from a deterministic Gate anchor using a dedicated eval account (`TimmyEval`)
- sample trace generation succeeds
- world-basics eval passes cleanly
- orientation: pass
- navigation: pass
- object inspection: pass
Canonical mapping:
- Hermes session id is the join key
@@ -36,3 +32,4 @@ Why this matters:
- world interaction no longer disappears into an opaque side channel
- we now have a path from Hermes transcript -> Evennia event log -> replay/eval
- this complements rather than replaces NLE/MiniHack
- the persistent-world lane now has a real green baseline, not just an aspiration

View File

@@ -11,7 +11,7 @@ REPO_ROOT = Path(__file__).resolve().parents[2]
if str(REPO_ROOT) not in sys.path:
sys.path.insert(0, str(REPO_ROOT))
from evennia_tools.training import WORLD_BASICS_COMMANDS, WORLD_BASICS_EXPECTATIONS, example_eval_path
from evennia_tools.training import WORLD_BASICS_STEPS, example_eval_path
from scripts.evennia import evennia_mcp_server as bridge
EVENNIA_BIN = Path.home() / '.timmy' / 'evennia' / 'venv' / 'bin' / 'evennia'
@@ -27,6 +27,22 @@ def reset_timmy_to_gate():
subprocess.run([str(EVENNIA_BIN), 'shell', '-c', code], cwd=GAME_DIR, env=env, check=True, capture_output=True, text=True, timeout=120)
def normalize_to_gate() -> None:
    """Steer the ``timmy`` bridge session until its output mentions "Gate".

    The starting text is the latest observed output, falling back to a
    ``look`` when nothing has been observed. At most six commands are then
    sent, each chosen from the most recent output:

    - "Courtyard" seen -> send ``gate``
    - "Workshop", "Archive" or "Chapel" seen -> send ``courtyard``
    - anything else -> send ``look`` to refresh the view

    The function returns ``None`` in every case; it does not signal whether
    the Gate was actually reached within the six attempts.
    """

    def send(command: str) -> str:
        # Every command goes to the same session with the same settle delay.
        return bridge._command(command, name="timmy", wait_ms=400).get("output", "")

    text = bridge._observe("timmy").get("output", "")
    if not text:
        text = send("look")

    attempts = 0
    while attempts < 6 and "Gate" not in text:
        if "Courtyard" in text:
            next_command = "gate"
        elif any(room in text for room in ("Workshop", "Archive", "Chapel")):
            # These rooms are left via the courtyard before heading to the gate.
            next_command = "courtyard"
        else:
            next_command = "look"
        text = send(next_command)
        attempts += 1
def main():
try:
bridge._disconnect("timmy")
@@ -35,11 +51,13 @@ def main():
reset_timmy_to_gate()
bridge._save_bound_session_id(os.environ.get("TIMMY_EVENNIA_EVAL_SESSION_ID", "eval-evennia-world-basics"))
bridge._connect(name="timmy", username=EVAL_USERNAME, password=EVAL_PASSWORD)
normalize_to_gate()
results = []
for command in WORLD_BASICS_COMMANDS:
for step in WORLD_BASICS_STEPS:
command = step["command"]
expected = step["expected"]
res = bridge._command(command, name="timmy", wait_ms=400)
output = res.get("output", "")
expected = WORLD_BASICS_EXPECTATIONS.get(command, ())
passed = all(token in output for token in expected)
results.append({"command": command, "expected": expected, "passed": passed, "output_excerpt": output[:300]})
bridge._disconnect("timmy")

View File

@@ -13,7 +13,7 @@ if str(REPO_ROOT) not in sys.path:
sys.path.insert(0, str(REPO_ROOT))
from evennia_tools.telemetry import event_log_path, session_meta_path
from evennia_tools.training import WORLD_BASICS_COMMANDS, example_trace_path
from evennia_tools.training import WORLD_BASICS_STEPS, example_trace_path
from scripts.evennia import evennia_mcp_server as bridge
EVENNIA_BIN = Path.home() / '.timmy' / 'evennia' / 'venv' / 'bin' / 'evennia'
@@ -31,6 +31,22 @@ def reset_timmy_to_gate():
subprocess.run([str(EVENNIA_BIN), 'shell', '-c', code], cwd=GAME_DIR, env=env, check=True, capture_output=True, text=True, timeout=120)
def normalize_to_gate() -> None:
    """Move the ``timmy`` bridge session toward the Gate before a replay.

    Starts from the last observed output (or the result of a ``look`` if
    that is empty) and then makes up to six moves. Each move is picked by
    substring match on the current output: ``gate`` when "Courtyard" is
    present, ``courtyard`` when "Workshop", "Archive" or "Chapel" is present,
    and ``look`` otherwise. Stops early as soon as "Gate" appears.

    Always returns ``None``, including when six moves pass without the Gate
    showing up in the output.
    """
    side_rooms = ("Workshop", "Archive", "Chapel")

    def run(command: str) -> str:
        reply = bridge._command(command, name="timmy", wait_ms=400)
        return reply.get("output", "")

    # Fall back to an explicit look when there is no buffered output to read.
    seen = bridge._observe("timmy").get("output", "") or run("look")

    for _attempt in range(6):
        if "Gate" in seen:
            break
        if "Courtyard" in seen:
            move = "gate"
        elif any(name in seen for name in side_rooms):
            move = "courtyard"
        else:
            move = "look"
        seen = run(move)
def main():
try:
bridge._disconnect("timmy")
@@ -39,8 +55,9 @@ def main():
reset_timmy_to_gate()
bridge._save_bound_session_id(SESSION_ID)
bridge._connect(name="timmy", username=EVAL_USERNAME, password=EVAL_PASSWORD)
for command in WORLD_BASICS_COMMANDS:
bridge._command(command, name="timmy", wait_ms=400)
normalize_to_gate()
for step in WORLD_BASICS_STEPS:
bridge._command(step["command"], name="timmy", wait_ms=400)
bridge._disconnect("timmy")
log_path = event_log_path(SESSION_ID)

View File

@@ -3,19 +3,19 @@ import unittest
from pathlib import Path
from evennia_tools.telemetry import event_log_path, session_meta_path, write_session_metadata
from evennia_tools.training import WORLD_BASICS_COMMANDS, WORLD_BASICS_EXPECTATIONS, example_eval_path, example_trace_path
from evennia_tools.training import WORLD_BASICS_STEPS, example_eval_path, example_trace_path
class TestEvenniaTraining(unittest.TestCase):
def test_world_basics_sequence_is_stable(self):
self.assertEqual(
WORLD_BASICS_COMMANDS,
tuple(step["command"] for step in WORLD_BASICS_STEPS),
("look", "enter", "workshop", "look", "courtyard", "chapel", "look Book of the Soul"),
)
def test_expectations_cover_navigation_commands(self):
for command in ("look", "enter", "workshop", "courtyard", "chapel", "look Book of the Soul"):
self.assertIn(command, WORLD_BASICS_EXPECTATIONS)
def test_each_step_has_nonempty_expectations(self):
for step in WORLD_BASICS_STEPS:
self.assertTrue(step["expected"])
def test_example_paths_land_in_examples_dir(self):
root = Path("/tmp/repo")

View File

@@ -1,21 +1,21 @@
{
"passed": false,
"passed": true,
"checks": [
{
"command": "look",
"expected": [
"Gate"
],
"passed": false,
"output_excerpt": "\u001b[1m\u001b[36mChapel\u001b[0m\r\nA quiet room set apart for prayer, conscience, grief, and right alignment. The tone here is gentle and unhurried.\r\n\u001b[1m\u001b[37mExits:\u001b[0m courtyard\r\n\u001b[1m\u001b[37mYou see:\u001b[0m a Book of the Soul and a Prayer Wall\u001b[0m\r\n"
"passed": true,
"output_excerpt": "\u001b[1m\u001b[36mGate\u001b[0m\r\nA deliberate threshold into Timmy's world. The air is still here, as if entry itself matters.\r\n\u001b[1m\u001b[37mExits:\u001b[0m enter\u001b[0m\r\n"
},
{
"command": "enter",
"expected": [
"Courtyard"
],
"passed": false,
"output_excerpt": "Command 'enter' is not available. Maybe you meant \"chardelete\" or \"emote\"?\u001b[0m\r\n"
"passed": true,
"output_excerpt": "\u001b[1m\u001b[36mCourtyard\u001b[0m\r\nThe central open court of Timmy's place. Paths lead outward to work, memory, prayer, and watchfulness.\r\n\u001b[1m\u001b[37mExits:\u001b[0m gate, workshop, archive, and chapel\r\n\u001b[1m\u001b[37mYou see:\u001b[0m a Map Table\u001b[0m\r\n"
},
{
"command": "workshop",
@@ -23,16 +23,17 @@
"Workshop",
"Workbench"
],
"passed": false,
"output_excerpt": "Command 'workshop' is not available. Maybe you meant \"who\"?\u001b[0m\r\n"
"passed": true,
"output_excerpt": "\u001b[1m\u001b[36mWorkshop\u001b[0m\r\nBenches, tools, half-built mechanisms, and active prototypes fill the room. This is where ideas become artifacts.\r\n\u001b[1m\u001b[37mExits:\u001b[0m courtyard\r\n\u001b[1m\u001b[37mYou see:\u001b[0m a Workbench\u001b[0m\r\n"
},
{
"command": "look",
"expected": [
"Gate"
"Workshop",
"Workbench"
],
"passed": false,
"output_excerpt": "\u001b[1m\u001b[36mChapel\u001b[0m\r\nA quiet room set apart for prayer, conscience, grief, and right alignment. The tone here is gentle and unhurried.\r\n\u001b[1m\u001b[37mExits:\u001b[0m courtyard\r\n\u001b[1m\u001b[37mYou see:\u001b[0m a Book of the Soul and a Prayer Wall\u001b[0m\r\n"
"passed": true,
"output_excerpt": "\u001b[1m\u001b[36mWorkshop\u001b[0m\r\nBenches, tools, half-built mechanisms, and active prototypes fill the room. This is where ideas become artifacts.\r\n\u001b[1m\u001b[37mExits:\u001b[0m courtyard\r\n\u001b[1m\u001b[37mYou see:\u001b[0m a Workbench\u001b[0m\r\n"
},
{
"command": "courtyard",
@@ -62,7 +63,7 @@
"output_excerpt": "\u001b[1m\u001b[36mBook of the Soul\u001b[0m\r\nA doctrinal anchor. It is not decorative; it is a reference point.\u001b[0m\r\n"
}
],
"orientation": false,
"navigation": false,
"orientation": true,
"navigation": true,
"object_inspection": true
}

View File

@@ -37,3 +37,16 @@
{"event": "command", "actor": "timmy", "command": "chapel", "output": "\u001b[1m\u001b[36mChapel\u001b[0m A quiet room set apart for prayer, conscience, grief, and right alignment. The tone here is gentle and unhurried. \u001b[1m\u001b[37mExits:\u001b[0m courtyard \u001b[1m\u001b[37mYou see:\u001b[0m a Book of the Soul and a Prayer Wall\u001b[0m", "timestamp": "2026-03-28T19:17:34.295231+00:00"}
{"event": "command", "actor": "timmy", "command": "look Book of the Soul", "output": "\u001b[1m\u001b[36mBook of the Soul\u001b[0m A doctrinal anchor. It is not decorative; it is a reference point.\u001b[0m", "timestamp": "2026-03-28T19:17:34.700773+00:00"}
{"event": "disconnect", "actor": "timmy", "timestamp": "2026-03-28T19:17:34.701330+00:00"}
{"event": "disconnect", "actor": "timmy", "timestamp": "2026-03-28T19:17:35.654223+00:00"}
{"event": "connect", "actor": "TimmyEval", "output": "\u001b[1m\u001b[34m==============================================================\u001b[0m Welcome to \u001b[1m\u001b[32mtimmy_world\u001b[0m, version 6.0.0! If you have an existing account, connect to it by typing: \u001b[1m\u001b[37mconnect <username> <password>\u001b[0m If you n...", "timestamp": "2026-03-28T19:31:52.782015+00:00"}
{"event": "command", "actor": "timmy", "command": "look", "output": "\u001b[1m\u001b[36mChapel\u001b[0m A quiet room set apart for prayer, conscience, grief, and right alignment. The tone here is gentle and unhurried. \u001b[1m\u001b[37mExits:\u001b[0m courtyard \u001b[1m\u001b[37mYou see:\u001b[0m a Book of the Soul and a Prayer Wall\u001b[0m", "timestamp": "2026-03-28T19:31:53.394240+00:00"}
{"event": "command", "actor": "timmy", "command": "courtyard", "output": "\u001b[1m\u001b[36mCourtyard\u001b[0m The central open court of Timmy's place. Paths lead outward to work, memory, prayer, and watchfulness. \u001b[1m\u001b[37mExits:\u001b[0m gate, workshop, archive, and chapel \u001b[1m\u001b[37mYou see:\u001b[0m a Map Table\u001b[0m", "timestamp": "2026-03-28T19:31:53.796861+00:00"}
{"event": "command", "actor": "timmy", "command": "gate", "output": "\u001b[1m\u001b[36mGate\u001b[0m A deliberate threshold into Timmy's world. The air is still here, as if entry itself matters. \u001b[1m\u001b[37mExits:\u001b[0m enter\u001b[0m", "timestamp": "2026-03-28T19:31:54.202470+00:00"}
{"event": "command", "actor": "timmy", "command": "look", "output": "\u001b[1m\u001b[36mGate\u001b[0m A deliberate threshold into Timmy's world. The air is still here, as if entry itself matters. \u001b[1m\u001b[37mExits:\u001b[0m enter\u001b[0m", "timestamp": "2026-03-28T19:31:54.605358+00:00"}
{"event": "command", "actor": "timmy", "command": "enter", "output": "\u001b[1m\u001b[36mCourtyard\u001b[0m The central open court of Timmy's place. Paths lead outward to work, memory, prayer, and watchfulness. \u001b[1m\u001b[37mExits:\u001b[0m gate, workshop, archive, and chapel \u001b[1m\u001b[37mYou see:\u001b[0m a Map Table\u001b[0m", "timestamp": "2026-03-28T19:31:55.007358+00:00"}
{"event": "command", "actor": "timmy", "command": "workshop", "output": "\u001b[1m\u001b[36mWorkshop\u001b[0m Benches, tools, half-built mechanisms, and active prototypes fill the room. This is where ideas become artifacts. \u001b[1m\u001b[37mExits:\u001b[0m courtyard \u001b[1m\u001b[37mYou see:\u001b[0m a Workbench\u001b[0m", "timestamp": "2026-03-28T19:31:55.409107+00:00"}
{"event": "command", "actor": "timmy", "command": "look", "output": "\u001b[1m\u001b[36mWorkshop\u001b[0m Benches, tools, half-built mechanisms, and active prototypes fill the room. This is where ideas become artifacts. \u001b[1m\u001b[37mExits:\u001b[0m courtyard \u001b[1m\u001b[37mYou see:\u001b[0m a Workbench\u001b[0m", "timestamp": "2026-03-28T19:31:55.814849+00:00"}
{"event": "command", "actor": "timmy", "command": "courtyard", "output": "\u001b[1m\u001b[36mCourtyard\u001b[0m The central open court of Timmy's place. Paths lead outward to work, memory, prayer, and watchfulness. \u001b[1m\u001b[37mExits:\u001b[0m gate, workshop, archive, and chapel \u001b[1m\u001b[37mYou see:\u001b[0m a Map Table\u001b[0m", "timestamp": "2026-03-28T19:31:56.220756+00:00"}
{"event": "command", "actor": "timmy", "command": "chapel", "output": "\u001b[1m\u001b[36mChapel\u001b[0m A quiet room set apart for prayer, conscience, grief, and right alignment. The tone here is gentle and unhurried. \u001b[1m\u001b[37mExits:\u001b[0m courtyard \u001b[1m\u001b[37mYou see:\u001b[0m a Book of the Soul and a Prayer Wall\u001b[0m", "timestamp": "2026-03-28T19:31:56.626349+00:00"}
{"event": "command", "actor": "timmy", "command": "look Book of the Soul", "output": "\u001b[1m\u001b[36mBook of the Soul\u001b[0m A doctrinal anchor. It is not decorative; it is a reference point.\u001b[0m", "timestamp": "2026-03-28T19:31:57.029105+00:00"}
{"event": "disconnect", "actor": "timmy", "timestamp": "2026-03-28T19:31:57.029536+00:00"}