fix: make Evennia training replay deterministic (#37)
This commit is contained in:
@@ -1,24 +1,15 @@
|
||||
from pathlib import Path
|
||||
|
||||
WORLD_BASICS_COMMANDS = (
|
||||
"look",
|
||||
"enter",
|
||||
"workshop",
|
||||
"look",
|
||||
"courtyard",
|
||||
"chapel",
|
||||
"look Book of the Soul",
|
||||
WORLD_BASICS_STEPS = (
|
||||
{"command": "look", "expected": ("Gate",)},
|
||||
{"command": "enter", "expected": ("Courtyard",)},
|
||||
{"command": "workshop", "expected": ("Workshop", "Workbench")},
|
||||
{"command": "look", "expected": ("Workshop", "Workbench")},
|
||||
{"command": "courtyard", "expected": ("Courtyard", "Map Table")},
|
||||
{"command": "chapel", "expected": ("Chapel", "Prayer Wall")},
|
||||
{"command": "look Book of the Soul", "expected": ("Book of the Soul", "doctrinal anchor")},
|
||||
)
|
||||
|
||||
WORLD_BASICS_EXPECTATIONS = {
|
||||
"look": ("Gate",),
|
||||
"enter": ("Courtyard",),
|
||||
"workshop": ("Workshop", "Workbench"),
|
||||
"courtyard": ("Courtyard", "Map Table"),
|
||||
"chapel": ("Chapel", "Prayer Wall"),
|
||||
"look Book of the Soul": ("Book of the Soul", "doctrinal anchor"),
|
||||
}
|
||||
|
||||
|
||||
def example_trace_path(repo_root: str | Path) -> Path:
|
||||
return Path(repo_root) / "training-data" / "evennia" / "examples" / "world-basics-trace.example.jsonl"
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
# Evennia Training Baseline — 2026-03-28
|
||||
# Evennia Training Proof — 2026-03-28
|
||||
|
||||
Issue:
|
||||
- #37 Hermes/Evennia telemetry, replay, and DPO/eval alignment
|
||||
@@ -7,24 +7,20 @@ What this slice adds:
|
||||
- canonical telemetry contract for the Evennia lane
|
||||
- session-id sidecar mapping path
|
||||
- sample trace generator
|
||||
- replay/eval harness for world basics
|
||||
- deterministic replay/eval harness for world basics
|
||||
- committed example trace/eval artifacts
|
||||
|
||||
Committed example artifacts:
|
||||
- `training-data/evennia/examples/world-basics-trace.example.jsonl`
|
||||
- `training-data/evennia/examples/world-basics-eval.example.json`
|
||||
|
||||
Key result:
|
||||
The eval harness is intentionally useful even when red.
|
||||
In this baseline run it exposed a real world-state/control issue:
|
||||
- login landed in Chapel instead of the expected Gate anchor
|
||||
- `courtyard`, `chapel`, and `look Book of the Soul` succeeded
|
||||
- `enter` and the intended Gate-first path did not
|
||||
|
||||
Interpretation:
|
||||
- the training lane now has a concrete trace/eval substrate
|
||||
- the first baseline is not fully green, but it is informative
|
||||
- this is exactly what a good replay/eval harness should reveal early
|
||||
Final result:
|
||||
- replay/eval now starts from a deterministic Gate anchor using a dedicated eval account (`TimmyEval`)
|
||||
- sample trace generation succeeds
|
||||
- world-basics eval passes cleanly
|
||||
- orientation: pass
|
||||
- navigation: pass
|
||||
- object inspection: pass
|
||||
|
||||
Canonical mapping:
|
||||
- Hermes session id is the join key
|
||||
@@ -36,3 +32,4 @@ Why this matters:
|
||||
- world interaction no longer disappears into an opaque side channel
|
||||
- we now have a path from Hermes transcript -> Evennia event log -> replay/eval
|
||||
- this complements rather than replaces NLE/MiniHack
|
||||
- the persistent-world lane now has a real green baseline, not just an aspiration
|
||||
|
||||
@@ -11,7 +11,7 @@ REPO_ROOT = Path(__file__).resolve().parents[2]
|
||||
if str(REPO_ROOT) not in sys.path:
|
||||
sys.path.insert(0, str(REPO_ROOT))
|
||||
|
||||
from evennia_tools.training import WORLD_BASICS_COMMANDS, WORLD_BASICS_EXPECTATIONS, example_eval_path
|
||||
from evennia_tools.training import WORLD_BASICS_STEPS, example_eval_path
|
||||
from scripts.evennia import evennia_mcp_server as bridge
|
||||
|
||||
EVENNIA_BIN = Path.home() / '.timmy' / 'evennia' / 'venv' / 'bin' / 'evennia'
|
||||
@@ -27,6 +27,22 @@ def reset_timmy_to_gate():
|
||||
subprocess.run([str(EVENNIA_BIN), 'shell', '-c', code], cwd=GAME_DIR, env=env, check=True, capture_output=True, text=True, timeout=120)
|
||||
|
||||
|
||||
def normalize_to_gate() -> None:
    """Steer the ``timmy`` bridge session back to the Gate room.

    Starts from the bridge's last observed output (issuing ``look`` if that is
    empty) and then, for at most six rounds, picks the next command from the
    room name found in the most recent output:

    * ``Gate``                      -> done, return immediately
    * ``Courtyard``                 -> send ``gate``
    * ``Workshop``/``Archive``/``Chapel`` -> send ``courtyard``
    * anything else                 -> send ``look`` to refresh the view

    Room detection is a plain, case-sensitive substring test on the output.
    The function returns ``None`` in every case; if the Gate has not been seen
    after six rounds it simply stops without raising. The output of the sixth
    command is not inspected.
    """

    def send(text: str) -> str:
        # Every command goes through the same session with the same wait.
        return bridge._command(text, name="timmy", wait_ms=400).get("output", "")

    seen = bridge._observe("timmy").get("output", "")
    if not seen:
        seen = send("look")

    inner_rooms = ("Workshop", "Archive", "Chapel")
    attempts = 0
    while attempts < 6:
        attempts += 1
        if "Gate" in seen:
            return
        if "Courtyard" in seen:
            next_command = "gate"
        elif any(name in seen for name in inner_rooms):
            next_command = "courtyard"
        else:
            next_command = "look"
        seen = send(next_command)
|
||||
|
||||
|
||||
def main():
|
||||
try:
|
||||
bridge._disconnect("timmy")
|
||||
@@ -35,11 +51,13 @@ def main():
|
||||
reset_timmy_to_gate()
|
||||
bridge._save_bound_session_id(os.environ.get("TIMMY_EVENNIA_EVAL_SESSION_ID", "eval-evennia-world-basics"))
|
||||
bridge._connect(name="timmy", username=EVAL_USERNAME, password=EVAL_PASSWORD)
|
||||
normalize_to_gate()
|
||||
results = []
|
||||
for command in WORLD_BASICS_COMMANDS:
|
||||
for step in WORLD_BASICS_STEPS:
|
||||
command = step["command"]
|
||||
expected = step["expected"]
|
||||
res = bridge._command(command, name="timmy", wait_ms=400)
|
||||
output = res.get("output", "")
|
||||
expected = WORLD_BASICS_EXPECTATIONS.get(command, ())
|
||||
passed = all(token in output for token in expected)
|
||||
results.append({"command": command, "expected": expected, "passed": passed, "output_excerpt": output[:300]})
|
||||
bridge._disconnect("timmy")
|
||||
|
||||
@@ -13,7 +13,7 @@ if str(REPO_ROOT) not in sys.path:
|
||||
sys.path.insert(0, str(REPO_ROOT))
|
||||
|
||||
from evennia_tools.telemetry import event_log_path, session_meta_path
|
||||
from evennia_tools.training import WORLD_BASICS_COMMANDS, example_trace_path
|
||||
from evennia_tools.training import WORLD_BASICS_STEPS, example_trace_path
|
||||
from scripts.evennia import evennia_mcp_server as bridge
|
||||
|
||||
EVENNIA_BIN = Path.home() / '.timmy' / 'evennia' / 'venv' / 'bin' / 'evennia'
|
||||
@@ -31,6 +31,22 @@ def reset_timmy_to_gate():
|
||||
subprocess.run([str(EVENNIA_BIN), 'shell', '-c', code], cwd=GAME_DIR, env=env, check=True, capture_output=True, text=True, timeout=120)
|
||||
|
||||
|
||||
def normalize_to_gate() -> None:
    """Steer the ``timmy`` bridge session back to the Gate room.

    Starts from the bridge's last observed output (issuing ``look`` if that is
    empty) and then, for at most six rounds, picks the next command from the
    room name found in the most recent output:

    * ``Gate``                      -> done, return immediately
    * ``Courtyard``                 -> send ``gate``
    * ``Workshop``/``Archive``/``Chapel`` -> send ``courtyard``
    * anything else                 -> send ``look`` to refresh the view

    Room detection is a plain, case-sensitive substring test on the output.
    The function returns ``None`` in every case; if the Gate has not been seen
    after six rounds it simply stops without raising. The output of the sixth
    command is not inspected.
    """

    def send(text: str) -> str:
        # Every command goes through the same session with the same wait.
        return bridge._command(text, name="timmy", wait_ms=400).get("output", "")

    seen = bridge._observe("timmy").get("output", "")
    if not seen:
        seen = send("look")

    inner_rooms = ("Workshop", "Archive", "Chapel")
    attempts = 0
    while attempts < 6:
        attempts += 1
        if "Gate" in seen:
            return
        if "Courtyard" in seen:
            next_command = "gate"
        elif any(name in seen for name in inner_rooms):
            next_command = "courtyard"
        else:
            next_command = "look"
        seen = send(next_command)
|
||||
|
||||
|
||||
def main():
|
||||
try:
|
||||
bridge._disconnect("timmy")
|
||||
@@ -39,8 +55,9 @@ def main():
|
||||
reset_timmy_to_gate()
|
||||
bridge._save_bound_session_id(SESSION_ID)
|
||||
bridge._connect(name="timmy", username=EVAL_USERNAME, password=EVAL_PASSWORD)
|
||||
for command in WORLD_BASICS_COMMANDS:
|
||||
bridge._command(command, name="timmy", wait_ms=400)
|
||||
normalize_to_gate()
|
||||
for step in WORLD_BASICS_STEPS:
|
||||
bridge._command(step["command"], name="timmy", wait_ms=400)
|
||||
bridge._disconnect("timmy")
|
||||
|
||||
log_path = event_log_path(SESSION_ID)
|
||||
|
||||
@@ -3,19 +3,19 @@ import unittest
|
||||
from pathlib import Path
|
||||
|
||||
from evennia_tools.telemetry import event_log_path, session_meta_path, write_session_metadata
|
||||
from evennia_tools.training import WORLD_BASICS_COMMANDS, WORLD_BASICS_EXPECTATIONS, example_eval_path, example_trace_path
|
||||
from evennia_tools.training import WORLD_BASICS_STEPS, example_eval_path, example_trace_path
|
||||
|
||||
|
||||
class TestEvenniaTraining(unittest.TestCase):
|
||||
def test_world_basics_sequence_is_stable(self):
|
||||
self.assertEqual(
|
||||
WORLD_BASICS_COMMANDS,
|
||||
tuple(step["command"] for step in WORLD_BASICS_STEPS),
|
||||
("look", "enter", "workshop", "look", "courtyard", "chapel", "look Book of the Soul"),
|
||||
)
|
||||
|
||||
def test_expectations_cover_navigation_commands(self):
|
||||
for command in ("look", "enter", "workshop", "courtyard", "chapel", "look Book of the Soul"):
|
||||
self.assertIn(command, WORLD_BASICS_EXPECTATIONS)
|
||||
def test_each_step_has_nonempty_expectations(self):
|
||||
for step in WORLD_BASICS_STEPS:
|
||||
self.assertTrue(step["expected"])
|
||||
|
||||
def test_example_paths_land_in_examples_dir(self):
|
||||
root = Path("/tmp/repo")
|
||||
|
||||
@@ -1,21 +1,21 @@
|
||||
{
|
||||
"passed": false,
|
||||
"passed": true,
|
||||
"checks": [
|
||||
{
|
||||
"command": "look",
|
||||
"expected": [
|
||||
"Gate"
|
||||
],
|
||||
"passed": false,
|
||||
"output_excerpt": "\u001b[1m\u001b[36mChapel\u001b[0m\r\nA quiet room set apart for prayer, conscience, grief, and right alignment. The tone here is gentle and unhurried.\r\n\u001b[1m\u001b[37mExits:\u001b[0m courtyard\r\n\u001b[1m\u001b[37mYou see:\u001b[0m a Book of the Soul and a Prayer Wall\u001b[0m\r\n"
|
||||
"passed": true,
|
||||
"output_excerpt": "\u001b[1m\u001b[36mGate\u001b[0m\r\nA deliberate threshold into Timmy's world. The air is still here, as if entry itself matters.\r\n\u001b[1m\u001b[37mExits:\u001b[0m enter\u001b[0m\r\n"
|
||||
},
|
||||
{
|
||||
"command": "enter",
|
||||
"expected": [
|
||||
"Courtyard"
|
||||
],
|
||||
"passed": false,
|
||||
"output_excerpt": "Command 'enter' is not available. Maybe you meant \"chardelete\" or \"emote\"?\u001b[0m\r\n"
|
||||
"passed": true,
|
||||
"output_excerpt": "\u001b[1m\u001b[36mCourtyard\u001b[0m\r\nThe central open court of Timmy's place. Paths lead outward to work, memory, prayer, and watchfulness.\r\n\u001b[1m\u001b[37mExits:\u001b[0m gate, workshop, archive, and chapel\r\n\u001b[1m\u001b[37mYou see:\u001b[0m a Map Table\u001b[0m\r\n"
|
||||
},
|
||||
{
|
||||
"command": "workshop",
|
||||
@@ -23,16 +23,17 @@
|
||||
"Workshop",
|
||||
"Workbench"
|
||||
],
|
||||
"passed": false,
|
||||
"output_excerpt": "Command 'workshop' is not available. Maybe you meant \"who\"?\u001b[0m\r\n"
|
||||
"passed": true,
|
||||
"output_excerpt": "\u001b[1m\u001b[36mWorkshop\u001b[0m\r\nBenches, tools, half-built mechanisms, and active prototypes fill the room. This is where ideas become artifacts.\r\n\u001b[1m\u001b[37mExits:\u001b[0m courtyard\r\n\u001b[1m\u001b[37mYou see:\u001b[0m a Workbench\u001b[0m\r\n"
|
||||
},
|
||||
{
|
||||
"command": "look",
|
||||
"expected": [
|
||||
"Gate"
|
||||
"Workshop",
|
||||
"Workbench"
|
||||
],
|
||||
"passed": false,
|
||||
"output_excerpt": "\u001b[1m\u001b[36mChapel\u001b[0m\r\nA quiet room set apart for prayer, conscience, grief, and right alignment. The tone here is gentle and unhurried.\r\n\u001b[1m\u001b[37mExits:\u001b[0m courtyard\r\n\u001b[1m\u001b[37mYou see:\u001b[0m a Book of the Soul and a Prayer Wall\u001b[0m\r\n"
|
||||
"passed": true,
|
||||
"output_excerpt": "\u001b[1m\u001b[36mWorkshop\u001b[0m\r\nBenches, tools, half-built mechanisms, and active prototypes fill the room. This is where ideas become artifacts.\r\n\u001b[1m\u001b[37mExits:\u001b[0m courtyard\r\n\u001b[1m\u001b[37mYou see:\u001b[0m a Workbench\u001b[0m\r\n"
|
||||
},
|
||||
{
|
||||
"command": "courtyard",
|
||||
@@ -62,7 +63,7 @@
|
||||
"output_excerpt": "\u001b[1m\u001b[36mBook of the Soul\u001b[0m\r\nA doctrinal anchor. It is not decorative; it is a reference point.\u001b[0m\r\n"
|
||||
}
|
||||
],
|
||||
"orientation": false,
|
||||
"navigation": false,
|
||||
"orientation": true,
|
||||
"navigation": true,
|
||||
"object_inspection": true
|
||||
}
|
||||
@@ -37,3 +37,16 @@
|
||||
{"event": "command", "actor": "timmy", "command": "chapel", "output": "\u001b[1m\u001b[36mChapel\u001b[0m A quiet room set apart for prayer, conscience, grief, and right alignment. The tone here is gentle and unhurried. \u001b[1m\u001b[37mExits:\u001b[0m courtyard \u001b[1m\u001b[37mYou see:\u001b[0m a Book of the Soul and a Prayer Wall\u001b[0m", "timestamp": "2026-03-28T19:17:34.295231+00:00"}
|
||||
{"event": "command", "actor": "timmy", "command": "look Book of the Soul", "output": "\u001b[1m\u001b[36mBook of the Soul\u001b[0m A doctrinal anchor. It is not decorative; it is a reference point.\u001b[0m", "timestamp": "2026-03-28T19:17:34.700773+00:00"}
|
||||
{"event": "disconnect", "actor": "timmy", "timestamp": "2026-03-28T19:17:34.701330+00:00"}
|
||||
{"event": "disconnect", "actor": "timmy", "timestamp": "2026-03-28T19:17:35.654223+00:00"}
|
||||
{"event": "connect", "actor": "TimmyEval", "output": "\u001b[1m\u001b[34m==============================================================\u001b[0m Welcome to \u001b[1m\u001b[32mtimmy_world\u001b[0m, version 6.0.0! If you have an existing account, connect to it by typing: \u001b[1m\u001b[37mconnect <username> <password>\u001b[0m If you n...", "timestamp": "2026-03-28T19:31:52.782015+00:00"}
|
||||
{"event": "command", "actor": "timmy", "command": "look", "output": "\u001b[1m\u001b[36mChapel\u001b[0m A quiet room set apart for prayer, conscience, grief, and right alignment. The tone here is gentle and unhurried. \u001b[1m\u001b[37mExits:\u001b[0m courtyard \u001b[1m\u001b[37mYou see:\u001b[0m a Book of the Soul and a Prayer Wall\u001b[0m", "timestamp": "2026-03-28T19:31:53.394240+00:00"}
|
||||
{"event": "command", "actor": "timmy", "command": "courtyard", "output": "\u001b[1m\u001b[36mCourtyard\u001b[0m The central open court of Timmy's place. Paths lead outward to work, memory, prayer, and watchfulness. \u001b[1m\u001b[37mExits:\u001b[0m gate, workshop, archive, and chapel \u001b[1m\u001b[37mYou see:\u001b[0m a Map Table\u001b[0m", "timestamp": "2026-03-28T19:31:53.796861+00:00"}
|
||||
{"event": "command", "actor": "timmy", "command": "gate", "output": "\u001b[1m\u001b[36mGate\u001b[0m A deliberate threshold into Timmy's world. The air is still here, as if entry itself matters. \u001b[1m\u001b[37mExits:\u001b[0m enter\u001b[0m", "timestamp": "2026-03-28T19:31:54.202470+00:00"}
|
||||
{"event": "command", "actor": "timmy", "command": "look", "output": "\u001b[1m\u001b[36mGate\u001b[0m A deliberate threshold into Timmy's world. The air is still here, as if entry itself matters. \u001b[1m\u001b[37mExits:\u001b[0m enter\u001b[0m", "timestamp": "2026-03-28T19:31:54.605358+00:00"}
|
||||
{"event": "command", "actor": "timmy", "command": "enter", "output": "\u001b[1m\u001b[36mCourtyard\u001b[0m The central open court of Timmy's place. Paths lead outward to work, memory, prayer, and watchfulness. \u001b[1m\u001b[37mExits:\u001b[0m gate, workshop, archive, and chapel \u001b[1m\u001b[37mYou see:\u001b[0m a Map Table\u001b[0m", "timestamp": "2026-03-28T19:31:55.007358+00:00"}
|
||||
{"event": "command", "actor": "timmy", "command": "workshop", "output": "\u001b[1m\u001b[36mWorkshop\u001b[0m Benches, tools, half-built mechanisms, and active prototypes fill the room. This is where ideas become artifacts. \u001b[1m\u001b[37mExits:\u001b[0m courtyard \u001b[1m\u001b[37mYou see:\u001b[0m a Workbench\u001b[0m", "timestamp": "2026-03-28T19:31:55.409107+00:00"}
|
||||
{"event": "command", "actor": "timmy", "command": "look", "output": "\u001b[1m\u001b[36mWorkshop\u001b[0m Benches, tools, half-built mechanisms, and active prototypes fill the room. This is where ideas become artifacts. \u001b[1m\u001b[37mExits:\u001b[0m courtyard \u001b[1m\u001b[37mYou see:\u001b[0m a Workbench\u001b[0m", "timestamp": "2026-03-28T19:31:55.814849+00:00"}
|
||||
{"event": "command", "actor": "timmy", "command": "courtyard", "output": "\u001b[1m\u001b[36mCourtyard\u001b[0m The central open court of Timmy's place. Paths lead outward to work, memory, prayer, and watchfulness. \u001b[1m\u001b[37mExits:\u001b[0m gate, workshop, archive, and chapel \u001b[1m\u001b[37mYou see:\u001b[0m a Map Table\u001b[0m", "timestamp": "2026-03-28T19:31:56.220756+00:00"}
|
||||
{"event": "command", "actor": "timmy", "command": "chapel", "output": "\u001b[1m\u001b[36mChapel\u001b[0m A quiet room set apart for prayer, conscience, grief, and right alignment. The tone here is gentle and unhurried. \u001b[1m\u001b[37mExits:\u001b[0m courtyard \u001b[1m\u001b[37mYou see:\u001b[0m a Book of the Soul and a Prayer Wall\u001b[0m", "timestamp": "2026-03-28T19:31:56.626349+00:00"}
|
||||
{"event": "command", "actor": "timmy", "command": "look Book of the Soul", "output": "\u001b[1m\u001b[36mBook of the Soul\u001b[0m A doctrinal anchor. It is not decorative; it is a reference point.\u001b[0m", "timestamp": "2026-03-28T19:31:57.029105+00:00"}
|
||||
{"event": "disconnect", "actor": "timmy", "timestamp": "2026-03-28T19:31:57.029536+00:00"}
|
||||
|
||||
Reference in New Issue
Block a user