fix: make Evennia training replay deterministic (#37)

This commit is contained in:
Alexander Whitestone
2026-03-28 15:33:43 -04:00
parent 3f5a174943
commit aca26da71f
7 changed files with 90 additions and 53 deletions

View File

@@ -11,7 +11,7 @@ REPO_ROOT = Path(__file__).resolve().parents[2]
if str(REPO_ROOT) not in sys.path:
sys.path.insert(0, str(REPO_ROOT))
from evennia_tools.training import WORLD_BASICS_COMMANDS, WORLD_BASICS_EXPECTATIONS, example_eval_path
from evennia_tools.training import WORLD_BASICS_STEPS, example_eval_path
from scripts.evennia import evennia_mcp_server as bridge
EVENNIA_BIN = Path.home() / '.timmy' / 'evennia' / 'venv' / 'bin' / 'evennia'
@@ -27,6 +27,22 @@ def reset_timmy_to_gate():
subprocess.run([str(EVENNIA_BIN), 'shell', '-c', code], cwd=GAME_DIR, env=env, check=True, capture_output=True, text=True, timeout=120)
def normalize_to_gate() -> None:
    """Steer the "timmy" session toward a room whose output mentions "Gate".

    The starting text comes from ``bridge._observe("timmy")``; when that
    yields no output, a ``look`` command is issued instead. After that, at
    most six commands are sent, each chosen from the latest output:

    * output mentions "Courtyard"                    -> send ``gate``
    * output mentions "Workshop"/"Archive"/"Chapel"  -> send ``courtyard``
    * anything else                                  -> send ``look`` again

    The function stops as soon as "Gate" appears in the output it inspects.

    NOTE(review): if six commands are used up, the function returns without
    checking the final output and without signalling failure -- confirm that
    callers are fine with this best-effort behaviour.
    """

    def _send(cmd):
        # Every command goes to the same session with the same wait time.
        reply = bridge._command(cmd, name="timmy", wait_ms=400)
        return reply.get("output", "")

    # Rooms that are presumably one hop beyond the Courtyard -- verify
    # against the world layout.
    inner_rooms = ("Workshop", "Archive", "Chapel")

    text = bridge._observe("timmy").get("output", "") or _send("look")

    attempts = 0
    while attempts < 6 and "Gate" not in text:
        if "Courtyard" in text:
            next_cmd = "gate"
        elif any(label in text for label in inner_rooms):
            next_cmd = "courtyard"
        else:
            # Unrecognised output: re-read the room description.
            next_cmd = "look"
        text = _send(next_cmd)
        attempts += 1
def main():
try:
bridge._disconnect("timmy")
@@ -35,11 +51,13 @@ def main():
reset_timmy_to_gate()
bridge._save_bound_session_id(os.environ.get("TIMMY_EVENNIA_EVAL_SESSION_ID", "eval-evennia-world-basics"))
bridge._connect(name="timmy", username=EVAL_USERNAME, password=EVAL_PASSWORD)
normalize_to_gate()
results = []
for command in WORLD_BASICS_COMMANDS:
for step in WORLD_BASICS_STEPS:
command = step["command"]
expected = step["expected"]
res = bridge._command(command, name="timmy", wait_ms=400)
output = res.get("output", "")
expected = WORLD_BASICS_EXPECTATIONS.get(command, ())
passed = all(token in output for token in expected)
results.append({"command": command, "expected": expected, "passed": passed, "output_excerpt": output[:300]})
bridge._disconnect("timmy")

View File

@@ -13,7 +13,7 @@ if str(REPO_ROOT) not in sys.path:
sys.path.insert(0, str(REPO_ROOT))
from evennia_tools.telemetry import event_log_path, session_meta_path
from evennia_tools.training import WORLD_BASICS_COMMANDS, example_trace_path
from evennia_tools.training import WORLD_BASICS_STEPS, example_trace_path
from scripts.evennia import evennia_mcp_server as bridge
EVENNIA_BIN = Path.home() / '.timmy' / 'evennia' / 'venv' / 'bin' / 'evennia'
@@ -31,6 +31,22 @@ def reset_timmy_to_gate():
subprocess.run([str(EVENNIA_BIN), 'shell', '-c', code], cwd=GAME_DIR, env=env, check=True, capture_output=True, text=True, timeout=120)
def normalize_to_gate() -> None:
    """Steer the "timmy" session toward a room whose output mentions "Gate".

    The starting text comes from ``bridge._observe("timmy")``; when that
    yields no output, a ``look`` command is issued instead. After that, at
    most six commands are sent, each chosen from the latest output:

    * output mentions "Courtyard"                    -> send ``gate``
    * output mentions "Workshop"/"Archive"/"Chapel"  -> send ``courtyard``
    * anything else                                  -> send ``look`` again

    The function stops as soon as "Gate" appears in the output it inspects.

    NOTE(review): if six commands are used up, the function returns without
    checking the final output and without signalling failure -- confirm that
    callers are fine with this best-effort behaviour.
    """

    def _send(cmd):
        # Every command goes to the same session with the same wait time.
        reply = bridge._command(cmd, name="timmy", wait_ms=400)
        return reply.get("output", "")

    # Rooms that are presumably one hop beyond the Courtyard -- verify
    # against the world layout.
    inner_rooms = ("Workshop", "Archive", "Chapel")

    text = bridge._observe("timmy").get("output", "") or _send("look")

    attempts = 0
    while attempts < 6 and "Gate" not in text:
        if "Courtyard" in text:
            next_cmd = "gate"
        elif any(label in text for label in inner_rooms):
            next_cmd = "courtyard"
        else:
            # Unrecognised output: re-read the room description.
            next_cmd = "look"
        text = _send(next_cmd)
        attempts += 1
def main():
try:
bridge._disconnect("timmy")
@@ -39,8 +55,9 @@ def main():
reset_timmy_to_gate()
bridge._save_bound_session_id(SESSION_ID)
bridge._connect(name="timmy", username=EVAL_USERNAME, password=EVAL_PASSWORD)
for command in WORLD_BASICS_COMMANDS:
bridge._command(command, name="timmy", wait_ms=400)
normalize_to_gate()
for step in WORLD_BASICS_STEPS:
bridge._command(step["command"], name="timmy", wait_ms=400)
bridge._disconnect("timmy")
log_path = event_log_path(SESSION_ID)