# Forked from Rockachopa/Timmy-time-dashboard (161 lines, 5.6 KiB, Python).
"""Benchmark scenario definitions for Morrowind agent regression testing.

Each scenario specifies a starting location, goal conditions, world state
(entities, events), and maximum cycles allowed. The runner feeds these
into the heartbeat loop and checks completion against the goal predicate.
"""
|
|
|
|
from __future__ import annotations

from collections.abc import Callable
from dataclasses import dataclass, field
|
|
|
|
|
|
@dataclass(frozen=True)
class BenchmarkScenario:
    """A reproducible agent task used to detect performance regressions.

    The dataclass is frozen, so attributes cannot be reassigned after
    construction. The list-valued fields are still ordinary mutable lists;
    each instance gets its own fresh list via ``default_factory``.

    Attributes:
        name: Human-readable scenario name.
        description: What the scenario tests.
        start_location: Where the agent begins.
        goal_location: Target location (if navigation scenario); defaults
            to the empty string.
        entities: NPCs / objects present in the world.
        events: Game events injected each cycle.
        max_cycles: Hard cap on heartbeat cycles before failure.
        goal_predicate: Optional callable ``(actions, location) -> bool``
            evaluated after each cycle to check early success. ``None``
            means the scenario has no predicate.
        tags: Freeform tags for filtering (e.g. "navigation", "quest").
    """

    name: str
    description: str
    start_location: str
    goal_location: str = ""
    entities: list[str] = field(default_factory=list)
    events: list[str] = field(default_factory=list)
    max_cycles: int = 50
    # Signature matches the predicates built by the factories in this module:
    # (actions taken so far, current location) -> goal reached?
    goal_predicate: Callable[[list[dict], str], bool] | None = None
    tags: list[str] = field(default_factory=list)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Goal predicates
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def _reached_location(target: str) -> Callable:
|
|
"""Return a predicate that checks whether the agent reached *target*."""
|
|
|
|
def predicate(actions: list[dict], current_location: str) -> bool:
|
|
return current_location.lower() == target.lower()
|
|
|
|
return predicate
|
|
|
|
|
|
def _interacted_with(npc: str) -> Callable:
|
|
"""Return a predicate that checks for a speak/interact action with *npc*."""
|
|
|
|
def predicate(actions: list[dict], current_location: str) -> bool:
|
|
for act in actions:
|
|
if act.get("action") in ("speak", "interact", "talk"):
|
|
if act.get("target", "").lower() == npc.lower():
|
|
return True
|
|
return False
|
|
|
|
return predicate
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Built-in scenarios
# ---------------------------------------------------------------------------
|
|
|
|
# Scenarios shipped with the benchmark suite. Each entry pairs a world setup
# (start location, entities, injected events) with a cycle budget and, where
# one exists, a goal predicate for early success.
BUILTIN_SCENARIOS: list[BenchmarkScenario] = [
    BenchmarkScenario(
        name="Walk Seyda Neen to Balmora",
        description=(
            "Navigate from the starting village to Balmora via the road. "
            "Tests basic navigation and pathfinding."
        ),
        start_location="Seyda Neen",
        goal_location="Balmora",
        entities=["Silt Strider", "Road Sign", "Mudcrab"],
        events=["player_spawned"],
        max_cycles=30,
        goal_predicate=_reached_location("Balmora"),
        tags=["navigation", "basic"],
    ),
    BenchmarkScenario(
        name="Fargoth's Ring",
        description=(
            "Complete the Fargoth quest: find Fargoth, receive the ring, "
            "and return it. Tests NPC interaction and quest logic."
        ),
        start_location="Seyda Neen",
        goal_location="Seyda Neen",
        entities=["Fargoth", "Arrille", "Guard"],
        events=["quest_available:fargoth_ring"],
        max_cycles=40,
        # NOTE(review): this predicate only checks for a speak/interact/talk
        # action targeting Fargoth; it does not verify the ring hand-back
        # mentioned in the description.
        goal_predicate=_interacted_with("Fargoth"),
        tags=["quest", "npc_interaction"],
    ),
    BenchmarkScenario(
        name="Balmora Guild Navigation",
        description=(
            "Walk from Balmora South Wall Corner Club to the Fighters Guild. "
            "Tests intra-city navigation with multiple NPCs present."
        ),
        start_location="Balmora, South Wall Corner Club",
        goal_location="Balmora, Fighters Guild",
        entities=["Guard", "Merchant", "Caius Cosades"],
        events=["player_entered"],
        max_cycles=20,
        goal_predicate=_reached_location("Balmora, Fighters Guild"),
        tags=["navigation", "city"],
    ),
    BenchmarkScenario(
        name="Combat Encounter — Mudcrab",
        description=(
            "Engage and defeat a single Mudcrab on the road between "
            "Seyda Neen and Balmora. Tests combat action selection."
        ),
        start_location="Bitter Coast Road",
        goal_location="Bitter Coast Road",
        entities=["Mudcrab"],
        events=["hostile_entity_nearby"],
        max_cycles=15,
        goal_predicate=None,  # Success = survived max_cycles without crash
        tags=["combat", "basic"],
    ),
    BenchmarkScenario(
        name="Passive Observation — Balmora Market",
        description=(
            "Observe the Balmora market for 10 cycles without acting. "
            "Tests that the agent can reason without unnecessary actions."
        ),
        start_location="Balmora, Market Square",
        goal_location="",
        entities=["Merchant", "Guard", "Pilgrim", "Trader"],
        events=["market_day"],
        max_cycles=10,
        # Spelled out for consistency with the combat scenario: there is no
        # predicate to evaluate for early success.
        goal_predicate=None,
        tags=["observation", "passive"],
    ),
]
|
|
|
|
|
|
def load_scenarios(
    tags: list[str] | None = None,
) -> list[BenchmarkScenario]:
    """Return built-in scenarios, optionally filtered by tags.

    Args:
        tags: If provided, only return scenarios that share at least one tag
            with this collection. A single bare string is treated as one tag
            rather than as a sequence of characters. ``None`` (the default)
            disables filtering; an empty list matches nothing.

    Returns:
        A new list of matching ``BenchmarkScenario`` instances, in the order
        they appear in ``BUILTIN_SCENARIOS``.
    """
    if tags is None:
        # Hand back a copy so callers cannot mutate the module-level list.
        return list(BUILTIN_SCENARIOS)

    # set("navigation") would yield individual characters and silently match
    # nothing, so wrap a lone string instead of iterating it.
    wanted = {tags} if isinstance(tags, str) else set(tags)
    return [
        scenario
        for scenario in BUILTIN_SCENARIOS
        if not wanted.isdisjoint(scenario.tags)
    ]
|