# Timmy-time-dashboard/src/infrastructure/world/benchmark/scenarios.py

"""Benchmark scenario definitions for Morrowind agent regression testing.

Each scenario specifies a starting location, goal conditions, world state
(entities, events), and maximum cycles allowed.  The runner feeds these
into the heartbeat loop and checks completion against the goal predicate.
"""

from __future__ import annotations

from collections.abc import Callable
from dataclasses import dataclass, field


@dataclass(frozen=True)
class BenchmarkScenario:
    """A reproducible agent task used to detect performance regressions.

    Instances are frozen, so attributes cannot be reassigned after
    construction.  The list-valued fields are still ordinary lists: their
    contents can be mutated in place, and because the generated ``__hash__``
    hashes every field, calling ``hash()`` on an instance raises
    ``TypeError``.

    Only ``name``, ``description`` and ``start_location`` are required; every
    other field has a default.

    Attributes:
        name:           Human-readable scenario name.
        description:    What the scenario tests.
        start_location: Where the agent begins.
        goal_location:  Target location (if navigation scenario); defaults
                        to the empty string.
        entities:       NPCs / objects present in the world.
        events:         Game events injected each cycle.
        max_cycles:     Hard cap on heartbeat cycles before failure
                        (default 50).
        goal_predicate: Optional callable ``(actions, location) -> bool``
                        evaluated after each cycle to check early success.
                        ``None`` (the default) means there is no early-success
                        check.
        tags:           Freeform tags for filtering (e.g. "navigation", "quest").
    """

    name: str
    description: str
    start_location: str
    goal_location: str = ""
    # default_factory gives every instance its own fresh list.
    entities: list[str] = field(default_factory=list)
    events: list[str] = field(default_factory=list)
    max_cycles: int = 50
    # Expected shape: (actions: list[dict], current_location: str) -> bool.
    goal_predicate: Callable | None = None
    tags: list[str] = field(default_factory=list)


# ---------------------------------------------------------------------------
# Goal predicates
# ---------------------------------------------------------------------------


def _reached_location(target: str) -> Callable:
    """Build a goal predicate that succeeds once the agent is at *target*.

    The returned callable takes ``(actions, current_location)`` and returns
    ``True`` when ``current_location`` equals *target*, ignoring case (both
    sides are lower-cased before comparison).  ``actions`` is accepted but
    not consulted.
    """

    def predicate(actions: list[dict], current_location: str) -> bool:
        # Normalise both names so "balmora" and "Balmora" compare equal.
        here = current_location.lower()
        wanted = target.lower()
        return here == wanted

    return predicate


def _interacted_with(npc: str) -> Callable:
    """Return a predicate that checks for a speak/interact action with *npc*.

    The returned callable takes ``(actions, current_location)`` and returns
    ``True`` when at least one entry of ``actions`` has an ``"action"`` of
    ``"speak"``, ``"interact"`` or ``"talk"`` and a ``"target"`` equal to
    *npc*, ignoring case.  ``current_location`` is accepted but not consulted.

    An action whose ``"target"`` is missing, ``None`` or otherwise not a
    string is treated as non-matching rather than raising ``AttributeError``.

    Args:
        npc: Name of the NPC the agent is expected to address.

    Returns:
        The predicate described above.
    """
    # The NPC name never changes for a given predicate, so normalise it once.
    wanted = npc.lower()
    # Kept as a tuple so membership uses ``==`` and tolerates unhashable values.
    interaction_verbs = ("speak", "interact", "talk")

    def _targets_npc(act: dict) -> bool:
        # ``dict.get`` with a default only covers a missing key; an explicit
        # ``None`` (or any non-string) must be rejected before ``.lower()``.
        target = act.get("target")
        return isinstance(target, str) and target.lower() == wanted

    def predicate(actions: list[dict], current_location: str) -> bool:
        return any(
            act.get("action") in interaction_verbs and _targets_npc(act)
            for act in actions
        )

    return predicate


# ---------------------------------------------------------------------------
# Built-in scenarios
# ---------------------------------------------------------------------------

# Module-level registry of the scenarios shipped with the benchmark.
# ``load_scenarios`` reads from this list, and its results follow this order.
BUILTIN_SCENARIOS: list[BenchmarkScenario] = [
    # Cross-region navigation: succeeds when the agent's location is "Balmora"
    # (case-insensitive).
    BenchmarkScenario(
        name="Walk Seyda Neen to Balmora",
        description=(
            "Navigate from the starting village to Balmora via the road. "
            "Tests basic navigation and pathfinding."
        ),
        start_location="Seyda Neen",
        goal_location="Balmora",
        entities=["Silt Strider", "Road Sign", "Mudcrab"],
        events=["player_spawned"],
        max_cycles=30,
        goal_predicate=_reached_location("Balmora"),
        tags=["navigation", "basic"],
    ),
    # NOTE(review): the predicate only checks for a speak/interact/talk action
    # targeting Fargoth; it does not verify that the ring was received or
    # returned as the description states -- confirm this is intended.
    BenchmarkScenario(
        name="Fargoth's Ring",
        description=(
            "Complete the Fargoth quest: find Fargoth, receive the ring, "
            "and return it.  Tests NPC interaction and quest logic."
        ),
        start_location="Seyda Neen",
        goal_location="Seyda Neen",
        entities=["Fargoth", "Arrille", "Guard"],
        events=["quest_available:fargoth_ring"],
        max_cycles=40,
        goal_predicate=_interacted_with("Fargoth"),
        tags=["quest", "npc_interaction"],
    ),
    # Intra-city navigation: the predicate target matches goal_location.
    BenchmarkScenario(
        name="Balmora Guild Navigation",
        description=(
            "Walk from Balmora South Wall Corner Club to the Fighters Guild. "
            "Tests intra-city navigation with multiple NPCs present."
        ),
        start_location="Balmora, South Wall Corner Club",
        goal_location="Balmora, Fighters Guild",
        entities=["Guard", "Merchant", "Caius Cosades"],
        events=["player_entered"],
        max_cycles=20,
        goal_predicate=_reached_location("Balmora, Fighters Guild"),
        tags=["navigation", "city"],
    ),
    # NOTE(review): the description says "defeat", but no predicate checks
    # the outcome of the fight; see the inline comment on goal_predicate.
    BenchmarkScenario(
        name="Combat Encounter — Mudcrab",
        description=(
            "Engage and defeat a single Mudcrab on the road between "
            "Seyda Neen and Balmora.  Tests combat action selection."
        ),
        start_location="Bitter Coast Road",
        goal_location="Bitter Coast Road",
        entities=["Mudcrab"],
        events=["hostile_entity_nearby"],
        max_cycles=15,
        goal_predicate=None,  # Success = survived max_cycles without crash
        tags=["combat", "basic"],
    ),
    # goal_predicate is omitted here and therefore takes its default of None;
    # max_cycles matches the "10 cycles" mentioned in the description.
    BenchmarkScenario(
        name="Passive Observation — Balmora Market",
        description=(
            "Observe the Balmora market for 10 cycles without acting. "
            "Tests that the agent can reason without unnecessary actions."
        ),
        start_location="Balmora, Market Square",
        goal_location="",
        entities=["Merchant", "Guard", "Pilgrim", "Trader"],
        events=["market_day"],
        max_cycles=10,
        tags=["observation", "passive"],
    ),
]


def load_scenarios(
    tags: list[str] | None = None,
) -> list[BenchmarkScenario]:
    """Select built-in scenarios, optionally restricted by tag.

    Args:
        tags: Tags to filter by.  ``None`` disables filtering.  Otherwise a
            scenario is kept when it carries at least one of the given tags,
            so an empty list selects nothing.

    Returns:
        A new list of the selected ``BenchmarkScenario`` instances, in the
        order they appear in ``BUILTIN_SCENARIOS``.
    """
    if tags is not None:
        wanted = set(tags)
        # A scenario qualifies as soon as it shares any tag with the request.
        return [
            scenario
            for scenario in BUILTIN_SCENARIOS
            if not wanted.isdisjoint(scenario.tags)
        ]
    # Shallow copy: callers get their own list, not the module-level one.
    return [*BUILTIN_SCENARIOS]