forked from Rockachopa/Timmy-time-dashboard
Co-authored-by: Claude (Opus 4.6) <claude@hermes.local> Co-committed-by: Claude (Opus 4.6) <claude@hermes.local>
This commit is contained in:
@@ -3,6 +3,14 @@
|
||||
from .api import router
|
||||
from .cascade import CascadeRouter, Provider, ProviderStatus, get_router
|
||||
from .history import HealthHistoryStore, get_history_store
|
||||
from .metabolic import (
|
||||
DEFAULT_TIER_MODELS,
|
||||
MetabolicRouter,
|
||||
ModelTier,
|
||||
build_prompt,
|
||||
classify_complexity,
|
||||
get_metabolic_router,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
"CascadeRouter",
|
||||
@@ -12,4 +20,11 @@ __all__ = [
|
||||
"router",
|
||||
"HealthHistoryStore",
|
||||
"get_history_store",
|
||||
# Metabolic router
|
||||
"MetabolicRouter",
|
||||
"ModelTier",
|
||||
"DEFAULT_TIER_MODELS",
|
||||
"classify_complexity",
|
||||
"build_prompt",
|
||||
"get_metabolic_router",
|
||||
]
|
||||
|
||||
381
src/infrastructure/router/metabolic.py
Normal file
381
src/infrastructure/router/metabolic.py
Normal file
@@ -0,0 +1,381 @@
|
||||
"""Three-tier metabolic LLM router.
|
||||
|
||||
Routes queries to the cheapest-sufficient model tier using MLX for all
|
||||
inference on Apple Silicon GPU:
|
||||
|
||||
T1 — Routine (Qwen3-8B Q6_K, ~45-55 tok/s): Simple navigation, basic choices.
|
||||
T2 — Medium (Qwen3-14B Q5_K_M, ~20-28 tok/s): Dialogue, inventory management.
|
||||
T3 — Complex (Qwen3-32B Q4_K_M, ~8-12 tok/s): Quest planning, stuck recovery.
|
||||
|
||||
Memory budget:
|
||||
- T1+T2 always loaded (~8.5 GB combined)
|
||||
- T3 loaded on demand (+20 GB) — game pauses during inference
|
||||
|
||||
Design notes:
|
||||
- 70% of game ticks never reach the LLM (handled upstream by behavior trees)
|
||||
- T3 pauses the game world before inference and unpauses after (graceful if no world)
|
||||
- All inference via vllm-mlx / Ollama — local-first, no cloud for game ticks
|
||||
|
||||
References:
|
||||
- Issue #966 — Three-Tier Metabolic LLM Router
|
||||
- Issue #1063 — Best Local Uncensored Agent Model for M3 Max 36GB
|
||||
- Issue #1075 — Claude Quota Monitor + Metabolic Protocol
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
from enum import StrEnum
|
||||
from typing import Any
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class ModelTier(StrEnum):
|
||||
"""Three metabolic model tiers ordered by cost and capability.
|
||||
|
||||
Tier selection is driven by classify_complexity(). The cheapest
|
||||
sufficient tier is always chosen — T1 handles routine tasks, T2
|
||||
handles dialogue and management, T3 handles planning and recovery.
|
||||
"""
|
||||
|
||||
T1_ROUTINE = "t1_routine" # Fast, cheap — Qwen3-8B, always loaded
|
||||
T2_MEDIUM = "t2_medium" # Balanced — Qwen3-14B, always loaded
|
||||
T3_COMPLEX = "t3_complex" # Deep — Qwen3-32B, loaded on demand, pauses game
|
||||
|
||||
|
||||
# ── Classification vocabulary ────────────────────────────────────────────────
|
||||
|
||||
# T1: single-action navigation and binary-choice words
|
||||
_T1_KEYWORDS = frozenset(
|
||||
{
|
||||
"go", "move", "walk", "run", "north", "south", "east", "west",
|
||||
"up", "down", "left", "right", "yes", "no", "ok", "okay",
|
||||
"open", "close", "take", "drop", "look", "pick", "use",
|
||||
"wait", "rest", "save", "attack", "flee", "jump", "crouch",
|
||||
}
|
||||
)
|
||||
|
||||
# T3: planning, optimisation, or recovery signals
|
||||
_T3_KEYWORDS = frozenset(
|
||||
{
|
||||
"plan", "strategy", "optimize", "optimise", "quest", "stuck",
|
||||
"recover", "multi-step", "long-term", "negotiate", "persuade",
|
||||
"faction", "reputation", "best", "optimal", "recommend",
|
||||
"analyze", "analyse", "evaluate", "decide", "complex", "how do i",
|
||||
"what should i do", "help me figure", "what is the best",
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
def classify_complexity(task: str, state: dict) -> ModelTier:
    """Classify a task to the cheapest-sufficient model tier.

    Classification priority (highest wins):
        1. T3 — any T3 keyword/phrase, stuck indicator, or ``state["require_t3"] = True``
        2. T1 — short task with only T1 keywords and no active context
        3. T2 — everything else (safe default)

    Args:
        task: Natural-language task description or player input.
        state: Current game state dict. Recognised keys:
            ``stuck`` (bool), ``require_t3`` (bool),
            ``active_quests`` (list), ``dialogue_active`` (bool),
            ``combat_active`` (bool).

    Returns:
        ModelTier appropriate for the task.
    """
    task_lower = task.lower()
    # Strip surrounding punctuation so inputs like "Go north." or "yes!"
    # still match their keywords instead of silently falling through to T2.
    words = {w.strip(".,!?;:'\"()") for w in task_lower.split()}

    # ── T3 signals ──────────────────────────────────────────────────────────
    t3_keyword_hit = bool(words & _T3_KEYWORDS)
    # Multi-word T3 entries can't match token-wise; test them as substrings.
    t3_phrase_hit = any(phrase in task_lower for phrase in _T3_KEYWORDS if " " in phrase)
    is_stuck = bool(state.get("stuck", False))
    explicit_t3 = bool(state.get("require_t3", False))

    if t3_keyword_hit or t3_phrase_hit or is_stuck or explicit_t3:
        logger.debug(
            "classify_complexity → T3 (keywords=%s stuck=%s explicit=%s)",
            t3_keyword_hit or t3_phrase_hit,
            is_stuck,
            explicit_t3,
        )
        return ModelTier.T3_COMPLEX

    # ── T1 signals ──────────────────────────────────────────────────────────
    t1_keyword_hit = bool(words & _T1_KEYWORDS)
    task_short = len(task.split()) <= 6
    no_active_context = (
        not state.get("active_quests")
        and not state.get("dialogue_active")
        and not state.get("combat_active")
    )

    if t1_keyword_hit and task_short and no_active_context:
        logger.debug("classify_complexity → T1 (keywords=%s short=%s)", t1_keyword_hit, task_short)
        return ModelTier.T1_ROUTINE

    # ── Default: T2 ─────────────────────────────────────────────────────────
    logger.debug("classify_complexity → T2 (default)")
    return ModelTier.T2_MEDIUM
|
||||
|
||||
|
||||
def build_prompt(
|
||||
state: dict,
|
||||
ui_state: dict,
|
||||
text: str,
|
||||
visual_context: str | None = None,
|
||||
) -> list[dict]:
|
||||
"""Build an OpenAI-compatible messages list from game context.
|
||||
|
||||
Assembles a system message from structured game state and a user
|
||||
message from the player's text input. This format is accepted by
|
||||
CascadeRouter.complete() directly.
|
||||
|
||||
Args:
|
||||
state: Current game state dict. Common keys:
|
||||
``location`` (str), ``health`` (int/float),
|
||||
``inventory`` (list), ``active_quests`` (list),
|
||||
``stuck`` (bool).
|
||||
ui_state: Current UI state dict. Common keys:
|
||||
``dialogue_active`` (bool), ``dialogue_npc`` (str),
|
||||
``menu_open`` (str), ``combat_active`` (bool).
|
||||
text: Player text or task description (becomes user message).
|
||||
visual_context: Optional free-text description of the current screen
|
||||
or scene — from a vision model or rule-based extractor.
|
||||
|
||||
Returns:
|
||||
List of message dicts: [{"role": "system", ...}, {"role": "user", ...}]
|
||||
"""
|
||||
context_lines: list[str] = []
|
||||
|
||||
location = state.get("location", "unknown")
|
||||
context_lines.append(f"Location: {location}")
|
||||
|
||||
health = state.get("health")
|
||||
if health is not None:
|
||||
context_lines.append(f"Health: {health}")
|
||||
|
||||
inventory = state.get("inventory", [])
|
||||
if inventory:
|
||||
items = [i if isinstance(i, str) else i.get("name", str(i)) for i in inventory[:10]]
|
||||
context_lines.append(f"Inventory: {', '.join(items)}")
|
||||
|
||||
active_quests = state.get("active_quests", [])
|
||||
if active_quests:
|
||||
names = [
|
||||
q if isinstance(q, str) else q.get("name", str(q)) for q in active_quests[:5]
|
||||
]
|
||||
context_lines.append(f"Active quests: {', '.join(names)}")
|
||||
|
||||
if state.get("stuck"):
|
||||
context_lines.append("Status: STUCK — need recovery strategy")
|
||||
|
||||
if ui_state.get("dialogue_active"):
|
||||
npc = ui_state.get("dialogue_npc", "NPC")
|
||||
context_lines.append(f"In dialogue with: {npc}")
|
||||
|
||||
if ui_state.get("menu_open"):
|
||||
context_lines.append(f"Menu open: {ui_state['menu_open']}")
|
||||
|
||||
if ui_state.get("combat_active"):
|
||||
context_lines.append("Status: IN COMBAT")
|
||||
|
||||
if visual_context:
|
||||
context_lines.append(f"Scene: {visual_context}")
|
||||
|
||||
system_content = (
|
||||
"You are Timmy, an AI game agent. "
|
||||
"Respond with valid game commands only.\n\n"
|
||||
+ "\n".join(context_lines)
|
||||
)
|
||||
|
||||
return [
|
||||
{"role": "system", "content": system_content},
|
||||
{"role": "user", "content": text},
|
||||
]
|
||||
|
||||
|
||||
# ── Default model assignments ────────────────────────────────────────────────
# Ollama model tags per tier. Overridable per deployment via
# MetabolicRouter(tier_models={...}).
# Model benchmarks (M3 Max 36 GB, issue #1063):
#   Qwen3-8B  Q6_K   — 0.933 F1 tool calling, ~45-55 tok/s (~6 GB)
#   Qwen3-14B Q5_K_M — 0.971 F1 tool calling, ~20-28 tok/s (~9.5 GB)
#   Qwen3-32B Q4_K_M — highest quality,       ~8-12 tok/s  (~20 GB, on demand)
DEFAULT_TIER_MODELS: dict[ModelTier, str] = {
    ModelTier.T1_ROUTINE: "qwen3:8b",
    ModelTier.T2_MEDIUM: "qwen3:14b",
    ModelTier.T3_COMPLEX: "qwen3:30b",  # Closest Ollama tag to 32B Q4
}
|
||||
|
||||
|
||||
class MetabolicRouter:
    """Routes LLM requests to the cheapest-sufficient model tier.

    Wraps CascadeRouter with:
    - Complexity classification via classify_complexity()
    - Prompt assembly via build_prompt()
    - T3 world-pause / world-unpause (graceful if no world adapter)

    Usage::

        router = MetabolicRouter()

        # Simple route call — classification + prompt + inference in one step
        result = await router.route(
            task="Go north",
            state={"location": "Balmora"},
            ui_state={},
        )
        print(result["content"], result["tier"])

        # Pre-classify if you need the tier for telemetry
        tier = router.classify("Plan the best path to Vivec", game_state)

        # Wire in world adapter for T3 pause/unpause
        router.set_world(world_adapter)
    """

    def __init__(
        self,
        cascade: Any | None = None,
        tier_models: dict[ModelTier, str] | None = None,
    ) -> None:
        """Initialise the metabolic router.

        Args:
            cascade: CascadeRouter instance to use. If None, the
                singleton returned by get_router() is used lazily.
            tier_models: Override default model names per tier.
        """
        self._cascade = cascade
        # Copy the defaults so per-instance overrides never mutate the
        # module-level DEFAULT_TIER_MODELS dict.
        self._tier_models: dict[ModelTier, str] = dict(DEFAULT_TIER_MODELS)
        if tier_models:
            self._tier_models.update(tier_models)
        self._world: Any | None = None

    def set_world(self, world: Any) -> None:
        """Wire in a world adapter for T3 pause / unpause support.

        The adapter only needs to implement ``act(CommandInput)`` — the full
        WorldInterface contract is not required. A missing or broken world
        adapter degrades gracefully (logs a warning, inference continues).

        Args:
            world: Any object with an ``act(CommandInput)`` method.
        """
        self._world = world

    def _get_cascade(self) -> Any:
        """Return the CascadeRouter, creating the singleton if needed."""
        if self._cascade is None:
            # Local import avoids a circular import at module load time.
            from infrastructure.router.cascade import get_router

            self._cascade = get_router()
        return self._cascade

    def classify(self, task: str, state: dict) -> ModelTier:
        """Classify task complexity. Delegates to classify_complexity()."""
        return classify_complexity(task, state)

    async def _world_command(self, action: str, *, ok_msg: str, fail_msg: str) -> None:
        """Send one command to the world adapter, swallowing any failure.

        Shared implementation for _pause_world() / _unpause_world(): no-op
        when no adapter is wired in; any adapter error is logged as a
        warning so inference proceeds regardless.

        Args:
            action: Command name to send (e.g. "pause", "unpause").
            ok_msg: Debug message logged on success.
            fail_msg: Warning prefix logged when the adapter raises.
        """
        if self._world is None:
            return
        try:
            from infrastructure.world.types import CommandInput

            # world.act may block; run it off the event loop.
            await asyncio.to_thread(self._world.act, CommandInput(action=action))
            logger.debug(ok_msg)
        except Exception as exc:
            logger.warning("%s: %s", fail_msg, exc)

    async def _pause_world(self) -> None:
        """Pause the game world before T3 inference (graceful degradation)."""
        await self._world_command(
            "pause",
            ok_msg="MetabolicRouter: world paused for T3 inference",
            fail_msg="world.pause() failed — continuing without pause",
        )

    async def _unpause_world(self) -> None:
        """Unpause the game world after T3 inference (always called, even on error)."""
        await self._world_command(
            "unpause",
            ok_msg="MetabolicRouter: world unpaused after T3 inference",
            fail_msg="world.unpause() failed — game may remain paused",
        )

    async def route(
        self,
        task: str,
        state: dict,
        ui_state: dict | None = None,
        visual_context: str | None = None,
        temperature: float = 0.3,
        max_tokens: int | None = None,
    ) -> dict:
        """Route a task to the appropriate model tier and return the LLM response.

        Selects the tier via classify_complexity(), assembles the prompt via
        build_prompt(), and dispatches to CascadeRouter. For T3, the game
        world is paused before inference and unpaused after (in a finally block).

        Args:
            task: Natural-language task description or player input.
            state: Current game state dict.
            ui_state: Current UI state dict (optional, defaults to {}).
            visual_context: Optional screen/scene description from vision model.
            temperature: Sampling temperature (default 0.3 for game commands).
            max_tokens: Maximum tokens to generate.

        Returns:
            Dict with keys: ``content``, ``provider``, ``model``, ``tier``,
            ``latency_ms``, plus any extra keys from CascadeRouter.

        Raises:
            RuntimeError: If all providers fail (propagated from CascadeRouter).
        """
        ui_state = ui_state or {}
        tier = self.classify(task, state)
        model = self._tier_models[tier]
        messages = build_prompt(state, ui_state, task, visual_context)
        cascade = self._get_cascade()

        logger.info(
            "MetabolicRouter: tier=%s model=%s task=%r",
            tier,
            model,
            task[:80],
        )

        # Only T3 pauses the world; the finally guarantees unpause even
        # when inference raises. Pause/unpause degrade gracefully, so the
        # non-T3 path is unaffected by the shared try/finally.
        pause_for_t3 = tier == ModelTier.T3_COMPLEX
        if pause_for_t3:
            await self._pause_world()
        try:
            result = await cascade.complete(
                messages=messages,
                model=model,
                temperature=temperature,
                max_tokens=max_tokens,
            )
        finally:
            if pause_for_t3:
                await self._unpause_world()

        result["tier"] = tier
        return result
|
||||
|
||||
|
||||
# ── Module-level singleton ────────────────────────────────────────────────────
_metabolic_router: MetabolicRouter | None = None


def get_metabolic_router() -> MetabolicRouter:
    """Return the process-wide MetabolicRouter, creating it on first use."""
    global _metabolic_router
    router = _metabolic_router
    if router is None:
        router = MetabolicRouter()
        _metabolic_router = router
    return router
|
||||
Reference in New Issue
Block a user