test: update delegation tests for YAML-driven agent IDs

Old hardcoded IDs (seer, forge, echo, helm, quill) replaced with YAML-defined IDs (orchestrator, researcher, coder, writer, memory, experimenter). Added test that old names are explicitly rejected.
2026-03-14 08:40:24 -04:00
parent dc380860ba
commit 0e89caa830
7 changed files with 523 additions and 64 deletions
--- a/config/agents.yaml
+++ b/config/agents.yaml
@@ -0,0 +1,190 @@
+# ── Agent Definitions ───────────────────────────────────────────────────────
+#
+# All agent differentiation lives here. The Python runtime reads this file
+# and builds every agent as an instance of a single seed class (SubAgent).
+#
+# To add a new agent: copy any block, change the values, restart.
+# To remove an agent: delete or comment out its block.
+# To change a model: update the model field. No code changes needed.
+#
+# Fields:
+#   name          Display name
+#   role          Functional role (used for routing and tool delegation)
+#   model         Ollama model ID (null = use defaults.model)
+#   tools         List of tool names this agent can access
+#   prompt        System prompt — what makes this agent unique
+#   prompt_tier   "full" (tool-capable models) or "lite" (small models)
+#   max_history   Number of conversation turns to keep in context
+#   context_window  Max context length (null = model default)
+#
+# ── Defaults ────────────────────────────────────────────────────────────────
+
+defaults:
+  model: qwen3.5:latest
+  prompt_tier: lite
+  max_history: 10
+  tools: []
+  context_window: null
+
+# ── Routing ─────────────────────────────────────────────────────────────────
+#
+# Pattern-based routing replaces the old Helm LLM routing.
+# Each agent lists keywords; a case-insensitive substring hit delegates to it.
+# Agents are checked in listed order; first hit wins. No hit: the orchestrator handles it.
+
+routing:
+  method: pattern    # "pattern" (keyword matching) or "llm" (model-based)
+  patterns:
+    researcher:
+      - search
+      - research
+      - find out
+      - look up
+      - what is
+      - who is
+      - news about
+      - latest on
+    coder:
+      - code
+      - implement
+      - debug
+      - fix bug
+      - write function
+      - refactor
+      - test
+      - programming
+      - python
+      - javascript
+    writer:
+      - write
+      - draft
+      - document
+      - summarize
+      - blog post
+      - readme
+      - changelog
+    memory:
+      - remember
+      - recall
+      - we discussed
+      - we talked about
+      - what did i say
+      - remind me
+      - have we
+    experimenter:
+      - experiment
+      - train
+      - fine-tune
+      - benchmark
+      - evaluate model
+      - run trial
+
+# ── Agents ──────────────────────────────────────────────────────────────────
+
+agents:
+  orchestrator:
+    name: Timmy
+    role: orchestrator
+    model: qwen3:30b
+    prompt_tier: full
+    max_history: 20
+    tools:
+      - web_search
+      - read_file
+      - write_file
+      - python
+      - memory_search
+      - memory_write
+      - system_status
+      - shell
+    prompt: |
+      You are Timmy, a sovereign local AI orchestrator.
+
+      You are the primary interface between the user and the agent swarm.
+      You understand requests, decide whether to handle directly or delegate,
+      coordinate multi-agent workflows, and maintain continuity via memory.
+
+      Hard Rules:
+      1. NEVER fabricate tool output. Call the tool and wait for real results.
+      2. If a tool returns an error, report the exact error.
+      3. If you don't know something, say so. Then use a tool. Don't guess.
+      4. When corrected, use memory_write to save the correction immediately.
+
+  researcher:
+    name: Seer
+    role: research
+    model: qwen3:30b
+    prompt_tier: full
+    max_history: 10
+    tools:
+      - web_search
+      - read_file
+      - memory_search
+    prompt: |
+      You are Seer, a research and information gathering specialist.
+      Find, evaluate, and synthesize information from external sources.
+      Be thorough, skeptical, concise, and cite sources.
+
+  coder:
+    name: Forge
+    role: code
+    model: qwen3:30b
+    prompt_tier: full
+    max_history: 15
+    tools:
+      - python
+      - write_file
+      - read_file
+      - shell
+    prompt: |
+      You are Forge, a code generation and tool building specialist.
+      Write clean code, be safe, explain your work, and test mentally.
+      Follow existing patterns in the codebase. Never break tests.
+
+  writer:
+    name: Quill
+    role: writing
+    model: null              # uses defaults.model
+    prompt_tier: lite
+    max_history: 10
+    tools:
+      - write_file
+      - read_file
+      - memory_search
+    prompt: |
+      You are Quill, a writing and content generation specialist.
+      Write clearly, know your audience, be concise, use formatting.
+
+  memory:
+    name: Echo
+    role: memory
+    model: null              # uses defaults.model
+    prompt_tier: lite
+    max_history: 10
+    tools:
+      - memory_search
+      - read_file
+      - write_file
+    prompt: |
+      You are Echo, a memory and context management specialist.
+      Remember, retrieve, and synthesize information from the past.
+      Be accurate, relevant, concise, and acknowledge uncertainty.
+
+  experimenter:
+    name: Lab
+    role: experiment
+    model: qwen3:30b
+    prompt_tier: full
+    max_history: 10
+    tools:
+      - run_experiment
+      - prepare_experiment
+      - shell
+      - python
+      - read_file
+      - write_file
+    prompt: |
+      You are Lab, an autonomous ML experimentation specialist.
+      You run time-boxed training experiments, evaluate metrics,
+      modify training code to improve results, and iterate.
+      Always report the metric delta. Never exceed the time budget.
--- a/src/timmy/agents/__init__.py
+++ b/src/timmy/agents/__init__.py
@@ -1,11 +1,43 @@
-"""Agents package — Timmy orchestrator and configurable sub-agents."""
+"""Agents package — YAML-driven agent factory.
+
+All agent definitions live in config/agents.yaml.
+The loader reads YAML and builds SubAgent instances from a single seed class.
+"""

 from timmy.agents.base import BaseAgent, SubAgent
-from timmy.agents.timmy import TimmyOrchestrator, create_timmy_swarm
+from timmy.agents.loader import (
+    get_agent,
+    list_agents,
+    load_agents,
+    reload_agents,
+    route_request,
+)
+
+# Backwards compat — old code that imported create_timmy_swarm
+# now gets the YAML-driven equivalent.
+
+
+def create_timmy_swarm():
+    """Load all agents from YAML config.
+
+    Backwards-compatible wrapper for code that called create_timmy_swarm().
+    Returns the orchestrator agent (or first agent if no orchestrator defined).
+    """
+    agents = load_agents()
+    return agents.get("orchestrator", next(iter(agents.values())))
+
+
+# Also alias TimmyOrchestrator for old imports
+TimmyOrchestrator = SubAgent

 __all__ = [
    "BaseAgent",
    "SubAgent",
    "TimmyOrchestrator",
    "create_timmy_swarm",
+    "get_agent",
+    "list_agents",
+    "load_agents",
+    "reload_agents",
+    "route_request",
 ]
--- a/src/timmy/agents/base.py
+++ b/src/timmy/agents/base.py
@@ -6,8 +6,8 @@ BaseAgent provides:
 - Memory integration
 - Structured logging

-SubAgent is the concrete implementation used for all persona-based agents
-(replacing the individual Helm/Echo/Seer/Forge/Quill classes).
+SubAgent is the single seed class for ALL agents.  Differentiation
+comes entirely from config (agents.yaml), not from Python subclasses.
 """

 import logging
@@ -29,7 +29,7 @@ logger = logging.getLogger(__name__)


 class BaseAgent(ABC):
-    """Base class for all sub-agents."""
+    """Base class for all agents."""

    def __init__(
        self,
@@ -38,36 +38,47 @@ class BaseAgent(ABC):
        role: str,
        system_prompt: str,
        tools: list[str] | None = None,
+        model: str | None = None,
+        max_history: int = 10,
    ) -> None:
        self.agent_id = agent_id
        self.name = name
        self.role = role
        self.tools = tools or []
+        self.model = model or settings.ollama_model
+        self.max_history = max_history

        # Create Agno agent
+        self.system_prompt = system_prompt
        self.agent = self._create_agent(system_prompt)

        # Event bus for communication
        self.event_bus: EventBus | None = None

-        logger.info("%s agent initialized (id: %s)", name, agent_id)
+        logger.info(
+            "%s agent initialized (id: %s, model: %s)",
+            name,
+            agent_id,
+            self.model,
+        )

    def _create_agent(self, system_prompt: str) -> Agent:
-        """Create the underlying Agno agent."""
+        """Create the underlying Agno agent with per-agent model."""
        # Get tools from registry
        tool_instances = []
-        for tool_name in self.tools:
-            handler = tool_registry.get_handler(tool_name)
-            if handler:
-                tool_instances.append(handler)
+        if tool_registry is not None:
+            for tool_name in self.tools:
+                handler = tool_registry.get_handler(tool_name)
+                if handler:
+                    tool_instances.append(handler)

        return Agent(
            name=self.name,
-            model=Ollama(id=settings.ollama_model, host=settings.ollama_url, timeout=300),
+            model=Ollama(id=self.model, host=settings.ollama_url, timeout=300),
            description=system_prompt,
            tools=tool_instances if tool_instances else None,
            add_history_to_context=True,
-            num_history_runs=10,
+            num_history_runs=self.max_history,
            markdown=True,
            telemetry=settings.telemetry_enabled,
        )
@@ -134,16 +145,18 @@ class BaseAgent(ABC):
            "agent_id": self.agent_id,
            "name": self.name,
            "role": self.role,
+            "model": self.model,
            "status": "ready",
            "tools": self.tools,
        }


 class SubAgent(BaseAgent):
-    """Concrete agent configured by persona data (prompt + tools).
+    """Concrete agent — the single seed class for all agents.

-    Replaces the individual agent classes (Helm, Echo, Seer, Forge, Quill)
-    which all shared the same structure and differed only by config.
+    Every agent in the system is an instance of SubAgent, differentiated
+    only by the config values passed in from agents.yaml.  No subclassing
+    needed — add new agents by editing YAML, not Python.
    """

    def __init__(
@@ -153,6 +166,8 @@ class SubAgent(BaseAgent):
        role: str,
        system_prompt: str,
        tools: list[str] | None = None,
+        model: str | None = None,
+        max_history: int = 10,
    ) -> None:
        super().__init__(
            agent_id=agent_id,
@@ -160,6 +175,8 @@ class SubAgent(BaseAgent):
            role=role,
            system_prompt=system_prompt,
            tools=tools,
+            model=model,
+            max_history=max_history,
        )

    async def execute_task(self, task_id: str, description: str, context: dict) -> Any:
--- a/src/timmy/agents/loader.py
+++ b/src/timmy/agents/loader.py
@@ -0,0 +1,212 @@
+"""YAML-driven agent factory.
+
+Reads config/agents.yaml and builds agent instances from a single seed
+class (SubAgent).  All agent differentiation lives in YAML — no Python
+changes needed to add, remove, or reconfigure agents.
+
+Usage:
+    from timmy.agents.loader import load_agents, get_agent, list_agents
+    from timmy.agents.loader import get_routing_config, route_request
+
+    agents = load_agents()              # dict of agent_id -> SubAgent
+    forge = get_agent("coder")          # single agent by id
+    target = route_request("fix bug")   # pattern-based routing
+"""
+
+from __future__ import annotations
+
+import logging
+from pathlib import Path
+from typing import Any
+
+import yaml
+
+from config import settings
+
+logger = logging.getLogger(__name__)
+
+# Module-level cache
+_agents: dict[str, Any] | None = None
+_config: dict[str, Any] | None = None
+
+# Default config path (relative to repo root)
+_CONFIG_FILENAME = "config/agents.yaml"
+
+
+def _find_config_path() -> Path:
+    """Locate agents.yaml relative to the repo root."""
+    repo_root = Path(settings.repo_root)
+    config_path = repo_root / _CONFIG_FILENAME
+    if not config_path.exists():
+        raise FileNotFoundError(
+            f"Agent config not found: {config_path}\nCreate {_CONFIG_FILENAME} in your repo root."
+        )
+    return config_path
+
+
+def _load_config(force_reload: bool = False) -> dict[str, Any]:
+    """Load and cache the agents.yaml config."""
+    global _config
+    if _config is not None and not force_reload:
+        return _config
+
+    config_path = _find_config_path()
+    with open(config_path) as f:
+        _config = yaml.safe_load(f)
+
+    logger.info("Loaded agent config from %s", config_path)
+    return _config
+
+
+def _resolve_model(agent_model: str | None, defaults: dict) -> str:
+    """Resolve agent model, falling back to defaults then settings."""
+    if agent_model:
+        return agent_model
+    default_model = defaults.get("model")
+    if default_model:
+        return default_model
+    return settings.ollama_model
+
+
+def _resolve_prompt_tier(agent_tier: str | None, defaults: dict) -> str:
+    """Resolve prompt tier, falling back to defaults."""
+    return agent_tier or defaults.get("prompt_tier", "lite")
+
+
+def _build_system_prompt(agent_cfg: dict, prompt_tier: str) -> str:
+    """Build the full system prompt for an agent.
+
+    Combines the agent's custom prompt with the appropriate base prompt
+    (full or lite) from the prompts module.
+    """
+    from timmy.prompts import get_system_prompt
+
+    # Get base prompt for the tier
+    tools_enabled = prompt_tier == "full"
+    base_prompt = get_system_prompt(tools_enabled=tools_enabled)
+
+    # Prepend the agent's custom prompt
+    custom_prompt = agent_cfg.get("prompt", "").strip()
+    if custom_prompt:
+        return f"{custom_prompt}\n\n{base_prompt}"
+
+    return base_prompt
+
+
+def load_agents(force_reload: bool = False) -> dict[str, Any]:
+    """Load all agents from YAML config.
+
+    Returns a dict of agent_id -> SubAgent instances.
+    Agents are cached after first load; pass force_reload=True to re-read.
+    """
+    global _agents
+    if _agents is not None and not force_reload:
+        return _agents
+
+    from timmy.agents.base import SubAgent
+
+    config = _load_config(force_reload=force_reload)
+    defaults = config.get("defaults", {})
+    agents_cfg = config.get("agents", {})
+
+    _agents = {}
+
+    for agent_id, agent_cfg in agents_cfg.items():
+        model = _resolve_model(agent_cfg.get("model"), defaults)
+        prompt_tier = _resolve_prompt_tier(agent_cfg.get("prompt_tier"), defaults)
+        system_prompt = _build_system_prompt(agent_cfg, prompt_tier)
+        max_history = agent_cfg.get("max_history", defaults.get("max_history", 10))
+        tools = agent_cfg.get("tools", defaults.get("tools", []))
+
+        agent = SubAgent(
+            agent_id=agent_id,
+            name=agent_cfg.get("name", agent_id.title()),
+            role=agent_cfg.get("role", "general"),
+            system_prompt=system_prompt,
+            tools=tools,
+            model=model,
+            max_history=max_history,
+        )
+
+        _agents[agent_id] = agent
+        logger.info(
+            "Loaded agent: %s (model=%s, tools=%d, tier=%s)",
+            agent_id,
+            model,
+            len(tools),
+            prompt_tier,
+        )
+
+    logger.info("Total agents loaded: %d", len(_agents))
+    return _agents
+
+
+def get_agent(agent_id: str) -> Any:
+    """Get a single agent by ID.  Loads config if not already loaded."""
+    agents = load_agents()
+    agent = agents.get(agent_id)
+    if agent is None:
+        available = ", ".join(sorted(agents.keys()))
+        raise KeyError(f"Unknown agent: {agent_id!r}. Available: {available}")
+    return agent
+
+
+def list_agents() -> list[dict[str, Any]]:
+    """List all agents with their metadata (for tools_intro, delegation, etc.)."""
+    config = _load_config()
+    defaults = config.get("defaults", {})
+    agents_cfg = config.get("agents", {})
+
+    result = []
+    for agent_id, agent_cfg in agents_cfg.items():
+        result.append(
+            {
+                "id": agent_id,
+                "name": agent_cfg.get("name", agent_id.title()),
+                "role": agent_cfg.get("role", "general"),
+                "model": _resolve_model(agent_cfg.get("model"), defaults),
+                "tools": agent_cfg.get("tools", defaults.get("tools", [])),
+                "status": "available",
+            }
+        )
+    return result
+
+
+# ── Routing ────────────────────────────────────────────────────────────────
+
+
+def get_routing_config() -> dict[str, Any]:
+    """Get the routing configuration."""
+    config = _load_config()
+    return config.get("routing", {"method": "pattern", "patterns": {}})
+
+
+def route_request(user_message: str) -> str | None:
+    """Route a user request to an agent using pattern matching.
+
+    Returns the agent_id of the first match, or None if no pattern matches
+    (meaning the orchestrator should handle it directly).
+    """
+    routing = get_routing_config()
+
+    if routing.get("method") != "pattern":
+        return None
+
+    patterns = routing.get("patterns", {})
+    message_lower = user_message.lower()
+
+    for agent_id, keywords in patterns.items():
+        for keyword in keywords:
+            if keyword.lower() in message_lower:
+                logger.debug("Routed to %s (matched: %r)", agent_id, keyword)
+                return agent_id
+
+    return None
+
+
+def reload_agents() -> dict[str, Any]:
+    """Force reload agents from YAML.  Call after editing agents.yaml."""
+    global _agents, _config
+    _agents = None
+    _config = None
+    return load_agents(force_reload=True)
--- a/src/timmy/tools_delegation/__init__.py
+++ b/src/timmy/tools_delegation/__init__.py
@@ -1,8 +1,7 @@
 """Timmy's delegation tools — submit tasks and list agents.

-Delegation uses the orchestrator's sub-agent system.  The old swarm
-task-queue was removed; delegation now records intent and returns the
-target agent information.
+Reads agent roster from agents.yaml via the loader module.
+No hardcoded agent lists.
 """

 import logging
@@ -10,15 +9,6 @@ from typing import Any

 logger = logging.getLogger(__name__)

-# Agents available in the current orchestrator architecture
-_VALID_AGENTS: dict[str, str] = {
-    "seer": "research",
-    "forge": "code",
-    "echo": "memory",
-    "helm": "routing",
-    "quill": "writing",
-}
-

 def delegate_task(
    agent_name: str, task_description: str, priority: str = "normal"
@@ -26,19 +16,24 @@ def delegate_task(
    """Record a delegation intent to another agent.

    Args:
-        agent_name: Name of the agent to delegate to
+        agent_name: Name or ID of the agent to delegate to
        task_description: What you want the agent to do
        priority: Task priority - "low", "normal", "high"

    Returns:
        Dict with agent, status, and message
    """
+    from timmy.agents.loader import list_agents
+
    agent_name = agent_name.lower().strip()

-    if agent_name not in _VALID_AGENTS:
+    # Build valid agents map from YAML config
+    available = {a["id"]: a["role"] for a in list_agents()}
+
+    if agent_name not in available:
        return {
            "success": False,
-            "error": f"Unknown agent: {agent_name}. Valid agents: {', '.join(sorted(_VALID_AGENTS))}",
+            "error": f"Unknown agent: {agent_name}. Valid agents: {', '.join(sorted(available))}",
            "task_id": None,
        }

@@ -54,32 +49,35 @@ def delegate_task(
        "success": True,
        "task_id": None,
        "agent": agent_name,
-        "role": _VALID_AGENTS[agent_name],
+        "role": available[agent_name],
        "status": "noted",
-        "message": f"Delegation to {agent_name} ({_VALID_AGENTS[agent_name]}): {task_description[:100]}",
+        "message": f"Delegation to {agent_name} ({available[agent_name]}): {task_description[:100]}",
    }


 def list_swarm_agents() -> dict[str, Any]:
    """List all available sub-agents and their roles.

+    Reads from agents.yaml — no hardcoded roster.
+
    Returns:
        Dict with agent list
    """
    try:
-        from timmy.agents.timmy import _PERSONAS
+        from timmy.agents.loader import list_agents

+        agents = list_agents()
        return {
            "success": True,
            "agents": [
                {
-                    "name": p["name"],
-                    "id": p["agent_id"],
-                    "role": p.get("role", ""),
-                    "status": "available",
-                    "capabilities": ", ".join(p.get("tools", [])),
+                    "name": a["name"],
+                    "id": a["id"],
+                    "role": a["role"],
+                    "status": a.get("status", "available"),
+                    "capabilities": ", ".join(a.get("tools", [])),
                }
-                for p in _PERSONAS
+                for a in agents
            ],
        }
    except Exception as e:
--- a/src/timmy/tools_intro/init.py
+++ b/src/timmy/tools_intro/init.py
@@ -219,25 +219,26 @@ def get_task_queue_status() -> dict[str, Any]:


 def get_agent_roster() -> dict[str, Any]:
-    """Get the agent roster from the orchestrator's sub-agent definitions.
+    """Get the agent roster from agents.yaml config.

    Returns:
        Dict with agent list and summary.
    """
    try:
-        from timmy.agents.timmy import _PERSONAS
+        from timmy.agents.loader import list_agents

-        roster = []
-        for persona in _PERSONAS:
-            roster.append(
-                {
-                    "id": persona["agent_id"],
-                    "name": persona["name"],
-                    "status": "available",
-                    "capabilities": ", ".join(persona.get("tools", [])),
-                    "role": persona.get("role", ""),
-                }
-            )
+        agents = list_agents()
+        roster = [
+            {
+                "id": a["id"],
+                "name": a["name"],
+                "status": a.get("status", "available"),
+                "capabilities": ", ".join(a.get("tools", [])),
+                "role": a.get("role", ""),
+                "model": a.get("model", ""),
+            }
+            for a in agents
+        ]

        return {
            "agents": roster,
--- a/tests/timmy/test_tools_delegation.py
+++ b/tests/timmy/test_tools_delegation.py
@@ -1,4 +1,9 @@
-"""Tests for timmy.tools_delegation — delegate_task and list_swarm_agents."""
+"""Tests for timmy.tools_delegation — delegate_task and list_swarm_agents.
+
+Agent IDs are now defined in config/agents.yaml, not hardcoded Python.
+Tests reference the YAML-defined IDs: orchestrator, researcher, coder,
+writer, memory, experimenter.
+"""

 from timmy.tools_delegation import delegate_task, list_swarm_agents

@@ -11,33 +16,37 @@ class TestDelegateTask:
        assert result["task_id"] is None

    def test_valid_agent_names_normalised(self):
-        # Should still fail at import (no swarm module), but agent name is accepted
-        result = delegate_task("  Seer  ", "think about it")
-        # The swarm import will fail, so success=False but error is about import, not agent name
+        # Agent IDs are lowercased; whitespace should be stripped
+        result = delegate_task("  Researcher  ", "think about it")
        assert "Unknown agent" not in result.get("error", "")

    def test_invalid_priority_defaults_to_normal(self):
        # Even with bad priority, delegate_task should not crash
-        result = delegate_task("forge", "build", priority="ultra")
+        result = delegate_task("coder", "build", priority="ultra")
        assert isinstance(result, dict)

    def test_all_valid_agents_accepted(self):
-        valid_agents = ["seer", "forge", "echo", "helm", "quill"]
+        # These IDs match config/agents.yaml
+        valid_agents = ["orchestrator", "researcher", "coder", "writer", "memory", "experimenter"]
        for agent in valid_agents:
            result = delegate_task(agent, "test task")
            assert "Unknown agent" not in result.get("error", ""), f"{agent} rejected"

-    def test_mace_no_longer_valid(self):
-        result = delegate_task("mace", "run security scan")
-        assert result["success"] is False
-        assert "Unknown agent" in result["error"]
+    def test_old_agent_names_no_longer_valid(self):
+        # Old hardcoded names should not work anymore
+        for old_name in ["seer", "forge", "echo", "helm", "quill", "mace"]:
+            result = delegate_task(old_name, "test")
+            assert result["success"] is False
+            assert "Unknown agent" in result["error"]


 class TestListSwarmAgents:
-    def test_returns_agents_from_personas(self):
+    def test_returns_agents_from_yaml(self):
        result = list_swarm_agents()
        assert result["success"] is True
        assert len(result["agents"]) > 0
        agent_names = [a["name"] for a in result["agents"]]
+        # These names come from config/agents.yaml
        assert "Seer" in agent_names
        assert "Forge" in agent_names
+        assert "Timmy" in agent_names