test: update delegation tests for YAML-driven agent IDs

Old hardcoded IDs (seer, forge, echo, helm, quill) replaced with YAML-defined IDs (orchestrator, researcher, coder, writer, memory, experimenter). Added test that old names are explicitly rejected.
2026-03-14 08:40:24 -04:00
parent dc380860ba
commit 0e89caa830
7 changed files with 523 additions and 64 deletions
--- a/config/agents.yaml
+++ b/config/agents.yaml
@@ -0,0 +1,190 @@
+# ── Agent Definitions ───────────────────────────────────────────────────────
+#
+# All agent differentiation lives here. The Python runtime reads this file
+# and builds every agent instance from a single seed class (SubAgent).
+#
+# To add a new agent: copy any block, change the values, restart.
+# To remove an agent: delete or comment out its block.
+# To change a model: update the model field. No code changes needed.
+#
+# Fields:
+#   name          Display name
+#   role          Functional role (used for routing and tool delegation)
+#   model         Ollama model ID (null = use defaults.model)
+#   tools         List of tool names this agent can access
+#   prompt        System prompt — what makes this agent unique
+#   prompt_tier   "full" (tool-capable models) or "lite" (small models)
+#   max_history   Number of conversation turns to keep in context
+#   context_window  Max context length (null = model default)
+#
+# ── Defaults ────────────────────────────────────────────────────────────────
+
+defaults:  # baseline values; presumably used where an agent omits a field
+  model: qwen3.5:latest  # writer and memory set model: null and use this
+  prompt_tier: lite  # "lite" = small models
+  max_history: 10  # conversation turns kept in context
+  tools: []  # explicit empty list: no tools by default
+  context_window: null  # null = model default
+
+# ── Routing ─────────────────────────────────────────────────────────────────
+#
+# Pattern-based routing replaces the old Helm LLM routing.
+# Each agent ID under `patterns` lists the keywords that delegate to it.
+# First match wins. If nothing matches, the orchestrator handles it.
+
+routing:  # maps trigger keywords to the agent ID that should handle them
+  method: pattern    # "pattern" (keyword matching) or "llm" (model-based)
+  patterns:  # keys are agent IDs from `agents`; orchestrator has none
+    researcher:
+      - search
+      - research
+      - find out
+      - look up
+      - what is
+      - who is
+      - news about
+      - latest on
+    coder:
+      - code
+      - implement
+      - debug
+      - fix bug
+      - write function
+      - refactor
+      - test  # NOTE(review): substring of "latest"; confirm word matching
+      - programming
+      - python
+      - javascript
+    writer:
+      - write  # NOTE(review): overlaps coder's "write function"; order decides
+      - draft
+      - document
+      - summarize
+      - blog post
+      - readme
+      - changelog
+    memory:
+      - remember
+      - recall
+      - we discussed
+      - we talked about
+      - what did i say  # lowercase "i": presumably input is lowercased; verify
+      - remind me
+      - have we
+    experimenter:
+      - experiment
+      - train
+      - fine-tune
+      - benchmark
+      - evaluate model
+      - run trial
+
+# ── Agents ──────────────────────────────────────────────────────────────────
+
+agents:  # one entry per agent, keyed by agent ID
+  orchestrator:  # handles requests that match no routing pattern
+    name: Timmy  # display name
+    role: orchestrator
+    model: qwen3:30b
+    prompt_tier: full  # "full" = tool-capable model
+    max_history: 20  # highest of any agent in this file
+    tools:  # only agent in this file with memory_write and system_status
+      - web_search
+      - read_file
+      - write_file
+      - python
+      - memory_search
+      - memory_write
+      - system_status
+      - shell
+    prompt: |
+      You are Timmy, a sovereign local AI orchestrator.
+
+      You are the primary interface between the user and the agent swarm.
+      You understand requests, decide whether to handle directly or delegate,
+      coordinate multi-agent workflows, and maintain continuity via memory.
+
+      Hard Rules:
+      1. NEVER fabricate tool output. Call the tool and wait for real results.
+      2. If a tool returns an error, report the exact error.
+      3. If you don't know something, say so. Then use a tool. Don't guess.
+      4. When corrected, use memory_write to save the correction immediately.
+
+  researcher:  # agent ID; target of routing.patterns.researcher
+    name: Seer  # display name only; the agent ID is "researcher"
+    role: research
+    model: qwen3:30b
+    prompt_tier: full  # "full" = tool-capable model
+    max_history: 10  # conversation turns kept in context
+    tools:  # lookup-only set: no write_file, shell, or python
+      - web_search
+      - read_file
+      - memory_search
+    prompt: |
+      You are Seer, a research and information gathering specialist.
+      Find, evaluate, and synthesize information from external sources.
+      Be thorough, skeptical, concise, and cite sources.
+
+  coder:  # agent ID; target of routing.patterns.coder
+    name: Forge  # display name only; the agent ID is "coder"
+    role: code
+    model: qwen3:30b
+    prompt_tier: full  # "full" = tool-capable model
+    max_history: 15  # conversation turns kept in context
+    tools:  # code execution and file access; no web or memory tools
+      - python
+      - write_file
+      - read_file
+      - shell
+    prompt: |
+      You are Forge, a code generation and tool building specialist.
+      Write clean code, be safe, explain your work, and test mentally.
+      Follow existing patterns in the codebase. Never break tests.
+
+  writer:  # agent ID; target of routing.patterns.writer
+    name: Quill  # display name only; the agent ID is "writer"
+    role: writing
+    model: null              # uses defaults.model
+    prompt_tier: lite  # "lite" = small models
+    max_history: 10  # conversation turns kept in context
+    tools:  # file access plus memory lookup; no shell or python
+      - write_file
+      - read_file
+      - memory_search
+    prompt: |
+      You are Quill, a writing and content generation specialist.
+      Write clearly, know your audience, be concise, use formatting.
+
+  memory:  # agent ID; target of routing.patterns.memory
+    name: Echo  # display name only; the agent ID is "memory"
+    role: memory
+    model: null              # uses defaults.model
+    prompt_tier: lite  # "lite" = small models
+    max_history: 10  # conversation turns kept in context
+    tools:  # NOTE(review): no memory_write, yet "remember" routes here; confirm
+      - memory_search
+      - read_file
+      - write_file
+    prompt: |
+      You are Echo, a memory and context management specialist.
+      Remember, retrieve, and synthesize information from the past.
+      Be accurate, relevant, concise, and acknowledge uncertainty.
+
+  experimenter:  # agent ID; target of routing.patterns.experimenter
+    name: Lab  # display name only; the agent ID is "experimenter"
+    role: experiment
+    model: qwen3:30b
+    prompt_tier: full  # "full" = tool-capable model
+    max_history: 10  # conversation turns kept in context
+    tools:  # run_experiment and prepare_experiment appear only on this agent
+      - run_experiment
+      - prepare_experiment
+      - shell
+      - python
+      - read_file
+      - write_file
+    prompt: |
+      You are Lab, an autonomous ML experimentation specialist.
+      You run time-boxed training experiments, evaluate metrics,
+      modify training code to improve results, and iterate.
+      Always report the metric delta. Never exceed the time budget.