[claude] Qwen3 two-model routing via task complexity classifier (#1065) v2 #1233
@@ -25,6 +25,19 @@ providers:
|
|||||||
tier: local
|
tier: local
|
||||||
url: "http://localhost:11434"
|
url: "http://localhost:11434"
|
||||||
models:
|
models:
|
||||||
|
# ── Dual-model routing: Qwen3-8B (fast) + Qwen3-14B (quality) ──────────
|
||||||
|
# Both models fit simultaneously: ~6.6 GB + ~10.5 GB = ~17 GB combined.
|
||||||
|
# Requires OLLAMA_MAX_LOADED_MODELS=2 (set in .env) to stay hot.
|
||||||
|
# Ref: issue #1065 — Qwen3-8B/14B dual-model routing strategy
|
||||||
|
- name: qwen3:8b
|
||||||
|
context_window: 32768
|
||||||
|
capabilities: [text, tools, json, streaming, routine]
|
||||||
|
description: "Qwen3-8B Q6_K — fast router for routine tasks (~6.6 GB, 45-55 tok/s)"
|
||||||
|
- name: qwen3:14b
|
||||||
|
context_window: 40960
|
||||||
|
capabilities: [text, tools, json, streaming, complex, reasoning]
|
||||||
|
description: "Qwen3-14B Q5_K_M — complex reasoning and planning (~10.5 GB, 20-28 tok/s)"
|
||||||
|
|
||||||
# Text + Tools models
|
# Text + Tools models
|
||||||
- name: qwen3:30b
|
- name: qwen3:30b
|
||||||
default: true
|
default: true
|
||||||
@@ -187,6 +200,20 @@ fallback_chains:
|
|||||||
- dolphin3 # base Dolphin 3.0 8B (uncensored, no custom system prompt)
|
- dolphin3 # base Dolphin 3.0 8B (uncensored, no custom system prompt)
|
||||||
- qwen3:30b # primary fallback — usually sufficient with a good system prompt
|
- qwen3:30b # primary fallback — usually sufficient with a good system prompt
|
||||||
|
|
||||||
|
# ── Complexity-based routing chains (issue #1065) ───────────────────────
|
||||||
|
# Routine tasks: prefer Qwen3-8B for low latency (~45-55 tok/s)
|
||||||
|
routine:
|
||||||
|
- qwen3:8b # Primary fast model
|
||||||
|
- llama3.1:8b-instruct # Fallback fast model
|
||||||
|
- llama3.2:3b # Smallest available
|
||||||
|
|
||||||
|
# Complex tasks: prefer Qwen3-14B for quality (~20-28 tok/s)
|
||||||
|
complex:
|
||||||
|
- qwen3:14b # Primary quality model
|
||||||
|
- hermes4-14b # Native tool calling, hybrid reasoning
|
||||||
|
- qwen3:30b # Highest local quality
|
||||||
|
- qwen2.5:14b # Additional fallback
|
||||||
|
|
||||||
# ── Custom Models ───────────────────────────────────────────────────────────
|
# ── Custom Models ───────────────────────────────────────────────────────────
|
||||||
# Register custom model weights for per-agent assignment.
|
# Register custom model weights for per-agent assignment.
|
||||||
# Supports GGUF (Ollama), safetensors, and HuggingFace checkpoint dirs.
|
# Supports GGUF (Ollama), safetensors, and HuggingFace checkpoint dirs.
|
||||||
|
|||||||
@@ -51,6 +51,13 @@ class Settings(BaseSettings):
|
|||||||
# Set to 0 to use model defaults.
|
# Set to 0 to use model defaults.
|
||||||
ollama_num_ctx: int = 32768
|
ollama_num_ctx: int = 32768
|
||||||
|
|
||||||
|
# Maximum models loaded simultaneously in Ollama — override with OLLAMA_MAX_LOADED_MODELS
|
||||||
|
# Set to 2 so Qwen3-8B and Qwen3-14B can stay hot concurrently (~17 GB combined).
|
||||||
|
# Requires Ollama ≥ 0.1.33. Export this to the Ollama process environment:
|
||||||
|
# OLLAMA_MAX_LOADED_MODELS=2 ollama serve
|
||||||
|
# or add it to your systemd/launchd unit before starting the harness.
|
||||||
|
ollama_max_loaded_models: int = 2
|
||||||
|
|
||||||
# Fallback model chains — override with FALLBACK_MODELS / VISION_FALLBACK_MODELS
|
# Fallback model chains — override with FALLBACK_MODELS / VISION_FALLBACK_MODELS
|
||||||
# as comma-separated strings, e.g. FALLBACK_MODELS="qwen3:8b,qwen2.5:14b"
|
# as comma-separated strings, e.g. FALLBACK_MODELS="qwen3:8b,qwen2.5:14b"
|
||||||
# Or edit config/providers.yaml → fallback_chains for the canonical source.
|
# Or edit config/providers.yaml → fallback_chains for the canonical source.
|
||||||
|
|||||||
@@ -2,6 +2,7 @@
|
|||||||
|
|
||||||
from .api import router
|
from .api import router
|
||||||
from .cascade import CascadeRouter, Provider, ProviderStatus, get_router
|
from .cascade import CascadeRouter, Provider, ProviderStatus, get_router
|
||||||
|
from .classifier import TaskComplexity, classify_task
|
||||||
from .history import HealthHistoryStore, get_history_store
|
from .history import HealthHistoryStore, get_history_store
|
||||||
from .metabolic import (
|
from .metabolic import (
|
||||||
DEFAULT_TIER_MODELS,
|
DEFAULT_TIER_MODELS,
|
||||||
@@ -27,4 +28,7 @@ __all__ = [
|
|||||||
"classify_complexity",
|
"classify_complexity",
|
||||||
"build_prompt",
|
"build_prompt",
|
||||||
"get_metabolic_router",
|
"get_metabolic_router",
|
||||||
|
# Classifier
|
||||||
|
"TaskComplexity",
|
||||||
|
"classify_task",
|
||||||
]
|
]
|
||||||
|
|||||||
@@ -593,6 +593,34 @@ class CascadeRouter:
|
|||||||
"is_fallback_model": is_fallback_model,
|
"is_fallback_model": is_fallback_model,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
def _get_model_for_complexity(
|
||||||
|
self, provider: Provider, complexity: "TaskComplexity"
|
||||||
|
) -> str | None:
|
||||||
|
"""Return the best model on *provider* for the given complexity tier.
|
||||||
|
|
||||||
|
Checks fallback chains first (routine / complex), then falls back to
|
||||||
|
any model with the matching capability tag, then the provider default.
|
||||||
|
"""
|
||||||
|
from infrastructure.router.classifier import TaskComplexity
|
||||||
|
|
||||||
|
chain_key = "routine" if complexity == TaskComplexity.SIMPLE else "complex"
|
||||||
|
|
||||||
|
# Walk the capability fallback chain — first model present on this provider wins
|
||||||
|
for model_name in self.config.fallback_chains.get(chain_key, []):
|
||||||
|
if any(m["name"] == model_name for m in provider.models):
|
||||||
|
return model_name
|
||||||
|
|
||||||
|
# Direct capability lookup — only return if a model explicitly has the tag
|
||||||
|
# (do not use get_model_with_capability here as it falls back to the default)
|
||||||
|
cap_model = next(
|
||||||
|
(m["name"] for m in provider.models if chain_key in m.get("capabilities", [])),
|
||||||
|
None,
|
||||||
|
)
|
||||||
|
if cap_model:
|
||||||
|
return cap_model
|
||||||
|
|
||||||
|
return None # Caller will use provider default
|
||||||
|
|
||||||
async def complete(
|
async def complete(
|
||||||
self,
|
self,
|
||||||
messages: list[dict],
|
messages: list[dict],
|
||||||
@@ -600,6 +628,7 @@ class CascadeRouter:
|
|||||||
temperature: float = 0.7,
|
temperature: float = 0.7,
|
||||||
max_tokens: int | None = None,
|
max_tokens: int | None = None,
|
||||||
cascade_tier: str | None = None,
|
cascade_tier: str | None = None,
|
||||||
|
complexity_hint: str | None = None,
|
||||||
) -> dict:
|
) -> dict:
|
||||||
"""Complete a chat conversation with automatic failover.
|
"""Complete a chat conversation with automatic failover.
|
||||||
|
|
||||||
@@ -608,33 +637,103 @@ class CascadeRouter:
|
|||||||
- Falls back to vision-capable models when needed
|
- Falls back to vision-capable models when needed
|
||||||
- Supports image URLs, paths, and base64 encoding
|
- Supports image URLs, paths, and base64 encoding
|
||||||
|
|
||||||
|
Complexity-based routing (issue #1065):
|
||||||
|
- ``complexity_hint="simple"`` → routes to Qwen3-8B (low-latency)
|
||||||
|
- ``complexity_hint="complex"`` → routes to Qwen3-14B (quality)
|
||||||
|
- ``complexity_hint=None`` (default) → auto-classifies from messages
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
messages: List of message dicts with role and content
|
messages: List of message dicts with role and content
|
||||||
model: Preferred model (tries this first, then provider defaults)
|
model: Preferred model (tries this first; complexity routing is
|
||||||
|
skipped when an explicit model is given)
|
||||||
temperature: Sampling temperature
|
temperature: Sampling temperature
|
||||||
max_tokens: Maximum tokens to generate
|
max_tokens: Maximum tokens to generate
|
||||||
cascade_tier: If specified, filters providers by this tier.
|
cascade_tier: If specified, filters providers by this tier.
|
||||||
- "frontier_required": Uses only Anthropic provider for top-tier models.
|
- "frontier_required": Uses only Anthropic provider for top-tier models.
|
||||||
|
complexity_hint: "simple", "complex", or None (auto-detect).
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Dict with content, provider_used, and metrics
|
Dict with content, provider_used, model, latency_ms,
|
||||||
|
is_fallback_model, and complexity fields.
|
||||||
|
|
||||||
Raises:
|
Raises:
|
||||||
RuntimeError: If all providers fail
|
RuntimeError: If all providers fail
|
||||||
"""
|
"""
|
||||||
|
from infrastructure.router.classifier import TaskComplexity, classify_task
|
||||||
|
|
||||||
content_type = self._detect_content_type(messages)
|
content_type = self._detect_content_type(messages)
|
||||||
if content_type != ContentType.TEXT:
|
if content_type != ContentType.TEXT:
|
||||||
logger.debug("Detected %s content, selecting appropriate model", content_type.value)
|
logger.debug("Detected %s content, selecting appropriate model", content_type.value)
|
||||||
|
|
||||||
|
# Resolve task complexity ─────────────────────────────────────────────
|
||||||
|
# Skip complexity routing when caller explicitly specifies a model.
|
||||||
|
complexity: TaskComplexity | None = None
|
||||||
|
if model is None:
|
||||||
|
if complexity_hint is not None:
|
||||||
|
try:
|
||||||
|
complexity = TaskComplexity(complexity_hint.lower())
|
||||||
|
except ValueError:
|
||||||
|
logger.warning("Unknown complexity_hint %r, auto-classifying", complexity_hint)
|
||||||
|
complexity = classify_task(messages)
|
||||||
|
else:
|
||||||
|
complexity = classify_task(messages)
|
||||||
|
logger.debug("Task complexity: %s", complexity.value)
|
||||||
|
|
||||||
errors: list[str] = []
|
errors: list[str] = []
|
||||||
providers = self._filter_providers(cascade_tier)
|
providers = self._filter_providers(cascade_tier)
|
||||||
|
|
||||||
for provider in providers:
|
for provider in providers:
|
||||||
result = await self._try_single_provider(
|
if not self._is_provider_available(provider):
|
||||||
provider, messages, model, temperature, max_tokens, content_type, errors
|
continue
|
||||||
|
|
||||||
|
# Metabolic protocol: skip cloud providers when quota is low
|
||||||
|
if provider.type in ("anthropic", "openai", "grok"):
|
||||||
|
if not self._quota_allows_cloud(provider):
|
||||||
|
logger.info(
|
||||||
|
"Metabolic protocol: skipping cloud provider %s (quota too low)",
|
||||||
|
provider.name,
|
||||||
|
)
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Complexity-based model selection (only when no explicit model) ──
|
||||||
|
effective_model = model
|
||||||
|
if effective_model is None and complexity is not None:
|
||||||
|
effective_model = self._get_model_for_complexity(provider, complexity)
|
||||||
|
if effective_model:
|
||||||
|
logger.debug(
|
||||||
|
"Complexity routing [%s]: %s → %s",
|
||||||
|
complexity.value,
|
||||||
|
provider.name,
|
||||||
|
effective_model,
|
||||||
|
)
|
||||||
|
|
||||||
|
selected_model, is_fallback_model = self._select_model(
|
||||||
|
provider, effective_model, content_type
|
||||||
)
|
)
|
||||||
if result is not None:
|
|
||||||
return result
|
try:
|
||||||
|
result = await self._attempt_with_retry(
|
||||||
|
provider,
|
||||||
|
messages,
|
||||||
|
selected_model,
|
||||||
|
temperature,
|
||||||
|
max_tokens,
|
||||||
|
content_type,
|
||||||
|
)
|
||||||
|
except RuntimeError as exc:
|
||||||
|
errors.append(str(exc))
|
||||||
|
self._record_failure(provider)
|
||||||
|
continue
|
||||||
|
|
||||||
|
self._record_success(provider, result.get("latency_ms", 0))
|
||||||
|
return {
|
||||||
|
"content": result["content"],
|
||||||
|
"provider": provider.name,
|
||||||
|
"model": result.get("model", selected_model or provider.get_default_model()),
|
||||||
|
"latency_ms": result.get("latency_ms", 0),
|
||||||
|
"is_fallback_model": is_fallback_model,
|
||||||
|
"complexity": complexity.value if complexity is not None else None,
|
||||||
|
}
|
||||||
|
|
||||||
raise RuntimeError(f"All providers failed: {'; '.join(errors)}")
|
raise RuntimeError(f"All providers failed: {'; '.join(errors)}")
|
||||||
|
|
||||||
|
|||||||
166
src/infrastructure/router/classifier.py
Normal file
166
src/infrastructure/router/classifier.py
Normal file
@@ -0,0 +1,166 @@
|
|||||||
|
"""Task complexity classifier for Qwen3 dual-model routing.
|
||||||
|
|
||||||
|
Classifies incoming tasks as SIMPLE (route to Qwen3-8B for low-latency)
|
||||||
|
or COMPLEX (route to Qwen3-14B for quality-sensitive work).
|
||||||
|
|
||||||
|
Classification is fully heuristic — no LLM inference required.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import re
|
||||||
|
from enum import Enum
|
||||||
|
|
||||||
|
|
||||||
|
class TaskComplexity(Enum):
|
||||||
|
"""Task complexity tier for model routing."""
|
||||||
|
|
||||||
|
SIMPLE = "simple" # Qwen3-8B Q6_K: routine, latency-sensitive
|
||||||
|
COMPLEX = "complex" # Qwen3-14B Q5_K_M: quality-sensitive, multi-step
|
||||||
|
|
||||||
|
|
||||||
|
# Keywords strongly associated with complex tasks
|
||||||
|
_COMPLEX_KEYWORDS: frozenset[str] = frozenset(
|
||||||
|
[
|
||||||
|
"plan",
|
||||||
|
"review",
|
||||||
|
"analyze",
|
||||||
|
"analyse",
|
||||||
|
"triage",
|
||||||
|
"refactor",
|
||||||
|
"design",
|
||||||
|
"architecture",
|
||||||
|
"implement",
|
||||||
|
"compare",
|
||||||
|
"debug",
|
||||||
|
"explain",
|
||||||
|
"prioritize",
|
||||||
|
"prioritise",
|
||||||
|
"strategy",
|
||||||
|
"optimize",
|
||||||
|
"optimise",
|
||||||
|
"evaluate",
|
||||||
|
"assess",
|
||||||
|
"brainstorm",
|
||||||
|
"outline",
|
||||||
|
"summarize",
|
||||||
|
"summarise",
|
||||||
|
"generate code",
|
||||||
|
"write a",
|
||||||
|
"write the",
|
||||||
|
"code review",
|
||||||
|
"pull request",
|
||||||
|
"multi-step",
|
||||||
|
"multi step",
|
||||||
|
"step by step",
|
||||||
|
"backlog prioriti",
|
||||||
|
"issue triage",
|
||||||
|
"root cause",
|
||||||
|
"how does",
|
||||||
|
"why does",
|
||||||
|
"what are the",
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
|
# Keywords strongly associated with simple/routine tasks
|
||||||
|
_SIMPLE_KEYWORDS: frozenset[str] = frozenset(
|
||||||
|
[
|
||||||
|
"status",
|
||||||
|
"list ",
|
||||||
|
"show ",
|
||||||
|
"what is",
|
||||||
|
"how many",
|
||||||
|
"ping",
|
||||||
|
"run ",
|
||||||
|
"execute ",
|
||||||
|
"ls ",
|
||||||
|
"cat ",
|
||||||
|
"ps ",
|
||||||
|
"fetch ",
|
||||||
|
"count ",
|
||||||
|
"tail ",
|
||||||
|
"head ",
|
||||||
|
"grep ",
|
||||||
|
"find file",
|
||||||
|
"read file",
|
||||||
|
"get ",
|
||||||
|
"query ",
|
||||||
|
"check ",
|
||||||
|
"yes",
|
||||||
|
"no",
|
||||||
|
"ok",
|
||||||
|
"done",
|
||||||
|
"thanks",
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
|
# Content longer than this is treated as complex regardless of keywords
|
||||||
|
_COMPLEX_CHAR_THRESHOLD = 500
|
||||||
|
|
||||||
|
# Short content defaults to simple
|
||||||
|
_SIMPLE_CHAR_THRESHOLD = 150
|
||||||
|
|
||||||
|
# More than this many messages suggests an ongoing complex conversation
|
||||||
|
_COMPLEX_CONVERSATION_DEPTH = 6
|
||||||
|
|
||||||
|
|
||||||
|
def classify_task(messages: list[dict]) -> TaskComplexity:
|
||||||
|
"""Classify task complexity from a list of messages.
|
||||||
|
|
||||||
|
Uses heuristic rules — no LLM call required. Errs toward COMPLEX
|
||||||
|
when uncertain so that quality is preserved.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
messages: List of message dicts with ``role`` and ``content`` keys.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
TaskComplexity.SIMPLE or TaskComplexity.COMPLEX
|
||||||
|
"""
|
||||||
|
if not messages:
|
||||||
|
return TaskComplexity.SIMPLE
|
||||||
|
|
||||||
|
# Concatenate all user-turn content for analysis
|
||||||
|
user_content = " ".join(
|
||||||
|
msg.get("content", "")
|
||||||
|
for msg in messages
|
||||||
|
if msg.get("role") in ("user", "human")
|
||||||
|
and isinstance(msg.get("content"), str)
|
||||||
|
).lower().strip()
|
||||||
|
|
||||||
|
if not user_content:
|
||||||
|
return TaskComplexity.SIMPLE
|
||||||
|
|
||||||
|
# Complexity signals override everything -----------------------------------
|
||||||
|
|
||||||
|
# Explicit complex keywords
|
||||||
|
for kw in _COMPLEX_KEYWORDS:
|
||||||
|
if kw in user_content:
|
||||||
|
return TaskComplexity.COMPLEX
|
||||||
|
|
||||||
|
# Numbered / multi-step instruction list: "1. do this 2. do that"
|
||||||
|
if re.search(r"\b\d+\.\s+\w", user_content):
|
||||||
|
return TaskComplexity.COMPLEX
|
||||||
|
|
||||||
|
# Code blocks embedded in messages
|
||||||
|
if "```" in user_content:
|
||||||
|
return TaskComplexity.COMPLEX
|
||||||
|
|
||||||
|
# Long content → complex reasoning likely required
|
||||||
|
if len(user_content) > _COMPLEX_CHAR_THRESHOLD:
|
||||||
|
return TaskComplexity.COMPLEX
|
||||||
|
|
||||||
|
# Deep conversation → complex ongoing task
|
||||||
|
if len(messages) > _COMPLEX_CONVERSATION_DEPTH:
|
||||||
|
return TaskComplexity.COMPLEX
|
||||||
|
|
||||||
|
# Simplicity signals -------------------------------------------------------
|
||||||
|
|
||||||
|
# Explicit simple keywords
|
||||||
|
for kw in _SIMPLE_KEYWORDS:
|
||||||
|
if kw in user_content:
|
||||||
|
return TaskComplexity.SIMPLE
|
||||||
|
|
||||||
|
# Short single-sentence messages default to simple
|
||||||
|
if len(user_content) <= _SIMPLE_CHAR_THRESHOLD:
|
||||||
|
return TaskComplexity.SIMPLE
|
||||||
|
|
||||||
|
# When uncertain, prefer quality (complex model)
|
||||||
|
return TaskComplexity.COMPLEX
|
||||||
@@ -1512,3 +1512,195 @@ class TestTrySingleProvider:
|
|||||||
assert len(errors) == 1
|
assert len(errors) == 1
|
||||||
assert "boom" in errors[0]
|
assert "boom" in errors[0]
|
||||||
assert provider.metrics.failed_requests == 1
|
assert provider.metrics.failed_requests == 1
|
||||||
|
|
||||||
|
|
||||||
|
class TestComplexityRouting:
|
||||||
|
"""Tests for Qwen3-8B / Qwen3-14B dual-model routing (issue #1065)."""
|
||||||
|
|
||||||
|
def _make_dual_model_provider(self) -> Provider:
|
||||||
|
"""Build an Ollama provider with both Qwen3 models registered."""
|
||||||
|
return Provider(
|
||||||
|
name="ollama-local",
|
||||||
|
type="ollama",
|
||||||
|
enabled=True,
|
||||||
|
priority=1,
|
||||||
|
url="http://localhost:11434",
|
||||||
|
models=[
|
||||||
|
{
|
||||||
|
"name": "qwen3:8b",
|
||||||
|
"capabilities": ["text", "tools", "json", "streaming", "routine"],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "qwen3:14b",
|
||||||
|
"default": True,
|
||||||
|
"capabilities": ["text", "tools", "json", "streaming", "complex", "reasoning"],
|
||||||
|
},
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
|
def test_get_model_for_complexity_simple_returns_8b(self):
|
||||||
|
"""Simple tasks should select the model with 'routine' capability."""
|
||||||
|
from infrastructure.router.classifier import TaskComplexity
|
||||||
|
|
||||||
|
router = CascadeRouter(config_path=Path("/nonexistent"))
|
||||||
|
router.config.fallback_chains = {
|
||||||
|
"routine": ["qwen3:8b"],
|
||||||
|
"complex": ["qwen3:14b"],
|
||||||
|
}
|
||||||
|
provider = self._make_dual_model_provider()
|
||||||
|
|
||||||
|
model = router._get_model_for_complexity(provider, TaskComplexity.SIMPLE)
|
||||||
|
assert model == "qwen3:8b"
|
||||||
|
|
||||||
|
def test_get_model_for_complexity_complex_returns_14b(self):
|
||||||
|
"""Complex tasks should select the model with 'complex' capability."""
|
||||||
|
from infrastructure.router.classifier import TaskComplexity
|
||||||
|
|
||||||
|
router = CascadeRouter(config_path=Path("/nonexistent"))
|
||||||
|
router.config.fallback_chains = {
|
||||||
|
"routine": ["qwen3:8b"],
|
||||||
|
"complex": ["qwen3:14b"],
|
||||||
|
}
|
||||||
|
provider = self._make_dual_model_provider()
|
||||||
|
|
||||||
|
model = router._get_model_for_complexity(provider, TaskComplexity.COMPLEX)
|
||||||
|
assert model == "qwen3:14b"
|
||||||
|
|
||||||
|
def test_get_model_for_complexity_returns_none_when_no_match(self):
|
||||||
|
"""Returns None when provider has no matching model in chain."""
|
||||||
|
from infrastructure.router.classifier import TaskComplexity
|
||||||
|
|
||||||
|
router = CascadeRouter(config_path=Path("/nonexistent"))
|
||||||
|
router.config.fallback_chains = {} # empty chains
|
||||||
|
|
||||||
|
provider = Provider(
|
||||||
|
name="test",
|
||||||
|
type="ollama",
|
||||||
|
enabled=True,
|
||||||
|
priority=1,
|
||||||
|
models=[{"name": "llama3.2:3b", "default": True, "capabilities": ["text"]}],
|
||||||
|
)
|
||||||
|
|
||||||
|
# No 'routine' or 'complex' model available
|
||||||
|
model = router._get_model_for_complexity(provider, TaskComplexity.SIMPLE)
|
||||||
|
assert model is None
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_complete_with_simple_hint_routes_to_8b(self):
|
||||||
|
"""complexity_hint='simple' should use qwen3:8b."""
|
||||||
|
router = CascadeRouter(config_path=Path("/nonexistent"))
|
||||||
|
router.config.fallback_chains = {
|
||||||
|
"routine": ["qwen3:8b"],
|
||||||
|
"complex": ["qwen3:14b"],
|
||||||
|
}
|
||||||
|
router.providers = [self._make_dual_model_provider()]
|
||||||
|
|
||||||
|
with patch.object(router, "_call_ollama") as mock_call:
|
||||||
|
mock_call.return_value = {"content": "fast answer", "model": "qwen3:8b"}
|
||||||
|
result = await router.complete(
|
||||||
|
messages=[{"role": "user", "content": "list tasks"}],
|
||||||
|
complexity_hint="simple",
|
||||||
|
)
|
||||||
|
|
||||||
|
assert result["model"] == "qwen3:8b"
|
||||||
|
assert result["complexity"] == "simple"
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_complete_with_complex_hint_routes_to_14b(self):
|
||||||
|
"""complexity_hint='complex' should use qwen3:14b."""
|
||||||
|
router = CascadeRouter(config_path=Path("/nonexistent"))
|
||||||
|
router.config.fallback_chains = {
|
||||||
|
"routine": ["qwen3:8b"],
|
||||||
|
"complex": ["qwen3:14b"],
|
||||||
|
}
|
||||||
|
router.providers = [self._make_dual_model_provider()]
|
||||||
|
|
||||||
|
with patch.object(router, "_call_ollama") as mock_call:
|
||||||
|
mock_call.return_value = {"content": "detailed answer", "model": "qwen3:14b"}
|
||||||
|
result = await router.complete(
|
||||||
|
messages=[{"role": "user", "content": "review this PR"}],
|
||||||
|
complexity_hint="complex",
|
||||||
|
)
|
||||||
|
|
||||||
|
assert result["model"] == "qwen3:14b"
|
||||||
|
assert result["complexity"] == "complex"
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_explicit_model_bypasses_complexity_routing(self):
|
||||||
|
"""When model is explicitly provided, complexity routing is skipped."""
|
||||||
|
router = CascadeRouter(config_path=Path("/nonexistent"))
|
||||||
|
router.config.fallback_chains = {
|
||||||
|
"routine": ["qwen3:8b"],
|
||||||
|
"complex": ["qwen3:14b"],
|
||||||
|
}
|
||||||
|
router.providers = [self._make_dual_model_provider()]
|
||||||
|
|
||||||
|
with patch.object(router, "_call_ollama") as mock_call:
|
||||||
|
mock_call.return_value = {"content": "response", "model": "qwen3:14b"}
|
||||||
|
result = await router.complete(
|
||||||
|
messages=[{"role": "user", "content": "list tasks"}],
|
||||||
|
model="qwen3:14b", # explicit override
|
||||||
|
)
|
||||||
|
|
||||||
|
# Explicit model wins — complexity field is None
|
||||||
|
assert result["model"] == "qwen3:14b"
|
||||||
|
assert result["complexity"] is None
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_auto_classification_routes_simple_message(self):
|
||||||
|
"""Short, simple messages should auto-classify as SIMPLE → 8B."""
|
||||||
|
router = CascadeRouter(config_path=Path("/nonexistent"))
|
||||||
|
router.config.fallback_chains = {
|
||||||
|
"routine": ["qwen3:8b"],
|
||||||
|
"complex": ["qwen3:14b"],
|
||||||
|
}
|
||||||
|
router.providers = [self._make_dual_model_provider()]
|
||||||
|
|
||||||
|
with patch.object(router, "_call_ollama") as mock_call:
|
||||||
|
mock_call.return_value = {"content": "ok", "model": "qwen3:8b"}
|
||||||
|
result = await router.complete(
|
||||||
|
messages=[{"role": "user", "content": "status"}],
|
||||||
|
# no complexity_hint — auto-classify
|
||||||
|
)
|
||||||
|
|
||||||
|
assert result["complexity"] == "simple"
|
||||||
|
assert result["model"] == "qwen3:8b"
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_auto_classification_routes_complex_message(self):
|
||||||
|
"""Complex messages should auto-classify → 14B."""
|
||||||
|
router = CascadeRouter(config_path=Path("/nonexistent"))
|
||||||
|
router.config.fallback_chains = {
|
||||||
|
"routine": ["qwen3:8b"],
|
||||||
|
"complex": ["qwen3:14b"],
|
||||||
|
}
|
||||||
|
router.providers = [self._make_dual_model_provider()]
|
||||||
|
|
||||||
|
with patch.object(router, "_call_ollama") as mock_call:
|
||||||
|
mock_call.return_value = {"content": "deep analysis", "model": "qwen3:14b"}
|
||||||
|
result = await router.complete(
|
||||||
|
messages=[{"role": "user", "content": "analyze and prioritize the backlog"}],
|
||||||
|
)
|
||||||
|
|
||||||
|
assert result["complexity"] == "complex"
|
||||||
|
assert result["model"] == "qwen3:14b"
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_invalid_complexity_hint_falls_back_to_auto(self):
|
||||||
|
"""Invalid complexity_hint should log a warning and auto-classify."""
|
||||||
|
router = CascadeRouter(config_path=Path("/nonexistent"))
|
||||||
|
router.config.fallback_chains = {
|
||||||
|
"routine": ["qwen3:8b"],
|
||||||
|
"complex": ["qwen3:14b"],
|
||||||
|
}
|
||||||
|
router.providers = [self._make_dual_model_provider()]
|
||||||
|
|
||||||
|
with patch.object(router, "_call_ollama") as mock_call:
|
||||||
|
mock_call.return_value = {"content": "ok", "model": "qwen3:8b"}
|
||||||
|
# Should not raise
|
||||||
|
result = await router.complete(
|
||||||
|
messages=[{"role": "user", "content": "status"}],
|
||||||
|
complexity_hint="INVALID_HINT",
|
||||||
|
)
|
||||||
|
|
||||||
|
assert result["complexity"] in ("simple", "complex") # auto-classified
|
||||||
|
|||||||
134
tests/infrastructure/test_router_classifier.py
Normal file
134
tests/infrastructure/test_router_classifier.py
Normal file
@@ -0,0 +1,134 @@
|
|||||||
|
"""Tests for Qwen3 dual-model task complexity classifier."""
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from infrastructure.router.classifier import TaskComplexity, classify_task
|
||||||
|
|
||||||
|
|
||||||
|
class TestClassifyTask:
|
||||||
|
"""Tests for classify_task heuristics."""
|
||||||
|
|
||||||
|
# ── Simple / routine tasks ──────────────────────────────────────────────
|
||||||
|
|
||||||
|
def test_empty_messages_is_simple(self):
|
||||||
|
assert classify_task([]) == TaskComplexity.SIMPLE
|
||||||
|
|
||||||
|
def test_no_user_content_is_simple(self):
|
||||||
|
messages = [{"role": "system", "content": "You are Timmy."}]
|
||||||
|
assert classify_task(messages) == TaskComplexity.SIMPLE
|
||||||
|
|
||||||
|
def test_short_status_query_is_simple(self):
|
||||||
|
messages = [{"role": "user", "content": "status"}]
|
||||||
|
assert classify_task(messages) == TaskComplexity.SIMPLE
|
||||||
|
|
||||||
|
def test_list_command_is_simple(self):
|
||||||
|
messages = [{"role": "user", "content": "list all tasks"}]
|
||||||
|
assert classify_task(messages) == TaskComplexity.SIMPLE
|
||||||
|
|
||||||
|
def test_get_command_is_simple(self):
|
||||||
|
messages = [{"role": "user", "content": "get the latest log entry"}]
|
||||||
|
assert classify_task(messages) == TaskComplexity.SIMPLE
|
||||||
|
|
||||||
|
def test_short_message_under_threshold_is_simple(self):
|
||||||
|
messages = [{"role": "user", "content": "run the build"}]
|
||||||
|
assert classify_task(messages) == TaskComplexity.SIMPLE
|
||||||
|
|
||||||
|
def test_affirmation_is_simple(self):
|
||||||
|
messages = [{"role": "user", "content": "yes"}]
|
||||||
|
assert classify_task(messages) == TaskComplexity.SIMPLE
|
||||||
|
|
||||||
|
# ── Complex / quality-sensitive tasks ──────────────────────────────────
|
||||||
|
|
||||||
|
def test_plan_keyword_is_complex(self):
|
||||||
|
messages = [{"role": "user", "content": "plan the sprint"}]
|
||||||
|
assert classify_task(messages) == TaskComplexity.COMPLEX
|
||||||
|
|
||||||
|
def test_review_keyword_is_complex(self):
|
||||||
|
messages = [{"role": "user", "content": "review this code"}]
|
||||||
|
assert classify_task(messages) == TaskComplexity.COMPLEX
|
||||||
|
|
||||||
|
def test_analyze_keyword_is_complex(self):
|
||||||
|
messages = [{"role": "user", "content": "analyze performance"}]
|
||||||
|
assert classify_task(messages) == TaskComplexity.COMPLEX
|
||||||
|
|
||||||
|
def test_triage_keyword_is_complex(self):
|
||||||
|
messages = [{"role": "user", "content": "triage the open issues"}]
|
||||||
|
assert classify_task(messages) == TaskComplexity.COMPLEX
|
||||||
|
|
||||||
|
def test_refactor_keyword_is_complex(self):
|
||||||
|
messages = [{"role": "user", "content": "refactor the auth module"}]
|
||||||
|
assert classify_task(messages) == TaskComplexity.COMPLEX
|
||||||
|
|
||||||
|
def test_explain_keyword_is_complex(self):
|
||||||
|
messages = [{"role": "user", "content": "explain how the router works"}]
|
||||||
|
assert classify_task(messages) == TaskComplexity.COMPLEX
|
||||||
|
|
||||||
|
def test_prioritize_keyword_is_complex(self):
|
||||||
|
messages = [{"role": "user", "content": "prioritize the backlog"}]
|
||||||
|
assert classify_task(messages) == TaskComplexity.COMPLEX
|
||||||
|
|
||||||
|
def test_long_message_is_complex(self):
|
||||||
|
long_msg = "do something " * 50 # > 500 chars
|
||||||
|
messages = [{"role": "user", "content": long_msg}]
|
||||||
|
assert classify_task(messages) == TaskComplexity.COMPLEX
|
||||||
|
|
||||||
|
def test_numbered_list_is_complex(self):
|
||||||
|
messages = [
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": "1. Read the file 2. Analyze it 3. Write a report",
|
||||||
|
}
|
||||||
|
]
|
||||||
|
assert classify_task(messages) == TaskComplexity.COMPLEX
|
||||||
|
|
||||||
|
def test_code_block_is_complex(self):
|
||||||
|
messages = [
|
||||||
|
{"role": "user", "content": "Here is the code:\n```python\nprint('hello')\n```"}
|
||||||
|
]
|
||||||
|
assert classify_task(messages) == TaskComplexity.COMPLEX
|
||||||
|
|
||||||
|
def test_deep_conversation_is_complex(self):
|
||||||
|
messages = [
|
||||||
|
{"role": "user", "content": "hi"},
|
||||||
|
{"role": "assistant", "content": "hello"},
|
||||||
|
{"role": "user", "content": "ok"},
|
||||||
|
{"role": "assistant", "content": "yes"},
|
||||||
|
{"role": "user", "content": "ok"},
|
||||||
|
{"role": "assistant", "content": "yes"},
|
||||||
|
{"role": "user", "content": "now do the thing"},
|
||||||
|
]
|
||||||
|
assert classify_task(messages) == TaskComplexity.COMPLEX
|
||||||
|
|
||||||
|
def test_analyse_british_spelling_is_complex(self):
|
||||||
|
messages = [{"role": "user", "content": "analyse this dataset"}]
|
||||||
|
assert classify_task(messages) == TaskComplexity.COMPLEX
|
||||||
|
|
||||||
|
def test_non_string_content_is_ignored(self):
|
||||||
|
"""Non-string content should not crash the classifier."""
|
||||||
|
messages = [{"role": "user", "content": ["part1", "part2"]}]
|
||||||
|
# Should not raise; result doesn't matter — just must not blow up
|
||||||
|
result = classify_task(messages)
|
||||||
|
assert isinstance(result, TaskComplexity)
|
||||||
|
|
||||||
|
def test_system_message_not_counted_as_user(self):
|
||||||
|
"""System message alone should not trigger complex keywords."""
|
||||||
|
messages = [
|
||||||
|
{"role": "system", "content": "analyze everything carefully"},
|
||||||
|
{"role": "user", "content": "yes"},
|
||||||
|
]
|
||||||
|
# "analyze" is in system message (not user) — user says "yes" → simple
|
||||||
|
assert classify_task(messages) == TaskComplexity.SIMPLE
|
||||||
|
|
||||||
|
|
||||||
|
class TestTaskComplexityEnum:
|
||||||
|
"""Tests for TaskComplexity enum values."""
|
||||||
|
|
||||||
|
def test_simple_value(self):
|
||||||
|
assert TaskComplexity.SIMPLE.value == "simple"
|
||||||
|
|
||||||
|
def test_complex_value(self):
|
||||||
|
assert TaskComplexity.COMPLEX.value == "complex"
|
||||||
|
|
||||||
|
def test_lookup_by_value(self):
|
||||||
|
assert TaskComplexity("simple") == TaskComplexity.SIMPLE
|
||||||
|
assert TaskComplexity("complex") == TaskComplexity.COMPLEX
|
||||||
Reference in New Issue
Block a user