forked from Rockachopa/Timmy-time-dashboard
Co-authored-by: Claude (Opus 4.6) <claude@hermes.local> Co-committed-by: Claude (Opus 4.6) <claude@hermes.local>
This commit is contained in:
@@ -1512,3 +1512,195 @@ class TestTrySingleProvider:
|
||||
assert len(errors) == 1
|
||||
assert "boom" in errors[0]
|
||||
assert provider.metrics.failed_requests == 1
|
||||
|
||||
|
||||
class TestComplexityRouting:
    """Routing tests for the Qwen3-8B / Qwen3-14B dual-model setup (issue #1065)."""

    def _make_dual_model_provider(self) -> Provider:
        """Return an enabled Ollama provider that lists both Qwen3 models."""
        routine_model = {
            "name": "qwen3:8b",
            "capabilities": ["text", "tools", "json", "streaming", "routine"],
        }
        complex_model = {
            "name": "qwen3:14b",
            "default": True,
            "capabilities": ["text", "tools", "json", "streaming", "complex", "reasoning"],
        }
        return Provider(
            name="ollama-local",
            type="ollama",
            enabled=True,
            priority=1,
            url="http://localhost:11434",
            models=[routine_model, complex_model],
        )

    def _make_router(self, *, attach_provider=False):
        """Build a router with the routine/complex fallback chains configured.

        When ``attach_provider`` is true the dual-model provider becomes the
        router's only provider; otherwise ``router.providers`` is left as the
        constructor set it.
        """
        router = CascadeRouter(config_path=Path("/nonexistent"))
        router.config.fallback_chains = {
            "routine": ["qwen3:8b"],
            "complex": ["qwen3:14b"],
        }
        if attach_provider:
            router.providers = [self._make_dual_model_provider()]
        return router

    async def _complete_with_stubbed_ollama(self, reply, **complete_kwargs):
        """Call ``router.complete`` with ``_call_ollama`` patched to return ``reply``."""
        router = self._make_router(attach_provider=True)
        with patch.object(router, "_call_ollama") as mock_call:
            mock_call.return_value = reply
            return await router.complete(**complete_kwargs)

    def test_get_model_for_complexity_simple_returns_8b(self):
        """A SIMPLE task resolves to the 8B model listed in the 'routine' chain."""
        from infrastructure.router.classifier import TaskComplexity

        router = self._make_router()
        dual_provider = self._make_dual_model_provider()

        chosen = router._get_model_for_complexity(dual_provider, TaskComplexity.SIMPLE)
        assert chosen == "qwen3:8b"

    def test_get_model_for_complexity_complex_returns_14b(self):
        """A COMPLEX task resolves to the 14B model listed in the 'complex' chain."""
        from infrastructure.router.classifier import TaskComplexity

        router = self._make_router()
        dual_provider = self._make_dual_model_provider()

        chosen = router._get_model_for_complexity(dual_provider, TaskComplexity.COMPLEX)
        assert chosen == "qwen3:14b"

    def test_get_model_for_complexity_returns_none_when_no_match(self):
        """With empty fallback chains and an unrelated model, the lookup yields None."""
        from infrastructure.router.classifier import TaskComplexity

        router = CascadeRouter(config_path=Path("/nonexistent"))
        router.config.fallback_chains = {}  # empty chains

        # The only registered model carries neither 'routine' nor 'complex'.
        text_only_provider = Provider(
            name="test",
            type="ollama",
            enabled=True,
            priority=1,
            models=[{"name": "llama3.2:3b", "default": True, "capabilities": ["text"]}],
        )

        chosen = router._get_model_for_complexity(text_only_provider, TaskComplexity.SIMPLE)
        assert chosen is None

    @pytest.mark.asyncio
    async def test_complete_with_simple_hint_routes_to_8b(self):
        """complexity_hint='simple' should report qwen3:8b and 'simple'."""
        result = await self._complete_with_stubbed_ollama(
            {"content": "fast answer", "model": "qwen3:8b"},
            messages=[{"role": "user", "content": "list tasks"}],
            complexity_hint="simple",
        )

        assert result["model"] == "qwen3:8b"
        assert result["complexity"] == "simple"

    @pytest.mark.asyncio
    async def test_complete_with_complex_hint_routes_to_14b(self):
        """complexity_hint='complex' should report qwen3:14b and 'complex'."""
        result = await self._complete_with_stubbed_ollama(
            {"content": "detailed answer", "model": "qwen3:14b"},
            messages=[{"role": "user", "content": "review this PR"}],
            complexity_hint="complex",
        )

        assert result["model"] == "qwen3:14b"
        assert result["complexity"] == "complex"

    @pytest.mark.asyncio
    async def test_explicit_model_bypasses_complexity_routing(self):
        """An explicitly requested model wins and no complexity is reported."""
        result = await self._complete_with_stubbed_ollama(
            {"content": "response", "model": "qwen3:14b"},
            messages=[{"role": "user", "content": "list tasks"}],
            model="qwen3:14b",  # explicit override
        )

        # Explicit model wins, so the complexity field is expected to be None.
        assert result["model"] == "qwen3:14b"
        assert result["complexity"] is None

    @pytest.mark.asyncio
    async def test_auto_classification_routes_simple_message(self):
        """A short message without a hint should auto-classify as simple → 8B."""
        # No complexity_hint is passed, so the router must classify on its own.
        result = await self._complete_with_stubbed_ollama(
            {"content": "ok", "model": "qwen3:8b"},
            messages=[{"role": "user", "content": "status"}],
        )

        assert result["complexity"] == "simple"
        assert result["model"] == "qwen3:8b"

    @pytest.mark.asyncio
    async def test_auto_classification_routes_complex_message(self):
        """A message with complex keywords should auto-classify as complex → 14B."""
        result = await self._complete_with_stubbed_ollama(
            {"content": "deep analysis", "model": "qwen3:14b"},
            messages=[{"role": "user", "content": "analyze and prioritize the backlog"}],
        )

        assert result["complexity"] == "complex"
        assert result["model"] == "qwen3:14b"

    @pytest.mark.asyncio
    async def test_invalid_complexity_hint_falls_back_to_auto(self):
        """An unrecognised complexity_hint must not raise; a valid complexity is reported."""
        # The call itself is the "should not raise" check.
        result = await self._complete_with_stubbed_ollama(
            {"content": "ok", "model": "qwen3:8b"},
            messages=[{"role": "user", "content": "status"}],
            complexity_hint="INVALID_HINT",
        )

        assert result["complexity"] in ("simple", "complex")  # auto-classified
|
||||
|
||||
134
tests/infrastructure/test_router_classifier.py
Normal file
134
tests/infrastructure/test_router_classifier.py
Normal file
@@ -0,0 +1,134 @@
|
||||
"""Tests for Qwen3 dual-model task complexity classifier."""
|
||||
|
||||
import pytest
|
||||
|
||||
from infrastructure.router.classifier import TaskComplexity, classify_task
|
||||
|
||||
|
||||
class TestClassifyTask:
    """Checks for the ``classify_task`` heuristics."""

    @staticmethod
    def _classify_user_text(text):
        """Classify a conversation made of a single user message with ``text``."""
        return classify_task([{"role": "user", "content": text}])

    # ── Simple / routine tasks ──────────────────────────────────────────────

    def test_empty_messages_is_simple(self):
        outcome = classify_task([])
        assert outcome == TaskComplexity.SIMPLE

    def test_no_user_content_is_simple(self):
        system_only = [{"role": "system", "content": "You are Timmy."}]
        assert classify_task(system_only) == TaskComplexity.SIMPLE

    def test_short_status_query_is_simple(self):
        assert self._classify_user_text("status") == TaskComplexity.SIMPLE

    def test_list_command_is_simple(self):
        assert self._classify_user_text("list all tasks") == TaskComplexity.SIMPLE

    def test_get_command_is_simple(self):
        assert self._classify_user_text("get the latest log entry") == TaskComplexity.SIMPLE

    def test_short_message_under_threshold_is_simple(self):
        assert self._classify_user_text("run the build") == TaskComplexity.SIMPLE

    def test_affirmation_is_simple(self):
        assert self._classify_user_text("yes") == TaskComplexity.SIMPLE

    # ── Complex / quality-sensitive tasks ──────────────────────────────────

    def test_plan_keyword_is_complex(self):
        assert self._classify_user_text("plan the sprint") == TaskComplexity.COMPLEX

    def test_review_keyword_is_complex(self):
        assert self._classify_user_text("review this code") == TaskComplexity.COMPLEX

    def test_analyze_keyword_is_complex(self):
        assert self._classify_user_text("analyze performance") == TaskComplexity.COMPLEX

    def test_triage_keyword_is_complex(self):
        assert self._classify_user_text("triage the open issues") == TaskComplexity.COMPLEX

    def test_refactor_keyword_is_complex(self):
        assert self._classify_user_text("refactor the auth module") == TaskComplexity.COMPLEX

    def test_explain_keyword_is_complex(self):
        assert self._classify_user_text("explain how the router works") == TaskComplexity.COMPLEX

    def test_prioritize_keyword_is_complex(self):
        assert self._classify_user_text("prioritize the backlog") == TaskComplexity.COMPLEX

    def test_long_message_is_complex(self):
        padded_text = "do something " * 50  # > 500 chars
        assert self._classify_user_text(padded_text) == TaskComplexity.COMPLEX

    def test_numbered_list_is_complex(self):
        numbered_steps = "1. Read the file 2. Analyze it 3. Write a report"
        assert self._classify_user_text(numbered_steps) == TaskComplexity.COMPLEX

    def test_code_block_is_complex(self):
        fenced_snippet = "Here is the code:\n```python\nprint('hello')\n```"
        assert self._classify_user_text(fenced_snippet) == TaskComplexity.COMPLEX

    def test_deep_conversation_is_complex(self):
        # Seven alternating turns of short messages, ending on a user turn.
        transcript = [
            {"role": "user", "content": "hi"},
            {"role": "assistant", "content": "hello"},
            {"role": "user", "content": "ok"},
            {"role": "assistant", "content": "yes"},
            {"role": "user", "content": "ok"},
            {"role": "assistant", "content": "yes"},
            {"role": "user", "content": "now do the thing"},
        ]
        assert classify_task(transcript) == TaskComplexity.COMPLEX

    def test_analyse_british_spelling_is_complex(self):
        assert self._classify_user_text("analyse this dataset") == TaskComplexity.COMPLEX

    def test_non_string_content_is_ignored(self):
        """A list-valued ``content`` must not make the classifier raise."""
        multipart = [{"role": "user", "content": ["part1", "part2"]}]
        # Only the absence of an exception and the return type are checked here.
        outcome = classify_task(multipart)
        assert isinstance(outcome, TaskComplexity)

    def test_system_message_not_counted_as_user(self):
        """A complex keyword in the system message alone should not count."""
        conversation = [
            {"role": "system", "content": "analyze everything carefully"},
            {"role": "user", "content": "yes"},
        ]
        # "analyze" appears only in the system turn; the user turn is just "yes".
        assert classify_task(conversation) == TaskComplexity.SIMPLE
|
||||
|
||||
|
||||
class TestTaskComplexityEnum:
    """Checks on the string values behind the ``TaskComplexity`` members."""

    def test_simple_value(self):
        simple_value = TaskComplexity.SIMPLE.value
        assert simple_value == "simple"

    def test_complex_value(self):
        complex_value = TaskComplexity.COMPLEX.value
        assert complex_value == "complex"

    def test_lookup_by_value(self):
        # Constructing the enum from its raw value must give back the member.
        expected_members = (
            ("simple", TaskComplexity.SIMPLE),
            ("complex", TaskComplexity.COMPLEX),
        )
        for raw_value, member in expected_members:
            assert TaskComplexity(raw_value) == member
|
||||
Reference in New Issue
Block a user