Co-authored-by: Claude (Opus 4.6) <claude@hermes.local> Co-committed-by: Claude (Opus 4.6) <claude@hermes.local>
135 lines
5.5 KiB
Python
135 lines
5.5 KiB
Python
"""Tests for Qwen3 dual-model task complexity classifier."""
|
|
|
|
import pytest
|
|
|
|
from infrastructure.router.classifier import TaskComplexity, classify_task
|
|
|
|
|
|
class TestClassifyTask:
    """Heuristic checks for classify_task on simple and complex inputs."""

    @staticmethod
    def _from_user(text):
        """Wrap *text* as a conversation holding a single user message."""
        return [{"role": "user", "content": text}]

    # ── Simple / routine tasks ──────────────────────────────────────────────

    def test_empty_messages_is_simple(self):
        """An empty message list is expected to be SIMPLE."""
        verdict = classify_task([])
        assert verdict == TaskComplexity.SIMPLE

    def test_no_user_content_is_simple(self):
        """A conversation with only a system message is expected to be SIMPLE."""
        system_only = [{"role": "system", "content": "You are Timmy."}]
        verdict = classify_task(system_only)
        assert verdict == TaskComplexity.SIMPLE

    def test_short_status_query_is_simple(self):
        """The one-word user query "status" is expected to be SIMPLE."""
        verdict = classify_task(self._from_user("status"))
        assert verdict == TaskComplexity.SIMPLE

    def test_list_command_is_simple(self):
        """A "list ..." user command is expected to be SIMPLE."""
        verdict = classify_task(self._from_user("list all tasks"))
        assert verdict == TaskComplexity.SIMPLE

    def test_get_command_is_simple(self):
        """A "get ..." user command is expected to be SIMPLE."""
        verdict = classify_task(self._from_user("get the latest log entry"))
        assert verdict == TaskComplexity.SIMPLE

    def test_short_message_under_threshold_is_simple(self):
        """The short user message "run the build" is expected to be SIMPLE."""
        verdict = classify_task(self._from_user("run the build"))
        assert verdict == TaskComplexity.SIMPLE

    def test_affirmation_is_simple(self):
        """A bare "yes" from the user is expected to be SIMPLE."""
        verdict = classify_task(self._from_user("yes"))
        assert verdict == TaskComplexity.SIMPLE

    # ── Complex / quality-sensitive tasks ──────────────────────────────────

    def test_plan_keyword_is_complex(self):
        """A user message starting with "plan" is expected to be COMPLEX."""
        verdict = classify_task(self._from_user("plan the sprint"))
        assert verdict == TaskComplexity.COMPLEX

    def test_review_keyword_is_complex(self):
        """A user message starting with "review" is expected to be COMPLEX."""
        verdict = classify_task(self._from_user("review this code"))
        assert verdict == TaskComplexity.COMPLEX

    def test_analyze_keyword_is_complex(self):
        """A user message starting with "analyze" is expected to be COMPLEX."""
        verdict = classify_task(self._from_user("analyze performance"))
        assert verdict == TaskComplexity.COMPLEX

    def test_triage_keyword_is_complex(self):
        """A user message starting with "triage" is expected to be COMPLEX."""
        verdict = classify_task(self._from_user("triage the open issues"))
        assert verdict == TaskComplexity.COMPLEX

    def test_refactor_keyword_is_complex(self):
        """A user message starting with "refactor" is expected to be COMPLEX."""
        verdict = classify_task(self._from_user("refactor the auth module"))
        assert verdict == TaskComplexity.COMPLEX

    def test_explain_keyword_is_complex(self):
        """A user message starting with "explain" is expected to be COMPLEX."""
        verdict = classify_task(self._from_user("explain how the router works"))
        assert verdict == TaskComplexity.COMPLEX

    def test_prioritize_keyword_is_complex(self):
        """A user message starting with "prioritize" is expected to be COMPLEX."""
        verdict = classify_task(self._from_user("prioritize the backlog"))
        assert verdict == TaskComplexity.COMPLEX

    def test_long_message_is_complex(self):
        """A 650-character user message is expected to be COMPLEX."""
        repeats = 50
        lengthy_text = "do something " * repeats  # > 500 chars
        verdict = classify_task(self._from_user(lengthy_text))
        assert verdict == TaskComplexity.COMPLEX

    def test_numbered_list_is_complex(self):
        """A user message containing numbered steps is expected to be COMPLEX."""
        numbered_steps = "1. Read the file 2. Analyze it 3. Write a report"
        verdict = classify_task(self._from_user(numbered_steps))
        assert verdict == TaskComplexity.COMPLEX

    def test_code_block_is_complex(self):
        """A user message containing a fenced code block is expected to be COMPLEX."""
        fenced_snippet = "Here is the code:\n```python\nprint('hello')\n```"
        verdict = classify_task(self._from_user(fenced_snippet))
        assert verdict == TaskComplexity.COMPLEX

    def test_deep_conversation_is_complex(self):
        """A seven-message exchange of short turns is expected to be COMPLEX."""
        turns = (
            ("user", "hi"),
            ("assistant", "hello"),
            ("user", "ok"),
            ("assistant", "yes"),
            ("user", "ok"),
            ("assistant", "yes"),
            ("user", "now do the thing"),
        )
        # Build a fresh dict per turn so no two messages share an object.
        history = [{"role": speaker, "content": text} for speaker, text in turns]
        verdict = classify_task(history)
        assert verdict == TaskComplexity.COMPLEX

    def test_analyse_british_spelling_is_complex(self):
        """The British spelling "analyse" is expected to be COMPLEX as well."""
        verdict = classify_task(self._from_user("analyse this dataset"))
        assert verdict == TaskComplexity.COMPLEX

    def test_non_string_content_is_ignored(self):
        """List-valued content must be classified without raising."""
        list_content = self._from_user(["part1", "part2"])
        # Which member comes back is not checked; only that a TaskComplexity
        # is returned instead of an exception being raised.
        verdict = classify_task(list_content)
        assert isinstance(verdict, TaskComplexity)

    def test_system_message_not_counted_as_user(self):
        """A keyword appearing only in the system message must not count."""
        system_turn = {"role": "system", "content": "analyze everything carefully"}
        user_turn = {"role": "user", "content": "yes"}
        # "analyze" is in system message (not user) — user says "yes" → simple
        verdict = classify_task([system_turn, user_turn])
        assert verdict == TaskComplexity.SIMPLE
|
|
|
|
|
|
class TestTaskComplexityEnum:
    """Checks on the string values behind the TaskComplexity members."""

    def test_simple_value(self):
        """SIMPLE carries the value "simple"."""
        simple_member = TaskComplexity.SIMPLE
        assert simple_member.value == "simple"

    def test_complex_value(self):
        """COMPLEX carries the value "complex"."""
        complex_member = TaskComplexity.COMPLEX
        assert complex_member.value == "complex"

    def test_lookup_by_value(self):
        """Calling TaskComplexity with a value string yields the matching member."""
        # Kept as two sequential lookup-then-assert steps so the second lookup
        # only runs once the first assertion has passed.
        from_simple = TaskComplexity("simple")
        assert from_simple == TaskComplexity.SIMPLE
        from_complex = TaskComplexity("complex")
        assert from_complex == TaskComplexity.COMPLEX
|