""" Enhanced Task Classifier for Uniwizard Classifies incoming prompts into task types and maps them to ranked backend preferences. Integrates with the 7-backend fallback chain defined in config.yaml. """ from __future__ import annotations import re from dataclasses import dataclass from enum import Enum from typing import Any, Dict, List, Optional, Set, Tuple class TaskType(Enum): """Classification categories for incoming prompts.""" CODE = "code" REASONING = "reasoning" RESEARCH = "research" CREATIVE = "creative" FAST_OPS = "fast_ops" TOOL_USE = "tool_use" UNKNOWN = "unknown" class ComplexityLevel(Enum): """Complexity tiers for prompt analysis.""" LOW = "low" MEDIUM = "medium" HIGH = "high" # Backend identifiers (match fallback_providers chain order) BACKEND_ANTHROPIC = "anthropic" BACKEND_OPENAI_CODEX = "openai-codex" BACKEND_GEMINI = "gemini" BACKEND_GROQ = "groq" BACKEND_GROK = "grok" BACKEND_KIMI = "kimi-coding" BACKEND_OPENROUTER = "openrouter" ALL_BACKENDS = [ BACKEND_ANTHROPIC, BACKEND_OPENAI_CODEX, BACKEND_GEMINI, BACKEND_GROQ, BACKEND_GROK, BACKEND_KIMI, BACKEND_OPENROUTER, ] # Task-specific keyword mappings CODE_KEYWORDS: Set[str] = { "code", "coding", "program", "programming", "function", "class", "implement", "implementation", "refactor", "debug", "debugging", "error", "exception", "traceback", "stacktrace", "test", "tests", "pytest", "unittest", "import", "module", "package", "library", "api", "endpoint", "route", "middleware", "database", "query", "sql", "orm", "migration", "deploy", "docker", "kubernetes", "k8s", "ci/cd", "pipeline", "build", "compile", "syntax", "lint", "format", "black", "flake8", "mypy", "type", "typing", "async", "await", "callback", "promise", "thread", "process", "concurrency", "parallel", "optimization", "optimize", "performance", "memory", "leak", "bug", "fix", "patch", "commit", "git", "repository", "repo", "clone", "fork", "merge", "conflict", "branch", "pull request", "pr", "review", "crud", "rest", "graphql", "json", "xml", "yaml", "toml", "csv", "parse", "regex", "regular expression", "string", "bytes", "encoding", "decoding", "serialize", "deserialize", "marshal", "unmarshal", "encrypt", "decrypt", "hash", "checksum", "signature", "jwt", "oauth", "authentication", "authorization", "auth", "login", "logout", "session", "cookie", "token", "permission", "role", "rbac", "acl", "security", "vulnerability", "cve", "exploit", "sandbox", "isolate", "container", "vm", "virtual machine", } REASONING_KEYWORDS: Set[str] = { "analyze", "analysis", "investigate", "investigation", "compare", "comparison", "contrast", "evaluate", "evaluation", "assess", "assessment", "reason", "reasoning", "logic", "logical", "deduce", "deduction", "infer", "inference", "synthesize", "synthesis", "critique", "criticism", "review", "argument", "premise", "conclusion", "evidence", "proof", "theorem", "axiom", "corollary", "lemma", "proposition", "hypothesis", "theory", "model", "framework", "paradigm", "philosophy", "ethical", "ethics", "moral", "morality", "implication", "consequence", "trade-off", "tradeoff", "pros and cons", "advantage", "disadvantage", "benefit", "drawback", "risk", "mitigation", "strategy", "strategic", "plan", "planning", "design", "architecture", "system", "complex", "complicated", "nuanced", "subtle", "sophisticated", "rigorous", "thorough", "comprehensive", "exhaustive", "step by step", "chain of thought", "think through", "work through", "figure out", "understand", "comprehend", } RESEARCH_KEYWORDS: Set[str] = { "research", "find", "search", "look up", "lookup", "investigate", "study", "explore", "discover", "paper", "publication", "journal", "article", "study", "arxiv", "scholar", "academic", "scientific", "literature", "review", "survey", "meta-analysis", "bibliography", "citation", "reference", "source", "primary source", "secondary source", "peer review", "empirical", "experiment", "experimental", "observational", "longitudinal", "cross-sectional", "qualitative", "quantitative", "mixed methods", "case study", "dataset", "data", "statistics", "statistical", "correlation", "causation", "regression", "machine learning", "ml", "ai", "neural network", "deep learning", "transformer", "llm", "benchmark", "evaluation", "metric", "sota", "state of the art", "survey", "poll", "interview", "focus group", "ethnography", "field work", "archive", "archival", "repository", "collection", "index", "catalog", "database", "librar", "museum", "histor", "genealogy", "ancestry", "patent", "trademark", "copyright", "legislation", "regulation", "policy", "compliance", } CREATIVE_KEYWORDS: Set[str] = { "create", "creative", "creativity", "design", "designer", "art", "artistic", "artist", "paint", "painting", "draw", "drawing", "sketch", "illustration", "illustrator", "graphic", "visual", "image", "photo", "photography", "photographer", "video", "film", "movie", "animation", "animate", "motion", "music", "musical", "song", "lyric", "compose", "composition", "melody", "harmony", "rhythm", "beat", "sound", "audio", "write", "writing", "writer", "author", "story", "storytelling", "narrative", "plot", "character", "dialogue", "scene", "novel", "fiction", "short story", "poem", "poetry", "poet", "verse", "prose", "essay", "blog", "article", "content", "copy", "copywriting", "marketing", "brand", "branding", "slogan", "tagline", "headline", "title", "name", "naming", "brainstorm", "ideate", "concept", "conceptualize", "imagine", "imagination", "inspire", "inspiration", "muse", "vision", "aesthetic", "style", "theme", "mood", "tone", "voice", "unique", "original", "fresh", "novel", "innovative", "unconventional", "experimental", "avant-garde", "edgy", "humor", "funny", "comedy", "satire", "parody", "wit", "romance", "romantic", "drama", "dramatic", "thriller", "mystery", "horror", "sci-fi", "science fiction", "fantasy", "adventure", "action", "documentary", "biopic", "memoir", } FAST_OPS_KEYWORDS: Set[str] = { "quick", "fast", "brief", "short", "simple", "easy", "status", "check", "list", "ls", "show", "display", "get", "fetch", "retrieve", "read", "cat", "view", "summary", "summarize", "tl;dr", "tldr", "overview", "count", "number", "how many", "total", "sum", "average", "min", "max", "sort", "filter", "grep", "search", "find", "locate", "which", "where", "what is", "what's", "who", "when", "yes/no", "confirm", "verify", "validate", "ping", "health", "alive", "up", "running", "online", "date", "time", "timezone", "clock", "timer", "alarm", "remind", "reminder", "note", "jot", "save", "store", "delete", "remove", "rm", "clean", "clear", "purge", "start", "stop", "restart", "enable", "disable", "toggle", "on", "off", "open", "close", "switch", "change", "set", "update", "upgrade", "install", "uninstall", "download", "upload", "sync", "backup", "restore", "export", "import", "convert", "transform", "format", "parse", "extract", "compress", "decompress", "zip", "unzip", "tar", "archive", "copy", "cp", "move", "mv", "rename", "link", "symlink", "permission", "chmod", "chown", "access", "ownership", "hello", "hi", "hey", "greeting", "thanks", "thank you", "bye", "goodbye", "help", "?", "how to", "how do i", } TOOL_USE_KEYWORDS: Set[str] = { "tool", "tools", "use tool", "call tool", "invoke", "run command", "execute", "terminal", "shell", "bash", "zsh", "powershell", "cmd", "command line", "cli", "file", "files", "directory", "folder", "path", "fs", "read file", "write file", "edit file", "patch file", "search files", "find files", "grep", "rg", "ack", "browser", "web", "navigate", "click", "scroll", "screenshot", "vision", "image", "analyze image", "delegate", "subagent", "agent", "spawn", "task", "mcp", "server", "mcporter", "protocol", "process", "background", "kill", "signal", "pid", "git", "commit", "push", "pull", "clone", "branch", "docker", "container", "compose", "dockerfile", "kubernetes", "kubectl", "k8s", "pod", "deployment", "aws", "gcp", "azure", "cloud", "s3", "bucket", "database", "db", "sql", "query", "migrate", "seed", "api", "endpoint", "request", "response", "curl", "http", "https", "rest", "graphql", "websocket", "json", "xml", "yaml", "csv", "parse", "serialize", "scrap", "crawl", "extract", "parse html", "xpath", "schedule", "cron", "job", "task queue", "worker", "notification", "alert", "webhook", "event", "trigger", } # URL pattern for detecting web/research tasks _URL_PATTERN = re.compile( r'https?://(?:[-\w.])+(?:[:\d]+)?(?:/(?:[\w/_.])*(?:\?(?:[\w&=%.])*)?(?:#(?:[\w.])*)?)?', re.IGNORECASE ) # Code block detection (count ``` blocks, not individual lines) _CODE_BLOCK_PATTERN = re.compile(r'```[\w]*\n', re.MULTILINE) def _count_code_blocks(text: str) -> int: """Count complete code blocks (opening ``` to closing ```).""" # Count pairs of ``` - each pair is one code block fence_count = text.count('```') return fence_count // 2 _INLINE_CODE_PATTERN = re.compile(r'`[^`]+`') # Complexity thresholds COMPLEXITY_THRESHOLDS = { "chars": {"low": 200, "medium": 800}, "words": {"low": 35, "medium": 150}, "lines": {"low": 3, "medium": 15}, "urls": {"low": 0, "medium": 2}, "code_blocks": {"low": 0, "medium": 1}, } @dataclass class ClassificationResult: """Result of task classification.""" task_type: TaskType preferred_backends: List[str] complexity: ComplexityLevel reason: str confidence: float features: Dict[str, Any] class TaskClassifier: """ Enhanced task classifier for routing prompts to appropriate backends. Maps task types to ranked backend preferences based on: - Backend strengths (coding, reasoning, speed, context length, etc.) - Message complexity (length, structure, keywords) - Detected features (URLs, code blocks, specific terminology) """ # Backend preference rankings by task type # Order matters: first is most preferred TASK_BACKEND_MAP: Dict[TaskType, List[str]] = { TaskType.CODE: [ BACKEND_OPENAI_CODEX, # Best for code generation BACKEND_ANTHROPIC, # Excellent for code review, complex analysis BACKEND_KIMI, # Long context for large codebases BACKEND_GEMINI, # Good multimodal code understanding BACKEND_GROQ, # Fast for simple code tasks BACKEND_OPENROUTER, # Overflow option BACKEND_GROK, # General knowledge backup ], TaskType.REASONING: [ BACKEND_ANTHROPIC, # Deep reasoning champion BACKEND_GEMINI, # Strong analytical capabilities BACKEND_KIMI, # Long context for complex reasoning chains BACKEND_GROK, # Broad knowledge for reasoning BACKEND_OPENAI_CODEX, # Structured reasoning BACKEND_OPENROUTER, # Overflow BACKEND_GROQ, # Fast fallback ], TaskType.RESEARCH: [ BACKEND_GEMINI, # Research and multimodal leader BACKEND_KIMI, # 262K context for long documents BACKEND_ANTHROPIC, # Deep analysis BACKEND_GROK, # Broad knowledge BACKEND_OPENROUTER, # Broadest model access BACKEND_OPENAI_CODEX, # Structured research BACKEND_GROQ, # Fast triage ], TaskType.CREATIVE: [ BACKEND_GROK, # Creative writing and drafting BACKEND_ANTHROPIC, # Nuanced creative work BACKEND_GEMINI, # Multimodal creativity BACKEND_OPENAI_CODEX, # Creative coding BACKEND_KIMI, # Long-form creative BACKEND_OPENROUTER, # Variety of creative models BACKEND_GROQ, # Fast creative ops ], TaskType.FAST_OPS: [ BACKEND_GROQ, # 284ms response time champion BACKEND_OPENROUTER, # Fast mini models BACKEND_GEMINI, # Flash models BACKEND_GROK, # Fast for simple queries BACKEND_ANTHROPIC, # If precision needed BACKEND_OPENAI_CODEX, # Structured ops BACKEND_KIMI, # Overflow ], TaskType.TOOL_USE: [ BACKEND_ANTHROPIC, # Excellent tool use capabilities BACKEND_OPENAI_CODEX, # Good tool integration BACKEND_GEMINI, # Multimodal tool use BACKEND_GROQ, # Fast tool chaining BACKEND_KIMI, # Long context tool sessions BACKEND_OPENROUTER, # Overflow BACKEND_GROK, # General tool use ], TaskType.UNKNOWN: [ BACKEND_ANTHROPIC, # Default to strongest general model BACKEND_GEMINI, # Good all-rounder BACKEND_OPENAI_CODEX, # Structured approach BACKEND_KIMI, # Long context safety BACKEND_GROK, # Broad knowledge BACKEND_GROQ, # Fast fallback BACKEND_OPENROUTER, # Ultimate overflow ], } def __init__(self): """Initialize the classifier with compiled patterns.""" self.url_pattern = _URL_PATTERN self.code_block_pattern = _CODE_BLOCK_PATTERN self.inline_code_pattern = _INLINE_CODE_PATTERN def classify( self, prompt: str, context: Optional[Dict[str, Any]] = None ) -> ClassificationResult: """ Classify a prompt and return routing recommendation. Args: prompt: The user message to classify context: Optional context (previous messages, session state, etc.) Returns: ClassificationResult with task type, preferred backends, complexity, and reasoning """ text = (prompt or "").strip() if not text: return self._default_result("Empty prompt") # Extract features features = self._extract_features(text) # Determine complexity complexity = self._assess_complexity(features) # Classify task type task_type, task_confidence, task_reason = self._classify_task_type(text, features) # Get preferred backends preferred_backends = self._get_backends_for_task(task_type, complexity, features) # Build reason string reason = self._build_reason(task_type, complexity, task_reason, features) return ClassificationResult( task_type=task_type, preferred_backends=preferred_backends, complexity=complexity, reason=reason, confidence=task_confidence, features=features, ) def _extract_features(self, text: str) -> Dict[str, Any]: """Extract features from the prompt text.""" lowered = text.lower() words = set(token.strip(".,:;!?()[]{}\"'`") for token in lowered.split()) # Count code blocks (complete ``` pairs) code_blocks = _count_code_blocks(text) inline_code = len(self.inline_code_pattern.findall(text)) # Count URLs urls = self.url_pattern.findall(text) # Count lines lines = text.count('\n') + 1 return { "char_count": len(text), "word_count": len(text.split()), "line_count": lines, "url_count": len(urls), "urls": urls, "code_block_count": code_blocks, "inline_code_count": inline_code, "has_code": code_blocks > 0 or inline_code > 0, "unique_words": words, "lowercased_text": lowered, } def _assess_complexity(self, features: Dict[str, Any]) -> ComplexityLevel: """Assess the complexity level of the prompt.""" scores = { "chars": features["char_count"], "words": features["word_count"], "lines": features["line_count"], "urls": features["url_count"], "code_blocks": features["code_block_count"], } # Count how many metrics exceed medium threshold medium_count = 0 high_count = 0 for metric, value in scores.items(): thresholds = COMPLEXITY_THRESHOLDS.get(metric, {"low": 0, "medium": 0}) if value > thresholds["medium"]: high_count += 1 elif value > thresholds["low"]: medium_count += 1 # Determine complexity if high_count >= 2 or scores["code_blocks"] > 2: return ComplexityLevel.HIGH elif medium_count >= 2 or high_count >= 1: return ComplexityLevel.MEDIUM else: return ComplexityLevel.LOW def _classify_task_type( self, text: str, features: Dict[str, Any] ) -> Tuple[TaskType, float, str]: """ Classify the task type based on keywords and features. Returns: Tuple of (task_type, confidence, reason) """ words = features["unique_words"] lowered = features["lowercased_text"] # Score each task type scores: Dict[TaskType, float] = {task: 0.0 for task in TaskType} reasons: Dict[TaskType, str] = {} # CODE scoring code_matches = words & CODE_KEYWORDS if features["has_code"]: scores[TaskType.CODE] += 2.0 reasons[TaskType.CODE] = "Contains code blocks" if code_matches: scores[TaskType.CODE] += min(len(code_matches) * 0.5, 3.0) if TaskType.CODE not in reasons: reasons[TaskType.CODE] = f"Code keywords: {', '.join(list(code_matches)[:3])}" # REASONING scoring reasoning_matches = words & REASONING_KEYWORDS if reasoning_matches: scores[TaskType.REASONING] += min(len(reasoning_matches) * 0.4, 2.5) reasons[TaskType.REASONING] = f"Reasoning keywords: {', '.join(list(reasoning_matches)[:3])}" if any(phrase in lowered for phrase in ["step by step", "chain of thought", "think through"]): scores[TaskType.REASONING] += 1.5 reasons[TaskType.REASONING] = "Explicit reasoning request" # RESEARCH scoring research_matches = words & RESEARCH_KEYWORDS if features["url_count"] > 0: scores[TaskType.RESEARCH] += 1.5 reasons[TaskType.RESEARCH] = f"Contains {features['url_count']} URL(s)" if research_matches: scores[TaskType.RESEARCH] += min(len(research_matches) * 0.4, 2.0) if TaskType.RESEARCH not in reasons: reasons[TaskType.RESEARCH] = f"Research keywords: {', '.join(list(research_matches)[:3])}" # CREATIVE scoring creative_matches = words & CREATIVE_KEYWORDS if creative_matches: scores[TaskType.CREATIVE] += min(len(creative_matches) * 0.4, 2.5) reasons[TaskType.CREATIVE] = f"Creative keywords: {', '.join(list(creative_matches)[:3])}" # FAST_OPS scoring (simple queries) - ONLY if no other strong signals fast_ops_matches = words & FAST_OPS_KEYWORDS is_very_short = features["word_count"] <= 5 and features["char_count"] < 50 # Only score fast_ops if it's very short OR has no other task indicators other_scores_possible = bool( (words & CODE_KEYWORDS) or (words & REASONING_KEYWORDS) or (words & RESEARCH_KEYWORDS) or (words & CREATIVE_KEYWORDS) or (words & TOOL_USE_KEYWORDS) or features["has_code"] ) if is_very_short and not other_scores_possible: scores[TaskType.FAST_OPS] += 1.5 reasons[TaskType.FAST_OPS] = "Very short, simple query" elif not other_scores_possible and fast_ops_matches and features["word_count"] < 30: scores[TaskType.FAST_OPS] += min(len(fast_ops_matches) * 0.3, 1.0) reasons[TaskType.FAST_OPS] = f"Simple query keywords: {', '.join(list(fast_ops_matches)[:3])}" # TOOL_USE scoring tool_matches = words & TOOL_USE_KEYWORDS if tool_matches: scores[TaskType.TOOL_USE] += min(len(tool_matches) * 0.4, 2.0) reasons[TaskType.TOOL_USE] = f"Tool keywords: {', '.join(list(tool_matches)[:3])}" if any(cmd in lowered for cmd in ["run ", "execute ", "call ", "use "]): scores[TaskType.TOOL_USE] += 0.5 # Find highest scoring task type best_task = TaskType.UNKNOWN best_score = 0.0 for task, score in scores.items(): if score > best_score: best_score = score best_task = task # Calculate confidence confidence = min(best_score / 4.0, 1.0) if best_score > 0 else 0.0 reason = reasons.get(best_task, "No strong indicators") return best_task, confidence, reason def _get_backends_for_task( self, task_type: TaskType, complexity: ComplexityLevel, features: Dict[str, Any] ) -> List[str]: """Get ranked list of preferred backends for the task.""" base_backends = self.TASK_BACKEND_MAP.get(task_type, self.TASK_BACKEND_MAP[TaskType.UNKNOWN]) # Adjust for complexity if complexity == ComplexityLevel.HIGH and task_type in (TaskType.RESEARCH, TaskType.CODE): # For high complexity, prioritize long-context models if BACKEND_KIMI in base_backends: # Move kimi earlier for long context base_backends = self._prioritize_backend(base_backends, BACKEND_KIMI, 2) if BACKEND_GEMINI in base_backends: base_backends = self._prioritize_backend(base_backends, BACKEND_GEMINI, 3) elif complexity == ComplexityLevel.LOW and task_type == TaskType.FAST_OPS: # For simple ops, ensure GROQ is first base_backends = self._prioritize_backend(base_backends, BACKEND_GROQ, 0) # Adjust for code presence if features["has_code"] and task_type != TaskType.CODE: # Boost OpenAI Codex if there's code but not explicitly a code task base_backends = self._prioritize_backend(base_backends, BACKEND_OPENAI_CODEX, 2) return list(base_backends) def _prioritize_backend( self, backends: List[str], target: str, target_index: int ) -> List[str]: """Move a backend to a specific index in the list.""" if target not in backends: return backends new_backends = list(backends) new_backends.remove(target) new_backends.insert(min(target_index, len(new_backends)), target) return new_backends def _build_reason( self, task_type: TaskType, complexity: ComplexityLevel, task_reason: str, features: Dict[str, Any] ) -> str: """Build a human-readable reason string.""" parts = [ f"Task: {task_type.value}", f"Complexity: {complexity.value}", ] if task_reason: parts.append(f"Indicators: {task_reason}") # Add feature summary feature_parts = [] if features["has_code"]: feature_parts.append(f"{features['code_block_count']} code block(s)") if features["url_count"] > 0: feature_parts.append(f"{features['url_count']} URL(s)") if features["word_count"] > 100: feature_parts.append(f"{features['word_count']} words") if feature_parts: parts.append(f"Features: {', '.join(feature_parts)}") return "; ".join(parts) def _default_result(self, reason: str) -> ClassificationResult: """Return a default result for edge cases.""" return ClassificationResult( task_type=TaskType.UNKNOWN, preferred_backends=list(self.TASK_BACKEND_MAP[TaskType.UNKNOWN]), complexity=ComplexityLevel.LOW, reason=reason, confidence=0.0, features={}, ) def to_dict(self, result: ClassificationResult) -> Dict[str, Any]: """Convert classification result to dictionary format.""" return { "task_type": result.task_type.value, "preferred_backends": result.preferred_backends, "complexity": result.complexity.value, "reason": result.reason, "confidence": round(result.confidence, 2), "features": { k: v for k, v in result.features.items() if k not in ("unique_words", "lowercased_text", "urls") }, } # Convenience function for direct usage def classify_prompt( prompt: str, context: Optional[Dict[str, Any]] = None ) -> Dict[str, Any]: """ Classify a prompt and return routing recommendation as a dictionary. Args: prompt: The user message to classify context: Optional context (previous messages, session state, etc.) Returns: Dictionary with task_type, preferred_backends, complexity, reason, confidence """ classifier = TaskClassifier() result = classifier.classify(prompt, context) return classifier.to_dict(result) if __name__ == "__main__": # Example usage and quick test test_prompts = [ "Hello, how are you?", "Implement a Python function to calculate fibonacci numbers", "Analyze the architectural trade-offs between microservices and monoliths", "Research the latest papers on transformer architectures", "Write a creative story about AI", "Check the status of the server and list running processes", "Use the browser to navigate to https://example.com and take a screenshot", "Refactor this large codebase: [2000 lines of code]", ] classifier = TaskClassifier() for prompt in test_prompts: result = classifier.classify(prompt) print(f"\nPrompt: {prompt[:60]}...") print(f" Type: {result.task_type.value}") print(f" Complexity: {result.complexity.value}") print(f" Confidence: {result.confidence:.2f}") print(f" Backends: {', '.join(result.preferred_backends[:3])}") print(f" Reason: {result.reason}")