fix: correct complexity routing to not fall back to default model

`_get_model_for_complexity` was calling `get_model_with_capability`, which silently falls back to the provider default when no model has the requested capability tag. This caused the method to return a generic model instead of None when neither the fallback chain nor any explicit capability tag matched, misleading callers into skipping the provider default logic. Replace the call with an explicit `next()` over a generator expression that returns None when no model explicitly carries the 'routine' or 'complex' capability.

Refs #1065
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
WIP: Claude Code progress on #1065
2026-03-23 15:30:23 -04:00 · 2026-03-23 14:41:42 -04:00
11 changed files with 724 additions and 1511 deletions
--- a/config/providers.yaml
+++ b/config/providers.yaml
@@ -25,6 +25,19 @@ providers:
    tier: local
    url: "http://localhost:11434"
    models:
+      # ── Dual-model routing: Qwen3-8B (fast) + Qwen3-14B (quality) ──────────
+      # Both models fit simultaneously: ~6.6 GB + ~10.5 GB = ~17 GB combined.
+      # Requires OLLAMA_MAX_LOADED_MODELS=2 (set in .env) to stay hot.
+      # Ref: issue #1065 — Qwen3-8B/14B dual-model routing strategy
+      - name: qwen3:8b
+        context_window: 32768
+        capabilities: [text, tools, json, streaming, routine]
+        description: "Qwen3-8B Q6_K — fast router for routine tasks (~6.6 GB, 45-55 tok/s)"
+      - name: qwen3:14b
+        context_window: 40960
+        capabilities: [text, tools, json, streaming, complex, reasoning]
+        description: "Qwen3-14B Q5_K_M — complex reasoning and planning (~10.5 GB, 20-28 tok/s)"
+
      # Text + Tools models
      - name: qwen3:30b
        default: true
@@ -187,6 +200,20 @@ fallback_chains:
    - dolphin3          # base Dolphin 3.0 8B (uncensored, no custom system prompt)
    - qwen3:30b         # primary fallback — usually sufficient with a good system prompt

+  # ── Complexity-based routing chains (issue #1065) ───────────────────────
+  # Routine tasks: prefer Qwen3-8B for low latency (~45-55 tok/s)
+  routine:
+    - qwen3:8b              # Primary fast model
+    - llama3.1:8b-instruct  # Fallback fast model
+    - llama3.2:3b           # Smallest available
+
+  # Complex tasks: prefer Qwen3-14B for quality (~20-28 tok/s)
+  complex:
+    - qwen3:14b             # Primary quality model
+    - hermes4-14b           # Native tool calling, hybrid reasoning
+    - qwen3:30b             # Highest local quality
+    - qwen2.5:14b           # Additional fallback
+
 # ── Custom Models ───────────────────────────────────────────────────────────
 # Register custom model weights for per-agent assignment.
 # Supports GGUF (Ollama), safetensors, and HuggingFace checkpoint dirs.
--- a/src/config.py
+++ b/src/config.py
@@ -41,6 +41,13 @@ class Settings(BaseSettings):
    # 4096 keeps memory at ~19GB. Set to 0 to use model defaults.
    ollama_num_ctx: int = 4096

+    # Maximum models loaded simultaneously in Ollama — override with OLLAMA_MAX_LOADED_MODELS
+    # Set to 2 so Qwen3-8B and Qwen3-14B can stay hot concurrently (~17 GB combined).
+    # Requires Ollama ≥ 0.1.33.  Export this to the Ollama process environment:
+    #   OLLAMA_MAX_LOADED_MODELS=2 ollama serve
+    # or add it to your systemd/launchd unit before starting the harness.
+    ollama_max_loaded_models: int = 2
+
    # Fallback model chains — override with FALLBACK_MODELS / VISION_FALLBACK_MODELS
    # as comma-separated strings, e.g. FALLBACK_MODELS="qwen3:30b,llama3.1"
    # Or edit config/providers.yaml → fallback_chains for the canonical source.
@@ -304,16 +311,6 @@ class Settings(BaseSettings):
    mcp_timeout: int = 15
    mcp_bridge_timeout: int = 60  # HTTP timeout for MCP bridge Ollama calls (seconds)

-    # ── Backlog Triage Loop ────────────────────────────────────────────
-    # Autonomous loop: fetch open issues, score, assign to agents.
-    backlog_triage_enabled: bool = False
-    # Seconds between triage cycles (default: 15 minutes).
-    backlog_triage_interval_seconds: int = 900
-    # When True, score and summarize but don't write to Gitea.
-    backlog_triage_dry_run: bool = False
-    # Create a daily triage summary issue/comment.
-    backlog_triage_daily_summary: bool = True
-
    # ── Loop QA (Self-Testing) ─────────────────────────────────────────
    # Self-test orchestrator that probes capabilities alongside the thinking loop.
    loop_qa_enabled: bool = True
--- a/src/infrastructure/router/__init__.py
+++ b/src/infrastructure/router/__init__.py
@@ -2,6 +2,7 @@

 from .api import router
 from .cascade import CascadeRouter, Provider, ProviderStatus, get_router
+from .classifier import TaskComplexity, classify_task
 from .history import HealthHistoryStore, get_history_store

 __all__ = [
@@ -12,4 +13,6 @@ __all__ = [
    "router",
    "HealthHistoryStore",
    "get_history_store",
+    "TaskComplexity",
+    "classify_task",
 ]
--- a/src/infrastructure/router/cascade.py
+++ b/src/infrastructure/router/cascade.py
@@ -528,6 +528,34 @@ class CascadeRouter:

        return True

+    def _get_model_for_complexity(
+        self, provider: Provider, complexity: "TaskComplexity"
+    ) -> str | None:
+        """Return the best model on *provider* for the given complexity tier.
+
+        Checks fallback chains first (routine / complex), then any model that
+        explicitly carries the matching capability tag; otherwise returns None.
+        """
+        from infrastructure.router.classifier import TaskComplexity
+
+        chain_key = "routine" if complexity == TaskComplexity.SIMPLE else "complex"
+
+        # Walk the capability fallback chain — first model present on this provider wins
+        for model_name in self.config.fallback_chains.get(chain_key, []):
+            if any(m["name"] == model_name for m in provider.models):
+                return model_name
+
+        # Direct capability lookup — only return if a model explicitly has the tag
+        # (do not use get_model_with_capability here as it falls back to the default)
+        cap_model = next(
+            (m["name"] for m in provider.models if chain_key in m.get("capabilities", [])),
+            None,
+        )
+        if cap_model:
+            return cap_model
+
+        return None  # Caller will use provider default
+
    async def complete(
        self,
        messages: list[dict],
@@ -535,6 +563,7 @@ class CascadeRouter:
        temperature: float = 0.7,
        max_tokens: int | None = None,
        cascade_tier: str | None = None,
+        complexity_hint: str | None = None,
    ) -> dict:
        """Complete a chat conversation with automatic failover.

@@ -543,24 +572,48 @@ class CascadeRouter:
        - Falls back to vision-capable models when needed
        - Supports image URLs, paths, and base64 encoding

+        Complexity-based routing (issue #1065):
+        - ``complexity_hint="simple"`` → ``routine`` chain (Qwen3-8B first, low-latency)
+        - ``complexity_hint="complex"`` → ``complex`` chain (Qwen3-14B first, quality)
+        - ``complexity_hint=None`` (default) → auto-classifies from messages
+
        Args:
            messages: List of message dicts with role and content
-            model: Preferred model (tries this first, then provider defaults)
+            model: Preferred model (tries this first; complexity routing is
+                skipped when an explicit model is given)
            temperature: Sampling temperature
            max_tokens: Maximum tokens to generate
            cascade_tier: If specified, filters providers by this tier.
                - "frontier_required": Uses only Anthropic provider for top-tier models.
+            complexity_hint: "simple", "complex", or None (auto-detect).

        Returns:
-            Dict with content, provider_used, and metrics
+            Dict with content, provider_used, model, latency_ms,
+            is_fallback_model, and complexity fields.

        Raises:
            RuntimeError: If all providers fail
        """
+        from infrastructure.router.classifier import TaskComplexity, classify_task
+
        content_type = self._detect_content_type(messages)
        if content_type != ContentType.TEXT:
            logger.debug("Detected %s content, selecting appropriate model", content_type.value)

+        # Resolve task complexity ─────────────────────────────────────────────
+        # Skip complexity routing when caller explicitly specifies a model.
+        complexity: TaskComplexity | None = None
+        if model is None:
+            if complexity_hint is not None:
+                try:
+                    complexity = TaskComplexity(complexity_hint.lower())
+                except ValueError:
+                    logger.warning("Unknown complexity_hint %r, auto-classifying", complexity_hint)
+                    complexity = classify_task(messages)
+            else:
+                complexity = classify_task(messages)
+            logger.debug("Task complexity: %s", complexity.value)
+
        errors = []

        providers = self.providers
@@ -573,7 +626,6 @@ class CascadeRouter:
            if not providers:
                raise RuntimeError(f"No providers found for tier: {cascade_tier}")

-
        for provider in providers:
            if not self._is_provider_available(provider):
                continue
@@ -587,7 +639,21 @@ class CascadeRouter:
                    )
                    continue

-            selected_model, is_fallback_model = self._select_model(provider, model, content_type)
+            # Complexity-based model selection (only when no explicit model) ──
+            effective_model = model
+            if effective_model is None and complexity is not None:
+                effective_model = self._get_model_for_complexity(provider, complexity)
+                if effective_model:
+                    logger.debug(
+                        "Complexity routing [%s]: %s → %s",
+                        complexity.value,
+                        provider.name,
+                        effective_model,
+                    )
+
+            selected_model, is_fallback_model = self._select_model(
+                provider, effective_model, content_type
+            )

            try:
                result = await self._attempt_with_retry(
@@ -610,6 +676,7 @@ class CascadeRouter:
                "model": result.get("model", selected_model or provider.get_default_model()),
                "latency_ms": result.get("latency_ms", 0),
                "is_fallback_model": is_fallback_model,
+                "complexity": complexity.value if complexity is not None else None,
            }

        raise RuntimeError(f"All providers failed: {'; '.join(errors)}")
--- a/src/infrastructure/router/classifier.py
+++ b/src/infrastructure/router/classifier.py
@@ -0,0 +1,166 @@
+"""Task complexity classifier for Qwen3 dual-model routing.
+
+Classifies incoming tasks as SIMPLE (route to Qwen3-8B for low-latency)
+or COMPLEX (route to Qwen3-14B for quality-sensitive work).
+
+Classification is fully heuristic — no LLM inference required.
+"""
+
+import re
+from enum import Enum
+
+
+class TaskComplexity(Enum):
+    """Task complexity tier for model routing."""
+
+    SIMPLE = "simple"   # Qwen3-8B Q6_K: routine, latency-sensitive
+    COMPLEX = "complex"  # Qwen3-14B Q5_K_M: quality-sensitive, multi-step
+
+
+# Keywords strongly associated with complex tasks
+_COMPLEX_KEYWORDS: frozenset[str] = frozenset(
+    [
+        "plan",
+        "review",
+        "analyze",
+        "analyse",
+        "triage",
+        "refactor",
+        "design",
+        "architecture",
+        "implement",
+        "compare",
+        "debug",
+        "explain",
+        "prioritize",
+        "prioritise",
+        "strategy",
+        "optimize",
+        "optimise",
+        "evaluate",
+        "assess",
+        "brainstorm",
+        "outline",
+        "summarize",
+        "summarise",
+        "generate code",
+        "write a",
+        "write the",
+        "code review",
+        "pull request",
+        "multi-step",
+        "multi step",
+        "step by step",
+        "backlog prioriti",
+        "issue triage",
+        "root cause",
+        "how does",
+        "why does",
+        "what are the",
+    ]
+)
+
+# Keywords strongly associated with simple/routine tasks
+_SIMPLE_KEYWORDS: frozenset[str] = frozenset(
+    [
+        "status",
+        "list ",
+        "show ",
+        "what is",
+        "how many",
+        "ping",
+        "run ",
+        "execute ",
+        "ls ",
+        "cat ",
+        "ps ",
+        "fetch ",
+        "count ",
+        "tail ",
+        "head ",
+        "grep ",
+        "find file",
+        "read file",
+        "get ",
+        "query ",
+        "check ",
+        "yes",
+        "no",
+        "ok",
+        "done",
+        "thanks",
+    ]
+)
+
+# Content longer than this is treated as complex regardless of keywords
+_COMPLEX_CHAR_THRESHOLD = 500
+
+# Short content defaults to simple
+_SIMPLE_CHAR_THRESHOLD = 150
+
+# More than this many messages suggests an ongoing complex conversation
+_COMPLEX_CONVERSATION_DEPTH = 6
+
+
+def classify_task(messages: list[dict]) -> TaskComplexity:
+    """Classify task complexity from a list of messages.
+
+    Uses heuristic rules — no LLM call required.  Errs toward COMPLEX
+    when uncertain so that quality is preserved.  Keywords are anchored at
+    word starts (simple ones at word ends too): "no" never fires in "know".
+
+    Args:
+        messages: List of message dicts with ``role`` and ``content`` keys.
+
+    Returns:
+        TaskComplexity.SIMPLE or TaskComplexity.COMPLEX
+    """
+    if not messages:
+        return TaskComplexity.SIMPLE
+
+    # Concatenate all user-turn content for analysis
+    user_content = " ".join(
+        msg.get("content", "")
+        for msg in messages
+        if msg.get("role") in ("user", "human")
+        and isinstance(msg.get("content"), str)
+    ).lower().strip()
+
+    if not user_content:
+        return TaskComplexity.SIMPLE
+
+    def _mentions(keywords: frozenset[str], whole_word: bool) -> bool:
+        # Plain substring tests misfire on short keywords; anchor on word edges.
+        for kw in keywords:
+            tail = r"(?!\w)" if whole_word and kw[-1:].isalnum() else ""
+            if re.search(r"(?<!\w)" + re.escape(kw) + tail, user_content):
+                return True
+        return False
+
+    # Complexity signals override everything -----------------------------------
+    # Explicit complex keywords; prefix match keeps inflections ("planning")
+    if _mentions(_COMPLEX_KEYWORDS, whole_word=False):
+        return TaskComplexity.COMPLEX
+
+    # Numbered / multi-step instruction list: "1. do this  2. do that"
+    if re.search(r"\b\d+\.\s+\w", user_content):
+        return TaskComplexity.COMPLEX
+
+    # Code blocks embedded in messages
+    if "```" in user_content:
+        return TaskComplexity.COMPLEX
+
+    # Long content or a deep conversation → complex reasoning likely required
+    if len(user_content) > _COMPLEX_CHAR_THRESHOLD:
+        return TaskComplexity.COMPLEX
+    if len(messages) > _COMPLEX_CONVERSATION_DEPTH:
+        return TaskComplexity.COMPLEX
+
+    # Simplicity signals: an explicit routine keyword or a short message ------
+    if _mentions(_SIMPLE_KEYWORDS, whole_word=True):
+        return TaskComplexity.SIMPLE
+    if len(user_content) <= _SIMPLE_CHAR_THRESHOLD:
+        return TaskComplexity.SIMPLE
+
+    # When uncertain, prefer quality (complex model)
+    return TaskComplexity.COMPLEX
--- a/src/timmy/backlog_triage.py
+++ b/src/timmy/backlog_triage.py
@@ -1,759 +0,0 @@
-"""Autonomous backlog triage loop — Timmy scans Gitea and assigns work.
-
-Continuously fetches open issues, scores/prioritizes them, and decides
-what to work on next without waiting to be asked.
-
-Loop flow::
-
-    while true:
-        1. Fetch all open issues from Gitea API
-        2. Score/prioritize by labels, age, type, blocked status
-        3. Identify unassigned high-priority items
-        4. Decide: assign to claude, dispatch to kimi, or flag for Alex
-        5. Execute the assignment (comment + assign)
-        6. Optionally post a daily triage summary
-        7. Sleep for configurable interval (default 15 min)
-
-Priority tiers:
-    P0 — security, data loss, blocking bugs → immediate action
-    P1 — core functionality, ready issues → next sprint
-    P2 — improvements, low-score issues → backlog
-    P3 — philosophy, meta → someday/never (skip in triage)
-
-Usage::
-
-    from timmy.backlog_triage import BacklogTriageLoop
-
-    loop = BacklogTriageLoop()
-    await loop.run_once()           # single triage cycle
-    await loop.start()              # background daemon loop
-    loop.stop()                     # graceful shutdown
-"""
-
-from __future__ import annotations
-
-import asyncio
-import logging
-import re
-from dataclasses import dataclass, field
-from datetime import UTC, datetime, timedelta
-from typing import Any
-
-import httpx
-
-from config import settings
-
-logger = logging.getLogger(__name__)
-
-# ── Constants ────────────────────────────────────────────────────────────────
-
-# Minimum triage score to be considered "ready" for assignment
-READY_THRESHOLD = 5
-
-# Agent Gitea logins
-AGENT_CLAUDE = "claude"
-AGENT_KIMI = "kimi"
-OWNER_LOGIN = "rockachopa"  # Alex — human owner
-
-# Labels
-KIMI_READY_LABEL = "kimi-ready"
-TRIAGE_DONE_LABEL = "triage-done"
-
-# Tag sets (mirrors scripts/triage_score.py)
-_BUG_TAGS = frozenset({"bug", "broken", "crash", "error", "fix", "regression", "hotfix"})
-_FEATURE_TAGS = frozenset({"feature", "feat", "enhancement", "capability", "timmy-capability"})
-_REFACTOR_TAGS = frozenset({"refactor", "cleanup", "tech-debt", "optimization", "perf"})
-_META_TAGS = frozenset({"philosophy", "soul-gap", "discussion", "question", "rfc"})
-_P0_TAGS = frozenset({"security", "data-loss", "blocking", "p0", "critical"})
-_RESEARCH_TAGS = frozenset({"research", "kimi-ready", "investigation", "spike"})
-_LOOP_TAG = "loop-generated"
-
-# Regex patterns for scoring
-_TAG_RE = re.compile(r"\[([^\]]+)\]")
-_FILE_RE = re.compile(r"(?:src/|tests/|scripts/|\.py|\.html|\.js|\.yaml|\.toml|\.sh)", re.IGNORECASE)
-_FUNC_RE = re.compile(r"(?:def |class |function |method |`\w+\(\)`)", re.IGNORECASE)
-_ACCEPT_RE = re.compile(
-    r"(?:should|must|expect|verify|assert|test.?case|acceptance|criteria"
-    r"|pass(?:es|ing)|fail(?:s|ing)|return(?:s)?|raise(?:s)?)",
-    re.IGNORECASE,
-)
-_TEST_RE = re.compile(r"(?:tox|pytest|test_\w+|\.test\.|assert\s)", re.IGNORECASE)
-_BLOCKED_RE = re.compile(r"\bblock(?:ed|s|ing)\b", re.IGNORECASE)
-
-
-# ── Data types ───────────────────────────────────────────────────────────────
-
-
-@dataclass
-class ScoredIssue:
-    """A Gitea issue enriched with triage scoring."""
-
-    number: int
-    title: str
-    body: str
-    labels: list[str]
-    tags: set[str]
-    assignees: list[str]
-    created_at: datetime
-    issue_type: str  # bug | feature | refactor | philosophy | research | unknown
-
-    score: int = 0
-    scope: int = 0
-    acceptance: int = 0
-    alignment: int = 0
-    ready: bool = False
-    age_days: int = 0
-    is_p0: bool = False
-    is_blocked: bool = False
-
-    @property
-    def is_unassigned(self) -> bool:
-        return len(self.assignees) == 0
-
-    @property
-    def needs_kimi(self) -> bool:
-        return bool(self.tags & _RESEARCH_TAGS) or KIMI_READY_LABEL in self.labels
-
-
-@dataclass
-class TriageDecision:
-    """The outcome of a triage decision for a single issue."""
-
-    issue_number: int
-    action: str  # "assign_claude" | "assign_kimi" | "flag_alex" | "skip"
-    reason: str
-    agent: str = ""  # the agent assigned (login)
-    executed: bool = False
-    error: str = ""
-
-
-@dataclass
-class TriageCycleResult:
-    """Summary of one complete triage cycle."""
-
-    timestamp: str
-    total_open: int
-    scored: int
-    ready: int
-    decisions: list[TriageDecision] = field(default_factory=list)
-    errors: list[str] = field(default_factory=list)
-    duration_ms: int = 0
-
-
-# ── Scoring ──────────────────────────────────────────────────────────────────
-
-
-def _extract_tags(title: str, labels: list[str]) -> set[str]:
-    """Pull tags from [bracket] title notation + Gitea label names."""
-    tags: set[str] = set()
-    for m in _TAG_RE.finditer(title):
-        tags.add(m.group(1).lower().strip())
-    for lbl in labels:
-        tags.add(lbl.lower().strip())
-    return tags
-
-
-def _score_scope(title: str, body: str, tags: set[str]) -> int:
-    """0–3: How well-scoped is this issue?"""
-    text = f"{title}\n{body}"
-    score = 0
-    if _FILE_RE.search(text):
-        score += 1
-    if _FUNC_RE.search(text):
-        score += 1
-    clean = _TAG_RE.sub("", title).strip()
-    if len(clean) < 80:
-        score += 1
-    if tags & _META_TAGS:
-        score = max(0, score - 2)
-    return min(3, score)
-
-
-def _score_acceptance(title: str, body: str, tags: set[str]) -> int:
-    """0–3: Does this have clear acceptance criteria?"""
-    text = f"{title}\n{body}"
-    score = 0
-    matches = len(_ACCEPT_RE.findall(text))
-    if matches >= 3:
-        score += 2
-    elif matches >= 1:
-        score += 1
-    if _TEST_RE.search(text):
-        score += 1
-    if re.search(r"##\s*(problem|solution|expected|actual|steps)", body, re.IGNORECASE):
-        score += 1
-    if tags & _META_TAGS:
-        score = max(0, score - 1)
-    return min(3, score)
-
-
-def _score_alignment(title: str, body: str, tags: set[str]) -> int:
-    """0–3: How aligned is this with the north star?"""
-    score = 0
-    if tags & _BUG_TAGS:
-        return 3
-    if tags & _REFACTOR_TAGS:
-        score += 2
-    if tags & _FEATURE_TAGS:
-        score += 2
-    if _LOOP_TAG in tags:
-        score += 1
-    if tags & _META_TAGS:
-        score = 0
-    return min(3, score)
-
-
-def score_issue(issue: dict[str, Any]) -> ScoredIssue:
-    """Score and classify a raw Gitea issue dict."""
-    number = issue["number"]
-    title = issue.get("title", "")
-    body = issue.get("body") or ""
-    label_names = [lbl["name"] for lbl in issue.get("labels", [])]
-    tags = _extract_tags(title, label_names)
-    assignees = [a["login"] for a in issue.get("assignees", [])]
-
-    # Parse created_at
-    raw_ts = issue.get("created_at", "")
-    try:
-        created_at = datetime.fromisoformat(raw_ts.replace("Z", "+00:00"))
-    except (ValueError, AttributeError):
-        created_at = datetime.now(UTC)
-    age_days = (datetime.now(UTC) - created_at).days
-
-    # Scores
-    scope = _score_scope(title, body, tags)
-    acceptance = _score_acceptance(title, body, tags)
-    alignment = _score_alignment(title, body, tags)
-    total = scope + acceptance + alignment
-
-    # Classify
-    if tags & _BUG_TAGS:
-        issue_type = "bug"
-    elif tags & _RESEARCH_TAGS:
-        issue_type = "research"
-    elif tags & _FEATURE_TAGS:
-        issue_type = "feature"
-    elif tags & _REFACTOR_TAGS:
-        issue_type = "refactor"
-    elif tags & _META_TAGS:
-        issue_type = "philosophy"
-    else:
-        issue_type = "unknown"
-
-    is_p0 = bool(tags & _P0_TAGS) or issue_type == "bug"
-    is_blocked = bool(_BLOCKED_RE.search(title) or _BLOCKED_RE.search(body))
-
-    return ScoredIssue(
-        number=number,
-        title=_TAG_RE.sub("", title).strip(),
-        body=body,
-        labels=label_names,
-        tags=tags,
-        assignees=assignees,
-        created_at=created_at,
-        issue_type=issue_type,
-        score=total,
-        scope=scope,
-        acceptance=acceptance,
-        alignment=alignment,
-        ready=total >= READY_THRESHOLD,
-        age_days=age_days,
-        is_p0=is_p0,
-        is_blocked=is_blocked,
-    )
-
-
-# ── Decision logic ───────────────────────────────────────────────────────────
-
-
-def decide(issue: ScoredIssue) -> TriageDecision:
-    """Decide what to do with an issue.
-
-    Returns a TriageDecision with action, reason, and agent.
-    Decision is not yet executed — call execute_decision() for that.
-    """
-    num = issue.number
-
-    # Skip philosophy/meta — not dev-actionable
-    if issue.issue_type == "philosophy":
-        return TriageDecision(
-            issue_number=num,
-            action="skip",
-            reason="Philosophy/meta issue — not dev-actionable in the triage loop.",
-        )
-
-    # Skip already-assigned issues
-    if not issue.is_unassigned:
-        return TriageDecision(
-            issue_number=num,
-            action="skip",
-            reason=f"Already assigned to: {', '.join(issue.assignees)}.",
-        )
-
-    # Skip if not ready (low score)
-    if not issue.ready:
-        return TriageDecision(
-            issue_number=num,
-            action="skip",
-            reason=f"Score {issue.score} < {READY_THRESHOLD} threshold — needs more detail before assignment.",
-        )
-
-    # Blocked: flag for Alex
-    if issue.is_blocked:
-        return TriageDecision(
-            issue_number=num,
-            action="flag_alex",
-            agent=OWNER_LOGIN,
-            reason=(
-                "Issue appears blocked. Flagging for @rockachopa to unblock before autonomous assignment."
-            ),
-        )
-
-    # Research / Kimi-ready
-    if issue.needs_kimi:
-        return TriageDecision(
-            issue_number=num,
-            action="assign_kimi",
-            agent=AGENT_KIMI,
-            reason=(
-                f"Issue type '{issue.issue_type}' with research/investigation scope. "
-                f"Assigning kimi-ready label for Kimi agent to pick up."
-            ),
-        )
-
-    # P0 bugs and blocking issues → Claude immediately
-    if issue.is_p0:
-        return TriageDecision(
-            issue_number=num,
-            action="assign_claude",
-            agent=AGENT_CLAUDE,
-            reason=(
-                f"P0/{issue.issue_type} issue (score={issue.score}, age={issue.age_days}d). "
-                f"Assigning to Claude Code for immediate attention."
-            ),
-        )
-
-    # Everything else that is ready → Claude Code
-    return TriageDecision(
-        issue_number=num,
-        action="assign_claude",
-        agent=AGENT_CLAUDE,
-        reason=(
-            f"Unassigned ready issue (type={issue.issue_type}, score={issue.score}, "
-            f"age={issue.age_days}d). Assigning to Claude Code."
-        ),
-    )
-
-
-# ── Gitea API client ─────────────────────────────────────────────────────────
-
-
-def _api_headers() -> dict[str, str]:
-    return {
-        "Authorization": f"token {settings.gitea_token}",
-        "Content-Type": "application/json",
-        "Accept": "application/json",
-    }
-
-
-def _repo_url(path: str) -> str:
-    owner, repo = settings.gitea_repo.split("/", 1)
-    return f"{settings.gitea_url}/api/v1/repos/{owner}/{repo}/{path}"
-
-
-async def fetch_open_issues(client: httpx.AsyncClient) -> list[dict[str, Any]]:
-    """Fetch all open issues from Gitea, paginating as needed."""
-    all_issues: list[dict[str, Any]] = []
-    page = 1
-    while True:
-        url = _repo_url(f"issues?state=open&type=issues&limit=50&page={page}")
-        try:
-            resp = await client.get(url, headers=_api_headers())
-            if resp.status_code != 200:
-                logger.warning("Gitea issues fetch failed (HTTP %s)", resp.status_code)
-                break
-            batch: list[dict[str, Any]] = resp.json()
-            if not batch:
-                break
-            all_issues.extend(batch)
-            if len(batch) < 50:
-                break
-            page += 1
-        except (httpx.ConnectError, httpx.ReadError, httpx.TimeoutException) as exc:
-            logger.warning("Gitea connection error fetching issues: %s", exc)
-            break
-    return all_issues
-
-
-async def post_comment(
-    client: httpx.AsyncClient,
-    issue_number: int,
-    body: str,
-) -> bool:
-    """Post a comment on a Gitea issue. Returns True on success."""
-    url = _repo_url(f"issues/{issue_number}/comments")
-    try:
-        resp = await client.post(url, headers=_api_headers(), json={"body": body})
-        return resp.status_code in (200, 201)
-    except (httpx.ConnectError, httpx.ReadError, httpx.TimeoutException) as exc:
-        logger.warning("Failed to post comment on #%d: %s", issue_number, exc)
-        return False
-
-
-async def assign_issue(
-    client: httpx.AsyncClient,
-    issue_number: int,
-    assignee: str,
-) -> bool:
-    """Assign an issue to a Gitea user. Returns True on success."""
-    url = _repo_url(f"issues/{issue_number}")
-    try:
-        resp = await client.patch(
-            url,
-            headers=_api_headers(),
-            json={"assignees": [assignee]},
-        )
-        return resp.status_code in (200, 201)
-    except (httpx.ConnectError, httpx.ReadError, httpx.TimeoutException) as exc:
-        logger.warning("Failed to assign #%d to %s: %s", issue_number, assignee, exc)
-        return False
-
-
-async def add_label(
-    client: httpx.AsyncClient,
-    issue_number: int,
-    label_name: str,
-) -> bool:
-    """Add a label to a Gitea issue by name (auto-creates if missing). Returns True on success."""
-    owner, repo = settings.gitea_repo.split("/", 1)
-    labels_url = f"{settings.gitea_url}/api/v1/repos/{owner}/{repo}/labels"
-    headers = _api_headers()
-
-    try:
-        # Fetch existing labels
-        resp = await client.get(labels_url, headers=headers)
-        if resp.status_code != 200:
-            return False
-        existing = {lbl["name"]: lbl["id"] for lbl in resp.json()}
-
-        if label_name in existing:
-            label_id = existing[label_name]
-        else:
-            # Auto-create the label
-            create_resp = await client.post(
-                labels_url,
-                headers=headers,
-                json={"name": label_name, "color": "#006b75"},
-            )
-            if create_resp.status_code not in (200, 201):
-                return False
-            label_id = create_resp.json()["id"]
-
-        # Apply to the issue
-        apply_url = _repo_url(f"issues/{issue_number}/labels")
-        apply_resp = await client.post(
-            apply_url, headers=headers, json={"labels": [label_id]}
-        )
-        return apply_resp.status_code in (200, 201)
-
-    except (httpx.ConnectError, httpx.ReadError, httpx.TimeoutException) as exc:
-        logger.warning("Failed to add label %r to #%d: %s", label_name, issue_number, exc)
-        return False
-
-
-# ── Decision execution ───────────────────────────────────────────────────────
-
-
-async def execute_decision(
-    client: httpx.AsyncClient,
-    decision: TriageDecision,
-    dry_run: bool = False,
-) -> TriageDecision:
-    """Execute a triage decision — comment + assign/label.
-
-    When dry_run=True, logs the decision but makes no Gitea API calls.
-    Returns the updated decision with executed=True on success.
-    """
-    num = decision.issue_number
-
-    if decision.action == "skip":
-        logger.debug("Triage skip #%d: %s", num, decision.reason)
-        decision.executed = True
-        return decision
-
-    audit_comment = _build_audit_comment(decision)
-
-    if dry_run:
-        logger.info(
-            "[DRY RUN] #%d → %s (%s): %s",
-            num,
-            decision.action,
-            decision.agent,
-            decision.reason,
-        )
-        decision.executed = True
-        return decision
-
-    # Post audit comment first (always, so Alex can see reasoning)
-    comment_ok = await post_comment(client, num, audit_comment)
-    if not comment_ok:
-        decision.error = "Failed to post audit comment"
-        logger.warning("Triage #%d: comment failed", num)
-        return decision
-
-    # Execute assignment
-    ok = False
-    if decision.action == "assign_claude":
-        ok = await assign_issue(client, num, AGENT_CLAUDE)
-    elif decision.action == "assign_kimi":
-        ok = await add_label(client, num, KIMI_READY_LABEL)
-    elif decision.action == "flag_alex":
-        # Comment already posted above — that's sufficient for flagging
-        ok = True
-
-    if ok:
-        decision.executed = True
-        logger.info("Triage #%d → %s OK", num, decision.action)
-    else:
-        decision.error = f"Action {decision.action!r} failed"
-        logger.warning("Triage #%d: action %r failed", num, decision.action)
-
-    return decision
-
-
-def _build_audit_comment(decision: TriageDecision) -> str:
-    """Build the audit trail comment that Alex can read to see reasoning."""
-    ts = datetime.now(UTC).strftime("%Y-%m-%d %H:%M UTC")
-    action_text = {
-        "assign_claude": f"Assigning to @{AGENT_CLAUDE} for implementation.",
-        "assign_kimi": f"Adding `{KIMI_READY_LABEL}` label — queuing for Kimi research agent.",
-        "flag_alex": f"Flagging for @{OWNER_LOGIN} — issue appears blocked or needs human decision.",
-    }.get(decision.action, decision.action)
-
-    return (
-        f"**[Timmy Triage — {ts}]**\n\n"
-        f"**Decision:** {action_text}\n\n"
-        f"**Why:** {decision.reason}\n\n"
-        f"*Autonomous triage by Timmy. Reply to override.*"
-    )
-
-
-# ── Daily summary ─────────────────────────────────────────────────────────────
-
-
-def _build_daily_summary(result: TriageCycleResult, scored: list[ScoredIssue]) -> str:
-    """Build the daily triage summary body."""
-    now = datetime.now(UTC).strftime("%Y-%m-%d %H:%M UTC")
-    assigned = [d for d in result.decisions if d.executed and d.action != "skip"]
-    skipped = [d for d in result.decisions if d.action == "skip"]
-
-    lines = [
-        f"# Timmy Backlog Triage — {now}",
-        "",
-        f"**Open issues:** {result.total_open}  |  "
-        f"**Scored:** {result.scored}  |  "
-        f"**Ready:** {result.ready}  |  "
-        f"**Assigned this cycle:** {len(assigned)}",
-        "",
-        "## Top 10 Ready Issues (by score)",
-        "",
-    ]
-
-    top = sorted([s for s in scored if s.ready], key=lambda s: (-s.score, s.number))[:10]
-    for s in top:
-        flag = "🐛" if s.issue_type == "bug" else "⚡" if s.is_p0 else "✦"
-        lines.append(
-            f"- {flag} **#{s.number}** (score={s.score}, age={s.age_days}d) — {s.title[:80]}"
-        )
-
-    if assigned:
-        lines += ["", "## Actions Taken", ""]
-        for d in assigned:
-            lines.append(f"- #{d.issue_number} → `{d.action}` ({d.agent}): {d.reason[:100]}")
-
-    if skipped:
-        lines += ["", f"## Skipped ({len(skipped)} issues)", ""]
-        for d in skipped[:5]:
-            lines.append(f"- #{d.issue_number}: {d.reason[:80]}")
-        if len(skipped) > 5:
-            lines.append(f"- … and {len(skipped) - 5} more")
-
-    lines += [
-        "",
-        "---",
-        "*Auto-generated by Timmy's backlog triage loop. "
-        "Override any decision by reassigning or commenting.*",
-    ]
-    return "\n".join(lines)
-
-
-async def post_daily_summary(
-    client: httpx.AsyncClient,
-    result: TriageCycleResult,
-    scored: list[ScoredIssue],
-    dry_run: bool = False,
-) -> bool:
-    """Post a daily triage summary as a new Gitea issue."""
-    today = datetime.now(UTC).strftime("%Y-%m-%d")
-    title = f"[Triage] Daily backlog summary — {today}"
-    body = _build_daily_summary(result, scored)
-
-    if dry_run:
-        logger.info("[DRY RUN] Would post daily summary: %s", title)
-        return True
-
-    url = _repo_url("issues")
-    try:
-        resp = await client.post(
-            url,
-            headers=_api_headers(),
-            json={
-                "title": title,
-                "body": body,
-                "labels": [],
-            },
-        )
-        if resp.status_code in (200, 201):
-            issue_num = resp.json().get("number", "?")
-            logger.info("Daily triage summary posted as issue #%s", issue_num)
-            return True
-        logger.warning("Daily summary post failed (HTTP %s)", resp.status_code)
-        return False
-    except (httpx.ConnectError, httpx.ReadError, httpx.TimeoutException) as exc:
-        logger.warning("Failed to post daily summary: %s", exc)
-        return False
-
-
-# ── Main loop class ───────────────────────────────────────────────────────────
-
-
-class BacklogTriageLoop:
-    """Autonomous backlog triage loop.
-
-    Fetches, scores, and assigns Gitea issues on a configurable interval.
-
-    Parameters
-    ----------
-    interval:
-        Seconds between triage cycles. Default: settings.backlog_triage_interval_seconds.
-    dry_run:
-        When True, score and log decisions but don't write to Gitea.
-    daily_summary:
-        When True, post a daily triage summary issue after each cycle.
-    """
-
-    def __init__(
-        self,
-        *,
-        interval: float | None = None,
-        dry_run: bool | None = None,
-        daily_summary: bool | None = None,
-    ) -> None:
-        self._interval = float(interval or settings.backlog_triage_interval_seconds)
-        self._dry_run = dry_run if dry_run is not None else settings.backlog_triage_dry_run
-        self._daily_summary = (
-            daily_summary if daily_summary is not None else settings.backlog_triage_daily_summary
-        )
-        self._running = False
-        self._task: asyncio.Task | None = None
-        self._cycle_count = 0
-        self._last_summary_date: str = ""
-        self.history: list[TriageCycleResult] = []
-
-    @property
-    def is_running(self) -> bool:
-        return self._running
-
-    @property
-    def cycle_count(self) -> int:
-        return self._cycle_count
-
-    async def run_once(self) -> TriageCycleResult:
-        """Execute one full triage cycle.
-
-        1. Fetch all open Gitea issues
-        2. Score and prioritize
-        3. Decide on each unassigned ready issue
-        4. Execute decisions
-        5. Optionally post daily summary
-        """
-        import time
-
-        self._cycle_count += 1
-        start = time.monotonic()
-        ts = datetime.now(UTC).isoformat()
-        result = TriageCycleResult(timestamp=ts, total_open=0, scored=0, ready=0)
-
-        if not settings.gitea_enabled or not settings.gitea_token:
-            logger.warning("Backlog triage: Gitea not configured — skipping cycle")
-            return result
-
-        async with httpx.AsyncClient(timeout=30) as client:
-            # 1. Fetch
-            raw_issues = await fetch_open_issues(client)
-            result.total_open = len(raw_issues)
-            logger.info("Triage cycle #%d: fetched %d open issues", self._cycle_count, len(raw_issues))
-
-            # 2. Score
-            scored = [score_issue(i) for i in raw_issues]
-            result.scored = len(scored)
-            result.ready = sum(1 for s in scored if s.ready)
-
-            # 3 & 4. Decide and execute for each issue
-            for issue in scored:
-                decision = decide(issue)
-                if decision.action == "skip":
-                    result.decisions.append(decision)
-                    continue
-                decision = await execute_decision(client, decision, dry_run=self._dry_run)
-                result.decisions.append(decision)
-
-                # Rate-limit: short pause between API writes to avoid hammering Gitea
-                if not self._dry_run:
-                    await asyncio.sleep(0.5)
-
-            # 5. Daily summary (once per UTC day)
-            today = datetime.now(UTC).strftime("%Y-%m-%d")
-            if self._daily_summary and today != self._last_summary_date:
-                await post_daily_summary(client, result, scored, dry_run=self._dry_run)
-                self._last_summary_date = today
-
-        result.duration_ms = int((time.monotonic() - start) * 1000)
-        self.history.append(result)
-
-        assigned_count = sum(1 for d in result.decisions if d.executed and d.action != "skip")
-        logger.info(
-            "Triage cycle #%d complete (%d ms): %d open, %d ready, %d assigned",
-            self._cycle_count,
-            result.duration_ms,
-            result.total_open,
-            result.ready,
-            assigned_count,
-        )
-        return result
-
-    async def start(self) -> None:
-        """Start the triage loop as a background task."""
-        if self._running:
-            logger.warning("BacklogTriageLoop already running")
-            return
-        self._running = True
-        await self._loop()
-
-    async def _loop(self) -> None:
-        logger.info(
-            "BacklogTriageLoop started (interval=%.0fs, dry_run=%s)",
-            self._interval,
-            self._dry_run,
-        )
-        while self._running:
-            try:
-                await self.run_once()
-            except Exception:
-                logger.exception("Backlog triage cycle failed")
-            await asyncio.sleep(self._interval)
-
-    def stop(self) -> None:
-        """Signal the loop to stop after the current cycle."""
-        self._running = False
-        logger.info("BacklogTriageLoop stop requested")
--- a/src/timmy/mcp_bridge.py
+++ b/src/timmy/mcp_bridge.py
@@ -399,74 +399,6 @@ class MCPBridge:
            logger.warning("Tool '%s' execution failed: %s", name, exc)
            return f"Error executing {name}: {exc}"

-    @staticmethod
-    def _build_initial_messages(
-        prompt: str, system_prompt: str | None
-    ) -> list[dict]:
-        """Build the initial message list for a run."""
-        messages: list[dict] = []
-        if system_prompt:
-            messages.append({"role": "system", "content": system_prompt})
-        messages.append({"role": "user", "content": prompt})
-        return messages
-
-    async def _process_round_tool_calls(
-        self,
-        messages: list[dict],
-        model_tool_calls: list[dict],
-        rounds: int,
-        tool_calls_made: list[dict],
-    ) -> None:
-        """Execute all tool calls in one round, appending results to messages."""
-        for tc in model_tool_calls:
-            func = tc.get("function", {})
-            tool_name = func.get("name", "unknown")
-            tool_args = func.get("arguments", {})
-            logger.info(
-                "Bridge tool call [round %d]: %s(%s)",
-                rounds,
-                tool_name,
-                tool_args,
-            )
-            result = await self._execute_tool_call(tc)
-            tool_calls_made.append(
-                {
-                    "round": rounds,
-                    "tool": tool_name,
-                    "arguments": tool_args,
-                    "result": result[:500],  # Truncate for logging
-                }
-            )
-            messages.append({"role": "tool", "content": result})
-
-    async def _run_tool_loop(
-        self, messages: list[dict], tools: list[dict]
-    ) -> tuple[str, list[dict], int, str]:
-        """Run the tool-call loop until final response or max rounds reached.
-
-        Returns:
-            Tuple of (content, tool_calls_made, rounds, error).
-        """
-        tool_calls_made: list[dict] = []
-        rounds = 0
-
-        for round_num in range(self.max_rounds):
-            rounds = round_num + 1
-            response = await self._chat(messages, tools)
-            msg = response.get("message", {})
-            model_tool_calls = msg.get("tool_calls", [])
-
-            if not model_tool_calls:
-                return msg.get("content", ""), tool_calls_made, rounds, ""
-
-            messages.append(msg)
-            await self._process_round_tool_calls(
-                messages, model_tool_calls, rounds, tool_calls_made
-            )
-
-        error = f"Exceeded maximum of {self.max_rounds} tool-call rounds"
-        return "(max tool-call rounds reached)", tool_calls_made, rounds, error
-
    async def run(
        self,
        prompt: str,
@@ -487,37 +419,115 @@ class MCPBridge:
            BridgeResult with the final response and tool call history.
        """
        start = time.time()
-        messages = self._build_initial_messages(prompt, system_prompt)
+        messages: list[dict] = []
+
+        if system_prompt:
+            messages.append({"role": "system", "content": system_prompt})
+
+        messages.append({"role": "user", "content": prompt})
+
        tools = self._build_ollama_tools()
        tool_calls_made: list[dict] = []
        rounds = 0
-        error_msg = ""

        try:
-            content, tool_calls_made, rounds, error_msg = await self._run_tool_loop(
-                messages, tools
-            )
-        except httpx.ConnectError as exc:
-            logger.warning("Ollama connection failed: %s", exc)
-            error_msg = f"Ollama connection failed: {exc}"
-            content = ""
-        except httpx.HTTPStatusError as exc:
-            logger.warning("Ollama HTTP error: %s", exc)
-            error_msg = f"Ollama HTTP error: {exc.response.status_code}"
-            content = ""
-        except Exception as exc:
-            logger.error("MCPBridge run failed: %s", exc)
-            error_msg = str(exc)
-            content = ""
+            for round_num in range(self.max_rounds):
+                rounds = round_num + 1
+                response = await self._chat(messages, tools)
+                msg = response.get("message", {})

-        return BridgeResult(
-            content=content,
-            tool_calls_made=tool_calls_made,
-            rounds=rounds,
-            latency_ms=(time.time() - start) * 1000,
-            model=self.model,
-            error=error_msg,
-        )
+                # Check if model made tool calls
+                model_tool_calls = msg.get("tool_calls", [])
+                if not model_tool_calls:
+                    # Final text response — done.
+                    content = msg.get("content", "")
+                    latency = (time.time() - start) * 1000
+                    return BridgeResult(
+                        content=content,
+                        tool_calls_made=tool_calls_made,
+                        rounds=rounds,
+                        latency_ms=latency,
+                        model=self.model,
+                    )
+
+                # Append the assistant message (with tool_calls) to history
+                messages.append(msg)
+
+                # Execute each tool call and add results
+                for tc in model_tool_calls:
+                    func = tc.get("function", {})
+                    tool_name = func.get("name", "unknown")
+                    tool_args = func.get("arguments", {})
+
+                    logger.info(
+                        "Bridge tool call [round %d]: %s(%s)",
+                        rounds,
+                        tool_name,
+                        tool_args,
+                    )
+
+                    result = await self._execute_tool_call(tc)
+                    tool_calls_made.append(
+                        {
+                            "round": rounds,
+                            "tool": tool_name,
+                            "arguments": tool_args,
+                            "result": result[:500],  # Truncate for logging
+                        }
+                    )
+
+                    # Add tool result to message history
+                    messages.append(
+                        {
+                            "role": "tool",
+                            "content": result,
+                        }
+                    )
+
+            # Hit max rounds
+            latency = (time.time() - start) * 1000
+            return BridgeResult(
+                content="(max tool-call rounds reached)",
+                tool_calls_made=tool_calls_made,
+                rounds=rounds,
+                latency_ms=latency,
+                model=self.model,
+                error=f"Exceeded maximum of {self.max_rounds} tool-call rounds",
+            )
+
+        except httpx.ConnectError as exc:
+            latency = (time.time() - start) * 1000
+            logger.warning("Ollama connection failed: %s", exc)
+            return BridgeResult(
+                content="",
+                tool_calls_made=tool_calls_made,
+                rounds=rounds,
+                latency_ms=latency,
+                model=self.model,
+                error=f"Ollama connection failed: {exc}",
+            )
+        except httpx.HTTPStatusError as exc:
+            latency = (time.time() - start) * 1000
+            logger.warning("Ollama HTTP error: %s", exc)
+            return BridgeResult(
+                content="",
+                tool_calls_made=tool_calls_made,
+                rounds=rounds,
+                latency_ms=latency,
+                model=self.model,
+                error=f"Ollama HTTP error: {exc.response.status_code}",
+            )
+        except Exception as exc:
+            latency = (time.time() - start) * 1000
+            logger.error("MCPBridge run failed: %s", exc)
+            return BridgeResult(
+                content="",
+                tool_calls_made=tool_calls_made,
+                rounds=rounds,
+                latency_ms=latency,
+                model=self.model,
+                error=str(exc),
+            )

    def status(self) -> dict:
        """Return bridge status for the dashboard."""
--- a/src/timmy/tools.py
+++ b/src/timmy/tools.py
@@ -462,8 +462,7 @@ def consult_grok(query: str) -> str:
            inv = ln.create_invoice(sats, f"Grok query: {query[:_INVOICE_MEMO_MAX_LEN]}")
            invoice_info = f"\n[Lightning invoice: {sats} sats — {inv.payment_request[:40]}...]"
        except (ImportError, OSError, ValueError) as exc:
-            logger.error("Lightning invoice creation failed: %s", exc)
-            return "Error: Failed to create Lightning invoice. Please check logs."
+            logger.warning("Tool execution failed (Lightning invoice): %s", exc)

    result = backend.run(query)

@@ -534,8 +533,7 @@ def _register_web_fetch_tool(toolkit: Toolkit) -> None:
    try:
        toolkit.register(web_fetch, name="web_fetch")
    except Exception as exc:
-        logger.error("Failed to register web_fetch tool: %s", exc)
-        raise
+        logger.warning("Tool execution failed (web_fetch registration): %s", exc)


 def _register_core_tools(toolkit: Toolkit, base_path: Path) -> None:
@@ -567,8 +565,8 @@ def _register_grok_tool(toolkit: Toolkit) -> None:
            toolkit.register(consult_grok, name="consult_grok")
            logger.info("Grok consultation tool registered")
    except (ImportError, AttributeError) as exc:
-        logger.error("Failed to register Grok tool: %s", exc)
-        raise
+        logger.warning("Tool execution failed (Grok registration): %s", exc)
+        logger.debug("Grok tool not available")


 def _register_memory_tools(toolkit: Toolkit) -> None:
@@ -581,8 +579,8 @@ def _register_memory_tools(toolkit: Toolkit) -> None:
        toolkit.register(memory_read, name="memory_read")
        toolkit.register(memory_forget, name="memory_forget")
    except (ImportError, AttributeError) as exc:
-        logger.error("Failed to register Memory tools: %s", exc)
-        raise
+        logger.warning("Tool execution failed (Memory tools registration): %s", exc)
+        logger.debug("Memory tools not available")


 def _register_agentic_loop_tool(toolkit: Toolkit) -> None:
@@ -630,8 +628,8 @@ def _register_agentic_loop_tool(toolkit: Toolkit) -> None:

        toolkit.register(plan_and_execute, name="plan_and_execute")
    except (ImportError, AttributeError) as exc:
-        logger.error("Failed to register plan_and_execute tool: %s", exc)
-        raise
+        logger.warning("Tool execution failed (plan_and_execute registration): %s", exc)
+        logger.debug("plan_and_execute tool not available")


 def _register_introspection_tools(toolkit: Toolkit) -> None:
@@ -649,16 +647,15 @@ def _register_introspection_tools(toolkit: Toolkit) -> None:
        toolkit.register(get_memory_status, name="get_memory_status")
        toolkit.register(run_self_tests, name="run_self_tests")
    except (ImportError, AttributeError) as exc:
-        logger.error("Failed to register Introspection tools: %s", exc)
-        raise
+        logger.warning("Tool execution failed (Introspection tools registration): %s", exc)
+        logger.debug("Introspection tools not available")

    try:
        from timmy.mcp_tools import update_gitea_avatar

        toolkit.register(update_gitea_avatar, name="update_gitea_avatar")
    except (ImportError, AttributeError) as exc:
-        logger.error("Failed to register update_gitea_avatar tool: %s", exc)
-        raise
+        logger.debug("update_gitea_avatar tool not available: %s", exc)

    try:
        from timmy.session_logger import self_reflect, session_history
@@ -666,8 +663,8 @@ def _register_introspection_tools(toolkit: Toolkit) -> None:
        toolkit.register(session_history, name="session_history")
        toolkit.register(self_reflect, name="self_reflect")
    except (ImportError, AttributeError) as exc:
-        logger.error("Failed to register session_history tool: %s", exc)
-        raise
+        logger.warning("Tool execution failed (session_history registration): %s", exc)
+        logger.debug("session_history tool not available")


 def _register_delegation_tools(toolkit: Toolkit) -> None:
@@ -679,8 +676,8 @@ def _register_delegation_tools(toolkit: Toolkit) -> None:
        toolkit.register(delegate_to_kimi, name="delegate_to_kimi")
        toolkit.register(list_swarm_agents, name="list_swarm_agents")
    except Exception as exc:
-        logger.error("Failed to register Delegation tools: %s", exc)
-        raise
+        logger.warning("Tool execution failed (Delegation tools registration): %s", exc)
+        logger.debug("Delegation tools not available")


 def _register_gematria_tool(toolkit: Toolkit) -> None:
@@ -690,8 +687,8 @@ def _register_gematria_tool(toolkit: Toolkit) -> None:

        toolkit.register(gematria, name="gematria")
    except (ImportError, AttributeError) as exc:
-        logger.error("Failed to register Gematria tool: %s", exc)
-        raise
+        logger.warning("Tool execution failed (Gematria registration): %s", exc)
+        logger.debug("Gematria tool not available")


 def _register_artifact_tools(toolkit: Toolkit) -> None:
@@ -702,8 +699,8 @@ def _register_artifact_tools(toolkit: Toolkit) -> None:
        toolkit.register(jot_note, name="jot_note")
        toolkit.register(log_decision, name="log_decision")
    except (ImportError, AttributeError) as exc:
-        logger.error("Failed to register Artifact tools: %s", exc)
-        raise
+        logger.warning("Tool execution failed (Artifact tools registration): %s", exc)
+        logger.debug("Artifact tools not available")


 def _register_thinking_tools(toolkit: Toolkit) -> None:
@@ -713,8 +710,8 @@ def _register_thinking_tools(toolkit: Toolkit) -> None:

        toolkit.register(search_thoughts, name="thought_search")
    except (ImportError, AttributeError) as exc:
-        logger.error("Failed to register Thinking tools: %s", exc)
-        raise
+        logger.warning("Tool execution failed (Thinking tools registration): %s", exc)
+        logger.debug("Thinking tools not available")


 def create_full_toolkit(base_dir: str | Path | None = None):
--- a/tests/infrastructure/test_router_cascade.py
+++ b/tests/infrastructure/test_router_cascade.py
@@ -968,3 +968,195 @@ class TestCascadeRouterReload:

        assert router.providers[0].name == "low-priority"
        assert router.providers[1].name == "high-priority"
+
+
+class TestComplexityRouting:
+    """Tests for Qwen3-8B / Qwen3-14B dual-model routing (issue #1065)."""
+
+    def _make_dual_model_provider(self) -> Provider:
+        """Build an Ollama provider with both Qwen3 models registered."""
+        return Provider(
+            name="ollama-local",
+            type="ollama",
+            enabled=True,
+            priority=1,
+            url="http://localhost:11434",
+            models=[
+                {
+                    "name": "qwen3:8b",
+                    "capabilities": ["text", "tools", "json", "streaming", "routine"],
+                },
+                {
+                    "name": "qwen3:14b",
+                    "default": True,
+                    "capabilities": ["text", "tools", "json", "streaming", "complex", "reasoning"],
+                },
+            ],
+        )
+
+    def test_get_model_for_complexity_simple_returns_8b(self):
+        """Simple tasks should select the model with 'routine' capability."""
+        from infrastructure.router.classifier import TaskComplexity
+
+        router = CascadeRouter(config_path=Path("/nonexistent"))
+        router.config.fallback_chains = {
+            "routine": ["qwen3:8b"],
+            "complex": ["qwen3:14b"],
+        }
+        provider = self._make_dual_model_provider()
+
+        model = router._get_model_for_complexity(provider, TaskComplexity.SIMPLE)
+        assert model == "qwen3:8b"
+
+    def test_get_model_for_complexity_complex_returns_14b(self):
+        """Complex tasks should select the model with 'complex' capability."""
+        from infrastructure.router.classifier import TaskComplexity
+
+        router = CascadeRouter(config_path=Path("/nonexistent"))
+        router.config.fallback_chains = {
+            "routine": ["qwen3:8b"],
+            "complex": ["qwen3:14b"],
+        }
+        provider = self._make_dual_model_provider()
+
+        model = router._get_model_for_complexity(provider, TaskComplexity.COMPLEX)
+        assert model == "qwen3:14b"
+
+    def test_get_model_for_complexity_returns_none_when_no_match(self):
+        """Returns None when neither a fallback chain nor a capability tag matches."""
+        from infrastructure.router.classifier import TaskComplexity
+
+        router = CascadeRouter(config_path=Path("/nonexistent"))
+        router.config.fallback_chains = {}  # empty chains
+
+        provider = Provider(
+            name="test",
+            type="ollama",
+            enabled=True,
+            priority=1,
+            models=[{"name": "llama3.2:3b", "default": True, "capabilities": ["text"]}],
+        )
+
+        # No 'routine' or 'complex' model available — the default must not be substituted
+        model = router._get_model_for_complexity(provider, TaskComplexity.SIMPLE)
+        assert model is None
+
+    @pytest.mark.asyncio
+    async def test_complete_with_simple_hint_routes_to_8b(self):
+        """complexity_hint='simple' should use qwen3:8b."""
+        router = CascadeRouter(config_path=Path("/nonexistent"))
+        router.config.fallback_chains = {
+            "routine": ["qwen3:8b"],
+            "complex": ["qwen3:14b"],
+        }
+        router.providers = [self._make_dual_model_provider()]
+
+        with patch.object(router, "_call_ollama") as mock_call:
+            mock_call.return_value = {"content": "fast answer", "model": "qwen3:8b"}
+            result = await router.complete(
+                messages=[{"role": "user", "content": "list tasks"}],
+                complexity_hint="simple",
+            )
+
+        assert result["model"] == "qwen3:8b"
+        assert result["complexity"] == "simple"
+
+    @pytest.mark.asyncio
+    async def test_complete_with_complex_hint_routes_to_14b(self):
+        """complexity_hint='complex' should use qwen3:14b."""
+        router = CascadeRouter(config_path=Path("/nonexistent"))
+        router.config.fallback_chains = {
+            "routine": ["qwen3:8b"],
+            "complex": ["qwen3:14b"],
+        }
+        router.providers = [self._make_dual_model_provider()]
+
+        with patch.object(router, "_call_ollama") as mock_call:
+            mock_call.return_value = {"content": "detailed answer", "model": "qwen3:14b"}
+            result = await router.complete(
+                messages=[{"role": "user", "content": "review this PR"}],
+                complexity_hint="complex",
+            )
+
+        assert result["model"] == "qwen3:14b"
+        assert result["complexity"] == "complex"
+
+    @pytest.mark.asyncio
+    async def test_explicit_model_bypasses_complexity_routing(self):
+        """When model is explicitly provided, complexity routing is skipped."""
+        router = CascadeRouter(config_path=Path("/nonexistent"))
+        router.config.fallback_chains = {
+            "routine": ["qwen3:8b"],
+            "complex": ["qwen3:14b"],
+        }
+        router.providers = [self._make_dual_model_provider()]
+
+        with patch.object(router, "_call_ollama") as mock_call:
+            mock_call.return_value = {"content": "response", "model": "qwen3:14b"}
+            result = await router.complete(
+                messages=[{"role": "user", "content": "list tasks"}],
+                model="qwen3:14b",  # explicit override
+            )
+
+        # Explicit model wins — complexity field is None
+        assert result["model"] == "qwen3:14b"
+        assert result["complexity"] is None
+
+    @pytest.mark.asyncio
+    async def test_auto_classification_routes_simple_message(self):
+        """Short, simple messages should auto-classify as SIMPLE → 8B."""
+        router = CascadeRouter(config_path=Path("/nonexistent"))
+        router.config.fallback_chains = {
+            "routine": ["qwen3:8b"],
+            "complex": ["qwen3:14b"],
+        }
+        router.providers = [self._make_dual_model_provider()]
+
+        with patch.object(router, "_call_ollama") as mock_call:
+            mock_call.return_value = {"content": "ok", "model": "qwen3:8b"}
+            result = await router.complete(
+                messages=[{"role": "user", "content": "status"}],
+                # no complexity_hint — auto-classify
+            )
+
+        assert result["complexity"] == "simple"
+        assert result["model"] == "qwen3:8b"
+
+    @pytest.mark.asyncio
+    async def test_auto_classification_routes_complex_message(self):
+        """Complex messages should auto-classify → 14B."""
+        router = CascadeRouter(config_path=Path("/nonexistent"))
+        router.config.fallback_chains = {
+            "routine": ["qwen3:8b"],
+            "complex": ["qwen3:14b"],
+        }
+        router.providers = [self._make_dual_model_provider()]
+
+        with patch.object(router, "_call_ollama") as mock_call:
+            mock_call.return_value = {"content": "deep analysis", "model": "qwen3:14b"}
+            result = await router.complete(
+                messages=[{"role": "user", "content": "analyze and prioritize the backlog"}],
+            )
+
+        assert result["complexity"] == "complex"
+        assert result["model"] == "qwen3:14b"
+
+    @pytest.mark.asyncio
+    async def test_invalid_complexity_hint_falls_back_to_auto(self):
+        """Invalid complexity_hint should log a warning and auto-classify."""
+        router = CascadeRouter(config_path=Path("/nonexistent"))
+        router.config.fallback_chains = {
+            "routine": ["qwen3:8b"],
+            "complex": ["qwen3:14b"],
+        }
+        router.providers = [self._make_dual_model_provider()]
+
+        with patch.object(router, "_call_ollama") as mock_call:
+            mock_call.return_value = {"content": "ok", "model": "qwen3:8b"}
+            # Should not raise
+            result = await router.complete(
+                messages=[{"role": "user", "content": "status"}],
+                complexity_hint="INVALID_HINT",
+            )
+
+        assert result["complexity"] in ("simple", "complex")  # auto-classified
--- a/tests/infrastructure/test_router_classifier.py
+++ b/tests/infrastructure/test_router_classifier.py
@@ -0,0 +1,134 @@
+"""Tests for Qwen3 dual-model task complexity classifier."""
+
+import pytest
+
+from infrastructure.router.classifier import TaskComplexity, classify_task
+
+
+class TestClassifyTask:
+    """Tests for classify_task heuristics."""
+
+    # ── Simple / routine tasks ──────────────────────────────────────────────
+
+    def test_empty_messages_is_simple(self):
+        assert classify_task([]) == TaskComplexity.SIMPLE
+
+    def test_no_user_content_is_simple(self):
+        messages = [{"role": "system", "content": "You are Timmy."}]
+        assert classify_task(messages) == TaskComplexity.SIMPLE
+
+    def test_short_status_query_is_simple(self):
+        messages = [{"role": "user", "content": "status"}]
+        assert classify_task(messages) == TaskComplexity.SIMPLE
+
+    def test_list_command_is_simple(self):
+        messages = [{"role": "user", "content": "list all tasks"}]
+        assert classify_task(messages) == TaskComplexity.SIMPLE
+
+    def test_get_command_is_simple(self):
+        messages = [{"role": "user", "content": "get the latest log entry"}]
+        assert classify_task(messages) == TaskComplexity.SIMPLE
+
+    def test_short_message_under_threshold_is_simple(self):
+        messages = [{"role": "user", "content": "run the build"}]
+        assert classify_task(messages) == TaskComplexity.SIMPLE
+
+    def test_affirmation_is_simple(self):
+        messages = [{"role": "user", "content": "yes"}]
+        assert classify_task(messages) == TaskComplexity.SIMPLE
+
+    # ── Complex / quality-sensitive tasks ──────────────────────────────────
+
+    def test_plan_keyword_is_complex(self):
+        messages = [{"role": "user", "content": "plan the sprint"}]
+        assert classify_task(messages) == TaskComplexity.COMPLEX
+
+    def test_review_keyword_is_complex(self):
+        messages = [{"role": "user", "content": "review this code"}]
+        assert classify_task(messages) == TaskComplexity.COMPLEX
+
+    def test_analyze_keyword_is_complex(self):
+        messages = [{"role": "user", "content": "analyze performance"}]
+        assert classify_task(messages) == TaskComplexity.COMPLEX
+
+    def test_triage_keyword_is_complex(self):
+        messages = [{"role": "user", "content": "triage the open issues"}]
+        assert classify_task(messages) == TaskComplexity.COMPLEX
+
+    def test_refactor_keyword_is_complex(self):
+        messages = [{"role": "user", "content": "refactor the auth module"}]
+        assert classify_task(messages) == TaskComplexity.COMPLEX
+
+    def test_explain_keyword_is_complex(self):
+        messages = [{"role": "user", "content": "explain how the router works"}]
+        assert classify_task(messages) == TaskComplexity.COMPLEX
+
+    def test_prioritize_keyword_is_complex(self):
+        messages = [{"role": "user", "content": "prioritize the backlog"}]
+        assert classify_task(messages) == TaskComplexity.COMPLEX
+
+    def test_long_message_is_complex(self):
+        long_msg = "do something " * 50  # 13 chars × 50 = 650 chars, i.e. > 500
+        messages = [{"role": "user", "content": long_msg}]
+        assert classify_task(messages) == TaskComplexity.COMPLEX
+
+    def test_numbered_list_is_complex(self):  # NOTE(review): "Analyze" keyword may mask this path
+        messages = [
+            {
+                "role": "user",
+                "content": "1. Read the file  2. Analyze it  3. Write a report",
+            }
+        ]
+        assert classify_task(messages) == TaskComplexity.COMPLEX
+
+    def test_code_block_is_complex(self):
+        messages = [
+            {"role": "user", "content": "Here is the code:\n```python\nprint('hello')\n```"}
+        ]
+        assert classify_task(messages) == TaskComplexity.COMPLEX
+
+    def test_deep_conversation_is_complex(self):
+        messages = [
+            {"role": "user", "content": "hi"},
+            {"role": "assistant", "content": "hello"},
+            {"role": "user", "content": "ok"},
+            {"role": "assistant", "content": "yes"},
+            {"role": "user", "content": "ok"},
+            {"role": "assistant", "content": "yes"},
+            {"role": "user", "content": "now do the thing"},
+        ]
+        assert classify_task(messages) == TaskComplexity.COMPLEX
+
+    def test_analyse_british_spelling_is_complex(self):
+        messages = [{"role": "user", "content": "analyse this dataset"}]
+        assert classify_task(messages) == TaskComplexity.COMPLEX
+
+    def test_non_string_content_is_ignored(self):
+        """Non-string content should not crash the classifier."""
+        messages = [{"role": "user", "content": ["part1", "part2"]}]
+        # Should not raise; result doesn't matter — just must not blow up
+        result = classify_task(messages)
+        assert isinstance(result, TaskComplexity)
+
+    def test_system_message_not_counted_as_user(self):
+        """System message alone should not trigger complex keywords."""
+        messages = [
+            {"role": "system", "content": "analyze everything carefully"},
+            {"role": "user", "content": "yes"},
+        ]
+        # "analyze" is in system message (not user) — user says "yes" → simple
+        assert classify_task(messages) == TaskComplexity.SIMPLE
+
+
+class TestTaskComplexityEnum:
+    """Tests for TaskComplexity enum values."""
+
+    def test_simple_value(self):
+        assert TaskComplexity.SIMPLE.value == "simple"
+
+    def test_complex_value(self):
+        assert TaskComplexity.COMPLEX.value == "complex"
+
+    def test_lookup_by_value(self):
+        assert TaskComplexity("simple") == TaskComplexity.SIMPLE
+        assert TaskComplexity("complex") == TaskComplexity.COMPLEX
--- a/tests/unit/test_backlog_triage.py
+++ b/tests/unit/test_backlog_triage.py
@@ -1,621 +0,0 @@
-"""Unit tests for timmy.backlog_triage — autonomous backlog triage loop."""
-
-from datetime import UTC, datetime
-from unittest.mock import AsyncMock, MagicMock, patch
-
-import pytest
-
-from timmy.backlog_triage import (
-    AGENT_CLAUDE,
-    AGENT_KIMI,
-    KIMI_READY_LABEL,
-    OWNER_LOGIN,
-    READY_THRESHOLD,
-    BacklogTriageLoop,
-    ScoredIssue,
-    TriageCycleResult,
-    TriageDecision,
-    _build_audit_comment,
-    _build_daily_summary,
-    _extract_tags,
-    _score_acceptance,
-    _score_alignment,
-    _score_scope,
-    decide,
-    score_issue,
-)
-
-
-# ── Fixtures ─────────────────────────────────────────────────────────────────
-
-
-def _make_raw_issue(
-    number: int = 1,
-    title: str = "Fix the login bug",
-    body: str = "## Problem\nLogin fails on empty password.\n\n## Steps\nassert response == 200",
-    labels: list | None = None,
-    assignees: list | None = None,
-    created_at: str = "2026-03-20T10:00:00Z",
-) -> dict:
-    return {
-        "number": number,
-        "title": title,
-        "body": body,
-        "labels": [{"name": lbl} for lbl in (labels or [])],
-        "assignees": [{"login": a} for a in (assignees or [])],
-        "created_at": created_at,
-    }
-
-
-def _make_scored_issue(
-    number: int = 1,
-    title: str = "Fix login bug",
-    issue_type: str = "bug",
-    score: int = 7,
-    ready: bool = True,
-    is_p0: bool = True,
-    is_blocked: bool = False,
-    assignees: list | None = None,
-    tags: set | None = None,
-    labels: list | None = None,
-    age_days: int = 3,
-) -> ScoredIssue:
-    return ScoredIssue(
-        number=number,
-        title=title,
-        body="",
-        labels=labels or [],
-        tags=tags or {"bug"},
-        assignees=assignees or [],
-        created_at=datetime.now(UTC),
-        issue_type=issue_type,
-        score=score,
-        scope=2,
-        acceptance=2,
-        alignment=3,
-        ready=ready,
-        age_days=age_days,
-        is_p0=is_p0,
-        is_blocked=is_blocked,
-    )
-
-
-# ── _extract_tags ─────────────────────────────────────────────────────────────
-
-
-class TestExtractTags:
-    def test_bracket_tags_in_title(self):
-        tags = _extract_tags("[Bug] Login fails", [])
-        assert "bug" in tags
-
-    def test_multiple_brackets(self):
-        tags = _extract_tags("[Bug][P0] Crash on startup", [])
-        assert "bug" in tags
-        assert "p0" in tags
-
-    def test_label_names(self):
-        tags = _extract_tags("Fix thing", ["security", "hotfix"])
-        assert "security" in tags
-        assert "hotfix" in tags
-
-    def test_labels_lowercased(self):
-        tags = _extract_tags("Title", ["Bug", "FEATURE"])
-        assert "bug" in tags
-        assert "feature" in tags
-
-    def test_empty_inputs(self):
-        tags = _extract_tags("", [])
-        assert tags == set()
-
-
-# ── Scoring functions ─────────────────────────────────────────────────────────
-
-
-class TestScoreScope:
-    def test_file_reference_adds_point(self):
-        score = _score_scope("Fix auth", "Edit src/timmy/auth.py", set())
-        assert score >= 1
-
-    def test_function_reference_adds_point(self):
-        score = _score_scope("Fix auth", "def validate_token()", set())
-        assert score >= 1
-
-    def test_short_title_adds_point(self):
-        score = _score_scope("Short title", "", set())
-        assert score >= 1
-
-    def test_meta_tag_penalizes(self):
-        score = _score_scope("Discussion about philosophy", "long body " * 5, {"philosophy"})
-        assert score <= 1
-
-    def test_max_score_3(self):
-        score = _score_scope("Fix auth", "src/auth.py\ndef login()", set())
-        assert score <= 3
-
-
-class TestScoreAcceptance:
-    def test_acceptance_keywords(self):
-        body = "should return 200\nmust pass tests\nexpect response"
-        score = _score_acceptance("Title", body, set())
-        assert score >= 2
-
-    def test_test_reference_adds_point(self):
-        score = _score_acceptance("Title", "Run tox -e unit", set())
-        assert score >= 1
-
-    def test_structured_sections(self):
-        body = "## Problem\nX\n## Solution\nY"
-        score = _score_acceptance("Title", body, set())
-        assert score >= 1
-
-    def test_meta_tag_penalizes(self):
-        score = _score_acceptance("Title", "should do something", {"philosophy"})
-        # still counts but penalized
-        assert score <= 2
-
-    def test_empty_body(self):
-        score = _score_acceptance("Title", "", set())
-        assert score == 0
-
-
-class TestScoreAlignment:
-    def test_bug_tags_score_max(self):
-        assert _score_alignment("", "", {"bug"}) == 3
-
-    def test_hotfix_tag_max(self):
-        assert _score_alignment("", "", {"hotfix"}) == 3
-
-    def test_refactor_tag(self):
-        score = _score_alignment("", "", {"refactor"})
-        assert score >= 2
-
-    def test_feature_tag(self):
-        score = _score_alignment("", "", {"feature"})
-        assert score >= 2
-
-    def test_meta_tags_zero(self):
-        assert _score_alignment("", "", {"philosophy"}) == 0
-
-    def test_loop_generated_bonus(self):
-        score = _score_alignment("", "", {"loop-generated"})
-        assert score >= 1
-
-
-# ── score_issue ───────────────────────────────────────────────────────────────
-
-
-class TestScoreIssue:
-    def test_bug_issue_classified_correctly(self):
-        raw = _make_raw_issue(labels=["bug"], title="[Bug] Crash on startup")
-        scored = score_issue(raw)
-        assert scored.issue_type == "bug"
-        assert scored.is_p0 is True
-
-    def test_feature_issue_classified(self):
-        raw = _make_raw_issue(labels=["feature"], title="Add voice support")
-        scored = score_issue(raw)
-        assert scored.issue_type == "feature"
-
-    def test_philosophy_issue_classified(self):
-        raw = _make_raw_issue(labels=["philosophy"], title="[Philosophy] Should Timmy sleep?")
-        scored = score_issue(raw)
-        assert scored.issue_type == "philosophy"
-
-    def test_research_issue_classified(self):
-        raw = _make_raw_issue(labels=["research"], title="Investigate model options")
-        scored = score_issue(raw)
-        assert scored.issue_type == "research"
-
-    def test_ready_flag_set_when_score_high(self):
-        body = (
-            "## Problem\nX breaks.\n## Solution\nFix src/timmy/agent.py def run()\n"
-            "should return True\nmust pass tox -e unit"
-        )
-        raw = _make_raw_issue(labels=["bug"], body=body)
-        scored = score_issue(raw)
-        assert scored.score >= READY_THRESHOLD
-        assert scored.ready is True
-
-    def test_is_blocked_detected_in_body(self):
-        raw = _make_raw_issue(body="This is blocked by issue #50")
-        scored = score_issue(raw)
-        assert scored.is_blocked is True
-
-    def test_is_blocked_detected_in_title(self):
-        raw = _make_raw_issue(title="[blocking] Cannot proceed")
-        scored = score_issue(raw)
-        # "blocking" in brackets becomes a tag
-        assert scored.is_blocked is True
-
-    def test_unassigned_when_no_assignees(self):
-        raw = _make_raw_issue(assignees=[])
-        scored = score_issue(raw)
-        assert scored.is_unassigned is True
-
-    def test_assigned_when_has_assignee(self):
-        raw = _make_raw_issue(assignees=["claude"])
-        scored = score_issue(raw)
-        assert scored.is_unassigned is False
-
-    def test_age_days_computed(self):
-        old_ts = "2026-01-01T00:00:00Z"
-        raw = _make_raw_issue(created_at=old_ts)
-        scored = score_issue(raw)
-        assert scored.age_days > 0
-
-    def test_needs_kimi_for_research_label(self):
-        raw = _make_raw_issue(labels=["kimi-ready"])
-        scored = score_issue(raw)
-        assert scored.needs_kimi is True
-
-
-# ── decide ────────────────────────────────────────────────────────────────────
-
-
-class TestDecide:
-    def test_philosophy_skipped(self):
-        issue = _make_scored_issue(issue_type="philosophy", tags={"philosophy"})
-        d = decide(issue)
-        assert d.action == "skip"
-        assert "philosophy" in d.reason.lower()
-
-    def test_assigned_issue_skipped(self):
-        issue = _make_scored_issue(assignees=["perplexity"])
-        d = decide(issue)
-        assert d.action == "skip"
-        assert "assigned" in d.reason.lower()
-
-    def test_low_score_skipped(self):
-        issue = _make_scored_issue(score=2, ready=False)
-        d = decide(issue)
-        assert d.action == "skip"
-        assert "threshold" in d.reason.lower()
-
-    def test_blocked_issue_flagged_for_alex(self):
-        issue = _make_scored_issue(is_blocked=True)
-        d = decide(issue)
-        assert d.action == "flag_alex"
-        assert d.agent == OWNER_LOGIN
-
-    def test_research_issue_assigned_kimi(self):
-        issue = _make_scored_issue(
-            issue_type="research",
-            tags={"research"},
-            is_p0=False,
-            is_blocked=False,
-        )
-        d = decide(issue)
-        assert d.action == "assign_kimi"
-        assert d.agent == AGENT_KIMI
-
-    def test_kimi_ready_label_assigns_kimi(self):
-        issue = _make_scored_issue(
-            issue_type="unknown",
-            tags={"kimi-ready"},
-            labels=["kimi-ready"],
-            is_p0=False,
-            is_blocked=False,
-        )
-        d = decide(issue)
-        assert d.action == "assign_kimi"
-
-    def test_p0_bug_assigns_claude(self):
-        issue = _make_scored_issue(issue_type="bug", is_p0=True, is_blocked=False)
-        d = decide(issue)
-        assert d.action == "assign_claude"
-        assert d.agent == AGENT_CLAUDE
-
-    def test_ready_feature_assigns_claude(self):
-        issue = _make_scored_issue(
-            issue_type="feature",
-            is_p0=False,
-            is_blocked=False,
-            tags={"feature"},
-        )
-        d = decide(issue)
-        assert d.action == "assign_claude"
-        assert d.agent == AGENT_CLAUDE
-
-    def test_decision_has_reason(self):
-        issue = _make_scored_issue()
-        d = decide(issue)
-        assert len(d.reason) > 10
-
-
-# ── _build_audit_comment ──────────────────────────────────────────────────────
-
-
-class TestBuildAuditComment:
-    def test_contains_timmy_triage_header(self):
-        d = TriageDecision(42, "assign_claude", "High priority bug", agent=AGENT_CLAUDE)
-        comment = _build_audit_comment(d)
-        assert "Timmy Triage" in comment
-
-    def test_contains_issue_reason(self):
-        d = TriageDecision(42, "assign_claude", "Urgent P0 bug", agent=AGENT_CLAUDE)
-        comment = _build_audit_comment(d)
-        assert "Urgent P0 bug" in comment
-
-    def test_assign_claude_mentions_agent(self):
-        d = TriageDecision(42, "assign_claude", "reason", agent=AGENT_CLAUDE)
-        comment = _build_audit_comment(d)
-        assert AGENT_CLAUDE in comment
-
-    def test_assign_kimi_mentions_label(self):
-        d = TriageDecision(42, "assign_kimi", "reason", agent=AGENT_KIMI)
-        comment = _build_audit_comment(d)
-        assert KIMI_READY_LABEL in comment
-
-    def test_flag_alex_mentions_owner(self):
-        d = TriageDecision(42, "flag_alex", "blocked", agent=OWNER_LOGIN)
-        comment = _build_audit_comment(d)
-        assert OWNER_LOGIN in comment
-
-    def test_contains_override_note(self):
-        d = TriageDecision(42, "assign_claude", "reason", agent=AGENT_CLAUDE)
-        comment = _build_audit_comment(d)
-        assert "override" in comment.lower()
-
-
-# ── _build_daily_summary ──────────────────────────────────────────────────────
-
-
-class TestBuildDailySummary:
-    def _make_result(self, decisions=None) -> TriageCycleResult:
-        return TriageCycleResult(
-            timestamp=datetime.now(UTC).isoformat(),
-            total_open=10,
-            scored=8,
-            ready=5,
-            decisions=decisions or [],
-        )
-
-    def test_contains_open_count(self):
-        result = self._make_result()
-        scored = [_make_scored_issue(number=i, ready=True, score=6) for i in range(1, 4)]
-        summary = _build_daily_summary(result, scored)
-        assert "10" in summary  # total_open
-
-    def test_contains_ready_count(self):
-        result = self._make_result()
-        summary = _build_daily_summary(result, [])
-        assert "5" in summary
-
-    def test_actions_taken_section(self):
-        decisions = [
-            TriageDecision(1, "assign_claude", "P0 bug", agent="claude", executed=True),
-        ]
-        result = self._make_result(decisions=decisions)
-        summary = _build_daily_summary(result, [])
-        assert "Actions Taken" in summary
-        assert "#1" in summary
-
-    def test_top_issues_listed(self):
-        scored = [_make_scored_issue(number=99, ready=True, score=8)]
-        result = self._make_result()
-        summary = _build_daily_summary(result, scored)
-        assert "#99" in summary
-
-    def test_footer_present(self):
-        summary = _build_daily_summary(self._make_result(), [])
-        assert "Auto-generated" in summary
-
-
-# ── BacklogTriageLoop ─────────────────────────────────────────────────────────
-
-
-class TestBacklogTriageLoop:
-    def test_default_interval_from_settings(self):
-        loop = BacklogTriageLoop()
-        from config import settings
-
-        assert loop._interval == float(settings.backlog_triage_interval_seconds)
-
-    def test_custom_interval(self):
-        loop = BacklogTriageLoop(interval=300)
-        assert loop._interval == 300.0
-
-    def test_dry_run_default(self):
-        loop = BacklogTriageLoop(dry_run=True)
-        assert loop._dry_run is True
-
-    def test_not_running_initially(self):
-        loop = BacklogTriageLoop()
-        assert loop.is_running is False
-
-    def test_stop_sets_running_false(self):
-        loop = BacklogTriageLoop()
-        loop._running = True
-        loop.stop()
-        assert loop._running is False
-
-    def test_cycle_count_starts_zero(self):
-        loop = BacklogTriageLoop()
-        assert loop.cycle_count == 0
-
-    @pytest.mark.asyncio
-    async def test_run_once_skips_when_no_gitea_token(self):
-        loop = BacklogTriageLoop()
-        mock_settings = MagicMock()
-        mock_settings.gitea_enabled = True
-        mock_settings.gitea_token = ""
-        mock_settings.backlog_triage_interval_seconds = 900
-        mock_settings.backlog_triage_dry_run = False
-        mock_settings.backlog_triage_daily_summary = False
-
-        with patch("timmy.backlog_triage.settings", mock_settings):
-            result = await loop.run_once()
-
-        assert result.total_open == 0
-
-    @pytest.mark.asyncio
-    async def test_run_once_dry_run_no_api_writes(self):
-        """In dry_run mode, decisions are made but no Gitea API writes happen."""
-        loop = BacklogTriageLoop(dry_run=True, daily_summary=False)
-
-        raw_issues = [
-            _make_raw_issue(
-                number=10,
-                title="Fix crash",
-                labels=["bug"],
-                body=(
-                    "## Problem\nCrash on login.\n## Solution\nFix src/auth.py "
-                    "def login()\nshould return 200\nmust pass tox tests"
-                ),
-            )
-        ]
-
-        mock_settings = MagicMock()
-        mock_settings.gitea_enabled = True
-        mock_settings.gitea_token = "fake-token"
-        mock_settings.gitea_repo = "owner/repo"
-        mock_settings.gitea_url = "http://gitea.local"
-        mock_settings.backlog_triage_interval_seconds = 900
-        mock_settings.backlog_triage_dry_run = True
-        mock_settings.backlog_triage_daily_summary = False
-
-        mock_client = AsyncMock()
-        mock_client.get.return_value = MagicMock(
-            status_code=200, json=MagicMock(return_value=raw_issues)
-        )
-
-        mock_ctx = AsyncMock()
-        mock_ctx.__aenter__.return_value = mock_client
-        mock_ctx.__aexit__.return_value = False
-
-        with (
-            patch("timmy.backlog_triage.settings", mock_settings),
-            patch("httpx.AsyncClient", return_value=mock_ctx),
-        ):
-            result = await loop.run_once()
-
-        # No POST/PATCH calls in dry run
-        mock_client.post.assert_not_called()
-        mock_client.patch.assert_not_called()
-
-        assert result.total_open == 1
-        assert loop.cycle_count == 1
-        assert len(loop.history) == 1
-
-    @pytest.mark.asyncio
-    async def test_run_once_assigns_unassigned_bug(self):
-        """Unassigned ready bug should be assigned to Claude with audit comment."""
-        loop = BacklogTriageLoop(dry_run=False, daily_summary=False)
-
-        body = (
-            "## Problem\nCrash on login.\n## Solution\nFix src/auth.py "
-            "def login()\nshould return 200\nmust pass tox tests"
-        )
-        raw_issues = [_make_raw_issue(number=5, title="Fix crash", labels=["bug"], body=body)]
-
-        mock_settings = MagicMock()
-        mock_settings.gitea_enabled = True
-        mock_settings.gitea_token = "fake-token"
-        mock_settings.gitea_repo = "owner/repo"
-        mock_settings.gitea_url = "http://gitea.local"
-        mock_settings.backlog_triage_interval_seconds = 900
-        mock_settings.backlog_triage_dry_run = False
-        mock_settings.backlog_triage_daily_summary = False
-
-        # GET /issues returns our issue
-        get_issues_resp = MagicMock(status_code=200)
-        get_issues_resp.json.return_value = raw_issues
-
-        # POST /comments returns success
-        comment_resp = MagicMock(status_code=201)
-        comment_resp.json.return_value = {"id": 1}
-
-        # PATCH /issues/{n} (assign) returns success
-        assign_resp = MagicMock(status_code=200)
-        assign_resp.json.return_value = {"number": 5}
-
-        mock_client = AsyncMock()
-        mock_client.get.return_value = get_issues_resp
-        mock_client.post.return_value = comment_resp
-        mock_client.patch.return_value = assign_resp
-
-        mock_ctx = AsyncMock()
-        mock_ctx.__aenter__.return_value = mock_client
-        mock_ctx.__aexit__.return_value = False
-
-        with (
-            patch("timmy.backlog_triage.settings", mock_settings),
-            patch("httpx.AsyncClient", return_value=mock_ctx),
-            patch("asyncio.sleep", new_callable=AsyncMock),
-        ):
-            result = await loop.run_once()
-
-        assert result.total_open == 1
-        # Comment should have been posted
-        mock_client.post.assert_called()
-        # Assign should have been called (PATCH)
-        mock_client.patch.assert_called()
-
-    @pytest.mark.asyncio
-    async def test_run_once_skips_already_assigned(self):
-        """Issues already assigned should not be acted upon."""
-        loop = BacklogTriageLoop(dry_run=False, daily_summary=False)
-
-        raw_issues = [
-            _make_raw_issue(
-                number=3,
-                labels=["bug"],
-                assignees=["perplexity"],
-                body="## Problem\nX\nmust pass tox\nshould return 200 at least 3 times",
-            )
-        ]
-
-        mock_settings = MagicMock()
-        mock_settings.gitea_enabled = True
-        mock_settings.gitea_token = "tok"
-        mock_settings.gitea_repo = "owner/repo"
-        mock_settings.gitea_url = "http://gitea.local"
-        mock_settings.backlog_triage_interval_seconds = 900
-        mock_settings.backlog_triage_dry_run = False
-        mock_settings.backlog_triage_daily_summary = False
-
-        get_resp = MagicMock(status_code=200)
-        get_resp.json.return_value = raw_issues
-
-        mock_client = AsyncMock()
-        mock_client.get.return_value = get_resp
-
-        mock_ctx = AsyncMock()
-        mock_ctx.__aenter__.return_value = mock_client
-        mock_ctx.__aexit__.return_value = False
-
-        with (
-            patch("timmy.backlog_triage.settings", mock_settings),
-            patch("httpx.AsyncClient", return_value=mock_ctx),
-        ):
-            result = await loop.run_once()
-
-        # No writes for already-assigned issue
-        mock_client.post.assert_not_called()
-        mock_client.patch.assert_not_called()
-        assert result.decisions[0].action == "skip"
-
-
-# ── ScoredIssue properties ────────────────────────────────────────────────────
-
-
-class TestScoredIssueProperties:
-    def test_is_unassigned_true_when_no_assignees(self):
-        issue = _make_scored_issue(assignees=[])
-        assert issue.is_unassigned is True
-
-    def test_is_unassigned_false_when_assigned(self):
-        issue = _make_scored_issue(assignees=["claude"])
-        assert issue.is_unassigned is False
-
-    def test_needs_kimi_for_research_tag(self):
-        issue = _make_scored_issue(tags={"research"})
-        assert issue.needs_kimi is True
-
-    def test_needs_kimi_for_kimi_ready_label(self):
-        issue = _make_scored_issue(labels=["kimi-ready"], tags=set())
-        assert issue.needs_kimi is True
-
-    def test_needs_kimi_false_for_bug(self):
-        issue = _make_scored_issue(tags={"bug"}, labels=[])
-        assert issue.needs_kimi is False