Compare commits

..

2 Commits

Author SHA1 Message Date
Alexander Whitestone
0b284972cb fix: correct complexity routing to not fall back to default model
Some checks failed
Tests / lint (pull_request) Failing after 17s
Tests / test (pull_request) Has been skipped
`_get_model_for_complexity` was calling `get_model_with_capability`,
which silently falls back to the provider default when no model has the
requested capability tag.  This caused the method to return a generic
model instead of None when neither the fallback chain nor any explicit
capability tag matched, misleading callers into skipping the provider
default logic.

Replace the call with an explicit next() comprehension that returns None
when no model explicitly carries the 'routine' or 'complex' capability.

Refs #1065

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-23 15:30:23 -04:00
Alexander Whitestone
6c5f55230b WIP: Claude Code progress on #1065
Automated salvage commit — agent session ended (exit 124).
Work in progress, may need continuation.
2026-03-23 14:41:42 -04:00
11 changed files with 724 additions and 1511 deletions

View File

@@ -25,6 +25,19 @@ providers:
tier: local
url: "http://localhost:11434"
models:
# ── Dual-model routing: Qwen3-8B (fast) + Qwen3-14B (quality) ──────────
# Both models fit simultaneously: ~6.6 GB + ~10.5 GB = ~17 GB combined.
# Requires OLLAMA_MAX_LOADED_MODELS=2 (set in .env) to stay hot.
# Ref: issue #1065 — Qwen3-8B/14B dual-model routing strategy
- name: qwen3:8b
context_window: 32768
capabilities: [text, tools, json, streaming, routine]
description: "Qwen3-8B Q6_K — fast router for routine tasks (~6.6 GB, 45-55 tok/s)"
- name: qwen3:14b
context_window: 40960
capabilities: [text, tools, json, streaming, complex, reasoning]
description: "Qwen3-14B Q5_K_M — complex reasoning and planning (~10.5 GB, 20-28 tok/s)"
# Text + Tools models
- name: qwen3:30b
default: true
@@ -187,6 +200,20 @@ fallback_chains:
- dolphin3 # base Dolphin 3.0 8B (uncensored, no custom system prompt)
- qwen3:30b # primary fallback — usually sufficient with a good system prompt
# ── Complexity-based routing chains (issue #1065) ───────────────────────
# Routine tasks: prefer Qwen3-8B for low latency (~45-55 tok/s)
routine:
- qwen3:8b # Primary fast model
- llama3.1:8b-instruct # Fallback fast model
- llama3.2:3b # Smallest available
# Complex tasks: prefer Qwen3-14B for quality (~20-28 tok/s)
complex:
- qwen3:14b # Primary quality model
- hermes4-14b # Native tool calling, hybrid reasoning
- qwen3:30b # Highest local quality
- qwen2.5:14b # Additional fallback
# ── Custom Models ───────────────────────────────────────────────────────────
# Register custom model weights for per-agent assignment.
# Supports GGUF (Ollama), safetensors, and HuggingFace checkpoint dirs.

View File

@@ -41,6 +41,13 @@ class Settings(BaseSettings):
# 4096 keeps memory at ~19GB. Set to 0 to use model defaults.
ollama_num_ctx: int = 4096
# Maximum models loaded simultaneously in Ollama — override with OLLAMA_MAX_LOADED_MODELS
# Set to 2 so Qwen3-8B and Qwen3-14B can stay hot concurrently (~17 GB combined).
# Requires Ollama ≥ 0.1.33. Export this to the Ollama process environment:
# OLLAMA_MAX_LOADED_MODELS=2 ollama serve
# or add it to your systemd/launchd unit before starting the harness.
ollama_max_loaded_models: int = 2
# Fallback model chains — override with FALLBACK_MODELS / VISION_FALLBACK_MODELS
# as comma-separated strings, e.g. FALLBACK_MODELS="qwen3:30b,llama3.1"
# Or edit config/providers.yaml → fallback_chains for the canonical source.
@@ -304,16 +311,6 @@ class Settings(BaseSettings):
mcp_timeout: int = 15
mcp_bridge_timeout: int = 60 # HTTP timeout for MCP bridge Ollama calls (seconds)
# ── Backlog Triage Loop ────────────────────────────────────────────
# Autonomous loop: fetch open issues, score, assign to agents.
backlog_triage_enabled: bool = False
# Seconds between triage cycles (default: 15 minutes).
backlog_triage_interval_seconds: int = 900
# When True, score and summarize but don't write to Gitea.
backlog_triage_dry_run: bool = False
# Create a daily triage summary issue/comment.
backlog_triage_daily_summary: bool = True
# ── Loop QA (Self-Testing) ─────────────────────────────────────────
# Self-test orchestrator that probes capabilities alongside the thinking loop.
loop_qa_enabled: bool = True

View File

@@ -2,6 +2,7 @@
from .api import router
from .cascade import CascadeRouter, Provider, ProviderStatus, get_router
from .classifier import TaskComplexity, classify_task
from .history import HealthHistoryStore, get_history_store
__all__ = [
@@ -12,4 +13,6 @@ __all__ = [
"router",
"HealthHistoryStore",
"get_history_store",
"TaskComplexity",
"classify_task",
]

View File

@@ -528,6 +528,34 @@ class CascadeRouter:
return True
def _get_model_for_complexity(
self, provider: Provider, complexity: "TaskComplexity"
) -> str | None:
"""Return the best model on *provider* for the given complexity tier.
Checks fallback chains first (routine / complex), then falls back to
any model with the matching capability tag, then the provider default.
"""
from infrastructure.router.classifier import TaskComplexity
chain_key = "routine" if complexity == TaskComplexity.SIMPLE else "complex"
# Walk the capability fallback chain — first model present on this provider wins
for model_name in self.config.fallback_chains.get(chain_key, []):
if any(m["name"] == model_name for m in provider.models):
return model_name
# Direct capability lookup — only return if a model explicitly has the tag
# (do not use get_model_with_capability here as it falls back to the default)
cap_model = next(
(m["name"] for m in provider.models if chain_key in m.get("capabilities", [])),
None,
)
if cap_model:
return cap_model
return None # Caller will use provider default
async def complete(
self,
messages: list[dict],
@@ -535,6 +563,7 @@ class CascadeRouter:
temperature: float = 0.7,
max_tokens: int | None = None,
cascade_tier: str | None = None,
complexity_hint: str | None = None,
) -> dict:
"""Complete a chat conversation with automatic failover.
@@ -543,24 +572,48 @@ class CascadeRouter:
- Falls back to vision-capable models when needed
- Supports image URLs, paths, and base64 encoding
Complexity-based routing (issue #1065):
- ``complexity_hint="simple"`` → routes to Qwen3-8B (low-latency)
- ``complexity_hint="complex"`` → routes to Qwen3-14B (quality)
- ``complexity_hint=None`` (default) → auto-classifies from messages
Args:
messages: List of message dicts with role and content
model: Preferred model (tries this first, then provider defaults)
model: Preferred model (tries this first; complexity routing is
skipped when an explicit model is given)
temperature: Sampling temperature
max_tokens: Maximum tokens to generate
cascade_tier: If specified, filters providers by this tier.
- "frontier_required": Uses only Anthropic provider for top-tier models.
complexity_hint: "simple", "complex", or None (auto-detect).
Returns:
Dict with content, provider_used, and metrics
Dict with content, provider_used, model, latency_ms,
is_fallback_model, and complexity fields.
Raises:
RuntimeError: If all providers fail
"""
from infrastructure.router.classifier import TaskComplexity, classify_task
content_type = self._detect_content_type(messages)
if content_type != ContentType.TEXT:
logger.debug("Detected %s content, selecting appropriate model", content_type.value)
# Resolve task complexity ─────────────────────────────────────────────
# Skip complexity routing when caller explicitly specifies a model.
complexity: TaskComplexity | None = None
if model is None:
if complexity_hint is not None:
try:
complexity = TaskComplexity(complexity_hint.lower())
except ValueError:
logger.warning("Unknown complexity_hint %r, auto-classifying", complexity_hint)
complexity = classify_task(messages)
else:
complexity = classify_task(messages)
logger.debug("Task complexity: %s", complexity.value)
errors = []
providers = self.providers
@@ -573,7 +626,6 @@ class CascadeRouter:
if not providers:
raise RuntimeError(f"No providers found for tier: {cascade_tier}")
for provider in providers:
if not self._is_provider_available(provider):
continue
@@ -587,7 +639,21 @@ class CascadeRouter:
)
continue
selected_model, is_fallback_model = self._select_model(provider, model, content_type)
# Complexity-based model selection (only when no explicit model) ──
effective_model = model
if effective_model is None and complexity is not None:
effective_model = self._get_model_for_complexity(provider, complexity)
if effective_model:
logger.debug(
"Complexity routing [%s]: %s%s",
complexity.value,
provider.name,
effective_model,
)
selected_model, is_fallback_model = self._select_model(
provider, effective_model, content_type
)
try:
result = await self._attempt_with_retry(
@@ -610,6 +676,7 @@ class CascadeRouter:
"model": result.get("model", selected_model or provider.get_default_model()),
"latency_ms": result.get("latency_ms", 0),
"is_fallback_model": is_fallback_model,
"complexity": complexity.value if complexity is not None else None,
}
raise RuntimeError(f"All providers failed: {'; '.join(errors)}")

View File

@@ -0,0 +1,166 @@
"""Task complexity classifier for Qwen3 dual-model routing.
Classifies incoming tasks as SIMPLE (route to Qwen3-8B for low-latency)
or COMPLEX (route to Qwen3-14B for quality-sensitive work).
Classification is fully heuristic — no LLM inference required.
"""
import re
from enum import Enum
class TaskComplexity(Enum):
"""Task complexity tier for model routing."""
SIMPLE = "simple" # Qwen3-8B Q6_K: routine, latency-sensitive
COMPLEX = "complex" # Qwen3-14B Q5_K_M: quality-sensitive, multi-step
# Keywords strongly associated with complex tasks
_COMPLEX_KEYWORDS: frozenset[str] = frozenset(
[
"plan",
"review",
"analyze",
"analyse",
"triage",
"refactor",
"design",
"architecture",
"implement",
"compare",
"debug",
"explain",
"prioritize",
"prioritise",
"strategy",
"optimize",
"optimise",
"evaluate",
"assess",
"brainstorm",
"outline",
"summarize",
"summarise",
"generate code",
"write a",
"write the",
"code review",
"pull request",
"multi-step",
"multi step",
"step by step",
"backlog prioriti",
"issue triage",
"root cause",
"how does",
"why does",
"what are the",
]
)
# Keywords strongly associated with simple/routine tasks
_SIMPLE_KEYWORDS: frozenset[str] = frozenset(
[
"status",
"list ",
"show ",
"what is",
"how many",
"ping",
"run ",
"execute ",
"ls ",
"cat ",
"ps ",
"fetch ",
"count ",
"tail ",
"head ",
"grep ",
"find file",
"read file",
"get ",
"query ",
"check ",
"yes",
"no",
"ok",
"done",
"thanks",
]
)
# Content longer than this is treated as complex regardless of keywords
_COMPLEX_CHAR_THRESHOLD = 500
# Short content defaults to simple
_SIMPLE_CHAR_THRESHOLD = 150
# More than this many messages suggests an ongoing complex conversation
_COMPLEX_CONVERSATION_DEPTH = 6
def classify_task(messages: list[dict]) -> TaskComplexity:
"""Classify task complexity from a list of messages.
Uses heuristic rules — no LLM call required. Errs toward COMPLEX
when uncertain so that quality is preserved.
Args:
messages: List of message dicts with ``role`` and ``content`` keys.
Returns:
TaskComplexity.SIMPLE or TaskComplexity.COMPLEX
"""
if not messages:
return TaskComplexity.SIMPLE
# Concatenate all user-turn content for analysis
user_content = " ".join(
msg.get("content", "")
for msg in messages
if msg.get("role") in ("user", "human")
and isinstance(msg.get("content"), str)
).lower().strip()
if not user_content:
return TaskComplexity.SIMPLE
# Complexity signals override everything -----------------------------------
# Explicit complex keywords
for kw in _COMPLEX_KEYWORDS:
if kw in user_content:
return TaskComplexity.COMPLEX
# Numbered / multi-step instruction list: "1. do this 2. do that"
if re.search(r"\b\d+\.\s+\w", user_content):
return TaskComplexity.COMPLEX
# Code blocks embedded in messages
if "```" in user_content:
return TaskComplexity.COMPLEX
# Long content → complex reasoning likely required
if len(user_content) > _COMPLEX_CHAR_THRESHOLD:
return TaskComplexity.COMPLEX
# Deep conversation → complex ongoing task
if len(messages) > _COMPLEX_CONVERSATION_DEPTH:
return TaskComplexity.COMPLEX
# Simplicity signals -------------------------------------------------------
# Explicit simple keywords
for kw in _SIMPLE_KEYWORDS:
if kw in user_content:
return TaskComplexity.SIMPLE
# Short single-sentence messages default to simple
if len(user_content) <= _SIMPLE_CHAR_THRESHOLD:
return TaskComplexity.SIMPLE
# When uncertain, prefer quality (complex model)
return TaskComplexity.COMPLEX

View File

@@ -1,759 +0,0 @@
"""Autonomous backlog triage loop — Timmy scans Gitea and assigns work.
Continuously fetches open issues, scores/prioritizes them, and decides
what to work on next without waiting to be asked.
Loop flow::
while true:
1. Fetch all open issues from Gitea API
2. Score/prioritize by labels, age, type, blocked status
3. Identify unassigned high-priority items
4. Decide: assign to claude, dispatch to kimi, or flag for Alex
5. Execute the assignment (comment + assign)
6. Optionally post a daily triage summary
7. Sleep for configurable interval (default 15 min)
Priority tiers:
P0 — security, data loss, blocking bugs → immediate action
P1 — core functionality, ready issues → next sprint
P2 — improvements, low-score issues → backlog
P3 — philosophy, meta → someday/never (skip in triage)
Usage::
from timmy.backlog_triage import BacklogTriageLoop
loop = BacklogTriageLoop()
await loop.run_once() # single triage cycle
await loop.start() # background daemon loop
loop.stop() # graceful shutdown
"""
from __future__ import annotations
import asyncio
import logging
import re
from dataclasses import dataclass, field
from datetime import UTC, datetime, timedelta
from typing import Any
import httpx
from config import settings
logger = logging.getLogger(__name__)
# ── Constants ────────────────────────────────────────────────────────────────
# Minimum triage score to be considered "ready" for assignment
READY_THRESHOLD = 5
# Agent Gitea logins
AGENT_CLAUDE = "claude"
AGENT_KIMI = "kimi"
OWNER_LOGIN = "rockachopa" # Alex — human owner
# Labels
KIMI_READY_LABEL = "kimi-ready"
TRIAGE_DONE_LABEL = "triage-done"
# Tag sets (mirrors scripts/triage_score.py)
_BUG_TAGS = frozenset({"bug", "broken", "crash", "error", "fix", "regression", "hotfix"})
_FEATURE_TAGS = frozenset({"feature", "feat", "enhancement", "capability", "timmy-capability"})
_REFACTOR_TAGS = frozenset({"refactor", "cleanup", "tech-debt", "optimization", "perf"})
_META_TAGS = frozenset({"philosophy", "soul-gap", "discussion", "question", "rfc"})
_P0_TAGS = frozenset({"security", "data-loss", "blocking", "p0", "critical"})
_RESEARCH_TAGS = frozenset({"research", "kimi-ready", "investigation", "spike"})
_LOOP_TAG = "loop-generated"
# Regex patterns for scoring
_TAG_RE = re.compile(r"\[([^\]]+)\]")
_FILE_RE = re.compile(r"(?:src/|tests/|scripts/|\.py|\.html|\.js|\.yaml|\.toml|\.sh)", re.IGNORECASE)
_FUNC_RE = re.compile(r"(?:def |class |function |method |`\w+\(\)`)", re.IGNORECASE)
_ACCEPT_RE = re.compile(
r"(?:should|must|expect|verify|assert|test.?case|acceptance|criteria"
r"|pass(?:es|ing)|fail(?:s|ing)|return(?:s)?|raise(?:s)?)",
re.IGNORECASE,
)
_TEST_RE = re.compile(r"(?:tox|pytest|test_\w+|\.test\.|assert\s)", re.IGNORECASE)
_BLOCKED_RE = re.compile(r"\bblock(?:ed|s|ing)\b", re.IGNORECASE)
# ── Data types ───────────────────────────────────────────────────────────────
@dataclass
class ScoredIssue:
"""A Gitea issue enriched with triage scoring."""
number: int
title: str
body: str
labels: list[str]
tags: set[str]
assignees: list[str]
created_at: datetime
issue_type: str # bug | feature | refactor | philosophy | research | unknown
score: int = 0
scope: int = 0
acceptance: int = 0
alignment: int = 0
ready: bool = False
age_days: int = 0
is_p0: bool = False
is_blocked: bool = False
@property
def is_unassigned(self) -> bool:
return len(self.assignees) == 0
@property
def needs_kimi(self) -> bool:
return bool(self.tags & _RESEARCH_TAGS) or KIMI_READY_LABEL in self.labels
@dataclass
class TriageDecision:
"""The outcome of a triage decision for a single issue."""
issue_number: int
action: str # "assign_claude" | "assign_kimi" | "flag_alex" | "skip"
reason: str
agent: str = "" # the agent assigned (login)
executed: bool = False
error: str = ""
@dataclass
class TriageCycleResult:
"""Summary of one complete triage cycle."""
timestamp: str
total_open: int
scored: int
ready: int
decisions: list[TriageDecision] = field(default_factory=list)
errors: list[str] = field(default_factory=list)
duration_ms: int = 0
# ── Scoring ──────────────────────────────────────────────────────────────────
def _extract_tags(title: str, labels: list[str]) -> set[str]:
"""Pull tags from [bracket] title notation + Gitea label names."""
tags: set[str] = set()
for m in _TAG_RE.finditer(title):
tags.add(m.group(1).lower().strip())
for lbl in labels:
tags.add(lbl.lower().strip())
return tags
def _score_scope(title: str, body: str, tags: set[str]) -> int:
"""03: How well-scoped is this issue?"""
text = f"{title}\n{body}"
score = 0
if _FILE_RE.search(text):
score += 1
if _FUNC_RE.search(text):
score += 1
clean = _TAG_RE.sub("", title).strip()
if len(clean) < 80:
score += 1
if tags & _META_TAGS:
score = max(0, score - 2)
return min(3, score)
def _score_acceptance(title: str, body: str, tags: set[str]) -> int:
"""03: Does this have clear acceptance criteria?"""
text = f"{title}\n{body}"
score = 0
matches = len(_ACCEPT_RE.findall(text))
if matches >= 3:
score += 2
elif matches >= 1:
score += 1
if _TEST_RE.search(text):
score += 1
if re.search(r"##\s*(problem|solution|expected|actual|steps)", body, re.IGNORECASE):
score += 1
if tags & _META_TAGS:
score = max(0, score - 1)
return min(3, score)
def _score_alignment(title: str, body: str, tags: set[str]) -> int:
"""03: How aligned is this with the north star?"""
score = 0
if tags & _BUG_TAGS:
return 3
if tags & _REFACTOR_TAGS:
score += 2
if tags & _FEATURE_TAGS:
score += 2
if _LOOP_TAG in tags:
score += 1
if tags & _META_TAGS:
score = 0
return min(3, score)
def score_issue(issue: dict[str, Any]) -> ScoredIssue:
"""Score and classify a raw Gitea issue dict."""
number = issue["number"]
title = issue.get("title", "")
body = issue.get("body") or ""
label_names = [lbl["name"] for lbl in issue.get("labels", [])]
tags = _extract_tags(title, label_names)
assignees = [a["login"] for a in issue.get("assignees", [])]
# Parse created_at
raw_ts = issue.get("created_at", "")
try:
created_at = datetime.fromisoformat(raw_ts.replace("Z", "+00:00"))
except (ValueError, AttributeError):
created_at = datetime.now(UTC)
age_days = (datetime.now(UTC) - created_at).days
# Scores
scope = _score_scope(title, body, tags)
acceptance = _score_acceptance(title, body, tags)
alignment = _score_alignment(title, body, tags)
total = scope + acceptance + alignment
# Classify
if tags & _BUG_TAGS:
issue_type = "bug"
elif tags & _RESEARCH_TAGS:
issue_type = "research"
elif tags & _FEATURE_TAGS:
issue_type = "feature"
elif tags & _REFACTOR_TAGS:
issue_type = "refactor"
elif tags & _META_TAGS:
issue_type = "philosophy"
else:
issue_type = "unknown"
is_p0 = bool(tags & _P0_TAGS) or issue_type == "bug"
is_blocked = bool(_BLOCKED_RE.search(title) or _BLOCKED_RE.search(body))
return ScoredIssue(
number=number,
title=_TAG_RE.sub("", title).strip(),
body=body,
labels=label_names,
tags=tags,
assignees=assignees,
created_at=created_at,
issue_type=issue_type,
score=total,
scope=scope,
acceptance=acceptance,
alignment=alignment,
ready=total >= READY_THRESHOLD,
age_days=age_days,
is_p0=is_p0,
is_blocked=is_blocked,
)
# ── Decision logic ───────────────────────────────────────────────────────────
def decide(issue: ScoredIssue) -> TriageDecision:
"""Decide what to do with an issue.
Returns a TriageDecision with action, reason, and agent.
Decision is not yet executed — call execute_decision() for that.
"""
num = issue.number
# Skip philosophy/meta — not dev-actionable
if issue.issue_type == "philosophy":
return TriageDecision(
issue_number=num,
action="skip",
reason="Philosophy/meta issue — not dev-actionable in the triage loop.",
)
# Skip already-assigned issues
if not issue.is_unassigned:
return TriageDecision(
issue_number=num,
action="skip",
reason=f"Already assigned to: {', '.join(issue.assignees)}.",
)
# Skip if not ready (low score)
if not issue.ready:
return TriageDecision(
issue_number=num,
action="skip",
reason=f"Score {issue.score} < {READY_THRESHOLD} threshold — needs more detail before assignment.",
)
# Blocked: flag for Alex
if issue.is_blocked:
return TriageDecision(
issue_number=num,
action="flag_alex",
agent=OWNER_LOGIN,
reason=(
"Issue appears blocked. Flagging for @rockachopa to unblock before autonomous assignment."
),
)
# Research / Kimi-ready
if issue.needs_kimi:
return TriageDecision(
issue_number=num,
action="assign_kimi",
agent=AGENT_KIMI,
reason=(
f"Issue type '{issue.issue_type}' with research/investigation scope. "
f"Assigning kimi-ready label for Kimi agent to pick up."
),
)
# P0 bugs and blocking issues → Claude immediately
if issue.is_p0:
return TriageDecision(
issue_number=num,
action="assign_claude",
agent=AGENT_CLAUDE,
reason=(
f"P0/{issue.issue_type} issue (score={issue.score}, age={issue.age_days}d). "
f"Assigning to Claude Code for immediate attention."
),
)
# Everything else that is ready → Claude Code
return TriageDecision(
issue_number=num,
action="assign_claude",
agent=AGENT_CLAUDE,
reason=(
f"Unassigned ready issue (type={issue.issue_type}, score={issue.score}, "
f"age={issue.age_days}d). Assigning to Claude Code."
),
)
# ── Gitea API client ─────────────────────────────────────────────────────────
def _api_headers() -> dict[str, str]:
return {
"Authorization": f"token {settings.gitea_token}",
"Content-Type": "application/json",
"Accept": "application/json",
}
def _repo_url(path: str) -> str:
owner, repo = settings.gitea_repo.split("/", 1)
return f"{settings.gitea_url}/api/v1/repos/{owner}/{repo}/{path}"
async def fetch_open_issues(client: httpx.AsyncClient) -> list[dict[str, Any]]:
"""Fetch all open issues from Gitea, paginating as needed."""
all_issues: list[dict[str, Any]] = []
page = 1
while True:
url = _repo_url(f"issues?state=open&type=issues&limit=50&page={page}")
try:
resp = await client.get(url, headers=_api_headers())
if resp.status_code != 200:
logger.warning("Gitea issues fetch failed (HTTP %s)", resp.status_code)
break
batch: list[dict[str, Any]] = resp.json()
if not batch:
break
all_issues.extend(batch)
if len(batch) < 50:
break
page += 1
except (httpx.ConnectError, httpx.ReadError, httpx.TimeoutException) as exc:
logger.warning("Gitea connection error fetching issues: %s", exc)
break
return all_issues
async def post_comment(
client: httpx.AsyncClient,
issue_number: int,
body: str,
) -> bool:
"""Post a comment on a Gitea issue. Returns True on success."""
url = _repo_url(f"issues/{issue_number}/comments")
try:
resp = await client.post(url, headers=_api_headers(), json={"body": body})
return resp.status_code in (200, 201)
except (httpx.ConnectError, httpx.ReadError, httpx.TimeoutException) as exc:
logger.warning("Failed to post comment on #%d: %s", issue_number, exc)
return False
async def assign_issue(
client: httpx.AsyncClient,
issue_number: int,
assignee: str,
) -> bool:
"""Assign an issue to a Gitea user. Returns True on success."""
url = _repo_url(f"issues/{issue_number}")
try:
resp = await client.patch(
url,
headers=_api_headers(),
json={"assignees": [assignee]},
)
return resp.status_code in (200, 201)
except (httpx.ConnectError, httpx.ReadError, httpx.TimeoutException) as exc:
logger.warning("Failed to assign #%d to %s: %s", issue_number, assignee, exc)
return False
async def add_label(
client: httpx.AsyncClient,
issue_number: int,
label_name: str,
) -> bool:
"""Add a label to a Gitea issue by name (auto-creates if missing). Returns True on success."""
owner, repo = settings.gitea_repo.split("/", 1)
labels_url = f"{settings.gitea_url}/api/v1/repos/{owner}/{repo}/labels"
headers = _api_headers()
try:
# Fetch existing labels
resp = await client.get(labels_url, headers=headers)
if resp.status_code != 200:
return False
existing = {lbl["name"]: lbl["id"] for lbl in resp.json()}
if label_name in existing:
label_id = existing[label_name]
else:
# Auto-create the label
create_resp = await client.post(
labels_url,
headers=headers,
json={"name": label_name, "color": "#006b75"},
)
if create_resp.status_code not in (200, 201):
return False
label_id = create_resp.json()["id"]
# Apply to the issue
apply_url = _repo_url(f"issues/{issue_number}/labels")
apply_resp = await client.post(
apply_url, headers=headers, json={"labels": [label_id]}
)
return apply_resp.status_code in (200, 201)
except (httpx.ConnectError, httpx.ReadError, httpx.TimeoutException) as exc:
logger.warning("Failed to add label %r to #%d: %s", label_name, issue_number, exc)
return False
# ── Decision execution ───────────────────────────────────────────────────────
async def execute_decision(
client: httpx.AsyncClient,
decision: TriageDecision,
dry_run: bool = False,
) -> TriageDecision:
"""Execute a triage decision — comment + assign/label.
When dry_run=True, logs the decision but makes no Gitea API calls.
Returns the updated decision with executed=True on success.
"""
num = decision.issue_number
if decision.action == "skip":
logger.debug("Triage skip #%d: %s", num, decision.reason)
decision.executed = True
return decision
audit_comment = _build_audit_comment(decision)
if dry_run:
logger.info(
"[DRY RUN] #%d%s (%s): %s",
num,
decision.action,
decision.agent,
decision.reason,
)
decision.executed = True
return decision
# Post audit comment first (always, so Alex can see reasoning)
comment_ok = await post_comment(client, num, audit_comment)
if not comment_ok:
decision.error = "Failed to post audit comment"
logger.warning("Triage #%d: comment failed", num)
return decision
# Execute assignment
ok = False
if decision.action == "assign_claude":
ok = await assign_issue(client, num, AGENT_CLAUDE)
elif decision.action == "assign_kimi":
ok = await add_label(client, num, KIMI_READY_LABEL)
elif decision.action == "flag_alex":
# Comment already posted above — that's sufficient for flagging
ok = True
if ok:
decision.executed = True
logger.info("Triage #%d%s OK", num, decision.action)
else:
decision.error = f"Action {decision.action!r} failed"
logger.warning("Triage #%d: action %r failed", num, decision.action)
return decision
def _build_audit_comment(decision: TriageDecision) -> str:
"""Build the audit trail comment that Alex can read to see reasoning."""
ts = datetime.now(UTC).strftime("%Y-%m-%d %H:%M UTC")
action_text = {
"assign_claude": f"Assigning to @{AGENT_CLAUDE} for implementation.",
"assign_kimi": f"Adding `{KIMI_READY_LABEL}` label — queuing for Kimi research agent.",
"flag_alex": f"Flagging for @{OWNER_LOGIN} — issue appears blocked or needs human decision.",
}.get(decision.action, decision.action)
return (
f"**[Timmy Triage — {ts}]**\n\n"
f"**Decision:** {action_text}\n\n"
f"**Why:** {decision.reason}\n\n"
f"*Autonomous triage by Timmy. Reply to override.*"
)
# ── Daily summary ─────────────────────────────────────────────────────────────
def _build_daily_summary(result: TriageCycleResult, scored: list[ScoredIssue]) -> str:
"""Build the daily triage summary body."""
now = datetime.now(UTC).strftime("%Y-%m-%d %H:%M UTC")
assigned = [d for d in result.decisions if d.executed and d.action != "skip"]
skipped = [d for d in result.decisions if d.action == "skip"]
lines = [
f"# Timmy Backlog Triage — {now}",
"",
f"**Open issues:** {result.total_open} | "
f"**Scored:** {result.scored} | "
f"**Ready:** {result.ready} | "
f"**Assigned this cycle:** {len(assigned)}",
"",
"## Top 10 Ready Issues (by score)",
"",
]
top = sorted([s for s in scored if s.ready], key=lambda s: (-s.score, s.number))[:10]
for s in top:
flag = "🐛" if s.issue_type == "bug" else "" if s.is_p0 else ""
lines.append(
f"- {flag} **#{s.number}** (score={s.score}, age={s.age_days}d) — {s.title[:80]}"
)
if assigned:
lines += ["", "## Actions Taken", ""]
for d in assigned:
lines.append(f"- #{d.issue_number} → `{d.action}` ({d.agent}): {d.reason[:100]}")
if skipped:
lines += ["", f"## Skipped ({len(skipped)} issues)", ""]
for d in skipped[:5]:
lines.append(f"- #{d.issue_number}: {d.reason[:80]}")
if len(skipped) > 5:
lines.append(f"- … and {len(skipped) - 5} more")
lines += [
"",
"---",
"*Auto-generated by Timmy's backlog triage loop. "
"Override any decision by reassigning or commenting.*",
]
return "\n".join(lines)
async def post_daily_summary(
client: httpx.AsyncClient,
result: TriageCycleResult,
scored: list[ScoredIssue],
dry_run: bool = False,
) -> bool:
"""Post a daily triage summary as a new Gitea issue."""
today = datetime.now(UTC).strftime("%Y-%m-%d")
title = f"[Triage] Daily backlog summary — {today}"
body = _build_daily_summary(result, scored)
if dry_run:
logger.info("[DRY RUN] Would post daily summary: %s", title)
return True
url = _repo_url("issues")
try:
resp = await client.post(
url,
headers=_api_headers(),
json={
"title": title,
"body": body,
"labels": [],
},
)
if resp.status_code in (200, 201):
issue_num = resp.json().get("number", "?")
logger.info("Daily triage summary posted as issue #%s", issue_num)
return True
logger.warning("Daily summary post failed (HTTP %s)", resp.status_code)
return False
except (httpx.ConnectError, httpx.ReadError, httpx.TimeoutException) as exc:
logger.warning("Failed to post daily summary: %s", exc)
return False
# ── Main loop class ───────────────────────────────────────────────────────────
class BacklogTriageLoop:
"""Autonomous backlog triage loop.
Fetches, scores, and assigns Gitea issues on a configurable interval.
Parameters
----------
interval:
Seconds between triage cycles. Default: settings.backlog_triage_interval_seconds.
dry_run:
When True, score and log decisions but don't write to Gitea.
daily_summary:
When True, post a daily triage summary issue after each cycle.
"""
def __init__(
self,
*,
interval: float | None = None,
dry_run: bool | None = None,
daily_summary: bool | None = None,
) -> None:
self._interval = float(interval or settings.backlog_triage_interval_seconds)
self._dry_run = dry_run if dry_run is not None else settings.backlog_triage_dry_run
self._daily_summary = (
daily_summary if daily_summary is not None else settings.backlog_triage_daily_summary
)
self._running = False
self._task: asyncio.Task | None = None
self._cycle_count = 0
self._last_summary_date: str = ""
self.history: list[TriageCycleResult] = []
@property
def is_running(self) -> bool:
return self._running
@property
def cycle_count(self) -> int:
return self._cycle_count
async def run_once(self) -> TriageCycleResult:
"""Execute one full triage cycle.
1. Fetch all open Gitea issues
2. Score and prioritize
3. Decide on each unassigned ready issue
4. Execute decisions
5. Optionally post daily summary
"""
import time
self._cycle_count += 1
start = time.monotonic()
ts = datetime.now(UTC).isoformat()
result = TriageCycleResult(timestamp=ts, total_open=0, scored=0, ready=0)
if not settings.gitea_enabled or not settings.gitea_token:
logger.warning("Backlog triage: Gitea not configured — skipping cycle")
return result
async with httpx.AsyncClient(timeout=30) as client:
# 1. Fetch
raw_issues = await fetch_open_issues(client)
result.total_open = len(raw_issues)
logger.info("Triage cycle #%d: fetched %d open issues", self._cycle_count, len(raw_issues))
# 2. Score
scored = [score_issue(i) for i in raw_issues]
result.scored = len(scored)
result.ready = sum(1 for s in scored if s.ready)
# 3 & 4. Decide and execute for each issue
for issue in scored:
decision = decide(issue)
if decision.action == "skip":
result.decisions.append(decision)
continue
decision = await execute_decision(client, decision, dry_run=self._dry_run)
result.decisions.append(decision)
# Rate-limit: short pause between API writes to avoid hammering Gitea
if not self._dry_run:
await asyncio.sleep(0.5)
# 5. Daily summary (once per UTC day)
today = datetime.now(UTC).strftime("%Y-%m-%d")
if self._daily_summary and today != self._last_summary_date:
await post_daily_summary(client, result, scored, dry_run=self._dry_run)
self._last_summary_date = today
result.duration_ms = int((time.monotonic() - start) * 1000)
self.history.append(result)
assigned_count = sum(1 for d in result.decisions if d.executed and d.action != "skip")
logger.info(
"Triage cycle #%d complete (%d ms): %d open, %d ready, %d assigned",
self._cycle_count,
result.duration_ms,
result.total_open,
result.ready,
assigned_count,
)
return result
async def start(self) -> None:
"""Start the triage loop as a background task."""
if self._running:
logger.warning("BacklogTriageLoop already running")
return
self._running = True
await self._loop()
async def _loop(self) -> None:
logger.info(
"BacklogTriageLoop started (interval=%.0fs, dry_run=%s)",
self._interval,
self._dry_run,
)
while self._running:
try:
await self.run_once()
except Exception:
logger.exception("Backlog triage cycle failed")
await asyncio.sleep(self._interval)
def stop(self) -> None:
"""Signal the loop to stop after the current cycle."""
self._running = False
logger.info("BacklogTriageLoop stop requested")

View File

@@ -399,74 +399,6 @@ class MCPBridge:
logger.warning("Tool '%s' execution failed: %s", name, exc)
return f"Error executing {name}: {exc}"
@staticmethod
def _build_initial_messages(
prompt: str, system_prompt: str | None
) -> list[dict]:
"""Build the initial message list for a run."""
messages: list[dict] = []
if system_prompt:
messages.append({"role": "system", "content": system_prompt})
messages.append({"role": "user", "content": prompt})
return messages
async def _process_round_tool_calls(
self,
messages: list[dict],
model_tool_calls: list[dict],
rounds: int,
tool_calls_made: list[dict],
) -> None:
"""Execute all tool calls in one round, appending results to messages."""
for tc in model_tool_calls:
func = tc.get("function", {})
tool_name = func.get("name", "unknown")
tool_args = func.get("arguments", {})
logger.info(
"Bridge tool call [round %d]: %s(%s)",
rounds,
tool_name,
tool_args,
)
result = await self._execute_tool_call(tc)
tool_calls_made.append(
{
"round": rounds,
"tool": tool_name,
"arguments": tool_args,
"result": result[:500], # Truncate for logging
}
)
messages.append({"role": "tool", "content": result})
async def _run_tool_loop(
self, messages: list[dict], tools: list[dict]
) -> tuple[str, list[dict], int, str]:
"""Run the tool-call loop until final response or max rounds reached.
Returns:
Tuple of (content, tool_calls_made, rounds, error).
"""
tool_calls_made: list[dict] = []
rounds = 0
for round_num in range(self.max_rounds):
rounds = round_num + 1
response = await self._chat(messages, tools)
msg = response.get("message", {})
model_tool_calls = msg.get("tool_calls", [])
if not model_tool_calls:
return msg.get("content", ""), tool_calls_made, rounds, ""
messages.append(msg)
await self._process_round_tool_calls(
messages, model_tool_calls, rounds, tool_calls_made
)
error = f"Exceeded maximum of {self.max_rounds} tool-call rounds"
return "(max tool-call rounds reached)", tool_calls_made, rounds, error
async def run(
self,
prompt: str,
@@ -487,37 +419,115 @@ class MCPBridge:
BridgeResult with the final response and tool call history.
"""
start = time.time()
messages = self._build_initial_messages(prompt, system_prompt)
messages: list[dict] = []
if system_prompt:
messages.append({"role": "system", "content": system_prompt})
messages.append({"role": "user", "content": prompt})
tools = self._build_ollama_tools()
tool_calls_made: list[dict] = []
rounds = 0
error_msg = ""
try:
content, tool_calls_made, rounds, error_msg = await self._run_tool_loop(
messages, tools
)
except httpx.ConnectError as exc:
logger.warning("Ollama connection failed: %s", exc)
error_msg = f"Ollama connection failed: {exc}"
content = ""
except httpx.HTTPStatusError as exc:
logger.warning("Ollama HTTP error: %s", exc)
error_msg = f"Ollama HTTP error: {exc.response.status_code}"
content = ""
except Exception as exc:
logger.error("MCPBridge run failed: %s", exc)
error_msg = str(exc)
content = ""
for round_num in range(self.max_rounds):
rounds = round_num + 1
response = await self._chat(messages, tools)
msg = response.get("message", {})
return BridgeResult(
content=content,
tool_calls_made=tool_calls_made,
rounds=rounds,
latency_ms=(time.time() - start) * 1000,
model=self.model,
error=error_msg,
)
# Check if model made tool calls
model_tool_calls = msg.get("tool_calls", [])
if not model_tool_calls:
# Final text response — done.
content = msg.get("content", "")
latency = (time.time() - start) * 1000
return BridgeResult(
content=content,
tool_calls_made=tool_calls_made,
rounds=rounds,
latency_ms=latency,
model=self.model,
)
# Append the assistant message (with tool_calls) to history
messages.append(msg)
# Execute each tool call and add results
for tc in model_tool_calls:
func = tc.get("function", {})
tool_name = func.get("name", "unknown")
tool_args = func.get("arguments", {})
logger.info(
"Bridge tool call [round %d]: %s(%s)",
rounds,
tool_name,
tool_args,
)
result = await self._execute_tool_call(tc)
tool_calls_made.append(
{
"round": rounds,
"tool": tool_name,
"arguments": tool_args,
"result": result[:500], # Truncate for logging
}
)
# Add tool result to message history
messages.append(
{
"role": "tool",
"content": result,
}
)
# Hit max rounds
latency = (time.time() - start) * 1000
return BridgeResult(
content="(max tool-call rounds reached)",
tool_calls_made=tool_calls_made,
rounds=rounds,
latency_ms=latency,
model=self.model,
error=f"Exceeded maximum of {self.max_rounds} tool-call rounds",
)
except httpx.ConnectError as exc:
latency = (time.time() - start) * 1000
logger.warning("Ollama connection failed: %s", exc)
return BridgeResult(
content="",
tool_calls_made=tool_calls_made,
rounds=rounds,
latency_ms=latency,
model=self.model,
error=f"Ollama connection failed: {exc}",
)
except httpx.HTTPStatusError as exc:
latency = (time.time() - start) * 1000
logger.warning("Ollama HTTP error: %s", exc)
return BridgeResult(
content="",
tool_calls_made=tool_calls_made,
rounds=rounds,
latency_ms=latency,
model=self.model,
error=f"Ollama HTTP error: {exc.response.status_code}",
)
except Exception as exc:
latency = (time.time() - start) * 1000
logger.error("MCPBridge run failed: %s", exc)
return BridgeResult(
content="",
tool_calls_made=tool_calls_made,
rounds=rounds,
latency_ms=latency,
model=self.model,
error=str(exc),
)
def status(self) -> dict:
"""Return bridge status for the dashboard."""

View File

@@ -462,8 +462,7 @@ def consult_grok(query: str) -> str:
inv = ln.create_invoice(sats, f"Grok query: {query[:_INVOICE_MEMO_MAX_LEN]}")
invoice_info = f"\n[Lightning invoice: {sats} sats — {inv.payment_request[:40]}...]"
except (ImportError, OSError, ValueError) as exc:
logger.error("Lightning invoice creation failed: %s", exc)
return "Error: Failed to create Lightning invoice. Please check logs."
logger.warning("Tool execution failed (Lightning invoice): %s", exc)
result = backend.run(query)
@@ -534,8 +533,7 @@ def _register_web_fetch_tool(toolkit: Toolkit) -> None:
try:
toolkit.register(web_fetch, name="web_fetch")
except Exception as exc:
logger.error("Failed to register web_fetch tool: %s", exc)
raise
logger.warning("Tool execution failed (web_fetch registration): %s", exc)
def _register_core_tools(toolkit: Toolkit, base_path: Path) -> None:
@@ -567,8 +565,8 @@ def _register_grok_tool(toolkit: Toolkit) -> None:
toolkit.register(consult_grok, name="consult_grok")
logger.info("Grok consultation tool registered")
except (ImportError, AttributeError) as exc:
logger.error("Failed to register Grok tool: %s", exc)
raise
logger.warning("Tool execution failed (Grok registration): %s", exc)
logger.debug("Grok tool not available")
def _register_memory_tools(toolkit: Toolkit) -> None:
@@ -581,8 +579,8 @@ def _register_memory_tools(toolkit: Toolkit) -> None:
toolkit.register(memory_read, name="memory_read")
toolkit.register(memory_forget, name="memory_forget")
except (ImportError, AttributeError) as exc:
logger.error("Failed to register Memory tools: %s", exc)
raise
logger.warning("Tool execution failed (Memory tools registration): %s", exc)
logger.debug("Memory tools not available")
def _register_agentic_loop_tool(toolkit: Toolkit) -> None:
@@ -630,8 +628,8 @@ def _register_agentic_loop_tool(toolkit: Toolkit) -> None:
toolkit.register(plan_and_execute, name="plan_and_execute")
except (ImportError, AttributeError) as exc:
logger.error("Failed to register plan_and_execute tool: %s", exc)
raise
logger.warning("Tool execution failed (plan_and_execute registration): %s", exc)
logger.debug("plan_and_execute tool not available")
def _register_introspection_tools(toolkit: Toolkit) -> None:
@@ -649,16 +647,15 @@ def _register_introspection_tools(toolkit: Toolkit) -> None:
toolkit.register(get_memory_status, name="get_memory_status")
toolkit.register(run_self_tests, name="run_self_tests")
except (ImportError, AttributeError) as exc:
logger.error("Failed to register Introspection tools: %s", exc)
raise
logger.warning("Tool execution failed (Introspection tools registration): %s", exc)
logger.debug("Introspection tools not available")
try:
from timmy.mcp_tools import update_gitea_avatar
toolkit.register(update_gitea_avatar, name="update_gitea_avatar")
except (ImportError, AttributeError) as exc:
logger.error("Failed to register update_gitea_avatar tool: %s", exc)
raise
logger.debug("update_gitea_avatar tool not available: %s", exc)
try:
from timmy.session_logger import self_reflect, session_history
@@ -666,8 +663,8 @@ def _register_introspection_tools(toolkit: Toolkit) -> None:
toolkit.register(session_history, name="session_history")
toolkit.register(self_reflect, name="self_reflect")
except (ImportError, AttributeError) as exc:
logger.error("Failed to register session_history tool: %s", exc)
raise
logger.warning("Tool execution failed (session_history registration): %s", exc)
logger.debug("session_history tool not available")
def _register_delegation_tools(toolkit: Toolkit) -> None:
@@ -679,8 +676,8 @@ def _register_delegation_tools(toolkit: Toolkit) -> None:
toolkit.register(delegate_to_kimi, name="delegate_to_kimi")
toolkit.register(list_swarm_agents, name="list_swarm_agents")
except Exception as exc:
logger.error("Failed to register Delegation tools: %s", exc)
raise
logger.warning("Tool execution failed (Delegation tools registration): %s", exc)
logger.debug("Delegation tools not available")
def _register_gematria_tool(toolkit: Toolkit) -> None:
@@ -690,8 +687,8 @@ def _register_gematria_tool(toolkit: Toolkit) -> None:
toolkit.register(gematria, name="gematria")
except (ImportError, AttributeError) as exc:
logger.error("Failed to register Gematria tool: %s", exc)
raise
logger.warning("Tool execution failed (Gematria registration): %s", exc)
logger.debug("Gematria tool not available")
def _register_artifact_tools(toolkit: Toolkit) -> None:
@@ -702,8 +699,8 @@ def _register_artifact_tools(toolkit: Toolkit) -> None:
toolkit.register(jot_note, name="jot_note")
toolkit.register(log_decision, name="log_decision")
except (ImportError, AttributeError) as exc:
logger.error("Failed to register Artifact tools: %s", exc)
raise
logger.warning("Tool execution failed (Artifact tools registration): %s", exc)
logger.debug("Artifact tools not available")
def _register_thinking_tools(toolkit: Toolkit) -> None:
@@ -713,8 +710,8 @@ def _register_thinking_tools(toolkit: Toolkit) -> None:
toolkit.register(search_thoughts, name="thought_search")
except (ImportError, AttributeError) as exc:
logger.error("Failed to register Thinking tools: %s", exc)
raise
logger.warning("Tool execution failed (Thinking tools registration): %s", exc)
logger.debug("Thinking tools not available")
def create_full_toolkit(base_dir: str | Path | None = None):

View File

@@ -968,3 +968,195 @@ class TestCascadeRouterReload:
assert router.providers[0].name == "low-priority"
assert router.providers[1].name == "high-priority"
class TestComplexityRouting:
"""Tests for Qwen3-8B / Qwen3-14B dual-model routing (issue #1065)."""
def _make_dual_model_provider(self) -> Provider:
"""Build an Ollama provider with both Qwen3 models registered."""
return Provider(
name="ollama-local",
type="ollama",
enabled=True,
priority=1,
url="http://localhost:11434",
models=[
{
"name": "qwen3:8b",
"capabilities": ["text", "tools", "json", "streaming", "routine"],
},
{
"name": "qwen3:14b",
"default": True,
"capabilities": ["text", "tools", "json", "streaming", "complex", "reasoning"],
},
],
)
def test_get_model_for_complexity_simple_returns_8b(self):
"""Simple tasks should select the model with 'routine' capability."""
from infrastructure.router.classifier import TaskComplexity
router = CascadeRouter(config_path=Path("/nonexistent"))
router.config.fallback_chains = {
"routine": ["qwen3:8b"],
"complex": ["qwen3:14b"],
}
provider = self._make_dual_model_provider()
model = router._get_model_for_complexity(provider, TaskComplexity.SIMPLE)
assert model == "qwen3:8b"
def test_get_model_for_complexity_complex_returns_14b(self):
"""Complex tasks should select the model with 'complex' capability."""
from infrastructure.router.classifier import TaskComplexity
router = CascadeRouter(config_path=Path("/nonexistent"))
router.config.fallback_chains = {
"routine": ["qwen3:8b"],
"complex": ["qwen3:14b"],
}
provider = self._make_dual_model_provider()
model = router._get_model_for_complexity(provider, TaskComplexity.COMPLEX)
assert model == "qwen3:14b"
def test_get_model_for_complexity_returns_none_when_no_match(self):
"""Returns None when provider has no matching model in chain."""
from infrastructure.router.classifier import TaskComplexity
router = CascadeRouter(config_path=Path("/nonexistent"))
router.config.fallback_chains = {} # empty chains
provider = Provider(
name="test",
type="ollama",
enabled=True,
priority=1,
models=[{"name": "llama3.2:3b", "default": True, "capabilities": ["text"]}],
)
# No 'routine' or 'complex' model available
model = router._get_model_for_complexity(provider, TaskComplexity.SIMPLE)
assert model is None
@pytest.mark.asyncio
async def test_complete_with_simple_hint_routes_to_8b(self):
"""complexity_hint='simple' should use qwen3:8b."""
router = CascadeRouter(config_path=Path("/nonexistent"))
router.config.fallback_chains = {
"routine": ["qwen3:8b"],
"complex": ["qwen3:14b"],
}
router.providers = [self._make_dual_model_provider()]
with patch.object(router, "_call_ollama") as mock_call:
mock_call.return_value = {"content": "fast answer", "model": "qwen3:8b"}
result = await router.complete(
messages=[{"role": "user", "content": "list tasks"}],
complexity_hint="simple",
)
assert result["model"] == "qwen3:8b"
assert result["complexity"] == "simple"
@pytest.mark.asyncio
async def test_complete_with_complex_hint_routes_to_14b(self):
"""complexity_hint='complex' should use qwen3:14b."""
router = CascadeRouter(config_path=Path("/nonexistent"))
router.config.fallback_chains = {
"routine": ["qwen3:8b"],
"complex": ["qwen3:14b"],
}
router.providers = [self._make_dual_model_provider()]
with patch.object(router, "_call_ollama") as mock_call:
mock_call.return_value = {"content": "detailed answer", "model": "qwen3:14b"}
result = await router.complete(
messages=[{"role": "user", "content": "review this PR"}],
complexity_hint="complex",
)
assert result["model"] == "qwen3:14b"
assert result["complexity"] == "complex"
@pytest.mark.asyncio
async def test_explicit_model_bypasses_complexity_routing(self):
"""When model is explicitly provided, complexity routing is skipped."""
router = CascadeRouter(config_path=Path("/nonexistent"))
router.config.fallback_chains = {
"routine": ["qwen3:8b"],
"complex": ["qwen3:14b"],
}
router.providers = [self._make_dual_model_provider()]
with patch.object(router, "_call_ollama") as mock_call:
mock_call.return_value = {"content": "response", "model": "qwen3:14b"}
result = await router.complete(
messages=[{"role": "user", "content": "list tasks"}],
model="qwen3:14b", # explicit override
)
# Explicit model wins — complexity field is None
assert result["model"] == "qwen3:14b"
assert result["complexity"] is None
@pytest.mark.asyncio
async def test_auto_classification_routes_simple_message(self):
"""Short, simple messages should auto-classify as SIMPLE → 8B."""
router = CascadeRouter(config_path=Path("/nonexistent"))
router.config.fallback_chains = {
"routine": ["qwen3:8b"],
"complex": ["qwen3:14b"],
}
router.providers = [self._make_dual_model_provider()]
with patch.object(router, "_call_ollama") as mock_call:
mock_call.return_value = {"content": "ok", "model": "qwen3:8b"}
result = await router.complete(
messages=[{"role": "user", "content": "status"}],
# no complexity_hint — auto-classify
)
assert result["complexity"] == "simple"
assert result["model"] == "qwen3:8b"
@pytest.mark.asyncio
async def test_auto_classification_routes_complex_message(self):
"""Complex messages should auto-classify → 14B."""
router = CascadeRouter(config_path=Path("/nonexistent"))
router.config.fallback_chains = {
"routine": ["qwen3:8b"],
"complex": ["qwen3:14b"],
}
router.providers = [self._make_dual_model_provider()]
with patch.object(router, "_call_ollama") as mock_call:
mock_call.return_value = {"content": "deep analysis", "model": "qwen3:14b"}
result = await router.complete(
messages=[{"role": "user", "content": "analyze and prioritize the backlog"}],
)
assert result["complexity"] == "complex"
assert result["model"] == "qwen3:14b"
@pytest.mark.asyncio
async def test_invalid_complexity_hint_falls_back_to_auto(self):
"""Invalid complexity_hint should log a warning and auto-classify."""
router = CascadeRouter(config_path=Path("/nonexistent"))
router.config.fallback_chains = {
"routine": ["qwen3:8b"],
"complex": ["qwen3:14b"],
}
router.providers = [self._make_dual_model_provider()]
with patch.object(router, "_call_ollama") as mock_call:
mock_call.return_value = {"content": "ok", "model": "qwen3:8b"}
# Should not raise
result = await router.complete(
messages=[{"role": "user", "content": "status"}],
complexity_hint="INVALID_HINT",
)
assert result["complexity"] in ("simple", "complex") # auto-classified

View File

@@ -0,0 +1,134 @@
"""Tests for Qwen3 dual-model task complexity classifier."""
import pytest
from infrastructure.router.classifier import TaskComplexity, classify_task
class TestClassifyTask:
"""Tests for classify_task heuristics."""
# ── Simple / routine tasks ──────────────────────────────────────────────
def test_empty_messages_is_simple(self):
assert classify_task([]) == TaskComplexity.SIMPLE
def test_no_user_content_is_simple(self):
messages = [{"role": "system", "content": "You are Timmy."}]
assert classify_task(messages) == TaskComplexity.SIMPLE
def test_short_status_query_is_simple(self):
messages = [{"role": "user", "content": "status"}]
assert classify_task(messages) == TaskComplexity.SIMPLE
def test_list_command_is_simple(self):
messages = [{"role": "user", "content": "list all tasks"}]
assert classify_task(messages) == TaskComplexity.SIMPLE
def test_get_command_is_simple(self):
messages = [{"role": "user", "content": "get the latest log entry"}]
assert classify_task(messages) == TaskComplexity.SIMPLE
def test_short_message_under_threshold_is_simple(self):
messages = [{"role": "user", "content": "run the build"}]
assert classify_task(messages) == TaskComplexity.SIMPLE
def test_affirmation_is_simple(self):
messages = [{"role": "user", "content": "yes"}]
assert classify_task(messages) == TaskComplexity.SIMPLE
# ── Complex / quality-sensitive tasks ──────────────────────────────────
def test_plan_keyword_is_complex(self):
messages = [{"role": "user", "content": "plan the sprint"}]
assert classify_task(messages) == TaskComplexity.COMPLEX
def test_review_keyword_is_complex(self):
messages = [{"role": "user", "content": "review this code"}]
assert classify_task(messages) == TaskComplexity.COMPLEX
def test_analyze_keyword_is_complex(self):
messages = [{"role": "user", "content": "analyze performance"}]
assert classify_task(messages) == TaskComplexity.COMPLEX
def test_triage_keyword_is_complex(self):
messages = [{"role": "user", "content": "triage the open issues"}]
assert classify_task(messages) == TaskComplexity.COMPLEX
def test_refactor_keyword_is_complex(self):
messages = [{"role": "user", "content": "refactor the auth module"}]
assert classify_task(messages) == TaskComplexity.COMPLEX
def test_explain_keyword_is_complex(self):
messages = [{"role": "user", "content": "explain how the router works"}]
assert classify_task(messages) == TaskComplexity.COMPLEX
def test_prioritize_keyword_is_complex(self):
messages = [{"role": "user", "content": "prioritize the backlog"}]
assert classify_task(messages) == TaskComplexity.COMPLEX
def test_long_message_is_complex(self):
long_msg = "do something " * 50 # > 500 chars
messages = [{"role": "user", "content": long_msg}]
assert classify_task(messages) == TaskComplexity.COMPLEX
def test_numbered_list_is_complex(self):
messages = [
{
"role": "user",
"content": "1. Read the file 2. Analyze it 3. Write a report",
}
]
assert classify_task(messages) == TaskComplexity.COMPLEX
def test_code_block_is_complex(self):
messages = [
{"role": "user", "content": "Here is the code:\n```python\nprint('hello')\n```"}
]
assert classify_task(messages) == TaskComplexity.COMPLEX
def test_deep_conversation_is_complex(self):
messages = [
{"role": "user", "content": "hi"},
{"role": "assistant", "content": "hello"},
{"role": "user", "content": "ok"},
{"role": "assistant", "content": "yes"},
{"role": "user", "content": "ok"},
{"role": "assistant", "content": "yes"},
{"role": "user", "content": "now do the thing"},
]
assert classify_task(messages) == TaskComplexity.COMPLEX
def test_analyse_british_spelling_is_complex(self):
messages = [{"role": "user", "content": "analyse this dataset"}]
assert classify_task(messages) == TaskComplexity.COMPLEX
def test_non_string_content_is_ignored(self):
"""Non-string content should not crash the classifier."""
messages = [{"role": "user", "content": ["part1", "part2"]}]
# Should not raise; result doesn't matter — just must not blow up
result = classify_task(messages)
assert isinstance(result, TaskComplexity)
def test_system_message_not_counted_as_user(self):
"""System message alone should not trigger complex keywords."""
messages = [
{"role": "system", "content": "analyze everything carefully"},
{"role": "user", "content": "yes"},
]
# "analyze" is in system message (not user) — user says "yes" → simple
assert classify_task(messages) == TaskComplexity.SIMPLE
class TestTaskComplexityEnum:
"""Tests for TaskComplexity enum values."""
def test_simple_value(self):
assert TaskComplexity.SIMPLE.value == "simple"
def test_complex_value(self):
assert TaskComplexity.COMPLEX.value == "complex"
def test_lookup_by_value(self):
assert TaskComplexity("simple") == TaskComplexity.SIMPLE
assert TaskComplexity("complex") == TaskComplexity.COMPLEX

View File

@@ -1,621 +0,0 @@
"""Unit tests for timmy.backlog_triage — autonomous backlog triage loop."""
from datetime import UTC, datetime
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
from timmy.backlog_triage import (
AGENT_CLAUDE,
AGENT_KIMI,
KIMI_READY_LABEL,
OWNER_LOGIN,
READY_THRESHOLD,
BacklogTriageLoop,
ScoredIssue,
TriageCycleResult,
TriageDecision,
_build_audit_comment,
_build_daily_summary,
_extract_tags,
_score_acceptance,
_score_alignment,
_score_scope,
decide,
score_issue,
)
# ── Fixtures ─────────────────────────────────────────────────────────────────
def _make_raw_issue(
number: int = 1,
title: str = "Fix the login bug",
body: str = "## Problem\nLogin fails on empty password.\n\n## Steps\nassert response == 200",
labels: list | None = None,
assignees: list | None = None,
created_at: str = "2026-03-20T10:00:00Z",
) -> dict:
return {
"number": number,
"title": title,
"body": body,
"labels": [{"name": lbl} for lbl in (labels or [])],
"assignees": [{"login": a} for a in (assignees or [])],
"created_at": created_at,
}
def _make_scored_issue(
number: int = 1,
title: str = "Fix login bug",
issue_type: str = "bug",
score: int = 7,
ready: bool = True,
is_p0: bool = True,
is_blocked: bool = False,
assignees: list | None = None,
tags: set | None = None,
labels: list | None = None,
age_days: int = 3,
) -> ScoredIssue:
return ScoredIssue(
number=number,
title=title,
body="",
labels=labels or [],
tags=tags or {"bug"},
assignees=assignees or [],
created_at=datetime.now(UTC),
issue_type=issue_type,
score=score,
scope=2,
acceptance=2,
alignment=3,
ready=ready,
age_days=age_days,
is_p0=is_p0,
is_blocked=is_blocked,
)
# ── _extract_tags ─────────────────────────────────────────────────────────────
class TestExtractTags:
def test_bracket_tags_in_title(self):
tags = _extract_tags("[Bug] Login fails", [])
assert "bug" in tags
def test_multiple_brackets(self):
tags = _extract_tags("[Bug][P0] Crash on startup", [])
assert "bug" in tags
assert "p0" in tags
def test_label_names(self):
tags = _extract_tags("Fix thing", ["security", "hotfix"])
assert "security" in tags
assert "hotfix" in tags
def test_labels_lowercased(self):
tags = _extract_tags("Title", ["Bug", "FEATURE"])
assert "bug" in tags
assert "feature" in tags
def test_empty_inputs(self):
tags = _extract_tags("", [])
assert tags == set()
# ── Scoring functions ─────────────────────────────────────────────────────────
class TestScoreScope:
def test_file_reference_adds_point(self):
score = _score_scope("Fix auth", "Edit src/timmy/auth.py", set())
assert score >= 1
def test_function_reference_adds_point(self):
score = _score_scope("Fix auth", "def validate_token()", set())
assert score >= 1
def test_short_title_adds_point(self):
score = _score_scope("Short title", "", set())
assert score >= 1
def test_meta_tag_penalizes(self):
score = _score_scope("Discussion about philosophy", "long body " * 5, {"philosophy"})
assert score <= 1
def test_max_score_3(self):
score = _score_scope("Fix auth", "src/auth.py\ndef login()", set())
assert score <= 3
class TestScoreAcceptance:
def test_acceptance_keywords(self):
body = "should return 200\nmust pass tests\nexpect response"
score = _score_acceptance("Title", body, set())
assert score >= 2
def test_test_reference_adds_point(self):
score = _score_acceptance("Title", "Run tox -e unit", set())
assert score >= 1
def test_structured_sections(self):
body = "## Problem\nX\n## Solution\nY"
score = _score_acceptance("Title", body, set())
assert score >= 1
def test_meta_tag_penalizes(self):
score = _score_acceptance("Title", "should do something", {"philosophy"})
# still counts but penalized
assert score <= 2
def test_empty_body(self):
score = _score_acceptance("Title", "", set())
assert score == 0
class TestScoreAlignment:
def test_bug_tags_score_max(self):
assert _score_alignment("", "", {"bug"}) == 3
def test_hotfix_tag_max(self):
assert _score_alignment("", "", {"hotfix"}) == 3
def test_refactor_tag(self):
score = _score_alignment("", "", {"refactor"})
assert score >= 2
def test_feature_tag(self):
score = _score_alignment("", "", {"feature"})
assert score >= 2
def test_meta_tags_zero(self):
assert _score_alignment("", "", {"philosophy"}) == 0
def test_loop_generated_bonus(self):
score = _score_alignment("", "", {"loop-generated"})
assert score >= 1
# ── score_issue ───────────────────────────────────────────────────────────────
class TestScoreIssue:
def test_bug_issue_classified_correctly(self):
raw = _make_raw_issue(labels=["bug"], title="[Bug] Crash on startup")
scored = score_issue(raw)
assert scored.issue_type == "bug"
assert scored.is_p0 is True
def test_feature_issue_classified(self):
raw = _make_raw_issue(labels=["feature"], title="Add voice support")
scored = score_issue(raw)
assert scored.issue_type == "feature"
def test_philosophy_issue_classified(self):
raw = _make_raw_issue(labels=["philosophy"], title="[Philosophy] Should Timmy sleep?")
scored = score_issue(raw)
assert scored.issue_type == "philosophy"
def test_research_issue_classified(self):
raw = _make_raw_issue(labels=["research"], title="Investigate model options")
scored = score_issue(raw)
assert scored.issue_type == "research"
def test_ready_flag_set_when_score_high(self):
body = (
"## Problem\nX breaks.\n## Solution\nFix src/timmy/agent.py def run()\n"
"should return True\nmust pass tox -e unit"
)
raw = _make_raw_issue(labels=["bug"], body=body)
scored = score_issue(raw)
assert scored.score >= READY_THRESHOLD
assert scored.ready is True
def test_is_blocked_detected_in_body(self):
raw = _make_raw_issue(body="This is blocked by issue #50")
scored = score_issue(raw)
assert scored.is_blocked is True
def test_is_blocked_detected_in_title(self):
raw = _make_raw_issue(title="[blocking] Cannot proceed")
scored = score_issue(raw)
# "blocking" in brackets becomes a tag
assert scored.is_blocked is True
def test_unassigned_when_no_assignees(self):
raw = _make_raw_issue(assignees=[])
scored = score_issue(raw)
assert scored.is_unassigned is True
def test_assigned_when_has_assignee(self):
raw = _make_raw_issue(assignees=["claude"])
scored = score_issue(raw)
assert scored.is_unassigned is False
def test_age_days_computed(self):
old_ts = "2026-01-01T00:00:00Z"
raw = _make_raw_issue(created_at=old_ts)
scored = score_issue(raw)
assert scored.age_days > 0
def test_needs_kimi_for_research_label(self):
raw = _make_raw_issue(labels=["kimi-ready"])
scored = score_issue(raw)
assert scored.needs_kimi is True
# ── decide ────────────────────────────────────────────────────────────────────
class TestDecide:
def test_philosophy_skipped(self):
issue = _make_scored_issue(issue_type="philosophy", tags={"philosophy"})
d = decide(issue)
assert d.action == "skip"
assert "philosophy" in d.reason.lower()
def test_assigned_issue_skipped(self):
issue = _make_scored_issue(assignees=["perplexity"])
d = decide(issue)
assert d.action == "skip"
assert "assigned" in d.reason.lower()
def test_low_score_skipped(self):
issue = _make_scored_issue(score=2, ready=False)
d = decide(issue)
assert d.action == "skip"
assert "threshold" in d.reason.lower()
def test_blocked_issue_flagged_for_alex(self):
issue = _make_scored_issue(is_blocked=True)
d = decide(issue)
assert d.action == "flag_alex"
assert d.agent == OWNER_LOGIN
def test_research_issue_assigned_kimi(self):
issue = _make_scored_issue(
issue_type="research",
tags={"research"},
is_p0=False,
is_blocked=False,
)
d = decide(issue)
assert d.action == "assign_kimi"
assert d.agent == AGENT_KIMI
def test_kimi_ready_label_assigns_kimi(self):
issue = _make_scored_issue(
issue_type="unknown",
tags={"kimi-ready"},
labels=["kimi-ready"],
is_p0=False,
is_blocked=False,
)
d = decide(issue)
assert d.action == "assign_kimi"
def test_p0_bug_assigns_claude(self):
issue = _make_scored_issue(issue_type="bug", is_p0=True, is_blocked=False)
d = decide(issue)
assert d.action == "assign_claude"
assert d.agent == AGENT_CLAUDE
def test_ready_feature_assigns_claude(self):
issue = _make_scored_issue(
issue_type="feature",
is_p0=False,
is_blocked=False,
tags={"feature"},
)
d = decide(issue)
assert d.action == "assign_claude"
assert d.agent == AGENT_CLAUDE
def test_decision_has_reason(self):
issue = _make_scored_issue()
d = decide(issue)
assert len(d.reason) > 10
# ── _build_audit_comment ──────────────────────────────────────────────────────
class TestBuildAuditComment:
def test_contains_timmy_triage_header(self):
d = TriageDecision(42, "assign_claude", "High priority bug", agent=AGENT_CLAUDE)
comment = _build_audit_comment(d)
assert "Timmy Triage" in comment
def test_contains_issue_reason(self):
d = TriageDecision(42, "assign_claude", "Urgent P0 bug", agent=AGENT_CLAUDE)
comment = _build_audit_comment(d)
assert "Urgent P0 bug" in comment
def test_assign_claude_mentions_agent(self):
d = TriageDecision(42, "assign_claude", "reason", agent=AGENT_CLAUDE)
comment = _build_audit_comment(d)
assert AGENT_CLAUDE in comment
def test_assign_kimi_mentions_label(self):
d = TriageDecision(42, "assign_kimi", "reason", agent=AGENT_KIMI)
comment = _build_audit_comment(d)
assert KIMI_READY_LABEL in comment
def test_flag_alex_mentions_owner(self):
d = TriageDecision(42, "flag_alex", "blocked", agent=OWNER_LOGIN)
comment = _build_audit_comment(d)
assert OWNER_LOGIN in comment
def test_contains_override_note(self):
d = TriageDecision(42, "assign_claude", "reason", agent=AGENT_CLAUDE)
comment = _build_audit_comment(d)
assert "override" in comment.lower()
# ── _build_daily_summary ──────────────────────────────────────────────────────
class TestBuildDailySummary:
def _make_result(self, decisions=None) -> TriageCycleResult:
return TriageCycleResult(
timestamp=datetime.now(UTC).isoformat(),
total_open=10,
scored=8,
ready=5,
decisions=decisions or [],
)
def test_contains_open_count(self):
result = self._make_result()
scored = [_make_scored_issue(number=i, ready=True, score=6) for i in range(1, 4)]
summary = _build_daily_summary(result, scored)
assert "10" in summary # total_open
def test_contains_ready_count(self):
result = self._make_result()
summary = _build_daily_summary(result, [])
assert "5" in summary
def test_actions_taken_section(self):
decisions = [
TriageDecision(1, "assign_claude", "P0 bug", agent="claude", executed=True),
]
result = self._make_result(decisions=decisions)
summary = _build_daily_summary(result, [])
assert "Actions Taken" in summary
assert "#1" in summary
def test_top_issues_listed(self):
scored = [_make_scored_issue(number=99, ready=True, score=8)]
result = self._make_result()
summary = _build_daily_summary(result, scored)
assert "#99" in summary
def test_footer_present(self):
summary = _build_daily_summary(self._make_result(), [])
assert "Auto-generated" in summary
# ── BacklogTriageLoop ─────────────────────────────────────────────────────────
class TestBacklogTriageLoop:
def test_default_interval_from_settings(self):
loop = BacklogTriageLoop()
from config import settings
assert loop._interval == float(settings.backlog_triage_interval_seconds)
def test_custom_interval(self):
loop = BacklogTriageLoop(interval=300)
assert loop._interval == 300.0
def test_dry_run_default(self):
loop = BacklogTriageLoop(dry_run=True)
assert loop._dry_run is True
def test_not_running_initially(self):
loop = BacklogTriageLoop()
assert loop.is_running is False
def test_stop_sets_running_false(self):
loop = BacklogTriageLoop()
loop._running = True
loop.stop()
assert loop._running is False
def test_cycle_count_starts_zero(self):
loop = BacklogTriageLoop()
assert loop.cycle_count == 0
@pytest.mark.asyncio
async def test_run_once_skips_when_no_gitea_token(self):
loop = BacklogTriageLoop()
mock_settings = MagicMock()
mock_settings.gitea_enabled = True
mock_settings.gitea_token = ""
mock_settings.backlog_triage_interval_seconds = 900
mock_settings.backlog_triage_dry_run = False
mock_settings.backlog_triage_daily_summary = False
with patch("timmy.backlog_triage.settings", mock_settings):
result = await loop.run_once()
assert result.total_open == 0
@pytest.mark.asyncio
async def test_run_once_dry_run_no_api_writes(self):
"""In dry_run mode, decisions are made but no Gitea API writes happen."""
loop = BacklogTriageLoop(dry_run=True, daily_summary=False)
raw_issues = [
_make_raw_issue(
number=10,
title="Fix crash",
labels=["bug"],
body=(
"## Problem\nCrash on login.\n## Solution\nFix src/auth.py "
"def login()\nshould return 200\nmust pass tox tests"
),
)
]
mock_settings = MagicMock()
mock_settings.gitea_enabled = True
mock_settings.gitea_token = "fake-token"
mock_settings.gitea_repo = "owner/repo"
mock_settings.gitea_url = "http://gitea.local"
mock_settings.backlog_triage_interval_seconds = 900
mock_settings.backlog_triage_dry_run = True
mock_settings.backlog_triage_daily_summary = False
mock_client = AsyncMock()
mock_client.get.return_value = MagicMock(
status_code=200, json=MagicMock(return_value=raw_issues)
)
mock_ctx = AsyncMock()
mock_ctx.__aenter__.return_value = mock_client
mock_ctx.__aexit__.return_value = False
with (
patch("timmy.backlog_triage.settings", mock_settings),
patch("httpx.AsyncClient", return_value=mock_ctx),
):
result = await loop.run_once()
# No POST/PATCH calls in dry run
mock_client.post.assert_not_called()
mock_client.patch.assert_not_called()
assert result.total_open == 1
assert loop.cycle_count == 1
assert len(loop.history) == 1
@pytest.mark.asyncio
async def test_run_once_assigns_unassigned_bug(self):
"""Unassigned ready bug should be assigned to Claude with audit comment."""
loop = BacklogTriageLoop(dry_run=False, daily_summary=False)
body = (
"## Problem\nCrash on login.\n## Solution\nFix src/auth.py "
"def login()\nshould return 200\nmust pass tox tests"
)
raw_issues = [_make_raw_issue(number=5, title="Fix crash", labels=["bug"], body=body)]
mock_settings = MagicMock()
mock_settings.gitea_enabled = True
mock_settings.gitea_token = "fake-token"
mock_settings.gitea_repo = "owner/repo"
mock_settings.gitea_url = "http://gitea.local"
mock_settings.backlog_triage_interval_seconds = 900
mock_settings.backlog_triage_dry_run = False
mock_settings.backlog_triage_daily_summary = False
# GET /issues returns our issue
get_issues_resp = MagicMock(status_code=200)
get_issues_resp.json.return_value = raw_issues
# POST /comments returns success
comment_resp = MagicMock(status_code=201)
comment_resp.json.return_value = {"id": 1}
# PATCH /issues/{n} (assign) returns success
assign_resp = MagicMock(status_code=200)
assign_resp.json.return_value = {"number": 5}
mock_client = AsyncMock()
mock_client.get.return_value = get_issues_resp
mock_client.post.return_value = comment_resp
mock_client.patch.return_value = assign_resp
mock_ctx = AsyncMock()
mock_ctx.__aenter__.return_value = mock_client
mock_ctx.__aexit__.return_value = False
with (
patch("timmy.backlog_triage.settings", mock_settings),
patch("httpx.AsyncClient", return_value=mock_ctx),
patch("asyncio.sleep", new_callable=AsyncMock),
):
result = await loop.run_once()
assert result.total_open == 1
# Comment should have been posted
mock_client.post.assert_called()
# Assign should have been called (PATCH)
mock_client.patch.assert_called()
@pytest.mark.asyncio
async def test_run_once_skips_already_assigned(self):
"""Issues already assigned should not be acted upon."""
loop = BacklogTriageLoop(dry_run=False, daily_summary=False)
raw_issues = [
_make_raw_issue(
number=3,
labels=["bug"],
assignees=["perplexity"],
body="## Problem\nX\nmust pass tox\nshould return 200 at least 3 times",
)
]
mock_settings = MagicMock()
mock_settings.gitea_enabled = True
mock_settings.gitea_token = "tok"
mock_settings.gitea_repo = "owner/repo"
mock_settings.gitea_url = "http://gitea.local"
mock_settings.backlog_triage_interval_seconds = 900
mock_settings.backlog_triage_dry_run = False
mock_settings.backlog_triage_daily_summary = False
get_resp = MagicMock(status_code=200)
get_resp.json.return_value = raw_issues
mock_client = AsyncMock()
mock_client.get.return_value = get_resp
mock_ctx = AsyncMock()
mock_ctx.__aenter__.return_value = mock_client
mock_ctx.__aexit__.return_value = False
with (
patch("timmy.backlog_triage.settings", mock_settings),
patch("httpx.AsyncClient", return_value=mock_ctx),
):
result = await loop.run_once()
# No writes for already-assigned issue
mock_client.post.assert_not_called()
mock_client.patch.assert_not_called()
assert result.decisions[0].action == "skip"
# ── ScoredIssue properties ────────────────────────────────────────────────────
class TestScoredIssueProperties:
def test_is_unassigned_true_when_no_assignees(self):
issue = _make_scored_issue(assignees=[])
assert issue.is_unassigned is True
def test_is_unassigned_false_when_assigned(self):
issue = _make_scored_issue(assignees=["claude"])
assert issue.is_unassigned is False
def test_needs_kimi_for_research_tag(self):
issue = _make_scored_issue(tags={"research"})
assert issue.needs_kimi is True
def test_needs_kimi_for_kimi_ready_label(self):
issue = _make_scored_issue(labels=["kimi-ready"], tags=set())
assert issue.needs_kimi is True
def test_needs_kimi_false_for_bug(self):
issue = _make_scored_issue(tags={"bug"}, labels=[])
assert issue.needs_kimi is False