[claude] Add vllm-mlx as high-performance local inference backend (#1069) (#1089)

Co-authored-by: Claude (Opus 4.6) <claude@hermes.local>
Co-committed-by: Claude (Opus 4.6) <claude@hermes.local>
This commit was merged in pull request #1089.
committed by Timmy Time on 2026-03-23 15:34:13 +00:00
parent 7fdd532260
commit f2a277f7b5
12 changed files with 350 additions and 77 deletions

View File

@@ -67,6 +67,29 @@ providers:
capabilities: [text, creative, streaming]
description: "Dolphin 3.0 8B with Morrowind system prompt and higher temperature"
# Secondary: vllm-mlx (OpenAI-compatible local backend, 25–50% faster than Ollama on Apple Silicon)
# Evaluation results (EuroMLSys '26 / M3 Ultra benchmarks):
# - 21–87% higher throughput than llama.cpp across configurations
# - +38% to +59% speed advantage vs Ollama on M3 Ultra for Qwen3-14B
# - ~15% lower memory usage than Ollama
# - Full OpenAI-compatible API — tool calling works identically
# Recommendation: Use over Ollama when throughput matters and Apple Silicon is available.
# Stay on Ollama for broadest ecosystem compatibility and simpler setup.
# To enable: start vllm-mlx server (`python -m vllm.entrypoints.openai.api_server
# --model Qwen/Qwen2.5-14B-Instruct-MLX --port 8000`) then set enabled: true.
- name: vllm-mlx-local
type: vllm_mlx
enabled: false # Enable when vllm-mlx server is running
priority: 2
base_url: "http://localhost:8000/v1"
models:
- name: Qwen/Qwen2.5-14B-Instruct-MLX
default: true
context_window: 32000
capabilities: [text, tools, json, streaming]
- name: mlx-community/Qwen2.5-7B-Instruct-4bit
context_window: 32000
capabilities: [text, tools, json, streaming]
# Tertiary: OpenAI (if API key available)
- name: openai-backup
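
A minimal verification sketch, assuming a vllm-mlx server is already listening on localhost:8000 and serving the default model from the entry above (swap in your own base URL and model name if they differ); it checks the root /health endpoint and issues one OpenAI-compatible chat completion before you flip enabled: true:

import openai
import requests

BASE = "http://localhost:8000"  # assumed local server address

# The health endpoint is served at the server root, not under /v1
assert requests.get(f"{BASE}/health", timeout=5).status_code == 200

# Local deployments need no real API key; any placeholder works
client = openai.OpenAI(api_key="no-key-required", base_url=f"{BASE}/v1")
reply = client.chat.completions.create(
    model="Qwen/Qwen2.5-14B-Instruct-MLX",
    messages=[{"role": "user", "content": "Reply with one word."}],
)
print(reply.choices[0].message.content)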

View File

@@ -25,18 +25,17 @@ import logging
import subprocess
import urllib.request
from dataclasses import dataclass
from datetime import datetime, timezone
from enum import Enum
from typing import Optional
from datetime import UTC, datetime
from enum import StrEnum
logger = logging.getLogger(__name__)
class MetabolicTier(str, Enum):
class MetabolicTier(StrEnum):
"""The three-tier metabolic protocol from the Timmy Time architecture."""
BURST = "burst" # Cloud API (Claude/Groq) — expensive, best quality
ACTIVE = "active" # Local 14B (Qwen3-14B) — free, good quality
BURST = "burst" # Cloud API (Claude/Groq) — expensive, best quality
ACTIVE = "active" # Local 14B (Qwen3-14B) — free, good quality
RESTING = "resting" # Local 8B (Qwen3-8B) — free, fast, adequate
@@ -44,10 +43,10 @@ class MetabolicTier(str, Enum):
class QuotaStatus:
"""Current Claude quota state."""
five_hour_utilization: float # 0.0 to 1.0
five_hour_resets_at: Optional[str]
seven_day_utilization: float # 0.0 to 1.0
seven_day_resets_at: Optional[str]
five_hour_utilization: float # 0.0 to 1.0
five_hour_resets_at: str | None
seven_day_utilization: float # 0.0 to 1.0
seven_day_resets_at: str | None
raw_response: dict
fetched_at: datetime
@@ -101,11 +100,11 @@ class QuotaMonitor:
USER_AGENT = "claude-code/2.0.32"
def __init__(self) -> None:
self._token: Optional[str] = None
self._last_status: Optional[QuotaStatus] = None
self._token: str | None = None
self._last_status: QuotaStatus | None = None
self._cache_seconds = 30 # Don't hammer the API
def _get_token(self) -> Optional[str]:
def _get_token(self) -> str | None:
"""Extract OAuth token from macOS Keychain."""
if self._token:
return self._token
@@ -126,11 +125,16 @@ class QuotaMonitor:
self._token = oauth.get("accessToken")
return self._token
except (json.JSONDecodeError, KeyError, FileNotFoundError, subprocess.TimeoutExpired) as exc:
except (
json.JSONDecodeError,
KeyError,
FileNotFoundError,
subprocess.TimeoutExpired,
) as exc:
logger.warning("Could not read Claude Code credentials: %s", exc)
return None
def check(self, force: bool = False) -> Optional[QuotaStatus]:
def check(self, force: bool = False) -> QuotaStatus | None:
"""
Fetch current quota status.
@@ -139,7 +143,7 @@ class QuotaMonitor:
"""
# Return cached if fresh
if not force and self._last_status:
age = (datetime.now(timezone.utc) - self._last_status.fetched_at).total_seconds()
age = (datetime.now(UTC) - self._last_status.fetched_at).total_seconds()
if age < self._cache_seconds:
return self._last_status
@@ -170,7 +174,7 @@ class QuotaMonitor:
seven_day_utilization=float(seven_day.get("utilization", 0.0)),
seven_day_resets_at=seven_day.get("resets_at"),
raw_response=data,
fetched_at=datetime.now(timezone.utc),
fetched_at=datetime.now(UTC),
)
return self._last_status
@@ -195,13 +199,13 @@ class QuotaMonitor:
tier = status.recommended_tier
if tier == MetabolicTier.BURST and task_complexity == "high":
return "claude-sonnet-4-6" # Cloud — best quality
return "claude-sonnet-4-6" # Cloud — best quality
elif tier == MetabolicTier.BURST and task_complexity == "medium":
return "qwen3:14b" # Save cloud for truly hard tasks
return "qwen3:14b" # Save cloud for truly hard tasks
elif tier == MetabolicTier.ACTIVE:
return "qwen3:14b" # Local 14B — good enough
return "qwen3:14b" # Local 14B — good enough
else: # RESTING
return "qwen3:8b" # Local 8B — conserve everything
return "qwen3:8b" # Local 8B — conserve everything
def should_use_cloud(self, task_value: str = "normal") -> bool:
"""
@@ -224,14 +228,14 @@ class QuotaMonitor:
return False # Never waste cloud on routine
def _time_remaining(reset_at: Optional[str]) -> str:
def _time_remaining(reset_at: str | None) -> str:
"""Format time until reset as human-readable string."""
if not reset_at or reset_at == "null":
return "unknown"
try:
reset = datetime.fromisoformat(reset_at.replace("Z", "+00:00"))
now = datetime.now(timezone.utc)
now = datetime.now(UTC)
diff = reset - now
if diff.total_seconds() <= 0:
@@ -249,7 +253,7 @@ def _time_remaining(reset_at: Optional[str]) -> str:
# Module-level singleton
_quota_monitor: Optional[QuotaMonitor] = None
_quota_monitor: QuotaMonitor | None = None
def get_quota_monitor() -> QuotaMonitor:
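
A hedged usage sketch of the tier-based selection shown in this file; get_quota_monitor, check, should_use_cloud, recommended_tier, and the model names all appear in the diff above, while the "high" task value and the no-credentials fallback are illustrative assumptions:

from infrastructure.claude_quota import MetabolicTier, get_quota_monitor

monitor = get_quota_monitor()
status = monitor.check()  # cached for ~30 seconds unless force=True
if status is None:
    model = "qwen3:14b"  # no quota data available; stay local (assumption)
elif monitor.should_use_cloud(task_value="high"):  # "high" is an assumed value
    model = "claude-sonnet-4-6"  # BURST tier, high-value task
elif status.recommended_tier is MetabolicTier.RESTING:
    model = "qwen3:8b"  # conserve everything
else:
    model = "qwen3:14b"  # ACTIVE tier default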

View File

@@ -310,6 +310,22 @@ class CascadeRouter:
logger.debug("Ollama provider check error: %s", exc)
return False
elif provider.type == "vllm_mlx":
# Check if local vllm-mlx server is running (OpenAI-compatible)
if requests is None:
return True
try:
base_url = provider.base_url or provider.url or "http://localhost:8000"
# Strip /v1 suffix — health endpoint is at the root
server_root = base_url.rstrip("/")
if server_root.endswith("/v1"):
server_root = server_root[:-3]
response = requests.get(f"{server_root}/health", timeout=5)
return response.status_code == 200
except Exception as exc:
logger.debug("vllm-mlx provider check error: %s", exc)
return False
elif provider.type in ("openai", "anthropic", "grok"):
# Check if API key is set
return provider.api_key is not None and provider.api_key != ""
@@ -619,6 +635,14 @@ class CascadeRouter:
temperature=temperature,
max_tokens=max_tokens,
)
elif provider.type == "vllm_mlx":
result = await self._call_vllm_mlx(
provider=provider,
messages=messages,
model=model or provider.get_default_model(),
temperature=temperature,
max_tokens=max_tokens,
)
else:
raise ValueError(f"Unknown provider type: {provider.type}")
@@ -815,6 +839,48 @@ class CascadeRouter:
"model": response.model,
}
async def _call_vllm_mlx(
self,
provider: Provider,
messages: list[dict],
model: str,
temperature: float,
max_tokens: int | None,
) -> dict:
"""Call vllm-mlx via its OpenAI-compatible API.
vllm-mlx exposes the same /v1/chat/completions endpoint as OpenAI,
so we reuse the OpenAI client pointed at the local server.
No API key is required for local deployments.
"""
import openai
base_url = provider.base_url or provider.url or "http://localhost:8000"
# Ensure the base_url ends with /v1 as expected by the OpenAI client
if not base_url.rstrip("/").endswith("/v1"):
base_url = base_url.rstrip("/") + "/v1"
client = openai.AsyncOpenAI(
api_key=provider.api_key or "no-key-required",
base_url=base_url,
timeout=self.config.timeout_seconds,
)
kwargs: dict = {
"model": model,
"messages": messages,
"temperature": temperature,
}
if max_tokens:
kwargs["max_tokens"] = max_tokens
response = await client.chat.completions.create(**kwargs)
return {
"content": response.choices[0].message.content,
"model": response.model,
}
def _record_success(self, provider: Provider, latency_ms: float) -> None:
"""Record a successful request."""
provider.metrics.total_requests += 1
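
A minimal end-to-end sketch of routing a request through the new vllm_mlx path, mirroring how the tests in this commit wire up the router; the config path and the import location of Provider are assumptions:

import asyncio
from pathlib import Path

from infrastructure.router.cascade import CascadeRouter, Provider  # Provider location assumed

async def main() -> None:
    router = CascadeRouter(config_path=Path("config/providers.yaml"))  # assumed path
    # Register only the local vllm-mlx provider, as the tests do
    router.providers = [
        Provider(
            name="vllm-mlx-local",
            type="vllm_mlx",
            enabled=True,
            priority=2,
            base_url="http://localhost:8000/v1",
            models=[{"name": "Qwen/Qwen2.5-14B-Instruct-MLX", "default": True}],
        )
    ]
    result = await router.complete(messages=[{"role": "user", "content": "ping"}])
    print(result["provider"], result["model"], result["content"])

asyncio.run(main())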

View File

@@ -299,9 +299,7 @@ async def poll_kimi_issue(
"error": None,
}
else:
logger.warning(
"Poll issue #%s returned %s", issue_number, resp.status_code
)
logger.warning("Poll issue #%s returned %s", issue_number, resp.status_code)
except Exception as exc:
logger.warning("Poll error for issue #%s: %s", issue_number, exc)
@@ -332,7 +330,7 @@ def _extract_action_items(text: str) -> list[str]:
items: list[str] = []
patterns = [
re.compile(r"^[-*]\s+\[ \]\s+(.+)", re.MULTILINE), # - [ ] checkbox
re.compile(r"^\d+\.\s+(.+)", re.MULTILINE), # 1. numbered list
re.compile(r"^\d+\.\s+(.+)", re.MULTILINE), # 1. numbered list
re.compile(r"^(?:Action|TODO|Next step):\s*(.+)", re.MULTILINE | re.IGNORECASE),
]
seen: set[str] = set()

View File

@@ -54,9 +54,7 @@ class ActionItem:
parts.append(f"- {url}")
if source_issue:
parts.append(
f"\n### Origin\nExtracted from research in #{source_issue}"
)
parts.append(f"\n### Origin\nExtracted from research in #{source_issue}")
parts.append("\n---\n*Auto-triaged from research findings by Timmy*")
return "\n".join(parts)
@@ -123,7 +121,7 @@ def _validate_action_item(raw_item: dict[str, Any]) -> ActionItem | None:
labels = raw_item.get("labels", [])
if isinstance(labels, str):
labels = [l.strip() for l in labels.split(",") if l.strip()]
labels = [lbl.strip() for lbl in labels.split(",") if lbl.strip()]
if not isinstance(labels, list):
labels = []
@@ -303,7 +301,7 @@ async def _resolve_label_ids(
if resp.status_code != 200:
return []
existing = {l["name"]: l["id"] for l in resp.json()}
existing = {lbl["name"]: lbl["id"] for lbl in resp.json()}
label_ids = []
for name in label_names:

View File

@@ -14,7 +14,9 @@ app = typer.Typer(help="Timmy Serve — sovereign AI agent API")
def start(
port: int = typer.Option(8402, "--port", "-p", help="Port for the serve API"),
host: str = typer.Option("0.0.0.0", "--host", "-h", help="Host to bind to"),
price: int = typer.Option(None, "--price", help="Price per request in sats (default: from config)"),
price: int = typer.Option(
None, "--price", help="Price per request in sats (default: from config)"
),
dry_run: bool = typer.Option(False, "--dry-run", help="Print config and exit (for testing)"),
):
"""Start Timmy in serve mode."""

View File

@@ -24,7 +24,6 @@ from dashboard.routes.health import (
_generate_recommendations,
)
# ---------------------------------------------------------------------------
# Pydantic models
# ---------------------------------------------------------------------------
@@ -118,7 +117,9 @@ class TestGenerateRecommendations:
def test_unavailable_service(self):
deps = [
DependencyStatus(name="Ollama AI", status="unavailable", sovereignty_score=10, details={})
DependencyStatus(
name="Ollama AI", status="unavailable", sovereignty_score=10, details={}
)
]
recs = _generate_recommendations(deps)
assert any("Ollama AI is unavailable" in r for r in recs)
@@ -137,9 +138,7 @@ class TestGenerateRecommendations:
def test_degraded_non_lightning(self):
"""Degraded non-Lightning dep produces no specific recommendation."""
deps = [
DependencyStatus(name="Redis", status="degraded", sovereignty_score=5, details={})
]
deps = [DependencyStatus(name="Redis", status="degraded", sovereignty_score=5, details={})]
recs = _generate_recommendations(deps)
assert recs == ["System operating optimally - all dependencies healthy"]
@@ -379,7 +378,9 @@ class TestHealthEndpoint:
assert response.status_code == 200
def test_ok_when_ollama_up(self, client):
with patch("dashboard.routes.health.check_ollama", new_callable=AsyncMock, return_value=True):
with patch(
"dashboard.routes.health.check_ollama", new_callable=AsyncMock, return_value=True
):
data = client.get("/health").json()
assert data["status"] == "ok"
@@ -415,7 +416,9 @@ class TestHealthStatusPanel:
assert "text/html" in response.headers["content-type"]
def test_shows_up_when_ollama_healthy(self, client):
with patch("dashboard.routes.health.check_ollama", new_callable=AsyncMock, return_value=True):
with patch(
"dashboard.routes.health.check_ollama", new_callable=AsyncMock, return_value=True
):
text = client.get("/health/status").text
assert "UP" in text

View File

@@ -1,9 +1,7 @@
"""Tests for Claude Quota Monitor and Metabolic Protocol."""
from datetime import datetime, timedelta, timezone
from unittest.mock import MagicMock, patch
import pytest
from datetime import UTC, datetime, timedelta
from unittest.mock import patch
from infrastructure.claude_quota import (
MetabolicTier,
@@ -22,7 +20,7 @@ def _make_status(five_hour: float = 0.0, seven_day: float = 0.0) -> QuotaStatus:
seven_day_utilization=seven_day,
seven_day_resets_at=None,
raw_response={},
fetched_at=datetime.now(timezone.utc),
fetched_at=datetime.now(UTC),
)
@@ -104,25 +102,25 @@ class TestTimeRemaining:
assert _time_remaining("") == "unknown"
def test_past_time_returns_resetting_now(self):
past = (datetime.now(timezone.utc) - timedelta(hours=1)).isoformat()
past = (datetime.now(UTC) - timedelta(hours=1)).isoformat()
assert _time_remaining(past) == "resetting now"
def test_future_time_hours_and_minutes(self):
future = (datetime.now(timezone.utc) + timedelta(hours=2, minutes=15)).isoformat()
future = (datetime.now(UTC) + timedelta(hours=2, minutes=15)).isoformat()
result = _time_remaining(future)
assert "2h" in result
# Minutes may vary ±1 due to test execution time
assert "m" in result
def test_future_time_minutes_only(self):
future = (datetime.now(timezone.utc) + timedelta(minutes=45)).isoformat()
future = (datetime.now(UTC) + timedelta(minutes=45)).isoformat()
result = _time_remaining(future)
assert "h" not in result
# Minutes may vary ±1 due to test execution time
assert "m" in result
def test_z_suffix_handled(self):
future = (datetime.now(timezone.utc) + timedelta(hours=1)).strftime("%Y-%m-%dT%H:%M:%SZ")
future = (datetime.now(UTC) + timedelta(hours=1)).strftime("%Y-%m-%dT%H:%M:%SZ")
result = _time_remaining(future)
assert result != "unknown"
@@ -238,7 +236,7 @@ class TestQuotaMonitorCaching:
def test_stale_cache_triggers_fetch(self):
monitor = QuotaMonitor()
old_time = datetime.now(timezone.utc) - timedelta(seconds=60)
old_time = datetime.now(UTC) - timedelta(seconds=60)
stale_status = QuotaStatus(
five_hour_utilization=0.10,
five_hour_resets_at=None,

View File

@@ -489,6 +489,197 @@ class TestProviderAvailabilityCheck:
assert router._check_provider_available(provider) is False
def test_check_vllm_mlx_without_requests(self):
"""Test vllm-mlx returns True when requests not available (fallback)."""
router = CascadeRouter(config_path=Path("/nonexistent"))
provider = Provider(
name="vllm-mlx-local",
type="vllm_mlx",
enabled=True,
priority=2,
base_url="http://localhost:8000/v1",
)
import infrastructure.router.cascade as cascade_module
old_requests = cascade_module.requests
cascade_module.requests = None
try:
assert router._check_provider_available(provider) is True
finally:
cascade_module.requests = old_requests
def test_check_vllm_mlx_server_healthy(self):
"""Test vllm-mlx when health check succeeds."""
from unittest.mock import MagicMock, patch
router = CascadeRouter(config_path=Path("/nonexistent"))
provider = Provider(
name="vllm-mlx-local",
type="vllm_mlx",
enabled=True,
priority=2,
base_url="http://localhost:8000/v1",
)
mock_response = MagicMock()
mock_response.status_code = 200
with patch("infrastructure.router.cascade.requests") as mock_requests:
mock_requests.get.return_value = mock_response
result = router._check_provider_available(provider)
assert result is True
mock_requests.get.assert_called_once_with("http://localhost:8000/health", timeout=5)
def test_check_vllm_mlx_server_down(self):
"""Test vllm-mlx when server is not running."""
from unittest.mock import patch
router = CascadeRouter(config_path=Path("/nonexistent"))
provider = Provider(
name="vllm-mlx-local",
type="vllm_mlx",
enabled=True,
priority=2,
base_url="http://localhost:8000/v1",
)
with patch("infrastructure.router.cascade.requests") as mock_requests:
mock_requests.get.side_effect = ConnectionRefusedError("Connection refused")
result = router._check_provider_available(provider)
assert result is False
def test_check_vllm_mlx_default_url(self):
"""Test vllm-mlx uses default localhost:8000 when no URL configured."""
from unittest.mock import MagicMock, patch
router = CascadeRouter(config_path=Path("/nonexistent"))
provider = Provider(
name="vllm-mlx-local",
type="vllm_mlx",
enabled=True,
priority=2,
)
mock_response = MagicMock()
mock_response.status_code = 200
with patch("infrastructure.router.cascade.requests") as mock_requests:
mock_requests.get.return_value = mock_response
router._check_provider_available(provider)
mock_requests.get.assert_called_once_with("http://localhost:8000/health", timeout=5)
@pytest.mark.asyncio
class TestVllmMlxProvider:
"""Test vllm-mlx provider integration."""
async def test_complete_with_vllm_mlx(self):
"""Test successful completion via vllm-mlx."""
router = CascadeRouter(config_path=Path("/nonexistent"))
provider = Provider(
name="vllm-mlx-local",
type="vllm_mlx",
enabled=True,
priority=2,
base_url="http://localhost:8000/v1",
models=[{"name": "Qwen/Qwen2.5-14B-Instruct-MLX", "default": True}],
)
router.providers = [provider]
with patch.object(router, "_call_vllm_mlx") as mock_call:
mock_call.return_value = {
"content": "MLX response",
"model": "Qwen/Qwen2.5-14B-Instruct-MLX",
}
result = await router.complete(
messages=[{"role": "user", "content": "Hi"}],
)
assert result["content"] == "MLX response"
assert result["provider"] == "vllm-mlx-local"
assert result["model"] == "Qwen/Qwen2.5-14B-Instruct-MLX"
async def test_vllm_mlx_base_url_normalization(self):
"""Test _call_vllm_mlx appends /v1 when missing."""
from unittest.mock import AsyncMock, MagicMock, patch
router = CascadeRouter(config_path=Path("/nonexistent"))
provider = Provider(
name="vllm-mlx-local",
type="vllm_mlx",
enabled=True,
priority=2,
base_url="http://localhost:8000", # No /v1
models=[{"name": "qwen-mlx", "default": True}],
)
mock_choice = MagicMock()
mock_choice.message.content = "hello"
mock_response = MagicMock()
mock_response.choices = [mock_choice]
mock_response.model = "qwen-mlx"
async def fake_create(**kwargs):
return mock_response
with patch("openai.AsyncOpenAI") as mock_openai_cls:
mock_client = MagicMock()
mock_client.chat.completions.create = AsyncMock(side_effect=fake_create)
mock_openai_cls.return_value = mock_client
await router._call_vllm_mlx(
provider=provider,
messages=[{"role": "user", "content": "hi"}],
model="qwen-mlx",
temperature=0.7,
max_tokens=None,
)
call_kwargs = mock_openai_cls.call_args
base_url_used = call_kwargs.kwargs.get("base_url") or call_kwargs[1].get("base_url")
assert base_url_used.endswith("/v1")
async def test_vllm_mlx_is_local_not_cloud(self):
"""Confirm vllm_mlx is not subject to metabolic protocol cloud skip."""
router = CascadeRouter(config_path=Path("/nonexistent"))
provider = Provider(
name="vllm-mlx-local",
type="vllm_mlx",
enabled=True,
priority=2,
base_url="http://localhost:8000/v1",
models=[{"name": "qwen-mlx", "default": True}],
)
router.providers = [provider]
# Quota monitor returns False (block cloud) — vllm_mlx should still be tried
with patch("infrastructure.router.cascade._quota_monitor") as mock_qm:
mock_qm.check.return_value = object()
mock_qm.should_use_cloud.return_value = False
with patch.object(router, "_call_vllm_mlx") as mock_call:
mock_call.return_value = {
"content": "Local MLX response",
"model": "qwen-mlx",
}
result = await router.complete(
messages=[{"role": "user", "content": "hi"}],
)
assert result["content"] == "Local MLX response"
class TestCascadeRouterReload:
"""Test hot-reload of providers.yaml."""

View File

@@ -175,9 +175,7 @@ async def test_bridge_run_simple_response():
bridge = MCPBridge(include_gitea=False, include_shell=False)
mock_resp = MagicMock()
mock_resp.json.return_value = {
"message": {"role": "assistant", "content": "Hello!"}
}
mock_resp.json.return_value = {"message": {"role": "assistant", "content": "Hello!"}}
mock_resp.raise_for_status = MagicMock()
mock_client = AsyncMock()
@@ -238,9 +236,7 @@ async def test_bridge_run_with_tool_call():
# Round 2: model returns final text
final_resp = MagicMock()
final_resp.json.return_value = {
"message": {"role": "assistant", "content": "Done with tools!"}
}
final_resp.json.return_value = {"message": {"role": "assistant", "content": "Done with tools!"}}
final_resp.raise_for_status = MagicMock()
mock_client = AsyncMock()
@@ -276,17 +272,13 @@ async def test_bridge_run_unknown_tool():
"message": {
"role": "assistant",
"content": "",
"tool_calls": [
{"function": {"name": "nonexistent", "arguments": {}}}
],
"tool_calls": [{"function": {"name": "nonexistent", "arguments": {}}}],
}
}
tool_call_resp.raise_for_status = MagicMock()
final_resp = MagicMock()
final_resp.json.return_value = {
"message": {"role": "assistant", "content": "OK"}
}
final_resp.json.return_value = {"message": {"role": "assistant", "content": "OK"}}
final_resp.raise_for_status = MagicMock()
mock_client = AsyncMock()
@@ -332,9 +324,7 @@ async def test_bridge_run_max_rounds():
"message": {
"role": "assistant",
"content": "",
"tool_calls": [
{"function": {"name": "loop_tool", "arguments": {}}}
],
"tool_calls": [{"function": {"name": "loop_tool", "arguments": {}}}],
}
}
tool_call_resp.raise_for_status = MagicMock()
@@ -365,9 +355,7 @@ async def test_bridge_run_connection_error():
bridge = MCPBridge(include_gitea=False, include_shell=False)
mock_client = AsyncMock()
mock_client.post = AsyncMock(
side_effect=httpx.ConnectError("Connection refused")
)
mock_client.post = AsyncMock(side_effect=httpx.ConnectError("Connection refused"))
mock_client.aclose = AsyncMock()
bridge._client = mock_client

View File

@@ -9,7 +9,6 @@ import pytest
from timmy.research_triage import (
ActionItem,
_parse_llm_response,
_resolve_label_ids,
_validate_action_item,
create_gitea_issue,
extract_action_items,
@@ -250,7 +249,9 @@ class TestCreateGiteaIssue:
with (
patch("timmy.research_triage.settings") as mock_settings,
patch("timmy.research_triage._resolve_label_ids", new_callable=AsyncMock, return_value=[1]),
patch(
"timmy.research_triage._resolve_label_ids", new_callable=AsyncMock, return_value=[1]
),
patch("timmy.research_triage.httpx.AsyncClient") as mock_cls,
):
mock_settings.gitea_enabled = True
@@ -284,7 +285,9 @@ class TestCreateGiteaIssue:
with (
patch("timmy.research_triage.settings") as mock_settings,
patch("timmy.research_triage._resolve_label_ids", new_callable=AsyncMock, return_value=[]),
patch(
"timmy.research_triage._resolve_label_ids", new_callable=AsyncMock, return_value=[]
),
patch("timmy.research_triage.httpx.AsyncClient") as mock_cls,
):
mock_settings.gitea_enabled = True
@@ -331,7 +334,9 @@ class TestTriageResearchReport:
with (
patch("timmy.research_triage.settings") as mock_settings,
patch("timmy.research_triage._resolve_label_ids", new_callable=AsyncMock, return_value=[]),
patch(
"timmy.research_triage._resolve_label_ids", new_callable=AsyncMock, return_value=[]
),
patch("timmy.research_triage.httpx.AsyncClient") as mock_cls,
):
mock_settings.gitea_enabled = True

View File

@@ -14,7 +14,6 @@ from timmy.kimi_delegation import (
exceeds_local_capacity,
)
# ── Constants ─────────────────────────────────────────────────────────────────
@@ -455,9 +454,7 @@ class TestExtractAndCreateFollowups:
patch("config.settings", mock_settings),
patch("httpx.AsyncClient", return_value=async_ctx),
):
result = await extract_and_create_followups(
"1. Do the thing\n2. Do another thing", 10
)
result = await extract_and_create_followups("1. Do the thing\n2. Do another thing", 10)
assert result["success"] is True
assert 200 in result["created"]