From 04f87df29c699fd106aa95b7efb327cead7b109e Mon Sep 17 00:00:00 2001
From: Alexander Whitestone
Date: Mon, 23 Mar 2026 11:25:36 -0400
Subject: [PATCH 1/2] feat: add vllm-mlx as OpenAI-compatible local inference
 backend (#1069)

Adds vllm-mlx as a supported provider type in the cascade router.
vllm-mlx offers 25-50% higher throughput than Ollama on Apple Silicon
with ~15% lower memory usage, via a fully OpenAI-compatible API.

Changes:
- cascade.py: add `vllm_mlx` provider type with health check (strips /v1
  suffix from base_url before calling /health) and `_call_vllm_mlx` method
  (reuses OpenAI client pointed at local server, no API key needed)
- providers.yaml: add `vllm-mlx-local` entry (disabled by default,
  priority 2) with benchmark notes and setup instructions
- test_router_cascade.py: tests for availability checks (no-requests
  fallback, healthy/down server, default URL) and completion routing
  (correct provider used, URL normalization, metabolic-protocol bypass)

Recommendation (documented in providers.yaml): prefer vllm-mlx over Ollama
when throughput matters on Apple Silicon; stay on Ollama for broadest
ecosystem compatibility and simpler setup.

Refs #1069

Co-Authored-By: Claude Sonnet 4.6
---
 config/providers.yaml                       |  23 +++
 src/infrastructure/router/cascade.py        |  66 +++++++
 tests/infrastructure/test_router_cascade.py | 195 ++++++++++++++++++++
 3 files changed, 284 insertions(+)

diff --git a/config/providers.yaml b/config/providers.yaml
index aeb1c720..c90256fc 100644
--- a/config/providers.yaml
+++ b/config/providers.yaml
@@ -67,6 +67,29 @@ providers:
       capabilities: [text, creative, streaming]
       description: "Dolphin 3.0 8B with Morrowind system prompt and higher temperature"
 
+  # Secondary: vllm-mlx (OpenAI-compatible local backend, 25–50% faster than Ollama on Apple Silicon)
+  # Evaluation results (EuroMLSys '26 / M3 Ultra benchmarks):
+  #   - 21–87% higher throughput than llama.cpp across configurations
+  #   - +38% to +59% speed advantage vs Ollama on M3 Ultra for Qwen3-14B
+  #   - ~15% lower memory usage than Ollama
+  #   - Full OpenAI-compatible API — tool calling works identically
+  # Recommendation: Use over Ollama when throughput matters and Apple Silicon is available.
+  # Stay on Ollama for broadest ecosystem compatibility and simpler setup.
+  # To enable: start vllm-mlx server (`python -m vllm.entrypoints.openai.api_server
+  #   --model Qwen/Qwen2.5-14B-Instruct-MLX --port 8000`) then set enabled: true.
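+  # Quick smoke test once the server is up (a sketch; both endpoints come from
+  # the OpenAI-compatible surface this provider relies on; default port assumed):
+  #   curl -s http://localhost:8000/health   # returns 200 when the server is ready
+  #   curl -s http://localhost:8000/v1/chat/completions \
+  #     -H "Content-Type: application/json" \
+  #     -d '{"model": "Qwen/Qwen2.5-14B-Instruct-MLX", "messages": [{"role": "user", "content": "ping"}]}'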
+ - name: vllm-mlx-local + type: vllm_mlx + enabled: false # Enable when vllm-mlx server is running + priority: 2 + base_url: "http://localhost:8000/v1" + models: + - name: Qwen/Qwen2.5-14B-Instruct-MLX + default: true + context_window: 32000 + capabilities: [text, tools, json, streaming] + - name: mlx-community/Qwen2.5-7B-Instruct-4bit + context_window: 32000 + capabilities: [text, tools, json, streaming] # Tertiary: OpenAI (if API key available) - name: openai-backup diff --git a/src/infrastructure/router/cascade.py b/src/infrastructure/router/cascade.py index c6798cb4..b9c07514 100644 --- a/src/infrastructure/router/cascade.py +++ b/src/infrastructure/router/cascade.py @@ -310,6 +310,22 @@ class CascadeRouter: logger.debug("Ollama provider check error: %s", exc) return False + elif provider.type == "vllm_mlx": + # Check if local vllm-mlx server is running (OpenAI-compatible) + if requests is None: + return True + try: + base_url = provider.base_url or provider.url or "http://localhost:8000" + # Strip /v1 suffix — health endpoint is at the root + server_root = base_url.rstrip("/") + if server_root.endswith("/v1"): + server_root = server_root[:-3] + response = requests.get(f"{server_root}/health", timeout=5) + return response.status_code == 200 + except Exception as exc: + logger.debug("vllm-mlx provider check error: %s", exc) + return False + elif provider.type in ("openai", "anthropic", "grok"): # Check if API key is set return provider.api_key is not None and provider.api_key != "" @@ -619,6 +635,14 @@ class CascadeRouter: temperature=temperature, max_tokens=max_tokens, ) + elif provider.type == "vllm_mlx": + result = await self._call_vllm_mlx( + provider=provider, + messages=messages, + model=model or provider.get_default_model(), + temperature=temperature, + max_tokens=max_tokens, + ) else: raise ValueError(f"Unknown provider type: {provider.type}") @@ -815,6 +839,48 @@ class CascadeRouter: "model": response.model, } + async def _call_vllm_mlx( + self, + provider: Provider, + messages: list[dict], + model: str, + temperature: float, + max_tokens: int | None, + ) -> dict: + """Call vllm-mlx via its OpenAI-compatible API. + + vllm-mlx exposes the same /v1/chat/completions endpoint as OpenAI, + so we reuse the OpenAI client pointed at the local server. + No API key is required for local deployments. 
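+
+        Wire-level sketch of the request this wraps (fields follow the
+        OpenAI chat completions schema; values are illustrative only):
+
+            POST {base_url}/chat/completions
+            {"model": "Qwen/Qwen2.5-14B-Instruct-MLX",
+             "messages": [{"role": "user", "content": "..."}],
+             "temperature": 0.7}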
+ """ + import openai + + base_url = provider.base_url or provider.url or "http://localhost:8000" + # Ensure the base_url ends with /v1 as expected by the OpenAI client + if not base_url.rstrip("/").endswith("/v1"): + base_url = base_url.rstrip("/") + "/v1" + + client = openai.AsyncOpenAI( + api_key=provider.api_key or "no-key-required", + base_url=base_url, + timeout=self.config.timeout_seconds, + ) + + kwargs: dict = { + "model": model, + "messages": messages, + "temperature": temperature, + } + if max_tokens: + kwargs["max_tokens"] = max_tokens + + response = await client.chat.completions.create(**kwargs) + + return { + "content": response.choices[0].message.content, + "model": response.model, + } + def _record_success(self, provider: Provider, latency_ms: float) -> None: """Record a successful request.""" provider.metrics.total_requests += 1 diff --git a/tests/infrastructure/test_router_cascade.py b/tests/infrastructure/test_router_cascade.py index cab475fa..29140659 100644 --- a/tests/infrastructure/test_router_cascade.py +++ b/tests/infrastructure/test_router_cascade.py @@ -489,6 +489,201 @@ class TestProviderAvailabilityCheck: assert router._check_provider_available(provider) is False + def test_check_vllm_mlx_without_requests(self): + """Test vllm-mlx returns True when requests not available (fallback).""" + router = CascadeRouter(config_path=Path("/nonexistent")) + + provider = Provider( + name="vllm-mlx-local", + type="vllm_mlx", + enabled=True, + priority=2, + base_url="http://localhost:8000/v1", + ) + + import infrastructure.router.cascade as cascade_module + + old_requests = cascade_module.requests + cascade_module.requests = None + try: + assert router._check_provider_available(provider) is True + finally: + cascade_module.requests = old_requests + + def test_check_vllm_mlx_server_healthy(self): + """Test vllm-mlx when health check succeeds.""" + from unittest.mock import MagicMock, patch + + router = CascadeRouter(config_path=Path("/nonexistent")) + + provider = Provider( + name="vllm-mlx-local", + type="vllm_mlx", + enabled=True, + priority=2, + base_url="http://localhost:8000/v1", + ) + + mock_response = MagicMock() + mock_response.status_code = 200 + + with patch("infrastructure.router.cascade.requests") as mock_requests: + mock_requests.get.return_value = mock_response + result = router._check_provider_available(provider) + + assert result is True + mock_requests.get.assert_called_once_with("http://localhost:8000/health", timeout=5) + + def test_check_vllm_mlx_server_down(self): + """Test vllm-mlx when server is not running.""" + from unittest.mock import patch + + router = CascadeRouter(config_path=Path("/nonexistent")) + + provider = Provider( + name="vllm-mlx-local", + type="vllm_mlx", + enabled=True, + priority=2, + base_url="http://localhost:8000/v1", + ) + + with patch("infrastructure.router.cascade.requests") as mock_requests: + mock_requests.get.side_effect = ConnectionRefusedError("Connection refused") + result = router._check_provider_available(provider) + + assert result is False + + def test_check_vllm_mlx_default_url(self): + """Test vllm-mlx uses default localhost:8000 when no URL configured.""" + from unittest.mock import MagicMock, patch + + router = CascadeRouter(config_path=Path("/nonexistent")) + + provider = Provider( + name="vllm-mlx-local", + type="vllm_mlx", + enabled=True, + priority=2, + ) + + mock_response = MagicMock() + mock_response.status_code = 200 + + with patch("infrastructure.router.cascade.requests") as mock_requests: + 
mock_requests.get.return_value = mock_response + router._check_provider_available(provider) + + mock_requests.get.assert_called_once_with("http://localhost:8000/health", timeout=5) + + +@pytest.mark.asyncio +class TestVllmMlxProvider: + """Test vllm-mlx provider integration.""" + + async def test_complete_with_vllm_mlx(self): + """Test successful completion via vllm-mlx.""" + router = CascadeRouter(config_path=Path("/nonexistent")) + + provider = Provider( + name="vllm-mlx-local", + type="vllm_mlx", + enabled=True, + priority=2, + base_url="http://localhost:8000/v1", + models=[{"name": "Qwen/Qwen2.5-14B-Instruct-MLX", "default": True}], + ) + router.providers = [provider] + + with patch.object(router, "_call_vllm_mlx") as mock_call: + mock_call.return_value = { + "content": "MLX response", + "model": "Qwen/Qwen2.5-14B-Instruct-MLX", + } + + result = await router.complete( + messages=[{"role": "user", "content": "Hi"}], + ) + + assert result["content"] == "MLX response" + assert result["provider"] == "vllm-mlx-local" + assert result["model"] == "Qwen/Qwen2.5-14B-Instruct-MLX" + + async def test_vllm_mlx_base_url_normalization(self): + """Test _call_vllm_mlx appends /v1 when missing.""" + from unittest.mock import AsyncMock, MagicMock, patch + + router = CascadeRouter(config_path=Path("/nonexistent")) + + provider = Provider( + name="vllm-mlx-local", + type="vllm_mlx", + enabled=True, + priority=2, + base_url="http://localhost:8000", # No /v1 + models=[{"name": "qwen-mlx", "default": True}], + ) + + mock_choice = MagicMock() + mock_choice.message.content = "hello" + mock_response = MagicMock() + mock_response.choices = [mock_choice] + mock_response.model = "qwen-mlx" + + captured_base_url = [] + + async def fake_create(**kwargs): + return mock_response + + with patch("openai.AsyncOpenAI") as mock_openai_cls: + mock_client = MagicMock() + mock_client.chat.completions.create = AsyncMock(side_effect=fake_create) + mock_openai_cls.return_value = mock_client + + await router._call_vllm_mlx( + provider=provider, + messages=[{"role": "user", "content": "hi"}], + model="qwen-mlx", + temperature=0.7, + max_tokens=None, + ) + + call_kwargs = mock_openai_cls.call_args + base_url_used = call_kwargs.kwargs.get("base_url") or call_kwargs[1].get( + "base_url" + ) + assert base_url_used.endswith("/v1") + + async def test_vllm_mlx_is_local_not_cloud(self): + """Confirm vllm_mlx is not subject to metabolic protocol cloud skip.""" + router = CascadeRouter(config_path=Path("/nonexistent")) + + provider = Provider( + name="vllm-mlx-local", + type="vllm_mlx", + enabled=True, + priority=2, + base_url="http://localhost:8000/v1", + models=[{"name": "qwen-mlx", "default": True}], + ) + router.providers = [provider] + + # Quota monitor returns False (block cloud) — vllm_mlx should still be tried + with patch("infrastructure.router.cascade._quota_monitor") as mock_qm: + mock_qm.check.return_value = object() + mock_qm.should_use_cloud.return_value = False + + with patch.object(router, "_call_vllm_mlx") as mock_call: + mock_call.return_value = { + "content": "Local MLX response", + "model": "qwen-mlx", + } + result = await router.complete( + messages=[{"role": "user", "content": "hi"}], + ) + + assert result["content"] == "Local MLX response" + class TestCascadeRouterReload: """Test hot-reload of providers.yaml.""" -- 2.43.0 From 28b16b166eca1e3ae3a847553c44e88a5dd946cd Mon Sep 17 00:00:00 2001 From: Hermes Date: Mon, 23 Mar 2026 11:33:12 -0400 Subject: [PATCH 2/2] fix: resolve lint errors across codebase (ruff UP042, 
UP045, UP017, E741, F841, F401, I001) --- src/infrastructure/claude_quota.py | 52 +++++++++++---------- src/timmy/kimi_delegation.py | 6 +-- src/timmy/research_triage.py | 8 ++-- src/timmy_serve/cli.py | 4 +- tests/dashboard/test_health.py | 17 ++++--- tests/infrastructure/test_claude_quota.py | 18 ++++--- tests/infrastructure/test_router_cascade.py | 6 +-- tests/timmy/test_mcp_bridge.py | 24 +++------- tests/timmy/test_research_triage.py | 13 ++++-- tests/unit/test_kimi_delegation.py | 5 +- 10 files changed, 71 insertions(+), 82 deletions(-) diff --git a/src/infrastructure/claude_quota.py b/src/infrastructure/claude_quota.py index 43b77763..90f915b2 100644 --- a/src/infrastructure/claude_quota.py +++ b/src/infrastructure/claude_quota.py @@ -25,18 +25,17 @@ import logging import subprocess import urllib.request from dataclasses import dataclass -from datetime import datetime, timezone -from enum import Enum -from typing import Optional +from datetime import UTC, datetime +from enum import StrEnum logger = logging.getLogger(__name__) -class MetabolicTier(str, Enum): +class MetabolicTier(StrEnum): """The three-tier metabolic protocol from the Timmy Time architecture.""" - BURST = "burst" # Cloud API (Claude/Groq) — expensive, best quality - ACTIVE = "active" # Local 14B (Qwen3-14B) — free, good quality + BURST = "burst" # Cloud API (Claude/Groq) — expensive, best quality + ACTIVE = "active" # Local 14B (Qwen3-14B) — free, good quality RESTING = "resting" # Local 8B (Qwen3-8B) — free, fast, adequate @@ -44,10 +43,10 @@ class MetabolicTier(str, Enum): class QuotaStatus: """Current Claude quota state.""" - five_hour_utilization: float # 0.0 to 1.0 - five_hour_resets_at: Optional[str] - seven_day_utilization: float # 0.0 to 1.0 - seven_day_resets_at: Optional[str] + five_hour_utilization: float # 0.0 to 1.0 + five_hour_resets_at: str | None + seven_day_utilization: float # 0.0 to 1.0 + seven_day_resets_at: str | None raw_response: dict fetched_at: datetime @@ -101,11 +100,11 @@ class QuotaMonitor: USER_AGENT = "claude-code/2.0.32" def __init__(self) -> None: - self._token: Optional[str] = None - self._last_status: Optional[QuotaStatus] = None + self._token: str | None = None + self._last_status: QuotaStatus | None = None self._cache_seconds = 30 # Don't hammer the API - def _get_token(self) -> Optional[str]: + def _get_token(self) -> str | None: """Extract OAuth token from macOS Keychain.""" if self._token: return self._token @@ -126,11 +125,16 @@ class QuotaMonitor: self._token = oauth.get("accessToken") return self._token - except (json.JSONDecodeError, KeyError, FileNotFoundError, subprocess.TimeoutExpired) as exc: + except ( + json.JSONDecodeError, + KeyError, + FileNotFoundError, + subprocess.TimeoutExpired, + ) as exc: logger.warning("Could not read Claude Code credentials: %s", exc) return None - def check(self, force: bool = False) -> Optional[QuotaStatus]: + def check(self, force: bool = False) -> QuotaStatus | None: """ Fetch current quota status. 
@@ -139,7 +143,7 @@ class QuotaMonitor: """ # Return cached if fresh if not force and self._last_status: - age = (datetime.now(timezone.utc) - self._last_status.fetched_at).total_seconds() + age = (datetime.now(UTC) - self._last_status.fetched_at).total_seconds() if age < self._cache_seconds: return self._last_status @@ -170,7 +174,7 @@ class QuotaMonitor: seven_day_utilization=float(seven_day.get("utilization", 0.0)), seven_day_resets_at=seven_day.get("resets_at"), raw_response=data, - fetched_at=datetime.now(timezone.utc), + fetched_at=datetime.now(UTC), ) return self._last_status @@ -195,13 +199,13 @@ class QuotaMonitor: tier = status.recommended_tier if tier == MetabolicTier.BURST and task_complexity == "high": - return "claude-sonnet-4-6" # Cloud — best quality + return "claude-sonnet-4-6" # Cloud — best quality elif tier == MetabolicTier.BURST and task_complexity == "medium": - return "qwen3:14b" # Save cloud for truly hard tasks + return "qwen3:14b" # Save cloud for truly hard tasks elif tier == MetabolicTier.ACTIVE: - return "qwen3:14b" # Local 14B — good enough + return "qwen3:14b" # Local 14B — good enough else: # RESTING - return "qwen3:8b" # Local 8B — conserve everything + return "qwen3:8b" # Local 8B — conserve everything def should_use_cloud(self, task_value: str = "normal") -> bool: """ @@ -224,14 +228,14 @@ class QuotaMonitor: return False # Never waste cloud on routine -def _time_remaining(reset_at: Optional[str]) -> str: +def _time_remaining(reset_at: str | None) -> str: """Format time until reset as human-readable string.""" if not reset_at or reset_at == "null": return "unknown" try: reset = datetime.fromisoformat(reset_at.replace("Z", "+00:00")) - now = datetime.now(timezone.utc) + now = datetime.now(UTC) diff = reset - now if diff.total_seconds() <= 0: @@ -249,7 +253,7 @@ def _time_remaining(reset_at: Optional[str]) -> str: # Module-level singleton -_quota_monitor: Optional[QuotaMonitor] = None +_quota_monitor: QuotaMonitor | None = None def get_quota_monitor() -> QuotaMonitor: diff --git a/src/timmy/kimi_delegation.py b/src/timmy/kimi_delegation.py index 7b910302..671cce84 100644 --- a/src/timmy/kimi_delegation.py +++ b/src/timmy/kimi_delegation.py @@ -299,9 +299,7 @@ async def poll_kimi_issue( "error": None, } else: - logger.warning( - "Poll issue #%s returned %s", issue_number, resp.status_code - ) + logger.warning("Poll issue #%s returned %s", issue_number, resp.status_code) except Exception as exc: logger.warning("Poll error for issue #%s: %s", issue_number, exc) @@ -332,7 +330,7 @@ def _extract_action_items(text: str) -> list[str]: items: list[str] = [] patterns = [ re.compile(r"^[-*]\s+\[ \]\s+(.+)", re.MULTILINE), # - [ ] checkbox - re.compile(r"^\d+\.\s+(.+)", re.MULTILINE), # 1. numbered list + re.compile(r"^\d+\.\s+(.+)", re.MULTILINE), # 1. 
numbered list re.compile(r"^(?:Action|TODO|Next step):\s*(.+)", re.MULTILINE | re.IGNORECASE), ] seen: set[str] = set() diff --git a/src/timmy/research_triage.py b/src/timmy/research_triage.py index 2af0df32..9a1344ac 100644 --- a/src/timmy/research_triage.py +++ b/src/timmy/research_triage.py @@ -54,9 +54,7 @@ class ActionItem: parts.append(f"- {url}") if source_issue: - parts.append( - f"\n### Origin\nExtracted from research in #{source_issue}" - ) + parts.append(f"\n### Origin\nExtracted from research in #{source_issue}") parts.append("\n---\n*Auto-triaged from research findings by Timmy*") return "\n".join(parts) @@ -123,7 +121,7 @@ def _validate_action_item(raw_item: dict[str, Any]) -> ActionItem | None: labels = raw_item.get("labels", []) if isinstance(labels, str): - labels = [l.strip() for l in labels.split(",") if l.strip()] + labels = [lbl.strip() for lbl in labels.split(",") if lbl.strip()] if not isinstance(labels, list): labels = [] @@ -303,7 +301,7 @@ async def _resolve_label_ids( if resp.status_code != 200: return [] - existing = {l["name"]: l["id"] for l in resp.json()} + existing = {lbl["name"]: lbl["id"] for lbl in resp.json()} label_ids = [] for name in label_names: diff --git a/src/timmy_serve/cli.py b/src/timmy_serve/cli.py index f9d8b24a..0f4fd16d 100644 --- a/src/timmy_serve/cli.py +++ b/src/timmy_serve/cli.py @@ -14,7 +14,9 @@ app = typer.Typer(help="Timmy Serve — sovereign AI agent API") def start( port: int = typer.Option(8402, "--port", "-p", help="Port for the serve API"), host: str = typer.Option("0.0.0.0", "--host", "-h", help="Host to bind to"), - price: int = typer.Option(None, "--price", help="Price per request in sats (default: from config)"), + price: int = typer.Option( + None, "--price", help="Price per request in sats (default: from config)" + ), dry_run: bool = typer.Option(False, "--dry-run", help="Print config and exit (for testing)"), ): """Start Timmy in serve mode.""" diff --git a/tests/dashboard/test_health.py b/tests/dashboard/test_health.py index 7d6eded3..9f83f91e 100644 --- a/tests/dashboard/test_health.py +++ b/tests/dashboard/test_health.py @@ -24,7 +24,6 @@ from dashboard.routes.health import ( _generate_recommendations, ) - # --------------------------------------------------------------------------- # Pydantic models # --------------------------------------------------------------------------- @@ -118,7 +117,9 @@ class TestGenerateRecommendations: def test_unavailable_service(self): deps = [ - DependencyStatus(name="Ollama AI", status="unavailable", sovereignty_score=10, details={}) + DependencyStatus( + name="Ollama AI", status="unavailable", sovereignty_score=10, details={} + ) ] recs = _generate_recommendations(deps) assert any("Ollama AI is unavailable" in r for r in recs) @@ -137,9 +138,7 @@ class TestGenerateRecommendations: def test_degraded_non_lightning(self): """Degraded non-Lightning dep produces no specific recommendation.""" - deps = [ - DependencyStatus(name="Redis", status="degraded", sovereignty_score=5, details={}) - ] + deps = [DependencyStatus(name="Redis", status="degraded", sovereignty_score=5, details={})] recs = _generate_recommendations(deps) assert recs == ["System operating optimally - all dependencies healthy"] @@ -379,7 +378,9 @@ class TestHealthEndpoint: assert response.status_code == 200 def test_ok_when_ollama_up(self, client): - with patch("dashboard.routes.health.check_ollama", new_callable=AsyncMock, return_value=True): + with patch( + "dashboard.routes.health.check_ollama", new_callable=AsyncMock, 
return_value=True + ): data = client.get("/health").json() assert data["status"] == "ok" @@ -415,7 +416,9 @@ class TestHealthStatusPanel: assert "text/html" in response.headers["content-type"] def test_shows_up_when_ollama_healthy(self, client): - with patch("dashboard.routes.health.check_ollama", new_callable=AsyncMock, return_value=True): + with patch( + "dashboard.routes.health.check_ollama", new_callable=AsyncMock, return_value=True + ): text = client.get("/health/status").text assert "UP" in text diff --git a/tests/infrastructure/test_claude_quota.py b/tests/infrastructure/test_claude_quota.py index c1b7b04e..d6bd8b1f 100644 --- a/tests/infrastructure/test_claude_quota.py +++ b/tests/infrastructure/test_claude_quota.py @@ -1,9 +1,7 @@ """Tests for Claude Quota Monitor and Metabolic Protocol.""" -from datetime import datetime, timedelta, timezone -from unittest.mock import MagicMock, patch - -import pytest +from datetime import UTC, datetime, timedelta +from unittest.mock import patch from infrastructure.claude_quota import ( MetabolicTier, @@ -22,7 +20,7 @@ def _make_status(five_hour: float = 0.0, seven_day: float = 0.0) -> QuotaStatus: seven_day_utilization=seven_day, seven_day_resets_at=None, raw_response={}, - fetched_at=datetime.now(timezone.utc), + fetched_at=datetime.now(UTC), ) @@ -104,25 +102,25 @@ class TestTimeRemaining: assert _time_remaining("") == "unknown" def test_past_time_returns_resetting_now(self): - past = (datetime.now(timezone.utc) - timedelta(hours=1)).isoformat() + past = (datetime.now(UTC) - timedelta(hours=1)).isoformat() assert _time_remaining(past) == "resetting now" def test_future_time_hours_and_minutes(self): - future = (datetime.now(timezone.utc) + timedelta(hours=2, minutes=15)).isoformat() + future = (datetime.now(UTC) + timedelta(hours=2, minutes=15)).isoformat() result = _time_remaining(future) assert "2h" in result # Minutes may vary ±1 due to test execution time assert "m" in result def test_future_time_minutes_only(self): - future = (datetime.now(timezone.utc) + timedelta(minutes=45)).isoformat() + future = (datetime.now(UTC) + timedelta(minutes=45)).isoformat() result = _time_remaining(future) assert "h" not in result # Minutes may vary ±1 due to test execution time assert "m" in result def test_z_suffix_handled(self): - future = (datetime.now(timezone.utc) + timedelta(hours=1)).strftime("%Y-%m-%dT%H:%M:%SZ") + future = (datetime.now(UTC) + timedelta(hours=1)).strftime("%Y-%m-%dT%H:%M:%SZ") result = _time_remaining(future) assert result != "unknown" @@ -238,7 +236,7 @@ class TestQuotaMonitorCaching: def test_stale_cache_triggers_fetch(self): monitor = QuotaMonitor() - old_time = datetime.now(timezone.utc) - timedelta(seconds=60) + old_time = datetime.now(UTC) - timedelta(seconds=60) stale_status = QuotaStatus( five_hour_utilization=0.10, five_hour_resets_at=None, diff --git a/tests/infrastructure/test_router_cascade.py b/tests/infrastructure/test_router_cascade.py index 29140659..fc55f6e6 100644 --- a/tests/infrastructure/test_router_cascade.py +++ b/tests/infrastructure/test_router_cascade.py @@ -630,8 +630,6 @@ class TestVllmMlxProvider: mock_response.choices = [mock_choice] mock_response.model = "qwen-mlx" - captured_base_url = [] - async def fake_create(**kwargs): return mock_response @@ -649,9 +647,7 @@ class TestVllmMlxProvider: ) call_kwargs = mock_openai_cls.call_args - base_url_used = call_kwargs.kwargs.get("base_url") or call_kwargs[1].get( - "base_url" - ) + base_url_used = call_kwargs.kwargs.get("base_url") or 
call_kwargs[1].get("base_url") assert base_url_used.endswith("/v1") async def test_vllm_mlx_is_local_not_cloud(self): diff --git a/tests/timmy/test_mcp_bridge.py b/tests/timmy/test_mcp_bridge.py index d11b5c63..45d218e1 100644 --- a/tests/timmy/test_mcp_bridge.py +++ b/tests/timmy/test_mcp_bridge.py @@ -175,9 +175,7 @@ async def test_bridge_run_simple_response(): bridge = MCPBridge(include_gitea=False, include_shell=False) mock_resp = MagicMock() - mock_resp.json.return_value = { - "message": {"role": "assistant", "content": "Hello!"} - } + mock_resp.json.return_value = {"message": {"role": "assistant", "content": "Hello!"}} mock_resp.raise_for_status = MagicMock() mock_client = AsyncMock() @@ -238,9 +236,7 @@ async def test_bridge_run_with_tool_call(): # Round 2: model returns final text final_resp = MagicMock() - final_resp.json.return_value = { - "message": {"role": "assistant", "content": "Done with tools!"} - } + final_resp.json.return_value = {"message": {"role": "assistant", "content": "Done with tools!"}} final_resp.raise_for_status = MagicMock() mock_client = AsyncMock() @@ -276,17 +272,13 @@ async def test_bridge_run_unknown_tool(): "message": { "role": "assistant", "content": "", - "tool_calls": [ - {"function": {"name": "nonexistent", "arguments": {}}} - ], + "tool_calls": [{"function": {"name": "nonexistent", "arguments": {}}}], } } tool_call_resp.raise_for_status = MagicMock() final_resp = MagicMock() - final_resp.json.return_value = { - "message": {"role": "assistant", "content": "OK"} - } + final_resp.json.return_value = {"message": {"role": "assistant", "content": "OK"}} final_resp.raise_for_status = MagicMock() mock_client = AsyncMock() @@ -332,9 +324,7 @@ async def test_bridge_run_max_rounds(): "message": { "role": "assistant", "content": "", - "tool_calls": [ - {"function": {"name": "loop_tool", "arguments": {}}} - ], + "tool_calls": [{"function": {"name": "loop_tool", "arguments": {}}}], } } tool_call_resp.raise_for_status = MagicMock() @@ -365,9 +355,7 @@ async def test_bridge_run_connection_error(): bridge = MCPBridge(include_gitea=False, include_shell=False) mock_client = AsyncMock() - mock_client.post = AsyncMock( - side_effect=httpx.ConnectError("Connection refused") - ) + mock_client.post = AsyncMock(side_effect=httpx.ConnectError("Connection refused")) mock_client.aclose = AsyncMock() bridge._client = mock_client diff --git a/tests/timmy/test_research_triage.py b/tests/timmy/test_research_triage.py index 66407373..8bf092b7 100644 --- a/tests/timmy/test_research_triage.py +++ b/tests/timmy/test_research_triage.py @@ -9,7 +9,6 @@ import pytest from timmy.research_triage import ( ActionItem, _parse_llm_response, - _resolve_label_ids, _validate_action_item, create_gitea_issue, extract_action_items, @@ -250,7 +249,9 @@ class TestCreateGiteaIssue: with ( patch("timmy.research_triage.settings") as mock_settings, - patch("timmy.research_triage._resolve_label_ids", new_callable=AsyncMock, return_value=[1]), + patch( + "timmy.research_triage._resolve_label_ids", new_callable=AsyncMock, return_value=[1] + ), patch("timmy.research_triage.httpx.AsyncClient") as mock_cls, ): mock_settings.gitea_enabled = True @@ -284,7 +285,9 @@ class TestCreateGiteaIssue: with ( patch("timmy.research_triage.settings") as mock_settings, - patch("timmy.research_triage._resolve_label_ids", new_callable=AsyncMock, return_value=[]), + patch( + "timmy.research_triage._resolve_label_ids", new_callable=AsyncMock, return_value=[] + ), patch("timmy.research_triage.httpx.AsyncClient") as mock_cls, ): 
mock_settings.gitea_enabled = True @@ -331,7 +334,9 @@ class TestTriageResearchReport: with ( patch("timmy.research_triage.settings") as mock_settings, - patch("timmy.research_triage._resolve_label_ids", new_callable=AsyncMock, return_value=[]), + patch( + "timmy.research_triage._resolve_label_ids", new_callable=AsyncMock, return_value=[] + ), patch("timmy.research_triage.httpx.AsyncClient") as mock_cls, ): mock_settings.gitea_enabled = True diff --git a/tests/unit/test_kimi_delegation.py b/tests/unit/test_kimi_delegation.py index 38b2a163..14227298 100644 --- a/tests/unit/test_kimi_delegation.py +++ b/tests/unit/test_kimi_delegation.py @@ -14,7 +14,6 @@ from timmy.kimi_delegation import ( exceeds_local_capacity, ) - # ── Constants ───────────────────────────────────────────────────────────────── @@ -455,9 +454,7 @@ class TestExtractAndCreateFollowups: patch("config.settings", mock_settings), patch("httpx.AsyncClient", return_value=async_ctx), ): - result = await extract_and_create_followups( - "1. Do the thing\n2. Do another thing", 10 - ) + result = await extract_and_create_followups("1. Do the thing\n2. Do another thing", 10) assert result["success"] is True assert 200 in result["created"] -- 2.43.0