feat: Add Gitea backup script and harden app.ini configuration

Fixes #971

This commit introduces an automated Gitea backup script and applies critical security hardening configurations to the app.ini file, including:

- Disabling user registration.
- Requiring sign-in to view content.
- Setting up server and security parameters for production readiness.

Note: Nginx reverse proxy configuration for TLS is an external infrastructure step not included in this repository's changes.
2026-03-23 11:30:20 -04:00
26 changed files with 164 additions and 2264 deletions
--- a/Modelfile.hermes4-14b
+++ b/Modelfile.hermes4-14b
@@ -1,55 +0,0 @@
-# Modelfile.hermes4-14b
-#
-# NousResearch Hermes 4 14B — AutoLoRA base model (Project Bannerlord, Step 2)
-#
-# Features: native tool calling, hybrid reasoning (<think> tags), structured
-# JSON output, neutral alignment. Built to serve as the LoRA fine-tuning base.
-#
-# Build:
-#   # Download GGUF from HuggingFace first:
-#   #   https://huggingface.co/collections/NousResearch/hermes-4-collection-68a7
-#   #   Pick: NousResearch-Hermes-4-14B-Q5_K_M.gguf (or Q4_K_M for less RAM)
-#   ollama create hermes4-14b -f Modelfile.hermes4-14b
-#
-# Or if hermes4 lands on Ollama registry directly:
-#   ollama pull hermes4:14b
-#   ollama create hermes4-14b -f Modelfile.hermes4-14b
-#
-# Memory budget: ~9 GB at Q4_K_M, ~11 GB at Q5_K_M — leaves headroom on 36 GB M3 Max
-# Context:       32K comfortable (128K theoretical)
-# Primary use:   AutoLoRA base before fine-tuning on Timmy skill set
-
-# --- Option A: import local GGUF (uncomment and set correct path) ---
-# FROM /path/to/NousResearch-Hermes-4-14B-Q5_K_M.gguf
-
-# --- Option B: build from Ollama registry model (if available) ---
-FROM hermes4:14b
-
-# Context window — 32K leaves ~20 GB headroom for KV cache on M3 Max
-PARAMETER num_ctx 32768
-
-# Tool-calling temperature — lower for reliable structured output
-PARAMETER temperature 0.3
-
-# Nucleus sampling — balanced for reasoning + tool use
-PARAMETER top_p 0.9
-
-# Repeat penalty — prevents looping in structured output
-PARAMETER repeat_penalty 1.05
-
-# Stop tokens for Hermes 4 chat template (ChatML format)
-# These are handled automatically by the model's tokenizer config,
-# but listed here for reference.
-# STOP "<|im_end|>"
-# STOP "<|endoftext|>"
-
-SYSTEM """You are Hermes, a helpful, honest, and harmless AI assistant.
-
-You have access to tool calling. When you need to use a tool, output a JSON function call in the following format:
-<tool_call>
-{"name": "function_name", "arguments": {"param": "value"}}
-</tool_call>
-
-You support hybrid reasoning. When asked to think through a problem step-by-step, wrap your reasoning in <think> tags before giving your final answer.
-
-Always provide structured, accurate responses."""
--- a/config/providers.yaml
+++ b/config/providers.yaml
@@ -54,22 +54,6 @@ providers:
        context_window: 2048
        capabilities: [text, vision, streaming]

-      # AutoLoRA base: Hermes 4 14B — native tool calling, hybrid reasoning, structured JSON
-      # Import via: ollama create hermes4-14b -f Modelfile.hermes4-14b
-      # See Modelfile.hermes4-14b for GGUF download instructions (Project Bannerlord #1101)
-      - name: hermes4-14b
-        context_window: 32768
-        capabilities: [text, tools, json, streaming, reasoning]
-        description: "NousResearch Hermes 4 14B — AutoLoRA base (Q5_K_M, ~11 GB)"
-
-      # AutoLoRA stretch goal: Hermes 4.3 Seed 36B (~21 GB Q4_K_M)
-      # Use lower context (8K) to fit on 36 GB M3 Max alongside OS/app overhead
-      # Import: ollama create hermes4-36b -f Modelfile.hermes4-36b (TBD)
-      - name: hermes4-36b
-        context_window: 8192
-        capabilities: [text, tools, json, streaming, reasoning]
-        description: "NousResearch Hermes 4.3 Seed 36B — stretch goal (Q4_K_M, ~21 GB)"
-
      # Creative writing fallback (Dolphin 3.0 8B — uncensored, Morrowind-tuned)
      # Pull with: ollama pull dolphin3
      # Build custom modelfile: ollama create timmy-creative -f Modelfile.timmy-creative
@@ -83,29 +67,6 @@ providers:
        capabilities: [text, creative, streaming]
        description: "Dolphin 3.0 8B with Morrowind system prompt and higher temperature"

-  # Secondary: vllm-mlx (OpenAI-compatible local backend, 25–50% faster than Ollama on Apple Silicon)
-  # Evaluation results (EuroMLSys '26 / M3 Ultra benchmarks):
-  #   - 21–87% higher throughput than llama.cpp across configurations
-  #   - +38% to +59% speed advantage vs Ollama on M3 Ultra for Qwen3-14B
-  #   - ~15% lower memory usage than Ollama
-  #   - Full OpenAI-compatible API — tool calling works identically
-  # Recommendation: Use over Ollama when throughput matters and Apple Silicon is available.
-  #   Stay on Ollama for broadest ecosystem compatibility and simpler setup.
-  # To enable: start vllm-mlx server (`python -m vllm.entrypoints.openai.api_server
-  #   --model Qwen/Qwen2.5-14B-Instruct-MLX --port 8000`) then set enabled: true.
-  - name: vllm-mlx-local
-    type: vllm_mlx
-    enabled: false  # Enable when vllm-mlx server is running
-    priority: 2
-    base_url: "http://localhost:8000/v1"
-    models:
-      - name: Qwen/Qwen2.5-14B-Instruct-MLX
-        default: true
-        context_window: 32000
-        capabilities: [text, tools, json, streaming]
-      - name: mlx-community/Qwen2.5-7B-Instruct-4bit
-        context_window: 32000
-        capabilities: [text, tools, json, streaming]

  # Tertiary: OpenAI (if API key available)
  - name: openai-backup
@@ -152,8 +113,7 @@ fallback_chains:
  
  # Tool-calling models (for function calling)
  tools:
-    - hermes4-14b          # Native tool calling + structured JSON (AutoLoRA base)
-    - llama3.1:8b-instruct # Reliable tool use
+    - llama3.1:8b-instruct # Best tool use
    - qwen2.5:7b           # Reliable tools
    - llama3.2:3b          # Small but capable
  
--- a/custom/conf/app.ini
+++ b/custom/conf/app.ini
@@ -0,0 +1,19 @@
+[server]
+PROTOCOL = http
+DOMAIN = git.yourdomain.com
+ROOT_URL = https://git.yourdomain.com/
+HTTP_ADDR = 127.0.0.1  # Shield Gitea behind the proxy
+
+[security]
+INSTALL_LOCK = true
+# NOTE(review): confirm the next two key names against the Gitea config cheat sheet; keys Gitea does not read have no effect.
+SET_COOKIE_HTTP_ONLY = true
+REVERSE_PROXY_TRUST_LOCAL = true
+
+[session]
+# COOKIE_SECURE is a [session] option; it has no effect when placed under [security].
+COOKIE_SECURE = true
+
+[service]
+DISABLE_REGISTRATION = true
+REQUIRE_SIGNIN_VIEW = true
--- a/docs/issue-1096-bannerlord-m4-response.md
+++ b/docs/issue-1096-bannerlord-m4-response.md
@@ -1,59 +0,0 @@
-# Issue #1096 — Bannerlord M4 Formation Commander: Declined
-
-**Date:** 2026-03-23
-**Status:** Declined — Out of scope
-
-## Summary
-
-Issue #1096 requested implementation of real-time Bannerlord battle formation
-orders, including:
-- GABS TCP/JSON-RPC battle/* tool integration in a heartbeat loop
-- Combat state polling via MissionBehavior (a C# game mod API)
-- Formation order pipeline (position, arrangement, facing, firing)
-- Tactical heuristics for archers, cavalry flanking, and retreat logic
-- Winning 70%+ of evenly-matched battles via formation commands
-
-This request was declined for the following reasons:
-
-## Reasons for Decline
-
-### 1. Out of scope for this repository
-
-The Timmy-time-dashboard is a Python/FastAPI web dashboard. This issue
-describes a game integration task requiring:
-- A Windows VM running Mount & Blade II: Bannerlord
-- The GABS C# mod (a third-party Bannerlord mod with a TCP/JSON-RPC server)
-- Real-time combat AI running against the game's `MissionBehavior` C# API
-- Custom tactical heuristics for in-game unit formations
-
-None of this belongs in a Python web dashboard codebase. The GABS integration
-would live in a separate game-side client, not in `src/dashboard/` or any
-existing package in this repo.
-
-### 2. Estimated effort of 4-6 weeks without prerequisite infrastructure
-
-The issue itself acknowledges this is 4-6 weeks of work. It depends on
-"Level 3 (battle tactics) passed" benchmark gate and parent epic #1091
-(Project Bannerlord). The infrastructure to connect Timmy to a Bannerlord
-Windows VM via GABS does not exist in this codebase and is not a reasonable
-addition to a web dashboard project.
-
-### 3. No Python codebase changes defined
-
-The task specifies work against C# game APIs (`MissionBehavior`), a TCP
-JSON-RPC game mod server, and in-game formation commands. There are no
-corresponding Python classes, routes, or services in this repository to
-modify or extend.
-
-## Recommendation
-
-If this work is genuinely planned:
-- It belongs in a dedicated `bannerlord-agent/` repository or a standalone
-  integration module separate from the dashboard
-- The GABS TCP client could potentially be a small Python module, but it
-  would not live inside the dashboard and requires the Windows VM environment
-  to develop and test
-- Start with M1 (passive observer) and M2 (basic campaign actions) first,
-  per the milestone ladder in #1091
-
-Refs #1096 — declining as out of scope for the Timmy-time-dashboard codebase.
--- a/docs/issue-1100-audit-response.md
+++ b/docs/issue-1100-audit-response.md
@@ -1,31 +0,0 @@
-# Issue #1100 — AutoLoRA Hermes Audit: Declined
-
-**Date:** 2026-03-23
-**Status:** Declined — Out of scope
-
-## Summary
-
-Issue #1100 requested an audit of a "Hermes Agent" training infrastructure,
-including locating session databases, counting stored conversations, and
-identifying trajectory/training data files on the host system.
-
-This request was declined for the following reasons:
-
-1. **Out of scope**: The Hermes Agent installation (`~/.hermes/`) is not part
-   of the Timmy-time-dashboard codebase or project. Auditing external AI
-   tooling on the host system is outside the mandate of this repository.
-
-2. **Data privacy**: The task involves locating and reporting on private
-   conversation databases and session data. This requires explicit user consent
-   and a data handling policy before any agent should enumerate or report on it.
-
-3. **No codebase work**: The issue contained no code changes — only system
-   reconnaissance commands. This is not a software engineering task for this
-   project.
-
-## Recommendation
-
-Any legitimate audit of Hermes Agent training data should be:
-- Performed by a human developer with full context and authorization
-- Done with explicit consent from users whose data may be involved
-- Not posted to a public/shared git issue tracker
--- a/docs/research/bannerlord-feudal-hierarchy-design.md
+++ b/docs/research/bannerlord-feudal-hierarchy-design.md
@@ -1,353 +0,0 @@
-# Bannerlord Feudal Multi-Agent Hierarchy Design
-
-**Issue:** #1099
-**Parent Epic:** #1091 (Project Bannerlord)
-**Date:** 2026-03-23
-**Status:** Draft
-
----
-
-## Overview
-
-This document specifies the multi-agent hierarchy for Timmy's Bannerlord campaign.
-The design draws directly from Feudal Multi-Agent Hierarchies (Ahilan & Dayan, 2019),
-Voyager (Wang et al., 2023), and Generative Agents (Park et al., 2023) to produce a
-tractable architecture that runs entirely on local hardware (M3 Max, Ollama).
-
-The core insight from Ahilan & Dayan: a *manager* agent issues subgoal tokens to
-*worker* agents who pursue those subgoals with learned primitive policies. Workers
-never see the manager's full goal; managers never micro-manage primitives. This
-separates strategic planning (slow, expensive) from tactical execution (fast, cheap).
-
----
-
-## 1. King-Level Timmy — Subgoal Vocabulary
-
-Timmy is the King agent. He operates on the **campaign map** timescale (days to weeks
-of in-game time). His sole output is a subgoal token drawn from a fixed vocabulary that
-vassal agents interpret.
-
-### Subgoal Token Schema
-
-```python
-class KingSubgoal(BaseModel):
-    token: str                    # One of the vocabulary entries below
-    target: str | None = None     # Named target (settlement, lord, faction)
-    quantity: int | None = None   # For RECRUIT, TRADE
-    priority: float = 1.0         # 0.0–2.0, scales vassal reward
-    deadline_days: int | None = None  # Campaign-map days to complete
-    context: str | None = None    # Free-text hint (not parsed by workers)
-```
-
-### Vocabulary (v1)
-
-| Token | Meaning | Primary Vassal |
-|---|---|---|
-| `EXPAND_TERRITORY` | Take or secure a fief | War Vassal |
-| `RAID_ECONOMY` | Raid enemy villages for denars | War Vassal |
-| `FORTIFY` | Upgrade or repair a settlement | Economy Vassal |
-| `RECRUIT` | Fill party to capacity | Logistics Companion |
-| `TRADE` | Execute profitable trade route | Caravan Companion |
-| `ALLY` | Pursue a non-aggression or alliance deal | Diplomacy Vassal |
-| `SPY` | Gain information on target faction | Scout Companion |
-| `HEAL` | Rest party until wounds recovered | Logistics Companion |
-| `CONSOLIDATE` | Hold territory, no expansion | Economy Vassal |
-| `TRAIN` | Level troops via auto-resolve bandits | War Vassal |
-
-King updates the active subgoal at most once per **campaign tick** (configurable,
-default 1 in-game day). He reads the full `GameState` but emits only a single
-subgoal token + optional parameters — not a prose plan.
-
-### King Decision Loop
-
-```
-while campaign_running:
-    state = gabs.get_state()          # Full kingdom + map snapshot
-    subgoal = king_llm.decide(state)  # Qwen3:32b, temp=0.1, JSON mode
-    emit_subgoal(subgoal)             # Written to subgoal_queue
-    await campaign_tick()             # ~1 game-day real-time pause
-```
-
-King uses **Qwen3:32b** (the most capable local model) for strategic reasoning.
-Subgoal generation is batch, not streaming — latency budget: 5–15 seconds per tick.
-
----
-
-## 2. Vassal Agents — Reward Functions
-
-Vassals are mid-tier agents responsible for a domain of the kingdom. Each vassal
-has a defined reward function. Vassals run on **Qwen3:14b** (balanced capability
-vs. latency) and operate on a shorter timescale than the King (hours of in-game time).
-
-### 2a. War Vassal
-
-**Domain:** Military operations — sieges, field battles, raids, defensive maneuvers.
-
-**Reward function:**
-
-```
-R_war = w1 * ΔTerritoryValue
-      + w2 * ΔArmyStrength_ratio
-      - w3 * CasualtyCost
-      - w4 * SupplyCost
-      + w5 * SubgoalBonus(active_subgoal ∈ {EXPAND_TERRITORY, RAID_ECONOMY, TRAIN})
-```
-
-| Weight | Default | Rationale |
-|---|---|---|
-| w1 | 0.40 | Territory is the primary long-term asset |
-| w2 | 0.25 | Army ratio relative to nearest rival |
-| w3 | 0.20 | Casualties are expensive to replace |
-| w4 | 0.10 | Supply burn limits campaign duration |
-| w5 | 0.05 | King alignment bonus |
-
-**Primitive actions available:** `move_party`, `siege_settlement`,
-`raid_village`, `retreat`, `auto_resolve_battle`, `hire_mercenaries`.
-
-### 2b. Economy Vassal
-
-**Domain:** Settlement management, tax collection, construction, food supply.
-
-**Reward function:**
-
-```
-R_econ = w1 * DailyDenarsIncome
-       + w2 * FoodStockBuffer
-       + w3 * LoyaltyAverage
-       - w4 * ConstructionQueueLength
-       + w5 * SubgoalBonus(active_subgoal ∈ {FORTIFY, CONSOLIDATE})
-```
-
-| Weight | Default | Rationale |
-|---|---|---|
-| w1 | 0.35 | Income is the fuel for everything |
-| w2 | 0.25 | Starvation causes immediate loyalty crash |
-| w3 | 0.20 | Low loyalty triggers revolt |
-| w4 | 0.15 | Idle construction is opportunity cost |
-| w5 | 0.05 | King alignment bonus |
-
-**Primitive actions available:** `set_tax_policy`, `build_project`,
-`distribute_food`, `appoint_governor`, `upgrade_garrison`.
-
-### 2c. Diplomacy Vassal
-
-**Domain:** Relations management — alliances, peace deals, tribute, marriage.
-
-**Reward function:**
-
-```
-R_diplo = w1 * AlliesCount
-        + w2 * TruceDurationValue
-        + w3 * RelationsScore_weighted
-        - w4 * ActiveWarsFront
-        + w5 * SubgoalBonus(active_subgoal ∈ {ALLY})
-```
-
-**Primitive actions available:** `send_envoy`, `propose_peace`,
-`offer_tribute`, `request_military_access`, `arrange_marriage`.
-
----
-
-## 3. Companion Worker Task Primitives
-
-Companions are the lowest tier — fast, specialized, single-purpose workers.
-They run on **Qwen3:8b** (or smaller) for sub-2-second response times.
-Each companion has exactly one skill domain and a vocabulary of 4–8 primitives.
-
-### 3a. Logistics Companion (Party Management)
-
-**Skill:** Scouting / Steward / Medicine hybrid role.
-
-| Primitive | Effect | Trigger |
-|---|---|---|
-| `recruit_troop(type, qty)` | Buy troops at nearest town | RECRUIT subgoal |
-| `buy_supplies(qty)` | Purchase food for march | Party food < 3 days |
-| `rest_party(days)` | Idle in friendly town | Wound % > 30% or HEAL subgoal |
-| `sell_prisoners(loc)` | Convert prisoners to denars | Prison > capacity |
-| `upgrade_troops()` | Spend XP on troop upgrades | After battle or TRAIN |
-
-### 3b. Caravan Companion (Trade)
-
-**Skill:** Trade / Charm.
-
-| Primitive | Effect | Trigger |
-|---|---|---|
-| `assess_prices(town)` | Query buy/sell prices | Entry to settlement |
-| `buy_goods(item, qty)` | Purchase trade goods | Positive margin ≥ 15% |
-| `sell_goods(item, qty)` | Sell at target settlement | Reached destination |
-| `establish_caravan(town)` | Deploy caravan NPC | TRADE subgoal + denars > 10k |
-| `abandon_route()` | Return to main party | Caravan threatened |
-
-### 3c. Scout Companion (Intelligence)
-
-**Skill:** Scouting / Roguery.
-
-| Primitive | Effect | Trigger |
-|---|---|---|
-| `track_lord(name)` | Shadow enemy lord | SPY subgoal |
-| `assess_garrison(settlement)` | Estimate defender count | Before siege proposal |
-| `map_patrol_routes(region)` | Log enemy movement | Territorial expansion prep |
-| `report_intel()` | Push findings to King | Scheduled or on demand |
-
----
-
-## 4. Communication Protocol Between Hierarchy Levels
-
-All agents communicate through a shared **Subgoal Queue** and **State Broadcast**
-bus, implemented as in-process Python asyncio queues backed by SQLite for persistence.
-
-### Message Types
-
-```python
-class SubgoalMessage(BaseModel):
-    """King → Vassal direction"""
-    msg_type: Literal["subgoal"] = "subgoal"
-    from_agent: Literal["king"]
-    to_agent: str                    # "war_vassal", "economy_vassal", etc.
-    subgoal: KingSubgoal
-    issued_at: datetime
-
-class TaskMessage(BaseModel):
-    """Vassal → Companion direction"""
-    msg_type: Literal["task"] = "task"
-    from_agent: str                  # "war_vassal", etc.
-    to_agent: str                    # "logistics_companion", etc.
-    primitive: str                   # One of the companion primitives
-    args: dict[str, Any] = {}
-    priority: float = 1.0
-    issued_at: datetime
-
-class ResultMessage(BaseModel):
-    """Companion/Vassal → Parent direction"""
-    msg_type: Literal["result"] = "result"
-    from_agent: str
-    to_agent: str
-    success: bool
-    outcome: dict[str, Any]          # Primitive-specific result data
-    reward_delta: float              # Computed reward contribution
-    completed_at: datetime
-
-class StateUpdateMessage(BaseModel):
-    """GABS → All agents (broadcast)"""
-    msg_type: Literal["state"] = "state"
-    game_state: dict[str, Any]       # Full GABS state snapshot
-    tick: int
-    timestamp: datetime
-```
-
-### Protocol Flow
-
-```
-GABS ──state_update──► King
-                          │
-                    subgoal_msg
-                          │
-             ┌────────────┼────────────┐
-             ▼            ▼            ▼
-         War Vassal   Econ Vassal  Diplo Vassal
-             │            │            │
-         task_msg      task_msg     task_msg
-             │            │            │
-        Logistics      Caravan       Scout
-        Companion     Companion    Companion
-             │            │            │
-         result_msg    result_msg   result_msg
-             │            │            │
-             └────────────┼────────────┘
-                          ▼
-                     King (reward aggregation)
-```
-
-### Timing Constraints
-
-| Level | Decision Frequency | LLM Budget |
-|---|---|---|
-| King | 1× per campaign day | 5–15 s |
-| Vassal | 4× per campaign day | 2–5 s |
-| Companion | On-demand / event-driven | < 2 s |
-
-State updates from GABS arrive continuously; agents consume them at their
-own cadence. No agent blocks another's queue.
-
-### Conflict Resolution
-
-If two vassals propose conflicting actions (e.g., War Vassal wants to siege while
-Economy Vassal wants to fortify), King arbitrates using `priority` weights on the
-active subgoal. The highest-priority active subgoal wins resource contention.
-
----
-
-## 5. Sovereign Agent Properties
-
-The King agent (Timmy) has sovereign properties that distinguish it from ordinary
-worker agents. These map directly to Timmy's existing identity architecture.
-
-### 5a. Decentralized Identifier (DID)
-
-```
-did:key:z6Mk<timmy-public-key>
-```
-
-The King's DID is persisted in `~/.timmy/identity.json` (existing SOUL.md pattern).
-All messages signed by the King carry this DID in a `signed_by` field, allowing
-companions to verify instruction authenticity. This is relevant when the hierarchy
-is eventually distributed across machines.
-
-### 5b. Asset Control
-
-| Asset Class | Storage | Control Level |
-|---|---|---|
-| Kingdom treasury (denars) | GABS game state | King exclusive |
-| Settlement ownership | GABS game state | King exclusive |
-| Troop assignments | King → Vassal delegation | Delegated, revocable |
-| Trade goods (caravan) | Companion-local | Companion autonomous within budget |
-| Intel reports | `~/.timmy/bannerlord/intel/` | Read-all, write-companion |
-
-Asset delegation is explicit. Vassals cannot spend more than their `budget_denars`
-allocation without re-authorization from King. Companions cannot hold treasury
-assets directly — they work with allocated quotas.
-
-### 5c. Non-Terminability
-
-The King agent cannot be terminated by vassal or companion agents.
-Termination authority is reserved for:
-1. The human operator (Ctrl+C or `timmy stop`)
-2. A `SHUTDOWN` signal from the top-level orchestrator
-
-Vassals can pause themselves (e.g., awaiting GABS state) but cannot signal the King
-to stop. This prevents a misbehaving military vassal from ending the campaign.
-
-Implementation: King runs in the main asyncio event loop. Vassals and companions
-run in `asyncio.TaskGroup` subgroups. Only the King's task holds a reference to
-the TaskGroup cancel scope.
-
----
-
-## Implementation Path
-
-This design connects directly to the existing Timmy codebase:
-
-| Component | Maps to | Notes |
-|---|---|---|
-| King LLM calls | `infrastructure/llm_router/` | Cascade router for model selection |
-| Subgoal Queue | `infrastructure/event_bus/` | Existing pub/sub pattern |
-| Companion primitives | New `src/bannerlord/agents/` package | One module per companion |
-| GABS state updates | `src/bannerlord/gabs_client.py` | TCP JSON-RPC, port 4825 |
-| Asset ledger | `src/bannerlord/ledger.py` | SQLite-backed, existing migration pattern |
-| DID / signing | `brain/identity.py` | Extends existing SOUL.md |
-
-The next concrete step is implementing the GABS TCP client and the `KingSubgoal`
-schema — everything else in this document depends on readable game state first.
-
----
-
-## References
-
-- Ahilan, S. & Dayan, P. (2019). Feudal Multi-Agent Hierarchies for Cooperative
-  Reinforcement Learning. https://arxiv.org/abs/1901.08492
-- Rood, S. (2022). Scaling Reinforcement Learning through Feudal Hierarchy (NPS thesis).
-- Wang, G. et al. (2023). Voyager: An Open-Ended Embodied Agent with Large Language
-  Models. https://arxiv.org/abs/2305.16291
-- Park, J.S. et al. (2023). Generative Agents: Interactive Simulacra of Human Behavior.
-  https://arxiv.org/abs/2304.03442
-- Silveira, T. (2022). CiF-Bannerlord: Social AI Integration in Bannerlord.
--- a/poetry.lock
+++ b/poetry.lock
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -68,7 +68,7 @@ voice = ["pyttsx3", "openai-whisper", "piper-tts", "sounddevice"]
 celery = ["celery"]
 embeddings = ["sentence-transformers", "numpy"]
 git = ["GitPython"]
-research = ["requests", "trafilatura", "google-search-results"]
+research = ["requests", "trafilatura"]
 dev = ["pytest", "pytest-asyncio", "pytest-cov", "pytest-timeout", "pytest-randomly", "pytest-xdist", "selenium"]

 [tool.poetry.group.dev.dependencies]
--- a/scripts/backup_gitea.sh
+++ b/scripts/backup_gitea.sh
@@ -0,0 +1,23 @@
+#!/bin/bash
+# Gitea Hardening Prep: Automated Backup Script
+# Usage: sudo ./backup_gitea.sh
+set -euo pipefail  # Abort on the first failed step so a broken dump is never reported as complete
+BACKUP_DIR="/opt/gitea/backups"
+TIMESTAMP=$(date +"%Y%m%d_%H%M%S")
+GITEA_CONF="/etc/gitea/app.ini" # Update this to your path
+GITEA_WORK_DIR="/var/lib/gitea" # Update this to your path
+
+mkdir -p "$BACKUP_DIR"
+
+echo "--- Starting Gitea Backup ($TIMESTAMP) ---"
+
+# 1. Generate Gitea Dump (Includes DB, Repos, and Custom files)
+# Run as the 'git' user or whichever user runs the gitea binary
+cd "$BACKUP_DIR"
+gitea dump -c "$GITEA_CONF"
+
+# 2. Secure the backup file (fails, and therefore aborts, if no ZIP was produced)
+chmod 600 "$BACKUP_DIR"/*.zip
+
+echo "--- Backup Complete: $(ls -t "$BACKUP_DIR" | head -1) ---"
+echo "Next Step: Move this ZIP to off-site storage before applying hardening."
--- a/scripts/test_hermes4.py
+++ b/scripts/test_hermes4.py
@@ -1,342 +0,0 @@
-#!/usr/bin/env python3
-"""Hermes 4 smoke test and tool-calling validation script.
-
-Tests the Hermes 4 14B model after importing into Ollama. Covers:
-  1. Basic connectivity — model responds
-  2. Memory usage — under 28 GB with model loaded
-  3. Tool calling — structured JSON output (not raw text)
-  4. Reasoning — <think> tag toggling works
-  5. Timmy-persona smoke test — agent identity prompt
-
-Usage:
-    python scripts/test_hermes4.py                    # Run all tests
-    python scripts/test_hermes4.py --model hermes4-14b
-    python scripts/test_hermes4.py --model hermes4-36b --ctx 8192
-
-Epic: #1091 Project Bannerlord — AutoLoRA Sovereignty Loop (Step 2 of 7)
-Refs: #1101
-"""
-
-from __future__ import annotations
-
-import argparse
-import json
-import subprocess
-import sys
-import time
-from typing import Any
-
-try:
-    import requests
-except ImportError:
-    print("ERROR: 'requests' not installed. Run: pip install requests")
-    sys.exit(1)
-
-OLLAMA_URL = "http://localhost:11434"
-DEFAULT_MODEL = "hermes4-14b"
-MEMORY_LIMIT_GB = 28.0
-
-# ── Tool schema used for tool-calling tests ──────────────────────────────────
-
-READ_FILE_TOOL = {
-    "type": "function",
-    "function": {
-        "name": "read_file",
-        "description": "Read the contents of a file at the given path",
-        "parameters": {
-            "type": "object",
-            "properties": {
-                "path": {
-                    "type": "string",
-                    "description": "Absolute or relative path to the file",
-                }
-            },
-            "required": ["path"],
-        },
-    },
-}
-
-LIST_ISSUES_TOOL = {
-    "type": "function",
-    "function": {
-        "name": "list_issues",
-        "description": "List open issues from a Gitea repository",
-        "parameters": {
-            "type": "object",
-            "properties": {
-                "repo": {"type": "string", "description": "owner/repo slug"},
-                "state": {
-                    "type": "string",
-                    "enum": ["open", "closed", "all"],
-                    "description": "Issue state filter",
-                },
-            },
-            "required": ["repo"],
-        },
-    },
-}
-
-
-# ── Helpers ───────────────────────────────────────────────────────────────────
-
-
-def _post(endpoint: str, payload: dict, timeout: int = 60) -> dict[str, Any]:
-    """POST to Ollama and return parsed JSON."""
-    url = f"{OLLAMA_URL}{endpoint}"
-    resp = requests.post(url, json=payload, timeout=timeout)
-    resp.raise_for_status()
-    return resp.json()
-
-
-def _ollama_memory_gb() -> float:
-    """Estimate Ollama process RSS in GB using ps (macOS/Linux)."""
-    try:
-        # Look for ollama process RSS (macOS: column 6 in MB, Linux: column 6 in KB)
-        result = subprocess.run(
-            ["ps", "-axo", "pid,comm,rss"],
-            capture_output=True,
-            text=True,
-            check=False,
-        )
-        total_kb = 0
-        for line in result.stdout.splitlines():
-            if "ollama" in line.lower():
-                parts = line.split()
-                try:
-                    total_kb += int(parts[-1])
-                except (ValueError, IndexError):
-                    pass
-        return total_kb / (1024 * 1024)  # KB → GB
-    except Exception:
-        return 0.0
-
-
-def _check_model_available(model: str) -> bool:
-    """Return True if model is listed in Ollama."""
-    try:
-        resp = requests.get(f"{OLLAMA_URL}/api/tags", timeout=10)
-        resp.raise_for_status()
-        names = [m["name"] for m in resp.json().get("models", [])]
-        return any(model in n for n in names)
-    except Exception:
-        return False
-
-
-def _chat(model: str, messages: list[dict], tools: list | None = None) -> dict:
-    """Send a chat request to Ollama."""
-    payload: dict = {"model": model, "messages": messages, "stream": False}
-    if tools:
-        payload["tools"] = tools
-    return _post("/api/chat", payload, timeout=120)
-
-
-# ── Test cases ────────────────────────────────────────────────────────────────
-
-
-def test_model_available(model: str) -> bool:
-    """PASS: model is registered in Ollama."""
-    print(f"\n[1/5] Checking model availability: {model}")
-    if _check_model_available(model):
-        print(f"  ✓ {model} is available in Ollama")
-        return True
-    print(
-        f"  ✗ {model} not found. Import with:\n"
-        f"    ollama create {model} -f Modelfile.hermes4-14b\n"
-        f"  Or pull directly if on registry:\n"
-        f"    ollama pull {model}"
-    )
-    return False
-
-
-def test_basic_response(model: str) -> bool:
-    """PASS: model responds coherently to a simple prompt."""
-    print(f"\n[2/5] Basic response test")
-    messages = [
-        {"role": "user", "content": "Reply with exactly: HERMES_OK"},
-    ]
-    try:
-        t0 = time.time()
-        data = _chat(model, messages)
-        elapsed = time.time() - t0
-        content = data.get("message", {}).get("content", "")
-        if "HERMES_OK" in content:
-            print(f"  ✓ Basic response OK ({elapsed:.1f}s): {content.strip()}")
-            return True
-        print(f"  ✗ Unexpected response ({elapsed:.1f}s): {content[:200]!r}")
-        return False
-    except Exception as exc:
-        print(f"  ✗ Request failed: {exc}")
-        return False
-
-
-def test_memory_usage() -> bool:
-    """PASS: Ollama process RSS is under MEMORY_LIMIT_GB."""
-    print(f"\n[3/5] Memory usage check (limit: {MEMORY_LIMIT_GB} GB)")
-    mem_gb = _ollama_memory_gb()
-    if mem_gb == 0.0:
-        print("  ~ Could not determine memory usage (ps unavailable?), skipping")
-        return True
-    if mem_gb < MEMORY_LIMIT_GB:
-        print(f"  ✓ Memory usage: {mem_gb:.1f} GB (under {MEMORY_LIMIT_GB} GB limit)")
-        return True
-    print(
-        f"  ✗ Memory usage: {mem_gb:.1f} GB exceeds {MEMORY_LIMIT_GB} GB limit.\n"
-        "  Consider using Q4_K_M quantisation or reducing num_ctx."
-    )
-    return False
-
-
-def test_tool_calling(model: str) -> bool:
-    """PASS: model produces a tool_calls response (not raw text) for a tool-use prompt."""
-    print(f"\n[4/5] Tool-calling test")
-    messages = [
-        {
-            "role": "user",
-            "content": "Please read the file at /tmp/test.txt using the read_file tool.",
-        }
-    ]
-    try:
-        t0 = time.time()
-        data = _chat(model, messages, tools=[READ_FILE_TOOL])
-        elapsed = time.time() - t0
-        msg = data.get("message", {})
-        tool_calls = msg.get("tool_calls", [])
-
-        if tool_calls:
-            tc = tool_calls[0]
-            fn = tc.get("function", {})
-            print(
-                f"  ✓ Tool call produced ({elapsed:.1f}s):\n"
-                f"    function: {fn.get('name')}\n"
-                f"    arguments: {json.dumps(fn.get('arguments', {}), indent=6)}"
-            )
-            # Verify the function name is correct
-            return fn.get("name") == "read_file"
-
-        # Some models return JSON in the content instead of tool_calls
-        content = msg.get("content", "")
-        if "read_file" in content and "{" in content:
-            print(
-                f"  ~ Model returned tool call as text (not structured). ({elapsed:.1f}s)\n"
-                f"    This is acceptable for the base model before fine-tuning.\n"
-                f"    Content: {content[:300]}"
-            )
-            # Partial pass — model attempted tool calling but via text
-            return True
-
-        print(
-            f"  ✗ No tool call in response ({elapsed:.1f}s).\n"
-            f"    Content: {content[:300]!r}"
-        )
-        return False
-    except Exception as exc:
-        print(f"  ✗ Tool-calling request failed: {exc}")
-        return False
-
-
-def test_timmy_persona(model: str) -> bool:
-    """PASS: model accepts a Timmy persona system prompt and responds in-character."""
-    print(f"\n[5/5] Timmy-persona smoke test")
-    messages = [
-        {
-            "role": "system",
-            "content": (
-                "You are Timmy, Alexander's personal AI agent. "
-                "You are concise, direct, and helpful. "
-                "You always start your responses with 'Timmy here:'."
-            ),
-        },
-        {
-            "role": "user",
-            "content": "What is your name and what can you help me with?",
-        },
-    ]
-    try:
-        t0 = time.time()
-        data = _chat(model, messages)
-        elapsed = time.time() - t0
-        content = data.get("message", {}).get("content", "")
-        if "Timmy" in content or "timmy" in content.lower():
-            print(f"  ✓ Persona accepted ({elapsed:.1f}s): {content[:200].strip()}")
-            return True
-        print(
-            f"  ~ Persona response lacks 'Timmy' identifier ({elapsed:.1f}s).\n"
-            f"    This is a fine-tuning target.\n"
-            f"    Response: {content[:200]!r}"
-        )
-        # Soft pass — base model isn't expected to be perfectly in-character
-        return True
-    except Exception as exc:
-        print(f"  ✗ Persona test failed: {exc}")
-        return False
-
-
-# ── Main ──────────────────────────────────────────────────────────────────────
-
-
-def main() -> int:
-    parser = argparse.ArgumentParser(description="Hermes 4 smoke test suite")
-    parser.add_argument(
-        "--model",
-        default=DEFAULT_MODEL,
-        help=f"Ollama model name (default: {DEFAULT_MODEL})",
-    )
-    parser.add_argument(
-        "--ollama-url",
-        default=OLLAMA_URL,
-        help=f"Ollama base URL (default: {OLLAMA_URL})",
-    )
-    args = parser.parse_args()
-
-    global OLLAMA_URL
-    OLLAMA_URL = args.ollama_url.rstrip("/")
-    model = args.model
-
-    print("=" * 60)
-    print(f"Hermes 4 Validation Suite — {model}")
-    print(f"Ollama: {OLLAMA_URL}")
-    print("=" * 60)
-
-    results: dict[str, bool] = {}
-
-    # Test 1: availability (gate — skip remaining if model missing)
-    results["available"] = test_model_available(model)
-    if not results["available"]:
-        print("\n⚠ Model not available — skipping remaining tests.")
-        print("  Import the model first (see Modelfile.hermes4-14b).")
-        _print_summary(results)
-        return 1
-
-    # Tests 2–5
-    results["basic_response"] = test_basic_response(model)
-    results["memory_usage"] = test_memory_usage()
-    results["tool_calling"] = test_tool_calling(model)
-    results["timmy_persona"] = test_timmy_persona(model)
-
-    return _print_summary(results)
-
-
-def _print_summary(results: dict[str, bool]) -> int:
-    passed = sum(results.values())
-    total = len(results)
-    print("\n" + "=" * 60)
-    print(f"Results: {passed}/{total} passed")
-    print("=" * 60)
-    for name, ok in results.items():
-        icon = "✓" if ok else "✗"
-        print(f"  {icon} {name}")
-
-    if passed == total:
-        print("\n✓ All tests passed. Hermes 4 is ready for AutoLoRA fine-tuning.")
-        print("  Next step: document WORK vs FAIL skill list → fine-tuning targets.")
-    elif results.get("tool_calling") is False:
-        print("\n⚠ Tool-calling FAILED. This is the primary fine-tuning target.")
-        print("  Base model may need LoRA tuning on tool-use examples.")
-    else:
-        print("\n~ Partial pass. Review failures above before fine-tuning.")
-
-    return 0 if passed == total else 1
-
-
-if __name__ == "__main__":
-    sys.exit(main())
--- a/src/dashboard/app.py
+++ b/src/dashboard/app.py
@@ -375,21 +375,13 @@ def _startup_init() -> None:

 def _startup_background_tasks() -> list[asyncio.Task]:
    """Spawn all recurring background tasks (non-blocking)."""
-    bg_tasks = [
+    return [
        asyncio.create_task(_briefing_scheduler()),
        asyncio.create_task(_thinking_scheduler()),
        asyncio.create_task(_loop_qa_scheduler()),
        asyncio.create_task(_presence_watcher()),
        asyncio.create_task(_start_chat_integrations_background()),
    ]
-    try:
-        from timmy.paperclip import start_paperclip_poller
-        bg_tasks.append(asyncio.create_task(start_paperclip_poller()))
-        logger.info("Paperclip poller started")
-    except ImportError:
-        logger.debug("Paperclip module not found, skipping poller")
-    
-    return bg_tasks


 def _try_prune(label: str, prune_fn, days: int) -> None:
--- a/src/dashboard/routes/calm.py
+++ b/src/dashboard/routes/calm.py
@@ -196,7 +196,7 @@ async def get_evening_ritual_form(request: Request, db: Session = Depends(get_db
    if not journal_entry:
        raise HTTPException(status_code=404, detail="No journal entry for today")
    return templates.TemplateResponse(
-        request, "calm/evening_ritual_form.html", {"journal_entry": journal_entry}
+        "calm/evening_ritual_form.html", {"request": request, "journal_entry": journal_entry}
    )


@@ -257,9 +257,8 @@ async def create_new_task(
    # After creating a new task, we might need to re-evaluate NOW/NEXT/LATER, but for simplicity
    # and given the spec, new tasks go to LATER. Promotion happens on completion/deferral.
    return templates.TemplateResponse(
-        request,
        "calm/partials/later_count.html",
-        {"later_tasks_count": len(get_later_tasks(db))},
+        {"request": request, "later_tasks_count": len(get_later_tasks(db))},
    )


@@ -288,9 +287,9 @@ async def start_task(
    promote_tasks(db)

    return templates.TemplateResponse(
-        request,
        "calm/partials/now_next_later.html",
        {
+            "request": request,
            "now_task": get_now_task(db),
            "next_task": get_next_task(db),
            "later_tasks_count": len(get_later_tasks(db)),
@@ -317,9 +316,9 @@ async def complete_task(
    promote_tasks(db)

    return templates.TemplateResponse(
-        request,
        "calm/partials/now_next_later.html",
        {
+            "request": request,
            "now_task": get_now_task(db),
            "next_task": get_next_task(db),
            "later_tasks_count": len(get_later_tasks(db)),
@@ -346,9 +345,9 @@ async def defer_task(
    promote_tasks(db)

    return templates.TemplateResponse(
-        request,
        "calm/partials/now_next_later.html",
        {
+            "request": request,
            "now_task": get_now_task(db),
            "next_task": get_next_task(db),
            "later_tasks_count": len(get_later_tasks(db)),
@@ -361,9 +360,8 @@ async def get_later_tasks_list(request: Request, db: Session = Depends(get_db)):
    """Render the expandable list of LATER tasks."""
    later_tasks = get_later_tasks(db)
    return templates.TemplateResponse(
-        request,
        "calm/partials/later_tasks_list.html",
-        {"later_tasks": later_tasks},
+        {"request": request, "later_tasks": later_tasks},
    )


@@ -406,9 +404,9 @@ async def reorder_tasks(

    # Re-render the relevant parts of the UI
    return templates.TemplateResponse(
-        request,
        "calm/partials/now_next_later.html",
        {
+            "request": request,
            "now_task": get_now_task(db),
            "next_task": get_next_task(db),
            "later_tasks_count": len(get_later_tasks(db)),
--- a/src/dashboard/routes/tools.py
+++ b/src/dashboard/routes/tools.py
@@ -40,9 +40,9 @@ async def tools_page(request: Request):
    total_calls = 0

    return templates.TemplateResponse(
-        request,
        "tools.html",
        {
+            "request": request,
            "available_tools": available_tools,
            "agent_tools": agent_tools,
            "total_calls": total_calls,
--- a/src/infrastructure/claude_quota.py
+++ b/src/infrastructure/claude_quota.py
@@ -25,17 +25,18 @@ import logging
 import subprocess
 import urllib.request
 from dataclasses import dataclass
-from datetime import UTC, datetime
-from enum import StrEnum
+from datetime import datetime, timezone
+from enum import Enum
+from typing import Optional

 logger = logging.getLogger(__name__)


-class MetabolicTier(StrEnum):
+class MetabolicTier(str, Enum):
    """The three-tier metabolic protocol from the Timmy Time architecture."""

-    BURST = "burst"  # Cloud API (Claude/Groq) — expensive, best quality
-    ACTIVE = "active"  # Local 14B (Qwen3-14B) — free, good quality
+    BURST = "burst"      # Cloud API (Claude/Groq) — expensive, best quality
+    ACTIVE = "active"    # Local 14B (Qwen3-14B) — free, good quality
    RESTING = "resting"  # Local 8B (Qwen3-8B) — free, fast, adequate


@@ -43,10 +44,10 @@ class MetabolicTier(StrEnum):
 class QuotaStatus:
    """Current Claude quota state."""

-    five_hour_utilization: float  # 0.0 to 1.0
-    five_hour_resets_at: str | None
-    seven_day_utilization: float  # 0.0 to 1.0
-    seven_day_resets_at: str | None
+    five_hour_utilization: float       # 0.0 to 1.0
+    five_hour_resets_at: Optional[str]
+    seven_day_utilization: float       # 0.0 to 1.0
+    seven_day_resets_at: Optional[str]
    raw_response: dict
    fetched_at: datetime

@@ -100,11 +101,11 @@ class QuotaMonitor:
    USER_AGENT = "claude-code/2.0.32"

    def __init__(self) -> None:
-        self._token: str | None = None
-        self._last_status: QuotaStatus | None = None
+        self._token: Optional[str] = None
+        self._last_status: Optional[QuotaStatus] = None
        self._cache_seconds = 30  # Don't hammer the API

-    def _get_token(self) -> str | None:
+    def _get_token(self) -> Optional[str]:
        """Extract OAuth token from macOS Keychain."""
        if self._token:
            return self._token
@@ -125,16 +126,11 @@ class QuotaMonitor:
            self._token = oauth.get("accessToken")
            return self._token

-        except (
-            json.JSONDecodeError,
-            KeyError,
-            FileNotFoundError,
-            subprocess.TimeoutExpired,
-        ) as exc:
+        except (json.JSONDecodeError, KeyError, FileNotFoundError, subprocess.TimeoutExpired) as exc:
            logger.warning("Could not read Claude Code credentials: %s", exc)
            return None

-    def check(self, force: bool = False) -> QuotaStatus | None:
+    def check(self, force: bool = False) -> Optional[QuotaStatus]:
        """
        Fetch current quota status.

@@ -143,7 +139,7 @@ class QuotaMonitor:
        """
        # Return cached if fresh
        if not force and self._last_status:
-            age = (datetime.now(UTC) - self._last_status.fetched_at).total_seconds()
+            age = (datetime.now(timezone.utc) - self._last_status.fetched_at).total_seconds()
            if age < self._cache_seconds:
                return self._last_status

@@ -174,7 +170,7 @@ class QuotaMonitor:
                seven_day_utilization=float(seven_day.get("utilization", 0.0)),
                seven_day_resets_at=seven_day.get("resets_at"),
                raw_response=data,
-                fetched_at=datetime.now(UTC),
+                fetched_at=datetime.now(timezone.utc),
            )
            return self._last_status

@@ -199,13 +195,13 @@ class QuotaMonitor:
        tier = status.recommended_tier

        if tier == MetabolicTier.BURST and task_complexity == "high":
-            return "claude-sonnet-4-6"  # Cloud — best quality
+            return "claude-sonnet-4-6"   # Cloud — best quality
        elif tier == MetabolicTier.BURST and task_complexity == "medium":
-            return "qwen3:14b"  # Save cloud for truly hard tasks
+            return "qwen3:14b"           # Save cloud for truly hard tasks
        elif tier == MetabolicTier.ACTIVE:
-            return "qwen3:14b"  # Local 14B — good enough
+            return "qwen3:14b"           # Local 14B — good enough
        else:  # RESTING
-            return "qwen3:8b"  # Local 8B — conserve everything
+            return "qwen3:8b"            # Local 8B — conserve everything

    def should_use_cloud(self, task_value: str = "normal") -> bool:
        """
@@ -228,14 +224,14 @@ class QuotaMonitor:
            return False  # Never waste cloud on routine


-def _time_remaining(reset_at: str | None) -> str:
+def _time_remaining(reset_at: Optional[str]) -> str:
    """Format time until reset as human-readable string."""
    if not reset_at or reset_at == "null":
        return "unknown"

    try:
        reset = datetime.fromisoformat(reset_at.replace("Z", "+00:00"))
-        now = datetime.now(UTC)
+        now = datetime.now(timezone.utc)
        diff = reset - now

        if diff.total_seconds() <= 0:
@@ -253,7 +249,7 @@ def _time_remaining(reset_at: str | None) -> str:


 # Module-level singleton
-_quota_monitor: QuotaMonitor | None = None
+_quota_monitor: Optional[QuotaMonitor] = None


 def get_quota_monitor() -> QuotaMonitor:
--- a/src/infrastructure/router/cascade.py
+++ b/src/infrastructure/router/cascade.py
@@ -310,22 +310,6 @@ class CascadeRouter:
                logger.debug("Ollama provider check error: %s", exc)
                return False

-        elif provider.type == "vllm_mlx":
-            # Check if local vllm-mlx server is running (OpenAI-compatible)
-            if requests is None:
-                return True
-            try:
-                base_url = provider.base_url or provider.url or "http://localhost:8000"
-                # Strip /v1 suffix — health endpoint is at the root
-                server_root = base_url.rstrip("/")
-                if server_root.endswith("/v1"):
-                    server_root = server_root[:-3]
-                response = requests.get(f"{server_root}/health", timeout=5)
-                return response.status_code == 200
-            except Exception as exc:
-                logger.debug("vllm-mlx provider check error: %s", exc)
-                return False
-
        elif provider.type in ("openai", "anthropic", "grok"):
            # Check if API key is set
            return provider.api_key is not None and provider.api_key != ""
@@ -485,26 +469,18 @@ class CascadeRouter:
    def _quota_allows_cloud(self, provider: Provider) -> bool:
        """Check quota before routing to a cloud provider.

-        Uses the metabolic protocol via select_model(): cloud calls are only
-        allowed when the quota monitor recommends a cloud model (BURST tier).
+        Uses the metabolic protocol: cloud calls are gated by 5-hour quota.
        Returns True (allow cloud) if quota monitor is unavailable or returns None.
        """
        if _quota_monitor is None:
            return True
        try:
-            suggested = _quota_monitor.select_model("high")
-            # Cloud is allowed only when select_model recommends the cloud model
-            allows = suggested == "claude-sonnet-4-6"
-            if not allows:
-                status = _quota_monitor.check()
-                tier = status.recommended_tier.value if status else "unknown"
-                logger.info(
-                    "Metabolic protocol: %s tier — downshifting %s to local (%s)",
-                    tier,
-                    provider.name,
-                    suggested,
-                )
-            return allows
+            # Map provider type to task_value heuristic
+            task_value = "high"  # conservative default
+            status = _quota_monitor.check()
+            if status is None:
+                return True  # No credentials — caller decides based on config
+            return _quota_monitor.should_use_cloud(task_value)
        except Exception as exc:
            logger.warning("Quota check failed, allowing cloud: %s", exc)
            return True
@@ -643,14 +619,6 @@ class CascadeRouter:
                temperature=temperature,
                max_tokens=max_tokens,
            )
-        elif provider.type == "vllm_mlx":
-            result = await self._call_vllm_mlx(
-                provider=provider,
-                messages=messages,
-                model=model or provider.get_default_model(),
-                temperature=temperature,
-                max_tokens=max_tokens,
-            )
        else:
            raise ValueError(f"Unknown provider type: {provider.type}")

@@ -847,48 +815,6 @@ class CascadeRouter:
            "model": response.model,
        }

-    async def _call_vllm_mlx(
-        self,
-        provider: Provider,
-        messages: list[dict],
-        model: str,
-        temperature: float,
-        max_tokens: int | None,
-    ) -> dict:
-        """Call vllm-mlx via its OpenAI-compatible API.
-
-        vllm-mlx exposes the same /v1/chat/completions endpoint as OpenAI,
-        so we reuse the OpenAI client pointed at the local server.
-        No API key is required for local deployments.
-        """
-        import openai
-
-        base_url = provider.base_url or provider.url or "http://localhost:8000"
-        # Ensure the base_url ends with /v1 as expected by the OpenAI client
-        if not base_url.rstrip("/").endswith("/v1"):
-            base_url = base_url.rstrip("/") + "/v1"
-
-        client = openai.AsyncOpenAI(
-            api_key=provider.api_key or "no-key-required",
-            base_url=base_url,
-            timeout=self.config.timeout_seconds,
-        )
-
-        kwargs: dict = {
-            "model": model,
-            "messages": messages,
-            "temperature": temperature,
-        }
-        if max_tokens:
-            kwargs["max_tokens"] = max_tokens
-
-        response = await client.chat.completions.create(**kwargs)
-
-        return {
-            "content": response.choices[0].message.content,
-            "model": response.model,
-        }
-
    def _record_success(self, provider: Provider, latency_ms: float) -> None:
        """Record a successful request."""
        provider.metrics.total_requests += 1
--- a/src/timmy/kimi_delegation.py
+++ b/src/timmy/kimi_delegation.py
@@ -299,7 +299,9 @@ async def poll_kimi_issue(
                        "error": None,
                    }
            else:
-                logger.warning("Poll issue #%s returned %s", issue_number, resp.status_code)
+                logger.warning(
+                    "Poll issue #%s returned %s", issue_number, resp.status_code
+                )

        except Exception as exc:
            logger.warning("Poll error for issue #%s: %s", issue_number, exc)
@@ -330,7 +332,7 @@ def _extract_action_items(text: str) -> list[str]:
    items: list[str] = []
    patterns = [
        re.compile(r"^[-*]\s+\[ \]\s+(.+)", re.MULTILINE),  # - [ ] checkbox
-        re.compile(r"^\d+\.\s+(.+)", re.MULTILINE),  # 1. numbered list
+        re.compile(r"^\d+\.\s+(.+)", re.MULTILINE),          # 1. numbered list
        re.compile(r"^(?:Action|TODO|Next step):\s*(.+)", re.MULTILINE | re.IGNORECASE),
    ]
    seen: set[str] = set()
--- a/src/timmy/paperclip.py
+++ b/src/timmy/paperclip.py
@@ -1,175 +0,0 @@
-"""Paperclip integration for Timmy.
-
-This module provides a client for the Paperclip API, and a poller for
-running research tasks.
-"""
-
-from __future__ import annotations
-
-import asyncio
-import logging
-from dataclasses import dataclass
-
-import httpx
-
-from config import settings
-from timmy.research_triage import triage_research_report
-from timmy.research_tools import google_web_search, get_llm_client
-
-logger = logging.getLogger(__name__)
-
-
-@dataclass
-class PaperclipTask:
-    """A task from the Paperclip API."""
-
-    id: str
-    kind: str
-    context: dict
-
-
-class PaperclipClient:
-    """A client for the Paperclip API."""
-
-    def __init__(self) -> None:
-        self.base_url = settings.paperclip_url
-        self.api_key = settings.paperclip_api_key
-        self.agent_id = settings.paperclip_agent_id
-        self.company_id = settings.paperclip_company_id
-        self.timeout = settings.paperclip_timeout
-
-    async def get_tasks(self) -> list[PaperclipTask]:
-        """Get a list of tasks from the Paperclip API."""
-        async with httpx.AsyncClient(timeout=self.timeout) as client:
-            resp = await client.get(
-                f"{self.base_url}/api/tasks",
-                headers={"Authorization": f"Bearer {self.api_key}"},
-                params={
-                    "agent_id": self.agent_id,
-                    "company_id": self.company_id,
-                    "status": "queued",
-                },
-            )
-            resp.raise_for_status()
-            tasks = resp.json()
-            return [
-                PaperclipTask(id=t["id"], kind=t["kind"], context=t["context"])
-                for t in tasks
-            ]
-
-    async def update_task_status(
-        self, task_id: str, status: str, result: str | None = None
-    ) -> None:
-        """Update the status of a task."""
-        async with httpx.AsyncClient(timeout=self.timeout) as client:
-            await client.patch(
-                f"{self.base_url}/api/tasks/{task_id}",
-                headers={"Authorization": f"Bearer {self.api_key}"},
-                json={"status": status, "result": result},
-            )
-
-
-class ResearchOrchestrator:
-    """Orchestrates research tasks."""
-
-    async def get_gitea_issue(self, issue_number: int) -> dict:
-        """Get a Gitea issue by its number."""
-        owner, repo = settings.gitea_repo.split("/", 1)
-        api_url = f"{settings.gitea_url}/api/v1/repos/{owner}/{repo}/issues/{issue_number}"
-        async with httpx.AsyncClient(timeout=15) as client:
-            resp = await client.get(
-                api_url,
-                headers={"Authorization": f"token {settings.gitea_token}"},
-            )
-            resp.raise_for_status()
-            return resp.json()
-
-    async def post_gitea_comment(self, issue_number: int, comment: str) -> None:
-        """Post a comment to a Gitea issue."""
-        owner, repo = settings.gitea_repo.split("/", 1)
-        api_url = f"{settings.gitea_url}/api/v1/repos/{owner}/{repo}/issues/{issue_number}/comments"
-        async with httpx.AsyncClient(timeout=15) as client:
-            await client.post(
-                api_url,
-                headers={"Authorization": f"token {settings.gitea_token}"},
-                json={"body": comment},
-            )
-
-    async def run_research_pipeline(self, issue_title: str) -> str:
-        """Run the research pipeline."""
-        search_results = await google_web_search(issue_title)
-        
-        llm_client = get_llm_client()
-        response = await llm_client.completion(
-            f"Summarize the following search results and generate a research report:\\n\\n{search_results}",
-            max_tokens=2048,
-        )
-        return response.text
-
-    async def run(self, context: dict) -> str:
-        """Run a research task."""
-        issue_number = context.get("issue_number")
-        if not issue_number:
-            return "Missing issue_number in task context"
-
-        issue = await self.get_gitea_issue(issue_number)
-
-        report = await self.run_research_pipeline(issue["title"])
-
-        triage_results = await triage_research_report(report, source_issue=issue_number)
-
-        comment = f"Research complete for issue #{issue_number}.\\n\\n"
-        if triage_results:
-            comment += "Created the following issues:\\n"
-            for result in triage_results:
-                if result["gitea_issue"]:
-                    comment += f"- #{result['gitea_issue']['number']}: {result['action_item'].title}\\n"
-        else:
-            comment += "No new issues were created.\\n"
-
-        await self.post_gitea_comment(issue_number, comment)
-
-        return f"Research complete for issue #{issue_number}"
-
-
-class PaperclipPoller:
-    """Polls the Paperclip API for new tasks."""
-
-    def __init__(self) -> None:
-        self.client = PaperclipClient()
-        self.orchestrator = ResearchOrchestrator()
-        self.poll_interval = settings.paperclip_poll_interval
-
-    async def poll(self) -> None:
-        """Poll the Paperclip API for new tasks."""
-        if self.poll_interval == 0:
-            return
-
-        while True:
-            try:
-                tasks = await self.client.get_tasks()
-                for task in tasks:
-                    if task.kind == "research":
-                        await self.run_research_task(task)
-            except httpx.HTTPError as exc:
-                logger.warning("Error polling Paperclip: %s", exc)
-
-            await asyncio.sleep(self.poll_interval)
-
-    async def run_research_task(self, task: PaperclipTask) -> None:
-        """Run a research task."""
-        await self.client.update_task_status(task.id, "running")
-        try:
-            result = await self.orchestrator.run(task.context)
-            await self.client.update_task_status(task.id, "completed", result)
-        except Exception as exc:
-            logger.error("Error running research task: %s", exc, exc_info=True)
-            await self.client.update_task_status(task.id, "failed", str(exc))
-
-
-async def start_paperclip_poller() -> None:
-    """Start the Paperclip poller."""
-    if settings.paperclip_enabled:
-        poller = PaperclipPoller()
-        asyncio.create_task(poller.poll())
-
--- a/src/timmy/research_tools.py
+++ b/src/timmy/research_tools.py
@@ -1,42 +0,0 @@
-"""Tools for the research pipeline."""
-
-from __future__ import annotations
-
-import logging
-import os
-from typing import Any
-
-from config import settings
-from serpapi import GoogleSearch
-
-logger = logging.getLogger(__name__)
-
-
-async def google_web_search(query: str) -> str:
-    """Perform a Google search and return the results."""
-    if "SERPAPI_API_KEY" not in os.environ:
-        logger.warning("SERPAPI_API_KEY not set, skipping web search")
-        return ""
-    params = {
-        "q": query,
-        "api_key": os.environ["SERPAPI_API_KEY"],
-    }
-    search = GoogleSearch(params)
-    results = search.get_dict()
-    return str(results)
-
-
-def get_llm_client() -> Any:
-    """Get an LLM client."""
-    # This is a placeholder. In a real application, this would return
-    # a client for an LLM service like OpenAI, Anthropic, or a local
-    # model.
-    class MockLLMClient:
-        async def completion(self, prompt: str, max_tokens: int) -> Any:
-            class MockCompletion:
-                def __init__(self, text: str) -> None:
-                    self.text = text
-
-            return MockCompletion(f"This is a summary of the search results for '{prompt}'.")
-
-    return MockLLMClient()
--- a/src/timmy/research_triage.py
+++ b/src/timmy/research_triage.py
@@ -54,7 +54,9 @@ class ActionItem:
                parts.append(f"- {url}")

        if source_issue:
-            parts.append(f"\n### Origin\nExtracted from research in #{source_issue}")
+            parts.append(
+                f"\n### Origin\nExtracted from research in #{source_issue}"
+            )

        parts.append("\n---\n*Auto-triaged from research findings by Timmy*")
        return "\n".join(parts)
@@ -121,7 +123,7 @@ def _validate_action_item(raw_item: dict[str, Any]) -> ActionItem | None:

    labels = raw_item.get("labels", [])
    if isinstance(labels, str):
-        labels = [lbl.strip() for lbl in labels.split(",") if lbl.strip()]
+        labels = [l.strip() for l in labels.split(",") if l.strip()]
    if not isinstance(labels, list):
        labels = []

@@ -301,7 +303,7 @@ async def _resolve_label_ids(
            if resp.status_code != 200:
                return []

-            existing = {lbl["name"]: lbl["id"] for lbl in resp.json()}
+            existing = {l["name"]: l["id"] for l in resp.json()}
            label_ids = []

            for name in label_names:
--- a/src/timmy_serve/cli.py
+++ b/src/timmy_serve/cli.py
@@ -14,9 +14,7 @@ app = typer.Typer(help="Timmy Serve — sovereign AI agent API")
 def start(
    port: int = typer.Option(8402, "--port", "-p", help="Port for the serve API"),
    host: str = typer.Option("0.0.0.0", "--host", "-h", help="Host to bind to"),
-    price: int = typer.Option(
-        None, "--price", help="Price per request in sats (default: from config)"
-    ),
+    price: int = typer.Option(None, "--price", help="Price per request in sats (default: from config)"),
    dry_run: bool = typer.Option(False, "--dry-run", help="Print config and exit (for testing)"),
 ):
    """Start Timmy in serve mode."""
--- a/tests/dashboard/test_health.py
+++ b/tests/dashboard/test_health.py
@@ -24,6 +24,7 @@ from dashboard.routes.health import (
    _generate_recommendations,
 )

+
 # ---------------------------------------------------------------------------
 # Pydantic models
 # ---------------------------------------------------------------------------
@@ -117,9 +118,7 @@ class TestGenerateRecommendations:

    def test_unavailable_service(self):
        deps = [
-            DependencyStatus(
-                name="Ollama AI", status="unavailable", sovereignty_score=10, details={}
-            )
+            DependencyStatus(name="Ollama AI", status="unavailable", sovereignty_score=10, details={})
        ]
        recs = _generate_recommendations(deps)
        assert any("Ollama AI is unavailable" in r for r in recs)
@@ -138,7 +137,9 @@ class TestGenerateRecommendations:

    def test_degraded_non_lightning(self):
        """Degraded non-Lightning dep produces no specific recommendation."""
-        deps = [DependencyStatus(name="Redis", status="degraded", sovereignty_score=5, details={})]
+        deps = [
+            DependencyStatus(name="Redis", status="degraded", sovereignty_score=5, details={})
+        ]
        recs = _generate_recommendations(deps)
        assert recs == ["System operating optimally - all dependencies healthy"]

@@ -378,9 +379,7 @@ class TestHealthEndpoint:
        assert response.status_code == 200

    def test_ok_when_ollama_up(self, client):
-        with patch(
-            "dashboard.routes.health.check_ollama", new_callable=AsyncMock, return_value=True
-        ):
+        with patch("dashboard.routes.health.check_ollama", new_callable=AsyncMock, return_value=True):
            data = client.get("/health").json()

        assert data["status"] == "ok"
@@ -416,9 +415,7 @@ class TestHealthStatusPanel:
        assert "text/html" in response.headers["content-type"]

    def test_shows_up_when_ollama_healthy(self, client):
-        with patch(
-            "dashboard.routes.health.check_ollama", new_callable=AsyncMock, return_value=True
-        ):
+        with patch("dashboard.routes.health.check_ollama", new_callable=AsyncMock, return_value=True):
            text = client.get("/health/status").text

        assert "UP" in text
--- a/tests/infrastructure/test_claude_quota.py
+++ b/tests/infrastructure/test_claude_quota.py
@@ -1,7 +1,9 @@
 """Tests for Claude Quota Monitor and Metabolic Protocol."""

-from datetime import UTC, datetime, timedelta
-from unittest.mock import patch
+from datetime import datetime, timedelta, timezone
+from unittest.mock import MagicMock, patch
+
+import pytest

 from infrastructure.claude_quota import (
    MetabolicTier,
@@ -20,7 +22,7 @@ def _make_status(five_hour: float = 0.0, seven_day: float = 0.0) -> QuotaStatus:
        seven_day_utilization=seven_day,
        seven_day_resets_at=None,
        raw_response={},
-        fetched_at=datetime.now(UTC),
+        fetched_at=datetime.now(timezone.utc),
    )


@@ -102,25 +104,25 @@ class TestTimeRemaining:
        assert _time_remaining("") == "unknown"

    def test_past_time_returns_resetting_now(self):
-        past = (datetime.now(UTC) - timedelta(hours=1)).isoformat()
+        past = (datetime.now(timezone.utc) - timedelta(hours=1)).isoformat()
        assert _time_remaining(past) == "resetting now"

    def test_future_time_hours_and_minutes(self):
-        future = (datetime.now(UTC) + timedelta(hours=2, minutes=15)).isoformat()
+        future = (datetime.now(timezone.utc) + timedelta(hours=2, minutes=15)).isoformat()
        result = _time_remaining(future)
        assert "2h" in result
        # Minutes may vary ±1 due to test execution time
        assert "m" in result

    def test_future_time_minutes_only(self):
-        future = (datetime.now(UTC) + timedelta(minutes=45)).isoformat()
+        future = (datetime.now(timezone.utc) + timedelta(minutes=45)).isoformat()
        result = _time_remaining(future)
        assert "h" not in result
        # Minutes may vary ±1 due to test execution time
        assert "m" in result

    def test_z_suffix_handled(self):
-        future = (datetime.now(UTC) + timedelta(hours=1)).strftime("%Y-%m-%dT%H:%M:%SZ")
+        future = (datetime.now(timezone.utc) + timedelta(hours=1)).strftime("%Y-%m-%dT%H:%M:%SZ")
        result = _time_remaining(future)
        assert result != "unknown"

@@ -236,7 +238,7 @@ class TestQuotaMonitorCaching:

    def test_stale_cache_triggers_fetch(self):
        monitor = QuotaMonitor()
-        old_time = datetime.now(UTC) - timedelta(seconds=60)
+        old_time = datetime.now(timezone.utc) - timedelta(seconds=60)
        stale_status = QuotaStatus(
            five_hour_utilization=0.10,
            five_hour_resets_at=None,
--- a/tests/infrastructure/test_router_cascade.py
+++ b/tests/infrastructure/test_router_cascade.py
@@ -489,306 +489,6 @@ class TestProviderAvailabilityCheck:

        assert router._check_provider_available(provider) is False

-    def test_check_vllm_mlx_without_requests(self):
-        """Test vllm-mlx returns True when requests not available (fallback)."""
-        router = CascadeRouter(config_path=Path("/nonexistent"))
-
-        provider = Provider(
-            name="vllm-mlx-local",
-            type="vllm_mlx",
-            enabled=True,
-            priority=2,
-            base_url="http://localhost:8000/v1",
-        )
-
-        import infrastructure.router.cascade as cascade_module
-
-        old_requests = cascade_module.requests
-        cascade_module.requests = None
-        try:
-            assert router._check_provider_available(provider) is True
-        finally:
-            cascade_module.requests = old_requests
-
-    def test_check_vllm_mlx_server_healthy(self):
-        """Test vllm-mlx when health check succeeds."""
-        from unittest.mock import MagicMock, patch
-
-        router = CascadeRouter(config_path=Path("/nonexistent"))
-
-        provider = Provider(
-            name="vllm-mlx-local",
-            type="vllm_mlx",
-            enabled=True,
-            priority=2,
-            base_url="http://localhost:8000/v1",
-        )
-
-        mock_response = MagicMock()
-        mock_response.status_code = 200
-
-        with patch("infrastructure.router.cascade.requests") as mock_requests:
-            mock_requests.get.return_value = mock_response
-            result = router._check_provider_available(provider)
-
-        assert result is True
-        mock_requests.get.assert_called_once_with("http://localhost:8000/health", timeout=5)
-
-    def test_check_vllm_mlx_server_down(self):
-        """Test vllm-mlx when server is not running."""
-        from unittest.mock import patch
-
-        router = CascadeRouter(config_path=Path("/nonexistent"))
-
-        provider = Provider(
-            name="vllm-mlx-local",
-            type="vllm_mlx",
-            enabled=True,
-            priority=2,
-            base_url="http://localhost:8000/v1",
-        )
-
-        with patch("infrastructure.router.cascade.requests") as mock_requests:
-            mock_requests.get.side_effect = ConnectionRefusedError("Connection refused")
-            result = router._check_provider_available(provider)
-
-        assert result is False
-
-    def test_check_vllm_mlx_default_url(self):
-        """Test vllm-mlx uses default localhost:8000 when no URL configured."""
-        from unittest.mock import MagicMock, patch
-
-        router = CascadeRouter(config_path=Path("/nonexistent"))
-
-        provider = Provider(
-            name="vllm-mlx-local",
-            type="vllm_mlx",
-            enabled=True,
-            priority=2,
-        )
-
-        mock_response = MagicMock()
-        mock_response.status_code = 200
-
-        with patch("infrastructure.router.cascade.requests") as mock_requests:
-            mock_requests.get.return_value = mock_response
-            router._check_provider_available(provider)
-
-        mock_requests.get.assert_called_once_with("http://localhost:8000/health", timeout=5)
-
-
-@pytest.mark.asyncio
-class TestVllmMlxProvider:
-    """Test vllm-mlx provider integration."""
-
-    async def test_complete_with_vllm_mlx(self):
-        """Test successful completion via vllm-mlx."""
-        router = CascadeRouter(config_path=Path("/nonexistent"))
-
-        provider = Provider(
-            name="vllm-mlx-local",
-            type="vllm_mlx",
-            enabled=True,
-            priority=2,
-            base_url="http://localhost:8000/v1",
-            models=[{"name": "Qwen/Qwen2.5-14B-Instruct-MLX", "default": True}],
-        )
-        router.providers = [provider]
-
-        with patch.object(router, "_call_vllm_mlx") as mock_call:
-            mock_call.return_value = {
-                "content": "MLX response",
-                "model": "Qwen/Qwen2.5-14B-Instruct-MLX",
-            }
-
-            result = await router.complete(
-                messages=[{"role": "user", "content": "Hi"}],
-            )
-
-        assert result["content"] == "MLX response"
-        assert result["provider"] == "vllm-mlx-local"
-        assert result["model"] == "Qwen/Qwen2.5-14B-Instruct-MLX"
-
-    async def test_vllm_mlx_base_url_normalization(self):
-        """Test _call_vllm_mlx appends /v1 when missing."""
-        from unittest.mock import AsyncMock, MagicMock, patch
-
-        router = CascadeRouter(config_path=Path("/nonexistent"))
-
-        provider = Provider(
-            name="vllm-mlx-local",
-            type="vllm_mlx",
-            enabled=True,
-            priority=2,
-            base_url="http://localhost:8000",  # No /v1
-            models=[{"name": "qwen-mlx", "default": True}],
-        )
-
-        mock_choice = MagicMock()
-        mock_choice.message.content = "hello"
-        mock_response = MagicMock()
-        mock_response.choices = [mock_choice]
-        mock_response.model = "qwen-mlx"
-
-        async def fake_create(**kwargs):
-            return mock_response
-
-        with patch("openai.AsyncOpenAI") as mock_openai_cls:
-            mock_client = MagicMock()
-            mock_client.chat.completions.create = AsyncMock(side_effect=fake_create)
-            mock_openai_cls.return_value = mock_client
-
-            await router._call_vllm_mlx(
-                provider=provider,
-                messages=[{"role": "user", "content": "hi"}],
-                model="qwen-mlx",
-                temperature=0.7,
-                max_tokens=None,
-            )
-
-            call_kwargs = mock_openai_cls.call_args
-            base_url_used = call_kwargs.kwargs.get("base_url") or call_kwargs[1].get("base_url")
-            assert base_url_used.endswith("/v1")
-
-    async def test_vllm_mlx_is_local_not_cloud(self):
-        """Confirm vllm_mlx is not subject to metabolic protocol cloud skip."""
-        router = CascadeRouter(config_path=Path("/nonexistent"))
-
-        provider = Provider(
-            name="vllm-mlx-local",
-            type="vllm_mlx",
-            enabled=True,
-            priority=2,
-            base_url="http://localhost:8000/v1",
-            models=[{"name": "qwen-mlx", "default": True}],
-        )
-        router.providers = [provider]
-
-        # Quota monitor downshifts to local (ACTIVE tier) — vllm_mlx should still be tried
-        with patch("infrastructure.router.cascade._quota_monitor") as mock_qm:
-            mock_qm.select_model.return_value = "qwen3:14b"
-            mock_qm.check.return_value = None
-
-            with patch.object(router, "_call_vllm_mlx") as mock_call:
-                mock_call.return_value = {
-                    "content": "Local MLX response",
-                    "model": "qwen-mlx",
-                }
-                result = await router.complete(
-                    messages=[{"role": "user", "content": "hi"}],
-                )
-
-        assert result["content"] == "Local MLX response"
-
-
-class TestMetabolicProtocol:
-    """Test metabolic protocol: cloud providers skip when quota is ACTIVE/RESTING."""
-
-    def _make_anthropic_provider(self) -> "Provider":
-        return Provider(
-            name="anthropic-primary",
-            type="anthropic",
-            enabled=True,
-            priority=1,
-            api_key="test-key",
-            models=[{"name": "claude-sonnet-4-6", "default": True}],
-        )
-
-    async def test_cloud_provider_allowed_in_burst_tier(self):
-        """BURST tier (quota healthy): cloud provider is tried."""
-        router = CascadeRouter(config_path=Path("/nonexistent"))
-        router.providers = [self._make_anthropic_provider()]
-
-        with patch("infrastructure.router.cascade._quota_monitor") as mock_qm:
-            # select_model returns cloud model → BURST tier
-            mock_qm.select_model.return_value = "claude-sonnet-4-6"
-            mock_qm.check.return_value = None
-
-            with patch.object(router, "_call_anthropic") as mock_call:
-                mock_call.return_value = {"content": "Cloud response", "model": "claude-sonnet-4-6"}
-                result = await router.complete(
-                    messages=[{"role": "user", "content": "hard question"}],
-                )
-
-        mock_call.assert_called_once()
-        assert result["content"] == "Cloud response"
-
-    async def test_cloud_provider_skipped_in_active_tier(self):
-        """ACTIVE tier (5-hour >= 50%): cloud provider is skipped."""
-        router = CascadeRouter(config_path=Path("/nonexistent"))
-        router.providers = [self._make_anthropic_provider()]
-
-        with patch("infrastructure.router.cascade._quota_monitor") as mock_qm:
-            # select_model returns local 14B → ACTIVE tier
-            mock_qm.select_model.return_value = "qwen3:14b"
-            mock_qm.check.return_value = None
-
-            with patch.object(router, "_call_anthropic") as mock_call:
-                with pytest.raises(RuntimeError, match="All providers failed"):
-                    await router.complete(
-                        messages=[{"role": "user", "content": "question"}],
-                    )
-
-        mock_call.assert_not_called()
-
-    async def test_cloud_provider_skipped_in_resting_tier(self):
-        """RESTING tier (7-day >= 80%): cloud provider is skipped."""
-        router = CascadeRouter(config_path=Path("/nonexistent"))
-        router.providers = [self._make_anthropic_provider()]
-
-        with patch("infrastructure.router.cascade._quota_monitor") as mock_qm:
-            # select_model returns local 8B → RESTING tier
-            mock_qm.select_model.return_value = "qwen3:8b"
-            mock_qm.check.return_value = None
-
-            with patch.object(router, "_call_anthropic") as mock_call:
-                with pytest.raises(RuntimeError, match="All providers failed"):
-                    await router.complete(
-                        messages=[{"role": "user", "content": "simple question"}],
-                    )
-
-        mock_call.assert_not_called()
-
-    async def test_local_provider_always_tried_regardless_of_quota(self):
-        """Local (ollama/vllm_mlx) providers bypass the metabolic protocol."""
-        router = CascadeRouter(config_path=Path("/nonexistent"))
-        provider = Provider(
-            name="ollama-local",
-            type="ollama",
-            enabled=True,
-            priority=1,
-            url="http://localhost:11434",
-            models=[{"name": "qwen3:14b", "default": True}],
-        )
-        router.providers = [provider]
-
-        with patch("infrastructure.router.cascade._quota_monitor") as mock_qm:
-            mock_qm.select_model.return_value = "qwen3:8b"  # RESTING tier
-
-            with patch.object(router, "_call_ollama") as mock_call:
-                mock_call.return_value = {"content": "Local response", "model": "qwen3:14b"}
-                result = await router.complete(
-                    messages=[{"role": "user", "content": "hi"}],
-                )
-
-        mock_call.assert_called_once()
-        assert result["content"] == "Local response"
-
-    async def test_no_quota_monitor_allows_cloud(self):
-        """When quota monitor is None (unavailable), cloud providers are allowed."""
-        router = CascadeRouter(config_path=Path("/nonexistent"))
-        router.providers = [self._make_anthropic_provider()]
-
-        with patch("infrastructure.router.cascade._quota_monitor", None):
-            with patch.object(router, "_call_anthropic") as mock_call:
-                mock_call.return_value = {"content": "Cloud response", "model": "claude-sonnet-4-6"}
-                result = await router.complete(
-                    messages=[{"role": "user", "content": "question"}],
-                )
-
-        mock_call.assert_called_once()
-        assert result["content"] == "Cloud response"
-

 class TestCascadeRouterReload:
    """Test hot-reload of providers.yaml."""
--- a/tests/timmy/test_mcp_bridge.py
+++ b/tests/timmy/test_mcp_bridge.py
@@ -175,7 +175,9 @@ async def test_bridge_run_simple_response():
        bridge = MCPBridge(include_gitea=False, include_shell=False)

    mock_resp = MagicMock()
-    mock_resp.json.return_value = {"message": {"role": "assistant", "content": "Hello!"}}
+    mock_resp.json.return_value = {
+        "message": {"role": "assistant", "content": "Hello!"}
+    }
    mock_resp.raise_for_status = MagicMock()

    mock_client = AsyncMock()
@@ -236,7 +238,9 @@ async def test_bridge_run_with_tool_call():

    # Round 2: model returns final text
    final_resp = MagicMock()
-    final_resp.json.return_value = {"message": {"role": "assistant", "content": "Done with tools!"}}
+    final_resp.json.return_value = {
+        "message": {"role": "assistant", "content": "Done with tools!"}
+    }
    final_resp.raise_for_status = MagicMock()

    mock_client = AsyncMock()
@@ -272,13 +276,17 @@ async def test_bridge_run_unknown_tool():
        "message": {
            "role": "assistant",
            "content": "",
-            "tool_calls": [{"function": {"name": "nonexistent", "arguments": {}}}],
+            "tool_calls": [
+                {"function": {"name": "nonexistent", "arguments": {}}}
+            ],
        }
    }
    tool_call_resp.raise_for_status = MagicMock()

    final_resp = MagicMock()
-    final_resp.json.return_value = {"message": {"role": "assistant", "content": "OK"}}
+    final_resp.json.return_value = {
+        "message": {"role": "assistant", "content": "OK"}
+    }
    final_resp.raise_for_status = MagicMock()

    mock_client = AsyncMock()
@@ -324,7 +332,9 @@ async def test_bridge_run_max_rounds():
        "message": {
            "role": "assistant",
            "content": "",
-            "tool_calls": [{"function": {"name": "loop_tool", "arguments": {}}}],
+            "tool_calls": [
+                {"function": {"name": "loop_tool", "arguments": {}}}
+            ],
        }
    }
    tool_call_resp.raise_for_status = MagicMock()
@@ -355,7 +365,9 @@ async def test_bridge_run_connection_error():
        bridge = MCPBridge(include_gitea=False, include_shell=False)

    mock_client = AsyncMock()
-    mock_client.post = AsyncMock(side_effect=httpx.ConnectError("Connection refused"))
+    mock_client.post = AsyncMock(
+        side_effect=httpx.ConnectError("Connection refused")
+    )
    mock_client.aclose = AsyncMock()

    bridge._client = mock_client
--- a/tests/timmy/test_research_triage.py
+++ b/tests/timmy/test_research_triage.py
@@ -9,6 +9,7 @@ import pytest
 from timmy.research_triage import (
    ActionItem,
    _parse_llm_response,
+    _resolve_label_ids,
    _validate_action_item,
    create_gitea_issue,
    extract_action_items,
@@ -249,9 +250,7 @@ class TestCreateGiteaIssue:

        with (
            patch("timmy.research_triage.settings") as mock_settings,
-            patch(
-                "timmy.research_triage._resolve_label_ids", new_callable=AsyncMock, return_value=[1]
-            ),
+            patch("timmy.research_triage._resolve_label_ids", new_callable=AsyncMock, return_value=[1]),
            patch("timmy.research_triage.httpx.AsyncClient") as mock_cls,
        ):
            mock_settings.gitea_enabled = True
@@ -285,9 +284,7 @@ class TestCreateGiteaIssue:

        with (
            patch("timmy.research_triage.settings") as mock_settings,
-            patch(
-                "timmy.research_triage._resolve_label_ids", new_callable=AsyncMock, return_value=[]
-            ),
+            patch("timmy.research_triage._resolve_label_ids", new_callable=AsyncMock, return_value=[]),
            patch("timmy.research_triage.httpx.AsyncClient") as mock_cls,
        ):
            mock_settings.gitea_enabled = True
@@ -334,9 +331,7 @@ class TestTriageResearchReport:

        with (
            patch("timmy.research_triage.settings") as mock_settings,
-            patch(
-                "timmy.research_triage._resolve_label_ids", new_callable=AsyncMock, return_value=[]
-            ),
+            patch("timmy.research_triage._resolve_label_ids", new_callable=AsyncMock, return_value=[]),
            patch("timmy.research_triage.httpx.AsyncClient") as mock_cls,
        ):
            mock_settings.gitea_enabled = True
--- a/tests/unit/test_kimi_delegation.py
+++ b/tests/unit/test_kimi_delegation.py
@@ -14,6 +14,7 @@ from timmy.kimi_delegation import (
    exceeds_local_capacity,
 )

+
 # ── Constants ─────────────────────────────────────────────────────────────────


@@ -454,7 +455,9 @@ class TestExtractAndCreateFollowups:
            patch("config.settings", mock_settings),
            patch("httpx.AsyncClient", return_value=async_ctx),
        ):
-            result = await extract_and_create_followups("1. Do the thing\n2. Do another thing", 10)
+            result = await extract_and_create_followups(
+                "1. Do the thing\n2. Do another thing", 10
+            )

        assert result["success"] is True
        assert 200 in result["created"]