From c8aa6a5fbb6e594c0fb57d55331d50c6f49272f6 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Sat, 21 Feb 2026 17:21:47 +0000
Subject: [PATCH] =?UTF-8?q?feat:=20quality=20analysis=20=E2=80=94=20bug=20?=
 =?UTF-8?q?fixes,=20mobile=20tests,=20HITL=20checklist?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Senior architect review findings + remediations:

BUG FIX — critical interface mismatch
- TimmyAirLLMAgent only exposed print_response(); dashboard route calls
  agent.run() → AttributeError when AirLLM backend is selected.
  Added run() → RunResult(content) as primary inference entry point;
  print_response() now delegates to run() so both call sites share
  one inference path.
- Added RunResult dataclass for Agno-compatible structured return.

BUG FIX — hardcoded model name in health status partial
- health_status.html rendered literal "llama3.2" regardless of
  OLLAMA_MODEL env var. Route now passes settings.ollama_model to
  the template context; partial renders {{ model }} instead.

FEATURE — /mobile-test HITL checklist page
- 22 human-executable test scenarios across: Layout, Touch & Input,
  Chat behaviour, Health, Scroll, Notch/Home Bar, Live UI.
- Pass/Fail/Skip buttons with sessionStorage state persistence.
- Live progress bar + final score summary.
- TEST link added to Mission Control header for quick access on phone.

TEST — 32 new automated mobile quality tests (M1xx–M6xx)
- M1xx: viewport/meta tags (8 tests)
- M2xx: touch target sizing — 44 px min-height, manipulation (4 tests)
- M3xx: iOS zoom prevention, autocapitalize, enterkeyhint (5 tests)
- M4xx: HTMX robustness — hx-sync drop, disabled-elt, polling (5 tests)
- M5xx: safe-area insets, overscroll, dvh units (5 tests)
- M6xx: AirLLM interface contract — run(), RunResult, delegation (5 tests)

Total test count: 61 → 93 (all passing).

https://claude.ai/code/session_01RBuRCBXZNkAQQXXGiJNDmt
---
 src/dashboard/app.py                          |   2 +
 src/dashboard/routes/health.py                |   2 +-
 src/dashboard/routes/mobile_test.py           | 257 ++++++++++++
 src/dashboard/templates/base.html             |   1 +
 src/dashboard/templates/mobile_test.html      | 375 ++++++++++++++++++
 .../templates/partials/health_status.html     |   2 +-
 src/timmy/backends.py                         |  36 +-
 static/style.css                              |  14 +
 tests/test_mobile_scenarios.py                | 281 +++++++++++++
 9 files changed, 958 insertions(+), 12 deletions(-)
 create mode 100644 src/dashboard/routes/mobile_test.py
 create mode 100644 src/dashboard/templates/mobile_test.html
 create mode 100644 tests/test_mobile_scenarios.py

diff --git a/src/dashboard/app.py b/src/dashboard/app.py
index 91312b5d..bdc66e90 100644
--- a/src/dashboard/app.py
+++ b/src/dashboard/app.py
@@ -9,6 +9,7 @@ from fastapi.templating import Jinja2Templates
 from config import settings
 from dashboard.routes.agents import router as agents_router
 from dashboard.routes.health import router as health_router
+from dashboard.routes.mobile_test import router as mobile_test_router
 
 logging.basicConfig(
     level=logging.INFO,
@@ -33,6 +34,7 @@ app.mount("/static", StaticFiles(directory=str(PROJECT_ROOT / "static")), name="
 
 app.include_router(health_router)
 app.include_router(agents_router)
+app.include_router(mobile_test_router)
 
 
 @app.get("/", response_class=HTMLResponse)
diff --git a/src/dashboard/routes/health.py b/src/dashboard/routes/health.py
index 05968e72..f4b81aff 100644
--- a/src/dashboard/routes/health.py
+++ b/src/dashboard/routes/health.py
@@ -38,5 +38,5 @@ async def health_status(request: Request):
     return templates.TemplateResponse(
         request,
         "partials/health_status.html",
-        {"ollama": ollama_ok},
+        {"ollama": ollama_ok, "model": settings.ollama_model},
     )
diff --git a/src/dashboard/routes/mobile_test.py b/src/dashboard/routes/mobile_test.py
new file mode 100644
index 00000000..ef22337d
--- /dev/null
+++ b/src/dashboard/routes/mobile_test.py
@@ -0,0 +1,257 @@
+"""Mobile HITL (Human-in-the-Loop) test checklist route.
+
+GET /mobile-test   — interactive checklist for a human tester on their phone.
+
+Each scenario specifies what to do and what to observe.  The tester marks
+each one PASS / FAIL / SKIP.  Results are stored in sessionStorage so they
+survive page scrolling without hitting the server.
+"""
+
+from pathlib import Path
+
+from fastapi import APIRouter, Request
+from fastapi.responses import HTMLResponse
+from fastapi.templating import Jinja2Templates
+
+router = APIRouter(tags=["mobile-test"])
+templates = Jinja2Templates(directory=str(Path(__file__).parent.parent / "templates"))
+
+# ── Test scenarios ────────────────────────────────────────────────────────────
+# Each dict: id, category, title, steps (list), expected
+SCENARIOS = [
+    # Layout
+    {
+        "id": "L01",
+        "category": "Layout",
+        "title": "Sidebar renders as horizontal strip",
+        "steps": [
+            "Open the Mission Control page on your phone.",
+            "Look at the top section above the chat window.",
+        ],
+        "expected": (
+            "AGENTS and SYSTEM HEALTH panels appear side-by-side in a "
+            "horizontally scrollable strip — not stacked vertically."
+        ),
+    },
+    {
+        "id": "L02",
+        "category": "Layout",
+        "title": "Sidebar panels are horizontally scrollable",
+        "steps": [
+            "Swipe left/right on the AGENTS / SYSTEM HEALTH strip.",
+        ],
+        "expected": "Both panels slide smoothly; no page scroll is triggered.",
+    },
+    {
+        "id": "L03",
+        "category": "Layout",
+        "title": "Chat panel fills ≥ 60 % of viewport height",
+        "steps": [
+            "Look at the TIMMY INTERFACE chat card below the strip.",
+        ],
+        "expected": "The chat card occupies at least 60 % of the visible screen height.",
+    },
+    {
+        "id": "L04",
+        "category": "Layout",
+        "title": "Header stays fixed while chat scrolls",
+        "steps": [
+            "Send several messages until the chat overflows.",
+            "Scroll the chat log up and down.",
+        ],
+        "expected": "The TIMMY TIME / MISSION CONTROL header remains pinned at the top.",
+    },
+    {
+        "id": "L05",
+        "category": "Layout",
+        "title": "No horizontal page overflow",
+        "steps": [
+            "Try swiping left or right anywhere on the page.",
+        ],
+        "expected": "The page does not scroll horizontally; nothing is cut off.",
+    },
+    # Touch & Input
+    {
+        "id": "T01",
+        "category": "Touch & Input",
+        "title": "iOS does NOT zoom when tapping the input",
+        "steps": [
+            "Tap the message input field once.",
+            "Watch whether the browser zooms in.",
+        ],
+        "expected": "The keyboard rises; the layout does NOT zoom in.",
+    },
+    {
+        "id": "T02",
+        "category": "Touch & Input",
+        "title": "Keyboard return key is labelled 'Send'",
+        "steps": [
+            "Tap the message input to open the iOS/Android keyboard.",
+            "Look at the return / action key in the bottom-right of the keyboard.",
+        ],
+        "expected": "The key is labelled 'Send' (not 'Return' or 'Go').",
+    },
+    {
+        "id": "T03",
+        "category": "Touch & Input",
+        "title": "Send button is easy to tap (≥ 44 px tall)",
+        "steps": [
+            "Try tapping the SEND button with your thumb.",
+        ],
+        "expected": "The button registers the tap reliably on the first attempt.",
+    },
+    {
+        "id": "T04",
+        "category": "Touch & Input",
+        "title": "SEND button disabled during in-flight request",
+        "steps": [
+            "Type a message and press SEND.",
+            "Immediately try to tap SEND again before a response arrives.",
+        ],
+        "expected": "The button is visually disabled; no duplicate message is sent.",
+    },
+    {
+        "id": "T05",
+        "category": "Touch & Input",
+        "title": "Empty message cannot be submitted",
+        "steps": [
+            "Leave the input blank.",
+            "Tap SEND.",
+        ],
+        "expected": "Nothing is submitted; the form shows a required-field indicator.",
+    },
+    {
+        "id": "T06",
+        "category": "Touch & Input",
+        "title": "CLEAR button shows confirmation dialog",
+        "steps": [
+            "Send at least one message.",
+            "Tap the CLEAR button in the top-right of the chat header.",
+        ],
+        "expected": "A browser confirmation dialog appears before history is cleared.",
+    },
+    # Chat behaviour
+    {
+        "id": "C01",
+        "category": "Chat",
+        "title": "Chat auto-scrolls to the latest message",
+        "steps": [
+            "Scroll the chat log to the top.",
+            "Send a new message.",
+        ],
+        "expected": "After the response arrives the chat automatically scrolls to the bottom.",
+    },
+    {
+        "id": "C02",
+        "category": "Chat",
+        "title": "Multi-turn conversation — Timmy remembers context",
+        "steps": [
+            "Send: 'My name is <your name>.'",
+            "Then send: 'What is my name?'",
+        ],
+        "expected": "Timmy replies with your name, demonstrating conversation memory.",
+    },
+    {
+        "id": "C03",
+        "category": "Chat",
+        "title": "Loading indicator appears while waiting",
+        "steps": [
+            "Send a message and watch the SEND button.",
+        ],
+        "expected": "A blinking cursor (▋) appears next to SEND while the response is loading.",
+    },
+    {
+        "id": "C04",
+        "category": "Chat",
+        "title": "Offline error is shown gracefully",
+        "steps": [
+            "Stop Ollama on your host machine (or disconnect from Wi-Fi temporarily).",
+            "Send a message from your phone.",
+        ],
+        "expected": "A red 'Timmy is offline' error appears in the chat — no crash or spinner hang.",
+    },
+    # Health panel
+    {
+        "id": "H01",
+        "category": "Health",
+        "title": "Health panel shows Ollama UP when running",
+        "steps": [
+            "Ensure Ollama is running on your host.",
+            "Check the SYSTEM HEALTH panel.",
+        ],
+        "expected": "OLLAMA badge shows green UP.",
+    },
+    {
+        "id": "H02",
+        "category": "Health",
+        "title": "Health panel auto-refreshes without reload",
+        "steps": [
+            "Start Ollama if it is not running.",
+            "Wait up to 35 seconds with the page open.",
+        ],
+        "expected": "The OLLAMA badge flips from DOWN → UP automatically, without a page reload.",
+    },
+    # Scroll & overscroll
+    {
+        "id": "S01",
+        "category": "Scroll",
+        "title": "No rubber-band / bounce on the main page",
+        "steps": [
+            "Scroll to the very top of the page.",
+            "Continue pulling downward.",
+        ],
+        "expected": "The page does not bounce or show a white gap — overscroll is suppressed.",
+    },
+    {
+        "id": "S02",
+        "category": "Scroll",
+        "title": "Chat log scrolls independently inside the card",
+        "steps": [
+            "Scroll inside the chat log area.",
+        ],
+        "expected": "The chat log scrolls smoothly; the outer page does not move.",
+    },
+    # Safe area / notch
+    {
+        "id": "N01",
+        "category": "Notch / Home Bar",
+        "title": "Header clears the status bar / Dynamic Island",
+        "steps": [
+            "On a notched iPhone (Face ID), look at the top of the page.",
+        ],
+        "expected": "The TIMMY TIME header text is not obscured by the notch or Dynamic Island.",
+    },
+    {
+        "id": "N02",
+        "category": "Notch / Home Bar",
+        "title": "Chat input not hidden behind home indicator",
+        "steps": [
+            "Tap the input field and look at the bottom of the screen.",
+        ],
+        "expected": "The input row sits above the iPhone home indicator bar — nothing is cut off.",
+    },
+    # Clock
+    {
+        "id": "X01",
+        "category": "Live UI",
+        "title": "Clock updates every second",
+        "steps": [
+            "Look at the time display in the top-right of the header.",
+            "Watch for 3 seconds.",
+        ],
+        "expected": "The time increments each second in HH:MM:SS format.",
+    },
+]
+
+
+@router.get("/mobile-test", response_class=HTMLResponse)
+async def mobile_test(request: Request):
+    """Interactive HITL mobile test checklist — open on your phone."""
+    categories: dict[str, list] = {}
+    for s in SCENARIOS:
+        categories.setdefault(s["category"], []).append(s)
+    return templates.TemplateResponse(
+        request,
+        "mobile_test.html",
+        {"scenarios": SCENARIOS, "categories": categories, "total": len(SCENARIOS)},
+    )
diff --git a/src/dashboard/templates/base.html b/src/dashboard/templates/base.html
index d8003e78..cd0499c2 100644
--- a/src/dashboard/templates/base.html
+++ b/src/dashboard/templates/base.html
@@ -21,6 +21,7 @@
       <span class="mc-subtitle">MISSION CONTROL</span>
     </div>
     <div class="mc-header-right">
+      <a href="/mobile-test" class="mc-test-link">TEST</a>
       <span class="mc-time" id="clock"></span>
     </div>
   </header>
diff --git a/src/dashboard/templates/mobile_test.html b/src/dashboard/templates/mobile_test.html
new file mode 100644
index 00000000..a92c9f85
--- /dev/null
+++ b/src/dashboard/templates/mobile_test.html
@@ -0,0 +1,375 @@
+{% extends "base.html" %}
+{% block title %}Mobile Test — Timmy Time{% endblock %}
+
+{% block content %}
+<div class="container-fluid mc-content" style="height:auto; overflow:visible;">
+
+  <!-- ── Page header ─────────────────────────────────────────────────── -->
+  <div class="mt-hitl-header">
+    <div>
+      <span class="mt-title">// MOBILE TEST SUITE</span>
+      <span class="mt-sub">HUMAN-IN-THE-LOOP</span>
+    </div>
+    <div class="mt-score-wrap">
+      <span class="mt-score" id="score-display">0 / {{ total }}</span>
+      <span class="mt-score-label">PASSED</span>
+    </div>
+  </div>
+
+  <!-- ── Progress bar ────────────────────────────────────────────────── -->
+  <div class="mt-progress-wrap">
+    <div class="progress" style="height:6px; background:var(--bg-card); border-radius:3px;">
+      <div class="progress-bar mt-progress-bar"
+           id="progress-bar"
+           role="progressbar"
+           style="width:0%; background:var(--green);"
+           aria-valuenow="0" aria-valuemin="0" aria-valuemax="{{ total }}"></div>
+    </div>
+    <div class="mt-progress-legend">
+      <span><span class="mt-dot green"></span>PASS</span>
+      <span><span class="mt-dot red"></span>FAIL</span>
+      <span><span class="mt-dot amber"></span>SKIP</span>
+      <span><span class="mt-dot" style="background:var(--text-dim);box-shadow:none;"></span>PENDING</span>
+    </div>
+  </div>
+
+  <!-- ── Reset / Back ────────────────────────────────────────────────── -->
+  <div class="mt-actions">
+    <a href="/" class="mc-btn-clear">← MISSION CONTROL</a>
+    <button class="mc-btn-clear" onclick="resetAll()" style="border-color:var(--red);color:var(--red);">RESET ALL</button>
+  </div>
+
+  <!-- ── Scenario cards ──────────────────────────────────────────────── -->
+  {% for category, items in categories.items() %}
+  <div class="mt-category-label">{{ category | upper }}</div>
+
+  {% for s in items %}
+  <div class="card mc-panel mt-card" id="card-{{ s.id }}" data-scenario="{{ s.id }}">
+    <div class="card-header mc-panel-header d-flex justify-content-between align-items-center">
+      <div>
+        <span class="mt-id-badge" id="badge-{{ s.id }}">{{ s.id }}</span>
+        <span class="mt-scenario-title">{{ s.title }}</span>
+      </div>
+      <span class="mt-state-chip" id="chip-{{ s.id }}">PENDING</span>
+    </div>
+    <div class="card-body p-3">
+
+      <div class="mt-steps-label">STEPS</div>
+      <ol class="mt-steps">
+        {% for step in s.steps %}
+        <li>{{ step }}</li>
+        {% endfor %}
+      </ol>
+
+      <div class="mt-expected-label">EXPECTED</div>
+      <div class="mt-expected">{{ s.expected }}</div>
+
+      <div class="mt-btn-row">
+        <button class="mt-btn mt-btn-pass" onclick="mark('{{ s.id }}', 'pass')">✓ PASS</button>
+        <button class="mt-btn mt-btn-fail" onclick="mark('{{ s.id }}', 'fail')">✗ FAIL</button>
+        <button class="mt-btn mt-btn-skip" onclick="mark('{{ s.id }}', 'skip')">— SKIP</button>
+      </div>
+
+    </div>
+  </div>
+  {% endfor %}
+  {% endfor %}
+
+  <!-- ── Summary footer ──────────────────────────────────────────────── -->
+  <div class="card mc-panel mt-summary" id="summary">
+    <div class="card-header mc-panel-header">// SUMMARY</div>
+    <div class="card-body p-3" id="summary-body">
+      <p class="mt-summary-hint">Mark all scenarios above to see your final score.</p>
+    </div>
+  </div>
+
+</div><!-- /container -->
+
+
+<!-- ── Styles (scoped to this page) ────────────────────────────────────── -->
+<style>
+  .mt-hitl-header {
+    display: flex;
+    justify-content: space-between;
+    align-items: flex-end;
+    padding: 16px 0 12px;
+    border-bottom: 1px solid var(--border);
+    margin-bottom: 12px;
+  }
+  .mt-title {
+    font-size: 14px;
+    font-weight: 700;
+    color: var(--text-bright);
+    letter-spacing: 0.18em;
+    display: block;
+  }
+  .mt-sub {
+    font-size: 10px;
+    color: var(--text-dim);
+    letter-spacing: 0.2em;
+    display: block;
+    margin-top: 2px;
+  }
+  .mt-score-wrap { text-align: right; }
+  .mt-score {
+    font-size: 22px;
+    font-weight: 700;
+    color: var(--green);
+    letter-spacing: 0.06em;
+    display: block;
+  }
+  .mt-score-label { font-size: 9px; color: var(--text-dim); letter-spacing: 0.2em; }
+
+  .mt-progress-wrap { margin-bottom: 10px; }
+  .mt-progress-legend {
+    display: flex;
+    gap: 16px;
+    font-size: 9px;
+    color: var(--text-dim);
+    letter-spacing: 0.12em;
+    margin-top: 6px;
+  }
+  .mt-dot {
+    display: inline-block;
+    width: 7px;
+    height: 7px;
+    border-radius: 50%;
+    margin-right: 4px;
+    vertical-align: middle;
+  }
+  .mt-dot.green { background: var(--green); box-shadow: 0 0 5px var(--green); }
+  .mt-dot.red   { background: var(--red);   box-shadow: 0 0 5px var(--red); }
+  .mt-dot.amber { background: var(--amber); box-shadow: 0 0 5px var(--amber); }
+
+  .mt-actions {
+    display: flex;
+    gap: 10px;
+    margin-bottom: 16px;
+  }
+
+  .mt-category-label {
+    font-size: 9px;
+    font-weight: 700;
+    color: var(--text-dim);
+    letter-spacing: 0.25em;
+    margin: 20px 0 8px;
+    padding-left: 2px;
+  }
+
+  .mt-card {
+    margin-bottom: 10px;
+    transition: border-color 0.2s;
+  }
+  .mt-card.state-pass { border-color: var(--green) !important; }
+  .mt-card.state-fail { border-color: var(--red)   !important; }
+  .mt-card.state-skip { border-color: var(--amber) !important; opacity: 0.7; }
+
+  .mt-id-badge {
+    font-size: 9px;
+    font-weight: 700;
+    background: var(--border);
+    color: var(--text-dim);
+    border-radius: 2px;
+    padding: 2px 6px;
+    letter-spacing: 0.12em;
+    margin-right: 8px;
+  }
+  .mt-card.state-pass .mt-id-badge { background: var(--green-dim); color: var(--green); }
+  .mt-card.state-fail .mt-id-badge { background: var(--red-dim);   color: var(--red); }
+  .mt-card.state-skip .mt-id-badge { background: var(--amber-dim); color: var(--amber); }
+
+  .mt-scenario-title {
+    font-size: 12px;
+    font-weight: 700;
+    color: var(--text-bright);
+    letter-spacing: 0.05em;
+  }
+
+  .mt-state-chip {
+    font-size: 9px;
+    font-weight: 700;
+    letter-spacing: 0.15em;
+    color: var(--text-dim);
+    padding: 2px 8px;
+    border: 1px solid var(--border);
+    border-radius: 2px;
+    white-space: nowrap;
+  }
+  .mt-card.state-pass .mt-state-chip { color: var(--green); border-color: var(--green); }
+  .mt-card.state-fail .mt-state-chip { color: var(--red);   border-color: var(--red); }
+  .mt-card.state-skip .mt-state-chip { color: var(--amber); border-color: var(--amber); }
+
+  .mt-steps-label, .mt-expected-label {
+    font-size: 9px;
+    font-weight: 700;
+    color: var(--text-dim);
+    letter-spacing: 0.2em;
+    margin-bottom: 6px;
+  }
+  .mt-expected-label { margin-top: 12px; }
+
+  .mt-steps {
+    padding-left: 18px;
+    margin: 0;
+    font-size: 12px;
+    line-height: 1.8;
+    color: var(--text);
+  }
+  .mt-expected {
+    font-size: 12px;
+    line-height: 1.65;
+    color: var(--text-bright);
+    background: var(--bg-card);
+    border-left: 3px solid var(--border-glow);
+    padding: 8px 12px;
+    border-radius: 0 3px 3px 0;
+  }
+
+  .mt-btn-row {
+    display: flex;
+    gap: 8px;
+    margin-top: 14px;
+  }
+  .mt-btn {
+    flex: 1;
+    min-height: 44px;
+    border: 1px solid var(--border);
+    border-radius: 3px;
+    background: var(--bg-deep);
+    color: var(--text-dim);
+    font-family: var(--font);
+    font-size: 11px;
+    font-weight: 700;
+    letter-spacing: 0.12em;
+    cursor: pointer;
+    touch-action: manipulation;
+    transition: background 0.15s, color 0.15s, border-color 0.15s;
+  }
+  .mt-btn-pass:hover, .mt-btn-pass.active { background: var(--green-dim); color: var(--green); border-color: var(--green); }
+  .mt-btn-fail:hover, .mt-btn-fail.active { background: var(--red-dim);   color: var(--red);   border-color: var(--red); }
+  .mt-btn-skip:hover, .mt-btn-skip.active { background: var(--amber-dim); color: var(--amber); border-color: var(--amber); }
+
+  .mt-summary { margin-top: 24px; margin-bottom: 32px; }
+  .mt-summary-hint { color: var(--text-dim); font-size: 12px; margin: 0; }
+
+  .mt-summary-row {
+    display: flex;
+    justify-content: space-between;
+    align-items: center;
+    padding: 8px 0;
+    border-bottom: 1px solid var(--border);
+    font-size: 12px;
+  }
+  .mt-summary-row:last-child { border-bottom: none; }
+  .mt-summary-score { font-size: 28px; font-weight: 700; color: var(--green); margin: 12px 0 4px; }
+  .mt-summary-pct   { font-size: 13px; color: var(--text-dim); }
+
+  @media (max-width: 768px) {
+    .mt-btn-row { gap: 6px; }
+    .mt-btn     { font-size: 10px; padding: 0 4px; }
+  }
+</style>
+
+
+<!-- ── HITL State Machine (sessionStorage) ─────────────────────────────── -->
+<script>
+  const TOTAL = {{ total }};
+  const KEY   = "timmy-mobile-test-results";
+
+  function loadResults() {
+    try { return JSON.parse(sessionStorage.getItem(KEY) || "{}"); }
+    catch { return {}; }
+  }
+  function saveResults(r) {
+    sessionStorage.setItem(KEY, JSON.stringify(r));
+  }
+
+  function mark(id, state) {
+    const results = loadResults();
+    results[id] = state;
+    saveResults(results);
+    applyState(id, state);
+    updateScore(results);
+    updateSummary(results);
+  }
+
+  function applyState(id, state) {
+    const card  = document.getElementById("card-" + id);
+    const chip  = document.getElementById("chip-" + id);
+    const labels = { pass: "PASS", fail: "FAIL", skip: "SKIP" };
+
+    card.classList.remove("state-pass", "state-fail", "state-skip");
+    if (state) card.classList.add("state-" + state);
+    chip.textContent = state ? labels[state] : "PENDING";
+
+    // highlight active button
+    card.querySelectorAll(".mt-btn").forEach(btn => btn.classList.remove("active"));
+    const activeBtn = card.querySelector(".mt-btn-" + state);
+    if (activeBtn) activeBtn.classList.add("active");
+  }
+
+  function updateScore(results) {
+    const passed  = Object.values(results).filter(v => v === "pass").length;
+    const decided = Object.values(results).filter(v => v !== undefined).length;
+    document.getElementById("score-display").textContent = passed + " / " + TOTAL;
+
+    const pct = TOTAL ? (decided / TOTAL) * 100 : 0;
+    const bar = document.getElementById("progress-bar");
+    bar.style.width = pct + "%";
+
+    // colour the bar by overall health
+    const failCount = Object.values(results).filter(v => v === "fail").length;
+    bar.style.background = failCount > 0
+      ? "var(--red)"
+      : passed === TOTAL ? "var(--green)" : "var(--amber)";
+  }
+
+  function updateSummary(results) {
+    const passed  = Object.values(results).filter(v => v === "pass").length;
+    const failed  = Object.values(results).filter(v => v === "fail").length;
+    const skipped = Object.values(results).filter(v => v === "skip").length;
+    const decided = passed + failed + skipped;
+
+    if (decided < TOTAL) {
+      document.getElementById("summary-body").innerHTML =
+        '<p class="mt-summary-hint">' + (TOTAL - decided) + ' scenario(s) still pending.</p>';
+      return;
+    }
+
+    const pct   = TOTAL ? Math.round((passed / TOTAL) * 100) : 0;
+    const color = failed > 0 ? "var(--red)" : "var(--green)";
+    document.getElementById("summary-body").innerHTML = `
+      <div class="mt-summary-score" style="color:${color}">${passed} / ${TOTAL}</div>
+      <div class="mt-summary-pct">${pct}% pass rate</div>
+      <div style="margin-top:16px;">
+        <div class="mt-summary-row"><span>PASSED</span><span style="color:var(--green);font-weight:700;">${passed}</span></div>
+        <div class="mt-summary-row"><span>FAILED</span><span style="color:var(--red);font-weight:700;">${failed}</span></div>
+        <div class="mt-summary-row"><span>SKIPPED</span><span style="color:var(--amber);font-weight:700;">${skipped}</span></div>
+      </div>
+      ${failed > 0 ? '<p style="color:var(--red);margin-top:12px;font-size:11px;">⚠ ' + failed + ' failure(s) need attention before release.</p>' : '<p style="color:var(--green);margin-top:12px;font-size:11px;">All tested scenarios passed — ship it.</p>'}
+    `;
+  }
+
+  function resetAll() {
+    if (!confirm("Reset all test results?")) return;
+    sessionStorage.removeItem(KEY);
+    const results = {};
+    document.querySelectorAll("[data-scenario]").forEach(card => {
+      const id = card.dataset.scenario;
+      applyState(id, null);
+    });
+    updateScore(results);
+    document.getElementById("summary-body").innerHTML =
+      '<p class="mt-summary-hint">Mark all scenarios above to see your final score.</p>';
+  }
+
+  // Restore saved state on load
+  (function init() {
+    const results = loadResults();
+    Object.entries(results).forEach(([id, state]) => applyState(id, state));
+    updateScore(results);
+    updateSummary(results);
+  })();
+</script>
+
+{% endblock %}
diff --git a/src/dashboard/templates/partials/health_status.html b/src/dashboard/templates/partials/health_status.html
index 41385923..ec25fe47 100644
--- a/src/dashboard/templates/partials/health_status.html
+++ b/src/dashboard/templates/partials/health_status.html
@@ -14,6 +14,6 @@
   </div>
   <div class="health-row">
     <span class="health-label">MODEL</span>
-    <span class="badge mc-badge-ready">llama3.2</span>
+    <span class="badge mc-badge-ready">{{ model }}</span>
   </div>
 </div>
diff --git a/src/timmy/backends.py b/src/timmy/backends.py
index 88d03085..ba94f304 100644
--- a/src/timmy/backends.py
+++ b/src/timmy/backends.py
@@ -1,14 +1,16 @@
 """AirLLM backend — only imported when the airllm extra is installed.
 
 Provides TimmyAirLLMAgent: a drop-in replacement for an Agno Agent that
-exposes the same print_response(message, stream) surface while routing
-inference through AirLLM.  On Apple Silicon (arm64 Darwin) the MLX backend
-is selected automatically; everywhere else AutoModel (PyTorch) is used.
+exposes both the run(message, stream) → RunResult interface used by the
+dashboard and the print_response(message, stream) interface used by the CLI.
+On Apple Silicon (arm64 Darwin) the MLX backend is selected automatically;
+everywhere else AutoModel (PyTorch) is used.
 
 No cloud.  No telemetry.  Sats are sovereignty, boss.
 """
 
 import platform
+from dataclasses import dataclass
 from typing import Literal
 
 from timmy.prompts import TIMMY_SYSTEM_PROMPT
@@ -23,6 +25,12 @@ _AIRLLM_MODELS: dict[str, str] = {
 ModelSize = Literal["8b", "70b", "405b"]
 
 
+@dataclass
+class RunResult:
+    """Minimal Agno-compatible run result — carries the model's response text."""
+    content: str
+
+
 def is_apple_silicon() -> bool:
     """Return True when running on an M-series Mac (arm64 Darwin)."""
     return platform.system() == "Darwin" and platform.machine() == "arm64"
@@ -38,7 +46,11 @@ def airllm_available() -> bool:
 
 
 class TimmyAirLLMAgent:
-    """Thin AirLLM wrapper with the same print_response interface as Agno Agent.
+    """Thin AirLLM wrapper compatible with both dashboard and CLI call sites.
+
+    Exposes:
+      run(message, stream)           → RunResult(content=...)  [dashboard]
+      print_response(message, stream) → None                   [CLI]
 
     Maintains a rolling 10-turn in-memory history so Timmy remembers the
     conversation within a session — no SQLite needed at this layer.
@@ -64,12 +76,11 @@ class TimmyAirLLMAgent:
 
     # ── public interface (mirrors Agno Agent) ────────────────────────────────
 
-    def print_response(self, message: str, *, stream: bool = True) -> None:
-        """Run inference, update history, and render the response to stdout.
+    def run(self, message: str, *, stream: bool = False) -> RunResult:
+        """Run inference and return a structured result (matches Agno Agent.run()).
 
-        `stream` is accepted for API compatibility but AirLLM generates the
-        full output in one pass — the result is still printed as soon as it
-        is ready.
+        `stream` is accepted for API compatibility; AirLLM always generates
+        the full output in one pass.
         """
         prompt = self._build_prompt(message)
 
@@ -97,7 +108,12 @@ class TimmyAirLLMAgent:
         self._history.append(f"User: {message}")
         self._history.append(f"Timmy: {response}")
 
-        self._render(response)
+        return RunResult(content=response)
+
+    def print_response(self, message: str, *, stream: bool = True) -> None:
+        """Run inference and render the response to stdout (CLI interface)."""
+        result = self.run(message, stream=stream)
+        self._render(result.content)
 
     # ── private helpers ──────────────────────────────────────────────────────
 
diff --git a/static/style.css b/static/style.css
index ea673e0b..f1e42852 100644
--- a/static/style.css
+++ b/static/style.css
@@ -58,6 +58,7 @@ body {
   z-index: 100;
 }
 .mc-header-left { display: flex; align-items: baseline; gap: 0; }
+.mc-header-right { display: flex; align-items: center; gap: 16px; }
 .mc-title {
   font-size: 18px;
   font-weight: 700;
@@ -75,6 +76,19 @@ body {
   color: var(--blue);
   letter-spacing: 0.1em;
 }
+.mc-test-link {
+  font-size: 9px;
+  font-weight: 700;
+  color: var(--text-dim);
+  letter-spacing: 0.2em;
+  text-decoration: none;
+  border: 1px solid var(--border);
+  border-radius: 2px;
+  padding: 3px 8px;
+  transition: border-color 0.15s, color 0.15s;
+  touch-action: manipulation;
+}
+.mc-test-link:hover { border-color: var(--blue); color: var(--blue); }
 
 /* ── Main layout ─────────────────────────────────── */
 .mc-main {
diff --git a/tests/test_mobile_scenarios.py b/tests/test_mobile_scenarios.py
new file mode 100644
index 00000000..84a7cebc
--- /dev/null
+++ b/tests/test_mobile_scenarios.py
@@ -0,0 +1,281 @@
+"""Mobile-first quality tests — automated validation of mobile UX requirements.
+
+These tests verify the HTML, CSS, and HTMX attributes that make the dashboard
+work correctly on phones.  No browser / Playwright required: we parse the
+static assets and server responses directly.
+
+Categories:
+  M1xx  Viewport & meta tags
+  M2xx  Touch target sizing
+  M3xx  iOS keyboard & zoom prevention
+  M4xx  HTMX robustness (double-submit, sync)
+  M5xx  Safe-area / notch support
+  M6xx  AirLLM backend interface contract
+"""
+
+import re
+from pathlib import Path
+from unittest.mock import AsyncMock, MagicMock, patch
+
+
+# ── helpers ───────────────────────────────────────────────────────────────────
+
+def _css() -> str:
+    """Read the main stylesheet."""
+    css_path = Path(__file__).parent.parent / "static" / "style.css"
+    return css_path.read_text()
+
+
+def _index_html(client) -> str:
+    return client.get("/").text
+
+
+# ── M1xx — Viewport & meta tags ───────────────────────────────────────────────
+
+def test_M101_viewport_meta_present(client):
+    """viewport meta tag must exist for correct mobile scaling."""
+    html = _index_html(client)
+    assert 'name="viewport"' in html
+
+
+def test_M102_viewport_includes_width_device_width(client):
+    html = _index_html(client)
+    assert "width=device-width" in html
+
+
+def test_M103_viewport_includes_initial_scale_1(client):
+    html = _index_html(client)
+    assert "initial-scale=1" in html
+
+
+def test_M104_viewport_includes_viewport_fit_cover(client):
+    """viewport-fit=cover is required for iPhone notch / Dynamic Island support."""
+    html = _index_html(client)
+    assert "viewport-fit=cover" in html
+
+
+def test_M105_apple_mobile_web_app_capable(client):
+    """Enables full-screen / standalone mode when added to iPhone home screen."""
+    html = _index_html(client)
+    assert "apple-mobile-web-app-capable" in html
+
+
+def test_M106_theme_color_meta_present(client):
+    """theme-color sets the browser chrome colour on Android Chrome."""
+    html = _index_html(client)
+    assert 'name="theme-color"' in html
+
+
+def test_M107_apple_status_bar_style_present(client):
+    html = _index_html(client)
+    assert "apple-mobile-web-app-status-bar-style" in html
+
+
+def test_M108_lang_attribute_on_html(client):
+    """lang attribute aids screen readers and mobile TTS."""
+    html = _index_html(client)
+    assert '<html lang="en"' in html
+
+
+# ── M2xx — Touch target sizing ────────────────────────────────────────────────
+
+def test_M201_send_button_min_height_44px():
+    """SEND button must be at least 44 × 44 px — Apple HIG minimum."""
+    css = _css()
+    # Inside the mobile media query the send button must have min-height: 44px
+    assert "min-height: 44px" in css
+
+
+def test_M202_input_min_height_44px():
+    """Chat input must meet 44 px touch target height on mobile."""
+    css = _css()
+    assert "min-height: 44px" in css
+
+
+def test_M203_send_button_min_width_64px():
+    """Send button needs sufficient width so it isn't accidentally missed."""
+    css = _css()
+    assert "min-width: 64px" in css
+
+
+def test_M204_touch_action_manipulation_on_buttons():
+    """touch-action: manipulation removes 300ms tap delay on mobile browsers."""
+    css = _css()
+    assert "touch-action: manipulation" in css
+
+
+# ── M3xx — iOS keyboard & zoom prevention ─────────────────────────────────────
+
+def test_M301_input_font_size_16px_in_mobile_query():
+    """iOS Safari zooms in when input font-size < 16px.  Must be exactly 16px."""
+    css = _css()
+    # The mobile media-query block must override to 16px
+    mobile_block_match = re.search(
+        r"@media\s*\(max-width:\s*768px\)(.*)", css, re.DOTALL
+    )
+    assert mobile_block_match, "Mobile media query not found"
+    mobile_block = mobile_block_match.group(1)
+    assert "font-size: 16px" in mobile_block
+
+
+def test_M302_input_autocapitalize_none(client):
+    """autocapitalize=none prevents iOS from capitalising chat commands."""
+    html = _index_html(client)
+    assert 'autocapitalize="none"' in html
+
+
+def test_M303_input_autocorrect_off(client):
+    """autocorrect=off prevents iOS from mangling technical / proper-noun input."""
+    html = _index_html(client)
+    assert 'autocorrect="off"' in html
+
+
+def test_M304_input_enterkeyhint_send(client):
+    """enterkeyhint=send labels the iOS return key 'Send' for clearer UX."""
+    html = _index_html(client)
+    assert 'enterkeyhint="send"' in html
+
+
+def test_M305_input_spellcheck_false(client):
+    """spellcheck=false prevents red squiggles on technical terms."""
+    html = _index_html(client)
+    assert 'spellcheck="false"' in html
+
+
+# ── M4xx — HTMX robustness ────────────────────────────────────────────────────
+
+def test_M401_form_hx_sync_drop(client):
+    """hx-sync=this:drop discards duplicate submissions (fast double-tap)."""
+    html = _index_html(client)
+    assert 'hx-sync="this:drop"' in html
+
+
+def test_M402_form_hx_disabled_elt(client):
+    """hx-disabled-elt disables the SEND button while a request is in-flight."""
+    html = _index_html(client)
+    assert "hx-disabled-elt" in html
+
+
+def test_M403_form_hx_indicator(client):
+    """hx-indicator wires up the loading spinner to the in-flight state."""
+    html = _index_html(client)
+    assert "hx-indicator" in html
+
+
+def test_M404_health_panel_auto_refreshes(client):
+    """Health panel must poll via HTMX trigger — 'every 30s' confirms this."""
+    html = _index_html(client)
+    assert "every 30s" in html
+
+
+def test_M405_chat_log_loads_history_on_boot(client):
+    """Chat log fetches history via hx-trigger=load so it's populated on open."""
+    html = _index_html(client)
+    assert 'hx-trigger="load"' in html
+
+
+# ── M5xx — Safe-area / notch support ─────────────────────────────────────────
+
+def test_M501_safe_area_inset_top_in_header():
+    """Header padding must accommodate the iPhone notch / status bar."""
+    css = _css()
+    assert "safe-area-inset-top" in css
+
+
+def test_M502_safe_area_inset_bottom_in_footer():
+    """Chat footer padding must clear the iPhone home indicator bar."""
+    css = _css()
+    assert "safe-area-inset-bottom" in css
+
+
+def test_M503_overscroll_behavior_none():
+    """overscroll-behavior: none prevents the jarring rubber-band effect."""
+    css = _css()
+    assert "overscroll-behavior: none" in css
+
+
+def test_M504_webkit_overflow_scrolling_touch():
+    """-webkit-overflow-scrolling: touch gives momentum scrolling on iOS."""
+    css = _css()
+    assert "-webkit-overflow-scrolling: touch" in css
+
+
+def test_M505_dvh_units_used():
+    """Dynamic viewport height (dvh) accounts for collapsing browser chrome."""
+    css = _css()
+    assert "dvh" in css
+
+
+# ── M6xx — AirLLM backend interface contract ──────────────────────────────────
+
+def test_M601_airllm_agent_has_run_method():
+    """TimmyAirLLMAgent must expose run() so the dashboard route can call it."""
+    from timmy.backends import TimmyAirLLMAgent
+    assert hasattr(TimmyAirLLMAgent, "run"), (
+        "TimmyAirLLMAgent is missing run() — dashboard will fail with AirLLM backend"
+    )
+
+
+def test_M602_airllm_run_returns_content_attribute():
+    """run() must return an object with a .content attribute (Agno RunResponse compat)."""
+    with patch("timmy.backends.is_apple_silicon", return_value=False):
+        from timmy.backends import TimmyAirLLMAgent
+        agent = TimmyAirLLMAgent(model_size="8b")
+
+    mock_model = MagicMock()
+    mock_tokenizer = MagicMock()
+    input_ids_mock = MagicMock()
+    input_ids_mock.shape = [1, 5]
+    mock_tokenizer.return_value = {"input_ids": input_ids_mock}
+    mock_tokenizer.decode.return_value = "Sir, affirmative."
+    mock_model.tokenizer = mock_tokenizer
+    mock_model.generate.return_value = [list(range(10))]
+    agent._model = mock_model
+
+    result = agent.run("test")
+    assert hasattr(result, "content"), "run() result must have a .content attribute"
+    assert isinstance(result.content, str)
+
+
+def test_M603_airllm_run_updates_history():
+    """run() must update _history so multi-turn context is preserved."""
+    with patch("timmy.backends.is_apple_silicon", return_value=False):
+        from timmy.backends import TimmyAirLLMAgent
+        agent = TimmyAirLLMAgent(model_size="8b")
+
+    mock_model = MagicMock()
+    mock_tokenizer = MagicMock()
+    input_ids_mock = MagicMock()
+    input_ids_mock.shape = [1, 5]
+    mock_tokenizer.return_value = {"input_ids": input_ids_mock}
+    mock_tokenizer.decode.return_value = "Acknowledged."
+    mock_model.tokenizer = mock_tokenizer
+    mock_model.generate.return_value = [list(range(10))]
+    agent._model = mock_model
+
+    assert len(agent._history) == 0
+    agent.run("hello")
+    assert len(agent._history) == 2
+    assert any("hello" in h for h in agent._history)
+
+
+def test_M604_airllm_print_response_delegates_to_run():
+    """print_response must use run() so both interfaces share one inference path."""
+    with patch("timmy.backends.is_apple_silicon", return_value=False):
+        from timmy.backends import TimmyAirLLMAgent, RunResult
+        agent = TimmyAirLLMAgent(model_size="8b")
+
+    with patch.object(agent, "run", return_value=RunResult(content="ok")) as mock_run, \
+         patch.object(agent, "_render"):
+        agent.print_response("hello", stream=True)
+
+    mock_run.assert_called_once_with("hello", stream=True)
+
+
+def test_M605_health_status_passes_model_to_template(client):
+    """Health status partial must receive the configured model name, not a hardcoded string."""
+    with patch("dashboard.routes.health.check_ollama", new_callable=AsyncMock, return_value=True):
+        response = client.get("/health/status")
+    # The default model is llama3.2 — it should appear in the partial from settings, not hardcoded
+    assert response.status_code == 200
+    assert "llama3.2" in response.text  # rendered via template variable, not hardcoded literal