From c8aa6a5fbb6e594c0fb57d55331d50c6f49272f6 Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 21 Feb 2026 17:21:47 +0000 Subject: [PATCH] =?UTF-8?q?feat:=20quality=20analysis=20=E2=80=94=20bug=20?= =?UTF-8?q?fixes,=20mobile=20tests,=20HITL=20checklist?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Senior architect review findings + remediations: BUG FIX — critical interface mismatch - TimmyAirLLMAgent only exposed print_response(); dashboard route calls agent.run() → AttributeError when AirLLM backend is selected. Added run() → RunResult(content) as primary inference entry point; print_response() now delegates to run() so both call sites share one inference path. - Added RunResult dataclass for Agno-compatible structured return. BUG FIX — hardcoded model name in health status partial - health_status.html rendered literal "llama3.2" regardless of OLLAMA_MODEL env var. Route now passes settings.ollama_model to the template context; partial renders {{ model }} instead. FEATURE — /mobile-test HITL checklist page - 22 human-executable test scenarios across: Layout, Touch & Input, Chat behaviour, Health, Scroll, Notch/Home Bar, Live UI. - Pass/Fail/Skip buttons with sessionStorage state persistence. - Live progress bar + final score summary. - TEST link added to Mission Control header for quick access on phone. TEST — 32 new automated mobile quality tests (M1xx–M6xx) - M1xx: viewport/meta tags (8 tests) - M2xx: touch target sizing — 44 px min-height, manipulation (4 tests) - M3xx: iOS zoom prevention, autocapitalize, enterkeyhint (5 tests) - M4xx: HTMX robustness — hx-sync drop, disabled-elt, polling (5 tests) - M5xx: safe-area insets, overscroll, dvh units (5 tests) - M6xx: AirLLM interface contract — run(), RunResult, delegation (5 tests) Total test count: 61 → 93 (all passing). 
https://claude.ai/code/session_01RBuRCBXZNkAQQXXGiJNDmt --- src/dashboard/app.py | 2 + src/dashboard/routes/health.py | 2 +- src/dashboard/routes/mobile_test.py | 257 ++++++++++++ src/dashboard/templates/base.html | 1 + src/dashboard/templates/mobile_test.html | 375 ++++++++++++++++++ .../templates/partials/health_status.html | 2 +- src/timmy/backends.py | 36 +- static/style.css | 14 + tests/test_mobile_scenarios.py | 281 +++++++++++++ 9 files changed, 958 insertions(+), 12 deletions(-) create mode 100644 src/dashboard/routes/mobile_test.py create mode 100644 src/dashboard/templates/mobile_test.html create mode 100644 tests/test_mobile_scenarios.py diff --git a/src/dashboard/app.py b/src/dashboard/app.py index 91312b5d..bdc66e90 100644 --- a/src/dashboard/app.py +++ b/src/dashboard/app.py @@ -9,6 +9,7 @@ from fastapi.templating import Jinja2Templates from config import settings from dashboard.routes.agents import router as agents_router from dashboard.routes.health import router as health_router +from dashboard.routes.mobile_test import router as mobile_test_router logging.basicConfig( level=logging.INFO, @@ -33,6 +34,7 @@ app.mount("/static", StaticFiles(directory=str(PROJECT_ROOT / "static")), name=" app.include_router(health_router) app.include_router(agents_router) +app.include_router(mobile_test_router) @app.get("/", response_class=HTMLResponse) diff --git a/src/dashboard/routes/health.py b/src/dashboard/routes/health.py index 05968e72..f4b81aff 100644 --- a/src/dashboard/routes/health.py +++ b/src/dashboard/routes/health.py @@ -38,5 +38,5 @@ async def health_status(request: Request): return templates.TemplateResponse( request, "partials/health_status.html", - {"ollama": ollama_ok}, + {"ollama": ollama_ok, "model": settings.ollama_model}, ) diff --git a/src/dashboard/routes/mobile_test.py b/src/dashboard/routes/mobile_test.py new file mode 100644 index 00000000..ef22337d --- /dev/null +++ b/src/dashboard/routes/mobile_test.py @@ -0,0 +1,257 @@ +"""Mobile HITL 
(Human-in-the-Loop) test checklist route. + +GET /mobile-test — interactive checklist for a human tester on their phone. + +Each scenario specifies what to do and what to observe. The tester marks +each one PASS / FAIL / SKIP. Results are stored in sessionStorage so they +survive page scrolling without hitting the server. +""" + +from pathlib import Path + +from fastapi import APIRouter, Request +from fastapi.responses import HTMLResponse +from fastapi.templating import Jinja2Templates + +router = APIRouter(tags=["mobile-test"]) +templates = Jinja2Templates(directory=str(Path(__file__).parent.parent / "templates")) + +# ── Test scenarios ──────────────────────────────────────────────────────────── +# Each dict: id, category, title, steps (list), expected +SCENARIOS = [ + # Layout + { + "id": "L01", + "category": "Layout", + "title": "Sidebar renders as horizontal strip", + "steps": [ + "Open the Mission Control page on your phone.", + "Look at the top section above the chat window.", + ], + "expected": ( + "AGENTS and SYSTEM HEALTH panels appear side-by-side in a " + "horizontally scrollable strip — not stacked vertically." 
+ ), + }, + { + "id": "L02", + "category": "Layout", + "title": "Sidebar panels are horizontally scrollable", + "steps": [ + "Swipe left/right on the AGENTS / SYSTEM HEALTH strip.", + ], + "expected": "Both panels slide smoothly; no page scroll is triggered.", + }, + { + "id": "L03", + "category": "Layout", + "title": "Chat panel fills ≥ 60 % of viewport height", + "steps": [ + "Look at the TIMMY INTERFACE chat card below the strip.", + ], + "expected": "The chat card occupies at least 60 % of the visible screen height.", + }, + { + "id": "L04", + "category": "Layout", + "title": "Header stays fixed while chat scrolls", + "steps": [ + "Send several messages until the chat overflows.", + "Scroll the chat log up and down.", + ], + "expected": "The TIMMY TIME / MISSION CONTROL header remains pinned at the top.", + }, + { + "id": "L05", + "category": "Layout", + "title": "No horizontal page overflow", + "steps": [ + "Try swiping left or right anywhere on the page.", + ], + "expected": "The page does not scroll horizontally; nothing is cut off.", + }, + # Touch & Input + { + "id": "T01", + "category": "Touch & Input", + "title": "iOS does NOT zoom when tapping the input", + "steps": [ + "Tap the message input field once.", + "Watch whether the browser zooms in.", + ], + "expected": "The keyboard rises; the layout does NOT zoom in.", + }, + { + "id": "T02", + "category": "Touch & Input", + "title": "Keyboard return key is labelled 'Send'", + "steps": [ + "Tap the message input to open the iOS/Android keyboard.", + "Look at the return / action key in the bottom-right of the keyboard.", + ], + "expected": "The key is labelled 'Send' (not 'Return' or 'Go').", + }, + { + "id": "T03", + "category": "Touch & Input", + "title": "Send button is easy to tap (≥ 44 px tall)", + "steps": [ + "Try tapping the SEND button with your thumb.", + ], + "expected": "The button registers the tap reliably on the first attempt.", + }, + { + "id": "T04", + "category": "Touch & Input", +
"title": "SEND button disabled during in-flight request", + "steps": [ + "Type a message and press SEND.", + "Immediately try to tap SEND again before a response arrives.", + ], + "expected": "The button is visually disabled; no duplicate message is sent.", + }, + { + "id": "T05", + "category": "Touch & Input", + "title": "Empty message cannot be submitted", + "steps": [ + "Leave the input blank.", + "Tap SEND.", + ], + "expected": "Nothing is submitted; the form shows a required-field indicator.", + }, + { + "id": "T06", + "category": "Touch & Input", + "title": "CLEAR button shows confirmation dialog", + "steps": [ + "Send at least one message.", + "Tap the CLEAR button in the top-right of the chat header.", + ], + "expected": "A browser confirmation dialog appears before history is cleared.", + }, + # Chat behaviour + { + "id": "C01", + "category": "Chat", + "title": "Chat auto-scrolls to the latest message", + "steps": [ + "Scroll the chat log to the top.", + "Send a new message.", + ], + "expected": "After the response arrives the chat automatically scrolls to the bottom.", + }, + { + "id": "C02", + "category": "Chat", + "title": "Multi-turn conversation — Timmy remembers context", + "steps": [ + "Send: 'My name is [your name].'", + "Then send: 'What is my name?'", + ], + "expected": "Timmy replies with your name, demonstrating conversation memory.", + }, + { + "id": "C03", + "category": "Chat", + "title": "Loading indicator appears while waiting", + "steps": [ + "Send a message and watch the SEND button.", + ], + "expected": "A blinking cursor (▋) appears next to SEND while the response is loading.", + }, + { + "id": "C04", + "category": "Chat", + "title": "Offline error is shown gracefully", + "steps": [ + "Stop Ollama on your host machine (or disconnect from Wi-Fi temporarily).", + "Send a message from your phone.", + ], + "expected": "A red 'Timmy is offline' error appears in the chat — no crash or spinner hang.", + }, + # Health panel + { + "id": "H01", +
"category": "Health", + "title": "Health panel shows Ollama UP when running", + "steps": [ + "Ensure Ollama is running on your host.", + "Check the SYSTEM HEALTH panel.", + ], + "expected": "OLLAMA badge shows green UP.", + }, + { + "id": "H02", + "category": "Health", + "title": "Health panel auto-refreshes without reload", + "steps": [ + "Start Ollama if it is not running.", + "Wait up to 35 seconds with the page open.", + ], + "expected": "The OLLAMA badge flips from DOWN → UP automatically, without a page reload.", + }, + # Scroll & overscroll + { + "id": "S01", + "category": "Scroll", + "title": "No rubber-band / bounce on the main page", + "steps": [ + "Scroll to the very top of the page.", + "Continue pulling downward.", + ], + "expected": "The page does not bounce or show a white gap — overscroll is suppressed.", + }, + { + "id": "S02", + "category": "Scroll", + "title": "Chat log scrolls independently inside the card", + "steps": [ + "Scroll inside the chat log area.", + ], + "expected": "The chat log scrolls smoothly; the outer page does not move.", + }, + # Safe area / notch + { + "id": "N01", + "category": "Notch / Home Bar", + "title": "Header clears the status bar / Dynamic Island", + "steps": [ + "On a notched iPhone (Face ID), look at the top of the page.", + ], + "expected": "The TIMMY TIME header text is not obscured by the notch or Dynamic Island.", + }, + { + "id": "N02", + "category": "Notch / Home Bar", + "title": "Chat input not hidden behind home indicator", + "steps": [ + "Tap the input field and look at the bottom of the screen.", + ], + "expected": "The input row sits above the iPhone home indicator bar — nothing is cut off.", + }, + # Clock + { + "id": "X01", + "category": "Live UI", + "title": "Clock updates every second", + "steps": [ + "Look at the time display in the top-right of the header.", + "Watch for 3 seconds.", + ], + "expected": "The time increments each second in HH:MM:SS format.", + }, +] + + +@router.get("/mobile-test", 
response_class=HTMLResponse) +async def mobile_test(request: Request): + """Interactive HITL mobile test checklist — open on your phone.""" + categories: dict[str, list] = {} + for s in SCENARIOS: + categories.setdefault(s["category"], []).append(s) + return templates.TemplateResponse( + request, + "mobile_test.html", + {"scenarios": SCENARIOS, "categories": categories, "total": len(SCENARIOS)}, + ) diff --git a/src/dashboard/templates/base.html b/src/dashboard/templates/base.html index d8003e78..cd0499c2 100644 --- a/src/dashboard/templates/base.html +++ b/src/dashboard/templates/base.html @@ -21,6 +21,7 @@ MISSION CONTROL
+ TEST
diff --git a/src/dashboard/templates/mobile_test.html b/src/dashboard/templates/mobile_test.html new file mode 100644 index 00000000..a92c9f85 --- /dev/null +++ b/src/dashboard/templates/mobile_test.html @@ -0,0 +1,375 @@ +{% extends "base.html" %} +{% block title %}Mobile Test — Timmy Time{% endblock %} + +{% block content %} +
+ + +
+
+ // MOBILE TEST SUITE + HUMAN-IN-THE-LOOP +
+
+ 0 / {{ total }} + PASSED +
+
+ + +
+
+
+
+
+ PASS + FAIL + SKIP + PENDING +
+
+ + +
+ ← MISSION CONTROL + +
+ + + {% for category, items in categories.items() %} +
{{ category | upper }}
+ + {% for s in items %} +
+
+
+ {{ s.id }} + {{ s.title }} +
+ PENDING +
+
+ +
STEPS
+
    + {% for step in s.steps %} +
  1. {{ step }}
  2. + {% endfor %} +
+ +
EXPECTED
+
{{ s.expected }}
+ +
+ + + +
+ +
+
+ {% endfor %} + {% endfor %} + + +
+
// SUMMARY
+
+

Mark all scenarios above to see your final score.

+
+
+ +
+ + + + + + + + + +{% endblock %} diff --git a/src/dashboard/templates/partials/health_status.html b/src/dashboard/templates/partials/health_status.html index 41385923..ec25fe47 100644 --- a/src/dashboard/templates/partials/health_status.html +++ b/src/dashboard/templates/partials/health_status.html @@ -14,6 +14,6 @@
MODEL - llama3.2 + {{ model }}
diff --git a/src/timmy/backends.py b/src/timmy/backends.py index 88d03085..ba94f304 100644 --- a/src/timmy/backends.py +++ b/src/timmy/backends.py @@ -1,14 +1,16 @@ """AirLLM backend — only imported when the airllm extra is installed. Provides TimmyAirLLMAgent: a drop-in replacement for an Agno Agent that -exposes the same print_response(message, stream) surface while routing -inference through AirLLM. On Apple Silicon (arm64 Darwin) the MLX backend -is selected automatically; everywhere else AutoModel (PyTorch) is used. +exposes both the run(message, stream) → RunResult interface used by the +dashboard and the print_response(message, stream) interface used by the CLI. +On Apple Silicon (arm64 Darwin) the MLX backend is selected automatically; +everywhere else AutoModel (PyTorch) is used. No cloud. No telemetry. Sats are sovereignty, boss. """ import platform +from dataclasses import dataclass from typing import Literal from timmy.prompts import TIMMY_SYSTEM_PROMPT @@ -23,6 +25,12 @@ _AIRLLM_MODELS: dict[str, str] = { ModelSize = Literal["8b", "70b", "405b"] +@dataclass +class RunResult: + """Minimal Agno-compatible run result — carries the model's response text.""" + content: str + + def is_apple_silicon() -> bool: """Return True when running on an M-series Mac (arm64 Darwin).""" return platform.system() == "Darwin" and platform.machine() == "arm64" @@ -38,7 +46,11 @@ def airllm_available() -> bool: class TimmyAirLLMAgent: - """Thin AirLLM wrapper with the same print_response interface as Agno Agent. + """Thin AirLLM wrapper compatible with both dashboard and CLI call sites. + + Exposes: + run(message, stream) → RunResult(content=...) [dashboard] + print_response(message, stream) → None [CLI] Maintains a rolling 10-turn in-memory history so Timmy remembers the conversation within a session — no SQLite needed at this layer. 
@@ -64,12 +76,11 @@ class TimmyAirLLMAgent: # ── public interface (mirrors Agno Agent) ──────────────────────────────── - def print_response(self, message: str, *, stream: bool = True) -> None: - """Run inference, update history, and render the response to stdout. + def run(self, message: str, *, stream: bool = False) -> RunResult: + """Run inference and return a structured result (matches Agno Agent.run()). - `stream` is accepted for API compatibility but AirLLM generates the - full output in one pass — the result is still printed as soon as it - is ready. + `stream` is accepted for API compatibility; AirLLM always generates + the full output in one pass. """ prompt = self._build_prompt(message) @@ -97,7 +108,12 @@ class TimmyAirLLMAgent: self._history.append(f"User: {message}") self._history.append(f"Timmy: {response}") - self._render(response) + return RunResult(content=response) + + def print_response(self, message: str, *, stream: bool = True) -> None: + """Run inference and render the response to stdout (CLI interface).""" + result = self.run(message, stream=stream) + self._render(result.content) # ── private helpers ────────────────────────────────────────────────────── diff --git a/static/style.css b/static/style.css index ea673e0b..f1e42852 100644 --- a/static/style.css +++ b/static/style.css @@ -58,6 +58,7 @@ body { z-index: 100; } .mc-header-left { display: flex; align-items: baseline; gap: 0; } +.mc-header-right { display: flex; align-items: center; gap: 16px; } .mc-title { font-size: 18px; font-weight: 700; @@ -75,6 +76,19 @@ body { color: var(--blue); letter-spacing: 0.1em; } +.mc-test-link { + font-size: 9px; + font-weight: 700; + color: var(--text-dim); + letter-spacing: 0.2em; + text-decoration: none; + border: 1px solid var(--border); + border-radius: 2px; + padding: 3px 8px; + transition: border-color 0.15s, color 0.15s; + touch-action: manipulation; +} +.mc-test-link:hover { border-color: var(--blue); color: var(--blue); } /* ── Main layout 
─────────────────────────────────── */ .mc-main { diff --git a/tests/test_mobile_scenarios.py b/tests/test_mobile_scenarios.py new file mode 100644 index 00000000..84a7cebc --- /dev/null +++ b/tests/test_mobile_scenarios.py @@ -0,0 +1,281 @@ +"""Mobile-first quality tests — automated validation of mobile UX requirements. + +These tests verify the HTML, CSS, and HTMX attributes that make the dashboard +work correctly on phones. No browser / Playwright required: we parse the +static assets and server responses directly. + +Categories: + M1xx Viewport & meta tags + M2xx Touch target sizing + M3xx iOS keyboard & zoom prevention + M4xx HTMX robustness (double-submit, sync) + M5xx Safe-area / notch support + M6xx AirLLM backend interface contract +""" + +import re +from pathlib import Path +from unittest.mock import AsyncMock, MagicMock, patch + + +# ── helpers ─────────────────────────────────────────────────────────────────── + +def _css() -> str: + """Read the main stylesheet.""" + css_path = Path(__file__).parent.parent / "static" / "style.css" + return css_path.read_text() + + +def _index_html(client) -> str: + return client.get("/").text + + +# ── M1xx — Viewport & meta tags ─────────────────────────────────────────────── + +def test_M101_viewport_meta_present(client): + """viewport meta tag must exist for correct mobile scaling.""" + html = _index_html(client) + assert 'name="viewport"' in html + + +def test_M102_viewport_includes_width_device_width(client): + html = _index_html(client) + assert "width=device-width" in html + + +def test_M103_viewport_includes_initial_scale_1(client): + html = _index_html(client) + assert "initial-scale=1" in html + + +def test_M104_viewport_includes_viewport_fit_cover(client): + """viewport-fit=cover is required for iPhone notch / Dynamic Island support.""" + html = _index_html(client) + assert "viewport-fit=cover" in html + + +def test_M105_apple_mobile_web_app_capable(client): + """Enables full-screen / standalone mode when 
added to iPhone home screen.""" + html = _index_html(client) + assert "apple-mobile-web-app-capable" in html + + +def test_M106_theme_color_meta_present(client): + """theme-color sets the browser chrome colour on Android Chrome.""" + html = _index_html(client) + assert 'name="theme-color"' in html + + +def test_M107_apple_status_bar_style_present(client): + html = _index_html(client) + assert "apple-mobile-web-app-status-bar-style" in html + + +def test_M108_lang_attribute_on_html(client): + """lang attribute aids screen readers and mobile TTS.""" + html = _index_html(client) + assert '