tests/dashboard/test_mobile_scenarios.py

"""Mobile-first quality tests — automated validation of mobile UX requirements.

These tests verify the HTML, CSS, and HTMX attributes that make the dashboard
work correctly on phones.  No browser / Playwright required: we parse the
static assets and server responses directly.

Categories:
  M1xx  Viewport & meta tags
  M2xx  Touch target sizing
  M3xx  iOS keyboard & zoom prevention
  M4xx  HTMX robustness (double-submit, sync)
  M5xx  Safe-area / notch support
  M6xx  AirLLM backend interface contract
"""

import re
from pathlib import Path
from unittest.mock import AsyncMock, MagicMock, patch

# ── helpers ───────────────────────────────────────────────────────────────────


def _css() -> str:
    """Read the main stylesheet."""
    css_path = Path(__file__).parent.parent.parent / "static" / "style.css"
    return css_path.read_text()


def _index_html(client) -> str:
    return client.get("/").text


def _timmy_panel_html(client) -> str:
    """Fetch the Timmy chat panel (loaded dynamically from index via HTMX)."""
    return client.get("/agents/default/panel").text


# ── M1xx — Viewport & meta tags ───────────────────────────────────────────────


def test_M101_viewport_meta_present(client):
    """viewport meta tag must exist for correct mobile scaling."""
    html = _index_html(client)
    assert 'name="viewport"' in html


def test_M102_viewport_includes_width_device_width(client):
    html = _index_html(client)
    assert "width=device-width" in html


def test_M103_viewport_includes_initial_scale_1(client):
    html = _index_html(client)
    assert "initial-scale=1" in html


def test_M104_viewport_includes_viewport_fit_cover(client):
    """viewport-fit=cover is required for iPhone notch / Dynamic Island support."""
    html = _index_html(client)
    assert "viewport-fit=cover" in html


def test_M105_apple_mobile_web_app_capable(client):
    """Enables full-screen / standalone mode when added to iPhone home screen."""
    html = _index_html(client)
    assert "apple-mobile-web-app-capable" in html


def test_M106_theme_color_meta_present(client):
    """theme-color sets the browser chrome colour on Android Chrome."""
    html = _index_html(client)
    assert 'name="theme-color"' in html


def test_M107_apple_status_bar_style_present(client):
    html = _index_html(client)
    assert "apple-mobile-web-app-status-bar-style" in html


def test_M108_lang_attribute_on_html(client):
    """lang attribute aids screen readers and mobile TTS."""
    html = _index_html(client)
    assert '<html lang="en"' in html


# ── M2xx — Touch target sizing ────────────────────────────────────────────────


def test_M201_send_button_min_height_44px():
    """SEND button must be at least 44 × 44 px — Apple HIG minimum."""
    css = _css()
    # Inside the mobile media query the send button must have min-height: 44px
    assert "min-height: 44px" in css


def test_M203_send_button_min_width_64px():
    """Send button needs sufficient width so it isn't accidentally missed."""
    css = _css()
    assert "min-width: 64px" in css


def test_M204_touch_action_manipulation_on_buttons():
    """touch-action: manipulation removes 300ms tap delay on mobile browsers."""
    css = _css()
    assert "touch-action: manipulation" in css


# ── M3xx — iOS keyboard & zoom prevention ─────────────────────────────────────


def test_M301_input_font_size_16px_in_mobile_query():
    """iOS Safari zooms in when input font-size < 16px.  Must be exactly 16px."""
    css = _css()
    # The mobile media-query block must override to 16px
    mobile_block_match = re.search(r"@media\s*\(max-width:\s*768px\)(.*)", css, re.DOTALL)
    assert mobile_block_match, "Mobile media query not found"
    mobile_block = mobile_block_match.group(1)
    assert "font-size: 16px" in mobile_block


def test_M302_input_autocapitalize_none(client):
    """autocapitalize=none prevents iOS from capitalising chat commands."""
    html = _timmy_panel_html(client)
    assert 'autocapitalize="none"' in html


def test_M303_input_autocorrect_off(client):
    """autocorrect=off prevents iOS from mangling technical / proper-noun input."""
    html = _timmy_panel_html(client)
    assert 'autocorrect="off"' in html


def test_M304_input_enterkeyhint_send(client):
    """enterkeyhint=send labels the iOS return key 'Send' for clearer UX."""
    html = _timmy_panel_html(client)
    assert 'enterkeyhint="send"' in html


def test_M305_input_spellcheck_false(client):
    """spellcheck=false prevents red squiggles on technical terms."""
    html = _timmy_panel_html(client)
    assert 'spellcheck="false"' in html


# ── M4xx — HTMX robustness ────────────────────────────────────────────────────


def test_M401_form_hx_sync_drop(client):
    """hx-sync=this:drop discards duplicate submissions (fast double-tap)."""
    html = _timmy_panel_html(client)
    assert 'hx-sync="this:drop"' in html


def test_M402_form_hx_disabled_elt(client):
    """hx-disabled-elt disables the SEND button while a request is in-flight."""
    html = _timmy_panel_html(client)
    assert "hx-disabled-elt" in html


def test_M403_form_hx_indicator(client):
    """hx-indicator wires up the loading spinner to the in-flight state."""
    html = _timmy_panel_html(client)
    assert "hx-indicator" in html


def test_M404_health_panel_auto_refreshes(client):
    """Health panel must poll via HTMX trigger — 'every 30s' confirms this."""
    html = _index_html(client)
    assert "every 30s" in html


def test_M405_chat_log_loads_history_on_boot(client):
    """Chat log fetches history via hx-trigger=load so it's populated on open."""
    html = _index_html(client)
    assert 'hx-trigger="load"' in html


# ── M5xx — Safe-area / notch support ─────────────────────────────────────────


def test_M501_safe_area_inset_top_in_header():
    """Header padding must accommodate the iPhone notch / status bar."""
    css = _css()
    assert "safe-area-inset-top" in css


def test_M502_safe_area_inset_bottom_in_footer():
    """Chat footer padding must clear the iPhone home indicator bar."""
    css = _css()
    assert "safe-area-inset-bottom" in css


def test_M503_overscroll_behavior_none():
    """overscroll-behavior: none prevents the jarring rubber-band effect."""
    css = _css()
    assert "overscroll-behavior: none" in css


def test_M504_webkit_overflow_scrolling_touch():
    """-webkit-overflow-scrolling: touch gives momentum scrolling on iOS."""
    css = _css()
    assert "-webkit-overflow-scrolling: touch" in css


def test_M505_dvh_units_used():
    """Dynamic viewport height (dvh) accounts for collapsing browser chrome."""
    css = _css()
    assert "dvh" in css


# ── M6xx — AirLLM backend interface contract ──────────────────────────────────


def test_M601_airllm_agent_has_run_method():
    """TimmyAirLLMAgent must expose run() so the dashboard route can call it."""
    from timmy.backends import TimmyAirLLMAgent

    assert hasattr(TimmyAirLLMAgent, "run"), (
        "TimmyAirLLMAgent is missing run() — dashboard will fail with AirLLM backend"
    )


def test_M602_airllm_run_returns_content_attribute():
    """run() must return an object with a .content attribute (Agno RunResponse compat)."""
    with patch("timmy.backends.is_apple_silicon", return_value=False):
        from timmy.backends import TimmyAirLLMAgent

        agent = TimmyAirLLMAgent(model_size="8b")

    mock_model = MagicMock()
    mock_tokenizer = MagicMock()
    input_ids_mock = MagicMock()
    input_ids_mock.shape = [1, 5]
    mock_tokenizer.return_value = {"input_ids": input_ids_mock}
    mock_tokenizer.decode.return_value = "Sir, affirmative."
    mock_model.tokenizer = mock_tokenizer
    mock_model.generate.return_value = [list(range(10))]
    agent._model = mock_model

    result = agent.run("test")
    assert hasattr(result, "content"), "run() result must have a .content attribute"
    assert isinstance(result.content, str)


def test_M603_airllm_run_updates_history():
    """run() must update _history so multi-turn context is preserved."""
    with patch("timmy.backends.is_apple_silicon", return_value=False):
        from timmy.backends import TimmyAirLLMAgent

        agent = TimmyAirLLMAgent(model_size="8b")

    mock_model = MagicMock()
    mock_tokenizer = MagicMock()
    input_ids_mock = MagicMock()
    input_ids_mock.shape = [1, 5]
    mock_tokenizer.return_value = {"input_ids": input_ids_mock}
    mock_tokenizer.decode.return_value = "Acknowledged."
    mock_model.tokenizer = mock_tokenizer
    mock_model.generate.return_value = [list(range(10))]
    agent._model = mock_model

    assert len(agent._history) == 0
    agent.run("hello")
    assert len(agent._history) == 2
    assert any("hello" in h for h in agent._history)


def test_M604_airllm_print_response_delegates_to_run():
    """print_response must use run() so both interfaces share one inference path."""
    with patch("timmy.backends.is_apple_silicon", return_value=False):
        from timmy.backends import RunResult, TimmyAirLLMAgent

        agent = TimmyAirLLMAgent(model_size="8b")

    with (
        patch.object(agent, "run", return_value=RunResult(content="ok")) as mock_run,
        patch.object(agent, "_render"),
    ):
        agent.print_response("hello", stream=True)

    mock_run.assert_called_once_with("hello", stream=True)


def test_M605_health_status_passes_model_to_template(client):
    """Health status partial must receive the configured model name, not a hardcoded string."""
    from config import settings

    with patch(
        "dashboard.routes.health.check_ollama",
        new_callable=AsyncMock,
        return_value=True,
    ):
        response = client.get("/health/status")
    # Model name should come from settings, not be hardcoded
    assert response.status_code == 200
    model_short = settings.ollama_model.split(":")[0]
    assert model_short in response.text


# ── M7xx — XSS prevention ─────────────────────────────────────────────────────


def _mobile_html() -> str:
    """Read the mobile template source."""
    path = Path(__file__).parent.parent.parent / "src" / "dashboard" / "templates" / "mobile.html"
    return path.read_text()


def _swarm_live_html() -> str:
    """Read the swarm live template source."""
    path = (
        Path(__file__).parent.parent.parent / "src" / "dashboard" / "templates" / "swarm_live.html"
    )
    return path.read_text()


def test_M701_mobile_chat_no_raw_message_interpolation():
    """mobile.html must not interpolate ${message} directly into innerHTML — XSS risk."""
    html = _mobile_html()
    # The vulnerable pattern is `${message}` inside a template literal assigned to innerHTML
    # After the fix, message must only appear via textContent assignment
    assert "textContent = message" in html or "textContent=message" in html, (
        "mobile.html still uses innerHTML + ${message} interpolation — XSS vulnerability"
    )


def test_M702_mobile_chat_user_input_not_in_innerhtml_template_literal():
    """${message} must not appear inside a backtick string that is assigned to innerHTML."""
    html = _mobile_html()
    # Find all innerHTML += `...` blocks and verify none contain ${message}
    blocks = re.findall(r"innerHTML\s*\+=?\s*`([^`]*)`", html, re.DOTALL)
    for block in blocks:
        assert "${message}" not in block, (
            "innerHTML template literal still contains ${message} — XSS vulnerability"
        )


def test_M703_swarm_live_agent_name_not_interpolated_in_innerhtml():
    """swarm_live.html must not put ${agent.name} inside innerHTML template literals."""
    html = _swarm_live_html()
    blocks = re.findall(r"innerHTML\s*=\s*agents\.map\([^;]+\)\.join\([^)]*\)", html, re.DOTALL)
    assert len(blocks) == 0, (
        "swarm_live.html still uses innerHTML=agents.map(…) with interpolated agent data — XSS vulnerability"
    )


def test_M704_swarm_live_uses_textcontent_for_agent_data():
    """swarm_live.html must use textContent (not innerHTML) to set agent name/description."""
    html = _swarm_live_html()
    assert "textContent" in html, (
        "swarm_live.html does not use textContent — agent data may be raw-interpolated into DOM"
    )
-												feat: quality analysis — bug fixes, mobile tests, HITL checklist

Senior architect review findings + remediations:

BUG FIX — critical interface mismatch
- TimmyAirLLMAgent only exposed print_response(); dashboard route calls
  agent.run() → AttributeError when AirLLM backend is selected.
  Added run() → RunResult(content) as primary inference entry point;
  print_response() now delegates to run() so both call sites share
  one inference path.
- Added RunResult dataclass for Agno-compatible structured return.

BUG FIX — hardcoded model name in health status partial
- health_status.html rendered literal "llama3.2" regardless of
  OLLAMA_MODEL env var. Route now passes settings.ollama_model to
  the template context; partial renders {{ model }} instead.

FEATURE — /mobile-test HITL checklist page
- 22 human-executable test scenarios across: Layout, Touch & Input,
  Chat behaviour, Health, Scroll, Notch/Home Bar, Live UI.
- Pass/Fail/Skip buttons with sessionStorage state persistence.
- Live progress bar + final score summary.
- TEST link added to Mission Control header for quick access on phone.

TEST — 32 new automated mobile quality tests (M1xx–M6xx)
- M1xx: viewport/meta tags (8 tests)
- M2xx: touch target sizing — 44 px min-height, manipulation (4 tests)
- M3xx: iOS zoom prevention, autocapitalize, enterkeyhint (5 tests)
- M4xx: HTMX robustness — hx-sync drop, disabled-elt, polling (5 tests)
- M5xx: safe-area insets, overscroll, dvh units (5 tests)
- M6xx: AirLLM interface contract — run(), RunResult, delegation (5 tests)

Total test count: 61 → 93 (all passing).

https://claude.ai/code/session_01RBuRCBXZNkAQQXXGiJNDmt

											
										
										
											2026-02-21 17:21:47 +00:00
+								"""Mobile-first quality tests — automated validation of mobile UX requirements.
 								These tests verify the HTML, CSS, and HTMX attributes that make the dashboard
 								work correctly on phones.  No browser / Playwright required: we parse the
 								static assets and server responses directly.
 								Categories:
 								  M1xx  Viewport & meta tags
 								  M2xx  Touch target sizing
 								  M3xx  iOS keyboard & zoom prevention
 								  M4xx  HTMX robustness (double-submit, sync)
 								  M5xx  Safe-area / notch support
 								  M6xx  AirLLM backend interface contract
 								"""
 								import re
 								from pathlib import Path
 								from unittest.mock import AsyncMock, MagicMock, patch
 								# ── helpers ───────────────────────────────────────────────────────────────────
-												feat: Timmy fixes and improvements (#72)

* test: remove hardcoded sleeps, add pytest-timeout

- Replace fixed time.sleep() calls with intelligent polling or WebDriverWait
- Add pytest-timeout dependency and --timeout=30 to prevent hangs
- Fixes test flakiness and improves test suite speed

* feat: add Aider AI tool to Forge's toolkit

- Add Aider tool that calls local Ollama (qwen2.5:14b) for AI coding assist
- Register tool in Forge's code toolkit
- Add functional tests for the Aider tool

* config: add opencode.json with local Ollama provider for sovereign AI

* feat: Timmy fixes and improvements

## Bug Fixes
- Fix read_file path resolution: add ~ expansion, proper relative path handling
- Add repo_root to config.py with auto-detection from .git location
- Fix hardcoded llama3.2 - now dynamic from settings.ollama_model

## Timmy's Requests
- Add communication protocol to AGENTS.md (read context first, explain changes)
- Create DECISIONS.md for architectural decision documentation
- Add reasoning guidance to system prompts (step-by-step, state uncertainty)
- Update tests to reflect correct model name (llama3.1:8b-instruct)

## Testing
- All 177 dashboard tests pass
- All 32 prompt/tool tests pass

---------

Co-authored-by: Alexander Payne <apayne@MM.local>
											
										
										
											2026-02-26 23:39:13 -05:00
-												feat: quality analysis — bug fixes, mobile tests, HITL checklist

Senior architect review findings + remediations:

BUG FIX — critical interface mismatch
- TimmyAirLLMAgent only exposed print_response(); dashboard route calls
  agent.run() → AttributeError when AirLLM backend is selected.
  Added run() → RunResult(content) as primary inference entry point;
  print_response() now delegates to run() so both call sites share
  one inference path.
- Added RunResult dataclass for Agno-compatible structured return.

BUG FIX — hardcoded model name in health status partial
- health_status.html rendered literal "llama3.2" regardless of
  OLLAMA_MODEL env var. Route now passes settings.ollama_model to
  the template context; partial renders {{ model }} instead.

FEATURE — /mobile-test HITL checklist page
- 22 human-executable test scenarios across: Layout, Touch & Input,
  Chat behaviour, Health, Scroll, Notch/Home Bar, Live UI.
- Pass/Fail/Skip buttons with sessionStorage state persistence.
- Live progress bar + final score summary.
- TEST link added to Mission Control header for quick access on phone.

TEST — 32 new automated mobile quality tests (M1xx–M6xx)
- M1xx: viewport/meta tags (8 tests)
- M2xx: touch target sizing — 44 px min-height, manipulation (4 tests)
- M3xx: iOS zoom prevention, autocapitalize, enterkeyhint (5 tests)
- M4xx: HTMX robustness — hx-sync drop, disabled-elt, polling (5 tests)
- M5xx: safe-area insets, overscroll, dvh units (5 tests)
- M6xx: AirLLM interface contract — run(), RunResult, delegation (5 tests)

Total test count: 61 → 93 (all passing).

https://claude.ai/code/session_01RBuRCBXZNkAQQXXGiJNDmt

											
										
										
											2026-02-21 17:21:47 +00:00
+								def _css() -> str:
 								    """Read the main stylesheet."""
-												refactor: Phase 3 — reorganize tests into module-mirroring subdirectories

Move 97 test files from flat tests/ into 13 subdirectories:
  tests/dashboard/   (8 files — routes, mobile, mission control)
  tests/swarm/       (17 files — coordinator, docker, routing, tasks)
  tests/timmy/       (12 files — agent, backends, CLI, tools)
  tests/self_coding/  (14 files — git safety, indexer, self-modify)
  tests/lightning/   (3 files — L402, LND, interface)
  tests/creative/    (8 files — assembler, director, image/music/video)
  tests/integrations/ (10 files — chat bridge, telegram, voice, websocket)
  tests/mcp/         (4 files — bootstrap, discovery, executor)
  tests/spark/       (3 files — engine, tools, events)
  tests/hands/       (3 files — registry, oracle, phase5)
  tests/scripture/   (1 file)
  tests/infrastructure/ (3 files — router cascade, API)
  tests/security/    (3 files — XSS, regression)

Fix Path(__file__) reference in test_mobile_scenarios.py for new depth.
Add __init__.py to all test subdirectories.

Tests: 1503 passed, 9 failed (pre-existing), 53 errors (pre-existing)

https://claude.ai/code/session_019oMFNvD8uSGSSmBMGkBfQN

											
										
										
											2026-02-26 21:21:28 +00:00
+								    css_path = Path(__file__).parent.parent.parent / "static" / "style.css"
-												feat: quality analysis — bug fixes, mobile tests, HITL checklist

Senior architect review findings + remediations:

BUG FIX — critical interface mismatch
- TimmyAirLLMAgent only exposed print_response(); dashboard route calls
  agent.run() → AttributeError when AirLLM backend is selected.
  Added run() → RunResult(content) as primary inference entry point;
  print_response() now delegates to run() so both call sites share
  one inference path.
- Added RunResult dataclass for Agno-compatible structured return.

BUG FIX — hardcoded model name in health status partial
- health_status.html rendered literal "llama3.2" regardless of
  OLLAMA_MODEL env var. Route now passes settings.ollama_model to
  the template context; partial renders {{ model }} instead.

FEATURE — /mobile-test HITL checklist page
- 22 human-executable test scenarios across: Layout, Touch & Input,
  Chat behaviour, Health, Scroll, Notch/Home Bar, Live UI.
- Pass/Fail/Skip buttons with sessionStorage state persistence.
- Live progress bar + final score summary.
- TEST link added to Mission Control header for quick access on phone.

TEST — 32 new automated mobile quality tests (M1xx–M6xx)
- M1xx: viewport/meta tags (8 tests)
- M2xx: touch target sizing — 44 px min-height, manipulation (4 tests)
- M3xx: iOS zoom prevention, autocapitalize, enterkeyhint (5 tests)
- M4xx: HTMX robustness — hx-sync drop, disabled-elt, polling (5 tests)
- M5xx: safe-area insets, overscroll, dvh units (5 tests)
- M6xx: AirLLM interface contract — run(), RunResult, delegation (5 tests)

Total test count: 61 → 93 (all passing).

https://claude.ai/code/session_01RBuRCBXZNkAQQXXGiJNDmt

											
										
										
											2026-02-21 17:21:47 +00:00
+								    return css_path.read_text()
 								def _index_html(client) -> str:
 								    return client.get("/").text
-												feat: add Docker-based swarm agent containerization

Add infrastructure for running swarm agents as isolated Docker
containers with HTTP-based coordination, startup recovery, and
enhanced dashboard UI for agent management.

- Dockerfile and docker-compose.yml for multi-service orchestration
- DockerAgentRunner for programmatic container lifecycle management
- Internal HTTP API for container agents to poll tasks and submit bids
- Startup recovery system to reconcile orphaned tasks and stale agents
- Enhanced UI partials for agent panels, chat, and task assignment
- Timmy docker entry point with heartbeat and task polling
- New Makefile targets for Docker workflows
- Tests for swarm recovery

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-22 16:21:32 -05:00
+								def _timmy_panel_html(client) -> str:
 								    """Fetch the Timmy chat panel (loaded dynamically from index via HTMX)."""
-												Add pre-commit hook enforcing 30s test suite time limit (#132)
											
										
										
											2026-03-05 19:45:38 -05:00
+								    return client.get("/agents/default/panel").text
-												feat: add Docker-based swarm agent containerization

Add infrastructure for running swarm agents as isolated Docker
containers with HTTP-based coordination, startup recovery, and
enhanced dashboard UI for agent management.

- Dockerfile and docker-compose.yml for multi-service orchestration
- DockerAgentRunner for programmatic container lifecycle management
- Internal HTTP API for container agents to poll tasks and submit bids
- Startup recovery system to reconcile orphaned tasks and stale agents
- Enhanced UI partials for agent panels, chat, and task assignment
- Timmy docker entry point with heartbeat and task polling
- New Makefile targets for Docker workflows
- Tests for swarm recovery

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-22 16:21:32 -05:00
-												feat: quality analysis — bug fixes, mobile tests, HITL checklist

Senior architect review findings + remediations:

BUG FIX — critical interface mismatch
- TimmyAirLLMAgent only exposed print_response(); dashboard route calls
  agent.run() → AttributeError when AirLLM backend is selected.
  Added run() → RunResult(content) as primary inference entry point;
  print_response() now delegates to run() so both call sites share
  one inference path.
- Added RunResult dataclass for Agno-compatible structured return.

BUG FIX — hardcoded model name in health status partial
- health_status.html rendered literal "llama3.2" regardless of
  OLLAMA_MODEL env var. Route now passes settings.ollama_model to
  the template context; partial renders {{ model }} instead.

FEATURE — /mobile-test HITL checklist page
- 22 human-executable test scenarios across: Layout, Touch & Input,
  Chat behaviour, Health, Scroll, Notch/Home Bar, Live UI.
- Pass/Fail/Skip buttons with sessionStorage state persistence.
- Live progress bar + final score summary.
- TEST link added to Mission Control header for quick access on phone.

TEST — 32 new automated mobile quality tests (M1xx–M6xx)
- M1xx: viewport/meta tags (8 tests)
- M2xx: touch target sizing — 44 px min-height, manipulation (4 tests)
- M3xx: iOS zoom prevention, autocapitalize, enterkeyhint (5 tests)
- M4xx: HTMX robustness — hx-sync drop, disabled-elt, polling (5 tests)
- M5xx: safe-area insets, overscroll, dvh units (5 tests)
- M6xx: AirLLM interface contract — run(), RunResult, delegation (5 tests)

Total test count: 61 → 93 (all passing).

https://claude.ai/code/session_01RBuRCBXZNkAQQXXGiJNDmt

											
										
										
											2026-02-21 17:21:47 +00:00
+								# ── M1xx — Viewport & meta tags ───────────────────────────────────────────────
-												feat: Timmy fixes and improvements (#72)

* test: remove hardcoded sleeps, add pytest-timeout

- Replace fixed time.sleep() calls with intelligent polling or WebDriverWait
- Add pytest-timeout dependency and --timeout=30 to prevent hangs
- Fixes test flakiness and improves test suite speed

* feat: add Aider AI tool to Forge's toolkit

- Add Aider tool that calls local Ollama (qwen2.5:14b) for AI coding assist
- Register tool in Forge's code toolkit
- Add functional tests for the Aider tool

* config: add opencode.json with local Ollama provider for sovereign AI

* feat: Timmy fixes and improvements

## Bug Fixes
- Fix read_file path resolution: add ~ expansion, proper relative path handling
- Add repo_root to config.py with auto-detection from .git location
- Fix hardcoded llama3.2 - now dynamic from settings.ollama_model

## Timmy's Requests
- Add communication protocol to AGENTS.md (read context first, explain changes)
- Create DECISIONS.md for architectural decision documentation
- Add reasoning guidance to system prompts (step-by-step, state uncertainty)
- Update tests to reflect correct model name (llama3.1:8b-instruct)

## Testing
- All 177 dashboard tests pass
- All 32 prompt/tool tests pass

---------

Co-authored-by: Alexander Payne <apayne@MM.local>
											
										
										
											2026-02-26 23:39:13 -05:00
-												feat: quality analysis — bug fixes, mobile tests, HITL checklist

Senior architect review findings + remediations:

BUG FIX — critical interface mismatch
- TimmyAirLLMAgent only exposed print_response(); dashboard route calls
  agent.run() → AttributeError when AirLLM backend is selected.
  Added run() → RunResult(content) as primary inference entry point;
  print_response() now delegates to run() so both call sites share
  one inference path.
- Added RunResult dataclass for Agno-compatible structured return.

BUG FIX — hardcoded model name in health status partial
- health_status.html rendered literal "llama3.2" regardless of
  OLLAMA_MODEL env var. Route now passes settings.ollama_model to
  the template context; partial renders {{ model }} instead.

FEATURE — /mobile-test HITL checklist page
- 22 human-executable test scenarios across: Layout, Touch & Input,
  Chat behaviour, Health, Scroll, Notch/Home Bar, Live UI.
- Pass/Fail/Skip buttons with sessionStorage state persistence.
- Live progress bar + final score summary.
- TEST link added to Mission Control header for quick access on phone.

TEST — 32 new automated mobile quality tests (M1xx–M6xx)
- M1xx: viewport/meta tags (8 tests)
- M2xx: touch target sizing — 44 px min-height, manipulation (4 tests)
- M3xx: iOS zoom prevention, autocapitalize, enterkeyhint (5 tests)
- M4xx: HTMX robustness — hx-sync drop, disabled-elt, polling (5 tests)
- M5xx: safe-area insets, overscroll, dvh units (5 tests)
- M6xx: AirLLM interface contract — run(), RunResult, delegation (5 tests)

Total test count: 61 → 93 (all passing).

https://claude.ai/code/session_01RBuRCBXZNkAQQXXGiJNDmt

											
										
										
											2026-02-21 17:21:47 +00:00
+								def test_M101_viewport_meta_present(client):
 								    """viewport meta tag must exist for correct mobile scaling."""
 								    html = _index_html(client)
 								    assert 'name="viewport"' in html
 								def test_M102_viewport_includes_width_device_width(client):
 								    html = _index_html(client)
 								    assert "width=device-width" in html
 								def test_M103_viewport_includes_initial_scale_1(client):
 								    html = _index_html(client)
 								    assert "initial-scale=1" in html
 								def test_M104_viewport_includes_viewport_fit_cover(client):
 								    """viewport-fit=cover is required for iPhone notch / Dynamic Island support."""
 								    html = _index_html(client)
 								    assert "viewport-fit=cover" in html
 								def test_M105_apple_mobile_web_app_capable(client):
 								    """Enables full-screen / standalone mode when added to iPhone home screen."""
 								    html = _index_html(client)
 								    assert "apple-mobile-web-app-capable" in html
 								def test_M106_theme_color_meta_present(client):
 								    """theme-color sets the browser chrome colour on Android Chrome."""
 								    html = _index_html(client)
 								    assert 'name="theme-color"' in html
 								def test_M107_apple_status_bar_style_present(client):
 								    html = _index_html(client)
 								    assert "apple-mobile-web-app-status-bar-style" in html
 								def test_M108_lang_attribute_on_html(client):
 								    """lang attribute aids screen readers and mobile TTS."""
 								    html = _index_html(client)
 								    assert '<html lang="en"' in html
 								# ── M2xx — Touch target sizing ────────────────────────────────────────────────
-												feat: Timmy fixes and improvements (#72)

* test: remove hardcoded sleeps, add pytest-timeout

- Replace fixed time.sleep() calls with intelligent polling or WebDriverWait
- Add pytest-timeout dependency and --timeout=30 to prevent hangs
- Fixes test flakiness and improves test suite speed

* feat: add Aider AI tool to Forge's toolkit

- Add Aider tool that calls local Ollama (qwen2.5:14b) for AI coding assist
- Register tool in Forge's code toolkit
- Add functional tests for the Aider tool

* config: add opencode.json with local Ollama provider for sovereign AI

* feat: Timmy fixes and improvements

## Bug Fixes
- Fix read_file path resolution: add ~ expansion, proper relative path handling
- Add repo_root to config.py with auto-detection from .git location
- Fix hardcoded llama3.2 - now dynamic from settings.ollama_model

## Timmy's Requests
- Add communication protocol to AGENTS.md (read context first, explain changes)
- Create DECISIONS.md for architectural decision documentation
- Add reasoning guidance to system prompts (step-by-step, state uncertainty)
- Update tests to reflect correct model name (llama3.1:8b-instruct)

## Testing
- All 177 dashboard tests pass
- All 32 prompt/tool tests pass

---------

Co-authored-by: Alexander Payne <apayne@MM.local>
											
										
										
											2026-02-26 23:39:13 -05:00
-												feat: quality analysis — bug fixes, mobile tests, HITL checklist

Senior architect review findings + remediations:

BUG FIX — critical interface mismatch
- TimmyAirLLMAgent only exposed print_response(); dashboard route calls
  agent.run() → AttributeError when AirLLM backend is selected.
  Added run() → RunResult(content) as primary inference entry point;
  print_response() now delegates to run() so both call sites share
  one inference path.
- Added RunResult dataclass for Agno-compatible structured return.

BUG FIX — hardcoded model name in health status partial
- health_status.html rendered literal "llama3.2" regardless of
  OLLAMA_MODEL env var. Route now passes settings.ollama_model to
  the template context; partial renders {{ model }} instead.

FEATURE — /mobile-test HITL checklist page
- 22 human-executable test scenarios across: Layout, Touch & Input,
  Chat behaviour, Health, Scroll, Notch/Home Bar, Live UI.
- Pass/Fail/Skip buttons with sessionStorage state persistence.
- Live progress bar + final score summary.
- TEST link added to Mission Control header for quick access on phone.

TEST — 32 new automated mobile quality tests (M1xx–M6xx)
- M1xx: viewport/meta tags (8 tests)
- M2xx: touch target sizing — 44 px min-height, manipulation (4 tests)
- M3xx: iOS zoom prevention, autocapitalize, enterkeyhint (5 tests)
- M4xx: HTMX robustness — hx-sync drop, disabled-elt, polling (5 tests)
- M5xx: safe-area insets, overscroll, dvh units (5 tests)
- M6xx: AirLLM interface contract — run(), RunResult, delegation (5 tests)

Total test count: 61 → 93 (all passing).

https://claude.ai/code/session_01RBuRCBXZNkAQQXXGiJNDmt

											
										
										
											2026-02-21 17:21:47 +00:00
+								def test_M201_send_button_min_height_44px():
 								    """SEND button must be at least 44 × 44 px — Apple HIG minimum."""
 								    css = _css()
 								    # Inside the mobile media query the send button must have min-height: 44px
 								    assert "min-height: 44px" in css
 								def test_M203_send_button_min_width_64px():
 								    """Send button needs sufficient width so it isn't accidentally missed."""
 								    css = _css()
 								    assert "min-width: 64px" in css
 								def test_M204_touch_action_manipulation_on_buttons():
 								    """touch-action: manipulation removes 300ms tap delay on mobile browsers."""
 								    css = _css()
 								    assert "touch-action: manipulation" in css
 								# ── M3xx — iOS keyboard & zoom prevention ─────────────────────────────────────
-												feat: Timmy fixes and improvements (#72)

* test: remove hardcoded sleeps, add pytest-timeout

- Replace fixed time.sleep() calls with intelligent polling or WebDriverWait
- Add pytest-timeout dependency and --timeout=30 to prevent hangs
- Fixes test flakiness and improves test suite speed

* feat: add Aider AI tool to Forge's toolkit

- Add Aider tool that calls local Ollama (qwen2.5:14b) for AI coding assist
- Register tool in Forge's code toolkit
- Add functional tests for the Aider tool

* config: add opencode.json with local Ollama provider for sovereign AI

* feat: Timmy fixes and improvements

## Bug Fixes
- Fix read_file path resolution: add ~ expansion, proper relative path handling
- Add repo_root to config.py with auto-detection from .git location
- Fix hardcoded llama3.2 - now dynamic from settings.ollama_model

## Timmy's Requests
- Add communication protocol to AGENTS.md (read context first, explain changes)
- Create DECISIONS.md for architectural decision documentation
- Add reasoning guidance to system prompts (step-by-step, state uncertainty)
- Update tests to reflect correct model name (llama3.1:8b-instruct)

## Testing
- All 177 dashboard tests pass
- All 32 prompt/tool tests pass

---------

Co-authored-by: Alexander Payne <apayne@MM.local>
											
										
										
											2026-02-26 23:39:13 -05:00
-												feat: quality analysis — bug fixes, mobile tests, HITL checklist

Senior architect review findings + remediations:

BUG FIX — critical interface mismatch
- TimmyAirLLMAgent only exposed print_response(); dashboard route calls
  agent.run() → AttributeError when AirLLM backend is selected.
  Added run() → RunResult(content) as primary inference entry point;
  print_response() now delegates to run() so both call sites share
  one inference path.
- Added RunResult dataclass for Agno-compatible structured return.

BUG FIX — hardcoded model name in health status partial
- health_status.html rendered literal "llama3.2" regardless of
  OLLAMA_MODEL env var. Route now passes settings.ollama_model to
  the template context; partial renders {{ model }} instead.

FEATURE — /mobile-test HITL checklist page
- 22 human-executable test scenarios across: Layout, Touch & Input,
  Chat behaviour, Health, Scroll, Notch/Home Bar, Live UI.
- Pass/Fail/Skip buttons with sessionStorage state persistence.
- Live progress bar + final score summary.
- TEST link added to Mission Control header for quick access on phone.

TEST — 32 new automated mobile quality tests (M1xx–M6xx)
- M1xx: viewport/meta tags (8 tests)
- M2xx: touch target sizing — 44 px min-height, manipulation (4 tests)
- M3xx: iOS zoom prevention, autocapitalize, enterkeyhint (5 tests)
- M4xx: HTMX robustness — hx-sync drop, disabled-elt, polling (5 tests)
- M5xx: safe-area insets, overscroll, dvh units (5 tests)
- M6xx: AirLLM interface contract — run(), RunResult, delegation (5 tests)

Total test count: 61 → 93 (all passing).

https://claude.ai/code/session_01RBuRCBXZNkAQQXXGiJNDmt

											
										
										
											2026-02-21 17:21:47 +00:00
+								def test_M301_input_font_size_16px_in_mobile_query():
 								    """iOS Safari zooms in when input font-size < 16px.  Must be exactly 16px."""
 								    css = _css()
 								    # The mobile media-query block must override to 16px
-												feat: code quality audit + autoresearch integration + infra hardening (#150)
											
										
										
											2026-03-08 12:50:44 -04:00
+								    mobile_block_match = re.search(r"@media\s*\(max-width:\s*768px\)(.*)", css, re.DOTALL)
-												feat: quality analysis — bug fixes, mobile tests, HITL checklist

Senior architect review findings + remediations:

BUG FIX — critical interface mismatch
- TimmyAirLLMAgent only exposed print_response(); dashboard route calls
  agent.run() → AttributeError when AirLLM backend is selected.
  Added run() → RunResult(content) as primary inference entry point;
  print_response() now delegates to run() so both call sites share
  one inference path.
- Added RunResult dataclass for Agno-compatible structured return.

BUG FIX — hardcoded model name in health status partial
- health_status.html rendered literal "llama3.2" regardless of
  OLLAMA_MODEL env var. Route now passes settings.ollama_model to
  the template context; partial renders {{ model }} instead.

FEATURE — /mobile-test HITL checklist page
- 22 human-executable test scenarios across: Layout, Touch & Input,
  Chat behaviour, Health, Scroll, Notch/Home Bar, Live UI.
- Pass/Fail/Skip buttons with sessionStorage state persistence.
- Live progress bar + final score summary.
- TEST link added to Mission Control header for quick access on phone.

TEST — 32 new automated mobile quality tests (M1xx–M6xx)
- M1xx: viewport/meta tags (8 tests)
- M2xx: touch target sizing — 44 px min-height, manipulation (4 tests)
- M3xx: iOS zoom prevention, autocapitalize, enterkeyhint (5 tests)
- M4xx: HTMX robustness — hx-sync drop, disabled-elt, polling (5 tests)
- M5xx: safe-area insets, overscroll, dvh units (5 tests)
- M6xx: AirLLM interface contract — run(), RunResult, delegation (5 tests)

Total test count: 61 → 93 (all passing).

https://claude.ai/code/session_01RBuRCBXZNkAQQXXGiJNDmt

											
										
										
											2026-02-21 17:21:47 +00:00
+								    assert mobile_block_match, "Mobile media query not found"
 								    mobile_block = mobile_block_match.group(1)
 								    assert "font-size: 16px" in mobile_block
 								def test_M302_input_autocapitalize_none(client):
 								    """autocapitalize=none prevents iOS from capitalising chat commands."""
-												feat: add Docker-based swarm agent containerization

Add infrastructure for running swarm agents as isolated Docker
containers with HTTP-based coordination, startup recovery, and
enhanced dashboard UI for agent management.

- Dockerfile and docker-compose.yml for multi-service orchestration
- DockerAgentRunner for programmatic container lifecycle management
- Internal HTTP API for container agents to poll tasks and submit bids
- Startup recovery system to reconcile orphaned tasks and stale agents
- Enhanced UI partials for agent panels, chat, and task assignment
- Timmy docker entry point with heartbeat and task polling
- New Makefile targets for Docker workflows
- Tests for swarm recovery

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-22 16:21:32 -05:00
+								    html = _timmy_panel_html(client)
-												feat: quality analysis — bug fixes, mobile tests, HITL checklist

Senior architect review findings + remediations:

BUG FIX — critical interface mismatch
- TimmyAirLLMAgent only exposed print_response(); dashboard route calls
  agent.run() → AttributeError when AirLLM backend is selected.
  Added run() → RunResult(content) as primary inference entry point;
  print_response() now delegates to run() so both call sites share
  one inference path.
- Added RunResult dataclass for Agno-compatible structured return.

BUG FIX — hardcoded model name in health status partial
- health_status.html rendered literal "llama3.2" regardless of
  OLLAMA_MODEL env var. Route now passes settings.ollama_model to
  the template context; partial renders {{ model }} instead.

FEATURE — /mobile-test HITL checklist page
- 22 human-executable test scenarios across: Layout, Touch & Input,
  Chat behaviour, Health, Scroll, Notch/Home Bar, Live UI.
- Pass/Fail/Skip buttons with sessionStorage state persistence.
- Live progress bar + final score summary.
- TEST link added to Mission Control header for quick access on phone.

TEST — 32 new automated mobile quality tests (M1xx–M6xx)
- M1xx: viewport/meta tags (8 tests)
- M2xx: touch target sizing — 44 px min-height, manipulation (4 tests)
- M3xx: iOS zoom prevention, autocapitalize, enterkeyhint (5 tests)
- M4xx: HTMX robustness — hx-sync drop, disabled-elt, polling (5 tests)
- M5xx: safe-area insets, overscroll, dvh units (5 tests)
- M6xx: AirLLM interface contract — run(), RunResult, delegation (5 tests)

Total test count: 61 → 93 (all passing).

https://claude.ai/code/session_01RBuRCBXZNkAQQXXGiJNDmt

											
										
										
											2026-02-21 17:21:47 +00:00
+								    assert 'autocapitalize="none"' in html
 								def test_M303_input_autocorrect_off(client):
 								    """autocorrect=off prevents iOS from mangling technical / proper-noun input."""
-												feat: add Docker-based swarm agent containerization

Add infrastructure for running swarm agents as isolated Docker
containers with HTTP-based coordination, startup recovery, and
enhanced dashboard UI for agent management.

- Dockerfile and docker-compose.yml for multi-service orchestration
- DockerAgentRunner for programmatic container lifecycle management
- Internal HTTP API for container agents to poll tasks and submit bids
- Startup recovery system to reconcile orphaned tasks and stale agents
- Enhanced UI partials for agent panels, chat, and task assignment
- Timmy docker entry point with heartbeat and task polling
- New Makefile targets for Docker workflows
- Tests for swarm recovery

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-22 16:21:32 -05:00
+								    html = _timmy_panel_html(client)
-												feat: quality analysis — bug fixes, mobile tests, HITL checklist

Senior architect review findings + remediations:

BUG FIX — critical interface mismatch
- TimmyAirLLMAgent only exposed print_response(); dashboard route calls
  agent.run() → AttributeError when AirLLM backend is selected.
  Added run() → RunResult(content) as primary inference entry point;
  print_response() now delegates to run() so both call sites share
  one inference path.
- Added RunResult dataclass for Agno-compatible structured return.

BUG FIX — hardcoded model name in health status partial
- health_status.html rendered literal "llama3.2" regardless of
  OLLAMA_MODEL env var. Route now passes settings.ollama_model to
  the template context; partial renders {{ model }} instead.

FEATURE — /mobile-test HITL checklist page
- 22 human-executable test scenarios across: Layout, Touch & Input,
  Chat behaviour, Health, Scroll, Notch/Home Bar, Live UI.
- Pass/Fail/Skip buttons with sessionStorage state persistence.
- Live progress bar + final score summary.
- TEST link added to Mission Control header for quick access on phone.

TEST — 32 new automated mobile quality tests (M1xx–M6xx)
- M1xx: viewport/meta tags (8 tests)
- M2xx: touch target sizing — 44 px min-height, manipulation (4 tests)
- M3xx: iOS zoom prevention, autocapitalize, enterkeyhint (5 tests)
- M4xx: HTMX robustness — hx-sync drop, disabled-elt, polling (5 tests)
- M5xx: safe-area insets, overscroll, dvh units (5 tests)
- M6xx: AirLLM interface contract — run(), RunResult, delegation (5 tests)

Total test count: 61 → 93 (all passing).

https://claude.ai/code/session_01RBuRCBXZNkAQQXXGiJNDmt

											
										
										
											2026-02-21 17:21:47 +00:00
+								    assert 'autocorrect="off"' in html
 								def test_M304_input_enterkeyhint_send(client):
 								    """enterkeyhint=send labels the iOS return key 'Send' for clearer UX."""
-												feat: add Docker-based swarm agent containerization

Add infrastructure for running swarm agents as isolated Docker
containers with HTTP-based coordination, startup recovery, and
enhanced dashboard UI for agent management.

- Dockerfile and docker-compose.yml for multi-service orchestration
- DockerAgentRunner for programmatic container lifecycle management
- Internal HTTP API for container agents to poll tasks and submit bids
- Startup recovery system to reconcile orphaned tasks and stale agents
- Enhanced UI partials for agent panels, chat, and task assignment
- Timmy docker entry point with heartbeat and task polling
- New Makefile targets for Docker workflows
- Tests for swarm recovery

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-22 16:21:32 -05:00
+								    html = _timmy_panel_html(client)
-												feat: quality analysis — bug fixes, mobile tests, HITL checklist

Senior architect review findings + remediations:

BUG FIX — critical interface mismatch
- TimmyAirLLMAgent only exposed print_response(); dashboard route calls
  agent.run() → AttributeError when AirLLM backend is selected.
  Added run() → RunResult(content) as primary inference entry point;
  print_response() now delegates to run() so both call sites share
  one inference path.
- Added RunResult dataclass for Agno-compatible structured return.

BUG FIX — hardcoded model name in health status partial
- health_status.html rendered literal "llama3.2" regardless of
  OLLAMA_MODEL env var. Route now passes settings.ollama_model to
  the template context; partial renders {{ model }} instead.

FEATURE — /mobile-test HITL checklist page
- 22 human-executable test scenarios across: Layout, Touch & Input,
  Chat behaviour, Health, Scroll, Notch/Home Bar, Live UI.
- Pass/Fail/Skip buttons with sessionStorage state persistence.
- Live progress bar + final score summary.
- TEST link added to Mission Control header for quick access on phone.

TEST — 32 new automated mobile quality tests (M1xx–M6xx)
- M1xx: viewport/meta tags (8 tests)
- M2xx: touch target sizing — 44 px min-height, manipulation (4 tests)
- M3xx: iOS zoom prevention, autocapitalize, enterkeyhint (5 tests)
- M4xx: HTMX robustness — hx-sync drop, disabled-elt, polling (5 tests)
- M5xx: safe-area insets, overscroll, dvh units (5 tests)
- M6xx: AirLLM interface contract — run(), RunResult, delegation (5 tests)

Total test count: 61 → 93 (all passing).

https://claude.ai/code/session_01RBuRCBXZNkAQQXXGiJNDmt

											
										
										
											2026-02-21 17:21:47 +00:00
+								    assert 'enterkeyhint="send"' in html
 								def test_M305_input_spellcheck_false(client):
 								    """spellcheck=false prevents red squiggles on technical terms."""
-												feat: add Docker-based swarm agent containerization

Add infrastructure for running swarm agents as isolated Docker
containers with HTTP-based coordination, startup recovery, and
enhanced dashboard UI for agent management.

- Dockerfile and docker-compose.yml for multi-service orchestration
- DockerAgentRunner for programmatic container lifecycle management
- Internal HTTP API for container agents to poll tasks and submit bids
- Startup recovery system to reconcile orphaned tasks and stale agents
- Enhanced UI partials for agent panels, chat, and task assignment
- Timmy docker entry point with heartbeat and task polling
- New Makefile targets for Docker workflows
- Tests for swarm recovery

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-22 16:21:32 -05:00
+								    html = _timmy_panel_html(client)
-												feat: quality analysis — bug fixes, mobile tests, HITL checklist

Senior architect review findings + remediations:

BUG FIX — critical interface mismatch
- TimmyAirLLMAgent only exposed print_response(); dashboard route calls
  agent.run() → AttributeError when AirLLM backend is selected.
  Added run() → RunResult(content) as primary inference entry point;
  print_response() now delegates to run() so both call sites share
  one inference path.
- Added RunResult dataclass for Agno-compatible structured return.

BUG FIX — hardcoded model name in health status partial
- health_status.html rendered literal "llama3.2" regardless of
  OLLAMA_MODEL env var. Route now passes settings.ollama_model to
  the template context; partial renders {{ model }} instead.

FEATURE — /mobile-test HITL checklist page
- 22 human-executable test scenarios across: Layout, Touch & Input,
  Chat behaviour, Health, Scroll, Notch/Home Bar, Live UI.
- Pass/Fail/Skip buttons with sessionStorage state persistence.
- Live progress bar + final score summary.
- TEST link added to Mission Control header for quick access on phone.

TEST — 32 new automated mobile quality tests (M1xx–M6xx)
- M1xx: viewport/meta tags (8 tests)
- M2xx: touch target sizing — 44 px min-height, manipulation (4 tests)
- M3xx: iOS zoom prevention, autocapitalize, enterkeyhint (5 tests)
- M4xx: HTMX robustness — hx-sync drop, disabled-elt, polling (5 tests)
- M5xx: safe-area insets, overscroll, dvh units (5 tests)
- M6xx: AirLLM interface contract — run(), RunResult, delegation (5 tests)

Total test count: 61 → 93 (all passing).

https://claude.ai/code/session_01RBuRCBXZNkAQQXXGiJNDmt

											
										
										
											2026-02-21 17:21:47 +00:00
+								    assert 'spellcheck="false"' in html
 								# ── M4xx — HTMX robustness ────────────────────────────────────────────────────
-												feat: Timmy fixes and improvements (#72)

* test: remove hardcoded sleeps, add pytest-timeout

- Replace fixed time.sleep() calls with intelligent polling or WebDriverWait
- Add pytest-timeout dependency and --timeout=30 to prevent hangs
- Fixes test flakiness and improves test suite speed

* feat: add Aider AI tool to Forge's toolkit

- Add Aider tool that calls local Ollama (qwen2.5:14b) for AI coding assist
- Register tool in Forge's code toolkit
- Add functional tests for the Aider tool

* config: add opencode.json with local Ollama provider for sovereign AI

* feat: Timmy fixes and improvements

## Bug Fixes
- Fix read_file path resolution: add ~ expansion, proper relative path handling
- Add repo_root to config.py with auto-detection from .git location
- Fix hardcoded llama3.2 - now dynamic from settings.ollama_model

## Timmy's Requests
- Add communication protocol to AGENTS.md (read context first, explain changes)
- Create DECISIONS.md for architectural decision documentation
- Add reasoning guidance to system prompts (step-by-step, state uncertainty)
- Update tests to reflect correct model name (llama3.1:8b-instruct)

## Testing
- All 177 dashboard tests pass
- All 32 prompt/tool tests pass

---------

Co-authored-by: Alexander Payne <apayne@MM.local>
											
										
										
											2026-02-26 23:39:13 -05:00
-												feat: quality analysis — bug fixes, mobile tests, HITL checklist

Senior architect review findings + remediations:

BUG FIX — critical interface mismatch
- TimmyAirLLMAgent only exposed print_response(); dashboard route calls
  agent.run() → AttributeError when AirLLM backend is selected.
  Added run() → RunResult(content) as primary inference entry point;
  print_response() now delegates to run() so both call sites share
  one inference path.
- Added RunResult dataclass for Agno-compatible structured return.

BUG FIX — hardcoded model name in health status partial
- health_status.html rendered literal "llama3.2" regardless of
  OLLAMA_MODEL env var. Route now passes settings.ollama_model to
  the template context; partial renders {{ model }} instead.

FEATURE — /mobile-test HITL checklist page
- 22 human-executable test scenarios across: Layout, Touch & Input,
  Chat behaviour, Health, Scroll, Notch/Home Bar, Live UI.
- Pass/Fail/Skip buttons with sessionStorage state persistence.
- Live progress bar + final score summary.
- TEST link added to Mission Control header for quick access on phone.

TEST — 32 new automated mobile quality tests (M1xx–M6xx)
- M1xx: viewport/meta tags (8 tests)
- M2xx: touch target sizing — 44 px min-height, manipulation (4 tests)
- M3xx: iOS zoom prevention, autocapitalize, enterkeyhint (5 tests)
- M4xx: HTMX robustness — hx-sync drop, disabled-elt, polling (5 tests)
- M5xx: safe-area insets, overscroll, dvh units (5 tests)
- M6xx: AirLLM interface contract — run(), RunResult, delegation (5 tests)

Total test count: 61 → 93 (all passing).

https://claude.ai/code/session_01RBuRCBXZNkAQQXXGiJNDmt

											
										
										
											2026-02-21 17:21:47 +00:00
+								def test_M401_form_hx_sync_drop(client):
 								    """hx-sync=this:drop discards duplicate submissions (fast double-tap)."""
-												feat: add Docker-based swarm agent containerization

Add infrastructure for running swarm agents as isolated Docker
containers with HTTP-based coordination, startup recovery, and
enhanced dashboard UI for agent management.

- Dockerfile and docker-compose.yml for multi-service orchestration
- DockerAgentRunner for programmatic container lifecycle management
- Internal HTTP API for container agents to poll tasks and submit bids
- Startup recovery system to reconcile orphaned tasks and stale agents
- Enhanced UI partials for agent panels, chat, and task assignment
- Timmy docker entry point with heartbeat and task polling
- New Makefile targets for Docker workflows
- Tests for swarm recovery

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-22 16:21:32 -05:00
+								    html = _timmy_panel_html(client)
-												feat: quality analysis — bug fixes, mobile tests, HITL checklist

Senior architect review findings + remediations:

BUG FIX — critical interface mismatch
- TimmyAirLLMAgent only exposed print_response(); dashboard route calls
  agent.run() → AttributeError when AirLLM backend is selected.
  Added run() → RunResult(content) as primary inference entry point;
  print_response() now delegates to run() so both call sites share
  one inference path.
- Added RunResult dataclass for Agno-compatible structured return.

BUG FIX — hardcoded model name in health status partial
- health_status.html rendered literal "llama3.2" regardless of
  OLLAMA_MODEL env var. Route now passes settings.ollama_model to
  the template context; partial renders {{ model }} instead.

FEATURE — /mobile-test HITL checklist page
- 22 human-executable test scenarios across: Layout, Touch & Input,
  Chat behaviour, Health, Scroll, Notch/Home Bar, Live UI.
- Pass/Fail/Skip buttons with sessionStorage state persistence.
- Live progress bar + final score summary.
- TEST link added to Mission Control header for quick access on phone.

TEST — 32 new automated mobile quality tests (M1xx–M6xx)
- M1xx: viewport/meta tags (8 tests)
- M2xx: touch target sizing — 44 px min-height, manipulation (4 tests)
- M3xx: iOS zoom prevention, autocapitalize, enterkeyhint (5 tests)
- M4xx: HTMX robustness — hx-sync drop, disabled-elt, polling (5 tests)
- M5xx: safe-area insets, overscroll, dvh units (5 tests)
- M6xx: AirLLM interface contract — run(), RunResult, delegation (5 tests)

Total test count: 61 → 93 (all passing).

https://claude.ai/code/session_01RBuRCBXZNkAQQXXGiJNDmt

											
										
										
											2026-02-21 17:21:47 +00:00
+								    assert 'hx-sync="this:drop"' in html
 								def test_M402_form_hx_disabled_elt(client):
 								    """hx-disabled-elt disables the SEND button while a request is in-flight."""
-												feat: add Docker-based swarm agent containerization

Add infrastructure for running swarm agents as isolated Docker
containers with HTTP-based coordination, startup recovery, and
enhanced dashboard UI for agent management.

- Dockerfile and docker-compose.yml for multi-service orchestration
- DockerAgentRunner for programmatic container lifecycle management
- Internal HTTP API for container agents to poll tasks and submit bids
- Startup recovery system to reconcile orphaned tasks and stale agents
- Enhanced UI partials for agent panels, chat, and task assignment
- Timmy docker entry point with heartbeat and task polling
- New Makefile targets for Docker workflows
- Tests for swarm recovery

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-22 16:21:32 -05:00
+								    html = _timmy_panel_html(client)
-												feat: quality analysis — bug fixes, mobile tests, HITL checklist

Senior architect review findings + remediations:

BUG FIX — critical interface mismatch
- TimmyAirLLMAgent only exposed print_response(); dashboard route calls
  agent.run() → AttributeError when AirLLM backend is selected.
  Added run() → RunResult(content) as primary inference entry point;
  print_response() now delegates to run() so both call sites share
  one inference path.
- Added RunResult dataclass for Agno-compatible structured return.

BUG FIX — hardcoded model name in health status partial
- health_status.html rendered literal "llama3.2" regardless of
  OLLAMA_MODEL env var. Route now passes settings.ollama_model to
  the template context; partial renders {{ model }} instead.

FEATURE — /mobile-test HITL checklist page
- 22 human-executable test scenarios across: Layout, Touch & Input,
  Chat behaviour, Health, Scroll, Notch/Home Bar, Live UI.
- Pass/Fail/Skip buttons with sessionStorage state persistence.
- Live progress bar + final score summary.
- TEST link added to Mission Control header for quick access on phone.

TEST — 32 new automated mobile quality tests (M1xx–M6xx)
- M1xx: viewport/meta tags (8 tests)
- M2xx: touch target sizing — 44 px min-height, manipulation (4 tests)
- M3xx: iOS zoom prevention, autocapitalize, enterkeyhint (5 tests)
- M4xx: HTMX robustness — hx-sync drop, disabled-elt, polling (5 tests)
- M5xx: safe-area insets, overscroll, dvh units (5 tests)
- M6xx: AirLLM interface contract — run(), RunResult, delegation (5 tests)

Total test count: 61 → 93 (all passing).

https://claude.ai/code/session_01RBuRCBXZNkAQQXXGiJNDmt

											
										
										
											2026-02-21 17:21:47 +00:00
+								    assert "hx-disabled-elt" in html
 								def test_M403_form_hx_indicator(client):
 								    """hx-indicator wires up the loading spinner to the in-flight state."""
-												feat: add Docker-based swarm agent containerization

Add infrastructure for running swarm agents as isolated Docker
containers with HTTP-based coordination, startup recovery, and
enhanced dashboard UI for agent management.

- Dockerfile and docker-compose.yml for multi-service orchestration
- DockerAgentRunner for programmatic container lifecycle management
- Internal HTTP API for container agents to poll tasks and submit bids
- Startup recovery system to reconcile orphaned tasks and stale agents
- Enhanced UI partials for agent panels, chat, and task assignment
- Timmy docker entry point with heartbeat and task polling
- New Makefile targets for Docker workflows
- Tests for swarm recovery

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-22 16:21:32 -05:00
+								    html = _timmy_panel_html(client)
-												feat: quality analysis — bug fixes, mobile tests, HITL checklist

Senior architect review findings + remediations:

BUG FIX — critical interface mismatch
- TimmyAirLLMAgent only exposed print_response(); dashboard route calls
  agent.run() → AttributeError when AirLLM backend is selected.
  Added run() → RunResult(content) as primary inference entry point;
  print_response() now delegates to run() so both call sites share
  one inference path.
- Added RunResult dataclass for Agno-compatible structured return.

BUG FIX — hardcoded model name in health status partial
- health_status.html rendered literal "llama3.2" regardless of
  OLLAMA_MODEL env var. Route now passes settings.ollama_model to
  the template context; partial renders {{ model }} instead.

FEATURE — /mobile-test HITL checklist page
- 22 human-executable test scenarios across: Layout, Touch & Input,
  Chat behaviour, Health, Scroll, Notch/Home Bar, Live UI.
- Pass/Fail/Skip buttons with sessionStorage state persistence.
- Live progress bar + final score summary.
- TEST link added to Mission Control header for quick access on phone.

TEST — 32 new automated mobile quality tests (M1xx–M6xx)
- M1xx: viewport/meta tags (8 tests)
- M2xx: touch target sizing — 44 px min-height, manipulation (4 tests)
- M3xx: iOS zoom prevention, autocapitalize, enterkeyhint (5 tests)
- M4xx: HTMX robustness — hx-sync drop, disabled-elt, polling (5 tests)
- M5xx: safe-area insets, overscroll, dvh units (5 tests)
- M6xx: AirLLM interface contract — run(), RunResult, delegation (5 tests)

Total test count: 61 → 93 (all passing).

https://claude.ai/code/session_01RBuRCBXZNkAQQXXGiJNDmt

											
										
										
											2026-02-21 17:21:47 +00:00
+								    assert "hx-indicator" in html
 								def test_M404_health_panel_auto_refreshes(client):
 								    """Health panel must poll via HTMX trigger — 'every 30s' confirms this."""
 								    html = _index_html(client)
 								    assert "every 30s" in html
 								def test_M405_chat_log_loads_history_on_boot(client):
 								    """Chat log fetches history via hx-trigger=load so it's populated on open."""
 								    html = _index_html(client)
 								    assert 'hx-trigger="load"' in html
 								# ── M5xx — Safe-area / notch support ─────────────────────────────────────────
-												feat: Timmy fixes and improvements (#72)

* test: remove hardcoded sleeps, add pytest-timeout

- Replace fixed time.sleep() calls with intelligent polling or WebDriverWait
- Add pytest-timeout dependency and --timeout=30 to prevent hangs
- Fixes test flakiness and improves test suite speed

* feat: add Aider AI tool to Forge's toolkit

- Add Aider tool that calls local Ollama (qwen2.5:14b) for AI coding assist
- Register tool in Forge's code toolkit
- Add functional tests for the Aider tool

* config: add opencode.json with local Ollama provider for sovereign AI

* feat: Timmy fixes and improvements

## Bug Fixes
- Fix read_file path resolution: add ~ expansion, proper relative path handling
- Add repo_root to config.py with auto-detection from .git location
- Fix hardcoded llama3.2 - now dynamic from settings.ollama_model

## Timmy's Requests
- Add communication protocol to AGENTS.md (read context first, explain changes)
- Create DECISIONS.md for architectural decision documentation
- Add reasoning guidance to system prompts (step-by-step, state uncertainty)
- Update tests to reflect correct model name (llama3.1:8b-instruct)

## Testing
- All 177 dashboard tests pass
- All 32 prompt/tool tests pass

---------

Co-authored-by: Alexander Payne <apayne@MM.local>
											
										
										
											2026-02-26 23:39:13 -05:00
-												feat: quality analysis — bug fixes, mobile tests, HITL checklist

Senior architect review findings + remediations:

BUG FIX — critical interface mismatch
- TimmyAirLLMAgent only exposed print_response(); dashboard route calls
  agent.run() → AttributeError when AirLLM backend is selected.
  Added run() → RunResult(content) as primary inference entry point;
  print_response() now delegates to run() so both call sites share
  one inference path.
- Added RunResult dataclass for Agno-compatible structured return.

BUG FIX — hardcoded model name in health status partial
- health_status.html rendered literal "llama3.2" regardless of
  OLLAMA_MODEL env var. Route now passes settings.ollama_model to
  the template context; partial renders {{ model }} instead.

FEATURE — /mobile-test HITL checklist page
- 22 human-executable test scenarios across: Layout, Touch & Input,
  Chat behaviour, Health, Scroll, Notch/Home Bar, Live UI.
- Pass/Fail/Skip buttons with sessionStorage state persistence.
- Live progress bar + final score summary.
- TEST link added to Mission Control header for quick access on phone.

TEST — 32 new automated mobile quality tests (M1xx–M6xx)
- M1xx: viewport/meta tags (8 tests)
- M2xx: touch target sizing — 44 px min-height, manipulation (4 tests)
- M3xx: iOS zoom prevention, autocapitalize, enterkeyhint (5 tests)
- M4xx: HTMX robustness — hx-sync drop, disabled-elt, polling (5 tests)
- M5xx: safe-area insets, overscroll, dvh units (5 tests)
- M6xx: AirLLM interface contract — run(), RunResult, delegation (5 tests)

Total test count: 61 → 93 (all passing).

https://claude.ai/code/session_01RBuRCBXZNkAQQXXGiJNDmt

											
										
										
											2026-02-21 17:21:47 +00:00
+								def test_M501_safe_area_inset_top_in_header():
 								    """Header padding must accommodate the iPhone notch / status bar."""
 								    css = _css()
 								    assert "safe-area-inset-top" in css
 								def test_M502_safe_area_inset_bottom_in_footer():
 								    """Chat footer padding must clear the iPhone home indicator bar."""
 								    css = _css()
 								    assert "safe-area-inset-bottom" in css
 								def test_M503_overscroll_behavior_none():
 								    """overscroll-behavior: none prevents the jarring rubber-band effect."""
 								    css = _css()
 								    assert "overscroll-behavior: none" in css
 								def test_M504_webkit_overflow_scrolling_touch():
 								    """-webkit-overflow-scrolling: touch gives momentum scrolling on iOS."""
 								    css = _css()
 								    assert "-webkit-overflow-scrolling: touch" in css
 								def test_M505_dvh_units_used():
 								    """Dynamic viewport height (dvh) accounts for collapsing browser chrome."""
 								    css = _css()
 								    assert "dvh" in css
 								# ── M6xx — AirLLM backend interface contract ──────────────────────────────────
-												feat: Timmy fixes and improvements (#72)

* test: remove hardcoded sleeps, add pytest-timeout

- Replace fixed time.sleep() calls with intelligent polling or WebDriverWait
- Add pytest-timeout dependency and --timeout=30 to prevent hangs
- Fixes test flakiness and improves test suite speed

* feat: add Aider AI tool to Forge's toolkit

- Add Aider tool that calls local Ollama (qwen2.5:14b) for AI coding assist
- Register tool in Forge's code toolkit
- Add functional tests for the Aider tool

* config: add opencode.json with local Ollama provider for sovereign AI

* feat: Timmy fixes and improvements

## Bug Fixes
- Fix read_file path resolution: add ~ expansion, proper relative path handling
- Add repo_root to config.py with auto-detection from .git location
- Fix hardcoded llama3.2 - now dynamic from settings.ollama_model

## Timmy's Requests
- Add communication protocol to AGENTS.md (read context first, explain changes)
- Create DECISIONS.md for architectural decision documentation
- Add reasoning guidance to system prompts (step-by-step, state uncertainty)
- Update tests to reflect correct model name (llama3.1:8b-instruct)

## Testing
- All 177 dashboard tests pass
- All 32 prompt/tool tests pass

---------

Co-authored-by: Alexander Payne <apayne@MM.local>
											
										
										
											2026-02-26 23:39:13 -05:00
-												feat: quality analysis — bug fixes, mobile tests, HITL checklist

Senior architect review findings + remediations:

BUG FIX — critical interface mismatch
- TimmyAirLLMAgent only exposed print_response(); dashboard route calls
  agent.run() → AttributeError when AirLLM backend is selected.
  Added run() → RunResult(content) as primary inference entry point;
  print_response() now delegates to run() so both call sites share
  one inference path.
- Added RunResult dataclass for Agno-compatible structured return.

BUG FIX — hardcoded model name in health status partial
- health_status.html rendered literal "llama3.2" regardless of
  OLLAMA_MODEL env var. Route now passes settings.ollama_model to
  the template context; partial renders {{ model }} instead.

FEATURE — /mobile-test HITL checklist page
- 22 human-executable test scenarios across: Layout, Touch & Input,
  Chat behaviour, Health, Scroll, Notch/Home Bar, Live UI.
- Pass/Fail/Skip buttons with sessionStorage state persistence.
- Live progress bar + final score summary.
- TEST link added to Mission Control header for quick access on phone.

TEST — 32 new automated mobile quality tests (M1xx–M6xx)
- M1xx: viewport/meta tags (8 tests)
- M2xx: touch target sizing — 44 px min-height, manipulation (4 tests)
- M3xx: iOS zoom prevention, autocapitalize, enterkeyhint (5 tests)
- M4xx: HTMX robustness — hx-sync drop, disabled-elt, polling (5 tests)
- M5xx: safe-area insets, overscroll, dvh units (5 tests)
- M6xx: AirLLM interface contract — run(), RunResult, delegation (5 tests)

Total test count: 61 → 93 (all passing).

https://claude.ai/code/session_01RBuRCBXZNkAQQXXGiJNDmt

											
										
										
											2026-02-21 17:21:47 +00:00
+								def test_M601_airllm_agent_has_run_method():
 								    """TimmyAirLLMAgent must expose run() so the dashboard route can call it."""
 								    from timmy.backends import TimmyAirLLMAgent
-												feat: Timmy fixes and improvements (#72)

* test: remove hardcoded sleeps, add pytest-timeout

- Replace fixed time.sleep() calls with intelligent polling or WebDriverWait
- Add pytest-timeout dependency and --timeout=30 to prevent hangs
- Fixes test flakiness and improves test suite speed

* feat: add Aider AI tool to Forge's toolkit

- Add Aider tool that calls local Ollama (qwen2.5:14b) for AI coding assist
- Register tool in Forge's code toolkit
- Add functional tests for the Aider tool

* config: add opencode.json with local Ollama provider for sovereign AI

* feat: Timmy fixes and improvements

## Bug Fixes
- Fix read_file path resolution: add ~ expansion, proper relative path handling
- Add repo_root to config.py with auto-detection from .git location
- Fix hardcoded llama3.2 - now dynamic from settings.ollama_model

## Timmy's Requests
- Add communication protocol to AGENTS.md (read context first, explain changes)
- Create DECISIONS.md for architectural decision documentation
- Add reasoning guidance to system prompts (step-by-step, state uncertainty)
- Update tests to reflect correct model name (llama3.1:8b-instruct)

## Testing
- All 177 dashboard tests pass
- All 32 prompt/tool tests pass

---------

Co-authored-by: Alexander Payne <apayne@MM.local>
											
										
										
											2026-02-26 23:39:13 -05:00
-												ruff (#169)

* polish: streamline nav, extract inline styles, improve tablet UX

- Restructure desktop nav from 8+ flat links + overflow dropdown into
  5 grouped dropdowns (Core, Agents, Intel, System, More) matching
  the mobile menu structure to reduce decision fatigue
- Extract all inline styles from mission_control.html and base.html
  notification elements into mission-control.css with semantic classes
- Replace JS-built innerHTML with secure DOM construction in
  notification loader and chat history
- Add CONNECTING state to connection indicator (amber) instead of
  showing OFFLINE before WebSocket connects
- Add tablet breakpoint (1024px) with larger touch targets for
  Apple Pencil / stylus use and safe-area padding for iPad toolbar
- Add active-link highlighting in desktop dropdown menus
- Rename "Mission Control" page title to "System Overview" to
  disambiguate from the chat home page
- Add "Home — Timmy Time" page title to index.html

https://claude.ai/code/session_015uPUoKyYa8M2UAcyk5Gt6h

* fix(security): move auth-gate credentials to environment variables

Hardcoded username, password, and HMAC secret in auth-gate.py replaced
with os.environ lookups. Startup now refuses to run if any variable is
unset. Added AUTH_GATE_SECRET/USER/PASS to .env.example.

https://claude.ai/code/session_015uPUoKyYa8M2UAcyk5Gt6h

* refactor(tooling): migrate from black+isort+bandit to ruff

Replace three separate linting/formatting tools with a single ruff
invocation. Updates tox.ini (lint, format, pre-push, pre-commit envs),
.pre-commit-config.yaml, and CI workflow. Fixes all ruff errors
including unused imports, missing raise-from, and undefined names.
Ruff config maps existing bandit skips to equivalent S-rules.

https://claude.ai/code/session_015uPUoKyYa8M2UAcyk5Gt6h

---------

Co-authored-by: Claude <noreply@anthropic.com>
											
										
										
											2026-03-11 12:23:35 -04:00
+								    assert hasattr(TimmyAirLLMAgent, "run"), (
 								        "TimmyAirLLMAgent is missing run() — dashboard will fail with AirLLM backend"
 								    )
-												feat: quality analysis — bug fixes, mobile tests, HITL checklist

Senior architect review findings + remediations:

BUG FIX — critical interface mismatch
- TimmyAirLLMAgent only exposed print_response(); dashboard route calls
  agent.run() → AttributeError when AirLLM backend is selected.
  Added run() → RunResult(content) as primary inference entry point;
  print_response() now delegates to run() so both call sites share
  one inference path.
- Added RunResult dataclass for Agno-compatible structured return.

BUG FIX — hardcoded model name in health status partial
- health_status.html rendered literal "llama3.2" regardless of
  OLLAMA_MODEL env var. Route now passes settings.ollama_model to
  the template context; partial renders {{ model }} instead.

FEATURE — /mobile-test HITL checklist page
- 22 human-executable test scenarios across: Layout, Touch & Input,
  Chat behaviour, Health, Scroll, Notch/Home Bar, Live UI.
- Pass/Fail/Skip buttons with sessionStorage state persistence.
- Live progress bar + final score summary.
- TEST link added to Mission Control header for quick access on phone.

TEST — 32 new automated mobile quality tests (M1xx–M6xx)
- M1xx: viewport/meta tags (8 tests)
- M2xx: touch target sizing — 44 px min-height, manipulation (4 tests)
- M3xx: iOS zoom prevention, autocapitalize, enterkeyhint (5 tests)
- M4xx: HTMX robustness — hx-sync drop, disabled-elt, polling (5 tests)
- M5xx: safe-area insets, overscroll, dvh units (5 tests)
- M6xx: AirLLM interface contract — run(), RunResult, delegation (5 tests)

Total test count: 61 → 93 (all passing).

https://claude.ai/code/session_01RBuRCBXZNkAQQXXGiJNDmt

											
										
										
											2026-02-21 17:21:47 +00:00
 								def test_M602_airllm_run_returns_content_attribute():
 								    """run() must return an object with a .content attribute (Agno RunResponse compat)."""
 								    with patch("timmy.backends.is_apple_silicon", return_value=False):
 								        from timmy.backends import TimmyAirLLMAgent
-												feat: Timmy fixes and improvements (#72)

* test: remove hardcoded sleeps, add pytest-timeout

- Replace fixed time.sleep() calls with intelligent polling or WebDriverWait
- Add pytest-timeout dependency and --timeout=30 to prevent hangs
- Fixes test flakiness and improves test suite speed

* feat: add Aider AI tool to Forge's toolkit

- Add Aider tool that calls local Ollama (qwen2.5:14b) for AI coding assist
- Register tool in Forge's code toolkit
- Add functional tests for the Aider tool

* config: add opencode.json with local Ollama provider for sovereign AI

* feat: Timmy fixes and improvements

## Bug Fixes
- Fix read_file path resolution: add ~ expansion, proper relative path handling
- Add repo_root to config.py with auto-detection from .git location
- Fix hardcoded llama3.2 - now dynamic from settings.ollama_model

## Timmy's Requests
- Add communication protocol to AGENTS.md (read context first, explain changes)
- Create DECISIONS.md for architectural decision documentation
- Add reasoning guidance to system prompts (step-by-step, state uncertainty)
- Update tests to reflect correct model name (llama3.1:8b-instruct)

## Testing
- All 177 dashboard tests pass
- All 32 prompt/tool tests pass

---------

Co-authored-by: Alexander Payne <apayne@MM.local>
											
										
										
											2026-02-26 23:39:13 -05:00
-												feat: quality analysis — bug fixes, mobile tests, HITL checklist

Senior architect review findings + remediations:

BUG FIX — critical interface mismatch
- TimmyAirLLMAgent only exposed print_response(); dashboard route calls
  agent.run() → AttributeError when AirLLM backend is selected.
  Added run() → RunResult(content) as primary inference entry point;
  print_response() now delegates to run() so both call sites share
  one inference path.
- Added RunResult dataclass for Agno-compatible structured return.

BUG FIX — hardcoded model name in health status partial
- health_status.html rendered literal "llama3.2" regardless of
  OLLAMA_MODEL env var. Route now passes settings.ollama_model to
  the template context; partial renders {{ model }} instead.

FEATURE — /mobile-test HITL checklist page
- 22 human-executable test scenarios across: Layout, Touch & Input,
  Chat behaviour, Health, Scroll, Notch/Home Bar, Live UI.
- Pass/Fail/Skip buttons with sessionStorage state persistence.
- Live progress bar + final score summary.
- TEST link added to Mission Control header for quick access on phone.

TEST — 32 new automated mobile quality tests (M1xx–M6xx)
- M1xx: viewport/meta tags (8 tests)
- M2xx: touch target sizing — 44 px min-height, manipulation (4 tests)
- M3xx: iOS zoom prevention, autocapitalize, enterkeyhint (5 tests)
- M4xx: HTMX robustness — hx-sync drop, disabled-elt, polling (5 tests)
- M5xx: safe-area insets, overscroll, dvh units (5 tests)
- M6xx: AirLLM interface contract — run(), RunResult, delegation (5 tests)

Total test count: 61 → 93 (all passing).

https://claude.ai/code/session_01RBuRCBXZNkAQQXXGiJNDmt

											
										
										
											2026-02-21 17:21:47 +00:00
+								        agent = TimmyAirLLMAgent(model_size="8b")
 								    mock_model = MagicMock()
 								    mock_tokenizer = MagicMock()
 								    input_ids_mock = MagicMock()
 								    input_ids_mock.shape = [1, 5]
 								    mock_tokenizer.return_value = {"input_ids": input_ids_mock}
 								    mock_tokenizer.decode.return_value = "Sir, affirmative."
 								    mock_model.tokenizer = mock_tokenizer
 								    mock_model.generate.return_value = [list(range(10))]
 								    agent._model = mock_model
 								    result = agent.run("test")
 								    assert hasattr(result, "content"), "run() result must have a .content attribute"
 								    assert isinstance(result.content, str)
 								def test_M603_airllm_run_updates_history():
 								    """run() must update _history so multi-turn context is preserved."""
 								    with patch("timmy.backends.is_apple_silicon", return_value=False):
 								        from timmy.backends import TimmyAirLLMAgent
-												feat: Timmy fixes and improvements (#72)

* test: remove hardcoded sleeps, add pytest-timeout

- Replace fixed time.sleep() calls with intelligent polling or WebDriverWait
- Add pytest-timeout dependency and --timeout=30 to prevent hangs
- Fixes test flakiness and improves test suite speed

* feat: add Aider AI tool to Forge's toolkit

- Add Aider tool that calls local Ollama (qwen2.5:14b) for AI coding assist
- Register tool in Forge's code toolkit
- Add functional tests for the Aider tool

* config: add opencode.json with local Ollama provider for sovereign AI

* feat: Timmy fixes and improvements

## Bug Fixes
- Fix read_file path resolution: add ~ expansion, proper relative path handling
- Add repo_root to config.py with auto-detection from .git location
- Fix hardcoded llama3.2 - now dynamic from settings.ollama_model

## Timmy's Requests
- Add communication protocol to AGENTS.md (read context first, explain changes)
- Create DECISIONS.md for architectural decision documentation
- Add reasoning guidance to system prompts (step-by-step, state uncertainty)
- Update tests to reflect correct model name (llama3.1:8b-instruct)

## Testing
- All 177 dashboard tests pass
- All 32 prompt/tool tests pass

---------

Co-authored-by: Alexander Payne <apayne@MM.local>
											
										
										
											2026-02-26 23:39:13 -05:00
-												feat: quality analysis — bug fixes, mobile tests, HITL checklist

Senior architect review findings + remediations:

BUG FIX — critical interface mismatch
- TimmyAirLLMAgent only exposed print_response(); dashboard route calls
  agent.run() → AttributeError when AirLLM backend is selected.
  Added run() → RunResult(content) as primary inference entry point;
  print_response() now delegates to run() so both call sites share
  one inference path.
- Added RunResult dataclass for Agno-compatible structured return.

BUG FIX — hardcoded model name in health status partial
- health_status.html rendered literal "llama3.2" regardless of
  OLLAMA_MODEL env var. Route now passes settings.ollama_model to
  the template context; partial renders {{ model }} instead.

FEATURE — /mobile-test HITL checklist page
- 22 human-executable test scenarios across: Layout, Touch & Input,
  Chat behaviour, Health, Scroll, Notch/Home Bar, Live UI.
- Pass/Fail/Skip buttons with sessionStorage state persistence.
- Live progress bar + final score summary.
- TEST link added to Mission Control header for quick access on phone.

TEST — 32 new automated mobile quality tests (M1xx–M6xx)
- M1xx: viewport/meta tags (8 tests)
- M2xx: touch target sizing — 44 px min-height, manipulation (4 tests)
- M3xx: iOS zoom prevention, autocapitalize, enterkeyhint (5 tests)
- M4xx: HTMX robustness — hx-sync drop, disabled-elt, polling (5 tests)
- M5xx: safe-area insets, overscroll, dvh units (5 tests)
- M6xx: AirLLM interface contract — run(), RunResult, delegation (5 tests)

Total test count: 61 → 93 (all passing).

https://claude.ai/code/session_01RBuRCBXZNkAQQXXGiJNDmt

											
										
										
											2026-02-21 17:21:47 +00:00
+								        agent = TimmyAirLLMAgent(model_size="8b")
 								    mock_model = MagicMock()
 								    mock_tokenizer = MagicMock()
 								    input_ids_mock = MagicMock()
 								    input_ids_mock.shape = [1, 5]
 								    mock_tokenizer.return_value = {"input_ids": input_ids_mock}
 								    mock_tokenizer.decode.return_value = "Acknowledged."
 								    mock_model.tokenizer = mock_tokenizer
 								    mock_model.generate.return_value = [list(range(10))]
 								    agent._model = mock_model
 								    assert len(agent._history) == 0
 								    agent.run("hello")
 								    assert len(agent._history) == 2
 								    assert any("hello" in h for h in agent._history)
 								def test_M604_airllm_print_response_delegates_to_run():
 								    """print_response must use run() so both interfaces share one inference path."""
 								    with patch("timmy.backends.is_apple_silicon", return_value=False):
-												feat: code quality audit + autoresearch integration + infra hardening (#150)
											
										
										
											2026-03-08 12:50:44 -04:00
+								        from timmy.backends import RunResult, TimmyAirLLMAgent
-												feat: Timmy fixes and improvements (#72)

* test: remove hardcoded sleeps, add pytest-timeout

- Replace fixed time.sleep() calls with intelligent polling or WebDriverWait
- Add pytest-timeout dependency and --timeout=30 to prevent hangs
- Fixes test flakiness and improves test suite speed

* feat: add Aider AI tool to Forge's toolkit

- Add Aider tool that calls local Ollama (qwen2.5:14b) for AI coding assist
- Register tool in Forge's code toolkit
- Add functional tests for the Aider tool

* config: add opencode.json with local Ollama provider for sovereign AI

* feat: Timmy fixes and improvements

## Bug Fixes
- Fix read_file path resolution: add ~ expansion, proper relative path handling
- Add repo_root to config.py with auto-detection from .git location
- Fix hardcoded llama3.2 - now dynamic from settings.ollama_model

## Timmy's Requests
- Add communication protocol to AGENTS.md (read context first, explain changes)
- Create DECISIONS.md for architectural decision documentation
- Add reasoning guidance to system prompts (step-by-step, state uncertainty)
- Update tests to reflect correct model name (llama3.1:8b-instruct)

## Testing
- All 177 dashboard tests pass
- All 32 prompt/tool tests pass

---------

Co-authored-by: Alexander Payne <apayne@MM.local>
											
										
										
											2026-02-26 23:39:13 -05:00
-												feat: quality analysis — bug fixes, mobile tests, HITL checklist

Senior architect review findings + remediations:

BUG FIX — critical interface mismatch
- TimmyAirLLMAgent only exposed print_response(); dashboard route calls
  agent.run() → AttributeError when AirLLM backend is selected.
  Added run() → RunResult(content) as primary inference entry point;
  print_response() now delegates to run() so both call sites share
  one inference path.
- Added RunResult dataclass for Agno-compatible structured return.

BUG FIX — hardcoded model name in health status partial
- health_status.html rendered literal "llama3.2" regardless of
  OLLAMA_MODEL env var. Route now passes settings.ollama_model to
  the template context; partial renders {{ model }} instead.

FEATURE — /mobile-test HITL checklist page
- 22 human-executable test scenarios across: Layout, Touch & Input,
  Chat behaviour, Health, Scroll, Notch/Home Bar, Live UI.
- Pass/Fail/Skip buttons with sessionStorage state persistence.
- Live progress bar + final score summary.
- TEST link added to Mission Control header for quick access on phone.

TEST — 32 new automated mobile quality tests (M1xx–M6xx)
- M1xx: viewport/meta tags (8 tests)
- M2xx: touch target sizing — 44 px min-height, manipulation (4 tests)
- M3xx: iOS zoom prevention, autocapitalize, enterkeyhint (5 tests)
- M4xx: HTMX robustness — hx-sync drop, disabled-elt, polling (5 tests)
- M5xx: safe-area insets, overscroll, dvh units (5 tests)
- M6xx: AirLLM interface contract — run(), RunResult, delegation (5 tests)

Total test count: 61 → 93 (all passing).

https://claude.ai/code/session_01RBuRCBXZNkAQQXXGiJNDmt

											
										
										
											2026-02-21 17:21:47 +00:00
+								        agent = TimmyAirLLMAgent(model_size="8b")
-												feat: Timmy fixes and improvements (#72)

* test: remove hardcoded sleeps, add pytest-timeout

- Replace fixed time.sleep() calls with intelligent polling or WebDriverWait
- Add pytest-timeout dependency and --timeout=30 to prevent hangs
- Fixes test flakiness and improves test suite speed

* feat: add Aider AI tool to Forge's toolkit

- Add Aider tool that calls local Ollama (qwen2.5:14b) for AI coding assist
- Register tool in Forge's code toolkit
- Add functional tests for the Aider tool

* config: add opencode.json with local Ollama provider for sovereign AI

* feat: Timmy fixes and improvements

## Bug Fixes
- Fix read_file path resolution: add ~ expansion, proper relative path handling
- Add repo_root to config.py with auto-detection from .git location
- Fix hardcoded llama3.2 - now dynamic from settings.ollama_model

## Timmy's Requests
- Add communication protocol to AGENTS.md (read context first, explain changes)
- Create DECISIONS.md for architectural decision documentation
- Add reasoning guidance to system prompts (step-by-step, state uncertainty)
- Update tests to reflect correct model name (llama3.1:8b-instruct)

## Testing
- All 177 dashboard tests pass
- All 32 prompt/tool tests pass

---------

Co-authored-by: Alexander Payne <apayne@MM.local>
											
										
										
											2026-02-26 23:39:13 -05:00
+								    with (
 								        patch.object(agent, "run", return_value=RunResult(content="ok")) as mock_run,
 								        patch.object(agent, "_render"),
 								    ):
-												feat: quality analysis — bug fixes, mobile tests, HITL checklist

Senior architect review findings + remediations:

BUG FIX — critical interface mismatch
- TimmyAirLLMAgent only exposed print_response(); dashboard route calls
  agent.run() → AttributeError when AirLLM backend is selected.
  Added run() → RunResult(content) as primary inference entry point;
  print_response() now delegates to run() so both call sites share
  one inference path.
- Added RunResult dataclass for Agno-compatible structured return.

BUG FIX — hardcoded model name in health status partial
- health_status.html rendered literal "llama3.2" regardless of
  OLLAMA_MODEL env var. Route now passes settings.ollama_model to
  the template context; partial renders {{ model }} instead.

FEATURE — /mobile-test HITL checklist page
- 22 human-executable test scenarios across: Layout, Touch & Input,
  Chat behaviour, Health, Scroll, Notch/Home Bar, Live UI.
- Pass/Fail/Skip buttons with sessionStorage state persistence.
- Live progress bar + final score summary.
- TEST link added to Mission Control header for quick access on phone.

TEST — 32 new automated mobile quality tests (M1xx–M6xx)
- M1xx: viewport/meta tags (8 tests)
- M2xx: touch target sizing — 44 px min-height, manipulation (4 tests)
- M3xx: iOS zoom prevention, autocapitalize, enterkeyhint (5 tests)
- M4xx: HTMX robustness — hx-sync drop, disabled-elt, polling (5 tests)
- M5xx: safe-area insets, overscroll, dvh units (5 tests)
- M6xx: AirLLM interface contract — run(), RunResult, delegation (5 tests)

Total test count: 61 → 93 (all passing).

https://claude.ai/code/session_01RBuRCBXZNkAQQXXGiJNDmt

											
										
										
											2026-02-21 17:21:47 +00:00
+								        agent.print_response("hello", stream=True)
 								    mock_run.assert_called_once_with("hello", stream=True)
 								def test_M605_health_status_passes_model_to_template(client):
 								    """Health status partial must receive the configured model name, not a hardcoded string."""
-												feat: upgrade to qwen3.5, self-hosted Gitea CI, optimize Docker image

Model upgrade:
- qwen2.5:14b → qwen3.5:latest across config, tools, and docs
- Added qwen3.5 to multimodal model registry

Self-hosted Gitea CI:
- .gitea/workflows/tests.yml: lint + test jobs via act_runner
- Unified Dockerfile: pre-baked deps from poetry.lock for fast CI
- sitepackages=true in tox for ~2s dep resolution (was ~40s)
- OLLAMA_URL set to dead port in CI to prevent real LLM calls

Test isolation fixes:
- Smoke test fixture mocks create_timmy (was hitting real Ollama)
- WebSocket sends initial_state before joining broadcast pool (race fix)
- Tests use settings.ollama_model/url instead of hardcoded values
- skip_ci marker for Ollama-dependent tests, excluded in CI tox envs

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-11 18:36:42 -04:00
+								    from config import settings
-												feat: Timmy fixes and improvements (#72)

* test: remove hardcoded sleeps, add pytest-timeout

- Replace fixed time.sleep() calls with intelligent polling or WebDriverWait
- Add pytest-timeout dependency and --timeout=30 to prevent hangs
- Fixes test flakiness and improves test suite speed

* feat: add Aider AI tool to Forge's toolkit

- Add Aider tool that calls local Ollama (qwen2.5:14b) for AI coding assist
- Register tool in Forge's code toolkit
- Add functional tests for the Aider tool

* config: add opencode.json with local Ollama provider for sovereign AI

* feat: Timmy fixes and improvements

## Bug Fixes
- Fix read_file path resolution: add ~ expansion, proper relative path handling
- Add repo_root to config.py with auto-detection from .git location
- Fix hardcoded llama3.2 - now dynamic from settings.ollama_model

## Timmy's Requests
- Add communication protocol to AGENTS.md (read context first, explain changes)
- Create DECISIONS.md for architectural decision documentation
- Add reasoning guidance to system prompts (step-by-step, state uncertainty)
- Update tests to reflect correct model name (llama3.1:8b-instruct)

## Testing
- All 177 dashboard tests pass
- All 32 prompt/tool tests pass

---------

Co-authored-by: Alexander Payne <apayne@MM.local>
											
										
										
											2026-02-26 23:39:13 -05:00
+								    with patch(
 								        "dashboard.routes.health.check_ollama",
 								        new_callable=AsyncMock,
 								        return_value=True,
 								    ):
-												feat: quality analysis — bug fixes, mobile tests, HITL checklist

Senior architect review findings + remediations:

BUG FIX — critical interface mismatch
- TimmyAirLLMAgent only exposed print_response(); dashboard route calls
  agent.run() → AttributeError when AirLLM backend is selected.
  Added run() → RunResult(content) as primary inference entry point;
  print_response() now delegates to run() so both call sites share
  one inference path.
- Added RunResult dataclass for Agno-compatible structured return.

BUG FIX — hardcoded model name in health status partial
- health_status.html rendered literal "llama3.2" regardless of
  OLLAMA_MODEL env var. Route now passes settings.ollama_model to
  the template context; partial renders {{ model }} instead.

FEATURE — /mobile-test HITL checklist page
- 22 human-executable test scenarios across: Layout, Touch & Input,
  Chat behaviour, Health, Scroll, Notch/Home Bar, Live UI.
- Pass/Fail/Skip buttons with sessionStorage state persistence.
- Live progress bar + final score summary.
- TEST link added to Mission Control header for quick access on phone.

TEST — 32 new automated mobile quality tests (M1xx–M6xx)
- M1xx: viewport/meta tags (8 tests)
- M2xx: touch target sizing — 44 px min-height, manipulation (4 tests)
- M3xx: iOS zoom prevention, autocapitalize, enterkeyhint (5 tests)
- M4xx: HTMX robustness — hx-sync drop, disabled-elt, polling (5 tests)
- M5xx: safe-area insets, overscroll, dvh units (5 tests)
- M6xx: AirLLM interface contract — run(), RunResult, delegation (5 tests)

Total test count: 61 → 93 (all passing).

https://claude.ai/code/session_01RBuRCBXZNkAQQXXGiJNDmt

											
										
										
											2026-02-21 17:21:47 +00:00
+								        response = client.get("/health/status")
-												feat: upgrade to qwen3.5, self-hosted Gitea CI, optimize Docker image

Model upgrade:
- qwen2.5:14b → qwen3.5:latest across config, tools, and docs
- Added qwen3.5 to multimodal model registry

Self-hosted Gitea CI:
- .gitea/workflows/tests.yml: lint + test jobs via act_runner
- Unified Dockerfile: pre-baked deps from poetry.lock for fast CI
- sitepackages=true in tox for ~2s dep resolution (was ~40s)
- OLLAMA_URL set to dead port in CI to prevent real LLM calls

Test isolation fixes:
- Smoke test fixture mocks create_timmy (was hitting real Ollama)
- WebSocket sends initial_state before joining broadcast pool (race fix)
- Tests use settings.ollama_model/url instead of hardcoded values
- skip_ci marker for Ollama-dependent tests, excluded in CI tox envs

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-11 18:36:42 -04:00
+								    # Model name should come from settings, not be hardcoded
-												feat: quality analysis — bug fixes, mobile tests, HITL checklist

Senior architect review findings + remediations:

BUG FIX — critical interface mismatch
- TimmyAirLLMAgent only exposed print_response(); dashboard route calls
  agent.run() → AttributeError when AirLLM backend is selected.
  Added run() → RunResult(content) as primary inference entry point;
  print_response() now delegates to run() so both call sites share
  one inference path.
- Added RunResult dataclass for Agno-compatible structured return.

BUG FIX — hardcoded model name in health status partial
- health_status.html rendered literal "llama3.2" regardless of
  OLLAMA_MODEL env var. Route now passes settings.ollama_model to
  the template context; partial renders {{ model }} instead.

FEATURE — /mobile-test HITL checklist page
- 22 human-executable test scenarios across: Layout, Touch & Input,
  Chat behaviour, Health, Scroll, Notch/Home Bar, Live UI.
- Pass/Fail/Skip buttons with sessionStorage state persistence.
- Live progress bar + final score summary.
- TEST link added to Mission Control header for quick access on phone.

TEST — 32 new automated mobile quality tests (M1xx–M6xx)
- M1xx: viewport/meta tags (8 tests)
- M2xx: touch target sizing — 44 px min-height, manipulation (4 tests)
- M3xx: iOS zoom prevention, autocapitalize, enterkeyhint (5 tests)
- M4xx: HTMX robustness — hx-sync drop, disabled-elt, polling (5 tests)
- M5xx: safe-area insets, overscroll, dvh units (5 tests)
- M6xx: AirLLM interface contract — run(), RunResult, delegation (5 tests)

Total test count: 61 → 93 (all passing).

https://claude.ai/code/session_01RBuRCBXZNkAQQXXGiJNDmt

											
										
										
											2026-02-21 17:21:47 +00:00
+								    assert response.status_code == 200
-												feat: upgrade to qwen3.5, self-hosted Gitea CI, optimize Docker image

Model upgrade:
- qwen2.5:14b → qwen3.5:latest across config, tools, and docs
- Added qwen3.5 to multimodal model registry

Self-hosted Gitea CI:
- .gitea/workflows/tests.yml: lint + test jobs via act_runner
- Unified Dockerfile: pre-baked deps from poetry.lock for fast CI
- sitepackages=true in tox for ~2s dep resolution (was ~40s)
- OLLAMA_URL set to dead port in CI to prevent real LLM calls

Test isolation fixes:
- Smoke test fixture mocks create_timmy (was hitting real Ollama)
- WebSocket sends initial_state before joining broadcast pool (race fix)
- Tests use settings.ollama_model/url instead of hardcoded values
- skip_ci marker for Ollama-dependent tests, excluded in CI tox envs

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-11 18:36:42 -04:00
+								    model_short = settings.ollama_model.split(":")[0]
 								    assert model_short in response.text
-												feat: senior architect quality analysis + XSS fixes + HITL guide

- Add QUALITY_ANALYSIS.md — 10-point architect review covering
  architecture coherence, completeness (~35-40% vs vision), mobile UX,
  security, test coverage, code quality, and DX
- Fix P0 XSS: mobile.html chat input now uses DOM textContent instead
  of innerHTML string interpolation with raw user input
- Fix P0 XSS: swarm_live.html agent/auction rendering rewritten with
  safe DOM methods (_t/_el helpers) — no more ${agent.name} in innerHTML
- Add M7xx test category (4 new tests) covering XSS prevention assertions;
  total suite now 232 passing (was 228)
- HITL session guide included in analysis with step-by-step phone test
  instructions and critical scenario priority ordering

https://claude.ai/code/session_0183Nzcy7TMqjrAopnTtygds

											
										
										
											2026-02-21 18:11:22 +00:00
 								# ── M7xx — XSS prevention ─────────────────────────────────────────────────────
-												feat: Timmy fixes and improvements (#72)

* test: remove hardcoded sleeps, add pytest-timeout

- Replace fixed time.sleep() calls with intelligent polling or WebDriverWait
- Add pytest-timeout dependency and --timeout=30 to prevent hangs
- Fixes test flakiness and improves test suite speed

* feat: add Aider AI tool to Forge's toolkit

- Add Aider tool that calls local Ollama (qwen2.5:14b) for AI coding assist
- Register tool in Forge's code toolkit
- Add functional tests for the Aider tool

* config: add opencode.json with local Ollama provider for sovereign AI

* feat: Timmy fixes and improvements

## Bug Fixes
- Fix read_file path resolution: add ~ expansion, proper relative path handling
- Add repo_root to config.py with auto-detection from .git location
- Fix hardcoded llama3.2 - now dynamic from settings.ollama_model

## Timmy's Requests
- Add communication protocol to AGENTS.md (read context first, explain changes)
- Create DECISIONS.md for architectural decision documentation
- Add reasoning guidance to system prompts (step-by-step, state uncertainty)
- Update tests to reflect correct model name (llama3.1:8b-instruct)

## Testing
- All 177 dashboard tests pass
- All 32 prompt/tool tests pass

---------

Co-authored-by: Alexander Payne <apayne@MM.local>
											
										
										
											2026-02-26 23:39:13 -05:00
-												feat: senior architect quality analysis + XSS fixes + HITL guide

- Add QUALITY_ANALYSIS.md — 10-point architect review covering
  architecture coherence, completeness (~35-40% vs vision), mobile UX,
  security, test coverage, code quality, and DX
- Fix P0 XSS: mobile.html chat input now uses DOM textContent instead
  of innerHTML string interpolation with raw user input
- Fix P0 XSS: swarm_live.html agent/auction rendering rewritten with
  safe DOM methods (_t/_el helpers) — no more ${agent.name} in innerHTML
- Add M7xx test category (4 new tests) covering XSS prevention assertions;
  total suite now 232 passing (was 228)
- HITL session guide included in analysis with step-by-step phone test
  instructions and critical scenario priority ordering

https://claude.ai/code/session_0183Nzcy7TMqjrAopnTtygds

											
										
										
											2026-02-21 18:11:22 +00:00
+								def _mobile_html() -> str:
 								    """Read the mobile template source."""
-												feat: code quality audit + autoresearch integration + infra hardening (#150)
											
										
										
											2026-03-08 12:50:44 -04:00
+								    path = Path(__file__).parent.parent.parent / "src" / "dashboard" / "templates" / "mobile.html"
-												feat: senior architect quality analysis + XSS fixes + HITL guide

- Add QUALITY_ANALYSIS.md — 10-point architect review covering
  architecture coherence, completeness (~35-40% vs vision), mobile UX,
  security, test coverage, code quality, and DX
- Fix P0 XSS: mobile.html chat input now uses DOM textContent instead
  of innerHTML string interpolation with raw user input
- Fix P0 XSS: swarm_live.html agent/auction rendering rewritten with
  safe DOM methods (_t/_el helpers) — no more ${agent.name} in innerHTML
- Add M7xx test category (4 new tests) covering XSS prevention assertions;
  total suite now 232 passing (was 228)
- HITL session guide included in analysis with step-by-step phone test
  instructions and critical scenario priority ordering

https://claude.ai/code/session_0183Nzcy7TMqjrAopnTtygds

											
										
										
											2026-02-21 18:11:22 +00:00
+								    return path.read_text()
 								def _swarm_live_html() -> str:
 								    """Read the swarm live template source."""
-												feat: Timmy fixes and improvements (#72)

* test: remove hardcoded sleeps, add pytest-timeout

- Replace fixed time.sleep() calls with intelligent polling or WebDriverWait
- Add pytest-timeout dependency and --timeout=30 to prevent hangs
- Fixes test flakiness and improves test suite speed

* feat: add Aider AI tool to Forge's toolkit

- Add Aider tool that calls local Ollama (qwen2.5:14b) for AI coding assist
- Register tool in Forge's code toolkit
- Add functional tests for the Aider tool

* config: add opencode.json with local Ollama provider for sovereign AI

* feat: Timmy fixes and improvements

## Bug Fixes
- Fix read_file path resolution: add ~ expansion, proper relative path handling
- Add repo_root to config.py with auto-detection from .git location
- Fix hardcoded llama3.2 - now dynamic from settings.ollama_model

## Timmy's Requests
- Add communication protocol to AGENTS.md (read context first, explain changes)
- Create DECISIONS.md for architectural decision documentation
- Add reasoning guidance to system prompts (step-by-step, state uncertainty)
- Update tests to reflect correct model name (llama3.1:8b-instruct)

## Testing
- All 177 dashboard tests pass
- All 32 prompt/tool tests pass

---------

Co-authored-by: Alexander Payne <apayne@MM.local>
											
										
										
											2026-02-26 23:39:13 -05:00
+								    path = (
-												feat: code quality audit + autoresearch integration + infra hardening (#150)
											
										
										
											2026-03-08 12:50:44 -04:00
+								        Path(__file__).parent.parent.parent / "src" / "dashboard" / "templates" / "swarm_live.html"
-												feat: Timmy fixes and improvements (#72)

* test: remove hardcoded sleeps, add pytest-timeout

- Replace fixed time.sleep() calls with intelligent polling or WebDriverWait
- Add pytest-timeout dependency and --timeout=30 to prevent hangs
- Fixes test flakiness and improves test suite speed

* feat: add Aider AI tool to Forge's toolkit

- Add Aider tool that calls local Ollama (qwen2.5:14b) for AI coding assist
- Register tool in Forge's code toolkit
- Add functional tests for the Aider tool

* config: add opencode.json with local Ollama provider for sovereign AI

* feat: Timmy fixes and improvements

## Bug Fixes
- Fix read_file path resolution: add ~ expansion, proper relative path handling
- Add repo_root to config.py with auto-detection from .git location
- Fix hardcoded llama3.2 - now dynamic from settings.ollama_model

## Timmy's Requests
- Add communication protocol to AGENTS.md (read context first, explain changes)
- Create DECISIONS.md for architectural decision documentation
- Add reasoning guidance to system prompts (step-by-step, state uncertainty)
- Update tests to reflect correct model name (llama3.1:8b-instruct)

## Testing
- All 177 dashboard tests pass
- All 32 prompt/tool tests pass

---------

Co-authored-by: Alexander Payne <apayne@MM.local>
											
										
										
											2026-02-26 23:39:13 -05:00
+								    )
-												feat: senior architect quality analysis + XSS fixes + HITL guide

- Add QUALITY_ANALYSIS.md — 10-point architect review covering
  architecture coherence, completeness (~35-40% vs vision), mobile UX,
  security, test coverage, code quality, and DX
- Fix P0 XSS: mobile.html chat input now uses DOM textContent instead
  of innerHTML string interpolation with raw user input
- Fix P0 XSS: swarm_live.html agent/auction rendering rewritten with
  safe DOM methods (_t/_el helpers) — no more ${agent.name} in innerHTML
- Add M7xx test category (4 new tests) covering XSS prevention assertions;
  total suite now 232 passing (was 228)
- HITL session guide included in analysis with step-by-step phone test
  instructions and critical scenario priority ordering

https://claude.ai/code/session_0183Nzcy7TMqjrAopnTtygds

											
										
										
											2026-02-21 18:11:22 +00:00
+								    return path.read_text()
 								def test_M701_mobile_chat_no_raw_message_interpolation():
 								    """mobile.html must not interpolate ${message} directly into innerHTML — XSS risk."""
 								    html = _mobile_html()
 								    # The vulnerable pattern is `${message}` inside a template literal assigned to innerHTML
 								    # After the fix, message must only appear via textContent assignment
-												ruff (#169)

* polish: streamline nav, extract inline styles, improve tablet UX

- Restructure desktop nav from 8+ flat links + overflow dropdown into
  5 grouped dropdowns (Core, Agents, Intel, System, More) matching
  the mobile menu structure to reduce decision fatigue
- Extract all inline styles from mission_control.html and base.html
  notification elements into mission-control.css with semantic classes
- Replace JS-built innerHTML with secure DOM construction in
  notification loader and chat history
- Add CONNECTING state to connection indicator (amber) instead of
  showing OFFLINE before WebSocket connects
- Add tablet breakpoint (1024px) with larger touch targets for
  Apple Pencil / stylus use and safe-area padding for iPad toolbar
- Add active-link highlighting in desktop dropdown menus
- Rename "Mission Control" page title to "System Overview" to
  disambiguate from the chat home page
- Add "Home — Timmy Time" page title to index.html

https://claude.ai/code/session_015uPUoKyYa8M2UAcyk5Gt6h

* fix(security): move auth-gate credentials to environment variables

Hardcoded username, password, and HMAC secret in auth-gate.py replaced
with os.environ lookups. Startup now refuses to run if any variable is
unset. Added AUTH_GATE_SECRET/USER/PASS to .env.example.

https://claude.ai/code/session_015uPUoKyYa8M2UAcyk5Gt6h

* refactor(tooling): migrate from black+isort+bandit to ruff

Replace three separate linting/formatting tools with a single ruff
invocation. Updates tox.ini (lint, format, pre-push, pre-commit envs),
.pre-commit-config.yaml, and CI workflow. Fixes all ruff errors
including unused imports, missing raise-from, and undefined names.
Ruff config maps existing bandit skips to equivalent S-rules.

https://claude.ai/code/session_015uPUoKyYa8M2UAcyk5Gt6h

---------

Co-authored-by: Claude <noreply@anthropic.com>
											
										
										
											2026-03-11 12:23:35 -04:00
+								    assert "textContent = message" in html or "textContent=message" in html, (
 								        "mobile.html still uses innerHTML + ${message} interpolation — XSS vulnerability"
 								    )
-												feat: senior architect quality analysis + XSS fixes + HITL guide

- Add QUALITY_ANALYSIS.md — 10-point architect review covering
  architecture coherence, completeness (~35-40% vs vision), mobile UX,
  security, test coverage, code quality, and DX
- Fix P0 XSS: mobile.html chat input now uses DOM textContent instead
  of innerHTML string interpolation with raw user input
- Fix P0 XSS: swarm_live.html agent/auction rendering rewritten with
  safe DOM methods (_t/_el helpers) — no more ${agent.name} in innerHTML
- Add M7xx test category (4 new tests) covering XSS prevention assertions;
  total suite now 232 passing (was 228)
- HITL session guide included in analysis with step-by-step phone test
  instructions and critical scenario priority ordering

https://claude.ai/code/session_0183Nzcy7TMqjrAopnTtygds

											
										
										
											2026-02-21 18:11:22 +00:00
 								def test_M702_mobile_chat_user_input_not_in_innerhtml_template_literal():
 								    """${message} must not appear inside a backtick string that is assigned to innerHTML."""
 								    html = _mobile_html()
 								    # Find all innerHTML += `...` blocks and verify none contain ${message}
 								    blocks = re.findall(r"innerHTML\s*\+=?\s*`([^`]*)`", html, re.DOTALL)
 								    for block in blocks:
-												ruff (#169)

* polish: streamline nav, extract inline styles, improve tablet UX

- Restructure desktop nav from 8+ flat links + overflow dropdown into
  5 grouped dropdowns (Core, Agents, Intel, System, More) matching
  the mobile menu structure to reduce decision fatigue
- Extract all inline styles from mission_control.html and base.html
  notification elements into mission-control.css with semantic classes
- Replace JS-built innerHTML with secure DOM construction in
  notification loader and chat history
- Add CONNECTING state to connection indicator (amber) instead of
  showing OFFLINE before WebSocket connects
- Add tablet breakpoint (1024px) with larger touch targets for
  Apple Pencil / stylus use and safe-area padding for iPad toolbar
- Add active-link highlighting in desktop dropdown menus
- Rename "Mission Control" page title to "System Overview" to
  disambiguate from the chat home page
- Add "Home — Timmy Time" page title to index.html

https://claude.ai/code/session_015uPUoKyYa8M2UAcyk5Gt6h

* fix(security): move auth-gate credentials to environment variables

Hardcoded username, password, and HMAC secret in auth-gate.py replaced
with os.environ lookups. Startup now refuses to run if any variable is
unset. Added AUTH_GATE_SECRET/USER/PASS to .env.example.

https://claude.ai/code/session_015uPUoKyYa8M2UAcyk5Gt6h

* refactor(tooling): migrate from black+isort+bandit to ruff

Replace three separate linting/formatting tools with a single ruff
invocation. Updates tox.ini (lint, format, pre-push, pre-commit envs),
.pre-commit-config.yaml, and CI workflow. Fixes all ruff errors
including unused imports, missing raise-from, and undefined names.
Ruff config maps existing bandit skips to equivalent S-rules.

https://claude.ai/code/session_015uPUoKyYa8M2UAcyk5Gt6h

---------

Co-authored-by: Claude <noreply@anthropic.com>
											
										
										
											2026-03-11 12:23:35 -04:00
+								        assert "${message}" not in block, (
 								            "innerHTML template literal still contains ${message} — XSS vulnerability"
 								        )
-												feat: senior architect quality analysis + XSS fixes + HITL guide

- Add QUALITY_ANALYSIS.md — 10-point architect review covering
  architecture coherence, completeness (~35-40% vs vision), mobile UX,
  security, test coverage, code quality, and DX
- Fix P0 XSS: mobile.html chat input now uses DOM textContent instead
  of innerHTML string interpolation with raw user input
- Fix P0 XSS: swarm_live.html agent/auction rendering rewritten with
  safe DOM methods (_t/_el helpers) — no more ${agent.name} in innerHTML
- Add M7xx test category (4 new tests) covering XSS prevention assertions;
  total suite now 232 passing (was 228)
- HITL session guide included in analysis with step-by-step phone test
  instructions and critical scenario priority ordering

https://claude.ai/code/session_0183Nzcy7TMqjrAopnTtygds

											
										
										
											2026-02-21 18:11:22 +00:00
 								def test_M703_swarm_live_agent_name_not_interpolated_in_innerhtml():
 								    """swarm_live.html must not put ${agent.name} inside innerHTML template literals."""
 								    html = _swarm_live_html()
-												feat: code quality audit + autoresearch integration + infra hardening (#150)
											
										
										
											2026-03-08 12:50:44 -04:00
+								    blocks = re.findall(r"innerHTML\s*=\s*agents\.map\([^;]+\)\.join\([^)]*\)", html, re.DOTALL)
-												ruff (#169)

* polish: streamline nav, extract inline styles, improve tablet UX

- Restructure desktop nav from 8+ flat links + overflow dropdown into
  5 grouped dropdowns (Core, Agents, Intel, System, More) matching
  the mobile menu structure to reduce decision fatigue
- Extract all inline styles from mission_control.html and base.html
  notification elements into mission-control.css with semantic classes
- Replace JS-built innerHTML with secure DOM construction in
  notification loader and chat history
- Add CONNECTING state to connection indicator (amber) instead of
  showing OFFLINE before WebSocket connects
- Add tablet breakpoint (1024px) with larger touch targets for
  Apple Pencil / stylus use and safe-area padding for iPad toolbar
- Add active-link highlighting in desktop dropdown menus
- Rename "Mission Control" page title to "System Overview" to
  disambiguate from the chat home page
- Add "Home — Timmy Time" page title to index.html

https://claude.ai/code/session_015uPUoKyYa8M2UAcyk5Gt6h

* fix(security): move auth-gate credentials to environment variables

Hardcoded username, password, and HMAC secret in auth-gate.py replaced
with os.environ lookups. Startup now refuses to run if any variable is
unset. Added AUTH_GATE_SECRET/USER/PASS to .env.example.

https://claude.ai/code/session_015uPUoKyYa8M2UAcyk5Gt6h

* refactor(tooling): migrate from black+isort+bandit to ruff

Replace three separate linting/formatting tools with a single ruff
invocation. Updates tox.ini (lint, format, pre-push, pre-commit envs),
.pre-commit-config.yaml, and CI workflow. Fixes all ruff errors
including unused imports, missing raise-from, and undefined names.
Ruff config maps existing bandit skips to equivalent S-rules.

https://claude.ai/code/session_015uPUoKyYa8M2UAcyk5Gt6h

---------

Co-authored-by: Claude <noreply@anthropic.com>
											
										
										
											2026-03-11 12:23:35 -04:00
+								    assert len(blocks) == 0, (
 								        "swarm_live.html still uses innerHTML=agents.map(…) with interpolated agent data — XSS vulnerability"
 								    )
-												feat: senior architect quality analysis + XSS fixes + HITL guide

- Add QUALITY_ANALYSIS.md — 10-point architect review covering
  architecture coherence, completeness (~35-40% vs vision), mobile UX,
  security, test coverage, code quality, and DX
- Fix P0 XSS: mobile.html chat input now uses DOM textContent instead
  of innerHTML string interpolation with raw user input
- Fix P0 XSS: swarm_live.html agent/auction rendering rewritten with
  safe DOM methods (_t/_el helpers) — no more ${agent.name} in innerHTML
- Add M7xx test category (4 new tests) covering XSS prevention assertions;
  total suite now 232 passing (was 228)
- HITL session guide included in analysis with step-by-step phone test
  instructions and critical scenario priority ordering

https://claude.ai/code/session_0183Nzcy7TMqjrAopnTtygds

											
										
										
											2026-02-21 18:11:22 +00:00
 								def test_M704_swarm_live_uses_textcontent_for_agent_data():
 								    """swarm_live.html must use textContent (not innerHTML) to set agent name/description."""
 								    html = _swarm_live_html()
-												ruff (#169)

* polish: streamline nav, extract inline styles, improve tablet UX

- Restructure desktop nav from 8+ flat links + overflow dropdown into
  5 grouped dropdowns (Core, Agents, Intel, System, More) matching
  the mobile menu structure to reduce decision fatigue
- Extract all inline styles from mission_control.html and base.html
  notification elements into mission-control.css with semantic classes
- Replace JS-built innerHTML with secure DOM construction in
  notification loader and chat history
- Add CONNECTING state to connection indicator (amber) instead of
  showing OFFLINE before WebSocket connects
- Add tablet breakpoint (1024px) with larger touch targets for
  Apple Pencil / stylus use and safe-area padding for iPad toolbar
- Add active-link highlighting in desktop dropdown menus
- Rename "Mission Control" page title to "System Overview" to
  disambiguate from the chat home page
- Add "Home — Timmy Time" page title to index.html

https://claude.ai/code/session_015uPUoKyYa8M2UAcyk5Gt6h

* fix(security): move auth-gate credentials to environment variables

Hardcoded username, password, and HMAC secret in auth-gate.py replaced
with os.environ lookups. Startup now refuses to run if any variable is
unset. Added AUTH_GATE_SECRET/USER/PASS to .env.example.

https://claude.ai/code/session_015uPUoKyYa8M2UAcyk5Gt6h

* refactor(tooling): migrate from black+isort+bandit to ruff

Replace three separate linting/formatting tools with a single ruff
invocation. Updates tox.ini (lint, format, pre-push, pre-commit envs),
.pre-commit-config.yaml, and CI workflow. Fixes all ruff errors
including unused imports, missing raise-from, and undefined names.
Ruff config maps existing bandit skips to equivalent S-rules.

https://claude.ai/code/session_015uPUoKyYa8M2UAcyk5Gt6h

---------

Co-authored-by: Claude <noreply@anthropic.com>
											
										
										
											2026-03-11 12:23:35 -04:00
+								    assert "textContent" in html, (
 								        "swarm_live.html does not use textContent — agent data may be raw-interpolated into DOM"
 								    )