Files
Timmy-time-dashboard/tests/test_agent.py

196 lines
6.8 KiB
Python
Raw Normal View History

from unittest.mock import MagicMock, patch
def test_create_timmy_returns_agent():
    """create_timmy should delegate to Agno Agent with correct config."""
    with patch("timmy.agent.Agent") as agent_cls:
        with patch("timmy.agent.Ollama"):
            with patch("timmy.agent.SqliteDb"):
                fake_agent = MagicMock()
                agent_cls.return_value = fake_agent
                from timmy.agent import create_timmy
                # The factory must hand back exactly the object the
                # (mocked) Agent constructor produced.
                built = create_timmy()
                assert built is fake_agent
                agent_cls.assert_called_once()
def test_create_timmy_agent_name():
    """The constructed agent must be named "Timmy"."""
    with patch("timmy.agent.Agent") as agent_cls:
        with patch("timmy.agent.Ollama"):
            with patch("timmy.agent.SqliteDb"):
                from timmy.agent import create_timmy
                create_timmy()
                passed = agent_cls.call_args.kwargs
                assert passed["name"] == "Timmy"
def test_create_timmy_uses_llama32():
    """The Ollama model wrapper must be built once, with id="llama3.2"."""
    with patch("timmy.agent.Agent"):
        with patch("timmy.agent.Ollama") as ollama_cls:
            with patch("timmy.agent.SqliteDb"):
                from timmy.agent import create_timmy
                create_timmy()
                ollama_cls.assert_called_once()
                model_kwargs = ollama_cls.call_args.kwargs
                assert model_kwargs["id"] == "llama3.2"
def test_create_timmy_history_config():
    """Agent must keep chat history in context (20 runs) and use markdown.

    Reassembled: this function's body had unrelated commit-message text
    interleaved into it by the export; the assertions on
    `num_history_runs` and `markdown` belong here.
    """
    with patch("timmy.agent.Agent") as MockAgent, \
            patch("timmy.agent.Ollama"), \
            patch("timmy.agent.SqliteDb"):
        from timmy.agent import create_timmy
        create_timmy()
        kwargs = MockAgent.call_args.kwargs
        assert kwargs["add_history_to_context"] is True
        # 20 per the memory-layer change (raised from 10 — see commit notes).
        assert kwargs["num_history_runs"] == 20
        assert kwargs["markdown"] is True
def test_create_timmy_custom_db_file():
    """A caller-supplied db_file must be forwarded verbatim to SqliteDb."""
    with patch("timmy.agent.Agent"):
        with patch("timmy.agent.Ollama"):
            with patch("timmy.agent.SqliteDb") as db_cls:
                from timmy.agent import create_timmy
                create_timmy(db_file="custom.db")
                db_cls.assert_called_once_with(db_file="custom.db")
def test_create_timmy_embeds_system_prompt():
    """The agent description must be exactly TIMMY_SYSTEM_PROMPT."""
    from timmy.prompts import TIMMY_SYSTEM_PROMPT
    with patch("timmy.agent.Agent") as agent_cls:
        with patch("timmy.agent.Ollama"):
            with patch("timmy.agent.SqliteDb"):
                from timmy.agent import create_timmy
                create_timmy()
                passed = agent_cls.call_args.kwargs
                assert passed["description"] == TIMMY_SYSTEM_PROMPT
# ── Ollama host regression (container connectivity) ─────────────────────────
def test_create_timmy_passes_ollama_url_to_model():
    """Regression: Ollama model must receive settings.ollama_url as host.

    Without this, containers default to localhost:11434 which is unreachable
    when Ollama runs on the Docker host.
    """
    with patch("timmy.agent.Agent"):
        with patch("timmy.agent.Ollama") as ollama_cls:
            with patch("timmy.agent.SqliteDb"):
                from timmy.agent import create_timmy
                create_timmy()
                model_kwargs = ollama_cls.call_args.kwargs
                assert "host" in model_kwargs, "Ollama() must receive host= parameter"
                # Default value comes straight from the settings object.
                assert model_kwargs["host"] == "http://localhost:11434"  # default from config
def test_create_timmy_respects_custom_ollama_url():
    """Ollama host should follow OLLAMA_URL when overridden in config."""
    override_url = "http://host.docker.internal:11434"
    with patch("timmy.agent.Agent"):
        with patch("timmy.agent.Ollama") as ollama_cls:
            with patch("timmy.agent.SqliteDb"):
                with patch("timmy.agent.settings") as cfg:
                    # Stub every settings attribute create_timmy reads.
                    cfg.ollama_model = "llama3.2"
                    cfg.ollama_url = override_url
                    cfg.timmy_model_backend = "ollama"
                    cfg.airllm_model_size = "70b"
                    from timmy.agent import create_timmy
                    create_timmy()
                    assert ollama_cls.call_args.kwargs["host"] == override_url
# ── AirLLM path ──────────────────────────────────────────────────────────────
def test_create_timmy_airllm_returns_airllm_agent():
    """backend='airllm' must return a TimmyAirLLMAgent, not an Agno Agent."""
    with patch("timmy.backends.is_apple_silicon", return_value=False):
        from timmy.agent import create_timmy
        from timmy.backends import TimmyAirLLMAgent
        built = create_timmy(backend="airllm", model_size="8b")
        assert isinstance(built, TimmyAirLLMAgent)
def test_create_timmy_airllm_does_not_call_agno_agent():
    """When using the airllm backend, Agno Agent should never be instantiated."""
    with patch("timmy.backends.is_apple_silicon", return_value=False):
        with patch("timmy.agent.Agent") as agent_cls:
            from timmy.agent import create_timmy
            create_timmy(backend="airllm", model_size="8b")
            agent_cls.assert_not_called()
def test_create_timmy_explicit_ollama_ignores_autodetect():
    """backend='ollama' must always use Ollama, even on Apple Silicon."""
    with patch("timmy.agent.Agent") as agent_cls:
        with patch("timmy.agent.Ollama"):
            with patch("timmy.agent.SqliteDb"):
                from timmy.agent import create_timmy
                create_timmy(backend="ollama")
                agent_cls.assert_called_once()
# ── _resolve_backend ─────────────────────────────────────────────────────────
def test_resolve_backend_explicit_takes_priority():
    """An explicitly requested backend is returned unchanged."""
    from timmy.agent import _resolve_backend
    for requested in ("airllm", "ollama"):
        assert _resolve_backend(requested) == requested
def test_resolve_backend_defaults_to_ollama_without_config():
    """Default config (timmy_model_backend='ollama') → 'ollama'."""
    from timmy.agent import _resolve_backend
    resolved = _resolve_backend(None)
    assert resolved == "ollama"
def test_resolve_backend_auto_uses_airllm_on_apple_silicon():
    """'auto' on Apple Silicon with airllm stubbed → 'airllm'."""
    with patch("timmy.backends.is_apple_silicon", return_value=True):
        with patch("timmy.agent.settings") as cfg:
            cfg.timmy_model_backend = "auto"
            cfg.airllm_model_size = "70b"
            cfg.ollama_model = "llama3.2"
            from timmy.agent import _resolve_backend
            assert _resolve_backend(None) == "airllm"
def test_resolve_backend_auto_falls_back_on_non_apple():
    """'auto' on non-Apple Silicon → 'ollama'."""
    with patch("timmy.backends.is_apple_silicon", return_value=False):
        with patch("timmy.agent.settings") as cfg:
            cfg.timmy_model_backend = "auto"
            cfg.airllm_model_size = "70b"
            cfg.ollama_model = "llama3.2"
            from timmy.agent import _resolve_backend
            assert _resolve_backend(None) == "ollama"