This repository has been archived on 2026-03-24. You can view files and clone it. You cannot open issues or pull requests or push a commit.
Files
Timmy-time-dashboard/tests/timmy/test_agent.py

538 lines
20 KiB
Python
Raw Normal View History

from unittest.mock import MagicMock, patch
def test_create_timmy_custom_db_file():
ruff (#169) * polish: streamline nav, extract inline styles, improve tablet UX - Restructure desktop nav from 8+ flat links + overflow dropdown into 5 grouped dropdowns (Core, Agents, Intel, System, More) matching the mobile menu structure to reduce decision fatigue - Extract all inline styles from mission_control.html and base.html notification elements into mission-control.css with semantic classes - Replace JS-built innerHTML with secure DOM construction in notification loader and chat history - Add CONNECTING state to connection indicator (amber) instead of showing OFFLINE before WebSocket connects - Add tablet breakpoint (1024px) with larger touch targets for Apple Pencil / stylus use and safe-area padding for iPad toolbar - Add active-link highlighting in desktop dropdown menus - Rename "Mission Control" page title to "System Overview" to disambiguate from the chat home page - Add "Home — Timmy Time" page title to index.html https://claude.ai/code/session_015uPUoKyYa8M2UAcyk5Gt6h * fix(security): move auth-gate credentials to environment variables Hardcoded username, password, and HMAC secret in auth-gate.py replaced with os.environ lookups. Startup now refuses to run if any variable is unset. Added AUTH_GATE_SECRET/USER/PASS to .env.example. https://claude.ai/code/session_015uPUoKyYa8M2UAcyk5Gt6h * refactor(tooling): migrate from black+isort+bandit to ruff Replace three separate linting/formatting tools with a single ruff invocation. Updates tox.ini (lint, format, pre-push, pre-commit envs), .pre-commit-config.yaml, and CI workflow. Fixes all ruff errors including unused imports, missing raise-from, and undefined names. Ruff config maps existing bandit skips to equivalent S-rules. https://claude.ai/code/session_015uPUoKyYa8M2UAcyk5Gt6h --------- Co-authored-by: Claude <noreply@anthropic.com>
2026-03-11 12:23:35 -04:00
with (
patch("timmy.agent.Agent"),
patch("timmy.agent.Ollama"),
patch("timmy.agent.SqliteDb") as MockDb,
patch("timmy.agent._resolve_model_with_fallback", return_value=("llama3.2:3b", False)),
patch("timmy.agent._check_model_available", return_value=True),
patch("timmy.agent._warmup_model", return_value=True),
ruff (#169) * polish: streamline nav, extract inline styles, improve tablet UX - Restructure desktop nav from 8+ flat links + overflow dropdown into 5 grouped dropdowns (Core, Agents, Intel, System, More) matching the mobile menu structure to reduce decision fatigue - Extract all inline styles from mission_control.html and base.html notification elements into mission-control.css with semantic classes - Replace JS-built innerHTML with secure DOM construction in notification loader and chat history - Add CONNECTING state to connection indicator (amber) instead of showing OFFLINE before WebSocket connects - Add tablet breakpoint (1024px) with larger touch targets for Apple Pencil / stylus use and safe-area padding for iPad toolbar - Add active-link highlighting in desktop dropdown menus - Rename "Mission Control" page title to "System Overview" to disambiguate from the chat home page - Add "Home — Timmy Time" page title to index.html https://claude.ai/code/session_015uPUoKyYa8M2UAcyk5Gt6h * fix(security): move auth-gate credentials to environment variables Hardcoded username, password, and HMAC secret in auth-gate.py replaced with os.environ lookups. Startup now refuses to run if any variable is unset. Added AUTH_GATE_SECRET/USER/PASS to .env.example. https://claude.ai/code/session_015uPUoKyYa8M2UAcyk5Gt6h * refactor(tooling): migrate from black+isort+bandit to ruff Replace three separate linting/formatting tools with a single ruff invocation. Updates tox.ini (lint, format, pre-push, pre-commit envs), .pre-commit-config.yaml, and CI workflow. Fixes all ruff errors including unused imports, missing raise-from, and undefined names. Ruff config maps existing bandit skips to equivalent S-rules. https://claude.ai/code/session_015uPUoKyYa8M2UAcyk5Gt6h --------- Co-authored-by: Claude <noreply@anthropic.com>
2026-03-11 12:23:35 -04:00
):
from timmy.agent import create_timmy
create_timmy(db_file="custom.db")
MockDb.assert_called_once_with(db_file="custom.db")
def test_create_timmy_embeds_system_prompt():
ruff (#169) * polish: streamline nav, extract inline styles, improve tablet UX - Restructure desktop nav from 8+ flat links + overflow dropdown into 5 grouped dropdowns (Core, Agents, Intel, System, More) matching the mobile menu structure to reduce decision fatigue - Extract all inline styles from mission_control.html and base.html notification elements into mission-control.css with semantic classes - Replace JS-built innerHTML with secure DOM construction in notification loader and chat history - Add CONNECTING state to connection indicator (amber) instead of showing OFFLINE before WebSocket connects - Add tablet breakpoint (1024px) with larger touch targets for Apple Pencil / stylus use and safe-area padding for iPad toolbar - Add active-link highlighting in desktop dropdown menus - Rename "Mission Control" page title to "System Overview" to disambiguate from the chat home page - Add "Home — Timmy Time" page title to index.html https://claude.ai/code/session_015uPUoKyYa8M2UAcyk5Gt6h * fix(security): move auth-gate credentials to environment variables Hardcoded username, password, and HMAC secret in auth-gate.py replaced with os.environ lookups. Startup now refuses to run if any variable is unset. Added AUTH_GATE_SECRET/USER/PASS to .env.example. https://claude.ai/code/session_015uPUoKyYa8M2UAcyk5Gt6h * refactor(tooling): migrate from black+isort+bandit to ruff Replace three separate linting/formatting tools with a single ruff invocation. Updates tox.ini (lint, format, pre-push, pre-commit envs), .pre-commit-config.yaml, and CI workflow. Fixes all ruff errors including unused imports, missing raise-from, and undefined names. Ruff config maps existing bandit skips to equivalent S-rules. https://claude.ai/code/session_015uPUoKyYa8M2UAcyk5Gt6h --------- Co-authored-by: Claude <noreply@anthropic.com>
2026-03-11 12:23:35 -04:00
with (
patch("timmy.agent.Agent") as MockAgent,
patch("timmy.agent.Ollama"),
patch("timmy.agent.SqliteDb"),
patch("timmy.agent._resolve_model_with_fallback", return_value=("llama3.2:3b", False)),
patch("timmy.agent._check_model_available", return_value=True),
patch("timmy.agent._warmup_model", return_value=True),
):
from timmy.agent import create_timmy
create_timmy()
kwargs = MockAgent.call_args.kwargs
Implement three-tier memory architecture (Hot/Vault/Handoff) This commit replaces the previous memory_layers.py with a proper three-tier memory system as specified by the user: ## Tier 1 — Hot Memory (MEMORY.md) - Single flat file always loaded into system context - Contains: current status, standing rules, agent roster, key decisions - ~300 lines max, pruned monthly - Managed by HotMemory class ## Tier 2 — Structured Vault (memory/) - Directory with three namespaces: • self/ — identity.md, user_profile.md, methodology.md • notes/ — session logs, AARs, research • aar/ — post-task retrospectives - Markdown format, Obsidian-compatible - Append-only, date-stamped - Managed by VaultMemory class ## Handoff Protocol - last-session-handoff.md written at session end - Contains: summary, key decisions, open items, next steps - Auto-loaded at next session start - Maintains continuity across resets ## Implementation ### New Files: - src/timmy/memory_system.py — Core memory system - MEMORY.md — Hot memory template - memory/self/*.md — Identity, user profile, methodology ### Modified: - src/timmy/agent.py — Integrated with memory system - create_timmy() injects memory context - TimmyWithMemory class with automatic fact extraction - tests/test_agent.py — Updated for memory context ## Key Principles - Hot memory = small and curated - Vault = append-only, never delete - Handoffs = continuity mechanism - Flat files = human-readable, portable ## Usage All 973 tests pass.
2026-02-25 18:17:43 -05:00
# Prompt should contain base system prompt (may have memory context appended)
# Default model (llama3.2) uses the lite prompt
assert "Timmy" in kwargs["description"]
# ── Ollama host regression (container connectivity) ─────────────────────────
def test_create_timmy_passes_ollama_url_to_model():
"""Regression: Ollama model must receive settings.ollama_url as host.
Without this, containers default to localhost:11434 which is unreachable
when Ollama runs on the Docker host.
"""
ruff (#169) * polish: streamline nav, extract inline styles, improve tablet UX - Restructure desktop nav from 8+ flat links + overflow dropdown into 5 grouped dropdowns (Core, Agents, Intel, System, More) matching the mobile menu structure to reduce decision fatigue - Extract all inline styles from mission_control.html and base.html notification elements into mission-control.css with semantic classes - Replace JS-built innerHTML with secure DOM construction in notification loader and chat history - Add CONNECTING state to connection indicator (amber) instead of showing OFFLINE before WebSocket connects - Add tablet breakpoint (1024px) with larger touch targets for Apple Pencil / stylus use and safe-area padding for iPad toolbar - Add active-link highlighting in desktop dropdown menus - Rename "Mission Control" page title to "System Overview" to disambiguate from the chat home page - Add "Home — Timmy Time" page title to index.html https://claude.ai/code/session_015uPUoKyYa8M2UAcyk5Gt6h * fix(security): move auth-gate credentials to environment variables Hardcoded username, password, and HMAC secret in auth-gate.py replaced with os.environ lookups. Startup now refuses to run if any variable is unset. Added AUTH_GATE_SECRET/USER/PASS to .env.example. https://claude.ai/code/session_015uPUoKyYa8M2UAcyk5Gt6h * refactor(tooling): migrate from black+isort+bandit to ruff Replace three separate linting/formatting tools with a single ruff invocation. Updates tox.ini (lint, format, pre-push, pre-commit envs), .pre-commit-config.yaml, and CI workflow. Fixes all ruff errors including unused imports, missing raise-from, and undefined names. Ruff config maps existing bandit skips to equivalent S-rules. https://claude.ai/code/session_015uPUoKyYa8M2UAcyk5Gt6h --------- Co-authored-by: Claude <noreply@anthropic.com>
2026-03-11 12:23:35 -04:00
with (
patch("timmy.agent.Agent"),
patch("timmy.agent.Ollama") as MockOllama,
patch("timmy.agent.SqliteDb"),
patch("timmy.agent._resolve_model_with_fallback", return_value=("llama3.2:3b", False)),
patch("timmy.agent._check_model_available", return_value=True),
patch("timmy.agent._warmup_model", return_value=True),
):
from timmy.agent import create_timmy
create_timmy()
kwargs = MockOllama.call_args.kwargs
assert "host" in kwargs, "Ollama() must receive host= parameter"
from config import settings
assert kwargs["host"] == settings.ollama_url
def test_create_timmy_respects_custom_ollama_url():
"""Ollama host should follow OLLAMA_URL when overridden in config."""
custom_url = "http://host.docker.internal:11434"
ruff (#169) * polish: streamline nav, extract inline styles, improve tablet UX - Restructure desktop nav from 8+ flat links + overflow dropdown into 5 grouped dropdowns (Core, Agents, Intel, System, More) matching the mobile menu structure to reduce decision fatigue - Extract all inline styles from mission_control.html and base.html notification elements into mission-control.css with semantic classes - Replace JS-built innerHTML with secure DOM construction in notification loader and chat history - Add CONNECTING state to connection indicator (amber) instead of showing OFFLINE before WebSocket connects - Add tablet breakpoint (1024px) with larger touch targets for Apple Pencil / stylus use and safe-area padding for iPad toolbar - Add active-link highlighting in desktop dropdown menus - Rename "Mission Control" page title to "System Overview" to disambiguate from the chat home page - Add "Home — Timmy Time" page title to index.html https://claude.ai/code/session_015uPUoKyYa8M2UAcyk5Gt6h * fix(security): move auth-gate credentials to environment variables Hardcoded username, password, and HMAC secret in auth-gate.py replaced with os.environ lookups. Startup now refuses to run if any variable is unset. Added AUTH_GATE_SECRET/USER/PASS to .env.example. https://claude.ai/code/session_015uPUoKyYa8M2UAcyk5Gt6h * refactor(tooling): migrate from black+isort+bandit to ruff Replace three separate linting/formatting tools with a single ruff invocation. Updates tox.ini (lint, format, pre-push, pre-commit envs), .pre-commit-config.yaml, and CI workflow. Fixes all ruff errors including unused imports, missing raise-from, and undefined names. Ruff config maps existing bandit skips to equivalent S-rules. https://claude.ai/code/session_015uPUoKyYa8M2UAcyk5Gt6h --------- Co-authored-by: Claude <noreply@anthropic.com>
2026-03-11 12:23:35 -04:00
with (
patch("timmy.agent.Agent"),
patch("timmy.agent.Ollama") as MockOllama,
patch("timmy.agent.SqliteDb"),
patch("timmy.agent.settings") as mock_settings,
patch("timmy.agent._resolve_model_with_fallback", return_value=("llama3.2:3b", False)),
patch("timmy.agent._check_model_available", return_value=True),
patch("timmy.agent._warmup_model", return_value=True),
ruff (#169) * polish: streamline nav, extract inline styles, improve tablet UX - Restructure desktop nav from 8+ flat links + overflow dropdown into 5 grouped dropdowns (Core, Agents, Intel, System, More) matching the mobile menu structure to reduce decision fatigue - Extract all inline styles from mission_control.html and base.html notification elements into mission-control.css with semantic classes - Replace JS-built innerHTML with secure DOM construction in notification loader and chat history - Add CONNECTING state to connection indicator (amber) instead of showing OFFLINE before WebSocket connects - Add tablet breakpoint (1024px) with larger touch targets for Apple Pencil / stylus use and safe-area padding for iPad toolbar - Add active-link highlighting in desktop dropdown menus - Rename "Mission Control" page title to "System Overview" to disambiguate from the chat home page - Add "Home — Timmy Time" page title to index.html https://claude.ai/code/session_015uPUoKyYa8M2UAcyk5Gt6h * fix(security): move auth-gate credentials to environment variables Hardcoded username, password, and HMAC secret in auth-gate.py replaced with os.environ lookups. Startup now refuses to run if any variable is unset. Added AUTH_GATE_SECRET/USER/PASS to .env.example. https://claude.ai/code/session_015uPUoKyYa8M2UAcyk5Gt6h * refactor(tooling): migrate from black+isort+bandit to ruff Replace three separate linting/formatting tools with a single ruff invocation. Updates tox.ini (lint, format, pre-push, pre-commit envs), .pre-commit-config.yaml, and CI workflow. Fixes all ruff errors including unused imports, missing raise-from, and undefined names. Ruff config maps existing bandit skips to equivalent S-rules. https://claude.ai/code/session_015uPUoKyYa8M2UAcyk5Gt6h --------- Co-authored-by: Claude <noreply@anthropic.com>
2026-03-11 12:23:35 -04:00
):
mock_settings.ollama_model = "llama3.2"
mock_settings.ollama_url = custom_url
mock_settings.ollama_num_ctx = 4096
mock_settings.timmy_model_backend = "ollama"
from timmy.agent import create_timmy
create_timmy()
kwargs = MockOllama.call_args.kwargs
assert kwargs["host"] == custom_url
def test_create_timmy_explicit_ollama_ignores_autodetect():
"""backend='ollama' must always use Ollama, even on Apple Silicon."""
ruff (#169) * polish: streamline nav, extract inline styles, improve tablet UX - Restructure desktop nav from 8+ flat links + overflow dropdown into 5 grouped dropdowns (Core, Agents, Intel, System, More) matching the mobile menu structure to reduce decision fatigue - Extract all inline styles from mission_control.html and base.html notification elements into mission-control.css with semantic classes - Replace JS-built innerHTML with secure DOM construction in notification loader and chat history - Add CONNECTING state to connection indicator (amber) instead of showing OFFLINE before WebSocket connects - Add tablet breakpoint (1024px) with larger touch targets for Apple Pencil / stylus use and safe-area padding for iPad toolbar - Add active-link highlighting in desktop dropdown menus - Rename "Mission Control" page title to "System Overview" to disambiguate from the chat home page - Add "Home — Timmy Time" page title to index.html https://claude.ai/code/session_015uPUoKyYa8M2UAcyk5Gt6h * fix(security): move auth-gate credentials to environment variables Hardcoded username, password, and HMAC secret in auth-gate.py replaced with os.environ lookups. Startup now refuses to run if any variable is unset. Added AUTH_GATE_SECRET/USER/PASS to .env.example. https://claude.ai/code/session_015uPUoKyYa8M2UAcyk5Gt6h * refactor(tooling): migrate from black+isort+bandit to ruff Replace three separate linting/formatting tools with a single ruff invocation. Updates tox.ini (lint, format, pre-push, pre-commit envs), .pre-commit-config.yaml, and CI workflow. Fixes all ruff errors including unused imports, missing raise-from, and undefined names. Ruff config maps existing bandit skips to equivalent S-rules. https://claude.ai/code/session_015uPUoKyYa8M2UAcyk5Gt6h --------- Co-authored-by: Claude <noreply@anthropic.com>
2026-03-11 12:23:35 -04:00
with (
patch("timmy.agent.Agent") as MockAgent,
patch("timmy.agent.Ollama"),
patch("timmy.agent.SqliteDb"),
patch("timmy.agent._resolve_model_with_fallback", return_value=("llama3.2:3b", False)),
patch("timmy.agent._check_model_available", return_value=True),
patch("timmy.agent._warmup_model", return_value=True),
):
from timmy.agent import create_timmy
create_timmy(backend="ollama")
MockAgent.assert_called_once()
# ── _resolve_backend ─────────────────────────────────────────────────────────
def test_resolve_backend_explicit_takes_priority():
from timmy.agent import _resolve_backend
assert _resolve_backend("ollama") == "ollama"
def test_resolve_backend_defaults_to_ollama_without_config():
"""Default config (timmy_model_backend='ollama') → 'ollama'."""
from timmy.agent import _resolve_backend
assert _resolve_backend(None) == "ollama"
def test_model_supports_tools_llama32_returns_false():
"""llama3.2 (3B) is too small for reliable tool calling."""
from timmy.agent import _model_supports_tools
assert _model_supports_tools("llama3.2") is False
assert _model_supports_tools("llama3.2:latest") is False
def test_model_supports_tools_llama31_returns_true():
"""llama3.1 (8B+) can handle tool calling."""
from timmy.agent import _model_supports_tools
assert _model_supports_tools("llama3.1") is True
assert _model_supports_tools("llama3.3") is True
def test_model_supports_tools_other_small_models():
"""Other known small models should not get tools."""
from timmy.agent import _model_supports_tools
assert _model_supports_tools("phi-3") is False
assert _model_supports_tools("tinyllama") is False
def test_model_supports_tools_unknown_model_gets_tools():
"""Unknown models default to tool-capable (optimistic)."""
from timmy.agent import _model_supports_tools
assert _model_supports_tools("mistral") is True
assert _model_supports_tools("qwen2.5:72b") is True
# ── Tool gating in create_timmy ──────────────────────────────────────────────
def test_create_timmy_no_tools_for_small_model():
"""Small models (llama3.2) should get no tools."""
mock_toolkit = MagicMock()
ruff (#169) * polish: streamline nav, extract inline styles, improve tablet UX - Restructure desktop nav from 8+ flat links + overflow dropdown into 5 grouped dropdowns (Core, Agents, Intel, System, More) matching the mobile menu structure to reduce decision fatigue - Extract all inline styles from mission_control.html and base.html notification elements into mission-control.css with semantic classes - Replace JS-built innerHTML with secure DOM construction in notification loader and chat history - Add CONNECTING state to connection indicator (amber) instead of showing OFFLINE before WebSocket connects - Add tablet breakpoint (1024px) with larger touch targets for Apple Pencil / stylus use and safe-area padding for iPad toolbar - Add active-link highlighting in desktop dropdown menus - Rename "Mission Control" page title to "System Overview" to disambiguate from the chat home page - Add "Home — Timmy Time" page title to index.html https://claude.ai/code/session_015uPUoKyYa8M2UAcyk5Gt6h * fix(security): move auth-gate credentials to environment variables Hardcoded username, password, and HMAC secret in auth-gate.py replaced with os.environ lookups. Startup now refuses to run if any variable is unset. Added AUTH_GATE_SECRET/USER/PASS to .env.example. https://claude.ai/code/session_015uPUoKyYa8M2UAcyk5Gt6h * refactor(tooling): migrate from black+isort+bandit to ruff Replace three separate linting/formatting tools with a single ruff invocation. Updates tox.ini (lint, format, pre-push, pre-commit envs), .pre-commit-config.yaml, and CI workflow. Fixes all ruff errors including unused imports, missing raise-from, and undefined names. Ruff config maps existing bandit skips to equivalent S-rules. https://claude.ai/code/session_015uPUoKyYa8M2UAcyk5Gt6h --------- Co-authored-by: Claude <noreply@anthropic.com>
2026-03-11 12:23:35 -04:00
with (
patch("timmy.agent.Agent") as MockAgent,
patch("timmy.agent.Ollama"),
patch("timmy.agent.SqliteDb"),
patch("timmy.agent.create_full_toolkit", return_value=mock_toolkit),
patch("timmy.agent._resolve_model_with_fallback", return_value=("llama3.2:3b", False)),
patch("timmy.agent._check_model_available", return_value=True),
patch("timmy.agent._warmup_model", return_value=True),
):
from timmy.agent import create_timmy
create_timmy()
kwargs = MockAgent.call_args.kwargs
# llama3.2 is in _SMALL_MODEL_PATTERNS → tools should be None
assert kwargs["tools"] is None
def test_create_timmy_includes_tools_for_large_model():
"""A tool-capable model (e.g. llama3.1) should attempt to include tools."""
mock_toolkit = MagicMock()
ruff (#169) * polish: streamline nav, extract inline styles, improve tablet UX - Restructure desktop nav from 8+ flat links + overflow dropdown into 5 grouped dropdowns (Core, Agents, Intel, System, More) matching the mobile menu structure to reduce decision fatigue - Extract all inline styles from mission_control.html and base.html notification elements into mission-control.css with semantic classes - Replace JS-built innerHTML with secure DOM construction in notification loader and chat history - Add CONNECTING state to connection indicator (amber) instead of showing OFFLINE before WebSocket connects - Add tablet breakpoint (1024px) with larger touch targets for Apple Pencil / stylus use and safe-area padding for iPad toolbar - Add active-link highlighting in desktop dropdown menus - Rename "Mission Control" page title to "System Overview" to disambiguate from the chat home page - Add "Home — Timmy Time" page title to index.html https://claude.ai/code/session_015uPUoKyYa8M2UAcyk5Gt6h * fix(security): move auth-gate credentials to environment variables Hardcoded username, password, and HMAC secret in auth-gate.py replaced with os.environ lookups. Startup now refuses to run if any variable is unset. Added AUTH_GATE_SECRET/USER/PASS to .env.example. https://claude.ai/code/session_015uPUoKyYa8M2UAcyk5Gt6h * refactor(tooling): migrate from black+isort+bandit to ruff Replace three separate linting/formatting tools with a single ruff invocation. Updates tox.ini (lint, format, pre-push, pre-commit envs), .pre-commit-config.yaml, and CI workflow. Fixes all ruff errors including unused imports, missing raise-from, and undefined names. Ruff config maps existing bandit skips to equivalent S-rules. https://claude.ai/code/session_015uPUoKyYa8M2UAcyk5Gt6h --------- Co-authored-by: Claude <noreply@anthropic.com>
2026-03-11 12:23:35 -04:00
with (
patch("timmy.agent.Agent") as MockAgent,
patch("timmy.agent.Ollama"),
patch("timmy.agent.SqliteDb"),
patch("timmy.agent.create_full_toolkit", return_value=mock_toolkit),
patch("timmy.agent.settings") as mock_settings,
patch("timmy.agent._resolve_model_with_fallback", return_value=("llama3.1", False)),
patch("timmy.agent._check_model_available", return_value=True),
patch("timmy.agent._warmup_model", return_value=True),
ruff (#169) * polish: streamline nav, extract inline styles, improve tablet UX - Restructure desktop nav from 8+ flat links + overflow dropdown into 5 grouped dropdowns (Core, Agents, Intel, System, More) matching the mobile menu structure to reduce decision fatigue - Extract all inline styles from mission_control.html and base.html notification elements into mission-control.css with semantic classes - Replace JS-built innerHTML with secure DOM construction in notification loader and chat history - Add CONNECTING state to connection indicator (amber) instead of showing OFFLINE before WebSocket connects - Add tablet breakpoint (1024px) with larger touch targets for Apple Pencil / stylus use and safe-area padding for iPad toolbar - Add active-link highlighting in desktop dropdown menus - Rename "Mission Control" page title to "System Overview" to disambiguate from the chat home page - Add "Home — Timmy Time" page title to index.html https://claude.ai/code/session_015uPUoKyYa8M2UAcyk5Gt6h * fix(security): move auth-gate credentials to environment variables Hardcoded username, password, and HMAC secret in auth-gate.py replaced with os.environ lookups. Startup now refuses to run if any variable is unset. Added AUTH_GATE_SECRET/USER/PASS to .env.example. https://claude.ai/code/session_015uPUoKyYa8M2UAcyk5Gt6h * refactor(tooling): migrate from black+isort+bandit to ruff Replace three separate linting/formatting tools with a single ruff invocation. Updates tox.ini (lint, format, pre-push, pre-commit envs), .pre-commit-config.yaml, and CI workflow. Fixes all ruff errors including unused imports, missing raise-from, and undefined names. Ruff config maps existing bandit skips to equivalent S-rules. https://claude.ai/code/session_015uPUoKyYa8M2UAcyk5Gt6h --------- Co-authored-by: Claude <noreply@anthropic.com>
2026-03-11 12:23:35 -04:00
):
mock_settings.ollama_model = "llama3.1"
mock_settings.ollama_url = "http://localhost:11434"
mock_settings.ollama_num_ctx = 4096
mock_settings.timmy_model_backend = "ollama"
mock_settings.telemetry_enabled = False
from timmy.agent import create_timmy
create_timmy()
kwargs = MockAgent.call_args.kwargs
assert mock_toolkit in kwargs["tools"]
def test_create_timmy_no_unsupported_agent_kwargs():
"""Regression guard: show_tool_calls and tool_call_limit are not valid agno 2.x params.
These were removed in f95c960 (Feb 26) and must not be reintroduced.
"""
ruff (#169) * polish: streamline nav, extract inline styles, improve tablet UX - Restructure desktop nav from 8+ flat links + overflow dropdown into 5 grouped dropdowns (Core, Agents, Intel, System, More) matching the mobile menu structure to reduce decision fatigue - Extract all inline styles from mission_control.html and base.html notification elements into mission-control.css with semantic classes - Replace JS-built innerHTML with secure DOM construction in notification loader and chat history - Add CONNECTING state to connection indicator (amber) instead of showing OFFLINE before WebSocket connects - Add tablet breakpoint (1024px) with larger touch targets for Apple Pencil / stylus use and safe-area padding for iPad toolbar - Add active-link highlighting in desktop dropdown menus - Rename "Mission Control" page title to "System Overview" to disambiguate from the chat home page - Add "Home — Timmy Time" page title to index.html https://claude.ai/code/session_015uPUoKyYa8M2UAcyk5Gt6h * fix(security): move auth-gate credentials to environment variables Hardcoded username, password, and HMAC secret in auth-gate.py replaced with os.environ lookups. Startup now refuses to run if any variable is unset. Added AUTH_GATE_SECRET/USER/PASS to .env.example. https://claude.ai/code/session_015uPUoKyYa8M2UAcyk5Gt6h * refactor(tooling): migrate from black+isort+bandit to ruff Replace three separate linting/formatting tools with a single ruff invocation. Updates tox.ini (lint, format, pre-push, pre-commit envs), .pre-commit-config.yaml, and CI workflow. Fixes all ruff errors including unused imports, missing raise-from, and undefined names. Ruff config maps existing bandit skips to equivalent S-rules. https://claude.ai/code/session_015uPUoKyYa8M2UAcyk5Gt6h --------- Co-authored-by: Claude <noreply@anthropic.com>
2026-03-11 12:23:35 -04:00
with (
patch("timmy.agent.Agent") as MockAgent,
patch("timmy.agent.Ollama"),
patch("timmy.agent.SqliteDb"),
patch("timmy.agent._resolve_model_with_fallback", return_value=("llama3.2:3b", False)),
patch("timmy.agent._check_model_available", return_value=True),
patch("timmy.agent._warmup_model", return_value=True),
):
from timmy.agent import create_timmy
create_timmy()
kwargs = MockAgent.call_args.kwargs
assert "show_tool_calls" not in kwargs, "show_tool_calls is not a valid Agent param"
def test_create_timmy_no_extra_kwargs():
"""All kwargs passed to Agent() must be from the known-valid set.
agno is mocked globally in conftest, so we can't inspect the real class here.
Instead, maintain an explicit allowlist of params validated against agno 2.5.5.
If a new param is needed, verify it exists in agno first, then add it here.
"""
VALID_AGENT_KWARGS = {
"name",
"model",
"db",
"description",
"add_history_to_context",
"num_history_runs",
"markdown",
"tools",
"tool_call_limit",
"telemetry",
}
ruff (#169) * polish: streamline nav, extract inline styles, improve tablet UX - Restructure desktop nav from 8+ flat links + overflow dropdown into 5 grouped dropdowns (Core, Agents, Intel, System, More) matching the mobile menu structure to reduce decision fatigue - Extract all inline styles from mission_control.html and base.html notification elements into mission-control.css with semantic classes - Replace JS-built innerHTML with secure DOM construction in notification loader and chat history - Add CONNECTING state to connection indicator (amber) instead of showing OFFLINE before WebSocket connects - Add tablet breakpoint (1024px) with larger touch targets for Apple Pencil / stylus use and safe-area padding for iPad toolbar - Add active-link highlighting in desktop dropdown menus - Rename "Mission Control" page title to "System Overview" to disambiguate from the chat home page - Add "Home — Timmy Time" page title to index.html https://claude.ai/code/session_015uPUoKyYa8M2UAcyk5Gt6h * fix(security): move auth-gate credentials to environment variables Hardcoded username, password, and HMAC secret in auth-gate.py replaced with os.environ lookups. Startup now refuses to run if any variable is unset. Added AUTH_GATE_SECRET/USER/PASS to .env.example. https://claude.ai/code/session_015uPUoKyYa8M2UAcyk5Gt6h * refactor(tooling): migrate from black+isort+bandit to ruff Replace three separate linting/formatting tools with a single ruff invocation. Updates tox.ini (lint, format, pre-push, pre-commit envs), .pre-commit-config.yaml, and CI workflow. Fixes all ruff errors including unused imports, missing raise-from, and undefined names. Ruff config maps existing bandit skips to equivalent S-rules. https://claude.ai/code/session_015uPUoKyYa8M2UAcyk5Gt6h --------- Co-authored-by: Claude <noreply@anthropic.com>
2026-03-11 12:23:35 -04:00
with (
patch("timmy.agent.Agent") as MockAgent,
patch("timmy.agent.Ollama"),
patch("timmy.agent.SqliteDb"),
patch("timmy.agent._resolve_model_with_fallback", return_value=("llama3.2:3b", False)),
patch("timmy.agent._check_model_available", return_value=True),
patch("timmy.agent._warmup_model", return_value=True),
):
from timmy.agent import create_timmy
create_timmy()
kwargs = MockAgent.call_args.kwargs
invalid = set(kwargs.keys()) - VALID_AGENT_KWARGS
assert not invalid, (
f"Unknown Agent kwargs {invalid} — verify they exist in agno "
f"before adding to VALID_AGENT_KWARGS"
)
# ── skip_mcp flag (#72) ─────────────────────────────────────────────────────
def test_create_timmy_skip_mcp_omits_mcp_tools():
"""create_timmy(skip_mcp=True) must not add MCP tool servers."""
with (
patch("timmy.agent.Agent"),
patch("timmy.agent.Ollama"),
patch("timmy.agent.SqliteDb"),
patch("timmy.mcp_tools.create_gitea_mcp_tools") as mock_gitea_mcp,
patch("timmy.mcp_tools.create_filesystem_mcp_tools") as mock_fs_mcp,
patch("timmy.agent._resolve_model_with_fallback", return_value=("llama3.2:3b", False)),
patch("timmy.agent._check_model_available", return_value=True),
patch("timmy.agent._warmup_model", return_value=True),
):
from timmy.agent import create_timmy
create_timmy(skip_mcp=True)
# MCP factory functions should never be called
mock_gitea_mcp.assert_not_called()
mock_fs_mcp.assert_not_called()
def test_create_timmy_default_includes_mcp_tools():
"""create_timmy() without skip_mcp should attempt MCP tool creation."""
with (
patch("timmy.agent.Agent"),
patch("timmy.agent.Ollama"),
patch("timmy.agent.SqliteDb"),
patch("timmy.mcp_tools.create_gitea_mcp_tools", return_value=None) as mock_gitea_mcp,
patch("timmy.mcp_tools.create_filesystem_mcp_tools", return_value=None) as mock_fs_mcp,
patch("timmy.agent._resolve_model_with_fallback", return_value=("llama3.1", False)),
patch("timmy.agent._check_model_available", return_value=True),
patch("timmy.agent._warmup_model", return_value=True),
):
from timmy.agent import create_timmy
create_timmy(skip_mcp=False)
# MCP factories should be called when skip_mcp is False
mock_gitea_mcp.assert_called_once()
mock_fs_mcp.assert_called_once()
# ── Configurable fallback chain tests ────────────────────────────────────────
def test_settings_has_fallback_model_lists():
"""settings.fallback_models and vision_fallback_models exist and are lists."""
from config import settings
assert isinstance(settings.fallback_models, list)
assert isinstance(settings.vision_fallback_models, list)
assert len(settings.fallback_models) > 0
assert len(settings.vision_fallback_models) > 0
def test_resolve_model_uses_configurable_text_fallback():
"""_resolve_model_with_fallback walks settings.fallback_models for text models."""
with patch("timmy.agent.settings") as mock_settings:
mock_settings.ollama_model = "nonexistent-model"
mock_settings.fallback_models = ["custom-a", "custom-b"]
mock_settings.vision_fallback_models = ["vision-a"]
# First model in chain is available
with patch("timmy.agent._check_model_available", side_effect=lambda m: m == "custom-a"):
from timmy.agent import _resolve_model_with_fallback
model, is_fallback = _resolve_model_with_fallback(
requested_model="nonexistent-model",
require_vision=False,
auto_pull=False,
)
assert model == "custom-a"
assert is_fallback is True
def test_resolve_model_uses_configurable_vision_fallback():
"""_resolve_model_with_fallback walks settings.vision_fallback_models for vision."""
with patch("timmy.agent.settings") as mock_settings:
mock_settings.ollama_model = "nonexistent-model"
mock_settings.fallback_models = ["text-a"]
mock_settings.vision_fallback_models = ["vision-x", "vision-y"]
with patch("timmy.agent._check_model_available", side_effect=lambda m: m == "vision-y"):
from timmy.agent import _resolve_model_with_fallback
model, is_fallback = _resolve_model_with_fallback(
requested_model="nonexistent-model",
require_vision=True,
auto_pull=False,
)
assert model == "vision-y"
assert is_fallback is True
def test_get_effective_ollama_model_walks_fallback_chain():
"""get_effective_ollama_model uses settings.fallback_models."""
with (
patch("config.settings") as mock_settings,
patch("config.check_ollama_model_available", side_effect=lambda m: m == "fb-2") as _,
):
mock_settings.ollama_model = "gone-model"
mock_settings.ollama_url = "http://localhost:11434"
mock_settings.fallback_models = ["fb-1", "fb-2", "fb-3"]
from config import get_effective_ollama_model
result = get_effective_ollama_model()
assert result == "fb-2"
# ── _build_tools_list ─────────────────────────────────────────────────────
def test_build_tools_list_empty_when_tools_disabled():
"""Small models get an empty tools list."""
from timmy.agent import _build_tools_list
result = _build_tools_list(use_tools=False, skip_mcp=False, model_name="llama3.2")
assert result == []
def test_build_tools_list_includes_toolkit_when_enabled():
"""Tool-capable models get the full toolkit."""
mock_toolkit = MagicMock()
with patch("timmy.agent.create_full_toolkit", return_value=mock_toolkit):
from timmy.agent import _build_tools_list
result = _build_tools_list(use_tools=True, skip_mcp=True, model_name="llama3.1")
assert mock_toolkit in result
def test_build_tools_list_skips_mcp_when_flagged():
"""skip_mcp=True must not call MCP factories."""
mock_toolkit = MagicMock()
with (
patch("timmy.agent.create_full_toolkit", return_value=mock_toolkit),
patch("timmy.mcp_tools.create_gitea_mcp_tools") as mock_gitea,
patch("timmy.mcp_tools.create_filesystem_mcp_tools") as mock_fs,
):
from timmy.agent import _build_tools_list
_build_tools_list(use_tools=True, skip_mcp=True, model_name="llama3.1")
mock_gitea.assert_not_called()
mock_fs.assert_not_called()
def test_build_tools_list_includes_mcp_when_not_skipped():
"""skip_mcp=False should attempt MCP tool creation."""
mock_toolkit = MagicMock()
with (
patch("timmy.agent.create_full_toolkit", return_value=mock_toolkit),
patch("timmy.mcp_tools.create_gitea_mcp_tools", return_value=None) as mock_gitea,
patch("timmy.mcp_tools.create_filesystem_mcp_tools", return_value=None) as mock_fs,
):
from timmy.agent import _build_tools_list
_build_tools_list(use_tools=True, skip_mcp=False, model_name="llama3.1")
mock_gitea.assert_called_once()
mock_fs.assert_called_once()
# ── _build_prompt ─────────────────────────────────────────────────────────
def test_build_prompt_includes_base_prompt():
"""Prompt should always contain the base system prompt."""
from timmy.agent import _build_prompt
result = _build_prompt(use_tools=False, session_id="test")
assert "Timmy" in result
def test_build_prompt_appends_memory_context():
"""Memory context should be appended when available."""
mock_memory = MagicMock()
mock_memory.get_system_context.return_value = "User prefers dark mode."
with patch("timmy.memory_system.memory_system", mock_memory):
from timmy.agent import _build_prompt
result = _build_prompt(use_tools=True, session_id="test")
assert "GROUNDED CONTEXT" in result
assert "dark mode" in result
def test_build_prompt_truncates_long_memory():
"""Long memory context should be truncated."""
mock_memory = MagicMock()
mock_memory.get_system_context.return_value = "x" * 10000
with patch("timmy.memory_system.memory_system", mock_memory):
from timmy.agent import _build_prompt
result = _build_prompt(use_tools=False, session_id="test")
assert "[truncated]" in result
def test_build_prompt_survives_memory_failure():
"""Prompt should fall back to base when memory fails."""
mock_memory = MagicMock()
mock_memory.get_system_context.side_effect = RuntimeError("db locked")
with patch("timmy.memory_system.memory_system", mock_memory):
from timmy.agent import _build_prompt
result = _build_prompt(use_tools=True, session_id="test")
assert "Timmy" in result
# Memory context should NOT be appended (the db locked error was caught)
assert "db locked" not in result
# ── _create_ollama_agent ──────────────────────────────────────────────────
def test_create_ollama_agent_passes_correct_kwargs():
"""_create_ollama_agent must pass the expected kwargs to Agent."""
with (
patch("timmy.agent.Agent") as MockAgent,
patch("timmy.agent.Ollama"),
patch("timmy.agent.SqliteDb"),
patch("timmy.agent._warmup_model", return_value=True),
):
from timmy.agent import _create_ollama_agent
_create_ollama_agent(
db_file="test.db",
model_name="llama3.1",
tools_list=[MagicMock()],
full_prompt="test prompt",
use_tools=True,
)
kwargs = MockAgent.call_args.kwargs
assert kwargs["description"] == "test prompt"
assert kwargs["markdown"] is False
def test_create_ollama_agent_none_tools_when_empty():
"""Empty tools_list should pass tools=None to Agent."""
with (
patch("timmy.agent.Agent") as MockAgent,
patch("timmy.agent.Ollama"),
patch("timmy.agent.SqliteDb"),
patch("timmy.agent._warmup_model", return_value=True),
):
from timmy.agent import _create_ollama_agent
_create_ollama_agent(
db_file="test.db",
model_name="llama3.2",
tools_list=[],
full_prompt="test prompt",
use_tools=False,
)
kwargs = MockAgent.call_args.kwargs
assert kwargs["tools"] is None
def test_no_hardcoded_fallback_constants_in_agent():
"""agent.py must not define module-level DEFAULT_MODEL_FALLBACKS."""
import timmy.agent as agent_mod
assert not hasattr(agent_mod, "DEFAULT_MODEL_FALLBACKS"), (
"Hardcoded DEFAULT_MODEL_FALLBACKS still exists — use settings.fallback_models"
)
assert not hasattr(agent_mod, "VISION_MODEL_FALLBACKS"), (
"Hardcoded VISION_MODEL_FALLBACKS still exists — use settings.vision_fallback_models"
)