Files
Timmy-time-dashboard/tests/timmy/test_agent.py

325 lines
11 KiB
Python
Raw Normal View History

from unittest.mock import MagicMock, patch
def test_create_timmy_returns_agent():
"""create_timmy should delegate to Agno Agent with correct config."""
with patch("timmy.agent.Agent") as MockAgent, patch("timmy.agent.Ollama"), patch(
"timmy.agent.SqliteDb"
):
mock_instance = MagicMock()
MockAgent.return_value = mock_instance
from timmy.agent import create_timmy
result = create_timmy()
assert result is mock_instance
MockAgent.assert_called_once()
def test_create_timmy_agent_name():
with patch("timmy.agent.Agent") as MockAgent, patch("timmy.agent.Ollama"), patch(
"timmy.agent.SqliteDb"
):
from timmy.agent import create_timmy
create_timmy()
kwargs = MockAgent.call_args.kwargs
Claude/remove persona system f vgt m (#126) * Remove persona system, identity, and all Timmy references Strip the codebase to pure orchestration logic: - Delete TIMMY_IDENTITY.md and memory/self/identity.md - Gut brain/identity.py to no-op stubs (empty returns) - Remove all system prompts reinforcing Timmy's character, faith, sovereignty, sign-off ("Sir, affirmative"), and agent roster - Replace identity-laden prompts with generic local-AI-assistant prompts - Remove "You work for Timmy" from all sub-agent system prompts - Rename PersonaTools → AgentTools, PERSONA_TOOLKITS → AGENT_TOOLKITS - Replace "timmy" agent ID with "orchestrator" across routes, marketplace, tools catalog, and orchestrator class - Strip Timmy references from config comments, templates, telegram bot, chat API, and dashboard UI - Delete tests/brain/test_identity.py entirely - Fix all test assertions that checked for persona identity content 729 tests pass (2 pre-existing failures in test_calm.py unrelated). https://claude.ai/code/session_01LjQGUE6nk9W9674zaxrYxy * Add Taskosaur (PM + AI task execution) to docker-compose Spins up Taskosaur alongside the dashboard on `docker compose up`: - postgres:16-alpine (port 5432, Taskosaur DB) - redis:7-alpine (Bull queue backend) - taskosaur (ports 3000 API / 3001 UI) - dashboard now depends_on taskosaur healthy - TASKOSAUR_API_URL injected into dashboard environment Dashboard can reach Taskosaur at http://taskosaur:3000/api on the internal network. Frontend UI accessible at http://localhost:3001. https://claude.ai/code/session_01LjQGUE6nk9W9674zaxrYxy --------- Co-authored-by: Claude <noreply@anthropic.com>
2026-03-04 12:00:49 -05:00
assert kwargs["name"] == "Agent"
def test_create_timmy_history_config():
with patch("timmy.agent.Agent") as MockAgent, patch("timmy.agent.Ollama"), patch(
"timmy.agent.SqliteDb"
):
from timmy.agent import create_timmy
create_timmy()
kwargs = MockAgent.call_args.kwargs
assert kwargs["add_history_to_context"] is True
Fine-tune Timmy's conversational AI with memory layers ## Enhanced System Prompt - Detailed tool usage guidelines with explicit examples - Clear DO and DON'T examples for tool selection - Memory system documentation - Conversation flow guidelines - Context awareness instructions ## Memory Layer System (NEW) Implemented 3-layer memory architecture: 1. WORKING MEMORY (src/timmy/memory_layers.py) - Immediate context (last 20 messages) - Topic tracking - Tool call tracking - Fast, ephemeral 2. SHORT-TERM MEMORY (Agno SQLite) - Recent conversations (100) - Persists across restarts - Managed by Agno Agent 3. LONG-TERM MEMORY (src/timmy/memory_layers.py) - Facts about user (name, preferences) - SQLite storage in data/memory/ - Auto-extraction from conversations - User profile generation ## Memory Manager (NEW) - Central coordinator for all memory layers - Context injection into prompts - Fact extraction and storage - Session management ## TimmyWithMemory Class (NEW) - Wrapper around Agno Agent with explicit memory - Auto-injects user context from LTM - Tracks exchanges across all layers - Simple chat() interface ## Agent Configuration - Increased num_history_runs: 10 -> 20 - Better conversational context retention ## Tests - All 973 tests pass - Fixed test expectations for new config - Fixed module path in test_scary_paths.py ## Files Added/Modified - src/timmy/prompts.py - Enhanced with memory and tool guidance - src/timmy/agent.py - Added TimmyWithMemory class - src/timmy/memory_layers.py - NEW memory system - src/timmy/conversation.py - NEW conversation manager - tests/ - Updated for new config
2026-02-25 18:07:44 -05:00
assert kwargs["num_history_runs"] == 20
assert kwargs["markdown"] is True
def test_create_timmy_custom_db_file():
with patch("timmy.agent.Agent"), patch("timmy.agent.Ollama"), patch(
"timmy.agent.SqliteDb"
) as MockDb:
from timmy.agent import create_timmy
create_timmy(db_file="custom.db")
MockDb.assert_called_once_with(db_file="custom.db")
def test_create_timmy_embeds_system_prompt():
from timmy.prompts import SYSTEM_PROMPT
with patch("timmy.agent.Agent") as MockAgent, patch("timmy.agent.Ollama"), patch(
"timmy.agent.SqliteDb"
):
from timmy.agent import create_timmy
create_timmy()
kwargs = MockAgent.call_args.kwargs
Implement three-tier memory architecture (Hot/Vault/Handoff) This commit replaces the previous memory_layers.py with a proper three-tier memory system as specified by the user: ## Tier 1 — Hot Memory (MEMORY.md) - Single flat file always loaded into system context - Contains: current status, standing rules, agent roster, key decisions - ~300 lines max, pruned monthly - Managed by HotMemory class ## Tier 2 — Structured Vault (memory/) - Directory with three namespaces: • self/ — identity.md, user_profile.md, methodology.md • notes/ — session logs, AARs, research • aar/ — post-task retrospectives - Markdown format, Obsidian-compatible - Append-only, date-stamped - Managed by VaultMemory class ## Handoff Protocol - last-session-handoff.md written at session end - Contains: summary, key decisions, open items, next steps - Auto-loaded at next session start - Maintains continuity across resets ## Implementation ### New Files: - src/timmy/memory_system.py — Core memory system - MEMORY.md — Hot memory template - memory/self/*.md — Identity, user profile, methodology ### Modified: - src/timmy/agent.py — Integrated with memory system - create_timmy() injects memory context - TimmyWithMemory class with automatic fact extraction - tests/test_agent.py — Updated for memory context ## Key Principles - Hot memory = small and curated - Vault = append-only, never delete - Handoffs = continuity mechanism - Flat files = human-readable, portable ## Usage All 973 tests pass.
2026-02-25 18:17:43 -05:00
# Prompt should contain base system prompt (may have memory context appended)
# Default model (llama3.2) uses the lite prompt
Claude/remove persona system f vgt m (#126) * Remove persona system, identity, and all Timmy references Strip the codebase to pure orchestration logic: - Delete TIMMY_IDENTITY.md and memory/self/identity.md - Gut brain/identity.py to no-op stubs (empty returns) - Remove all system prompts reinforcing Timmy's character, faith, sovereignty, sign-off ("Sir, affirmative"), and agent roster - Replace identity-laden prompts with generic local-AI-assistant prompts - Remove "You work for Timmy" from all sub-agent system prompts - Rename PersonaTools → AgentTools, PERSONA_TOOLKITS → AGENT_TOOLKITS - Replace "timmy" agent ID with "orchestrator" across routes, marketplace, tools catalog, and orchestrator class - Strip Timmy references from config comments, templates, telegram bot, chat API, and dashboard UI - Delete tests/brain/test_identity.py entirely - Fix all test assertions that checked for persona identity content 729 tests pass (2 pre-existing failures in test_calm.py unrelated). https://claude.ai/code/session_01LjQGUE6nk9W9674zaxrYxy * Add Taskosaur (PM + AI task execution) to docker-compose Spins up Taskosaur alongside the dashboard on `docker compose up`: - postgres:16-alpine (port 5432, Taskosaur DB) - redis:7-alpine (Bull queue backend) - taskosaur (ports 3000 API / 3001 UI) - dashboard now depends_on taskosaur healthy - TASKOSAUR_API_URL injected into dashboard environment Dashboard can reach Taskosaur at http://taskosaur:3000/api on the internal network. Frontend UI accessible at http://localhost:3001. https://claude.ai/code/session_01LjQGUE6nk9W9674zaxrYxy --------- Co-authored-by: Claude <noreply@anthropic.com>
2026-03-04 12:00:49 -05:00
assert "local AI assistant" in kwargs["description"]
# ── Ollama host regression (container connectivity) ─────────────────────────
def test_create_timmy_passes_ollama_url_to_model():
"""Regression: Ollama model must receive settings.ollama_url as host.
Without this, containers default to localhost:11434 which is unreachable
when Ollama runs on the Docker host.
"""
with patch("timmy.agent.Agent"), patch("timmy.agent.Ollama") as MockOllama, patch(
"timmy.agent.SqliteDb"
):
from timmy.agent import create_timmy
create_timmy()
kwargs = MockOllama.call_args.kwargs
assert "host" in kwargs, "Ollama() must receive host= parameter"
assert kwargs["host"] == "http://localhost:11434" # default from config
def test_create_timmy_respects_custom_ollama_url():
"""Ollama host should follow OLLAMA_URL when overridden in config."""
custom_url = "http://host.docker.internal:11434"
with patch("timmy.agent.Agent"), patch("timmy.agent.Ollama") as MockOllama, patch(
"timmy.agent.SqliteDb"
), patch("timmy.agent.settings") as mock_settings:
mock_settings.ollama_model = "llama3.2"
mock_settings.ollama_url = custom_url
mock_settings.timmy_model_backend = "ollama"
mock_settings.airllm_model_size = "70b"
from timmy.agent import create_timmy
create_timmy()
kwargs = MockOllama.call_args.kwargs
assert kwargs["host"] == custom_url
# ── AirLLM path ──────────────────────────────────────────────────────────────
def test_create_timmy_airllm_returns_airllm_agent():
"""backend='airllm' must return a TimmyAirLLMAgent, not an Agno Agent."""
with patch("timmy.backends.is_apple_silicon", return_value=False):
from timmy.agent import create_timmy
from timmy.backends import TimmyAirLLMAgent
result = create_timmy(backend="airllm", model_size="8b")
assert isinstance(result, TimmyAirLLMAgent)
def test_create_timmy_airllm_does_not_call_agno_agent():
"""When using the airllm backend, Agno Agent should never be instantiated."""
with patch("timmy.agent.Agent") as MockAgent, patch(
"timmy.backends.is_apple_silicon", return_value=False
):
from timmy.agent import create_timmy
create_timmy(backend="airllm", model_size="8b")
MockAgent.assert_not_called()
def test_create_timmy_explicit_ollama_ignores_autodetect():
"""backend='ollama' must always use Ollama, even on Apple Silicon."""
with patch("timmy.agent.Agent") as MockAgent, patch("timmy.agent.Ollama"), patch(
"timmy.agent.SqliteDb"
):
from timmy.agent import create_timmy
create_timmy(backend="ollama")
MockAgent.assert_called_once()
# ── _resolve_backend ─────────────────────────────────────────────────────────
def test_resolve_backend_explicit_takes_priority():
from timmy.agent import _resolve_backend
assert _resolve_backend("airllm") == "airllm"
assert _resolve_backend("ollama") == "ollama"
def test_resolve_backend_defaults_to_ollama_without_config():
"""Default config (timmy_model_backend='ollama') → 'ollama'."""
from timmy.agent import _resolve_backend
assert _resolve_backend(None) == "ollama"
def test_resolve_backend_auto_uses_airllm_on_apple_silicon():
"""'auto' on Apple Silicon with airllm stubbed → 'airllm'."""
with patch("timmy.backends.is_apple_silicon", return_value=True), patch(
"timmy.agent.settings"
) as mock_settings:
mock_settings.timmy_model_backend = "auto"
mock_settings.airllm_model_size = "70b"
mock_settings.ollama_model = "llama3.2"
from timmy.agent import _resolve_backend
assert _resolve_backend(None) == "airllm"
def test_resolve_backend_auto_falls_back_on_non_apple():
"""'auto' on non-Apple Silicon → 'ollama'."""
with patch("timmy.backends.is_apple_silicon", return_value=False), patch(
"timmy.agent.settings"
) as mock_settings:
mock_settings.timmy_model_backend = "auto"
mock_settings.airllm_model_size = "70b"
mock_settings.ollama_model = "llama3.2"
from timmy.agent import _resolve_backend
assert _resolve_backend(None) == "ollama"
# ── _model_supports_tools ────────────────────────────────────────────────────
def test_model_supports_tools_llama32_returns_false():
"""llama3.2 (3B) is too small for reliable tool calling."""
from timmy.agent import _model_supports_tools
assert _model_supports_tools("llama3.2") is False
assert _model_supports_tools("llama3.2:latest") is False
def test_model_supports_tools_llama31_returns_true():
"""llama3.1 (8B+) can handle tool calling."""
from timmy.agent import _model_supports_tools
assert _model_supports_tools("llama3.1") is True
assert _model_supports_tools("llama3.3") is True
def test_model_supports_tools_other_small_models():
"""Other known small models should not get tools."""
from timmy.agent import _model_supports_tools
assert _model_supports_tools("phi-3") is False
assert _model_supports_tools("tinyllama") is False
def test_model_supports_tools_unknown_model_gets_tools():
"""Unknown models default to tool-capable (optimistic)."""
from timmy.agent import _model_supports_tools
assert _model_supports_tools("mistral") is True
assert _model_supports_tools("qwen2.5:72b") is True
# ── Tool gating in create_timmy ──────────────────────────────────────────────
def test_create_timmy_no_tools_for_small_model():
"""Small models (llama3.2) should get no tools."""
mock_toolkit = MagicMock()
with patch("timmy.agent.Agent") as MockAgent, patch("timmy.agent.Ollama"), patch(
"timmy.agent.SqliteDb"
), patch("timmy.agent.create_full_toolkit", return_value=mock_toolkit), patch(
"timmy.agent._resolve_model_with_fallback", return_value=("llama3.2:3b", False)
), patch(
"timmy.agent._check_model_available", return_value=True
):
from timmy.agent import create_timmy
create_timmy()
kwargs = MockAgent.call_args.kwargs
# llama3.2 is in _SMALL_MODEL_PATTERNS → tools should be None
assert kwargs["tools"] is None
def test_create_timmy_includes_tools_for_large_model():
"""A tool-capable model (e.g. llama3.1) should attempt to include tools."""
mock_toolkit = MagicMock()
with patch("timmy.agent.Agent") as MockAgent, patch("timmy.agent.Ollama"), patch(
"timmy.agent.SqliteDb"
), patch("timmy.agent.create_full_toolkit", return_value=mock_toolkit), patch(
"timmy.agent.settings"
) as mock_settings:
mock_settings.ollama_model = "llama3.1"
mock_settings.ollama_url = "http://localhost:11434"
mock_settings.timmy_model_backend = "ollama"
mock_settings.airllm_model_size = "70b"
mock_settings.telemetry_enabled = False
from timmy.agent import create_timmy
create_timmy()
kwargs = MockAgent.call_args.kwargs
assert kwargs["tools"] == [mock_toolkit]
def test_create_timmy_no_unsupported_agent_kwargs():
"""Regression guard: show_tool_calls and tool_call_limit are not valid agno 2.x params.
These were removed in f95c960 (Feb 26) and must not be reintroduced.
"""
with patch("timmy.agent.Agent") as MockAgent, patch("timmy.agent.Ollama"), patch(
"timmy.agent.SqliteDb"
):
from timmy.agent import create_timmy
create_timmy()
kwargs = MockAgent.call_args.kwargs
assert "show_tool_calls" not in kwargs, "show_tool_calls is not a valid Agent param"
def test_create_timmy_no_extra_kwargs():
"""All kwargs passed to Agent() must be from the known-valid set.
agno is mocked globally in conftest, so we can't inspect the real class here.
Instead, maintain an explicit allowlist of params validated against agno 2.5.5.
If a new param is needed, verify it exists in agno first, then add it here.
"""
VALID_AGENT_KWARGS = {
"name",
"model",
"db",
"description",
"add_history_to_context",
"num_history_runs",
"markdown",
"tools",
"tool_call_limit",
"telemetry",
}
with patch("timmy.agent.Agent") as MockAgent, patch("timmy.agent.Ollama"), patch(
"timmy.agent.SqliteDb"
):
from timmy.agent import create_timmy
create_timmy()
kwargs = MockAgent.call_args.kwargs
invalid = set(kwargs.keys()) - VALID_AGENT_KWARGS
assert not invalid, (
f"Unknown Agent kwargs {invalid} — verify they exist in agno "
f"before adding to VALID_AGENT_KWARGS"
)