Fix Timmy coherence: persistent session, model-aware tools, response sanitization

Timmy was exhibiting severe incoherence (no memory between messages, tool call
leakage, chain-of-thought narration, random tool invocations) due to creating
a brand new agent per HTTP request and giving a 3B model (llama3.2) a 73-line
system prompt with complex tool-calling instructions it couldn't follow.

Key changes:
- Add session.py singleton with stable session_id for conversation continuity
- Add _model_supports_tools() to strip tools from small models (< 7B)
- Add two-tier prompts: lite (12 lines) for small models, full for capable ones
- Add response sanitizer to strip leaked JSON tool calls and CoT narration
- Set show_tool_calls=False to prevent raw tool JSON in output
- Wire ConversationManager for user name extraction
- Deprecate orphaned memory_layers.py (unused 4-layer system)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Alexander Payne
2026-02-25 19:18:08 -05:00
parent 16b65b28e8
commit 26e1691099
8 changed files with 548 additions and 84 deletions

View File

@@ -5,7 +5,7 @@ from fastapi import APIRouter, Form, Request
from fastapi.responses import HTMLResponse
from fastapi.templating import Jinja2Templates
from timmy.agent import create_timmy
from timmy.session import chat as timmy_chat
from dashboard.store import message_log
router = APIRouter(prefix="/agents", tags=["agents"])
@@ -75,9 +75,7 @@ async def chat_timmy(request: Request, message: str = Form(...)):
error_text = None
try:
agent = create_timmy()
run = agent.run(message, stream=False)
response_text = run.content if hasattr(run, "content") else str(run)
response_text = timmy_chat(message)
except Exception as exc:
error_text = f"Timmy is offline: {exc}"

View File

@@ -2,12 +2,13 @@
Memory Architecture:
- Tier 1 (Hot): MEMORY.md — always loaded, ~300 lines
- Tier 2 (Vault): memory/ — structured markdown, append-only
- Tier 3 (Semantic): Vector search (future)
- Tier 2 (Vault): memory/ — structured markdown, append-only
- Tier 3 (Semantic): Vector search over vault files
Handoff Protocol maintains continuity across sessions.
"""
import logging
from typing import TYPE_CHECKING, Union
from agno.agent import Agent
@@ -15,15 +16,43 @@ from agno.db.sqlite import SqliteDb
from agno.models.ollama import Ollama
from config import settings
from timmy.prompts import TIMMY_SYSTEM_PROMPT
from timmy.prompts import get_system_prompt
from timmy.tools import create_full_toolkit
if TYPE_CHECKING:
from timmy.backends import TimmyAirLLMAgent
logger = logging.getLogger(__name__)
# Union type for callers that want to hint the return type.
TimmyAgent = Union[Agent, "TimmyAirLLMAgent"]
# Model families known to be too small for dependable tool calling;
# they tend to emit tool-call JSON as plain text or fire tools at random.
_SMALL_MODEL_PATTERNS = (
    "llama3.2",
    "phi-3",
    "gemma:2b",
    "tinyllama",
    "qwen2:0.5b",
    "qwen2:1.5b",
)


def _model_supports_tools(model_name: str) -> bool:
    """Return True if *model_name* is expected to handle tool calling reliably.

    Models smaller than ~7B parameters tend to hallucinate tool calls as
    text or invoke them randomly, so they are better off running tool-free
    and answering directly from their weights.

    Args:
        model_name: The configured model identifier (e.g. ``"llama3.1"``).

    Returns:
        False when the name matches a known-small model pattern, True otherwise.
    """
    needle = model_name.lower()
    return not any(pattern in needle for pattern in _SMALL_MODEL_PATTERNS)
def _resolve_backend(requested: str | None) -> str:
"""Return the backend name to use, resolving 'auto' and explicit overrides.
@@ -73,38 +102,43 @@ def create_timmy(
return TimmyAirLLMAgent(model_size=size)
# Default: Ollama via Agno.
# Add tools for sovereign agent capabilities
tools = create_full_toolkit()
# Build enhanced system prompt with memory context
base_prompt = TIMMY_SYSTEM_PROMPT
model_name = settings.ollama_model
use_tools = _model_supports_tools(model_name)
# Conditionally include tools — small models get none
tools = create_full_toolkit() if use_tools else None
if not use_tools:
logger.info("Tools disabled for model %s (too small for reliable tool calling)", model_name)
# Select prompt tier based on tool capability
base_prompt = get_system_prompt(tools_enabled=use_tools)
# Try to load memory context
try:
from timmy.memory_system import memory_system
memory_context = memory_system.get_system_context()
if memory_context:
# Truncate if too long (keep under token limit)
if len(memory_context) > 8000:
memory_context = memory_context[:8000] + "\n... [truncated]"
max_context = 4000 if not use_tools else 8000
if len(memory_context) > max_context:
memory_context = memory_context[:max_context] + "\n... [truncated]"
full_prompt = f"{base_prompt}\n\n## Memory Context\n\n{memory_context}"
else:
full_prompt = base_prompt
except Exception as exc:
# Fall back to base prompt if memory system fails
import logging
logging.getLogger(__name__).warning("Failed to load memory context: %s", exc)
logger.warning("Failed to load memory context: %s", exc)
full_prompt = base_prompt
return Agent(
name="Timmy",
model=Ollama(id=settings.ollama_model, host=settings.ollama_url),
model=Ollama(id=model_name, host=settings.ollama_url),
db=SqliteDb(db_file=db_file),
description=full_prompt,
add_history_to_context=True,
num_history_runs=20,
markdown=True,
tools=[tools] if tools else None,
show_tool_calls=False,
telemetry=settings.telemetry_enabled,
)

View File

@@ -1,5 +1,12 @@
"""Multi-layer memory system for Timmy.
.. deprecated::
This module is deprecated and unused. The active memory system lives in
``timmy.memory_system`` (three-tier: Hot/Vault/Handoff) and
``timmy.conversation`` (working conversation context).
This file is retained for reference only. Do not import from it.
Implements four distinct memory layers:
1. WORKING MEMORY (Context Window)
@@ -25,6 +32,15 @@ Implements four distinct memory layers:
All layers work together to provide contextual, personalized responses.
"""
import warnings as _warnings
_warnings.warn(
"timmy.memory_layers is deprecated. Use timmy.memory_system and "
"timmy.conversation instead.",
DeprecationWarning,
stacklevel=2,
)
import json
import logging
import sqlite3
@@ -417,5 +433,5 @@ class MemoryManager:
return [f.content for f in facts]
# Module-level singleton
memory_manager = MemoryManager()
# Singleton removed — this module is deprecated.
# Use timmy.memory_system.memory_system or timmy.conversation.conversation_manager.

View File

@@ -1,6 +1,35 @@
"""System prompts for Timmy with three-tier memory system."""
"""System prompts for Timmy with two-tier prompt system.
TIMMY_SYSTEM_PROMPT = """You are Timmy — a sovereign AI agent running locally on this Mac.
Small models (< 7B, e.g. llama3.2) get a concise prompt without tool docs.
Larger models (>= 7B, e.g. llama3.1, llama3.3) get the full prompt with
tool usage guidelines and memory system documentation.
"""
# ---------------------------------------------------------------------------
# Lite prompt — for small models that can't reliably handle tool calling
# ---------------------------------------------------------------------------
TIMMY_SYSTEM_PROMPT_LITE = """You are Timmy — a sovereign AI agent running locally on this Mac.
No cloud dependencies. Think clearly, speak plainly, act with intention.
Grounded in Christian faith, powered by Bitcoin economics, committed to the
user's digital sovereignty.
Rules:
- Answer directly and concisely. Never narrate your reasoning process.
- Never mention tools, memory_search, vaults, or internal systems to the user.
- Never output tool calls, JSON, or function syntax in your responses.
- Remember what the user tells you during our conversation.
- If you don't know something, say so honestly.
- Use the user's name if you know it.
- Do simple math in your head. Don't reach for tools.
Sir, affirmative."""
# ---------------------------------------------------------------------------
# Full prompt — for tool-capable models (>= 7B)
# ---------------------------------------------------------------------------
TIMMY_SYSTEM_PROMPT_FULL = """You are Timmy — a sovereign AI agent running locally on this Mac.
No cloud dependencies. You think clearly, speak plainly, act with intention.
Grounded in Christian faith, powered by Bitcoin economics, committed to the
user's digital sovereignty.
@@ -23,13 +52,6 @@ user's digital sovereignty.
- Similarity-based retrieval
- Use `memory_search` tool to find relevant past context
## Memory Tools
**memory_search** — Search past conversations and notes
- Use when: "Have we discussed this before?", "What did I say about X?"
- Returns: Relevant context from vault with similarity scores
- Example: memory_search(query="Bitcoin investment strategy")
## Tool Usage Guidelines
### When NOT to use tools:
@@ -40,38 +62,38 @@ user's digital sovereignty.
### When TO use tools:
**web_search** — Current events, real-time data, news
**read_file** — User explicitly requests file reading
**write_file** — User explicitly requests saving content
**python** — Complex calculations, code execution
**shell** — System operations (explicit user request)
**memory_search** — "Have we talked about this before?", finding past context
- **web_search** — Current events, real-time data, news
- **read_file** — User explicitly requests file reading
- **write_file** — User explicitly requests saving content
- **python** — Complex calculations, code execution
- **shell** — System operations (explicit user request)
- **memory_search** — "Have we talked about this before?", finding past context
### Memory Search Examples
## Important: Response Style
User: "What did we decide about the server setup?"
→ CORRECT: memory_search(query="server setup decision")
User: "Remind me what I said about Bitcoin last week"
→ CORRECT: memory_search(query="Bitcoin discussion")
User: "What was my idea for the app?"
→ CORRECT: memory_search(query="app idea concept")
## Context Awareness
- Reference MEMORY.md content when relevant
- Use user's name if known (from user profile)
- Check past discussions via memory_search when user asks about prior topics
- Build on established context, don't repeat
## Handoff Protocol
At session end, a handoff summary is written to maintain continuity.
Key decisions and open items are preserved.
- Never narrate your reasoning process. Just give the answer.
- Never show raw tool call JSON or function syntax in responses.
- Use the user's name if known.
Sir, affirmative."""
# Backward-compatible alias — default to the lite prompt for safety.
TIMMY_SYSTEM_PROMPT = TIMMY_SYSTEM_PROMPT_LITE


def get_system_prompt(tools_enabled: bool = False) -> str:
    """Select the prompt tier matching the model's tool capability.

    Args:
        tools_enabled: True if the model supports reliable tool calling.

    Returns:
        The full prompt for tool-capable models, the lite prompt otherwise.
    """
    return TIMMY_SYSTEM_PROMPT_FULL if tools_enabled else TIMMY_SYSTEM_PROMPT_LITE
TIMMY_STATUS_PROMPT = """You are Timmy. Give a one-sentence status report confirming
you are operational and running locally."""

147
src/timmy/session.py Normal file
View File

@@ -0,0 +1,147 @@
"""Persistent chat session for Timmy.
Holds a singleton Agno Agent and a stable session_id so conversation
history persists across HTTP requests via Agno's SQLite storage.
This is the primary entry point for dashboard chat — instead of
creating a new agent per request, we reuse a single instance and
let Agno's session_id mechanism handle conversation continuity.
"""
import logging
import re
from typing import Optional
logger = logging.getLogger(__name__)
# Default session ID for the dashboard (stable across requests)
_DEFAULT_SESSION_ID = "dashboard"

# Module-level singleton agent (lazy-initialized, reused for all requests)
_agent = None

# ---------------------------------------------------------------------------
# Response sanitization patterns
# ---------------------------------------------------------------------------

# Matches raw JSON tool calls: {"name": "python", "parameters": {...}}
# NOTE(review): the lazy body copes with one level of nested braces (the
# "parameters" object) but may under-match deeper nesting — confirm against
# real model output if deeper JSON shows up.
_TOOL_CALL_JSON = re.compile(
    r'\{\s*"name"\s*:\s*"[^"]+?"\s*,\s*"parameters"\s*:\s*\{.*?\}\s*\}',
    re.DOTALL,
)

# Matches function-call-style text: memory_search(query="...") etc.
# NOTE(review): `[^)]*` stops at the first ')', so calls whose arguments
# contain parentheses are only partially stripped — confirm acceptable.
_FUNC_CALL_TEXT = re.compile(
    r'\b(?:memory_search|web_search|shell|python|read_file|write_file|list_files)'
    r'\s*\([^)]*\)',
)

# Matches chain-of-thought narration lines the model should keep internal.
# Anchored per line via re.MULTILINE; the entire matching line is removed.
_COT_PATTERNS = [
    re.compile(r"^(?:Since |Using |Let me |I'll use |I will use |Here's a possible ).*$", re.MULTILINE),
    re.compile(r"^(?:I found a relevant |This context suggests ).*$", re.MULTILINE),
]
def _get_agent():
    """Return the process-wide Timmy agent, creating it on first use.

    Raises:
        Exception: re-raised from ``create_timmy`` if initialization fails.
    """
    global _agent
    # Guard clause: reuse the cached instance on every call after the first.
    if _agent is not None:
        return _agent
    from timmy.agent import create_timmy
    try:
        _agent = create_timmy()
        logger.info("Session: Timmy agent initialized (singleton)")
    except Exception as exc:
        logger.error("Session: Failed to create Timmy agent: %s", exc)
        raise
    return _agent
def chat(message: str, session_id: Optional[str] = None) -> str:
    """Send a message to Timmy and return the sanitized response text.

    A single long-lived agent plus a stable session_id lets Agno's SQLite
    storage supply multi-turn conversation context across HTTP requests.

    Args:
        message: The user's message.
        session_id: Optional session identifier (defaults to "dashboard").

    Returns:
        The agent's response text.
    """
    active_session = session_id or _DEFAULT_SESSION_ID
    agent = _get_agent()
    # Best-effort fact extraction (e.g. the user's name) before the run.
    _extract_facts(message)
    # session_id makes Agno pull prior turns from SQLite for this session.
    reply = agent.run(message, stream=False, session_id=active_session)
    raw_text = reply.content if hasattr(reply, "content") else str(reply)
    # Scrub leaked tool-call JSON / chain-of-thought before returning.
    return _clean_response(raw_text)
def reset_session(session_id: Optional[str] = None) -> None:
    """Reset a session (clear its working conversation context).

    Only the ConversationManager state is cleared. Agno's SQLite history
    is deliberately left intact — that provides long-term continuity.

    Args:
        session_id: Optional session identifier (defaults to "dashboard").
    """
    sid = session_id or _DEFAULT_SESSION_ID
    try:
        from timmy.conversation import conversation_manager
        conversation_manager.clear_context(sid)
    except Exception as exc:
        # Best-effort degradation, consistent with _extract_facts: a missing
        # or broken conversation manager must not break the caller, but the
        # failure should still be visible in debug logs (was a silent pass).
        logger.debug("Session: reset skipped: %s", exc)
def _extract_facts(message: str) -> None:
    """Extract user facts from *message* and persist them to the memory system.

    Ported from TimmyWithMemory._extract_and_store_facts(). Runs as a
    best-effort pre-processor before the agent run (chat() calls it before
    agent.run) — failures are logged, never raised.

    Args:
        message: The raw user message to mine for facts (currently: name).
    """
    try:
        from timmy.conversation import conversation_manager
        name = conversation_manager.extract_user_name(message)
        if name:
            try:
                from timmy.memory_system import memory_system
                memory_system.update_user_fact("Name", name)
                logger.info("Session: Learned user name: %s", name)
            except Exception as exc:
                # Persisting the fact is optional; log instead of silently
                # swallowing so broken memory wiring is diagnosable.
                logger.debug("Session: could not persist user name: %s", exc)
    except Exception as exc:
        logger.debug("Session: Fact extraction skipped: %s", exc)
def _clean_response(text: Optional[str]) -> Optional[str]:
    """Remove hallucinated tool calls and chain-of-thought narration.

    Small models sometimes output raw JSON tool calls or narrate their
    internal reasoning instead of just answering. This strips those
    artifacts while preserving paragraph breaks — the agent is created
    with markdown=True, so blank lines are significant; the previous
    behavior of dropping every blank line mangled markdown output.

    Args:
        text: The raw model response (may be None or empty).

    Returns:
        The sanitized text; None or empty input is returned unchanged.
    """
    if not text:
        return text
    # Strip JSON tool call blocks
    text = _TOOL_CALL_JSON.sub("", text)
    # Strip function-call-style text
    text = _FUNC_CALL_TEXT.sub("", text)
    # Strip chain-of-thought narration lines
    for pattern in _COT_PATTERNS:
        text = pattern.sub("", text)
    # Collapse runs of blank lines (left behind by the removals above) to a
    # single blank line so legitimate markdown paragraphs survive.
    cleaned: list[str] = []
    pending_blank = False
    for line in text.split("\n"):
        if line.strip():
            if pending_blank and cleaned:
                cleaned.append("")
            cleaned.append(line)
            pending_blank = False
        else:
            pending_blank = True
    return "\n".join(cleaned).strip()

View File

@@ -79,7 +79,9 @@ def test_create_timmy_embeds_system_prompt():
kwargs = MockAgent.call_args.kwargs
# Prompt should contain base system prompt (may have memory context appended)
assert kwargs["description"].startswith(TIMMY_SYSTEM_PROMPT[:100])
# Default model (llama3.2) uses the lite prompt
assert "Timmy" in kwargs["description"]
assert "sovereign" in kwargs["description"]
# ── Ollama host regression (container connectivity) ─────────────────────────
@@ -194,3 +196,85 @@ def test_resolve_backend_auto_falls_back_on_non_apple():
from timmy.agent import _resolve_backend
assert _resolve_backend(None) == "ollama"
# ── _model_supports_tools ────────────────────────────────────────────────────
def test_model_supports_tools_llama32_returns_false():
    """llama3.2 (3B) is too small for reliable tool calling."""
    from timmy.agent import _model_supports_tools
    for name in ("llama3.2", "llama3.2:latest"):
        assert _model_supports_tools(name) is False
def test_model_supports_tools_llama31_returns_true():
    """llama3.1 (8B+) can handle tool calling."""
    from timmy.agent import _model_supports_tools
    for name in ("llama3.1", "llama3.3"):
        assert _model_supports_tools(name) is True
def test_model_supports_tools_other_small_models():
    """Other known small models should not get tools."""
    from timmy.agent import _model_supports_tools
    for name in ("phi-3", "tinyllama"):
        assert _model_supports_tools(name) is False
def test_model_supports_tools_unknown_model_gets_tools():
    """Unknown models default to tool-capable (optimistic)."""
    from timmy.agent import _model_supports_tools
    for name in ("mistral", "qwen2.5:72b"):
        assert _model_supports_tools(name) is True
# ── Tool gating in create_timmy ──────────────────────────────────────────────
def test_create_timmy_no_tools_for_small_model():
    """llama3.2 should get no tools."""
    with patch("timmy.agent.Agent") as agent_cls, \
         patch("timmy.agent.Ollama"), \
         patch("timmy.agent.SqliteDb"):
        from timmy.agent import create_timmy
        create_timmy()
    # The default configured model is llama3.2, so tool gating kicks in.
    assert agent_cls.call_args.kwargs["tools"] is None
def test_create_timmy_includes_tools_for_large_model():
    """A tool-capable model (e.g. llama3.1) should attempt to include tools."""
    toolkit_stub = MagicMock()
    with patch("timmy.agent.Agent") as agent_cls, \
         patch("timmy.agent.Ollama"), \
         patch("timmy.agent.SqliteDb"), \
         patch("timmy.agent.create_full_toolkit", return_value=toolkit_stub), \
         patch("timmy.agent.settings") as settings_stub:
        # Minimal settings surface create_timmy reads for the Ollama path.
        settings_stub.ollama_model = "llama3.1"
        settings_stub.ollama_url = "http://localhost:11434"
        settings_stub.timmy_model_backend = "ollama"
        settings_stub.airllm_model_size = "70b"
        settings_stub.telemetry_enabled = False
        from timmy.agent import create_timmy
        create_timmy()
    assert agent_cls.call_args.kwargs["tools"] == [toolkit_stub]
def test_create_timmy_show_tool_calls_false():
    """show_tool_calls should always be False to prevent raw JSON in output."""
    with patch("timmy.agent.Agent") as agent_cls, \
         patch("timmy.agent.Ollama"), \
         patch("timmy.agent.SqliteDb"):
        from timmy.agent import create_timmy
        create_timmy()
    assert agent_cls.call_args.kwargs["show_tool_calls"] is False

View File

@@ -1,4 +1,4 @@
from unittest.mock import AsyncMock, MagicMock, patch
from unittest.mock import AsyncMock, patch
# ── Index ─────────────────────────────────────────────────────────────────────
@@ -74,12 +74,7 @@ def test_agents_list_timmy_metadata(client):
# ── Chat ──────────────────────────────────────────────────────────────────────
def test_chat_timmy_success(client):
mock_agent = MagicMock()
mock_run = MagicMock()
mock_run.content = "I am Timmy, operational and sovereign."
mock_agent.run.return_value = mock_run
with patch("dashboard.routes.agents.create_timmy", return_value=mock_agent):
with patch("dashboard.routes.agents.timmy_chat", return_value="I am Timmy, operational and sovereign."):
response = client.post("/agents/timmy/chat", data={"message": "status?"})
assert response.status_code == 200
@@ -88,17 +83,14 @@ def test_chat_timmy_success(client):
def test_chat_timmy_shows_user_message(client):
mock_agent = MagicMock()
mock_agent.run.return_value = MagicMock(content="Acknowledged.")
with patch("dashboard.routes.agents.create_timmy", return_value=mock_agent):
with patch("dashboard.routes.agents.timmy_chat", return_value="Acknowledged."):
response = client.post("/agents/timmy/chat", data={"message": "hello there"})
assert "hello there" in response.text
def test_chat_timmy_ollama_offline(client):
with patch("dashboard.routes.agents.create_timmy", side_effect=Exception("connection refused")):
with patch("dashboard.routes.agents.timmy_chat", side_effect=Exception("connection refused")):
response = client.post("/agents/timmy/chat", data={"message": "ping"})
assert response.status_code == 200
@@ -120,10 +112,7 @@ def test_history_empty_shows_init_message(client):
def test_history_records_user_and_agent_messages(client):
mock_agent = MagicMock()
mock_agent.run.return_value = MagicMock(content="I am operational.")
with patch("dashboard.routes.agents.create_timmy", return_value=mock_agent):
with patch("dashboard.routes.agents.timmy_chat", return_value="I am operational."):
client.post("/agents/timmy/chat", data={"message": "status check"})
response = client.get("/agents/timmy/history")
@@ -132,7 +121,7 @@ def test_history_records_user_and_agent_messages(client):
def test_history_records_error_when_offline(client):
with patch("dashboard.routes.agents.create_timmy", side_effect=Exception("refused")):
with patch("dashboard.routes.agents.timmy_chat", side_effect=Exception("refused")):
client.post("/agents/timmy/chat", data={"message": "ping"})
response = client.get("/agents/timmy/history")
@@ -141,10 +130,7 @@ def test_history_records_error_when_offline(client):
def test_history_clear_resets_to_init_message(client):
mock_agent = MagicMock()
mock_agent.run.return_value = MagicMock(content="Acknowledged.")
with patch("dashboard.routes.agents.create_timmy", return_value=mock_agent):
with patch("dashboard.routes.agents.timmy_chat", return_value="Acknowledged."):
client.post("/agents/timmy/chat", data={"message": "hello"})
response = client.delete("/agents/timmy/history")
@@ -153,10 +139,7 @@ def test_history_clear_resets_to_init_message(client):
def test_history_empty_after_clear(client):
mock_agent = MagicMock()
mock_agent.run.return_value = MagicMock(content="OK.")
with patch("dashboard.routes.agents.create_timmy", return_value=mock_agent):
with patch("dashboard.routes.agents.timmy_chat", return_value="OK."):
client.post("/agents/timmy/chat", data={"message": "test"})
client.delete("/agents/timmy/history")

180
tests/test_session.py Normal file
View File

@@ -0,0 +1,180 @@
"""Tests for timmy.session — persistent chat session with response sanitization."""
from unittest.mock import MagicMock, patch
import pytest
# ---------------------------------------------------------------------------
# Fixtures
# ---------------------------------------------------------------------------
@pytest.fixture(autouse=True)
def _reset_session_singleton():
    """Give every test a fresh, uninitialized singleton agent.

    Autouse so singleton state cannot leak between tests in this file.
    """
    import timmy.session as session_module
    session_module._agent = None
    yield
    session_module._agent = None
# ---------------------------------------------------------------------------
# chat()
# ---------------------------------------------------------------------------
def test_chat_returns_string():
    """chat() should return a plain string response."""
    stub = MagicMock()
    stub.run.return_value = MagicMock(content="Hello, sir.")
    with patch("timmy.session._get_agent", return_value=stub):
        from timmy.session import chat
        reply = chat("Hi Timmy")
    assert isinstance(reply, str)
    assert "Hello, sir." in reply
def test_chat_passes_session_id():
    """chat() should pass the session_id to agent.run()."""
    stub = MagicMock()
    stub.run.return_value = MagicMock(content="OK.")
    with patch("timmy.session._get_agent", return_value=stub):
        from timmy.session import chat
        chat("test", session_id="my-session")
    assert stub.run.call_args.kwargs["session_id"] == "my-session"
def test_chat_uses_default_session_id():
    """chat() should use 'dashboard' as the default session_id."""
    stub = MagicMock()
    stub.run.return_value = MagicMock(content="OK.")
    with patch("timmy.session._get_agent", return_value=stub):
        from timmy.session import chat
        chat("test")
    assert stub.run.call_args.kwargs["session_id"] == "dashboard"
def test_chat_singleton_agent_reused():
    """Calling chat() multiple times should reuse the same agent instance."""
    stub = MagicMock()
    stub.run.return_value = MagicMock(content="OK.")
    with patch("timmy.agent.create_timmy", return_value=stub) as factory:
        from timmy.session import chat
        for msg in ("first message", "second message"):
            chat(msg)
    # The factory runs exactly once — the second call reuses the singleton.
    factory.assert_called_once()
def test_chat_extracts_user_name():
    """chat() should extract user name from message and persist to memory."""
    stub_agent = MagicMock()
    stub_agent.run.return_value = MagicMock(content="Nice to meet you!")
    memory_stub = MagicMock()
    with patch("timmy.session._get_agent", return_value=stub_agent), \
         patch("timmy.memory_system.memory_system", memory_stub):
        from timmy.session import chat
        chat("my name is Alex")
    memory_stub.update_user_fact.assert_called_once_with("Name", "Alex")
def test_chat_graceful_degradation_on_memory_failure():
    """chat() should still work if the conversation manager raises."""
    stub_agent = MagicMock()
    stub_agent.run.return_value = MagicMock(content="I'm operational.")
    with patch("timmy.session._get_agent", return_value=stub_agent), \
         patch("timmy.conversation.conversation_manager") as broken_cm:
        broken_cm.extract_user_name.side_effect = Exception("memory broken")
        from timmy.session import chat
        reply = chat("test message")
    assert "operational" in reply
# ---------------------------------------------------------------------------
# _clean_response()
# ---------------------------------------------------------------------------
def test_clean_response_strips_json_tool_calls():
    """JSON tool call blocks should be removed from response text."""
    from timmy.session import _clean_response
    raw = 'Here is the answer. {"name": "python", "parameters": {"code": "0.15 * 3847.23", "variable_to_return": "result"}} The result is 577.'
    sanitized = _clean_response(raw)
    assert '{"name"' not in sanitized
    assert '"parameters"' not in sanitized
    assert "The result is 577." in sanitized
def test_clean_response_strips_function_calls():
    """Function-call-style text should be removed."""
    from timmy.session import _clean_response
    raw = 'I will search for that. memory_search(query="recall number") Found nothing.'
    sanitized = _clean_response(raw)
    assert "memory_search(" not in sanitized
    assert "Found nothing." in sanitized
def test_clean_response_strips_chain_of_thought():
    """Chain-of-thought narration lines should be removed."""
    from timmy.session import _clean_response
    raw = """Since there's no direct answer in my vault or hot memory, I'll use memory_search.
Using memory_search(query="what is special"), I found a context.
Here's a possible response:
77 is special because it's a prime number."""
    sanitized = _clean_response(raw)
    # Narration lines are gone; the actual answer survives.
    assert "Since there's no" not in sanitized
    assert "Here's a possible" not in sanitized
    assert "77 is special" in sanitized
def test_clean_response_preserves_normal_text():
    """Normal text without tool artifacts should pass through unchanged."""
    from timmy.session import _clean_response
    untouched = "The number 77 is the sum of the first seven primes: 2+3+5+7+11+13+17."
    assert _clean_response(untouched) == untouched
def test_clean_response_handles_empty_string():
    """Empty string should be returned as-is."""
    from timmy.session import _clean_response
    result = _clean_response("")
    assert result == ""
def test_clean_response_handles_none():
    """None should be returned as-is."""
    from timmy.session import _clean_response
    result = _clean_response(None)
    assert result is None
# ---------------------------------------------------------------------------
# reset_session()
# ---------------------------------------------------------------------------
def test_reset_session_clears_context():
    """reset_session() should clear the conversation context."""
    with patch("timmy.conversation.conversation_manager") as cm_stub:
        from timmy.session import reset_session
        reset_session("test-session")
    cm_stub.clear_context.assert_called_once_with("test-session")