This repository has been archived on 2026-03-24. You can view files and clone it. You cannot open issues or pull requests or push a commit.
Files
Timmy-time-dashboard/src/timmy/agent.py
Alexander Payne 3ca8e9f2d6 fix: chat evaluation bugs — task pipeline, prompt grounding, markdown rendering
Addresses 14 bugs from 3 rounds of deep chat evaluation:

- Add chat-to-task pipeline in agents.py with regex-based intent detection,
  agent extraction, priority extraction, and title cleaning
- Filter meta-questions ("how do I create a task?") from task creation
- Inject real-time date/time context into every chat message
- Inject live queue state when user asks about tasks
- Ground system prompts with agent roster, honesty guardrails, self-knowledge,
  math delegation template, anti-filler rules, values-conflict guidance
- Add CSS for markdown code blocks, inline code, lists, blockquotes in chat
- Add highlight.js CDN for syntax highlighting in chat responses
- Reduce small-model memory context budget (4000→2000) for expanded prompt
- Add 27 comprehensive tests covering the full chat-to-task pipeline

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-26 11:42:42 -05:00

220 lines
7.4 KiB
Python

"""Timmy agent creation with three-tier memory system.
Memory Architecture:
- Tier 1 (Hot): MEMORY.md — always loaded, ~300 lines
- Tier 2 (Vault): memory/ — structured markdown, append-only
- Tier 3 (Semantic): Vector search over vault files
Handoff Protocol maintains continuity across sessions.
"""
import logging
from typing import TYPE_CHECKING, Union
from agno.agent import Agent
from agno.db.sqlite import SqliteDb
from agno.models.ollama import Ollama
from config import settings
from timmy.prompts import get_system_prompt
from timmy.tools import create_full_toolkit
if TYPE_CHECKING:
from timmy.backends import TimmyAirLLMAgent
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Union type for callers that want to hint the return type.
# "TimmyAirLLMAgent" is a string forward reference: the class is imported
# at module level only under TYPE_CHECKING.
TimmyAgent = Union[Agent, "TimmyAirLLMAgent"]
# Models known to be too small for reliable tool calling.
# These hallucinate tool calls as text, invoke tools randomly,
# and leak raw JSON into responses.
# Each entry is tested as a substring of the lower-cased model name
# (see _model_supports_tools), so entries must be lowercase.
_SMALL_MODEL_PATTERNS = (
    "llama3.2",
    "phi-3",
    "gemma:2b",
    "tinyllama",
    "qwen2:0.5b",
    "qwen2:1.5b",
)
def _model_supports_tools(model_name: str) -> bool:
    """Report whether *model_name* can be trusted with tool calling.

    Small models (< 7B) tend to hallucinate tool calls as text or invoke
    them randomly, so they are better run tool-free, answering directly
    from their training data.

    Args:
        model_name: Model identifier; compared case-insensitively.

    Returns:
        False when any entry of _SMALL_MODEL_PATTERNS occurs as a substring
        of the lower-cased name, True otherwise.
    """
    lowered = model_name.lower()
    return not any(marker in lowered for marker in _SMALL_MODEL_PATTERNS)
def _resolve_backend(requested: str | None) -> str:
"""Return the backend name to use, resolving 'auto' and explicit overrides.
Priority (highest → lowest):
1. CLI flag passed directly to create_timmy()
2. TIMMY_MODEL_BACKEND env var / .env setting
3. 'ollama' (safe default — no surprises)
'auto' triggers Apple Silicon detection: uses AirLLM if both
is_apple_silicon() and airllm_available() return True.
"""
if requested is not None:
return requested
configured = settings.timmy_model_backend # "ollama" | "airllm" | "auto"
if configured != "auto":
return configured
# "auto" path — lazy import to keep startup fast and tests clean.
from timmy.backends import airllm_available, is_apple_silicon
if is_apple_silicon() and airllm_available():
return "airllm"
return "ollama"
def create_timmy(
    db_file: str = "timmy.db",
    backend: str | None = None,
    model_size: str | None = None,
) -> TimmyAgent:
    """Build the Timmy agent on either the Ollama or the AirLLM backend.

    Args:
        db_file: SQLite file for Agno conversation memory (Ollama path only).
        backend: "ollama" | "airllm" | "auto" | None (reads config/env).
        model_size: AirLLM size — "8b" | "70b" | "405b" | None (reads config).

    Returns:
        An Agno Agent (Ollama) or a TimmyAirLLMAgent — both expose
        print_response(message, stream).
    """
    chosen_backend = _resolve_backend(backend)
    airllm_size = model_size or settings.airllm_model_size

    if chosen_backend == "airllm":
        from timmy.backends import TimmyAirLLMAgent

        return TimmyAirLLMAgent(model_size=airllm_size)

    # Anything other than "airllm" runs on Ollama through Agno.
    ollama_model = settings.ollama_model
    tools_ok = _model_supports_tools(ollama_model)

    # Small models get no toolkit at all.
    if tools_ok:
        toolkit = create_full_toolkit()
    else:
        toolkit = None
        logger.info(
            "Tools disabled for model %s (too small for reliable tool calling)",
            ollama_model,
        )

    # The prompt tier follows tool capability.
    system_prompt = get_system_prompt(tools_enabled=tools_ok)

    # Start from the bare prompt; memory is appended only if it loads cleanly.
    prompt_with_memory = system_prompt
    try:
        from timmy.memory_system import memory_system

        recalled = memory_system.get_system_context()
        if recalled:
            # Tool-free (small) models get a tighter budget because their
            # expanded prompt (roster, guardrails) already uses more tokens.
            budget = 8000 if tools_ok else 2000
            if len(recalled) > budget:
                recalled = recalled[:budget] + "\n... [truncated]"
            prompt_with_memory = f"{system_prompt}\n\n## Memory Context\n\n{recalled}"
    except Exception as exc:
        # Best effort: a memory failure must not prevent agent creation.
        logger.warning("Failed to load memory context: %s", exc)

    return Agent(
        name="Timmy",
        model=Ollama(id=ollama_model, host=settings.ollama_url),
        db=SqliteDb(db_file=db_file),
        description=prompt_with_memory,
        add_history_to_context=True,
        num_history_runs=20,
        markdown=True,
        tools=[toolkit] if toolkit else None,
        telemetry=settings.telemetry_enabled,
    )
class TimmyWithMemory:
"""Timmy wrapper with explicit three-tier memory management."""
def __init__(self, db_file: str = "timmy.db") -> None:
from timmy.memory_system import memory_system
self.agent = create_timmy(db_file=db_file)
self.memory = memory_system
self.session_active = True
# Store initial context for reference
self.initial_context = self.memory.get_system_context()
def chat(self, message: str) -> str:
"""Simple chat interface that tracks in memory."""
# Check for user facts to extract
self._extract_and_store_facts(message)
# Run agent
result = self.agent.run(message, stream=False)
response_text = result.content if hasattr(result, "content") else str(result)
return response_text
def _extract_and_store_facts(self, message: str) -> None:
"""Extract user facts from message and store in memory."""
message_lower = message.lower()
# Extract name
name_patterns = [
("my name is ", 11),
("i'm ", 4),
("i am ", 5),
("call me ", 8),
]
for pattern, offset in name_patterns:
if pattern in message_lower:
idx = message_lower.find(pattern) + offset
name = message[idx:].strip().split()[0].strip(".,!?;:()\"'").capitalize()
if name and len(name) > 1 and name.lower() not in ("the", "a", "an"):
self.memory.update_user_fact("Name", name)
self.memory.record_decision(f"Learned user's name: {name}")
break
# Extract preferences
pref_patterns = [
("i like ", "Likes"),
("i love ", "Loves"),
("i prefer ", "Prefers"),
("i don't like ", "Dislikes"),
("i hate ", "Dislikes"),
]
for pattern, category in pref_patterns:
if pattern in message_lower:
idx = message_lower.find(pattern) + len(pattern)
pref = message[idx:].strip().split(".")[0].strip()
if pref and len(pref) > 3:
self.memory.record_open_item(f"User {category.lower()}: {pref}")
break
def end_session(self, summary: str = "Session completed") -> None:
"""End session and write handoff."""
if self.session_active:
self.memory.end_session(summary)
self.session_active = False
def __enter__(self):
return self
def __exit__(self, exc_type, exc_val, exc_tb):
self.end_session()
return False