forked from Rockachopa/Timmy-time-dashboard
Addresses 14 bugs from 3 rounds of deep chat evaluation:
- Add chat-to-task pipeline in agents.py with regex-based intent detection,
agent extraction, priority extraction, and title cleaning
- Filter meta-questions ("how do I create a task?") from task creation
- Inject real-time date/time context into every chat message
- Inject live queue state when user asks about tasks
- Ground system prompts with agent roster, honesty guardrails, self-knowledge,
math delegation template, anti-filler rules, values-conflict guidance
- Add CSS for markdown code blocks, inline code, lists, blockquotes in chat
- Add highlight.js CDN for syntax highlighting in chat responses
- Reduce small-model memory context budget (4000→2000) for expanded prompt
- Add 27 comprehensive tests covering the full chat-to-task pipeline
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
220 lines
7.4 KiB
Python
"""Timmy agent creation with three-tier memory system.
|
|
|
|
Memory Architecture:
|
|
- Tier 1 (Hot): MEMORY.md — always loaded, ~300 lines
|
|
- Tier 2 (Vault): memory/ — structured markdown, append-only
|
|
- Tier 3 (Semantic): Vector search over vault files
|
|
|
|
Handoff Protocol maintains continuity across sessions.
|
|
"""
|
|
|
|
import logging
|
|
from typing import TYPE_CHECKING, Union
|
|
|
|
from agno.agent import Agent
|
|
from agno.db.sqlite import SqliteDb
|
|
from agno.models.ollama import Ollama
|
|
|
|
from config import settings
|
|
from timmy.prompts import get_system_prompt
|
|
from timmy.tools import create_full_toolkit
|
|
|
|
if TYPE_CHECKING:
|
|
from timmy.backends import TimmyAirLLMAgent
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
# Union type for callers that want to hint the return type.
|
|
TimmyAgent = Union[Agent, "TimmyAirLLMAgent"]
|
|
|
|
# Models known to be too small for reliable tool calling.
|
|
# These hallucinate tool calls as text, invoke tools randomly,
|
|
# and leak raw JSON into responses.
|
|
_SMALL_MODEL_PATTERNS = (
|
|
"llama3.2",
|
|
"phi-3",
|
|
"gemma:2b",
|
|
"tinyllama",
|
|
"qwen2:0.5b",
|
|
"qwen2:1.5b",
|
|
)
|
|
|
|
|
|
def _model_supports_tools(model_name: str) -> bool:
|
|
"""Check if the configured model can reliably handle tool calling.
|
|
|
|
Small models (< 7B) tend to hallucinate tool calls as text or invoke
|
|
them randomly. For these models, it's better to run tool-free and let
|
|
the model answer directly from its training data.
|
|
"""
|
|
model_lower = model_name.lower()
|
|
for pattern in _SMALL_MODEL_PATTERNS:
|
|
if pattern in model_lower:
|
|
return False
|
|
return True
|
|
|
|
|
|
def _resolve_backend(requested: str | None) -> str:
    """Pick the backend name to use, resolving 'auto' and explicit overrides.

    Priority (highest → lowest):
    1. CLI flag passed directly to create_timmy()
    2. TIMMY_MODEL_BACKEND env var / .env setting
    3. 'ollama' (safe default — no surprises)

    'auto' triggers Apple Silicon detection: AirLLM is chosen only when both
    is_apple_silicon() and airllm_available() return True.
    """
    # An explicit request always wins.
    if requested is not None:
        return requested

    from_config = settings.timmy_model_backend  # "ollama" | "airllm" | "auto"
    if from_config != "auto":
        return from_config

    # "auto" path — lazy import keeps startup fast and tests clean.
    from timmy.backends import airllm_available, is_apple_silicon

    wants_airllm = is_apple_silicon() and airllm_available()
    return "airllm" if wants_airllm else "ollama"
|
|
|
|
|
|
def create_timmy(
    db_file: str = "timmy.db",
    backend: str | None = None,
    model_size: str | None = None,
) -> TimmyAgent:
    """Instantiate Timmy — Ollama or AirLLM, same public interface either way.

    Args:
        db_file: SQLite file for Agno conversation memory (Ollama path only).
        backend: "ollama" | "airllm" | "auto" | None (reads config/env).
        model_size: AirLLM size — "8b" | "70b" | "405b" | None (reads config).

    Returns an Agno Agent (Ollama) or TimmyAirLLMAgent — both expose
    print_response(message, stream).
    """
    if _resolve_backend(backend) == "airllm":
        # Lazy import: the AirLLM backend is only pulled in when selected.
        from timmy.backends import TimmyAirLLMAgent

        return TimmyAirLLMAgent(model_size=model_size or settings.airllm_model_size)

    # Default: Ollama via Agno.
    model_name = settings.ollama_model
    use_tools = _model_supports_tools(model_name)
    if not use_tools:
        logger.info("Tools disabled for model %s (too small for reliable tool calling)", model_name)

    # Small models get no tools at all — they hallucinate tool calls otherwise.
    toolkit = create_full_toolkit() if use_tools else None

    # Prompt tier depends on tool capability.
    full_prompt = get_system_prompt(tools_enabled=use_tools)

    # Memory context is best-effort: a failure here must never block startup.
    try:
        from timmy.memory_system import memory_system

        memory_context = memory_system.get_system_context()
    except Exception as exc:
        logger.warning("Failed to load memory context: %s", exc)
        memory_context = None

    if memory_context:
        # Smaller budget for small models, since their expanded prompt
        # (roster, guardrails) already uses more tokens.
        budget = 8000 if use_tools else 2000
        if len(memory_context) > budget:
            memory_context = memory_context[:budget] + "\n... [truncated]"
        full_prompt = f"{full_prompt}\n\n## Memory Context\n\n{memory_context}"

    return Agent(
        name="Timmy",
        model=Ollama(id=model_name, host=settings.ollama_url),
        db=SqliteDb(db_file=db_file),
        description=full_prompt,
        add_history_to_context=True,
        num_history_runs=20,
        markdown=True,
        tools=[toolkit] if toolkit else None,
        telemetry=settings.telemetry_enabled,
    )
|
|
|
|
|
|
class TimmyWithMemory:
    """Timmy wrapper with explicit three-tier memory management.

    Wraps the agent from create_timmy() and, on every chat turn, runs a
    best-effort heuristic pass that extracts user facts (name, preferences)
    into the memory system. Usable as a context manager: exiting the
    ``with`` block writes a session handoff via end_session().
    """

    def __init__(self, db_file: str = "timmy.db") -> None:
        # Lazy import so merely importing this module doesn't initialize
        # the memory backend.
        from timmy.memory_system import memory_system

        self.agent = create_timmy(db_file=db_file)
        self.memory = memory_system
        self.session_active = True

        # Store initial context for reference.
        self.initial_context = self.memory.get_system_context()

    def chat(self, message: str) -> str:
        """Run one chat turn, extracting user facts into memory first."""
        self._extract_and_store_facts(message)

        # Run agent. Agno returns a result object with .content; fall back
        # to str() for backends that return plain strings.
        result = self.agent.run(message, stream=False)
        return result.content if hasattr(result, "content") else str(result)

    def _extract_and_store_facts(self, message: str) -> None:
        """Heuristically extract the user's name and preferences from *message*.

        Substring matching only — not NLP. At most one name and one
        preference are recorded per message.
        """
        message_lower = message.lower()

        # --- Name extraction ---
        # Offsets are derived from len(pattern) rather than hand-counted
        # magic numbers (the previous (pattern, 11/4/5/8) pairs could drift
        # out of sync with the pattern text).
        name_patterns = ("my name is ", "i'm ", "i am ", "call me ")

        for pattern in name_patterns:
            if pattern not in message_lower:
                continue
            idx = message_lower.find(pattern) + len(pattern)
            words = message[idx:].strip().split()
            # Guard: the message may end right at the pattern ("call me "),
            # leaving nothing to split — previously an IndexError.
            if not words:
                continue
            name = words[0].strip(".,!?;:()\"'").capitalize()
            if name and len(name) > 1 and name.lower() not in ("the", "a", "an"):
                self.memory.update_user_fact("Name", name)
                self.memory.record_decision(f"Learned user's name: {name}")
                break

        # --- Preference extraction ---
        pref_patterns = (
            ("i like ", "Likes"),
            ("i love ", "Loves"),
            ("i prefer ", "Prefers"),
            ("i don't like ", "Dislikes"),
            ("i hate ", "Dislikes"),
        )

        for pattern, category in pref_patterns:
            if pattern not in message_lower:
                continue
            idx = message_lower.find(pattern) + len(pattern)
            # Take up to the first period as the preference text.
            pref = message[idx:].strip().split(".")[0].strip()
            if pref and len(pref) > 3:
                self.memory.record_open_item(f"User {category.lower()}: {pref}")
                break

    def end_session(self, summary: str = "Session completed") -> None:
        """End session and write handoff. Idempotent — safe to call twice."""
        if self.session_active:
            self.memory.end_session(summary)
            self.session_active = False

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Always write the handoff; never suppress the caller's exception.
        self.end_session()
        return False
|