Compare commits
9 Commits
kimi/issue
...
b44014db38
| Author | SHA1 | Date | |
|---|---|---|---|
| b44014db38 | |||
| 1b4fe65650 | |||
|
|
7866dd9e30 | ||
| 2d69f73d9d | |||
| ff1e43c235 | |||
| b331aa6139 | |||
| b45b543f2d | |||
| 7c823ab59c | |||
| 9f2728f529 |
@@ -18,13 +18,19 @@ Exit codes:
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import urllib.request
|
||||
from pathlib import Path
|
||||
|
||||
REPO_ROOT = Path(__file__).resolve().parent.parent
|
||||
QUEUE_FILE = REPO_ROOT / ".loop" / "queue.json"
|
||||
IDLE_STATE_FILE = REPO_ROOT / ".loop" / "idle_state.json"
|
||||
TOKEN_FILE = Path.home() / ".hermes" / "gitea_token"
|
||||
|
||||
GITEA_API = os.environ.get("GITEA_API", "http://localhost:3000/api/v1")
|
||||
REPO_SLUG = os.environ.get("REPO_SLUG", "rockachopa/Timmy-time-dashboard")
|
||||
|
||||
# Backoff sequence: 60s, 120s, 240s, 600s max
|
||||
BACKOFF_BASE = 60
|
||||
@@ -32,19 +38,81 @@ BACKOFF_MAX = 600
|
||||
BACKOFF_MULTIPLIER = 2
|
||||
|
||||
|
||||
def _get_token() -> str:
|
||||
"""Read Gitea token from env or file."""
|
||||
token = os.environ.get("GITEA_TOKEN", "").strip()
|
||||
if not token and TOKEN_FILE.exists():
|
||||
token = TOKEN_FILE.read_text().strip()
|
||||
return token
|
||||
|
||||
|
||||
def _fetch_open_issue_numbers() -> set[int] | None:
|
||||
"""Fetch open issue numbers from Gitea. Returns None on failure."""
|
||||
token = _get_token()
|
||||
if not token:
|
||||
return None
|
||||
try:
|
||||
numbers: set[int] = set()
|
||||
page = 1
|
||||
while True:
|
||||
url = (
|
||||
f"{GITEA_API}/repos/{REPO_SLUG}/issues"
|
||||
f"?state=open&type=issues&limit=50&page={page}"
|
||||
)
|
||||
req = urllib.request.Request(url, headers={
|
||||
"Authorization": f"token {token}",
|
||||
"Accept": "application/json",
|
||||
})
|
||||
with urllib.request.urlopen(req, timeout=10) as resp:
|
||||
data = json.loads(resp.read())
|
||||
if not data:
|
||||
break
|
||||
for issue in data:
|
||||
numbers.add(issue["number"])
|
||||
if len(data) < 50:
|
||||
break
|
||||
page += 1
|
||||
return numbers
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
|
||||
def load_queue() -> list[dict]:
|
||||
"""Load queue.json and return ready items."""
|
||||
"""Load queue.json and return ready items, filtering out closed issues."""
|
||||
if not QUEUE_FILE.exists():
|
||||
return []
|
||||
try:
|
||||
data = json.loads(QUEUE_FILE.read_text())
|
||||
if isinstance(data, list):
|
||||
return [item for item in data if item.get("ready")]
|
||||
return []
|
||||
if not isinstance(data, list):
|
||||
return []
|
||||
ready = [item for item in data if item.get("ready")]
|
||||
if not ready:
|
||||
return []
|
||||
|
||||
# Filter out issues that are no longer open (auto-hygiene)
|
||||
open_numbers = _fetch_open_issue_numbers()
|
||||
if open_numbers is not None:
|
||||
before = len(ready)
|
||||
ready = [item for item in ready if item.get("issue") in open_numbers]
|
||||
removed = before - len(ready)
|
||||
if removed > 0:
|
||||
print(f"[loop-guard] Filtered {removed} closed issue(s) from queue")
|
||||
# Persist the cleaned queue so stale entries don't recur
|
||||
_save_cleaned_queue(data, open_numbers)
|
||||
return ready
|
||||
except (json.JSONDecodeError, OSError):
|
||||
return []
|
||||
|
||||
|
||||
def _save_cleaned_queue(full_queue: list[dict], open_numbers: set[int]) -> None:
|
||||
"""Rewrite queue.json without closed issues."""
|
||||
cleaned = [item for item in full_queue if item.get("issue") in open_numbers]
|
||||
try:
|
||||
QUEUE_FILE.write_text(json.dumps(cleaned, indent=2) + "\n")
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
|
||||
def load_idle_state() -> dict:
|
||||
"""Load persistent idle state."""
|
||||
if not IDLE_STATE_FILE.exists():
|
||||
|
||||
@@ -254,6 +254,7 @@ class Settings(BaseSettings):
|
||||
# When enabled, the agent starts an internal thought loop on server start.
|
||||
thinking_enabled: bool = True
|
||||
thinking_interval_seconds: int = 300 # 5 minutes between thoughts
|
||||
thinking_timeout_seconds: int = 120 # max wall-clock time per thinking cycle
|
||||
thinking_distill_every: int = 10 # distill facts from thoughts every Nth thought
|
||||
thinking_issue_every: int = 20 # file Gitea issues from thoughts every Nth thought
|
||||
thinking_memory_check_every: int = 50 # check memory status every Nth thought
|
||||
|
||||
@@ -155,7 +155,17 @@ async def _thinking_scheduler() -> None:
|
||||
while True:
|
||||
try:
|
||||
if settings.thinking_enabled:
|
||||
await thinking_engine.think_once()
|
||||
await asyncio.wait_for(
|
||||
thinking_engine.think_once(),
|
||||
timeout=settings.thinking_timeout_seconds,
|
||||
)
|
||||
except TimeoutError:
|
||||
logger.warning(
|
||||
"Thinking cycle timed out after %ds — Ollama may be unresponsive",
|
||||
settings.thinking_timeout_seconds,
|
||||
)
|
||||
except asyncio.CancelledError:
|
||||
raise
|
||||
except Exception as exc:
|
||||
logger.error("Thinking scheduler error: %s", exc)
|
||||
|
||||
@@ -175,7 +185,10 @@ async def _loop_qa_scheduler() -> None:
|
||||
while True:
|
||||
try:
|
||||
if settings.loop_qa_enabled:
|
||||
result = await loop_qa_orchestrator.run_next_test()
|
||||
result = await asyncio.wait_for(
|
||||
loop_qa_orchestrator.run_next_test(),
|
||||
timeout=settings.thinking_timeout_seconds,
|
||||
)
|
||||
if result:
|
||||
status = "PASS" if result["success"] else "FAIL"
|
||||
logger.info(
|
||||
@@ -184,6 +197,13 @@ async def _loop_qa_scheduler() -> None:
|
||||
status,
|
||||
result.get("details", "")[:80],
|
||||
)
|
||||
except TimeoutError:
|
||||
logger.warning(
|
||||
"Loop QA test timed out after %ds",
|
||||
settings.thinking_timeout_seconds,
|
||||
)
|
||||
except asyncio.CancelledError:
|
||||
raise
|
||||
except Exception as exc:
|
||||
logger.error("Loop QA scheduler error: %s", exc)
|
||||
|
||||
@@ -329,33 +349,35 @@ async def _discord_token_watcher() -> None:
|
||||
logger.warning("Discord auto-start failed: %s", exc)
|
||||
|
||||
|
||||
@asynccontextmanager
|
||||
async def lifespan(app: FastAPI):
|
||||
"""Application lifespan manager with non-blocking startup."""
|
||||
|
||||
# Validate security config (no-op in test mode)
|
||||
def _startup_init() -> None:
|
||||
"""Validate config and enable event persistence."""
|
||||
from config import validate_startup
|
||||
|
||||
validate_startup()
|
||||
|
||||
# Enable event persistence (unified EventBus + swarm event_log)
|
||||
from infrastructure.events.bus import init_event_bus_persistence
|
||||
|
||||
init_event_bus_persistence()
|
||||
|
||||
# Create all background tasks without waiting for them
|
||||
briefing_task = asyncio.create_task(_briefing_scheduler())
|
||||
thinking_task = asyncio.create_task(_thinking_scheduler())
|
||||
loop_qa_task = asyncio.create_task(_loop_qa_scheduler())
|
||||
presence_task = asyncio.create_task(_presence_watcher())
|
||||
|
||||
# Initialize Spark Intelligence engine
|
||||
from spark.engine import get_spark_engine
|
||||
|
||||
if get_spark_engine().enabled:
|
||||
logger.info("Spark Intelligence active — event capture enabled")
|
||||
|
||||
# Auto-prune old vector store memories on startup
|
||||
|
||||
def _startup_background_tasks() -> list[asyncio.Task]:
|
||||
"""Spawn all recurring background tasks (non-blocking)."""
|
||||
return [
|
||||
asyncio.create_task(_briefing_scheduler()),
|
||||
asyncio.create_task(_thinking_scheduler()),
|
||||
asyncio.create_task(_loop_qa_scheduler()),
|
||||
asyncio.create_task(_presence_watcher()),
|
||||
asyncio.create_task(_start_chat_integrations_background()),
|
||||
]
|
||||
|
||||
|
||||
def _startup_pruning() -> None:
|
||||
"""Auto-prune old memories, thoughts, and events on startup."""
|
||||
if settings.memory_prune_days > 0:
|
||||
try:
|
||||
from timmy.memory_system import prune_memories
|
||||
@@ -373,7 +395,6 @@ async def lifespan(app: FastAPI):
|
||||
except Exception as exc:
|
||||
logger.debug("Memory auto-prune skipped: %s", exc)
|
||||
|
||||
# Auto-prune old thoughts on startup
|
||||
if settings.thoughts_prune_days > 0:
|
||||
try:
|
||||
from timmy.thinking import thinking_engine
|
||||
@@ -391,7 +412,6 @@ async def lifespan(app: FastAPI):
|
||||
except Exception as exc:
|
||||
logger.debug("Thought auto-prune skipped: %s", exc)
|
||||
|
||||
# Auto-prune old system events on startup
|
||||
if settings.events_prune_days > 0:
|
||||
try:
|
||||
from swarm.event_log import prune_old_events
|
||||
@@ -409,7 +429,6 @@ async def lifespan(app: FastAPI):
|
||||
except Exception as exc:
|
||||
logger.debug("Event auto-prune skipped: %s", exc)
|
||||
|
||||
# Warn if memory vault exceeds size limit
|
||||
if settings.memory_vault_max_mb > 0:
|
||||
try:
|
||||
vault_path = Path(settings.repo_root) / "memory" / "notes"
|
||||
@@ -425,6 +444,42 @@ async def lifespan(app: FastAPI):
|
||||
except Exception as exc:
|
||||
logger.debug("Vault size check skipped: %s", exc)
|
||||
|
||||
|
||||
async def _shutdown_cleanup(
|
||||
bg_tasks: list[asyncio.Task],
|
||||
workshop_heartbeat,
|
||||
) -> None:
|
||||
"""Stop chat bots, MCP sessions, heartbeat, and cancel background tasks."""
|
||||
from integrations.chat_bridge.vendors.discord import discord_bot
|
||||
from integrations.telegram_bot.bot import telegram_bot
|
||||
|
||||
await discord_bot.stop()
|
||||
await telegram_bot.stop()
|
||||
|
||||
try:
|
||||
from timmy.mcp_tools import close_mcp_sessions
|
||||
|
||||
await close_mcp_sessions()
|
||||
except Exception as exc:
|
||||
logger.debug("MCP shutdown: %s", exc)
|
||||
|
||||
await workshop_heartbeat.stop()
|
||||
|
||||
for task in bg_tasks:
|
||||
task.cancel()
|
||||
try:
|
||||
await task
|
||||
except asyncio.CancelledError:
|
||||
pass
|
||||
|
||||
|
||||
@asynccontextmanager
|
||||
async def lifespan(app: FastAPI):
|
||||
"""Application lifespan manager with non-blocking startup."""
|
||||
_startup_init()
|
||||
bg_tasks = _startup_background_tasks()
|
||||
_startup_pruning()
|
||||
|
||||
# Start Workshop presence heartbeat with WS relay
|
||||
from dashboard.routes.world import broadcast_world_state
|
||||
from timmy.workshop_state import WorkshopHeartbeat
|
||||
@@ -432,10 +487,7 @@ async def lifespan(app: FastAPI):
|
||||
workshop_heartbeat = WorkshopHeartbeat(on_change=broadcast_world_state)
|
||||
await workshop_heartbeat.start()
|
||||
|
||||
# Start chat integrations in background
|
||||
chat_task = asyncio.create_task(_start_chat_integrations_background())
|
||||
|
||||
# Register session logger with error capture (breaks infrastructure → timmy circular dep)
|
||||
# Register session logger with error capture
|
||||
try:
|
||||
from infrastructure.error_capture import register_error_recorder
|
||||
from timmy.session_logger import get_session_logger
|
||||
@@ -448,30 +500,7 @@ async def lifespan(app: FastAPI):
|
||||
|
||||
yield
|
||||
|
||||
# Cleanup on shutdown
|
||||
from integrations.chat_bridge.vendors.discord import discord_bot
|
||||
from integrations.telegram_bot.bot import telegram_bot
|
||||
|
||||
await discord_bot.stop()
|
||||
await telegram_bot.stop()
|
||||
|
||||
# Close MCP tool server sessions
|
||||
try:
|
||||
from timmy.mcp_tools import close_mcp_sessions
|
||||
|
||||
await close_mcp_sessions()
|
||||
except Exception as exc:
|
||||
logger.debug("MCP shutdown: %s", exc)
|
||||
|
||||
await workshop_heartbeat.stop()
|
||||
|
||||
for task in [briefing_task, thinking_task, chat_task, loop_qa_task, presence_task]:
|
||||
if task:
|
||||
task.cancel()
|
||||
try:
|
||||
await task
|
||||
except asyncio.CancelledError:
|
||||
pass
|
||||
await _shutdown_cleanup(bg_tasks, workshop_heartbeat)
|
||||
|
||||
|
||||
app = FastAPI(
|
||||
|
||||
@@ -100,36 +100,14 @@ def _get_git_context() -> dict:
|
||||
return {"branch": "unknown", "commit": "unknown"}
|
||||
|
||||
|
||||
def capture_error(
|
||||
exc: Exception,
|
||||
source: str = "unknown",
|
||||
context: dict | None = None,
|
||||
) -> str | None:
|
||||
"""Capture an error and optionally create a bug report.
|
||||
|
||||
Args:
|
||||
exc: The exception to capture
|
||||
source: Module/component where the error occurred
|
||||
context: Optional dict of extra context (request path, etc.)
|
||||
def _extract_traceback_info(exc: Exception) -> tuple[str, str, int]:
|
||||
"""Extract formatted traceback, affected file, and line number.
|
||||
|
||||
Returns:
|
||||
Task ID of the created bug report, or None if deduplicated/disabled
|
||||
Tuple of (traceback_string, affected_file, affected_line).
|
||||
"""
|
||||
from config import settings
|
||||
|
||||
if not settings.error_feedback_enabled:
|
||||
return None
|
||||
|
||||
error_hash = _stack_hash(exc)
|
||||
|
||||
if _is_duplicate(error_hash):
|
||||
logger.debug("Duplicate error suppressed: %s (hash=%s)", exc, error_hash)
|
||||
return None
|
||||
|
||||
# Format the stack trace
|
||||
tb_str = "".join(traceback.format_exception(type(exc), exc, exc.__traceback__))
|
||||
|
||||
# Extract file/line from traceback
|
||||
tb_obj = exc.__traceback__
|
||||
affected_file = "unknown"
|
||||
affected_line = 0
|
||||
@@ -139,9 +117,18 @@ def capture_error(
|
||||
affected_file = tb_obj.tb_frame.f_code.co_filename
|
||||
affected_line = tb_obj.tb_lineno
|
||||
|
||||
git_ctx = _get_git_context()
|
||||
return tb_str, affected_file, affected_line
|
||||
|
||||
# 1. Log to event_log
|
||||
|
||||
def _log_error_event(
|
||||
exc: Exception,
|
||||
source: str,
|
||||
error_hash: str,
|
||||
affected_file: str,
|
||||
affected_line: int,
|
||||
git_ctx: dict,
|
||||
) -> None:
|
||||
"""Log the captured error to the event log."""
|
||||
try:
|
||||
from swarm.event_log import EventType, log_event
|
||||
|
||||
@@ -161,8 +148,18 @@ def capture_error(
|
||||
except Exception as log_exc:
|
||||
logger.debug("Failed to log error event: %s", log_exc)
|
||||
|
||||
# 2. Create bug report task
|
||||
task_id = None
|
||||
|
||||
def _create_bug_report(
|
||||
exc: Exception,
|
||||
source: str,
|
||||
context: dict | None,
|
||||
error_hash: str,
|
||||
tb_str: str,
|
||||
affected_file: str,
|
||||
affected_line: int,
|
||||
git_ctx: dict,
|
||||
) -> str | None:
|
||||
"""Create a bug report task and return the task ID (or None on failure)."""
|
||||
try:
|
||||
from swarm.task_queue.models import create_task
|
||||
|
||||
@@ -195,7 +192,6 @@ def capture_error(
|
||||
)
|
||||
task_id = task.id
|
||||
|
||||
# Log the creation event
|
||||
try:
|
||||
from swarm.event_log import EventType, log_event
|
||||
|
||||
@@ -210,12 +206,16 @@ def capture_error(
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.warning("Bug report screenshot error: %s", exc)
|
||||
pass
|
||||
|
||||
return task_id
|
||||
|
||||
except Exception as task_exc:
|
||||
logger.debug("Failed to create bug report task: %s", task_exc)
|
||||
return None
|
||||
|
||||
# 3. Send notification
|
||||
|
||||
def _notify_bug_report(exc: Exception, source: str) -> None:
|
||||
"""Send a push notification about the captured error."""
|
||||
try:
|
||||
from infrastructure.notifications.push import notifier
|
||||
|
||||
@@ -224,11 +224,12 @@ def capture_error(
|
||||
message=f"{type(exc).__name__} in {source}: {str(exc)[:80]}",
|
||||
category="system",
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.warning("Bug report notification error: %s", exc)
|
||||
pass
|
||||
except Exception as notify_exc:
|
||||
logger.warning("Bug report notification error: %s", notify_exc)
|
||||
|
||||
# 4. Record in session logger (via registered callback)
|
||||
|
||||
def _record_to_session(exc: Exception, source: str) -> None:
|
||||
"""Record the error via the registered session callback."""
|
||||
if _error_recorder is not None:
|
||||
try:
|
||||
_error_recorder(
|
||||
@@ -238,4 +239,50 @@ def capture_error(
|
||||
except Exception as log_exc:
|
||||
logger.warning("Bug report session logging error: %s", log_exc)
|
||||
|
||||
|
||||
def capture_error(
|
||||
exc: Exception,
|
||||
source: str = "unknown",
|
||||
context: dict | None = None,
|
||||
) -> str | None:
|
||||
"""Capture an error and optionally create a bug report.
|
||||
|
||||
Args:
|
||||
exc: The exception to capture
|
||||
source: Module/component where the error occurred
|
||||
context: Optional dict of extra context (request path, etc.)
|
||||
|
||||
Returns:
|
||||
Task ID of the created bug report, or None if deduplicated/disabled
|
||||
"""
|
||||
from config import settings
|
||||
|
||||
if not settings.error_feedback_enabled:
|
||||
return None
|
||||
|
||||
error_hash = _stack_hash(exc)
|
||||
|
||||
if _is_duplicate(error_hash):
|
||||
logger.debug("Duplicate error suppressed: %s (hash=%s)", exc, error_hash)
|
||||
return None
|
||||
|
||||
tb_str, affected_file, affected_line = _extract_traceback_info(exc)
|
||||
git_ctx = _get_git_context()
|
||||
|
||||
_log_error_event(exc, source, error_hash, affected_file, affected_line, git_ctx)
|
||||
|
||||
task_id = _create_bug_report(
|
||||
exc,
|
||||
source,
|
||||
context,
|
||||
error_hash,
|
||||
tb_str,
|
||||
affected_file,
|
||||
affected_line,
|
||||
git_ctx,
|
||||
)
|
||||
|
||||
_notify_bug_report(exc, source)
|
||||
_record_to_session(exc, source)
|
||||
|
||||
return task_id
|
||||
|
||||
@@ -197,6 +197,90 @@ def _resolve_backend(requested: str | None) -> str:
|
||||
return "ollama"
|
||||
|
||||
|
||||
def _build_tools_list(use_tools: bool, skip_mcp: bool, model_name: str) -> list:
|
||||
"""Assemble the tools list based on model capability and MCP flags.
|
||||
|
||||
Returns a list of Toolkit / MCPTools objects, or an empty list.
|
||||
"""
|
||||
if not use_tools:
|
||||
logger.info("Tools disabled for model %s (too small for reliable tool calling)", model_name)
|
||||
return []
|
||||
|
||||
tools_list: list = [create_full_toolkit()]
|
||||
|
||||
# Add MCP tool servers (lazy-connected on first arun()).
|
||||
# Skipped when skip_mcp=True — MCP's stdio transport uses anyio cancel
|
||||
# scopes that conflict with asyncio background task cancellation (#72).
|
||||
if not skip_mcp:
|
||||
try:
|
||||
from timmy.mcp_tools import create_filesystem_mcp_tools, create_gitea_mcp_tools
|
||||
|
||||
gitea_mcp = create_gitea_mcp_tools()
|
||||
if gitea_mcp:
|
||||
tools_list.append(gitea_mcp)
|
||||
|
||||
fs_mcp = create_filesystem_mcp_tools()
|
||||
if fs_mcp:
|
||||
tools_list.append(fs_mcp)
|
||||
except Exception as exc:
|
||||
logger.debug("MCP tools unavailable: %s", exc)
|
||||
|
||||
return tools_list
|
||||
|
||||
|
||||
def _build_prompt(use_tools: bool, session_id: str) -> str:
|
||||
"""Build the full system prompt with optional memory context."""
|
||||
base_prompt = get_system_prompt(tools_enabled=use_tools, session_id=session_id)
|
||||
|
||||
try:
|
||||
from timmy.memory_system import memory_system
|
||||
|
||||
memory_context = memory_system.get_system_context()
|
||||
if memory_context:
|
||||
# Smaller budget for small models — expanded prompt uses more tokens
|
||||
max_context = 2000 if not use_tools else 8000
|
||||
if len(memory_context) > max_context:
|
||||
memory_context = memory_context[:max_context] + "\n... [truncated]"
|
||||
return (
|
||||
f"{base_prompt}\n\n"
|
||||
f"## GROUNDED CONTEXT (verified sources — cite when using)\n\n"
|
||||
f"{memory_context}"
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.warning("Failed to load memory context: %s", exc)
|
||||
|
||||
return base_prompt
|
||||
|
||||
|
||||
def _create_ollama_agent(
|
||||
*,
|
||||
db_file: str,
|
||||
model_name: str,
|
||||
tools_list: list,
|
||||
full_prompt: str,
|
||||
use_tools: bool,
|
||||
) -> Agent:
|
||||
"""Construct the Agno Agent with Ollama backend and warm up the model."""
|
||||
model_kwargs = {}
|
||||
if settings.ollama_num_ctx > 0:
|
||||
model_kwargs["options"] = {"num_ctx": settings.ollama_num_ctx}
|
||||
|
||||
agent = Agent(
|
||||
name="Agent",
|
||||
model=Ollama(id=model_name, host=settings.ollama_url, timeout=300, **model_kwargs),
|
||||
db=SqliteDb(db_file=db_file),
|
||||
description=full_prompt,
|
||||
add_history_to_context=True,
|
||||
num_history_runs=20,
|
||||
markdown=False,
|
||||
tools=tools_list if tools_list else None,
|
||||
tool_call_limit=settings.max_agent_steps if use_tools else None,
|
||||
telemetry=settings.telemetry_enabled,
|
||||
)
|
||||
_warmup_model(model_name)
|
||||
return agent
|
||||
|
||||
|
||||
def create_timmy(
|
||||
db_file: str = "timmy.db",
|
||||
backend: str | None = None,
|
||||
@@ -238,16 +322,12 @@ def create_timmy(
|
||||
return TimmyAirLLMAgent(model_size=size)
|
||||
|
||||
# Default: Ollama via Agno.
|
||||
# Resolve model with automatic pulling and fallback
|
||||
model_name, is_fallback = _resolve_model_with_fallback(
|
||||
requested_model=None,
|
||||
require_vision=False,
|
||||
auto_pull=True,
|
||||
)
|
||||
|
||||
# If Ollama is completely unreachable, fail loudly.
|
||||
# Sovereignty: never silently send data to a cloud API.
|
||||
# Use --backend claude explicitly if you want cloud inference.
|
||||
if not _check_model_available(model_name):
|
||||
logger.error(
|
||||
"Ollama unreachable and no local models available. "
|
||||
@@ -258,76 +338,16 @@ def create_timmy(
|
||||
logger.info("Using fallback model %s (requested was unavailable)", model_name)
|
||||
|
||||
use_tools = _model_supports_tools(model_name)
|
||||
tools_list = _build_tools_list(use_tools, skip_mcp, model_name)
|
||||
full_prompt = _build_prompt(use_tools, session_id)
|
||||
|
||||
# Conditionally include tools — small models get none
|
||||
toolkit = create_full_toolkit() if use_tools else None
|
||||
if not use_tools:
|
||||
logger.info("Tools disabled for model %s (too small for reliable tool calling)", model_name)
|
||||
|
||||
# Build the tools list — Agno accepts a list of Toolkit / MCPTools
|
||||
tools_list: list = []
|
||||
if toolkit:
|
||||
tools_list.append(toolkit)
|
||||
|
||||
# Add MCP tool servers (lazy-connected on first arun()).
|
||||
# Skipped when skip_mcp=True — MCP's stdio transport uses anyio cancel
|
||||
# scopes that conflict with asyncio background task cancellation (#72).
|
||||
if use_tools and not skip_mcp:
|
||||
try:
|
||||
from timmy.mcp_tools import create_filesystem_mcp_tools, create_gitea_mcp_tools
|
||||
|
||||
gitea_mcp = create_gitea_mcp_tools()
|
||||
if gitea_mcp:
|
||||
tools_list.append(gitea_mcp)
|
||||
|
||||
fs_mcp = create_filesystem_mcp_tools()
|
||||
if fs_mcp:
|
||||
tools_list.append(fs_mcp)
|
||||
except Exception as exc:
|
||||
logger.debug("MCP tools unavailable: %s", exc)
|
||||
|
||||
# Select prompt tier based on tool capability
|
||||
base_prompt = get_system_prompt(tools_enabled=use_tools, session_id=session_id)
|
||||
|
||||
# Try to load memory context
|
||||
try:
|
||||
from timmy.memory_system import memory_system
|
||||
|
||||
memory_context = memory_system.get_system_context()
|
||||
if memory_context:
|
||||
# Truncate if too long — smaller budget for small models
|
||||
# since the expanded prompt (roster, guardrails) uses more tokens
|
||||
max_context = 2000 if not use_tools else 8000
|
||||
if len(memory_context) > max_context:
|
||||
memory_context = memory_context[:max_context] + "\n... [truncated]"
|
||||
full_prompt = (
|
||||
f"{base_prompt}\n\n"
|
||||
f"## GROUNDED CONTEXT (verified sources — cite when using)\n\n"
|
||||
f"{memory_context}"
|
||||
)
|
||||
else:
|
||||
full_prompt = base_prompt
|
||||
except Exception as exc:
|
||||
logger.warning("Failed to load memory context: %s", exc)
|
||||
full_prompt = base_prompt
|
||||
|
||||
model_kwargs = {}
|
||||
if settings.ollama_num_ctx > 0:
|
||||
model_kwargs["options"] = {"num_ctx": settings.ollama_num_ctx}
|
||||
agent = Agent(
|
||||
name="Agent",
|
||||
model=Ollama(id=model_name, host=settings.ollama_url, timeout=300, **model_kwargs),
|
||||
db=SqliteDb(db_file=db_file),
|
||||
description=full_prompt,
|
||||
add_history_to_context=True,
|
||||
num_history_runs=20,
|
||||
markdown=False,
|
||||
tools=tools_list if tools_list else None,
|
||||
tool_call_limit=settings.max_agent_steps if use_tools else None,
|
||||
telemetry=settings.telemetry_enabled,
|
||||
return _create_ollama_agent(
|
||||
db_file=db_file,
|
||||
model_name=model_name,
|
||||
tools_list=tools_list,
|
||||
full_prompt=full_prompt,
|
||||
use_tools=use_tools,
|
||||
)
|
||||
_warmup_model(model_name)
|
||||
return agent
|
||||
|
||||
|
||||
class TimmyWithMemory:
|
||||
|
||||
@@ -232,6 +232,90 @@ class ThinkingEngine:
|
||||
return False # Disabled — never idle
|
||||
return datetime.now(UTC) - self._last_input_time > timedelta(minutes=timeout)
|
||||
|
||||
def _build_thinking_context(self) -> tuple[str, str, list["Thought"]]:
|
||||
"""Assemble the context needed for a thinking cycle.
|
||||
|
||||
Returns:
|
||||
(memory_context, system_context, recent_thoughts)
|
||||
"""
|
||||
memory_context = self._load_memory_context()
|
||||
system_context = self._gather_system_snapshot()
|
||||
recent_thoughts = self.get_recent_thoughts(limit=5)
|
||||
return memory_context, system_context, recent_thoughts
|
||||
|
||||
async def _generate_novel_thought(
|
||||
self,
|
||||
prompt: str | None,
|
||||
memory_context: str,
|
||||
system_context: str,
|
||||
recent_thoughts: list["Thought"],
|
||||
) -> tuple[str | None, str]:
|
||||
"""Run the dedup-retry loop to produce a novel thought.
|
||||
|
||||
Returns:
|
||||
(content, seed_type) — content is None if no novel thought produced.
|
||||
"""
|
||||
seed_type: str = "freeform"
|
||||
|
||||
for attempt in range(self._MAX_DEDUP_RETRIES + 1):
|
||||
if prompt:
|
||||
seed_type = "prompted"
|
||||
seed_context = f"Journal prompt: {prompt}"
|
||||
else:
|
||||
seed_type, seed_context = self._gather_seed()
|
||||
|
||||
continuity = self._build_continuity_context()
|
||||
|
||||
full_prompt = _THINKING_PROMPT.format(
|
||||
memory_context=memory_context,
|
||||
system_context=system_context,
|
||||
seed_context=seed_context,
|
||||
continuity_context=continuity,
|
||||
)
|
||||
|
||||
try:
|
||||
raw = await self._call_agent(full_prompt)
|
||||
except Exception as exc:
|
||||
logger.warning("Thinking cycle failed (Ollama likely down): %s", exc)
|
||||
return None, seed_type
|
||||
|
||||
if not raw or not raw.strip():
|
||||
logger.debug("Thinking cycle produced empty response, skipping")
|
||||
return None, seed_type
|
||||
|
||||
content = raw.strip()
|
||||
|
||||
# Dedup: reject thoughts too similar to recent ones
|
||||
if not self._is_too_similar(content, recent_thoughts):
|
||||
return content, seed_type # Good — novel thought
|
||||
|
||||
if attempt < self._MAX_DEDUP_RETRIES:
|
||||
logger.info(
|
||||
"Thought too similar to recent (attempt %d/%d), retrying with new seed",
|
||||
attempt + 1,
|
||||
self._MAX_DEDUP_RETRIES + 1,
|
||||
)
|
||||
else:
|
||||
logger.warning(
|
||||
"Thought still repetitive after %d retries, discarding",
|
||||
self._MAX_DEDUP_RETRIES + 1,
|
||||
)
|
||||
return None, seed_type
|
||||
|
||||
return None, seed_type
|
||||
|
||||
async def _process_thinking_result(self, thought: "Thought") -> None:
|
||||
"""Run all post-hooks after a thought is stored."""
|
||||
self._maybe_check_memory()
|
||||
await self._maybe_distill()
|
||||
await self._maybe_file_issues()
|
||||
await self._check_workspace()
|
||||
self._maybe_check_memory_status()
|
||||
self._update_memory(thought)
|
||||
self._log_event(thought)
|
||||
self._write_journal(thought)
|
||||
await self._broadcast(thought)
|
||||
|
||||
async def think_once(self, prompt: str | None = None) -> Thought | None:
|
||||
"""Execute one thinking cycle.
|
||||
|
||||
@@ -257,91 +341,21 @@ class ThinkingEngine:
|
||||
)
|
||||
return None
|
||||
|
||||
memory_context = self._load_memory_context()
|
||||
system_context = self._gather_system_snapshot()
|
||||
recent_thoughts = self.get_recent_thoughts(limit=5)
|
||||
|
||||
content: str | None = None
|
||||
seed_type: str = "freeform"
|
||||
|
||||
for attempt in range(self._MAX_DEDUP_RETRIES + 1):
|
||||
if prompt:
|
||||
seed_type = "prompted"
|
||||
seed_context = f"Journal prompt: {prompt}"
|
||||
else:
|
||||
seed_type, seed_context = self._gather_seed()
|
||||
|
||||
continuity = self._build_continuity_context()
|
||||
|
||||
full_prompt = _THINKING_PROMPT.format(
|
||||
memory_context=memory_context,
|
||||
system_context=system_context,
|
||||
seed_context=seed_context,
|
||||
continuity_context=continuity,
|
||||
)
|
||||
|
||||
try:
|
||||
raw = await self._call_agent(full_prompt)
|
||||
except Exception as exc:
|
||||
logger.warning("Thinking cycle failed (Ollama likely down): %s", exc)
|
||||
return None
|
||||
|
||||
if not raw or not raw.strip():
|
||||
logger.debug("Thinking cycle produced empty response, skipping")
|
||||
return None
|
||||
|
||||
content = raw.strip()
|
||||
|
||||
# Dedup: reject thoughts too similar to recent ones
|
||||
if not self._is_too_similar(content, recent_thoughts):
|
||||
break # Good — novel thought
|
||||
|
||||
if attempt < self._MAX_DEDUP_RETRIES:
|
||||
logger.info(
|
||||
"Thought too similar to recent (attempt %d/%d), retrying with new seed",
|
||||
attempt + 1,
|
||||
self._MAX_DEDUP_RETRIES + 1,
|
||||
)
|
||||
content = None # Will retry
|
||||
else:
|
||||
logger.warning(
|
||||
"Thought still repetitive after %d retries, discarding",
|
||||
self._MAX_DEDUP_RETRIES + 1,
|
||||
)
|
||||
return None
|
||||
memory_context, system_context, recent_thoughts = self._build_thinking_context()
|
||||
|
||||
content, seed_type = await self._generate_novel_thought(
|
||||
prompt,
|
||||
memory_context,
|
||||
system_context,
|
||||
recent_thoughts,
|
||||
)
|
||||
if not content:
|
||||
return None
|
||||
|
||||
thought = self._store_thought(content, seed_type)
|
||||
self._last_thought_id = thought.id
|
||||
|
||||
# Post-hook: check memory status periodically
|
||||
self._maybe_check_memory()
|
||||
|
||||
# Post-hook: distill facts from recent thoughts periodically
|
||||
await self._maybe_distill()
|
||||
|
||||
# Post-hook: file Gitea issues for actionable observations
|
||||
await self._maybe_file_issues()
|
||||
|
||||
# Post-hook: check workspace for new messages from Hermes
|
||||
await self._check_workspace()
|
||||
|
||||
# Post-hook: proactive memory status audit
|
||||
self._maybe_check_memory_status()
|
||||
|
||||
# Post-hook: update MEMORY.md with latest reflection
|
||||
self._update_memory(thought)
|
||||
|
||||
# Log to swarm event system
|
||||
self._log_event(thought)
|
||||
|
||||
# Append to daily journal file
|
||||
self._write_journal(thought)
|
||||
|
||||
# Broadcast to WebSocket clients
|
||||
await self._broadcast(thought)
|
||||
await self._process_thinking_result(thought)
|
||||
|
||||
logger.info(
|
||||
"Thought [%s] (%s): %s",
|
||||
@@ -1110,21 +1124,37 @@ class ThinkingEngine:
|
||||
lines.append(f"- [{thought.seed_type}] {snippet}")
|
||||
return "\n".join(lines)
|
||||
|
||||
_thinking_agent = None # cached agent — avoids per-call resource leaks (#525)
|
||||
|
||||
async def _call_agent(self, prompt: str) -> str:
|
||||
"""Call Timmy's agent to generate a thought.
|
||||
|
||||
Creates a lightweight agent with skip_mcp=True to avoid the cancel-scope
|
||||
Reuses a cached agent with skip_mcp=True to avoid the cancel-scope
|
||||
errors that occur when MCP stdio transports are spawned inside asyncio
|
||||
background tasks (#72). The thinking engine doesn't need Gitea or
|
||||
filesystem tools — it only needs the LLM.
|
||||
background tasks (#72) and to prevent per-call resource leaks (httpx
|
||||
clients, SQLite connections, model warmups) that caused the thinking
|
||||
loop to die every ~10 min (#525).
|
||||
|
||||
Individual calls are capped at 120 s so a hung Ollama never blocks
|
||||
the scheduler indefinitely.
|
||||
|
||||
Strips ``<think>`` tags from reasoning models (qwen3, etc.) so that
|
||||
downstream parsers (fact distillation, issue filing) receive clean text.
|
||||
"""
|
||||
from timmy.agent import create_timmy
|
||||
import asyncio
|
||||
|
||||
if self._thinking_agent is None:
|
||||
from timmy.agent import create_timmy
|
||||
|
||||
self._thinking_agent = create_timmy(skip_mcp=True)
|
||||
|
||||
try:
|
||||
async with asyncio.timeout(120):
|
||||
run = await self._thinking_agent.arun(prompt, stream=False)
|
||||
except TimeoutError:
|
||||
logger.warning("Thinking LLM call timed out after 120 s")
|
||||
return ""
|
||||
|
||||
agent = create_timmy(skip_mcp=True)
|
||||
run = await agent.arun(prompt, stream=False)
|
||||
raw = run.content if hasattr(run, "content") else str(run)
|
||||
return _THINK_TAG_RE.sub("", raw) if raw else raw
|
||||
|
||||
|
||||
@@ -127,54 +127,48 @@ def check_ollama_health() -> dict[str, Any]:
|
||||
return result
|
||||
|
||||
|
||||
def get_memory_status() -> dict[str, Any]:
|
||||
"""Get the status of Timmy's memory system.
|
||||
|
||||
Returns:
|
||||
Dict with memory tier information
|
||||
"""
|
||||
from config import settings
|
||||
|
||||
repo_root = Path(settings.repo_root)
|
||||
|
||||
# Check tier 1: Hot memory
|
||||
def _hot_memory_info(repo_root: Path) -> dict[str, Any]:
|
||||
"""Tier 1: Hot memory (MEMORY.md) status."""
|
||||
memory_md = repo_root / "MEMORY.md"
|
||||
tier1_exists = memory_md.exists()
|
||||
tier1_content = ""
|
||||
if tier1_exists:
|
||||
tier1_content = memory_md.read_text()[:500] # First 500 chars
|
||||
tier1_content = memory_md.read_text()[:500]
|
||||
|
||||
# Check tier 2: Vault
|
||||
vault_path = repo_root / "memory" / "self"
|
||||
tier2_exists = vault_path.exists()
|
||||
tier2_files = []
|
||||
if tier2_exists:
|
||||
tier2_files = [f.name for f in vault_path.iterdir() if f.is_file()]
|
||||
|
||||
tier1_info: dict[str, Any] = {
|
||||
info: dict[str, Any] = {
|
||||
"exists": tier1_exists,
|
||||
"path": str(memory_md),
|
||||
"preview": " ".join(tier1_content[:200].split()) if tier1_content else None,
|
||||
}
|
||||
if tier1_exists:
|
||||
lines = memory_md.read_text().splitlines()
|
||||
tier1_info["line_count"] = len(lines)
|
||||
tier1_info["sections"] = [ln.lstrip("# ").strip() for ln in lines if ln.startswith("## ")]
|
||||
info["line_count"] = len(lines)
|
||||
info["sections"] = [ln.lstrip("# ").strip() for ln in lines if ln.startswith("## ")]
|
||||
return info
|
||||
|
||||
|
||||
def _vault_info(repo_root: Path) -> dict[str, Any]:
|
||||
"""Tier 2: Vault (memory/ directory tree) status."""
|
||||
vault_path = repo_root / "memory" / "self"
|
||||
tier2_exists = vault_path.exists()
|
||||
tier2_files = [f.name for f in vault_path.iterdir() if f.is_file()] if tier2_exists else []
|
||||
|
||||
# Vault — scan all subdirs under memory/
|
||||
vault_root = repo_root / "memory"
|
||||
vault_info: dict[str, Any] = {
|
||||
info: dict[str, Any] = {
|
||||
"exists": tier2_exists,
|
||||
"path": str(vault_path),
|
||||
"file_count": len(tier2_files),
|
||||
"files": tier2_files[:10],
|
||||
}
|
||||
if vault_root.exists():
|
||||
vault_info["directories"] = [d.name for d in vault_root.iterdir() if d.is_dir()]
|
||||
vault_info["total_markdown_files"] = sum(1 for _ in vault_root.rglob("*.md"))
|
||||
info["directories"] = [d.name for d in vault_root.iterdir() if d.is_dir()]
|
||||
info["total_markdown_files"] = sum(1 for _ in vault_root.rglob("*.md"))
|
||||
return info
|
||||
|
||||
# Tier 3: Semantic memory row count
|
||||
tier3_info: dict[str, Any] = {"available": False}
|
||||
|
||||
def _semantic_memory_info(repo_root: Path) -> dict[str, Any]:
|
||||
"""Tier 3: Semantic memory (vector DB) status."""
|
||||
info: dict[str, Any] = {"available": False}
|
||||
try:
|
||||
sem_db = repo_root / "data" / "memory.db"
|
||||
if sem_db.exists():
|
||||
@@ -184,14 +178,16 @@ def get_memory_status() -> dict[str, Any]:
|
||||
).fetchone()
|
||||
if row and row[0]:
|
||||
count = conn.execute("SELECT COUNT(*) FROM chunks").fetchone()
|
||||
tier3_info["available"] = True
|
||||
tier3_info["vector_count"] = count[0] if count else 0
|
||||
info["available"] = True
|
||||
info["vector_count"] = count[0] if count else 0
|
||||
except Exception as exc:
|
||||
logger.debug("Memory status query failed: %s", exc)
|
||||
pass
|
||||
return info
|
||||
|
||||
# Self-coding journal stats
|
||||
journal_info: dict[str, Any] = {"available": False}
|
||||
|
||||
def _journal_info(repo_root: Path) -> dict[str, Any]:
|
||||
"""Self-coding journal statistics."""
|
||||
info: dict[str, Any] = {"available": False}
|
||||
try:
|
||||
journal_db = repo_root / "data" / "self_coding.db"
|
||||
if journal_db.exists():
|
||||
@@ -203,7 +199,7 @@ def get_memory_status() -> dict[str, Any]:
|
||||
if rows:
|
||||
counts = {r["outcome"]: r["cnt"] for r in rows}
|
||||
total = sum(counts.values())
|
||||
journal_info = {
|
||||
info = {
|
||||
"available": True,
|
||||
"total_attempts": total,
|
||||
"successes": counts.get("success", 0),
|
||||
@@ -212,13 +208,24 @@ def get_memory_status() -> dict[str, Any]:
|
||||
}
|
||||
except Exception as exc:
|
||||
logger.debug("Journal stats query failed: %s", exc)
|
||||
pass
|
||||
return info
|
||||
|
||||
|
||||
def get_memory_status() -> dict[str, Any]:
|
||||
"""Get the status of Timmy's memory system.
|
||||
|
||||
Returns:
|
||||
Dict with memory tier information
|
||||
"""
|
||||
from config import settings
|
||||
|
||||
repo_root = Path(settings.repo_root)
|
||||
|
||||
return {
|
||||
"tier1_hot_memory": tier1_info,
|
||||
"tier2_vault": vault_info,
|
||||
"tier3_semantic": tier3_info,
|
||||
"self_coding_journal": journal_info,
|
||||
"tier1_hot_memory": _hot_memory_info(repo_root),
|
||||
"tier2_vault": _vault_info(repo_root),
|
||||
"tier3_semantic": _semantic_memory_info(repo_root),
|
||||
"self_coding_journal": _journal_info(repo_root),
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -5,9 +5,14 @@ from datetime import UTC, datetime, timedelta
|
||||
from unittest.mock import patch
|
||||
|
||||
from infrastructure.error_capture import (
|
||||
_create_bug_report,
|
||||
_dedup_cache,
|
||||
_extract_traceback_info,
|
||||
_get_git_context,
|
||||
_is_duplicate,
|
||||
_log_error_event,
|
||||
_notify_bug_report,
|
||||
_record_to_session,
|
||||
_stack_hash,
|
||||
capture_error,
|
||||
)
|
||||
@@ -193,3 +198,91 @@ class TestCaptureError:
|
||||
|
||||
def teardown_method(self):
|
||||
_dedup_cache.clear()
|
||||
|
||||
|
||||
class TestExtractTracebackInfo:
|
||||
"""Test _extract_traceback_info helper."""
|
||||
|
||||
def test_returns_three_tuple(self):
|
||||
try:
|
||||
raise ValueError("extract test")
|
||||
except ValueError as e:
|
||||
tb_str, affected_file, affected_line = _extract_traceback_info(e)
|
||||
assert "ValueError" in tb_str
|
||||
assert "extract test" in tb_str
|
||||
assert affected_file.endswith(".py")
|
||||
assert affected_line > 0
|
||||
|
||||
def test_file_points_to_raise_site(self):
|
||||
try:
|
||||
_make_exception()
|
||||
except ValueError as e:
|
||||
_, affected_file, _ = _extract_traceback_info(e)
|
||||
assert "test_error_capture" in affected_file
|
||||
|
||||
|
||||
class TestLogErrorEvent:
|
||||
"""Test _log_error_event helper."""
|
||||
|
||||
def test_does_not_crash_on_missing_deps(self):
|
||||
try:
|
||||
raise RuntimeError("log test")
|
||||
except RuntimeError as e:
|
||||
_log_error_event(e, "test", "abc123", "file.py", 42, {"branch": "main"})
|
||||
|
||||
|
||||
class TestCreateBugReport:
|
||||
"""Test _create_bug_report helper."""
|
||||
|
||||
def test_does_not_crash_on_missing_deps(self):
|
||||
try:
|
||||
raise RuntimeError("report test")
|
||||
except RuntimeError as e:
|
||||
result = _create_bug_report(
|
||||
e, "test", None, "abc123", "traceback...", "file.py", 42, {}
|
||||
)
|
||||
# May return None if swarm deps unavailable — that's fine
|
||||
assert result is None or isinstance(result, str)
|
||||
|
||||
def test_with_context(self):
|
||||
try:
|
||||
raise RuntimeError("ctx test")
|
||||
except RuntimeError as e:
|
||||
result = _create_bug_report(e, "test", {"path": "/api"}, "abc", "tb", "f.py", 1, {})
|
||||
assert result is None or isinstance(result, str)
|
||||
|
||||
|
||||
class TestNotifyBugReport:
|
||||
"""Test _notify_bug_report helper."""
|
||||
|
||||
def test_does_not_crash(self):
|
||||
try:
|
||||
raise RuntimeError("notify test")
|
||||
except RuntimeError as e:
|
||||
_notify_bug_report(e, "test")
|
||||
|
||||
|
||||
class TestRecordToSession:
|
||||
"""Test _record_to_session helper."""
|
||||
|
||||
def test_does_not_crash_without_recorder(self):
|
||||
try:
|
||||
raise RuntimeError("session test")
|
||||
except RuntimeError as e:
|
||||
_record_to_session(e, "test")
|
||||
|
||||
def test_calls_registered_recorder(self):
|
||||
from infrastructure.error_capture import register_error_recorder
|
||||
|
||||
calls = []
|
||||
register_error_recorder(lambda **kwargs: calls.append(kwargs))
|
||||
try:
|
||||
try:
|
||||
raise RuntimeError("callback test")
|
||||
except RuntimeError as e:
|
||||
_record_to_session(e, "test_source")
|
||||
assert len(calls) == 1
|
||||
assert "RuntimeError" in calls[0]["error"]
|
||||
assert calls[0]["context"] == "test_source"
|
||||
finally:
|
||||
register_error_recorder(None)
|
||||
|
||||
@@ -444,6 +444,150 @@ def test_get_effective_ollama_model_walks_fallback_chain():
|
||||
assert result == "fb-2"
|
||||
|
||||
|
||||
# ── _build_tools_list ─────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def test_build_tools_list_empty_when_tools_disabled():
|
||||
"""Small models get an empty tools list."""
|
||||
from timmy.agent import _build_tools_list
|
||||
|
||||
result = _build_tools_list(use_tools=False, skip_mcp=False, model_name="llama3.2")
|
||||
assert result == []
|
||||
|
||||
|
||||
def test_build_tools_list_includes_toolkit_when_enabled():
|
||||
"""Tool-capable models get the full toolkit."""
|
||||
mock_toolkit = MagicMock()
|
||||
with patch("timmy.agent.create_full_toolkit", return_value=mock_toolkit):
|
||||
from timmy.agent import _build_tools_list
|
||||
|
||||
result = _build_tools_list(use_tools=True, skip_mcp=True, model_name="llama3.1")
|
||||
assert mock_toolkit in result
|
||||
|
||||
|
||||
def test_build_tools_list_skips_mcp_when_flagged():
|
||||
"""skip_mcp=True must not call MCP factories."""
|
||||
mock_toolkit = MagicMock()
|
||||
with (
|
||||
patch("timmy.agent.create_full_toolkit", return_value=mock_toolkit),
|
||||
patch("timmy.mcp_tools.create_gitea_mcp_tools") as mock_gitea,
|
||||
patch("timmy.mcp_tools.create_filesystem_mcp_tools") as mock_fs,
|
||||
):
|
||||
from timmy.agent import _build_tools_list
|
||||
|
||||
_build_tools_list(use_tools=True, skip_mcp=True, model_name="llama3.1")
|
||||
mock_gitea.assert_not_called()
|
||||
mock_fs.assert_not_called()
|
||||
|
||||
|
||||
def test_build_tools_list_includes_mcp_when_not_skipped():
|
||||
"""skip_mcp=False should attempt MCP tool creation."""
|
||||
mock_toolkit = MagicMock()
|
||||
with (
|
||||
patch("timmy.agent.create_full_toolkit", return_value=mock_toolkit),
|
||||
patch("timmy.mcp_tools.create_gitea_mcp_tools", return_value=None) as mock_gitea,
|
||||
patch("timmy.mcp_tools.create_filesystem_mcp_tools", return_value=None) as mock_fs,
|
||||
):
|
||||
from timmy.agent import _build_tools_list
|
||||
|
||||
_build_tools_list(use_tools=True, skip_mcp=False, model_name="llama3.1")
|
||||
mock_gitea.assert_called_once()
|
||||
mock_fs.assert_called_once()
|
||||
|
||||
|
||||
# ── _build_prompt ─────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def test_build_prompt_includes_base_prompt():
|
||||
"""Prompt should always contain the base system prompt."""
|
||||
from timmy.agent import _build_prompt
|
||||
|
||||
result = _build_prompt(use_tools=False, session_id="test")
|
||||
assert "Timmy" in result
|
||||
|
||||
|
||||
def test_build_prompt_appends_memory_context():
|
||||
"""Memory context should be appended when available."""
|
||||
mock_memory = MagicMock()
|
||||
mock_memory.get_system_context.return_value = "User prefers dark mode."
|
||||
with patch("timmy.memory_system.memory_system", mock_memory):
|
||||
from timmy.agent import _build_prompt
|
||||
|
||||
result = _build_prompt(use_tools=True, session_id="test")
|
||||
assert "GROUNDED CONTEXT" in result
|
||||
assert "dark mode" in result
|
||||
|
||||
|
||||
def test_build_prompt_truncates_long_memory():
|
||||
"""Long memory context should be truncated."""
|
||||
mock_memory = MagicMock()
|
||||
mock_memory.get_system_context.return_value = "x" * 10000
|
||||
with patch("timmy.memory_system.memory_system", mock_memory):
|
||||
from timmy.agent import _build_prompt
|
||||
|
||||
result = _build_prompt(use_tools=False, session_id="test")
|
||||
assert "[truncated]" in result
|
||||
|
||||
|
||||
def test_build_prompt_survives_memory_failure():
|
||||
"""Prompt should fall back to base when memory fails."""
|
||||
mock_memory = MagicMock()
|
||||
mock_memory.get_system_context.side_effect = RuntimeError("db locked")
|
||||
with patch("timmy.memory_system.memory_system", mock_memory):
|
||||
from timmy.agent import _build_prompt
|
||||
|
||||
result = _build_prompt(use_tools=True, session_id="test")
|
||||
assert "Timmy" in result
|
||||
# Memory context should NOT be appended (the db locked error was caught)
|
||||
assert "db locked" not in result
|
||||
|
||||
|
||||
# ── _create_ollama_agent ──────────────────────────────────────────────────
|
||||
|
||||
|
||||
def test_create_ollama_agent_passes_correct_kwargs():
|
||||
"""_create_ollama_agent must pass the expected kwargs to Agent."""
|
||||
with (
|
||||
patch("timmy.agent.Agent") as MockAgent,
|
||||
patch("timmy.agent.Ollama"),
|
||||
patch("timmy.agent.SqliteDb"),
|
||||
patch("timmy.agent._warmup_model", return_value=True),
|
||||
):
|
||||
from timmy.agent import _create_ollama_agent
|
||||
|
||||
_create_ollama_agent(
|
||||
db_file="test.db",
|
||||
model_name="llama3.1",
|
||||
tools_list=[MagicMock()],
|
||||
full_prompt="test prompt",
|
||||
use_tools=True,
|
||||
)
|
||||
kwargs = MockAgent.call_args.kwargs
|
||||
assert kwargs["description"] == "test prompt"
|
||||
assert kwargs["markdown"] is False
|
||||
|
||||
|
||||
def test_create_ollama_agent_none_tools_when_empty():
|
||||
"""Empty tools_list should pass tools=None to Agent."""
|
||||
with (
|
||||
patch("timmy.agent.Agent") as MockAgent,
|
||||
patch("timmy.agent.Ollama"),
|
||||
patch("timmy.agent.SqliteDb"),
|
||||
patch("timmy.agent._warmup_model", return_value=True),
|
||||
):
|
||||
from timmy.agent import _create_ollama_agent
|
||||
|
||||
_create_ollama_agent(
|
||||
db_file="test.db",
|
||||
model_name="llama3.2",
|
||||
tools_list=[],
|
||||
full_prompt="test prompt",
|
||||
use_tools=False,
|
||||
)
|
||||
kwargs = MockAgent.call_args.kwargs
|
||||
assert kwargs["tools"] is None
|
||||
|
||||
|
||||
def test_no_hardcoded_fallback_constants_in_agent():
|
||||
"""agent.py must not define module-level DEFAULT_MODEL_FALLBACKS."""
|
||||
import timmy.agent as agent_mod
|
||||
|
||||
Reference in New Issue
Block a user