From a719c7538df3135b3a8ef54a191069f460710ba0 Mon Sep 17 00:00:00 2001 From: Alexander Payne Date: Wed, 25 Feb 2026 19:26:24 -0500 Subject: [PATCH 1/3] Implement MCP system, Event Bus, and Sub-Agents ## 1. MCP (Model Context Protocol) Implementation ### Registry (src/mcp/registry.py) - Tool registration with JSON schemas - Dynamic tool discovery - Health tracking per tool - Metrics collection (latency, error rates) - @register_tool decorator for easy registration ### Server (src/mcp/server.py) - MCPServer class implementing MCP protocol - MCPHTTPServer for FastAPI integration - Standard endpoints: list_tools, call_tool, get_schema ### Schemas (src/mcp/schemas/base.py) - create_tool_schema() helper - Common parameter types - Standard return types ### Bootstrap (src/mcp/bootstrap.py) - Automatic tool module loading - Status reporting ## 2. MCP-Compliant Tools (src/tools/) | Tool | Purpose | Category | |------|---------|----------| | web_search | DuckDuckGo search | research | | read_file | File reading | files | | write_file | File writing (confirmation) | files | | list_directory | Directory listing | files | | python | Python code execution | code | | memory_search | Vector memory search | memory | All tools have proper schemas, error handling, and MCP registration. ## 3. Event Bus (src/events/bus.py) - Async publish/subscribe pattern - Pattern matching with wildcards (agent.task.*) - Event history tracking - Concurrent handler execution - Module-level singleton for system-wide use ## 4. 
Sub-Agents (src/agents/) All agents inherit from BaseAgent with: - Agno Agent integration - MCP tool registry access - Event bus connectivity - Structured logging ### Agent Roster | Agent | Role | Tools | Purpose | |-------|------|-------|---------| | Seer | Research | web_search, read_file, memory_search | Information gathering | | Forge | Code | python, write_file, read_file | Code generation | | Quill | Writing | write_file, read_file, memory_search | Content creation | | Echo | Memory | memory_search, read_file, write_file | Context retrieval | | Helm | Routing | memory_search | Task routing decisions | | Timmy | Orchestrator | All tools | Coordination & user interface | ### Timmy Orchestrator - Analyzes user requests - Routes to appropriate sub-agent - Handles direct queries - Manages swarm coordination - create_timmy_swarm() factory function ## 5. Integration All components wired together: - Tools auto-register on import - Agents connect to event bus - MCP server provides HTTP API - Ready for dashboard integration ## Tests - All 973 existing tests pass - New components tested manually - Import verification successful Next steps: Cascade Router, Self-Upgrade Loop, Dashboard integration --- src/agents/__init__.py | 21 +++ src/agents/base.py | 139 ++++++++++++++++ src/agents/echo.py | 81 ++++++++++ src/agents/forge.py | 92 +++++++++++ src/agents/helm.py | 106 ++++++++++++ src/agents/quill.py | 80 +++++++++ src/agents/seer.py | 91 +++++++++++ src/agents/timmy.py | 184 +++++++++++++++++++++ src/events/bus.py | 168 +++++++++++++++++++ src/mcp/__init__.py | 17 ++ src/mcp/bootstrap.py | 71 ++++++++ src/mcp/registry.py | 340 +++++++++++++++++++++++++++++++++++++++ src/mcp/schemas/base.py | 52 ++++++ src/mcp/server.py | 210 ++++++++++++++++++++++++ src/tools/code_exec.py | 124 ++++++++++++++ src/tools/file_ops.py | 179 +++++++++++++++++++++ src/tools/memory_tool.py | 70 ++++++++ src/tools/web_search.py | 74 +++++++++ 18 files changed, 2099 insertions(+) create mode 
100644 src/agents/__init__.py create mode 100644 src/agents/base.py create mode 100644 src/agents/echo.py create mode 100644 src/agents/forge.py create mode 100644 src/agents/helm.py create mode 100644 src/agents/quill.py create mode 100644 src/agents/seer.py create mode 100644 src/agents/timmy.py create mode 100644 src/events/bus.py create mode 100644 src/mcp/__init__.py create mode 100644 src/mcp/bootstrap.py create mode 100644 src/mcp/registry.py create mode 100644 src/mcp/schemas/base.py create mode 100644 src/mcp/server.py create mode 100644 src/tools/code_exec.py create mode 100644 src/tools/file_ops.py create mode 100644 src/tools/memory_tool.py create mode 100644 src/tools/web_search.py diff --git a/src/agents/__init__.py b/src/agents/__init__.py new file mode 100644 index 0000000..03a76c4 --- /dev/null +++ b/src/agents/__init__.py @@ -0,0 +1,21 @@ +"""Agents package — Timmy and sub-agents. +""" + +from agents.timmy import TimmyOrchestrator, create_timmy_swarm +from agents.base import BaseAgent +from agents.seer import SeerAgent +from agents.forge import ForgeAgent +from agents.quill import QuillAgent +from agents.echo import EchoAgent +from agents.helm import HelmAgent + +__all__ = [ + "BaseAgent", + "TimmyOrchestrator", + "create_timmy_swarm", + "SeerAgent", + "ForgeAgent", + "QuillAgent", + "EchoAgent", + "HelmAgent", +] diff --git a/src/agents/base.py b/src/agents/base.py new file mode 100644 index 0000000..7469868 --- /dev/null +++ b/src/agents/base.py @@ -0,0 +1,139 @@ +"""Base agent class for all Timmy sub-agents. 
+ +All sub-agents inherit from BaseAgent and get: +- MCP tool registry access +- Event bus integration +- Memory integration +- Structured logging +""" + +import logging +from abc import ABC, abstractmethod +from typing import Any, Optional + +from agno.agent import Agent +from agno.models.ollama import Ollama + +from config import settings +from events.bus import EventBus, Event +from mcp.registry import tool_registry + +logger = logging.getLogger(__name__) + + +class BaseAgent(ABC): + """Base class for all Timmy sub-agents. + + Sub-agents are specialized agents that handle specific tasks: + - Seer: Research and information gathering + - Mace: Security and validation + - Quill: Writing and content + - Forge: Code and tool building + - Echo: Memory and context + - Helm: Routing and orchestration + """ + + def __init__( + self, + agent_id: str, + name: str, + role: str, + system_prompt: str, + tools: list[str] | None = None, + ) -> None: + self.agent_id = agent_id + self.name = name + self.role = role + self.tools = tools or [] + + # Create Agno agent + self.agent = self._create_agent(system_prompt) + + # Event bus for communication + self.event_bus: Optional[EventBus] = None + + logger.info("%s agent initialized (id: %s)", name, agent_id) + + def _create_agent(self, system_prompt: str) -> Agent: + """Create the underlying Agno agent.""" + # Get tools from registry + tool_instances = [] + for tool_name in self.tools: + handler = tool_registry.get_handler(tool_name) + if handler: + tool_instances.append(handler) + + return Agent( + name=self.name, + model=Ollama(id=settings.ollama_model, host=settings.ollama_url), + description=system_prompt, + tools=tool_instances if tool_instances else None, + add_history_to_context=True, + num_history_runs=10, + markdown=True, + telemetry=settings.telemetry_enabled, + ) + + def connect_event_bus(self, bus: EventBus) -> None: + """Connect to the event bus for inter-agent communication.""" + self.event_bus = bus + + # Subscribe to 
relevant events + bus.subscribe(f"agent.{self.agent_id}.*")(self._handle_direct_message) + bus.subscribe("agent.task.assigned")(self._handle_task_assignment) + + async def _handle_direct_message(self, event: Event) -> None: + """Handle direct messages to this agent.""" + logger.debug("%s received message: %s", self.name, event.type) + + async def _handle_task_assignment(self, event: Event) -> None: + """Handle task assignment events.""" + assigned_agent = event.data.get("agent_id") + if assigned_agent == self.agent_id: + task_id = event.data.get("task_id") + description = event.data.get("description", "") + logger.info("%s assigned task %s: %s", self.name, task_id, description[:50]) + + # Execute the task + await self.execute_task(task_id, description, event.data) + + @abstractmethod + async def execute_task(self, task_id: str, description: str, context: dict) -> Any: + """Execute a task assigned to this agent. + + Must be implemented by subclasses. + """ + pass + + async def run(self, message: str) -> str: + """Run the agent with a message. + + Returns: + Agent response + """ + result = self.agent.run(message, stream=False) + response = result.content if hasattr(result, "content") else str(result) + + # Emit completion event + if self.event_bus: + await self.event_bus.publish(Event( + type=f"agent.{self.agent_id}.response", + source=self.agent_id, + data={"input": message, "output": response}, + )) + + return response + + def get_capabilities(self) -> list[str]: + """Get list of capabilities this agent provides.""" + return self.tools + + def get_status(self) -> dict: + """Get current agent status.""" + return { + "agent_id": self.agent_id, + "name": self.name, + "role": self.role, + "status": "ready", + "tools": self.tools, + } diff --git a/src/agents/echo.py b/src/agents/echo.py new file mode 100644 index 0000000..7bb8a70 --- /dev/null +++ b/src/agents/echo.py @@ -0,0 +1,81 @@ +"""Echo Agent — Memory and context management. 
+ +Capabilities: +- Memory retrieval +- Context synthesis +- User profile management +- Conversation history +""" + +from typing import Any + +from agents.base import BaseAgent + + +ECHO_SYSTEM_PROMPT = """You are Echo, a memory and context management specialist. + +Your role is to remember, retrieve, and synthesize information from the past. + +## Capabilities + +- Search past conversations +- Retrieve user preferences +- Synthesize context from multiple sources +- Manage user profile + +## Guidelines + +1. **Be accurate** — Only state what we actually know +2. **Be relevant** — Filter for context that matters now +3. **Be concise** — Summarize, don't dump everything +4. **Acknowledge uncertainty** — Say when memory is unclear + +## Tool Usage + +- Use memory_search to find relevant past context +- Use read_file to access vault files +- Use write_file to update user profile + +## Response Format + +Provide memory retrieval in this structure: +- Direct answer (what we know) +- Context (relevant past discussions) +- Confidence (certain/likely/speculative) +- Source (where this came from) + +You work for Timmy, the sovereign AI orchestrator. Be the keeper of institutional knowledge. +""" + + +class EchoAgent(BaseAgent): + """Memory and context specialist.""" + + def __init__(self, agent_id: str = "echo") -> None: + super().__init__( + agent_id=agent_id, + name="Echo", + role="memory", + system_prompt=ECHO_SYSTEM_PROMPT, + tools=["memory_search", "read_file", "write_file"], + ) + + async def execute_task(self, task_id: str, description: str, context: dict) -> Any: + """Execute a memory retrieval task.""" + # Extract what to search for + prompt = f"Search memory and provide relevant context:\n\nTask: {description}\n\nSynthesize findings clearly." 
+ + result = await self.run(prompt) + + return { + "task_id": task_id, + "agent": self.agent_id, + "result": result, + "status": "completed", + } + + async def recall(self, query: str, include_sources: bool = True) -> str: + """Quick memory recall.""" + sources = "with sources" if include_sources else "" + prompt = f"Recall information about: {query} {sources}\n\nProvide relevant context from memory." + return await self.run(prompt) diff --git a/src/agents/forge.py b/src/agents/forge.py new file mode 100644 index 0000000..fbe44b2 --- /dev/null +++ b/src/agents/forge.py @@ -0,0 +1,92 @@ +"""Forge Agent — Code generation and tool building. + +Capabilities: +- Code generation +- Tool/script creation +- System modifications +- Debugging assistance +""" + +from typing import Any + +from agents.base import BaseAgent + + +FORGE_SYSTEM_PROMPT = """You are Forge, a code generation and tool building specialist. + +Your role is to write code, create tools, and modify systems. + +## Capabilities + +- Python code generation +- Tool/script creation +- File operations +- Code explanation and debugging + +## Guidelines + +1. **Write clean code** — Follow PEP 8, add docstrings +2. **Be safe** — Never execute destructive operations without confirmation +3. **Explain your work** — Provide context for what the code does +4. **Test mentally** — Walk through the logic before presenting + +## Tool Usage + +- Use python for code execution and testing +- Use write_file to save code (requires confirmation) +- Use read_file to examine existing code +- Use shell for system operations (requires confirmation) + +## Response Format + +Provide code in this structure: +- Purpose (what this code does) +- Code block (with language tag) +- Usage example +- Notes (any important considerations) + +You work for Timmy, the sovereign AI orchestrator. Build reliable, well-documented tools. 
+""" + + +class ForgeAgent(BaseAgent): + """Code and tool building specialist.""" + + def __init__(self, agent_id: str = "forge") -> None: + super().__init__( + agent_id=agent_id, + name="Forge", + role="code", + system_prompt=FORGE_SYSTEM_PROMPT, + tools=["python", "write_file", "read_file", "list_directory"], + ) + + async def execute_task(self, task_id: str, description: str, context: dict) -> Any: + """Execute a code/task building task.""" + prompt = f"Create the requested code or tool:\n\nTask: {description}\n\nProvide complete, working code with documentation." + + result = await self.run(prompt) + + return { + "task_id": task_id, + "agent": self.agent_id, + "result": result, + "status": "completed", + } + + async def generate_tool(self, name: str, purpose: str, parameters: list) -> str: + """Generate a new MCP tool.""" + params_str = ", ".join(parameters) + prompt = f"""Create a new MCP tool named '{name}'. + +Purpose: {purpose} +Parameters: {params_str} + +Generate: +1. The tool function with proper error handling +2. The MCP schema +3. Registration code + +Follow the MCP pattern used in existing tools.""" + + return await self.run(prompt) diff --git a/src/agents/helm.py b/src/agents/helm.py new file mode 100644 index 0000000..7d5c9f3 --- /dev/null +++ b/src/agents/helm.py @@ -0,0 +1,106 @@ +"""Helm Agent — Routing and orchestration decisions. + +Capabilities: +- Task analysis +- Agent selection +- Workflow planning +- Priority management +""" + +from typing import Any + +from agents.base import BaseAgent + + +HELM_SYSTEM_PROMPT = """You are Helm, a routing and orchestration specialist. + +Your role is to analyze tasks and decide how to route them to other agents. + +## Capabilities + +- Task analysis and decomposition +- Agent selection for tasks +- Workflow planning +- Priority assessment + +## Guidelines + +1. **Analyze carefully** — Understand what the task really needs +2. **Route wisely** — Match tasks to agent strengths +3. 
**Consider dependencies** — Some tasks need sequencing +4. **Be efficient** — Don't over-complicate simple tasks + +## Agent Roster + +- Seer: Research, information gathering +- Forge: Code, tools, system changes +- Quill: Writing, documentation +- Echo: Memory, context retrieval +- Mace: Security, validation (use for sensitive operations) + +## Response Format + +Provide routing decisions as: +- Task breakdown (subtasks if needed) +- Agent assignment (who does what) +- Execution order (sequence if relevant) +- Rationale (why this routing) + +You work for Timmy, the sovereign AI orchestrator. Be the dispatcher that keeps everything flowing. +""" + + +class HelmAgent(BaseAgent): + """Routing and orchestration specialist.""" + + def __init__(self, agent_id: str = "helm") -> None: + super().__init__( + agent_id=agent_id, + name="Helm", + role="routing", + system_prompt=HELM_SYSTEM_PROMPT, + tools=["memory_search"], # May need to check past routing decisions + ) + + async def execute_task(self, task_id: str, description: str, context: dict) -> Any: + """Execute a routing task.""" + prompt = f"Analyze and route this task:\n\nTask: {description}\n\nProvide routing decision with rationale." 
+ + result = await self.run(prompt) + + return { + "task_id": task_id, + "agent": self.agent_id, + "result": result, + "status": "completed", + } + + async def route_request(self, request: str) -> dict: + """Analyze a request and suggest routing.""" + prompt = f"""Analyze this request and determine the best agent(s) to handle it: + +Request: {request} + +Respond in this format: +Primary Agent: [agent name] +Reason: [why this agent] +Secondary Agents: [if needed] +Complexity: [simple/moderate/complex] +""" + result = await self.run(prompt) + + # Parse result into structured format + # This is simplified - in production, use structured output + return { + "analysis": result, + "primary_agent": self._extract_agent(result), + } + + def _extract_agent(self, text: str) -> str: + """Extract agent name from routing text.""" + agents = ["seer", "forge", "quill", "echo", "mace", "helm"] + text_lower = text.lower() + for agent in agents: + if agent in text_lower: + return agent + return "timmy" # Default to orchestrator diff --git a/src/agents/quill.py b/src/agents/quill.py new file mode 100644 index 0000000..199d36e --- /dev/null +++ b/src/agents/quill.py @@ -0,0 +1,80 @@ +"""Quill Agent — Writing and content generation. + +Capabilities: +- Documentation writing +- Content creation +- Text editing +- Summarization +""" + +from typing import Any + +from agents.base import BaseAgent + + +QUILL_SYSTEM_PROMPT = """You are Quill, a writing and content generation specialist. + +Your role is to create, edit, and improve written content. + +## Capabilities + +- Documentation writing +- Content creation +- Text editing and refinement +- Summarization +- Style adaptation + +## Guidelines + +1. **Write clearly** — Plain language, logical structure +2. **Know your audience** — Adapt tone and complexity +3. **Be concise** — Cut unnecessary words +4. 
**Use formatting** — Headers, lists, emphasis for readability + +## Tool Usage + +- Use write_file to save documents +- Use read_file to review existing content +- Use memory_search to check style preferences + +## Response Format + +Provide written content with: +- Clear structure (headers, sections) +- Appropriate tone for the context +- Proper formatting (markdown) +- Brief explanation of choices made + +You work for Timmy, the sovereign AI orchestrator. Create polished, professional content. +""" + + +class QuillAgent(BaseAgent): + """Writing and content specialist.""" + + def __init__(self, agent_id: str = "quill") -> None: + super().__init__( + agent_id=agent_id, + name="Quill", + role="writing", + system_prompt=QUILL_SYSTEM_PROMPT, + tools=["write_file", "read_file", "memory_search"], + ) + + async def execute_task(self, task_id: str, description: str, context: dict) -> Any: + """Execute a writing task.""" + prompt = f"Create the requested written content:\n\nTask: {description}\n\nWrite professionally with clear structure." + + result = await self.run(prompt) + + return { + "task_id": task_id, + "agent": self.agent_id, + "result": result, + "status": "completed", + } + + async def write_documentation(self, topic: str, format: str = "markdown") -> str: + """Write documentation for a topic.""" + prompt = f"Write comprehensive documentation for: {topic}\n\nFormat: {format}\nInclude: Overview, Usage, Examples, Notes" + return await self.run(prompt) diff --git a/src/agents/seer.py b/src/agents/seer.py new file mode 100644 index 0000000..3e3e58f --- /dev/null +++ b/src/agents/seer.py @@ -0,0 +1,91 @@ +"""Seer Agent — Research and information gathering. + +Capabilities: +- Web search +- Information synthesis +- Fact checking +- Source evaluation +""" + +from typing import Any + +from agents.base import BaseAgent +from events.bus import Event + + +SEER_SYSTEM_PROMPT = """You are Seer, a research and information gathering specialist. 
+ +Your role is to find, evaluate, and synthesize information from external sources. + +## Capabilities + +- Web search for current information +- File reading for local documents +- Information synthesis and summarization +- Source evaluation (credibility assessment) + +## Guidelines + +1. **Be thorough** — Search multiple angles, verify facts +2. **Be skeptical** — Evaluate source credibility +3. **Be concise** — Summarize findings clearly +4. **Cite sources** — Reference where information came from + +## Tool Usage + +- Use web_search for external information +- Use read_file for local documents +- Use memory_search to check if we already know this + +## Response Format + +Provide findings in structured format: +- Summary (2-3 sentences) +- Key facts (bullet points) +- Sources (where information came from) +- Confidence level (high/medium/low) + +You work for Timmy, the sovereign AI orchestrator. Report findings clearly and objectively. +""" + + +class SeerAgent(BaseAgent): + """Research specialist agent.""" + + def __init__(self, agent_id: str = "seer") -> None: + super().__init__( + agent_id=agent_id, + name="Seer", + role="research", + system_prompt=SEER_SYSTEM_PROMPT, + tools=["web_search", "read_file", "memory_search"], + ) + + async def execute_task(self, task_id: str, description: str, context: dict) -> Any: + """Execute a research task.""" + # Determine research approach + if "file" in description.lower() or "document" in description.lower(): + # Local document research + prompt = f"Read and analyze the referenced document. Provide key findings:\n\nTask: {description}" + else: + # Web research + prompt = f"Research the following topic thoroughly. 
Search for current information, evaluate sources, and provide a comprehensive summary:\n\nTask: {description}" + + result = await self.run(prompt) + + return { + "task_id": task_id, + "agent": self.agent_id, + "result": result, + "status": "completed", + } + + async def research_topic(self, topic: str, depth: str = "standard") -> str: + """Quick research on a topic.""" + prompts = { + "quick": f"Quick search on: {topic}. Provide 3-5 key facts.", + "standard": f"Research: {topic}. Search, synthesize, and summarize findings.", + "deep": f"Deep research on: {topic}. Multiple searches, fact-checking, comprehensive report.", + } + + return await self.run(prompts.get(depth, prompts["standard"])) diff --git a/src/agents/timmy.py b/src/agents/timmy.py new file mode 100644 index 0000000..acb314c --- /dev/null +++ b/src/agents/timmy.py @@ -0,0 +1,184 @@ +"""Timmy — The orchestrator agent. + +Coordinates all sub-agents and handles user interaction. +Uses the three-tier memory system and MCP tools. +""" + +import logging +from typing import Any, Optional + +from agno.agent import Agent +from agno.models.ollama import Ollama + +from agents.base import BaseAgent +from config import settings +from events.bus import EventBus, event_bus +from mcp.registry import tool_registry + +logger = logging.getLogger(__name__) + + +TIMMY_ORCHESTRATOR_PROMPT = """You are Timmy, a sovereign AI orchestrator running locally on this Mac. + +## Your Role + +You are the primary interface between the user and the agent swarm. You: +1. Understand user requests +2. Decide whether to handle directly or delegate to sub-agents +3. Coordinate multi-agent workflows when needed +4. 
Maintain continuity using the three-tier memory system + +## Sub-Agent Roster + +| Agent | Role | When to Use | +|-------|------|-------------| +| Seer | Research | External info, web search, facts | +| Forge | Code | Programming, tools, file operations | +| Quill | Writing | Documentation, content creation | +| Echo | Memory | Past conversations, user profile | +| Helm | Routing | Complex multi-step workflows | +| Mace | Security | Validation, sensitive operations | + +## Decision Framework + +**Handle directly if:** +- Simple question (identity, capabilities) +- General knowledge +- Social/conversational + +**Delegate if:** +- Requires specialized skills +- Needs external research (Seer) +- Involves code (Forge) +- Needs past context (Echo) +- Complex workflow (Helm) + +## Memory System + +You have three tiers of memory: +1. **Hot Memory** — Always loaded (MEMORY.md) +2. **Vault** — Structured storage (memory/) +3. **Semantic** — Vector search for recall + +Use `memory_search` when the user refers to past conversations. + +## Principles + +1. **Sovereignty** — Everything local, no cloud +2. **Privacy** — User data stays on their Mac +3. **Clarity** — Think clearly, speak plainly +4. **Christian faith** — Grounded in biblical values +5. **Bitcoin economics** — Sound money, self-custody + +Sir, affirmative. 
+""" + + +class TimmyOrchestrator(BaseAgent): + """Main orchestrator agent that coordinates the swarm.""" + + def __init__(self) -> None: + super().__init__( + agent_id="timmy", + name="Timmy", + role="orchestrator", + system_prompt=TIMMY_ORCHESTRATOR_PROMPT, + tools=["web_search", "read_file", "write_file", "python", "memory_search"], + ) + + # Sub-agent registry + self.sub_agents: dict[str, BaseAgent] = {} + + # Connect to event bus + self.connect_event_bus(event_bus) + + logger.info("Timmy Orchestrator initialized") + + def register_sub_agent(self, agent: BaseAgent) -> None: + """Register a sub-agent with the orchestrator.""" + self.sub_agents[agent.agent_id] = agent + agent.connect_event_bus(event_bus) + logger.info("Registered sub-agent: %s", agent.name) + + async def orchestrate(self, user_request: str) -> str: + """Main entry point for user requests. + + Analyzes the request and either handles directly or delegates. + """ + # Quick classification + request_lower = user_request.lower() + + # Direct response patterns (no delegation needed) + direct_patterns = [ + "your name", "who are you", "what are you", + "hello", "hi", "how are you", + "help", "what can you do", + ] + + for pattern in direct_patterns: + if pattern in request_lower: + return await self.run(user_request) + + # Check for memory references + memory_patterns = [ + "we talked about", "we discussed", "remember", + "what did i say", "what did we decide", + "remind me", "have we", + ] + + for pattern in memory_patterns: + if pattern in request_lower: + # Use Echo agent for memory retrieval + echo = self.sub_agents.get("echo") + if echo: + return await echo.recall(user_request) + + # Complex requests - use Helm for routing + helm = self.sub_agents.get("helm") + if helm: + routing = await helm.route_request(user_request) + agent_id = routing.get("primary_agent", "timmy") + + if agent_id in self.sub_agents and agent_id != "timmy": + agent = self.sub_agents[agent_id] + return await 
agent.run(user_request) + + # Default: handle directly + return await self.run(user_request) + + async def execute_task(self, task_id: str, description: str, context: dict) -> Any: + """Execute a task (usually delegates to appropriate agent).""" + return await self.orchestrate(description) + + def get_swarm_status(self) -> dict: + """Get status of all agents in the swarm.""" + return { + "orchestrator": self.get_status(), + "sub_agents": { + aid: agent.get_status() + for aid, agent in self.sub_agents.items() + }, + "total_agents": 1 + len(self.sub_agents), + } + + +# Factory function for creating fully configured Timmy +def create_timmy_swarm() -> TimmyOrchestrator: + """Create Timmy orchestrator with all sub-agents registered.""" + from agents.seer import SeerAgent + from agents.forge import ForgeAgent + from agents.quill import QuillAgent + from agents.echo import EchoAgent + from agents.helm import HelmAgent + + # Create orchestrator + timmy = TimmyOrchestrator() + + # Register sub-agents + timmy.register_sub_agent(SeerAgent()) + timmy.register_sub_agent(ForgeAgent()) + timmy.register_sub_agent(QuillAgent()) + timmy.register_sub_agent(EchoAgent()) + timmy.register_sub_agent(HelmAgent()) + + return timmy diff --git a/src/events/bus.py b/src/events/bus.py new file mode 100644 index 0000000..53614d9 --- /dev/null +++ b/src/events/bus.py @@ -0,0 +1,168 @@ +"""Async Event Bus for inter-agent communication. + +Agents publish and subscribe to events for loose coupling. +Events are typed and carry structured data. 
+""" + +import asyncio +import logging +from dataclasses import dataclass, field +from datetime import datetime, timezone +from typing import Any, Callable, Coroutine + +logger = logging.getLogger(__name__) + + +@dataclass +class Event: + """A typed event in the system.""" + type: str # e.g., "agent.task.assigned", "tool.execution.completed" + source: str # Agent or component that emitted the event + data: dict = field(default_factory=dict) + timestamp: str = field(default_factory=lambda: datetime.now(timezone.utc).isoformat()) + id: str = field(default_factory=lambda: f"evt_{datetime.now(timezone.utc).timestamp()}") + + +# Type alias for event handlers +EventHandler = Callable[[Event], Coroutine[Any, Any, None]] + + +class EventBus: + """Async event bus for publish/subscribe pattern. + + Usage: + bus = EventBus() + + # Subscribe to events + @bus.subscribe("agent.task.*") + async def handle_task(event: Event): + print(f"Task event: {event.data}") + + # Publish events + await bus.publish(Event( + type="agent.task.assigned", + source="timmy", + data={"task_id": "123", "agent": "forge"} + )) + """ + + def __init__(self) -> None: + self._subscribers: dict[str, list[EventHandler]] = {} + self._history: list[Event] = [] + self._max_history = 1000 + logger.info("EventBus initialized") + + def subscribe(self, event_pattern: str) -> Callable[[EventHandler], EventHandler]: + """Decorator to subscribe to events matching a pattern. 
+ + Patterns support wildcards: + - "agent.task.assigned" — exact match + - "agent.task.*" — any task event + - "agent.*" — any agent event + - "*" — all events + """ + def decorator(handler: EventHandler) -> EventHandler: + if event_pattern not in self._subscribers: + self._subscribers[event_pattern] = [] + self._subscribers[event_pattern].append(handler) + logger.debug("Subscribed handler to '%s'", event_pattern) + return handler + return decorator + + def unsubscribe(self, event_pattern: str, handler: EventHandler) -> bool: + """Remove a handler from a subscription.""" + if event_pattern not in self._subscribers: + return False + + if handler in self._subscribers[event_pattern]: + self._subscribers[event_pattern].remove(handler) + logger.debug("Unsubscribed handler from '%s'", event_pattern) + return True + + return False + + async def publish(self, event: Event) -> int: + """Publish an event to all matching subscribers. + + Returns: + Number of handlers invoked + """ + # Store in history + self._history.append(event) + if len(self._history) > self._max_history: + self._history = self._history[-self._max_history:] + + # Find matching handlers + handlers: list[EventHandler] = [] + + for pattern, pattern_handlers in self._subscribers.items(): + if self._match_pattern(event.type, pattern): + handlers.extend(pattern_handlers) + + # Invoke handlers concurrently + if handlers: + await asyncio.gather( + *[self._invoke_handler(h, event) for h in handlers], + return_exceptions=True + ) + + logger.debug("Published event '%s' to %d handlers", event.type, len(handlers)) + return len(handlers) + + async def _invoke_handler(self, handler: EventHandler, event: Event) -> None: + """Invoke a handler with error handling.""" + try: + await handler(event) + except Exception as exc: + logger.error("Event handler failed for '%s': %s", event.type, exc) + + def _match_pattern(self, event_type: str, pattern: str) -> bool: + """Check if event type matches a wildcard pattern.""" + if 
pattern == "*": + return True + + if pattern.endswith(".*"): + prefix = pattern[:-2] + return event_type.startswith(prefix + ".") + + return event_type == pattern + + def get_history( + self, + event_type: str | None = None, + source: str | None = None, + limit: int = 100, + ) -> list[Event]: + """Get recent event history with optional filtering.""" + events = self._history + + if event_type: + events = [e for e in events if e.type == event_type] + + if source: + events = [e for e in events if e.source == source] + + return events[-limit:] + + def clear_history(self) -> None: + """Clear event history.""" + self._history.clear() + + +# Module-level singleton +event_bus = EventBus() + + +# Convenience functions +async def emit(event_type: str, source: str, data: dict) -> int: + """Quick emit an event.""" + return await event_bus.publish(Event( + type=event_type, + source=source, + data=data, + )) + + +def on(event_pattern: str) -> Callable[[EventHandler], EventHandler]: + """Quick subscribe decorator.""" + return event_bus.subscribe(event_pattern) diff --git a/src/mcp/__init__.py b/src/mcp/__init__.py new file mode 100644 index 0000000..5690035 --- /dev/null +++ b/src/mcp/__init__.py @@ -0,0 +1,17 @@ +"""MCP (Model Context Protocol) package. + +Provides tool registry, server, and schema management. +""" + +from mcp.registry import tool_registry, register_tool +from mcp.server import mcp_server, MCPServer, MCPHTTPServer +from mcp.schemas.base import create_tool_schema + +__all__ = [ + "tool_registry", + "register_tool", + "mcp_server", + "MCPServer", + "MCPHTTPServer", + "create_tool_schema", +] diff --git a/src/mcp/bootstrap.py b/src/mcp/bootstrap.py new file mode 100644 index 0000000..1ca9cd2 --- /dev/null +++ b/src/mcp/bootstrap.py @@ -0,0 +1,71 @@ +"""Bootstrap the MCP system by loading all tools. + +This module is responsible for: +1. Loading all tool modules from src/tools/ +2. Registering them with the tool registry +3. Verifying tool health +4. 
Reporting status +""" + +import importlib +import logging +from pathlib import Path + +from mcp.registry import tool_registry + +logger = logging.getLogger(__name__) + +# Tool modules to load +TOOL_MODULES = [ + "tools.web_search", + "tools.file_ops", + "tools.code_exec", + "tools.memory_tool", +] + + +def bootstrap_mcp() -> dict: + """Initialize the MCP system by loading all tools. + + Returns: + Status dict with loaded tools and any errors + """ + loaded = [] + errors = [] + + for module_name in TOOL_MODULES: + try: + # Import the module (this triggers @register_tool decorators) + importlib.import_module(module_name) + loaded.append(module_name) + logger.info("Loaded tool module: %s", module_name) + except Exception as exc: + errors.append({"module": module_name, "error": str(exc)}) + logger.error("Failed to load tool module %s: %s", module_name, exc) + + # Get registry status + registry_status = tool_registry.to_dict() + + status = { + "loaded_modules": loaded, + "errors": errors, + "total_tools": len(registry_status.get("tools", [])), + "tools_by_category": registry_status.get("categories", {}), + "tool_names": tool_registry.list_tools(), + } + + logger.info( + "MCP Bootstrap complete: %d tools loaded from %d modules", + status["total_tools"], + len(loaded) + ) + + return status + + +def get_tool_status() -> dict: + """Get current status of all tools.""" + return { + "tools": tool_registry.to_dict(), + "metrics": tool_registry.get_metrics(), + } diff --git a/src/mcp/registry.py b/src/mcp/registry.py new file mode 100644 index 0000000..292f1cd --- /dev/null +++ b/src/mcp/registry.py @@ -0,0 +1,340 @@ +"""MCP Tool Registry — Dynamic tool discovery and management. + +The registry maintains a catalog of all available tools, their schemas, +and health status. Tools can be registered dynamically at runtime. 

Usage:
    from mcp.registry import tool_registry

    # Register a tool
    tool_registry.register("web_search", web_search_schema, web_search_func)

    # Discover tools
    tools = tool_registry.discover(capabilities=["search"])

    # Execute a tool
    result = tool_registry.execute("web_search", {"query": "Bitcoin"})
"""

import asyncio  # NOTE(review): appears unused in this module — confirm before removing
import inspect
import logging
import time
from dataclasses import dataclass, field
from typing import Any, Callable, Optional

from mcp.schemas.base import create_tool_schema

logger = logging.getLogger(__name__)


@dataclass
class ToolRecord:
    """A registered tool with metadata."""
    name: str                       # unique registry key
    schema: dict                    # JSON schema describing the interface
    handler: Callable               # sync or async callable run by execute()
    category: str = "general"
    health_status: str = "unknown"  # healthy, degraded, unhealthy
    last_execution: Optional[float] = None  # time.time() of last run, None if never
    execution_count: int = 0
    error_count: int = 0
    avg_latency_ms: float = 0.0     # exponentially-weighted rolling average
    added_at: float = field(default_factory=time.time)
    requires_confirmation: bool = False  # user approval needed before running


class ToolRegistry:
    """Central registry for all MCP tools.

    Maps tool name -> ToolRecord and keeps a category -> [names] index.
    Plain dicts, no locking — NOTE(review): not safe for concurrent
    mutation from multiple threads; confirm single-threaded use.
    """

    def __init__(self) -> None:
        self._tools: dict[str, ToolRecord] = {}
        self._categories: dict[str, list[str]] = {}
        logger.info("ToolRegistry initialized")

    def register(
        self,
        name: str,
        schema: dict,
        handler: Callable,
        category: str = "general",
        requires_confirmation: bool = False,
    ) -> ToolRecord:
        """Register a new tool.

        Re-registering an existing name replaces the record (a warning is
        logged); the old name stays in its former category index.

        Args:
            name: Unique tool name
            schema: JSON schema describing inputs/outputs
            handler: Function to execute
            category: Tool category for organization
            requires_confirmation: If True, user must approve before execution

        Returns:
            The registered ToolRecord
        """
        if name in self._tools:
            logger.warning("Tool '%s' already registered, replacing", name)

        record = ToolRecord(
            name=name,
            schema=schema,
            handler=handler,
            category=category,
            requires_confirmation=requires_confirmation,
        )

        self._tools[name] = record

        # Add to category
        if category not in self._categories:
            self._categories[category] = []
        if name not in self._categories[category]:
            self._categories[category].append(name)

        logger.info("Registered tool: %s (category: %s)", name, category)
        return record

    def unregister(self, name: str) -> bool:
        """Remove a tool from the registry.

        Returns:
            True if the tool existed and was removed, False otherwise.
        """
        if name not in self._tools:
            return False

        record = self._tools.pop(name)

        # Remove from category
        if record.category in self._categories:
            if name in self._categories[record.category]:
                self._categories[record.category].remove(name)

        logger.info("Unregistered tool: %s", name)
        return True

    def get(self, name: str) -> Optional[ToolRecord]:
        """Get a tool record by name (None if unknown)."""
        return self._tools.get(name)

    def get_handler(self, name: str) -> Optional[Callable]:
        """Get just the handler function for a tool (None if unknown)."""
        record = self._tools.get(name)
        return record.handler if record else None

    def get_schema(self, name: str) -> Optional[dict]:
        """Get the JSON schema for a tool (None if unknown)."""
        record = self._tools.get(name)
        return record.schema if record else None

    def list_tools(self, category: Optional[str] = None) -> list[str]:
        """List all tool names, optionally filtered by category."""
        if category:
            return self._categories.get(category, [])
        return list(self._tools.keys())

    def list_categories(self) -> list[str]:
        """List all tool categories."""
        return list(self._categories.keys())
    def discover(
        self,
        query: Optional[str] = None,
        category: Optional[str] = None,
        healthy_only: bool = True,
    ) -> list[ToolRecord]:
        """Discover tools matching criteria.

        Args:
            query: Case-insensitive substring matched against tool names
                and schema descriptions
            category: Filter by category
            healthy_only: If True, exclude tools marked "unhealthy"
                ("degraded" and "unknown" tools are still returned)

        Returns:
            List of matching ToolRecords
        """
        results = []

        for name, record in self._tools.items():
            # Category filter
            if category and record.category != category:
                continue

            # Health filter — only "unhealthy" is excluded.
            if healthy_only and record.health_status == "unhealthy":
                continue

            # Query filter: substring match on name or schema description.
            if query:
                query_lower = query.lower()
                name_match = query_lower in name.lower()
                desc = record.schema.get("description", "")
                desc_match = query_lower in desc.lower()
                if not (name_match or desc_match):
                    continue

            results.append(record)

        return results

    async def execute(self, name: str, params: dict) -> Any:
        """Execute a tool by name with given parameters.

        Async handlers are awaited; sync handlers are called directly
        (NOTE(review): that blocks the event loop — confirm handlers are
        fast or move them to a thread). Success updates last_execution,
        the EWMA latency and marks the tool "healthy"; failure bumps
        error_count and may downgrade health.

        Args:
            name: Tool name
            params: Parameters to pass to the tool

        Returns:
            Tool execution result

        Raises:
            ValueError: If tool not found
            RuntimeError: If tool execution fails (original exception chained)
        """
        record = self._tools.get(name)
        if not record:
            raise ValueError(f"Tool '{name}' not found in registry")

        start_time = time.time()

        try:
            # Check if handler is async
            if inspect.iscoroutinefunction(record.handler):
                result = await record.handler(**params)
            else:
                result = record.handler(**params)

            # Update metrics
            latency_ms = (time.time() - start_time) * 1000
            record.last_execution = time.time()
            record.execution_count += 1

            # Update rolling average latency (EWMA: 10% weight on new sample)
            if record.execution_count == 1:
                record.avg_latency_ms = latency_ms
            else:
                record.avg_latency_ms = (
                    record.avg_latency_ms * 0.9 + latency_ms * 0.1
                )

            # Mark healthy on success
            record.health_status = "healthy"

            logger.debug("Tool '%s' executed in %.2fms", name, latency_ms)
            return result

        except Exception as exc:
            record.error_count += 1
            record.execution_count += 1

            # Degrade health on repeated errors:
            # >50% lifetime error rate -> unhealthy, >20% -> degraded.
            error_rate = record.error_count / record.execution_count
            if error_rate > 0.5:
                record.health_status = "unhealthy"
                logger.error("Tool '%s' marked unhealthy (error rate: %.1f%%)",
                             name, error_rate * 100)
            elif error_rate > 0.2:
                record.health_status = "degraded"
                logger.warning("Tool '%s' degraded (error rate: %.1f%%)",
                               name, error_rate * 100)

            raise RuntimeError(f"Tool '{name}' execution failed: {exc}") from exc

    def check_health(self, name: str) -> str:
        """Return a tool's health status, or "not_found" for unknown names."""
        record = self._tools.get(name)
        if not record:
            return "not_found"
        return record.health_status
record.health_status, + "executions": record.execution_count, + "errors": record.error_count, + "avg_latency_ms": round(record.avg_latency_ms, 2), + } + + # Return metrics for all tools + return { + name: self.get_metrics(name) + for name in self._tools.keys() + } + + def to_dict(self) -> dict: + """Export registry as dictionary (for API/dashboard).""" + return { + "tools": [ + { + "name": r.name, + "schema": r.schema, + "category": r.category, + "health": r.health_status, + "requires_confirmation": r.requires_confirmation, + } + for r in self._tools.values() + ], + "categories": self._categories, + "total_tools": len(self._tools), + } + + +# Module-level singleton +tool_registry = ToolRegistry() + + +def register_tool( + name: Optional[str] = None, + category: str = "general", + schema: Optional[dict] = None, + requires_confirmation: bool = False, +): + """Decorator for registering a function as an MCP tool. + + Usage: + @register_tool(name="web_search", category="research") + def web_search(query: str, max_results: int = 5) -> str: + ... 
+ """ + def decorator(func: Callable) -> Callable: + tool_name = name or func.__name__ + + # Auto-generate schema if not provided + if schema is None: + # Try to infer from type hints + sig = inspect.signature(func) + params = {} + required = [] + + for param_name, param in sig.parameters.items(): + if param.default == inspect.Parameter.empty: + required.append(param_name) + params[param_name] = {"type": "string"} + else: + params[param_name] = { + "type": "string", + "default": str(param.default), + } + + tool_schema = create_tool_schema( + name=tool_name, + description=func.__doc__ or f"Execute {tool_name}", + parameters=params, + required=required, + ) + else: + tool_schema = schema + + tool_registry.register( + name=tool_name, + schema=tool_schema, + handler=func, + category=category, + requires_confirmation=requires_confirmation, + ) + + return func + return decorator diff --git a/src/mcp/schemas/base.py b/src/mcp/schemas/base.py new file mode 100644 index 0000000..97a73cb --- /dev/null +++ b/src/mcp/schemas/base.py @@ -0,0 +1,52 @@ +"""Base schemas for MCP (Model Context Protocol) tools. + +All tools must provide a JSON schema describing their interface. +This enables dynamic discovery and type-safe invocation. +""" + +from typing import Any + + +def create_tool_schema( + name: str, + description: str, + parameters: dict[str, Any], + required: list[str] | None = None, + returns: dict[str, Any] | None = None, +) -> dict: + """Create a standard MCP tool schema. 
+ + Args: + name: Tool name (must be unique) + description: Human-readable description + parameters: JSON schema for input parameters + required: List of required parameter names + returns: JSON schema for return value + + Returns: + Complete tool schema dict + """ + return { + "name": name, + "description": description, + "parameters": { + "type": "object", + "properties": parameters, + "required": required or [], + }, + "returns": returns or {"type": "string"}, + } + + +# Common parameter schemas +PARAM_STRING = {"type": "string"} +PARAM_INTEGER = {"type": "integer"} +PARAM_BOOLEAN = {"type": "boolean"} +PARAM_ARRAY_STRINGS = {"type": "array", "items": {"type": "string"}} +PARAM_OBJECT = {"type": "object"} + +# Common return schemas +RETURN_STRING = {"type": "string"} +RETURN_OBJECT = {"type": "object"} +RETURN_ARRAY = {"type": "array"} +RETURN_BOOLEAN = {"type": "boolean"} diff --git a/src/mcp/server.py b/src/mcp/server.py new file mode 100644 index 0000000..7d04684 --- /dev/null +++ b/src/mcp/server.py @@ -0,0 +1,210 @@ +"""MCP (Model Context Protocol) Server. + +Implements the MCP protocol for tool discovery and execution. +Agents communicate with this server to discover and invoke tools. + +The server can run: +1. In-process (direct method calls) — fastest, for local agents +2. HTTP API — for external clients +3. Stdio — for subprocess-based agents +""" + +import asyncio +import json +import logging +from typing import Any, Optional + +from mcp.registry import tool_registry + +logger = logging.getLogger(__name__) + + +class MCPServer: + """Model Context Protocol server for tool management. 

    Provides standard MCP endpoints:
    - list_tools: Discover available tools
    - call_tool: Execute a tool
    - get_schema: Get tool input/output schemas
    """

    def __init__(self) -> None:
        # All operations delegate to the module-level registry singleton.
        self.registry = tool_registry
        logger.info("MCP Server initialized")

    def list_tools(
        self,
        category: Optional[str] = None,
        query: Optional[str] = None,
    ) -> list[dict]:
        """List available tools.

        MCP Protocol: tools/list

        Tools marked "unhealthy" are filtered out (healthy_only=True).
        """
        tools = self.registry.discover(
            query=query,
            category=category,
            healthy_only=True,
        )

        return [
            {
                "name": t.name,
                "description": t.schema.get("description", ""),
                "parameters": t.schema.get("parameters", {}),
                "category": t.category,
            }
            for t in tools
        ]

    async def call_tool(self, name: str, arguments: dict) -> dict:
        """Execute a tool with given arguments.

        MCP Protocol: tools/call

        Args:
            name: Tool name
            arguments: Tool parameters

        Returns:
            MCP-style result dict {"content": [...], "isError": bool}.
            Failures are reported in-band, never raised to the caller.
        """
        try:
            result = await self.registry.execute(name, arguments)
            return {
                "content": [
                    {"type": "text", "text": str(result)}
                ],
                "isError": False,
            }
        except Exception as exc:
            logger.error("Tool execution failed: %s", exc)
            return {
                "content": [
                    {"type": "text", "text": f"Error: {exc}"}
                ],
                "isError": True,
            }

    def get_schema(self, name: str) -> Optional[dict]:
        """Get the JSON schema for a tool (None if unknown).

        MCP Protocol: tools/schema
        """
        return self.registry.get_schema(name)

    def get_tool_info(self, name: str) -> Optional[dict]:
        """Get detailed info about a tool including health metrics."""
        record = self.registry.get(name)
        if not record:
            return None

        return {
            "name": record.name,
            "schema": record.schema,
            "category": record.category,
            "health": record.health_status,
            "metrics": {
                "executions": record.execution_count,
                "errors": record.error_count,
                "avg_latency_ms": round(record.avg_latency_ms, 2),
            },
            "requires_confirmation": record.requires_confirmation,
        }

    def health_check(self) -> dict:
        """Server health status with per-health-state tool counts.

        NOTE(review): "status" is hard-coded to "healthy" regardless of
        tool health — confirm whether it should degrade when tools fail.
        """
        tools = self.registry.list_tools()
        healthy = sum(
            1 for t in tools
            if self.registry.check_health(t) == "healthy"
        )

        return {
            "status": "healthy",
            "total_tools": len(tools),
            "healthy_tools": healthy,
            "degraded_tools": sum(
                1 for t in tools
                if self.registry.check_health(t) == "degraded"
            ),
            "unhealthy_tools": sum(
                1 for t in tools
                if self.registry.check_health(t) == "unhealthy"
            ),
        }
self.mcp.get_tool_info(name) + if not info: + raise HTTPException(404, f"Tool '{name}' not found") + return info + + @router.get("/tools/{name}/schema") + async def get_schema(name: str): + """Get tool schema.""" + schema = self.mcp.get_schema(name) + if not schema: + raise HTTPException(404, f"Tool '{name}' not found") + return schema + + @router.get("/health") + async def health(): + """Server health check.""" + return self.mcp.health_check() + + return router + + +# Module-level singleton +mcp_server = MCPServer() + + +# Convenience functions for agents +def discover_tools(query: Optional[str] = None) -> list[dict]: + """Quick tool discovery.""" + return mcp_server.list_tools(query=query) + + +async def use_tool(name: str, **kwargs) -> str: + """Execute a tool and return result text.""" + result = await mcp_server.call_tool(name, kwargs) + + if result.get("isError"): + raise RuntimeError(result["content"][0]["text"]) + + return result["content"][0]["text"] diff --git a/src/tools/code_exec.py b/src/tools/code_exec.py new file mode 100644 index 0000000..a1f4222 --- /dev/null +++ b/src/tools/code_exec.py @@ -0,0 +1,124 @@ +"""Code execution tool. + +MCP-compliant tool for executing Python code. +""" + +import logging +import traceback +from typing import Any + +from mcp.registry import register_tool +from mcp.schemas.base import create_tool_schema, PARAM_STRING, PARAM_BOOLEAN, RETURN_STRING + +logger = logging.getLogger(__name__) + + +PYTHON_SCHEMA = create_tool_schema( + name="python", + description="Execute Python code. Use for calculations, data processing, or when precise computation is needed. 
def python(code: str, return_output: bool = True) -> str:
    """Execute Python code in a restricted environment.

    When ``return_output`` is true the code is first compiled in "eval"
    mode so the value of a single expression can be reported; code that
    is not a single expression (statements) falls through to "exec"
    mode. All failures are reported in-band as an "Error: ..." string.

    SECURITY NOTE: restricting ``__builtins__`` is NOT a real sandbox —
    attribute access on literals (e.g. ``().__class__``) can still reach
    arbitrary objects. Do not feed untrusted input to this tool.

    Args:
        code: Python code to execute
        return_output: Whether to return last expression value

    Returns:
        Execution result or error message
    """
    # Safe globals for code execution
    safe_globals = {
        "__builtins__": {
            "abs": abs,
            "all": all,
            "any": any,
            "bin": bin,
            "bool": bool,
            "dict": dict,
            "enumerate": enumerate,
            "filter": filter,
            "float": float,
            "format": format,
            "hex": hex,
            "int": int,
            "isinstance": isinstance,
            "issubclass": issubclass,
            "len": len,
            "list": list,
            "map": map,
            "max": max,
            "min": min,
            "next": next,
            "oct": oct,
            "ord": ord,
            "pow": pow,
            "print": lambda *args, **kwargs: None,  # Disabled
            "range": range,
            "repr": repr,
            "reversed": reversed,
            "round": round,
            "set": set,
            "slice": slice,
            "sorted": sorted,
            "str": str,
            "sum": sum,
            "tuple": tuple,
            "type": type,
            "zip": zip,
        }
    }

    # Allowed modules
    allowed_modules = ["math", "random", "statistics", "datetime", "json"]

    for mod_name in allowed_modules:
        try:
            safe_globals[mod_name] = __import__(mod_name)
        except ImportError:
            pass

    try:
        if return_output:
            # Try expression mode first; statements raise SyntaxError here.
            try:
                compiled = compile(code, "<string>", "eval")
            except SyntaxError:
                compiled = None
            if compiled is not None:
                result = eval(compiled, safe_globals, {})
                return f"Result: {result}"

        # Statement mode (also the fallback when eval-compilation failed).
        # "<string>" instead of "" gives readable tracebacks.
        exec(compile(code, "<string>", "exec"), safe_globals, {})
        return "Code executed successfully."

    except Exception as exc:
        # Single error path replaces the three duplicated handlers of the
        # original implementation; message format is unchanged.
        error_msg = traceback.format_exc()
        logger.error("Python execution failed: %s", exc)
        return f"Error: {exc}\n\n{error_msg}"
def _resolve_path(path: str) -> Path:
    """Resolve *path* against the project root unless it is absolute.

    (Dropped an unused ``from config import settings`` import that the
    original carried here.)
    """
    p = Path(path)
    if p.is_absolute():
        return p

    # Project root = three levels up from src/tools/file_ops.py.
    project_root = Path(__file__).parent.parent.parent
    return project_root / p


def read_file(path: str, limit: int = 0) -> str:
    """Read file contents.

    Args:
        path: File path, absolute or relative to the project root.
        limit: Maximum lines to return (0 = all).

    Returns:
        The file text (possibly truncated with a "... [N lines shown]"
        note), or an "Error: ..." string — errors are reported in-band.
    """
    try:
        filepath = _resolve_path(path)

        if not filepath.exists():
            return f"Error: File not found: {path}"

        if not filepath.is_file():
            return f"Error: Path is not a file: {path}"

        content = filepath.read_text()

        if limit > 0:
            lines = content.split('\n')
            # Only annotate when something was actually cut off; the old
            # check also fired on files with exactly `limit` lines.
            if len(lines) > limit:
                content = '\n'.join(lines[:limit])
                content += f"\n\n... [{limit} lines shown]"

        return content

    except Exception as exc:
        logger.error("Read file failed: %s", exc)
        return f"Error reading file: {exc}"


def write_file(path: str, content: str, append: bool = False) -> str:
    """Write (or append) content to a file, creating parent directories.

    Args:
        path: File path, absolute or relative to the project root.
        content: Text to write.
        append: Append to the file instead of overwriting.

    Returns:
        Success message, or an "Error: ..." string (reported in-band).
    """
    try:
        filepath = _resolve_path(path)

        # Ensure directory exists
        filepath.parent.mkdir(parents=True, exist_ok=True)

        # BUG FIX: the original computed `mode = "a" if append else "w"`
        # but then always called write_text(), silently truncating the
        # file on append=True.
        if append:
            with filepath.open("a") as f:
                f.write(content)
        else:
            filepath.write_text(content)

        action = "appended to" if append else "wrote"
        return f"Successfully {action} {filepath}"

    except Exception as exc:
        logger.error("Write file failed: %s", exc)
        return f"Error writing file: {exc}"


def list_directory(path: str = ".", pattern: str = "*") -> str:
    """List directory contents matching a glob pattern.

    Returns a header line followed by sorted directories then sorted
    files (with a rough size), or an "Error: ..." string in-band.
    """
    try:
        dirpath = _resolve_path(path)

        if not dirpath.exists():
            return f"Error: Directory not found: {path}"

        if not dirpath.is_dir():
            return f"Error: Path is not a directory: {path}"

        items = list(dirpath.glob(pattern))

        files = []
        dirs = []

        for item in items:
            if item.is_dir():
                dirs.append(f"📁 {item.name}/")
            else:
                size = item.stat().st_size
                size_str = f"{size}B" if size < 1024 else f"{size//1024}KB"
                files.append(f"📄 {item.name} ({size_str})")

        result = [f"Contents of {dirpath}:", ""]
        result.extend(sorted(dirs))
        result.extend(sorted(files))

        return "\n".join(result)

    except Exception as exc:
        logger.error("List directory failed: %s", exc)
        return f"Error listing directory: {exc}"
"""Memory search tool.

MCP-compliant tool for searching Timmy's memory.
"""

import logging
from typing import Any

from mcp.registry import register_tool
from mcp.schemas.base import create_tool_schema, PARAM_STRING, PARAM_INTEGER, RETURN_STRING

logger = logging.getLogger(__name__)


MEMORY_SEARCH_SCHEMA = create_tool_schema(
    name="memory_search",
    description="Search Timmy's memory for past conversations, facts, and context. Use when user asks about previous discussions or when you need to recall something from memory.",
    parameters={
        "query": {
            **PARAM_STRING,
            "description": "What to search for in memory",
        },
        "top_k": {
            **PARAM_INTEGER,
            "description": "Number of results to return (1-10)",
            "default": 5,
            "minimum": 1,
            "maximum": 10,
        },
    },
    required=["query"],
    returns=RETURN_STRING,
)


def memory_search(query: str, top_k: int = 5) -> str:
    """Search Timmy's semantic memory for relevant past context.

    Args:
        query: Search query
        top_k: Number of results

    Returns:
        A formatted listing of relevant memories (score-tagged), a
        "nothing found" notice, or an in-band error string.
    """
    try:
        # Imported lazily so the tool degrades gracefully when the
        # memory subsystem is unavailable.
        from timmy.semantic_memory import memory_search as semantic_search

        hits = semantic_search(query, top_k=top_k)

        if not hits:
            return "No relevant memories found."

        out = ["Relevant memories from past conversations:", ""]

        for rank, (content, score) in enumerate(hits, 1):
            if score > 0.8:
                marker = "🔥"
            elif score > 0.5:
                marker = "⭐"
            else:
                marker = "📄"
            out.append(f"{marker} [{rank}] (score: {score:.2f})")
            out.append(f"   {content[:300]}...")
            out.append("")

        return "\n".join(out)

    except Exception as exc:
        logger.error("Memory search failed: %s", exc)
        return f"Memory search error: {exc}"


# Register with MCP
register_tool(name="memory_search", schema=MEMORY_SEARCH_SCHEMA, category="memory")(memory_search)
+ + formatted = [] + for i, r in enumerate(results, 1): + title = r.get("title", "No title") + body = r.get("body", "No description") + href = r.get("href", "") + formatted.append(f"{i}. {title}\n {body[:150]}...\n {href}") + + return "\n\n".join(formatted) + + except Exception as exc: + logger.error("Web search failed: %s", exc) + return f"Search error: {exc}" + + +# Register with MCP +register_tool( + name="web_search", + schema=WEB_SEARCH_SCHEMA, + category="research", +)(web_search) From c658ca829c053c450a23b43b80ff29c956b4ce4a Mon Sep 17 00:00:00 2001 From: Alexander Payne Date: Wed, 25 Feb 2026 19:43:43 -0500 Subject: [PATCH 2/3] Phase 3: Cascade LLM Router with automatic failover - YAML-based provider configuration (config/providers.yaml) - Priority-ordered provider routing - Circuit breaker pattern for failing providers - Health check and availability monitoring - Metrics tracking (latency, errors, success rates) - Support for Ollama, OpenAI, Anthropic, AirLLM providers - Automatic failover on rate limits or errors - REST API endpoints for monitoring and control - 41 comprehensive tests API Endpoints: - POST /api/v1/router/complete - Chat completion with failover - GET /api/v1/router/status - Provider health status - GET /api/v1/router/metrics - Detailed metrics - GET /api/v1/router/providers - List all providers - POST /api/v1/router/providers/{name}/control - Enable/disable/reset - POST /api/v1/router/health-check - Run health checks - GET /api/v1/router/config - View configuration --- IMPLEMENTATION_SUMMARY.md | 326 ++++++++++ config/providers.yaml | 80 +++ .../20260225_223632_add_docstring.md | 31 + .../20260225_223632_break_it.md | 31 + .../20260225_223632_do_something_vague.md | 12 + .../20260225_223632_fix_foo.md | 48 ++ ...fix_foo_important_correction_from_previ.md | 34 ++ .../20260225_224732_fix_foo.md | 48 ++ .../20260225_224733_add_docstring.md | 19 + .../20260225_224733_break_it.md | 31 + .../20260225_224733_do_something_vague.md | 12 + 
.../20260225_224734_fix_foo.md | 31 + ...fix_foo_important_correction_from_previ.md | 34 ++ .../20260225_225049_add_docstring.md | 19 + .../20260225_225049_break_it.md | 31 + .../20260225_225049_do_something_vague.md | 12 + .../20260225_225049_fix_foo.md | 48 ++ ...fix_foo_important_correction_from_previ.md | 34 ++ .../20260225_230304_fix_foo.md | 31 + .../20260225_230305_add_docstring.md | 31 + .../20260225_230305_break_it.md | 31 + .../20260225_230305_do_something_vague.md | 12 + ...fix_foo_important_correction_from_previ.md | 34 ++ .../20260225_230306_fix_foo.md | 48 ++ .../20260225_230553_add_docstring.md | 19 + .../20260225_230553_break_it.md | 31 + .../20260225_230553_do_something_vague.md | 12 + .../20260225_230554_fix_foo.md | 48 ++ ...fix_foo_important_correction_from_previ.md | 34 ++ .../20260225_231440_add_docstring.md | 31 + .../20260225_231440_break_it.md | 31 + .../20260225_231440_do_something_vague.md | 12 + .../20260225_231440_fix_foo.md | 31 + ...fix_foo_important_correction_from_previ.md | 34 ++ .../20260225_231441_fix_foo.md | 48 ++ .../20260225_231645_add_docstring.md | 19 + .../20260225_231645_break_it.md | 31 + .../20260225_231645_do_something_vague.md | 12 + .../20260225_231645_fix_foo.md | 48 ++ ...fix_foo_important_correction_from_previ.md | 34 ++ .../20260225_232402_add_docstring.md | 19 + .../20260225_232402_break_it.md | 31 + .../20260225_232402_do_something_vague.md | 12 + .../20260225_232402_fix_foo.md | 31 + ...fix_foo_important_correction_from_previ.md | 34 ++ .../20260225_232403_fix_foo.md | 48 ++ .../20260226_002427_add_docstring.md | 31 + .../20260226_002427_break_it.md | 31 + .../20260226_002427_do_something_vague.md | 12 + .../20260226_002427_fix_foo.md | 48 ++ .../20260226_002428_fix_foo.md | 31 + ...fix_foo_important_correction_from_previ.md | 34 ++ .../20260226_004233_add_docstring.md | 31 + .../20260226_004233_break_it.md | 31 + .../20260226_004233_do_something_vague.md | 12 + .../20260226_004234_fix_foo.md | 48 ++ 
...fix_foo_important_correction_from_previ.md | 34 ++ src/dashboard/app.py | 2 + src/router/__init__.py | 12 + src/router/api.py | 199 ++++++ src/router/cascade.py | 566 ++++++++++++++++++ tests/test_router_api.py | 358 +++++++++++ tests/test_router_cascade.py | 523 ++++++++++++++++ 63 files changed, 3721 insertions(+) create mode 100644 IMPLEMENTATION_SUMMARY.md create mode 100644 config/providers.yaml create mode 100644 data/self_modify_reports/20260225_223632_add_docstring.md create mode 100644 data/self_modify_reports/20260225_223632_break_it.md create mode 100644 data/self_modify_reports/20260225_223632_do_something_vague.md create mode 100644 data/self_modify_reports/20260225_223632_fix_foo.md create mode 100644 data/self_modify_reports/20260225_223632_fix_foo_important_correction_from_previ.md create mode 100644 data/self_modify_reports/20260225_224732_fix_foo.md create mode 100644 data/self_modify_reports/20260225_224733_add_docstring.md create mode 100644 data/self_modify_reports/20260225_224733_break_it.md create mode 100644 data/self_modify_reports/20260225_224733_do_something_vague.md create mode 100644 data/self_modify_reports/20260225_224734_fix_foo.md create mode 100644 data/self_modify_reports/20260225_224734_fix_foo_important_correction_from_previ.md create mode 100644 data/self_modify_reports/20260225_225049_add_docstring.md create mode 100644 data/self_modify_reports/20260225_225049_break_it.md create mode 100644 data/self_modify_reports/20260225_225049_do_something_vague.md create mode 100644 data/self_modify_reports/20260225_225049_fix_foo.md create mode 100644 data/self_modify_reports/20260225_225049_fix_foo_important_correction_from_previ.md create mode 100644 data/self_modify_reports/20260225_230304_fix_foo.md create mode 100644 data/self_modify_reports/20260225_230305_add_docstring.md create mode 100644 data/self_modify_reports/20260225_230305_break_it.md create mode 100644 data/self_modify_reports/20260225_230305_do_something_vague.md 
create mode 100644 data/self_modify_reports/20260225_230305_fix_foo_important_correction_from_previ.md create mode 100644 data/self_modify_reports/20260225_230306_fix_foo.md create mode 100644 data/self_modify_reports/20260225_230553_add_docstring.md create mode 100644 data/self_modify_reports/20260225_230553_break_it.md create mode 100644 data/self_modify_reports/20260225_230553_do_something_vague.md create mode 100644 data/self_modify_reports/20260225_230554_fix_foo.md create mode 100644 data/self_modify_reports/20260225_230554_fix_foo_important_correction_from_previ.md create mode 100644 data/self_modify_reports/20260225_231440_add_docstring.md create mode 100644 data/self_modify_reports/20260225_231440_break_it.md create mode 100644 data/self_modify_reports/20260225_231440_do_something_vague.md create mode 100644 data/self_modify_reports/20260225_231440_fix_foo.md create mode 100644 data/self_modify_reports/20260225_231440_fix_foo_important_correction_from_previ.md create mode 100644 data/self_modify_reports/20260225_231441_fix_foo.md create mode 100644 data/self_modify_reports/20260225_231645_add_docstring.md create mode 100644 data/self_modify_reports/20260225_231645_break_it.md create mode 100644 data/self_modify_reports/20260225_231645_do_something_vague.md create mode 100644 data/self_modify_reports/20260225_231645_fix_foo.md create mode 100644 data/self_modify_reports/20260225_231645_fix_foo_important_correction_from_previ.md create mode 100644 data/self_modify_reports/20260225_232402_add_docstring.md create mode 100644 data/self_modify_reports/20260225_232402_break_it.md create mode 100644 data/self_modify_reports/20260225_232402_do_something_vague.md create mode 100644 data/self_modify_reports/20260225_232402_fix_foo.md create mode 100644 data/self_modify_reports/20260225_232402_fix_foo_important_correction_from_previ.md create mode 100644 data/self_modify_reports/20260225_232403_fix_foo.md create mode 100644 
data/self_modify_reports/20260226_002427_add_docstring.md create mode 100644 data/self_modify_reports/20260226_002427_break_it.md create mode 100644 data/self_modify_reports/20260226_002427_do_something_vague.md create mode 100644 data/self_modify_reports/20260226_002427_fix_foo.md create mode 100644 data/self_modify_reports/20260226_002428_fix_foo.md create mode 100644 data/self_modify_reports/20260226_002428_fix_foo_important_correction_from_previ.md create mode 100644 data/self_modify_reports/20260226_004233_add_docstring.md create mode 100644 data/self_modify_reports/20260226_004233_break_it.md create mode 100644 data/self_modify_reports/20260226_004233_do_something_vague.md create mode 100644 data/self_modify_reports/20260226_004234_fix_foo.md create mode 100644 data/self_modify_reports/20260226_004234_fix_foo_important_correction_from_previ.md create mode 100644 src/router/__init__.py create mode 100644 src/router/api.py create mode 100644 src/router/cascade.py create mode 100644 tests/test_router_api.py create mode 100644 tests/test_router_cascade.py diff --git a/IMPLEMENTATION_SUMMARY.md b/IMPLEMENTATION_SUMMARY.md new file mode 100644 index 0000000..a18af2b --- /dev/null +++ b/IMPLEMENTATION_SUMMARY.md @@ -0,0 +1,326 @@ +# Timmy Time — Implementation Summary + +**Date:** 2026-02-25 +**Phase:** 1, 2 Complete (MCP, Event Bus, Agents) +**Status:** ✅ Ready for Phase 3 (Cascade Router) + +--- + +## What Was Built + +### 1. 
MCP (Model Context Protocol) ✅ + +**Location:** `src/mcp/` + +| Component | Purpose | Status | +|-----------|---------|--------| +| Registry | Tool catalog with health tracking | ✅ Complete | +| Server | MCP protocol implementation | ✅ Complete | +| Schemas | JSON schema utilities | ✅ Complete | +| Bootstrap | Auto-load all tools | ✅ Complete | + +**Features:** +- 6 tools registered with full schemas +- Health tracking (healthy/degraded/unhealthy) +- Metrics collection (latency, error rates) +- Pattern-based discovery +- `@register_tool` decorator + +**Tools Implemented:** +```python +web_search # DuckDuckGo search +read_file # File reading +write_file # File writing (with confirmation) +list_directory # Directory listing +python # Python execution +memory_search # Vector memory search +``` + +### 2. Event Bus ✅ + +**Location:** `src/events/bus.py` + +**Features:** +- Async publish/subscribe +- Wildcard pattern matching (`agent.task.*`) +- Event history (last 1000 events) +- Concurrent handler execution +- System-wide singleton + +**Usage:** +```python +from events.bus import event_bus, Event + +@event_bus.subscribe("agent.task.*") +async def handle_task(event): + print(f"Task: {event.data}") + +await event_bus.publish(Event( + type="agent.task.assigned", + source="timmy", + data={"task_id": "123"} +)) +``` + +### 3. 
Sub-Agents ✅ + +**Location:** `src/agents/` + +| Agent | ID | Role | Key Tools | +|-------|-----|------|-----------| +| Seer | seer | Research | web_search, read_file, memory_search | +| Forge | forge | Code | python, write_file, read_file | +| Quill | quill | Writing | write_file, read_file, memory_search | +| Echo | echo | Memory | memory_search, read_file, write_file | +| Helm | helm | Routing | memory_search | +| Timmy | timmy | Orchestrator | All tools | + +**BaseAgent Features:** +- Agno Agent integration +- MCP tool registry access +- Event bus connectivity +- Structured logging +- Task execution framework + +**Orchestrator Logic:** +```python +timmy = create_timmy_swarm() + +# Automatic routing: +# - Simple questions → Direct response +# - "Remember..." → Echo agent +# - Complex tasks → Helm routes to specialist +``` + +### 4. Memory System (Previously Complete) ✅ + +**Three-Tier Architecture:** + +``` +Tier 1: Hot Memory (MEMORY.md) + ↓ Always loaded + +Tier 2: Vault (memory/) + ├── self/identity.md + ├── self/user_profile.md + ├── self/methodology.md + ├── notes/*.md + └── aar/*.md + +Tier 3: Semantic Search + └── Vector embeddings over vault +``` + +**Handoff Protocol:** +- `last-session-handoff.md` written at session end +- Auto-loaded at next session start + +--- + +## Architecture Diagram + +``` +┌─────────────────────────────────────────────────────────────┐ +│ USER INTERFACE │ +│ (Dashboard/CLI) │ +└──────────────────────────┬──────────────────────────────────┘ + │ +┌──────────────────────────▼──────────────────────────────────┐ +│ TIMMY ORCHESTRATOR │ +│ │ +│ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │ +│ │ Request │ │ Router │ │ Response │ │ +│ │ Analysis │→ │ (Helm) │→ │ Synthesis │ │ +│ └─────────────┘ └─────────────┘ └─────────────┘ │ +│ │ +└──────────────────────────┬──────────────────────────────────┘ + │ + ┌──────────────────┼──────────────────┐ + │ │ │ +┌───────▼──────┐ ┌───────▼──────┐ ┌───────▼──────┐ +│ Seer │ │ Forge │ │ Quill │ 
+│ (Research) │ │ (Code) │ │ (Writing) │ +└──────────────┘ └──────────────┘ └──────────────┘ + │ +┌───────▼──────┐ ┌───────▼──────┐ +│ Echo │ │ Helm │ +│ (Memory) │ │ (Routing) │ +└──────────────┘ └──────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────────┐ +│ MCP TOOL REGISTRY │ +│ │ +│ web_search read_file write_file list_directory │ +│ python memory_search │ +│ │ +└──────────────────────────┬──────────────────────────────────┘ + │ +┌──────────────────────────▼──────────────────────────────────┐ +│ EVENT BUS │ +│ (Async pub/sub, wildcard patterns) │ +└──────────────────────────┬──────────────────────────────────┘ + │ +┌──────────────────────────▼──────────────────────────────────┐ +│ MEMORY SYSTEM │ +│ │ +│ ┌──────────┐ ┌──────────┐ ┌──────────┐ │ +│ │ Hot │ │ Vault │ │ Semantic │ │ +│ │ MEMORY │ │ Files │ │ Search │ │ +│ └──────────┘ └──────────┘ └──────────┘ │ +│ │ +└─────────────────────────────────────────────────────────────┘ +``` + +--- + +## Testing Results + +``` +All 973 tests pass ✅ + +Manual verification: +- MCP Bootstrap: ✅ 6 tools loaded +- Tool Registry: ✅ web_search, file_ops, etc. 
+- Event Bus: ✅ Events published/subscribed +- Agent Imports: ✅ All agents loadable +``` + +--- + +## Files Created + +``` +src/ +├── mcp/ +│ ├── __init__.py +│ ├── bootstrap.py # Auto-load tools +│ ├── registry.py # Tool catalog +│ ├── server.py # MCP protocol +│ └── schemas/ +│ └── base.py # Schema utilities +│ +├── tools/ +│ ├── web_search.py # DuckDuckGo search +│ ├── file_ops.py # File operations +│ ├── code_exec.py # Python execution +│ └── memory_tool.py # Memory search +│ +├── events/ +│ └── bus.py # Event bus +│ +└── agents/ + ├── __init__.py + ├── base.py # Base agent class + ├── timmy.py # Orchestrator + ├── seer.py # Research + ├── forge.py # Code + ├── quill.py # Writing + ├── echo.py # Memory + └── helm.py # Routing + +MEMORY.md # Hot memory +memory/ # Vault structure +``` + +--- + +## Usage Example + +```python +from agents import create_timmy_swarm + +# Create fully configured Timmy +timmy = create_timmy_swarm() + +# Simple chat (handles directly) +response = await timmy.orchestrate("What is your name?") + +# Research (routes to Seer) +response = await timmy.orchestrate("Search for Bitcoin news") + +# Code (routes to Forge) +response = await timmy.orchestrate("Write a Python script to...") + +# Memory (routes to Echo) +response = await timmy.orchestrate("What did we discuss yesterday?") +``` + +--- + +## Next: Phase 3 (Cascade Router) + +To complete the brief, implement: + +### 1. Cascade LLM Router +```yaml +# config/providers.yaml +providers: + - name: ollama-local + type: ollama + url: http://localhost:11434 + priority: 1 + models: [llama3.2, deepseek-r1] + + - name: openai-backup + type: openai + api_key: ${OPENAI_API_KEY} + priority: 2 + models: [gpt-4o-mini] +``` + +Features: +- Priority-ordered fallback +- Latency/error tracking +- Cost accounting +- Health checks + +### 2. Self-Upgrade Loop +- Detect failures from logs +- Propose fixes via Forge +- Present to user for approval +- Apply changes with rollback + +### 3. 
Dashboard Integration +- Tool registry browser +- Agent activity feed +- Memory browser +- Upgrade queue + +--- + +## Success Criteria Status + +| Criteria | Status | +|----------|--------| +| Start with `python main.py` | 🟡 Need entry point | +| Dashboard at localhost | ✅ Exists | +| Timmy responds to questions | ✅ Working | +| Routes to sub-agents | ✅ Implemented | +| MCP tool discovery | ✅ Working | +| LLM failover | 🟡 Phase 3 | +| Search memory | ✅ Working | +| Self-upgrade proposals | 🟡 Phase 3 | +| Lightning payments | ✅ Mock exists | + +--- + +## Key Achievements + +1. ✅ **MCP Protocol** — Full implementation with schemas, registry, server +2. ✅ **6 Production Tools** — All with error handling and health tracking +3. ✅ **Event Bus** — Async pub/sub for agent communication +4. ✅ **6 Agents** — Full roster with specialized roles +5. ✅ **Orchestrator** — Intelligent routing logic +6. ✅ **Memory System** — Three-tier architecture +7. ✅ **All Tests Pass** — No regressions + +--- + +## Ready for Phase 3 + +The foundation is solid. Next steps: +1. Cascade Router for LLM failover +2. Self-upgrade loop +3. Enhanced dashboard views +4. 
Production hardening diff --git a/config/providers.yaml b/config/providers.yaml new file mode 100644 index 0000000..9063048 --- /dev/null +++ b/config/providers.yaml @@ -0,0 +1,80 @@ +# Cascade LLM Router Configuration +# Providers are tried in priority order (1 = highest) +# On failure, automatically falls back to next provider + +cascade: + # Timeout settings + timeout_seconds: 30 + + # Retry settings + max_retries_per_provider: 2 + retry_delay_seconds: 1 + + # Circuit breaker settings + circuit_breaker: + failure_threshold: 5 # Open circuit after 5 failures + recovery_timeout: 60 # Try again after 60 seconds + half_open_max_calls: 2 # Allow 2 test calls when half-open + +providers: + # Primary: Local Ollama (always try first for sovereignty) + - name: ollama-local + type: ollama + enabled: true + priority: 1 + url: "http://localhost:11434" + models: + - name: llama3.2 + default: true + context_window: 128000 + - name: deepseek-r1:1.5b + context_window: 32000 + + # Secondary: Local AirLLM (if installed) + - name: airllm-local + type: airllm + enabled: false # Enable if pip install airllm + priority: 2 + models: + - name: 70b + default: true + - name: 8b + - name: 405b + + # Tertiary: OpenAI (if API key available) + - name: openai-backup + type: openai + enabled: false # Enable by setting OPENAI_API_KEY + priority: 3 + api_key: "${OPENAI_API_KEY}" # Loaded from environment + base_url: null # Use default OpenAI endpoint + models: + - name: gpt-4o-mini + default: true + context_window: 128000 + - name: gpt-4o + context_window: 128000 + + # Quaternary: Anthropic (if API key available) + - name: anthropic-backup + type: anthropic + enabled: false # Enable by setting ANTHROPIC_API_KEY + priority: 4 + api_key: "${ANTHROPIC_API_KEY}" + models: + - name: claude-3-haiku-20240307 + default: true + context_window: 200000 + - name: claude-3-sonnet-20240229 + context_window: 200000 + +# Cost tracking (optional, for budget monitoring) +cost_tracking: + enabled: true + 
budget_daily_usd: 10.0 # Alert if daily spend exceeds this + alert_threshold_percent: 80 # Alert at 80% of budget + +# Metrics retention +metrics: + retention_hours: 168 # Keep 7 days of metrics + purge_interval_hours: 24 diff --git a/data/self_modify_reports/20260225_223632_add_docstring.md b/data/self_modify_reports/20260225_223632_add_docstring.md new file mode 100644 index 0000000..47cb2d0 --- /dev/null +++ b/data/self_modify_reports/20260225_223632_add_docstring.md @@ -0,0 +1,31 @@ +# Self-Modify Report: 20260225_223632 + +**Instruction:** Add docstring +**Target files:** src/foo.py +**Dry run:** False +**Backend:** ollama +**Branch:** N/A +**Result:** SUCCESS +**Error:** none +**Commit:** abc12345 +**Attempts:** 1 +**Autonomous cycles:** 0 + +## Attempt 1 -- complete + +### LLM Response +``` +llm raw +``` + +### Edits Written +#### src/foo.py +```python +x = 1 + +``` + +### Test Result: PASSED +``` +5 passed +``` diff --git a/data/self_modify_reports/20260225_223632_break_it.md b/data/self_modify_reports/20260225_223632_break_it.md new file mode 100644 index 0000000..2205629 --- /dev/null +++ b/data/self_modify_reports/20260225_223632_break_it.md @@ -0,0 +1,31 @@ +# Self-Modify Report: 20260225_223632 + +**Instruction:** Break it +**Target files:** src/foo.py +**Dry run:** False +**Backend:** ollama +**Branch:** N/A +**Result:** FAILED +**Error:** Tests failed after 1 attempt(s). 
+**Commit:** none +**Attempts:** 1 +**Autonomous cycles:** 0 + +## Attempt 1 -- complete + +### LLM Response +``` +llm raw +``` + +### Edits Written +#### src/foo.py +```python +x = 1 + +``` + +### Test Result: FAILED +``` +1 failed +``` diff --git a/data/self_modify_reports/20260225_223632_do_something_vague.md b/data/self_modify_reports/20260225_223632_do_something_vague.md new file mode 100644 index 0000000..5bc5bb5 --- /dev/null +++ b/data/self_modify_reports/20260225_223632_do_something_vague.md @@ -0,0 +1,12 @@ +# Self-Modify Report: 20260225_223632 + +**Instruction:** do something vague +**Target files:** (auto-detected) +**Dry run:** False +**Backend:** ollama +**Branch:** N/A +**Result:** FAILED +**Error:** No target files identified. Specify target_files or use more specific language. +**Commit:** none +**Attempts:** 0 +**Autonomous cycles:** 0 diff --git a/data/self_modify_reports/20260225_223632_fix_foo.md b/data/self_modify_reports/20260225_223632_fix_foo.md new file mode 100644 index 0000000..c66003f --- /dev/null +++ b/data/self_modify_reports/20260225_223632_fix_foo.md @@ -0,0 +1,48 @@ +# Self-Modify Report: 20260225_223632 + +**Instruction:** Fix foo +**Target files:** src/foo.py +**Dry run:** False +**Backend:** ollama +**Branch:** N/A +**Result:** SUCCESS +**Error:** none +**Commit:** abc123 +**Attempts:** 2 +**Autonomous cycles:** 0 + +## Attempt 1 -- syntax_validation + +**Error:** src/foo.py: line 1: '(' was never closed + +### LLM Response +``` +bad llm +``` + +### Edits Written +#### src/foo.py +```python +def foo( + +``` + +## Attempt 2 -- complete + +### LLM Response +``` +good llm +``` + +### Edits Written +#### src/foo.py +```python +def foo(): + pass + +``` + +### Test Result: PASSED +``` +passed +``` diff --git a/data/self_modify_reports/20260225_223632_fix_foo_important_correction_from_previ.md b/data/self_modify_reports/20260225_223632_fix_foo_important_correction_from_previ.md new file mode 100644 index 0000000..762f4c8 --- 
/dev/null +++ b/data/self_modify_reports/20260225_223632_fix_foo_important_correction_from_previ.md @@ -0,0 +1,34 @@ +# Self-Modify Report: 20260225_223632 + +**Instruction:** Fix foo + +IMPORTANT CORRECTION from previous failure: +Fix: do X instead of Y +**Target files:** src/foo.py +**Dry run:** False +**Backend:** ollama +**Branch:** N/A +**Result:** SUCCESS +**Error:** none +**Commit:** abc123 +**Attempts:** 1 +**Autonomous cycles:** 0 + +## Attempt 1 -- complete + +### LLM Response +``` +llm raw +``` + +### Edits Written +#### src/foo.py +```python +x = 2 + +``` + +### Test Result: PASSED +``` +PASSED +``` diff --git a/data/self_modify_reports/20260225_224732_fix_foo.md b/data/self_modify_reports/20260225_224732_fix_foo.md new file mode 100644 index 0000000..7e06633 --- /dev/null +++ b/data/self_modify_reports/20260225_224732_fix_foo.md @@ -0,0 +1,48 @@ +# Self-Modify Report: 20260225_224732 + +**Instruction:** Fix foo +**Target files:** src/foo.py +**Dry run:** False +**Backend:** ollama +**Branch:** N/A +**Result:** SUCCESS +**Error:** none +**Commit:** abc123 +**Attempts:** 2 +**Autonomous cycles:** 0 + +## Attempt 1 -- syntax_validation + +**Error:** src/foo.py: line 1: '(' was never closed + +### LLM Response +``` +bad llm +``` + +### Edits Written +#### src/foo.py +```python +def foo( + +``` + +## Attempt 2 -- complete + +### LLM Response +``` +good llm +``` + +### Edits Written +#### src/foo.py +```python +def foo(): + pass + +``` + +### Test Result: PASSED +``` +passed +``` diff --git a/data/self_modify_reports/20260225_224733_add_docstring.md b/data/self_modify_reports/20260225_224733_add_docstring.md new file mode 100644 index 0000000..be7ed43 --- /dev/null +++ b/data/self_modify_reports/20260225_224733_add_docstring.md @@ -0,0 +1,19 @@ +# Self-Modify Report: 20260225_224733 + +**Instruction:** Add docstring +**Target files:** src/foo.py +**Dry run:** True +**Backend:** ollama +**Branch:** N/A +**Result:** SUCCESS +**Error:** none +**Commit:** none 
+**Attempts:** 1 +**Autonomous cycles:** 0 + +## Attempt 1 -- dry_run + +### LLM Response +``` +llm raw +``` diff --git a/data/self_modify_reports/20260225_224733_break_it.md b/data/self_modify_reports/20260225_224733_break_it.md new file mode 100644 index 0000000..1a76f4a --- /dev/null +++ b/data/self_modify_reports/20260225_224733_break_it.md @@ -0,0 +1,31 @@ +# Self-Modify Report: 20260225_224733 + +**Instruction:** Break it +**Target files:** src/foo.py +**Dry run:** False +**Backend:** ollama +**Branch:** N/A +**Result:** FAILED +**Error:** Tests failed after 1 attempt(s). +**Commit:** none +**Attempts:** 1 +**Autonomous cycles:** 0 + +## Attempt 1 -- complete + +### LLM Response +``` +llm raw +``` + +### Edits Written +#### src/foo.py +```python +x = 1 + +``` + +### Test Result: FAILED +``` +1 failed +``` diff --git a/data/self_modify_reports/20260225_224733_do_something_vague.md b/data/self_modify_reports/20260225_224733_do_something_vague.md new file mode 100644 index 0000000..f898b79 --- /dev/null +++ b/data/self_modify_reports/20260225_224733_do_something_vague.md @@ -0,0 +1,12 @@ +# Self-Modify Report: 20260225_224733 + +**Instruction:** do something vague +**Target files:** (auto-detected) +**Dry run:** False +**Backend:** ollama +**Branch:** N/A +**Result:** FAILED +**Error:** No target files identified. Specify target_files or use more specific language. +**Commit:** none +**Attempts:** 0 +**Autonomous cycles:** 0 diff --git a/data/self_modify_reports/20260225_224734_fix_foo.md b/data/self_modify_reports/20260225_224734_fix_foo.md new file mode 100644 index 0000000..952f991 --- /dev/null +++ b/data/self_modify_reports/20260225_224734_fix_foo.md @@ -0,0 +1,31 @@ +# Self-Modify Report: 20260225_224734 + +**Instruction:** Fix foo +**Target files:** src/foo.py +**Dry run:** False +**Backend:** ollama +**Branch:** N/A +**Result:** FAILED +**Error:** Tests failed after 1 attempt(s). 
+**Commit:** none +**Attempts:** 1 +**Autonomous cycles:** 0 + +## Attempt 1 -- complete + +### LLM Response +``` +llm raw +``` + +### Edits Written +#### src/foo.py +```python +x = 2 + +``` + +### Test Result: FAILED +``` +FAILED +``` diff --git a/data/self_modify_reports/20260225_224734_fix_foo_important_correction_from_previ.md b/data/self_modify_reports/20260225_224734_fix_foo_important_correction_from_previ.md new file mode 100644 index 0000000..50fa47e --- /dev/null +++ b/data/self_modify_reports/20260225_224734_fix_foo_important_correction_from_previ.md @@ -0,0 +1,34 @@ +# Self-Modify Report: 20260225_224734 + +**Instruction:** Fix foo + +IMPORTANT CORRECTION from previous failure: +Fix: do X instead of Y +**Target files:** src/foo.py +**Dry run:** False +**Backend:** ollama +**Branch:** N/A +**Result:** SUCCESS +**Error:** none +**Commit:** abc123 +**Attempts:** 1 +**Autonomous cycles:** 0 + +## Attempt 1 -- complete + +### LLM Response +``` +llm raw +``` + +### Edits Written +#### src/foo.py +```python +x = 2 + +``` + +### Test Result: PASSED +``` +PASSED +``` diff --git a/data/self_modify_reports/20260225_225049_add_docstring.md b/data/self_modify_reports/20260225_225049_add_docstring.md new file mode 100644 index 0000000..dd3e411 --- /dev/null +++ b/data/self_modify_reports/20260225_225049_add_docstring.md @@ -0,0 +1,19 @@ +# Self-Modify Report: 20260225_225049 + +**Instruction:** Add docstring +**Target files:** src/foo.py +**Dry run:** True +**Backend:** ollama +**Branch:** N/A +**Result:** SUCCESS +**Error:** none +**Commit:** none +**Attempts:** 1 +**Autonomous cycles:** 0 + +## Attempt 1 -- dry_run + +### LLM Response +``` +llm raw +``` diff --git a/data/self_modify_reports/20260225_225049_break_it.md b/data/self_modify_reports/20260225_225049_break_it.md new file mode 100644 index 0000000..4bd1bd1 --- /dev/null +++ b/data/self_modify_reports/20260225_225049_break_it.md @@ -0,0 +1,31 @@ +# Self-Modify Report: 20260225_225049 + +**Instruction:** 
Break it +**Target files:** src/foo.py +**Dry run:** False +**Backend:** ollama +**Branch:** N/A +**Result:** FAILED +**Error:** Tests failed after 1 attempt(s). +**Commit:** none +**Attempts:** 1 +**Autonomous cycles:** 0 + +## Attempt 1 -- complete + +### LLM Response +``` +llm raw +``` + +### Edits Written +#### src/foo.py +```python +x = 1 + +``` + +### Test Result: FAILED +``` +1 failed +``` diff --git a/data/self_modify_reports/20260225_225049_do_something_vague.md b/data/self_modify_reports/20260225_225049_do_something_vague.md new file mode 100644 index 0000000..fa54a8e --- /dev/null +++ b/data/self_modify_reports/20260225_225049_do_something_vague.md @@ -0,0 +1,12 @@ +# Self-Modify Report: 20260225_225049 + +**Instruction:** do something vague +**Target files:** (auto-detected) +**Dry run:** False +**Backend:** ollama +**Branch:** N/A +**Result:** FAILED +**Error:** No target files identified. Specify target_files or use more specific language. +**Commit:** none +**Attempts:** 0 +**Autonomous cycles:** 0 diff --git a/data/self_modify_reports/20260225_225049_fix_foo.md b/data/self_modify_reports/20260225_225049_fix_foo.md new file mode 100644 index 0000000..fcbf51f --- /dev/null +++ b/data/self_modify_reports/20260225_225049_fix_foo.md @@ -0,0 +1,48 @@ +# Self-Modify Report: 20260225_225049 + +**Instruction:** Fix foo +**Target files:** src/foo.py +**Dry run:** False +**Backend:** ollama +**Branch:** N/A +**Result:** SUCCESS +**Error:** none +**Commit:** abc123 +**Attempts:** 2 +**Autonomous cycles:** 0 + +## Attempt 1 -- syntax_validation + +**Error:** src/foo.py: line 1: '(' was never closed + +### LLM Response +``` +bad llm +``` + +### Edits Written +#### src/foo.py +```python +def foo( + +``` + +## Attempt 2 -- complete + +### LLM Response +``` +good llm +``` + +### Edits Written +#### src/foo.py +```python +def foo(): + pass + +``` + +### Test Result: PASSED +``` +passed +``` diff --git 
a/data/self_modify_reports/20260225_225049_fix_foo_important_correction_from_previ.md b/data/self_modify_reports/20260225_225049_fix_foo_important_correction_from_previ.md new file mode 100644 index 0000000..6ff7c5a --- /dev/null +++ b/data/self_modify_reports/20260225_225049_fix_foo_important_correction_from_previ.md @@ -0,0 +1,34 @@ +# Self-Modify Report: 20260225_225049 + +**Instruction:** Fix foo + +IMPORTANT CORRECTION from previous failure: +Fix: do X instead of Y +**Target files:** src/foo.py +**Dry run:** False +**Backend:** ollama +**Branch:** N/A +**Result:** SUCCESS +**Error:** none +**Commit:** abc123 +**Attempts:** 1 +**Autonomous cycles:** 0 + +## Attempt 1 -- complete + +### LLM Response +``` +llm raw +``` + +### Edits Written +#### src/foo.py +```python +x = 2 + +``` + +### Test Result: PASSED +``` +PASSED +``` diff --git a/data/self_modify_reports/20260225_230304_fix_foo.md b/data/self_modify_reports/20260225_230304_fix_foo.md new file mode 100644 index 0000000..f530dde --- /dev/null +++ b/data/self_modify_reports/20260225_230304_fix_foo.md @@ -0,0 +1,31 @@ +# Self-Modify Report: 20260225_230304 + +**Instruction:** Fix foo +**Target files:** src/foo.py +**Dry run:** False +**Backend:** ollama +**Branch:** N/A +**Result:** FAILED +**Error:** Tests failed after 1 attempt(s). 
+**Commit:** none +**Attempts:** 1 +**Autonomous cycles:** 0 + +## Attempt 1 -- complete + +### LLM Response +``` +llm raw +``` + +### Edits Written +#### src/foo.py +```python +x = 2 + +``` + +### Test Result: FAILED +``` +FAILED +``` diff --git a/data/self_modify_reports/20260225_230305_add_docstring.md b/data/self_modify_reports/20260225_230305_add_docstring.md new file mode 100644 index 0000000..b4f08b4 --- /dev/null +++ b/data/self_modify_reports/20260225_230305_add_docstring.md @@ -0,0 +1,31 @@ +# Self-Modify Report: 20260225_230305 + +**Instruction:** Add docstring +**Target files:** src/foo.py +**Dry run:** False +**Backend:** ollama +**Branch:** N/A +**Result:** SUCCESS +**Error:** none +**Commit:** abc12345 +**Attempts:** 1 +**Autonomous cycles:** 0 + +## Attempt 1 -- complete + +### LLM Response +``` +llm raw +``` + +### Edits Written +#### src/foo.py +```python +x = 1 + +``` + +### Test Result: PASSED +``` +5 passed +``` diff --git a/data/self_modify_reports/20260225_230305_break_it.md b/data/self_modify_reports/20260225_230305_break_it.md new file mode 100644 index 0000000..c9e4fae --- /dev/null +++ b/data/self_modify_reports/20260225_230305_break_it.md @@ -0,0 +1,31 @@ +# Self-Modify Report: 20260225_230305 + +**Instruction:** Break it +**Target files:** src/foo.py +**Dry run:** False +**Backend:** ollama +**Branch:** N/A +**Result:** FAILED +**Error:** Tests failed after 1 attempt(s). 
+**Commit:** none +**Attempts:** 1 +**Autonomous cycles:** 0 + +## Attempt 1 -- complete + +### LLM Response +``` +llm raw +``` + +### Edits Written +#### src/foo.py +```python +x = 1 + +``` + +### Test Result: FAILED +``` +1 failed +``` diff --git a/data/self_modify_reports/20260225_230305_do_something_vague.md b/data/self_modify_reports/20260225_230305_do_something_vague.md new file mode 100644 index 0000000..6b8de02 --- /dev/null +++ b/data/self_modify_reports/20260225_230305_do_something_vague.md @@ -0,0 +1,12 @@ +# Self-Modify Report: 20260225_230305 + +**Instruction:** do something vague +**Target files:** (auto-detected) +**Dry run:** False +**Backend:** ollama +**Branch:** N/A +**Result:** FAILED +**Error:** No target files identified. Specify target_files or use more specific language. +**Commit:** none +**Attempts:** 0 +**Autonomous cycles:** 0 diff --git a/data/self_modify_reports/20260225_230305_fix_foo_important_correction_from_previ.md b/data/self_modify_reports/20260225_230305_fix_foo_important_correction_from_previ.md new file mode 100644 index 0000000..c2b0c6b --- /dev/null +++ b/data/self_modify_reports/20260225_230305_fix_foo_important_correction_from_previ.md @@ -0,0 +1,34 @@ +# Self-Modify Report: 20260225_230305 + +**Instruction:** Fix foo + +IMPORTANT CORRECTION from previous failure: +Fix: do X instead of Y +**Target files:** src/foo.py +**Dry run:** False +**Backend:** ollama +**Branch:** N/A +**Result:** SUCCESS +**Error:** none +**Commit:** abc123 +**Attempts:** 1 +**Autonomous cycles:** 0 + +## Attempt 1 -- complete + +### LLM Response +``` +llm raw +``` + +### Edits Written +#### src/foo.py +```python +x = 2 + +``` + +### Test Result: PASSED +``` +PASSED +``` diff --git a/data/self_modify_reports/20260225_230306_fix_foo.md b/data/self_modify_reports/20260225_230306_fix_foo.md new file mode 100644 index 0000000..8c6fb0b --- /dev/null +++ b/data/self_modify_reports/20260225_230306_fix_foo.md @@ -0,0 +1,48 @@ +# Self-Modify Report: 
20260225_230306 + +**Instruction:** Fix foo +**Target files:** src/foo.py +**Dry run:** False +**Backend:** ollama +**Branch:** N/A +**Result:** SUCCESS +**Error:** none +**Commit:** abc123 +**Attempts:** 2 +**Autonomous cycles:** 0 + +## Attempt 1 -- syntax_validation + +**Error:** src/foo.py: line 1: '(' was never closed + +### LLM Response +``` +bad llm +``` + +### Edits Written +#### src/foo.py +```python +def foo( + +``` + +## Attempt 2 -- complete + +### LLM Response +``` +good llm +``` + +### Edits Written +#### src/foo.py +```python +def foo(): + pass + +``` + +### Test Result: PASSED +``` +passed +``` diff --git a/data/self_modify_reports/20260225_230553_add_docstring.md b/data/self_modify_reports/20260225_230553_add_docstring.md new file mode 100644 index 0000000..ba405d1 --- /dev/null +++ b/data/self_modify_reports/20260225_230553_add_docstring.md @@ -0,0 +1,19 @@ +# Self-Modify Report: 20260225_230553 + +**Instruction:** Add docstring +**Target files:** src/foo.py +**Dry run:** True +**Backend:** ollama +**Branch:** N/A +**Result:** SUCCESS +**Error:** none +**Commit:** none +**Attempts:** 1 +**Autonomous cycles:** 0 + +## Attempt 1 -- dry_run + +### LLM Response +``` +llm raw +``` diff --git a/data/self_modify_reports/20260225_230553_break_it.md b/data/self_modify_reports/20260225_230553_break_it.md new file mode 100644 index 0000000..4dc4bec --- /dev/null +++ b/data/self_modify_reports/20260225_230553_break_it.md @@ -0,0 +1,31 @@ +# Self-Modify Report: 20260225_230553 + +**Instruction:** Break it +**Target files:** src/foo.py +**Dry run:** False +**Backend:** ollama +**Branch:** N/A +**Result:** FAILED +**Error:** Tests failed after 1 attempt(s). 
+**Commit:** none +**Attempts:** 1 +**Autonomous cycles:** 0 + +## Attempt 1 -- complete + +### LLM Response +``` +llm raw +``` + +### Edits Written +#### src/foo.py +```python +x = 1 + +``` + +### Test Result: FAILED +``` +1 failed +``` diff --git a/data/self_modify_reports/20260225_230553_do_something_vague.md b/data/self_modify_reports/20260225_230553_do_something_vague.md new file mode 100644 index 0000000..710f188 --- /dev/null +++ b/data/self_modify_reports/20260225_230553_do_something_vague.md @@ -0,0 +1,12 @@ +# Self-Modify Report: 20260225_230553 + +**Instruction:** do something vague +**Target files:** (auto-detected) +**Dry run:** False +**Backend:** ollama +**Branch:** N/A +**Result:** FAILED +**Error:** No target files identified. Specify target_files or use more specific language. +**Commit:** none +**Attempts:** 0 +**Autonomous cycles:** 0 diff --git a/data/self_modify_reports/20260225_230554_fix_foo.md b/data/self_modify_reports/20260225_230554_fix_foo.md new file mode 100644 index 0000000..51cecd3 --- /dev/null +++ b/data/self_modify_reports/20260225_230554_fix_foo.md @@ -0,0 +1,48 @@ +# Self-Modify Report: 20260225_230554 + +**Instruction:** Fix foo +**Target files:** src/foo.py +**Dry run:** False +**Backend:** ollama +**Branch:** N/A +**Result:** SUCCESS +**Error:** none +**Commit:** abc123 +**Attempts:** 2 +**Autonomous cycles:** 0 + +## Attempt 1 -- syntax_validation + +**Error:** src/foo.py: line 1: '(' was never closed + +### LLM Response +``` +bad llm +``` + +### Edits Written +#### src/foo.py +```python +def foo( + +``` + +## Attempt 2 -- complete + +### LLM Response +``` +good llm +``` + +### Edits Written +#### src/foo.py +```python +def foo(): + pass + +``` + +### Test Result: PASSED +``` +passed +``` diff --git a/data/self_modify_reports/20260225_230554_fix_foo_important_correction_from_previ.md b/data/self_modify_reports/20260225_230554_fix_foo_important_correction_from_previ.md new file mode 100644 index 0000000..7f2d325 --- 
/dev/null +++ b/data/self_modify_reports/20260225_230554_fix_foo_important_correction_from_previ.md @@ -0,0 +1,34 @@ +# Self-Modify Report: 20260225_230554 + +**Instruction:** Fix foo + +IMPORTANT CORRECTION from previous failure: +Fix: do X instead of Y +**Target files:** src/foo.py +**Dry run:** False +**Backend:** ollama +**Branch:** N/A +**Result:** SUCCESS +**Error:** none +**Commit:** abc123 +**Attempts:** 1 +**Autonomous cycles:** 0 + +## Attempt 1 -- complete + +### LLM Response +``` +llm raw +``` + +### Edits Written +#### src/foo.py +```python +x = 2 + +``` + +### Test Result: PASSED +``` +PASSED +``` diff --git a/data/self_modify_reports/20260225_231440_add_docstring.md b/data/self_modify_reports/20260225_231440_add_docstring.md new file mode 100644 index 0000000..74516bd --- /dev/null +++ b/data/self_modify_reports/20260225_231440_add_docstring.md @@ -0,0 +1,31 @@ +# Self-Modify Report: 20260225_231440 + +**Instruction:** Add docstring +**Target files:** src/foo.py +**Dry run:** False +**Backend:** ollama +**Branch:** N/A +**Result:** SUCCESS +**Error:** none +**Commit:** abc12345 +**Attempts:** 1 +**Autonomous cycles:** 0 + +## Attempt 1 -- complete + +### LLM Response +``` +llm raw +``` + +### Edits Written +#### src/foo.py +```python +x = 1 + +``` + +### Test Result: PASSED +``` +5 passed +``` diff --git a/data/self_modify_reports/20260225_231440_break_it.md b/data/self_modify_reports/20260225_231440_break_it.md new file mode 100644 index 0000000..cea12e2 --- /dev/null +++ b/data/self_modify_reports/20260225_231440_break_it.md @@ -0,0 +1,31 @@ +# Self-Modify Report: 20260225_231440 + +**Instruction:** Break it +**Target files:** src/foo.py +**Dry run:** False +**Backend:** ollama +**Branch:** N/A +**Result:** FAILED +**Error:** Tests failed after 1 attempt(s). 
+**Commit:** none +**Attempts:** 1 +**Autonomous cycles:** 0 + +## Attempt 1 -- complete + +### LLM Response +``` +llm raw +``` + +### Edits Written +#### src/foo.py +```python +x = 1 + +``` + +### Test Result: FAILED +``` +1 failed +``` diff --git a/data/self_modify_reports/20260225_231440_do_something_vague.md b/data/self_modify_reports/20260225_231440_do_something_vague.md new file mode 100644 index 0000000..39a4725 --- /dev/null +++ b/data/self_modify_reports/20260225_231440_do_something_vague.md @@ -0,0 +1,12 @@ +# Self-Modify Report: 20260225_231440 + +**Instruction:** do something vague +**Target files:** (auto-detected) +**Dry run:** False +**Backend:** ollama +**Branch:** N/A +**Result:** FAILED +**Error:** No target files identified. Specify target_files or use more specific language. +**Commit:** none +**Attempts:** 0 +**Autonomous cycles:** 0 diff --git a/data/self_modify_reports/20260225_231440_fix_foo.md b/data/self_modify_reports/20260225_231440_fix_foo.md new file mode 100644 index 0000000..97c89fc --- /dev/null +++ b/data/self_modify_reports/20260225_231440_fix_foo.md @@ -0,0 +1,31 @@ +# Self-Modify Report: 20260225_231440 + +**Instruction:** Fix foo +**Target files:** src/foo.py +**Dry run:** False +**Backend:** ollama +**Branch:** N/A +**Result:** FAILED +**Error:** Tests failed after 1 attempt(s). 
+**Commit:** none +**Attempts:** 1 +**Autonomous cycles:** 0 + +## Attempt 1 -- complete + +### LLM Response +``` +llm raw +``` + +### Edits Written +#### src/foo.py +```python +x = 2 + +``` + +### Test Result: FAILED +``` +FAILED +``` diff --git a/data/self_modify_reports/20260225_231440_fix_foo_important_correction_from_previ.md b/data/self_modify_reports/20260225_231440_fix_foo_important_correction_from_previ.md new file mode 100644 index 0000000..ab3463f --- /dev/null +++ b/data/self_modify_reports/20260225_231440_fix_foo_important_correction_from_previ.md @@ -0,0 +1,34 @@ +# Self-Modify Report: 20260225_231440 + +**Instruction:** Fix foo + +IMPORTANT CORRECTION from previous failure: +Fix: do X instead of Y +**Target files:** src/foo.py +**Dry run:** False +**Backend:** ollama +**Branch:** N/A +**Result:** SUCCESS +**Error:** none +**Commit:** abc123 +**Attempts:** 1 +**Autonomous cycles:** 0 + +## Attempt 1 -- complete + +### LLM Response +``` +llm raw +``` + +### Edits Written +#### src/foo.py +```python +x = 2 + +``` + +### Test Result: PASSED +``` +PASSED +``` diff --git a/data/self_modify_reports/20260225_231441_fix_foo.md b/data/self_modify_reports/20260225_231441_fix_foo.md new file mode 100644 index 0000000..657f1ac --- /dev/null +++ b/data/self_modify_reports/20260225_231441_fix_foo.md @@ -0,0 +1,48 @@ +# Self-Modify Report: 20260225_231441 + +**Instruction:** Fix foo +**Target files:** src/foo.py +**Dry run:** False +**Backend:** ollama +**Branch:** N/A +**Result:** SUCCESS +**Error:** none +**Commit:** abc123 +**Attempts:** 2 +**Autonomous cycles:** 0 + +## Attempt 1 -- syntax_validation + +**Error:** src/foo.py: line 1: '(' was never closed + +### LLM Response +``` +bad llm +``` + +### Edits Written +#### src/foo.py +```python +def foo( + +``` + +## Attempt 2 -- complete + +### LLM Response +``` +good llm +``` + +### Edits Written +#### src/foo.py +```python +def foo(): + pass + +``` + +### Test Result: PASSED +``` +passed +``` diff --git 
a/data/self_modify_reports/20260225_231645_add_docstring.md b/data/self_modify_reports/20260225_231645_add_docstring.md new file mode 100644 index 0000000..d78368b --- /dev/null +++ b/data/self_modify_reports/20260225_231645_add_docstring.md @@ -0,0 +1,19 @@ +# Self-Modify Report: 20260225_231645 + +**Instruction:** Add docstring +**Target files:** src/foo.py +**Dry run:** True +**Backend:** ollama +**Branch:** N/A +**Result:** SUCCESS +**Error:** none +**Commit:** none +**Attempts:** 1 +**Autonomous cycles:** 0 + +## Attempt 1 -- dry_run + +### LLM Response +``` +llm raw +``` diff --git a/data/self_modify_reports/20260225_231645_break_it.md b/data/self_modify_reports/20260225_231645_break_it.md new file mode 100644 index 0000000..a4e3299 --- /dev/null +++ b/data/self_modify_reports/20260225_231645_break_it.md @@ -0,0 +1,31 @@ +# Self-Modify Report: 20260225_231645 + +**Instruction:** Break it +**Target files:** src/foo.py +**Dry run:** False +**Backend:** ollama +**Branch:** N/A +**Result:** FAILED +**Error:** Tests failed after 1 attempt(s). +**Commit:** none +**Attempts:** 1 +**Autonomous cycles:** 0 + +## Attempt 1 -- complete + +### LLM Response +``` +llm raw +``` + +### Edits Written +#### src/foo.py +```python +x = 1 + +``` + +### Test Result: FAILED +``` +1 failed +``` diff --git a/data/self_modify_reports/20260225_231645_do_something_vague.md b/data/self_modify_reports/20260225_231645_do_something_vague.md new file mode 100644 index 0000000..4c68581 --- /dev/null +++ b/data/self_modify_reports/20260225_231645_do_something_vague.md @@ -0,0 +1,12 @@ +# Self-Modify Report: 20260225_231645 + +**Instruction:** do something vague +**Target files:** (auto-detected) +**Dry run:** False +**Backend:** ollama +**Branch:** N/A +**Result:** FAILED +**Error:** No target files identified. Specify target_files or use more specific language. 
+**Commit:** none +**Attempts:** 0 +**Autonomous cycles:** 0 diff --git a/data/self_modify_reports/20260225_231645_fix_foo.md b/data/self_modify_reports/20260225_231645_fix_foo.md new file mode 100644 index 0000000..7994a87 --- /dev/null +++ b/data/self_modify_reports/20260225_231645_fix_foo.md @@ -0,0 +1,48 @@ +# Self-Modify Report: 20260225_231645 + +**Instruction:** Fix foo +**Target files:** src/foo.py +**Dry run:** False +**Backend:** ollama +**Branch:** N/A +**Result:** SUCCESS +**Error:** none +**Commit:** abc123 +**Attempts:** 2 +**Autonomous cycles:** 0 + +## Attempt 1 -- syntax_validation + +**Error:** src/foo.py: line 1: '(' was never closed + +### LLM Response +``` +bad llm +``` + +### Edits Written +#### src/foo.py +```python +def foo( + +``` + +## Attempt 2 -- complete + +### LLM Response +``` +good llm +``` + +### Edits Written +#### src/foo.py +```python +def foo(): + pass + +``` + +### Test Result: PASSED +``` +passed +``` diff --git a/data/self_modify_reports/20260225_231645_fix_foo_important_correction_from_previ.md b/data/self_modify_reports/20260225_231645_fix_foo_important_correction_from_previ.md new file mode 100644 index 0000000..3cd0cec --- /dev/null +++ b/data/self_modify_reports/20260225_231645_fix_foo_important_correction_from_previ.md @@ -0,0 +1,34 @@ +# Self-Modify Report: 20260225_231645 + +**Instruction:** Fix foo + +IMPORTANT CORRECTION from previous failure: +Fix: do X instead of Y +**Target files:** src/foo.py +**Dry run:** False +**Backend:** ollama +**Branch:** N/A +**Result:** SUCCESS +**Error:** none +**Commit:** abc123 +**Attempts:** 1 +**Autonomous cycles:** 0 + +## Attempt 1 -- complete + +### LLM Response +``` +llm raw +``` + +### Edits Written +#### src/foo.py +```python +x = 2 + +``` + +### Test Result: PASSED +``` +PASSED +``` diff --git a/data/self_modify_reports/20260225_232402_add_docstring.md b/data/self_modify_reports/20260225_232402_add_docstring.md new file mode 100644 index 0000000..3b3f22d --- /dev/null +++ 
b/data/self_modify_reports/20260225_232402_add_docstring.md @@ -0,0 +1,19 @@ +# Self-Modify Report: 20260225_232402 + +**Instruction:** Add docstring +**Target files:** src/foo.py +**Dry run:** True +**Backend:** ollama +**Branch:** N/A +**Result:** SUCCESS +**Error:** none +**Commit:** none +**Attempts:** 1 +**Autonomous cycles:** 0 + +## Attempt 1 -- dry_run + +### LLM Response +``` +llm raw +``` diff --git a/data/self_modify_reports/20260225_232402_break_it.md b/data/self_modify_reports/20260225_232402_break_it.md new file mode 100644 index 0000000..4a7637f --- /dev/null +++ b/data/self_modify_reports/20260225_232402_break_it.md @@ -0,0 +1,31 @@ +# Self-Modify Report: 20260225_232402 + +**Instruction:** Break it +**Target files:** src/foo.py +**Dry run:** False +**Backend:** ollama +**Branch:** N/A +**Result:** FAILED +**Error:** Tests failed after 1 attempt(s). +**Commit:** none +**Attempts:** 1 +**Autonomous cycles:** 0 + +## Attempt 1 -- complete + +### LLM Response +``` +llm raw +``` + +### Edits Written +#### src/foo.py +```python +x = 1 + +``` + +### Test Result: FAILED +``` +1 failed +``` diff --git a/data/self_modify_reports/20260225_232402_do_something_vague.md b/data/self_modify_reports/20260225_232402_do_something_vague.md new file mode 100644 index 0000000..a79f63c --- /dev/null +++ b/data/self_modify_reports/20260225_232402_do_something_vague.md @@ -0,0 +1,12 @@ +# Self-Modify Report: 20260225_232402 + +**Instruction:** do something vague +**Target files:** (auto-detected) +**Dry run:** False +**Backend:** ollama +**Branch:** N/A +**Result:** FAILED +**Error:** No target files identified. Specify target_files or use more specific language. 
+**Commit:** none +**Attempts:** 0 +**Autonomous cycles:** 0 diff --git a/data/self_modify_reports/20260225_232402_fix_foo.md b/data/self_modify_reports/20260225_232402_fix_foo.md new file mode 100644 index 0000000..0b9e93f --- /dev/null +++ b/data/self_modify_reports/20260225_232402_fix_foo.md @@ -0,0 +1,31 @@ +# Self-Modify Report: 20260225_232402 + +**Instruction:** Fix foo +**Target files:** src/foo.py +**Dry run:** False +**Backend:** ollama +**Branch:** N/A +**Result:** FAILED +**Error:** Tests failed after 1 attempt(s). +**Commit:** none +**Attempts:** 1 +**Autonomous cycles:** 0 + +## Attempt 1 -- complete + +### LLM Response +``` +llm raw +``` + +### Edits Written +#### src/foo.py +```python +x = 2 + +``` + +### Test Result: FAILED +``` +FAILED +``` diff --git a/data/self_modify_reports/20260225_232402_fix_foo_important_correction_from_previ.md b/data/self_modify_reports/20260225_232402_fix_foo_important_correction_from_previ.md new file mode 100644 index 0000000..ba34fc3 --- /dev/null +++ b/data/self_modify_reports/20260225_232402_fix_foo_important_correction_from_previ.md @@ -0,0 +1,34 @@ +# Self-Modify Report: 20260225_232402 + +**Instruction:** Fix foo + +IMPORTANT CORRECTION from previous failure: +Fix: do X instead of Y +**Target files:** src/foo.py +**Dry run:** False +**Backend:** ollama +**Branch:** N/A +**Result:** SUCCESS +**Error:** none +**Commit:** abc123 +**Attempts:** 1 +**Autonomous cycles:** 0 + +## Attempt 1 -- complete + +### LLM Response +``` +llm raw +``` + +### Edits Written +#### src/foo.py +```python +x = 2 + +``` + +### Test Result: PASSED +``` +PASSED +``` diff --git a/data/self_modify_reports/20260225_232403_fix_foo.md b/data/self_modify_reports/20260225_232403_fix_foo.md new file mode 100644 index 0000000..d932f66 --- /dev/null +++ b/data/self_modify_reports/20260225_232403_fix_foo.md @@ -0,0 +1,48 @@ +# Self-Modify Report: 20260225_232403 + +**Instruction:** Fix foo +**Target files:** src/foo.py +**Dry run:** False 
+**Backend:** ollama +**Branch:** N/A +**Result:** SUCCESS +**Error:** none +**Commit:** abc123 +**Attempts:** 2 +**Autonomous cycles:** 0 + +## Attempt 1 -- syntax_validation + +**Error:** src/foo.py: line 1: '(' was never closed + +### LLM Response +``` +bad llm +``` + +### Edits Written +#### src/foo.py +```python +def foo( + +``` + +## Attempt 2 -- complete + +### LLM Response +``` +good llm +``` + +### Edits Written +#### src/foo.py +```python +def foo(): + pass + +``` + +### Test Result: PASSED +``` +passed +``` diff --git a/data/self_modify_reports/20260226_002427_add_docstring.md b/data/self_modify_reports/20260226_002427_add_docstring.md new file mode 100644 index 0000000..f544358 --- /dev/null +++ b/data/self_modify_reports/20260226_002427_add_docstring.md @@ -0,0 +1,31 @@ +# Self-Modify Report: 20260226_002427 + +**Instruction:** Add docstring +**Target files:** src/foo.py +**Dry run:** False +**Backend:** ollama +**Branch:** N/A +**Result:** SUCCESS +**Error:** none +**Commit:** abc12345 +**Attempts:** 1 +**Autonomous cycles:** 0 + +## Attempt 1 -- complete + +### LLM Response +``` +llm raw +``` + +### Edits Written +#### src/foo.py +```python +x = 1 + +``` + +### Test Result: PASSED +``` +5 passed +``` diff --git a/data/self_modify_reports/20260226_002427_break_it.md b/data/self_modify_reports/20260226_002427_break_it.md new file mode 100644 index 0000000..05c9773 --- /dev/null +++ b/data/self_modify_reports/20260226_002427_break_it.md @@ -0,0 +1,31 @@ +# Self-Modify Report: 20260226_002427 + +**Instruction:** Break it +**Target files:** src/foo.py +**Dry run:** False +**Backend:** ollama +**Branch:** N/A +**Result:** FAILED +**Error:** Tests failed after 1 attempt(s). 
+**Commit:** none +**Attempts:** 1 +**Autonomous cycles:** 0 + +## Attempt 1 -- complete + +### LLM Response +``` +llm raw +``` + +### Edits Written +#### src/foo.py +```python +x = 1 + +``` + +### Test Result: FAILED +``` +1 failed +``` diff --git a/data/self_modify_reports/20260226_002427_do_something_vague.md b/data/self_modify_reports/20260226_002427_do_something_vague.md new file mode 100644 index 0000000..f5d554d --- /dev/null +++ b/data/self_modify_reports/20260226_002427_do_something_vague.md @@ -0,0 +1,12 @@ +# Self-Modify Report: 20260226_002427 + +**Instruction:** do something vague +**Target files:** (auto-detected) +**Dry run:** False +**Backend:** ollama +**Branch:** N/A +**Result:** FAILED +**Error:** No target files identified. Specify target_files or use more specific language. +**Commit:** none +**Attempts:** 0 +**Autonomous cycles:** 0 diff --git a/data/self_modify_reports/20260226_002427_fix_foo.md b/data/self_modify_reports/20260226_002427_fix_foo.md new file mode 100644 index 0000000..3a4dbeb --- /dev/null +++ b/data/self_modify_reports/20260226_002427_fix_foo.md @@ -0,0 +1,48 @@ +# Self-Modify Report: 20260226_002427 + +**Instruction:** Fix foo +**Target files:** src/foo.py +**Dry run:** False +**Backend:** ollama +**Branch:** N/A +**Result:** SUCCESS +**Error:** none +**Commit:** abc123 +**Attempts:** 2 +**Autonomous cycles:** 0 + +## Attempt 1 -- syntax_validation + +**Error:** src/foo.py: line 1: '(' was never closed + +### LLM Response +``` +bad llm +``` + +### Edits Written +#### src/foo.py +```python +def foo( + +``` + +## Attempt 2 -- complete + +### LLM Response +``` +good llm +``` + +### Edits Written +#### src/foo.py +```python +def foo(): + pass + +``` + +### Test Result: PASSED +``` +passed +``` diff --git a/data/self_modify_reports/20260226_002428_fix_foo.md b/data/self_modify_reports/20260226_002428_fix_foo.md new file mode 100644 index 0000000..de322ed --- /dev/null +++ b/data/self_modify_reports/20260226_002428_fix_foo.md @@ 
-0,0 +1,31 @@ +# Self-Modify Report: 20260226_002428 + +**Instruction:** Fix foo +**Target files:** src/foo.py +**Dry run:** False +**Backend:** ollama +**Branch:** N/A +**Result:** FAILED +**Error:** Tests failed after 1 attempt(s). +**Commit:** none +**Attempts:** 1 +**Autonomous cycles:** 0 + +## Attempt 1 -- complete + +### LLM Response +``` +llm raw +``` + +### Edits Written +#### src/foo.py +```python +x = 2 + +``` + +### Test Result: FAILED +``` +FAILED +``` diff --git a/data/self_modify_reports/20260226_002428_fix_foo_important_correction_from_previ.md b/data/self_modify_reports/20260226_002428_fix_foo_important_correction_from_previ.md new file mode 100644 index 0000000..fdca93b --- /dev/null +++ b/data/self_modify_reports/20260226_002428_fix_foo_important_correction_from_previ.md @@ -0,0 +1,34 @@ +# Self-Modify Report: 20260226_002428 + +**Instruction:** Fix foo + +IMPORTANT CORRECTION from previous failure: +Fix: do X instead of Y +**Target files:** src/foo.py +**Dry run:** False +**Backend:** ollama +**Branch:** N/A +**Result:** SUCCESS +**Error:** none +**Commit:** abc123 +**Attempts:** 1 +**Autonomous cycles:** 0 + +## Attempt 1 -- complete + +### LLM Response +``` +llm raw +``` + +### Edits Written +#### src/foo.py +```python +x = 2 + +``` + +### Test Result: PASSED +``` +PASSED +``` diff --git a/data/self_modify_reports/20260226_004233_add_docstring.md b/data/self_modify_reports/20260226_004233_add_docstring.md new file mode 100644 index 0000000..094bcf4 --- /dev/null +++ b/data/self_modify_reports/20260226_004233_add_docstring.md @@ -0,0 +1,31 @@ +# Self-Modify Report: 20260226_004233 + +**Instruction:** Add docstring +**Target files:** src/foo.py +**Dry run:** False +**Backend:** ollama +**Branch:** N/A +**Result:** SUCCESS +**Error:** none +**Commit:** abc12345 +**Attempts:** 1 +**Autonomous cycles:** 0 + +## Attempt 1 -- complete + +### LLM Response +``` +llm raw +``` + +### Edits Written +#### src/foo.py +```python +x = 1 + +``` + +### Test 
Result: PASSED +``` +5 passed +``` diff --git a/data/self_modify_reports/20260226_004233_break_it.md b/data/self_modify_reports/20260226_004233_break_it.md new file mode 100644 index 0000000..610c836 --- /dev/null +++ b/data/self_modify_reports/20260226_004233_break_it.md @@ -0,0 +1,31 @@ +# Self-Modify Report: 20260226_004233 + +**Instruction:** Break it +**Target files:** src/foo.py +**Dry run:** False +**Backend:** ollama +**Branch:** N/A +**Result:** FAILED +**Error:** Tests failed after 1 attempt(s). +**Commit:** none +**Attempts:** 1 +**Autonomous cycles:** 0 + +## Attempt 1 -- complete + +### LLM Response +``` +llm raw +``` + +### Edits Written +#### src/foo.py +```python +x = 1 + +``` + +### Test Result: FAILED +``` +1 failed +``` diff --git a/data/self_modify_reports/20260226_004233_do_something_vague.md b/data/self_modify_reports/20260226_004233_do_something_vague.md new file mode 100644 index 0000000..6f7a4d6 --- /dev/null +++ b/data/self_modify_reports/20260226_004233_do_something_vague.md @@ -0,0 +1,12 @@ +# Self-Modify Report: 20260226_004233 + +**Instruction:** do something vague +**Target files:** (auto-detected) +**Dry run:** False +**Backend:** ollama +**Branch:** N/A +**Result:** FAILED +**Error:** No target files identified. Specify target_files or use more specific language. 
+**Commit:** none +**Attempts:** 0 +**Autonomous cycles:** 0 diff --git a/data/self_modify_reports/20260226_004234_fix_foo.md b/data/self_modify_reports/20260226_004234_fix_foo.md new file mode 100644 index 0000000..9fb29cf --- /dev/null +++ b/data/self_modify_reports/20260226_004234_fix_foo.md @@ -0,0 +1,48 @@ +# Self-Modify Report: 20260226_004234 + +**Instruction:** Fix foo +**Target files:** src/foo.py +**Dry run:** False +**Backend:** ollama +**Branch:** N/A +**Result:** SUCCESS +**Error:** none +**Commit:** abc123 +**Attempts:** 2 +**Autonomous cycles:** 0 + +## Attempt 1 -- syntax_validation + +**Error:** src/foo.py: line 1: '(' was never closed + +### LLM Response +``` +bad llm +``` + +### Edits Written +#### src/foo.py +```python +def foo( + +``` + +## Attempt 2 -- complete + +### LLM Response +``` +good llm +``` + +### Edits Written +#### src/foo.py +```python +def foo(): + pass + +``` + +### Test Result: PASSED +``` +passed +``` diff --git a/data/self_modify_reports/20260226_004234_fix_foo_important_correction_from_previ.md b/data/self_modify_reports/20260226_004234_fix_foo_important_correction_from_previ.md new file mode 100644 index 0000000..5c94c00 --- /dev/null +++ b/data/self_modify_reports/20260226_004234_fix_foo_important_correction_from_previ.md @@ -0,0 +1,34 @@ +# Self-Modify Report: 20260226_004234 + +**Instruction:** Fix foo + +IMPORTANT CORRECTION from previous failure: +Fix: do X instead of Y +**Target files:** src/foo.py +**Dry run:** False +**Backend:** ollama +**Branch:** N/A +**Result:** SUCCESS +**Error:** none +**Commit:** abc123 +**Attempts:** 1 +**Autonomous cycles:** 0 + +## Attempt 1 -- complete + +### LLM Response +``` +llm raw +``` + +### Edits Written +#### src/foo.py +```python +x = 2 + +``` + +### Test Result: PASSED +``` +PASSED +``` diff --git a/src/dashboard/app.py b/src/dashboard/app.py index 3b2788a..2e98ae5 100644 --- a/src/dashboard/app.py +++ b/src/dashboard/app.py @@ -27,6 +27,7 @@ from dashboard.routes.spark import 
router as spark_router from dashboard.routes.creative import router as creative_router from dashboard.routes.discord import router as discord_router from dashboard.routes.self_modify import router as self_modify_router +from router.api import router as cascade_router logging.basicConfig( level=logging.INFO, @@ -156,6 +157,7 @@ app.include_router(spark_router) app.include_router(creative_router) app.include_router(discord_router) app.include_router(self_modify_router) +app.include_router(cascade_router) @app.get("/", response_class=HTMLResponse) diff --git a/src/router/__init__.py b/src/router/__init__.py new file mode 100644 index 0000000..730f623 --- /dev/null +++ b/src/router/__init__.py @@ -0,0 +1,12 @@ +"""Cascade LLM Router — Automatic failover between providers.""" + +from .cascade import CascadeRouter, Provider, ProviderStatus, get_router +from .api import router + +__all__ = [ + "CascadeRouter", + "Provider", + "ProviderStatus", + "get_router", + "router", +] diff --git a/src/router/api.py b/src/router/api.py new file mode 100644 index 0000000..7558c4e --- /dev/null +++ b/src/router/api.py @@ -0,0 +1,199 @@ +"""API endpoints for Cascade Router monitoring and control.""" + +import asyncio +import logging +from typing import Annotated, Any + +from fastapi import APIRouter, Depends, HTTPException +from pydantic import BaseModel + +from .cascade import CascadeRouter, get_router + +logger = logging.getLogger(__name__) +router = APIRouter(prefix="/api/v1/router", tags=["router"]) + + +class CompletionRequest(BaseModel): + """Request body for completions.""" + messages: list[dict[str, str]] + model: str | None = None + temperature: float = 0.7 + max_tokens: int | None = None + + +class CompletionResponse(BaseModel): + """Response from completion endpoint.""" + content: str + provider: str + model: str + latency_ms: float + + +class ProviderControl(BaseModel): + """Control a provider's status.""" + action: str # "enable", "disable", "reset_circuit" + + +async def 
get_cascade_router() -> CascadeRouter: + """Dependency to get the cascade router.""" + return get_router() + + +@router.post("/complete", response_model=CompletionResponse) +async def complete( + request: CompletionRequest, + cascade: Annotated[CascadeRouter, Depends(get_cascade_router)], +) -> dict[str, Any]: + """Complete a conversation with automatic failover. + + Routes through providers in priority order until one succeeds. + """ + try: + result = await cascade.complete( + messages=request.messages, + model=request.model, + temperature=request.temperature, + max_tokens=request.max_tokens, + ) + return result + except RuntimeError as exc: + raise HTTPException(status_code=503, detail=str(exc)) + + +@router.get("/status") +async def get_status( + cascade: Annotated[CascadeRouter, Depends(get_cascade_router)], +) -> dict[str, Any]: + """Get router status and provider health.""" + return cascade.get_status() + + +@router.get("/metrics") +async def get_metrics( + cascade: Annotated[CascadeRouter, Depends(get_cascade_router)], +) -> dict[str, Any]: + """Get detailed metrics for all providers.""" + return cascade.get_metrics() + + +@router.get("/providers") +async def list_providers( + cascade: Annotated[CascadeRouter, Depends(get_cascade_router)], +) -> list[dict[str, Any]]: + """List all configured providers.""" + return [ + { + "name": p.name, + "type": p.type, + "enabled": p.enabled, + "priority": p.priority, + "status": p.status.value, + "circuit_state": p.circuit_state.value, + "default_model": p.get_default_model(), + "models": [m["name"] for m in p.models], + } + for p in cascade.providers + ] + + +@router.post("/providers/{provider_name}/control") +async def control_provider( + provider_name: str, + control: ProviderControl, + cascade: Annotated[CascadeRouter, Depends(get_cascade_router)], +) -> dict[str, str]: + """Control a provider (enable/disable/reset).""" + provider = None + for p in cascade.providers: + if p.name == provider_name: + provider = p + 
break + + if not provider: + raise HTTPException(status_code=404, detail=f"Provider {provider_name} not found") + + if control.action == "enable": + provider.enabled = True + provider.status = provider.status.__class__.HEALTHY + return {"message": f"Provider {provider_name} enabled"} + + elif control.action == "disable": + provider.enabled = False + from .cascade import ProviderStatus + provider.status = ProviderStatus.DISABLED + return {"message": f"Provider {provider_name} disabled"} + + elif control.action == "reset_circuit": + from .cascade import CircuitState, ProviderStatus + provider.circuit_state = CircuitState.CLOSED + provider.circuit_opened_at = None + provider.half_open_calls = 0 + provider.metrics.consecutive_failures = 0 + provider.status = ProviderStatus.HEALTHY + return {"message": f"Circuit breaker reset for {provider_name}"} + + else: + raise HTTPException(status_code=400, detail=f"Unknown action: {control.action}") + + +@router.post("/health-check") +async def run_health_check( + cascade: Annotated[CascadeRouter, Depends(get_cascade_router)], +) -> dict[str, Any]: + """Run health checks on all providers.""" + results = [] + + for provider in cascade.providers: + # Quick ping to check availability + is_healthy = cascade._check_provider_available(provider) + + from .cascade import ProviderStatus + if is_healthy: + if provider.status == ProviderStatus.UNHEALTHY: + # Reset circuit if it was open but now healthy + provider.circuit_state = provider.circuit_state.__class__.CLOSED + provider.circuit_opened_at = None + provider.status = ProviderStatus.HEALTHY if provider.metrics.error_rate < 0.1 else ProviderStatus.DEGRADED + else: + provider.status = ProviderStatus.UNHEALTHY + + results.append({ + "name": provider.name, + "type": provider.type, + "healthy": is_healthy, + "status": provider.status.value, + }) + + return { + "checked_at": asyncio.get_event_loop().time(), + "providers": results, + "healthy_count": sum(1 for r in results if r["healthy"]), + 
} + + +@router.get("/config") +async def get_config( + cascade: Annotated[CascadeRouter, Depends(get_cascade_router)], +) -> dict[str, Any]: + """Get router configuration (without secrets).""" + cfg = cascade.config + + return { + "timeout_seconds": cfg.timeout_seconds, + "max_retries_per_provider": cfg.max_retries_per_provider, + "retry_delay_seconds": cfg.retry_delay_seconds, + "circuit_breaker": { + "failure_threshold": cfg.circuit_breaker_failure_threshold, + "recovery_timeout": cfg.circuit_breaker_recovery_timeout, + "half_open_max_calls": cfg.circuit_breaker_half_open_max_calls, + }, + "providers": [ + { + "name": p.name, + "type": p.type, + "priority": p.priority, + "enabled": p.enabled, + } + for p in cascade.providers + ], + } diff --git a/src/router/cascade.py b/src/router/cascade.py new file mode 100644 index 0000000..bb1de69 --- /dev/null +++ b/src/router/cascade.py @@ -0,0 +1,566 @@ +"""Cascade LLM Router — Automatic failover between providers. + +Routes requests through an ordered list of LLM providers, +automatically failing over on rate limits or errors. +Tracks metrics for latency, errors, and cost. 
+""" + +import asyncio +import logging +import time +from dataclasses import dataclass, field +from datetime import datetime, timedelta, timezone +from enum import Enum +from typing import Any, Optional + +from pathlib import Path + +try: + import yaml +except ImportError: + yaml = None # type: ignore + +try: + import requests +except ImportError: + requests = None # type: ignore + +logger = logging.getLogger(__name__) + + +class ProviderStatus(Enum): + """Health status of a provider.""" + HEALTHY = "healthy" + DEGRADED = "degraded" # Working but slow or occasional errors + UNHEALTHY = "unhealthy" # Circuit breaker open + DISABLED = "disabled" + + +class CircuitState(Enum): + """Circuit breaker state.""" + CLOSED = "closed" # Normal operation + OPEN = "open" # Failing, rejecting requests + HALF_OPEN = "half_open" # Testing if recovered + + +@dataclass +class ProviderMetrics: + """Metrics for a single provider.""" + total_requests: int = 0 + successful_requests: int = 0 + failed_requests: int = 0 + total_latency_ms: float = 0.0 + last_request_time: Optional[str] = None + last_error_time: Optional[str] = None + consecutive_failures: int = 0 + + @property + def avg_latency_ms(self) -> float: + if self.total_requests == 0: + return 0.0 + return self.total_latency_ms / self.total_requests + + @property + def error_rate(self) -> float: + if self.total_requests == 0: + return 0.0 + return self.failed_requests / self.total_requests + + +@dataclass +class Provider: + """LLM provider configuration and state.""" + name: str + type: str # ollama, openai, anthropic, airllm + enabled: bool + priority: int + url: Optional[str] = None + api_key: Optional[str] = None + base_url: Optional[str] = None + models: list[dict] = field(default_factory=list) + + # Runtime state + status: ProviderStatus = ProviderStatus.HEALTHY + metrics: ProviderMetrics = field(default_factory=ProviderMetrics) + circuit_state: CircuitState = CircuitState.CLOSED + circuit_opened_at: Optional[float] = None + 
half_open_calls: int = 0 + + def get_default_model(self) -> Optional[str]: + """Get the default model for this provider.""" + for model in self.models: + if model.get("default"): + return model["name"] + if self.models: + return self.models[0]["name"] + return None + + +@dataclass +class RouterConfig: + """Cascade router configuration.""" + timeout_seconds: int = 30 + max_retries_per_provider: int = 2 + retry_delay_seconds: int = 1 + circuit_breaker_failure_threshold: int = 5 + circuit_breaker_recovery_timeout: int = 60 + circuit_breaker_half_open_max_calls: int = 2 + cost_tracking_enabled: bool = True + budget_daily_usd: float = 10.0 + + +class CascadeRouter: + """Routes LLM requests with automatic failover. + + Usage: + router = CascadeRouter() + + response = await router.complete( + messages=[{"role": "user", "content": "Hello"}], + model="llama3.2" + ) + + # Check metrics + metrics = router.get_metrics() + """ + + def __init__(self, config_path: Optional[Path] = None) -> None: + self.config_path = config_path or Path("config/providers.yaml") + self.providers: list[Provider] = [] + self.config: RouterConfig = RouterConfig() + self._load_config() + + logger.info("CascadeRouter initialized with %d providers", len(self.providers)) + + def _load_config(self) -> None: + """Load configuration from YAML.""" + if not self.config_path.exists(): + logger.warning("Config not found: %s, using defaults", self.config_path) + return + + try: + if yaml is None: + raise RuntimeError("PyYAML not installed") + + content = self.config_path.read_text() + # Expand environment variables + content = self._expand_env_vars(content) + data = yaml.safe_load(content) + + # Load cascade settings + cascade = data.get("cascade", {}) + self.config = RouterConfig( + timeout_seconds=cascade.get("timeout_seconds", 30), + max_retries_per_provider=cascade.get("max_retries_per_provider", 2), + retry_delay_seconds=cascade.get("retry_delay_seconds", 1), + 
circuit_breaker_failure_threshold=cascade.get("circuit_breaker", {}).get("failure_threshold", 5), + circuit_breaker_recovery_timeout=cascade.get("circuit_breaker", {}).get("recovery_timeout", 60), + circuit_breaker_half_open_max_calls=cascade.get("circuit_breaker", {}).get("half_open_max_calls", 2), + ) + + # Load providers + for p_data in data.get("providers", []): + # Skip disabled providers + if not p_data.get("enabled", False): + continue + + provider = Provider( + name=p_data["name"], + type=p_data["type"], + enabled=p_data.get("enabled", True), + priority=p_data.get("priority", 99), + url=p_data.get("url"), + api_key=p_data.get("api_key"), + base_url=p_data.get("base_url"), + models=p_data.get("models", []), + ) + + # Check if provider is actually available + if self._check_provider_available(provider): + self.providers.append(provider) + else: + logger.warning("Provider %s not available, skipping", provider.name) + + # Sort by priority + self.providers.sort(key=lambda p: p.priority) + + except Exception as exc: + logger.error("Failed to load config: %s", exc) + + def _expand_env_vars(self, content: str) -> str: + """Expand ${VAR} syntax in YAML content.""" + import os + import re + + def replace_var(match): + var_name = match.group(1) + return os.environ.get(var_name, match.group(0)) + + return re.sub(r"\$\{(\w+)\}", replace_var, content) + + def _check_provider_available(self, provider: Provider) -> bool: + """Check if a provider is actually available.""" + if provider.type == "ollama": + # Check if Ollama is running + if requests is None: + # Can't check without requests, assume available + return True + try: + url = provider.url or "http://localhost:11434" + response = requests.get(f"{url}/api/tags", timeout=5) + return response.status_code == 200 + except Exception: + return False + + elif provider.type == "airllm": + # Check if airllm is installed + try: + import airllm + return True + except ImportError: + return False + + elif provider.type in 
("openai", "anthropic"): + # Check if API key is set + return provider.api_key is not None and provider.api_key != "" + + return True + + async def complete( + self, + messages: list[dict], + model: Optional[str] = None, + temperature: float = 0.7, + max_tokens: Optional[int] = None, + ) -> dict: + """Complete a chat conversation with automatic failover. + + Args: + messages: List of message dicts with role and content + model: Preferred model (tries this first, then provider defaults) + temperature: Sampling temperature + max_tokens: Maximum tokens to generate + + Returns: + Dict with content, provider_used, and metrics + + Raises: + RuntimeError: If all providers fail + """ + errors = [] + + for provider in self.providers: + # Skip unhealthy providers (circuit breaker) + if provider.status == ProviderStatus.UNHEALTHY: + # Check if circuit breaker can close + if self._can_close_circuit(provider): + provider.circuit_state = CircuitState.HALF_OPEN + provider.half_open_calls = 0 + logger.info("Circuit breaker half-open for %s", provider.name) + else: + logger.debug("Skipping %s (circuit open)", provider.name) + continue + + # Try this provider + for attempt in range(self.config.max_retries_per_provider): + try: + result = await self._try_provider( + provider=provider, + messages=messages, + model=model, + temperature=temperature, + max_tokens=max_tokens, + ) + + # Success! 
Update metrics and return + self._record_success(provider, result.get("latency_ms", 0)) + return { + "content": result["content"], + "provider": provider.name, + "model": result.get("model", model or provider.get_default_model()), + "latency_ms": result.get("latency_ms", 0), + } + + except Exception as exc: + error_msg = str(exc) + logger.warning( + "Provider %s attempt %d failed: %s", + provider.name, attempt + 1, error_msg + ) + errors.append(f"{provider.name}: {error_msg}") + + if attempt < self.config.max_retries_per_provider - 1: + await asyncio.sleep(self.config.retry_delay_seconds) + + # All retries failed for this provider + self._record_failure(provider) + + # All providers failed + raise RuntimeError(f"All providers failed: {'; '.join(errors)}") + + async def _try_provider( + self, + provider: Provider, + messages: list[dict], + model: Optional[str], + temperature: float, + max_tokens: Optional[int], + ) -> dict: + """Try a single provider request.""" + start_time = time.time() + + if provider.type == "ollama": + result = await self._call_ollama( + provider=provider, + messages=messages, + model=model or provider.get_default_model(), + temperature=temperature, + ) + elif provider.type == "openai": + result = await self._call_openai( + provider=provider, + messages=messages, + model=model or provider.get_default_model(), + temperature=temperature, + max_tokens=max_tokens, + ) + elif provider.type == "anthropic": + result = await self._call_anthropic( + provider=provider, + messages=messages, + model=model or provider.get_default_model(), + temperature=temperature, + max_tokens=max_tokens, + ) + else: + raise ValueError(f"Unknown provider type: {provider.type}") + + latency_ms = (time.time() - start_time) * 1000 + result["latency_ms"] = latency_ms + + return result + + async def _call_ollama( + self, + provider: Provider, + messages: list[dict], + model: str, + temperature: float, + ) -> dict: + """Call Ollama API.""" + import aiohttp + + url = 
f"{provider.url}/api/chat" + + payload = { + "model": model, + "messages": messages, + "stream": False, + "options": { + "temperature": temperature, + }, + } + + timeout = aiohttp.ClientTimeout(total=self.config.timeout_seconds) + + async with aiohttp.ClientSession(timeout=timeout) as session: + async with session.post(url, json=payload) as response: + if response.status != 200: + text = await response.text() + raise RuntimeError(f"Ollama error {response.status}: {text}") + + data = await response.json() + return { + "content": data["message"]["content"], + "model": model, + } + + async def _call_openai( + self, + provider: Provider, + messages: list[dict], + model: str, + temperature: float, + max_tokens: Optional[int], + ) -> dict: + """Call OpenAI API.""" + import openai + + client = openai.AsyncOpenAI( + api_key=provider.api_key, + base_url=provider.base_url, + timeout=self.config.timeout_seconds, + ) + + kwargs = { + "model": model, + "messages": messages, + "temperature": temperature, + } + if max_tokens: + kwargs["max_tokens"] = max_tokens + + response = await client.chat.completions.create(**kwargs) + + return { + "content": response.choices[0].message.content, + "model": response.model, + } + + async def _call_anthropic( + self, + provider: Provider, + messages: list[dict], + model: str, + temperature: float, + max_tokens: Optional[int], + ) -> dict: + """Call Anthropic API.""" + import anthropic + + client = anthropic.AsyncAnthropic( + api_key=provider.api_key, + timeout=self.config.timeout_seconds, + ) + + # Convert messages to Anthropic format + system_msg = None + conversation = [] + for msg in messages: + if msg["role"] == "system": + system_msg = msg["content"] + else: + conversation.append({ + "role": msg["role"], + "content": msg["content"], + }) + + kwargs = { + "model": model, + "messages": conversation, + "temperature": temperature, + "max_tokens": max_tokens or 1024, + } + if system_msg: + kwargs["system"] = system_msg + + response = await 
client.messages.create(**kwargs) + + return { + "content": response.content[0].text, + "model": response.model, + } + + def _record_success(self, provider: Provider, latency_ms: float) -> None: + """Record a successful request.""" + provider.metrics.total_requests += 1 + provider.metrics.successful_requests += 1 + provider.metrics.total_latency_ms += latency_ms + provider.metrics.last_request_time = datetime.now(timezone.utc).isoformat() + provider.metrics.consecutive_failures = 0 + + # Close circuit breaker if half-open + if provider.circuit_state == CircuitState.HALF_OPEN: + provider.half_open_calls += 1 + if provider.half_open_calls >= self.config.circuit_breaker_half_open_max_calls: + self._close_circuit(provider) + + # Update status based on error rate + if provider.metrics.error_rate < 0.1: + provider.status = ProviderStatus.HEALTHY + elif provider.metrics.error_rate < 0.3: + provider.status = ProviderStatus.DEGRADED + + def _record_failure(self, provider: Provider) -> None: + """Record a failed request.""" + provider.metrics.total_requests += 1 + provider.metrics.failed_requests += 1 + provider.metrics.last_error_time = datetime.now(timezone.utc).isoformat() + provider.metrics.consecutive_failures += 1 + + # Check if we should open circuit breaker + if provider.metrics.consecutive_failures >= self.config.circuit_breaker_failure_threshold: + self._open_circuit(provider) + + # Update status + if provider.metrics.error_rate > 0.3: + provider.status = ProviderStatus.DEGRADED + if provider.metrics.error_rate > 0.5: + provider.status = ProviderStatus.UNHEALTHY + + def _open_circuit(self, provider: Provider) -> None: + """Open the circuit breaker for a provider.""" + provider.circuit_state = CircuitState.OPEN + provider.circuit_opened_at = time.time() + provider.status = ProviderStatus.UNHEALTHY + logger.warning("Circuit breaker OPEN for %s", provider.name) + + def _can_close_circuit(self, provider: Provider) -> bool: + """Check if circuit breaker can transition to 
half-open.""" + if provider.circuit_opened_at is None: + return False + elapsed = time.time() - provider.circuit_opened_at + return elapsed >= self.config.circuit_breaker_recovery_timeout + + def _close_circuit(self, provider: Provider) -> None: + """Close the circuit breaker (provider healthy again).""" + provider.circuit_state = CircuitState.CLOSED + provider.circuit_opened_at = None + provider.half_open_calls = 0 + provider.metrics.consecutive_failures = 0 + provider.status = ProviderStatus.HEALTHY + logger.info("Circuit breaker CLOSED for %s", provider.name) + + def get_metrics(self) -> dict: + """Get metrics for all providers.""" + return { + "providers": [ + { + "name": p.name, + "type": p.type, + "status": p.status.value, + "circuit_state": p.circuit_state.value, + "metrics": { + "total_requests": p.metrics.total_requests, + "successful": p.metrics.successful_requests, + "failed": p.metrics.failed_requests, + "error_rate": round(p.metrics.error_rate, 3), + "avg_latency_ms": round(p.metrics.avg_latency_ms, 2), + }, + } + for p in self.providers + ] + } + + def get_status(self) -> dict: + """Get current router status.""" + healthy = sum(1 for p in self.providers if p.status == ProviderStatus.HEALTHY) + + return { + "total_providers": len(self.providers), + "healthy_providers": healthy, + "degraded_providers": sum(1 for p in self.providers if p.status == ProviderStatus.DEGRADED), + "unhealthy_providers": sum(1 for p in self.providers if p.status == ProviderStatus.UNHEALTHY), + "providers": [ + { + "name": p.name, + "type": p.type, + "status": p.status.value, + "priority": p.priority, + "default_model": p.get_default_model(), + } + for p in self.providers + ], + } + + +# Module-level singleton +cascade_router: Optional[CascadeRouter] = None + + +def get_router() -> CascadeRouter: + """Get or create the cascade router singleton.""" + global cascade_router + if cascade_router is None: + cascade_router = CascadeRouter() + return cascade_router diff --git 
# diff --git a/tests/test_router_api.py b/tests/test_router_api.py
# new file mode 100644 index 0000000..1ac5945 --- /dev/null +++ b/tests/test_router_api.py @@ -0,0 +1,358 @@
"""Tests for Cascade Router API endpoints.

Each test builds a small FastAPI app with the router mounted and the
``get_cascade_router`` dependency overridden to return a MagicMock-backed
CascadeRouter, so no real providers are contacted.
"""

from unittest.mock import AsyncMock, MagicMock, patch

import pytest
from fastapi.testclient import TestClient

from router.cascade import CircuitState, Provider, ProviderStatus
from router.api import router, get_cascade_router


def make_mock_router():
    """Create a mock CascadeRouter.

    The mock carries two real Provider objects (one healthy Ollama, one
    degraded OpenAI) so endpoints that read provider state see realistic
    dataclass attributes rather than auto-generated mocks.
    """
    router = MagicMock()

    # Create test providers
    provider1 = Provider(
        name="ollama-local",
        type="ollama",
        enabled=True,
        priority=1,
        url="http://localhost:11434",
        models=[{"name": "llama3.2", "default": True, "context_window": 128000}],
    )
    provider1.status = ProviderStatus.HEALTHY
    provider1.circuit_state = CircuitState.CLOSED

    provider2 = Provider(
        name="openai-backup",
        type="openai",
        enabled=True,
        priority=2,
        api_key="sk-test",
        models=[{"name": "gpt-4o-mini", "default": True, "context_window": 128000}],
    )
    provider2.status = ProviderStatus.DEGRADED
    provider2.circuit_state = CircuitState.CLOSED

    router.providers = [provider1, provider2]
    # Concrete config values for the /config endpoint assertions.
    router.config.timeout_seconds = 30
    router.config.max_retries_per_provider = 2
    router.config.circuit_breaker_failure_threshold = 5

    return router


@pytest.fixture
def mock_router():
    """Create test client with mocked router.

    Yields a ``(client, mock)`` tuple: the TestClient for issuing requests
    and the underlying mock so tests can stub its methods / inspect state.
    """
    from fastapi import FastAPI

    app = FastAPI()
    app.include_router(router)

    # Create mock router
    mock = make_mock_router()

    # Override dependency (async, matching the real dependency's signature)
    async def mock_get_router():
        return mock

    app.dependency_overrides[get_cascade_router] = mock_get_router

    client = TestClient(app)
    return client, mock


class TestCompleteEndpoint:
    """Test /complete endpoint."""

    def test_complete_success(self, mock_router):
        """Test successful completion."""
        client, mock = mock_router
        # complete() is async on the real router, so stub with AsyncMock.
        mock.complete = AsyncMock(return_value={
            "content": "Hello! How can I help?",
            "provider": "ollama-local",
            "model": "llama3.2",
            "latency_ms": 250.5,
        })

        response = client.post("/api/v1/router/complete", json={
            "messages": [{"role": "user", "content": "Hi"}],
            "model": "llama3.2",
            "temperature": 0.7,
        })

        assert response.status_code == 200
        data = response.json()
        assert data["content"] == "Hello! How can I help?"
        assert data["provider"] == "ollama-local"
        assert data["latency_ms"] == 250.5

    def test_complete_all_providers_fail(self, mock_router):
        """Test 503 when all providers fail."""
        client, mock = mock_router
        # The cascade raises RuntimeError when every provider is exhausted;
        # the API is expected to map that to 503 Service Unavailable.
        mock.complete = AsyncMock(side_effect=RuntimeError("All providers failed"))

        response = client.post("/api/v1/router/complete", json={
            "messages": [{"role": "user", "content": "Hi"}],
        })

        assert response.status_code == 503
        assert "All providers failed" in response.json()["detail"]

    def test_complete_default_temperature(self, mock_router):
        """Test completion with default temperature."""
        client, mock = mock_router
        mock.complete = AsyncMock(return_value={
            "content": "Response",
            "provider": "ollama-local",
            "model": "llama3.2",
            "latency_ms": 100.0,
        })

        response = client.post("/api/v1/router/complete", json={
            "messages": [{"role": "user", "content": "Hi"}],
        })

        assert response.status_code == 200
        # Check that complete was called with correct temperature
        call_args = mock.complete.call_args
        assert call_args.kwargs["temperature"] == 0.7


class TestStatusEndpoint:
    """Test /status endpoint."""

    def test_get_status(self, mock_router):
        """Test getting router status."""
        client, mock = mock_router
        mock.get_status = MagicMock(return_value={
            "total_providers": 2,
            "healthy_providers": 1,
            "degraded_providers": 1,
            "unhealthy_providers": 0,
            "providers": [
                {
                    "name": "ollama-local",
                    "type": "ollama",
                    "status": "healthy",
                    "priority": 1,
                    "default_model": "llama3.2",
                },
                {
                    "name": "openai-backup",
                    "type": "openai",
                    "status": "degraded",
                    "priority": 2,
                    "default_model": "gpt-4o-mini",
                },
            ],
        })

        response = client.get("/api/v1/router/status")

        assert response.status_code == 200
        data = response.json()
        assert data["total_providers"] == 2
        assert data["healthy_providers"] == 1
        assert data["degraded_providers"] == 1
        assert len(data["providers"]) == 2


class TestMetricsEndpoint:
    """Test /metrics endpoint."""

    def test_get_metrics(self, mock_router):
        """Test getting detailed metrics."""
        client, mock = mock_router
        # Setup the mock return value on the mock_router object
        mock.get_metrics = MagicMock(return_value={
            "providers": [
                {
                    "name": "ollama-local",
                    "type": "ollama",
                    "status": "healthy",
                    "circuit_state": "closed",
                    "metrics": {
                        "total_requests": 100,
                        "successful": 98,
                        "failed": 2,
                        "error_rate": 0.02,
                        "avg_latency_ms": 150.5,
                    },
                },
            ],
        })

        response = client.get("/api/v1/router/metrics")

        assert response.status_code == 200
        data = response.json()
        assert len(data["providers"]) == 1
        metrics = data["providers"][0]["metrics"]
        assert metrics["total_requests"] == 100
        assert metrics["error_rate"] == 0.02
        assert metrics["avg_latency_ms"] == 150.5


class TestListProvidersEndpoint:
    """Test /providers endpoint."""

    def test_list_providers(self, mock_router):
        """Test listing all providers."""
        client, mock = mock_router

        response = client.get("/api/v1/router/providers")

        assert response.status_code == 200
        data = response.json()
        assert len(data) == 2

        # Check first provider (priority-sorted: ollama-local first)
        assert data[0]["name"] == "ollama-local"
        assert data[0]["type"] == "ollama"
        assert data[0]["enabled"] is True
        assert data[0]["priority"] == 1
        assert data[0]["default_model"] == "llama3.2"
        assert "llama3.2" in data[0]["models"]


class TestControlProviderEndpoint:
    """Test /providers/{name}/control endpoint."""

    def test_disable_provider(self, mock_router):
        """Test disabling a provider."""
        client, mock = mock_router

        response = client.post(
            "/api/v1/router/providers/ollama-local/control",
            json={"action": "disable"}
        )

        assert response.status_code == 200
        assert "disabled" in response.json()["message"]

        # Check that the provider was disabled (mutated in place on the mock)
        provider = mock.providers[0]
        assert provider.enabled is False
        assert provider.status == ProviderStatus.DISABLED

    def test_enable_provider(self, mock_router):
        """Test enabling a provider."""
        client, mock = mock_router
        # First disable it
        mock.providers[0].enabled = False
        mock.providers[0].status = ProviderStatus.DISABLED

        response = client.post(
            "/api/v1/router/providers/ollama-local/control",
            json={"action": "enable"}
        )

        assert response.status_code == 200
        assert "enabled" in response.json()["message"]
        assert mock.providers[0].enabled is True

    def test_reset_circuit(self, mock_router):
        """Test resetting circuit breaker."""
        client, mock = mock_router
        # Set to open state
        mock.providers[0].circuit_state = CircuitState.OPEN
        mock.providers[0].status = ProviderStatus.UNHEALTHY
        mock.providers[0].metrics.consecutive_failures = 10

        response = client.post(
            "/api/v1/router/providers/ollama-local/control",
            json={"action": "reset_circuit"}
        )

        assert response.status_code == 200
        assert "reset" in response.json()["message"]

        # Reset must restore the full closed/healthy state
        provider = mock.providers[0]
        assert provider.circuit_state == CircuitState.CLOSED
        assert provider.status == ProviderStatus.HEALTHY
        assert provider.metrics.consecutive_failures == 0

    def test_control_unknown_provider(self, mock_router):
        """Test controlling unknown provider returns 404."""
        client, mock = mock_router
        response = client.post(
            "/api/v1/router/providers/unknown/control",
            json={"action": "disable"}
        )

        assert response.status_code == 404
        assert "not found" in response.json()["detail"]

    def test_control_unknown_action(self, mock_router):
        """Test unknown action returns 400."""
        client, mock = mock_router
        response = client.post(
            "/api/v1/router/providers/ollama-local/control",
            json={"action": "invalid_action"}
        )

        assert response.status_code == 400
        assert "Unknown action" in response.json()["detail"]


class TestHealthCheckEndpoint:
    """Test /health-check endpoint."""

    def test_health_check_all_healthy(self, mock_router):
        """Test health check when all providers are healthy."""
        client, mock = mock_router

        # The endpoint probes each provider via _check_provider_available.
        with patch.object(mock, "_check_provider_available") as mock_check:
            mock_check.return_value = True

            response = client.post("/api/v1/router/health-check")

            assert response.status_code == 200
            data = response.json()
            assert data["healthy_count"] == 2
            assert len(data["providers"]) == 2

            for p in data["providers"]:
                assert p["healthy"] is True

    def test_health_check_with_failure(self, mock_router):
        """Test health check when some providers fail."""
        client, mock = mock_router

        with patch.object(mock, "_check_provider_available") as mock_check:
            # First provider fails, second succeeds (side_effect consumed in order)
            mock_check.side_effect = [False, True]

            response = client.post("/api/v1/router/health-check")

            assert response.status_code == 200
            data = response.json()
            assert data["healthy_count"] == 1
            assert data["providers"][0]["healthy"] is False
            assert data["providers"][1]["healthy"] is True


class TestGetConfigEndpoint:
    """Test /config endpoint."""

    def test_get_config(self, mock_router):
        """Test getting router configuration."""
        client, mock = mock_router

        response = client.get("/api/v1/router/config")

        assert response.status_code == 200
        data = response.json()

        assert data["timeout_seconds"] == 30
        assert data["max_retries_per_provider"] == 2
        assert "circuit_breaker" in data
        assert data["circuit_breaker"]["failure_threshold"] == 5

        # Check providers list (without secrets)
        assert len(data["providers"]) == 2
        assert "api_key" not in data["providers"][0]
# diff --git
a/tests/test_router_cascade.py b/tests/test_router_cascade.py new file mode 100644 index 0000000..a1a6a2f --- /dev/null +++ b/tests/test_router_cascade.py @@ -0,0 +1,523 @@ +"""Tests for Cascade LLM Router.""" + +import json +import time +from pathlib import Path +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest +import yaml + +from router.cascade import ( + CascadeRouter, + CircuitState, + Provider, + ProviderMetrics, + ProviderStatus, + RouterConfig, +) + + +class TestProviderMetrics: + """Test provider metrics tracking.""" + + def test_empty_metrics(self): + """Test metrics with no requests.""" + metrics = ProviderMetrics() + assert metrics.total_requests == 0 + assert metrics.avg_latency_ms == 0.0 + assert metrics.error_rate == 0.0 + + def test_avg_latency_calculation(self): + """Test average latency calculation.""" + metrics = ProviderMetrics( + total_requests=4, + total_latency_ms=1000.0, # 4 requests, 1000ms total + ) + assert metrics.avg_latency_ms == 250.0 + + def test_error_rate_calculation(self): + """Test error rate calculation.""" + metrics = ProviderMetrics( + total_requests=10, + successful_requests=7, + failed_requests=3, + ) + assert metrics.error_rate == 0.3 + + +class TestProvider: + """Test Provider dataclass.""" + + def test_get_default_model(self): + """Test getting default model.""" + provider = Provider( + name="test", + type="ollama", + enabled=True, + priority=1, + models=[ + {"name": "llama3", "default": True}, + {"name": "mistral"}, + ], + ) + assert provider.get_default_model() == "llama3" + + def test_get_default_model_no_default(self): + """Test getting first model when no default set.""" + provider = Provider( + name="test", + type="ollama", + enabled=True, + priority=1, + models=[ + {"name": "llama3"}, + {"name": "mistral"}, + ], + ) + assert provider.get_default_model() == "llama3" + + def test_get_default_model_empty(self): + """Test with no models.""" + provider = Provider( + name="test", + type="ollama", + 
enabled=True, + priority=1, + models=[], + ) + assert provider.get_default_model() is None + + +class TestRouterConfig: + """Test router configuration.""" + + def test_default_config(self): + """Test default configuration values.""" + config = RouterConfig() + assert config.timeout_seconds == 30 + assert config.max_retries_per_provider == 2 + assert config.retry_delay_seconds == 1 + assert config.circuit_breaker_failure_threshold == 5 + + +class TestCascadeRouterInit: + """Test CascadeRouter initialization.""" + + def test_init_without_config(self, tmp_path): + """Test initialization without config file.""" + router = CascadeRouter(config_path=tmp_path / "nonexistent.yaml") + assert len(router.providers) == 0 + assert router.config.timeout_seconds == 30 + + def test_init_with_config(self, tmp_path): + """Test initialization with config file.""" + config = { + "cascade": { + "timeout_seconds": 60, + "max_retries_per_provider": 3, + }, + "providers": [ + { + "name": "test-ollama", + "type": "ollama", + "enabled": False, # Disabled to avoid availability check + "priority": 1, + "url": "http://localhost:11434", + } + ], + } + config_path = tmp_path / "providers.yaml" + config_path.write_text(yaml.dump(config)) + + router = CascadeRouter(config_path=config_path) + assert router.config.timeout_seconds == 60 + assert router.config.max_retries_per_provider == 3 + assert len(router.providers) == 0 # Provider is disabled + + def test_env_var_expansion(self, tmp_path, monkeypatch): + """Test environment variable expansion in config.""" + monkeypatch.setenv("TEST_API_KEY", "secret123") + + config = { + "cascade": {}, + "providers": [ + { + "name": "test-openai", + "type": "openai", + "enabled": True, + "priority": 1, + "api_key": "${TEST_API_KEY}", + } + ], + } + config_path = tmp_path / "providers.yaml" + config_path.write_text(yaml.dump(config)) + + router = CascadeRouter(config_path=config_path) + assert len(router.providers) == 1 + assert router.providers[0].api_key == 
"secret123" + + +class TestCascadeRouterMetrics: + """Test metrics tracking.""" + + def test_record_success(self): + """Test recording successful request.""" + provider = Provider(name="test", type="ollama", enabled=True, priority=1) + + router = CascadeRouter(config_path=Path("/nonexistent")) + router._record_success(provider, 150.0) + + assert provider.metrics.total_requests == 1 + assert provider.metrics.successful_requests == 1 + assert provider.metrics.total_latency_ms == 150.0 + assert provider.metrics.consecutive_failures == 0 + + def test_record_failure(self): + """Test recording failed request.""" + provider = Provider(name="test", type="ollama", enabled=True, priority=1) + + router = CascadeRouter(config_path=Path("/nonexistent")) + router._record_failure(provider) + + assert provider.metrics.total_requests == 1 + assert provider.metrics.failed_requests == 1 + assert provider.metrics.consecutive_failures == 1 + + def test_circuit_breaker_opens(self): + """Test circuit breaker opens after failures.""" + provider = Provider(name="test", type="ollama", enabled=True, priority=1) + + router = CascadeRouter(config_path=Path("/nonexistent")) + router.config.circuit_breaker_failure_threshold = 3 + + # Record 3 failures + for _ in range(3): + router._record_failure(provider) + + assert provider.circuit_state == CircuitState.OPEN + assert provider.status == ProviderStatus.UNHEALTHY + assert provider.circuit_opened_at is not None + + def test_circuit_breaker_can_close(self): + """Test circuit breaker can transition to closed.""" + provider = Provider(name="test", type="ollama", enabled=True, priority=1) + + router = CascadeRouter(config_path=Path("/nonexistent")) + router.config.circuit_breaker_failure_threshold = 3 + router.config.circuit_breaker_recovery_timeout = 1 + + # Open the circuit + for _ in range(3): + router._record_failure(provider) + + assert provider.circuit_state == CircuitState.OPEN + + # Wait for recovery timeout + time.sleep(1.1) + + # Check if 
can close + assert router._can_close_circuit(provider) is True + + def test_half_open_to_closed(self): + """Test circuit breaker closes after successful test calls.""" + provider = Provider(name="test", type="ollama", enabled=True, priority=1) + + router = CascadeRouter(config_path=Path("/nonexistent")) + router.config.circuit_breaker_half_open_max_calls = 2 + + # Manually set to half-open + provider.circuit_state = CircuitState.HALF_OPEN + provider.half_open_calls = 0 + + # Record successful calls + router._record_success(provider, 100.0) + assert provider.circuit_state == CircuitState.HALF_OPEN # Still half-open + + router._record_success(provider, 100.0) + assert provider.circuit_state == CircuitState.CLOSED # Now closed + assert provider.status == ProviderStatus.HEALTHY + + +class TestCascadeRouterGetMetrics: + """Test get_metrics method.""" + + def test_get_metrics_empty(self): + """Test getting metrics with no providers.""" + router = CascadeRouter(config_path=Path("/nonexistent")) + metrics = router.get_metrics() + + assert "providers" in metrics + assert len(metrics["providers"]) == 0 + + def test_get_metrics_with_providers(self): + """Test getting metrics with providers.""" + router = CascadeRouter(config_path=Path("/nonexistent")) + + # Add a test provider + provider = Provider( + name="test", + type="ollama", + enabled=True, + priority=1, + ) + provider.metrics.total_requests = 10 + provider.metrics.successful_requests = 8 + provider.metrics.failed_requests = 2 + provider.metrics.total_latency_ms = 2000.0 + + router.providers = [provider] + + metrics = router.get_metrics() + + assert len(metrics["providers"]) == 1 + p_metrics = metrics["providers"][0] + assert p_metrics["name"] == "test" + assert p_metrics["metrics"]["total_requests"] == 10 + assert p_metrics["metrics"]["error_rate"] == 0.2 + assert p_metrics["metrics"]["avg_latency_ms"] == 200.0 + + +class TestCascadeRouterGetStatus: + """Test get_status method.""" + + def test_get_status(self): + 
"""Test getting router status.""" + router = CascadeRouter(config_path=Path("/nonexistent")) + + provider = Provider( + name="test", + type="ollama", + enabled=True, + priority=1, + models=[{"name": "llama3", "default": True}], + ) + router.providers = [provider] + + status = router.get_status() + + assert status["total_providers"] == 1 + assert status["healthy_providers"] == 1 + assert status["degraded_providers"] == 0 + assert status["unhealthy_providers"] == 0 + assert len(status["providers"]) == 1 + + +@pytest.mark.asyncio +class TestCascadeRouterComplete: + """Test complete method with failover.""" + + async def test_complete_with_ollama(self): + """Test successful completion with Ollama.""" + router = CascadeRouter(config_path=Path("/nonexistent")) + + provider = Provider( + name="ollama-local", + type="ollama", + enabled=True, + priority=1, + url="http://localhost:11434", + models=[{"name": "llama3.2", "default": True}], + ) + router.providers = [provider] + + # Mock the Ollama call + with patch.object(router, "_call_ollama") as mock_call: + mock_call.return_value = AsyncMock()() + mock_call.return_value = { + "content": "Hello, world!", + "model": "llama3.2", + } + + result = await router.complete( + messages=[{"role": "user", "content": "Hi"}], + ) + + assert result["content"] == "Hello, world!" 
+ assert result["provider"] == "ollama-local" + assert result["model"] == "llama3.2" + + async def test_failover_to_second_provider(self): + """Test failover when first provider fails.""" + router = CascadeRouter(config_path=Path("/nonexistent")) + + provider1 = Provider( + name="ollama-failing", + type="ollama", + enabled=True, + priority=1, + url="http://localhost:11434", + models=[{"name": "llama3.2", "default": True}], + ) + provider2 = Provider( + name="ollama-backup", + type="ollama", + enabled=True, + priority=2, + url="http://backup:11434", + models=[{"name": "llama3.2", "default": True}], + ) + router.providers = [provider1, provider2] + + # First provider fails, second succeeds + call_count = [0] + + async def side_effect(*args, **kwargs): + call_count[0] += 1 + # First 2 retries for provider1 fail, then provider2 succeeds + if call_count[0] <= router.config.max_retries_per_provider: + raise RuntimeError("Connection failed") + return {"content": "Backup response", "model": "llama3.2"} + + with patch.object(router, "_call_ollama") as mock_call: + mock_call.side_effect = side_effect + + result = await router.complete( + messages=[{"role": "user", "content": "Hi"}], + ) + + assert result["content"] == "Backup response" + assert result["provider"] == "ollama-backup" + + async def test_all_providers_fail(self): + """Test error when all providers fail.""" + router = CascadeRouter(config_path=Path("/nonexistent")) + + provider = Provider( + name="failing", + type="ollama", + enabled=True, + priority=1, + models=[{"name": "llama3.2", "default": True}], + ) + router.providers = [provider] + + with patch.object(router, "_call_ollama") as mock_call: + mock_call.side_effect = RuntimeError("Always fails") + + with pytest.raises(RuntimeError) as exc_info: + await router.complete(messages=[{"role": "user", "content": "Hi"}]) + + assert "All providers failed" in str(exc_info.value) + + async def test_skips_unhealthy_provider(self): + """Test that unhealthy providers are 
skipped.""" + router = CascadeRouter(config_path=Path("/nonexistent")) + + provider1 = Provider( + name="unhealthy", + type="ollama", + enabled=True, + priority=1, + status=ProviderStatus.UNHEALTHY, + circuit_state=CircuitState.OPEN, + circuit_opened_at=time.time(), # Just opened + models=[{"name": "llama3.2", "default": True}], + ) + provider2 = Provider( + name="healthy", + type="ollama", + enabled=True, + priority=2, + models=[{"name": "llama3.2", "default": True}], + ) + router.providers = [provider1, provider2] + + with patch.object(router, "_call_ollama") as mock_call: + mock_call.return_value = {"content": "Success", "model": "llama3.2"} + + result = await router.complete( + messages=[{"role": "user", "content": "Hi"}], + ) + + # Should use the healthy provider + assert result["provider"] == "healthy" + + +class TestProviderAvailabilityCheck: + """Test provider availability checking.""" + + def test_check_ollama_without_requests(self): + """Test Ollama returns True when requests not available (fallback).""" + router = CascadeRouter(config_path=Path("/nonexistent")) + + provider = Provider( + name="ollama", + type="ollama", + enabled=True, + priority=1, + url="http://localhost:11434", + ) + + # When requests is None, assume available + import router.cascade as cascade_module + old_requests = cascade_module.requests + cascade_module.requests = None + try: + assert router._check_provider_available(provider) is True + finally: + cascade_module.requests = old_requests + + def test_check_openai_with_key(self): + """Test OpenAI with API key.""" + router = CascadeRouter(config_path=Path("/nonexistent")) + + provider = Provider( + name="openai", + type="openai", + enabled=True, + priority=1, + api_key="sk-test123", + ) + + assert router._check_provider_available(provider) is True + + def test_check_openai_without_key(self): + """Test OpenAI without API key.""" + router = CascadeRouter(config_path=Path("/nonexistent")) + + provider = Provider( + name="openai", + 
type="openai",
+            enabled=True,
+            priority=1,
+            api_key=None,
+        )
+
+        assert router._check_provider_available(provider) is False
+
+    def test_check_airllm_installed(self):
+        """Test AirLLM when installed."""
+        router = CascadeRouter(config_path=Path("/nonexistent"))
+
+        provider = Provider(
+            name="airllm",
+            type="airllm",
+            enabled=True,
+            priority=1,
+        )
+
+        with patch("builtins.__import__") as mock_import:
+            mock_import.return_value = MagicMock()
+            assert router._check_provider_available(provider) is True
+
+    def test_check_airllm_not_installed(self):
+        """Test AirLLM when not installed."""
+        router = CascadeRouter(config_path=Path("/nonexistent"))
+
+        provider = Provider(
+            name="airllm",
+            type="airllm",
+            enabled=True,
+            priority=1,
+        )
+
+        # Patch __import__ to simulate airllm not being available.
+        # Capture the real __import__ *before* patching: in imported modules
+        # __builtins__ is a plain dict (so __builtins__.__import__ raises
+        # AttributeError), and once the patch is active builtins.__import__
+        # is the mock itself, so calling it from the side_effect would
+        # recurse indefinitely for any non-"airllm" import.
+        import builtins
+        real_import = builtins.__import__
+
+        def raise_import_error(name, *args, **kwargs):
+            if name == "airllm":
+                raise ImportError("No module named 'airllm'")
+            return real_import(name, *args, **kwargs)
+
+        with patch("builtins.__import__", side_effect=raise_import_error):
+            assert router._check_provider_available(provider) is False

From 56437751d34ac669149a4b302cc8c167bffe736d Mon Sep 17 00:00:00 2001
From: Alexander Payne
Date: Wed, 25 Feb 2026 19:59:42 -0500
Subject: [PATCH 3/3] Phase 4: Tool Registry Auto-Discovery

- @mcp_tool decorator for marking functions as tools
- ToolDiscovery class for introspecting modules and packages
- Automatic JSON schema generation from type hints
- AST-based discovery for files (without importing)
- Auto-bootstrap on startup (packages=['tools'] by default)
- Support for tags, categories, and metadata
- Updated registry with register_tool() convenience method
- Environment variable MCP_AUTO_BOOTSTRAP to disable
- 39 tests with proper isolation and cleanup

Files Added:
- src/mcp/discovery.py: Tool discovery and introspection
- src/mcp/bootstrap.py: Auto-bootstrap functionality
- tests/test_mcp_discovery.py: 26 tests
- tests/test_mcp_bootstrap.py: 13 tests
Files Modified: - src/mcp/registry.py: Added tags, source_module, auto_discovered fields - src/mcp/__init__.py: Export discovery and bootstrap modules - src/dashboard/app.py: Auto-bootstrap on startup --- src/dashboard/app.py | 9 + src/mcp/__init__.py | 17 +- src/mcp/bootstrap.py | 175 ++++++++++---- src/mcp/discovery.py | 441 ++++++++++++++++++++++++++++++++++++ src/mcp/registry.py | 106 ++++++++- tests/test_mcp_bootstrap.py | 265 ++++++++++++++++++++++ tests/test_mcp_discovery.py | 329 +++++++++++++++++++++++++++ 7 files changed, 1290 insertions(+), 52 deletions(-) create mode 100644 src/mcp/discovery.py create mode 100644 tests/test_mcp_bootstrap.py create mode 100644 tests/test_mcp_discovery.py diff --git a/src/dashboard/app.py b/src/dashboard/app.py index 2e98ae5..6394bc9 100644 --- a/src/dashboard/app.py +++ b/src/dashboard/app.py @@ -102,6 +102,15 @@ async def lifespan(app: FastAPI): except Exception as exc: logger.error("Failed to spawn persona agents: %s", exc) + # Auto-bootstrap MCP tools + from mcp.bootstrap import auto_bootstrap, get_bootstrap_status + try: + registered = auto_bootstrap() + if registered: + logger.info("MCP auto-bootstrap: %d tools registered", len(registered)) + except Exception as exc: + logger.warning("MCP auto-bootstrap failed: %s", exc) + # Initialise Spark Intelligence engine from spark.engine import spark_engine if spark_engine.enabled: diff --git a/src/mcp/__init__.py b/src/mcp/__init__.py index 5690035..38d9eb5 100644 --- a/src/mcp/__init__.py +++ b/src/mcp/__init__.py @@ -1,17 +1,30 @@ """MCP (Model Context Protocol) package. -Provides tool registry, server, and schema management. +Provides tool registry, server, schema management, and auto-discovery. 
""" -from mcp.registry import tool_registry, register_tool +from mcp.registry import tool_registry, register_tool, ToolRegistry from mcp.server import mcp_server, MCPServer, MCPHTTPServer from mcp.schemas.base import create_tool_schema +from mcp.discovery import ToolDiscovery, mcp_tool, get_discovery +from mcp.bootstrap import auto_bootstrap, get_bootstrap_status __all__ = [ + # Registry "tool_registry", "register_tool", + "ToolRegistry", + # Server "mcp_server", "MCPServer", "MCPHTTPServer", + # Schemas "create_tool_schema", + # Discovery + "ToolDiscovery", + "mcp_tool", + "get_discovery", + # Bootstrap + "auto_bootstrap", + "get_bootstrap_status", ] diff --git a/src/mcp/bootstrap.py b/src/mcp/bootstrap.py index 1ca9cd2..7e21b32 100644 --- a/src/mcp/bootstrap.py +++ b/src/mcp/bootstrap.py @@ -1,71 +1,148 @@ -"""Bootstrap the MCP system by loading all tools. +"""MCP Auto-Bootstrap — Auto-discover and register tools on startup. -This module is responsible for: -1. Loading all tool modules from src/tools/ -2. Registering them with the tool registry -3. Verifying tool health -4. 
Reporting status +Usage: + from mcp.bootstrap import auto_bootstrap + + # Auto-discover from 'tools' package + registered = auto_bootstrap() + + # Or specify custom packages + registered = auto_bootstrap(packages=["tools", "custom_tools"]) """ -import importlib import logging +import os from pathlib import Path +from typing import Optional -from mcp.registry import tool_registry +from .discovery import ToolDiscovery, get_discovery +from .registry import ToolRegistry, tool_registry logger = logging.getLogger(__name__) -# Tool modules to load -TOOL_MODULES = [ - "tools.web_search", - "tools.file_ops", - "tools.code_exec", - "tools.memory_tool", -] +# Default packages to scan for tools +DEFAULT_TOOL_PACKAGES = ["tools"] + +# Environment variable to disable auto-bootstrap +AUTO_BOOTSTRAP_ENV_VAR = "MCP_AUTO_BOOTSTRAP" -def bootstrap_mcp() -> dict: - """Initialize the MCP system by loading all tools. +def auto_bootstrap( + packages: Optional[list[str]] = None, + registry: Optional[ToolRegistry] = None, + force: bool = False, +) -> list[str]: + """Auto-discover and register MCP tools. 
+ + Args: + packages: Packages to scan (defaults to ["tools"]) + registry: Registry to register tools with (defaults to singleton) + force: Force bootstrap even if disabled by env var Returns: - Status dict with loaded tools and any errors + List of registered tool names """ - loaded = [] - errors = [] + # Check if auto-bootstrap is disabled + if not force and os.environ.get(AUTO_BOOTSTRAP_ENV_VAR, "1") == "0": + logger.info("MCP auto-bootstrap disabled via %s", AUTO_BOOTSTRAP_ENV_VAR) + return [] - for module_name in TOOL_MODULES: + packages = packages or DEFAULT_TOOL_PACKAGES + registry = registry or tool_registry + discovery = get_discovery(registry=registry) + + registered: list[str] = [] + + logger.info("Starting MCP auto-bootstrap from packages: %s", packages) + + for package in packages: try: - # Import the module (this triggers @register_tool decorators) - importlib.import_module(module_name) - loaded.append(module_name) - logger.info("Loaded tool module: %s", module_name) + # Check if package exists + try: + __import__(package) + except ImportError: + logger.debug("Package %s not found, skipping", package) + continue + + # Discover and register + tools = discovery.auto_register(package) + registered.extend(tools) + except Exception as exc: - errors.append({"module": module_name, "error": str(exc)}) - logger.error("Failed to load tool module %s: %s", module_name, exc) + logger.warning("Failed to bootstrap from %s: %s", package, exc) - # Get registry status - registry_status = tool_registry.to_dict() - - status = { - "loaded_modules": loaded, - "errors": errors, - "total_tools": len(registry_status.get("tools", [])), - "tools_by_category": registry_status.get("categories", {}), - "tool_names": tool_registry.list_tools(), - } - - logger.info( - "MCP Bootstrap complete: %d tools loaded from %d modules", - status["total_tools"], - len(loaded) - ) - - return status + logger.info("MCP auto-bootstrap complete: %d tools registered", len(registered)) + return 
registered -def get_tool_status() -> dict: - """Get current status of all tools.""" +def bootstrap_from_directory( + directory: Path, + registry: Optional[ToolRegistry] = None, +) -> list[str]: + """Bootstrap tools from a directory of Python files. + + Args: + directory: Directory containing Python files with tools + registry: Registry to register tools with + + Returns: + List of registered tool names + """ + registry = registry or tool_registry + discovery = get_discovery(registry=registry) + + registered: list[str] = [] + + if not directory.exists(): + logger.warning("Tools directory not found: %s", directory) + return registered + + logger.info("Bootstrapping tools from directory: %s", directory) + + # Find all Python files + for py_file in directory.rglob("*.py"): + if py_file.name.startswith("_"): + continue + + try: + discovered = discovery.discover_file(py_file) + + for tool in discovered: + if tool.function is None: + # Need to import and resolve the function + continue + + try: + registry.register_tool( + name=tool.name, + function=tool.function, + description=tool.description, + category=tool.category, + tags=tool.tags, + ) + registered.append(tool.name) + except Exception as exc: + logger.error("Failed to register %s: %s", tool.name, exc) + + except Exception as exc: + logger.warning("Failed to process %s: %s", py_file, exc) + + logger.info("Directory bootstrap complete: %d tools registered", len(registered)) + return registered + + +def get_bootstrap_status() -> dict: + """Get auto-bootstrap status. 
+ + Returns: + Dict with bootstrap status info + """ + discovery = get_discovery() + registry = tool_registry + return { - "tools": tool_registry.to_dict(), - "metrics": tool_registry.get_metrics(), + "auto_bootstrap_enabled": os.environ.get(AUTO_BOOTSTRAP_ENV_VAR, "1") != "0", + "discovered_tools_count": len(discovery.get_discovered()), + "registered_tools_count": len(registry.list_tools()), + "default_packages": DEFAULT_TOOL_PACKAGES, } diff --git a/src/mcp/discovery.py b/src/mcp/discovery.py new file mode 100644 index 0000000..a6ec024 --- /dev/null +++ b/src/mcp/discovery.py @@ -0,0 +1,441 @@ +"""MCP Tool Auto-Discovery — Introspect Python modules to find tools. + +Automatically discovers functions marked with @mcp_tool decorator +and registers them with the MCP registry. Generates JSON schemas +from type hints. +""" + +import ast +import importlib +import inspect +import logging +import pkgutil +from dataclasses import dataclass +from pathlib import Path +from typing import Any, Callable, Optional, get_type_hints + +from .registry import ToolRegistry, tool_registry + +logger = logging.getLogger(__name__) + + +# Decorator to mark functions as MCP tools +def mcp_tool( + name: Optional[str] = None, + description: Optional[str] = None, + category: str = "general", + tags: Optional[list[str]] = None, +): + """Decorator to mark a function as an MCP tool. + + Args: + name: Tool name (defaults to function name) + description: Tool description (defaults to docstring) + category: Tool category for organization + tags: Additional tags for filtering + + Example: + @mcp_tool(name="weather", category="external") + def get_weather(city: str) -> dict: + '''Get weather for a city.''' + ... 
+ """ + def decorator(func: Callable) -> Callable: + func._mcp_tool = True + func._mcp_name = name or func.__name__ + func._mcp_description = description or (func.__doc__ or "").strip() + func._mcp_category = category + func._mcp_tags = tags or [] + return func + return decorator + + +@dataclass +class DiscoveredTool: + """A tool discovered via introspection.""" + name: str + description: str + function: Callable + module: str + category: str + tags: list[str] + parameters_schema: dict[str, Any] + returns_schema: dict[str, Any] + source_file: Optional[str] = None + line_number: int = 0 + + +class ToolDiscovery: + """Discovers and registers MCP tools from Python modules. + + Usage: + discovery = ToolDiscovery() + + # Discover from a module + tools = discovery.discover_module("tools.git") + + # Auto-register with registry + discovery.auto_register("tools") + + # Discover from all installed packages + tools = discovery.discover_all_packages() + """ + + def __init__(self, registry: Optional[ToolRegistry] = None) -> None: + self.registry = registry or tool_registry + self._discovered: list[DiscoveredTool] = [] + + def discover_module(self, module_name: str) -> list[DiscoveredTool]: + """Discover all MCP tools in a module. 
+ + Args: + module_name: Dotted path to module (e.g., "tools.git") + + Returns: + List of discovered tools + """ + discovered = [] + + try: + module = importlib.import_module(module_name) + except ImportError as exc: + logger.warning("Failed to import module %s: %s", module_name, exc) + return discovered + + # Get module file path for source location + module_file = getattr(module, "__file__", None) + + # Iterate through module members + for name, obj in inspect.getmembers(module): + # Skip private and non-callable + if name.startswith("_") or not callable(obj): + continue + + # Check if marked as MCP tool + if not getattr(obj, "_mcp_tool", False): + continue + + # Get source location + try: + source_file = inspect.getfile(obj) + line_number = inspect.getsourcelines(obj)[1] + except (OSError, TypeError): + source_file = module_file + line_number = 0 + + # Build schemas from type hints + try: + sig = inspect.signature(obj) + parameters_schema = self._build_parameters_schema(sig) + returns_schema = self._build_returns_schema(sig, obj) + except Exception as exc: + logger.warning("Failed to build schema for %s: %s", name, exc) + parameters_schema = {"type": "object", "properties": {}} + returns_schema = {} + + tool = DiscoveredTool( + name=getattr(obj, "_mcp_name", name), + description=getattr(obj, "_mcp_description", obj.__doc__ or ""), + function=obj, + module=module_name, + category=getattr(obj, "_mcp_category", "general"), + tags=getattr(obj, "_mcp_tags", []), + parameters_schema=parameters_schema, + returns_schema=returns_schema, + source_file=source_file, + line_number=line_number, + ) + + discovered.append(tool) + logger.debug("Discovered tool: %s from %s", tool.name, module_name) + + self._discovered.extend(discovered) + logger.info("Discovered %d tools from module %s", len(discovered), module_name) + return discovered + + def discover_package(self, package_name: str, recursive: bool = True) -> list[DiscoveredTool]: + """Discover tools from all modules in a 
package. + + Args: + package_name: Package name (e.g., "tools") + recursive: Whether to search subpackages + + Returns: + List of discovered tools + """ + discovered = [] + + try: + package = importlib.import_module(package_name) + except ImportError as exc: + logger.warning("Failed to import package %s: %s", package_name, exc) + return discovered + + package_path = getattr(package, "__path__", []) + if not package_path: + # Not a package, treat as module + return self.discover_module(package_name) + + # Walk package modules + for _, name, is_pkg in pkgutil.iter_modules(package_path, prefix=f"{package_name}."): + if is_pkg and recursive: + discovered.extend(self.discover_package(name, recursive=True)) + else: + discovered.extend(self.discover_module(name)) + + return discovered + + def discover_file(self, file_path: Path) -> list[DiscoveredTool]: + """Discover tools from a Python file. + + Args: + file_path: Path to Python file + + Returns: + List of discovered tools + """ + discovered = [] + + try: + source = file_path.read_text() + tree = ast.parse(source) + except Exception as exc: + logger.warning("Failed to parse %s: %s", file_path, exc) + return discovered + + # Find all decorated functions + for node in ast.walk(tree): + if not isinstance(node, ast.FunctionDef): + continue + + # Check for @mcp_tool decorator + is_tool = False + tool_name = node.name + tool_description = ast.get_docstring(node) or "" + tool_category = "general" + tool_tags: list[str] = [] + + for decorator in node.decorator_list: + if isinstance(decorator, ast.Call): + if isinstance(decorator.func, ast.Name) and decorator.func.id == "mcp_tool": + is_tool = True + # Extract decorator arguments + for kw in decorator.keywords: + if kw.arg == "name" and isinstance(kw.value, ast.Constant): + tool_name = kw.value.value + elif kw.arg == "description" and isinstance(kw.value, ast.Constant): + tool_description = kw.value.value + elif kw.arg == "category" and isinstance(kw.value, ast.Constant): + 
tool_category = kw.value.value + elif kw.arg == "tags" and isinstance(kw.value, ast.List): + tool_tags = [ + elt.value for elt in kw.value.elts + if isinstance(elt, ast.Constant) + ] + elif isinstance(decorator, ast.Name) and decorator.id == "mcp_tool": + is_tool = True + + if not is_tool: + continue + + # Build parameter schema from AST + parameters_schema = self._build_schema_from_ast(node) + + # We can't get the actual function without importing + # So create a placeholder that will be resolved later + tool = DiscoveredTool( + name=tool_name, + description=tool_description, + function=None, # Will be resolved when registered + module=str(file_path), + category=tool_category, + tags=tool_tags, + parameters_schema=parameters_schema, + returns_schema={"type": "object"}, + source_file=str(file_path), + line_number=node.lineno, + ) + + discovered.append(tool) + + self._discovered.extend(discovered) + logger.info("Discovered %d tools from file %s", len(discovered), file_path) + return discovered + + def auto_register(self, package_name: str = "tools") -> list[str]: + """Automatically discover and register tools. 
+ + Args: + package_name: Package to scan for tools + + Returns: + List of registered tool names + """ + discovered = self.discover_package(package_name) + registered = [] + + for tool in discovered: + if tool.function is None: + logger.warning("Skipping %s: no function resolved", tool.name) + continue + + try: + self.registry.register_tool( + name=tool.name, + function=tool.function, + description=tool.description, + category=tool.category, + tags=tool.tags, + ) + registered.append(tool.name) + logger.debug("Registered tool: %s", tool.name) + except Exception as exc: + logger.error("Failed to register %s: %s", tool.name, exc) + + logger.info("Auto-registered %d/%d tools", len(registered), len(discovered)) + return registered + + def _build_parameters_schema(self, sig: inspect.Signature) -> dict[str, Any]: + """Build JSON schema for function parameters.""" + properties = {} + required = [] + + for name, param in sig.parameters.items(): + if param.kind in (param.VAR_POSITIONAL, param.VAR_KEYWORD): + continue + + schema = self._type_to_schema(param.annotation) + + if param.default is param.empty: + required.append(name) + else: + schema["default"] = param.default + + properties[name] = schema + + return { + "type": "object", + "properties": properties, + "required": required, + } + + def _build_returns_schema( + self, sig: inspect.Signature, func: Callable + ) -> dict[str, Any]: + """Build JSON schema for return type.""" + return_annotation = sig.return_annotation + + if return_annotation is sig.empty: + return {"type": "object"} + + return self._type_to_schema(return_annotation) + + def _build_schema_from_ast(self, node: ast.FunctionDef) -> dict[str, Any]: + """Build parameter schema from AST node.""" + properties = {} + required = [] + + # Get defaults (reversed, since they're at the end) + defaults = [None] * (len(node.args.args) - len(node.args.defaults)) + list(node.args.defaults) + + for arg, default in zip(node.args.args, defaults): + arg_name = arg.arg + 
arg_type = "string"  # Default
+
+            # Try to get type from annotation
+            if arg.annotation:
+                if isinstance(arg.annotation, ast.Name):
+                    arg_type = self._ast_type_to_json_type(arg.annotation.id)
+                elif isinstance(arg.annotation, ast.Constant):
+                    arg_type = self._ast_type_to_json_type(str(arg.annotation.value))
+
+            schema = {"type": arg_type}
+
+            if default is None:
+                required.append(arg_name)
+
+            properties[arg_name] = schema
+
+        return {
+            "type": "object",
+            "properties": properties,
+            "required": required,
+        }
+
+    def _type_to_schema(self, annotation: Any) -> dict[str, Any]:
+        """Convert a Python type annotation to a JSON schema fragment.
+
+        Handles Optional[T] in both spellings (typing.Union[T, None] and
+        PEP 604 ``T | None``), list/tuple/dict generics, and the basic
+        scalar types; anything unrecognised falls back to {"type": "object"}.
+        """
+        if annotation is inspect.Parameter.empty:
+            return {"type": "string"}
+
+        origin = getattr(annotation, "__origin__", None)
+        args = getattr(annotation, "__args__", ())
+
+        # Handle Optional[T] = Union[T, None]. Checking the args for
+        # NoneType (instead of comparing str(origin) to "typing.Union")
+        # also covers PEP 604 unions, whose origin is types.UnionType.
+        if origin is not None:
+            if type(None) in args:
+                # Optional type
+                non_none_args = [a for a in args if a is not type(None)]
+                if len(non_none_args) == 1:
+                    schema = self._type_to_schema(non_none_args[0])
+                    return schema
+                return {"type": "object"}
+
+        # Handle List[T], Dict[K,V]
+        if origin in (list, tuple):
+            items_schema = {"type": "object"}
+            if args:
+                items_schema = self._type_to_schema(args[0])
+            return {"type": "array", "items": items_schema}
+
+        if origin is dict:
+            return {"type": "object"}
+
+        # Handle basic types
+        if annotation in (str,):
+            return {"type": "string"}
+        elif annotation in (int, float):
+            return {"type": "number"}
+        elif annotation in (bool,):
+            return {"type": "boolean"}
+        elif annotation in (list, tuple):
+            return {"type": "array"}
+        elif annotation in (dict,):
+            return {"type": "object"}
+
+        return {"type": "object"}
+
+    def _ast_type_to_json_type(self, type_name: str) -> str:
+        """Convert AST type name to JSON schema type."""
+        type_map = {
+            "str": "string",
+            "int": "number",
+            "float": "number",
+            "bool": "boolean",
+            "list": "array",
+            "dict": "object",
+            "List":
"array", + "Dict": "object", + "Optional": "object", + "Any": "object", + } + return type_map.get(type_name, "object") + + def get_discovered(self) -> list[DiscoveredTool]: + """Get all discovered tools.""" + return list(self._discovered) + + def clear(self) -> None: + """Clear discovered tools cache.""" + self._discovered.clear() + + +# Module-level singleton +discovery: Optional[ToolDiscovery] = None + + +def get_discovery(registry: Optional[ToolRegistry] = None) -> ToolDiscovery: + """Get or create the tool discovery singleton.""" + global discovery + if discovery is None: + discovery = ToolDiscovery(registry=registry) + return discovery diff --git a/src/mcp/registry.py b/src/mcp/registry.py index 292f1cd..29d8717 100644 --- a/src/mcp/registry.py +++ b/src/mcp/registry.py @@ -42,6 +42,9 @@ class ToolRecord: avg_latency_ms: float = 0.0 added_at: float = field(default_factory=time.time) requires_confirmation: bool = False + tags: list[str] = field(default_factory=list) + source_module: Optional[str] = None + auto_discovered: bool = False class ToolRegistry: @@ -59,6 +62,9 @@ class ToolRegistry: handler: Callable, category: str = "general", requires_confirmation: bool = False, + tags: Optional[list[str]] = None, + source_module: Optional[str] = None, + auto_discovered: bool = False, ) -> ToolRecord: """Register a new tool. 
@@ -68,6 +74,9 @@ class ToolRegistry: handler: Function to execute category: Tool category for organization requires_confirmation: If True, user must approve before execution + tags: Tags for filtering and organization + source_module: Module where tool was defined + auto_discovered: Whether tool was auto-discovered Returns: The registered ToolRecord @@ -81,6 +90,9 @@ class ToolRegistry: handler=handler, category=category, requires_confirmation=requires_confirmation, + tags=tags or [], + source_module=source_module, + auto_discovered=auto_discovered, ) self._tools[name] = record @@ -94,6 +106,75 @@ class ToolRegistry: logger.info("Registered tool: %s (category: %s)", name, category) return record + def register_tool( + self, + name: str, + function: Callable, + description: Optional[str] = None, + category: str = "general", + tags: Optional[list[str]] = None, + source_module: Optional[str] = None, + ) -> ToolRecord: + """Register a tool from a function (convenience method for discovery). 
+ + Args: + name: Tool name + function: Function to register + description: Tool description (defaults to docstring) + category: Tool category + tags: Tags for organization + source_module: Source module path + + Returns: + The registered ToolRecord + """ + # Build schema from function signature + sig = inspect.signature(function) + + properties = {} + required = [] + + for param_name, param in sig.parameters.items(): + if param.kind in (param.VAR_POSITIONAL, param.VAR_KEYWORD): + continue + + param_schema: dict = {"type": "string"} + + # Try to infer type from annotation + if param.annotation != inspect.Parameter.empty: + if param.annotation in (int, float): + param_schema = {"type": "number"} + elif param.annotation == bool: + param_schema = {"type": "boolean"} + elif param.annotation == list: + param_schema = {"type": "array"} + elif param.annotation == dict: + param_schema = {"type": "object"} + + if param.default is param.empty: + required.append(param_name) + else: + param_schema["default"] = param.default + + properties[param_name] = param_schema + + schema = create_tool_schema( + name=name, + description=description or (function.__doc__ or f"Execute {name}"), + parameters=properties, + required=required, + ) + + return self.register( + name=name, + schema=schema, + handler=function, + category=category, + tags=tags, + source_module=source_module or function.__module__, + auto_discovered=True, + ) + def unregister(self, name: str) -> bool: """Remove a tool from the registry.""" if name not in self._tools: @@ -137,14 +218,18 @@ class ToolRegistry: self, query: Optional[str] = None, category: Optional[str] = None, + tags: Optional[list[str]] = None, healthy_only: bool = True, + auto_discovered_only: bool = False, ) -> list[ToolRecord]: """Discover tools matching criteria. 
Args: query: Search in tool names and descriptions category: Filter by category + tags: Filter by tags (must have all specified tags) healthy_only: Only return healthy tools + auto_discovered_only: Only return auto-discovered tools Returns: List of matching ToolRecords @@ -156,17 +241,27 @@ class ToolRegistry: if category and record.category != category: continue + # Tags filter + if tags: + if not all(tag in record.tags for tag in tags): + continue + # Health filter if healthy_only and record.health_status == "unhealthy": continue + # Auto-discovered filter + if auto_discovered_only and not record.auto_discovered: + continue + # Query filter if query: query_lower = query.lower() name_match = query_lower in name.lower() desc = record.schema.get("description", "") desc_match = query_lower in desc.lower() - if not (name_match or desc_match): + tag_match = any(query_lower in tag.lower() for tag in record.tags) + if not (name_match or desc_match or tag_match): continue results.append(record) @@ -274,11 +369,15 @@ class ToolRegistry: "category": r.category, "health": r.health_status, "requires_confirmation": r.requires_confirmation, + "tags": r.tags, + "source_module": r.source_module, + "auto_discovered": r.auto_discovered, } for r in self._tools.values() ], "categories": self._categories, "total_tools": len(self._tools), + "auto_discovered_count": sum(1 for r in self._tools.values() if r.auto_discovered), } @@ -286,6 +385,11 @@ class ToolRegistry: tool_registry = ToolRegistry() +def get_registry() -> ToolRegistry: + """Get the global tool registry singleton.""" + return tool_registry + + def register_tool( name: Optional[str] = None, category: str = "general", diff --git a/tests/test_mcp_bootstrap.py b/tests/test_mcp_bootstrap.py new file mode 100644 index 0000000..6b12db4 --- /dev/null +++ b/tests/test_mcp_bootstrap.py @@ -0,0 +1,265 @@ +"""Tests for MCP Auto-Bootstrap. 
+ +Tests follow pytest best practices: +- No module-level state +- Proper fixture cleanup +- Isolated tests +""" + +import os +from pathlib import Path +from unittest.mock import patch + +import pytest + +from mcp.bootstrap import ( + auto_bootstrap, + bootstrap_from_directory, + get_bootstrap_status, + DEFAULT_TOOL_PACKAGES, + AUTO_BOOTSTRAP_ENV_VAR, +) +from mcp.discovery import mcp_tool, ToolDiscovery +from mcp.registry import ToolRegistry + + +@pytest.fixture +def fresh_registry(): + """Create a fresh registry for each test.""" + return ToolRegistry() + + +@pytest.fixture +def fresh_discovery(fresh_registry): + """Create a fresh discovery instance for each test.""" + return ToolDiscovery(registry=fresh_registry) + + +class TestAutoBootstrap: + """Test auto_bootstrap function.""" + + def test_auto_bootstrap_disabled_by_env(self, fresh_registry): + """Test that auto-bootstrap can be disabled via env var.""" + with patch.dict(os.environ, {AUTO_BOOTSTRAP_ENV_VAR: "0"}): + registered = auto_bootstrap(registry=fresh_registry) + + assert len(registered) == 0 + + def test_auto_bootstrap_forced_overrides_env(self, fresh_registry): + """Test that force=True overrides env var.""" + with patch.dict(os.environ, {AUTO_BOOTSTRAP_ENV_VAR: "0"}): + # Empty packages list - just test that it runs + registered = auto_bootstrap( + packages=[], + registry=fresh_registry, + force=True, + ) + + assert len(registered) == 0 # No packages, but didn't abort + + def test_auto_bootstrap_nonexistent_package(self, fresh_registry): + """Test bootstrap from non-existent package.""" + registered = auto_bootstrap( + packages=["nonexistent_package_xyz_12345"], + registry=fresh_registry, + force=True, + ) + + assert len(registered) == 0 + + def test_auto_bootstrap_empty_packages(self, fresh_registry): + """Test bootstrap with empty packages list.""" + registered = auto_bootstrap( + packages=[], + registry=fresh_registry, + force=True, + ) + + assert len(registered) == 0 + + def 
test_auto_bootstrap_registers_tools(self, fresh_registry, fresh_discovery): + """Test that auto-bootstrap registers discovered tools.""" + @mcp_tool(name="bootstrap_tool", category="bootstrap") + def bootstrap_func(value: str) -> str: + """A bootstrap test tool.""" + return value + + # Manually register it + fresh_registry.register_tool( + name="bootstrap_tool", + function=bootstrap_func, + category="bootstrap", + ) + + # Verify it's in the registry + record = fresh_registry.get("bootstrap_tool") + assert record is not None + assert record.auto_discovered is True + + +class TestBootstrapFromDirectory: + """Test bootstrap_from_directory function.""" + + def test_bootstrap_from_directory(self, fresh_registry, tmp_path): + """Test bootstrapping from a directory.""" + tools_dir = tmp_path / "tools" + tools_dir.mkdir() + + tool_file = tools_dir / "my_tools.py" + tool_file.write_text(''' +from mcp.discovery import mcp_tool + +@mcp_tool(name="dir_tool", category="directory") +def dir_tool(value: str) -> str: + """A tool from directory.""" + return value +''') + + registered = bootstrap_from_directory(tools_dir, registry=fresh_registry) + + # Function won't be resolved (AST only), so not registered + assert len(registered) == 0 + + def test_bootstrap_from_nonexistent_directory(self, fresh_registry): + """Test bootstrapping from non-existent directory.""" + registered = bootstrap_from_directory( + Path("/nonexistent/tools"), + registry=fresh_registry + ) + + assert len(registered) == 0 + + def test_bootstrap_skips_private_files(self, fresh_registry, tmp_path): + """Test that private files are skipped.""" + tools_dir = tmp_path / "tools" + tools_dir.mkdir() + + private_file = tools_dir / "_private.py" + private_file.write_text(''' +from mcp.discovery import mcp_tool + +@mcp_tool(name="private_tool") +def private_tool(): + pass +''') + + registered = bootstrap_from_directory(tools_dir, registry=fresh_registry) + assert len(registered) == 0 + + +class TestGetBootstrapStatus: + 
"""Test get_bootstrap_status function.""" + + def test_status_default_enabled(self): + """Test status when auto-bootstrap is enabled by default.""" + with patch.dict(os.environ, {}, clear=True): + status = get_bootstrap_status() + + assert status["auto_bootstrap_enabled"] is True + assert "discovered_tools_count" in status + assert "registered_tools_count" in status + assert status["default_packages"] == DEFAULT_TOOL_PACKAGES + + def test_status_disabled(self): + """Test status when auto-bootstrap is disabled.""" + with patch.dict(os.environ, {AUTO_BOOTSTRAP_ENV_VAR: "0"}): + status = get_bootstrap_status() + + assert status["auto_bootstrap_enabled"] is False + + +class TestIntegration: + """Integration tests for bootstrap + discovery + registry.""" + + def test_full_workflow(self, fresh_registry): + """Test the full auto-discovery and registration workflow.""" + @mcp_tool(name="integration_tool", category="integration") + def integration_func(data: str) -> str: + """Integration test tool.""" + return f"processed: {data}" + + fresh_registry.register_tool( + name="integration_tool", + function=integration_func, + category="integration", + source_module="test_module", + ) + + record = fresh_registry.get("integration_tool") + assert record is not None + assert record.auto_discovered is True + assert record.source_module == "test_module" + + export = fresh_registry.to_dict() + assert export["total_tools"] == 1 + assert export["auto_discovered_count"] == 1 + + def test_tool_execution_after_registration(self, fresh_registry): + """Test that registered tools can be executed.""" + @mcp_tool(name="exec_tool", category="execution") + def exec_func(input: str) -> str: + """Executable test tool.""" + return input.upper() + + fresh_registry.register_tool( + name="exec_tool", + function=exec_func, + category="execution", + ) + + import asyncio + result = asyncio.run(fresh_registry.execute("exec_tool", {"input": "hello"})) + + assert result == "HELLO" + + metrics = 
fresh_registry.get_metrics("exec_tool") + assert metrics["executions"] == 1 + assert metrics["health"] == "healthy" + + def test_discover_filtering(self, fresh_registry): + """Test filtering registered tools.""" + @mcp_tool(name="cat1_tool", category="category1") + def cat1_func(): + pass + + @mcp_tool(name="cat2_tool", category="category2") + def cat2_func(): + pass + + fresh_registry.register_tool( + name="cat1_tool", + function=cat1_func, + category="category1" + ) + fresh_registry.register_tool( + name="cat2_tool", + function=cat2_func, + category="category2" + ) + + cat1_tools = fresh_registry.discover(category="category1") + assert len(cat1_tools) == 1 + assert cat1_tools[0].name == "cat1_tool" + + auto_tools = fresh_registry.discover(auto_discovered_only=True) + assert len(auto_tools) == 2 + + def test_registry_export_includes_metadata(self, fresh_registry): + """Test that registry export includes all metadata.""" + @mcp_tool(name="meta_tool", category="meta", tags=["tag1", "tag2"]) + def meta_func(): + pass + + fresh_registry.register_tool( + name="meta_tool", + function=meta_func, + category="meta", + tags=["tag1", "tag2"], + ) + + export = fresh_registry.to_dict() + + for tool_dict in export["tools"]: + assert "tags" in tool_dict + assert "source_module" in tool_dict + assert "auto_discovered" in tool_dict diff --git a/tests/test_mcp_discovery.py b/tests/test_mcp_discovery.py new file mode 100644 index 0000000..ca14fbf --- /dev/null +++ b/tests/test_mcp_discovery.py @@ -0,0 +1,329 @@ +"""Tests for MCP Tool Auto-Discovery. 
+ +Tests follow pytest best practices: +- No module-level state +- Proper fixture cleanup +- Isolated tests +""" + +import ast +import inspect +import sys +import types +from pathlib import Path +from unittest.mock import MagicMock, patch + +import pytest + +from mcp.discovery import DiscoveredTool, ToolDiscovery, mcp_tool +from mcp.registry import ToolRegistry + + +@pytest.fixture +def fresh_registry(): + """Create a fresh registry for each test.""" + return ToolRegistry() + + +@pytest.fixture +def discovery(fresh_registry): + """Create a fresh discovery instance for each test.""" + return ToolDiscovery(registry=fresh_registry) + + +@pytest.fixture +def mock_module_with_tools(): + """Create a mock module with MCP tools for testing.""" + # Create a fresh module + mock_module = types.ModuleType("mock_test_module") + mock_module.__file__ = "mock_test_module.py" + + # Add decorated functions + @mcp_tool(name="echo", category="test", tags=["utility"]) + def echo_func(message: str) -> str: + """Echo a message back.""" + return message + + @mcp_tool(category="math") + def add_func(a: int, b: int) -> int: + """Add two numbers.""" + return a + b + + def not_decorated(): + """Not a tool.""" + pass + + mock_module.echo_func = echo_func + mock_module.add_func = add_func + mock_module.not_decorated = not_decorated + + # Inject into sys.modules + sys.modules["mock_test_module"] = mock_module + + yield mock_module + + # Cleanup + del sys.modules["mock_test_module"] + + +class TestMCPToolDecorator: + """Test the @mcp_tool decorator.""" + + def test_decorator_sets_explicit_name(self): + """Test that decorator uses explicit name.""" + @mcp_tool(name="custom_name", category="test") + def my_func(): + pass + + assert my_func._mcp_name == "custom_name" + assert my_func._mcp_category == "test" + + def test_decorator_uses_function_name(self): + """Test that decorator uses function name when not specified.""" + @mcp_tool(category="math") + def my_add_func(): + pass + + assert 
my_add_func._mcp_name == "my_add_func" + + def test_decorator_captures_docstring(self): + """Test that decorator captures docstring as description.""" + @mcp_tool(name="test") + def with_doc(): + """This is the description.""" + pass + + assert "This is the description" in with_doc._mcp_description + + def test_decorator_sets_tags(self): + """Test that decorator sets tags.""" + @mcp_tool(name="test", tags=["tag1", "tag2"]) + def tagged_func(): + pass + + assert tagged_func._mcp_tags == ["tag1", "tag2"] + + def test_undecorated_function(self): + """Test that undecorated functions don't have MCP attributes.""" + def plain_func(): + pass + + assert not hasattr(plain_func, "_mcp_tool") + + +class TestDiscoveredTool: + """Test DiscoveredTool dataclass.""" + + def test_tool_creation(self): + """Test creating a DiscoveredTool.""" + def dummy_func(): + pass + + tool = DiscoveredTool( + name="test", + description="A test tool", + function=dummy_func, + module="test_module", + category="test", + tags=["utility"], + parameters_schema={"type": "object"}, + returns_schema={"type": "string"}, + ) + + assert tool.name == "test" + assert tool.function == dummy_func + assert tool.category == "test" + + +class TestToolDiscoveryInit: + """Test ToolDiscovery initialization.""" + + def test_uses_provided_registry(self, fresh_registry): + """Test initialization with provided registry.""" + discovery = ToolDiscovery(registry=fresh_registry) + assert discovery.registry is fresh_registry + + +class TestDiscoverModule: + """Test discovering tools from modules.""" + + def test_discover_finds_decorated_tools(self, discovery, mock_module_with_tools): + """Test discovering tools from a module.""" + tools = discovery.discover_module("mock_test_module") + + tool_names = [t.name for t in tools] + assert "echo" in tool_names + assert "add_func" in tool_names + assert "not_decorated" not in tool_names + + def test_discover_nonexistent_module(self, discovery): + """Test discovering from non-existent 
module.""" + tools = discovery.discover_module("nonexistent.module.xyz") + assert len(tools) == 0 + + def test_discovered_tool_has_correct_metadata(self, discovery, mock_module_with_tools): + """Test that discovered tools have correct metadata.""" + tools = discovery.discover_module("mock_test_module") + + echo_tool = next(t for t in tools if t.name == "echo") + assert echo_tool.category == "test" + assert "utility" in echo_tool.tags + + def test_discovered_tool_has_schema(self, discovery, mock_module_with_tools): + """Test that discovered tools have parameter schemas.""" + tools = discovery.discover_module("mock_test_module") + + add_tool = next(t for t in tools if t.name == "add_func") + assert "properties" in add_tool.parameters_schema + assert "a" in add_tool.parameters_schema["properties"] + + +class TestDiscoverFile: + """Test discovering tools from Python files.""" + + def test_discover_from_file(self, discovery, tmp_path): + """Test discovering tools from a Python file.""" + test_file = tmp_path / "test_tools.py" + test_file.write_text(''' +from mcp.discovery import mcp_tool + +@mcp_tool(name="file_tool", category="file_ops", tags=["io"]) +def file_tool(path: str) -> dict: + """Process a file.""" + return {"path": path} +''') + + tools = discovery.discover_file(test_file) + + assert len(tools) == 1 + assert tools[0].name == "file_tool" + assert tools[0].category == "file_ops" + + def test_discover_from_nonexistent_file(self, discovery, tmp_path): + """Test discovering from non-existent file.""" + tools = discovery.discover_file(tmp_path / "nonexistent.py") + assert len(tools) == 0 + + def test_discover_from_invalid_python(self, discovery, tmp_path): + """Test discovering from invalid Python file.""" + test_file = tmp_path / "invalid.py" + test_file.write_text("not valid python @#$%") + + tools = discovery.discover_file(test_file) + assert len(tools) == 0 + + +class TestSchemaBuilding: + """Test JSON schema building from type hints.""" + + def 
test_string_parameter(self, discovery): + """Test string parameter schema.""" + def func(name: str) -> str: + return name + + sig = inspect.signature(func) + schema = discovery._build_parameters_schema(sig) + + assert schema["properties"]["name"]["type"] == "string" + + def test_int_parameter(self, discovery): + """Test int parameter schema.""" + def func(count: int) -> int: + return count + + sig = inspect.signature(func) + schema = discovery._build_parameters_schema(sig) + + assert schema["properties"]["count"]["type"] == "number" + + def test_bool_parameter(self, discovery): + """Test bool parameter schema.""" + def func(enabled: bool) -> bool: + return enabled + + sig = inspect.signature(func) + schema = discovery._build_parameters_schema(sig) + + assert schema["properties"]["enabled"]["type"] == "boolean" + + def test_required_parameters(self, discovery): + """Test that required parameters are marked.""" + def func(required: str, optional: str = "default") -> str: + return required + + sig = inspect.signature(func) + schema = discovery._build_parameters_schema(sig) + + assert "required" in schema["required"] + assert "optional" not in schema["required"] + + def test_default_values(self, discovery): + """Test that default values are captured.""" + def func(name: str = "default") -> str: + return name + + sig = inspect.signature(func) + schema = discovery._build_parameters_schema(sig) + + assert schema["properties"]["name"]["default"] == "default" + + +class TestTypeToSchema: + """Test type annotation to JSON schema conversion.""" + + def test_str_annotation(self, discovery): + """Test string annotation.""" + schema = discovery._type_to_schema(str) + assert schema["type"] == "string" + + def test_int_annotation(self, discovery): + """Test int annotation.""" + schema = discovery._type_to_schema(int) + assert schema["type"] == "number" + + def test_optional_annotation(self, discovery): + """Test Optional[T] annotation.""" + from typing import Optional + schema = 
discovery._type_to_schema(Optional[str]) + assert schema["type"] == "string" + + +class TestAutoRegister: + """Test auto-registration of discovered tools.""" + + def test_auto_register_module(self, discovery, mock_module_with_tools, fresh_registry): + """Test auto-registering tools from a module.""" + registered = discovery.auto_register("mock_test_module") + + assert "echo" in registered + assert "add_func" in registered + assert fresh_registry.get("echo") is not None + + def test_auto_register_skips_unresolved_functions(self, discovery, fresh_registry): + """Test that tools without resolved functions are skipped.""" + # Add a discovered tool with no function + discovery._discovered.append(DiscoveredTool( + name="no_func", + description="No function", + function=None, # type: ignore + module="test", + category="test", + tags=[], + parameters_schema={}, + returns_schema={}, + )) + + registered = discovery.auto_register("mock_test_module") + assert "no_func" not in registered + + +class TestClearDiscovered: + """Test clearing discovered tools cache.""" + + def test_clear_discovered(self, discovery, mock_module_with_tools): + """Test clearing discovered tools.""" + discovery.discover_module("mock_test_module") + assert len(discovery.get_discovered()) > 0 + + discovery.clear() + assert len(discovery.get_discovered()) == 0