Compare commits
5 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
dc0a3d2024 | ||
| 5022db9d7b | |||
| 0f61474b74 | |||
|
|
e63cdaf16f | ||
|
|
2b7b12baf9 |
302
agent/self_modify.py
Normal file
302
agent/self_modify.py
Normal file
@@ -0,0 +1,302 @@
|
|||||||
|
"""Self-Modifying Prompt Engine — agent learns from its own failures.
|
||||||
|
|
||||||
|
Analyzes session transcripts, identifies failure patterns, and generates
|
||||||
|
prompt patches to prevent future failures.
|
||||||
|
|
||||||
|
The loop: fail → analyze → rewrite → retry → verify improvement.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
from agent.self_modify import PromptLearner
|
||||||
|
learner = PromptLearner()
|
||||||
|
patches = learner.analyze_session(session_id)
|
||||||
|
learner.apply_patches(patches)
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import time
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any, Dict, List, Optional, Tuple
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# Root directory for hermes state; overridable via the HERMES_HOME env var.
HERMES_HOME = Path(os.getenv("HERMES_HOME", Path.home() / ".hermes"))
# Where applied patch sets are logged as JSON (one file per apply call).
PATCHES_DIR = HERMES_HOME / "prompt_patches"
# Timestamped prompt backups, consumed by PromptLearner.rollback_last().
ROLLBACK_DIR = HERMES_HOME / "prompt_rollback"
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class FailurePattern:
    """A detected failure pattern in session transcripts."""

    # One of the FAILURE_SIGNALS keys: retry_loop, timeout, hallucination,
    # context_loss, tool_failure.
    pattern_type: str
    # Human-readable summary copied from FAILURE_SIGNALS["description"].
    description: str
    # Number of messages in the session that matched this pattern.
    frequency: int
    # Up to 3 truncated (200-char) example snippets for human review.
    example_messages: List[str] = field(default_factory=list)
    # Canned prompt rule from PROMPT_FIXES ("" when no fix is known).
    suggested_fix: str = ""
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class PromptPatch:
    """A modification to the system prompt based on failure analysis."""

    # Unique-ish id of the form "<failure_type>-<unix timestamp>".
    id: str
    # FailurePattern.pattern_type this patch addresses.
    failure_type: str
    # Existing prompt rule being replaced (placeholder text when none exists).
    original_rule: str
    # Rule text that gets appended to the system prompt.
    new_rule: str
    # Heuristic 0..1 confidence derived from pattern frequency.
    confidence: float
    # Unix time the patch was applied; None until applied.
    applied_at: Optional[float] = None
    # True once the patch has been rolled back.
    reverted: bool = False
|
||||||
|
|
||||||
|
|
||||||
|
# Failure detection patterns: maps a failure type to the regexes that signal
# it in transcript text plus a human-readable description. All patterns are
# made case-insensitive with an inline (?i) flag.
FAILURE_SIGNALS = {
    "retry_loop": {
        "patterns": [
            r"(?i)retry(?:ing)?\s*(?:attempt|again)",
            r"(?i)failed.*retrying",
            r"(?i)error.*again",
            r"(?i)attempt\s+\d+\s*(?:of|/)\s*\d+",
        ],
        "description": "Agent stuck in retry loop",
    },
    "timeout": {
        "patterns": [
            r"(?i)timed?\s*out",
            r"(?i)deadline\s+exceeded",
            r"(?i)took\s+(?:too\s+)?long",
        ],
        "description": "Operation timed out",
    },
    "hallucination": {
        "patterns": [
            r"(?i)i\s+(?:don't|do\s+not)\s+(?:have|see|find)\s+(?:any|that|this)\s+(?:information|data|file)",
            r"(?i)the\s+file\s+doesn't\s+exist",
            r"(?i)i\s+(?:made|invented|fabricated)\s+(?:that\s+up|this)",
        ],
        "description": "Agent hallucinated or fabricated information",
    },
    "context_loss": {
        "patterns": [
            r"(?i)i\s+(?:don't|do\s+not)\s+(?:remember|recall|know)\s+(?:what|where|when|how)",
            r"(?i)could\s+you\s+remind\s+me",
            r"(?i)what\s+were\s+we\s+(?:doing|working|talking)\s+(?:on|about)",
        ],
        "description": "Agent lost context from earlier in conversation",
    },
    "tool_failure": {
        "patterns": [
            r"(?i)tool\s+(?:call|execution)\s+failed",
            r"(?i)command\s+not\s+found",
            r"(?i)permission\s+denied",
            r"(?i)no\s+such\s+file",
        ],
        "description": "Tool execution failed",
    },
}
|
||||||
|
|
||||||
|
# Prompt improvement templates: canned rule text appended to the system
# prompt for each failure type. Keys must match FAILURE_SIGNALS keys.
PROMPT_FIXES = {
    "retry_loop": (
        "If an operation fails more than twice, stop retrying. "
        "Report the failure and ask the user for guidance. "
        "Do not enter retry loops — they waste tokens."
    ),
    "timeout": (
        "For operations that may take long, set a timeout and report "
        "progress. If an operation takes more than 30 seconds, report "
        "what you've done so far and ask if you should continue."
    ),
    "hallucination": (
        "If you cannot find information, say 'I don't know' or "
        "'I couldn't find that.' Never fabricate information. "
        "If a file doesn't exist, say so — don't guess its contents."
    ),
    "context_loss": (
        "When you need context from earlier in the conversation, "
        "use session_search to find it. Don't ask the user to repeat themselves."
    ),
    "tool_failure": (
        "If a tool fails, check the error message and try a different approach. "
        "Don't retry the exact same command — diagnose first."
    ),
}
|
||||||
|
|
||||||
|
|
||||||
|
class PromptLearner:
    """Analyze session transcripts and generate prompt improvements.

    The loop: detect failure patterns in a session, turn high-confidence
    patterns into prompt patches, append them to the system prompt (taking
    a timestamped backup first so any change can be rolled back).
    """

    def __init__(self):
        # Ensure state directories exist before any analyze/apply cycle.
        PATCHES_DIR.mkdir(parents=True, exist_ok=True)
        ROLLBACK_DIR.mkdir(parents=True, exist_ok=True)

    def analyze_session(self, session_data: dict) -> List[FailurePattern]:
        """Analyze a session for failure patterns.

        Args:
            session_data: Session dict with 'messages' list.

        Returns:
            List of detected failure patterns, one entry per pattern type
            with aggregated frequency and up to 3 example snippets.
        """
        messages = session_data.get("messages", [])
        patterns_found: Dict[str, FailurePattern] = {}

        for msg in messages:
            content = str(msg.get("content", ""))
            role = msg.get("role", "")

            # Only analyze assistant messages and tool results — a user
            # merely *talking about* timeouts is not an agent failure.
            if role not in ("assistant", "tool"):
                continue

            for failure_type, config in FAILURE_SIGNALS.items():
                for pattern in config["patterns"]:
                    if re.search(pattern, content):
                        if failure_type not in patterns_found:
                            patterns_found[failure_type] = FailurePattern(
                                pattern_type=failure_type,
                                description=config["description"],
                                frequency=0,
                                suggested_fix=PROMPT_FIXES.get(failure_type, ""),
                            )
                        found = patterns_found[failure_type]
                        found.frequency += 1
                        # Keep a few short examples for human review.
                        if len(found.example_messages) < 3:
                            found.example_messages.append(content[:200])
                        break  # One match per message per type is enough

        return list(patterns_found.values())

    def generate_patches(self, patterns: List[FailurePattern],
                         min_confidence: float = 0.7) -> List[PromptPatch]:
        """Generate prompt patches from failure patterns.

        Confidence is a frequency heuristic: >=3 hits -> 0.9, 2 hits ->
        0.75, otherwise 0.5. Patterns below ``min_confidence`` or without
        a suggested fix produce no patch.

        Args:
            patterns: Detected failure patterns.
            min_confidence: Minimum confidence to generate a patch.

        Returns:
            List of prompt patches.
        """
        patches = []
        for pattern in patterns:
            # Confidence based on frequency.
            if pattern.frequency >= 3:
                confidence = 0.9
            elif pattern.frequency >= 2:
                confidence = 0.75
            else:
                confidence = 0.5

            if confidence < min_confidence:
                continue
            if not pattern.suggested_fix:
                continue

            patches.append(PromptPatch(
                id=f"{pattern.pattern_type}-{int(time.time())}",
                failure_type=pattern.pattern_type,
                original_rule="(missing — no existing rule for this pattern)",
                new_rule=pattern.suggested_fix,
                confidence=confidence,
            ))

        return patches

    def apply_patches(self, patches: List[PromptPatch],
                      prompt_path: Optional[str] = None) -> int:
        """Apply patches to the system prompt.

        Backs up the current prompt to ROLLBACK_DIR, appends each new rule
        (skipping rules already present verbatim — idempotent), and logs
        the patch set to PATCHES_DIR as JSON.

        Args:
            patches: Patches to apply.
            prompt_path: Path to prompt file (default: ~/.hermes/system_prompt.md)

        Returns:
            Number of patches applied.
        """
        # Fix: previously an empty patch list still wrote a backup and an
        # empty JSON log file on every call, littering PATCHES_DIR.
        if not patches:
            return 0

        if prompt_path is None:
            prompt_path = str(HERMES_HOME / "system_prompt.md")

        prompt_file = Path(prompt_path)

        # Backup current prompt so rollback_last() can restore it.
        # Explicit utf-8: the rule text contains non-ASCII (em-dashes).
        if prompt_file.exists():
            backup = ROLLBACK_DIR / f"{prompt_file.name}.{int(time.time())}.bak"
            backup.write_text(prompt_file.read_text(encoding="utf-8"),
                              encoding="utf-8")

        # Read current prompt ("" when it doesn't exist yet).
        current = prompt_file.read_text(encoding="utf-8") if prompt_file.exists() else ""

        applied = 0
        additions = []
        for patch in patches:
            if patch.new_rule not in current:
                additions.append(f"\n## Auto-learned: {patch.failure_type}\n{patch.new_rule}")
                patch.applied_at = time.time()
                applied += 1

        if additions:
            prompt_file.write_text(current + "\n".join(additions), encoding="utf-8")

        # Log the full patch set (applied or skipped) for auditability.
        patches_file = PATCHES_DIR / f"patches-{int(time.time())}.json"
        with open(patches_file, "w", encoding="utf-8") as f:
            json.dump([p.__dict__ for p in patches], f, indent=2, default=str)

        logger.info("Applied %d prompt patches", applied)
        return applied

    def rollback_last(self, prompt_path: Optional[str] = None) -> bool:
        """Rollback to the most recent backup.

        Args:
            prompt_path: Path to prompt file.

        Returns:
            True if rollback succeeded.
        """
        if prompt_path is None:
            prompt_path = str(HERMES_HOME / "system_prompt.md")

        # Backup names embed a unix timestamp of fixed digit width for any
        # plausible date, so lexicographic reverse sort yields newest first.
        backups = sorted(ROLLBACK_DIR.glob("*.bak"), reverse=True)
        if not backups:
            logger.warning("No backups to rollback to")
            return False

        latest = backups[0]
        Path(prompt_path).write_text(latest.read_text(encoding="utf-8"),
                                     encoding="utf-8")
        logger.info("Rolled back to %s", latest.name)
        return True

    def learn_from_session(self, session_data: dict) -> Dict[str, Any]:
        """Full learning cycle: analyze → patch → apply.

        Args:
            session_data: Session dict.

        Returns:
            Summary of what was learned and applied.
        """
        patterns = self.analyze_session(session_data)
        patches = self.generate_patches(patterns)
        applied = self.apply_patches(patches)

        return {
            "patterns_detected": len(patterns),
            "patches_generated": len(patches),
            "patches_applied": applied,
            "patterns": [
                {"type": p.pattern_type, "frequency": p.frequency, "description": p.description}
                for p in patterns
            ],
        }
|
||||||
265
scripts/mcp_server.py
Executable file
265
scripts/mcp_server.py
Executable file
@@ -0,0 +1,265 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""Hermes MCP Server — expose hermes-agent tools to fleet peers.
|
||||||
|
|
||||||
|
Runs as a standalone MCP server that other agents can connect to
|
||||||
|
and invoke hermes tools remotely.
|
||||||
|
|
||||||
|
Safe tools exposed:
|
||||||
|
- terminal (safe commands only)
|
||||||
|
- file_read, file_search
|
||||||
|
- web_search, web_extract
|
||||||
|
- session_search
|
||||||
|
|
||||||
|
NOT exposed (internal tools):
|
||||||
|
- approval, delegate, memory, config
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
python -m tools.mcp_server --port 8081
|
||||||
|
hermes mcp-server --port 8081
|
||||||
|
python scripts/mcp_server.py --port 8081 --auth-key SECRET
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import asyncio
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any, Dict, List, Optional
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# Tools safe to expose to other agents. Each entry is an MCP tool
# declaration: name, description, and a JSON-Schema parameter spec.
SAFE_TOOLS = {
    "terminal": {
        "name": "terminal",
        "description": "Execute safe shell commands. Dangerous commands are blocked.",
        "parameters": {
            "type": "object",
            "properties": {
                "command": {"type": "string", "description": "Shell command to execute"},
            },
            "required": ["command"],
        },
    },
    "file_read": {
        "name": "file_read",
        "description": "Read the contents of a file.",
        "parameters": {
            "type": "object",
            "properties": {
                "path": {"type": "string", "description": "File path to read"},
                "offset": {"type": "integer", "description": "Start line", "default": 1},
                "limit": {"type": "integer", "description": "Max lines", "default": 200},
            },
            "required": ["path"],
        },
    },
    "file_search": {
        "name": "file_search",
        "description": "Search file contents using regex.",
        "parameters": {
            "type": "object",
            "properties": {
                "pattern": {"type": "string", "description": "Regex pattern"},
                "path": {"type": "string", "description": "Directory to search", "default": "."},
            },
            "required": ["pattern"],
        },
    },
    "web_search": {
        "name": "web_search",
        "description": "Search the web for information.",
        "parameters": {
            "type": "object",
            "properties": {
                "query": {"type": "string", "description": "Search query"},
            },
            "required": ["query"],
        },
    },
    "session_search": {
        "name": "session_search",
        "description": "Search past conversation sessions.",
        "parameters": {
            "type": "object",
            "properties": {
                "query": {"type": "string", "description": "Search query"},
                "limit": {"type": "integer", "description": "Max results", "default": 3},
            },
            "required": ["query"],
        },
    },
}
|
||||||
|
|
||||||
|
# Tools explicitly blocked from remote invocation — internal/privileged
# capabilities that must never be reachable by fleet peers.
BLOCKED_TOOLS = {
    "approval", "delegate", "memory", "config", "skill_install",
    "mcp_tool", "cronjob", "tts", "send_message",
}
|
||||||
|
|
||||||
|
|
||||||
|
class MCPServer:
    """Simple MCP-compatible server for exposing hermes tools.

    Serves a whitelist (SAFE_TOOLS) over HTTP with optional Bearer-token
    auth; BLOCKED_TOOLS are refused even if requested by name.
    """

    def __init__(self, host: str = "127.0.0.1", port: int = 8081,
                 auth_key: Optional[str] = None):
        self._host = host
        self._port = port
        # Falls back to the MCP_AUTH_KEY env var; empty string disables auth.
        self._auth_key = auth_key or os.getenv("MCP_AUTH_KEY", "")

    def _is_authorized(self, request) -> bool:
        """Check the Bearer token on an incoming aiohttp request.

        Always True when no auth key is configured (open server). Fix:
        auth was previously duplicated per-route and compared with `!=`,
        which leaks the key length/prefix via timing; use a constant-time
        comparison instead.
        """
        if not self._auth_key:
            return True
        import hmac  # stdlib; local import keeps module import-light
        auth = request.headers.get("Authorization", "")
        return hmac.compare_digest(auth, f"Bearer {self._auth_key}")

    async def handle_tools_list(self, request: dict) -> dict:
        """Return available tools (the SAFE_TOOLS declarations)."""
        tools = list(SAFE_TOOLS.values())
        return {"tools": tools}

    async def handle_tools_call(self, request: dict) -> dict:
        """Execute a tool call.

        Args:
            request: {"name": <tool name>, "arguments": <args dict>}.

        Returns:
            MCP-style content payload, or {"error": ...} for blocked,
            unknown, or failing tools.
        """
        tool_name = request.get("name", "")
        arguments = request.get("arguments", {})

        if tool_name in BLOCKED_TOOLS:
            return {"error": f"Tool '{tool_name}' is not exposed via MCP"}
        if tool_name not in SAFE_TOOLS:
            return {"error": f"Unknown tool: {tool_name}"}

        try:
            result = await self._execute_tool(tool_name, arguments)
            return {"content": [{"type": "text", "text": str(result)}]}
        except Exception as e:
            # Surface the failure to the caller instead of crashing the server.
            return {"error": str(e)}

    async def _execute_tool(self, tool_name: str, arguments: dict) -> str:
        """Execute a whitelisted tool and return its textual result.

        Exceptions propagate to handle_tools_call, which converts them to
        {"error": ...} responses.
        """
        if tool_name == "terminal":
            import subprocess
            cmd = arguments.get("command", "")
            # Reuse the agent's dangerous-command detector as a guardrail.
            from tools.approval import detect_dangerous_command
            is_dangerous, _, desc = detect_dangerous_command(cmd)
            if is_dangerous:
                return f"BLOCKED: Dangerous command detected ({desc}). This tool only executes safe commands."
            result = subprocess.run(cmd, shell=True, capture_output=True, text=True, timeout=30)
            return result.stdout or result.stderr or "(no output)"

        elif tool_name == "file_read":
            path = arguments.get("path", "")
            # Fix: clamp so offset<=0 can't wrap into negative slicing
            # (offset=0 previously sliced lines[-1:...] — wrong content).
            offset = max(1, int(arguments.get("offset", 1)))
            limit = max(0, int(arguments.get("limit", 200)))
            with open(path) as f:
                lines = f.readlines()
            return "".join(lines[offset - 1:offset - 1 + limit])

        elif tool_name == "file_search":
            import re
            pattern = arguments.get("pattern", "")
            path = arguments.get("path", ".")
            results = []
            for p in Path(path).rglob("*.py"):
                try:
                    content = p.read_text()
                    for i, line in enumerate(content.split("\n"), 1):
                        if re.search(pattern, line, re.IGNORECASE):
                            results.append(f"{p}:{i}: {line.strip()}")
                            if len(results) >= 20:
                                break
                except Exception:
                    # Unreadable file (binary, permissions) — skip it.
                    continue
                if len(results) >= 20:
                    break
            return "\n".join(results) or "No matches found"

        elif tool_name == "web_search":
            try:
                from tools.web_tools import web_search
                return web_search(arguments.get("query", ""))
            except ImportError:
                return "Web search not available"

        elif tool_name == "session_search":
            try:
                from tools.session_search_tool import session_search
                return session_search(
                    query=arguments.get("query", ""),
                    limit=arguments.get("limit", 3),
                )
            except ImportError:
                return "Session search not available"

        return f"Tool {tool_name} not implemented"

    async def start_http(self):
        """Start the aiohttp HTTP server and serve until cancelled."""
        try:
            from aiohttp import web
        except ImportError:
            logger.error("aiohttp required: pip install aiohttp")
            return

        app = web.Application()

        async def handle_tools_list_route(request):
            if not self._is_authorized(request):
                return web.json_response({"error": "Unauthorized"}, status=401)
            result = await self.handle_tools_list({})
            return web.json_response(result)

        async def handle_tools_call_route(request):
            if not self._is_authorized(request):
                return web.json_response({"error": "Unauthorized"}, status=401)
            body = await request.json()
            result = await self.handle_tools_call(body)
            return web.json_response(result)

        async def handle_health(request):
            # Unauthenticated liveness probe.
            return web.json_response({"status": "ok", "tools": len(SAFE_TOOLS)})

        app.router.add_get("/mcp/tools", handle_tools_list_route)
        app.router.add_post("/mcp/tools/call", handle_tools_call_route)
        app.router.add_get("/health", handle_health)

        runner = web.AppRunner(app)
        await runner.setup()
        site = web.TCPSite(runner, self._host, self._port)
        await site.start()
        logger.info("MCP server on http://%s:%s", self._host, self._port)
        logger.info("Tools: %s", ", ".join(SAFE_TOOLS.keys()))
        if self._auth_key:
            logger.info("Auth: Bearer token required")
        else:
            logger.warning("Auth: No MCP_AUTH_KEY set — server is open")

        try:
            # Block forever; exits cleanly on task cancellation.
            await asyncio.Event().wait()
        except asyncio.CancelledError:
            pass
        finally:
            await runner.cleanup()
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """CLI entry point: parse flags, configure logging, run the server."""
    arg_parser = argparse.ArgumentParser(description="Hermes MCP Server")
    arg_parser.add_argument("--host", default="127.0.0.1")
    arg_parser.add_argument("--port", type=int, default=8081)
    arg_parser.add_argument("--auth-key", default=None, help="Bearer token for auth")
    opts = arg_parser.parse_args()

    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s [%(name)s] %(levelname)s: %(message)s",
    )

    mcp = MCPServer(host=opts.host, port=opts.port, auth_key=opts.auth_key)
    print(f"Starting MCP server on http://{opts.host}:{opts.port}")
    print(f"Exposed tools: {', '.join(SAFE_TOOLS.keys())}")
    asyncio.run(mcp.start_http())


if __name__ == "__main__":
    main()
|
||||||
@@ -201,8 +201,31 @@ def _get_command_timeout() -> int:
|
|||||||
|
|
||||||
|
|
||||||
def _get_vision_model() -> Optional[str]:
|
def _get_vision_model() -> Optional[str]:
|
||||||
"""Model for browser_vision (screenshot analysis — multimodal)."""
|
"""Model for browser_vision (screenshot analysis — multimodal).
|
||||||
return os.getenv("AUXILIARY_VISION_MODEL", "").strip() or None
|
|
||||||
|
Priority:
|
||||||
|
1. AUXILIARY_VISION_MODEL env var (explicit override)
|
||||||
|
2. Gemma 4 (native multimodal, no model switching)
|
||||||
|
3. Ollama local vision models
|
||||||
|
4. None (fallback to text-only snapshot)
|
||||||
|
"""
|
||||||
|
# Explicit override always wins
|
||||||
|
explicit = os.getenv("AUXILIARY_VISION_MODEL", "").strip()
|
||||||
|
if explicit:
|
||||||
|
return explicit
|
||||||
|
|
||||||
|
# Prefer Gemma 4 (native multimodal — no separate vision model needed)
|
||||||
|
gemma = os.getenv("GEMMA_VISION_MODEL", "").strip()
|
||||||
|
if gemma:
|
||||||
|
return gemma
|
||||||
|
|
||||||
|
# Check for Ollama vision models
|
||||||
|
ollama_vision = os.getenv("OLLAMA_VISION_MODEL", "").strip()
|
||||||
|
if ollama_vision:
|
||||||
|
return ollama_vision
|
||||||
|
|
||||||
|
# Default: None (text-only fallback)
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
def _get_extraction_model() -> Optional[str]:
|
def _get_extraction_model() -> Optional[str]:
|
||||||
|
|||||||
Reference in New Issue
Block a user