feat: self-modifying agent that improves its own prompts (#813)

Resolves #813. Agent analyzes session transcripts for failure patterns and generates prompt patches to prevent future failures. agent/self_modify.py (PromptLearner class): - analyze_session(): detects 5 failure types from transcripts: retry_loop, timeout, hallucination, context_loss, tool_failure - generate_patches(): converts patterns to prompt patches with confidence scoring (frequency-based) - apply_patches(): appends learned rules to system prompt with backup and rollback support - learn_from_session(): full cycle analyze → patch → apply Failures → patterns → patches → improved prompts → fewer failures. Safety: patches only ADD rules (append-only), never remove. Rollback: restores from timestamped backup.
2026-04-16 01:23:48 -04:00
1 changed files with 302 additions and 0 deletions
--- a/agent/self_modify.py
+++ b/agent/self_modify.py
@@ -0,0 +1,302 @@
 """Self-Modifying Prompt Engine — agent learns from its own failures.
 Analyzes session transcripts, identifies failure patterns, and generates
 prompt patches to prevent future failures.
 The loop: fail → analyze → rewrite → retry → verify improvement.
 Usage:
    from agent.self_modify import PromptLearner
    learner = PromptLearner()
    patterns = learner.analyze_session(session_data)
    patches = learner.generate_patches(patterns)
    learner.apply_patches(patches)
 """
 from __future__ import annotations
 import json
 import logging
 import os
 import re
 import time
 from dataclasses import dataclass, field
 from datetime import datetime, timezone
 from pathlib import Path
 from typing import Any, Dict, List, Optional, Tuple
 # Module-level logger, named after this module.
 logger = logging.getLogger(__name__)
 # Root directory for Hermes state: $HERMES_HOME when set, otherwise ~/.hermes.
 HERMES_HOME = Path(os.environ.get("HERMES_HOME", Path.home() / ".hermes"))
 # JSON logs of generated patches are written here.
 PATCHES_DIR = HERMES_HOME.joinpath("prompt_patches")
 # Timestamped copies of the prompt file are kept here for rollback.
 ROLLBACK_DIR = HERMES_HOME.joinpath("prompt_rollback")
@dataclass
 class FailurePattern:
    """A detected failure pattern in session transcripts.

    PromptLearner.analyze_session builds one instance per failure type it
    sees in a session and accumulates every hit for that type into it.
    """
    # Failure type key, as used in FAILURE_SIGNALS: retry_loop, timeout,
    # hallucination, context_loss or tool_failure.
    pattern_type: str
    # Human-readable summary of the failure type.
    description: str
    # Number of scanned messages in which this failure type matched.
    frequency: int
    # Sample message excerpts; analyze_session keeps at most 3, each cut to
    # 200 characters.
    example_messages: List[str] = field(default_factory=list)
    # Rule text proposed for the system prompt; empty when no fix is known.
    suggested_fix: str = ""
@dataclass
 class PromptPatch:
    """A modification to the system prompt based on failure analysis."""
    # Identifier; generate_patches builds it as "<failure type>-<unix seconds>".
    id: str
    # The FailurePattern.pattern_type this patch addresses.
    failure_type: str
    # Rule being superseded; generate_patches always stores a "(missing ...)"
    # placeholder here because it only adds new rules.
    original_rule: str
    # Rule text that apply_patches appends to the prompt file.
    new_rule: str
    # Confidence score; generate_patches assigns 0.5, 0.75 or 0.9 by frequency.
    confidence: float
    # time.time() stamp set by apply_patches; None while the patch is unapplied.
    applied_at: Optional[float] = None
    # NOTE(review): nothing visible in this module ever sets this flag, not
    # even rollback_last — confirm who is expected to maintain it.
    reverted: bool = False
 # Failure detection patterns.
 # Maps a failure type to:
 #   "patterns":    regex strings, tried with re.search against the text of each
 #                  assistant/tool message; one matching regex is enough to
 #                  count the message once for that type.
 #   "description": human-readable summary copied into FailurePattern.description.
 # All regexes carry the inline (?i) flag, so matching is case-insensitive.
 FAILURE_SIGNALS = {
    "retry_loop": {
        "patterns": [
            r"(?i)retry(?:ing)?\s*(?:attempt|again)",
            r"(?i)failed.*retrying",
            # NOTE(review): very broad — any text with "error" followed later
            # on the same line by "again" matches; confirm this is intended.
            r"(?i)error.*again",
            r"(?i)attempt\s+\d+\s*(?:of|/)\s*\d+",
        ],
        "description": "Agent stuck in retry loop",
    },
    "timeout": {
        "patterns": [
            r"(?i)timed?\s*out",
            r"(?i)deadline\s+exceeded",
            r"(?i)took\s+(?:too\s+)?long",
        ],
        "description": "Operation timed out",
    },
    # NOTE(review): the first two regexes below match an agent honestly
    # reporting that something is missing (e.g. "the file doesn't exist"),
    # which is the behavior PROMPT_FIXES["hallucination"] asks for. An agent
    # following the learned rule may therefore keep triggering this
    # detector — confirm the intended signal.
    "hallucination": {
        "patterns": [
            r"(?i)i\s+(?:don't|do\s+not)\s+(?:have|see|find)\s+(?:any|that|this)\s+(?:information|data|file)",
            r"(?i)the\s+file\s+doesn't\s+exist",
            r"(?i)i\s+(?:made|invented|fabricated)\s+(?:that\s+up|this)",
        ],
        "description": "Agent hallucinated or fabricated information",
    },
    "context_loss": {
        "patterns": [
            r"(?i)i\s+(?:don't|do\s+not)\s+(?:remember|recall|know)\s+(?:what|where|when|how)",
            r"(?i)could\s+you\s+remind\s+me",
            r"(?i)what\s+were\s+we\s+(?:doing|working|talking)\s+(?:on|about)",
        ],
        "description": "Agent lost context from earlier in conversation",
    },
    "tool_failure": {
        "patterns": [
            r"(?i)tool\s+(?:call|execution)\s+failed",
            r"(?i)command\s+not\s+found",
            r"(?i)permission\s+denied",
            r"(?i)no\s+such\s+file",
        ],
        "description": "Tool execution failed",
    },
 }
 # Prompt improvement templates.
 # Maps a failure type (same keys as FAILURE_SIGNALS) to the rule text that is
 # proposed for the system prompt when that failure is detected. Each value is
 # a single string assembled from adjacent literals.
 PROMPT_FIXES = {
    "retry_loop": (
        "If an operation fails more than twice, stop retrying. "
        "Report the failure and ask the user for guidance. "
        "Do not enter retry loops — they waste tokens."
    ),
    "timeout": (
        "For operations that may take long, set a timeout and report "
        "progress. If an operation takes more than 30 seconds, report "
        "what you've done so far and ask if you should continue."
    ),
    "hallucination": (
        "If you cannot find information, say 'I don't know' or "
        "'I couldn't find that.' Never fabricate information. "
        "If a file doesn't exist, say so — don't guess its contents."
    ),
    "context_loss": (
        "When you need context from earlier in the conversation, "
        "use session_search to find it. Don't ask the user to repeat themselves."
    ),
    "tool_failure": (
        "If a tool fails, check the error message and try a different approach. "
        "Don't retry the exact same command — diagnose first."
    ),
 }
 class PromptLearner:
    """Analyze session transcripts and generate prompt improvements.

    Typical flow: ``analyze_session`` -> ``generate_patches`` ->
    ``apply_patches`` (``learn_from_session`` chains all three).
    ``rollback_last`` restores the prompt from the newest backup that
    ``apply_patches`` took for that prompt file.
    """

    def __init__(self):
        """Create the patch-log and rollback directories if they are missing."""
        PATCHES_DIR.mkdir(parents=True, exist_ok=True)
        ROLLBACK_DIR.mkdir(parents=True, exist_ok=True)

    @staticmethod
    def _resolve_prompt_file(prompt_path: Optional[str]) -> Path:
        """Return the prompt file to operate on (default: system_prompt.md)."""
        if prompt_path is None:
            return HERMES_HOME / "system_prompt.md"
        return Path(prompt_path)

    @staticmethod
    def _fresh_stamped_path(directory: Path, prefix: str, suffix: str) -> Path:
        """Return ``directory/<prefix><unix seconds><suffix>`` that does not exist yet.

        The stamp is bumped until the name is free, so two calls within the
        same second never overwrite each other's file while stamps stay
        plain, increasing integers.
        """
        stamp = int(time.time())
        candidate = directory / f"{prefix}{stamp}{suffix}"
        while candidate.exists():
            stamp += 1
            candidate = directory / f"{prefix}{stamp}{suffix}"
        return candidate

    def analyze_session(self, session_data: dict) -> List[FailurePattern]:
        """Analyze a session for failure patterns.

        Only messages whose role is "assistant" or "tool" are scanned. A
        message counts at most once per failure type, however many of that
        type's regexes match it.

        Args:
            session_data: Session dict with 'messages' list. A missing or
                ``None`` list is treated as empty; entries that are not
                mappings are skipped instead of raising.

        Returns:
            List of detected failure patterns, one per failure type seen,
            in order of first detection.
        """
        # Local import keeps the file's top-level import block untouched.
        from collections.abc import Mapping

        patterns_found: Dict[str, FailurePattern] = {}
        for msg in session_data.get("messages") or []:
            if not isinstance(msg, Mapping):
                continue  # malformed transcript entry; nothing to scan
            if msg.get("role", "") not in ("assistant", "tool"):
                continue
            content = str(msg.get("content", ""))
            for failure_type, config in FAILURE_SIGNALS.items():
                if not any(re.search(rx, content) for rx in config["patterns"]):
                    continue
                found = patterns_found.get(failure_type)
                if found is None:
                    found = FailurePattern(
                        pattern_type=failure_type,
                        description=config["description"],
                        frequency=0,
                        suggested_fix=PROMPT_FIXES.get(failure_type, ""),
                    )
                    patterns_found[failure_type] = found
                found.frequency += 1
                # Keep a few short excerpts as evidence, not the whole transcript.
                if len(found.example_messages) < 3:
                    found.example_messages.append(content[:200])
        return list(patterns_found.values())

    def generate_patches(self, patterns: List[FailurePattern],
                         min_confidence: float = 0.7) -> List[PromptPatch]:
        """Generate prompt patches from failure patterns.

        Confidence is derived from frequency alone: 0.9 for three or more
        hits, 0.75 for two, 0.5 for one.

        Args:
            patterns: Detected failure patterns.
            min_confidence: Minimum confidence to generate a patch.

        Returns:
            List of prompt patches. Patterns below the threshold or without
            a suggested fix produce no patch.
        """
        stamp = int(time.time())
        patches: List[PromptPatch] = []
        for pattern in patterns:
            if pattern.frequency >= 3:
                confidence = 0.9
            elif pattern.frequency >= 2:
                confidence = 0.75
            else:
                confidence = 0.5
            if confidence < min_confidence or not pattern.suggested_fix:
                continue
            patches.append(
                PromptPatch(
                    id=f"{pattern.pattern_type}-{stamp}",
                    failure_type=pattern.pattern_type,
                    original_rule="(missing — no existing rule for this pattern)",
                    new_rule=pattern.suggested_fix,
                    confidence=confidence,
                )
            )
        return patches

    def apply_patches(self, patches: List[PromptPatch],
                      prompt_path: Optional[str] = None) -> int:
        """Apply patches to the system prompt.

        Rules are only appended, never removed. A rule is skipped when its
        text already occurs in the prompt or was already queued earlier in
        the same batch. A backup and a JSON patch log are written only when
        at least one rule is actually appended, so the newest backup always
        holds the prompt as it was before the most recent change.

        Args:
            patches: Patches to apply. ``applied_at`` is set on each patch
                that gets appended.
            prompt_path: Path to prompt file (default: ~/.hermes/system_prompt.md)

        Returns:
            Number of patches applied.
        """
        prompt_file = self._resolve_prompt_file(prompt_path)
        had_prompt = prompt_file.exists()
        # Explicit UTF-8: the rule texts contain non-ASCII characters and the
        # locale default encoding is not guaranteed to handle them.
        current = prompt_file.read_text(encoding="utf-8") if had_prompt else ""

        additions: List[str] = []
        queued_rules = set()
        for patch in patches:
            rule = patch.new_rule
            if rule in current or rule in queued_rules:
                continue
            queued_rules.add(rule)
            additions.append(f"\n## Auto-learned: {patch.failure_type}\n{rule}")
            patch.applied_at = time.time()
        applied = len(additions)

        if additions:
            if had_prompt:
                # Byte-for-byte copy so a rollback restores the file exactly.
                backup = self._fresh_stamped_path(
                    ROLLBACK_DIR, f"{prompt_file.name}.", ".bak"
                )
                backup.write_bytes(prompt_file.read_bytes())
            prompt_file.parent.mkdir(parents=True, exist_ok=True)
            prompt_file.write_text(current + "\n".join(additions), encoding="utf-8")
            # Log the whole batch; skipped patches keep applied_at = None.
            patches_file = self._fresh_stamped_path(PATCHES_DIR, "patches-", ".json")
            with open(patches_file, "w", encoding="utf-8") as f:
                json.dump([vars(p) for p in patches], f, indent=2, default=str)
        logger.info("Applied %d prompt patches", applied)
        return applied

    def rollback_last(self, prompt_path: Optional[str] = None) -> bool:
        """Rollback to the most recent backup.

        Only backups named ``<prompt file name>.<unix seconds>.bak`` are
        considered, and the newest is chosen by numeric timestamp, so a
        backup of a different prompt file is never restored by accident.
        Backups are keyed by file name only: prompt files that share a
        basename in different directories also share backups.

        Args:
            prompt_path: Path to prompt file.

        Returns:
            True if rollback succeeded.
        """
        prompt_file = self._resolve_prompt_file(prompt_path)
        prefix = f"{prompt_file.name}."
        suffix = ".bak"
        candidates = []
        for backup in ROLLBACK_DIR.glob("*.bak"):
            name = backup.name
            if not name.startswith(prefix):
                continue
            stamp = name[len(prefix):-len(suffix)]
            if stamp.isdecimal():
                candidates.append((int(stamp), backup))
        if not candidates:
            logger.warning("No backups to rollback to")
            return False
        _, latest = max(candidates, key=lambda item: item[0])
        prompt_file.parent.mkdir(parents=True, exist_ok=True)
        prompt_file.write_bytes(latest.read_bytes())
        logger.info("Rolled back to %s", latest.name)
        return True

    def learn_from_session(self, session_data: dict) -> Dict[str, Any]:
        """Full learning cycle: analyze → patch → apply.

        Patches are applied to the default prompt file.

        Args:
            session_data: Session dict.

        Returns:
            Summary of what was learned and applied: counts of detected
            patterns, generated patches and applied patches, plus a
            per-pattern list of type, frequency and description.
        """
        patterns = self.analyze_session(session_data)
        patches = self.generate_patches(patterns)
        applied = self.apply_patches(patches)
        return {
            "patterns_detected": len(patterns),
            "patches_generated": len(patches),
            "patches_applied": applied,
            "patterns": [
                {"type": p.pattern_type, "frequency": p.frequency, "description": p.description}
                for p in patterns
            ],
        }