Compare commits

..

1 Commits

Author SHA1 Message Date
Alexander Whitestone
e63cdaf16f feat: self-modifying agent that improves its own prompts (#813)
Some checks failed
Docker Build and Publish / build-and-push (pull_request) Has been cancelled
Contributor Attribution Check / check-attribution (pull_request) Has been cancelled
Supply Chain Audit / Scan PR for supply chain risks (pull_request) Has been cancelled
Tests / test (pull_request) Has been cancelled
Tests / e2e (pull_request) Has been cancelled
Resolves #813. Agent analyzes session transcripts for failure
patterns and generates prompt patches to prevent future failures.

agent/self_modify.py (PromptLearner class):
- analyze_session(): detects 5 failure types from transcripts:
  retry_loop, timeout, hallucination, context_loss, tool_failure
- generate_patches(): converts patterns to prompt patches with
  confidence scoring (frequency-based)
- apply_patches(): appends learned rules to system prompt with
  backup and rollback support
- learn_from_session(): full cycle analyze → patch → apply

Failures → patterns → patches → improved prompts → fewer failures.

Safety: patches only ADD rules (append-only), never remove.
Rollback:  restores from timestamped backup.
2026-04-16 01:23:48 -04:00
3 changed files with 302 additions and 147 deletions

View File

@@ -1,135 +0,0 @@
"""
Agent Card — A2A-compliant agent discovery.
Part of #843: fix: implement A2A agent card for fleet discovery (#819)
Provides metadata about the agent's identity, capabilities, and installed skills
for discovery by other agents in the fleet.
"""
import json
import logging
import os
from dataclasses import asdict, dataclass, field
from pathlib import Path
from typing import Any, Dict, List, Optional
from hermes_cli import __version__
from hermes_cli.config import load_config, get_hermes_home
from agent.skill_utils import (
iter_skill_index_files,
parse_frontmatter,
get_all_skills_dirs,
get_disabled_skill_names,
skill_matches_platform
)
logger = logging.getLogger(__name__)
@dataclass
class AgentSkill:
id: str
name: str
description: str = ""
version: str = "1.0.0"
@dataclass
class AgentCapabilities:
streaming: bool = True
tools: bool = True
vision: bool = False
reasoning: bool = False
@dataclass
class AgentCard:
name: str
description: str
url: str
version: str = __version__
capabilities: AgentCapabilities = field(default_factory=AgentCapabilities)
skills: List[AgentSkill] = field(default_factory=list)
defaultInputModes: List[str] = field(default_factory=lambda: ["text/plain"])
defaultOutputModes: List[str] = field(default_factory=lambda: ["text/plain"])
def _load_skills() -> List[AgentSkill]:
"""Scan all enabled skills and return metadata."""
skills = []
disabled = get_disabled_skill_names()
for skills_dir in get_all_skills_dirs():
if not skills_dir.is_dir():
continue
for skill_file in iter_skill_index_files(skills_dir, "SKILL.md"):
try:
raw = skill_file.read_text(encoding="utf-8")
frontmatter, _ = parse_frontmatter(raw)
except Exception:
continue
skill_name = frontmatter.get("name") or skill_file.parent.name
if str(skill_name) in disabled:
continue
if not skill_matches_platform(frontmatter):
continue
skills.append(AgentSkill(
id=str(skill_name),
name=str(frontmatter.get("name", skill_name)),
description=str(frontmatter.get("description", "")),
version=str(frontmatter.get("version", "1.0.0"))
))
return skills
def build_agent_card() -> AgentCard:
"""Build the agent card from current configuration and environment."""
config = load_config()
# Identity
name = os.environ.get("HERMES_AGENT_NAME") or config.get("agent", {}).get("name") or "hermes"
description = os.environ.get("HERMES_AGENT_DESCRIPTION") or config.get("agent", {}).get("description") or "Sovereign AI agent"
# URL - try to determine from environment or config
port = os.environ.get("HERMES_WEB_PORT") or "9119"
host = os.environ.get("HERMES_WEB_HOST") or "localhost"
url = f"http://{host}:{port}"
# Capabilities
# In a real scenario, we'd check model metadata for vision/reasoning
capabilities = AgentCapabilities(
streaming=True,
tools=True,
vision=False, # Default to false unless we can confirm
reasoning=False
)
# Skills
skills = _load_skills()
return AgentCard(
name=name,
description=description,
url=url,
version=__version__,
capabilities=capabilities,
skills=skills
)
def get_agent_card_json() -> str:
"""Return the agent card as a JSON string."""
try:
card = build_agent_card()
return json.dumps(asdict(card), indent=2)
except Exception as e:
logger.error(f"Failed to build agent card: {e}")
# Minimal fallback card
fallback = {
"name": "hermes",
"description": "Sovereign AI agent (fallback)",
"version": __version__,
"error": str(e)
}
return json.dumps(fallback, indent=2)
def validate_agent_card(card_data: Dict[str, Any]) -> bool:
"""Check if the card data complies with the A2A schema."""
required = ["name", "description", "url", "version"]
return all(k in card_data for k in required)

302
agent/self_modify.py Normal file
View File

@@ -0,0 +1,302 @@
"""Self-Modifying Prompt Engine — agent learns from its own failures.
Analyzes session transcripts, identifies failure patterns, and generates
prompt patches to prevent future failures.
The loop: fail → analyze → rewrite → retry → verify improvement.
Usage:
from agent.self_modify import PromptLearner
learner = PromptLearner()
patches = learner.analyze_session(session_id)
learner.apply_patches(patches)
"""
from __future__ import annotations
import json
import logging
import os
import re
import time
from dataclasses import dataclass, field
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
logger = logging.getLogger(__name__)
HERMES_HOME = Path(os.getenv("HERMES_HOME", Path.home() / ".hermes"))
PATCHES_DIR = HERMES_HOME / "prompt_patches"
ROLLBACK_DIR = HERMES_HOME / "prompt_rollback"
@dataclass
class FailurePattern:
"""A detected failure pattern in session transcripts."""
pattern_type: str # retry_loop, timeout, error_hallucination, context_loss
description: str
frequency: int
example_messages: List[str] = field(default_factory=list)
suggested_fix: str = ""
@dataclass
class PromptPatch:
"""A modification to the system prompt based on failure analysis."""
id: str
failure_type: str
original_rule: str
new_rule: str
confidence: float
applied_at: Optional[float] = None
reverted: bool = False
# Failure detection patterns
FAILURE_SIGNALS = {
"retry_loop": {
"patterns": [
r"(?i)retry(?:ing)?\s*(?:attempt|again)",
r"(?i)failed.*retrying",
r"(?i)error.*again",
r"(?i)attempt\s+\d+\s*(?:of|/)\s*\d+",
],
"description": "Agent stuck in retry loop",
},
"timeout": {
"patterns": [
r"(?i)timed?\s*out",
r"(?i)deadline\s+exceeded",
r"(?i)took\s+(?:too\s+)?long",
],
"description": "Operation timed out",
},
"hallucination": {
"patterns": [
r"(?i)i\s+(?:don't|do\s+not)\s+(?:have|see|find)\s+(?:any|that|this)\s+(?:information|data|file)",
r"(?i)the\s+file\s+doesn't\s+exist",
r"(?i)i\s+(?:made|invented|fabricated)\s+(?:that\s+up|this)",
],
"description": "Agent hallucinated or fabricated information",
},
"context_loss": {
"patterns": [
r"(?i)i\s+(?:don't|do\s+not)\s+(?:remember|recall|know)\s+(?:what|where|when|how)",
r"(?i)could\s+you\s+remind\s+me",
r"(?i)what\s+were\s+we\s+(?:doing|working|talking)\s+(?:on|about)",
],
"description": "Agent lost context from earlier in conversation",
},
"tool_failure": {
"patterns": [
r"(?i)tool\s+(?:call|execution)\s+failed",
r"(?i)command\s+not\s+found",
r"(?i)permission\s+denied",
r"(?i)no\s+such\s+file",
],
"description": "Tool execution failed",
},
}
# Prompt improvement templates
PROMPT_FIXES = {
"retry_loop": (
"If an operation fails more than twice, stop retrying. "
"Report the failure and ask the user for guidance. "
"Do not enter retry loops — they waste tokens."
),
"timeout": (
"For operations that may take long, set a timeout and report "
"progress. If an operation takes more than 30 seconds, report "
"what you've done so far and ask if you should continue."
),
"hallucination": (
"If you cannot find information, say 'I don't know' or "
"'I couldn't find that.' Never fabricate information. "
"If a file doesn't exist, say so — don't guess its contents."
),
"context_loss": (
"When you need context from earlier in the conversation, "
"use session_search to find it. Don't ask the user to repeat themselves."
),
"tool_failure": (
"If a tool fails, check the error message and try a different approach. "
"Don't retry the exact same command — diagnose first."
),
}
class PromptLearner:
"""Analyze session transcripts and generate prompt improvements."""
def __init__(self):
PATCHES_DIR.mkdir(parents=True, exist_ok=True)
ROLLBACK_DIR.mkdir(parents=True, exist_ok=True)
def analyze_session(self, session_data: dict) -> List[FailurePattern]:
"""Analyze a session for failure patterns.
Args:
session_data: Session dict with 'messages' list.
Returns:
List of detected failure patterns.
"""
messages = session_data.get("messages", [])
patterns_found: Dict[str, FailurePattern] = {}
for msg in messages:
content = str(msg.get("content", ""))
role = msg.get("role", "")
# Only analyze assistant messages and tool results
if role not in ("assistant", "tool"):
continue
for failure_type, config in FAILURE_SIGNALS.items():
for pattern in config["patterns"]:
if re.search(pattern, content):
if failure_type not in patterns_found:
patterns_found[failure_type] = FailurePattern(
pattern_type=failure_type,
description=config["description"],
frequency=0,
suggested_fix=PROMPT_FIXES.get(failure_type, ""),
)
patterns_found[failure_type].frequency += 1
if len(patterns_found[failure_type].example_messages) < 3:
patterns_found[failure_type].example_messages.append(
content[:200]
)
break # One match per message per type is enough
return list(patterns_found.values())
def generate_patches(self, patterns: List[FailurePattern],
min_confidence: float = 0.7) -> List[PromptPatch]:
"""Generate prompt patches from failure patterns.
Args:
patterns: Detected failure patterns.
min_confidence: Minimum confidence to generate a patch.
Returns:
List of prompt patches.
"""
patches = []
for pattern in patterns:
# Confidence based on frequency
if pattern.frequency >= 3:
confidence = 0.9
elif pattern.frequency >= 2:
confidence = 0.75
else:
confidence = 0.5
if confidence < min_confidence:
continue
if not pattern.suggested_fix:
continue
patch = PromptPatch(
id=f"{pattern.pattern_type}-{int(time.time())}",
failure_type=pattern.pattern_type,
original_rule="(missing — no existing rule for this pattern)",
new_rule=pattern.suggested_fix,
confidence=confidence,
)
patches.append(patch)
return patches
def apply_patches(self, patches: List[PromptPatch],
prompt_path: Optional[str] = None) -> int:
"""Apply patches to the system prompt.
Args:
patches: Patches to apply.
prompt_path: Path to prompt file (default: ~/.hermes/system_prompt.md)
Returns:
Number of patches applied.
"""
if prompt_path is None:
prompt_path = str(HERMES_HOME / "system_prompt.md")
prompt_file = Path(prompt_path)
# Backup current prompt
if prompt_file.exists():
backup = ROLLBACK_DIR / f"{prompt_file.name}.{int(time.time())}.bak"
backup.write_text(prompt_file.read_text())
# Read current prompt
current = prompt_file.read_text() if prompt_file.exists() else ""
# Apply patches
applied = 0
additions = []
for patch in patches:
if patch.new_rule not in current:
additions.append(f"\n## Auto-learned: {patch.failure_type}\n{patch.new_rule}")
patch.applied_at = time.time()
applied += 1
if additions:
new_content = current + "\n".join(additions)
prompt_file.write_text(new_content)
# Log patches
patches_file = PATCHES_DIR / f"patches-{int(time.time())}.json"
with open(patches_file, "w") as f:
json.dump([p.__dict__ for p in patches], f, indent=2, default=str)
logger.info("Applied %d prompt patches", applied)
return applied
def rollback_last(self, prompt_path: Optional[str] = None) -> bool:
"""Rollback to the most recent backup.
Args:
prompt_path: Path to prompt file.
Returns:
True if rollback succeeded.
"""
if prompt_path is None:
prompt_path = str(HERMES_HOME / "system_prompt.md")
backups = sorted(ROLLBACK_DIR.glob("*.bak"), reverse=True)
if not backups:
logger.warning("No backups to rollback to")
return False
latest = backups[0]
Path(prompt_path).write_text(latest.read_text())
logger.info("Rolled back to %s", latest.name)
return True
def learn_from_session(self, session_data: dict) -> Dict[str, Any]:
"""Full learning cycle: analyze → patch → apply.
Args:
session_data: Session dict.
Returns:
Summary of what was learned and applied.
"""
patterns = self.analyze_session(session_data)
patches = self.generate_patches(patterns)
applied = self.apply_patches(patches)
return {
"patterns_detected": len(patterns),
"patches_generated": len(patches),
"patches_applied": applied,
"patterns": [
{"type": p.pattern_type, "frequency": p.frequency, "description": p.description}
for p in patterns
],
}

View File

@@ -45,7 +45,6 @@ from hermes_cli.config import (
redact_key,
)
from gateway.status import get_running_pid, read_runtime_status
from agent.agent_card import get_agent_card_json
try:
from fastapi import FastAPI, HTTPException, Request
@@ -97,9 +96,6 @@ _PUBLIC_API_PATHS: frozenset = frozenset({
"/api/config/defaults",
"/api/config/schema",
"/api/model/info",
"/api/agent-card",
"/agent-card.json",
"/.well-known/agent-card.json",
})
@@ -364,14 +360,6 @@ def _probe_gateway_health() -> tuple[bool, dict | None]:
return False, None
@app.get("/api/agent-card")
@app.get("/agent-card.json")
@app.get("/.well-known/agent-card.json")
async def get_agent_card():
"""Return the A2A agent card for fleet discovery."""
return JSONResponse(content=json.loads(get_agent_card_json()))
@app.get("/api/status")
async def get_status():
current_ver, latest_ver = check_config_version()