Compare commits

..

1 Commits

Author SHA1 Message Date
Alexander Whitestone
4d8e004b5f fix: extend JSON repair to remaining json.loads sites in run_agent.py
Some checks failed
Contributor Attribution Check / check-attribution (pull_request) Successful in 42s
Docker Build and Publish / build-and-push (pull_request) Has been skipped
Nix / nix (ubuntu-latest) (pull_request) Failing after 4s
Supply Chain Audit / Scan PR for supply chain risks (pull_request) Successful in 36s
Tests / test (pull_request) Failing after 1h13m6s
Tests / e2e (pull_request) Successful in 1m32s
Nix / nix (macos-latest) (pull_request) Has been cancelled
Adds `repair_and_load_json()` to utils.py using the `json_repair` library
as a fallback when `json.loads()` fails. Replaces 8 non-hot-path json.loads
sites identified in issue #809:

- L2250: trajectory/sanitization message content parsing
- L2500: tool_call dict reconstruction in trajectory conversion
- L2535: tool_content parsing (JSON-like strings in tool responses)
- L2888: session log file loading (with warning on unrecoverable parse)
- L3119: todo content parsing in message processing
- L5963: vision result_json parsing
- L6761: memory flush tool call argument parsing
- L8300: cache serialization tool call args normalization

Each site uses an appropriate default ({} for tool args, None/continue for
content parsing) and a context label for debug tracing.

Fixes #809
2026-04-15 22:56:39 -04:00
3 changed files with 89 additions and 342 deletions

View File

@@ -1,302 +0,0 @@
"""Self-Modifying Prompt Engine — agent learns from its own failures.
Analyzes session transcripts, identifies failure patterns, and generates
prompt patches to prevent future failures.
The loop: fail → analyze → rewrite → retry → verify improvement.
Usage:
from agent.self_modify import PromptLearner
learner = PromptLearner()
patches = learner.analyze_session(session_id)
learner.apply_patches(patches)
"""
from __future__ import annotations
import json
import logging
import os
import re
import time
from dataclasses import dataclass, field
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
logger = logging.getLogger(__name__)
HERMES_HOME = Path(os.getenv("HERMES_HOME", Path.home() / ".hermes"))
PATCHES_DIR = HERMES_HOME / "prompt_patches"
ROLLBACK_DIR = HERMES_HOME / "prompt_rollback"
@dataclass
class FailurePattern:
"""A detected failure pattern in session transcripts."""
pattern_type: str # retry_loop, timeout, error_hallucination, context_loss
description: str
frequency: int
example_messages: List[str] = field(default_factory=list)
suggested_fix: str = ""
@dataclass
class PromptPatch:
"""A modification to the system prompt based on failure analysis."""
id: str
failure_type: str
original_rule: str
new_rule: str
confidence: float
applied_at: Optional[float] = None
reverted: bool = False
# Failure detection patterns
FAILURE_SIGNALS = {
"retry_loop": {
"patterns": [
r"(?i)retry(?:ing)?\s*(?:attempt|again)",
r"(?i)failed.*retrying",
r"(?i)error.*again",
r"(?i)attempt\s+\d+\s*(?:of|/)\s*\d+",
],
"description": "Agent stuck in retry loop",
},
"timeout": {
"patterns": [
r"(?i)timed?\s*out",
r"(?i)deadline\s+exceeded",
r"(?i)took\s+(?:too\s+)?long",
],
"description": "Operation timed out",
},
"hallucination": {
"patterns": [
r"(?i)i\s+(?:don't|do\s+not)\s+(?:have|see|find)\s+(?:any|that|this)\s+(?:information|data|file)",
r"(?i)the\s+file\s+doesn't\s+exist",
r"(?i)i\s+(?:made|invented|fabricated)\s+(?:that\s+up|this)",
],
"description": "Agent hallucinated or fabricated information",
},
"context_loss": {
"patterns": [
r"(?i)i\s+(?:don't|do\s+not)\s+(?:remember|recall|know)\s+(?:what|where|when|how)",
r"(?i)could\s+you\s+remind\s+me",
r"(?i)what\s+were\s+we\s+(?:doing|working|talking)\s+(?:on|about)",
],
"description": "Agent lost context from earlier in conversation",
},
"tool_failure": {
"patterns": [
r"(?i)tool\s+(?:call|execution)\s+failed",
r"(?i)command\s+not\s+found",
r"(?i)permission\s+denied",
r"(?i)no\s+such\s+file",
],
"description": "Tool execution failed",
},
}
# Prompt improvement templates
PROMPT_FIXES = {
"retry_loop": (
"If an operation fails more than twice, stop retrying. "
"Report the failure and ask the user for guidance. "
"Do not enter retry loops — they waste tokens."
),
"timeout": (
"For operations that may take long, set a timeout and report "
"progress. If an operation takes more than 30 seconds, report "
"what you've done so far and ask if you should continue."
),
"hallucination": (
"If you cannot find information, say 'I don't know' or "
"'I couldn't find that.' Never fabricate information. "
"If a file doesn't exist, say so — don't guess its contents."
),
"context_loss": (
"When you need context from earlier in the conversation, "
"use session_search to find it. Don't ask the user to repeat themselves."
),
"tool_failure": (
"If a tool fails, check the error message and try a different approach. "
"Don't retry the exact same command — diagnose first."
),
}
class PromptLearner:
"""Analyze session transcripts and generate prompt improvements."""
def __init__(self):
PATCHES_DIR.mkdir(parents=True, exist_ok=True)
ROLLBACK_DIR.mkdir(parents=True, exist_ok=True)
def analyze_session(self, session_data: dict) -> List[FailurePattern]:
"""Analyze a session for failure patterns.
Args:
session_data: Session dict with 'messages' list.
Returns:
List of detected failure patterns.
"""
messages = session_data.get("messages", [])
patterns_found: Dict[str, FailurePattern] = {}
for msg in messages:
content = str(msg.get("content", ""))
role = msg.get("role", "")
# Only analyze assistant messages and tool results
if role not in ("assistant", "tool"):
continue
for failure_type, config in FAILURE_SIGNALS.items():
for pattern in config["patterns"]:
if re.search(pattern, content):
if failure_type not in patterns_found:
patterns_found[failure_type] = FailurePattern(
pattern_type=failure_type,
description=config["description"],
frequency=0,
suggested_fix=PROMPT_FIXES.get(failure_type, ""),
)
patterns_found[failure_type].frequency += 1
if len(patterns_found[failure_type].example_messages) < 3:
patterns_found[failure_type].example_messages.append(
content[:200]
)
break # One match per message per type is enough
return list(patterns_found.values())
def generate_patches(self, patterns: List[FailurePattern],
min_confidence: float = 0.7) -> List[PromptPatch]:
"""Generate prompt patches from failure patterns.
Args:
patterns: Detected failure patterns.
min_confidence: Minimum confidence to generate a patch.
Returns:
List of prompt patches.
"""
patches = []
for pattern in patterns:
# Confidence based on frequency
if pattern.frequency >= 3:
confidence = 0.9
elif pattern.frequency >= 2:
confidence = 0.75
else:
confidence = 0.5
if confidence < min_confidence:
continue
if not pattern.suggested_fix:
continue
patch = PromptPatch(
id=f"{pattern.pattern_type}-{int(time.time())}",
failure_type=pattern.pattern_type,
original_rule="(missing — no existing rule for this pattern)",
new_rule=pattern.suggested_fix,
confidence=confidence,
)
patches.append(patch)
return patches
def apply_patches(self, patches: List[PromptPatch],
prompt_path: Optional[str] = None) -> int:
"""Apply patches to the system prompt.
Args:
patches: Patches to apply.
prompt_path: Path to prompt file (default: ~/.hermes/system_prompt.md)
Returns:
Number of patches applied.
"""
if prompt_path is None:
prompt_path = str(HERMES_HOME / "system_prompt.md")
prompt_file = Path(prompt_path)
# Backup current prompt
if prompt_file.exists():
backup = ROLLBACK_DIR / f"{prompt_file.name}.{int(time.time())}.bak"
backup.write_text(prompt_file.read_text())
# Read current prompt
current = prompt_file.read_text() if prompt_file.exists() else ""
# Apply patches
applied = 0
additions = []
for patch in patches:
if patch.new_rule not in current:
additions.append(f"\n## Auto-learned: {patch.failure_type}\n{patch.new_rule}")
patch.applied_at = time.time()
applied += 1
if additions:
new_content = current + "\n".join(additions)
prompt_file.write_text(new_content)
# Log patches
patches_file = PATCHES_DIR / f"patches-{int(time.time())}.json"
with open(patches_file, "w") as f:
json.dump([p.__dict__ for p in patches], f, indent=2, default=str)
logger.info("Applied %d prompt patches", applied)
return applied
def rollback_last(self, prompt_path: Optional[str] = None) -> bool:
"""Rollback to the most recent backup.
Args:
prompt_path: Path to prompt file.
Returns:
True if rollback succeeded.
"""
if prompt_path is None:
prompt_path = str(HERMES_HOME / "system_prompt.md")
backups = sorted(ROLLBACK_DIR.glob("*.bak"), reverse=True)
if not backups:
logger.warning("No backups to rollback to")
return False
latest = backups[0]
Path(prompt_path).write_text(latest.read_text())
logger.info("Rolled back to %s", latest.name)
return True
def learn_from_session(self, session_data: dict) -> Dict[str, Any]:
"""Full learning cycle: analyze → patch → apply.
Args:
session_data: Session dict.
Returns:
Summary of what was learned and applied.
"""
patterns = self.analyze_session(session_data)
patches = self.generate_patches(patterns)
applied = self.apply_patches(patches)
return {
"patterns_detected": len(patterns),
"patches_generated": len(patches),
"patches_applied": applied,
"patterns": [
{"type": p.pattern_type, "frequency": p.frequency, "description": p.description}
for p in patterns
],
}

View File

@@ -106,7 +106,7 @@ from agent.trajectory import (
convert_scratchpad_to_think, has_incomplete_scratchpad,
save_trajectory as _save_trajectory_to_file,
)
from utils import atomic_json_write, env_var_enabled
from utils import atomic_json_write, env_var_enabled, repair_and_load_json
@@ -2246,9 +2246,8 @@ class AIAgent:
for msg in getattr(review_agent, "_session_messages", []):
if not isinstance(msg, dict) or msg.get("role") != "tool":
continue
try:
data = json.loads(msg.get("content", "{}"))
except (json.JSONDecodeError, TypeError):
data = repair_and_load_json(msg.get("content", "{}"), default=None, context="trajectory_content")
if data is None:
continue
if not data.get("success"):
continue
@@ -2496,13 +2495,13 @@ class AIAgent:
if not tool_call or not isinstance(tool_call, dict): continue
# Parse arguments - should always succeed since we validate during conversation
# but keep try-except as safety net
try:
arguments = json.loads(tool_call["function"]["arguments"]) if isinstance(tool_call["function"]["arguments"], str) else tool_call["function"]["arguments"]
except json.JSONDecodeError:
# This shouldn't happen since we validate and retry during conversation,
# but if it does, log warning and use empty dict
logging.warning(f"Unexpected invalid JSON in trajectory conversion: {tool_call['function']['arguments'][:100]}")
arguments = {}
raw_args = tool_call["function"]["arguments"]
if isinstance(raw_args, str):
arguments = repair_and_load_json(raw_args, default={}, context="trajectory_tool_call")
if arguments == {} and raw_args.strip() not in ("{}", ""):
logging.warning("Unexpected invalid JSON in trajectory conversion: %.100s", raw_args)
else:
arguments = raw_args
tool_call_json = {
"name": tool_call["function"]["name"],
@@ -2530,11 +2529,10 @@ class AIAgent:
# Try to parse tool content as JSON if it looks like JSON
tool_content = tool_msg["content"]
try:
if tool_content.strip().startswith(("{", "[")):
tool_content = json.loads(tool_content)
except (json.JSONDecodeError, AttributeError):
pass # Keep as string if not valid JSON
if isinstance(tool_content, str) and tool_content.strip().startswith(("{", "[")):
parsed = repair_and_load_json(tool_content, default=None, context="trajectory_tool_content")
if parsed is not None:
tool_content = parsed
tool_index = len(tool_responses)
tool_name = (
@@ -2885,14 +2883,21 @@ class AIAgent:
# with partial history and would otherwise clobber the full JSON log.
if self.session_log_file.exists():
try:
existing = json.loads(self.session_log_file.read_text(encoding="utf-8"))
existing_count = existing.get("message_count", len(existing.get("messages", [])))
if existing_count > len(cleaned):
logging.debug(
"Skipping session log overwrite: existing has %d messages, current has %d",
existing_count, len(cleaned),
)
return
existing = repair_and_load_json(
self.session_log_file.read_text(encoding="utf-8"),
default=None,
context="session_log_load",
)
if existing is None:
logging.warning("Session log at %s could not be parsed; allowing overwrite", self.session_log_file)
else:
existing_count = existing.get("message_count", len(existing.get("messages", [])))
if existing_count > len(cleaned):
logging.debug(
"Skipping session log overwrite: existing has %d messages, current has %d",
existing_count, len(cleaned),
)
return
except Exception:
pass # corrupted existing file — allow the overwrite
@@ -3115,13 +3120,12 @@ class AIAgent:
# Quick check: todo responses contain "todos" key
if '"todos"' not in content:
continue
try:
data = json.loads(content)
if "todos" in data and isinstance(data["todos"], list):
last_todo_response = data["todos"]
break
except (json.JSONDecodeError, TypeError):
data = repair_and_load_json(content, default=None, context="todo_content")
if data is None:
continue
if "todos" in data and isinstance(data["todos"], list):
last_todo_response = data["todos"]
break
if last_todo_response:
# Replay the items into the store (replace mode)
@@ -5960,7 +5964,7 @@ class AIAgent:
result_json = asyncio.run(
vision_analyze_tool(image_url=vision_source, user_prompt=analysis_prompt)
)
result = json.loads(result_json) if isinstance(result_json, str) else {}
result = repair_and_load_json(result_json, default={}, context="vision_result") if isinstance(result_json, str) else {}
description = (result.get("analysis") or "").strip()
except Exception as e:
description = f"Image analysis failed: {e}"
@@ -6758,7 +6762,7 @@ class AIAgent:
for tc in tool_calls:
if tc.function.name == "memory":
try:
args = json.loads(tc.function.arguments)
args = repair_and_load_json(tc.function.arguments, default={}, context="memory_flush")
flush_target = args.get("target", "memory")
from tools.memory_tool import memory_tool as _memory_tool
_memory_tool(
@@ -8297,14 +8301,15 @@ class AIAgent:
for tc in tcs:
if isinstance(tc, dict) and "function" in tc:
try:
args_obj = json.loads(tc["function"]["arguments"])
tc = {**tc, "function": {
**tc["function"],
"arguments": json.dumps(
args_obj, separators=(",", ":"),
sort_keys=True,
),
}}
args_obj = repair_and_load_json(tc["function"]["arguments"], default=None, context="cache_serialization")
if args_obj is not None:
tc = {**tc, "function": {
**tc["function"],
"arguments": json.dumps(
args_obj, separators=(",", ":"),
sort_keys=True,
),
}}
except Exception:
pass
new_tcs.append(tc)

View File

@@ -145,6 +145,50 @@ def safe_json_loads(text: str, default: Any = None) -> Any:
return default
def repair_and_load_json(text: str, default: Any = None, *, context: str = "") -> Any:
"""Parse JSON with automatic repair fallback.
Tries ``json.loads`` first. On failure, attempts to repair the string
using the ``json_repair`` library before falling back to *default*.
Logs a debug-level warning when repair is triggered so that callers can
observe silent-failure patterns without raising exceptions.
Args:
text: The JSON string to parse.
default: Value returned when both parse and repair fail.
context: Optional label included in the debug log (e.g. the call-site
name) to aid tracing.
Returns:
Parsed Python object, or *default* on unrecoverable failure.
"""
if not isinstance(text, str):
return default
try:
return json.loads(text)
except (json.JSONDecodeError, ValueError):
pass
try:
import json_repair # optional dependency
repaired = json_repair.repair_json(text, return_objects=True)
# json_repair returns "" when it cannot produce a valid structure.
# Guard against returning that sentinel as if it were a successful parse.
# Exception: if the original text was a JSON empty-string literal like `""`
# then "" is the correct parse result.
if repaired == "" and text.strip() not in ('""', "''"):
tag = f" [{context}]" if context else ""
logger.debug("repair_and_load_json%s: repair yielded empty string; returning default", tag)
return default
tag = f" [{context}]" if context else ""
logger.debug("repair_and_load_json%s: repaired malformed JSON (first 120 chars): %.120s", tag, text)
return repaired
except Exception as exc:
tag = f" [{context}]" if context else ""
logger.debug("repair_and_load_json%s: repair failed (%s); returning default", tag, exc)
return default
# ─── Environment Variable Helpers ─────────────────────────────────────────────