Compare commits

..

2 Commits

Author SHA1 Message Date
7ac8d0268f test(cron): add tests for cloud-context warning injection
Some checks failed
Forge CI / smoke-and-build (pull_request) Failing after 1m13s
2026-04-14 01:18:16 +00:00
2e59f8540d fix(cron): inject cloud-context warning when prompt refs localhost
Fixes #378, Closes #456

When a cron job runs on a cloud endpoint but its prompt references
local services (Ollama, localhost ports, etc.), inject a SYSTEM NOTE
telling the agent it cannot reach localhost so it reports the
limitation instead of wasting iterations on doomed connections.
2026-04-14 01:17:25 +00:00
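In practice the change works roughly as sketched below. The helper names and the import path come from the diffs in this compare; the example prompt and the provider value "nous" are illustrative only.

# Illustrative sketch, not project code: the real call site is run_job() in the
# scheduler diff below, which only injects when the job routes to a cloud endpoint.
from cron.scheduler import _detect_local_service_refs, _inject_cloud_context

prompt = "Check Ollama is responding on localhost:11434"
refs = _detect_local_service_refs(prompt)  # matches several patterns, e.g. "localhost:11434" and "Check Ollama"
if refs:
    prompt = _inject_cloud_context(prompt, refs, provider="nous")
# prompt now begins with "[SYSTEM NOTE — cloud endpoint] ..." ahead of the original text.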
6 changed files with 153 additions and 1181 deletions

View File

@@ -12,6 +12,7 @@ import asyncio
import concurrent.futures
import json
import logging
import re
import os
import subprocess
import sys
@@ -544,6 +545,55 @@ def _run_job_script(script_path: str) -> tuple[bool, str]:
    except Exception as exc:
        return False, f"Script execution failed: {exc}"


# ---------------------------------------------------------------------------
# Cloud-context warning for local-service references (#378, #456)
# ---------------------------------------------------------------------------

_LOCAL_SERVICE_PATTERNS = [
    re.compile(r'localhost:\d+', re.IGNORECASE),
    re.compile(r'127\.0\.0\.1:\d+'),
    re.compile(r'check\s+ollama', re.IGNORECASE),
    re.compile(r'ollama\s+(is\s+)?respond', re.IGNORECASE),
    re.compile(r'curl\s+localhost', re.IGNORECASE),
    re.compile(r'curl\s+127\.', re.IGNORECASE),
    re.compile(r'curl\s+local', re.IGNORECASE),
    re.compile(r'ping\s+localhost', re.IGNORECASE),
    re.compile(r'poll(ing)?\s+local', re.IGNORECASE),
    re.compile(r'check\s+service\s+respond', re.IGNORECASE),
    re.compile(r'11434'),  # Ollama default port
    re.compile(r'11435'),  # common alternative Ollama port
]


def _detect_local_service_refs(prompt: str) -> list[str]:
    """Return the local-service reference substrings matched in the prompt."""
    refs = []
    for pat in _LOCAL_SERVICE_PATTERNS:
        m = pat.search(prompt)
        if m:
            refs.append(m.group(0))
    return refs


def _inject_cloud_context(prompt: str, refs: list[str], provider: str) -> str:
    """Prepend a SYSTEM NOTE so the agent knows it cannot reach localhost."""
    refs_str = ", ".join(f'"{r}"' for r in refs)
    warning = (
        "[SYSTEM NOTE — cloud endpoint]\n"
        f"You are running on a cloud inference endpoint ({provider}). "
        f"Your prompt references local services: {refs_str}. "
        "You CANNOT reach localhost or any local network address from this endpoint. "
        "Do NOT attempt curl, ping, SSH, or any network calls to localhost. "
        "Instead, report to the user that this job requires a local model endpoint "
        "to check local services, and suggest they re-run with a local provider.\n"
    )
    return warning + prompt


def _build_job_prompt(job: dict) -> str:
    """Build the effective prompt for a cron job, optionally loading one or more skills first."""
@@ -817,6 +867,18 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
            job_name,
        )

        # Inject cloud-context warning when prompt references local services (#378)
        if _is_cloud:
            _local_refs = _detect_local_service_refs(prompt)
            if _local_refs:
                _provider_name = turn_route["runtime"].get("provider", "cloud")
                prompt = _inject_cloud_context(prompt, _local_refs, _provider_name)
                logger.info(
                    "Job '%s': injected cloud-context warning for local refs: %s",
                    job_name,
                    _local_refs,
                )

        _agent_kwargs = _safe_agent_kwargs({
            "model": turn_route["model"],
            "api_key": turn_route["runtime"].get("api_key"),
View File

@@ -1,113 +0,0 @@
# Warm Session Provisioning: Revised Hypothesis
**Research Document v2.0**
**Issue:** #327
**Date:** April 2026
**Status:** Revised Based on Empirical Data
## Executive Summary
Initial hypothesis: Marathon sessions (100+ messages) have lower error rates, suggesting agents improve with experience. This was **partially incorrect**.
**Actual finding:** Error rates INCREASE within marathon sessions (average first half: 26.8%, second half: 32.7%). Sessions don't improve; they degrade.
## Corrected Understanding
### What the Data Actually Shows
1. **Error rates increase over time** within sessions
2. **Marathon sessions appear more reliable** in aggregate because:
- Only well-guided sessions survive to 100+ messages
- Users who correct errors keep sessions alive
- Selection bias: failed sessions end early
3. **User guidance drives success**, not agent adaptation
### Revised Hypothesis
The "proficiency" observed in marathon sessions comes from:
- **User expertise**: Users who know how to guide the agent
- **Established context**: Shared reference points reduce ambiguity
- **Error correction patterns**: Users develop strategies to fix agent mistakes
- **Session survivorship**: Only well-managed sessions reach marathon length
## New Research Direction
### 1. User Guidance Patterns
Instead of agent proficiency, study user strategies:
- How do expert users phrase requests?
- What correction patterns work best?
- How do users establish context?
### 2. Context Window Management
Long sessions may suffer from context degradation:
- Attention dilution over many messages
- Lost context from early messages
- Compression artifacts
### 3. Warm Session v2: User-Guided Templates
Instead of pre-seeding agent patterns, pre-seed user guidance:
- Effective prompt templates
- Error correction strategies
- Context establishment patterns
## Implementation Plan
### Phase 1: User Pattern Analysis
- Analyze successful user strategies
- Extract effective prompt patterns
- Identify error correction techniques
### Phase 2: Guidance Templates
- Create user-facing templates
- Document effective patterns
- Provide prompt engineering guidance
### Phase 3: Context Management
- Optimize context window usage
- Implement smart context refresh
- Prevent attention degradation
### Phase 4: A/B Testing
- Test guided vs unguided sessions
- Measure error reduction from user guidance
- Statistical validation
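To make the "Statistical validation" step concrete, here is a minimal sketch of a two-proportion z-test comparing error rates between guided and unguided sessions. It uses only the standard library; the function name and the counts are placeholders, not project code or real data.

```python
# Hedged sketch for Phase 4: two-sided two-proportion z-test on error rates.
from math import erf, sqrt

def two_proportion_z(err_a: int, n_a: int, err_b: int, n_b: int) -> tuple[float, float]:
    """Return (z, two-sided p-value) for H0: the two error rates are equal."""
    p_a, p_b = err_a / n_a, err_b / n_b
    pooled = (err_a + err_b) / (n_a + n_b)
    se = sqrt(pooled * (1 - pooled) * (1 / n_a + 1 / n_b))
    z = (p_a - p_b) / se
    p_value = 2 * (1 - 0.5 * (1 + erf(abs(z) / sqrt(2))))
    return z, p_value

# Placeholder counts: 52 errors in 400 tool calls (guided) vs 81 in 400 (unguided).
z, p = two_proportion_z(52, 400, 81, 400)
print(f"z={z:.2f}, p={p:.4f}")
```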
## Key Metrics
1. **Error Rate by Position**
- First 10 messages: baseline
- Messages 10-50: degradation rate
- Messages 50+: long-session behavior
2. **User Intervention Rate**
- How often users correct errors
- Success rate of corrections
- Patterns in effective corrections
3. **Context Window Utilization**
- Token usage over time
- Information retention rate
- Compression effectiveness
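To make the position-based error rate concrete (matching the first-half/second-half figures in the Executive Summary), here is a minimal per-session sketch. The message schema (`role`, `content`) and the "error"/"failed" heuristic mirror the ones used in tools/user_guidance.py; the function name and the halfway split are illustrative.

```python
# Illustrative sketch only: first-half vs second-half error rate for one session.
def halfwise_error_rates(messages: list[dict]) -> tuple[float, float]:
    def error_rate(msgs: list[dict]) -> float:
        tool_msgs = [m for m in msgs if m.get("role") == "tool"]
        if not tool_msgs:
            return 0.0
        errors = sum(
            1 for m in tool_msgs
            if "error" in m.get("content", "").lower()
            or "failed" in m.get("content", "").lower()
        )
        return errors / len(tool_msgs)

    mid = len(messages) // 2
    return error_rate(messages[:mid]), error_rate(messages[mid:])
```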
## Paper Contributions (Revised)
1. **Counterintuitive finding**: Longer sessions have HIGHER error rates
2. **Selection bias**: Marathon sessions represent survivorship bias
3. **User expertise matters more than agent adaptation**
4. **Context degradation over long sessions**
## Next Steps
1. ✅ Correct initial hypothesis
2. ⏳ Analyze user guidance patterns
3. ⏳ Extract effective prompt strategies
4. ⏳ Create user-facing guidance templates
5. ⏳ Optimize context window management
6. ⏳ Run A/B tests on guided sessions
7. ⏳ Write paper with corrected findings
## References
- Empirical Audit 2026-04-12, Finding 4
- Follow-up Analysis: Comment on #327 (2026-04-13)
- Issue #327 (original hypothesis)

View File

@@ -5258,34 +5258,6 @@ For more help on a command:
sessions_parser.set_defaults(func=cmd_sessions)
# User guidance command (research #327 revised)
guidance_parser = subparsers.add_parser(
"guidance",
help="User guidance pattern analysis (research)",
description="Analyze effective user strategies for agent sessions"
)
guidance_subparsers = guidance_parser.add_subparsers(dest="guidance_command")
# Guidance analyze command
guidance_analyze = guidance_subparsers.add_parser("analyze", help="Analyze user guidance in a session")
guidance_analyze.add_argument("session_id", help="Session ID to analyze")
# Guidance create-template command
guidance_create = guidance_subparsers.add_parser("create-template", help="Create guidance template from sessions")
guidance_create.add_argument("session_ids", nargs="+", help="Session IDs to analyze")
guidance_create.add_argument("--name", "-n", help="Template name")
# Guidance list-templates command
guidance_subparsers.add_parser("list-templates", help="List available guidance templates")
# Guidance generate-guide command
guidance_guide = guidance_subparsers.add_parser("generate-guide", help="Generate user guide from template")
guidance_guide.add_argument("profile_id", help="Profile ID to generate guide from")
guidance_parser.set_defaults(func=cmd_guidance)
# =========================================================================
# insights command
# =========================================================================
@@ -5626,48 +5598,3 @@ Examples:
if __name__ == "__main__":
main()
def cmd_guidance(args):
"""Handle user guidance pattern analysis commands."""
from hermes_cli.colors import Colors, color
subcmd = getattr(args, 'guidance_command', None)
if subcmd is None:
print(color("User Guidance Pattern Analysis (Research #327 Revised)", Colors.CYAN))
print("\nCommands:")
print(" hermes guidance analyze SESSION_ID - Analyze user guidance patterns")
print(" hermes guidance create-template SESSION_IDS - Create guidance template")
print(" hermes guidance list-templates - List available templates")
print(" hermes guidance generate-guide PROFILE_ID - Generate user guide")
print("\nNote: Research shows user guidance matters more than agent experience.")
return 0
# Import user guidance module
try:
from tools.user_guidance import guidance_command
# Convert args to list for the module
args_list = []
if subcmd == "analyze":
args_list = ["analyze", args.session_id]
elif subcmd == "create-template":
args_list = ["create-template"] + args.session_ids
if hasattr(args, 'name') and args.name:
args_list.extend(["--name", args.name])
elif subcmd == "list-templates":
args_list = ["list-templates"]
elif subcmd == "generate-guide":
args_list = ["generate-guide", args.profile_id]
return guidance_command(args_list)
except ImportError as e:
print(color(f"Error: Cannot import user_guidance module: {e}", Colors.RED))
print("Make sure tools/user_guidance.py exists")
return 1
except Exception as e:
print(color(f"Error: {e}", Colors.RED))
return 1

View File

@@ -0,0 +1,91 @@
"""Tests for cloud-context warning injection (#378, #456)."""
import pytest
import sys
from pathlib import Path
sys.path.insert(0, str(Path(__file__).parent.parent))
from cron.scheduler import (
    _LOCAL_SERVICE_PATTERNS,
    _detect_local_service_refs,
    _inject_cloud_context,
)


class TestDetectLocalServiceRefs:
    """Pattern detection for local service references in prompts."""

    def test_localhost_with_port(self):
        refs = _detect_local_service_refs("Check localhost:11434 is up")
        assert len(refs) >= 1
        assert any("11434" in r for r in refs)

    def test_127_with_port(self):
        refs = _detect_local_service_refs("curl http://127.0.0.1:8080/health")
        assert len(refs) >= 1

    def test_check_ollama(self):
        refs = _detect_local_service_refs("Check Ollama is responding")
        assert len(refs) >= 1

    def test_ollama_responding(self):
        refs = _detect_local_service_refs("Verify Ollama responding on this machine")
        assert len(refs) >= 1

    def test_curl_localhost(self):
        refs = _detect_local_service_refs("curl localhost and report status")
        assert len(refs) >= 1

    def test_ping_localhost(self):
        refs = _detect_local_service_refs("ping localhost to check connectivity")
        assert len(refs) >= 1

    def test_no_false_positive_cloud(self):
        refs = _detect_local_service_refs("Check the weather in Paris today")
        assert len(refs) == 0

    def test_no_false_positive_api(self):
        refs = _detect_local_service_refs("Call the OpenRouter API endpoint")
        assert len(refs) == 0

    def test_multiple_refs(self):
        refs = _detect_local_service_refs("curl localhost:11434 then ping localhost")
        assert len(refs) >= 2


class TestInjectCloudContext:
    """Cloud-context warning injection."""

    def test_prepends_warning(self):
        prompt = "Check Ollama is responding"
        result = _inject_cloud_context(prompt, ["Check Ollama"], "nous")
        assert result.startswith("[SYSTEM NOTE")
        assert "nous" in result
        assert prompt in result

    def test_preserves_original_prompt(self):
        prompt = "Check Ollama at localhost:11434"
        result = _inject_cloud_context(prompt, ["localhost:11434"], "openrouter")
        assert prompt in result

    def test_mentions_cannot_reach(self):
        prompt = "curl localhost"
        result = _inject_cloud_context(prompt, ["curl localhost"], "nous")
        assert "CANNOT reach" in result or "cannot reach" in result

    def test_suggests_local_provider(self):
        prompt = "Check Ollama"
        result = _inject_cloud_context(prompt, ["Check Ollama"], "nous")
        assert "local" in result.lower()


class TestCloudBypassLocal:
    """Local endpoints should not trigger injection."""

    def test_local_endpoint_skips(self):
        # The caller checks _is_cloud before calling _detect_local_service_refs,
        # so this is tested at integration level. Here we verify detection
        # still finds refs (the bypass is the caller's responsibility).
        refs = _detect_local_service_refs("Check Ollama at localhost:11434")
        assert len(refs) > 0  # Detection works; caller decides whether to inject

View File

@@ -1,229 +0,0 @@
#!/usr/bin/env python3
"""
Test script for user guidance pattern analysis.
This script tests the revised approach for issue #327,
focusing on user guidance patterns rather than agent proficiency.
Issue: #327 (Revised hypothesis)
"""
import sys
import os
from pathlib import Path
# Add the tools directory to path
sys.path.insert(0, str(Path(__file__).parent.parent))
def test_user_guidance_analysis():
"""Test user guidance analysis functionality."""
print("=== Testing User Guidance Analysis ===\n")
try:
from tools.user_guidance import UserGuidanceAnalyzer
from hermes_state import SessionDB
session_db = SessionDB()
analyzer = UserGuidanceAnalyzer(session_db)
# Get a session to analyze
sessions = session_db.get_messages.__self__.execute_write(
"SELECT id FROM sessions ORDER BY started_at DESC LIMIT 1"
)
if not sessions:
print("No sessions found in database.")
return False
session_id = sessions[0][0]
print(f"Analyzing session: {session_id}\n")
analysis = analyzer.analyze_user_guidance(session_id)
if "error" in analysis:
print(f"Analysis error: {analysis['error']}")
return False
print(f"Message count: {analysis['message_count']}")
print("\nPrompt Patterns:")
for p in analysis.get("prompt_patterns", [])[:3]:
print(f" {p['type']}: {'✓' if p.get('success') else '✗'} ({p['length']} chars)")
print("\nCorrection Patterns:")
for c in analysis.get("correction_patterns", [])[:2]:
print(f" {c['error_content'][:50]}... -> {c['user_correction'][:50]}...")
print("\nSuccess Metrics:")
metrics = analysis.get("success_metrics", {})
print(f" Tool calls: {metrics.get('tool_calls', 0)}")
print(f" Success rate: {metrics.get('success_rate', 0):.0%}")
print(f" User corrections: {metrics.get('user_corrections', 0)}")
return True
except Exception as e:
print(f"Test failed: {e}")
return False
def test_guidance_template_creation():
"""Test guidance template creation."""
print("\n=== Testing Guidance Template Creation ===\n")
try:
from tools.user_guidance import UserGuidanceAnalyzer, GuidanceTemplateGenerator
from hermes_state import SessionDB
session_db = SessionDB()
analyzer = UserGuidanceAnalyzer(session_db)
generator = GuidanceTemplateGenerator(analyzer)
# Get sessions
sessions = session_db.get_messages.__self__.execute_write(
"SELECT id FROM sessions ORDER BY started_at DESC LIMIT 3"
)
if not sessions:
print("No sessions found.")
return False
session_ids = [s[0] for s in sessions]
print(f"Creating template from {len(session_ids)} sessions\n")
profile = generator.create_guidance_template(
session_ids,
name="Test Guidance Template"
)
print(f"Profile ID: {profile.profile_id}")
print(f"Name: {profile.name}")
print(f"Prompt patterns: {len(profile.prompt_patterns)}")
print(f"Correction patterns: {len(profile.correction_patterns)}")
# Save the template
from tools.user_guidance import GuidanceTemplateManager
manager = GuidanceTemplateManager()
path = manager.save_template(profile)
print(f"Saved to: {path}")
return True
except Exception as e:
print(f"Test failed: {e}")
return False
def test_user_guide_generation():
"""Test user guide generation."""
print("\n=== Testing User Guide Generation ===\n")
try:
from tools.user_guidance import UserGuidanceProfile, PromptPattern, CorrectionPattern, ContextStrategy, generate_user_guide
# Create a test profile
profile = UserGuidanceProfile(
profile_id="test_guidance_001",
name="Test User Guidance",
description="Test profile for guide generation",
prompt_patterns=[
PromptPattern(
pattern_type="polite_request",
template="Please [action] [details]",
success_rate=0.85,
usage_count=15,
context_requirements=[]
),
PromptPattern(
pattern_type="question",
template="How do I [action]?",
success_rate=0.75,
usage_count=20,
context_requirements=[]
)
],
correction_patterns=[
CorrectionPattern(
error_type="file_not_found",
correction_strategy="direct",
effectiveness=0.90,
common_phrases=["Use the correct path: [path]", "The file is at [location]"]
),
CorrectionPattern(
error_type="command_not_found",
correction_strategy="example",
effectiveness=0.80,
common_phrases=["Try: [command]", "Use [alternative] instead"]
)
],
context_strategies=[
ContextStrategy(
strategy_type="file_reference",
description="Reference specific files",
effectiveness=0.85,
token_cost=10
),
ContextStrategy(
strategy_type="code_example",
description="Provide code examples",
effectiveness=0.90,
token_cost=50
)
],
created_at="2026-04-13T00:00:00",
source_analysis="Test sessions"
)
guide = generate_user_guide(profile)
print("Generated User Guide:")
print("=" * 50)
print(guide[:1000] + "..." if len(guide) > 1000 else guide)
return True
except Exception as e:
print(f"Test failed: {e}")
return False
def main():
"""Run all tests."""
print("User Guidance Pattern Analysis Test Suite")
print("=" * 50)
tests = [
("User Guidance Analysis", test_user_guidance_analysis),
("Guidance Template Creation", test_guidance_template_creation),
("User Guide Generation", test_user_guide_generation)
]
results = []
for name, test_func in tests:
print(f"\nRunning: {name}")
try:
result = test_func()
results.append((name, result))
print(f"Result: {'PASS' if result else 'FAIL'}")
except Exception as e:
print(f"Error: {e}")
results.append((name, False))
print("\n" + "=" * 50)
print("Test Results:")
passed = sum(1 for _, result in results if result)
total = len(results)
for name, result in results:
status = "✓ PASS" if result else "✗ FAIL"
print(f" {status}: {name}")
print(f"\nPassed: {passed}/{total}")
return 0 if passed == total else 1
if __name__ == "__main__":
sys.exit(main())

View File

@@ -1,766 +0,0 @@
"""
User Guidance Patterns for Effective Agent Sessions
This module analyzes user strategies that lead to successful agent sessions,
focusing on prompt patterns, error correction techniques, and context management.
Issue: #327 (Revised hypothesis)
"""
import json
import logging
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
from dataclasses import dataclass, asdict
import re
logger = logging.getLogger(__name__)
@dataclass
class PromptPattern:
"""Effective prompt pattern."""
pattern_type: str # "instruction", "context", "constraint", "example"
template: str
success_rate: float
usage_count: int
context_requirements: List[str] = None
def to_dict(self) -> Dict[str, Any]:
return asdict(self)
@dataclass
class CorrectionPattern:
"""User error correction pattern."""
error_type: str
correction_strategy: str # "direct", "example", "reframe", "constraint"
effectiveness: float # Success rate of this correction
common_phrases: List[str]
@dataclass
class ContextStrategy:
"""Context establishment strategy."""
strategy_type: str # "reference", "example", "constraint", "background"
description: str
effectiveness: float
token_cost: int # Approximate token usage
@dataclass
class UserGuidanceProfile:
"""Profile of effective user guidance strategies."""
profile_id: str
name: str
description: str
prompt_patterns: List[PromptPattern]
correction_patterns: List[CorrectionPattern]
context_strategies: List[ContextStrategy]
created_at: str
source_analysis: str = None
version: str = "1.0"
def to_dict(self) -> Dict[str, Any]:
return {
"profile_id": self.profile_id,
"name": self.name,
"description": self.description,
"prompt_patterns": [p.to_dict() for p in self.prompt_patterns],
"correction_patterns": [asdict(c) for c in self.correction_patterns],
"context_strategies": [asdict(c) for c in self.context_strategies],
"created_at": self.created_at,
"source_analysis": self.source_analysis,
"version": self.version
}
@classmethod
def from_dict(cls, data: Dict[str, Any]) -> 'UserGuidanceProfile':
"""Create profile from dictionary."""
prompt_patterns = [
PromptPattern(**p) for p in data.get("prompt_patterns", [])
]
correction_patterns = [
CorrectionPattern(**c) for c in data.get("correction_patterns", [])
]
context_strategies = [
ContextStrategy(**c) for c in data.get("context_strategies", [])
]
return cls(
profile_id=data["profile_id"],
name=data["name"],
description=data["description"],
prompt_patterns=prompt_patterns,
correction_patterns=correction_patterns,
context_strategies=context_strategies,
created_at=data.get("created_at", datetime.now().isoformat()),
source_analysis=data.get("source_analysis"),
version=data.get("version", "1.0")
)
class UserGuidanceAnalyzer:
"""Analyze user guidance patterns in sessions."""
def __init__(self, session_db=None):
self.session_db = session_db
def analyze_user_guidance(self, session_id: str) -> Dict[str, Any]:
"""
Analyze user guidance patterns in a session.
Returns:
Dict with user guidance analysis including:
- prompt_patterns: Effective prompt structures
- correction_patterns: Error correction strategies
- context_strategies: How users establish context
- success_indicators: What makes guidance effective
"""
if not self.session_db:
return {"error": "No session database available"}
try:
messages = self.session_db.get_messages(session_id)
if not messages:
return {"error": "No messages found"}
analysis = {
"session_id": session_id,
"message_count": len(messages),
"user_messages": self._extract_user_messages(messages),
"prompt_patterns": self._analyze_prompt_patterns(messages),
"correction_patterns": self._analyze_corrections(messages),
"context_strategies": self._analyze_context_strategies(messages),
"success_metrics": self._calculate_success_metrics(messages)
}
return analysis
except Exception as e:
logger.error(f"User guidance analysis failed: {e}")
return {"error": str(e)}
def _extract_user_messages(self, messages: List[Dict]) -> List[Dict]:
"""Extract user messages with context."""
user_messages = []
for i, msg in enumerate(messages):
if msg.get("role") == "user":
# Get surrounding context
context_before = []
context_after = []
# Previous assistant message
if i > 0 and messages[i-1].get("role") == "assistant":
context_before.append(messages[i-1].get("content", "")[:200])
# Next assistant message
if i < len(messages) - 1 and messages[i+1].get("role") == "assistant":
context_after.append(messages[i+1].get("content", "")[:200])
user_messages.append({
"content": msg.get("content", ""),
"position": i,
"context_before": context_before,
"context_after": context_after
})
return user_messages
def _analyze_prompt_patterns(self, messages: List[Dict]) -> List[Dict[str, Any]]:
"""Analyze prompt patterns for effectiveness."""
patterns = []
user_messages = [m for m in messages if m.get("role") == "user"]
for msg in user_messages:
content = msg.get("content", "")
# Identify prompt types
if content.startswith(("Please", "Could you", "Can you")):
patterns.append({
"type": "polite_request",
"content": content,
"length": len(content),
"success": self._check_prompt_success(messages, msg)
})
elif "?" in content:
patterns.append({
"type": "question",
"content": content,
"length": len(content),
"success": self._check_prompt_success(messages, msg)
})
elif content.startswith(("/", "!")):
patterns.append({
"type": "command",
"content": content,
"length": len(content),
"success": self._check_prompt_success(messages, msg)
})
elif len(content) > 200:
patterns.append({
"type": "detailed_request",
"content": content,
"length": len(content),
"success": self._check_prompt_success(messages, msg)
})
return patterns
def _check_prompt_success(self, messages: List[Dict], user_msg: Dict) -> bool:
"""Check if a prompt led to successful execution."""
# Find the user message position
user_pos = None
for i, msg in enumerate(messages):
if msg == user_msg:
user_pos = i
break
if user_pos is None:
return False
# Check if there's a successful tool call after this message
for i in range(user_pos + 1, min(user_pos + 5, len(messages))):
msg = messages[i]
if msg.get("role") == "assistant" and msg.get("tool_calls"):
# Check if tool result indicates success
for j in range(i + 1, min(i + 3, len(messages))):
if messages[j].get("role") == "tool":
content = messages[j].get("content", "")
if "error" not in content.lower() and "failed" not in content.lower():
return True
return False
def _analyze_corrections(self, messages: List[Dict]) -> List[Dict[str, Any]]:
"""Analyze error correction patterns."""
corrections = []
# Look for error patterns followed by corrections
for i in range(len(messages) - 2):
msg1 = messages[i]
msg2 = messages[i + 1]
msg3 = messages[i + 2]
# Pattern: Assistant error -> User correction -> Assistant success
if (msg1.get("role") == "tool" and
("error" in msg1.get("content", "").lower() or "failed" in msg1.get("content", "").lower()) and
msg2.get("role") == "user" and
msg3.get("role") == "assistant"):
corrections.append({
"error_content": msg1.get("content", "")[:200],
"user_correction": msg2.get("content", ""),
"assistant_response": msg3.get("content", "")[:200],
"success": self._check_correction_success(messages, i + 2)
})
return corrections
def _check_correction_success(self, messages: List[Dict], assistant_pos: int) -> bool:
"""Check if a correction led to success."""
# Look for successful tool calls after correction
for i in range(assistant_pos + 1, min(assistant_pos + 3, len(messages))):
if messages[i].get("role") == "tool":
content = messages[i].get("content", "")
if "error" not in content.lower() and "failed" not in content.lower():
return True
return False
def _analyze_context_strategies(self, messages: List[Dict]) -> List[Dict[str, Any]]:
"""Analyze how users establish context."""
strategies = []
user_messages = [m for m in messages if m.get("role") == "user"]
for msg in user_messages[:10]: # Analyze first 10 user messages
content = msg.get("content", "")
# Identify context establishment strategies
if re.search(r'[/.\][\w/.-]+\.\w+', content):
strategies.append({
"type": "file_reference",
"content": content[:200],
"tokens": len(content.split())
})
elif "```" in content:
strategies.append({
"type": "code_example",
"content": content[:200],
"tokens": len(content.split())
})
elif len(content) > 300:
strategies.append({
"type": "detailed_background",
"content": content[:200],
"tokens": len(content.split())
})
return strategies
def _calculate_success_metrics(self, messages: List[Dict]) -> Dict[str, Any]:
"""Calculate success metrics for the session."""
tool_calls = 0
successful_tool_calls = 0
user_corrections = 0
successful_corrections = 0
for i, msg in enumerate(messages):
if msg.get("role") == "assistant" and msg.get("tool_calls"):
tool_calls += 1
if msg.get("role") == "tool":
content = msg.get("content", "")
if "error" not in content.lower() and "failed" not in content.lower():
successful_tool_calls += 1
# Count corrections
if (msg.get("role") == "user" and i > 0 and
messages[i-1].get("role") == "tool" and
("error" in messages[i-1].get("content", "").lower() or
"failed" in messages[i-1].get("content", "").lower())):
user_corrections += 1
return {
"tool_calls": tool_calls,
"successful_tool_calls": successful_tool_calls,
"success_rate": successful_tool_calls / tool_calls if tool_calls > 0 else 0,
"user_corrections": user_corrections,
"messages_per_correction": len(messages) / user_corrections if user_corrections > 0 else 0
}
class GuidanceTemplateGenerator:
"""Generate user guidance templates from analysis."""
def __init__(self, analyzer: UserGuidanceAnalyzer = None):
self.analyzer = analyzer or UserGuidanceAnalyzer()
def create_guidance_template(self, session_ids: List[str], name: str = None) -> UserGuidanceProfile:
"""
Create a guidance template from multiple sessions.
Args:
session_ids: List of session IDs to analyze
name: Template name
Returns:
UserGuidanceProfile with extracted patterns
"""
all_patterns = []
all_corrections = []
all_strategies = []
for session_id in session_ids:
analysis = self.analyzer.analyze_user_guidance(session_id)
if "error" in analysis:
logger.warning(f"Skipping session {session_id}: {analysis['error']}")
continue
all_patterns.extend(analysis.get("prompt_patterns", []))
all_corrections.extend(analysis.get("correction_patterns", []))
all_strategies.extend(analysis.get("context_strategies", []))
# Aggregate patterns
prompt_patterns = self._aggregate_prompt_patterns(all_patterns)
correction_patterns = self._aggregate_corrections(all_corrections)
context_strategies = self._aggregate_strategies(all_strategies)
profile = UserGuidanceProfile(
profile_id=f"guidance_{datetime.now().strftime('%Y%m%d_%H%M%S')}",
name=name or f"User Guidance Template",
description=f"Extracted from {len(session_ids)} sessions",
prompt_patterns=prompt_patterns,
correction_patterns=correction_patterns,
context_strategies=context_strategies,
created_at=datetime.now().isoformat(),
source_analysis=f"Sessions: {', '.join(session_ids[:5])}{'...' if len(session_ids) > 5 else ''}"
)
return profile
def _aggregate_prompt_patterns(self, patterns: List[Dict]) -> List[PromptPattern]:
"""Aggregate prompt patterns by type."""
pattern_groups = {}
for p in patterns:
ptype = p.get("type", "unknown")
if ptype not in pattern_groups:
pattern_groups[ptype] = {"count": 0, "successes": 0, "examples": []}
pattern_groups[ptype]["count"] += 1
if p.get("success"):
pattern_groups[ptype]["successes"] += 1
if len(pattern_groups[ptype]["examples"]) < 3:
pattern_groups[ptype]["examples"].append(p.get("content", "")[:100])
result = []
for ptype, data in pattern_groups.items():
success_rate = data["successes"] / data["count"] if data["count"] > 0 else 0
# Create template from examples
template = self._create_template_from_examples(data["examples"], ptype)
result.append(PromptPattern(
pattern_type=ptype,
template=template,
success_rate=success_rate,
usage_count=data["count"],
context_requirements=[]
))
return result
def _aggregate_corrections(self, corrections: List[Dict]) -> List[CorrectionPattern]:
"""Aggregate correction patterns."""
correction_groups = {}
for c in corrections:
# Simplify error type
error_content = c.get("error_content", "").lower()
if "filenotfound" in error_content or "no such file" in error_content:
error_type = "file_not_found"
elif "permission" in error_content:
error_type = "permission_denied"
elif "command not found" in error_content:
error_type = "command_not_found"
else:
error_type = "general_error"
if error_type not in correction_groups:
correction_groups[error_type] = {"count": 0, "successes": 0, "examples": []}
correction_groups[error_type]["count"] += 1
if c.get("success"):
correction_groups[error_type]["successes"] += 1
if len(correction_groups[error_type]["examples"]) < 3:
correction_groups[error_type]["examples"].append(c.get("user_correction", "")[:100])
result = []
for error_type, data in correction_groups.items():
effectiveness = data["successes"] / data["count"] if data["count"] > 0 else 0
# Determine correction strategy
if data["examples"]:
first_example = data["examples"][0].lower()
if "try" in first_example or "instead" in first_example:
strategy = "reframe"
elif "use" in first_example or "run" in first_example:
strategy = "direct"
elif "like this" in first_example or "example" in first_example:
strategy = "example"
else:
strategy = "constraint"
else:
strategy = "unknown"
result.append(CorrectionPattern(
error_type=error_type,
correction_strategy=strategy,
effectiveness=effectiveness,
common_phrases=data["examples"][:3]
))
return result
def _aggregate_strategies(self, strategies: List[Dict]) -> List[ContextStrategy]:
"""Aggregate context strategies."""
strategy_groups = {}
for s in strategies:
stype = s.get("type", "unknown")
if stype not in strategy_groups:
strategy_groups[stype] = {"count": 0, "tokens": []}
strategy_groups[stype]["count"] += 1
strategy_groups[stype]["tokens"].append(s.get("tokens", 0))
result = []
for stype, data in strategy_groups.items():
avg_tokens = sum(data["tokens"]) / len(data["tokens"]) if data["tokens"] else 0
result.append(ContextStrategy(
strategy_type=stype,
description=f"Used {data['count']} times, avg {avg_tokens:.0f} tokens",
effectiveness=0.5, # Would need more analysis
token_cost=int(avg_tokens)
))
return result
def _create_template_from_examples(self, examples: List[str], ptype: str) -> str:
"""Create a template from examples."""
if not examples:
return f"Example {ptype} prompt"
# Simple template creation
if ptype == "polite_request":
return "Please [action] [details]"
elif ptype == "question":
return "How do I [action]?"
elif ptype == "command":
return "/[command] [arguments]"
elif ptype == "detailed_request":
return "I need to [goal]. Specifically, [details]. Context: [background]"
else:
return examples[0][:50] + "..."
class GuidanceTemplateManager:
"""Manage user guidance templates."""
def __init__(self, template_dir: Path = None):
self.template_dir = template_dir or Path.home() / ".hermes" / "guidance_templates"
self.template_dir.mkdir(parents=True, exist_ok=True)
def save_template(self, profile: UserGuidanceProfile) -> Path:
"""Save a guidance template."""
template_path = self.template_dir / f"{profile.profile_id}.json"
with open(template_path, 'w') as f:
json.dump(profile.to_dict(), f, indent=2)
logger.info(f"Saved guidance template {profile.profile_id} to {template_path}")
return template_path
def load_template(self, profile_id: str) -> Optional[UserGuidanceProfile]:
"""Load a guidance template."""
template_path = self.template_dir / f"{profile_id}.json"
if not template_path.exists():
logger.warning(f"Template {profile_id} not found")
return None
try:
with open(template_path, 'r') as f:
data = json.load(f)
return UserGuidanceProfile.from_dict(data)
except Exception as e:
logger.error(f"Failed to load template {profile_id}: {e}")
return None
def list_templates(self) -> List[Dict[str, Any]]:
"""List all available templates."""
templates = []
for template_path in self.template_dir.glob("*.json"):
try:
with open(template_path, 'r') as f:
data = json.load(f)
templates.append({
"profile_id": data.get("profile_id"),
"name": data.get("name"),
"description": data.get("description"),
"created_at": data.get("created_at"),
"prompt_patterns": len(data.get("prompt_patterns", [])),
"correction_patterns": len(data.get("correction_patterns", []))
})
except Exception as e:
logger.warning(f"Failed to read template {template_path}: {e}")
return templates
def generate_user_guide(profile: UserGuidanceProfile) -> str:
"""Generate a user-facing guide from a guidance profile."""
guide = f"""# Effective Agent Session Guide
**Template:** {profile.name}
**Generated:** {profile.created_at}
**Source:** {profile.source_analysis or "Multiple sessions"}
## Effective Prompt Patterns
"""
for pattern in sorted(profile.prompt_patterns, key=lambda x: x.success_rate, reverse=True):
guide += f"### {pattern.pattern_type.replace('_', ' ').title()}\n"
guide += f"**Success Rate:** {pattern.success_rate:.0%}\n"
guide += f"**Usage:** {pattern.usage_count} times\n"
guide += f"**Template:** `{pattern.template}`\n"
guide += "## Error Correction Strategies\n"
for correction in sorted(profile.correction_patterns, key=lambda x: x.effectiveness, reverse=True):
guide += f"### {correction.error_type.replace('_', ' ').title()}\n"
guide += f"**Effectiveness:** {correction.effectiveness:.0%}\n"
guide += f"**Strategy:** {correction.correction_strategy}\n"
if correction.common_phrases:
guide += f'**Example:** "{correction.common_phrases[0]}"\n'
guide += "\n"
guide += "## Context Establishment Tips\n"
for strategy in profile.context_strategies:
guide += f"- **{strategy.strategy_type.replace('_', ' ').title()}:** {strategy.description}\n"
guide += """
## Key Insights
1. **Be specific:** Vague prompts lead to errors
2. **Provide context:** Help the agent understand your environment
3. **Use examples:** Show what you want when possible
4. **Correct effectively:** Use the strategies above when errors occur
5. **Manage context:** Don't overload with unnecessary information
## Remember
- Agent sessions degrade over time (error rates increase)
- Your guidance matters more than agent "experience"
- Use the patterns above to improve success rates
"""
return guide
# CLI Integration
def guidance_command(args):
"""CLI command for user guidance analysis."""
import argparse
parser = argparse.ArgumentParser(description="User guidance pattern analysis")
subparsers = parser.add_subparsers(dest="command")
# Analyze command
analyze_parser = subparsers.add_parser("analyze", help="Analyze user guidance in a session")
analyze_parser.add_argument("session_id", help="Session ID to analyze")
# Create template command
create_parser = subparsers.add_parser("create-template", help="Create guidance template from sessions")
create_parser.add_argument("session_ids", nargs="+", help="Session IDs to analyze")
create_parser.add_argument("--name", "-n", help="Template name")
# List templates command
subparsers.add_parser("list-templates", help="List available guidance templates")
# Generate guide command
guide_parser = subparsers.add_parser("generate-guide", help="Generate user guide from template")
guide_parser.add_argument("profile_id", help="Profile ID to generate guide from")
# Parse args
parsed = parser.parse_args(args)
if not parsed.command:
parser.print_help()
return 1
# Import session DB
try:
from hermes_state import SessionDB
session_db = SessionDB()
except ImportError:
print("Error: Cannot import SessionDB")
return 1
if parsed.command == "analyze":
analyzer = UserGuidanceAnalyzer(session_db)
analysis = analyzer.analyze_user_guidance(parsed.session_id)
print(f"\n=== User Guidance Analysis: {parsed.session_id} ===\n")
if "error" in analysis:
print(f"Error: {analysis['error']}")
return 1
print(f"Messages: {analysis['message_count']}")
print("\nPrompt Patterns:")
for p in analysis.get("prompt_patterns", [])[:5]:
print(f" {p['type']}: {'✓' if p.get('success') else '✗'} ({p['length']} chars)")
print("\nCorrection Patterns:")
for c in analysis.get("correction_patterns", [])[:3]:
print(f" {c['error_content'][:50]}... -> {c['user_correction'][:50]}...")
print("\nSuccess Metrics:")
metrics = analysis.get("success_metrics", {})
print(f" Tool calls: {metrics.get('tool_calls', 0)}")
print(f" Success rate: {metrics.get('success_rate', 0):.0%}")
print(f" User corrections: {metrics.get('user_corrections', 0)}")
return 0
elif parsed.command == "create-template":
analyzer = UserGuidanceAnalyzer(session_db)
generator = GuidanceTemplateGenerator(analyzer)
profile = generator.create_guidance_template(
parsed.session_ids,
name=parsed.name
)
manager = GuidanceTemplateManager()
path = manager.save_template(profile)
print(f"Created guidance template: {profile.profile_id}")
print(f"Saved to: {path}")
print(f"Prompt patterns: {len(profile.prompt_patterns)}")
print(f"Correction patterns: {len(profile.correction_patterns)}")
return 0
elif parsed.command == "list-templates":
manager = GuidanceTemplateManager()
templates = manager.list_templates()
if not templates:
print("No templates found.")
return 0
print("\n=== Available Guidance Templates ===\n")
for t in templates:
print(f"ID: {t['profile_id']}")
print(f"Name: {t['name']}")
print(f"Description: {t['description']}")
print(f"Prompt patterns: {t['prompt_patterns']}")
print(f"Correction patterns: {t['correction_patterns']}")
print()
return 0
elif parsed.command == "generate-guide":
manager = GuidanceTemplateManager()
profile = manager.load_template(parsed.profile_id)
if not profile:
print(f"Template {parsed.profile_id} not found")
return 1
guide = generate_user_guide(profile)
print(guide)
# Also save to file
guide_path = manager.template_dir / f"{parsed.profile_id}_guide.md"
with open(guide_path, 'w') as f:
f.write(guide)
print(f"\nGuide saved to: {guide_path}")
return 0
return 1
if __name__ == "__main__":
import sys
sys.exit(guidance_command(sys.argv[1:]))