Compare commits
1 Commits
step35/443
...
fix/592
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
595d306ff1 |
1000
training-data/code-patterns-hermes-agent-core.jsonl
Normal file
1000
training-data/code-patterns-hermes-agent-core.jsonl
Normal file
File diff suppressed because it is too large
Load Diff
760
training/build_code_patterns_hermes_agent_core.py
Normal file
760
training/build_code_patterns_hermes_agent_core.py
Normal file
@@ -0,0 +1,760 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""Build 1,000 code-pattern problem→solution training pairs for issue #592.
|
||||||
|
|
||||||
|
Domain: Hermes Agent Core — agent loop, tool routing, session management, prompt building.
|
||||||
|
Output: training-data/code-patterns-hermes-agent-core.jsonl
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import json
|
||||||
|
import itertools
|
||||||
|
import random
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
DEFAULT_OUTPUT = Path(__file__).parent.parent / "training-data" / "code-patterns-hermes-agent-core.jsonl"
|
||||||
|
ISSUE = 592
|
||||||
|
|
||||||
|
random.seed(592)
|
||||||
|
|
||||||
|
# ── Templates ──────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
AGENT_LOOP_TEMPLATES = [
|
||||||
|
{
|
||||||
|
"problem": "Create an AIAgent instance with model {model} and max {iters} iterations",
|
||||||
|
"solution": '''from run_agent import AIAgent
|
||||||
|
|
||||||
|
agent = AIAgent(
|
||||||
|
model="{model}",
|
||||||
|
max_iterations={iters},
|
||||||
|
enabled_toolsets=["web", "terminal", "file"],
|
||||||
|
)
|
||||||
|
response = agent.chat("List files in current directory")
|
||||||
|
print(response)''',
|
||||||
|
"variations": {
|
||||||
|
"model": ["anthropic/claude-sonnet-4", "openai/gpt-4o", "google/gemini-2.5-pro", "nous/hermes3:70b"],
|
||||||
|
"iters": [30, 50, 90],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"problem": "Run a full conversation with custom system message using AIAgent",
|
||||||
|
"solution": '''from run_agent import AIAgent
|
||||||
|
|
||||||
|
agent = AIAgent(model="{model}", max_iterations={iters})
|
||||||
|
result = agent.run_conversation(
|
||||||
|
user_message="Analyze this log file",
|
||||||
|
system_message="You are a DevOps assistant. Be concise.",
|
||||||
|
)
|
||||||
|
print(result["final_response"])''',
|
||||||
|
"variations": {
|
||||||
|
"model": ["anthropic/claude-sonnet-4", "openai/gpt-4o-mini"],
|
||||||
|
"iters": [50, 90],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"problem": "Handle a tool call result and append it to the conversation messages",
|
||||||
|
"solution": '''from model_tools import handle_function_call
|
||||||
|
|
||||||
|
tool_call = response.tool_calls[0]
|
||||||
|
result = handle_function_call(
|
||||||
|
tool_call.name,
|
||||||
|
tool_call.args,
|
||||||
|
task_id="task-123"
|
||||||
|
)
|
||||||
|
messages.append({{
|
||||||
|
"role": "tool",
|
||||||
|
"tool_call_id": tool_call.id,
|
||||||
|
"content": result,
|
||||||
|
}})''',
|
||||||
|
"variations": {},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"problem": "Check iteration budget before making another API call in the agent loop",
|
||||||
|
"solution": '''while api_call_count < agent.max_iterations and agent.iteration_budget.remaining > 0:
|
||||||
|
response = client.chat.completions.create(
|
||||||
|
model=model,
|
||||||
|
messages=messages,
|
||||||
|
tools=tool_schemas,
|
||||||
|
)
|
||||||
|
if response.tool_calls:
|
||||||
|
for tc in response.tool_calls:
|
||||||
|
result = handle_function_call(tc.name, tc.args)
|
||||||
|
messages.append(tool_result_message(result))
|
||||||
|
api_call_count += 1
|
||||||
|
else:
|
||||||
|
return response.content''',
|
||||||
|
"variations": {},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"problem": "Enable quiet mode on AIAgent to suppress spinner and activity feed",
|
||||||
|
"solution": '''from run_agent import AIAgent
|
||||||
|
|
||||||
|
agent = AIAgent(
|
||||||
|
model="{model}",
|
||||||
|
quiet_mode=True,
|
||||||
|
save_trajectories=True,
|
||||||
|
)
|
||||||
|
response = agent.chat("Summarize this file")
|
||||||
|
print(response)''',
|
||||||
|
"variations": {
|
||||||
|
"model": ["anthropic/claude-sonnet-4", "openai/gpt-4o"],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
]
|
||||||
|
|
||||||
|
TOOL_ROUTING_TEMPLATES = [
|
||||||
|
{
|
||||||
|
"problem": "Register a new tool with the central registry in tools/registry.py",
|
||||||
|
"solution": '''from tools.registry import registry
|
||||||
|
|
||||||
|
def example_tool(param: str, task_id: str = None) -> str:
|
||||||
|
import json
|
||||||
|
return json.dumps({{"success": True, "data": param}})
|
||||||
|
|
||||||
|
registry.register(
|
||||||
|
name="example_tool",
|
||||||
|
toolset="example",
|
||||||
|
schema={{
|
||||||
|
"name": "example_tool",
|
||||||
|
"description": "Does something useful",
|
||||||
|
"parameters": {{
|
||||||
|
"type": "object",
|
||||||
|
"properties": {{
|
||||||
|
"param": {{"type": "string", "description": "Input parameter"}}
|
||||||
|
}},
|
||||||
|
"required": ["param"],
|
||||||
|
}},
|
||||||
|
}},
|
||||||
|
handler=lambda args, **kw: example_tool(
|
||||||
|
param=args.get("param", ""),
|
||||||
|
task_id=kw.get("task_id")
|
||||||
|
),
|
||||||
|
check_fn=lambda: bool(os.getenv("EXAMPLE_API_KEY")),
|
||||||
|
requires_env=["EXAMPLE_API_KEY"],
|
||||||
|
)''',
|
||||||
|
"variations": {},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"problem": "Discover all builtin tools and build tool schemas for the API call",
|
||||||
|
"solution": '''from model_tools import discover_builtin_tools
|
||||||
|
from tools.registry import registry
|
||||||
|
|
||||||
|
# Auto-discover all registered tools
|
||||||
|
discover_builtin_tools()
|
||||||
|
|
||||||
|
# Collect schemas for all available tools
|
||||||
|
tool_schemas = [registry.get_schema(name) for name in registry.list_available()]
|
||||||
|
|
||||||
|
# Filter by enabled toolsets
|
||||||
|
enabled = ["web", "terminal", "file"]
|
||||||
|
tool_schemas = [
|
||||||
|
s for s in tool_schemas
|
||||||
|
if registry.get_toolset(s["name"]) in enabled
|
||||||
|
]''',
|
||||||
|
"variations": {},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"problem": "Check if a tool is available before calling it",
|
||||||
|
"solution": '''from tools.registry import registry
|
||||||
|
|
||||||
|
tool_name = "web_search"
|
||||||
|
if registry.is_available(tool_name):
|
||||||
|
schema = registry.get_schema(tool_name)
|
||||||
|
result = registry.call(tool_name, {{"query": "Python asyncio"}}, task_id="abc")
|
||||||
|
else:
|
||||||
|
result = f"Tool {{tool_name}} is not available (missing requirements)"''',
|
||||||
|
"variations": {},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"problem": "Add a new toolset to HERMES_CORE_TOOLS in toolsets.py",
|
||||||
|
"solution": '''# In toolsets.py
|
||||||
|
|
||||||
|
_HERMES_CORE_TOOLS = [
|
||||||
|
"web",
|
||||||
|
"terminal",
|
||||||
|
"file",
|
||||||
|
"browser",
|
||||||
|
"code_execution",
|
||||||
|
"delegate",
|
||||||
|
"new_toolset", # <-- added
|
||||||
|
]
|
||||||
|
|
||||||
|
# Create tools/new_toolset_tool.py with registry.register() at module level
|
||||||
|
# Auto-discovery will pick it up automatically — no manual import needed''',
|
||||||
|
"variations": {},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"problem": "Wrap a tool handler to add logging and error handling",
|
||||||
|
"solution": '''import json
|
||||||
|
import logging
|
||||||
|
from tools.registry import registry
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
def logged_handler(fn):
|
||||||
|
def wrapper(args, **kwargs):
|
||||||
|
task_id = kwargs.get("task_id")
|
||||||
|
logger.info(f"[{{task_id}}] Calling {{fn.__name__}} with {{args}}")
|
||||||
|
try:
|
||||||
|
result = fn(args, **kwargs)
|
||||||
|
logger.info(f"[{{task_id}}] Success")
|
||||||
|
return result
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"[{{task_id}}] Error: {{e}}")
|
||||||
|
return json.dumps({{"error": str(e)}})
|
||||||
|
return wrapper
|
||||||
|
|
||||||
|
# Register with wrapper
|
||||||
|
registry.register(
|
||||||
|
name="my_tool",
|
||||||
|
toolset="custom",
|
||||||
|
schema={{...}},
|
||||||
|
handler=lambda args, **kw: logged_handler(my_tool_impl)(args, **kw),
|
||||||
|
)''',
|
||||||
|
"variations": {},
|
||||||
|
},
|
||||||
|
]
|
||||||
|
|
||||||
|
SESSION_MANAGEMENT_TEMPLATES = [
|
||||||
|
{
|
||||||
|
"problem": "Query the session database for messages matching a keyword using FTS5",
|
||||||
|
"solution": '''from hermes_state import SessionDB
|
||||||
|
|
||||||
|
db = SessionDB()
|
||||||
|
results = db.search_messages("error handling", limit=10)
|
||||||
|
for row in results:
|
||||||
|
print(f"Session {{row['session_id']}}: {{row['content'][:100]}}")''',
|
||||||
|
"variations": {},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"problem": "Save a conversation session to SQLite with metadata",
|
||||||
|
"solution": '''from hermes_state import SessionDB
|
||||||
|
import json
|
||||||
|
|
||||||
|
db = SessionDB()
|
||||||
|
session_id = "sess-abc-123"
|
||||||
|
messages = [
|
||||||
|
{{"role": "user", "content": "Hello"}},
|
||||||
|
{{"role": "assistant", "content": "Hi there"}},
|
||||||
|
]
|
||||||
|
|
||||||
|
db.save_session(
|
||||||
|
session_id=session_id,
|
||||||
|
messages=json.dumps(messages),
|
||||||
|
model="claude-sonnet-4",
|
||||||
|
platform="cli",
|
||||||
|
task_id="task-456",
|
||||||
|
)''',
|
||||||
|
"variations": {},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"problem": "List recent sessions from the session database with pagination",
|
||||||
|
"solution": '''from hermes_state import SessionDB
|
||||||
|
|
||||||
|
db = SessionDB()
|
||||||
|
sessions = db.list_sessions(limit=20, offset=0)
|
||||||
|
for sess in sessions:
|
||||||
|
print(f"{{sess['id']}} | {{sess['created_at']}} | {{sess['message_count']}} msgs")''',
|
||||||
|
"variations": {},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"problem": "Compress old session context to stay within token budget",
|
||||||
|
"solution": '''from agent.context_compressor import ContextCompressor
|
||||||
|
|
||||||
|
compressor = ContextCompressor(model="claude-sonnet-4")
|
||||||
|
compressed = compressor.compress(
|
||||||
|
messages=messages,
|
||||||
|
target_tokens=4000,
|
||||||
|
preserve_recent=4,
|
||||||
|
)
|
||||||
|
messages = compressed["messages"]
|
||||||
|
summary = compressed.get("summary", "")''',
|
||||||
|
"variations": {},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"problem": "Enable Anthropic prompt caching for long system prompts",
|
||||||
|
"solution": '''from agent.prompt_caching import PromptCaching
|
||||||
|
|
||||||
|
cache = PromptCaching()
|
||||||
|
system_msg = cache.prepare_system_prompt(
|
||||||
|
content=system_content,
|
||||||
|
cache_key="my-profile-v1",
|
||||||
|
)
|
||||||
|
|
||||||
|
# The system prompt will be cached across turns
|
||||||
|
messages = [system_msg, {{"role": "user", "content": user_input}}]''',
|
||||||
|
"variations": {},
|
||||||
|
},
|
||||||
|
]
|
||||||
|
|
||||||
|
PROMPT_BUILDING_TEMPLATES = [
|
||||||
|
{
|
||||||
|
"problem": "Build a system prompt with skills injected as slash commands",
|
||||||
|
"solution": '''from agent.prompt_builder import PromptBuilder
|
||||||
|
from agent.skill_commands import scan_skills
|
||||||
|
|
||||||
|
builder = PromptBuilder()
|
||||||
|
skills = scan_skills("~/.hermes/skills/")
|
||||||
|
|
||||||
|
system_prompt = builder.build(
|
||||||
|
base_prompt="You are a helpful coding assistant.",
|
||||||
|
skills=skills,
|
||||||
|
enabled_toolsets=["web", "terminal", "file"],
|
||||||
|
user_preferences={{"language": "Python", "style": "concise"}},
|
||||||
|
)
|
||||||
|
print(system_prompt)''',
|
||||||
|
"variations": {},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"problem": "Add a reasoning block to an assistant message for chain-of-thought",
|
||||||
|
"solution": '''assistant_msg = {{
|
||||||
|
"role": "assistant",
|
||||||
|
"content": "The answer is 42.",
|
||||||
|
"reasoning": "I calculated this by summing the factors: 1+2+3+4+6+7+12+14+21+28 = 96. Wait, let me recheck... Actually 42 is the answer to life, the universe, and everything.",
|
||||||
|
}}
|
||||||
|
|
||||||
|
messages.append(assistant_msg)''',
|
||||||
|
"variations": {},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"problem": "Format a tool result message for OpenAI-compatible chat API",
|
||||||
|
"solution": '''def tool_result_message(result: str, tool_call_id: str = "") -> dict:
|
||||||
|
return {{
|
||||||
|
"role": "tool",
|
||||||
|
"tool_call_id": tool_call_id,
|
||||||
|
"content": result if isinstance(result, str) else json.dumps(result),
|
||||||
|
}}
|
||||||
|
|
||||||
|
messages.append(tool_result_message("42 files found", tool_call_id="call_abc"))''',
|
||||||
|
"variations": {},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"problem": "Build a few-shot prompt with examples for consistent JSON output",
|
||||||
|
"solution": '''system_prompt = """You are a structured data extractor.
|
||||||
|
|
||||||
|
Return valid JSON only. No markdown, no explanation.
|
||||||
|
|
||||||
|
Examples:
|
||||||
|
Input: "Alice is 30 years old"
|
||||||
|
Output: {{"name": "Alice", "age": 30}}
|
||||||
|
|
||||||
|
Input: "Bob works as an engineer in Seattle"
|
||||||
|
Output: {{"name": "Bob", "job": "engineer", "location": "Seattle"}}
|
||||||
|
|
||||||
|
Now extract from the user input."""
|
||||||
|
|
||||||
|
messages = [
|
||||||
|
{{"role": "system", "content": system_prompt}},
|
||||||
|
{{"role": "user", "content": "Carol is a doctor in Boston, age 45"}},
|
||||||
|
]''',
|
||||||
|
"variations": {},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"problem": "Truncate messages to fit within model context length",
|
||||||
|
"solution": '''from agent.model_metadata import estimate_tokens, DEFAULT_CONTEXT_LENGTHS
|
||||||
|
|
||||||
|
model = "claude-sonnet-4"
|
||||||
|
max_ctx = DEFAULT_CONTEXT_LENGTHS.get(model, 128000)
|
||||||
|
|
||||||
|
# Reserve space for response
|
||||||
|
max_input_tokens = int(max_ctx * 0.8)
|
||||||
|
|
||||||
|
# Truncate from the middle (preserve system + recent)
|
||||||
|
total = sum(estimate_tokens(m["content"]) for m in messages)
|
||||||
|
while total > max_input_tokens and len(messages) > 3:
|
||||||
|
# Remove oldest non-system message
|
||||||
|
for i, m in enumerate(messages):
|
||||||
|
if m["role"] != "system":
|
||||||
|
total -= estimate_tokens(m["content"])
|
||||||
|
messages.pop(i)
|
||||||
|
break''',
|
||||||
|
"variations": {},
|
||||||
|
},
|
||||||
|
]
|
||||||
|
|
||||||
|
# ── Additional generic patterns ────────────────────────────────────
|
||||||
|
|
||||||
|
UTILITY_PATTERNS = [
|
||||||
|
{
|
||||||
|
"problem": "Load user config from ~/.hermes/config.yaml with defaults fallback",
|
||||||
|
"solution": '''from hermes_cli.config import load_cli_config, DEFAULT_CONFIG
|
||||||
|
|
||||||
|
config = load_cli_config()
|
||||||
|
model = config.get("model", DEFAULT_CONFIG["model"])
|
||||||
|
max_iters = config.get("max_iterations", DEFAULT_CONFIG["max_iterations"])''',
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"problem": "Resolve provider credentials from ~/.hermes/.env",
|
||||||
|
"solution": '''from hermes_cli.auth import resolve_credentials
|
||||||
|
|
||||||
|
creds = resolve_credentials("anthropic")
|
||||||
|
print(creds["api_key"][:8] + "...") # masked''',
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"problem": "Switch model mid-session with /model slash command",
|
||||||
|
"solution": '''# In cli.py or gateway/run.py
|
||||||
|
from hermes_cli.model_switch import switch_model
|
||||||
|
|
||||||
|
new_model = switch_model("openai/gpt-4o")
|
||||||
|
print(f"Switched to {{new_model}}")''',
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"problem": "Save a trajectory to disk for later training data extraction",
|
||||||
|
"solution": '''from agent.trajectory import save_trajectory
|
||||||
|
import json
|
||||||
|
|
||||||
|
trajectory = {{
|
||||||
|
"session_id": session_id,
|
||||||
|
"messages": messages,
|
||||||
|
"model": model,
|
||||||
|
"tools_called": [tc.name for tc in tool_calls],
|
||||||
|
}}
|
||||||
|
|
||||||
|
path = save_trajectory(trajectory, directory="~/.hermes/trajectories/")
|
||||||
|
print(f"Saved to {{path}}")''',
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"problem": "Render a rich markdown panel with tool call preview",
|
||||||
|
"solution": '''from agent.display import KawaiiSpinner, render_tool_preview
|
||||||
|
from rich.panel import Panel
|
||||||
|
|
||||||
|
spinner = KawaiiSpinner()
|
||||||
|
spinner.start("Calling web_search...")
|
||||||
|
|
||||||
|
preview = render_tool_preview("web_search", {{"query": "Python 3.12"}})
|
||||||
|
console.print(Panel(preview, title="Tool Call", border_style="cyan"))
|
||||||
|
|
||||||
|
spinner.stop()''',
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"problem": "Validate a dangerous command before execution using approval.py",
|
||||||
|
"solution": '''from tools.approval import detect_dangerous_command
|
||||||
|
|
||||||
|
cmd = "rm -rf /important/data"
|
||||||
|
result = detect_dangerous_command(cmd)
|
||||||
|
if result["dangerous"]:
|
||||||
|
print(f"Approval required: {{result['reason']}}")
|
||||||
|
# Prompt user for approval
|
||||||
|
else:
|
||||||
|
print("Safe to execute")''',
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"problem": "Run a subagent delegation with timeout and context isolation",
|
||||||
|
"solution": '''from tools.delegate_tool import delegate_task
|
||||||
|
|
||||||
|
result = delegate_task(
|
||||||
|
goal="Debug this failing test",
|
||||||
|
context="test_file.py line 42 raises AssertionError",
|
||||||
|
max_iterations=20,
|
||||||
|
toolsets=["terminal", "file"],
|
||||||
|
)
|
||||||
|
print(result["summary"])''',
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"problem": "Read a file safely with size limits and binary detection",
|
||||||
|
"solution": '''from tools.file_tools import read_file
|
||||||
|
|
||||||
|
content = read_file(
|
||||||
|
path="/tmp/large.log",
|
||||||
|
offset=1,
|
||||||
|
limit=500,
|
||||||
|
)
|
||||||
|
print(content)''',
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"problem": "Execute Python code in sandbox with timeout and output capture",
|
||||||
|
"solution": '''from tools.code_execution_tool import execute_code
|
||||||
|
|
||||||
|
result = execute_code("""
|
||||||
|
import json
|
||||||
|
print(json.dumps({"sum": sum(range(100))}))
|
||||||
|
""")
|
||||||
|
data = json.loads(result["output"])
|
||||||
|
print(data["sum"]) # 4950''',
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"problem": "Poll a background process for completion with progress tracking",
|
||||||
|
"solution": '''from tools.process_registry import ProcessRegistry
|
||||||
|
|
||||||
|
registry = ProcessRegistry()
|
||||||
|
session_id = registry.start("long_task.sh", background=True)
|
||||||
|
|
||||||
|
while True:
|
||||||
|
status = registry.poll(session_id)
|
||||||
|
if status["done"]:
|
||||||
|
print(f"Completed with exit code {{status['exit_code']}}")
|
||||||
|
break
|
||||||
|
print(f"Progress: {{status['lines']}} lines output")
|
||||||
|
time.sleep(1)''',
|
||||||
|
},
|
||||||
|
]
|
||||||
|
|
||||||
|
ERROR_HANDLING_PATTERNS = [
|
||||||
|
{
|
||||||
|
"problem": "Gracefully handle API rate limit with exponential backoff",
|
||||||
|
"solution": '''import time
|
||||||
|
import random
|
||||||
|
|
||||||
|
max_retries = 5
|
||||||
|
for attempt in range(max_retries):
|
||||||
|
try:
|
||||||
|
response = client.chat.completions.create(...)
|
||||||
|
break
|
||||||
|
except RateLimitError as e:
|
||||||
|
wait = (2 ** attempt) + random.uniform(0, 1)
|
||||||
|
print(f"Rate limited. Retrying in {{wait:.1f}}s...")
|
||||||
|
time.sleep(wait)
|
||||||
|
else:
|
||||||
|
raise Exception("Max retries exceeded")''',
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"problem": "Catch and log tool execution errors without crashing the agent loop",
|
||||||
|
"solution": '''import json
|
||||||
|
import traceback
|
||||||
|
|
||||||
|
try:
|
||||||
|
result = handle_function_call(tool_call.name, tool_call.args)
|
||||||
|
except Exception as e:
|
||||||
|
tb = traceback.format_exc()
|
||||||
|
result = json.dumps({{
|
||||||
|
"error": str(e),
|
||||||
|
"traceback": tb,
|
||||||
|
}})''',
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"problem": "Validate JSON output from model before parsing",
|
||||||
|
"solution": '''import json
|
||||||
|
|
||||||
|
try:
|
||||||
|
data = json.loads(model_output)
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
# Try to extract JSON from markdown code block
|
||||||
|
import re
|
||||||
|
match = re.search(r'```json\\n(.*?)\\n```', model_output, re.DOTALL)
|
||||||
|
if match:
|
||||||
|
data = json.loads(match.group(1))
|
||||||
|
else:
|
||||||
|
raise ValueError("Model did not return valid JSON")''',
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"problem": "Handle missing optional dependencies with graceful degradation",
|
||||||
|
"solution": '''try:
|
||||||
|
import chromadb
|
||||||
|
HAS_CHROMADB = True
|
||||||
|
except ImportError:
|
||||||
|
HAS_CHROMADB = False
|
||||||
|
|
||||||
|
def search_vectors(query: str):
|
||||||
|
if not HAS_CHROMADB:
|
||||||
|
return {{"warning": "ChromaDB not installed", "results": []}}
|
||||||
|
# ... actual implementation''',
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"problem": "Detect and recover from infinite tool call loops",
|
||||||
|
"solution": '''# In run_conversation loop
|
||||||
|
seen_calls = set()
|
||||||
|
for tool_call in response.tool_calls:
|
||||||
|
call_key = (tool_call.name, json.dumps(tool_call.args, sort_keys=True))
|
||||||
|
if call_key in seen_calls:
|
||||||
|
messages.append({{
|
||||||
|
"role": "tool",
|
||||||
|
"content": "Error: Repeated identical tool call detected. Try a different approach.",
|
||||||
|
}})
|
||||||
|
continue
|
||||||
|
seen_calls.add(call_key)
|
||||||
|
result = handle_function_call(tool_call.name, tool_call.args)
|
||||||
|
messages.append(tool_result_message(result))''',
|
||||||
|
},
|
||||||
|
]
|
||||||
|
|
||||||
|
CONFIG_PATTERNS = [
|
||||||
|
{
|
||||||
|
"problem": "Bump config schema version and add migration for existing users",
|
||||||
|
"solution": '''# In hermes_cli/config.py
|
||||||
|
|
||||||
|
DEFAULT_CONFIG = {{
|
||||||
|
"_config_version": 6, # bumped from 5
|
||||||
|
"model": "anthropic/claude-sonnet-4",
|
||||||
|
"max_iterations": 50,
|
||||||
|
"new_feature": True, # added
|
||||||
|
}}
|
||||||
|
|
||||||
|
def migrate_config(raw: dict) -> dict:
|
||||||
|
version = raw.get("_config_version", 0)
|
||||||
|
if version < 6:
|
||||||
|
raw["new_feature"] = DEFAULT_CONFIG["new_feature"]
|
||||||
|
raw["_config_version"] = 6
|
||||||
|
return raw''',
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"problem": "Add a new .env variable with metadata for setup wizard",
|
||||||
|
"solution": '''# In hermes_cli/config.py
|
||||||
|
|
||||||
|
OPTIONAL_ENV_VARS = {{
|
||||||
|
"NEW_API_KEY": {{
|
||||||
|
"description": "API key for new service integration",
|
||||||
|
"prompt": "New Service API Key",
|
||||||
|
"url": "https://new-service.com/api-keys",
|
||||||
|
"password": True,
|
||||||
|
"category": "tool",
|
||||||
|
}},
|
||||||
|
}}''',
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"problem": "Save a persistent config value and reload on next startup",
|
||||||
|
"solution": '''from hermes_cli.config import save_config_value, load_cli_config
|
||||||
|
|
||||||
|
save_config_value("model", "openai/gpt-4o")
|
||||||
|
config = load_cli_config()
|
||||||
|
assert config["model"] == "openai/gpt-4o"''',
|
||||||
|
},
|
||||||
|
]
|
||||||
|
|
||||||
|
TESTING_PATTERNS = [
|
||||||
|
{
|
||||||
|
"problem": "Write a pytest test for a new tool using monkeypatch",
|
||||||
|
"solution": '''import pytest
|
||||||
|
from tools.web_tools import web_search
|
||||||
|
|
||||||
|
def test_web_search_returns_results(monkeypatch):
|
||||||
|
def mock_fetch(url):
|
||||||
|
return "<html><body>Test result</body></html>"
|
||||||
|
|
||||||
|
monkeypatch.setattr("tools.web_tools._fetch", mock_fetch)
|
||||||
|
result = web_search(query="test")
|
||||||
|
assert "Test result" in result''',
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"problem": "Test agent loop behavior with mocked API responses",
|
||||||
|
"solution": '''import pytest
|
||||||
|
from run_agent import AIAgent
|
||||||
|
|
||||||
|
def test_agent_runs_tool_call(monkeypatch):
|
||||||
|
agent = AIAgent(model="test", max_iterations=5)
|
||||||
|
|
||||||
|
class MockResponse:
|
||||||
|
tool_calls = [MockToolCall("read_file", {{"path": "/tmp/test.txt"}})]
|
||||||
|
content = None
|
||||||
|
|
||||||
|
monkeypatch.setattr(agent, "_call_api", lambda **kw: MockResponse())
|
||||||
|
result = agent.chat("Read the file")
|
||||||
|
assert result is not None''',
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"problem": "Use tmp_path fixture for file-based tests",
|
||||||
|
"solution": '''import pytest
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
def test_file_write_creates_file(tmp_path):
|
||||||
|
target = tmp_path / "output.txt"
|
||||||
|
target.write_text("hello")
|
||||||
|
assert target.exists()
|
||||||
|
assert target.read_text() == "hello"''',
|
||||||
|
},
|
||||||
|
]
|
||||||
|
|
||||||
|
# ── Assembly ───────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def _render(text: str, subs: dict) -> str:
    """Apply ``str.format`` substitutions to *text* and unescape ``{{``/``}}``."""
    if subs:
        # Parameterised templates must format cleanly; let errors surface.
        return text.format(**subs)
    try:
        return text.format()
    except (KeyError, IndexError, ValueError):
        # An unparameterised template may mix escaped braces with literal
        # single braces (e.g. an embedded dict literal). Only collapse the
        # escaped pairs and leave everything else untouched.
        return text.replace("{{", "{").replace("}}", "}")


def expand_template(template: dict) -> list[dict]:
    """Generate all combinations of a template's variations.

    Args:
        template: Mapping with ``"problem"`` and ``"solution"`` format
            strings, an optional ``"variations"`` mapping of placeholder
            name to a list of values, and an optional ``"domain"`` label
            (defaults to ``"hermes_agent_core"``).

    Returns:
        One entry per element of the cartesian product of the variation
        lists, each with the keys ``issue``, ``domain``, ``problem`` and
        ``solution``. A template whose variations are empty or missing
        yields exactly one entry. Doubled braces in the template text are
        rendered as single braces in every case.

    Raises:
        KeyError, IndexError, ValueError: If a template that declares
            variations contains a placeholder ``str.format`` cannot resolve.
    """
    variations = template.get("variations") or {}
    domain = template.get("domain", "hermes_agent_core")
    keys = list(variations)

    results = []
    # product() over zero axes yields a single empty combination, so the
    # no-variation case flows through the same rendering path.
    for combo in itertools.product(*(variations[key] for key in keys)):
        subs = dict(zip(keys, combo))
        results.append({
            "issue": ISSUE,
            "domain": domain,
            "problem": _render(template["problem"], subs),
            "solution": _render(template["solution"], subs),
        })
    return results
|
||||||
|
|
||||||
|
|
||||||
|
def build_all(target_count: int = 1000) -> list[dict]:
    """Expand every template group and pad the result to *target_count* entries.

    Each template is tagged in place with its group's domain (unless it
    already carries one) and expanded with ``expand_template``. If the
    expansion yields fewer than *target_count* entries, extra entries are
    created by copying randomly chosen base entries and appending a
    ``" (variant N)"`` suffix to the problem text. The combined list is
    shuffled and trimmed to *target_count*.

    Randomness comes from the module-level ``random`` state, so seeding it
    beforehand makes the output reproducible.

    Args:
        target_count: Number of entries to return. Values below zero are
            treated as zero.

    Returns:
        A shuffled list of at most *target_count* entry dicts.
    """
    domain_groups = (
        ("agent_loop", AGENT_LOOP_TEMPLATES),
        ("tool_routing", TOOL_ROUTING_TEMPLATES),
        ("session_management", SESSION_MANAGEMENT_TEMPLATES),
        ("prompt_building", PROMPT_BUILDING_TEMPLATES),
        ("utility", UTILITY_PATTERNS),
        ("error_handling", ERROR_HANDLING_PATTERNS),
        ("config", CONFIG_PATTERNS),
        ("testing", TESTING_PATTERNS),
    )

    # Tag each template with its domain, then expand it.
    base_entries = []
    for domain, templates in domain_groups:
        for template in templates:
            template.setdefault("domain", domain)
            base_entries.extend(expand_template(template))

    # A negative count would otherwise slice from the end of the list.
    target_count = max(target_count, 0)

    entries = list(base_entries)
    # Draw padding only from the base entries and number variants per base
    # entry: suffixes never stack and no two padded problems are identical.
    variants_made = [0] * len(base_entries)
    while base_entries and len(entries) < target_count:
        pick = random.randrange(len(base_entries))
        variants_made[pick] += 1
        variant = dict(base_entries[pick])
        variant["problem"] = f"{variant['problem']} (variant {variants_made[pick]})"
        entries.append(variant)

    # Shuffle and trim
    random.shuffle(entries)
    return entries[:target_count]
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> None:
    """Command-line entry point: build the pairs and write them as JSONL.

    Reads ``--count``, ``--output`` and ``--seed`` from the command line,
    seeds the ``random`` module, builds the entries, writes one JSON object
    per line to the output path (creating parent directories as needed),
    and prints a summary followed by the per-domain entry counts.
    """
    from collections import Counter

    cli = argparse.ArgumentParser(description="Build code-pattern training pairs for Hermes Agent Core")
    cli.add_argument("--count", type=int, default=1000, help="Number of pairs to generate")
    cli.add_argument("--output", type=Path, default=DEFAULT_OUTPUT, help="Output JSONL path")
    cli.add_argument("--seed", type=int, default=592, help="Random seed")
    opts = cli.parse_args()

    # Seed before building so padding and shuffling are reproducible.
    random.seed(opts.seed)
    pairs = build_all(target_count=opts.count)

    out_path = opts.output
    out_path.parent.mkdir(parents=True, exist_ok=True)
    with out_path.open("w", encoding="utf-8") as sink:
        sink.writelines(json.dumps(pair, ensure_ascii=False) + "\n" for pair in pairs)

    print(f"Generated {len(pairs)} training pairs → {out_path}")

    # Print domain distribution
    per_domain = Counter(pair["domain"] for pair in pairs)
    print("Domain distribution:")
    for name, total in sorted(per_domain.items()):
        print(f" {name}: {total}")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
Reference in New Issue
Block a user