Compare commits

...

1 Commit

Author SHA1 Message Date
Alexander Whitestone
3cee26902e feat: session templates for code-first seeding (closes #329)
Some checks failed
Docker Build and Publish / build-and-push (pull_request) Has been skipped
Nix / nix (ubuntu-latest) (pull_request) Failing after 4s
Supply Chain Audit / Scan PR for supply chain risks (pull_request) Successful in 46s
Docs Site Checks / docs-site-checks (pull_request) Failing after 2m30s
Tests / e2e (pull_request) Successful in 1m36s
Tests / test (pull_request) Failing after 33m47s
Nix / nix (macos-latest) (pull_request) Has been cancelled
Research finding (#327): code-heavy sessions improve over time, file-heavy
sessions degrade. Key is deterministic feedback loops in first 30 turns.

## tools/session_templates.py (new, 299 lines)

- extract_successful_tool_pairs() -- pulls successful (call, result) pairs
  from first 30 session messages. Filters errors, truncates large results.
- classify_session() -- categorizes dominant tool type by counting calls:
  code (execute_code), file (read/write/search/patch), research (web_search),
  terminal, browser, general.
- save_template() / load_template() -- JSON storage in ~/.hermes/session-templates/
  Named as {type}_{name}.json with metadata.
- list_templates() / delete_template() -- management functions.
- get_template_for_session() -- resolution chain: exact type match ->
  keyword match against user message -> most recent template.
- format_template_for_prompt() -- renders examples as system prompt section
  with tool name, arguments, truncated results.

## run_agent.py (2 changes)

1. Store _initial_user_message in run_conversation() for template matching.
2. In _build_system_prompt(), after context files injection, call
   get_template_for_session() with the user's first message as task_hint.
   Wrapped in try/except, never blocks prompt assembly.

## hermes_cli/main.py (new subcommand)

hermes templates list [--type code|file|research|terminal|general]
hermes templates extract <session-id> [--name X] [--type T] [--max-examples N]
hermes templates delete <name>

Workflow:
  1. Complete a session with good tool usage patterns
  2. hermes templates extract <session-id> --type code
  3. New sessions auto-inject the matching template into system prompt
  4. Agent starts with proven tool call examples from turn 1
2026-04-13 20:23:56 -04:00
3 changed files with 404 additions and 0 deletions

View File

@@ -5306,6 +5306,96 @@ Examples:
plugins_parser.set_defaults(func=cmd_plugins)
# =========================================================================
# templates command — session template management (issue #329)
#
# Registers the top-level `hermes templates` command.  Its sub-actions are
# attached to `templates_subparsers`, created at the end of this section.
# =========================================================================
templates_parser = subparsers.add_parser(
"templates",
help="Manage session templates for code-first seeding",
description="Extract, list, and manage session templates that pre-seed "
"new sessions with proven tool call patterns.",
# RawDescriptionHelpFormatter prints the epilog below with its line breaks
# intact instead of re-wrapping it.
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog="""\
Examples:
hermes templates list List all templates
hermes templates list --type code List code templates only
hermes templates extract SESSION Extract template from session
hermes templates delete NAME Delete a template
""",
)
# The chosen sub-action name is stored on the namespace as
# `args.templates_action`; it is not marked required here.
templates_subparsers = templates_parser.add_subparsers(dest="templates_action")
# Task types accepted by --type.  "browser" is listed because the classifier
# in tools/session_templates.py has a "browser" category and templates are
# saved under whatever category it returns; without this entry such
# templates could neither be filtered in `list` nor forced in `extract`.
_template_type_choices = [
    "code", "file", "research", "terminal", "browser", "general",
]

# hermes templates list|ls [--type T]
templates_list = templates_subparsers.add_parser(
    "list", aliases=["ls"], help="List available templates")
templates_list.add_argument(
    "--type", dest="task_type",
    choices=_template_type_choices,
    help="Filter by task type")

# hermes templates extract SESSION [--name X] [--type T] [--max-examples N]
templates_extract = templates_subparsers.add_parser(
    "extract", help="Extract template from a completed session")
templates_extract.add_argument(
    "session_id", help="Session ID or title to extract from")
templates_extract.add_argument(
    "--name", help="Template name (default: auto-generated)")
templates_extract.add_argument(
    "--type", dest="task_type",
    choices=_template_type_choices,
    help="Override auto-detected task type")
templates_extract.add_argument(
    "--max-examples", type=int, default=10,
    help="Max tool call examples (default: 10)")

# hermes templates delete|rm NAME
templates_delete = templates_subparsers.add_parser(
    "delete", aliases=["rm"], help="Delete a template")
templates_delete.add_argument("name", help="Template name to delete")
def cmd_templates(args):
    """Handle ``hermes templates`` and its list / extract / delete actions.

    Args:
        args: Parsed argparse namespace.  ``templates_action`` selects the
            action; ``task_type``, ``session_id``, ``name`` and
            ``max_examples`` are read depending on that action.

    Returns:
        None.  All results are reported on stdout.
    """
    import datetime

    from tools.session_templates import (
        list_templates, extract_successful_tool_pairs,
        classify_session, save_template, delete_template,
        get_templates_dir,
    )

    action = args.templates_action

    # A bare `hermes templates` behaves like `hermes templates list`.
    if not action or action in ("list", "ls"):
        templates = list_templates(task_type=getattr(args, "task_type", None))
        if not templates:
            print(f"No templates found in {get_templates_dir()}")
            return
        for t in templates:
            age = ""
            created_at = t.get("created_at")
            if created_at:
                # A hand-edited or corrupt timestamp must not abort the
                # listing; the date suffix is simply left out.
                try:
                    stamp = datetime.datetime.fromtimestamp(created_at)
                    age = f" ({stamp.strftime('%Y-%m-%d')})"
                except (TypeError, ValueError, OverflowError, OSError):
                    age = ""
            print(f" [{t['task_type']}] {t['name']} -- {t['example_count']} examples{age}")
            if t.get("description"):
                print(f" {t['description']}")

    elif action == "extract":
        if args.max_examples < 1:
            print(f"--max-examples must be at least 1 (got {args.max_examples})")
            return

        from hermes_state import SessionDB

        db = SessionDB()
        session_id = args.session_id
        # The positional argument may be either a session title or an id.
        resolved = db.get_session_by_title(session_id) or db.get_session(session_id)
        if not resolved:
            print(f"Session not found: {session_id}")
            return
        sid = resolved["id"]
        messages = db.get_messages(sid)
        pairs = extract_successful_tool_pairs(messages, max_pairs=args.max_examples)
        if not pairs:
            print(f"No successful tool calls found in first 30 turns of session {sid}")
            return

        task_type = args.task_type
        if not task_type:
            # classify_session() looks the tool up under "name", whereas the
            # extracted pairs carry it under "tool_name".  Provide both keys
            # so auto-detection actually sees the tool names.
            task_type = classify_session(
                [{**p, "name": p.get("tool_name", "")} for p in pairs]
            )
        name = args.name or sid[:12]
        path = save_template(name, task_type, pairs, source_session=sid)
        print(f"Extracted {len(pairs)} examples as [{task_type}] template: {path.name}")

    elif action in ("delete", "rm"):
        if delete_template(args.name):
            print(f"Deleted template: {args.name}")
        else:
            print(f"Template not found: {args.name}")
templates_parser.set_defaults(func=cmd_templates)
# =========================================================================
# Plugin CLI commands — dynamically registered by memory/general plugins.
# Plugins provide a register_cli(subparser) function that builds their

View File

@@ -3250,6 +3250,19 @@ class AIAgent:
if context_files_prompt:
prompt_parts.append(context_files_prompt)
# Session template injection — pre-seed with proven tool call patterns
# from past successful sessions to establish feedback loops early.
if not self.skip_context_files:
try:
from tools.session_templates import get_template_for_session
_template_prompt = get_template_for_session(
task_hint=getattr(self, "_initial_user_message", None),
)
if _template_prompt:
prompt_parts.append(_template_prompt)
except Exception:
pass # Templates are optional, never block prompt assembly
from hermes_time import now as _hermes_now
now = _hermes_now()
timestamp_line = f"Conversation started: {now.strftime('%A, %B %d, %Y %I:%M %p')}"
@@ -7754,6 +7767,8 @@ class AIAgent:
self._stream_callback = stream_callback
self._persist_user_message_idx = None
self._persist_user_message_override = persist_user_message
# Store initial user message for session template matching (issue #329)
self._initial_user_message = user_message
# Generate unique task_id if not provided to isolate VMs between concurrent tasks
effective_task_id = task_id or str(uuid.uuid4())

299
tools/session_templates.py Normal file
View File

@@ -0,0 +1,299 @@
"""Session templates — pre-seed new sessions with proven tool call patterns.

After a session completes successfully, extract the first N successful tool
calls + results and store them as a reusable template. New sessions can
inject a matching template into the system prompt to establish effective
feedback loops from the first turn.

Research finding (issue #327): code-heavy sessions (execute_code dominant
in first 30 turns) improve over time. File-heavy sessions degrade. The key
is deterministic feedback loops, not arbitrary context.

Templates live in ~/.hermes/session-templates/ as JSON files.
"""
import json
import logging
import time
from pathlib import Path
from typing import Any, Dict, List, Optional
from hermes_constants import get_hermes_home
logger = logging.getLogger(__name__)
# Tool categories for template classification.
# Maps a category label to the lowercase tool names that belong to it.
# classify_tool() tries an exact match against these sets first and then a
# substring match in either direction; a tool that matches nothing is
# reported as "general", which therefore has no entry here.
_TOOL_CATEGORIES = {
"code": {"execute_code", "python", "ipython"},
"file": {"read_file", "write_file", "search_files", "patch"},
"research": {"web_search", "web_extract", "duckduckgo"},
"terminal": {"terminal", "shell", "bash"},
"browser": {"browser_navigate", "browser_click", "browser_snapshot"},
}
# Maximum tool result size to store in a template (chars).
# extract_successful_tool_pairs() cuts longer results at this length and
# appends a "[truncated for template]" marker.
_MAX_RESULT_CHARS = 2000
# Maximum examples per template.
# Used as the default for extract_successful_tool_pairs(max_pairs=...).
_MAX_EXAMPLES = 10
def get_templates_dir() -> Path:
    """Locate the ``session-templates`` folder under the Hermes home directory.

    The folder, along with any missing parent directories, is created on
    demand, so the returned path exists when this function returns.
    """
    templates_root = get_hermes_home() / "session-templates"
    templates_root.mkdir(parents=True, exist_ok=True)
    return templates_root
def classify_tool(tool_name: str) -> str:
    """Classify a tool name into a template category.

    Matching is case-insensitive and runs in two passes over
    ``_TOOL_CATEGORIES``: an exact membership test first, then a substring
    test in either direction (a known name contained in *tool_name*, or
    *tool_name* contained in a known name).

    Args:
        tool_name: Name of the tool.  ``None``, empty and whitespace-only
            values are accepted.

    Returns:
        A key of ``_TOOL_CATEGORIES``, or ``"general"`` when nothing matches
        or no usable name was given.
    """
    tool_lower = (tool_name or "").strip().lower()
    # The empty string is a substring of every known name, so without this
    # guard a missing tool name would be reported as the first category
    # instead of "general".
    if not tool_lower:
        return "general"

    for category, names in _TOOL_CATEGORIES.items():
        if tool_lower in names:
            return category

    for category, names in _TOOL_CATEGORIES.items():
        if any(name in tool_lower or tool_lower in name for name in names):
            return category

    return "general"
def classify_session(tool_calls: List[Dict]) -> str:
    """Determine the dominant category for a session from its tool calls.

    Args:
        tool_calls: Dicts describing tool calls.  The tool name is read from
            ``"tool_name"`` (the key produced by
            ``extract_successful_tool_pairs``) and, failing that, from
            ``"name"``.

    Returns:
        The category with the most calls, or ``"general"`` for an empty
        list.  On a tie, the category that was encountered first wins.
    """
    if not tool_calls:
        return "general"

    counts: Dict[str, int] = {}
    for tc in tool_calls:
        # Extracted pairs use "tool_name"; reading only "name" would classify
        # every one of them from an empty string.
        tool = tc.get("tool_name") or tc.get("name") or ""
        cat = classify_tool(tool)
        counts[cat] = counts.get(cat, 0) + 1
    return max(counts, key=counts.get)
def extract_successful_tool_pairs(
    messages: List[Dict],
    max_pairs: int = _MAX_EXAMPLES,
    max_turns: int = 30,
) -> List[Dict]:
    """Extract successful (tool_call, tool_result) pairs from session messages.

    Only the first *max_turns* messages are inspected, to capture the
    "cold start" phase.  Assistant messages register their tool calls by id;
    a later ``tool`` message with a matching ``tool_call_id`` completes the
    pair.

    A result is treated as a failure, and the pair dropped, when it is empty
    or when its beginning contains "error" or "denied" (case-insensitive,
    first 100 characters) or "BLOCKED" (first 50 characters).  This is a
    text heuristic, so a successful result that merely mentions one of those
    words early on is dropped as well.

    Args:
        messages: Session messages as dicts with a ``"role"`` key.
        max_pairs: Maximum number of pairs to return; values below 1 yield
            an empty list.
        max_turns: Number of leading messages to inspect.

    Returns:
        A list of dicts with keys ``tool_name``, ``arguments``, ``result``
        and ``turn_index`` (index of the assistant message that issued the
        call).  Results longer than ``_MAX_RESULT_CHARS`` are truncated.
    """
    pairs: List[Dict] = []
    if max_pairs <= 0 or not messages:
        return pairs

    # Tool calls seen so far that are still waiting for their result.
    pending_calls: Dict[str, Dict] = {}

    for i, msg in enumerate(messages[:max_turns]):
        role = msg.get("role", "")

        if role == "assistant":
            for tc in msg.get("tool_calls") or []:
                # "function" may be present but null.
                func = tc.get("function") or {}
                pending_calls[tc.get("id", "")] = {
                    "name": func.get("name", ""),
                    "arguments": func.get("arguments", ""),
                    "turn_index": i,
                }
            continue

        if role != "tool":
            continue

        call_info = pending_calls.pop(msg.get("tool_call_id", ""), None)
        if call_info is None:
            continue

        content = msg.get("content")
        # A null result carries no feedback; do not store the text "None".
        content_str = "" if content is None else str(content)
        if not content_str.strip():
            continue

        # Only the head of the result is inspected, so slice before
        # lowercasing rather than lowercasing a potentially huge string.
        head = content_str[:100].lower()
        is_error = (
            "error" in head
            or "denied" in head
            or "BLOCKED" in content_str[:50]
        )
        if is_error:
            continue

        if len(content_str) > _MAX_RESULT_CHARS:
            content_str = (
                content_str[:_MAX_RESULT_CHARS]
                + "\n... [truncated for template]"
            )
        pairs.append(
            {
                "tool_name": call_info["name"],
                "arguments": call_info["arguments"],
                "result": content_str,
                "turn_index": call_info["turn_index"],
            }
        )
        if len(pairs) >= max_pairs:
            break

    return pairs
def save_template(
    name: str,
    task_type: str,
    examples: List[Dict],
    source_session: str = "",
    description: str = "",
) -> Path:
    """Save a session template to disk and return the path of the file.

    The file is written to the templates directory as
    ``<task_type>_<name>.json``, with every character other than letters,
    digits, ``-`` and ``_`` replaced by ``_`` in both parts.  The unmodified
    *name* is kept inside the JSON document.  An existing file with the same
    name is overwritten.

    Args:
        name: Human-readable template name.
        task_type: Category label stored in the template.
        examples: Tool call examples to store.
        source_session: Id of the session the examples came from.
        description: Optional free-text description.

    Returns:
        Path of the written JSON file.
    """
    def _filename_safe(text: str) -> str:
        return "".join(c if c.isalnum() or c in "-_" else "_" for c in text)

    templates_dir = get_templates_dir()
    # task_type is sanitised as well so that the file can never end up
    # outside the templates directory.
    filename = f"{_filename_safe(task_type)}_{_filename_safe(name)}.json"
    template = {
        "name": name,
        "task_type": task_type,
        "description": description,
        "source_session": source_session,
        "created_at": time.time(),
        "example_count": len(examples),
        "examples": examples,
    }
    path = templates_dir / filename
    # ensure_ascii=False emits raw non-ASCII characters, so the encoding has
    # to be pinned; the platform default may be unable to represent them.
    path.write_text(
        json.dumps(template, indent=2, ensure_ascii=False),
        encoding="utf-8",
    )
    logger.info("Saved session template: %s (%d examples)", filename, len(examples))
    return path
def load_template(name: str) -> Optional[Dict]:
    """Load a template by file name, stored name, or file-name prefix.

    Lookup order:
      1. A file called ``<name>.json`` or ``<name>`` in the templates
         directory.  If it exists but cannot be parsed, ``None`` is returned.
      2. The first ``*.json`` file (in sorted order) whose ``"name"`` field
         equals *name*.  Files are named ``<task_type>_<name>.json``, so
         this is what resolves the name shown by ``list_templates``.
      3. The first ``*.json`` file (in sorted order) whose stem starts with
         *name*.

    Args:
        name: File stem, stored template name, or stem prefix.

    Returns:
        The parsed template dict, or ``None`` when nothing usable is found.
    """
    # An empty name would resolve to the templates directory itself and
    # would prefix-match every file.
    if not name:
        return None

    templates_dir = get_templates_dir()

    def _read_template_file(path: Path) -> Optional[Dict]:
        # Unreadable, mis-encoded, malformed or non-object files all count
        # as "no template".
        try:
            data = json.loads(path.read_text(encoding="utf-8"))
        except (OSError, ValueError):
            return None
        return data if isinstance(data, dict) else None

    for suffix in (".json", ""):
        path = templates_dir / f"{name}{suffix}"
        if path.is_file():
            return _read_template_file(path)

    prefix_hit: Optional[Dict] = None
    for path in sorted(templates_dir.glob("*.json")):
        data = _read_template_file(path)
        if data is None:
            continue
        if data.get("name") == name:
            return data
        if prefix_hit is None and path.stem.startswith(name):
            prefix_hit = data
    return prefix_hit
def list_templates(task_type: Optional[str] = None) -> List[Dict]:
    """List available templates, optionally filtered by task type.

    Files that cannot be read, are not valid UTF-8 JSON, or do not contain a
    JSON object are skipped rather than aborting the listing.

    Args:
        task_type: When given, only templates whose stored ``task_type``
            equals this value are returned.

    Returns:
        One summary dict per template, ordered by file name, with keys
        ``name``, ``task_type``, ``description``, ``example_count``,
        ``created_at`` and ``file`` (path of the JSON file as a string).
    """
    templates_dir = get_templates_dir()
    results = []
    for path in sorted(templates_dir.glob("*.json")):
        try:
            template = json.loads(path.read_text(encoding="utf-8"))
        except (OSError, ValueError):
            # ValueError covers both JSONDecodeError and UnicodeDecodeError.
            continue
        if not isinstance(template, dict):
            continue
        if task_type and template.get("task_type") != task_type:
            continue

        example_count = template.get("example_count")
        if example_count is None:
            example_count = len(template.get("examples") or [])

        # `or` fallbacks also replace explicit nulls, so callers can rely on
        # strings and numbers here.
        results.append(
            {
                "name": template.get("name") or path.stem,
                "task_type": template.get("task_type") or "unknown",
                "description": template.get("description") or "",
                "example_count": example_count,
                "created_at": template.get("created_at") or 0,
                "file": str(path),
            }
        )
    return results
def delete_template(name: str) -> bool:
    """Delete a single template identified by *name*.

    *name* may be the file name with or without ``.json``, the name stored
    inside the template (the one shown by ``list_templates``), or a prefix
    of the file stem.  The match kinds are tried in that order, and a
    template is only deleted when exactly one file matches; an ambiguous
    name deletes nothing.

    Args:
        name: Identifier of the template to delete.

    Returns:
        ``True`` if a file was removed, ``False`` if nothing matched, the
        match was ambiguous, or *name* was empty.
    """
    if not name:
        return False
    # Accept the file name as printed after extraction ("<stem>.json").
    key = name[:-5] if name.endswith(".json") else name
    if not key:
        return False

    templates_dir = get_templates_dir()
    candidates = sorted(templates_dir.glob("*.json"))

    def _stored_name(path: Path) -> Optional[str]:
        try:
            data = json.loads(path.read_text(encoding="utf-8"))
        except (OSError, ValueError):
            return None
        return data.get("name") if isinstance(data, dict) else None

    matchers = (
        lambda p: p.stem == key,
        lambda p: _stored_name(p) in (name, key),
        lambda p: p.stem.startswith(key),
    )
    for matches in matchers:
        hits = [p for p in candidates if matches(p)]
        if not hits:
            continue
        if len(hits) > 1:
            # Refuse to guess which template the caller meant.
            logger.warning(
                "Template name %r is ambiguous (%s); nothing deleted",
                name,
                ", ".join(p.name for p in hits),
            )
            return False
        hits[0].unlink()
        logger.info("Deleted template: %s", hits[0].name)
        return True
    return False
def format_template_for_prompt(template: Dict) -> str:
    """Render a stored template as a Markdown section for the system prompt.

    The section consists of a heading naming the task type, one sub-section
    per example (tool name, arguments, and at most the first 500 characters
    of the result in a fenced block), and a closing reminder line.

    Returns an empty string when the template has no examples.
    """
    examples = template.get("examples", [])
    if not examples:
        return ""

    task_type = template.get("task_type", "general")
    sections = [
        f"## Session Seed: {task_type.upper()} workflow pattern",
        "",
        f"Successful tool calls from a previous {task_type} session.",
        "Use these as a reference for establishing your feedback loop early.",
        "",
    ]

    for number, example in enumerate(examples, start=1):
        tool_label = example.get("tool_name", "unknown")
        raw_args = example.get("arguments", "{}")
        full_result = example.get("result", "")

        # Keep the prompt small: show only the head of long results.
        preview = full_result[:500].strip()
        if len(full_result) > 500:
            preview = preview + "\n... [preview truncated]"

        sections.extend(
            (
                f"### Example {number}: `{tool_label}`",
                f"Arguments: `{raw_args}`",
                f"Result:\n```\n{preview}\n```",
                "",
            )
        )

    sections.append(
        "---\n"
        "Pattern: call tools, verify results, iterate. "
        "Start with tool calls early to establish your feedback loop."
    )
    return "\n".join(sections)
def get_template_for_session(
    task_hint: Optional[str] = None,
    task_type: Optional[str] = None,
) -> Optional[str]:
    """Get a formatted template for injection into a new session's system prompt.

    Resolution order:
      1. Exact ``task_type`` match.
      2. ``task_hint`` keyword match: a whole word of the hint (3+
         characters, common filler words ignored) that also appears as a
         whole word in a template's name or description.
      3. Most recently created template.

    A candidate that cannot be loaded or renders to nothing is skipped and
    the search continues.

    Args:
        task_hint: Free text describing the task, e.g. the first user message.
        task_type: Category to prefer.

    Returns:
        The rendered prompt section, or ``None`` if no usable template exists.
    """
    templates = list_templates()
    if not templates:
        return None

    def _words(text: str) -> List[str]:
        # Split on anything that is not a letter or digit, so "my_api-demo"
        # yields ["my", "api", "demo"].
        return "".join(c if c.isalnum() else " " for c in text.lower()).split()

    def _render(entry: Dict) -> Optional[str]:
        # Templates are stored as <task_type>_<name>.json, so the display
        # name alone does not identify the file; resolve via the file stem
        # first and fall back to the name.
        full = None
        file_ref = entry.get("file")
        if file_ref:
            full = load_template(Path(file_ref).stem)
        if not full:
            full = load_template(entry["name"])
        if not full:
            return None
        return format_template_for_prompt(full) or None

    if task_type:
        for t in templates:
            if t["task_type"] == task_type:
                rendered = _render(t)
                if rendered:
                    return rendered

    if task_hint and isinstance(task_hint, str):
        filler = {
            "the", "and", "for", "with", "that", "this", "from", "into",
            "your", "you", "can", "how", "what", "please",
        }
        # Short words and filler words would match nearly every template.
        hint_words = {
            w for w in _words(task_hint) if len(w) >= 3 and w not in filler
        }
        if hint_words:
            for t in templates:
                label = f"{t.get('name') or ''} {t.get('description') or ''}"
                if hint_words.intersection(_words(label)):
                    rendered = _render(t)
                    if rendered:
                        return rendered

    # Fall back to the newest template that can actually be rendered.
    for t in sorted(templates, key=lambda e: e.get("created_at") or 0, reverse=True):
        rendered = _render(t)
        if rendered:
            return rendered
    return None