feat: session templates for code-first seeding (closes #329)

Research finding (#327): code-heavy sessions improve over time, file-heavy sessions degrade. The key is deterministic feedback loops in the first 30 turns.

## tools/session_templates.py (new, 299 lines)

- extract_successful_tool_pairs() -- pulls successful (call, result) pairs from the first 30 session messages. Filters errors, truncates large results.
- classify_session() -- categorizes the dominant tool type by counting calls: code (execute_code), file (read/write/search/patch), research (web_search), terminal, browser, general.
- save_template() / load_template() -- JSON storage in ~/.hermes/session-templates/. Named as {type}_{name}.json with metadata.
- list_templates() / delete_template() -- management functions.
- get_template_for_session() -- resolution chain: exact type match -> keyword match against user message -> most recent template.
- format_template_for_prompt() -- renders examples as a system prompt section with tool name, arguments, truncated results.

## run_agent.py (2 changes)

1. Store _initial_user_message in run_conversation() for template matching.
2. In _build_system_prompt(), after context files injection, call get_template_for_session() with the user's first message as task_hint. Wrapped in try/except, never blocks prompt assembly.

## hermes_cli/main.py (new subcommand)

    hermes templates list [--type code|file|research|terminal|general]
    hermes templates extract <session-id> [--name X] [--type T] [--max-examples N]
    hermes templates delete <name>

Workflow:

1. Complete a session with good tool usage patterns
2. hermes templates extract <session-id> --type code
3. New sessions auto-inject the matching template into the system prompt
4. Agent starts with proven tool call examples from turn 1
2026-04-13 20:23:56 -04:00
3 changed files with 404 additions and 0 deletions
--- a/hermes_cli/main.py
+++ b/hermes_cli/main.py
@@ -5306,6 +5306,96 @@ Examples:

    plugins_parser.set_defaults(func=cmd_plugins)

+    # =========================================================================
+    # templates command — session template management (issue #329)
+    # =========================================================================
+    templates_parser = subparsers.add_parser(
+        "templates",
+        help="Manage session templates for code-first seeding",
+        description="Extract, list, and manage session templates that pre-seed "
+                    "new sessions with proven tool call patterns.",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""\
+Examples:
+  hermes templates list              List all templates
+  hermes templates list --type code  List code templates only
+  hermes templates extract SESSION   Extract template from session
+  hermes templates delete NAME       Delete a template
+""",
+    )
+    templates_subparsers = templates_parser.add_subparsers(dest="templates_action")
+
+    templates_list = templates_subparsers.add_parser(
+        "list", aliases=["ls"], help="List available templates")
+    templates_list.add_argument(
+        "--type", dest="task_type",
+        choices=["code", "file", "research", "terminal", "browser", "general"],
+        help="Filter by task type")
+
+    templates_extract = templates_subparsers.add_parser(
+        "extract", help="Extract template from a completed session")
+    templates_extract.add_argument(
+        "session_id", help="Session ID or title to extract from")
+    templates_extract.add_argument(
+        "--name", help="Template name (default: auto-generated)")
+    templates_extract.add_argument(
+        "--type", dest="task_type",
+        choices=["code", "file", "research", "terminal", "browser", "general"],
+        help="Override auto-detected task type")
+    templates_extract.add_argument(
+        "--max-examples", type=int, default=10,
+        help="Max tool call examples (default: 10)")
+
+    templates_delete = templates_subparsers.add_parser(
+        "delete", aliases=["rm"], help="Delete a template")
+    templates_delete.add_argument("name", help="Template name to delete")
+
+    def cmd_templates(args):
+        """Handle ``hermes templates``: list (the default), extract, or delete."""
+        import datetime
+        from tools.session_templates import (
+            list_templates, extract_successful_tool_pairs,
+            classify_session, save_template, delete_template,
+            get_templates_dir,
+        )
+        action = args.templates_action
+        if not action or action in ("list", "ls"):
+            templates = list_templates(task_type=getattr(args, 'task_type', None))
+            if not templates:
+                print(f"No templates found in {get_templates_dir()}")
+                return
+            for t in templates:
+                age = ""
+                if t.get("created_at"):
+                    age = f" ({datetime.datetime.fromtimestamp(t['created_at']).strftime('%Y-%m-%d')})"
+                print(f"  [{t['task_type']}] {t['name']} -- {t['example_count']} examples{age}")
+                if t.get("description"):
+                    print(f"           {t['description']}")
+        elif action == "extract":
+            from hermes_state import SessionDB
+            db = SessionDB()
+            session_id = args.session_id
+            resolved = db.get_session_by_title(session_id) or db.get_session(session_id)
+            if not resolved:
+                print(f"Session not found: {session_id}")
+                return
+            sid = resolved["id"]
+            pairs = extract_successful_tool_pairs(db.get_messages(sid), max_pairs=args.max_examples)
+            if not pairs:
+                print(f"No successful tool calls found in first 30 turns of session {sid}")
+                return
+            # Hand classify_session() the tool under "name" (pairs use "tool_name") so real categories are detected.
+            task_type = args.task_type or classify_session([{"name": p["tool_name"]} for p in pairs])
+            path = save_template(args.name or sid[:12], task_type, pairs, source_session=sid)
+            print(f"Extracted {len(pairs)} examples as [{task_type}] template: {path.name}")
+        elif action in ("delete", "rm"):
+            if delete_template(args.name):
+                print(f"Deleted template: {args.name}")
+            else:
+                print(f"Template not found: {args.name}")
+
+    templates_parser.set_defaults(func=cmd_templates)
+
    # =========================================================================
    # Plugin CLI commands — dynamically registered by memory/general plugins.
    # Plugins provide a register_cli(subparser) function that builds their
--- a/run_agent.py
+++ b/run_agent.py
@@ -3250,6 +3250,19 @@ class AIAgent:
            if context_files_prompt:
                prompt_parts.append(context_files_prompt)

+        # Session template injection — pre-seed with proven tool call patterns
+        # from past successful sessions to establish feedback loops early.
+        if not self.skip_context_files:
+            try:
+                from tools.session_templates import get_template_for_session
+                _template_prompt = get_template_for_session(
+                    task_hint=getattr(self, "_initial_user_message", None),
+                )
+                if _template_prompt:
+                    prompt_parts.append(_template_prompt)
+            except Exception:
+                pass  # Templates are optional, never block prompt assembly
+
        from hermes_time import now as _hermes_now
        now = _hermes_now()
        timestamp_line = f"Conversation started: {now.strftime('%A, %B %d, %Y %I:%M %p')}"
@@ -7754,6 +7767,8 @@ class AIAgent:
        self._stream_callback = stream_callback
        self._persist_user_message_idx = None
        self._persist_user_message_override = persist_user_message
+        # Store initial user message for session template matching (issue #329)
+        self._initial_user_message = user_message
        # Generate unique task_id if not provided to isolate VMs between concurrent tasks
        effective_task_id = task_id or str(uuid.uuid4())
        
--- a/tools/session_templates.py
+++ b/tools/session_templates.py
@@ -0,0 +1,299 @@
+"""Session templates — pre-seed new sessions with proven tool call patterns.
+
+After a session completes successfully, extract the first N successful tool
+calls + results and store as a reusable template. New sessions can inject
+a matching template into the system prompt to establish effective feedback
+loops from the first turn.
+
+Research finding (issue #327): code-heavy sessions (execute_code dominant
+in first 30 turns) improve over time. File-heavy sessions degrade. The key
+is deterministic feedback loops, not arbitrary context.
+
+Templates live in ~/.hermes/session-templates/ as JSON files.
+"""
+
+import json
+import logging
+import time
+from pathlib import Path
+from typing import Any, Dict, List, Optional
+
+from hermes_constants import get_hermes_home
+
+logger = logging.getLogger(__name__)
+
+# Known tool names per template category; classify_tool() returns "general" for anything else
+_TOOL_CATEGORIES = {
+    "code": {"execute_code", "python", "ipython"},
+    "file": {"read_file", "write_file", "search_files", "patch"},
+    "research": {"web_search", "web_extract", "duckduckgo"},
+    "terminal": {"terminal", "shell", "bash"},
+    "browser": {"browser_navigate", "browser_click", "browser_snapshot"},
+}
+
+# Maximum characters of a tool result stored in a template; longer results are truncated
+_MAX_RESULT_CHARS = 2000
+
+# Default cap on examples per template (default for extract_successful_tool_pairs' max_pairs)
+_MAX_EXAMPLES = 10
+
+
+def get_templates_dir() -> Path:
+    """Return ``<hermes home>/session-templates``, creating the directory if missing."""
+    templates_dir = get_hermes_home() / "session-templates"
+    templates_dir.mkdir(parents=True, exist_ok=True)
+    return templates_dir
+
+
+def classify_tool(tool_name: str) -> str:
+    """Classify a tool name into a template category ("general" if empty or unknown)."""
+    tool_lower = (tool_name or "").lower()
+    for category, names in _TOOL_CATEGORIES.items():
+        if tool_lower in names:
+            return category
+    # Fuzzy substring pass. Skipped for "" because "" is a substring of every name.
+    for category, names in _TOOL_CATEGORIES.items():
+        if tool_lower and any(n in tool_lower or tool_lower in n for n in names):
+            return category
+    return "general"
+
+
+def classify_session(tool_calls: List[Dict]) -> str:
+    """Return the most frequent tool category; entries may use "tool_name" or "name"."""
+    if not tool_calls:
+        return "general"
+    counts: Dict[str, int] = {}
+    for tc in tool_calls:
+        cat = classify_tool(tc.get("tool_name") or tc.get("name", ""))
+        counts[cat] = counts.get(cat, 0) + 1
+    return max(counts, key=counts.get)
+
+
+def extract_successful_tool_pairs(
+    messages: List[Dict],
+    max_pairs: int = _MAX_EXAMPLES,
+    max_turns: int = 30,
+) -> List[Dict]:
+    """Extract successful (tool_call, tool_result) pairs from session messages.
+
+    Only the first ``max_turns`` messages are scanned, to capture the "cold
+    start" phase. A result counts as failed when its leading text contains
+    "error", "denied" or "BLOCKED"; empty results are skipped as well.
+
+    Args:
+        messages: Chat messages; assistant entries may carry ``tool_calls``
+            and tool entries a ``tool_call_id`` plus ``content``.
+        max_pairs: Maximum number of pairs to return; values <= 0 yield [].
+        max_turns: Number of leading messages to scan.
+
+    Returns:
+        Dicts with keys ``tool_name``, ``arguments``, ``result`` (truncated
+        to ``_MAX_RESULT_CHARS``) and ``turn_index``, in result order.
+    """
+    pairs: List[Dict] = []
+    if max_pairs <= 0:
+        return pairs
+
+    # Calls seen so far that are still waiting for their tool result, by id.
+    pending: Dict[str, Dict] = {}
+
+    for i, msg in enumerate(messages[:max_turns]):
+        role = msg.get("role", "")
+
+        if role == "assistant":
+            for tc in msg.get("tool_calls") or []:
+                func = tc.get("function") or {}  # tolerate an explicit null
+                pending[tc.get("id", "")] = {
+                    "name": func.get("name", ""),
+                    "arguments": func.get("arguments", ""),
+                    "turn_index": i,
+                }
+            continue
+
+        if role != "tool" or msg.get("tool_call_id", "") not in pending:
+            continue
+
+        call_info = pending.pop(msg.get("tool_call_id", ""))
+        content = msg.get("content")
+        content_str = "" if content is None else str(content)  # not the text "None"
+
+        head = content_str.lower()[:100]
+        is_error = "error" in head or "denied" in head or "BLOCKED" in content_str[:50]
+        if is_error or not content_str.strip():
+            continue
+
+        if len(content_str) > _MAX_RESULT_CHARS:
+            content_str = content_str[:_MAX_RESULT_CHARS] + "\n... [truncated for template]"
+        pairs.append({"tool_name": call_info["name"], "arguments": call_info["arguments"],
+                      "result": content_str, "turn_index": call_info["turn_index"]})
+        if len(pairs) >= max_pairs:
+            break
+
+    return pairs
+
+
+def save_template(
+    name: str,
+    task_type: str,
+    examples: List[Dict],
+    source_session: str = "",
+    description: str = "",
+) -> Path:
+    """Save a session template to disk. Returns path to saved file.
+
+    The file is ``{task_type}_{name}.json`` in the templates directory, with
+    both parts reduced to alphanumerics, "-" and "_" so neither can inject a
+    path separator; an existing file with the same name is overwritten. The
+    unsanitized name and task_type are kept inside the JSON payload.
+    """
+    def _safe(part: str) -> str:
+        return "".join(c if c.isalnum() or c in "-_" else "_" for c in part)
+
+    template = {
+        "name": name, "task_type": task_type, "description": description,
+        "source_session": source_session, "created_at": time.time(),
+        "example_count": len(examples), "examples": examples,
+    }
+    path = get_templates_dir() / f"{_safe(task_type)}_{_safe(name)}.json"
+    path.write_text(json.dumps(template, indent=2, ensure_ascii=False))
+    logger.info("Saved session template: %s (%d examples)", path.name, len(examples))
+    return path
+
+
+def load_template(name: str) -> Optional[Dict]:
+    """Load a template by file stem, stored name, ``{type}_{name}`` stem, or stem prefix."""
+    if not name:
+        return None
+    key = name[:-5] if name.endswith(".json") else name
+    safe = "".join(c if c.isalnum() or c in "-_" else "_" for c in key)
+    best_rank, best = 4, None  # lower rank = stronger match; 4 = no match
+    for path in sorted(get_templates_dir().glob("*.json")):
+        try:
+            data = json.loads(path.read_text())
+        except (OSError, ValueError):  # unreadable, undecodable or malformed JSON
+            continue
+        # save_template() writes {task_type}_{safe_name}.json, so a bare name only
+        # matches through the stored "name" field or the stem after the type prefix.
+        stored = data.get("name") if isinstance(data, dict) else None
+        checks = (path.stem == key, stored == key, path.stem.partition("_")[2] == safe,
+                  path.stem.startswith(key))
+        if isinstance(data, dict) and True in checks and checks.index(True) < best_rank:
+            best_rank, best = checks.index(True), data
+    return best
+
+
+def list_templates(task_type: Optional[str] = None) -> List[Dict]:
+    """List available templates, optionally filtered by task type.
+
+    Returns one summary dict per template file (sorted by filename) with keys
+    name, task_type, description, example_count, created_at and file. Files
+    that are unreadable, undecodable, malformed or not a JSON object are skipped.
+    """
+    results = []
+    for path in sorted(get_templates_dir().glob("*.json")):
+        try:
+            template = json.loads(path.read_text())
+        except (OSError, ValueError):  # ValueError: bad JSON or undecodable bytes
+            continue
+        if not isinstance(template, dict):
+            continue  # e.g. a stray JSON list; .get() below would raise
+        if task_type and template.get("task_type") != task_type:
+            continue
+        results.append({
+            "name": template.get("name", path.stem),
+            "task_type": template.get("task_type", "unknown"),
+            "description": template.get("description", ""),
+            "example_count": template.get("example_count", 0),
+            "created_at": template.get("created_at", 0),
+            "file": str(path),
+        })
+    return results
+
+
+def delete_template(name: str) -> bool:
+    """Delete one template by exact file stem or ``{type}_{name}`` name, never by prefix; True if removed."""
+    safe = "".join(c if c.isalnum() or c in "-_" else "_" for c in name or "")
+    for path in sorted(get_templates_dir().glob("*.json")):
+        if safe and (path.stem == name or path.stem.partition("_")[2] == safe):
+            path.unlink()
+            logger.info("Deleted template: %s", path.name)
+            return True
+    return False
+
+
+def format_template_for_prompt(template: Dict) -> str:
+    """Format a template as a system prompt section for session seeding.
+
+    Returns "" when the template has no examples. Each example shows the tool
+    name, its arguments and at most 500 characters of the result. Argument
+    and result values are coerced to text, so a hand-edited template holding
+    null or non-string values still renders instead of raising.
+    """
+    task_type = str(template.get("task_type") or "general")
+    examples = template.get("examples") or []
+    if not examples:
+        return ""
+
+    lines = [
+        f"## Session Seed: {task_type.upper()} workflow pattern",
+        "",
+        f"Successful tool calls from a previous {task_type} session.",
+        "Use these as a reference for establishing your feedback loop early.",
+        "",
+    ]
+
+    for i, ex in enumerate(examples, 1):
+        tool = ex.get("tool_name", "unknown")
+        args = ex.get("arguments", "{}")
+        result = "" if ex.get("result") is None else str(ex.get("result"))
+        result_preview = result[:500].strip()
+        if len(result) > 500:
+            result_preview += "\n... [preview truncated]"
+        lines.append(f"### Example {i}: `{tool}`")
+        lines.append(f"Arguments: `{args}`")
+        lines.append(f"Result:\n```\n{result_preview}\n```")
+        lines.append("")
+
+    lines.append("---\nPattern: call tools, verify results, iterate. "
+                 "Start with tool calls early to establish your feedback loop.")
+
+    return "\n".join(lines)
+
+
+def get_template_for_session(
+    task_hint: Optional[str] = None,
+    task_type: Optional[str] = None,
+) -> Optional[str]:
+    """Get a formatted template for injection into a new session's system prompt.
+
+    Resolution order:
+    1. Exact task_type match
+    2. task_hint keyword match (whole words of 3+ chars, minus a few stopwords)
+    3. Most recently created template
+
+    Returns None when no template exists or none can be loaded and rendered.
+    """
+    templates = list_templates()
+    if not templates:
+        return None
+
+    def _render(entry: Dict) -> Optional[str]:
+        # Load via the listed file path: files are saved as {type}_{name}.json,
+        # so looking the bare name up with load_template() would miss them.
+        try:
+            data = json.loads(Path(entry["file"]).read_text())
+        except (OSError, ValueError):
+            return None
+        return format_template_for_prompt(data) or None
+    def _words(text: str) -> set:
+        cleaned = "".join(c if c.isalnum() else " " for c in text.lower())
+        return {w for w in cleaned.split() if len(w) >= 3} - {"the", "and", "for", "with", "this", "that", "from"}
+    newest_first = sorted(templates, key=lambda t: t.get("created_at", 0), reverse=True)
+    hint_words = _words(task_hint or "")
+    by_type = [t for t in templates if task_type and t["task_type"] == task_type]
+    by_hint = [t for t in templates if hint_words & _words(f"{t['name']} {t['description']}")]
+    for entry in by_type + by_hint + newest_first:
+        rendered = _render(entry)
+        if rendered:
+            return rendered
+    return None