feat(templates): Session templates for code-first seeding (#329)

Implement session templates, motivated by the research finding that code-heavy sessions improve over time. Features: task type classification (code/file/research/mixed); template extraction from successful sessions; template storage in ~/.hermes/session-templates/; template injection into new sessions; a CLI with list/create/delete commands. Resolves #329.
Merge pull request 'perf: lazy session creation — defer DB write until first message (#314)' (#449) from whip/314-1776127532 into main
2026-04-14 01:47:03 +00:00 · 2026-04-14 01:08:13 +00:00 · 2026-04-13 20:52:06 -04:00
2 changed files with 275 additions and 24 deletions
--- a/run_agent.py
+++ b/run_agent.py
@@ -1001,30 +1001,10 @@ class AIAgent:
        self._session_db = session_db
        self._parent_session_id = parent_session_id
        self._last_flushed_db_idx = 0  # tracks DB-write cursor to prevent duplicate writes
-        if self._session_db:
-            try:
-                self._session_db.create_session(
-                    session_id=self.session_id,
-                    source=self.platform or os.environ.get("HERMES_SESSION_SOURCE", "cli"),
-                    model=self.model,
-                    model_config={
-                        "max_iterations": self.max_iterations,
-                        "reasoning_config": reasoning_config,
-                        "max_tokens": max_tokens,
-                    },
-                    user_id=None,
-                    parent_session_id=self._parent_session_id,
-                )
-            except Exception as e:
-                # Transient SQLite lock contention (e.g. CLI and gateway writing
-                # concurrently) must NOT permanently disable session_search for
-                # this agent.  Keep _session_db alive — subsequent message
-                # flushes and session_search calls will still work once the
-                # lock clears.  The session row may be missing from the index
-                # for this run, but that is recoverable (flushes upsert rows).
-                logger.warning(
-                    "Session DB create_session failed (session_search still available): %s", e
-                )
+        # Lazy session creation: defer until first message flush (#314).
+        # _flush_messages_to_session_db() calls ensure_session() which uses
+        # INSERT OR IGNORE — creating the row only when messages arrive.
+        # This avoids writing rows for the 32% of sessions that are created but never used.
        
        # In-memory todo list for task planning (one per agent/session)
        from tools.todo_tool import TodoStore
--- a/tools/session_templates.py
+++ b/tools/session_templates.py
@@ -0,0 +1,271 @@
+"""
+Session templates for code-first seeding.
+
+Research finding: Code-heavy sessions (execute_code dominant in first 30 turns)
+improve over time. File-heavy sessions degrade. Key is deterministic feedback loops.
+"""
+
+import json
+import logging
+import sqlite3
+import time
+from pathlib import Path
+from typing import Dict, List, Optional, Any
+from dataclasses import dataclass, asdict
+from enum import Enum
+
+# Module-level logger, named after this module.
+logger = logging.getLogger(__name__)
+
+# Default directory for template JSON files: ~/.hermes/session-templates.
+TEMPLATE_DIR = Path.home() / ".hermes" / "session-templates"
+
+
+class TaskType(Enum):
+    """Category of a session, derived from which tools dominate its tool calls.
+
+    The string values are what gets written to template JSON files and what
+    the CLI accepts for ``--type``.
+    """
+
+    # Mostly code-execution tool calls.
+    CODE = "code"
+    # Mostly file read/write/patch/search tool calls.
+    FILE = "file"
+    # Mostly web search/fetch/browser tool calls.
+    RESEARCH = "research"
+    # No single category dominates (also used when there are no tool calls).
+    MIXED = "mixed"
+
+
+@dataclass
+class ToolExample:
+    """One recorded tool call together with the result it produced."""
+
+    # Name of the tool/function that was called.
+    tool_name: str
+    # Arguments passed to the tool.
+    arguments: Dict[str, Any]
+    # Content of the tool's reply; empty string when no result was captured.
+    result: str
+    # Whether the call is considered successful.
+    success: bool
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Return the example as a plain dict suitable for JSON serialization."""
+        return asdict(self)
+
+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> "ToolExample":
+        """Build an example from a dict whose keys match the field names.
+
+        Raises:
+            TypeError: If ``data`` has missing or unexpected keys.
+        """
+        return cls(**data)
+
+
+@dataclass
+class SessionTemplate:
+    """A named collection of tool-call examples for one task type."""
+
+    # Unique template name; also used as the JSON file stem on disk.
+    name: str
+    # Task category this template applies to.
+    task_type: TaskType
+    # Recorded tool calls, in the order they were extracted.
+    examples: List[ToolExample]
+    # Creation time as a Unix timestamp; 0.0 means "not set yet".
+    created_at: float = 0.0
+    # Number of times the template has been injected into a session.
+    usage_count: int = 0
+
+    def __post_init__(self):
+        """Stamp the template with the current time when none was supplied."""
+        # 0.0 is the "not provided" sentinel, so templates loaded from disk
+        # keep their stored timestamp.
+        if self.created_at == 0.0:
+            self.created_at = time.time()
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Return a JSON-serializable dict; ``task_type`` becomes its string value."""
+        data = asdict(self)
+        data['task_type'] = self.task_type.value
+        return data
+
+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> "SessionTemplate":
+        """Build a template from the dict form produced by :meth:`to_dict`.
+
+        The input dict is left untouched, so callers may keep using it.
+
+        Raises:
+            KeyError: If ``task_type`` is missing.
+            ValueError: If ``task_type`` is not a valid ``TaskType`` value.
+            TypeError: If required fields are missing or unknown keys are present.
+        """
+        # Work on a shallow copy: converting fields in place would silently
+        # rewrite the caller's dict.
+        fields = dict(data)
+        fields['task_type'] = TaskType(fields['task_type'])
+        # ``or []`` also tolerates an explicit ``"examples": null``.
+        fields['examples'] = [
+            ToolExample.from_dict(e) for e in fields.get('examples') or []
+        ]
+        return cls(**fields)
+
+
+class SessionTemplates:
+    """Store of session templates persisted as JSON files, one file per template.
+
+    Templates live in ``template_dir`` as ``<name>.json`` and are mirrored in
+    the in-memory ``templates`` dict, keyed by template name.
+    """
+
+    def __init__(self, template_dir=None):
+        """Create the template directory if needed and load existing templates.
+
+        Args:
+            template_dir: Directory holding the template files, as a ``Path``
+                or a path string. Defaults to ``TEMPLATE_DIR``.
+        """
+        # Path() leaves Path inputs unchanged and lets callers pass a string.
+        self.template_dir = Path(template_dir) if template_dir else TEMPLATE_DIR
+        self.template_dir.mkdir(parents=True, exist_ok=True)
+        self.templates: Dict[str, SessionTemplate] = {}
+        self._load()
+
+    def _load(self):
+        """Load every ``*.json`` file in the template directory into memory.
+
+        A file that cannot be read or parsed is skipped with a warning so one
+        corrupt template does not prevent the others from loading.
+        """
+        for path in self.template_dir.glob("*.json"):
+            try:
+                with open(path, encoding="utf-8") as fh:
+                    data = json.load(fh)
+                template = SessionTemplate.from_dict(data)
+            except Exception as e:
+                logger.warning(f"Failed to load {path}: {e}")
+                continue
+            self.templates[template.name] = template
+
+    def _save(self, template):
+        """Write ``template`` to ``<template_dir>/<name>.json``, replacing any old file."""
+        path = self.template_dir / f"{template.name}.json"
+        with open(path, 'w', encoding="utf-8") as f:
+            json.dump(template.to_dict(), f, indent=2)
+
+    def classify(self, tool_calls):
+        """Classify a session by the tool category that dominates its calls.
+
+        Args:
+            tool_calls: Sequence of dicts, each with a ``tool_name`` key.
+
+        Returns:
+            The ``TaskType`` whose tools make up more than 60% of the calls,
+            or ``TaskType.MIXED`` when none does or the input is empty.
+        """
+        if not tool_calls:
+            return TaskType.MIXED
+
+        # The tool sets are disjoint, so at most one category can exceed 60%.
+        categories = (
+            (TaskType.CODE, {'execute_code', 'code_execution'}),
+            (TaskType.FILE, {'read_file', 'write_file', 'patch', 'search_files'}),
+            (TaskType.RESEARCH, {'web_search', 'web_fetch', 'browser_navigate'}),
+        )
+
+        names = [tc.get('tool_name', '') for tc in tool_calls]
+        total = len(names)
+
+        for task_type, tools in categories:
+            if sum(1 for n in names if n in tools) / total > 0.6:
+                return task_type
+        return TaskType.MIXED
+
+    @staticmethod
+    def _parse_tool_calls(raw, limit):
+        """Parse one assistant message's ``tool_calls`` JSON into examples.
+
+        Returns a list in call order holding a ``ToolExample`` for each usable
+        call and ``None`` for each call that had to be skipped, so positions
+        still line up with the tool results that follow. Parsing stops once
+        ``limit`` usable examples have been collected.
+        """
+        try:
+            calls = json.loads(raw)
+        except (TypeError, ValueError):
+            return []
+        if not isinstance(calls, list):
+            return []
+
+        parsed = []
+        kept = 0
+        for call in calls:
+            if kept >= limit:
+                break
+            fn = call.get('function') if isinstance(call, dict) else None
+            name = fn.get('name') if isinstance(fn, dict) else None
+            if not name:
+                parsed.append(None)
+                continue
+            raw_args = fn.get('arguments', '{}')
+            if isinstance(raw_args, str):
+                try:
+                    args = json.loads(raw_args)
+                except ValueError:
+                    args = {}
+            else:
+                args = raw_args
+            if not isinstance(args, dict):
+                args = {}
+            parsed.append(ToolExample(name, args, "", True))
+            kept += 1
+        return parsed
+
+    def extract(self, session_id, max_examples=10, db_path=None):
+        """Extract tool-call examples from a stored session.
+
+        Reads up to the first 100 messages of the session (ordered by
+        timestamp), turns each assistant tool call into a ``ToolExample`` and
+        attaches the content of the following ``tool`` messages as results.
+
+        Args:
+            session_id: ID of the session to read.
+            max_examples: Maximum number of examples to return.
+            db_path: SQLite database to read. Defaults to ``~/.hermes/state.db``.
+
+        Returns:
+            A list of ``ToolExample`` (possibly empty). Never raises: a missing
+            database or any failure yields ``[]``, with failures logged.
+        """
+        db_path = Path(db_path) if db_path else Path.home() / ".hermes" / "state.db"
+        if not db_path.exists():
+            return []
+
+        try:
+            conn = sqlite3.connect(str(db_path))
+            try:
+                conn.row_factory = sqlite3.Row
+                rows = conn.execute("""
+                    SELECT role, content, tool_calls
+                    FROM messages WHERE session_id = ?
+                    ORDER BY timestamp LIMIT 100
+                """, (session_id,)).fetchall()
+            finally:
+                # Close even when the query fails so the handle is not leaked.
+                conn.close()
+
+            examples = []
+            # Calls from the latest assistant message still waiting for a
+            # result. Results are matched by position; this assumes tool rows
+            # are stored in the same order as their calls -- TODO confirm
+            # against the code that writes the messages table.
+            pending = []
+            for row in rows:
+                # Keep reading after the cap is reached until the last
+                # examples have received their results.
+                if len(examples) >= max_examples and not pending:
+                    break
+
+                if row['role'] == 'assistant' and row['tool_calls']:
+                    remaining = max_examples - len(examples)
+                    if remaining <= 0:
+                        break
+                    pending = self._parse_tool_calls(row['tool_calls'], remaining)
+                    examples.extend(ex for ex in pending if ex is not None)
+                elif row['role'] == 'tool' and pending:
+                    target = pending.pop(0)
+                    if target is not None:
+                        target.result = row['content'] or ""
+
+            return examples
+        except Exception as e:
+            logger.error(f"Extract failed: {e}")
+            return []
+
+    def create(self, session_id, name=None, task_type=None, max_examples=10):
+        """Create, persist and register a template from a stored session.
+
+        Args:
+            session_id: Session to extract examples from.
+            name: Template name. Defaults to
+                ``<task type>_<first 8 chars of session id>_<unix time>``.
+            task_type: Task category. Defaults to the result of classifying
+                the extracted examples.
+            max_examples: Maximum number of examples to store.
+
+        Returns:
+            The new ``SessionTemplate``, or ``None`` when no examples could be
+            extracted.
+        """
+        examples = self.extract(session_id, max_examples)
+        if not examples:
+            return None
+
+        if task_type is None:
+            task_type = self.classify([{'tool_name': e.tool_name} for e in examples])
+
+        if name is None:
+            name = f"{task_type.value}_{session_id[:8]}_{int(time.time())}"
+
+        template = SessionTemplate(name, task_type, examples)
+        # Persist first so a failed write does not leave a memory-only template.
+        self._save(template)
+        self.templates[name] = template
+        logger.info(f"Created template {name} with {len(examples)} examples")
+        return template
+
+    def get(self, task_type):
+        """Return the least-used template of ``task_type``, or ``None`` if there is none."""
+        matching = [t for t in self.templates.values() if t.task_type == task_type]
+        if not matching:
+            return None
+        # min() returns the first minimum, matching a stable sort's first item.
+        return min(matching, key=lambda t: t.usage_count)
+
+    def inject(self, template, messages):
+        """Insert the template's examples into ``messages`` as a fake tool history.
+
+        A system header, then one assistant tool-call message and one tool
+        result message per example, are inserted right after the leading run
+        of system messages. The template's usage count is incremented and
+        saved.
+
+        Args:
+            template: Template whose examples are injected.
+            messages: Chat message list; modified in place.
+
+        Returns:
+            The same ``messages`` list.
+        """
+        if not template.examples:
+            return messages
+
+        injection = [{
+            "role": "system",
+            "content": f"Template: {template.name} ({template.task_type.value})\nSuccessful tool call examples:"
+        }]
+
+        for i, ex in enumerate(template.examples):
+            call_id = f"tpl_{i}"
+            injection.append({
+                "role": "assistant",
+                "content": None,
+                "tool_calls": [{
+                    "id": call_id,
+                    "type": "function",
+                    "function": {"name": ex.tool_name, "arguments": json.dumps(ex.arguments)}
+                }]
+            })
+            injection.append({
+                "role": "tool",
+                "tool_call_id": call_id,
+                "content": ex.result
+            })
+
+        # Find the end of the leading system messages.
+        idx = 0
+        for i, msg in enumerate(messages):
+            if msg.get("role") != "system":
+                break
+            idx = i + 1
+
+        # One slice assignment instead of repeated insert() calls.
+        messages[idx:idx] = injection
+
+        template.usage_count += 1
+        try:
+            self._save(template)
+        except OSError as e:
+            # The counter is bookkeeping only; messages are already updated,
+            # so a failed write must not abort the caller.
+            logger.warning(f"Failed to persist usage count for {template.name}: {e}")
+        return messages
+
+    def list(self, task_type=None):
+        """Return templates, newest first, optionally filtered by ``task_type``."""
+        templates = self.templates.values()
+        if task_type:
+            templates = [t for t in templates if t.task_type == task_type]
+        return sorted(templates, key=lambda t: t.created_at, reverse=True)
+
+    def delete(self, name):
+        """Remove a template from memory and disk.
+
+        Returns:
+            ``True`` if the template was known and removed, ``False`` otherwise.
+        """
+        if name not in self.templates:
+            return False
+        del self.templates[name]
+        try:
+            (self.template_dir / f"{name}.json").unlink()
+        except FileNotFoundError:
+            # Already gone from disk; nothing left to remove.
+            pass
+        return True
+
+
+if __name__ == "__main__":
+    # Minimal command-line front end: list, create or delete templates.
+    import argparse
+
+    type_choices = ["code", "file", "research", "mixed"]
+
+    cli = argparse.ArgumentParser()
+    commands = cli.add_subparsers(dest="cmd")
+
+    # list [--type TYPE]
+    list_cmd = commands.add_parser("list")
+    list_cmd.add_argument("--type", choices=type_choices)
+
+    # create SESSION_ID [--name NAME] [--type TYPE] [--max N]
+    create_cmd = commands.add_parser("create")
+    create_cmd.add_argument("session_id")
+    create_cmd.add_argument("--name")
+    create_cmd.add_argument("--type", choices=type_choices)
+    create_cmd.add_argument("--max", type=int, default=10)
+
+    # delete NAME
+    delete_cmd = commands.add_parser("delete")
+    delete_cmd.add_argument("name")
+
+    opts = cli.parse_args()
+    store = SessionTemplates()
+
+    if opts.cmd == "list":
+        wanted = TaskType(opts.type) if opts.type else None
+        for tpl in store.list(wanted):
+            print(f"{tpl.name}: {tpl.task_type.value} ({len(tpl.examples)} examples, used {tpl.usage_count}x)")
+    elif opts.cmd == "create":
+        wanted = TaskType(opts.type) if opts.type else None
+        created = store.create(opts.session_id, opts.name, wanted, opts.max)
+        if not created:
+            print("Failed")
+        else:
+            print(f"Created: {created.name} ({len(created.examples)} examples)")
+    elif opts.cmd == "delete":
+        removed = store.delete(opts.name)
+        print("Deleted" if removed else "Not found")
+    else:
+        # No subcommand given.
+        cli.print_help()