Compare commits

...

1 Commits

Author SHA1 Message Date
52763d0d58 feat(templates): Session templates for code-first seeding (#329)
Some checks failed
Forge CI / smoke-and-build (pull_request) Failing after 51s
Implement session templates based on research: code-heavy sessions improve over time.

Features:
- Task type classification (code/file/research/mixed)
- Template extraction from successful sessions
- Template storage in ~/.hermes/session-templates/
- Template injection into new sessions
- CLI: list/create/delete

Resolves #329
2026-04-14 01:47:03 +00:00

271
tools/session_templates.py Normal file
View File

@@ -0,0 +1,271 @@
"""
Session templates for code-first seeding.
Research finding: code-heavy sessions (where execute_code is the dominant tool
in the first 30 turns) improve over time, whereas file-heavy sessions degrade.
The key factor is deterministic feedback loops.
"""
import json
import logging
import sqlite3
import time
from pathlib import Path
from typing import Dict, List, Optional, Any
from dataclasses import dataclass, asdict
from enum import Enum
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Default directory in which templates are stored (one JSON file per template).
TEMPLATE_DIR = Path.home().joinpath(".hermes", "session-templates")
class TaskType(Enum):
    """Category a session or template is labelled with.

    The member values are the lowercase strings used when a template is
    serialized to JSON and when a type is selected on the command line.
    """

    # Sessions dominated by code-execution tool calls.
    CODE = "code"
    # Sessions dominated by file read/write/patch/search tool calls.
    FILE = "file"
    # Sessions dominated by web search/fetch/browser tool calls.
    RESEARCH = "research"
    # Sessions in which no single category dominates (also used for empty input).
    MIXED = "mixed"
@dataclass
class ToolExample:
    """A single recorded tool call kept as an example inside a template."""

    # Name of the tool (function) that was called.
    tool_name: str
    # Arguments the tool was called with.
    arguments: Dict[str, Any]
    # Text returned by the tool; "" when no result has been recorded.
    result: str
    # Success flag; extract() currently always records True here.
    success: bool

    def to_dict(self) -> Dict[str, Any]:
        """Return the fields of this example as a plain dictionary."""
        as_plain_dict = asdict(self)
        return as_plain_dict

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "ToolExample":
        """Build an example from a mapping whose keys are the field names."""
        return cls(**data)
@dataclass
class SessionTemplate:
    """A named collection of tool-call examples for one task type."""

    # Unique template name; also used as the JSON file's base name on disk.
    name: str
    # Category of the session the examples were taken from.
    task_type: TaskType
    # Recorded tool calls that make up the template.
    examples: List[ToolExample]
    # Creation time in seconds since the epoch; 0.0 means "set it to now".
    created_at: float = 0.0
    # Number of times the template has been injected into a session.
    usage_count: int = 0

    def __post_init__(self):
        """Stamp the template with the current time when none was supplied."""
        if self.created_at == 0.0:
            self.created_at = time.time()

    def to_dict(self):
        """Return a JSON-serializable dict; ``task_type`` becomes its string value."""
        data = asdict(self)
        data['task_type'] = self.task_type.value
        return data

    @classmethod
    def from_dict(cls, data):
        """Build a template from a dict in the format produced by ``to_dict``.

        Args:
            data: Mapping with the field names as keys. ``task_type`` is the
                string value of a ``TaskType``; ``examples`` is a list of
                dicts (treated as empty when the key is absent).

        Returns:
            The reconstructed ``SessionTemplate``.

        Raises:
            KeyError: If ``task_type`` is missing.
            ValueError: If ``task_type`` is not a valid ``TaskType`` value.
        """
        # Work on a shallow copy: converting the fields in place would leave
        # the caller's dict holding an enum and ToolExample objects, so it
        # could not be parsed (or dumped to JSON) a second time.
        kwargs = dict(data)
        kwargs['task_type'] = TaskType(kwargs['task_type'])
        kwargs['examples'] = [
            ToolExample.from_dict(e) for e in kwargs.get('examples', [])
        ]
        return cls(**kwargs)
class SessionTemplates:
    """Manage session templates stored as JSON files in a directory.

    Templates are held in memory in ``self.templates`` (keyed by template
    name) and mirrored on disk as ``NAME.json`` inside ``self.template_dir``.
    """

    def __init__(self, template_dir=None):
        """Create the storage directory if needed and load existing templates.

        Args:
            template_dir: Directory holding the template files, given as a
                ``Path`` or a string. ``TEMPLATE_DIR`` is used when omitted.
        """
        # Coerce to Path so that plain string paths work too.
        self.template_dir = Path(template_dir) if template_dir else TEMPLATE_DIR
        self.template_dir.mkdir(parents=True, exist_ok=True)
        self.templates = {}
        self._load()

    def _load(self):
        """Read every ``*.json`` file in the directory into ``self.templates``.

        Loading is best-effort: a file that cannot be read or parsed is
        logged as a warning and skipped.
        """
        for path in self.template_dir.glob("*.json"):
            try:
                # _save always writes ASCII-only JSON, so UTF-8 reads it on
                # every platform regardless of the locale's default encoding.
                with open(path, encoding="utf-8") as fh:
                    data = json.load(fh)
                template = SessionTemplate.from_dict(data)
                self.templates[template.name] = template
            except Exception as e:
                logger.warning(f"Failed to load {path}: {e}")

    def _save(self, template):
        """Write ``template`` to its JSON file, replacing any previous content."""
        path = self.template_dir / f"{template.name}.json"
        with open(path, 'w', encoding="utf-8") as f:
            json.dump(template.to_dict(), f, indent=2)

    def classify(self, tool_calls):
        """Label a sequence of tool calls with a ``TaskType``.

        Args:
            tool_calls: Sequence of dicts, each carrying the tool's name
                under the ``'tool_name'`` key.

        Returns:
            ``CODE``, ``FILE`` or ``RESEARCH`` when more than 60% of the calls
            use tools of that category; otherwise (or for empty input)
            ``MIXED``.
        """
        if not tool_calls:
            return TaskType.MIXED
        categories = (
            ({'execute_code', 'code_execution'}, TaskType.CODE),
            ({'read_file', 'write_file', 'patch', 'search_files'}, TaskType.FILE),
            ({'web_search', 'web_fetch', 'browser_navigate'}, TaskType.RESEARCH),
        )
        names = [tc.get('tool_name', '') for tc in tool_calls]
        total = len(names)
        # The tool sets are disjoint and the threshold is above one half, so
        # at most one category can qualify.
        for tool_names, task_type in categories:
            if sum(1 for n in names if n in tool_names) / total > 0.6:
                return task_type
        return TaskType.MIXED

    @staticmethod
    def _parse_tool_calls(raw_tool_calls, limit):
        """Turn one message's serialized tool calls into examples.

        Returns a list with one slot per call that was examined, in call
        order: a ``ToolExample`` for a usable call, ``None`` for a call that
        was skipped because it has no function name. At most ``limit``
        examples are produced. A payload that is not a JSON list yields ``[]``.
        """
        try:
            calls = json.loads(raw_tool_calls)
        except (TypeError, ValueError):
            return []
        if not isinstance(calls, list):
            return []
        slots = []
        produced = 0
        for call in calls:
            if produced >= limit:
                break
            function = call.get('function') if isinstance(call, dict) else None
            name = function.get('name') if isinstance(function, dict) else None
            if not name:
                # Keep a placeholder so this call's result is not attributed
                # to a neighbouring call.
                slots.append(None)
                continue
            raw_args = function.get('arguments', '{}')
            if isinstance(raw_args, dict):
                # Already decoded; keep it instead of discarding it.
                args = raw_args
            else:
                try:
                    args = json.loads(raw_args)
                except (TypeError, ValueError):
                    args = {}
                if not isinstance(args, dict):
                    args = {}
            slots.append(ToolExample(name, args, "", True))
            produced += 1
        return slots

    def extract(self, session_id, max_examples=10, db_path=None):
        """Collect tool-call examples from a stored session.

        Reads up to 100 messages of the session (ordered by timestamp) from
        the SQLite state database, turns the assistant's tool calls into
        ``ToolExample`` objects and fills in each example's ``result`` from
        the tool messages that follow, in call order.

        Args:
            session_id: Identifier of the session to read.
            max_examples: Maximum number of examples to return.
            db_path: Database file to read; defaults to ``~/.hermes/state.db``.

        Returns:
            The list of examples. Empty when the database file does not
            exist or when reading fails (the failure is logged).
        """
        if db_path is None:
            db_path = Path.home() / ".hermes" / "state.db"
        db_path = Path(db_path)
        if not db_path.exists():
            return []
        try:
            conn = sqlite3.connect(str(db_path))
            try:
                conn.row_factory = sqlite3.Row
                rows = conn.execute("""
                    SELECT role, content, tool_calls
                    FROM messages WHERE session_id = ?
                    ORDER BY timestamp LIMIT 100
                """, (session_id,)).fetchall()
            finally:
                # Close the connection even when the query fails.
                conn.close()

            examples = []
            # Slots of the most recent assistant message still awaiting a
            # tool result, in call order (None marks a skipped call).
            pending = []
            for row in rows:
                # Stop only once the quota is met AND every collected call has
                # had the chance to receive its result.
                if len(examples) >= max_examples and not pending:
                    break
                role = row['role']
                if role == 'tool':
                    if pending:
                        target = pending.pop(0)
                        if target is not None:
                            target.result = row['content'] or ""
                elif role == 'assistant' and row['tool_calls']:
                    room = max_examples - len(examples)
                    if room <= 0:
                        break
                    # A new batch of calls supersedes unanswered earlier ones.
                    pending = self._parse_tool_calls(row['tool_calls'], room)
                    examples.extend(ex for ex in pending if ex is not None)
            return examples
        except Exception as e:
            # Best-effort API: report the problem and return nothing.
            logger.error(f"Extract failed: {e}")
            return []

    def create(self, session_id, name=None, task_type=None, max_examples=10):
        """Create, store and return a template from a session.

        Args:
            session_id: Session to extract the examples from.
            name: Template name; generated from the task type, the first
                eight characters of the session id and the current time
                when omitted.
            task_type: Task type to record; derived with ``classify`` from
                the extracted examples when omitted.
            max_examples: Maximum number of examples to extract.

        Returns:
            The new template, or ``None`` when no examples were extracted.
        """
        examples = self.extract(session_id, max_examples)
        if not examples:
            return None
        if task_type is None:
            task_type = self.classify([{'tool_name': e.tool_name} for e in examples])
        if name is None:
            name = f"{task_type.value}_{session_id[:8]}_{int(time.time())}"
        template = SessionTemplate(name, task_type, examples)
        # Persist first so a failed write does not leave an in-memory
        # template that has no file behind it.
        self._save(template)
        self.templates[name] = template
        logger.info(f"Created template {name} with {len(examples)} examples")
        return template

    def get(self, task_type):
        """Return the least-used template of ``task_type``, or ``None`` if none exists."""
        matching = [t for t in self.templates.values() if t.task_type == task_type]
        if not matching:
            return None
        # min() returns the first of several equally-used templates, which is
        # the same element a stable ascending sort would put first.
        return min(matching, key=lambda t: t.usage_count)

    def inject(self, template, messages):
        """Insert a template's examples into a message list, in place.

        A system message naming the template is added, followed by one
        assistant tool-call message and one tool result message per example.
        The new messages are placed directly after the leading run of system
        messages. The template's ``usage_count`` is incremented and saved.

        Args:
            template: Template whose examples are injected.
            messages: List of message dicts; modified in place.

        Returns:
            The same ``messages`` list (unchanged when the template has no
            examples).
        """
        if not template.examples:
            return messages
        injection = [{
            "role": "system",
            "content": f"Template: {template.name} ({template.task_type.value})\nSuccessful tool call examples:"
        }]
        for i, ex in enumerate(template.examples):
            call_id = f"tpl_{i}"
            injection.append({
                "role": "assistant",
                "content": None,
                "tool_calls": [{
                    "id": call_id,
                    "type": "function",
                    "function": {"name": ex.tool_name, "arguments": json.dumps(ex.arguments)}
                }]
            })
            injection.append({
                "role": "tool",
                "tool_call_id": call_id,
                "content": ex.result
            })
        # Index just past the leading system messages.
        idx = 0
        for msg in messages:
            if msg.get("role") != "system":
                break
            idx += 1
        # One slice assignment instead of repeated insert() calls.
        messages[idx:idx] = injection
        template.usage_count += 1
        self._save(template)
        return messages

    def list(self, task_type=None):
        """Return the templates, newest first, optionally filtered by task type."""
        templates = self.templates.values()
        if task_type is not None:
            templates = [t for t in templates if t.task_type == task_type]
        return sorted(templates, key=lambda t: t.created_at, reverse=True)

    def delete(self, name):
        """Remove a template from memory and disk.

        Returns:
            ``True`` when the template was known and has been removed,
            ``False`` when no template of that name is loaded.
        """
        if name not in self.templates:
            return False
        del self.templates[name]
        try:
            (self.template_dir / f"{name}.json").unlink()
        except FileNotFoundError:
            # The file is already gone; nothing left to remove.
            pass
        return True
if __name__ == "__main__":
    # Small command-line front end: list, create or delete templates.
    import argparse

    type_choices = ["code", "file", "research", "mixed"]

    parser = argparse.ArgumentParser()
    commands = parser.add_subparsers(dest="cmd")

    list_cmd = commands.add_parser("list")
    list_cmd.add_argument("--type", choices=type_choices)

    create_cmd = commands.add_parser("create")
    create_cmd.add_argument("session_id")
    create_cmd.add_argument("--name")
    create_cmd.add_argument("--type", choices=type_choices)
    create_cmd.add_argument("--max", type=int, default=10)

    delete_cmd = commands.add_parser("delete")
    delete_cmd.add_argument("name")

    cli_args = parser.parse_args()
    # The store is built before dispatching, so its directory is created
    # (and existing templates are loaded) for every invocation.
    store = SessionTemplates()
    command = cli_args.cmd

    if command == "delete":
        removed = store.delete(cli_args.name)
        print("Deleted" if removed else "Not found")
    elif command == "create":
        wanted_type = TaskType(cli_args.type) if cli_args.type else None
        created = store.create(
            cli_args.session_id, cli_args.name, wanted_type, cli_args.max
        )
        if created:
            print(f"Created: {created.name} ({len(created.examples)} examples)")
        else:
            print("Failed")
    elif command == "list":
        wanted_type = TaskType(cli_args.type) if cli_args.type else None
        for entry in store.list(wanted_type):
            print(f"{entry.name}: {entry.task_type.value} ({len(entry.examples)} examples, used {entry.usage_count}x)")
    else:
        # No sub-command given.
        parser.print_help()