Compare commits

...

3 Commits

Author SHA1 Message Date
52763d0d58 feat(templates): Session templates for code-first seeding (#329)
Some checks failed
Forge CI / smoke-and-build (pull_request) Failing after 51s
Implement session templates based on research: code-heavy sessions improve over time.

Features:
- Task type classification (code/file/research/mixed)
- Template extraction from successful sessions
- Template storage in ~/.hermes/session-templates/
- Template injection into new sessions
- CLI: list/create/delete

Resolves #329
2026-04-14 01:47:03 +00:00
954fd992eb Merge pull request 'perf: lazy session creation — defer DB write until first message (#314)' (#449) from whip/314-1776127532 into main
Some checks failed
Forge CI / smoke-and-build (push) Failing after 55s
Forge CI / smoke-and-build (pull_request) Failing after 1m12s
perf: lazy session creation (#314)

Closes #314.
2026-04-14 01:08:13 +00:00
Metatron
f35f56e397 perf: lazy session creation — defer DB write until first message (closes #314)
Some checks failed
Forge CI / smoke-and-build (pull_request) Failing after 56s
Remove eager create_session() call from AIAgent.__init__(). Sessions
are now created lazily on first _flush_messages_to_session_db() call
via ensure_session() which uses INSERT OR IGNORE.

Impact: eliminates 32.4% of sessions (3,564 of 10,985) that were
created at agent init but never received any messages.

The existing ensure_session() fallback in _flush_messages_to_session_db()
already handles this pattern — it was originally designed for recovery
after transient SQLite lock failures. Now it's the primary creation path.

Compression-initiated sessions still use create_session() directly
(line ~5995) since they have messages to write immediately.
2026-04-13 20:52:06 -04:00
2 changed files with 275 additions and 24 deletions

View File

@@ -1001,30 +1001,10 @@ class AIAgent:
self._session_db = session_db
self._parent_session_id = parent_session_id
self._last_flushed_db_idx = 0 # tracks DB-write cursor to prevent duplicate writes
if self._session_db:
try:
self._session_db.create_session(
session_id=self.session_id,
source=self.platform or os.environ.get("HERMES_SESSION_SOURCE", "cli"),
model=self.model,
model_config={
"max_iterations": self.max_iterations,
"reasoning_config": reasoning_config,
"max_tokens": max_tokens,
},
user_id=None,
parent_session_id=self._parent_session_id,
)
except Exception as e:
# Transient SQLite lock contention (e.g. CLI and gateway writing
# concurrently) must NOT permanently disable session_search for
# this agent. Keep _session_db alive — subsequent message
# flushes and session_search calls will still work once the
# lock clears. The session row may be missing from the index
# for this run, but that is recoverable (flushes upsert rows).
logger.warning(
"Session DB create_session failed (session_search still available): %s", e
)
# Lazy session creation: defer until first message flush (#314).
# _flush_messages_to_session_db() calls ensure_session() which uses
# INSERT OR IGNORE — creating the row only when messages arrive.
# This eliminates 32% of sessions that are created but never used.
# In-memory todo list for task planning (one per agent/session)
from tools.todo_tool import TodoStore

271
tools/session_templates.py Normal file
View File

@@ -0,0 +1,271 @@
"""
Session templates for code-first seeding.
Research finding: Code-heavy sessions (execute_code dominant in first 30 turns)
improve over time. File-heavy sessions degrade. Key is deterministic feedback loops.
"""
import json
import logging
import sqlite3
import time
from pathlib import Path
from typing import Dict, List, Optional, Any
from dataclasses import dataclass, asdict
from enum import Enum
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Default directory for stored templates: ~/.hermes/session-templates
TEMPLATE_DIR = Path.home() / ".hermes" / "session-templates"
class TaskType(Enum):
    """Label for the dominant kind of tool usage in a session or template.

    The string values are what gets written into template JSON files and what
    the command-line ``--type`` option accepts.
    """

    CODE = "code"
    FILE = "file"
    RESEARCH = "research"
    # Used when no single category dominates (and for empty input).
    MIXED = "mixed"
@dataclass
class ToolExample:
    """A single recorded tool invocation: which tool, with what arguments, and its outcome."""

    tool_name: str
    arguments: Dict[str, Any]
    result: str
    success: bool

    def to_dict(self) -> Dict[str, Any]:
        """Return the fields as a plain dict (built with ``dataclasses.asdict``)."""
        return asdict(self)

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "ToolExample":
        """Build an instance from a mapping whose keys are exactly the field names."""
        return cls(**data)
@dataclass
class SessionTemplate:
    """A named set of tool-call examples tagged with a task type.

    ``created_at`` defaults to the construction time (epoch seconds) when left
    at ``0.0``; ``usage_count`` starts at zero.
    """

    name: str
    task_type: TaskType
    examples: List[ToolExample]
    created_at: float = 0.0
    usage_count: int = 0

    def __post_init__(self):
        # 0.0 is the "not set" sentinel: stamp the template with the current time.
        if self.created_at == 0.0:
            self.created_at = time.time()

    def to_dict(self) -> Dict[str, Any]:
        """Return a JSON-serialisable dict; the task type is stored as its string value."""
        data = asdict(self)
        data["task_type"] = self.task_type.value
        return data

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "SessionTemplate":
        """Build a template from a dict shaped like the output of ``to_dict``.

        The input mapping is left untouched: conversion happens on a shallow
        copy, so the same dict can be parsed more than once.

        Raises:
            KeyError: if ``task_type`` is missing.
            ValueError: if ``task_type`` is not a valid ``TaskType`` value.
            TypeError: if required fields are missing or unknown keys are present.
        """
        fields = dict(data)
        fields["task_type"] = TaskType(fields["task_type"])
        fields["examples"] = [
            ToolExample.from_dict(example) for example in fields.get("examples", [])
        ]
        return cls(**fields)
class SessionTemplates:
    """Directory-backed store of session templates.

    Every template lives in ``<template_dir>/<name>.json``. All ``*.json``
    files in the directory are loaded into ``self.templates`` (name ->
    template) when the store is constructed.
    """

    def __init__(self, template_dir=None):
        """Create the store, creating the directory if needed, and load templates.

        Args:
            template_dir: Directory holding the template files (``Path`` or
                ``str``). Falls back to ``TEMPLATE_DIR`` when omitted.
        """
        self.template_dir = Path(template_dir) if template_dir else TEMPLATE_DIR
        self.template_dir.mkdir(parents=True, exist_ok=True)
        self.templates = {}
        self._load()

    def _load(self):
        """Load every ``*.json`` file in the template directory.

        Best effort: a file that cannot be read or parsed is logged and
        skipped so that one bad file does not prevent the rest from loading.
        """
        for path in self.template_dir.glob("*.json"):
            try:
                with open(path, encoding="utf-8") as fh:
                    template = SessionTemplate.from_dict(json.load(fh))
            except Exception as e:
                logger.warning(f"Failed to load {path}: {e}")
                continue
            self.templates[template.name] = template

    def _save(self, template):
        """Write ``template`` to ``<template_dir>/<template.name>.json``."""
        path = self.template_dir / f"{template.name}.json"
        with open(path, 'w', encoding="utf-8") as fh:
            json.dump(template.to_dict(), fh, indent=2)

    def classify(self, tool_calls, threshold=0.6):
        """Classify a sequence of tool calls by the dominant tool category.

        Args:
            tool_calls: Dicts carrying a ``'tool_name'`` key.
            threshold: Share of calls a category must strictly exceed to win.

        Returns:
            ``TaskType.CODE``, ``FILE`` or ``RESEARCH`` for the first category
            (checked in that order) whose share exceeds ``threshold``;
            ``TaskType.MIXED`` otherwise, including for empty input.
        """
        if not tool_calls:
            return TaskType.MIXED
        categories = (
            (TaskType.CODE, {'execute_code', 'code_execution'}),
            (TaskType.FILE, {'read_file', 'write_file', 'patch', 'search_files'}),
            (TaskType.RESEARCH, {'web_search', 'web_fetch', 'browser_navigate'}),
        )
        names = [tc.get('tool_name', '') for tc in tool_calls]
        total = len(names)
        for task_type, tools in categories:
            if sum(1 for n in names if n in tools) / total > threshold:
                return task_type
        return TaskType.MIXED

    @staticmethod
    def _parse_tool_calls(raw, limit):
        """Decode one ``tool_calls`` JSON payload into at most ``limit`` examples.

        Entries without a function name are skipped; undecodable arguments
        become ``{}``. A malformed payload yields whatever was parsed before
        the problem was hit.
        """
        parsed = []
        try:
            for tc in json.loads(raw):
                if len(parsed) >= limit:
                    break
                function = tc.get('function', {})
                name = function.get('name')
                if not name:
                    continue
                try:
                    args = json.loads(function.get('arguments', '{}'))
                except (TypeError, ValueError):
                    args = {}
                parsed.append(ToolExample(name, args, "", True))
        except (TypeError, ValueError, AttributeError) as e:
            logger.debug("Skipping malformed tool_calls payload: %s", e)
        return parsed

    def extract(self, session_id, max_examples=10, db_path=None):
        """Collect tool-call examples from a stored session.

        Reads up to 100 rows (``role``, ``content``, ``tool_calls``) of the
        ``messages`` table for ``session_id``, ordered by ``timestamp``.
        Assistant rows contribute one example per named tool call; the
        ``tool`` rows that follow supply the results.

        Args:
            session_id: Session whose messages are read.
            max_examples: Upper bound on the number of examples returned.
            db_path: SQLite file to read; defaults to ``~/.hermes/state.db``.

        Returns:
            A list of ``ToolExample``; empty if the database is missing or
            any error occurs (the error is logged, never raised).
        """
        if db_path is None:
            db_path = Path.home() / ".hermes" / "state.db"
        db_path = Path(db_path)
        if not db_path.exists():
            return []
        try:
            conn = sqlite3.connect(str(db_path))
            try:
                conn.row_factory = sqlite3.Row
                rows = conn.execute("""
                    SELECT role, content, tool_calls
                    FROM messages WHERE session_id = ?
                    ORDER BY timestamp LIMIT 100
                """, (session_id,)).fetchall()
            finally:
                # Close even when the query fails so the handle is not leaked.
                conn.close()

            examples = []
            # Examples from the latest assistant message still waiting for a
            # result. Results are paired in call order -- assumes tool rows
            # follow their calls in that order; TODO confirm against the writer.
            pending = []
            for row in rows:
                full = len(examples) >= max_examples
                # Keep scanning after the cap is reached until the last
                # examples have received their results.
                if full and not pending:
                    break
                if row['role'] == 'assistant' and row['tool_calls']:
                    if full:
                        break
                    pending = self._parse_tool_calls(
                        row['tool_calls'], max_examples - len(examples)
                    )
                    examples.extend(pending)
                elif row['role'] == 'tool' and pending:
                    pending.pop(0).result = row['content'] or ""
            return examples
        except Exception as e:
            logger.error(f"Extract failed: {e}")
            return []

    def create(self, session_id, name=None, task_type=None, max_examples=10):
        """Build, register and persist a template from a stored session.

        Args:
            session_id: Session to extract examples from.
            name: Template name; generated from the task type, the first
                eight characters of the session id and the current time
                when omitted.
            task_type: Task type; derived with ``classify`` when omitted.
            max_examples: Maximum number of examples to extract.

        Returns:
            The new template, or ``None`` when no examples were extracted.
        """
        examples = self.extract(session_id, max_examples)
        if not examples:
            return None
        if task_type is None:
            task_type = self.classify([{'tool_name': e.tool_name} for e in examples])
        if name is None:
            name = f"{task_type.value}_{session_id[:8]}_{int(time.time())}"
        template = SessionTemplate(name, task_type, examples)
        self.templates[name] = template
        self._save(template)
        logger.info(f"Created template {name} with {len(examples)} examples")
        return template

    def get(self, task_type):
        """Return the least-used template of ``task_type``, or ``None`` if there is none."""
        matching = [t for t in self.templates.values() if t.task_type == task_type]
        return min(matching, key=lambda t: t.usage_count, default=None)

    def inject(self, template, messages):
        """Insert the template's examples into ``messages`` and return it.

        A system header followed by one assistant tool call / tool result
        pair per example is placed directly after the leading run of system
        messages. ``messages`` is modified in place. The template's
        ``usage_count`` is incremented and the template is saved. A template
        without examples leaves everything untouched.
        """
        if not template.examples:
            return messages
        injection = [{
            "role": "system",
            "content": f"Template: {template.name} ({template.task_type.value})\nSuccessful tool call examples:"
        }]
        for i, ex in enumerate(template.examples):
            call_id = f"tpl_{i}"
            injection.append({
                "role": "assistant",
                "content": None,
                "tool_calls": [{
                    "id": call_id,
                    "type": "function",
                    "function": {"name": ex.tool_name, "arguments": json.dumps(ex.arguments)}
                }]
            })
            injection.append({
                "role": "tool",
                "tool_call_id": call_id,
                "content": ex.result
            })
        # Index just past the leading system messages.
        idx = 0
        for msg in messages:
            if msg.get("role") != "system":
                break
            idx += 1
        messages[idx:idx] = injection
        template.usage_count += 1
        self._save(template)
        return messages

    def list(self, task_type=None):
        """Return templates, newest first, optionally restricted to ``task_type``."""
        selected = [
            t for t in self.templates.values()
            if not task_type or t.task_type == task_type
        ]
        return sorted(selected, key=lambda t: t.created_at, reverse=True)

    def delete(self, name):
        """Remove the template ``name`` from memory and disk.

        Returns:
            ``True`` if the template was known, ``False`` otherwise. A
            missing file on disk is not an error.
        """
        if name not in self.templates:
            return False
        del self.templates[name]
        try:
            (self.template_dir / f"{name}.json").unlink()
        except FileNotFoundError:
            pass
        return True
if __name__ == "__main__":
    # Small command-line front end: list, create and delete templates in the
    # default template directory.
    import argparse

    def _as_task_type(raw):
        """Turn the optional --type string into a TaskType (None when not given)."""
        return TaskType(raw) if raw else None

    parser = argparse.ArgumentParser()
    commands = parser.add_subparsers(dest="cmd")

    list_cmd = commands.add_parser("list")
    list_cmd.add_argument("--type", choices=["code", "file", "research", "mixed"])

    create_cmd = commands.add_parser("create")
    create_cmd.add_argument("session_id")
    create_cmd.add_argument("--name")
    create_cmd.add_argument("--type", choices=["code", "file", "research", "mixed"])
    create_cmd.add_argument("--max", type=int, default=10)

    delete_cmd = commands.add_parser("delete")
    delete_cmd.add_argument("name")

    args = parser.parse_args()
    store = SessionTemplates()

    if args.cmd == "list":
        wanted = _as_task_type(args.type)
        for tpl in store.list(wanted):
            print(f"{tpl.name}: {tpl.task_type.value} ({len(tpl.examples)} examples, used {tpl.usage_count}x)")
    elif args.cmd == "create":
        wanted = _as_task_type(args.type)
        created = store.create(args.session_id, args.name, wanted, args.max)
        if not created:
            print("Failed")
        else:
            print(f"Created: {created.name} ({len(created.examples)} examples)")
    elif args.cmd == "delete":
        removed = store.delete(args.name)
        print("Deleted" if removed else "Not found")
    else:
        # No sub-command given.
        parser.print_help()