Compare commits

..

1 Commits

Author SHA1 Message Date
Alexander Whitestone
a90162bafc fix: add _classify_runtime with complete cloud model prefix list (#628)
Some checks failed
Forge CI / smoke-and-build (pull_request) Failing after 36s
`_classify_runtime` was missing from the codebase, and the existing
`_PROVIDER_PREFIXES` set lacked several cloud vendor prefixes that users
commonly encounter via OpenRouter-style model IDs.

Changes:
- Add `_CLOUD_MODEL_PREFIXES` frozenset covering all known cloud vendors,
  including the previously missing: deepseek, cohere, mistral/mistralai,
  meta-llama, databricks, together, togetherai
- Add `_LOCAL_PROVIDER_NAMES` and `_CLOUD_PROVIDER_NAMES` frozensets for
  provider-name-based classification
- Implement `_classify_runtime(model, base_url, provider)` that classifies
  a runtime as "cloud" or "local" using URL → provider → model-prefix priority
- Extend `_PROVIDER_PREFIXES` with the same missing cloud vendors so that
  `_strip_provider_prefix` also handles cohere:, mistralai:, etc.
- Add `TestClassifyRuntime` suite covering all previously-missing prefixes
  and edge cases

Fixes #628

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-14 11:57:36 -04:00
5 changed files with 178 additions and 1021 deletions

View File

@@ -32,6 +32,27 @@ _PROVIDER_PREFIXES: frozenset[str] = frozenset({
"glm", "z-ai", "z.ai", "zhipu", "github", "github-copilot",
"github-models", "kimi", "moonshot", "claude", "deep-seek",
"opencode", "zen", "go", "vercel", "kilo", "dashscope", "aliyun", "qwen",
# Additional cloud vendor prefixes (fixes #628)
"cohere", "mistralai", "mistral", "meta-llama", "databricks", "together",
"togetherai", "together-ai", "nousresearch", "moonshotai", "fireworks",
"perplexity", "ai21", "groq", "cerebras", "nebius",
})
# Vendor prefixes that appear in cloud model IDs (e.g. "openai/gpt-4").
# Used by _classify_runtime to detect cloud runtimes from the model name
# when no base URL is available.
_CLOUD_MODEL_PREFIXES: frozenset[str] = frozenset({
# Providers present before #628
"nous", "nousresearch", "openrouter", "anthropic", "openai",
"zai", "kimi", "moonshotai", "gemini", "google", "minimax",
# Providers added by #628 fix
"deepseek", "cohere", "mistralai", "mistral", "meta-llama",
"databricks", "together", "togetherai",
# Other common cloud vendors
"microsoft", "amazon", "huggingface", "fireworks",
"perplexity", "ai21", "groq", "cerebras", "nebius",
"qwen", "alibaba", "aliyuncs", "dashscope",
"github", "copilot",
})
@@ -253,6 +274,67 @@ def is_local_endpoint(base_url: str) -> bool:
return False
# Provider names that are definitively local (never cloud).
_LOCAL_PROVIDER_NAMES: frozenset[str] = frozenset({
"ollama", "custom", "local",
})
# Provider names that are definitively cloud (not local).
_CLOUD_PROVIDER_NAMES: frozenset[str] = frozenset({
"nous", "openrouter", "anthropic", "openai", "openai-codex",
"zai", "kimi-coding", "gemini", "minimax", "minimax-cn",
"deepseek", "cohere", "mistral", "meta-llama", "databricks", "together",
"huggingface", "copilot", "copilot-acp", "ai-gateway", "kilocode",
"alibaba", "opencode-zen", "opencode-go",
})
def _classify_runtime(
model: str = "",
base_url: str = "",
provider: str = "",
) -> str:
"""Classify a model/endpoint runtime as 'cloud' or 'local'.
Checks in priority order:
1. ``base_url`` — localhost / RFC-1918 → ``"local"``; known external URL → ``"cloud"``
2. ``provider`` name — matches a known local or cloud provider set
3. Model vendor prefix — e.g. ``"openai/gpt-4"`` → ``"cloud"``
4. Default — ``"cloud"`` when the runtime cannot be determined to be local
The cloud-prefix list covers both the providers present before issue #628
(nous, openrouter, anthropic, openai, zai, kimi, gemini, minimax) and the
previously missing ones (deepseek, cohere, mistral, meta-llama, databricks,
together).
Returns ``"cloud"`` or ``"local"``.
"""
# 1. URL-based check — most reliable signal
if base_url:
if is_local_endpoint(base_url):
return "local"
return "cloud"
# 2. Provider name check
provider_norm = (provider or "").strip().lower()
if provider_norm in _LOCAL_PROVIDER_NAMES:
return "local"
if provider_norm in _CLOUD_PROVIDER_NAMES:
return "cloud"
# 3. Model vendor prefix check (e.g. "openai/gpt-4" → vendor "openai")
model_norm = (model or "").strip().lower()
if "/" in model_norm:
vendor = model_norm.split("/")[0].strip()
if vendor in _CLOUD_MODEL_PREFIXES:
return "cloud"
# An unknown vendor with a slash is still likely a cloud model
return "cloud"
# 4. Default — without a URL we cannot confirm local, so assume cloud
return "cloud"
def detect_local_server_type(base_url: str) -> Optional[str]:
"""Detect which local server is running at base_url by probing known endpoints.

View File

@@ -1,206 +0,0 @@
# Session Templates for Code-First Seeding
## Overview
Session templates pre-seed new sessions with successful tool call patterns from previous sessions. Based on research finding that code-heavy sessions (execute_code dominant in first 30 turns) improve over time, while file-heavy sessions degrade.
## Key Concepts
### Task Type Classification
Sessions are classified into four types based on tool call patterns:
- **CODE**: execute_code dominant (>60% of tool calls)
- **FILE**: file operations dominant (read_file, write_file, patch, search_files)
- **RESEARCH**: research tools dominant (web_search, web_fetch, browser_navigate)
- **MIXED**: no dominant type (<60% for any category)
### Template Structure
Each template contains:
- **Name**: Unique identifier
- **Task Type**: CODE, FILE, RESEARCH, or MIXED
- **Examples**: List of successful tool calls with arguments and results
- **Description**: Human-readable description
- **Tags**: Optional categorization tags
- **Usage Count**: How many times the template has been used
- **Source Session ID**: Session from which template was extracted
## Usage
### CLI Interface
```bash
# List all templates
python -m tools.session_templates list
# List only code templates
python -m tools.session_templates list --type code
# List templates with specific tags
python -m tools.session_templates list --tags "python,testing"
# Create template from session
python -m tools.session_templates create 20260413_123456_abc123 --name my-code-template
# Create template with description and tags
python -m tools.session_templates create 20260413_123456_abc123 \
--name my-template \
--type code \
--description "Python development template" \
--tags "python,development"
# Delete template
python -m tools.session_templates delete my-template
# Show statistics
python -m tools.session_templates stats
```
### Programmatic Usage
```python
from tools.session_templates import SessionTemplates, TaskType
# Create template manager
templates = SessionTemplates()
# Get template for code tasks
template = templates.get_template(TaskType.CODE)
# Inject template into messages
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": "Help me write some code"}
]
updated_messages = templates.inject_into_messages(template, messages)
# Create template from session
template = templates.create_template(
session_id="20260413_123456_abc123",
name="my-template",
task_type=TaskType.CODE,
max_examples=10,
description="My template",
tags=["python", "development"]
)
# List templates
code_templates = templates.list_templates(task_type=TaskType.CODE)
all_templates = templates.list_templates()
# Get statistics
stats = templates.get_template_stats()
print(f"Total templates: {stats['total']}")
print(f"Total examples: {stats['total_examples']}")
```
## Implementation Details
### Template Extraction
1. Query SQLite database for session messages
2. Extract tool calls from assistant messages
3. Match tool calls with their results from tool responses
4. Create ToolCallExample objects with arguments and results
### Template Injection
1. Create system message about template
2. Add assistant messages with tool calls from template
3. Add tool responses with results
4. Insert after existing system messages
5. Update template usage count
### Storage
Templates are stored as JSON files in `~/.hermes/session-templates/`:
```json
{
"name": "code_python_20260413",
"task_type": "code",
"examples": [
{
"tool_name": "execute_code",
"arguments": {"code": "print('hello world')"},
"result": "hello world",
"success": true,
"turn_number": 0
}
],
"description": "Python development template",
"created_at": 1712345678.0,
"usage_count": 5,
"source_session_id": "20260413_123456_abc123",
"tags": ["python", "development"]
}
```
## Research Background
### Finding
Code-heavy sessions (execute_code dominant in first 30 turns) improve over time. File-heavy sessions (search/read/patch) degrade. The key is deterministic feedback loops, not arbitrary context.
### Hypothesis
Pre-seeding new sessions with successful tool call patterns establishes feedback loops early, leading to:
- Lower error rate in first 30 turns
- Faster time to first success
- Fewer total errors
- Better tool call diversity
### Experiment Design
A/B test: cold start vs code-seeded start on same task. Measure:
- Error rate in first 30 turns
- Time to first success
- Total errors
- Tool call diversity
## Best Practices
### Template Creation
1. **Extract from successful sessions**: Only use sessions with high success rates
2. **Limit examples**: 5-10 examples per template is optimal
3. **Use descriptive names**: Include task type and context in name
4. **Add tags**: Use tags for categorization and filtering
5. **Update regularly**: Create new templates from recent successful sessions
### Template Usage
1. **Match task type**: Use templates that match the expected task type
2. **Don't over-inject**: One template per session is sufficient
3. **Monitor effectiveness**: Track whether templates improve performance
4. **Clean up old templates**: Remove templates that are no longer effective
## Troubleshooting
### No Templates Found
- Check if `~/.hermes/session-templates/` directory exists
- Verify session database exists at `~/.hermes/state.db`
- Check if session has successful tool calls
### Template Injection Not Working
- Verify template has examples
- Check if messages list is not empty
- Ensure template is properly loaded
### Extraction Fails
- Verify session ID exists in database
- Check if session has tool calls
- Ensure database is not corrupted
## Future Enhancements
1. **Automatic template creation**: Create templates automatically from successful sessions
2. **Template optimization**: Use ML to optimize template selection
3. **Cross-session learning**: Share templates across users (with privacy controls)
4. **Template versioning**: Track template effectiveness over time
5. **Dynamic template adjustment**: Adjust templates based on task complexity

View File

@@ -7,7 +7,7 @@ terminal access.
"""
import pytest
from agent.model_metadata import is_local_endpoint
from agent.model_metadata import is_local_endpoint, _classify_runtime
class TestIsLocalEndpoint:
@@ -71,3 +71,98 @@ class TestCronDisabledToolsetsLogic:
def test_empty_url_disables_terminal(self):
disabled = self._build_disabled("")
assert "terminal" in disabled
class TestClassifyRuntime:
"""Verify _classify_runtime correctly classifies runtimes as cloud or local.
Covers the bug fixed in #628: missing cloud model prefixes for deepseek,
cohere, mistral, meta-llama, databricks, and together.
"""
# ── URL-based classification ──────────────────────────────────────────
def test_localhost_url_is_local(self):
assert _classify_runtime(base_url="http://localhost:11434/v1") == "local"
def test_127_loopback_is_local(self):
assert _classify_runtime(base_url="http://127.0.0.1:8080/v1") == "local"
def test_rfc1918_is_local(self):
assert _classify_runtime(base_url="http://192.168.1.10:11434/v1") == "local"
def test_openrouter_url_is_cloud(self):
assert _classify_runtime(base_url="https://openrouter.ai/api/v1") == "cloud"
def test_anthropic_url_is_cloud(self):
assert _classify_runtime(base_url="https://api.anthropic.com") == "cloud"
def test_deepseek_url_is_cloud(self):
assert _classify_runtime(base_url="https://api.deepseek.com/v1") == "cloud"
# ── Provider-name classification ──────────────────────────────────────
def test_ollama_provider_is_local(self):
assert _classify_runtime(provider="ollama") == "local"
def test_custom_provider_is_local(self):
assert _classify_runtime(provider="custom") == "local"
def test_openrouter_provider_is_cloud(self):
assert _classify_runtime(provider="openrouter") == "cloud"
def test_nous_provider_is_cloud(self):
assert _classify_runtime(provider="nous") == "cloud"
def test_anthropic_provider_is_cloud(self):
assert _classify_runtime(provider="anthropic") == "cloud"
# ── Previously-missing cloud prefixes (issue #628) ────────────────────
def test_deepseek_model_prefix_is_cloud(self):
assert _classify_runtime(model="deepseek/deepseek-v2") == "cloud"
def test_cohere_model_prefix_is_cloud(self):
assert _classify_runtime(model="cohere/command-r-plus") == "cloud"
def test_mistralai_model_prefix_is_cloud(self):
assert _classify_runtime(model="mistralai/mistral-large-2407") == "cloud"
def test_meta_llama_model_prefix_is_cloud(self):
assert _classify_runtime(model="meta-llama/llama-3.1-70b-instruct") == "cloud"
def test_databricks_model_prefix_is_cloud(self):
assert _classify_runtime(model="databricks/dbrx-instruct") == "cloud"
def test_together_model_prefix_is_cloud(self):
assert _classify_runtime(model="together/together-api-model") == "cloud"
# ── Providers that were already detected before #628 ─────────────────
def test_openai_model_prefix_is_cloud(self):
assert _classify_runtime(model="openai/gpt-4.1") == "cloud"
def test_anthropic_model_prefix_is_cloud(self):
assert _classify_runtime(model="anthropic/claude-opus-4.6") == "cloud"
def test_google_model_prefix_is_cloud(self):
assert _classify_runtime(model="google/gemini-3-pro") == "cloud"
def test_minimax_model_prefix_is_cloud(self):
assert _classify_runtime(model="minimax/minimax-m2.7") == "cloud"
# ── Fallback / edge cases ────────────────────────────────────────────
def test_no_args_defaults_to_cloud(self):
assert _classify_runtime() == "cloud"
def test_empty_strings_default_to_cloud(self):
assert _classify_runtime(model="", base_url="", provider="") == "cloud"
def test_url_takes_priority_over_provider(self):
# Explicit local URL wins even if provider looks like cloud
assert _classify_runtime(model="openai/gpt-4", base_url="http://localhost:11434/v1", provider="openai") == "local"
def test_bare_model_name_without_slash_defaults_to_cloud(self):
# No slash → can't infer vendor → cloud (safe default)
assert _classify_runtime(model="gpt-4o") == "cloud"

View File

@@ -1,343 +0,0 @@
"""
Tests for session templates.
"""
import json
import pytest
import tempfile
from pathlib import Path
from unittest.mock import Mock, patch, MagicMock
from tools.session_templates import (
SessionTemplates,
SessionTemplate,
ToolCallExample,
TaskType
)
class TestTaskTypeClassification:
"""Test task type classification."""
def test_code_heavy(self):
"""Test classification of code-heavy sessions."""
templates = SessionTemplates()
tool_calls = [
{"tool_name": "execute_code"},
{"tool_name": "execute_code"},
{"tool_name": "execute_code"},
{"tool_name": "read_file"},
]
result = templates.classify_task_type(tool_calls)
assert result == TaskType.CODE
def test_file_heavy(self):
"""Test classification of file-heavy sessions."""
templates = SessionTemplates()
tool_calls = [
{"tool_name": "read_file"},
{"tool_name": "write_file"},
{"tool_name": "patch"},
{"tool_name": "search_files"},
]
result = templates.classify_task_type(tool_calls)
assert result == TaskType.FILE
def test_research_heavy(self):
"""Test classification of research-heavy sessions."""
templates = SessionTemplates()
tool_calls = [
{"tool_name": "web_search"},
{"tool_name": "web_fetch"},
{"tool_name": "browser_navigate"},
]
result = templates.classify_task_type(tool_calls)
assert result == TaskType.RESEARCH
def test_mixed(self):
"""Test classification of mixed sessions."""
templates = SessionTemplates()
tool_calls = [
{"tool_name": "execute_code"},
{"tool_name": "read_file"},
{"tool_name": "web_search"},
]
result = templates.classify_task_type(tool_calls)
assert result == TaskType.MIXED
def test_empty(self):
"""Test classification of empty sessions."""
templates = SessionTemplates()
result = templates.classify_task_type([])
assert result == TaskType.MIXED
class TestToolCallExample:
"""Test ToolCallExample dataclass."""
def test_to_dict(self):
"""Test conversion to dictionary."""
example = ToolCallExample(
tool_name="execute_code",
arguments={"code": "print('hello')"},
result="hello",
success=True,
turn_number=0
)
data = example.to_dict()
assert data["tool_name"] == "execute_code"
assert data["arguments"] == {"code": "print('hello')"}
assert data["result"] == "hello"
assert data["success"] is True
def test_from_dict(self):
"""Test creation from dictionary."""
data = {
"tool_name": "execute_code",
"arguments": {"code": "print('hello')"},
"result": "hello",
"success": True,
"turn_number": 0
}
example = ToolCallExample.from_dict(data)
assert example.tool_name == "execute_code"
assert example.arguments == {"code": "print('hello')"}
assert example.result == "hello"
class TestSessionTemplate:
"""Test SessionTemplate dataclass."""
def test_to_dict(self):
"""Test conversion to dictionary."""
examples = [
ToolCallExample(
tool_name="execute_code",
arguments={"code": "print('hello')"},
result="hello",
success=True
)
]
template = SessionTemplate(
name="test_template",
task_type=TaskType.CODE,
examples=examples,
description="Test template"
)
data = template.to_dict()
assert data["name"] == "test_template"
assert data["task_type"] == "code"
assert len(data["examples"]) == 1
def test_from_dict(self):
"""Test creation from dictionary."""
data = {
"name": "test_template",
"task_type": "code",
"examples": [
{
"tool_name": "execute_code",
"arguments": {"code": "print('hello')"},
"result": "hello",
"success": True,
"turn_number": 0
}
],
"description": "Test template",
"created_at": 1234567890.0,
"usage_count": 0,
"source_session_id": None,
"tags": []
}
template = SessionTemplate.from_dict(data)
assert template.name == "test_template"
assert template.task_type == TaskType.CODE
assert len(template.examples) == 1
class TestSessionTemplates:
"""Test SessionTemplates manager."""
def test_create_and_list(self):
"""Test creating and listing templates."""
with tempfile.TemporaryDirectory() as tmpdir:
template_dir = Path(tmpdir)
manager = SessionTemplates(template_dir=template_dir)
# Create a mock template
examples = [
ToolCallExample(
tool_name="execute_code",
arguments={"code": "print('hello')"},
result="hello",
success=True
)
]
template = SessionTemplate(
name="test_template",
task_type=TaskType.CODE,
examples=examples
)
manager.templates["test_template"] = template
manager._save_template(template)
# List templates
templates = manager.list_templates()
assert len(templates) == 1
assert templates[0].name == "test_template"
def test_get_template(self):
"""Test getting template by task type."""
with tempfile.TemporaryDirectory() as tmpdir:
template_dir = Path(tmpdir)
manager = SessionTemplates(template_dir=template_dir)
# Create templates
code_template = SessionTemplate(
name="code_template",
task_type=TaskType.CODE,
examples=[]
)
file_template = SessionTemplate(
name="file_template",
task_type=TaskType.FILE,
examples=[]
)
manager.templates["code_template"] = code_template
manager.templates["file_template"] = file_template
# Get code template
result = manager.get_template(TaskType.CODE)
assert result is not None
assert result.name == "code_template"
# Get file template
result = manager.get_template(TaskType.FILE)
assert result is not None
assert result.name == "file_template"
# Get non-existent template
result = manager.get_template(TaskType.RESEARCH)
assert result is None
def test_inject_into_messages(self):
"""Test injecting template into messages."""
with tempfile.TemporaryDirectory() as tmpdir:
template_dir = Path(tmpdir)
manager = SessionTemplates(template_dir=template_dir)
# Create template
examples = [
ToolCallExample(
tool_name="execute_code",
arguments={"code": "print('hello')"},
result="hello",
success=True
)
]
template = SessionTemplate(
name="test_template",
task_type=TaskType.CODE,
examples=examples
)
manager.templates["test_template"] = template
# Test injection
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": "Hello"}
]
result = manager.inject_into_messages(template, messages)
# Should have added template messages
assert len(result) > len(messages)
assert any("Session template loaded" in str(msg.get("content", ""))
for msg in result)
# Usage count should be updated
assert template.usage_count == 1
def test_delete_template(self):
"""Test deleting templates."""
with tempfile.TemporaryDirectory() as tmpdir:
template_dir = Path(tmpdir)
manager = SessionTemplates(template_dir=template_dir)
# Create template
template = SessionTemplate(
name="test_template",
task_type=TaskType.CODE,
examples=[]
)
manager.templates["test_template"] = template
manager._save_template(template)
# Verify it exists
assert "test_template" in manager.templates
assert (template_dir / "test_template.json").exists()
# Delete it
result = manager.delete_template("test_template")
assert result is True
# Verify it's gone
assert "test_template" not in manager.templates
assert not (template_dir / "test_template.json").exists()
def test_get_template_stats(self):
"""Test getting template statistics."""
with tempfile.TemporaryDirectory() as tmpdir:
template_dir = Path(tmpdir)
manager = SessionTemplates(template_dir=template_dir)
# Create templates
code_template = SessionTemplate(
name="code_template",
task_type=TaskType.CODE,
examples=[
ToolCallExample("execute_code", {}, "", True),
ToolCallExample("execute_code", {}, "", True)
],
usage_count=5
)
file_template = SessionTemplate(
name="file_template",
task_type=TaskType.FILE,
examples=[
ToolCallExample("read_file", {}, "", True)
],
usage_count=3
)
manager.templates["code_template"] = code_template
manager.templates["file_template"] = file_template
stats = manager.get_template_stats()
assert stats["total"] == 2
assert stats["total_examples"] == 3
assert stats["total_usage"] == 8
assert stats["by_type"]["code"] == 1
assert stats["by_type"]["file"] == 1
if __name__ == "__main__":
pytest.main([__file__])

View File

@@ -1,471 +0,0 @@
"""
Session templates for code-first seeding.
Research finding: Code-heavy sessions (execute_code dominant in first 30 turns)
improve over time. File-heavy sessions degrade. Key is deterministic feedback loops.
This module provides:
1. Template extraction from successful sessions
2. Task type classification (code, file, research, mixed)
3. Template storage in ~/.hermes/session-templates/
4. Template injection into new sessions
5. CLI interface for template management
"""
import json
import logging
import os
import sqlite3
import time
from pathlib import Path
from typing import Dict, List, Optional, Any, Tuple
from dataclasses import dataclass, asdict, field
from enum import Enum
logger = logging.getLogger(__name__)
# Default template directory
DEFAULT_TEMPLATE_DIR = Path.home() / ".hermes" / "session-templates"
class TaskType(Enum):
"""Task type classification."""
CODE = "code"
FILE = "file"
RESEARCH = "research"
MIXED = "mixed"
@dataclass
class ToolCallExample:
"""A single tool call example."""
tool_name: str
arguments: Dict[str, Any]
result: str
success: bool
turn_number: int = 0
def to_dict(self) -> Dict[str, Any]:
return asdict(self)
@classmethod
def from_dict(cls, data: Dict[str, Any]) -> 'ToolCallExample':
return cls(**data)
@dataclass
class SessionTemplate:
"""A session template with tool call examples."""
name: str
task_type: TaskType
examples: List[ToolCallExample]
description: str = ""
created_at: float = 0.0
usage_count: int = 0
source_session_id: Optional[str] = None
tags: List[str] = field(default_factory=list)
def __post_init__(self):
if self.created_at == 0.0:
self.created_at = time.time()
def to_dict(self) -> Dict[str, Any]:
data = asdict(self)
data['task_type'] = self.task_type.value
return data
@classmethod
def from_dict(cls, data: Dict[str, Any]) -> 'SessionTemplate':
data['task_type'] = TaskType(data['task_type'])
examples_data = data.get('examples', [])
data['examples'] = [ToolCallExample.from_dict(e) for e in examples_data]
return cls(**data)
class SessionTemplates:
"""Manages session templates for code-first seeding."""
def __init__(self, template_dir: Optional[Path] = None):
self.template_dir = template_dir or DEFAULT_TEMPLATE_DIR
self.template_dir.mkdir(parents=True, exist_ok=True)
self.templates: Dict[str, SessionTemplate] = {}
self._load_templates()
def _load_templates(self):
"""Load all templates from disk."""
for template_file in self.template_dir.glob("*.json"):
try:
with open(template_file, 'r') as f:
data = json.load(f)
template = SessionTemplate.from_dict(data)
self.templates[template.name] = template
except Exception as e:
logger.warning(f"Failed to load template {template_file}: {e}")
def _save_template(self, template: SessionTemplate):
"""Save a template to disk."""
template_file = self.template_dir / f"{template.name}.json"
with open(template_file, 'w') as f:
json.dump(template.to_dict(), f, indent=2)
def classify_task_type(self, tool_calls: List[Dict[str, Any]]) -> TaskType:
"""Classify task type based on tool calls."""
if not tool_calls:
return TaskType.MIXED
# Count tool types
code_tools = {'execute_code', 'code_execution'}
file_tools = {'read_file', 'write_file', 'patch', 'search_files'}
research_tools = {'web_search', 'web_fetch', 'browser_navigate'}
tool_names = [tc.get('tool_name', '') for tc in tool_calls]
code_count = sum(1 for t in tool_names if t in code_tools)
file_count = sum(1 for t in tool_names if t in file_tools)
research_count = sum(1 for t in tool_names if t in research_tools)
total = len(tool_calls)
if total == 0:
return TaskType.MIXED
# Determine dominant type (60% threshold)
code_ratio = code_count / total
file_ratio = file_count / total
research_ratio = research_count / total
if code_ratio > 0.6:
return TaskType.CODE
elif file_ratio > 0.6:
return TaskType.FILE
elif research_ratio > 0.6:
return TaskType.RESEARCH
else:
return TaskType.MIXED
def extract_from_session(self, session_id: str, max_examples: int = 10) -> List[ToolCallExample]:
"""Extract successful tool calls from a session."""
db_path = Path.home() / ".hermes" / "state.db"
if not db_path.exists():
logger.warning(f"Session database not found: {db_path}")
return []
try:
conn = sqlite3.connect(str(db_path))
conn.row_factory = sqlite3.Row
# Get messages with tool calls
cursor = conn.execute("""
SELECT role, content, tool_calls, tool_name, timestamp
FROM messages
WHERE session_id = ?
ORDER BY timestamp
LIMIT 100
""", (session_id,))
messages = cursor.fetchall()
conn.close()
examples = []
turn_number = 0
for msg in messages:
if len(examples) >= max_examples:
break
if msg['role'] == 'assistant' and msg['tool_calls']:
try:
tool_calls = json.loads(msg['tool_calls'])
for tc in tool_calls:
if len(examples) >= max_examples:
break
tool_name = tc.get('function', {}).get('name')
if not tool_name:
continue
try:
arguments = json.loads(tc.get('function', {}).get('arguments', '{}'))
except:
arguments = {}
examples.append(ToolCallExample(
tool_name=tool_name,
arguments=arguments,
result="", # Will be filled from tool response
success=True,
turn_number=turn_number
))
turn_number += 1
except json.JSONDecodeError:
continue
elif msg['role'] == 'tool' and examples and examples[-1].result == "":
examples[-1].result = msg['content'] or ""
return examples
except Exception as e:
logger.error(f"Failed to extract from session {session_id}: {e}")
return []
def create_template(self, session_id: str, name: Optional[str] = None,
task_type: Optional[TaskType] = None,
max_examples: int = 10,
description: str = "",
tags: Optional[List[str]] = None) -> Optional[SessionTemplate]:
"""Create a template from a session."""
examples = self.extract_from_session(session_id, max_examples)
if not examples:
logger.warning(f"No successful tool calls found in session {session_id}")
return None
# Classify task type if not provided
if task_type is None:
tool_calls = [{'tool_name': e.tool_name} for e in examples]
task_type = self.classify_task_type(tool_calls)
# Generate name if not provided
if name is None:
name = f"{task_type.value}_{session_id[:8]}_{int(time.time())}"
# Create template
template = SessionTemplate(
name=name,
task_type=task_type,
examples=examples,
description=description or f"Template with {len(examples)} examples",
source_session_id=session_id,
tags=tags or []
)
# Save template
self.templates[name] = template
self._save_template(template)
logger.info(f"Created template {name} with {len(examples)} examples")
return template
def get_template(self, task_type: TaskType, tags: Optional[List[str]] = None) -> Optional[SessionTemplate]:
"""Get the best template for a task type and optional tags."""
matching = [t for t in self.templates.values() if t.task_type == task_type]
# Filter by tags if provided
if tags:
matching = [t for t in matching if any(tag in t.tags for tag in tags)]
if not matching:
return None
# Sort by usage count (prefer less used templates)
matching.sort(key=lambda t: t.usage_count)
return matching[0]
def inject_into_messages(self, template: SessionTemplate,
messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
"""Inject template examples into messages."""
if not template.examples:
return messages
# Create injection messages
injection = []
# Add system message about template
injection.append({
"role": "system",
"content": f"Session template loaded: {template.name} ({template.task_type.value})\n"
f"Description: {template.description}\n"
f"This template contains {len(template.examples)} successful tool calls "
f"to establish a feedback loop early."
})
# Add tool call examples
for i, example in enumerate(template.examples):
# Assistant message with tool call
injection.append({
"role": "assistant",
"content": None,
"tool_calls": [{
"id": f"template_{template.name}_{i}",
"type": "function",
"function": {
"name": example.tool_name,
"arguments": json.dumps(example.arguments)
}
}]
})
# Tool response
injection.append({
"role": "tool",
"tool_call_id": f"template_{template.name}_{i}",
"content": example.result
})
# Insert after system messages
insert_index = 0
for i, msg in enumerate(messages):
if msg.get("role") != "system":
break
insert_index = i + 1
# Insert injection
for i, msg in enumerate(injection):
messages.insert(insert_index + i, msg)
# Update usage count
template.usage_count += 1
self._save_template(template)
return messages
def list_templates(self, task_type: Optional[TaskType] = None,
tags: Optional[List[str]] = None) -> List[SessionTemplate]:
"""List templates, optionally filtered by task type and tags."""
templates = list(self.templates.values())
if task_type:
templates = [t for t in templates if t.task_type == task_type]
if tags:
templates = [t for t in templates if any(tag in t.tags for tag in tags)]
templates.sort(key=lambda t: t.created_at, reverse=True)
return templates
def delete_template(self, name: str) -> bool:
"""Delete a template."""
if name not in self.templates:
return False
del self.templates[name]
template_file = self.template_dir / f"{name}.json"
if template_file.exists():
template_file.unlink()
logger.info(f"Deleted template {name}")
return True
def get_template_stats(self) -> Dict[str, Any]:
"""Get statistics about templates."""
if not self.templates:
return {
"total": 0,
"by_type": {},
"total_examples": 0,
"total_usage": 0
}
by_type = {}
total_examples = 0
total_usage = 0
for template in self.templates.values():
task_type = template.task_type.value
by_type[task_type] = by_type.get(task_type, 0) + 1
total_examples += len(template.examples)
total_usage += template.usage_count
return {
"total": len(self.templates),
"by_type": by_type,
"total_examples": total_examples,
"total_usage": total_usage
}
# CLI interface
def main():
"""CLI for session templates."""
import argparse
parser = argparse.ArgumentParser(description="Session Templates")
subparsers = parser.add_subparsers(dest="command")
# List templates
list_parser = subparsers.add_parser("list", help="List templates")
list_parser.add_argument("--type", choices=["code", "file", "research", "mixed"],
help="Filter by task type")
list_parser.add_argument("--tags", help="Filter by tags (comma-separated)")
# Create template
create_parser = subparsers.add_parser("create", help="Create template from session")
create_parser.add_argument("session_id", help="Session ID")
create_parser.add_argument("--name", help="Template name")
create_parser.add_argument("--type", choices=["code", "file", "research", "mixed"],
help="Task type")
create_parser.add_argument("--max-examples", type=int, default=10,
help="Maximum examples to extract")
create_parser.add_argument("--description", help="Template description")
create_parser.add_argument("--tags", help="Tags (comma-separated)")
# Delete template
delete_parser = subparsers.add_parser("delete", help="Delete template")
delete_parser.add_argument("name", help="Template name")
# Show stats
stats_parser = subparsers.add_parser("stats", help="Show template statistics")
args = parser.parse_args()
templates = SessionTemplates()
if args.command == "list":
task_type = TaskType(args.type) if args.type else None
tags = args.tags.split(",") if args.tags else None
template_list = templates.list_templates(task_type, tags)
if not template_list:
print("No templates found")
return
print(f"Found {len(template_list)} templates:")
for t in template_list:
tags_str = f" [tags: {', '.join(t.tags)}]" if t.tags else ""
print(f" {t.name}: {t.task_type.value} ({len(t.examples)} examples, "
f"used {t.usage_count} times){tags_str}")
elif args.command == "create":
task_type = TaskType(args.type) if args.type else None
tags = args.tags.split(",") if args.tags else None
template = templates.create_template(
args.session_id,
name=args.name,
task_type=task_type,
max_examples=args.max_examples,
description=args.description or "",
tags=tags
)
if template:
print(f"Created template: {template.name}")
print(f" Type: {template.task_type.value}")
print(f" Examples: {len(template.examples)}")
if template.tags:
print(f" Tags: {', '.join(template.tags)}")
else:
print("Failed to create template")
elif args.command == "delete":
if templates.delete_template(args.name):
print(f"Deleted template: {args.name}")
else:
print(f"Template not found: {args.name}")
elif args.command == "stats":
stats = templates.get_template_stats()
print("Template Statistics:")
print(f" Total templates: {stats['total']}")
print(f" Total examples: {stats['total_examples']}")
print(f" Total usage: {stats['total_usage']}")
if stats['by_type']:
print(" By type:")
for task_type, count in stats['by_type'].items():
print(f" {task_type}: {count}")
else:
parser.print_help()
if __name__ == "__main__":
main()