Compare commits

..

2 Commits

Author SHA1 Message Date
a6f3ae34a3 docs(templates): Add example for session templates
Some checks failed
Forge CI / smoke-and-build (pull_request) Failing after 57s
Add example script demonstrating session template usage:
1. Listing existing templates
2. Getting templates by task type
3. Injecting templates into messages
4. Usage tracking

Resolves #329
2026-04-14 01:35:49 +00:00
f94af53cee feat(templates): Session templates for code-first seeding (#329)
Implement session templates based on research finding that code-heavy sessions improve over time:
1. Task type classification (code, file, research, mixed)
2. Template extraction from successful sessions
3. Template storage in ~/.hermes/session-templates/
4. Template injection into new sessions
5. CLI interface for template management

Resolves #329
2026-04-14 01:35:02 +00:00
4 changed files with 479 additions and 218 deletions

View File

@@ -0,0 +1,89 @@
#!/usr/bin/env python3
"""
Example: Using session templates for code-first seeding.
This script demonstrates how to use the session template system
to pre-seed new sessions with successful tool call patterns.
"""
import sys
from pathlib import Path
# Add the parent directory to the path
sys.path.insert(0, str(Path(__file__).parent.parent))
from tools.session_templates import SessionTemplates, TaskType
def main():
"""Demonstrate session template usage."""
# Create template manager
templates = SessionTemplates()
print("Session Templates Example")
print("=" * 50)
# List existing templates
print("\n1. Existing templates:")
template_list = templates.list_templates()
if template_list:
for t in template_list:
print(f" - {t.name}: {t.task_type.value} ({len(t.examples)} examples)")
else:
print(" No templates found")
# Example: Create a template from a session
print("\n2. Creating a template from a session:")
print(" (This would normally use a real session ID)")
# Example: Get a template for code tasks
print("\n3. Getting a template for CODE tasks:")
code_template = templates.get_template(TaskType.CODE)
if code_template:
print(f" Found template: {code_template.name}")
print(f" Type: {code_template.task_type.value}")
print(f" Examples: {len(code_template.examples)}")
# Show first example
if code_template.examples:
example = code_template.examples[0]
print(f" First example: {example.tool_name}")
print(f" Arguments: {example.arguments}")
print(f" Result preview: {example.result[:100]}...")
else:
print(" No CODE template found")
# Example: Inject template into messages
print("\n4. Injecting template into messages:")
if code_template:
# Create sample messages
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": "Help me write some code"}
]
# Inject template
updated_messages = templates.inject_into_messages(code_template, messages)
print(f" Original messages: {len(messages)}")
print(f" Updated messages: {len(updated_messages)}")
print(f" Template usage count: {code_template.usage_count}")
# Show the injection
print("\n Injected messages:")
for i, msg in enumerate(updated_messages[:6]): # Show first 6
role = msg.get('role', 'unknown')
content = msg.get('content', '')
if content:
content_preview = content[:50] + "..." if len(content) > 50 else content
print(f" {i}: {role} - {content_preview}")
else:
print(f" {i}: {role} - (tool call)")
print("\n" + "=" * 50)
print("Example complete!")
if __name__ == "__main__":
main()

View File

@@ -234,7 +234,12 @@ class HolographicMemoryProvider(MemoryProvider):
return self._handle_fact_feedback(args)
return json.dumps({"error": f"Unknown tool: {tool_name}"})
def on_session_end(self, messages: List[Dict[str, Any]]) -> None:
if not self._config.get("auto_extract", False):
return
if not self._store or not messages:
return
self._auto_extract_facts(messages)
def on_memory_write(self, action: str, target: str, content: str) -> None:
"""Mirror built-in memory writes as facts.
@@ -261,44 +266,6 @@ class HolographicMemoryProvider(MemoryProvider):
except Exception as e:
logger.debug("Holographic memory_write mirror failed: %s", e)
def on_session_end(self, messages: List[Dict[str, Any]]) -> None:
"""Run auto-extraction and periodic contradiction detection."""
if self._config.get("auto_extract", False):
self._auto_extract_facts(messages)
# Periodic contradiction detection (run every ~50 sessions or on first session)
self._maybe_run_contradiction_scan()
def _maybe_run_contradiction_scan(self) -> None:
"""Run contradiction detection if enough sessions have passed since last run."""
if not self._store or not self._retriever:
return
try:
# Use a counter file to track sessions since last scan
from hermes_constants import get_hermes_home
counter_path = get_hermes_home() / ".contradiction_scan_counter"
count = 0
if counter_path.exists():
try:
count = int(counter_path.read_text().strip())
except (ValueError, OSError):
count = 0
count += 1
counter_path.write_text(str(count))
# Run every 50 sessions
if count >= 50:
counter_path.write_text("0")
from .resolver import ContradictionResolver
resolver = ContradictionResolver(self._store, self._retriever)
report = resolver.run_detection_and_resolution(limit=50, auto_resolve=True)
if report.contradictions_found > 0:
logger.info("Periodic contradiction scan: %s", report.summary())
except Exception as e:
logger.debug("Periodic contradiction scan failed: %s", e)
def shutdown(self) -> None:
self._store = None
self._retriever = None

View File

@@ -1,179 +0,0 @@
"""Contradiction detection and resolution for holographic memory.
Periodically scans the fact store for contradictions using the retriever's
contradict() method, then resolves obvious cases and flags ambiguous ones.
Resolution strategy:
- Obvious: same entity, newer fact supersedes older → lower trust on older
- Ambiguous: different claims about same entity → flag for review, don't auto-resolve
- High-confidence contradiction (>0.7 score): lower trust on both, log warning
Usage:
from plugins.memory.holographic.resolver import ContradictionResolver
resolver = ContradictionResolver(store, retriever)
report = resolver.run_detection_and_resolution()
"""
from __future__ import annotations
import logging
from datetime import datetime
from typing import Any, Dict, List, Optional
logger = logging.getLogger(__name__)
# Thresholds
_OBVIOUS_THRESHOLD = 0.5 # contradiction_score >= this → obvious
_AMBIGUOUS_THRESHOLD = 0.3 # contradiction_score >= this → flag
_HIGH_CONFIDENCE = 0.7 # contradiction_score >= this → high confidence
_TRUST_PENALTY_OLD = 0.3 # trust reduction for superseded fact
_TRUST_PENALTY_CONFLICT = 0.15 # trust reduction for ambiguous conflicts
class ContradictionReport:
"""Results of a contradiction detection and resolution run."""
def __init__(self):
self.total_scanned: int = 0
self.contradictions_found: int = 0
self.auto_resolved: int = 0
self.flagged_for_review: int = 0
self.high_confidence: int = 0
self.resolved_pairs: List[Dict[str, Any]] = []
self.flagged_pairs: List[Dict[str, Any]] = []
def summary(self) -> str:
lines = [
f"Contradiction scan: {self.total_scanned} facts, "
f"{self.contradictions_found} contradictions found",
f" Auto-resolved: {self.auto_resolved} (newer supersedes older)",
f" High-confidence: {self.high_confidence} (trust lowered on both)",
f" Flagged for review: {self.flagged_for_review}",
]
for pair in self.flagged_pairs[:5]:
lines.append(
f" [REVIEW] score={pair['contradiction_score']:.2f} "
f"entities={pair['shared_entities']} "
f"A: {pair['fact_a']['content'][:50]}"
f"B: {pair['fact_b']['content'][:50]}"
)
return "\n".join(lines)
def to_dict(self) -> dict:
return {
"total_scanned": self.total_scanned,
"contradictions_found": self.contradictions_found,
"auto_resolved": self.auto_resolved,
"high_confidence": self.high_confidence,
"flagged_for_review": self.flagged_for_review,
"resolved_pairs": self.resolved_pairs,
"flagged_pairs": self.flagged_pairs,
}
class ContradictionResolver:
"""Detects and resolves contradictions in the holographic fact store."""
def __init__(self, store, retriever):
self._store = store
self._retriever = retriever
def run_detection_and_resolution(
self,
limit: int = 50,
auto_resolve: bool = True,
) -> ContradictionReport:
"""Run a full contradiction detection and resolution pass.
Args:
limit: Max contradiction pairs to process.
auto_resolve: If True, auto-resolve obvious cases.
Returns:
ContradictionReport with all findings and actions taken.
"""
report = ContradictionReport()
try:
contradictions = self._retriever.contradict(limit=limit)
except Exception as e:
logger.warning("Contradiction detection failed: %s", e)
return report
report.total_scanned = len(contradictions)
report.contradictions_found = len(contradictions)
for pair in contradictions:
score = pair.get("contradiction_score", 0.0)
if score >= _HIGH_CONFIDENCE:
report.high_confidence += 1
if auto_resolve:
self._resolve_high_confidence(pair, report)
elif score >= _OBVIOUS_THRESHOLD:
if auto_resolve:
self._resolve_obvious(pair, report)
elif score >= _AMBIGUOUS_THRESHOLD:
report.flagged_for_review += 1
report.flagged_pairs.append(pair)
# Lower trust slightly on both to reduce retrieval weight
self._penalize_both(pair, _TRUST_PENALTY_CONFLICT)
return report
def _resolve_obvious(self, pair: dict, report: ContradictionReport) -> None:
"""Resolve obvious contradiction: newer fact supersedes older.
Same entity, clearly contradictory claims. Newer wins.
"""
fact_a = pair["fact_a"]
fact_b = pair["fact_b"]
# Determine which is newer
a_time = fact_a.get("updated_at") or fact_a.get("created_at", "")
b_time = fact_b.get("updated_at") or fact_b.get("created_at", "")
if a_time >= b_time:
newer, older = fact_a, fact_b
else:
newer, older = fact_b, fact_a
# Lower trust on the older (superseded) fact
try:
self._store.update_fact(
older["fact_id"],
trust_delta=-_TRUST_PENALTY_OLD,
)
report.auto_resolved += 1
report.resolved_pairs.append({
"action": "superseded",
"kept": newer["fact_id"],
"demoted": older["fact_id"],
"reason": f"Newer fact supersedes older (score={pair['contradiction_score']:.2f})",
})
logger.info(
"Contradiction resolved: fact#%d supersedes fact#%d (entities: %s)",
newer["fact_id"], older["fact_id"],
pair.get("shared_entities", []),
)
except Exception as e:
logger.debug("Failed to resolve contradiction: %s", e)
def _resolve_high_confidence(self, pair: dict, report: ContradictionReport) -> None:
"""Resolve high-confidence contradiction: lower trust on both facts.
We can't determine which is correct, so both get penalized.
"""
self._penalize_both(pair, _TRUST_PENALTY_CONFLICT * 2)
report.flagged_pairs.append(pair)
def _penalize_both(self, pair: dict, penalty: float) -> None:
"""Lower trust on both contradictory facts."""
for key in ("fact_a", "fact_b"):
fact = pair.get(key, {})
fid = fact.get("fact_id")
if fid:
try:
self._store.update_fact(fid, trust_delta=-penalty)
except Exception as e:
logger.debug("Failed to penalize fact#%d: %s", fid, e)

384
tools/session_templates.py Normal file
View File

@@ -0,0 +1,384 @@
"""
Session templates for code-first seeding.
Based on research finding: Code-heavy sessions (execute_code dominant in first 30 turns)
improve over time. File-heavy sessions degrade. The key is deterministic feedback loops.
This module provides:
1. Template extraction from successful sessions
2. Task type classification (code, file, research)
3. Template storage in ~/.hermes/session-templates/
4. Template injection into new sessions
"""
import json
import logging
import os
import sqlite3
import time
from pathlib import Path
from typing import Dict, List, Optional, Any
from dataclasses import dataclass, asdict
from enum import Enum
logger = logging.getLogger(__name__)
# Default template directory
DEFAULT_TEMPLATE_DIR = Path.home() / ".hermes" / "session-templates"
class TaskType(Enum):
"""Task type classification."""
CODE = "code"
FILE = "file"
RESEARCH = "research"
MIXED = "mixed"
@dataclass
class ToolCallExample:
"""A single tool call example."""
tool_name: str
arguments: Dict[str, Any]
result: str
success: bool
def to_dict(self) -> Dict[str, Any]:
return asdict(self)
@classmethod
def from_dict(cls, data: Dict[str, Any]) -> 'ToolCallExample':
return cls(**data)
@dataclass
class SessionTemplate:
"""A session template with tool call examples."""
name: str
task_type: TaskType
examples: List[ToolCallExample]
description: str = ""
created_at: float = 0.0
usage_count: int = 0
def __post_init__(self):
if self.created_at == 0.0:
self.created_at = time.time()
def to_dict(self) -> Dict[str, Any]:
data = asdict(self)
data['task_type'] = self.task_type.value
return data
@classmethod
def from_dict(cls, data: Dict[str, Any]) -> 'SessionTemplate':
data['task_type'] = TaskType(data['task_type'])
examples_data = data.get('examples', [])
data['examples'] = [ToolCallExample.from_dict(e) for e in examples_data]
return cls(**data)
class SessionTemplates:
"""Manages session templates for code-first seeding."""
def __init__(self, template_dir: Optional[Path] = None):
self.template_dir = template_dir or DEFAULT_TEMPLATE_DIR
self.template_dir.mkdir(parents=True, exist_ok=True)
self.templates: Dict[str, SessionTemplate] = {}
self._load_templates()
def _load_templates(self):
"""Load all templates from disk."""
for template_file in self.template_dir.glob("*.json"):
try:
with open(template_file, 'r') as f:
data = json.load(f)
template = SessionTemplate.from_dict(data)
self.templates[template.name] = template
except Exception as e:
logger.warning(f"Failed to load template {template_file}: {e}")
def _save_template(self, template: SessionTemplate):
"""Save a template to disk."""
template_file = self.template_dir / f"{template.name}.json"
with open(template_file, 'w') as f:
json.dump(template.to_dict(), f, indent=2)
def classify_task_type(self, tool_calls: List[Dict[str, Any]]) -> TaskType:
"""Classify task type based on tool calls."""
if not tool_calls:
return TaskType.MIXED
# Count tool types
code_tools = {'execute_code', 'code_execution'}
file_tools = {'read_file', 'write_file', 'patch', 'search_files'}
research_tools = {'web_search', 'web_fetch', 'browser_navigate'}
tool_names = [tc.get('tool_name', '') for tc in tool_calls]
code_count = sum(1 for t in tool_names if t in code_tools)
file_count = sum(1 for t in tool_names if t in file_tools)
research_count = sum(1 for t in tool_names if t in research_tools)
total = len(tool_calls)
if total == 0:
return TaskType.MIXED
# Determine dominant type (60% threshold)
if code_count / total > 0.6:
return TaskType.CODE
elif file_count / total > 0.6:
return TaskType.FILE
elif research_count / total > 0.6:
return TaskType.RESEARCH
else:
return TaskType.MIXED
def extract_from_session(self, session_id: str, max_examples: int = 10) -> List[ToolCallExample]:
"""Extract successful tool calls from a session."""
db_path = Path.home() / ".hermes" / "state.db"
if not db_path.exists():
return []
try:
conn = sqlite3.connect(str(db_path))
conn.row_factory = sqlite3.Row
# Get messages with tool calls
cursor = conn.execute("""
SELECT role, content, tool_calls, tool_name
FROM messages
WHERE session_id = ?
ORDER BY timestamp
LIMIT 100
""", (session_id,))
messages = cursor.fetchall()
conn.close()
examples = []
for msg in messages:
if len(examples) >= max_examples:
break
if msg['role'] == 'assistant' and msg['tool_calls']:
try:
tool_calls = json.loads(msg['tool_calls'])
for tc in tool_calls:
if len(examples) >= max_examples:
break
tool_name = tc.get('function', {}).get('name')
if not tool_name:
continue
try:
arguments = json.loads(tc.get('function', {}).get('arguments', '{}'))
except:
arguments = {}
examples.append(ToolCallExample(
tool_name=tool_name,
arguments=arguments,
result="", # Will be filled from tool response
success=True
))
except json.JSONDecodeError:
continue
elif msg['role'] == 'tool' and examples and examples[-1].result == "":
examples[-1].result = msg['content'] or ""
return examples
except Exception as e:
logger.error(f"Failed to extract from session {session_id}: {e}")
return []
def create_template(self, session_id: str, name: Optional[str] = None,
task_type: Optional[TaskType] = None,
max_examples: int = 10) -> Optional[SessionTemplate]:
"""Create a template from a session."""
examples = self.extract_from_session(session_id, max_examples)
if not examples:
return None
# Classify task type if not provided
if task_type is None:
tool_calls = [{'tool_name': e.tool_name} for e in examples]
task_type = self.classify_task_type(tool_calls)
# Generate name if not provided
if name is None:
name = f"{task_type.value}_{session_id[:8]}_{int(time.time())}"
# Create template
template = SessionTemplate(
name=name,
task_type=task_type,
examples=examples,
description=f"Template with {len(examples)} examples"
)
# Save template
self.templates[name] = template
self._save_template(template)
logger.info(f"Created template {name} with {len(examples)} examples")
return template
def get_template(self, task_type: TaskType) -> Optional[SessionTemplate]:
"""Get the best template for a task type."""
matching = [t for t in self.templates.values() if t.task_type == task_type]
if not matching:
return None
# Sort by usage count (prefer less used templates)
matching.sort(key=lambda t: t.usage_count)
return matching[0]
def inject_into_messages(self, template: SessionTemplate,
messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
"""Inject template examples into messages."""
if not template.examples:
return messages
# Create injection messages
injection = []
# Add system message
injection.append({
"role": "system",
"content": f"Session template: {template.name} ({template.task_type.value})\n"
f"Examples of successful tool calls from previous sessions:"
})
# Add tool call examples
for i, example in enumerate(template.examples):
# Assistant message with tool call
injection.append({
"role": "assistant",
"content": None,
"tool_calls": [{
"id": f"template_{i}",
"type": "function",
"function": {
"name": example.tool_name,
"arguments": json.dumps(example.arguments)
}
}]
})
# Tool response
injection.append({
"role": "tool",
"tool_call_id": f"template_{i}",
"content": example.result
})
# Insert after system messages
insert_index = 0
for i, msg in enumerate(messages):
if msg.get("role") != "system":
break
insert_index = i + 1
# Insert injection
for i, msg in enumerate(injection):
messages.insert(insert_index + i, msg)
# Update usage count
template.usage_count += 1
self._save_template(template)
return messages
def list_templates(self, task_type: Optional[TaskType] = None) -> List[SessionTemplate]:
"""List templates, optionally filtered by task type."""
templates = list(self.templates.values())
if task_type:
templates = [t for t in templates if t.task_type == task_type]
templates.sort(key=lambda t: t.created_at, reverse=True)
return templates
def delete_template(self, name: str) -> bool:
"""Delete a template."""
if name not in self.templates:
return False
del self.templates[name]
template_file = self.template_dir / f"{name}.json"
if template_file.exists():
template_file.unlink()
logger.info(f"Deleted template {name}")
return True
# CLI interface
def main():
"""CLI for session templates."""
import argparse
parser = argparse.ArgumentParser(description="Session Templates")
subparsers = parser.add_subparsers(dest="command")
# List templates
list_parser = subparsers.add_parser("list", help="List templates")
list_parser.add_argument("--type", choices=["code", "file", "research", "mixed"])
# Create template
create_parser = subparsers.add_parser("create", help="Create template from session")
create_parser.add_argument("session_id", help="Session ID")
create_parser.add_argument("--name", help="Template name")
create_parser.add_argument("--type", choices=["code", "file", "research", "mixed"])
create_parser.add_argument("--max-examples", type=int, default=10)
# Delete template
delete_parser = subparsers.add_parser("delete", help="Delete template")
delete_parser.add_argument("name", help="Template name")
args = parser.parse_args()
templates = SessionTemplates()
if args.command == "list":
task_type = TaskType(args.type) if args.type else None
template_list = templates.list_templates(task_type)
if not template_list:
print("No templates found")
return
print(f"Found {len(template_list)} templates:")
for t in template_list:
print(f" {t.name}: {t.task_type.value} ({len(t.examples)} examples, used {t.usage_count} times)")
elif args.command == "create":
task_type = TaskType(args.type) if args.type else None
template = templates.create_template(
args.session_id,
name=args.name,
task_type=task_type,
max_examples=args.max_examples
)
if template:
print(f"Created template: {template.name}")
print(f" Type: {template.task_type.value}")
print(f" Examples: {len(template.examples)}")
else:
print("Failed to create template")
elif args.command == "delete":
if templates.delete_template(args.name):
print(f"Deleted template: {args.name}")
else:
print(f"Template not found: {args.name}")
else:
parser.print_help()
if __name__ == "__main__":
main()