2026-02-19 00:57:31 -08:00
#!/usr/bin/env python3
"""
Session Search Tool - Long - Term Conversation Recall
Searches past session transcripts in SQLite via FTS5 , then summarizes the top
matching sessions using a cheap / fast model ( same pattern as web_extract ) .
Returns focused summaries of past conversations rather than raw transcripts ,
keeping the main model ' s context window clean.
Flow :
1. FTS5 search finds matching messages ranked by relevance
2. Groups by session , takes the top N unique sessions ( default 3 )
3. Loads each session ' s conversation, truncates to ~100k chars centered on matches
4. Sends to Gemini Flash with a focused summarization prompt
5. Returns per - session summaries with metadata
"""
import asyncio
import concurrent . futures
import json
import logging
2026-03-02 01:50:37 +03:00
from typing import Dict , Any , List , Optional , Union
2026-02-19 00:57:31 -08:00
2026-03-27 15:28:19 -07:00
from agent . auxiliary_client import async_call_llm , extract_content_or_reasoning
2026-02-19 00:57:31 -08:00
MAX_SESSION_CHARS = 100_000
2026-02-28 21:47:51 -08:00
MAX_SUMMARY_TOKENS = 10000
2026-02-19 00:57:31 -08:00
2026-03-04 21:25:54 -08:00
def _format_timestamp ( ts : Union [ int , float , str , None ] ) - > str :
""" Convert a Unix timestamp (float/int) or ISO string to a human-readable date.
Returns " unknown " for None , str ( ts ) if conversion fails .
2026-03-02 01:50:37 +03:00
"""
2026-02-22 02:37:26 -08:00
if ts is None :
return " unknown "
try :
if isinstance ( ts , ( int , float ) ) :
from datetime import datetime
dt = datetime . fromtimestamp ( ts )
return dt . strftime ( " % B %d , % Y at % I: % M % p " )
if isinstance ( ts , str ) :
if ts . replace ( " . " , " " ) . replace ( " - " , " " ) . isdigit ( ) :
from datetime import datetime
dt = datetime . fromtimestamp ( float ( ts ) )
return dt . strftime ( " % B %d , % Y at % I: % M % p " )
return ts
2026-03-02 01:50:37 +03:00
except ( ValueError , OSError , OverflowError ) as e :
# Log specific errors for debugging while gracefully handling edge cases
2026-03-16 15:22:00 +03:00
logging . debug ( " Failed to format timestamp %s : %s " , ts , e , exc_info = True )
2026-03-02 01:50:37 +03:00
except Exception as e :
2026-03-16 15:22:00 +03:00
logging . debug ( " Unexpected error formatting timestamp %s : %s " , ts , e , exc_info = True )
2026-02-22 02:37:26 -08:00
return str ( ts )
2026-02-19 00:57:31 -08:00
def _format_conversation ( messages : List [ Dict [ str , Any ] ] ) - > str :
""" Format session messages into a readable transcript for summarization. """
parts = [ ]
for msg in messages :
role = msg . get ( " role " , " unknown " ) . upper ( )
content = msg . get ( " content " ) or " "
tool_name = msg . get ( " tool_name " )
if role == " TOOL " and tool_name :
# Truncate long tool outputs
if len ( content ) > 500 :
content = content [ : 250 ] + " \n ...[truncated]... \n " + content [ - 250 : ]
parts . append ( f " [TOOL: { tool_name } ]: { content } " )
elif role == " ASSISTANT " :
# Include tool call names if present
tool_calls = msg . get ( " tool_calls " )
if tool_calls and isinstance ( tool_calls , list ) :
tc_names = [ ]
for tc in tool_calls :
if isinstance ( tc , dict ) :
name = tc . get ( " name " ) or tc . get ( " function " , { } ) . get ( " name " , " ? " )
tc_names . append ( name )
if tc_names :
parts . append ( f " [ASSISTANT]: [Called: { ' , ' . join ( tc_names ) } ] " )
if content :
parts . append ( f " [ASSISTANT]: { content } " )
else :
parts . append ( f " [ASSISTANT]: { content } " )
else :
parts . append ( f " [ { role } ]: { content } " )
return " \n \n " . join ( parts )
def _truncate_around_matches (
full_text : str , query : str , max_chars : int = MAX_SESSION_CHARS
) - > str :
"""
Truncate a conversation transcript to max_chars , centered around
where the query terms appear . Keeps content near matches , trims the edges .
"""
if len ( full_text ) < = max_chars :
return full_text
# Find the first occurrence of any query term
query_terms = query . lower ( ) . split ( )
text_lower = full_text . lower ( )
first_match = len ( full_text )
for term in query_terms :
pos = text_lower . find ( term )
if pos != - 1 and pos < first_match :
first_match = pos
if first_match == len ( full_text ) :
# No match found, take from the start
first_match = 0
# Center the window around the first match
half = max_chars / / 2
start = max ( 0 , first_match - half )
end = min ( len ( full_text ) , start + max_chars )
if end - start < max_chars :
start = max ( 0 , end - max_chars )
truncated = full_text [ start : end ]
prefix = " ...[earlier conversation truncated]... \n \n " if start > 0 else " "
suffix = " \n \n ...[later conversation truncated]... " if end < len ( full_text ) else " "
return prefix + truncated + suffix
async def _summarize_session (
conversation_text : str , query : str , session_meta : Dict [ str , Any ]
) - > Optional [ str ] :
""" Summarize a single session conversation focused on the search query. """
system_prompt = (
" You are reviewing a past conversation transcript to help recall what happened. "
" Summarize the conversation with a focus on the search topic. Include: \n "
" 1. What the user asked about or wanted to accomplish \n "
" 2. What actions were taken and what the outcomes were \n "
" 3. Key decisions, solutions found, or conclusions reached \n "
" 4. Any specific commands, files, URLs, or technical details that were important \n "
" 5. Anything left unresolved or notable \n \n "
" Be thorough but concise. Preserve specific details (commands, paths, error messages) "
" that would be useful to recall. Write in past tense as a factual recap. "
)
source = session_meta . get ( " source " , " unknown " )
2026-02-22 02:37:26 -08:00
started = _format_timestamp ( session_meta . get ( " started_at " ) )
2026-02-19 00:57:31 -08:00
user_prompt = (
f " Search topic: { query } \n "
f " Session source: { source } \n "
2026-02-22 02:37:26 -08:00
f " Session date: { started } \n \n "
2026-02-19 00:57:31 -08:00
f " CONVERSATION TRANSCRIPT: \n { conversation_text } \n \n "
f " Summarize this conversation with focus on: { query } "
)
max_retries = 3
for attempt in range ( max_retries ) :
try :
2026-03-11 20:52:19 -07:00
response = await async_call_llm (
task = " session_search " ,
2026-02-19 00:57:31 -08:00
messages = [
{ " role " : " system " , " content " : system_prompt } ,
{ " role " : " user " , " content " : user_prompt } ,
] ,
temperature = 0.1 ,
2026-03-11 20:52:19 -07:00
max_tokens = MAX_SUMMARY_TOKENS ,
2026-02-19 00:57:31 -08:00
)
2026-03-27 15:28:19 -07:00
content = extract_content_or_reasoning ( response )
if content :
return content
# Reasoning-only / empty — let the retry loop handle it
logging . warning ( " Session search LLM returned empty content (attempt %d / %d ) " , attempt + 1 , max_retries )
if attempt < max_retries - 1 :
await asyncio . sleep ( 1 * ( attempt + 1 ) )
continue
return content
2026-03-11 20:52:19 -07:00
except RuntimeError :
logging . warning ( " No auxiliary model available for session summarization " )
return None
2026-02-19 00:57:31 -08:00
except Exception as e :
if attempt < max_retries - 1 :
await asyncio . sleep ( 1 * ( attempt + 1 ) )
else :
2026-03-16 15:22:00 +03:00
logging . warning (
" Session summarization failed after %d attempts: %s " ,
max_retries ,
e ,
exc_info = True ,
)
2026-02-19 00:57:31 -08:00
return None
2026-03-26 14:35:31 -07:00
# Sources that are excluded from session browsing/searching by default.
# Third-party integrations (Paperclip agents, etc.) tag their sessions with
# HERMES_SESSION_SOURCE=tool so they don't clutter the user's session history.
_HIDDEN_SESSION_SOURCES = ( " tool " , )
2026-03-22 11:22:10 -07:00
def _list_recent_sessions ( db , limit : int , current_session_id : str = None ) - > str :
""" Return metadata for the most recent sessions (no LLM calls). """
try :
2026-03-26 14:35:31 -07:00
sessions = db . list_sessions_rich ( limit = limit + 5 , exclude_sources = list ( _HIDDEN_SESSION_SOURCES ) ) # fetch extra to skip current
2026-03-22 11:22:10 -07:00
# Resolve current session lineage to exclude it
current_root = None
if current_session_id :
try :
sid = current_session_id
visited = set ( )
while sid and sid not in visited :
visited . add ( sid )
s = db . get_session ( sid )
parent = s . get ( " parent_session_id " ) if s else None
sid = parent if parent else None
current_root = max ( visited , key = len ) if visited else current_session_id
except Exception :
current_root = current_session_id
results = [ ]
for s in sessions :
sid = s . get ( " id " , " " )
if current_root and ( sid == current_root or sid == current_session_id ) :
continue
# Skip child/delegation sessions (they have parent_session_id)
if s . get ( " parent_session_id " ) :
continue
results . append ( {
" session_id " : sid ,
" title " : s . get ( " title " ) or None ,
" source " : s . get ( " source " , " " ) ,
" started_at " : s . get ( " started_at " , " " ) ,
" last_active " : s . get ( " last_active " , " " ) ,
" message_count " : s . get ( " message_count " , 0 ) ,
" preview " : s . get ( " preview " , " " ) ,
} )
if len ( results ) > = limit :
break
return json . dumps ( {
" success " : True ,
" mode " : " recent " ,
" results " : results ,
" count " : len ( results ) ,
" message " : f " Showing { len ( results ) } most recent sessions. Use a keyword query to search specific topics. " ,
} , ensure_ascii = False )
except Exception as e :
logging . error ( " Error listing recent sessions: %s " , e , exc_info = True )
return json . dumps ( { " success " : False , " error " : f " Failed to list recent sessions: { e } " } , ensure_ascii = False )
2026-02-19 00:57:31 -08:00
def session_search (
query : str ,
role_filter : str = None ,
limit : int = 3 ,
db = None ,
2026-03-04 06:06:40 -08:00
current_session_id : str = None ,
2026-02-19 00:57:31 -08:00
) - > str :
"""
Search past sessions and return focused summaries of matching conversations .
Uses FTS5 to find matches , then summarizes the top sessions with Gemini Flash .
2026-03-04 06:06:40 -08:00
The current session is excluded from results since the agent already has that context .
2026-02-19 00:57:31 -08:00
"""
if db is None :
return json . dumps ( { " success " : False , " error " : " Session database not available. " } , ensure_ascii = False )
2026-03-22 11:22:10 -07:00
limit = min ( limit , 5 ) # Cap at 5 sessions to avoid excessive LLM calls
# Recent sessions mode: when query is empty, return metadata for recent sessions.
# No LLM calls — just DB queries for titles, previews, timestamps.
2026-02-19 00:57:31 -08:00
if not query or not query . strip ( ) :
2026-03-22 11:22:10 -07:00
return _list_recent_sessions ( db , limit , current_session_id )
2026-02-19 00:57:31 -08:00
query = query . strip ( )
try :
# Parse role filter
role_list = None
if role_filter and role_filter . strip ( ) :
role_list = [ r . strip ( ) for r in role_filter . split ( " , " ) if r . strip ( ) ]
# FTS5 search -- get matches ranked by relevance
raw_results = db . search_messages (
query = query ,
role_filter = role_list ,
2026-03-26 14:35:31 -07:00
exclude_sources = list ( _HIDDEN_SESSION_SOURCES ) ,
2026-02-19 00:57:31 -08:00
limit = 50 , # Get more matches to find unique sessions
offset = 0 ,
)
if not raw_results :
return json . dumps ( {
" success " : True ,
" query " : query ,
" results " : [ ] ,
" count " : 0 ,
" message " : " No matching sessions found. " ,
} , ensure_ascii = False )
2026-02-24 04:07:37 -08:00
# Resolve child sessions to their parent — delegation stores detailed
# content in child sessions, but the user's conversation is the parent.
2026-03-04 21:25:54 -08:00
def _resolve_to_parent ( session_id : str ) - > str :
""" Walk delegation chain to find the root parent session ID. """
2026-02-24 04:07:37 -08:00
visited = set ( )
sid = session_id
while sid and sid not in visited :
visited . add ( sid )
2026-03-02 01:50:37 +03:00
try :
session = db . get_session ( sid )
if not session :
break
parent = session . get ( " parent_session_id " )
if parent :
sid = parent
else :
break
except Exception as e :
2026-03-16 15:22:00 +03:00
logging . debug (
" Error resolving parent for session %s : %s " ,
sid ,
e ,
exc_info = True ,
)
2026-02-24 04:07:37 -08:00
break
return sid
2026-03-20 11:56:02 -07:00
current_lineage_root = (
_resolve_to_parent ( current_session_id ) if current_session_id else None
)
# Group by resolved (parent) session_id, dedup, skip the current
# session lineage. Compression and delegation create child sessions
# that still belong to the same active conversation.
2026-02-19 00:57:31 -08:00
seen_sessions = { }
for result in raw_results :
2026-02-24 04:07:37 -08:00
raw_sid = result [ " session_id " ]
resolved_sid = _resolve_to_parent ( raw_sid )
2026-03-20 11:56:02 -07:00
# Skip the current session lineage — the agent already has that
# context, even if older turns live in parent fragments.
if current_lineage_root and resolved_sid == current_lineage_root :
2026-03-04 06:06:40 -08:00
continue
if current_session_id and raw_sid == current_session_id :
continue
2026-02-24 04:07:37 -08:00
if resolved_sid not in seen_sessions :
result = dict ( result )
result [ " session_id " ] = resolved_sid
seen_sessions [ resolved_sid ] = result
2026-02-19 00:57:31 -08:00
if len ( seen_sessions ) > = limit :
break
2026-02-24 04:07:37 -08:00
# Prepare all sessions for parallel summarization
tasks = [ ]
2026-02-19 00:57:31 -08:00
for session_id , match_info in seen_sessions . items ( ) :
try :
messages = db . get_messages_as_conversation ( session_id )
if not messages :
continue
session_meta = db . get_session ( session_id ) or { }
conversation_text = _format_conversation ( messages )
conversation_text = _truncate_around_matches ( conversation_text , query )
2026-02-24 04:07:37 -08:00
tasks . append ( ( session_id , match_info , conversation_text , session_meta ) )
except Exception as e :
2026-03-16 15:22:00 +03:00
logging . warning (
" Failed to prepare session %s : %s " ,
session_id ,
e ,
exc_info = True ,
)
2026-02-19 00:57:31 -08:00
2026-02-24 04:07:37 -08:00
# Summarize all sessions in parallel
2026-03-02 01:50:37 +03:00
async def _summarize_all ( ) - > List [ Union [ str , Exception ] ] :
""" Summarize all sessions in parallel. """
2026-02-24 04:07:37 -08:00
coros = [
_summarize_session ( text , query , meta )
for _ , _ , text , meta in tasks
]
return await asyncio . gather ( * coros , return_exceptions = True )
2026-02-19 00:57:31 -08:00
2026-02-24 04:07:37 -08:00
try :
2026-03-25 17:31:56 -07:00
# Use _run_async() which properly manages event loops across
# CLI, gateway, and worker-thread contexts. The previous
# pattern (asyncio.run() in a ThreadPoolExecutor) created a
# disposable event loop that conflicted with cached
# AsyncOpenAI/httpx clients bound to a different loop,
# causing deadlocks in gateway mode (#2681).
from model_tools import _run_async
results = _run_async ( _summarize_all ( ) )
2026-03-02 01:50:37 +03:00
except concurrent . futures . TimeoutError :
2026-03-16 15:22:00 +03:00
logging . warning (
" Session summarization timed out after 60 seconds " ,
exc_info = True ,
)
2026-03-02 01:50:37 +03:00
return json . dumps ( {
" success " : False ,
" error " : " Session summarization timed out. Try a more specific query or reduce the limit. " ,
} , ensure_ascii = False )
2026-02-24 04:07:37 -08:00
summaries = [ ]
2026-03-27 21:27:51 -07:00
for ( session_id , match_info , conversation_text , _ ) , result in zip ( tasks , results ) :
2026-02-24 04:07:37 -08:00
if isinstance ( result , Exception ) :
2026-03-16 15:22:00 +03:00
logging . warning (
" Failed to summarize session %s : %s " ,
2026-03-27 21:27:51 -07:00
session_id , result , exc_info = True ,
2026-03-16 15:22:00 +03:00
)
2026-03-27 21:27:51 -07:00
result = None
entry = {
" session_id " : session_id ,
" when " : _format_timestamp ( match_info . get ( " session_started " ) ) ,
" source " : match_info . get ( " source " , " unknown " ) ,
" model " : match_info . get ( " model " ) ,
}
2026-02-24 04:07:37 -08:00
if result :
2026-03-27 21:27:51 -07:00
entry [ " summary " ] = result
else :
# Fallback: raw preview so matched sessions aren't silently
# dropped when the summarizer is unavailable (fixes #3409).
preview = ( conversation_text [ : 500 ] + " \n …[truncated] " ) if conversation_text else " No preview available. "
entry [ " summary " ] = f " [Raw preview — summarization unavailable] \n { preview } "
summaries . append ( entry )
2026-02-19 00:57:31 -08:00
return json . dumps ( {
" success " : True ,
" query " : query ,
" results " : summaries ,
" count " : len ( summaries ) ,
" sessions_searched " : len ( seen_sessions ) ,
} , ensure_ascii = False )
except Exception as e :
2026-03-16 15:22:00 +03:00
logging . error ( " Session search failed: %s " , e , exc_info = True )
2026-02-19 00:57:31 -08:00
return json . dumps ( { " success " : False , " error " : f " Search failed: { str ( e ) } " } , ensure_ascii = False )
def check_session_search_requirements ( ) - > bool :
2026-02-22 02:16:11 -08:00
""" Requires SQLite state database and an auxiliary text model. """
2026-02-19 00:57:31 -08:00
try :
from hermes_state import DEFAULT_DB_PATH
return DEFAULT_DB_PATH . parent . exists ( )
except ImportError :
return False
SESSION_SEARCH_SCHEMA = {
" name " : " session_search " ,
" description " : (
2026-03-24 18:08:06 -07:00
" Search your long-term memory of past conversations, or browse recent sessions. This is your recall -- "
2026-02-22 02:31:52 -08:00
" every past session is searchable, and this tool summarizes what happened. \n \n "
2026-03-24 18:08:06 -07:00
" TWO MODES: \n "
" 1. Recent sessions (no query): Call with no arguments to see what was worked on recently. "
" Returns titles, previews, and timestamps. Zero LLM cost, instant. "
" Start here when the user asks what were we working on or what did we do recently. \n "
" 2. Keyword search (with query): Search for specific topics across all past sessions. "
" Returns LLM-generated summaries of matching sessions. \n \n "
2026-02-22 02:31:52 -08:00
" USE THIS PROACTIVELY when: \n "
" - The user says ' we did this before ' , ' remember when ' , ' last time ' , ' as I mentioned ' \n "
" - The user asks about a topic you worked on before but don ' t have in current context \n "
" - The user references a project, person, or concept that seems familiar but isn ' t in memory \n "
" - You want to check if you ' ve solved a similar problem before \n "
" - The user asks ' what did we do about X? ' or ' how did we fix Y? ' \n \n "
2026-03-14 11:26:18 -07:00
" Don ' t hesitate to search when it is actually cross-session -- it ' s fast and cheap. "
" Better to search and confirm than to guess or ask the user to repeat themselves. \n \n "
2026-02-24 04:07:37 -08:00
" Search syntax: keywords joined with OR for broad recall (elevenlabs OR baseten OR funding), "
" phrases for exact match ( \" docker networking \" ), boolean (python NOT java), prefix (deploy*). "
" IMPORTANT: Use OR between keywords for best results — FTS5 defaults to AND which misses "
" sessions that only mention some terms. If a broad OR query returns nothing, try individual "
" keyword searches in parallel. Returns summaries of the top matching sessions. "
2026-02-19 00:57:31 -08:00
) ,
" parameters " : {
" type " : " object " ,
" properties " : {
" query " : {
" type " : " string " ,
2026-03-24 18:08:06 -07:00
" description " : " Search query — keywords, phrases, or boolean expressions to find in past sessions. Omit this parameter entirely to browse recent sessions instead (returns titles, previews, timestamps with no LLM cost). " ,
2026-02-19 00:57:31 -08:00
} ,
" role_filter " : {
" type " : " string " ,
" description " : " Optional: only search messages from specific roles (comma-separated). E.g. ' user,assistant ' to skip tool outputs. " ,
} ,
" limit " : {
" type " : " integer " ,
" description " : " Max sessions to summarize (default: 3, max: 5). " ,
" default " : 3 ,
} ,
} ,
2026-03-22 11:22:10 -07:00
" required " : [ ] ,
2026-02-19 00:57:31 -08:00
} ,
}
2026-02-21 20:22:33 -08:00
# --- Registry ---
from tools . registry import registry
registry . register (
name = " session_search " ,
toolset = " session_search " ,
schema = SESSION_SEARCH_SCHEMA ,
handler = lambda args , * * kw : session_search (
2026-03-22 11:22:10 -07:00
query = args . get ( " query " ) or " " ,
2026-02-21 20:22:33 -08:00
role_filter = args . get ( " role_filter " ) ,
limit = args . get ( " limit " , 3 ) ,
2026-03-04 06:06:40 -08:00
db = kw . get ( " db " ) ,
current_session_id = kw . get ( " current_session_id " ) ) ,
2026-02-21 20:22:33 -08:00
check_fn = check_session_search_requirements ,
2026-03-15 20:21:21 -07:00
emoji = " 🔍 " ,
2026-02-21 20:22:33 -08:00
)