"""Persistent chat session for Timmy. Holds a singleton Agno Agent and a stable session_id so conversation history persists across HTTP requests via Agno's SQLite storage. This is the primary entry point for dashboard chat — instead of creating a new agent per request, we reuse a single instance and let Agno's session_id mechanism handle conversation continuity. """ import logging import re import httpx from timmy.cognitive_state import cognitive_tracker from timmy.confidence import estimate_confidence from timmy.session_logger import get_session_logger logger = logging.getLogger(__name__) # --------------------------------------------------------------------------- # Confidence annotation (SOUL.md: visible uncertainty) # --------------------------------------------------------------------------- _CONFIDENCE_THRESHOLD = 0.7 def _annotate_confidence(text: str, confidence: float | None) -> str: """Append a confidence tag when below threshold. SOUL.md: "When I am uncertain, I must say so in proportion to my uncertainty." """ if confidence is not None and confidence < _CONFIDENCE_THRESHOLD: return text + f"\n\n[confidence: {confidence:.0%}]" return text # Default session ID for the dashboard (stable across requests) _DEFAULT_SESSION_ID = "dashboard" # Module-level singleton agent (lazy-initialized, reused for all requests) _agent = None # --------------------------------------------------------------------------- # Response sanitization patterns # --------------------------------------------------------------------------- # Matches raw JSON tool calls: {"name": "python", "parameters": {...}} _TOOL_CALL_JSON = re.compile( r'\{\s*"name"\s*:\s*"[^"]+?"\s*,\s*"(?:parameters|arguments)"\s*:\s*\{.*?\}\s*\}', re.DOTALL, ) # Matches function-call-style text: memory_search(query="...") etc. 
_FUNC_CALL_TEXT = re.compile(
    r"\b(?:memory_search|shell|python|read_file|write_file|list_files|calculator)"
    r"\s*\([^)]*\)",
)

# Matches chain-of-thought narration lines the model should keep internal
_COT_PATTERNS = [
    re.compile(
        r"^(?:Since |Using |Let me |I'll use |I will use |Here's a possible ).*$",
        re.MULTILINE,
    ),
    re.compile(r"^(?:I found a relevant |This context suggests ).*$", re.MULTILINE),
]


def _get_agent():
    """Lazy-initialize and return the singleton Timmy agent.

    Raises:
        Exception: re-raised from ``create_timmy`` when agent creation fails.
    """
    global _agent
    if _agent is None:
        from timmy.agent import create_timmy

        try:
            _agent = create_timmy(session_id=_DEFAULT_SESSION_ID)
            logger.info("Session: Timmy agent initialized (singleton)")
        except Exception as exc:
            logger.error("Session: Failed to create Timmy agent: %s", exc)
            raise
    return _agent


def _finalize_run(run_output, session_logger):
    """Shared post-processing for ``chat_with_tools`` / ``continue_chat``.

    Estimates confidence, annotates ``run_output.content`` in place, records
    the response with the session logger, and flushes session logs.

    Returns:
        The same ``run_output`` object, with ``content`` updated.
    """
    response_text = (
        run_output.content
        if hasattr(run_output, "content") and run_output.content
        else ""
    )
    # confidence is None for empty responses, so log with %s (a %.2f format
    # would fail on None and dump a logging error to stderr).
    confidence = estimate_confidence(response_text) if response_text else None
    logger.debug("Response confidence: %s", confidence)
    response_text = _annotate_confidence(response_text, confidence)
    run_output.content = response_text
    session_logger.record_message("timmy", response_text, confidence=confidence)
    session_logger.flush()
    return run_output


async def chat(message: str, session_id: str | None = None) -> str:
    """Send a message to Timmy and get a response.

    Uses a persistent agent and session_id so Agno's SQLite history provides
    multi-turn conversation context. Uses ``arun()`` so MCP tool servers are
    auto-connected.

    Args:
        message: The user's message.
        session_id: Optional session identifier (defaults to "dashboard").

    Returns:
        The agent's response text (cleaned and confidence-annotated), or a
        human-readable error message when the backend is unreachable.
    """
    sid = session_id or _DEFAULT_SESSION_ID
    agent = _get_agent()
    session_logger = get_session_logger()

    # Record user message before sending to agent
    session_logger.record_message("user", message)

    # Pre-processing: extract user facts (best-effort, never raises)
    _extract_facts(message)

    # Inject deep-focus context when active
    message = _prepend_focus_context(message)

    # Run with session_id so Agno retrieves history from SQLite
    try:
        run = await agent.arun(message, stream=False, session_id=sid)
        response_text = run.content if hasattr(run, "content") else str(run)
    except (httpx.ConnectError, httpx.ReadError, ConnectionError) as exc:
        logger.error("Ollama disconnected: %s", exc)
        session_logger.record_error(str(exc), context="chat")
        session_logger.flush()
        return "Ollama appears to be disconnected. Check that ollama serve is running."
    except Exception as exc:
        logger.error("Session: agent.arun() failed: %s", exc)
        session_logger.record_error(str(exc), context="chat")
        session_logger.flush()
        return (
            "I'm having trouble reaching my inference backend right now. Please try again shortly."
        )

    # Post-processing: clean up any leaked tool calls or chain-of-thought
    response_text = _clean_response(response_text)

    # Estimate confidence of the response (always a float on this path)
    confidence = estimate_confidence(response_text)
    logger.debug("Response confidence: %.2f", confidence)
    response_text = _annotate_confidence(response_text, confidence)

    # Record Timmy response after getting it
    session_logger.record_message("timmy", response_text, confidence=confidence)

    # Update cognitive state (observable signal for Matrix avatar)
    cognitive_tracker.update(message, response_text)

    # Flush session logs to disk
    session_logger.flush()

    return response_text


async def chat_with_tools(message: str, session_id: str | None = None):
    """Send a message and return the full Agno RunOutput.

    Callers should check ``run_output.status``:
    - ``RunStatus.paused`` — tools need confirmation (see ``run_output.requirements``)
    - ``RunStatus.completed`` — response ready in ``run_output.content``

    Uses ``arun()`` so MCP tool servers are auto-connected.

    Args:
        message: The user's message.
        session_id: Optional session identifier (defaults to "dashboard").

    Returns:
        An Agno ``RunOutput`` object (or a lightweight surrogate on error).
    """
    sid = session_id or _DEFAULT_SESSION_ID
    agent = _get_agent()
    session_logger = get_session_logger()

    # Record user message before sending to agent
    session_logger.record_message("user", message)
    _extract_facts(message)

    # Inject deep-focus context when active
    message = _prepend_focus_context(message)

    try:
        run_output = await agent.arun(message, stream=False, session_id=sid)
        # Shared post-processing: confidence annotation + session logging.
        # Kept inside the try so failures here are handled like run failures.
        return _finalize_run(run_output, session_logger)
    except (httpx.ConnectError, httpx.ReadError, ConnectionError) as exc:
        logger.error("Ollama disconnected: %s", exc)
        session_logger.record_error(str(exc), context="chat_with_tools")
        session_logger.flush()
        return _ErrorRunOutput(
            "Ollama appears to be disconnected. Check that ollama serve is running."
        )
    except Exception as exc:
        logger.error("Session: agent.arun() failed: %s", exc)
        session_logger.record_error(str(exc), context="chat_with_tools")
        session_logger.flush()
        # Return a duck-typed object that callers can handle uniformly
        return _ErrorRunOutput(
            "I'm having trouble reaching my inference backend right now. Please try again shortly."
        )


async def continue_chat(run_output, session_id: str | None = None):
    """Resume a paused run after tool confirmation / rejection.

    Args:
        run_output: The paused ``RunOutput`` returned by ``chat_with_tools()``.
        session_id: Optional session identifier (defaults to "dashboard").

    Returns:
        A new ``RunOutput`` with the resumed execution results (or a
        lightweight surrogate on error).
    """
    sid = session_id or _DEFAULT_SESSION_ID
    agent = _get_agent()
    session_logger = get_session_logger()

    try:
        result = await agent.acontinue_run(
            run_response=run_output, stream=False, session_id=sid
        )
        # Shared post-processing: confidence annotation + session logging.
        return _finalize_run(result, session_logger)
    except (httpx.ConnectError, httpx.ReadError, ConnectionError) as exc:
        logger.error("Ollama disconnected: %s", exc)
        session_logger.record_error(str(exc), context="continue_chat")
        session_logger.flush()
        return _ErrorRunOutput(
            "Ollama appears to be disconnected. Check that ollama serve is running."
        )
    except Exception as exc:
        logger.error("Session: agent.acontinue_run() failed: %s", exc)
        session_logger.record_error(str(exc), context="continue_chat")
        session_logger.flush()
        return _ErrorRunOutput(f"Error continuing run: {exc}")


class _ErrorRunOutput:
    """Lightweight stand-in for RunOutput when the model is unreachable."""

    def __init__(self, message: str):
        self.content = message  # human-readable error text shown to callers
        self.status = "ERROR"  # sentinel string — NOTE(review): not a RunStatus member; confirm callers compare loosely
        self.requirements = []
        self.tools = []

    @property
    def active_requirements(self):
        # An error surrogate never has pending tool confirmations.
        return []


async def chat_raw(message: str, session_id: str | None = None) -> tuple[str, str]:
    """Send a message and return both cleaned and raw responses.

    Backward-compatible wrapper around :func:`chat_with_tools`.

    Returns:
        (cleaned_response, raw_response) — cleaned has tool-call JSON and
        chain-of-thought stripped; raw is the model's original output.
    """
    run = await chat_with_tools(message, session_id)
    raw_response = run.content if hasattr(run, "content") and run.content else ""
    cleaned = _clean_response(raw_response)
    return cleaned, raw_response


def reset_session(session_id: str | None = None) -> None:
    """Reset a session (clear conversation context).

    This clears the ConversationManager state. Agno's SQLite history is not
    cleared — that provides long-term continuity.
    """
    sid = session_id or _DEFAULT_SESSION_ID
    try:
        from timmy.conversation import conversation_manager

        conversation_manager.clear_context(sid)
    except Exception as exc:
        # Best-effort: a missing/failed conversation manager is not fatal.
        logger.debug("Session: context clear failed for %s: %s", sid, exc)


def _extract_facts(message: str) -> None:
    """Extract user facts from message and persist to memory system.

    Ported from TimmyWithMemory._extract_and_store_facts(). Runs as a
    best-effort pre-processor — failures are logged, not raised.
    """
    try:
        from timmy.conversation import conversation_manager

        name = conversation_manager.extract_user_name(message)
        if name:
            try:
                from timmy.memory_system import memory_system

                memory_system.update_user_fact("Name", name)
                logger.info("Session: Learned user name: %s", name)
            except Exception as exc:
                # Extraction succeeded but persistence failed — log and move on.
                logger.debug("Session: fact persist failed: %s", exc)
    except Exception as exc:
        logger.debug("Session: Fact extraction skipped: %s", exc)


def _prepend_focus_context(message: str) -> str:
    """Prepend deep-focus context to a message when focus mode is active."""
    try:
        from timmy.focus import focus_manager

        ctx = focus_manager.get_focus_context()
        if ctx:
            return f"{ctx}\n\n{message}"
    except Exception as exc:
        logger.debug("Focus context injection skipped: %s", exc)
    return message


def _clean_response(text: str) -> str:
    """Remove hallucinated tool calls and chain-of-thought narration.

    Small models sometimes output raw JSON tool calls or narrate their
    internal reasoning instead of just answering. This strips those
    artifacts from the response.
    """
    if not text:
        return text

    # Convert literal \n escape sequences to actual newlines
    # (models sometimes output these in tool-result text)
    text = text.replace("\\n", "\n")

    # Strip JSON tool call blocks
    text = _TOOL_CALL_JSON.sub("", text)

    # Strip function-call-style text
    text = _FUNC_CALL_TEXT.sub("", text)

    # Strip chain-of-thought narration lines
    for pattern in _COT_PATTERNS:
        text = pattern.sub("", text)

    # Clean up leftover blank lines and whitespace
    lines = [line for line in text.split("\n") if line.strip()]
    return "\n".join(lines).strip()