# src/timmy/session.py

"""Persistent chat session for Timmy.

Holds a singleton Agno Agent and a stable session_id so conversation
history persists across HTTP requests via Agno's SQLite storage.

This is the primary entry point for dashboard chat — instead of
creating a new agent per request, we reuse a single instance and
let Agno's session_id mechanism handle conversation continuity.
"""

import logging
import re

import httpx

from timmy.cognitive_state import cognitive_tracker
from timmy.confidence import estimate_confidence
from timmy.session_logger import get_session_logger

logger = logging.getLogger(__name__)

# ---------------------------------------------------------------------------
# Confidence annotation (SOUL.md: visible uncertainty)
# ---------------------------------------------------------------------------

_CONFIDENCE_THRESHOLD = 0.7


def _annotate_confidence(text: str, confidence: float | None) -> str:
    """Append a confidence tag when below threshold.

    SOUL.md: "When I am uncertain, I must say so in proportion to my uncertainty."
    """
    if confidence is not None and confidence < _CONFIDENCE_THRESHOLD:
        return text + f"\n\n[confidence: {confidence:.0%}]"
    return text


# Default session ID for the dashboard (stable across requests).
# Used by the public entry points whenever the caller passes no
# session_id (or a falsy one).
_DEFAULT_SESSION_ID = "dashboard"

# Module-level singleton agent (lazy-initialized, reused for all requests).
# Stays None until the first successful _get_agent() call assigns it.
_agent = None

# ---------------------------------------------------------------------------
# Response sanitization patterns
# ---------------------------------------------------------------------------

# Matches raw JSON tool calls: {"name": "python", "parameters": {...}}
_TOOL_CALL_JSON = re.compile(
    r'\{\s*"name"\s*:\s*"[^"]+?"\s*,\s*"(?:parameters|arguments)"\s*:\s*\{.*?\}\s*\}',
    re.DOTALL,
)

# Matches function-call-style text: memory_search(query="...") etc.
_FUNC_CALL_TEXT = re.compile(
    r"\b(?:memory_search|shell|python|read_file|write_file|list_files|calculator)"
    r"\s*\([^)]*\)",
)

# Matches chain-of-thought narration lines the model should keep internal
_COT_PATTERNS = [
    re.compile(
        r"^(?:Since |Using |Let me |I'll use |I will use |Here's a possible ).*$", re.MULTILINE
    ),
    re.compile(r"^(?:I found a relevant |This context suggests ).*$", re.MULTILINE),
]


def _get_agent():
    """Return the shared Timmy agent, building it on first use.

    The agent is created with the default dashboard session ID and cached
    in the module-level ``_agent`` so later calls reuse the same instance.

    Raises:
        Exception: whatever ``create_timmy`` raised; the failure is logged
            and re-raised, and nothing is cached.
    """
    global _agent

    # Fast path: already built by an earlier call.
    if _agent is not None:
        return _agent

    from timmy.agent import create_timmy

    try:
        _agent = create_timmy(session_id=_DEFAULT_SESSION_ID)
    except Exception as exc:
        logger.error("Session: Failed to create Timmy agent: %s", exc)
        raise
    else:
        logger.info("Session: Timmy agent initialized (singleton)")
    return _agent


async def chat(message: str, session_id: str | None = None) -> str:
    """Send a message to Timmy and get a response.

    Uses a persistent agent and session_id so Agno's SQLite history
    provides multi-turn conversation context.  Uses ``arun()`` so MCP
    tool servers are auto-connected.

    Args:
        message:    The user's message.
        session_id: Optional session identifier (defaults to "dashboard").

    Returns:
        The agent's response text (cleaned and, when confidence is low,
        annotated), or a fixed fallback message when the agent call fails.
        Always a ``str``, even when the run carries no content.
    """
    sid = session_id or _DEFAULT_SESSION_ID
    agent = _get_agent()
    session_logger = get_session_logger()

    # Record user message before sending to agent
    session_logger.record_message("user", message)

    # Pre-processing: extract user facts
    _extract_facts(message)

    # Inject deep-focus context when active
    message = _prepend_focus_context(message)

    # Run with session_id so Agno retrieves history from SQLite
    try:
        run = await agent.arun(message, stream=False, session_id=sid)
        if hasattr(run, "content"):
            # The sibling entry points treat missing content as "", so do
            # the same here: a None would otherwise reach string-only
            # post-processing and break the ``-> str`` contract.
            content = run.content
            response_text = "" if content is None else str(content)
        else:
            response_text = str(run)
    except (httpx.ConnectError, httpx.ReadError, ConnectionError) as exc:
        logger.error("Ollama disconnected: %s", exc)
        session_logger.record_error(str(exc), context="chat")
        session_logger.flush()
        return "Ollama appears to be disconnected. Check that ollama serve is running."
    except Exception as exc:
        logger.error("Session: agent.arun() failed: %s", exc)
        session_logger.record_error(str(exc), context="chat")
        session_logger.flush()
        return (
            "I'm having trouble reaching my inference backend right now. Please try again shortly."
        )

    # Post-processing: clean up any leaked tool calls or chain-of-thought
    response_text = _clean_response(response_text)

    # Estimate confidence of the response
    confidence = estimate_confidence(response_text)
    if confidence is not None:
        # "%.2f" cannot format None; only log when there is a number.
        logger.debug("Response confidence: %.2f", confidence)

    response_text = _annotate_confidence(response_text, confidence)

    # Record Timmy response after getting it
    session_logger.record_message("timmy", response_text, confidence=confidence)

    # Update cognitive state (observable signal for Matrix avatar)
    cognitive_tracker.update(message, response_text)

    # Flush session logs to disk
    session_logger.flush()

    return response_text


async def chat_with_tools(message: str, session_id: str | None = None):
    """Send a message and return the full Agno RunOutput.

    Callers should check ``run_output.status``:
    - ``RunStatus.paused``    — tools need confirmation (see ``run_output.requirements``)
    - ``RunStatus.completed`` — response ready in ``run_output.content``

    Uses ``arun()`` so MCP tool servers are auto-connected.

    Args:
        message:    The user's message.
        session_id: Optional session identifier (defaults to "dashboard").

    Returns:
        An Agno ``RunOutput`` object (or a lightweight surrogate on error).
        Its ``content`` is overwritten with the confidence-annotated text
        (``""`` when the run produced no content).
    """
    sid = session_id or _DEFAULT_SESSION_ID
    agent = _get_agent()
    session_logger = get_session_logger()

    # Record user message before sending to agent
    session_logger.record_message("user", message)

    _extract_facts(message)

    # Inject deep-focus context when active
    message = _prepend_focus_context(message)

    try:
        run_output = await agent.arun(message, stream=False, session_id=sid)
        # Record Timmy response after getting it
        response_text = (
            run_output.content if hasattr(run_output, "content") and run_output.content else ""
        )
        # No text means no estimate; confidence stays None in that case.
        confidence = estimate_confidence(response_text) if response_text else None
        if confidence is not None:
            # "%.2f" cannot format None, which would surface as a logging
            # error whenever DEBUG is enabled and the content is empty.
            logger.debug("Response confidence: %.2f", confidence)

        response_text = _annotate_confidence(response_text, confidence)
        run_output.content = response_text

        session_logger.record_message("timmy", response_text, confidence=confidence)
        session_logger.flush()
        return run_output
    except (httpx.ConnectError, httpx.ReadError, ConnectionError) as exc:
        logger.error("Ollama disconnected: %s", exc)
        session_logger.record_error(str(exc), context="chat_with_tools")
        session_logger.flush()
        return _ErrorRunOutput(
            "Ollama appears to be disconnected. Check that ollama serve is running."
        )
    except Exception as exc:
        logger.error("Session: agent.arun() failed: %s", exc)
        session_logger.record_error(str(exc), context="chat_with_tools")
        session_logger.flush()
        # Return a duck-typed object that callers can handle uniformly
        return _ErrorRunOutput(
            "I'm having trouble reaching my inference backend right now. Please try again shortly."
        )


async def continue_chat(run_output, session_id: str | None = None):
    """Resume a paused run after tool confirmation / rejection.

    Args:
        run_output: The paused ``RunOutput`` returned by ``chat_with_tools()``.
        session_id: Optional session identifier (defaults to "dashboard").

    Returns:
        A new ``RunOutput`` with the resumed execution results, its
        ``content`` overwritten with the confidence-annotated text
        (``""`` when the run produced no content), or a lightweight
        surrogate carrying an error message when resuming fails.
    """
    sid = session_id or _DEFAULT_SESSION_ID
    agent = _get_agent()
    session_logger = get_session_logger()

    try:
        result = await agent.acontinue_run(run_response=run_output, stream=False, session_id=sid)
        # Record Timmy response after getting it
        response_text = result.content if hasattr(result, "content") and result.content else ""
        # No text means no estimate; confidence stays None in that case.
        confidence = estimate_confidence(response_text) if response_text else None
        if confidence is not None:
            # "%.2f" cannot format None, which would surface as a logging
            # error whenever DEBUG is enabled and the content is empty.
            logger.debug("Response confidence: %.2f", confidence)

        response_text = _annotate_confidence(response_text, confidence)
        result.content = response_text

        session_logger.record_message("timmy", response_text, confidence=confidence)
        session_logger.flush()
        return result
    except (httpx.ConnectError, httpx.ReadError, ConnectionError) as exc:
        logger.error("Ollama disconnected: %s", exc)
        session_logger.record_error(str(exc), context="continue_chat")
        session_logger.flush()
        return _ErrorRunOutput(
            "Ollama appears to be disconnected. Check that ollama serve is running."
        )
    except Exception as exc:
        logger.error("Session: agent.acontinue_run() failed: %s", exc)
        session_logger.record_error(str(exc), context="continue_chat")
        session_logger.flush()
        return _ErrorRunOutput(f"Error continuing run: {exc}")


class _ErrorRunOutput:
    """Lightweight stand-in for RunOutput when the model is unreachable."""

    def __init__(self, message: str):
        self.content = message
        self.status = "ERROR"
        self.requirements = []
        self.tools = []

    @property
    def active_requirements(self):
        return []


async def chat_raw(message: str, session_id: str | None = None) -> tuple[str, str]:
    """Send a message and return the response in cleaned and uncleaned form.

    Backward-compatible wrapper around :func:`chat_with_tools`.

    Args:
        message:    The user's message.
        session_id: Optional session identifier, passed through unchanged.

    Returns:
        ``(cleaned_response, raw_response)`` — ``raw_response`` is the
        ``content`` of the run returned by :func:`chat_with_tools` (``""``
        when missing or empty); ``cleaned_response`` is that same text
        after tool-call and chain-of-thought stripping.
    """
    outcome = await chat_with_tools(message, session_id)
    # Missing attribute and falsy content both collapse to "".
    raw_response = getattr(outcome, "content", None) or ""
    return _clean_response(raw_response), raw_response


def reset_session(session_id: str | None = None) -> None:
    """Clear the conversation context held for a session.

    Only the ConversationManager state is cleared. Agno's SQLite history
    is left untouched — that provides long-term continuity.

    Args:
        session_id: Session to reset; the default dashboard session is
            used when this is ``None`` or empty.

    Failures (including a failed import) are logged at debug level and
    never raised.
    """
    target = session_id if session_id else _DEFAULT_SESSION_ID
    try:
        from timmy.conversation import conversation_manager as manager

        manager.clear_context(target)
    except Exception as exc:
        logger.debug("Session: context clear failed for %s: %s", target, exc)


def _extract_facts(message: str) -> None:
    """Pull user facts out of *message* and store them in the memory system.

    Ported from TimmyWithMemory._extract_and_store_facts().
    Best-effort: callers in this module run it on the user's message
    before the agent call, and every failure is logged at debug level
    rather than raised.

    Currently the only fact extracted is the user's name, stored under
    the ``"Name"`` key.
    """
    # Stage 1: try to detect a name; bail out quietly on any failure.
    try:
        from timmy.conversation import conversation_manager

        learned_name = conversation_manager.extract_user_name(message)
        if not learned_name:
            return
    except Exception as exc:
        logger.debug("Session: Fact extraction skipped: %s", exc)
        return

    # Stage 2: persist the detected name; again failures are non-fatal.
    try:
        from timmy.memory_system import memory_system

        memory_system.update_user_fact("Name", learned_name)
        logger.info("Session: Learned user name: %s", learned_name)
    except Exception as exc:
        logger.debug("Session: fact persist failed: %s", exc)


def _prepend_focus_context(message: str) -> str:
    """Return *message*, prefixed with the deep-focus context if there is one.

    When the focus manager yields a non-empty context, the result is the
    context, a blank line, then the message. In every other case —
    including any failure while importing or querying the focus manager,
    which is logged at debug level — the message is returned unchanged.
    """
    try:
        from timmy.focus import focus_manager

        focus_ctx = focus_manager.get_focus_context()
        if not focus_ctx:
            return message
        return f"{focus_ctx}\n\n{message}"
    except Exception as exc:
        logger.debug("Focus context injection skipped: %s", exc)
        return message


def _clean_response(text: str) -> str:
    """Remove hallucinated tool calls and chain-of-thought narration.

    Small models sometimes output raw JSON tool calls or narrate their
    internal reasoning instead of just answering. This strips those
    artifacts from the response.
    """
    if not text:
        return text

    # Convert literal \n escape sequences to actual newlines
    # (models sometimes output these in tool-result text)
    text = text.replace("\\n", "\n")

    # Strip JSON tool call blocks
    text = _TOOL_CALL_JSON.sub("", text)

    # Strip function-call-style text
    text = _FUNC_CALL_TEXT.sub("", text)

    # Strip chain-of-thought narration lines
    for pattern in _COT_PATTERNS:
        text = pattern.sub("", text)

    # Clean up leftover blank lines and whitespace
    lines = [line for line in text.split("\n") if line.strip()]
    text = "\n".join(lines)

    return text.strip()