# forked from Rockachopa/Timmy-time-dashboard
"""Meilisearch indexing for the searchable episode archive.
|
|
|
|
Each published episode is indexed as a document with searchable fields:
|
|
id : str — unique episode identifier (slug or UUID)
|
|
title : str — episode title
|
|
description : str — episode description / summary
|
|
tags : list — content tags
|
|
published_at: str — ISO-8601 timestamp
|
|
youtube_url : str — YouTube watch URL (if uploaded)
|
|
blossom_url : str — Blossom content-addressed URL (if uploaded)
|
|
duration : float — episode duration in seconds
|
|
clip_count : int — number of highlight clips
|
|
highlight_ids: list — IDs of constituent highlights
|
|
|
|
Meilisearch is an optional dependency. If the ``meilisearch`` Python client
|
|
is not installed, or the server is unreachable, :func:`index_episode` returns
|
|
a failure result without crashing.
|
|
|
|
Usage
|
|
-----
|
|
from content.archive.indexer import index_episode, search_episodes
|
|
|
|
result = await index_episode(
|
|
episode_id="ep-2026-03-23-001",
|
|
title="Top Highlights — March 2026",
|
|
description="...",
|
|
tags=["highlights", "gaming"],
|
|
published_at="2026-03-23T18:00:00Z",
|
|
youtube_url="https://www.youtube.com/watch?v=abc123",
|
|
)
|
|
|
|
hits = await search_episodes("highlights march")
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import asyncio
|
|
import logging
|
|
from dataclasses import dataclass, field
|
|
from typing import Any
|
|
|
|
from config import settings
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
_INDEX_NAME = "episodes"
|
|
|
|
|
|
@dataclass
class IndexResult:
    """Outcome of a single indexing operation.

    On success, ``document_id`` identifies the indexed document; on
    failure, ``error`` carries a human-readable reason.
    """

    # Whether the document was handed to Meilisearch without error.
    success: bool
    # Id of the indexed document (populated on success).
    document_id: str | None = None
    # Failure description (populated when ``success`` is False).
    error: str | None = None
|
|
|
|
|
|
@dataclass
class EpisodeDocument:
    """One searchable episode record destined for the Meilisearch index."""

    id: str
    title: str
    description: str = ""
    tags: list[str] = field(default_factory=list)
    published_at: str = ""
    youtube_url: str = ""
    blossom_url: str = ""
    duration: float = 0.0
    clip_count: int = 0
    highlight_ids: list[str] = field(default_factory=list)

    def to_dict(self) -> dict[str, Any]:
        """Return a plain dict mirroring this document, field for field.

        The instance ``__dict__`` preserves field declaration order, and a
        shallow copy keeps the list values as the same objects held by the
        dataclass (no deep copy).
        """
        return dict(vars(self))
|
|
|
|
|
|
def _meilisearch_available() -> bool:
|
|
"""Return True if the meilisearch Python client is importable."""
|
|
try:
|
|
import importlib.util
|
|
|
|
return importlib.util.find_spec("meilisearch") is not None
|
|
except Exception:
|
|
return False
|
|
|
|
|
|
def _get_client():
    """Build a Meilisearch client from the configured URL and API key."""
    import meilisearch  # type: ignore[import]

    # An empty-string key is normalized to None so no auth header is sent.
    api_key = settings.content_meilisearch_api_key or None
    return meilisearch.Client(settings.content_meilisearch_url, api_key)
|
|
|
|
|
|
def _ensure_index_sync(client) -> None:
    """Idempotently create the episodes index and configure its attributes.

    Creation failures are swallowed (the index typically already exists);
    attribute-configuration failures are logged but never raised.
    """
    try:
        client.create_index(_INDEX_NAME, {"primaryKey": "id"})
    except Exception:
        # Most likely "index already exists" — safe to continue.
        pass

    index = client.index(_INDEX_NAME)
    try:
        index.update_searchable_attributes(
            ["title", "description", "tags", "highlight_ids"]
        )
        index.update_filterable_attributes(["tags", "published_at"])
        index.update_sortable_attributes(["published_at", "duration"])
    except Exception as exc:
        logger.warning("Could not configure Meilisearch index attributes: %s", exc)
|
|
|
|
|
|
def _index_document_sync(doc: EpisodeDocument) -> IndexResult:
    """Blocking add of one episode document to the Meilisearch index.

    Returns a failure :class:`IndexResult` on any client/server error
    instead of raising.
    """
    try:
        client = _get_client()
        _ensure_index_sync(client)
        client.index(_INDEX_NAME).add_documents([doc.to_dict()])
    except Exception as exc:
        logger.warning("Meilisearch indexing failed: %s", exc)
        return IndexResult(success=False, error=str(exc))
    return IndexResult(success=True, document_id=doc.id)
|
|
|
|
|
|
def _search_sync(query: str, limit: int) -> list[dict[str, Any]]:
    """Blocking full-text search against the episodes index."""
    index = _get_client().index(_INDEX_NAME)
    response = index.search(query, {"limit": limit})
    # Missing "hits" (unexpected response shape) yields an empty list.
    return response.get("hits", [])
|
|
|
|
|
|
async def index_episode(
    episode_id: str,
    title: str,
    description: str = "",
    tags: list[str] | None = None,
    published_at: str = "",
    youtube_url: str = "",
    blossom_url: str = "",
    duration: float = 0.0,
    clip_count: int = 0,
    highlight_ids: list[str] | None = None,
) -> IndexResult:
    """Index a published episode in Meilisearch.

    This call never raises: a blank id, a missing ``meilisearch`` client,
    and any indexing error all surface as an unsuccessful result.

    Parameters
    ----------
    episode_id:
        Unique episode identifier; must be non-blank.
    title:
        Episode title.
    description:
        Summary or full description.
    tags:
        Content tags for filtering.
    published_at:
        ISO-8601 publication timestamp.
    youtube_url:
        YouTube watch URL.
    blossom_url:
        Blossom content-addressed storage URL.
    duration:
        Episode duration in seconds.
    clip_count:
        Number of highlight clips.
    highlight_ids:
        IDs of the constituent highlight clips.

    Returns
    -------
    IndexResult
        Outcome of the indexing attempt.
    """
    # Validate before touching the optional dependency.
    if not episode_id.strip():
        return IndexResult(success=False, error="episode_id must not be empty")

    if not _meilisearch_available():
        logger.warning("meilisearch client not installed — episode indexing disabled")
        return IndexResult(
            success=False,
            error="meilisearch not available — pip install meilisearch",
        )

    document = EpisodeDocument(
        id=episode_id,
        title=title,
        description=description,
        tags=tags or [],
        published_at=published_at,
        youtube_url=youtube_url,
        blossom_url=blossom_url,
        duration=duration,
        clip_count=clip_count,
        highlight_ids=highlight_ids or [],
    )

    # The Meilisearch client is synchronous; keep it off the event loop.
    try:
        return await asyncio.to_thread(_index_document_sync, document)
    except Exception as exc:
        logger.warning("Episode indexing error: %s", exc)
        return IndexResult(success=False, error=str(exc))
|
|
|
|
|
|
async def search_episodes(
    query: str,
    limit: int = 20,
) -> list[dict[str, Any]]:
    """Search the episode archive.

    Parameters
    ----------
    query:
        Full-text search query.
    limit:
        Maximum number of results to return.

    Returns
    -------
    list[dict]
        Matching episode documents; empty when the ``meilisearch`` client
        is unavailable or any error occurs.
    """
    if not _meilisearch_available():
        logger.warning("meilisearch client not installed — episode search disabled")
        return []

    # The Meilisearch client is synchronous; keep it off the event loop.
    try:
        hits = await asyncio.to_thread(_search_sync, query, limit)
    except Exception as exc:
        logger.warning("Episode search error: %s", exc)
        return []
    return hits
|