This repository has been archived on 2026-03-24. You can view files and clone it. You cannot open issues or pull requests or push a commit.
Files
Timmy-time-dashboard/src/content/archive/indexer.py

244 lines
7.0 KiB
Python

"""Meilisearch indexing for the searchable episode archive.
Each published episode is indexed as a document with searchable fields:
id : str — unique episode identifier (slug or UUID)
title : str — episode title
description : str — episode description / summary
tags : list — content tags
published_at: str — ISO-8601 timestamp
youtube_url : str — YouTube watch URL (if uploaded)
blossom_url : str — Blossom content-addressed URL (if uploaded)
duration : float — episode duration in seconds
clip_count : int — number of highlight clips
highlight_ids: list — IDs of constituent highlights
Meilisearch is an optional dependency. If the ``meilisearch`` Python client
is not installed, or the server is unreachable, :func:`index_episode` returns
a failure result without crashing.
Usage
-----
from content.archive.indexer import index_episode, search_episodes
result = await index_episode(
episode_id="ep-2026-03-23-001",
title="Top Highlights — March 2026",
description="...",
tags=["highlights", "gaming"],
published_at="2026-03-23T18:00:00Z",
youtube_url="https://www.youtube.com/watch?v=abc123",
)
hits = await search_episodes("highlights march")
"""
from __future__ import annotations
import asyncio
import logging
from dataclasses import dataclass, field
from typing import Any
from config import settings
logger = logging.getLogger(__name__)
_INDEX_NAME = "episodes"
@dataclass
class IndexResult:
"""Result of an indexing operation."""
success: bool
document_id: str | None = None
error: str | None = None
@dataclass
class EpisodeDocument:
"""A single episode document for the Meilisearch index."""
id: str
title: str
description: str = ""
tags: list[str] = field(default_factory=list)
published_at: str = ""
youtube_url: str = ""
blossom_url: str = ""
duration: float = 0.0
clip_count: int = 0
highlight_ids: list[str] = field(default_factory=list)
def to_dict(self) -> dict[str, Any]:
return {
"id": self.id,
"title": self.title,
"description": self.description,
"tags": self.tags,
"published_at": self.published_at,
"youtube_url": self.youtube_url,
"blossom_url": self.blossom_url,
"duration": self.duration,
"clip_count": self.clip_count,
"highlight_ids": self.highlight_ids,
}
def _meilisearch_available() -> bool:
"""Return True if the meilisearch Python client is importable."""
try:
import importlib.util
return importlib.util.find_spec("meilisearch") is not None
except Exception:
return False
def _get_client():
"""Return a Meilisearch client configured from settings."""
import meilisearch # type: ignore[import]
url = settings.content_meilisearch_url
key = settings.content_meilisearch_api_key
return meilisearch.Client(url, key or None)
def _ensure_index_sync(client) -> None:
"""Create the episodes index with appropriate searchable attributes."""
try:
client.create_index(_INDEX_NAME, {"primaryKey": "id"})
except Exception:
pass # Index already exists
idx = client.index(_INDEX_NAME)
try:
idx.update_searchable_attributes(
["title", "description", "tags", "highlight_ids"]
)
idx.update_filterable_attributes(["tags", "published_at"])
idx.update_sortable_attributes(["published_at", "duration"])
except Exception as exc:
logger.warning("Could not configure Meilisearch index attributes: %s", exc)
def _index_document_sync(doc: EpisodeDocument) -> IndexResult:
"""Synchronous Meilisearch document indexing."""
try:
client = _get_client()
_ensure_index_sync(client)
idx = client.index(_INDEX_NAME)
idx.add_documents([doc.to_dict()])
return IndexResult(success=True, document_id=doc.id)
except Exception as exc:
logger.warning("Meilisearch indexing failed: %s", exc)
return IndexResult(success=False, error=str(exc))
def _search_sync(query: str, limit: int) -> list[dict[str, Any]]:
"""Synchronous Meilisearch search."""
client = _get_client()
idx = client.index(_INDEX_NAME)
result = idx.search(query, {"limit": limit})
return result.get("hits", [])
async def index_episode(
episode_id: str,
title: str,
description: str = "",
tags: list[str] | None = None,
published_at: str = "",
youtube_url: str = "",
blossom_url: str = "",
duration: float = 0.0,
clip_count: int = 0,
highlight_ids: list[str] | None = None,
) -> IndexResult:
"""Index a published episode in Meilisearch.
Parameters
----------
episode_id:
Unique episode identifier.
title:
Episode title.
description:
Summary or full description.
tags:
Content tags for filtering.
published_at:
ISO-8601 publication timestamp.
youtube_url:
YouTube watch URL.
blossom_url:
Blossom content-addressed storage URL.
duration:
Episode duration in seconds.
clip_count:
Number of highlight clips.
highlight_ids:
IDs of the constituent highlight clips.
Returns
-------
IndexResult
Always returns a result; never raises.
"""
if not episode_id.strip():
return IndexResult(success=False, error="episode_id must not be empty")
if not _meilisearch_available():
logger.warning("meilisearch client not installed — episode indexing disabled")
return IndexResult(
success=False,
error="meilisearch not available — pip install meilisearch",
)
doc = EpisodeDocument(
id=episode_id,
title=title,
description=description,
tags=tags or [],
published_at=published_at,
youtube_url=youtube_url,
blossom_url=blossom_url,
duration=duration,
clip_count=clip_count,
highlight_ids=highlight_ids or [],
)
try:
return await asyncio.to_thread(_index_document_sync, doc)
except Exception as exc:
logger.warning("Episode indexing error: %s", exc)
return IndexResult(success=False, error=str(exc))
async def search_episodes(
query: str,
limit: int = 20,
) -> list[dict[str, Any]]:
"""Search the episode archive.
Parameters
----------
query:
Full-text search query.
limit:
Maximum number of results to return.
Returns
-------
list[dict]
Matching episode documents. Returns empty list on error.
"""
if not _meilisearch_available():
logger.warning("meilisearch client not installed — episode search disabled")
return []
try:
return await asyncio.to_thread(_search_sync, query, limit)
except Exception as exc:
logger.warning("Episode search error: %s", exc)
return []