# forked from Rockachopa/Timmy-time-dashboard
"""Meilisearch indexing for the searchable episode archive.
|
|
|
|
Each published episode is indexed as a document with searchable fields:
|
|
id : str — unique episode identifier (slug or UUID)
|
|
title : str — episode title
|
|
description : str — episode description / summary
|
|
tags : list — content tags
|
|
published_at: str — ISO-8601 timestamp
|
|
youtube_url : str — YouTube watch URL (if uploaded)
|
|
blossom_url : str — Blossom content-addressed URL (if uploaded)
|
|
duration : float — episode duration in seconds
|
|
clip_count : int — number of highlight clips
|
|
highlight_ids: list — IDs of constituent highlights
|
|
|
|
Meilisearch is an optional dependency. If the ``meilisearch`` Python client
|
|
is not installed, or the server is unreachable, :func:`index_episode` returns
|
|
a failure result without crashing.
|
|
|
|
Usage
|
|
-----
|
|
from content.archive.indexer import index_episode, search_episodes
|
|
|
|
result = await index_episode(
|
|
episode_id="ep-2026-03-23-001",
|
|
title="Top Highlights — March 2026",
|
|
description="...",
|
|
tags=["highlights", "gaming"],
|
|
published_at="2026-03-23T18:00:00Z",
|
|
youtube_url="https://www.youtube.com/watch?v=abc123",
|
|
)
|
|
|
|
hits = await search_episodes("highlights march")
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import asyncio
|
|
import logging
|
|
from dataclasses import dataclass, field
|
|
from typing import Any
|
|
|
|
from config import settings
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
_INDEX_NAME = "episodes"
|
|
|
|
|
|
@dataclass
class IndexResult:
    """Outcome of a single indexing operation.

    On success, ``document_id`` identifies the indexed document; on
    failure, ``error`` carries a human-readable reason.
    """

    # Whether the document was handed to Meilisearch without error.
    success: bool
    # Id of the indexed document (populated on success).
    document_id: str | None = None
    # Failure description (populated when ``success`` is False).
    error: str | None = None
|
|
|
|
|
|
@dataclass
class EpisodeDocument:
    """One searchable episode record destined for the Meilisearch index."""

    id: str
    title: str
    description: str = ""
    tags: list[str] = field(default_factory=list)
    published_at: str = ""
    youtube_url: str = ""
    blossom_url: str = ""
    duration: float = 0.0
    clip_count: int = 0
    highlight_ids: list[str] = field(default_factory=list)

    def to_dict(self) -> dict[str, Any]:
        """Return a plain dict mirroring this document, field for field.

        The instance ``__dict__`` preserves field declaration order, and a
        shallow copy keeps the list values as the same objects held by the
        dataclass (no deep copy).
        """
        return dict(vars(self))
|
|
|
|
|
|
def _meilisearch_available() -> bool:
|
|
"""Return True if the meilisearch Python client is importable."""
|
|
try:
|
|
import importlib.util
|
|
|
|
return importlib.util.find_spec("meilisearch") is not None
|
|
except Exception:
|
|
return False
|
|
|
|
|
|
def _get_client():
    """Build a Meilisearch client from the configured URL and API key."""
    import meilisearch  # type: ignore[import]

    # An empty-string key is normalized to None so no auth header is sent.
    api_key = settings.content_meilisearch_api_key or None
    return meilisearch.Client(settings.content_meilisearch_url, api_key)
|
|
|
|
|
|
def _ensure_index_sync(client) -> None:
    """Idempotently create the episodes index and configure its attributes.

    Creation failures are swallowed (the index typically already exists);
    attribute-configuration failures are logged but never raised.
    """
    try:
        client.create_index(_INDEX_NAME, {"primaryKey": "id"})
    except Exception:
        # Most likely "index already exists" — safe to continue.
        pass

    index = client.index(_INDEX_NAME)
    try:
        index.update_searchable_attributes(
            ["title", "description", "tags", "highlight_ids"]
        )
        index.update_filterable_attributes(["tags", "published_at"])
        index.update_sortable_attributes(["published_at", "duration"])
    except Exception as exc:
        logger.warning("Could not configure Meilisearch index attributes: %s", exc)
|
|
|
|
|
|
def _index_document_sync(doc: EpisodeDocument) -> IndexResult:
    """Blocking add of one episode document to the Meilisearch index.

    Returns a failure :class:`IndexResult` on any client/server error
    instead of raising.
    """
    try:
        client = _get_client()
        _ensure_index_sync(client)
        client.index(_INDEX_NAME).add_documents([doc.to_dict()])
    except Exception as exc:
        logger.warning("Meilisearch indexing failed: %s", exc)
        return IndexResult(success=False, error=str(exc))
    return IndexResult(success=True, document_id=doc.id)
|
|
|
|
|
|
def _search_sync(query: str, limit: int) -> list[dict[str, Any]]:
    """Blocking full-text search against the episodes index."""
    index = _get_client().index(_INDEX_NAME)
    response = index.search(query, {"limit": limit})
    # Missing "hits" (unexpected response shape) yields an empty list.
    return response.get("hits", [])
|
|
|
|
|
|
async def index_episode(
    episode_id: str,
    title: str,
    description: str = "",
    tags: list[str] | None = None,
    published_at: str = "",
    youtube_url: str = "",
    blossom_url: str = "",
    duration: float = 0.0,
    clip_count: int = 0,
    highlight_ids: list[str] | None = None,
) -> IndexResult:
    """Index a published episode in Meilisearch.

    This call never raises: a blank id, a missing ``meilisearch`` client,
    and any indexing error all surface as an unsuccessful result.

    Parameters
    ----------
    episode_id:
        Unique episode identifier; must be non-blank.
    title:
        Episode title.
    description:
        Summary or full description.
    tags:
        Content tags for filtering.
    published_at:
        ISO-8601 publication timestamp.
    youtube_url:
        YouTube watch URL.
    blossom_url:
        Blossom content-addressed storage URL.
    duration:
        Episode duration in seconds.
    clip_count:
        Number of highlight clips.
    highlight_ids:
        IDs of the constituent highlight clips.

    Returns
    -------
    IndexResult
        Outcome of the indexing attempt.
    """
    # Validate before touching the optional dependency.
    if not episode_id.strip():
        return IndexResult(success=False, error="episode_id must not be empty")

    if not _meilisearch_available():
        logger.warning("meilisearch client not installed — episode indexing disabled")
        return IndexResult(
            success=False,
            error="meilisearch not available — pip install meilisearch",
        )

    document = EpisodeDocument(
        id=episode_id,
        title=title,
        description=description,
        tags=tags or [],
        published_at=published_at,
        youtube_url=youtube_url,
        blossom_url=blossom_url,
        duration=duration,
        clip_count=clip_count,
        highlight_ids=highlight_ids or [],
    )

    # The Meilisearch client is synchronous; keep it off the event loop.
    try:
        return await asyncio.to_thread(_index_document_sync, document)
    except Exception as exc:
        logger.warning("Episode indexing error: %s", exc)
        return IndexResult(success=False, error=str(exc))
|
|
|
|
|
|
async def search_episodes(
    query: str,
    limit: int = 20,
) -> list[dict[str, Any]]:
    """Search the episode archive.

    Parameters
    ----------
    query:
        Full-text search query.
    limit:
        Maximum number of results to return.

    Returns
    -------
    list[dict]
        Matching episode documents; empty when the ``meilisearch`` client
        is unavailable or any error occurs.
    """
    if not _meilisearch_available():
        logger.warning("meilisearch client not installed — episode search disabled")
        return []

    # The Meilisearch client is synchronous; keep it off the event loop.
    try:
        hits = await asyncio.to_thread(_search_sync, query, limit)
    except Exception as exc:
        logger.warning("Episode search error: %s", exc)
        return []
    return hits
|