forked from Rockachopa/Timmy-time-dashboard
This commit is contained in:
243
src/content/archive/indexer.py
Normal file
243
src/content/archive/indexer.py
Normal file
@@ -0,0 +1,243 @@
"""Meilisearch indexing for the searchable episode archive.

Each published episode is indexed as a document with the following fields
(``title``, ``description``, ``tags`` and ``highlight_ids`` are the ones
configured as searchable):

    id            : str   — unique episode identifier (slug or UUID)
    title         : str   — episode title
    description   : str   — episode description / summary
    tags          : list  — content tags
    published_at  : str   — ISO-8601 timestamp
    youtube_url   : str   — YouTube watch URL (if uploaded)
    blossom_url   : str   — Blossom content-addressed URL (if uploaded)
    duration      : float — episode duration in seconds
    clip_count    : int   — number of highlight clips
    highlight_ids : list  — IDs of constituent highlights

Meilisearch is an optional dependency.  If the ``meilisearch`` Python client
is not installed, or the server is unreachable, :func:`index_episode` returns
a failure result without crashing.

Usage
-----
    from content.archive.indexer import index_episode, search_episodes

    result = await index_episode(
        episode_id="ep-2026-03-23-001",
        title="Top Highlights — March 2026",
        description="...",
        tags=["highlights", "gaming"],
        published_at="2026-03-23T18:00:00Z",
        youtube_url="https://www.youtube.com/watch?v=abc123",
    )

    hits = await search_episodes("highlights march")
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any
|
||||
|
||||
from config import settings
|
||||
|
||||
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)

# Name (uid) of the Meilisearch index that holds every episode document.
_INDEX_NAME = "episodes"
|
||||
|
||||
|
||||
@dataclass
class IndexResult:
    """Result of an indexing operation.

    Returned by the indexing helpers instead of raising, so callers can
    branch on ``success`` and report ``error`` themselves.
    """

    # True when the document was handed to Meilisearch without an exception.
    success: bool
    # ID of the indexed document; populated on success, otherwise None.
    document_id: str | None = None
    # Human-readable failure reason; populated on failure, otherwise None.
    error: str | None = None
|
||||
|
||||
|
||||
@dataclass
class EpisodeDocument:
    """A single episode document for the Meilisearch index.

    Only ``id`` and ``title`` are required; every other field defaults to an
    empty value so partially-known episodes can still be indexed.
    """

    id: str  # primary key of the document in the index
    title: str
    description: str = ""
    tags: list[str] = field(default_factory=list)
    published_at: str = ""  # ISO-8601 timestamp, kept as a plain string
    youtube_url: str = ""
    blossom_url: str = ""
    duration: float = 0.0  # seconds
    clip_count: int = 0
    highlight_ids: list[str] = field(default_factory=list)

    def to_dict(self) -> dict[str, Any]:
        """Return the document as a plain dict keyed by field name.

        The list values (``tags``, ``highlight_ids``) are the same list
        objects held by this instance, not copies.
        """
        return {
            "id": self.id,
            "title": self.title,
            "description": self.description,
            "tags": self.tags,
            "published_at": self.published_at,
            "youtube_url": self.youtube_url,
            "blossom_url": self.blossom_url,
            "duration": self.duration,
            "clip_count": self.clip_count,
            "highlight_ids": self.highlight_ids,
        }
|
||||
|
||||
|
||||
def _meilisearch_available() -> bool:
    """Return True if the meilisearch Python client is importable.

    Only the module spec is looked up, so calling this does not import the
    package.  Any exception raised during the lookup is treated as
    "not available" and yields False.
    """
    try:
        import importlib.util

        return importlib.util.find_spec("meilisearch") is not None
    except Exception:
        return False
|
||||
|
||||
|
||||
def _get_client():
    """Return a Meilisearch client configured from settings.

    The ``meilisearch`` package is imported lazily here so that this module
    can be imported when the optional dependency is missing; callers are
    expected to handle the resulting ImportError (or check
    :func:`_meilisearch_available` first).
    """
    import meilisearch  # type: ignore[import]

    url = settings.content_meilisearch_url
    key = settings.content_meilisearch_api_key
    # An empty / falsy API key is passed as None rather than as "".
    return meilisearch.Client(url, key or None)
|
||||
|
||||
|
||||
def _ensure_index_sync(client) -> None:
    """Create the episodes index with appropriate searchable attributes.

    Best-effort: failures of the create call and of the settings calls are
    logged, not raised, so a later indexing/search call can still proceed
    (and report its own error if the server is really unavailable).

    Parameters
    ----------
    client:
        Meilisearch client object providing ``create_index`` and ``index``.
    """
    try:
        client.create_index(_INDEX_NAME, {"primaryKey": "id"})
    except Exception as exc:
        # Usually this just means the index already exists, but keep a trace
        # of the reason instead of discarding it: an unreachable server or a
        # rejected API key ends up here as well.
        logger.debug("Meilisearch create_index(%r) skipped: %s", _INDEX_NAME, exc)

    idx = client.index(_INDEX_NAME)
    try:
        idx.update_searchable_attributes(
            ["title", "description", "tags", "highlight_ids"]
        )
        idx.update_filterable_attributes(["tags", "published_at"])
        idx.update_sortable_attributes(["published_at", "duration"])
    except Exception as exc:
        logger.warning("Could not configure Meilisearch index attributes: %s", exc)
|
||||
|
||||
|
||||
def _index_document_sync(doc: EpisodeDocument) -> IndexResult:
    """Synchronous Meilisearch document indexing.

    Builds a client, makes sure the index is set up, and submits ``doc``.
    Every exception is caught, logged as a warning and converted into a
    failed :class:`IndexResult`.

    NOTE(review): ``success=True`` only means ``add_documents`` returned
    without raising; Meilisearch presumably processes the addition as a
    background task, so the document may not be searchable yet — confirm
    against the client documentation.
    """
    try:
        client = _get_client()
        _ensure_index_sync(client)
        idx = client.index(_INDEX_NAME)
        idx.add_documents([doc.to_dict()])
        return IndexResult(success=True, document_id=doc.id)
    except Exception as exc:
        logger.warning("Meilisearch indexing failed: %s", exc)
        return IndexResult(success=False, error=str(exc))
|
||||
|
||||
|
||||
def _search_sync(query: str, limit: int) -> list[dict[str, Any]]:
    """Synchronous Meilisearch search.

    Runs ``query`` against the episodes index and returns the ``hits`` list
    from the response (an empty list when the response has no ``hits`` key).
    Exceptions are not handled here; they propagate to the caller.
    """
    client = _get_client()
    idx = client.index(_INDEX_NAME)
    result = idx.search(query, {"limit": limit})
    return result.get("hits", [])
|
||||
|
||||
|
||||
async def index_episode(
    episode_id: str,
    title: str,
    description: str = "",
    tags: list[str] | None = None,
    published_at: str = "",
    youtube_url: str = "",
    blossom_url: str = "",
    duration: float = 0.0,
    clip_count: int = 0,
    highlight_ids: list[str] | None = None,
) -> IndexResult:
    """Index a published episode in Meilisearch.

    The blocking client call runs in a worker thread via
    :func:`asyncio.to_thread`, so the event loop is not blocked.

    Parameters
    ----------
    episode_id:
        Unique episode identifier.  Must be a non-blank string.
    title:
        Episode title.
    description:
        Summary or full description.
    tags:
        Content tags for filtering.  ``None`` is treated as an empty list.
    published_at:
        ISO-8601 publication timestamp.
    youtube_url:
        YouTube watch URL.
    blossom_url:
        Blossom content-addressed storage URL.
    duration:
        Episode duration in seconds.
    clip_count:
        Number of highlight clips.
    highlight_ids:
        IDs of the constituent highlight clips.  ``None`` is treated as an
        empty list.

    Returns
    -------
    IndexResult
        Always returns a result; never raises.
    """
    # Guard the type as well as blankness: a None / non-string id would make
    # ``.strip()`` raise and break the "never raises" contract.
    if not isinstance(episode_id, str) or not episode_id.strip():
        return IndexResult(success=False, error="episode_id must not be empty")

    if not _meilisearch_available():
        logger.warning("meilisearch client not installed — episode indexing disabled")
        return IndexResult(
            success=False,
            error="meilisearch not available — pip install meilisearch",
        )

    doc = EpisodeDocument(
        id=episode_id,
        title=title,
        description=description,
        tags=tags or [],
        published_at=published_at,
        youtube_url=youtube_url,
        blossom_url=blossom_url,
        duration=duration,
        clip_count=clip_count,
        highlight_ids=highlight_ids or [],
    )

    try:
        return await asyncio.to_thread(_index_document_sync, doc)
    except Exception as exc:
        # The sync helper already converts its own errors; this covers
        # failures of the thread hand-off itself.
        logger.warning("Episode indexing error: %s", exc)
        return IndexResult(success=False, error=str(exc))
|
||||
|
||||
|
||||
async def search_episodes(
    query: str,
    limit: int = 20,
) -> list[dict[str, Any]]:
    """Search the episode archive.

    The blocking client call runs in a worker thread via
    :func:`asyncio.to_thread`.

    Parameters
    ----------
    query:
        Full-text search query.
    limit:
        Maximum number of results to return.

    Returns
    -------
    list[dict]
        Matching episode documents.  Returns empty list on error, and also
        when the ``meilisearch`` client is not installed.
    """
    if not _meilisearch_available():
        logger.warning("meilisearch client not installed — episode search disabled")
        return []

    try:
        return await asyncio.to_thread(_search_sync, query, limit)
    except Exception as exc:
        logger.warning("Episode search error: %s", exc)
        return []
|
||||
Reference in New Issue
Block a user