#!/usr/bin/env python3
"""
Relevance Engine for Deep Dive
Filters and ranks content by Hermes/Timmy relevance
"""

import json
from dataclasses import asdict
from typing import Any, Callable, Dict, List, Tuple

import chromadb
from chromadb.utils import embedding_functions

# Hermes codebase snippets for similarity comparison
HERMES_CONTEXT = [
    "Hermes agent system with tool calling and conversation loop",
    "LLM inference with tool orchestration",
    "Retrieval augmented generation RAG architecture",
    "Multi-agent orchestration and delegation",
    "Reinforcement learning RL for agent training",
    "Model quantization and efficient inference",
    "Vector database Chroma for embeddings",
    "MCP Model Context Protocol integration",
    "Gateway pattern for messaging platforms",
    "Agent trajectory logging and replay",
]


class RelevanceEngine:
    """Score arbitrary items by embedding similarity to HERMES_CONTEXT.

    The engine owns a persistent Chroma client rooted at ``./chroma_db`` and
    a single collection that is seeded with the HERMES_CONTEXT snippets.
    """

    def __init__(self, collection_name: str = "deep_dive"):
        """Open (or create) the collection and make sure it is seeded.

        Args:
            collection_name: Name of the Chroma collection to use.
        """
        self.client = chromadb.PersistentClient(path="./chroma_db")
        self.embedding_fn = embedding_functions.SentenceTransformerEmbeddingFunction(
            model_name="all-MiniLM-L6-v2"
        )

        # get_or_create_collection replaces the former get/bare-except/create
        # sequence, which treated *any* failure (including KeyboardInterrupt)
        # as "collection does not exist".
        self.collection = self.client.get_or_create_collection(
            name=collection_name,
            embedding_function=self.embedding_fn,
        )

        # A freshly created collection is empty; an empty collection would
        # also make every query in rank_items return no distances.
        if self.collection.count() == 0:
            self._seed_context()

    def _seed_context(self):
        """Seed the collection with Hermes context."""
        # upsert (rather than add) keeps seeding idempotent if the ctx_* ids
        # are already present.
        self.collection.upsert(
            documents=HERMES_CONTEXT,
            ids=[f"ctx_{i}" for i in range(len(HERMES_CONTEXT))],
            metadatas=[{"type": "context"} for _ in HERMES_CONTEXT],
        )

    def rank_items(
        self,
        items: List[Any],
        text_fn: Callable[[Any], str],
        top_k: int = 10,
    ) -> List[Tuple[Any, float]]:
        """Rank items by similarity to Hermes context.

        Each item's text is queried against the collection (up to three
        results per item). The score is the mean of ``1 / (1 + distance)``
        over the returned distances, so smaller distances give higher scores.

        NOTE(review): the query is not restricted to ``type == "context"``
        entries; anything else stored in the collection also participates.

        Args:
            items: Objects to rank.
            text_fn: Maps an item to the text that should be embedded.
            top_k: Maximum number of results to return.

        Returns:
            ``(item, score)`` tuples sorted by score, highest first, truncated
            to ``top_k``. Empty input yields an empty list.
        """
        if not items:
            # Nothing to embed; avoid issuing a query with no query texts.
            return []

        texts = [text_fn(item) for item in items]

        # Query against context
        results = self.collection.query(
            query_texts=texts,
            n_results=3,
            include=["distances"],
        )

        # Calculate relevance scores (inverse distance, averaged)
        scored = []
        for item, distances in zip(items, results["distances"]):
            if distances:
                score = sum(1 / (1 + d) for d in distances) / len(distances)
            else:
                # No neighbours came back: score as least relevant instead
                # of dividing by zero.
                score = 0.0
            scored.append((item, score))

        # Sort by score descending
        scored.sort(key=lambda pair: pair[1], reverse=True)
        return scored[:top_k]

    def filter_by_keywords(
        self,
        items: List[Any],
        text_fn: Callable[[Any], str],
        keywords: List[str],
    ) -> List[Any]:
        """Filter items that match at least one keyword.

        Matching is a case-insensitive substring test of each keyword against
        ``text_fn(item)``.

        Args:
            items: Objects to filter.
            text_fn: Maps an item to the text that should be searched.
            keywords: Keywords to look for.

        Returns:
            The items, in their original order, whose text contains at least
            one keyword. With no keywords, nothing matches.
        """
        # Normalise the keywords once rather than once per item.
        lowered = [kw.lower() for kw in keywords]

        filtered = []
        for item in items:
            text = text_fn(item).lower()
            if any(kw in text for kw in lowered):
                filtered.append(item)
        return filtered


def rank_papers(papers: List[Any], top_k: int = 10) -> List[Tuple[Any, float]]:
    """Convenience function for paper ranking.

    Builds a fresh RelevanceEngine and ranks ``papers`` using the text
    ``"{title} {abstract}"`` of each paper.

    Args:
        papers: Objects exposing ``title`` and ``abstract`` attributes.
        top_k: Maximum number of results to return.

    Returns:
        ``(paper, score)`` tuples, highest score first.
    """
    engine = RelevanceEngine()
    return engine.rank_items(
        papers,
        text_fn=lambda p: f"{p.title} {p.abstract}",
        top_k=top_k,
    )


if __name__ == "__main__":
    # Test with sample data
    engine = RelevanceEngine()
    print("Relevance engine initialized")
    print(f"Collection count: {engine.collection.count()}")