Files
the-nexus/scaffold/deep-dive/relevance/relevance_engine.py
Ezra cec82bf991
Some checks failed
Deploy Nexus / deploy (push) Has been cancelled
[DEEP-DIVE] Scaffold component — #830
2026-04-05 07:42:27 +00:00

99 lines
3.4 KiB
Python

#!/usr/bin/env python3
"""
Relevance Engine for Deep Dive
Filters and ranks content by Hermes/Timmy relevance
"""
import chromadb
from chromadb.utils import embedding_functions
from typing import List, Dict, Any
import json
from dataclasses import asdict
# Reference corpus: short Hermes/Timmy topic snippets. These are embedded
# into the Chroma collection at seed time and act as the anchor documents
# that candidate content is scored against (see RelevanceEngine.rank_items).
HERMES_CONTEXT = [
    "Hermes agent system with tool calling and conversation loop",
    "LLM inference with tool orchestration",
    "Retrieval augmented generation RAG architecture",
    "Multi-agent orchestration and delegation",
    "Reinforcement learning RL for agent training",
    "Model quantization and efficient inference",
    "Vector database Chroma for embeddings",
    "MCP Model Context Protocol integration",
    "Gateway pattern for messaging platforms",
    "Agent trajectory logging and replay",
]
class RelevanceEngine:
    """Ranks and filters content by similarity to the Hermes context corpus.

    Backed by a persistent Chroma collection seeded with HERMES_CONTEXT;
    relevance is the mean inverse embedding distance to the nearest
    context snippets.
    """

    def __init__(self, collection_name: str = "deep_dive", db_path: str = "./chroma_db"):
        """Open (or create and seed) the Chroma collection.

        Args:
            collection_name: Name of the Chroma collection to use.
            db_path: Filesystem path for the persistent Chroma store
                (generalized from the previously hard-coded "./chroma_db").
        """
        self.client = chromadb.PersistentClient(path=db_path)
        self.embedding_fn = embedding_functions.SentenceTransformerEmbeddingFunction(
            model_name="all-MiniLM-L6-v2"
        )
        # get_or_create_collection replaces the original bare-except
        # get/create dance (a bare `except:` also swallowed
        # KeyboardInterrupt/SystemExit). Seed only when the collection is
        # empty so repeated construction never re-adds duplicate ids.
        self.collection = self.client.get_or_create_collection(
            name=collection_name,
            embedding_function=self.embedding_fn,
        )
        if self.collection.count() == 0:
            self._seed_context()

    def _seed_context(self) -> None:
        """Seed the collection with the HERMES_CONTEXT snippets."""
        self.collection.add(
            documents=HERMES_CONTEXT,
            ids=[f"ctx_{i}" for i in range(len(HERMES_CONTEXT))],
            metadatas=[{"type": "context"} for _ in HERMES_CONTEXT],
        )

    def rank_items(self, items: List[Any], text_fn, top_k: int = 10) -> List[tuple]:
        """Rank items by similarity to the seeded Hermes context.

        Args:
            items: Arbitrary items to rank.
            text_fn: Callable mapping an item to the text to embed.
            top_k: Maximum number of results to return.

        Returns:
            Up to top_k (item, score) tuples, best first. The score is
            the mean of 1/(1+distance) over the 3 nearest context
            snippets, so higher means more relevant.
        """
        # Chroma rejects an empty query_texts list; short-circuit instead.
        if not items:
            return []
        texts = [text_fn(item) for item in items]
        results = self.collection.query(
            query_texts=texts,
            n_results=3,
            include=["distances"],
        )
        scored = []
        for item, distances in zip(items, results["distances"]):
            # Inverse distance averaged over neighbors; guard the
            # division in case Chroma returns no neighbors for a query.
            if distances:
                score = sum(1 / (1 + d) for d in distances) / len(distances)
            else:
                score = 0.0
            scored.append((item, score))
        scored.sort(key=lambda pair: pair[1], reverse=True)
        return scored[:top_k]

    def filter_by_keywords(self, items: List[Any], text_fn, keywords: List[str]) -> List[Any]:
        """Keep items whose text contains at least one keyword
        (case-insensitive substring match).
        """
        # Lowercase the keywords once instead of once per item.
        lowered = [kw.lower() for kw in keywords]
        return [
            item
            for item in items
            if any(kw in text_fn(item).lower() for kw in lowered)
        ]
def rank_papers(papers: List[Any], top_k: int = 10) -> List[tuple]:
    """Convenience wrapper: rank papers against the Hermes context.

    Each paper is represented as the concatenation of its title and
    abstract before embedding.
    """
    def paper_text(paper) -> str:
        return f"{paper.title} {paper.abstract}"

    engine = RelevanceEngine()
    return engine.rank_items(papers, text_fn=paper_text, top_k=top_k)
if __name__ == "__main__":
    # Smoke test: build the engine and report how many documents the
    # backing collection holds after seeding.
    relevance = RelevanceEngine()
    print("Relevance engine initialized")
    print(f"Collection count: {relevance.collection.count()}")