[DEEP-DIVE] Scaffold component — #830
Some checks failed
Deploy Nexus / deploy (push) Has been cancelled
Some checks failed
Deploy Nexus / deploy (push) Has been cancelled
This commit is contained in:
98
scaffold/deep-dive/relevance/relevance_engine.py
Normal file
98
scaffold/deep-dive/relevance/relevance_engine.py
Normal file
@@ -0,0 +1,98 @@
|
||||
#!/usr/bin/env python3
"""
Relevance Engine for Deep Dive.

Filters and ranks content by its relevance to Hermes/Timmy.
"""
|
||||
|
||||
import json
from dataclasses import asdict
from typing import Any, Callable, Dict, List, Optional

import chromadb
from chromadb.utils import embedding_functions
|
||||
|
||||
# Short topic descriptions of the Hermes codebase. They are the reference
# documents that candidate items are compared against for similarity.
HERMES_CONTEXT = [
    "Hermes agent system with tool calling and conversation loop",
    "LLM inference with tool orchestration",
    "Retrieval augmented generation RAG architecture",
    "Multi-agent orchestration and delegation",
    "Reinforcement learning RL for agent training",
    "Model quantization and efficient inference",
    "Vector database Chroma for embeddings",
    "MCP Model Context Protocol integration",
    "Gateway pattern for messaging platforms",
    "Agent trajectory logging and replay",
]
|
||||
|
||||
class RelevanceEngine:
    """Rank and filter items by similarity to the Hermes context documents.

    The engine keeps a persistent Chroma collection whose embeddings are
    produced by the ``all-MiniLM-L6-v2`` sentence-transformer model. The
    collection is seeded with ``HERMES_CONTEXT`` when it holds no documents.
    """

    def __init__(self, collection_name: str = "deep_dive",
                 persist_path: str = "./chroma_db"):
        """Open (or create) the collection and make sure it is seeded.

        Args:
            collection_name: Name of the Chroma collection to use.
            persist_path: Directory of the on-disk Chroma store.
        """
        self.client = chromadb.PersistentClient(path=persist_path)
        self.embedding_fn = embedding_functions.SentenceTransformerEmbeddingFunction(
            model_name="all-MiniLM-L6-v2"
        )

        # A single get-or-create call replaces a bare ``except:`` around
        # ``get_collection``; that handler treated *every* failure (even
        # KeyboardInterrupt) as "collection does not exist yet".
        self.collection = self.client.get_or_create_collection(
            name=collection_name,
            embedding_function=self.embedding_fn,
        )
        # Seed whenever the collection is empty, which also repairs a
        # collection that was created earlier but never populated.
        if self.collection.count() == 0:
            self._seed_context()

    def _seed_context(self):
        """Add every ``HERMES_CONTEXT`` entry to the collection.

        Documents get the ids ``ctx_0``, ``ctx_1``, ... and the metadata
        ``{"type": "context"}``.
        """
        self.collection.add(
            documents=HERMES_CONTEXT,
            ids=[f"ctx_{index}" for index, _ in enumerate(HERMES_CONTEXT)],
            metadatas=[{"type": "context"} for _ in HERMES_CONTEXT],
        )

    def rank_items(self, items: List[Any], text_fn: Callable[[Any], str],
                   top_k: int = 10, n_context: int = 3) -> List[tuple]:
        """Rank items by similarity to the documents in the collection.

        Each item's score is the mean of ``1 / (1 + d)`` over the distances
        ``d`` to its ``n_context`` nearest documents, so a smaller distance
        gives a higher score.
        NOTE(review): the formula assumes non-negative distances - confirm
        against the distance metric configured for the collection.

        Args:
            items: Items to rank; any iterable is accepted.
            text_fn: Callable mapping an item to the text that is embedded.
            top_k: Maximum number of results to return.
            n_context: Number of nearest documents averaged per item.

        Returns:
            Up to ``top_k`` ``(item, score)`` tuples, highest score first.
            Empty when there are no items or ``top_k`` is not positive.
        """
        # Materialize once: ``items`` is walked twice below, which would
        # silently produce no results for a one-shot iterator.
        items = list(items)
        if not items or top_k <= 0:
            # Nothing to rank; skip the query instead of sending an empty one.
            return []

        texts = [text_fn(item) for item in items]
        results = self.collection.query(
            query_texts=texts,
            n_results=n_context,
            include=["distances"],
        )

        scored = []
        for item, distances in zip(items, results["distances"]):
            if distances:
                score = sum(1 / (1 + d) for d in distances) / len(distances)
            else:
                # No neighbours returned: avoid dividing by zero and rank
                # the item last.
                score = 0.0
            scored.append((item, score))

        # Stable sort: items with equal scores keep their input order.
        scored.sort(key=lambda pair: pair[1], reverse=True)
        return scored[:top_k]

    def filter_by_keywords(self, items: List[Any], text_fn: Callable[[Any], str],
                           keywords: List[str]) -> List[Any]:
        """Return the items whose text contains at least one keyword.

        Matching is a case-insensitive substring test.

        Args:
            items: Items to filter.
            text_fn: Callable mapping an item to the text that is searched.
            keywords: Keywords to look for; an empty list matches nothing.

        Returns:
            The matching items in their original order.
        """
        # Lower-case the keywords once rather than once per item.
        needles = [keyword.lower() for keyword in keywords]
        matched = []
        for item in items:
            haystack = text_fn(item).lower()
            if any(needle in haystack for needle in needles):
                matched.append(item)
        return matched
|
||||
|
||||
def rank_papers(papers: List[Any], top_k: int = 10,
                engine: Optional["RelevanceEngine"] = None) -> List[tuple]:
    """Rank papers by relevance using their title and abstract.

    Args:
        papers: Objects exposing ``title`` and ``abstract`` attributes; any
            iterable is accepted.
        top_k: Maximum number of results to return.
        engine: Engine to rank with. A new ``RelevanceEngine`` is built when
            omitted; pass one in to reuse it across calls instead of
            constructing a fresh engine every time.

    Returns:
        Whatever ``engine.rank_items`` returns for the papers, or an empty
        list when there are no papers.
    """
    papers = list(papers)
    if not papers:
        # Nothing to rank: do not construct an engine for empty input.
        return []
    if engine is None:
        engine = RelevanceEngine()
    return engine.rank_items(
        papers,
        text_fn=lambda paper: f"{paper.title} {paper.abstract}",
        top_k=top_k,
    )
|
||||
|
||||
if __name__ == "__main__":
    # Smoke test: build the engine and report how many documents its
    # collection currently holds.
    engine = RelevanceEngine()
    print("Relevance engine initialized")
    print(f"Collection count: {engine.collection.count()}")
|
||||
Reference in New Issue
Block a user