[DEEP-DIVE] Scaffold component — #830
Some checks failed
Deploy Nexus / deploy (push) Has been cancelled
Some checks failed
Deploy Nexus / deploy (push) Has been cancelled
This commit is contained in:
98
scaffold/deep-dive/relevance/relevance_engine.py
Normal file
98
scaffold/deep-dive/relevance/relevance_engine.py
Normal file
@@ -0,0 +1,98 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Relevance Engine for Deep Dive
|
||||||
|
Filters and ranks content by Hermes/Timmy relevance
|
||||||
|
"""
|
||||||
|
|
||||||
|
import chromadb
|
||||||
|
from chromadb.utils import embedding_functions
|
||||||
|
from typing import List, Dict, Any
|
||||||
|
import json
|
||||||
|
from dataclasses import asdict
|
||||||
|
|
||||||
|
# Hermes codebase snippets for similarity comparison.
# NOTE(review): these ten seed phrases define what "relevant" means for the
# whole engine — rank_items scores every item by embedding distance to them.
HERMES_CONTEXT: List[str] = [
    "Hermes agent system with tool calling and conversation loop",
    "LLM inference with tool orchestration",
    "Retrieval augmented generation RAG architecture",
    "Multi-agent orchestration and delegation",
    "Reinforcement learning RL for agent training",
    "Model quantization and efficient inference",
    "Vector database Chroma for embeddings",
    "MCP Model Context Protocol integration",
    "Gateway pattern for messaging platforms",
    "Agent trajectory logging and replay",
]
|
||||||
|
|
||||||
|
class RelevanceEngine:
    """Rank and filter content by semantic similarity to the Hermes context.

    Wraps a persistent Chroma collection seeded with the HERMES_CONTEXT
    snippets; items are scored by embedding distance to that seed context.
    """

    def __init__(self, collection_name: str = "deep_dive"):
        """Open (or create) the persistent collection and seed it if empty.

        Args:
            collection_name: Name of the Chroma collection to use.
        """
        self.client = chromadb.PersistentClient(path="./chroma_db")
        self.embedding_fn = embedding_functions.SentenceTransformerEmbeddingFunction(
            model_name="all-MiniLM-L6-v2"
        )

        # get_or_create_collection replaces the original bare try/except
        # (which swallowed every error, not just "collection missing").
        # Seed only when the collection is empty so repeated construction
        # never re-adds the ctx_* ids.
        self.collection = self.client.get_or_create_collection(
            name=collection_name,
            embedding_function=self.embedding_fn,
        )
        if self.collection.count() == 0:
            self._seed_context()

    def _seed_context(self):
        """Seed the collection with the Hermes context snippets."""
        self.collection.add(
            documents=HERMES_CONTEXT,
            ids=[f"ctx_{i}" for i in range(len(HERMES_CONTEXT))],
            metadatas=[{"type": "context"} for _ in HERMES_CONTEXT],
        )

    def rank_items(self, items: List[Any], text_fn, top_k: int = 10) -> List[tuple]:
        """Rank items by average embedding similarity to the Hermes context.

        Args:
            items: Arbitrary objects to rank.
            text_fn: Callable mapping an item to the text to embed.
            top_k: Maximum number of results to return.

        Returns:
            List of (item, score) tuples, highest score first, where score
            is the mean of 1/(1+distance) over the 3 nearest context docs.
        """
        # Chroma rejects an empty query_texts list; short-circuit instead.
        if not items:
            return []

        texts = [text_fn(item) for item in items]
        results = self.collection.query(
            query_texts=texts,
            n_results=3,
            include=["distances"],
        )

        # Inverse distance averaged over neighbors; guard the division in
        # case a query row comes back empty (e.g. unseeded collection).
        scored = []
        for item, distances in zip(items, results["distances"]):
            if distances:
                score = sum(1 / (1 + d) for d in distances) / len(distances)
            else:
                score = 0.0
            scored.append((item, score))

        scored.sort(key=lambda x: x[1], reverse=True)
        return scored[:top_k]

    def filter_by_keywords(self, items: List[Any], text_fn, keywords: List[str]) -> List[Any]:
        """Return the items whose text contains at least one keyword.

        Matching is case-insensitive substring containment.
        """
        # Lowercase the keywords once instead of once per item.
        lowered = [kw.lower() for kw in keywords]
        return [
            item
            for item in items
            if any(kw in text_fn(item).lower() for kw in lowered)
        ]
|
||||||
|
|
||||||
|
def rank_papers(papers: List[Any], top_k: int = 10) -> List[tuple]:
    """Convenience wrapper: rank papers against the Hermes context.

    Each paper is embedded as its title followed by its abstract.
    """

    def paper_text(p):
        # Text used for the embedding query: "<title> <abstract>".
        return f"{p.title} {p.abstract}"

    return RelevanceEngine().rank_items(papers, text_fn=paper_text, top_k=top_k)
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
# Test with sample data
|
||||||
|
engine = RelevanceEngine()
|
||||||
|
print("Relevance engine initialized")
|
||||||
|
print(f"Collection count: {engine.collection.count()}")
|
||||||
Reference in New Issue
Block a user