feat: Add hybrid query router (#663)
This commit is contained in:
209
tools/memory_query_router.py
Normal file
209
tools/memory_query_router.py
Normal file
@@ -0,0 +1,209 @@
|
||||
"""
|
||||
Hybrid Memory Query Router

Routes queries to the best search method:
- HRR: Compositional/conceptual queries
- Vector: Semantic similarity
- FTS5: Exact keyword matching

Issue: #663
|
||||
"""
|
||||
|
||||
import re
|
||||
from collections import defaultdict
|
||||
from dataclasses import dataclass
|
||||
from enum import Enum
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
|
||||
class SearchMethod(Enum):
    """Search backends a query can be routed to.

    Each member's value is the lowercase string identifier of the backend.
    """

    # Holographic Reduced Representations
    HRR = "hrr"

    # Semantic vector search
    VECTOR = "vector"

    # Full-text search (SQLite)
    FTS5 = "fts5"

    # Combine multiple methods
    HYBRID = "hybrid"
|
||||
|
||||
|
||||
@dataclass
class QueryClassification:
    """Routing decision produced for a single query.

    Attributes:
        method: Search method the query was routed to.
        confidence: Score attached to the routing decision.
        reason: Short human-readable explanation of the decision.
        sub_queries: Optional list of sub-query strings; defaults to None.
    """

    method: SearchMethod
    confidence: float
    reason: str
    sub_queries: Optional[List[str]] = None
|
||||
|
||||
|
||||
# Query patterns for routing.
# Each entry is a regular expression applied with ``re.search``, so a match
# anywhere in the query counts. ``(?i)`` makes a pattern case-insensitive.

# Phrasing that asks how concepts relate to or combine with each other.
COMPOSITIONAL_PATTERNS = [
    r"(?i)\brelated\s+to\b",
    r"(?i)\bcombined\s+with\b",
    r"(?i)\bbound\s+to\b",
    r"(?i)\bassociated\s+with\b",
    r"(?i)\bwhat\s+connects?\b",
    r"(?i)\bhow\s+.*\s+relate\b",
    r"(?i)\brelationship\s+between\b",
]

# Phrasing that asks about conflicting or opposing information.
CONTRADICTION_PATTERNS = [
    r"(?i)\bcontradicts?\b",
    r"(?i)\bconflicts?\s+with\b",
    r"(?i)\binconsistent\b",
    r"(?i)\bopposite\s+of\b",
    r"(?i)\bopposes?\b",
    r"(?i)\bdisagrees?\s+with\b",
]

# Signals that the user wants a literal match.
EXACT_KEYWORD_PATTERNS = [
    r'"[^"]+"',  # Quoted phrases
    # Single-quoted phrases. The opening quote must not follow a word
    # character and the closing quote must not precede one, so apostrophes in
    # contractions and possessives ("don't ... what's") are not mistaken for
    # quote delimiters, while a quoted phrase may still contain an apostrophe
    # ('it's broken'). ``(?s)`` lets the phrase span newlines, as the previous
    # ``[^']+`` form did.
    r"(?s)(?<!\w)'.+?'(?!\w)",
    r"(?i)\bexact\b",
    r"(?i)\bprecisely\b",
    r"(?i)\bspecifically\b",
]

# Words and phrases that refer to a point or span in time.
TEMPORAL_PATTERNS = [
    r"(?i)\brecent\b",
    r"(?i)\btoday\b",
    r"(?i)\byesterday\b",
    r"(?i)\blast\s+(week|month|hour)\b",
    r"(?i)\bsince\b",
    r"(?i)\bbefore\b",
    r"(?i)\bafter\b",
]
|
||||
|
||||
|
||||
class QueryRouter:
    """Choose a search method for a query using regex heuristics."""

    def classify(self, query: str) -> QueryClassification:
        """Return the routing decision for ``query``.

        Pattern groups are tried in priority order and the first group with a
        matching pattern decides the result: contradiction and compositional
        phrasing route to HRR, exact-keyword and temporal phrasing route to
        FTS5. If nothing matches, queries of at most three
        whitespace-separated words route to FTS5 and everything else routes
        to vector search.
        """
        # (patterns, method, confidence, reason), highest priority first.
        rules = (
            (
                CONTRADICTION_PATTERNS,
                SearchMethod.HRR,
                0.95,
                "Contradiction detection query",
            ),
            (
                COMPOSITIONAL_PATTERNS,
                SearchMethod.HRR,
                0.90,
                "Compositional/conceptual query",
            ),
            (
                EXACT_KEYWORD_PATTERNS,
                SearchMethod.FTS5,
                0.85,
                "Exact keyword query",
            ),
            (
                TEMPORAL_PATTERNS,
                SearchMethod.FTS5,
                0.80,
                "Temporal query",
            ),
        )

        for patterns, method, confidence, reason in rules:
            if any(re.search(regex, query) for regex in patterns):
                return QueryClassification(
                    method=method,
                    confidence=confidence,
                    reason=reason,
                )

        # No pattern matched: fall back on query length.
        word_count = len(query.split())
        if word_count > 3:
            # Longer free-form text is treated as a semantic query.
            return QueryClassification(
                method=SearchMethod.VECTOR,
                confidence=0.60,
                reason="Semantic similarity query",
            )

        # Short queries tend to be keyword searches.
        return QueryClassification(
            method=SearchMethod.FTS5,
            confidence=0.70,
            reason="Short query (likely keyword)",
        )

    def should_use_hybrid(self, query: str) -> bool:
        """Return True when ``query`` should be answered with hybrid search.

        Hybrid search is chosen when the classification confidence is below
        0.70, or when the query matches both a compositional pattern and an
        exact-keyword pattern (mixed signals).
        """
        if self.classify(query).confidence < 0.70:
            return True

        if not any(re.search(regex, query) for regex in COMPOSITIONAL_PATTERNS):
            return False

        return any(re.search(regex, query) for regex in EXACT_KEYWORD_PATTERNS)
|
||||
|
||||
|
||||
def reciprocal_rank_fusion(
    results: Dict[str, List[Tuple[str, float]]],
    k: int = 60
) -> List[Tuple[str, float]]:
    """
    Fuse several ranked result lists with Reciprocal Rank Fusion.

    Every item earns ``1 / (k + rank)`` from each list it appears in, where
    ``rank`` is its 1-based position in that list. The per-method scores in
    the input tuples are ignored; only positions matter.

    Args:
        results: Dict of method -> [(item_id, score), ...]
        k: RRF constant (default 60)

    Returns:
        List of (item_id, fused_score) sorted by fused score, highest first.
        Items with equal fused scores keep first-seen order.
    """
    fused = defaultdict(float)

    for ranking in results.values():
        for position, (doc_id, _score) in enumerate(ranking, start=1):
            fused[doc_id] += 1.0 / (k + position)

    ordered = list(fused.items())
    ordered.sort(key=lambda pair: pair[1], reverse=True)
    return ordered
|
||||
|
||||
|
||||
def merge_with_hrr_priority(
    hrr_results: List[Tuple[str, float]],
    vector_results: List[Tuple[str, float]],
    fts5_results: List[Tuple[str, float]],
    query_type: str = "default"
) -> List[Tuple[str, float]]:
    """
    Merge results with HRR priority for compositional queries.

    Args:
        hrr_results: Ranked (item_id, score) pairs from HRR search.
        vector_results: Ranked (item_id, score) pairs from vector search.
        fts5_results: Ranked (item_id, score) pairs from FTS5 search; only
            used by the default merge.
        query_type: "compositional" selects the HRR-first merge; any other
            value selects the Reciprocal Rank Fusion merge.

    Returns:
        For compositional queries: up to five HRR results followed by up to
        five vector results, with each item_id appearing once (first
        occurrence wins). Otherwise: the RRF merge of all three lists.
    """
    if query_type == "compositional":
        # HRR first, vector as supplement
        merged: List[Tuple[str, float]] = []
        seen = set()

        for result in (*hrr_results[:5], *vector_results[:5]):
            item_id = result[0]
            if item_id in seen:
                continue
            # Record every emitted id so a repeat inside either list, or
            # across the two lists, is not appended a second time.
            seen.add(item_id)
            merged.append(result)

        return merged

    # Default: RRF merge
    return reciprocal_rank_fusion({
        "hrr": hrr_results,
        "vector": vector_results,
        "fts5": fts5_results
    })
|
||||
|
||||
|
||||
# Shared QueryRouter instance created once at import time; the module-level
# helper functions delegate to it.
_router = QueryRouter()
|
||||
|
||||
|
||||
def route_query(query: str) -> QueryClassification:
    """Classify ``query`` with the module-level router and return the result."""
    classification = _router.classify(query)
    return classification
|
||||
|
||||
|
||||
def should_use_hybrid(query: str) -> bool:
    """Return whether the module-level router recommends hybrid search."""
    use_hybrid = _router.should_use_hybrid(query)
    return use_hybrid
|
||||
Reference in New Issue
Block a user