""" Hybrid Memory Query Router Routes queries to the best search method: - HRR: Compositional/conceptual queries - Vector: Semantic similarity - FTS5: Exact keyword matching Issue: #663 """ import re from collections import defaultdict from dataclasses import dataclass from enum import Enum from typing import Any, Dict, List, Optional, Tuple class SearchMethod(Enum): """Available search methods.""" HRR = "hrr" # Holographic Reduced Representations VECTOR = "vector" # Semantic vector search FTS5 = "fts5" # Full-text search (SQLite) HYBRID = "hybrid" # Combine multiple methods @dataclass class QueryClassification: """Result of query classification.""" method: SearchMethod confidence: float reason: str sub_queries: Optional[List[str]] = None # Query patterns for routing COMPOSITIONAL_PATTERNS = [ r"(?i)\brelated\s+to\b", r"(?i)\bcombined\s+with\b", r"(?i)\bbound\s+to\b", r"(?i)\bassociated\s+with\b", r"(?i)\bwhat\s+connects?\b", r"(?i)\bhow\s+.*\s+relate\b", r"(?i)\brelationship\s+between\b", ] CONTRADICTION_PATTERNS = [ r"(?i)\bcontradicts?\b", r"(?i)\bconflicts?\s+with\b", r"(?i)\binconsistent\b", r"(?i)\bopposite\s+of\b", r"(?i)\bopposes?\b", r"(?i)\bdisagrees?\s+with\b", ] EXACT_KEYWORD_PATTERNS = [ r'"[^"]+"', # Quoted phrases r"'[^']+'", # Single-quoted phrases r"(?i)\bexact\b", r"(?i)\bprecisely\b", r"(?i)\bspecifically\b", ] TEMPORAL_PATTERNS = [ r"(?i)\brecent\b", r"(?i)\btoday\b", r"(?i)\byesterday\b", r"(?i)\blast\s+(week|month|hour)\b", r"(?i)\bsince\b", r"(?i)\bbefore\b", r"(?i)\bafter\b", ] class QueryRouter: """Route queries to the best search method.""" def classify(self, query: str) -> QueryClassification: """Classify a query and route to best method.""" # Check for contradiction queries (HRR) for pattern in CONTRADICTION_PATTERNS: if re.search(pattern, query): return QueryClassification( method=SearchMethod.HRR, confidence=0.95, reason="Contradiction detection query" ) # Check for compositional queries (HRR) for pattern in COMPOSITIONAL_PATTERNS: if re.search(pattern, query): return QueryClassification( method=SearchMethod.HRR, confidence=0.90, reason="Compositional/conceptual query" ) # Check for exact keyword queries (FTS5) for pattern in EXACT_KEYWORD_PATTERNS: if re.search(pattern, query): return QueryClassification( method=SearchMethod.FTS5, confidence=0.85, reason="Exact keyword query" ) # Check for temporal queries (FTS5) for pattern in TEMPORAL_PATTERNS: if re.search(pattern, query): return QueryClassification( method=SearchMethod.FTS5, confidence=0.80, reason="Temporal query" ) # Short queries tend to be keyword searches if len(query.split()) <= 3: return QueryClassification( method=SearchMethod.FTS5, confidence=0.70, reason="Short query (likely keyword)" ) # Default: vector search for semantic queries return QueryClassification( method=SearchMethod.VECTOR, confidence=0.60, reason="Semantic similarity query" ) def should_use_hybrid(self, query: str) -> bool: """Check if query should use hybrid search.""" classification = self.classify(query) # Low confidence -> use hybrid if classification.confidence < 0.70: return True # Mixed signals -> use hybrid has_compositional = any(re.search(p, query) for p in COMPOSITIONAL_PATTERNS) has_keywords = any(re.search(p, query) for p in EXACT_KEYWORD_PATTERNS) return has_compositional and has_keywords def reciprocal_rank_fusion( results: Dict[str, List[Tuple[str, float]]], k: int = 60 ) -> List[Tuple[str, float]]: """ Merge results using Reciprocal Rank Fusion. Args: results: Dict of method -> [(item_id, score), ...] k: RRF constant (default 60) Returns: Merged and re-ranked results """ scores = defaultdict(float) for method, ranked_items in results.items(): for rank, (item_id, _) in enumerate(ranked_items, 1): scores[item_id] += 1.0 / (k + rank) return sorted(scores.items(), key=lambda x: x[1], reverse=True) def merge_with_hrr_priority( hrr_results: List[Tuple[str, float]], vector_results: List[Tuple[str, float]], fts5_results: List[Tuple[str, float]], query_type: str = "default" ) -> List[Tuple[str, float]]: """ Merge results with HRR priority for compositional queries. """ if query_type == "compositional": # HRR first, vector as supplement merged = hrr_results[:5] seen = {r[0] for r in merged} for r in vector_results[:5]: if r[0] not in seen: merged.append(r) return merged # Default: RRF merge return reciprocal_rank_fusion({ "hrr": hrr_results, "vector": vector_results, "fts5": fts5_results }) # Module-level router _router = QueryRouter() def route_query(query: str) -> QueryClassification: """Route a query to the best search method.""" return _router.classify(query) def should_use_hybrid(query: str) -> bool: """Check if query should use hybrid search.""" return _router.should_use_hybrid(query)