"""
Hybrid Memory Query Router

Routes queries to the best search method:
- HRR: Compositional/conceptual queries
- Vector: Semantic similarity
- FTS5: Exact keyword matching

Classification is purely heuristic: a query is tested against ordered groups
of regular expressions and the first group that matches decides the route.

Issue: #663
"""

import re
from collections import defaultdict
from dataclasses import dataclass
from enum import Enum
from typing import Any, Dict, List, Optional, Tuple


class SearchMethod(Enum):
    """Available search methods."""
    HRR = "hrr"        # Holographic Reduced Representations
    VECTOR = "vector"  # Semantic vector search
    FTS5 = "fts5"      # Full-text search (SQLite)
    HYBRID = "hybrid"  # Combine multiple methods


@dataclass
class QueryClassification:
    """Result of query classification."""
    # Search method the query was routed to.
    method: SearchMethod
    # Heuristic confidence attached to the rule that fired (0.60 - 0.95).
    confidence: float
    # Human-readable explanation of which rule fired.
    reason: str
    # Optional decomposition of the query; nothing in this module sets it.
    sub_queries: Optional[List[str]] = None


# Query patterns for routing
COMPOSITIONAL_PATTERNS = [
    r"(?i)\brelated\s+to\b",
    r"(?i)\bcombined\s+with\b",
    r"(?i)\bbound\s+to\b",
    r"(?i)\bassociated\s+with\b",
    r"(?i)\bwhat\s+connects?\b",
    r"(?i)\bhow\s+.*\s+relate\b",
    r"(?i)\brelationship\s+between\b",
]

CONTRADICTION_PATTERNS = [
    r"(?i)\bcontradicts?\b",
    r"(?i)\bconflicts?\s+with\b",
    r"(?i)\binconsistent\b",
    r"(?i)\bopposite\s+of\b",
    r"(?i)\bopposes?\b",
    r"(?i)\bdisagrees?\s+with\b",
]

EXACT_KEYWORD_PATTERNS = [
    r'"[^"]+"',  # Quoted phrases
    # Single-quoted phrases. The lookarounds require that neither quote
    # touches a word character on its outer side, so the apostrophes of
    # contractions ("don't ... it's") are not mistaken for a quoted phrase.
    r"(?<!\w)'[^']+'(?!\w)",
    r"(?i)\bexact\b",
    r"(?i)\bprecisely\b",
    r"(?i)\bspecifically\b",
]

TEMPORAL_PATTERNS = [
    r"(?i)\brecent\b",
    r"(?i)\btoday\b",
    r"(?i)\byesterday\b",
    r"(?i)\blast\s+(week|month|hour)\b",
    r"(?i)\bsince\b",
    r"(?i)\bbefore\b",
    r"(?i)\bafter\b",
]

# Ordered routing table: (patterns, method, confidence, reason).
# Order is the priority: the first group with a matching pattern wins.
# The pattern lists are referenced (not copied), so edits to the public
# lists above are picked up here as well.
_ROUTING_RULES: Tuple[Tuple[List[str], SearchMethod, float, str], ...] = (
    (CONTRADICTION_PATTERNS, SearchMethod.HRR, 0.95,
     "Contradiction detection query"),
    (COMPOSITIONAL_PATTERNS, SearchMethod.HRR, 0.90,
     "Compositional/conceptual query"),
    (EXACT_KEYWORD_PATTERNS, SearchMethod.FTS5, 0.85,
     "Exact keyword query"),
    (TEMPORAL_PATTERNS, SearchMethod.FTS5, 0.80,
     "Temporal query"),
)

# Queries with at most this many whitespace-separated tokens are treated
# as keyword searches when no pattern group matches.
_SHORT_QUERY_MAX_WORDS = 3

# Classifications strictly below this confidence fall back to hybrid search.
_HYBRID_CONFIDENCE_THRESHOLD = 0.70

# Number of results taken from each of the HRR and vector lists when
# merging for a compositional query.
_PRIORITY_TOP_N = 5


def _matches_any(patterns: List[str], query: str) -> bool:
    """Return True if at least one regex in *patterns* is found in *query*."""
    return any(re.search(pattern, query) for pattern in patterns)


class QueryRouter:
    """Route queries to the best search method."""

    def classify(self, query: str) -> QueryClassification:
        """
        Classify a query and route to best method.

        Rules are evaluated in priority order: contradiction (HRR),
        compositional (HRR), exact keyword (FTS5), temporal (FTS5), then
        short query (FTS5); anything else defaults to vector search.

        Args:
            query: Raw query text.

        Returns:
            The classification produced by the first rule that applies.
        """
        for patterns, method, confidence, reason in _ROUTING_RULES:
            if _matches_any(patterns, query):
                return QueryClassification(
                    method=method,
                    confidence=confidence,
                    reason=reason,
                )

        # Short queries tend to be keyword searches. An empty query has
        # zero tokens and therefore also lands here.
        if len(query.split()) <= _SHORT_QUERY_MAX_WORDS:
            return QueryClassification(
                method=SearchMethod.FTS5,
                confidence=0.70,
                reason="Short query (likely keyword)",
            )

        # Default: vector search for semantic queries
        return QueryClassification(
            method=SearchMethod.VECTOR,
            confidence=0.60,
            reason="Semantic similarity query",
        )

    def should_use_hybrid(self, query: str) -> bool:
        """
        Check if query should use hybrid search.

        Args:
            query: Raw query text.

        Returns:
            True when the classification confidence is below 0.70, or when
            the query matches both a compositional pattern and an exact
            keyword pattern; False otherwise.
        """
        # Low confidence -> use hybrid
        if self.classify(query).confidence < _HYBRID_CONFIDENCE_THRESHOLD:
            return True

        # Mixed signals -> use hybrid. classify() stops at the first group
        # that matches, so both groups are re-tested independently here.
        return (
            _matches_any(COMPOSITIONAL_PATTERNS, query)
            and _matches_any(EXACT_KEYWORD_PATTERNS, query)
        )


def reciprocal_rank_fusion(
    results: Dict[str, List[Tuple[str, float]]],
    k: int = 60
) -> List[Tuple[str, float]]:
    """
    Merge results using Reciprocal Rank Fusion.

    Every occurrence of an item at 1-based position ``rank`` in a result
    list adds ``1 / (k + rank)`` to that item's fused score. The per-method
    scores carried in the input tuples are ignored; only the order counts.

    Args:
        results: Dict of method -> [(item_id, score), ...]
        k: RRF constant (default 60)

    Returns:
        Merged and re-ranked results as (item_id, fused_score) pairs,
        highest fused score first. Items with equal fused scores keep the
        order in which they were first encountered.
    """
    fused: Dict[str, float] = defaultdict(float)

    # The method names are irrelevant to the fusion, so iterate the
    # ranked lists directly.
    for ranked_items in results.values():
        for rank, (item_id, _) in enumerate(ranked_items, 1):
            fused[item_id] += 1.0 / (k + rank)

    return sorted(fused.items(), key=lambda pair: pair[1], reverse=True)


def merge_with_hrr_priority(
    hrr_results: List[Tuple[str, float]],
    vector_results: List[Tuple[str, float]],
    fts5_results: List[Tuple[str, float]],
    query_type: str = "default"
) -> List[Tuple[str, float]]:
    """
    Merge results with HRR priority for compositional queries.

    Args:
        hrr_results: Ranked (item_id, score) pairs from HRR search.
        vector_results: Ranked (item_id, score) pairs from vector search.
        fts5_results: Ranked (item_id, score) pairs from FTS5 search.
        query_type: ``"compositional"`` selects the HRR-first merge; any
            other value selects Reciprocal Rank Fusion of all three lists.

    Returns:
        For ``"compositional"``: the first five HRR results followed by
        those of the first five vector results whose ids are not already
        present, each with its original score (FTS5 results are not used).
        Otherwise: the RRF merge, as (item_id, fused_score) pairs.
    """
    if query_type == "compositional":
        # HRR first, vector as supplement. The slice is a new list, so
        # the caller's hrr_results is never mutated.
        merged = hrr_results[:_PRIORITY_TOP_N]
        seen = {item_id for item_id, _ in merged}

        for candidate in vector_results[:_PRIORITY_TOP_N]:
            item_id = candidate[0]
            if item_id not in seen:
                # Track appended ids too, so an id that repeats within
                # the vector results is only added once.
                seen.add(item_id)
                merged.append(candidate)

        return merged

    # Default: RRF merge
    return reciprocal_rank_fusion({
        "hrr": hrr_results,
        "vector": vector_results,
        "fts5": fts5_results,
    })


# Module-level router
_router = QueryRouter()


def route_query(query: str) -> QueryClassification:
    """Route a query to the best search method using the shared router."""
    return _router.classify(query)


def should_use_hybrid(query: str) -> bool:
    """Check if query should use hybrid search using the shared router."""
    return _router.should_use_hybrid(query)