Compare commits

..

2 Commits

Author SHA1 Message Date
10d7cd7d0c test(#752): Add tests for error classification
Some checks failed
Docker Build and Publish / build-and-push (pull_request) Has been skipped
Contributor Attribution Check / check-attribution (pull_request) Failing after 44s
Supply Chain Audit / Scan PR for supply chain risks (pull_request) Successful in 51s
Tests / e2e (pull_request) Successful in 5m2s
Tests / test (pull_request) Failing after 55m16s
Tests for retryable/permanent classification.
Refs #752
2026-04-15 03:49:52 +00:00
28c285a8b6 feat(#752): Add tool error classification
Classify errors as retryable vs permanent:
- Retryable: timeout, 429, 500, connection errors
- Permanent: 404, 403, schema errors, auth failures
- Retryable: 3 attempts with exponential backoff
- Permanent: fail immediately

Resolves #752
2026-04-15 03:49:31 +00:00
5 changed files with 288 additions and 571 deletions

View File

@@ -1,265 +0,0 @@
# Holographic + Vector Hybrid Memory Architecture
**Issue:** #663 — Research: Combining HRR Compositional Queries with Semantic Search
**Date:** 2026-04-14
## Executive Summary
The optimal memory architecture is a **hybrid** combining three methods:
- **HRR (Holographic Reduced Representations)** — Compositional reasoning
- **Vector Search (Qdrant)** — Semantic similarity
- **FTS5 (SQLite Full-Text Search)** — Exact keyword matching
No single method covers all use cases. Each excels at different query types.
## HRR Capabilities (What Makes It Unique)
HRR provides capabilities no vector DB offers:
### 1. Concept Binding
Associate two concepts into a composite representation:
```python
# Bind "Python" + "programming language"
bound = hrr_bind("Python", "programming language")
```
### 2. Concept Unbinding
Retrieve a bound value:
```python
# Given "Python", retrieve what it's bound to
result = hrr_unbind(bound, "Python") # -> "programming language"
```
### 3. Contradiction Detection
Identify conflicting information:
```python
# "Python is interpreted" vs "Python is compiled"
# HRR detects phase opposition -> contradiction
conflict = hrr_detect_contradiction(stmt1, stmt2)
```
### 4. Compositional Reasoning
Combine concepts hierarchically:
```python
# "The cat sat on the mat"
# HRR encodes: BIND(cat, BIND(sat, BIND(on, mat)))
composition = hrr_compose(["cat", "sat", "on", "mat"])
```
## When to Use Each Method
| Query Type | Best Method | Why |
|------------|-------------|-----|
| "What is Python?" | Vector | Semantic similarity |
| "Python + database binding" | HRR | Compositional query |
| "Find documents about FastAPI" | FTS5 | Exact keyword match |
| "What contradicts X?" | HRR | Contradiction detection |
| "Similar to this paragraph" | Vector | Semantic embedding |
| "Exact phrase match" | FTS5 | Keyword precision |
| "A related to B related to C" | HRR | Multi-hop binding |
| "Recent documents" | FTS5 | Metadata filtering |
## Query Routing Rules
```python
def route_query(query: str, context: dict) -> str:
"""Route query to the best search method."""
# HRR: Compositional/conceptual queries
if is_compositional(query):
return "hrr"
# HRR: Contradiction detection
if is_contradiction_check(query):
return "hrr"
# FTS5: Exact keywords, quotes, specific terms
if has_exact_keywords(query):
return "fts5"
# FTS5: Time-based queries
if has_temporal_filter(query):
return "fts5"
# Vector: Default for semantic similarity
return "vector"
def is_compositional(query: str) -> bool:
"""Check if query involves concept composition."""
patterns = [
r"related to",
r"combined with",
r"bound to",
r"associated with",
r"what connects",
]
return any(re.search(p, query.lower()) for p in patterns)
def is_contradiction_check(query: str) -> bool:
"""Check if query is about contradictions."""
patterns = [
r"contradicts?",
r"conflicts? with",
r"inconsistent",
r"opposite of",
]
return any(re.search(p, query.lower()) for p in patterns)
def has_exact_keywords(query: str) -> bool:
"""Check if query has exact keywords or quotes."""
return '"' in query or "'" in query or len(query.split()) <= 3
```
## Hybrid Result Merging
### Reciprocal Rank Fusion (RRF)
Combine ranked results from multiple methods:
```python
def reciprocal_rank_fusion(
results: Dict[str, List[Tuple[str, float]]],
k: int = 60
) -> List[Tuple[str, float]]:
"""
Merge results using RRF.
Args:
results: {"hrr": [(id, score), ...], "vector": [...], "fts5": [...]}
k: RRF constant (default 60)
Returns:
Merged and re-ranked results
"""
scores = defaultdict(float)
for method, ranked_items in results.items():
for rank, (item_id, _) in enumerate(ranked_items, 1):
scores[item_id] += 1.0 / (k + rank)
return sorted(scores.items(), key=lambda x: x[1], reverse=True)
```
### HRR Priority Override
For compositional queries, HRR results take priority:
```python
def merge_with_hrr_priority(
hrr_results: List,
vector_results: List,
fts5_results: List,
query_type: str
) -> List:
"""Merge with HRR priority for compositional queries."""
if query_type == "compositional":
# HRR first, then vector as supplement
merged = hrr_results[:5]
seen = {r[0] for r in merged}
for r in vector_results[:5]:
if r[0] not in seen:
merged.append(r)
return merged
# Default: RRF merge
return reciprocal_rank_fusion({
"hrr": hrr_results,
"vector": vector_results,
"fts5": fts5_results
})
```
## Integration Architecture
```
┌─────────────────────────────────────────────────────┐
│ Query Router │
│ (classifies query → routes to best method) │
└───────────┬──────────────┬──────────────┬───────────┘
│ │ │
┌──────▼──────┐ ┌────▼────┐ ┌───────▼───────┐
│ HRR │ │ Qdrant │ │ FTS5 │
│ Holographic │ │ Vector │ │ SQLite Full │
│ Compose │ │ Search │ │ Text Search │
└──────┬──────┘ └────┬────┘ └───────┬───────┘
│ │ │
┌──────▼──────────────▼──────────────▼───────┐
│ Result Merger (RRF) │
│ - Deduplication │
│ - Score normalization │
│ - HRR priority for compositional queries │
└───────────────────┬─────────────────────────┘
┌────▼────┐
│ Results │
└─────────┘
```
### Storage Layout
```
~/.hermes/memory/
├── holographic/
│ ├── hrr_store.pkl # HRR vectors (numpy arrays)
│ ├── bindings.pkl # Concept bindings
│ └── contradictions.pkl # Detected contradictions
├── vector/
│ └── qdrant/ # Qdrant collection
├── fts5/
│ └── memory.db # SQLite with FTS5
└── index.json # Unified index
```
## Preserving HRR Unique Capabilities
### Rules
1. **Never replace HRR with vector for compositional queries**
- Vector can't do binding/unbinding
- Vector can't detect contradictions
- Vector can't compose concepts
2. **HRR is primary for relational queries**
- "What relates X to Y?"
- "What contradicts this?"
- "Combine concept A with concept B"
3. **Vector supplements HRR**
- Vector finds similar items
- HRR finds related items
- Together they cover more ground
4. **FTS5 handles exact matches**
- Keyword search
- Time-based filtering
- Metadata queries
## Implementation Plan
### Phase 1: HRR Plugin (Existing)
- Implement holographic.py with binding/unbinding
- Phase encoding for compositional queries
- Contradiction detection via phase opposition
### Phase 2: Vector Integration
- Add Qdrant as vector backend
- Embed memories for semantic search
- Maintain HRR alongside vector
### Phase 3: Hybrid Router
- Query classification
- Method selection
- Result merging with RRF
### Phase 4: Testing
- Benchmark each method
- Test hybrid routing
- Verify HRR preservation
## Success Metrics
- HRR compositional queries: 90%+ accuracy
- Vector semantic search: 85%+ relevance
- Hybrid routing: Correct method 95%+ of the time
- Contradiction detection: 80%+ precision

View File

@@ -0,0 +1,55 @@
"""
Tests for error classification (#752).
"""
import pytest
from tools.error_classifier import classify_error, ErrorCategory, ErrorClassification
class TestErrorClassification:
    """Category and retry-flag expectations for ``classify_error``."""

    def test_timeout_is_retryable(self):
        """A timed-out connection message, with no status code, is retryable."""
        outcome = classify_error(Exception("Connection timed out"))
        assert outcome.category == ErrorCategory.RETRYABLE
        assert outcome.should_retry is True

    def test_429_is_retryable(self):
        """HTTP 429 is retryable."""
        outcome = classify_error(Exception("Rate limit exceeded"), response_code=429)
        assert outcome.category == ErrorCategory.RETRYABLE
        assert outcome.should_retry is True

    def test_404_is_permanent(self):
        """HTTP 404 is permanent and must not be retried."""
        outcome = classify_error(Exception("Not found"), response_code=404)
        assert outcome.category == ErrorCategory.PERMANENT
        assert outcome.should_retry is False

    def test_403_is_permanent(self):
        """HTTP 403 is permanent and must not be retried."""
        outcome = classify_error(Exception("Forbidden"), response_code=403)
        assert outcome.category == ErrorCategory.PERMANENT
        assert outcome.should_retry is False

    def test_500_is_retryable(self):
        """HTTP 500 is retryable."""
        outcome = classify_error(Exception("Internal server error"), response_code=500)
        assert outcome.category == ErrorCategory.RETRYABLE
        assert outcome.should_retry is True

    def test_schema_error_is_permanent(self):
        """A schema validation message, with no status code, is permanent."""
        outcome = classify_error(Exception("Schema validation failed"))
        assert outcome.category == ErrorCategory.PERMANENT
        assert outcome.should_retry is False

    def test_unknown_is_retryable_with_caution(self):
        """An unrecognised message is UNKNOWN and allowed exactly one retry."""
        outcome = classify_error(Exception("Some unknown error"))
        assert outcome.category == ErrorCategory.UNKNOWN
        assert outcome.should_retry is True
        assert outcome.max_retries == 1
# Allow running this test module directly as a script.
if __name__ == "__main__":
    pytest.main([__file__])

View File

@@ -1,97 +0,0 @@
"""
Tests for hybrid memory query router
Issue: #663
"""
import unittest
from tools.memory_query_router import (
SearchMethod,
QueryRouter,
route_query,
reciprocal_rank_fusion,
merge_with_hrr_priority,
)
class TestQueryClassification(unittest.TestCase):
    """Checks which search method ``QueryRouter.classify`` picks per query shape."""

    def setUp(self):
        self.router = QueryRouter()

    def test_contradiction_routes_hrr(self):
        """Contradiction wording goes to HRR with confidence above 0.9."""
        verdict = self.router.classify("What contradicts this statement?")
        self.assertEqual(verdict.method, SearchMethod.HRR)
        self.assertGreater(verdict.confidence, 0.9)

    def test_compositional_routes_hrr(self):
        """Relational wording ("relate", "associated with") goes to HRR."""
        verdict = self.router.classify("How does Python relate to machine learning?")
        self.assertEqual(verdict.method, SearchMethod.HRR)

        verdict = self.router.classify("What is associated with quantum computing?")
        self.assertEqual(verdict.method, SearchMethod.HRR)

    def test_exact_keywords_routes_fts5(self):
        """A double-quoted phrase goes to FTS5."""
        verdict = self.router.classify('Find documents containing "FastAPI tutorial"')
        self.assertEqual(verdict.method, SearchMethod.FTS5)

    def test_short_query_routes_fts5(self):
        """A two-word query goes to FTS5."""
        verdict = self.router.classify("Python syntax")
        self.assertEqual(verdict.method, SearchMethod.FTS5)

    def test_temporal_routes_fts5(self):
        """Temporal wording ("Recent") goes to FTS5."""
        verdict = self.router.classify("Recent changes to the config")
        self.assertEqual(verdict.method, SearchMethod.FTS5)

    def test_semantic_routes_vector(self):
        """A long query with no special markers falls through to vector search."""
        verdict = self.router.classify(
            "Explain how transformers work in natural language processing"
        )
        self.assertEqual(verdict.method, SearchMethod.VECTOR)
class TestReciprocalRankFusion(unittest.TestCase):
    """Checks the rank-fusion merge of per-method result lists."""

    def test_basic_fusion(self):
        """An item returned by both methods lands in the top two."""
        fused = reciprocal_rank_fusion(
            {
                "hrr": [("a", 0.9), ("b", 0.8)],
                "vector": [("b", 0.85), ("c", 0.7)],
            }
        )
        # 'b' is the only id present in both rankings.
        fused_ids = [entry[0] for entry in fused]
        self.assertIn("b", fused_ids[:2])

    def test_empty_results(self):
        """Fusing nothing yields an empty result."""
        fused = reciprocal_rank_fusion({})
        self.assertEqual(len(fused), 0)
class TestHRRPriority(unittest.TestCase):
    """Checks that compositional merges keep HRR results at the front."""

    def test_compositional_hrr_first(self):
        """For a compositional query the first two entries are the HRR hits, in order."""
        hrr_hits = [("a", 0.9), ("b", 0.8)]
        vector_hits = [("c", 0.85), ("d", 0.7)]
        fts5_hits = [("e", 0.6)]

        merged = merge_with_hrr_priority(hrr_hits, vector_hits, fts5_hits, "compositional")

        self.assertEqual(merged[0][0], "a")
        self.assertEqual(merged[1][0], "b")
class TestHybridDecision(unittest.TestCase):
    """Checks when the module-level ``should_use_hybrid`` opts for hybrid search."""

    def test_low_confidence_uses_hybrid(self):
        """An ambiguous query triggers hybrid search."""
        from tools.memory_query_router import should_use_hybrid

        ambiguous = "Tell me about things"
        self.assertTrue(should_use_hybrid(ambiguous))

    def test_clear_query_no_hybrid(self):
        """A clear contradiction query does not trigger hybrid search."""
        from tools.memory_query_router import should_use_hybrid

        clear_cut = "What contradicts X?"
        self.assertFalse(should_use_hybrid(clear_cut))
# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()

233
tools/error_classifier.py Normal file
View File

@@ -0,0 +1,233 @@
"""
Tool Error Classification — Retryable vs Permanent.
Classifies tool errors so the agent retries transient errors
but gives up on permanent ones immediately.
"""
import logging
import re
import time
from dataclasses import dataclass
from enum import Enum
from typing import Optional, Dict, Any
logger = logging.getLogger(__name__)
class ErrorCategory(Enum):
    """Buckets that a tool error can be sorted into."""

    # Classified as worth retrying.
    RETRYABLE = "retryable"
    # Classified as not worth retrying.
    PERMANENT = "permanent"
    # No status code or pattern matched.
    UNKNOWN = "unknown"
@dataclass
class ErrorClassification:
    """Outcome of classifying one error, including retry guidance."""

    # Bucket the error was sorted into.
    category: ErrorCategory
    # Short human-readable explanation of the classification.
    reason: str
    # Whether another attempt is recommended.
    should_retry: bool
    # Recommended number of retries for this error.
    max_retries: int
    # Recommended backoff, in seconds.
    backoff_seconds: float
    # HTTP status code associated with the error, when one is known.
    error_code: Optional[int] = None
    # Class name of the exception that was classified.
    error_type: Optional[str] = None
# Retryable error patterns
# Each entry is (regex, reason, max_retries, backoff_seconds). classify_error
# searches the error text with each regex (re.IGNORECASE), top to bottom, and
# copies the remaining fields of the first match into the ErrorClassification.
# Patterns without \b anchors match anywhere in the text, including inside
# longer words.
_RETRYABLE_PATTERNS = [
    # HTTP status codes
    (r"\b429\b", "rate limit", 3, 5.0),
    (r"\b500\b", "server error", 3, 2.0),
    (r"\b502\b", "bad gateway", 3, 2.0),
    (r"\b503\b", "service unavailable", 3, 5.0),
    (r"\b504\b", "gateway timeout", 3, 5.0),
    # Timeout patterns
    (r"timeout", "timeout", 3, 2.0),
    (r"timed out", "timeout", 3, 2.0),
    (r"TimeoutExpired", "timeout", 3, 2.0),
    # Connection errors
    (r"connection refused", "connection refused", 2, 5.0),
    (r"connection reset", "connection reset", 2, 2.0),
    (r"network unreachable", "network unreachable", 2, 10.0),
    (r"DNS", "DNS error", 2, 5.0),
    # Transient errors
    (r"temporary", "temporary error", 2, 2.0),
    (r"transient", "transient error", 2, 2.0),
    (r"retry", "retryable", 2, 2.0),
]
# Permanent error patterns
# Each entry is (regex, short label, reason). classify_error searches the
# error text with each regex (re.IGNORECASE), top to bottom, and reports the
# third field as the classification reason; the short label is not used there.
# These are consulted only after no retryable pattern has matched.
_PERMANENT_PATTERNS = [
    # HTTP status codes
    (r"\b400\b", "bad request", "Invalid request parameters"),
    (r"\b401\b", "unauthorized", "Authentication failed"),
    (r"\b403\b", "forbidden", "Access denied"),
    (r"\b404\b", "not found", "Resource not found"),
    (r"\b405\b", "method not allowed", "HTTP method not supported"),
    (r"\b409\b", "conflict", "Resource conflict"),
    (r"\b422\b", "unprocessable", "Validation error"),
    # Schema/validation errors
    (r"schema", "schema error", "Invalid data schema"),
    (r"validation", "validation error", "Input validation failed"),
    (r"invalid.*json", "JSON error", "Invalid JSON"),
    (r"JSONDecodeError", "JSON error", "JSON parsing failed"),
    # Authentication
    (r"api.?key", "API key error", "Invalid or missing API key"),
    (r"token.*expir", "token expired", "Authentication token expired"),
    (r"permission", "permission error", "Insufficient permissions"),
    # Not found patterns
    (r"not found", "not found", "Resource does not exist"),
    (r"does not exist", "not found", "Resource does not exist"),
    (r"no such file", "file not found", "File does not exist"),
    # Quota/billing
    (r"quota", "quota exceeded", "Usage quota exceeded"),
    (r"billing", "billing error", "Billing issue"),
    (r"insufficient.*funds", "billing error", "Insufficient funds"),
]
def classify_error(error: Exception, response_code: Optional[int] = None) -> ErrorClassification:
    """
    Classify an error as retryable, permanent, or unknown.

    Resolution order:
      1. ``response_code`` when it is one of the known HTTP statuses.
      2. The first matching entry of ``_RETRYABLE_PATTERNS``.
      3. The first matching entry of ``_PERMANENT_PATTERNS``.
      4. Otherwise UNKNOWN, with a single cautious retry.

    Patterns are searched (case-insensitively) in the exception's class name
    together with its message, so type-name patterns such as
    ``TimeoutExpired`` / ``JSONDecodeError`` and message-less exceptions such
    as ``TimeoutError()`` are recognised.

    Args:
        error: The exception that occurred
        response_code: HTTP response code if available

    Returns:
        ErrorClassification with retry guidance. ``error_code`` carries
        ``response_code`` whenever one was supplied, even if the category was
        decided by a text pattern.
    """
    error_type = type(error).__name__
    # str(error) alone omits the class name and is empty for exceptions raised
    # without arguments, so match against "<TypeName>: <message>".
    searchable = f"{error_type}: {error}"

    # An explicit status code is the most reliable signal; check it first.
    if response_code is not None:
        if response_code in (429, 500, 502, 503, 504):
            return ErrorClassification(
                category=ErrorCategory.RETRYABLE,
                reason=f"HTTP {response_code} - transient server error",
                should_retry=True,
                max_retries=3,
                # Rate limiting gets a longer pause than generic server errors.
                backoff_seconds=5.0 if response_code == 429 else 2.0,
                error_code=response_code,
                error_type=error_type,
            )
        if response_code in (400, 401, 403, 404, 405, 409, 422):
            return ErrorClassification(
                category=ErrorCategory.PERMANENT,
                reason=f"HTTP {response_code} - client error",
                should_retry=False,
                max_retries=0,
                backoff_seconds=0,
                error_code=response_code,
                error_type=error_type,
            )

    # Retryable patterns take precedence over permanent ones.
    for pattern, reason, max_retries, backoff in _RETRYABLE_PATTERNS:
        if re.search(pattern, searchable, re.IGNORECASE):
            return ErrorClassification(
                category=ErrorCategory.RETRYABLE,
                reason=reason,
                should_retry=True,
                max_retries=max_retries,
                backoff_seconds=backoff,
                error_code=response_code,
                error_type=error_type,
            )

    # The middle tuple field is a short label, not a status code; it is unused.
    for pattern, _label, reason in _PERMANENT_PATTERNS:
        if re.search(pattern, searchable, re.IGNORECASE):
            return ErrorClassification(
                category=ErrorCategory.PERMANENT,
                reason=reason,
                should_retry=False,
                max_retries=0,
                backoff_seconds=0,
                error_code=response_code,
                error_type=error_type,
            )

    # Default: unknown, treat as retryable with caution (one retry only).
    return ErrorClassification(
        category=ErrorCategory.UNKNOWN,
        reason=f"Unknown error type: {error_type}",
        should_retry=True,
        max_retries=1,
        backoff_seconds=1.0,
        error_code=response_code,
        error_type=error_type,
    )
def execute_with_retry(
    func,
    *args,
    max_retries: int = 3,
    backoff_base: float = 1.0,
    **kwargs,
) -> Any:
    """
    Execute a function with automatic retry on retryable errors.

    Each failure is passed to ``classify_error``. Errors classified as not
    retryable are re-raised immediately. Otherwise the call is repeated after
    an exponentially growing pause (``backoff_base * 2 ** attempt``), up to the
    smaller of ``max_retries`` and the classification's own ``max_retries``.

    Args:
        func: Function to execute
        *args: Function arguments
        max_retries: Upper bound on retry attempts (must be >= 0)
        backoff_base: Base backoff time in seconds
        **kwargs: Function keyword arguments

    Returns:
        Function result

    Raises:
        ValueError: If ``max_retries`` is negative
        Exception: The original exception, if it is permanent or the retry
            budget is exhausted
    """
    # A negative budget would skip the call entirely and leave nothing to raise.
    if max_retries < 0:
        raise ValueError(f"max_retries must be >= 0, got {max_retries}")

    attempt = 0
    while True:
        try:
            return func(*args, **kwargs)
        except Exception as e:
            classification = classify_error(e)
            # Honour the classifier's guidance: it may allow fewer retries than
            # the caller's ceiling (e.g. a single retry for unknown errors).
            retry_limit = min(max_retries, classification.max_retries)

            logger.info(
                "Attempt %d/%d failed: %s (%s, retryable: %s)",
                attempt + 1, retry_limit + 1,
                classification.reason,
                classification.category.value,
                classification.should_retry,
            )

            # If permanent error, fail immediately
            if not classification.should_retry:
                logger.error("Permanent error: %s", classification.reason)
                raise

            # If the retry budget for this error is spent, re-raise it
            if attempt >= retry_limit:
                logger.error("Max retries (%d) exceeded", retry_limit)
                raise

            # Exponential backoff: base, 2*base, 4*base, ...
            backoff = backoff_base * (2 ** attempt)
            logger.info("Retrying in %.1fs...", backoff)
            time.sleep(backoff)
            attempt += 1
def format_error_report(classification: ErrorClassification) -> str:
    """
    Format an error classification as a one-line report string.

    Args:
        classification: The classification to describe

    Returns:
        ``"<category>: <reason>"``, prefixed with a retry icon and a space
        when the error should be retried. Non-retryable reports carry no
        prefix, so they do not start with a stray space.
    """
    summary = f"{classification.category.value}: {classification.reason}"
    if classification.should_retry:
        return f"🔄 {summary}"
    return summary

View File

@@ -1,209 +0,0 @@
"""
Hybrid Memory Query Router
Routes queries to the best search method:
- HRR: Compositional/conceptual queries
- Vector: Semantic similarity
- FTS5: Exact keyword matching
Issue: #663
"""
import re
from collections import defaultdict
from dataclasses import dataclass
from enum import Enum
from typing import Any, Dict, List, Optional, Tuple
class SearchMethod(Enum):
    """Search back-ends a query can be routed to."""

    # Holographic Reduced Representations
    HRR = "hrr"
    # Semantic vector search
    VECTOR = "vector"
    # Full-text search (SQLite)
    FTS5 = "fts5"
    # Combine multiple methods
    HYBRID = "hybrid"
@dataclass
class QueryClassification:
    """Routing decision produced for a single query."""

    # Search method the query was routed to.
    method: SearchMethod
    # Confidence score attached to the decision.
    confidence: float
    # Short explanation of why this method was chosen.
    reason: str
    # Optional list of sub-queries; defaults to None.
    sub_queries: Optional[List[str]] = None
# Query patterns for routing
# Each list holds regex strings that QueryRouter passes to re.search against
# the raw query. Most carry an inline (?i) flag, so they match regardless of
# case; the two quoted-phrase patterns have no flag and match literal quote
# characters only.

# Relational wording -> routed to HRR as a compositional query.
COMPOSITIONAL_PATTERNS = [
    r"(?i)\brelated\s+to\b",
    r"(?i)\bcombined\s+with\b",
    r"(?i)\bbound\s+to\b",
    r"(?i)\bassociated\s+with\b",
    r"(?i)\bwhat\s+connects?\b",
    r"(?i)\bhow\s+.*\s+relate\b",
    r"(?i)\brelationship\s+between\b",
]
# Contradiction wording -> routed to HRR; checked before every other group.
CONTRADICTION_PATTERNS = [
    r"(?i)\bcontradicts?\b",
    r"(?i)\bconflicts?\s+with\b",
    r"(?i)\binconsistent\b",
    r"(?i)\bopposite\s+of\b",
    r"(?i)\bopposes?\b",
    r"(?i)\bdisagrees?\s+with\b",
]
# Quoted phrases and exactness words -> routed to FTS5.
# NOTE(review): the single-quote pattern also matches text between two
# apostrophes (e.g. in "What's Python's ..."); confirm that is intended.
EXACT_KEYWORD_PATTERNS = [
    r'"[^"]+"', # Quoted phrases
    r"'[^']+'", # Single-quoted phrases
    r"(?i)\bexact\b",
    r"(?i)\bprecisely\b",
    r"(?i)\bspecifically\b",
]
# Time-related wording -> routed to FTS5.
TEMPORAL_PATTERNS = [
    r"(?i)\brecent\b",
    r"(?i)\btoday\b",
    r"(?i)\byesterday\b",
    r"(?i)\blast\s+(week|month|hour)\b",
    r"(?i)\bsince\b",
    r"(?i)\bbefore\b",
    r"(?i)\bafter\b",
]
class QueryRouter:
    """Picks a search method for a query using ordered regex rules."""

    def classify(self, query: str) -> QueryClassification:
        """
        Return the routing decision for ``query``.

        Pattern groups are tried in a fixed priority order and the first
        group with any match decides the result. Queries matching no group
        go to FTS5 when they have at most three whitespace-separated words,
        and to vector search otherwise.
        """
        # (patterns, method, confidence, reason), highest priority first.
        ordered_rules = (
            (CONTRADICTION_PATTERNS, SearchMethod.HRR, 0.95,
             "Contradiction detection query"),
            (COMPOSITIONAL_PATTERNS, SearchMethod.HRR, 0.90,
             "Compositional/conceptual query"),
            (EXACT_KEYWORD_PATTERNS, SearchMethod.FTS5, 0.85,
             "Exact keyword query"),
            (TEMPORAL_PATTERNS, SearchMethod.FTS5, 0.80,
             "Temporal query"),
        )
        for patterns, method, confidence, reason in ordered_rules:
            if any(re.search(regex, query) for regex in patterns):
                return QueryClassification(
                    method=method,
                    confidence=confidence,
                    reason=reason,
                )

        # No pattern matched: long queries are treated as semantic.
        word_count = len(query.split())
        if word_count > 3:
            return QueryClassification(
                method=SearchMethod.VECTOR,
                confidence=0.60,
                reason="Semantic similarity query"
            )

        # Short queries tend to be keyword searches.
        return QueryClassification(
            method=SearchMethod.FTS5,
            confidence=0.70,
            reason="Short query (likely keyword)"
        )

    def should_use_hybrid(self, query: str) -> bool:
        """
        Return True when ``query`` should be answered with hybrid search.

        That is the case when the classification confidence is below 0.70,
        or when the query matches both a compositional pattern and an
        exact-keyword pattern.
        """
        if self.classify(query).confidence < 0.70:
            return True

        def _matches_any(patterns) -> bool:
            return any(re.search(regex, query) for regex in patterns)

        # Mixed signals: relational wording together with exact-keyword markers.
        return _matches_any(COMPOSITIONAL_PATTERNS) and _matches_any(EXACT_KEYWORD_PATTERNS)
def reciprocal_rank_fusion(
    results: Dict[str, List[Tuple[str, float]]],
    k: int = 60
) -> List[Tuple[str, float]]:
    """
    Fuse several ranked lists into one using Reciprocal Rank Fusion.

    Every appearance of an item at 1-based rank ``r`` in any list adds
    ``1 / (k + r)`` to that item's fused score. The per-method scores in the
    input tuples are ignored; only positions matter.

    Args:
        results: Dict of method -> [(item_id, score), ...]
        k: RRF constant (default 60)

    Returns:
        ``(item_id, fused_score)`` pairs, highest fused score first
    """
    fused = defaultdict(float)
    # Method names play no part in the fusion, so only the rankings are walked.
    for ranking in results.values():
        for position, (item_id, _score) in enumerate(ranking, start=1):
            fused[item_id] += 1.0 / (k + position)

    ordered = list(fused.items())
    ordered.sort(key=lambda pair: pair[1], reverse=True)
    return ordered
def merge_with_hrr_priority(
    hrr_results: List[Tuple[str, float]],
    vector_results: List[Tuple[str, float]],
    fts5_results: List[Tuple[str, float]],
    query_type: str = "default"
) -> List[Tuple[str, float]]:
    """
    Merge results with HRR priority for compositional queries.

    For ``query_type == "compositional"`` the result is the top five HRR
    entries followed by the top five vector entries, keeping only the first
    occurrence of each item id; FTS5 results are not used on this path. For
    any other ``query_type`` the three lists are fused with
    ``reciprocal_rank_fusion``.

    Args:
        hrr_results: Ranked ``(item_id, score)`` pairs from HRR
        vector_results: Ranked ``(item_id, score)`` pairs from vector search
        fts5_results: Ranked ``(item_id, score)`` pairs from FTS5
        query_type: ``"compositional"`` to force HRR-first ordering

    Returns:
        Merged ``(item_id, score)`` pairs; the input lists are not modified
    """
    if query_type == "compositional":
        # HRR first, vector as supplement
        merged: List[Tuple[str, float]] = []
        seen = set()
        for candidate in (*hrr_results[:5], *vector_results[:5]):
            item_id = candidate[0]
            # Record every id as it is emitted so repeats within either
            # source list are dropped too, not just HRR/vector overlaps.
            if item_id not in seen:
                seen.add(item_id)
                merged.append(candidate)
        return merged

    # Default: RRF merge
    return reciprocal_rank_fusion({
        "hrr": hrr_results,
        "vector": vector_results,
        "fts5": fts5_results
    })
# Shared router instance used by the module-level helper functions.
_router = QueryRouter()


def route_query(query: str) -> QueryClassification:
    """Classify ``query`` with the shared router and return its decision."""
    decision = _router.classify(query)
    return decision
def should_use_hybrid(query: str) -> bool:
    """Ask the shared router whether ``query`` should use hybrid search."""
    wants_hybrid = _router.should_use_hybrid(query)
    return wants_hybrid