#!/usr/bin/env python3
# grounding.py - Grounding before generation.
# SOUL.md: "When I have verified sources, I must consult them
# before I generate from pattern alone. Retrieval is not a feature.
# It is the primary mechanism by which I avoid lying."
# Part of #792

import json
import os
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
from dataclasses import dataclass, field

HERMES_HOME = Path(os.environ.get("HERMES_HOME", Path.home() / ".hermes"))
MEMORY_DIR = HERMES_HOME / "memory"

# Maximum number of hits each search method returns.
_MAX_HITS = 5
# Maximum number of characters kept from a matching source.
_SNIPPET_CHARS = 200


@dataclass
class GroundingResult:
    """Outcome of a grounding lookup for a single query."""

    query: str
    sources_found: List[Dict[str, Any]] = field(default_factory=list)
    grounded: bool = False
    confidence: float = 0.0
    source_text: str = ""
    source_type: str = ""  # memory, file, chain, tool_result

    @property
    def needs_hedging(self) -> bool:
        """Return True when no source backed the query."""
        return not self.grounded


class GroundingLayer:
    """Look up a query in local memory files and caller-supplied context."""

    def __init__(self, memory_dir=None):
        """Use *memory_dir* when given, otherwise the module-level MEMORY_DIR."""
        self.memory_dir = Path(memory_dir) if memory_dir else MEMORY_DIR

    @staticmethod
    def _query_words(query: Any) -> set:
        """Return the lower-cased, whitespace-split words of *query*.

        A ``None`` query yields an empty set instead of raising.
        """
        return set(str(query or "").lower().split())

    @staticmethod
    def _relevance(query_words: set, text_lower: str) -> Optional[float]:
        """Return the fraction of query words occurring in *text_lower*.

        Matching is by substring, not by whole word. Returns ``None`` when
        no query word occurs, so callers can skip the source.
        """
        matches = sum(1 for word in query_words if word in text_lower)
        if matches == 0:
            return None
        return round(matches / len(query_words), 3)

    def ground(self, query, context=None) -> GroundingResult:
        """Query local sources before generation.

        Args:
            query: Text whose words are searched for in the sources.
            context: Optional iterable of context entries (strings or dicts
                with ``content``/``text`` and optional ``source`` keys). A
                single string or dict is treated as one entry.

        Returns:
            A GroundingResult. It is grounded when at least one source
            matched; confidence grows with the number of sources and is
            capped at 0.95. ``source_text``/``source_type`` come from the
            highest-scoring source.
        """
        # 1. Search memory files
        sources: List[Dict[str, Any]] = list(self._search_memory(query))

        # 2. Search context entries if provided
        if context:
            sources.extend(self._search_context(query, context))

        # 3. Build result
        grounded = bool(sources)
        confidence = min(0.95, 0.3 + len(sources) * 0.2) if grounded else 0.0
        source_text = ""
        source_type = ""
        if sources:
            best = max(sources, key=lambda s: s.get("score", 0))
            source_text = best.get("text", "")[:_SNIPPET_CHARS]
            source_type = best.get("type", "unknown")

        return GroundingResult(
            query=query,
            sources_found=sources,
            grounded=grounded,
            confidence=confidence,
            source_text=source_text,
            source_type=source_type,
        )

    def _search_memory(self, query) -> List[Dict[str, Any]]:
        """Search ``*.md`` files under the memory directory.

        Returns up to five hits sorted by descending score. Each hit holds
        the first matching line (or the start of the file), the path
        relative to the memory directory, the type ``"memory"`` and the
        score.
        """
        results: List[Dict[str, Any]] = []
        if not self.memory_dir.exists():
            return results

        query_words = self._query_words(query)
        if not query_words:
            return results

        # Sorted so that ties at the cutoff do not depend on filesystem
        # enumeration order.
        for mem_file in sorted(self.memory_dir.rglob("*.md")):
            try:
                content = mem_file.read_text(encoding="utf-8", errors="replace")
            except OSError:
                # Unreadable entry (permissions, a directory named *.md,
                # a file removed mid-scan): best-effort search skips it.
                continue

            score = self._relevance(query_words, content.lower())
            if score is None:
                continue

            # Extract the first line mentioning any query word.
            snippet = ""
            for line in content.split("\n"):
                line_lower = line.lower()
                if any(word in line_lower for word in query_words):
                    snippet = line.strip()[:_SNIPPET_CHARS]
                    break

            results.append({
                "text": snippet or content[:_SNIPPET_CHARS],
                "source": str(mem_file.relative_to(self.memory_dir)),
                "type": "memory",
                "score": score,
            })

        results.sort(key=lambda r: r["score"], reverse=True)
        return results[:_MAX_HITS]

    def _search_context(self, query, context) -> List[Dict[str, Any]]:
        """Search provided context entries for relevant content.

        Dict entries are read from ``content`` (falling back to ``text``)
        and labelled with their ``source``; any other entry is converted
        with ``str``. Returns up to five hits sorted by descending score.
        """
        results: List[Dict[str, Any]] = []
        if not context:
            return results

        # A bare string or dict is one entry; iterating it would yield
        # characters or keys instead of documents.
        if isinstance(context, (str, dict)):
            context = [context]

        query_words = self._query_words(query)
        if not query_words:
            return results

        for ctx in context:
            if isinstance(ctx, dict):
                raw = ctx.get("content") or ctx.get("text") or ""
                source = ctx.get("source", "context")
            else:
                raw = ctx
                source = "context"
            # Values may be None or non-string; normalise before matching.
            if raw is None:
                text = ""
            elif isinstance(raw, str):
                text = raw
            else:
                text = str(raw)

            score = self._relevance(query_words, text.lower())
            if score is None:
                continue

            results.append({
                "text": text[:_SNIPPET_CHARS],
                "source": source,
                "type": "context",
                "score": score,
            })

        results.sort(key=lambda r: r["score"], reverse=True)
        return results[:_MAX_HITS]

    def format_sources(self, result) -> str:
        """Format grounding result for display.

        Lists at most the first three sources, each text cut to 100
        characters, or a fixed notice when the result is not grounded.
        """
        if not result.grounded:
            return "No verified sources found. Proceeding from pattern matching."

        lines = ["Based on verified sources:"]
        for s in result.sources_found[:3]:
            ref = s.get("source", "unknown")
            text = s.get("text", "")[:100]
            # f-string so a non-string source label does not raise.
            lines.append(f" - [{ref}] {text}")
        return "\n".join(lines)


# Convenience
_default_layer = None


def get_grounding_layer():
    """Return the shared GroundingLayer, creating it on first use."""
    global _default_layer
    if _default_layer is None:
        _default_layer = GroundingLayer()
    return _default_layer


def ground(query, **kwargs):
    """Ground *query* using the shared default layer."""
    return get_grounding_layer().ground(query, **kwargs)