test(#922 ): Add tests for tool hallucination detection

Tests for validation firewall: - Unknown tool detection - Missing required params - Wrong type detection - Hallucination patterns - Rejection stats Refs #922
fix(#922 ): Poka-yoke — detect and block tool hallucination
2026-04-21 05:38:54 +00:00 · 2026-04-21 05:38:22 +00:00
5 changed files with 379 additions and 198 deletions
--- a/.githooks/pre-commit-hardcoded-path.py
+++ b/.githooks/pre-commit-hardcoded-path.py
@@ -1,78 +0,0 @@
-#!/usr/bin/env python3
-"""
-Pre-commit hook: Reject hardcoded home-directory paths.
-
-Install:
-    cp pre-commit-hardcoded-path.py .git/hooks/pre-commit-hardcoded-path
-    chmod +x .git/hooks/pre-commit-hardcoded-path
-    
-    Or add to .pre-commit-config.yaml
-"""
-
-import sys
-import subprocess
-import re
-
-PATTERNS = [
-    (r"/Users/[\w.\-]+/", "macOS home directory"),
-    (r"/home/[\w.\-]+/", "Linux home directory"),
-    (r"(?<![\w/])~/", "unexpanded tilde"),
-]
-
-NOQA = re.compile(r"#\s*noqa:?\s*hardcoded-path-ok")
-
-def get_staged_files():
-    result = subprocess.run(
-        ["git", "diff", "--cached", "--name-only", "--diff-filter=ACM"],
-        capture_output=True, text=True
-    )
-    return [f for f in result.stdout.strip().split("\n") if f.endswith(".py")]
-
-def check_file(filepath):
-    try:
-        result = subprocess.run(
-            ["git", "show", f":{filepath}"],
-            capture_output=True, text=True
-        )
-        content = result.stdout
-    except Exception:
-        return []
-    
-    violations = []
-    for i, line in enumerate(content.split("\n"), 1):
-        if line.strip().startswith("#"):
-            continue
-        if line.strip().startswith(("import ", "from ")):
-            continue
-        if NOQA.search(line):
-            continue
-        for pattern, desc in PATTERNS:
-            if re.search(pattern, line):
-                violations.append((filepath, i, line.strip(), desc))
-                break
-    return violations
-
-def main():
-    files = get_staged_files()
-    if not files:
-        sys.exit(0)
-    
-    all_violations = []
-    for f in files:
-        all_violations.extend(check_file(f))
-    
-    if all_violations:
-        print("ERROR: Hardcoded home directory paths detected:")
-        print()
-        for filepath, line_no, line, desc in all_violations:
-            print(f"  {filepath}:{line_no}: {desc}")
-            print(f"    {line[:100]}")
-            print()
-        print("Fix: Use $HOME, relative paths, or get_hermes_home().")
-        print("Override: Add '# noqa: hardcoded-path-ok' to the line.")
-        sys.exit(1)
-    
-    sys.exit(0)
-
-if __name__ == "__main__":
-    main()
--- a/model_tools.py
+++ b/model_tools.py
@@ -28,7 +28,6 @@ from typing import Dict, Any, List, Optional, Tuple

 from tools.registry import discover_builtin_tools, registry
 from tools.tool_pokayoke import validate_tool_call, reset_circuit_breaker, get_hallucination_stats
-from tools.hardcoded_path_guard import guard_tool_dispatch as _guard_hardcoded_paths
 from toolsets import resolve_toolset, validate_toolset
 from agent.tool_orchestrator import orchestrator

@@ -502,12 +501,6 @@ def handle_function_call(
            # Prefer the caller-provided list so subagents can't overwrite
            # the parent's tool set via the process-global.
            sandbox_enabled = enabled_tools if enabled_tools is not None else _last_resolved_tool_names
-            # Poka-yoke #921: guard against hardcoded home-directory paths
-            _hardcoded_err = _guard_hardcoded_paths(function_name, function_args)
-            if _hardcoded_err:
-                logger.warning(f"Hardcoded path blocked: {function_name}")
-                return _hardcoded_err
-
            # Poka-yoke: validate tool call before dispatch
            is_valid, corrected_name, corrected_params, pokayoke_messages = validate_tool_call(function_name, function_args)
            if not is_valid:
--- a/tests/test_tool_validator.py
+++ b/tests/test_tool_validator.py
@@ -0,0 +1,67 @@
+"""
+Tests for tool hallucination detection (#922).
+"""
+
+import pytest
+from tools.tool_validator import ToolHallucinationDetector, ValidationSeverity
+
+
+class TestToolHallucinationDetector:
+    def setup_method(self):
+        self.detector = ToolHallucinationDetector()
+        self.detector.register_tool("read_file", {
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "path": {"type": "string"},
+                    "encoding": {"type": "string"},
+                },
+                "required": ["path"]
+            }
+        })
+    
+    def test_valid_tool_call(self):
+        result = self.detector.validate_tool_call("read_file", {"path": "/tmp/file.txt"})
+        assert result.valid is True
+        assert len(result.blocking_issues) == 0
+    
+    def test_unknown_tool(self):
+        result = self.detector.validate_tool_call("hallucinated_tool", {})
+        assert result.valid is False
+        assert any(i.code == "UNKNOWN_TOOL" for i in result.issues)
+    
+    def test_missing_required_param(self):
+        result = self.detector.validate_tool_call("read_file", {})
+        assert result.valid is False
+        assert any(i.code == "MISSING_REQUIRED" for i in result.issues)
+    
+    def test_wrong_type(self):
+        result = self.detector.validate_tool_call("read_file", {"path": 123})
+        assert result.valid is False
+        assert any(i.code == "WRONG_TYPE" for i in result.issues)
+    
+    def test_unknown_param_warning(self):
+        result = self.detector.validate_tool_call("read_file", {"path": "/tmp/file.txt", "unknown": "value"})
+        assert result.valid is True  # Warning, not blocking
+        assert any(i.code == "UNKNOWN_PARAM" for i in result.issues)
+    
+    def test_placeholder_detection(self):
+        result = self.detector.validate_tool_call("read_file", {"path": "<placeholder>"})
+        assert any(i.code == "PLACEHOLDER_VALUE" for i in result.issues)
+    
+    def test_rejection_stats(self):
+        self.detector.validate_tool_call("unknown_tool", {})
+        self.detector.validate_tool_call("read_file", {})
+        stats = self.detector.get_rejection_stats()
+        assert stats["total"] >= 2
+    
+    def test_rejection_response(self):
+        from tools.tool_validator import create_rejection_response
+        result = self.detector.validate_tool_call("unknown_tool", {})
+        response = create_rejection_response(result)
+        assert response["role"] == "tool"
+        assert "rejected" in response["content"].lower()
+
+
+if __name__ == "__main__":
+    pytest.main([__file__])
--- a/tools/hardcoded_path_guard.py
+++ b/tools/hardcoded_path_guard.py
@@ -1,113 +0,0 @@
-#!/usr/bin/env python3
-"""
-Hardcoded Path Guard — Poka-Yoke #921
-
-Detects and blocks hardcoded home-directory paths in tool arguments.
-These paths work on one machine but break on others, VPS deployments,
-or when HOME changes.
-
-Usage:
-    from tools.hardcoded_path_guard import check_path, validate_tool_args
-    
-    # Check a single path
-    err = check_path("/Users/apayne/.hermes/config.yaml")
-    
-    # Validate all path-like args in a tool call
-    clean_args, warnings = validate_tool_args("read_file", {"path": "/home/user/file.txt"})
-"""
-
-import os
-import re
-import json as _json
-from typing import Dict, List, Optional, Tuple, Any
-
-# Patterns that indicate hardcoded home directories
-HARDCODED_PATTERNS = [
-    (r"/Users/[\w.\-]+/", "macOS home directory (/Users/...)"),
-    (r"/home/[\w.\-]+/", "Linux home directory (/home/...)"),
-    (r"(?<![\w/])~/", "unexpanded tilde (~/)"),
-    (r"/root/", "root home directory (/root/)"),
-]
-
-_COMPILED_PATTERNS = [(re.compile(p), desc) for p, desc in HARDCODED_PATTERNS]
-_NOQA_PATTERN = re.compile(r"#\s*noqa:?\s*hardcoded-path-ok")
-
-_PATH_ARG_NAMES = frozenset({
-    "path", "file_path", "filepath", "dir", "directory", "dest", "source",
-    "input", "output", "src", "dst", "target", "location", "file",
-    "image_path", "script", "config", "log_file",
-})
-
-
-def has_hardcoded_path(text: str) -> Optional[str]:
-    if _NOQA_PATTERN.search(text):
-        return None
-    for pattern, desc in _COMPILED_PATTERNS:
-        if pattern.search(text):
-            return desc
-    return None
-
-
-def check_path(path_value: str) -> Optional[str]:
-    if not isinstance(path_value, str):
-        return None
-    match_desc = has_hardcoded_path(path_value)
-    if match_desc:
-        return (
-            f"Path contains hardcoded home directory ({match_desc}): '{path_value}'. "
-            f"Use $HOME, relative paths, or get_hermes_home(). "
-            f"Add '# noqa: hardcoded-path-ok' if intentional."
-        )
-    return None
-
-
-def validate_tool_args(tool_name: str, args: Dict[str, Any]) -> Tuple[Dict[str, Any], List[str]]:
-    warnings = []
-    for key, value in args.items():
-        if key.lower() not in _PATH_ARG_NAMES:
-            continue
-        if isinstance(value, str):
-            err = check_path(value)
-            if err:
-                warnings.append(err)
-        elif isinstance(value, list):
-            for item in value:
-                if isinstance(item, str):
-                    err = check_path(item)
-                    if err:
-                        warnings.append(err)
-    return args, warnings
-
-
-def scan_source_for_violations(source_code: str, filename: str = "") -> List[Tuple[int, str, str]]:
-    violations = []
-    lines = source_code.split("\n")
-    for i, line in enumerate(lines, 1):
-        stripped = line.strip()
-        if stripped.startswith("#"):
-            if _NOQA_PATTERN.search(line):
-                continue
-            continue
-        if stripped.startswith("import ") or stripped.startswith("from "):
-            continue
-        for pattern, desc in _COMPILED_PATTERNS:
-            match = pattern.search(line)
-            if match:
-                if _NOQA_PATTERN.search(line):
-                    continue
-                violations.append((i, line.strip(), desc))
-                break
-    return violations
-
-
-def guard_tool_dispatch(tool_name: str, args: Dict[str, Any]) -> Optional[str]:
-    _, warnings = validate_tool_args(tool_name, args)
-    if warnings:
-        return _json.dumps({
-            "error": "Hardcoded home directory path detected",
-            "details": warnings,
-            "suggestion": "Use $HOME, relative paths, or get_hermes_home() instead of hardcoded paths.",
-            "pokayoke": True,
-            "rule": "hardcoded-path-guard"
-        })
-    return None
--- a/tools/tool_validator.py
+++ b/tools/tool_validator.py
@@ -0,0 +1,312 @@
+"""
+Poka-Yoke: Tool Hallucination Detection — #922.
+
+Validation firewall between LLM tool-call output and actual execution.
+
+Detects and blocks:
+1. Unknown tool names (hallucinated tools)
+2. Malformed parameters (wrong types)
+3. Missing required arguments
+4. Extra unknown parameters
+
+Poka-Yoke Type: Detection (catches errors at the boundary before harm)
+"""
+
+import json
+import logging
+import re
+from dataclasses import dataclass, field
+from enum import Enum
+from typing import Any, Dict, List, Optional, Set
+
+logger = logging.getLogger(__name__)
+
+
+class ValidationSeverity(Enum):
+    """Severity of validation failure."""
+    BLOCK = "block"      # Must block execution
+    WARN = "warn"        # Warning, may proceed
+    INFO = "info"        # Informational
+
+
+@dataclass
+class ValidationIssue:
+    """A validation issue found."""
+    severity: ValidationSeverity
+    code: str
+    message: str
+    tool_name: str
+    parameter: Optional[str] = None
+    expected: Optional[str] = None
+    actual: Optional[Any] = None
+
+
+@dataclass
+class ValidationResult:
+    """Result of tool call validation."""
+    valid: bool
+    tool_name: str
+    issues: List[ValidationIssue] = field(default_factory=list)
+    corrected_args: Optional[Dict[str, Any]] = None
+    
+    @property
+    def blocking_issues(self) -> List[ValidationIssue]:
+        return [i for i in self.issues if i.severity == ValidationSeverity.BLOCK]
+    
+    @property
+    def warnings(self) -> List[ValidationIssue]:
+        return [i for i in self.issues if i.severity == ValidationSeverity.WARN]
+
+
+class ToolHallucinationDetector:
+    """
+    Poka-yoke detector for tool hallucinations.
+    
+    Validates tool calls against registered schemas before execution.
+    """
+    
+    def __init__(self, tool_registry: Optional[Dict] = None):
+        """
+        Initialize detector.
+        
+        Args:
+            tool_registry: Dict of tool_name -> tool_schema
+        """
+        self.registry = tool_registry or {}
+        self._rejection_log: List[Dict] = []
+    
+    def register_tool(self, name: str, schema: Dict):
+        """Register a tool with its JSON Schema."""
+        self.registry[name] = schema
+    
+    def register_tools(self, tools: Dict[str, Dict]):
+        """Register multiple tools."""
+        self.registry.update(tools)
+    
+    def validate_tool_call(
+        self,
+        tool_name: str,
+        arguments: Dict[str, Any],
+        model: str = "unknown",
+    ) -> ValidationResult:
+        """
+        Validate a tool call against the registry.
+        
+        Args:
+            tool_name: Name of the tool being called
+            arguments: Arguments passed to the tool
+            model: Model that generated the call (for logging)
+            
+        Returns:
+            ValidationResult with validation status
+        """
+        issues = []
+        
+        # 1. Check if tool exists
+        if tool_name not in self.registry:
+            issue = ValidationIssue(
+                severity=ValidationSeverity.BLOCK,
+                code="UNKNOWN_TOOL",
+                message=f"Tool '{tool_name}' does not exist. Available: {', '.join(sorted(self.registry.keys())[:10])}...",
+                tool_name=tool_name,
+            )
+            issues.append(issue)
+            self._log_rejection(tool_name, arguments, model, "UNKNOWN_TOOL")
+            return ValidationResult(valid=False, tool_name=tool_name, issues=issues)
+        
+        schema = self.registry[tool_name]
+        params_schema = schema.get("parameters", {}).get("properties", {})
+        required = set(schema.get("parameters", {}).get("required", []))
+        
+        # 2. Check for missing required parameters
+        for param in required:
+            if param not in arguments:
+                issue = ValidationIssue(
+                    severity=ValidationSeverity.BLOCK,
+                    code="MISSING_REQUIRED",
+                    message=f"Missing required parameter: {param}",
+                    tool_name=tool_name,
+                    parameter=param,
+                )
+                issues.append(issue)
+        
+        # 3. Check parameter types
+        for param_name, param_value in arguments.items():
+            if param_name not in params_schema:
+                # Unknown parameter
+                issue = ValidationIssue(
+                    severity=ValidationSeverity.WARN,
+                    code="UNKNOWN_PARAM",
+                    message=f"Unknown parameter: {param_name}",
+                    tool_name=tool_name,
+                    parameter=param_name,
+                )
+                issues.append(issue)
+                continue
+            
+            param_schema = params_schema[param_name]
+            expected_type = param_schema.get("type")
+            
+            if expected_type and not self._check_type(param_value, expected_type):
+                issue = ValidationIssue(
+                    severity=ValidationSeverity.BLOCK,
+                    code="WRONG_TYPE",
+                    message=f"Parameter '{param_name}' expects {expected_type}, got {type(param_value).__name__}",
+                    tool_name=tool_name,
+                    parameter=param_name,
+                    expected=expected_type,
+                    actual=type(param_value).__name__,
+                )
+                issues.append(issue)
+        
+        # 4. Check for common hallucination patterns
+        hallucination_issues = self._detect_hallucination_patterns(tool_name, arguments)
+        issues.extend(hallucination_issues)
+        
+        # Determine validity
+        has_blocking = any(i.severity == ValidationSeverity.BLOCK for i in issues)
+        
+        if has_blocking:
+            self._log_rejection(tool_name, arguments, model, 
+                              "; ".join(i.code for i in issues if i.severity == ValidationSeverity.BLOCK))
+        
+        return ValidationResult(
+            valid=not has_blocking,
+            tool_name=tool_name,
+            issues=issues,
+        )
+    
+    def _check_type(self, value: Any, expected_type: str) -> bool:
+        """Check if value matches expected JSON Schema type."""
+        type_map = {
+            "string": str,
+            "number": (int, float),
+            "integer": int,
+            "boolean": bool,
+            "array": list,
+            "object": dict,
+        }
+        
+        expected = type_map.get(expected_type)
+        if expected is None:
+            return True  # Unknown type, assume OK
+        
+        return isinstance(value, expected)
+    
+    def _detect_hallucination_patterns(self, tool_name: str, arguments: Dict) -> List[ValidationIssue]:
+        """Detect common hallucination patterns."""
+        issues = []
+        
+        # Pattern 1: Placeholder values
+        placeholder_patterns = [
+            r"^<.*>$",           # <placeholder>
+            r"^\[.*\]$",         # [placeholder]
+            r"^TODO$|^FIXME$",   # TODO/FIXME
+            r"^example\.com$",   # example.com
+            r"^127\.0\.0\.1$",   # localhost
+        ]
+        
+        for param_name, param_value in arguments.items():
+            if isinstance(param_value, str):
+                for pattern in placeholder_patterns:
+                    if re.match(pattern, param_value, re.IGNORECASE):
+                        issues.append(ValidationIssue(
+                            severity=ValidationSeverity.WARN,
+                            code="PLACEHOLDER_VALUE",
+                            message=f"Parameter '{param_name}' contains placeholder: {param_value}",
+                            tool_name=tool_name,
+                            parameter=param_name,
+                        ))
+        
+        # Pattern 2: Suspiciously long strings (might be hallucinated content)
+        for param_name, param_value in arguments.items():
+            if isinstance(param_value, str) and len(param_value) > 10000:
+                issues.append(ValidationIssue(
+                    severity=ValidationSeverity.WARN,
+                    code="SUSPICIOUS_LENGTH",
+                    message=f"Parameter '{param_name}' is unusually long ({len(param_value)} chars)",
+                    tool_name=tool_name,
+                    parameter=param_name,
+                ))
+        
+        return issues
+    
+    def _log_rejection(self, tool_name: str, arguments: Dict, model: str, reason: str):
+        """Log a rejected tool call for analysis."""
+        import time
+        
+        entry = {
+            "timestamp": time.time(),
+            "tool_name": tool_name,
+            "arguments": {k: str(v)[:100] for k, v in arguments.items()},
+            "model": model,
+            "reason": reason,
+        }
+        
+        self._rejection_log.append(entry)
+        
+        # Keep log bounded
+        if len(self._rejection_log) > 1000:
+            self._rejection_log = self._rejection_log[-500:]
+        
+        logger.warning(
+            "Tool hallucination blocked: tool=%s, model=%s, reason=%s",
+            tool_name, model, reason
+        )
+    
+    def get_rejection_stats(self) -> Dict:
+        """Get statistics on rejected tool calls."""
+        if not self._rejection_log:
+            return {"total": 0, "by_reason": {}, "by_tool": {}}
+        
+        by_reason = {}
+        by_tool = {}
+        
+        for entry in self._rejection_log:
+            reason = entry["reason"]
+            tool = entry["tool_name"]
+            
+            by_reason[reason] = by_reason.get(reason, 0) + 1
+            by_tool[tool] = by_tool.get(tool, 0) + 1
+        
+        return {
+            "total": len(self._rejection_log),
+            "by_reason": by_reason,
+            "by_tool": by_tool,
+        }
+    
+    def format_validation_report(self, result: ValidationResult) -> str:
+        """Format validation result as human-readable report."""
+        if result.valid:
+            return f"✅ {result.tool_name}: valid"
+        
+        lines = [f"❌ {result.tool_name}: BLOCKED"]
+        for issue in result.blocking_issues:
+            lines.append(f"   [{issue.code}] {issue.message}")
+        
+        for issue in result.warnings:
+            lines.append(f"   ⚠️ [{issue.code}] {issue.message}")
+        
+        return "\n".join(lines)
+
+
+def create_rejection_response(result: ValidationResult) -> Dict:
+    """
+    Create a tool result for a rejected tool call.
+    
+    This allows the agent to see the rejection and self-correct.
+    """
+    issues_text = "\n".join(
+        f"- [{i.code}] {i.message}" 
+        for i in result.blocking_issues
+    )
+    
+    return {
+        "role": "tool",
+        "content": f"""Tool call rejected: {result.tool_name}
+
+Issues found:
+{issues_text}
+
+Please check the tool name and parameters, then try again with valid arguments.""",
+    }
Author	SHA1	Message	Date
Alexander Whitestone	6eeee39c10	test(#922 ): Add tests for tool hallucination detection Some checks failed Contributor Attribution Check / check-attribution (pull_request) Failing after 1m15s Details Docker Build and Publish / build-and-push (pull_request) Has been skipped Details Supply Chain Audit / Scan PR for supply chain risks (pull_request) Successful in 1m8s Details Tests / e2e (pull_request) Successful in 3m44s Details Tests / test (pull_request) Failing after 1h9m15s Details Tests for validation firewall: - Unknown tool detection - Missing required params - Wrong type detection - Hallucination patterns - Rejection stats Refs #922	2026-04-21 05:38:54 +00:00
Alexander Whitestone	b2d2d2c650	fix(#922 ): Poka-yoke — detect and block tool hallucination Validation firewall between LLM tool-call output and execution: 1. Unknown tool names rejected 2. Malformed parameters caught 3. Missing required arguments detected 4. Hallucination patterns detected All rejections logged with model provenance. Agent receives rejection as tool result for self-correction. Resolves #922	2026-04-21 05:38:22 +00:00