Some checks failed
Contributor Attribution Check / check-attribution (pull_request) Failing after 1m15s
Docker Build and Publish / build-and-push (pull_request) Has been skipped
Supply Chain Audit / Scan PR for supply chain risks (pull_request) Successful in 1m8s
Tests / e2e (pull_request) Successful in 3m44s
Tests / test (pull_request) Failing after 1h9m15s
Tests for validation firewall:
- Unknown tool detection
- Missing required params
- Wrong type detection
- Hallucination patterns
- Rejection stats

Refs #922
68 lines
2.5 KiB
Python
68 lines
2.5 KiB
Python
"""
|
|
Tests for tool hallucination detection (#922).
|
|
"""
|
|
|
|
import pytest
|
|
from tools.tool_validator import ToolHallucinationDetector, ValidationSeverity
|
|
|
|
|
|
class TestToolHallucinationDetector:
    """Exercise ``ToolHallucinationDetector`` with one registered tool.

    ``setup_method`` builds a fresh detector for every test and registers a
    single tool, ``read_file``, whose schema requires a string ``path`` and
    also declares a string ``encoding`` property.
    """

    def setup_method(self):
        """Create the detector and register the ``read_file`` schema on it."""
        read_file_schema = {
            "parameters": {
                "type": "object",
                "properties": {
                    "path": {"type": "string"},
                    "encoding": {"type": "string"},
                },
                "required": ["path"],
            }
        }
        detector = ToolHallucinationDetector()
        detector.register_tool("read_file", read_file_schema)
        self.detector = detector

    @staticmethod
    def _has_issue(result, code):
        """Return True when any entry in ``result.issues`` has ``.code == code``."""
        return any(issue.code == code for issue in result.issues)

    def test_valid_tool_call(self):
        """A registered tool called with its required param is valid."""
        outcome = self.detector.validate_tool_call(
            "read_file", {"path": "/tmp/file.txt"}
        )
        assert outcome.valid is True
        assert len(outcome.blocking_issues) == 0

    def test_unknown_tool(self):
        """A tool name that was never registered yields UNKNOWN_TOOL."""
        outcome = self.detector.validate_tool_call("hallucinated_tool", {})
        assert outcome.valid is False
        assert self._has_issue(outcome, "UNKNOWN_TOOL")

    def test_missing_required_param(self):
        """Omitting the required ``path`` yields MISSING_REQUIRED."""
        outcome = self.detector.validate_tool_call("read_file", {})
        assert outcome.valid is False
        assert self._has_issue(outcome, "MISSING_REQUIRED")

    def test_wrong_type(self):
        """An integer where the schema says string yields WRONG_TYPE."""
        outcome = self.detector.validate_tool_call("read_file", {"path": 123})
        assert outcome.valid is False
        assert self._has_issue(outcome, "WRONG_TYPE")

    def test_unknown_param_warning(self):
        """An undeclared param is reported but the call stays valid."""
        arguments = {"path": "/tmp/file.txt", "unknown": "value"}
        outcome = self.detector.validate_tool_call("read_file", arguments)
        assert outcome.valid is True  # Warning, not blocking
        assert self._has_issue(outcome, "UNKNOWN_PARAM")

    def test_placeholder_detection(self):
        """A ``<placeholder>`` value yields PLACEHOLDER_VALUE."""
        outcome = self.detector.validate_tool_call(
            "read_file", {"path": "<placeholder>"}
        )
        assert self._has_issue(outcome, "PLACEHOLDER_VALUE")

    def test_rejection_stats(self):
        """Two failing calls leave a ``total`` of at least 2 in the stats."""
        for tool_name in ("unknown_tool", "read_file"):
            self.detector.validate_tool_call(tool_name, {})
        rejection_stats = self.detector.get_rejection_stats()
        assert rejection_stats["total"] >= 2

    def test_rejection_response(self):
        """``create_rejection_response`` builds a tool-role rejection message."""
        from tools.tool_validator import create_rejection_response

        outcome = self.detector.validate_tool_call("unknown_tool", {})
        message = create_rejection_response(outcome)
        assert message["role"] == "tool"
        assert "rejected" in message["content"].lower()
|
|
|
|
|
|
if __name__ == "__main__":
    # pytest.main() returns the run's exit code. Pass it to the interpreter so
    # that running this file directly exits non-zero when a test fails,
    # instead of always exiting 0.
    raise SystemExit(pytest.main([__file__]))
|