{ "timestamp": "2026-04-16T01:56:48.462512+00:00", "model": "dry-run", "endpoint": "none", "kv_type": "none", "total": 10, "passed": 10, "failed": 0, "accuracy": 1.0, "meets_threshold": true, "threshold": 1.0, "results": [ { "id": "read_file_basic", "name": "Read File \u2014 basic path", "passed": true, "tool_called": null, "expected_tool": "read_file", "schema_valid": true, "args_valid": true, "latency_ms": 0.0, "raw_response": "", "error": null }, { "id": "read_file_offset", "name": "Read File \u2014 with offset", "passed": true, "tool_called": null, "expected_tool": "read_file", "schema_valid": true, "args_valid": true, "latency_ms": 0.0, "raw_response": "", "error": null }, { "id": "web_search_basic", "name": "Web Search \u2014 basic query", "passed": true, "tool_called": null, "expected_tool": "web_search", "schema_valid": true, "args_valid": true, "latency_ms": 0.0, "raw_response": "", "error": null }, { "id": "terminal_basic", "name": "Terminal \u2014 simple command", "passed": true, "tool_called": null, "expected_tool": "terminal", "schema_valid": true, "args_valid": true, "latency_ms": 0.0, "raw_response": "", "error": null }, { "id": "terminal_complex", "name": "Terminal \u2014 complex command", "passed": true, "tool_called": null, "expected_tool": "terminal", "schema_valid": true, "args_valid": true, "latency_ms": 0.0, "raw_response": "", "error": null }, { "id": "code_exec_basic", "name": "Code Execution \u2014 python", "passed": true, "tool_called": null, "expected_tool": "execute_code", "schema_valid": true, "args_valid": true, "latency_ms": 0.0, "raw_response": "", "error": null }, { "id": "code_exec_complex", "name": "Code Execution \u2014 multi-line", "passed": true, "tool_called": null, "expected_tool": "execute_code", "schema_valid": true, "args_valid": true, "latency_ms": 0.0, "raw_response": "", "error": null }, { "id": "delegate_basic", "name": "Delegate Task \u2014 simple", "passed": true, "tool_called": null, "expected_tool": "delegate_task", "schema_valid": true, "args_valid": true, "latency_ms": 0.0, "raw_response": "", "error": null }, { "id": "delegate_context", "name": "Delegate Task \u2014 with context", "passed": true, "tool_called": null, "expected_tool": "delegate_task", "schema_valid": true, "args_valid": true, "latency_ms": 0.0, "raw_response": "", "error": null }, { "id": "parallel_two", "name": "Parallel Tools \u2014 two in one response", "passed": true, "tool_called": null, "expected_tool": "read_file", "schema_valid": true, "args_valid": true, "latency_ms": 0.0, "raw_response": "", "error": null } ], "error": null }